diff --git a/README.md b/README.md index e60ea4175b1050d03efda7b52c48a5a1009b09e3..e9e8f2d29717259dd954afcc804c34548597e567 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,11 @@ # 文本定位器 -![主界面](https://images.gitee.com/uploads/images/2021/1001/193814_76bc7801_995027.png "主界面") -![索引重建](https://images.gitee.com/uploads/images/2021/1001/193844_2bdbd85a_995027.png "索引重建") -![预览](https://images.gitee.com/uploads/images/2021/1001/193954_8c7e90ce_995027.png "预览") -![分词](https://images.gitee.com/uploads/images/2021/1001/194106_c65124b9_995027.png "分词") +![主界面](%E4%B8%BB%E7%95%8C%E9%9D%A2.png) +![预览](%E9%A2%84%E8%A7%88.png) +![类型筛选](%E7%B1%BB%E5%9E%8B%E7%AD%9B%E9%80%89.png) +![自动分词](%E8%87%AA%E5%8A%A8%E5%88%86%E8%AF%8D.png) +![手动分词](%E6%89%8B%E5%8A%A8%E5%88%86%E8%AF%8D.png) +![索引重建确认](%E7%B4%A2%E5%BC%95%E9%87%8D%E5%BB%BA%E7%A1%AE%E8%AE%A4.png) +![重建索引](%E9%87%8D%E5%BB%BA%E7%B4%A2%E5%BC%95.png) #### 介绍 基于.net实现的本地文档的全文索引定位器,根据关键词搜索定位本地文档内容。便于查找历史文档时节省时间。 diff --git a/TextLocator/App.xaml.cs b/TextLocator/App.xaml.cs index 7fc2692b272861b693458875c6c43ff24e531d1e..b30583a04d4c37ded06177b4eb114e7d0b58559e 100644 --- a/TextLocator/App.xaml.cs +++ b/TextLocator/App.xaml.cs @@ -57,7 +57,7 @@ namespace TextLocator // Excel服务 FileInfoServiceFactory.Register(FileType.Excel表格, new ExcelFileService()); // PowerPoint服务 - FileInfoServiceFactory.Register(FileType.PPT演示文稿, new PowerPointFileService()); + FileInfoServiceFactory.Register(FileType.PPT文稿, new PowerPointFileService()); // PDF服务 FileInfoServiceFactory.Register(FileType.PDF文档, new PdfFileService()); // HTML或XML服务 diff --git a/TextLocator/Core/AppConst.cs b/TextLocator/Core/AppConst.cs index 03ed748216174c70d823829d47b53480279635d0..8c40080dab205f83376dd65ac2a779637fb96bca 100644 --- a/TextLocator/Core/AppConst.cs +++ b/TextLocator/Core/AppConst.cs @@ -15,11 +15,11 @@ namespace TextLocator.Core /// /// 线程池最小数量 /// - public static readonly int THREAD_POOL_MIN_SIZE = int.Parse(AppUtil.ReadValue("ThreadPool", "MinSize", "4")); + public static readonly int THREAD_POOL_MIN_SIZE = int.Parse(AppUtil.ReadValue("ThreadPool", "MinSize", "8")); /// /// 线程池最大数量 /// - public static readonly int THREAD_POOL_MAX_SIZE = int.Parse(AppUtil.ReadValue("ThreadPool", "MaxSize", "8")); + public static readonly int THREAD_POOL_MAX_SIZE = int.Parse(AppUtil.ReadValue("ThreadPool", "MaxSize", "16")); /// /// 应用目录 /// diff --git a/TextLocator/Enums/EnumExtension.cs b/TextLocator/Enums/EnumExtension.cs index 9b8ea38abe8ad4a7f7682ba2ae2823f4629bf7c3..181f002ab1352ed8658661515164a1270cd08f2f 100644 --- a/TextLocator/Enums/EnumExtension.cs +++ b/TextLocator/Enums/EnumExtension.cs @@ -1,11 +1,4 @@ -using System; -using System.Collections.Generic; -using System.ComponentModel; -using System.Globalization; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using System.Windows.Data; +using System.ComponentModel; namespace TextLocator.Enums { diff --git a/TextLocator/Enums/FileType.cs b/TextLocator/Enums/FileType.cs index fa7bf2de87cd75fdd9cba7d53d711fc14c6d5de6..93dd77c9be8050c4d5b251f9312c1d0e8560c5d4 100644 --- a/TextLocator/Enums/FileType.cs +++ b/TextLocator/Enums/FileType.cs @@ -21,7 +21,7 @@ namespace TextLocator.Enums /// PowerPoint /// [Description("ppt,pptx")] - PPT演示文稿, + PPT文稿, /// /// PDF /// diff --git a/TextLocator/Index/IndexCore.cs b/TextLocator/Index/IndexCore.cs index 89c30625fe3283c0fc15978226a57c12807bbc83..a6ca5790c68f60c5d03011086a806cdb22517b7a 100644 --- a/TextLocator/Index/IndexCore.cs +++ b/TextLocator/Index/IndexCore.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; +using System.Text; using System.Text.RegularExpressions; using System.Threading; using TextLocator.Core; @@ -158,17 +159,19 @@ namespace TextLocator.Index { TaskInfo taskInfo = obj as TaskInfo; try - { + { + // 开始时间1 + var taskMark = TaskTime.StartNew(); + + // 索引写入 Lucene.Net.Index.IndexWriter indexWriter = taskInfo.IndexWriter; + // 文件路径 string filePath = taskInfo.FilePath; // 写入 AppUtil.WriteValue("FileIndex", filePath, "1"); - // 开始时间 - var taskMark = TaskTime.StartNew(); - // 文件信息 FileInfo fileInfo = new FileInfo(filePath); // 文件名 @@ -181,9 +184,20 @@ namespace TextLocator.Index // 根据文件路径获取文件类型(自定义文件类型分类) FileType fileType = FileTypeUtil.GetFileType(filePath); + string filePathPadding = filePath; + try + { + filePathPadding = filePath.Substring(0, 35) + "......" + filePath.Substring(filePath.Length - 35); + } + catch { } + + StringBuilder msg = new StringBuilder("[" + finishCount * 1.0F + "/" + taskInfo.TotalCount + "] => 引擎:" + (int)fileType + ",文件:" + filePathPadding); + // 文件内容 string content = FileInfoServiceFactory.GetFileInfoService(fileType).GetFileContent(filePath); + msg.Append(",解析:" + taskMark.ConsumeTime + "秒"); + // 缩略信息 string breviary = AppConst.REGIX_LINE_BREAKS_AND_WHITESPACE.Replace(content, ""); if (breviary.Length > 150) @@ -194,6 +208,9 @@ namespace TextLocator.Index // 文件标记 string fileMark = MD5Util.GetMD5Hash(filePath); //fileInfo.DirectoryName + fileInfo.CreationTime.ToString(); + // 开始时间2 + taskMark = TaskTime.StartNew(); + lock (locker) { // 当索引文件中含有与filemark相等的field值时,会先删除再添加,以防出现重复 @@ -216,11 +233,10 @@ namespace TextLocator.Index // 优化索引 indexWriter.Optimize(); } - - string msg = "解析文件:[" + finishCount * 1.0F + "/" + taskInfo.TotalCount + "] => 引擎:" + (int)fileType + ",文件:" + filePath + ",耗时:" + taskMark.ConsumeTime + "秒"; + msg.Append(",索引:" + taskMark.ConsumeTime + "秒"); // 执行状态回调 - taskInfo.Callback(msg, CalcCompletionRatio(finishCount, taskInfo.TotalCount)); + taskInfo.Callback(msg.ToString(), CalcCompletionRatio(finishCount, taskInfo.TotalCount)); ; log.Debug(msg); } diff --git a/TextLocator/MainWindow.xaml b/TextLocator/MainWindow.xaml index 57b0bb2951a668a6c4b9487c78f67fe47440848d..abf7dbf7f295fab3191f0281f4ab7d44f8e762fd 100644 --- a/TextLocator/MainWindow.xaml +++ b/TextLocator/MainWindow.xaml @@ -12,7 +12,7 @@ - + @@ -20,7 +20,7 @@ - + @@ -48,10 +48,11 @@ + +