/// <summary>
/// Stores a new keyword and, when the insert succeeds, records the change
/// and queues a search task for it.
/// </summary>
/// <param name="keywords">Keyword text to add; null or empty input is rejected.</param>
/// <returns>
/// A JSON payload built from <c>ErrorModel.InputError</c>, <c>ErrorModel.OperateSuccess</c>
/// or <c>ErrorModel.OperateFailed</c>.
/// </returns>
public JsonResult AddKeywords(string keywords)
{
    if (string.IsNullOrEmpty(keywords))
    {
        return Json(ErrorModel.InputError);
    }

    var entity = new TraficKeywords { Keywords = keywords };
    var keywordsBll = new TraficKeywordsBll();
    keywordsBll.Insert(entity);

    // A positive Id after Insert is treated as the success signal.
    var inserted = entity.Id > 0;
    if (!inserted)
    {
        return Json(ErrorModel.OperateFailed);
    }

    DataUpdateLog.SingleUpdate(nameof(TraficKeywords), entity.Id, DataUpdateType.Insert);

    // NOTE(review): task type 2 presumably means "search all files for this keyword" - confirm.
    SearchHelper.AddSearchTask(2, entity.Id);

    return Json(ErrorModel.OperateSuccess);
}
/// <summary>
/// Removes duplicate keywords by rebuilding the TraficKeywords table.
/// After user confirmation, every distinct keyword text that contains no digit is
/// re-inserted into the truncated table; keyword texts that contain a digit are
/// dropped and listed in E:\removed.txt.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnDistinct_Click(object sender, EventArgs e)
{
    var answer = MessageBox.Show("确定要清除 TraficKeywods 表中的重复项吗?", "警告", MessageBoxButtons.YesNo, MessageBoxIcon.Warning);
    if (answer != DialogResult.Yes)
    {
        return;
    }

    var bll = new TraficKeywordsBll();

    // De-duplicate on the keyword text itself. Calling Distinct() on the entities
    // only removes duplicates if TraficKeywords defines value equality, which
    // cannot be relied on from here.
    var seen = new HashSet<string>();
    var removed = new List<string>();
    var newList = new List<TraficKeywords>();

    foreach (var item in bll.QueryAll())
    {
        if (!seen.Add(item.Keywords))
        {
            continue;
        }

        if (Regex.IsMatch(item.Keywords, "\\d"))
        {
            // Keywords containing any digit are discarded and reported.
            removed.Add(item.Keywords);
        }
        else
        {
            newList.Add(new TraficKeywords { Keywords = item.Keywords });
        }
    }

    // NOTE(review): truncate + bulk insert are not wrapped in a transaction;
    // a failure in BulkInsert leaves the table empty.
    bll.ExecuteSql("TRUNCATE TABLE TraficKeywords;");
    bll.BulkInsert(newList);

    // WriteAllText creates or overwrites the file, so a shorter report never
    // leaves stale content from a previous run at the end of the file.
    File.WriteAllText("E:\\removed.txt", string.Join("\r\n", removed));
}
/// <summary>
/// Returns the keyword list as JSON, sorted on the "AddTime" column.
/// </summary>
/// <returns>A JSON payload wrapping the query result via <c>ErrorModel.GetDataSuccess</c>.</returns>
public JsonResult GetKeywords()
{
    // NOTE(review): the three nulls presumably mean "no filter / all fields / no parameters"
    // and the trailing 'true' presumably selects descending order - confirm against QueryList.
    var list = new TraficKeywordsBll().QueryList(null, null, null, "AddTime", true);
    var payload = ErrorModel.GetDataSuccess(list);
    return Json(payload);
}
/// <summary>
/// Searches a newly added file for every stored keyword.
/// Only files whose extension is ".zip" and that exist on disk are processed.
/// </summary>
/// <param name="fileId">Id of the file record to search.</param>
private static void SearchForNewFile(int fileId)
{
    var fileBll = new TraficFilesBll();
    var file = fileBll.QuerySingle(fileId);

    // Ordinal, case-insensitive comparison: tolerates a null extension and does not
    // depend on the current culture (ToLower() breaks ".ZIP" under e.g. tr-TR).
    if (file == null || !string.Equals(file.FileExtension, ".zip", StringComparison.OrdinalIgnoreCase))
    {
        return;
    }

    var keywordsBll = new TraficKeywordsBll();
    var keywordsList = keywordsBll.QueryAll();
    var zipFileName = PathExtension.MapPath(file.FilePath);
    var zipTempFileName = Path.Combine(ZipTempPath, Path.GetFileName(file.FilePath));

    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(ZipTempPath);

    if (!File.Exists(zipFileName))
    {
        return;
    }

    FileHelper.ExtractZip(zipFileName, ExtractPath);
    var html = GetHtmlStr(ExtractPath, out string htmlFileName, out Encoding encoding);
    html = AddIdForHtmlDom(html);

    // One task per keyword; each task logs its own failure so that a single
    // bad keyword does not abort the others or make WaitAll throw.
    var tasks = new List<Task>();
    foreach (var keywords in keywordsList)
    {
        tasks.Add(Task.Factory.StartNew(() =>
        {
            try
            {
                var searchResult = SearchFromHtml(keywords.Keywords, html);
                SearchResultEnqueue(searchResult, fileId, keywords.Id);
            }
            catch (Exception ex)
            {
                ExceptionLogBll.ExceptionPersistence(nameof(SearchHelper), nameof(SearchHelper), ex);
            }
        }));
    }

    Task.WaitAll(tasks.ToArray());

    ClearSearchResultQueue();
    FileHelper.Write(htmlFileName, html, encoding);
    SearchCompleted(zipTempFileName, zipFileName, ExtractPath);

    // NOTE(review): forced collection kept from the original, presumably to release
    // the large HTML string promptly - confirm it is still needed.
    GC.Collect();
}
/// <summary>
/// Persists the collected keywords: duplicates are removed and the remaining
/// entries are bulk-inserted with the current local time as their AddTime.
/// </summary>
/// <param name="keywords">Keyword texts to store.</param>
private void KeywordsPersistence(List<string> keywords)
{
    var uniqueWords = keywords.Distinct();
    var keywordsBll = new TraficKeywordsBll();

    var modelList = uniqueWords
        .Select(word => new TraficKeywords { Keywords = word, AddTime = DateTime.Now })
        .ToList();

    keywordsBll.BulkInsert(modelList);
}
/// <summary>
/// Seeds the keyword table from the configured keywords file.
/// The task only runs when the database holds fewer than 100 keyword entries.
/// After the keywords are inserted, a search task is queued for every
/// non-deleted file. Any exception raised while reading, inserting or queueing
/// is logged through <c>ExceptionLogBll</c> and not rethrown.
/// </summary>
public static void AddSearchKeywords()
{
    // Check how many keywords the database already holds.
    var keywordsBll = new TraficKeywordsBll();
    var totalCount = keywordsBll.GetTotalCount();
    if (totalCount >= 100)
    {
        return;
    }

    // Read the keyword list from the file.
    var filePath = PathExtension.MapPath(AppSettings.KeywordsFilePath);
    try
    {
        string text;
        using (var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
        using (var streamReader = new StreamReader(fileStream))
        {
            text = streamReader.ReadToEnd();
        }

        // Accept Windows, Unix and old-Mac line endings so an LF-only file is not
        // read as one giant keyword.
        var keywords = text.Split(new[] { "\r\n", "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);

        // @FrancisTan 2017-10-27: bulk-insert the keywords.
        // Materialised once so the projection is not re-run if BulkInsert enumerates more than once.
        var modelList = keywords
            .Select(k => new TraficKeywords { Keywords = k, AddTime = DateTime.Now })
            .ToList();
        keywordsBll.BulkInsert(modelList);
        DataUpdateLog.BulkUpdate(nameof(TraficKeywords), 0);

        // Queue a keyword search for each file in turn.
        var fileBll = new TraficFilesBll();
        var idList = fileBll.QueryList("IsDelete=0", new string[] { "Id" }).Select(t => t.Id).ToList();
        foreach (var id in idList)
        {
            SearchHelper.AddSearchTask(1, id);
        }
    }
    catch (Exception ex)
    {
        ExceptionLogBll.ExceptionPersistence(nameof(TempTask), nameof(TempTask), ex);
    }
}
/// <summary>
/// Cleans up duplicate search results caused by duplicate keywords.
/// For each keyword text that occurs more than once, the first row is kept and,
/// in one transaction per group, the remaining keyword rows, their search results
/// and the matching DbUpdateLog rows are deleted.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnDistinct2_Click(object sender, EventArgs e)
{
    var bll = new TraficKeywordsBll();
    var resBll = new TraficSearchResultBll();
    var dbUpdateBll = new DbUpdateLogBll();

    var allKeywords = bll.QueryAll();
    var searchResults = resBll.QueryList("", new string[] { "Id", "KeywordsId" });

    foreach (var group in allKeywords.GroupBy(k => k.Keywords))
    {
        // Everything after the first row of a group is a duplicate to delete.
        var ids = group.Select(k => k.Id).Skip(1).ToList();
        if (ids.Count == 0)
        {
            continue;
        }

        var idsStr = string.Join(",", ids);

        // Ids of the search results that belong to the duplicate keywords.
        var resIdsStr = string.Join(",", searchResults.Where(r => ids.Contains(r.KeywordsId)).Select(r => r.Id));

        // "0" stands in for an empty id list so the IN (...) clause stays valid SQL.
        var resultIdsForSql = resIdsStr == "" ? "0" : resIdsStr;

        var deleteKeywordsSql = $"DELETE FROM TraficKeywords WHERE Id IN({idsStr})";
        var deleteKeywordsUpdates = $"DELETE FROM DbUpdateLog WHERE TargetId IN({idsStr}) AND TableName='TraficKeywords'";
        var deleteResultSql = $"DELETE FROM TraficSearchResult WHERE KeywordsId IN({idsStr})";
        var deleteResultUpdates = $"DELETE FROM DbUpdateLog WHERE TargetId IN({resultIdsForSql}) AND TableName='TraficSearchResult'";

        bll.ExecuteTransation(
            () => bll.ExecuteSql(deleteKeywordsSql) >= 0,
            () => bll.ExecuteSql(deleteKeywordsUpdates) >= 0,
            () => bll.ExecuteSql(deleteResultSql) >= 0,
            () => bll.ExecuteSql(deleteResultUpdates) >= 0
        );
    }
}
/// <summary>
/// Searches every non-deleted ".zip" file for a newly added keyword.
/// Files that do not exist on disk are skipped.
/// </summary>
/// <param name="keywordsId">Id of the keyword record to search for.</param>
private static void SearchForNewKeywords(int keywordsId)
{
    var keywordsBll = new TraficKeywordsBll();
    var keywords = keywordsBll.QuerySingle(keywordsId);
    if (keywords == null)
    {
        return;
    }

    var fileBll = new TraficFilesBll();

    // Ordinal, case-insensitive comparison: tolerates a null extension and does not
    // depend on the current culture (ToLower() breaks ".ZIP" under e.g. tr-TR).
    var fileList = fileBll
        .QueryList("IsDelete=0", new[] { "Id", "FilePath", "FileExtension" })
        .Where(t => string.Equals(t.FileExtension, ".zip", StringComparison.OrdinalIgnoreCase));

    foreach (var file in fileList)
    {
        var zipFileName = PathExtension.MapPath(file.FilePath);
        if (!File.Exists(zipFileName))
        {
            continue;
        }

        FileHelper.ExtractZip(zipFileName, ExtractPath);
        var html = GetHtmlStr(ExtractPath, out string htmlFileName, out Encoding encoding);
        html = AddIdForHtmlDom(html);

        var searchResult = SearchFromHtml(keywords.Keywords, html);
        FileHelper.Write(htmlFileName, html, encoding);

        var zipTempFileName = Path.Combine(ZipTempPath, Path.GetFileName(file.FilePath));

        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(ZipTempPath);

        SearchResultEnqueue(searchResult, file.Id, keywordsId);
        SearchCompleted(zipTempFileName, zipFileName, ExtractPath);
    }

    ClearSearchResultQueue();
}