/// <summary>
/// Builds the search index and writes it to the SQL databases: segments every not-yet-indexed
/// document, caches per-word insert scripts in memory, and flushes them in batches (creating the
/// index tables first) once the cache limit is reached or the last document has been processed.
/// </summary>
public void BuildSercherIndexToSQLDB(Action<double, string> IndexesProgress = null)
{
    //hashLoadBalance.RemoveAllDBData();
    //hashLoadBalance = new ConsistentHashLoadBalance();
    SetServerDBCount();
    RedBlackTree<string, string> documentIndices_cachList = new RedBlackTree<string, string>();
    var DocumentToatalList = documentDB.GetNotIndexDocument();
    int remainder = DocumentToatalList.Count;
    var remotewords = SercherIndexesDB.GetWords(hashLoadBalance.GetServerNodes());
    var localwords = new HashSet<string>();
    Dictionary<string, TextComponent> textComponent = new Dictionary<string, TextComponent>(); // cached lazily, on first use
    int curWordCachNum = 0;

    for (int i = 0, j = 0; i < DocumentToatalList.Count; i++)
    {
        var doc = DocumentToatalList[i];
        documentDB.UpdateDocumentStateIndexStatus(doc._id, "pro_" + Config.CurrentConfig.IndexesServiceName);
        IEnumerable<SegmenterToken> textSplit = Pretreatment(doc);
        Dictionary<string, DocumentIndex> documentIndices = new Dictionary<string, DocumentIndex>();
        int wordTotal = textSplit.Count();

        foreach (var token in textSplit)
        {
            string word = token.Word.Trim().ToLower();
            if (!remotewords.Contains(word))
            {
                if (!localwords.Contains(word))
                {
                    localwords.Add(word);
                    remotewords.Add(word);
                }
            }

            // Record every occurrence of the same word within this document.
            if (documentIndices.TryGetValue(word, out DocumentIndex documentIndex))
            {
                documentIndex.WordFrequency++;
                if (documentIndex.WordFrequency <= Config.CurrentConfig.MaxIndexWordStartLocation)
                {
                    documentIndex.BeginIndex += ',' + token.StartIndex.ToString();
                }
                documentIndex.DocumentWordTotal = wordTotal;
            }
            else
            {
                documentIndices[word] = new DocumentIndex
                {
                    IndexTime = DateTime.Now.Ticks,
                    DocId = doc._id,
                    WordFrequency = 1,
                    BeginIndex = token.StartIndex.ToString(),
                    DocumentWordTotal = wordTotal,
                    Permission = doc.Permission == 0 ? Config.CurrentConfig.DefaultPermission : doc.Permission
                };
            }
        }

        // Convert this document's indexes into SQL scripts and add them to the global cache, awaiting upload.
        documentIndices.AsParallel().ForAll(kvp =>
        {
            //UpdateIndex(kvp.Key, kvp.Value);
            if (documentIndices_cachList.ContainsKey(kvp.Key.ToString()))
            {
                string sql = InsetValueIntoMemory(kvp.Key, new DocumentIndex[1] { kvp.Value }, false);
                lock (lockobj1) // keys are unique within this loop, so only the append itself needs the lock
                {
                    documentIndices_cachList[kvp.Key] += "," + sql;
                }
            }
            else
            {
                string sql = InsetValueIntoMemory(kvp.Key, new DocumentIndex[1] { kvp.Value }, true);
                lock (lockobj1)
                {
                    documentIndices_cachList.Add(kvp.Key, sql);
                }
            }
        });
        remainder--;
        IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, "Document: " + doc.Name + " cached");
        curWordCachNum += documentIndices.Count;
        documentIndices.Clear();

        if (Config.CurrentConfig.MaxIndexCachWordNum < curWordCachNum || i == DocumentToatalList.Count - 1)
        {
            IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, "Cache limit reached, creating tables");

            // Group the cached words by their target database and create the index tables.
            var group1 = localwords.GroupBy(w => hashLoadBalance.FindCloseServerDBsByTableName(w).DbName).ToArray();
            System.Diagnostics.Stopwatch watch = new Stopwatch();
            watch.Start();
            Parallel.ForEach(group1, g =>
            {
                var wordgroup = g.ToArray();
                hashLoadBalance.GetServerNodes().First(n => n.DbName == g.Key) //!## matching by group key is not ideal, but with only a few databases the impact is small
                    .CreateIndexTable(wordgroup);
                IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, g.Key + ": one batch of tables created");
            });
            watch.Stop();
            IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, "Tables created, elapsed (s): " + watch.ElapsedMilliseconds / 1000);
            localwords.Clear();

            IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, "Uploading indexes");

            // Group the cached scripts by their target database and upload them.
            var group2 = documentIndices_cachList.AsQueryable()
                .GroupBy(kv => hashLoadBalance.FindCloseServerDBsByTableName(kv.Key).DbName).ToArray();
            watch.Restart();
            Parallel.ForEach(group2, new ParallelOptions() { MaxDegreeOfParallelism = Config.CurrentConfig.UploadThreadNum }, g =>
            {
                // Upload this database's insert scripts.
                hashLoadBalance.FindCloseServerDBsByTableName(g.First().Key)
                    .UploadDocumentIndex(g.Select(s => s.Value + ";").ToArray());
                IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, g.Key + ": one batch of indexes uploaded");
            });
            watch.Stop();
            IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, "Index upload complete, elapsed (s): " + watch.ElapsedMilliseconds / 1000);
            documentIndices_cachList.Clear();

            // Mark every document in this batch as indexed.
            while (j <= i)
            {
                documentDB.UpdateDocumentStateIndexStatus(DocumentToatalList[j]._id, "yes");
                j++;
            }
            curWordCachNum = 0;
            IndexesProgress?.Invoke(i / (double)DocumentToatalList.Count, "Batch uploaded, cache flushed");
        }
    }
}
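
// Usage sketch: drives the index build with a simple console progress reporter.
// Assumption: this helper is illustrative only and is not part of the original code base;
// any logging sink could be substituted for Console.WriteLine.
public void BuildSercherIndexToSQLDBWithConsoleProgress()
{
    BuildSercherIndexToSQLDB((progress, message) =>
        Console.WriteLine($"[{progress:P0}] {message}"));
}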
// Removes the given node from the consistent-hash ring and refreshes its cached index-table count.
public void RemoveIndexServiceNodes(SercherIndexesDB sercherIndexesDB)
{
    hashLoadBalance.RemoveHashMap(sercherIndexesDB);
    sercherIndexesDB.IndexesTableCount = sercherIndexesDB.GetSercherIndexCollectionCount();
}
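
// Minimal sketch of the ring lookup that FindCloseServerDBsByTableName presumably performs:
// each server node is hashed onto a 32-bit ring, and a word/table name is routed to the first
// node clockwise from its own hash. The types and the FNV-1a hash below are illustrative
// assumptions, not the actual ConsistentHashLoadBalance implementation.
private static class ConsistentHashSketch
{
    // Returns the node mapped to the first ring position at or after the key's hash,
    // wrapping around to the smallest position when the end of the ring is passed.
    public static string FindCloseNode(SortedDictionary<uint, string> ring, string key)
    {
        uint hash = Fnv1aHash(key);
        foreach (var point in ring)
        {
            if (point.Key >= hash)
            {
                return point.Value;
            }
        }
        return ring.First().Value;
    }

    // 32-bit FNV-1a hash; any stable hash with a reasonably uniform distribution would do.
    private static uint Fnv1aHash(string text)
    {
        uint hash = 2166136261u;
        foreach (char c in text)
        {
            hash ^= c;
            hash *= 16777619u;
        }
        return hash;
    }
}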