예제 #1
0
        /// <summary>
        /// Indexes every document returned by <c>documentDB.GetNotIndexDocument()</c> and uploads
        /// the resulting index rows to the index databases in batches.
        /// </summary>
        /// <remarks>
        /// For each document the method:
        /// marks it as in progress (<c>"pro_" + IndexesServiceName</c>), tokenises it via
        /// <c>Pretreatment</c>, aggregates one <c>DocumentIndex</c> per distinct word, and merges the
        /// SQL fragment produced by <c>InsetValueIntoMemory</c> into an in-memory cache keyed by word.
        /// When the number of cached word entries exceeds <c>Config.CurrentConfig.MaxIndexCachWordNum</c>,
        /// or the last document has been processed, the batch is flushed: tables are created for
        /// words not yet known remotely, the cached SQL is uploaded per database, and every document
        /// of the batch is marked <c>"yes"</c>.
        /// The progress fraction passed to the callback is <c>i / documentCount</c> for the zero-based
        /// document index <c>i</c>.
        /// </remarks>
        /// <param name="IndexesProgress">
        /// Optional callback receiving the progress fraction and a status message. It is also invoked
        /// from inside the parallel table-creation and upload loops.
        /// </param>
        public void BuildSercherIndexToSQLDB(Action <double, string> IndexesProgress = null)
        {
            SetServerDBCount();

            // word -> accumulated SQL fragments waiting to be uploaded
            RedBlackTree <string, string> pendingIndexSql = new RedBlackTree <string, string>();
            var documents     = documentDB.GetNotIndexDocument();
            int documentCount = documents.Count;
            var remotewords   = SercherIndexesDB.GetWords(hashLoadBalance.GetServerNodes());
            // words first seen in the current batch; their tables are created at flush time
            var localwords    = new HashSet <string>();
            int cachedWordCount = 0;
            int nextToMark      = 0; // index of the first document not yet marked "yes"

            for (int i = 0; i < documentCount; i++)
            {
                var    doc      = documents[i];
                double progress = i / (double)documentCount;

                documentDB.UpdateDocumentStateIndexStatus(doc._id, "pro_" + Config.CurrentConfig.IndexesServiceName);

                // Materialize once: the token sequence is both counted and iterated.
                List <SegmenterToken> tokens = Pretreatment(doc).ToList();
                int wordTotal = tokens.Count;
                Dictionary <string, DocumentIndex> documentIndices = new Dictionary <string, DocumentIndex>();

                foreach (var token in tokens)
                {
                    // NOTE(review): ToLower() is culture-sensitive; kept as-is so keys stay
                    // consistent with whatever was indexed before - confirm whether invariant casing is wanted.
                    string word = token.Word.Trim().ToLower();

                    // HashSet.Add returns false when the word was already recorded locally.
                    if (!remotewords.Contains(word) && localwords.Add(word))
                    {
                        remotewords.Add(word);
                    }

                    // Aggregate all occurrences of the same word within this document.
                    if (documentIndices.TryGetValue(word, out DocumentIndex documentIndex))
                    {
                        documentIndex.WordFrequency++;
                        // Only the first MaxIndexWordStartLocation start offsets are recorded.
                        if (documentIndex.WordFrequency <= Config.CurrentConfig.MaxIndexWordStartLocation)
                        {
                            documentIndex.BeginIndex += ',' + token.StartIndex.ToString();
                        }
                        documentIndex.DocumentWordTotal = wordTotal;
                    }
                    else
                    {
                        documentIndices[word] = new DocumentIndex
                        {
                            IndexTime         = DateTime.Now.Ticks,
                            DocId             = doc._id,
                            WordFrequency     = 1,
                            BeginIndex        = token.StartIndex.ToString(),
                            DocumentWordTotal = wordTotal,
                            Permission        = doc.Permission == 0 ? Config.CurrentConfig.DefaultPermission : doc.Permission
                        };
                    }
                }

                // Convert each entry to SQL and merge it into the shared cache awaiting upload.
                documentIndices.AsParallel().ForAll(kvp =>
                {
                    // The tree is mutated by other workers, so even the lookup must hold the lock.
                    // Keys are unique within one document, so the answer for this key cannot
                    // change between the check and the write below.
                    bool alreadyCached;
                    lock (lockobj1)
                    {
                        alreadyCached = pendingIndexSql.ContainsKey(kvp.Key);
                    }

                    // Generated outside the lock so the per-word work still runs in parallel.
                    string sql = InsetValueIntoMemory(kvp.Key, new DocumentIndex[1] {
                        kvp.Value
                    }, !alreadyCached);

                    lock (lockobj1)
                    {
                        if (alreadyCached)
                        {
                            pendingIndexSql[kvp.Key] += "," + sql;
                        }
                        else
                        {
                            pendingIndexSql.Add(kvp.Key, sql);
                        }
                    }
                });

                IndexesProgress?.Invoke(progress, "文档:" + doc.Name + " 缓存完成");
                cachedWordCount += documentIndices.Count;

                bool isLastDocument = i == documentCount - 1;
                if (Config.CurrentConfig.MaxIndexCachWordNum >= cachedWordCount && !isLastDocument)
                {
                    continue;
                }

                IndexesProgress?.Invoke(progress, "以达缓存上限,开始创建表");

                // Group the new words by target database and create their tables.
                var wordsByDb = localwords.GroupBy(w => hashLoadBalance.FindCloseServerDBsByTableName(w).DbName).ToArray();
                Stopwatch watch = Stopwatch.StartNew();

                Parallel.ForEach(wordsByDb, g =>
                {
                    var wordgroup = g.ToArray();
                    // Resolving the node by DbName is a weak group key, but acceptable while
                    // the number of databases is small.
                    hashLoadBalance.GetServerNodes().First(n => n.DbName == g.Key)
                    .CreateIndexTable(wordgroup);
                    IndexesProgress?.Invoke(progress, g.Key + ":一组表创建完成");
                });
                watch.Stop();
                IndexesProgress?.Invoke(progress, "表创建完成,用时(s):" + watch.ElapsedMilliseconds / 1000);
                localwords.Clear();

                IndexesProgress?.Invoke(progress, "开始上传索引");

                // Group the cached SQL by target database and upload each group.
                var sqlByDb = pendingIndexSql.GroupBy(kv => hashLoadBalance.FindCloseServerDBsByTableName(kv.Key).DbName).ToArray();

                watch.Restart();
                Parallel.ForEach(sqlByDb, new ParallelOptions()
                {
                    MaxDegreeOfParallelism = Config.CurrentConfig.UploadThreadNum
                }, g =>
                {
                    // Upload this database's insert scripts.
                    hashLoadBalance.FindCloseServerDBsByTableName(g.First().Key)
                    .UploadDocumentIndex(g.Select(s => s.Value + ";").ToArray());
                    IndexesProgress?.Invoke(progress, g.Key + ":一组索引创建完成");
                });
                watch.Stop();
                IndexesProgress?.Invoke(progress, "上传索引完成,用时(s):" + watch.ElapsedMilliseconds / 1000);

                pendingIndexSql.Clear();

                // Every document of the flushed batch is now fully indexed.
                while (nextToMark <= i)
                {
                    documentDB.UpdateDocumentStateIndexStatus(documents[nextToMark]._id, "yes");
                    nextToMark++;
                }
                cachedWordCount = 0;
                IndexesProgress?.Invoke(progress, "一批上传完成,刷新缓存");
            }
        }