/// <summary>
/// Private constructor. Sets the MongoDB server address and database name
/// to the defaults declared in <c>Constants</c> and creates the
/// <c>ChineseSegmentor</c> instance held in <c>_segmentor</c>.
/// </summary>
private Searcher()
{
    MongoDbServer = Constants.DefaultServerAddress;
    DbName = Constants.DefaultDbName;
    _segmentor = new ChineseSegmentor();
}
/// <summary>
/// Main entry point for index building. Calls <c>ParseSourceDoc</c>, then
/// reads every <c>SourceText</c> row (ordered by <c>DocId</c>, <c>ParaId</c>),
/// segments each paragraph into tokens, accumulates one <c>InvertedIndex</c>
/// per distinct word, and finally writes the word list and the inverted
/// indexes to their MongoDB collections.
/// </summary>
/// <returns>
/// <c>true</c> when every step completed without throwing; <c>false</c> when
/// any exception was raised (the exception is written to the console).
/// </returns>
public bool MakeIndex()
{
    try
    {
        // 1. Parse the source documents.
        // NOTE(review): presumably this fills the source-text collection read below — confirm.
        ParseSourceDoc();

        // 2. Build the index.
        ChineseSegmentor segmentor = new ChineseSegmentor();
        var server = MongoDbLib.GetServerConnection(_indexer.MongoDbServer);
        var database = server.GetDatabase(_indexer.DbName);
        var tblSourceText = database.GetCollection<SourceText>(Constants.TblSourceText);

        // Segment every paragraph and process each token, visiting paragraphs
        // in (DocId, ParaId) order.
        var sources = from s in tblSourceText.AsQueryable<SourceText>()
                      orderby s.DocId, s.ParaId
                      select s;

        Dictionary<String, InvertedIndex> fullIndexes = new Dictionary<String, InvertedIndex>();
        foreach (var aSourceText in sources)
        {
            List<Pair<String, Int32>> tokens = segmentor.SegWords(aSourceText.Para);
            foreach (var aToken in tokens)
            {
                // Single lookup; the entry is registered only the first time a word is seen.
                InvertedIndex aIndex;
                if (!fullIndexes.TryGetValue(aToken.First, out aIndex))
                {
                    aIndex = new InvertedIndex();
                    aIndex.Word = aToken.First;
                    fullIndexes[aToken.First] = aIndex;
                }

                // Record where this occurrence of the word was found.
                aIndex.Indexes.Add(new IndexElement()
                {
                    DocId = aSourceText.DocId,
                    ParaId = aSourceText.ParaId,
                    Offset = aToken.Second
                });
            }
        }

        // Store the word list: each distinct word gets a sequential WordId
        // (dictionary enumeration order), which is also stamped on its index entry.
        var wordListCollection = database.GetCollection(Constants.TblWordList);
        List<BsonDocument> batch = new List<BsonDocument>(fullIndexes.Count);
        int wordId = 0;
        foreach (KeyValuePair<String, InvertedIndex> entry in fullIndexes)
        {
            entry.Value.WordId = wordId;
            batch.Add(new BsonDocument()
            {
                { "Word", entry.Key },
                { "WordId", wordId }
            });
            wordId++;
        }
        wordListCollection.InsertBatch(batch);

        // Store the full-text (inverted) index.
        var tblFullText = database.GetCollection(Constants.TblFullText);
        tblFullText.InsertBatch<InvertedIndex>(fullIndexes.Values.ToList());

        return true;
    }
    catch (Exception e)
    {
        // Log the whole exception (type, message, inner exceptions and stack
        // trace); the stack trace alone does not say what went wrong.
        Console.WriteLine(e);
        return false;
    }
}