Esempio n. 1
0
        /// <summary>
        /// Private constructor: points the searcher at the default server address
        /// and database name from <c>Constants</c>, and creates the word segmentor.
        /// </summary>
        private Searcher()
        {
            // Connection settings start at the project-wide defaults.
            MongoDbServer = Constants.DefaultServerAddress;
            DbName = Constants.DefaultDbName;

            // Segmentor instance kept in a field for later use by this object.
            _segmentor = new ChineseSegmentor();
        }
Esempio n. 2
0
        /// <summary>
        /// Main indexing routine: parses the source documents, segments every stored
        /// paragraph into tokens, builds an in-memory inverted index keyed by word,
        /// and stores both the word list and the full-text index in MongoDB.
        /// </summary>
        /// <returns>
        /// <c>true</c> when every step completed; <c>false</c> when any exception was
        /// thrown (the exception is written to the console and not rethrown).
        /// </returns>
        public bool MakeIndex()
        {
            try
            {
                // 1. Parse the source documents.
                // NOTE(review): presumably this fills the source-text collection read below - confirm.
                ParseSourceDoc();

                // 2. Build the index.
                ChineseSegmentor segmentor = new ChineseSegmentor();
                var server        = MongoDbLib.GetServerConnection(_indexer.MongoDbServer);
                var database      = server.GetDatabase(_indexer.DbName);
                var tblSourceText = database.GetCollection <SourceText>(Constants.TblSourceText);

                // Walk the paragraphs ordered by document id, then paragraph id.
                var sources = from s in tblSourceText.AsQueryable <SourceText>()
                              orderby s.DocId, s.ParaId
                              select s;

                // Segment each paragraph and record one posting per token occurrence.
                Dictionary <String, InvertedIndex> fullIndexes = new Dictionary <String, InvertedIndex>();
                foreach (var aSourceText in sources)
                {
                    List <Pair <String, Int32> > tokens = segmentor.SegWords(aSourceText.Para);
                    foreach (var aToken in tokens)
                    {
                        // Single lookup; a word seen for the first time gets a new entry
                        // that is registered in the dictionary right away.
                        InvertedIndex aIndex;
                        if (!fullIndexes.TryGetValue(aToken.First, out aIndex))
                        {
                            aIndex = new InvertedIndex();
                            aIndex.Word = aToken.First;
                            fullIndexes[aToken.First] = aIndex;
                        }

                        aIndex.Indexes.Add(new IndexElement()
                        {
                            DocId  = aSourceText.DocId,
                            ParaId = aSourceText.ParaId,
                            Offset = aToken.Second
                        });
                    }
                }

                // Store the word list: every distinct word gets a sequential id
                // (dictionary enumeration order), which is also written back onto
                // its inverted-index entry.
                var wordListCollection    = database.GetCollection(Constants.TblWordList);
                List <BsonDocument> batch = new List <BsonDocument>(fullIndexes.Count);
                int wordId = 0;
                foreach (var entry in fullIndexes)
                {
                    entry.Value.WordId = wordId;

                    batch.Add(new BsonDocument()
                    {
                        { "Word", entry.Key },
                        { "WordId", wordId }
                    });

                    wordId++;
                }

                wordListCollection.InsertBatch(batch);

                // Store the full-text (inverted) index.
                var tblFullText = database.GetCollection(Constants.TblFullText);
                tblFullText.InsertBatch <InvertedIndex>(fullIndexes.Values.ToList());

                return true;
            }
            catch (Exception e)
            {
                // Print the whole exception (type, message and stack trace), not just
                // the stack trace, so the cause of the failure is visible.
                Console.WriteLine(e);
                return false;
            }
        }