Beispiel #1
0
 /// <summary>
 /// Main search entry point: searches for <paramref name="keyword"/> starting at
 /// position 0 and fetching at most <paramref name="fetchSize"/> results.
 /// </summary>
 /// <param name="repo">Repository passed through to the four-argument overload.</param>
 /// <param name="keyword">Text to search for.</param>
 /// <param name="fetchSize">Maximum number of results to fetch.</param>
 /// <returns>
 /// Whatever the four-argument overload returns, or <c>null</c> if that call throws.
 /// </returns>
 public SearchResult Search(Repository repo, String keyword, int fetchSize)
 {
     try
     {
         return Search(repo, keyword, 0, fetchSize);
     }
     catch (Exception)
     {
         // Failure is reported as null instead of being propagated; this contract
         // is kept unchanged for backward compatibility.
         return null;
     }
 }
Beispiel #2
0
        /// <summary>
        /// Creates a repository (index store) named <paramref name="name"/>, or returns
        /// the one already registered under that name.
        /// </summary>
        /// <param name="name">
        /// Repository name; used as the key in <c>Repositories</c> and stored as
        /// <c>RepositoryInfo.Name</c>.
        /// </param>
        /// <returns>The already registered repository, or the newly created one.</returns>
        /// <remarks>
        /// A new repository is first registered in memory and then written to MongoDB.
        /// If any step fails, the in-memory registration is removed again and the
        /// original exception is rethrown with its stack trace preserved.
        /// </remarks>
        public Repository CreateRepository(String name)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            Repository existing;
            if (this.Repositories.TryGetValue(name, out existing))
            {
                return existing;
            }

            try
            {
                // Register the new repository in memory.
                Repository repo = new Repository(this, name);
                this.Repositories.Add(name, repo);

                // Write the repository information to MongoDB.
                var server = MongoDbLib.GetServerConnection(MongoDbServer);
                var database = server.GetDatabase(DbName);
                var tblRepos = database.GetCollection<RepositoryInfo>(Constants.TblRepository);
                tblRepos.Insert(new RepositoryInfo()
                {
                    Name = name
                });

                return repo;
            }
            catch (Exception)
            {
                // Roll back the in-memory registration so it does not outlive a failed insert.
                if (this.Repositories.ContainsKey(name))
                {
                    this.Repositories.Remove(name);
                }

                // "throw;" (not "throw e;") keeps the original stack trace.
                throw;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Searches the full-text index for <paramref name="keyword"/> and returns one
        /// page of hits ordered by score (descending).
        /// </summary>
        /// <param name="repo">
        /// Repository to search. NOTE(review): this parameter is not read by the method
        /// body; confirm whether the search is meant to be scoped to it.
        /// </param>
        /// <param name="keyword">
        /// Search text. It is segmented into tokens for the lookup, and every literal
        /// occurrence of it in a hit is wrapped as <c>&lt;&lt;keyword&gt;&gt;</c>.
        /// </param>
        /// <param name="startPos">Number of top-ranked documents to skip.</param>
        /// <param name="fetchSize">Maximum number of documents to return.</param>
        /// <returns>
        /// A <c>SearchResult</c> holding the total match count, the elapsed time in seconds
        /// and the requested page of results. When none of the tokens is found in the word
        /// list, a result with only <c>SearchTime</c> set is returned. <c>null</c> is
        /// returned when the number of full-text index entries differs from the number of
        /// matched word ids, or when any exception occurs.
        /// </returns>
        public SearchResult Search(Repository repo, String keyword, int startPos, int fetchSize)
        {
            try
            {
                SearchResult result = new SearchResult();

                // Timer for the lookup; reported in seconds through SearchTime.
                Stopwatch sw = Stopwatch.StartNew();

                // MongoDB initialization.
                var server = MongoDbLib.GetServerConnection(MongoDbServer);
                var database = server.GetDatabase(DbName);
                var tblWordList = database.GetCollection<WordItem>(Constants.TblWordList);
                var tblFullText = database.GetCollection<InvertedIndex>(Constants.TblFullText);

                // Segment the search keyword into tokens.
                List<Pair<String, Int32>> keywordTokens = _segmentor.SegWords(keyword);

                // Fetch the word-list entries that correspond to the tokens.
                var buf = (from t in keywordTokens select t.First).ToList();
                var query = from w in tblWordList.AsQueryable<WordItem>()
                            where w.Word.In(buf)
                            select new { w.WordId };
                List<Int32> wordIdList = new List<Int32>();
                foreach (var aWord in query)
                {
                    wordIdList.Add(aWord.WordId);
                }

                // Zero word ids means zero search results.
                if (wordIdList.Count == 0)
                {
                    sw.Stop();
                    result.SearchTime = sw.ElapsedMilliseconds / 1000.0;
                    return result;
                }

                // Fetch the matching full-text index entries. The query is materialized
                // once so the count check and the per-token lookups below do not each
                // re-run it against the database.
                List<InvertedIndex> indexes = (from i in tblFullText.AsQueryable<InvertedIndex>()
                                               where i.WordId.In(wordIdList)
                                               select i).ToList();

                if (indexes.Count != wordIdList.Count)
                {
                    return null;
                }

                // Map every keyword token to its index entry, in token order.
                List<List<IndexElement>> checkedIndex = new List<List<IndexElement>>();
                foreach (var aToken in keywordTokens)
                {
                    // First throws when a token has no index entry; the catch below
                    // turns that into a null return.
                    checkedIndex.Add(indexes.First(t => t.Word == aToken.First).Indexes);
                }

                // Check each occurrence of the first token to see whether its document is a hit.
                var firstTokenIndex = checkedIndex[0];
                Dictionary<Int32, Int32> hittedDocs = new Dictionary<Int32, Int32>();
                foreach (var currentIndex in firstTokenIndex)
                {
                    if (keywordTokens.Count == 1 || CheckDocumentIsHitted(keywordTokens, 1, checkedIndex, currentIndex))
                    {
                        // hits stays 0 when the document has not been counted yet.
                        Int32 hits;
                        hittedDocs.TryGetValue(currentIndex.DocId, out hits);
                        hittedDocs[currentIndex.DocId] = hits + 1;
                    }
                }

                // Sort documents by score and take the doc ids of the requested range.
                var sortedDocIds = (from entry in hittedDocs orderby entry.Value descending select entry.Key).Skip(startPos).Take(fetchSize).ToList();

                // Store the summary. The timer stops here, so fetching the document
                // texts below is not included in SearchTime.
                result.Matches = hittedDocs.Count;
                sw.Stop();
                result.SearchTime = sw.ElapsedMilliseconds / 1000.0;

                // Take(fetchSize) already caps the list, so only its length bounds the loop.
                for (int i = 0; i < sortedDocIds.Count; i++)
                {
                    Int32 docId = sortedDocIds[i];
                    String rawText = this.GetDocumentById(docId);
                    result.Results.Add(new ResultItem()
                    {
                        Rank = startPos + 1 + i,
                        Score = hittedDocs[docId],
                        HitField = rawText.Replace(keyword, "<<" + keyword + ">>")
                    });
                }
                return result;
            }
            catch (Exception)
            {
                // Any failure is reported to the caller as null; contract kept unchanged.
                return null;
            }
        }