/// <summary>
/// Convenience search entry point: runs the paged search starting at the first result.
/// </summary>
/// <param name="repo">Repository argument, forwarded unchanged to the paged overload.</param>
/// <param name="keyword">Search text, forwarded unchanged to the paged overload.</param>
/// <param name="fetchSize">Maximum number of result items to fetch.</param>
/// <returns>
/// Whatever the paged overload returns for a start position of 0,
/// or null if that call throws.
/// </returns>
public SearchResult Search(Repository repo, String keyword, int fetchSize)
{
    SearchResult outcome;
    try
    {
        // Delegate to the paged overload with a start offset of 0.
        outcome = Search(repo, keyword, 0, fetchSize);
    }
    catch (Exception)
    {
        // Any failure is reported to the caller as a null result.
        outcome = null;
    }
    return outcome;
}
/// <summary>
/// Creates an index repository, or returns the one already registered under the same name.
/// </summary>
/// <param name="name">
/// Repository name; used as the key in <c>Repositories</c> and stored as the
/// <c>Name</c> of the <c>RepositoryInfo</c> record inserted into MongoDB.
/// </param>
/// <returns>The already-registered repository for <paramref name="name"/>, or the newly created one.</returns>
/// <remarks>
/// If creating or persisting the repository fails, the in-memory registration is rolled back
/// and the original exception propagates to the caller with its stack trace intact.
/// </remarks>
public Repository CreateRepository(String name)
{
    // Already registered: hand back the existing instance.
    if (this.Repositories.ContainsKey(name))
    {
        return this.Repositories[name];
    }

    try
    {
        // Register the new repository in memory.
        Repository repo = new Repository(this, name);
        this.Repositories.Add(name, repo);

        // Persist the repository information to MongoDB.
        var server = MongoDbLib.GetServerConnection(MongoDbServer);
        var database = server.GetDatabase(DbName);
        var tblRepos = database.GetCollection<RepositoryInfo>(Constants.TblRepository);
        tblRepos.Insert(new RepositoryInfo() { Name = name });

        return repo;
    }
    catch (Exception)
    {
        // Roll back the in-memory registration so it does not outlive a failed insert.
        if (this.Repositories.ContainsKey(name))
        {
            this.Repositories.Remove(name);
        }

        // Rethrow with "throw;" (not "throw e;") so the original stack trace is preserved.
        throw;
    }
}
/// <summary>
/// Searches the full-text index for a keyword and returns one page of ranked results.
/// </summary>
/// <param name="repo">
/// Not referenced by this method body; the lookup runs against the word-list and
/// full-text collections of the configured database.
/// </param>
/// <param name="keyword">Search text; it is segmented into tokens and also highlighted in each hit.</param>
/// <param name="startPos">Number of top-ranked documents to skip before the returned page.</param>
/// <param name="fetchSize">Maximum number of documents in the returned page.</param>
/// <returns>
/// A <c>SearchResult</c> holding the total match count, the search time in seconds and the
/// requested page of hits. When none of the tokens is found in the word list, the result
/// carries only the search time. Returns null when the number of index records differs from
/// the number of matched word ids, or when any exception occurs during the search.
/// </returns>
public SearchResult Search(Repository repo, String keyword, int startPos, int fetchSize)
{
    try
    {
        SearchResult result = new SearchResult();

        // Timer
        Stopwatch sw = Stopwatch.StartNew();

        // MongoDB initialization
        var server = MongoDbLib.GetServerConnection(MongoDbServer);
        var database = server.GetDatabase(DbName);
        var tblWordList = database.GetCollection<WordItem>(Constants.TblWordList);
        var tblFullText = database.GetCollection<InvertedIndex>(Constants.TblFullText);

        // Segment the search keyword into tokens.
        List<Pair<String, Int32>> keywordTokens = _segmentor.SegWords(keyword);

        // Look up the word-list entries that correspond to the tokens.
        var buf = (from t in keywordTokens select t.First).ToList();
        var query = from w in tblWordList.AsQueryable<WordItem>()
                    where w.Word.In(buf)
                    select new { w.WordId };
        List<Int32> wordIdList = new List<Int32>();
        foreach (var aWord in query)
        {
            wordIdList.Add(aWord.WordId);
        }

        // No word ids means the search has zero results.
        if (wordIdList.Count == 0)
        {
            sw.Stop();
            result.SearchTime = sw.ElapsedMilliseconds / 1000.0;
            return result;
        }

        // Fetch the matching full-text index records once. Materializing here avoids
        // re-running the database query for the count check and for every token below.
        List<InvertedIndex> indexes = (from i in tblFullText.AsQueryable<InvertedIndex>()
                                       where i.WordId.In(wordIdList)
                                       select i).ToList();

        // NOTE(review): a count mismatch presumably means the word list and the full-text
        // index are out of sync; callers get null in that case - confirm this is intended.
        if (indexes.Count != wordIdList.Count)
        {
            return null;
        }

        // Map every keyword token to its index entries. First() throws when a token has
        // no index record; the outer catch turns that into a null result.
        List<List<IndexElement>> checkedIndex = new List<List<IndexElement>>();
        foreach (var aToken in keywordTokens)
        {
            String tokenText = aToken.First;
            checkedIndex.Add(indexes.First(t => t.Word == tokenText).Indexes);
        }

        // Walk the occurrences of the first token and count, per document, those that
        // qualify as a hit (a single-token keyword always qualifies).
        var firstTokenIndex = checkedIndex[0];
        Dictionary<Int32, Int32> hittedDocs = new Dictionary<Int32, Int32>();
        foreach (var currentIndex in firstTokenIndex)
        {
            if (keywordTokens.Count == 1 || CheckDocumentIsHitted(keywordTokens, 1, checkedIndex, currentIndex))
            {
                if (hittedDocs.ContainsKey(currentIndex.DocId))
                {
                    hittedDocs[currentIndex.DocId]++;
                }
                else
                {
                    hittedDocs[currentIndex.DocId] = 1;
                }
            }
        }

        // Sort documents by score (descending) and take the requested page of doc ids.
        var sortedDocIds = (from entry in hittedDocs
                            orderby entry.Value descending
                            select entry.Key).Skip(startPos).Take(fetchSize).ToList();

        // Store the totals; the timer is stopped before the document texts are loaded.
        result.Matches = hittedDocs.Count;
        sw.Stop();
        result.SearchTime = sw.ElapsedMilliseconds / 1000.0;

        // Take(fetchSize) already caps the page, so the list length is the only bound needed.
        for (int i = 0; i < sortedDocIds.Count; i++)
        {
            String rawText = this.GetDocumentById(sortedDocIds[i]);
            result.Results.Add(new ResultItem()
            {
                Rank = startPos + 1 + i,
                Score = hittedDocs[sortedDocIds[i]],
                HitField = rawText.Replace(keyword, "<<" + keyword + ">>")
            });
        }

        return result;
    }
    catch (Exception)
    {
        // Any failure during the search is reported to the caller as a null result.
        return null;
    }
}