/// <summary>
/// Runs a search: resolves the query key into terms, looks up the index named by the
/// query, collects the word entries for the distinct terms, and fills in the matching
/// documents and counters.
/// </summary>
/// <param name="searchQuery">
/// The query. <c>Key</c> is passed to the analyzer, <c>Name</c> selects the entry in
/// <c>StoreIndexs</c>, and the whole object is forwarded to the document lookup and paging steps.
/// </param>
/// <returns>
/// The search result. Only <c>Terms</c> is populated when the analyzer yields no terms or
/// when no non-null index is registered under <c>searchQuery.Name</c>.
/// </returns>
protected virtual SearchResultInfo GetSearchResultByFind(SearchQueryInfo searchQuery)
{
    var result = new SearchResultInfo { Terms = Analyzer.Resolve(searchQuery.Key) };
    if (result.Terms == null)
    {
        return result;
    }

    if (!StoreIndexs.ContainsKey(searchQuery.Name))
    {
        return result;
    }

    var storeIndex = StoreIndexs[searchQuery.Name];
    if (storeIndex == null)
    {
        return result;
    }

    result.Feilds = storeIndex.StoreFields.Select(it => it.Name).ToList();
    result.Words = new List<WordInfo>();
    foreach (var term in result.Terms)
    {
        // A term whose word entry was already collected is skipped. Any() stops at the
        // first match instead of counting every element like Count(predicate) > 0 did.
        if (result.Words.Any(it => it.Name == term.Name))
        {
            continue;
        }

        var word = Worder.GetInfo(storeIndex, term.Name);
        if (word != null)
        {
            result.Words.Add(word);
        }
    }

    // A null id list means there was nothing to search with; the document fields stay untouched.
    var documentIds = GetSearchDocumentIds(storeIndex, result, searchQuery);
    if (documentIds != null)
    {
        // DocumentCount is the total number of matches, taken before paging is applied.
        result.DocumentCount = documentIds.Count;
        AddSearchDocuments(storeIndex, searchQuery, result, documentIds);
    }

    if (result.Words.Count > 0)
    {
        // Mean of DocumentCount over the collected words.
        // NOTE(review): this truncates if DocumentCount is an integral type - confirm intent.
        result.DataCount = result.Words.Sum(it => it.DocumentCount) / result.Words.Count;
    }

    return result;
}
/// <summary>
/// Builds one weight vector per document from the inversions of every word in the result.
/// </summary>
/// <param name="result">
/// The search result whose <c>Words</c> (and their <c>Inversions</c>) are read.
/// </param>
/// <returns>
/// A dictionary keyed by document id. Each value has <c>result.Words.Count</c> slots; slot
/// <c>i</c> holds the inversion weight of the i-th word for that document, and stays 0 when
/// the word has no inversion for the document.
/// </returns>
protected virtual IDictionary<long, IList<double>> GetSeachDocumentVector(SearchResultInfo result)
{
    var vectors = new Dictionary<long, IList<double>>();
    var dimension = result.Words.Count;
    var wordIndex = 0;
    foreach (var word in result.Words)
    {
        foreach (var inversion in word.Inversions)
        {
            // Single lookup: fetch the document's vector, creating it on first sight.
            IList<double> weights;
            if (!vectors.TryGetValue(inversion.DocumentId, out weights))
            {
                weights = new double[dimension];
                vectors.Add(inversion.DocumentId, weights);
            }

            // If a word lists the same document twice, the later weight wins.
            weights[wordIndex] = inversion.Weight;
        }

        wordIndex++;
    }

    return vectors;
}
/// <summary>
/// Loads the documents for the given ids and appends them to <c>result.Documents</c>,
/// restricted to the requested page when paging is enabled.
/// </summary>
/// <param name="storeIndex">The index passed to the document loader.</param>
/// <param name="searchQuery">
/// Supplies paging: when <c>PageSize</c> is greater than 0, the first
/// <c>PageIndex * PageSize</c> ids are skipped and at most <c>PageSize</c> ids are loaded;
/// otherwise every id is loaded.
/// </param>
/// <param name="result">The result whose <c>Documents</c> list receives the loaded documents.</param>
/// <param name="documentIds">The candidate document ids, in the order they should be loaded.</param>
protected virtual void AddSearchDocuments(StoreIndexInfo storeIndex, SearchQueryInfo searchQuery, SearchResultInfo result, IList<long> documentIds)
{
    var pageSize = searchQuery.PageSize;
    IList<long> pageIds = documentIds;
    if (pageSize > 0)
    {
        var offset = searchQuery.PageIndex * pageSize;
        pageIds = documentIds.Skip(offset).Take(pageSize).ToList();
    }

    foreach (var id in pageIds)
    {
        var document = Documentor.GetInfo(storeIndex, id);
        if (document == null)
        {
            // Ids for which the loader returns nothing are silently dropped.
            continue;
        }

        result.Documents.Add(document);
    }
}
/// <summary>
/// Determines the ids of the documents matching the words collected in the result.
/// </summary>
/// <param name="storeIndex">The index; its <c>StoreDocument.DataCount</c> feeds the query weights.</param>
/// <param name="result">
/// The search result. <c>Words</c> and <c>Terms</c> are read, and <c>Documents</c> is reset
/// to an empty list as soon as at least one word is present.
/// </param>
/// <param name="searchQuery">The query; not used by this implementation.</param>
/// <returns>
/// <c>null</c> when there are no words; the document ids of the single word's inversions
/// when there is exactly one word; otherwise whatever <c>GetOrderbySearchDocuments</c>
/// returns for the query vector and the per-document vectors.
/// </returns>
protected virtual IList<long> GetSearchDocumentIds(StoreIndexInfo storeIndex, SearchResultInfo result, SearchQueryInfo searchQuery)
{
    var words = result.Words;
    if (words == null || words.Count == 0)
    {
        return null;
    }

    result.Documents = new List<DocumentInfo>();

    // With a single word there is nothing to rank against: return its postings as they are.
    if (words.Count == 1)
    {
        return words[0].Inversions.Select(it => it.DocumentId).ToList();
    }

    // One query weight per word, in the same order as result.Words.
    var queryVector = new List<double>();
    foreach (var word in words)
    {
        double weight;
        if (word.DocumentCount == 0)
        {
            weight = 0;
        }
        else
        {
            // Share of the query terms that carry this word's name.
            var termFrequency = (double)result.Terms.Count(it => it.Name == word.Name) / result.Terms.Count;

            // NOTE(review): the log argument works out to DataCount * DataCount / DocumentCount,
            // not the textbook DataCount / DocumentCount - confirm this is intended.
            var inverseFrequency = Math.Log(storeIndex.StoreDocument.DataCount / (word.DocumentCount / (double)storeIndex.StoreDocument.DataCount));

            weight = termFrequency * inverseFrequency;
        }

        queryVector.Add(weight);
    }

    var documentVectors = GetSeachDocumentVector(result);
    return GetOrderbySearchDocuments(queryVector, documentVectors);
}