/// <summary>
/// Resolves the ids of the documents that match the words found for a query.
/// </summary>
/// <param name="storeIndex">Index definition; <c>StoreDocument.DataCount</c> is used when weighting the query words.</param>
/// <param name="result">Search result carrying the query terms and the matched words. Its <c>Documents</c> list is replaced with a new empty list whenever at least one word matched.</param>
/// <param name="searchQuery">The search query (not read by this implementation).</param>
/// <returns>
/// <c>null</c> when no words matched; the document ids of the single word's inversions when exactly
/// one word matched; otherwise whatever <c>GetOrderbySearchDocuments</c> returns for the query and
/// document vectors.
/// </returns>
protected virtual IList<long> GetSearchDocumentIds(StoreIndexInfo storeIndex, SearchResultInfo result, SearchQueryInfo searchQuery)
{
    if (result.Words == null || result.Words.Count == 0)
    {
        return null;
    }

    result.Documents = new List<DocumentInfo>();

    // A single word needs no ranking: its inverted list already is the answer.
    if (result.Words.Count == 1)
    {
        return (from inversion in result.Words[0].Inversions
                select inversion.DocumentId).ToList();
    }

    // Build one weight per matched word, in the same order as result.Words.
    var queryVector = new List<double>();
    foreach (var word in result.Words)
    {
        if (word.DocumentCount == 0)
        {
            queryVector.Add(0d);
            continue;
        }

        // Share of the query terms that carry this word's name.
        var matchingTerms = result.Terms.Count(term => term.Name == word.Name);
        var termFrequency = (double)matchingTerms / result.Terms.Count;

        // NOTE(review): the log argument is DataCount / (DocumentCount / DataCount), i.e. DataCount^2 / DocumentCount.
        // Kept exactly as written; confirm this is the intended inverse-document-frequency form.
        var documentRatio = word.DocumentCount / (double)storeIndex.StoreDocument.DataCount;
        var inverseFrequency = Math.Log(storeIndex.StoreDocument.DataCount / documentRatio);

        queryVector.Add(termFrequency * inverseFrequency);
    }

    var documentVectors = GetSeachDocumentVector(result);
    return GetOrderbySearchDocuments(queryVector, documentVectors);
}
/// <summary>
/// Saves the given words to the index by passing each one to <c>Worder.Insert</c>, in list order.
/// </summary>
/// <param name="storeIndex">Index definition the words are inserted under.</param>
/// <param name="words">Words to insert.</param>
protected virtual void InsertWords(StoreIndexInfo storeIndex, IList<WordInfo> words)
{
    for (var position = 0; position < words.Count; position++)
    {
        Worder.Insert(storeIndex, words[position]);
    }
}
/// <summary>
/// Reads the store-index definitions from an XML configuration document and replaces
/// <c>StoreIndexs</c> with the freshly built dictionary, keyed by index name.
/// </summary>
/// <param name="doc">Configuration document; every <c>/configuration/Search/XmlIndexer/Info</c> node describes one index.</param>
protected virtual void LoadStoreIndexsByXml(XmlDocument doc)
{
    var loadedIndexes = new Dictionary<string, StoreIndexInfo>();
    XmlNodeList infoNodes = doc.SelectNodes("/configuration/Search/XmlIndexer/Info");
    if (infoNodes == null)
    {
        StoreIndexs = loadedIndexes;
        return;
    }

    foreach (XmlNode infoNode in infoNodes)
    {
        var documentNode = infoNode.SelectSingleNode("StoreDocument");
        var fieldNodes = infoNode.SelectNodes("StoreField");
        var sequenceNodes = infoNode.SelectNodes("StoreSequence");

        var indexInfo = new StoreIndexInfo();
        indexInfo.Name = infoNode.Attributes["Name"].Value;

        // The TopDocumentCount attribute is optional; 500 is used when it is absent.
        var topCountAttribute = infoNode.Attributes["TopDocumentCount"];
        if (topCountAttribute == null)
        {
            indexInfo.TopDocumentCount = 500;
        }
        else
        {
            indexInfo.TopDocumentCount = int.Parse(topCountAttribute.Value);
        }

        indexInfo.StoreDocument = GetStoreDocumentByXmlNode(documentNode);
        indexInfo.StoreFields = GetStoreFieldsByXmlNodes(fieldNodes);
        indexInfo.StoreSequences = GetStoreSequencesByXmlNodes(sequenceNodes);

        // Add (not the indexer) so a duplicate index name still throws.
        loadedIndexes.Add(indexInfo.Name, indexInfo);
    }

    // Published only after every node parsed successfully.
    StoreIndexs = loadedIndexes;
}
/// <summary>
/// Builds the word entries for one document from its analyzed field terms and inserts them
/// through <c>InsertWords</c>.
/// </summary>
/// <remarks>
/// Each distinct term name yields one <c>WordInfo</c> with a single inversion for the document.
/// Every field in which the term occurs adds one <c>InversionFeildInfo</c> holding the field index
/// and the term's relative frequency within that field.
/// </remarks>
/// <param name="storeIndex">Index definition the words are inserted under.</param>
/// <param name="document">Document the terms belong to; nothing happens when it is <c>null</c>.</param>
/// <param name="feildTerms">Terms per field index; nothing happens when it is <c>null</c>.</param>
protected virtual void CreateWords(StoreIndexInfo storeIndex, DocumentInfo document, IDictionary<int, IList<TermInfo>> feildTerms)
{
    if (feildTerms == null || document == null)
    {
        return;
    }

    var wordsByName = new Dictionary<string, WordInfo>();
    var createdWords = new List<WordInfo>(); // keeps first-seen order for insertion

    foreach (var feildTerm in feildTerms)
    {
        foreach (var termGroup in feildTerm.Value.GroupBy(term => term.Name))
        {
            // Occurrences of this term divided by the number of terms in the field (guarded against zero).
            var frequency = (double)termGroup.Count() / (feildTerm.Value.Count == 0 ? 1 : feildTerm.Value.Count);

            WordInfo word;
            if (!wordsByName.TryGetValue(termGroup.Key, out word))
            {
                var documentInversion = new InversionInfo
                {
                    DocumentId = document.Id,
                    Feilds = new List<InversionFeildInfo>()
                };
                word = new WordInfo
                {
                    Name = termGroup.Key,
                    DocumentCount = 1,
                    Inversions = new List<InversionInfo> { documentInversion }
                };
                createdWords.Add(word);
                wordsByName.Add(termGroup.Key, word);
            }

            // The word's first inversion is the one created above for this document.
            word.Inversions.First().Feilds.Add(new InversionFeildInfo
            {
                FeildIndex = feildTerm.Key,
                Frequency = frequency
            });
        }
    }

    InsertWords(storeIndex, createdWords);
}
/// <summary>
/// Loads the documents for the given ids and appends them to <c>result.Documents</c>.
/// </summary>
/// <remarks>
/// When <c>searchQuery.PageSize</c> is greater than zero only the requested page of ids is loaded
/// (<c>PageIndex * PageSize</c> ids are skipped, so <c>PageIndex</c> is presumably zero-based — confirm
/// against callers). Ids for which <c>Documentor.GetInfo</c> returns <c>null</c> are skipped.
/// </remarks>
/// <param name="storeIndex">Index definition the documents are read from.</param>
/// <param name="searchQuery">Query supplying the paging values.</param>
/// <param name="result">Search result whose <c>Documents</c> list receives the loaded documents.</param>
/// <param name="documentIds">Ids to load; <c>null</c> or empty means there is nothing to add.</param>
protected virtual void AddSearchDocuments(StoreIndexInfo storeIndex, SearchQueryInfo searchQuery, SearchResultInfo result, IList<long> documentIds)
{
    // GetSearchDocumentIds returns null when no words matched; treat that as "no documents"
    // instead of failing with a NullReferenceException.
    if (documentIds == null || documentIds.Count == 0)
    {
        return;
    }

    // GetSearchDocumentIds only creates the list on its non-null paths; make sure it exists
    // before adding to it in case the ids came from somewhere else.
    if (result.Documents == null)
    {
        result.Documents = new List<DocumentInfo>();
    }

    if (searchQuery.PageSize > 0)
    {
        documentIds = documentIds
            .Skip(searchQuery.PageIndex * searchQuery.PageSize)
            .Take(searchQuery.PageSize)
            .ToList();
    }

    foreach (var documentId in documentIds)
    {
        var document = Documentor.GetInfo(storeIndex, documentId);
        if (document != null)
        {
            result.Documents.Add(document);
        }
    }
}
/// <summary>
/// Analyzes the indexable fields of a document, stores the document through
/// <c>Documentor.Insert</c>, and returns the analyzed terms.
/// </summary>
/// <remarks>
/// A field is analyzed when a store-field definition exists for its position and that definition's
/// type is not <c>FieldIndexType.OnlyStore</c>. Fields whose type is <c>FieldIndexType.OnlyIndex</c>
/// are set to <c>null</c> in <c>document.Feilds</c> before the document is inserted (presumably so
/// index-only text is not persisted — confirm).
/// </remarks>
/// <param name="storeIndex">Index definition supplying the per-position store-field settings.</param>
/// <param name="document">Document to save; its <c>Feilds</c> entries may be replaced with <c>null</c>.</param>
/// <returns>Analyzed terms keyed by field position; an empty list is stored when the analyzer returns <c>null</c>.</returns>
protected virtual IDictionary<int, IList<TermInfo>> SaveDocument(StoreIndexInfo storeIndex, DocumentInfo document)
{
    var feildTerms = new Dictionary<int, IList<TermInfo>>();

    // Iterate over a snapshot: entries of document.Feilds are overwritten inside the loop, and
    // writing through a List<T> indexer while a foreach over the same list is running makes the
    // enumerator throw InvalidOperationException on its next step.
    var feilds = document.Feilds.ToArray();
    for (var i = 0; i < feilds.Length; i++)
    {
        var storeFeild = storeIndex.GetStoreField(i);
        if (storeFeild == null)
        {
            continue;
        }

        if (storeFeild.StoreType != FieldIndexType.OnlyStore)
        {
            var terms = Analyzer.Resolve(feilds[i].Text);
            feildTerms.Add(i, terms ?? new List<TermInfo>());
        }

        if (storeFeild.StoreType == FieldIndexType.OnlyIndex)
        {
            document.Feilds[i] = null;
        }
    }

    Documentor.Insert(storeIndex, document);
    return feildTerms;
}