/// <summary>
/// Handles a query notification packet. The packet's <c>Data</c> is decoded as UTF-8 and
/// selects the query kind: <c>"Text-Query"</c> or <c>"Relative-Query"</c>. The query text
/// itself is read from the global cache and the formatted result is written back to the
/// global cache under a key built from this host's name, the search port and the query.
/// </summary>
/// <param name="bytes">Serialized <c>Packet</c> describing which kind of query to run.</param>
/// <returns>
/// Always the serialized value <c>true</c>. Any exception raised while processing is
/// logged through <c>Logger.Warn</c> and not propagated to the caller.
/// </returns>
public byte[] ProcessQuery(byte[] bytes)
{
    try
    {
        Packet packet = Serializer.DeserializeFromBytes<Packet>(bytes);
        string type = System.Text.Encoding.UTF8.GetString(packet.Data);

        if (type == "Text-Query")
        {
            // Full-text search.
            AnswerTextQuery();
        }
        else if (type == "Relative-Query")
        {
            // Related-terms search.
            AnswerRelativeQuery();
        }
    }
    catch (Exception exception)
    {
        // Best effort: a failed query is logged and the caller still receives "true".
        Logger.Warn(exception);
    }

    return Serializer.SerializeToBytes(true);
}

/// <summary>
/// Runs the pending text query found in the global cache and stores the ';'-separated,
/// keyword-highlighted result in the global cache.
/// </summary>
private void AnswerTextQuery()
{
    // "Current-Text-Query" holds the cache key under which the actual query text is stored.
    string currentTextQuery = GetGlobalCache<string>("Current-Text-Query");
    if (string.IsNullOrEmpty(currentTextQuery))
    {
        return;
    }

    string query = GetGlobalCache<string>(currentTextQuery);
    WriteToConsole("Get Text Query:" + query);

    string[] keywords = segment.Split(query);
    List<string> docs = TextFragment.FindCommonDocumentByKeys(keywords, 10);

    System.Text.StringBuilder content = new System.Text.StringBuilder();
    string bestQuestion = string.Empty;
    decimal bestQuesVal = 0;
    foreach (string doc in docs)
    {
        Template.Question question = DataStore.Read(int.Parse(doc));

        // Remember the highest-scoring candidate across all matched documents.
        Tuple<string, decimal> candidate = question.GetBestQuestion(query);
        if (candidate.Item2 > bestQuesVal)
        {
            bestQuesVal = candidate.Item2;
            bestQuestion = candidate.Item1;
        }

        content.Append(question.ToString()).Append(";");
    }

    // NOTE(review): there is no separator between the port and the query in this key;
    // kept as-is because readers of the cache presumably build the same key.
    string inputResultKey = Dns.GetHostName() + "," + _searchPort + query;
    WriteToConsole("Result write into cache key=" + inputResultKey + ", count=" + docs.Count);

    string highlighted = WrapKeywordsInStrong(content.ToString(), keywords);

    // The best question is prepended after highlighting, so it is stored without markup.
    if (bestQuestion.Length > 0 && bestQuesVal > 0.1m)
    {
        highlighted = bestQuestion + ";" + highlighted;
    }

    SetGlobalCache(inputResultKey, highlighted);
}

/// <summary>
/// Runs the pending related-terms query found in the global cache and stores the result
/// as <c>term:weight;</c> pairs in the global cache.
/// </summary>
private void AnswerRelativeQuery()
{
    // "Current-Relative-Query" holds the cache key under which the actual query text is stored.
    string currentRelativeQuery = GetGlobalCache<string>("Current-Relative-Query");
    if (string.IsNullOrEmpty(currentRelativeQuery))
    {
        return;
    }

    string query = GetGlobalCache<string>(currentRelativeQuery);
    WriteToConsole("Get Relative Query:" + query);

    List<Tuple<string, double>> tuples = RelativeTable[query].GetAllKeyValue(20);
    string inputResultKey = Dns.GetHostName() + "," + _searchPort + query;

    System.Text.StringBuilder result = new System.Text.StringBuilder();
    if (tuples != null)
    {
        foreach (Tuple<string, double> tuple in tuples)
        {
            result.Append(tuple.Item1).Append(":").Append(tuple.Item2).Append(";");
        }
    }

    // An empty string is stored when there are no related terms.
    SetGlobalCache(inputResultKey, result.ToString());
}

/// <summary>
/// Wraps every occurrence of any keyword in <paramref name="content"/> with
/// <c>&lt;strong&gt;…&lt;/strong&gt;</c> in a single pass.
/// </summary>
/// <remarks>
/// A single pass is used instead of one <c>Replace</c> per keyword so that a repeated
/// keyword, or a keyword that happens to match the inserted markup (for example "strong"),
/// cannot rewrite tags added for an earlier keyword. Longer keywords are tried first so
/// that a keyword is not shadowed by one of its own prefixes.
/// </remarks>
private static string WrapKeywordsInStrong(string content, IEnumerable<string> keywords)
{
    string[] alternatives = keywords
        .Where(keyword => !string.IsNullOrEmpty(keyword))
        .Distinct()
        .OrderByDescending(keyword => keyword.Length)
        .Select(keyword => System.Text.RegularExpressions.Regex.Escape(keyword))
        .ToArray();

    if (alternatives.Length == 0 || string.IsNullOrEmpty(content))
    {
        return content;
    }

    return System.Text.RegularExpressions.Regex.Replace(
        content,
        string.Join("|", alternatives),
        match => "<strong>" + match.Value + "</strong>");
}
/// <summary>
/// Indexer entry point. Splits every page's content into sentences, asks the bot to build
/// a question from each sentence of at least five characters, adds the resulting questions
/// to the text index and the relative table, writes them to the data store, and finally
/// serializes both indexes to their files.
/// </summary>
/// <param name="pages">
/// Collection of page information. The list is cleared once its sentences have been
/// extracted, before any index is touched.
/// </param>
public void Indexer(ref List<Page> pages)
{
    // Automatically analyse what each sentence of each page expresses.
    List<Template.Question> questions = new List<Template.Question>();
    const string delimiter = ".?。!\t?…●|\r\n])!";
    char[] separators = delimiter.ToCharArray();

    foreach (Page page in pages)
    {
        // A page without content has no sentences; skip it instead of failing the batch.
        if (string.IsNullOrEmpty(page.Content))
        {
            continue;
        }

        string[] sentences = page.Content.Split(separators, StringSplitOptions.RemoveEmptyEntries);
        foreach (string sentence in sentences)
        {
            if (sentence.Length < 5)
            {
                continue;
            }

            Template.Question result = Bot.GetInstance(GetRootFolder())
                                       .BuildQuestion(sentence, page.Url, page.Title);

            // Keep only questions for which the bot produced at least one description.
            if (result != null && result.Description != null && result.Description.Count > 0)
            {
                result.Content = sentence;
                questions.Add(result);
            }
        }
    }

    pages.Clear();

    // Build the index over the extracted questions. Existing index files are loaded
    // only when the in-memory index has not been set yet.
    if (File.Exists(_textIndexFile) && TextFragment == null)
    {
        TextFragment = Serializer.DeserializeFromFile<InvertFragment>(_textIndexFile);
    }

    if (File.Exists(_relativeIndexFile) && RelativeTable == null)
    {
        RelativeTable = Serializer.DeserializeFromFile<DimensionTable<string, string, double>>(_relativeIndexFile);
    }

    // NOTE(review): if an index file does not exist and the corresponding member was not
    // initialised elsewhere, TextFragment / RelativeTable are still null here — confirm
    // they are created before the first run.
    foreach (Template.Question question in questions)
    {
        // NOTE(review): the hash code doubles as the document id; unless Question overrides
        // GetHashCode with a stable, content-based hash, ids may collide or differ between
        // runs — verify.
        int id = question.GetHashCode();
        question.Id = id;
        TextFragment.AddDocument(id, question.Content, false);

        foreach (var entity in question.Entity)
        {
            // Each occurrence of an entity pair bumps its weight by a fixed increment.
            // The cell is a double, so it can never be null; read it once.
            double oldValue = RelativeTable[entity.Item1][entity.Item2];
            RelativeTable[entity.Item1][entity.Item2] = oldValue + 0.0001;
        }

        DataStore.Write(question);
    }

    // Both indexes are written on every call, even when no new question was added.
    Serializer.SerializeToFile(TextFragment, _textIndexFile);
    Serializer.SerializeToFile(RelativeTable, _relativeIndexFile);
}