Ejemplo n.º 1
0
        /// <summary>
        /// Handles a search request packet: executes the pending text query or relative
        /// query and stores the formatted result string in the global cache.
        /// </summary>
        /// <remarks>
        /// The packet payload is decoded as UTF-8 and selects the branch
        /// ("Text-Query" or "Relative-Query"); any other value is ignored.
        /// The query text itself is looked up in the global cache through the
        /// "Current-Text-Query" / "Current-Relative-Query" pointer entries.
        /// Any exception is logged through <c>Logger.Warn</c> and not rethrown.
        /// </remarks>
        /// <param name="bytes">Serialized <c>Packet</c> whose <c>Data</c> holds the UTF-8 request type.</param>
        /// <returns>The serialized value <c>true</c>, whether or not processing succeeded.</returns>
        public byte[] ProcessQuery(byte[] bytes)
        {
            try
            {
                Packet packet = Serializer.DeserializeFromBytes<Packet>(bytes);
                string type = System.Text.Encoding.UTF8.GetString(packet.Data);

                // Text search.
                if (type == "Text-Query")
                {
                    string currentTextQuery = GetGlobalCache<string>("Current-Text-Query");
                    if (!string.IsNullOrEmpty(currentTextQuery))
                    {
                        string query = GetGlobalCache<string>(currentTextQuery);
                        WriteToConsole("Get Text Query:" + query);
                        string[] keywords = segment.Split(query);
                        List<string> docs = TextFragment.FindCommonDocumentByKeys(keywords, 10);

                        // StringBuilder avoids re-copying the whole result for every document.
                        var content = new System.Text.StringBuilder();
                        string bestQuestion = string.Empty;
                        decimal bestQuesVal = 0;
                        foreach (string doc in docs)
                        {
                            Template.Question question = DataStore.Read(int.Parse(doc));
                            Tuple<string, decimal> tuple = question.GetBestQuestion(query);
                            if (tuple.Item2 > bestQuesVal)
                            {
                                bestQuesVal = tuple.Item2;
                                bestQuestion = tuple.Item1;
                            }
                            content.Append(question.ToString()).Append(';');
                        }

                        // NOTE(review): there is no separator between the port and the query in this key.
                        // Kept as-is because whoever reads the result presumably builds the same key - confirm.
                        string inputResultKey = Dns.GetHostName() + "," + _searchPort + query;
                        WriteToConsole("Result write into cache key=" + inputResultKey + ", count=" + docs.Count);

                        string highlighted = HighlightKeywords(content.ToString(), keywords);

                        // The best matching question is prepended after highlighting, so it stays unmarked.
                        if (!string.IsNullOrEmpty(bestQuestion) && bestQuesVal > 0.1m)
                        {
                            highlighted = bestQuestion + ";" + highlighted;
                        }

                        SetGlobalCache(inputResultKey, highlighted);
                    }
                }

                // Relative (related-term) search.
                else if (type == "Relative-Query")
                {
                    string currentRelativeQuery = GetGlobalCache<string>("Current-Relative-Query");
                    if (!string.IsNullOrEmpty(currentRelativeQuery))
                    {
                        string query = GetGlobalCache<string>(currentRelativeQuery);
                        WriteToConsole("Get Relative Query:" + query);
                        List<Tuple<string, double>> tuples = RelativeTable[query].GetAllKeyValue(20);
                        string inputResultKey = Dns.GetHostName() + "," + _searchPort + query;
                        if (tuples == null)
                        {
                            // No related entries: an empty result is still written to the cache.
                            tuples = new List<Tuple<string, double>>();
                        }

                        var result = new System.Text.StringBuilder();
                        foreach (Tuple<string, double> tuple in tuples)
                        {
                            // Each entry is emitted as "key:value;".
                            result.Append(tuple.Item1).Append(':').Append(tuple.Item2).Append(';');
                        }
                        SetGlobalCache(inputResultKey, result.ToString());
                    }
                }
            }
            catch (Exception exception)
            {
                Logger.Warn(exception);
            }
            return Serializer.SerializeToBytes(true);
        }

        /// <summary>
        /// Wraps every occurrence of any keyword in <c>&lt;strong&gt;</c> tags using a single pass
        /// over <paramref name="content"/>.
        /// </summary>
        /// <remarks>
        /// A single pass is required: replacing keyword by keyword would also rewrite the
        /// <c>&lt;strong&gt;</c> markup inserted for earlier keywords (for example when a keyword is
        /// "strong" or "s") and would wrap duplicate keywords more than once.
        /// </remarks>
        /// <param name="content">Text to highlight.</param>
        /// <param name="keywords">Keywords to mark; null and empty entries are ignored.</param>
        /// <returns>The highlighted text, or <paramref name="content"/> unchanged when there is nothing to mark.</returns>
        private static string HighlightKeywords(string content, string[] keywords)
        {
            if (string.IsNullOrEmpty(content) || keywords == null)
            {
                return content;
            }

            var seen = new HashSet<string>();
            var alternatives = new List<string>();
            foreach (string keyword in keywords)
            {
                if (!string.IsNullOrEmpty(keyword) && seen.Add(keyword))
                {
                    alternatives.Add(keyword);
                }
            }

            if (alternatives.Count == 0)
            {
                return content;
            }

            // Longest first: regex alternation takes the first alternative that matches,
            // so a keyword must come before any shorter keyword that is a prefix of it.
            alternatives.Sort((left, right) => right.Length.CompareTo(left.Length));
            for (int i = 0; i < alternatives.Count; i++)
            {
                // Keywords are literal text, not patterns.
                alternatives[i] = System.Text.RegularExpressions.Regex.Escape(alternatives[i]);
            }

            string pattern = string.Join("|", alternatives.ToArray());
            return System.Text.RegularExpressions.Regex.Replace(content, pattern, "<strong>$0</strong>");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Indexer entry point: splits each page into sentences, builds a question for every
        /// usable sentence, adds the questions to the text and relative indexes, and writes
        /// both indexes to their index files.
        /// </summary>
        /// <param name="pages">
        /// Collection of page information. The list is cleared once its pages have been analysed.
        /// </param>
        public void Indexer(ref List<Page> pages)
        {
            const string delimiter = ".?。!\t?…●|\r\n])!";

            // Built once; the separator set does not change between pages.
            char[] separators = delimiter.ToCharArray();
            List<Template.Question> questions = new List<Template.Question>();

            // Step 1: analyse what each page expresses, sentence by sentence.
            foreach (Page page in pages)
            {
                // A page without content has nothing to analyse; skip it rather than
                // aborting the whole batch with a NullReferenceException.
                if (page.Content == null)
                {
                    continue;
                }

                string[] sentences = page.Content.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                foreach (string sentence in sentences)
                {
                    // Sentences shorter than 5 characters are not analysed.
                    if (sentence.Length < 5)
                    {
                        continue;
                    }

                    Template.Question result = Bot.GetInstance(GetRootFolder())
                                               .BuildQuestion(sentence, page.Url, page.Title);
                    if (result != null && result.Description != null && result.Description.Count > 0)
                    {
                        result.Content = sentence;
                        questions.Add(result);
                    }
                }
            }
            pages.Clear();

            // Step 2: load the persisted indexes when they are not in memory yet.
            // NOTE(review): if an index file does not exist and the field was never initialised
            // elsewhere, the field is still null below - confirm where it is first created.
            if (File.Exists(_textIndexFile) && TextFragment == null)
            {
                TextFragment = Serializer.DeserializeFromFile<InvertFragment>(_textIndexFile);
            }

            if (File.Exists(_relativeIndexFile) && RelativeTable == null)
            {
                RelativeTable =
                    Serializer.DeserializeFromFile<DimensionTable<string, string, double>>(_relativeIndexFile);
            }

            // Step 3: index the extracted questions.
            foreach (Template.Question question in questions)
            {
                // NOTE(review): hash codes are not guaranteed to be unique, so two questions
                // can end up with the same id.
                int id = question.GetHashCode();
                question.Id = id;
                TextFragment.AddDocument(id, question.Content, false);
                foreach (var entity in question.Entity)
                {
                    // The cell is read into a double, so the former "== null" test could never
                    // be true; a single compound assignment performs the same increment.
                    RelativeTable[entity.Item1][entity.Item2] += 0.0001;
                }
                DataStore.Write(question);
            }
            questions.Clear();

            // Step 4: persist both indexes.
            Serializer.SerializeToFile(TextFragment, _textIndexFile);
            Serializer.SerializeToFile(RelativeTable, _relativeIndexFile);
        }