Exemple #1
0
        /// <summary>
        /// Inserts or updates the stored row that belongs to <paramref name="formatObject"/>.
        /// </summary>
        /// <remarks>
        /// If the backing table does not exist yet, a new table containing a single row is
        /// created and written. If it exists, the row whose "SourceName" column equals
        /// <c>formatObject.SourceName</c> is updated in place; when no such row is present a
        /// new row is appended instead of failing.
        /// </remarks>
        /// <param name="formatObject">Object whose values are copied into the row by UpdateRow.</param>
        /// <exception cref="System.InvalidOperationException">
        /// More than one existing row has the same "SourceName".
        /// </exception>
        public void Update(FormatObject formatObject)
        {
            if (LocalStore.Exists(_tableName))
            {
                var dt = LocalStore.Read(_tableName);

                // SingleOrDefault keeps the "duplicates are an error" contract of Single,
                // but returns null (instead of throwing) when the source has no row yet.
                DataRow row = dt.AsEnumerable()
                                .SingleOrDefault(x => x.Field<string>("SourceName") == formatObject.SourceName);

                if (row == null)
                {
                    // First time this source is stored in an already existing table.
                    row = dt.NewRow();
                    UpdateRow(row, formatObject);
                    dt.Rows.Add(row);
                }
                else
                {
                    UpdateRow(row, formatObject);
                }

                // NOTE(review): the boolean flag is passed exactly as before (true for an
                // existing table); its meaning is defined by LocalStore.Write - confirm.
                LocalStore.Write(dt, true);
            }
            else
            {
                var dt = CreateDataTable();

                DataRow dr = dt.NewRow();
                UpdateRow(dr, formatObject);
                dt.Rows.Add(dr);

                LocalStore.Write(dt, false);
            }
        }
Exemple #2
0
        /// <summary>
        /// Indexer entry point: splits every page into sentences, builds a question for each
        /// sentence of at least five characters, and records the accepted questions in the
        /// text index, the relevance table and the data store.
        /// </summary>
        /// <remarks>
        /// The supplied list is cleared once its pages have been analysed. Both index files
        /// are rewritten at the end of every call, even when no question was produced.
        /// </remarks>
        /// <param name="pages">Collection of page information; emptied by this method.</param>
        /// <exception cref="ArgumentNullException"><paramref name="pages"/> is null.</exception>
        public void Indexer(ref List<Page> pages)
        {
            if (pages == null)
            {
                throw new ArgumentNullException("pages");
            }

            const string delimiter = ".?。!\t?…●|\r\n])!";

            // Built once: the separator set is identical for every page.
            char[] separators = delimiter.ToCharArray();

            // Step 1: analyse what each page expresses, sentence by sentence.
            List<Template.Question> questions = new List<Template.Question>();

            foreach (Page page in pages)
            {
                // A page without text has nothing to split; skip it instead of crashing.
                if (page.Content == null)
                {
                    continue;
                }

                string[] sentences = page.Content.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                foreach (string sentence in sentences)
                {
                    // Fragments shorter than five characters are not analysed.
                    if (sentence.Length < 5)
                    {
                        continue;
                    }

                    Template.Question result = Bot.GetInstance(GetRootFolder())
                                               .BuildQuestion(sentence, page.Url, page.Title);

                    // Keep only questions that come back with at least one description entry.
                    if (result != null && result.Description != null && result.Description.Count > 0)
                    {
                        result.Content = sentence;
                        questions.Add(result);
                    }
                }
            }
            pages.Clear();

            // Step 2: build the index for the extracted questions.
            // Each index is loaded from disk only when it is not already held in memory.
            if (File.Exists(_textIndexFile) && TextFragment == null)
            {
                TextFragment = Serializer.DeserializeFromFile<InvertFragment>(_textIndexFile);
            }

            if (File.Exists(_relativeIndexFile) && RelativeTable == null)
            {
                RelativeTable =
                    Serializer.DeserializeFromFile<DimensionTable<string, string, double>>(_relativeIndexFile);
            }

            // NOTE(review): when an index file is missing and the corresponding member was
            // never assigned, TextFragment / RelativeTable are still null at this point and
            // the loop below fails on first use - confirm they are initialised elsewhere.
            foreach (Template.Question question in questions)
            {
                // The hash code doubles as the document id in the text index.
                int id = question.GetHashCode();
                question.Id = id;
                TextFragment.AddDocument(id, question.Content, false);

                foreach (var entity in question.Entity)
                {
                    // The value is read as a non-nullable double, so the former comparison
                    // against null could never be true; read it once and bump the weight.
                    double current = RelativeTable[entity.Item1][entity.Item2];
                    RelativeTable[entity.Item1][entity.Item2] = current + 0.0001;
                }

                DataStore.Write(question);
            }
            questions.Clear();

            // Persist both indexes.
            Serializer.SerializeToFile(TextFragment, _textIndexFile);
            Serializer.SerializeToFile(RelativeTable, _relativeIndexFile);
        }