Esempio n. 1
0
        /// <summary>
        /// Builds index entries for a batch of records and appends them to the index.
        /// </summary>
        /// <remarks>
        /// For each record, the properties whose name matches a column (ignoring case) are
        /// collected, saved through <c>DocOp.save</c>, tokenized with <c>Tokenizer.GetWords</c>
        /// and converted to index words by <c>Extraction</c>. The words of all records are
        /// accumulated and handed to the word-list overload of <c>AppendIndex</c> in one call.
        /// </remarks>
        /// <param name="items">
        /// Records to index. Each item is cast to <c>IDictionary&lt;String, Object&gt;</c>,
        /// so it must support that cast at run time.
        /// </param>
        /// <param name="columns">
        /// Column definitions. Only properties whose name matches one of these columns are
        /// indexed; the column supplies the field's name, rank and save flag.
        /// </param>
        /// <returns>A task that completes once every record has been saved and the index updated.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="items"/> or <paramref name="columns"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// More than one column matches the same property name.
        /// </exception>
        public async Task AppendIndex(List<dynamic> items, List<Column> columns)
        {
            if (items == null)
            {
                throw new ArgumentNullException("items");
            }
            if (columns == null)
            {
                throw new ArgumentNullException("columns");
            }

            List<IndexWord> wordList = new List<IndexWord>();

            foreach (var item in items)
            {
                List<PropertyInfo> fields = new List<PropertyInfo>();
                foreach (var property in (IDictionary<String, Object>)item)
                {
                    // Ordinal, case-insensitive match: column names are identifiers, so the
                    // comparison must not depend on the current culture.
                    var column = columns.SingleOrDefault(
                        c => string.Equals(c.Name, property.Key, StringComparison.OrdinalIgnoreCase));
                    if (column == null)
                    {
                        continue;
                    }

                    fields.Add(new PropertyInfo()
                    {
                        Name = column.Name,
                        Rank = column.Rank,
                        // Convert.ToString returns an empty string for a null value instead
                        // of throwing like a direct ToString() call would.
                        Value = Convert.ToString(property.Value),
                        Save = column.Save
                    });
                }

                // The document is stored first; its id is then attached to every extracted word.
                var docId = await DocOp.save(fields);

                var words = Tokenizer.GetWords(fields);
                var indexList = Extraction(words, docId);
                wordList.AddRange(indexList);
            }

            await AppendIndex(wordList);
        }
Esempio n. 2
0
        /// <summary>
        /// Searches the index for the words of <paramref name="query"/> and returns one page
        /// of matching document ids, best match first.
        /// </summary>
        /// <remarks>
        /// The query is tokenized and words of a single character are ignored. The index list
        /// of every remaining word is loaded concurrently, and the hits are grouped by document.
        /// A document matched by one word is scored from the atom's rank, its frequency rank and
        /// the query word's rank. A document matched by several words is scored from the summed
        /// frequency ranks, the highest query-word rank and a per-pair score that compares the
        /// gap between two words in the query with the gap between their positions in the document.
        /// Results are ordered by rank (descending) and then by position (ascending).
        /// </remarks>
        /// <param name="query">Text to search for.</param>
        /// <param name="page">1-based page number; <c>(page - 1) * size</c> results are skipped.</param>
        /// <param name="size">Maximum number of ids per page.</param>
        /// <returns>
        /// A page carrying the requested slice of document ids, the total number of matched
        /// documents, and the page number and size that were requested.
        /// </returns>
        public async Task<Page<string>> Select(string query, int page, int size)
        {
            var words = Tokenizer.GetWords(query).Where(w => w.Word.Length > 1).ToList();
            int skip = (page - 1) * size;

            // Start every index lookup before awaiting so they run concurrently.
            List<Task<IndexList>> lookups = new List<Task<IndexList>>(words.Count);
            foreach (var word in words)
            {
                lookups.Add(IndexOp.GetOrAddIndexList(word));
            }
            var indexs = await Task.WhenAll(lookups);

            List<QIndexAtom> atomList = new List<QIndexAtom>(); // raw hits: one entry per (query word, document)
            List<Result> mResults = new List<Result>();         // scored results, not yet sorted

            // Task.WhenAll preserves order, so indexs[i] is the index list of words[i].
            for (int i = 0; i < indexs.Length; i++)
            {
                var currentWord = words[i];
                foreach (var atom in indexs[i].IndexAtoms)
                {
                    var qAtom = new QIndexAtom();
                    qAtom.Word = currentWord;
                    qAtom.DocId = atom.Key;
                    qAtom.Atom = atom.Value;
                    atomList.Add(qAtom);
                }
            }

            foreach (var group in atomList.GroupBy(a => a.DocId))
            {
                Result r = new Result();
                r.DocId = group.Key;

                var list = group.ToList();
                if (list.Count == 1)
                {
                    var first = list[0];
                    r.Position = first.Atom.Position;
                    r.Rank = first.Atom.Rank;
                    r.Rank += GetFrequencyRank(first.Atom.Frequency);
                    r.Rank += first.Word.Rank; // add the query word's weight so it influences the ordering
                }
                else
                {
                    float max = 0, rangeRank = 0;

                    // Seed with the first hit so the minimum below is a real minimum; comparing
                    // against the default value of Position would never record a position.
                    r.Position = list[0].Atom.Position;

                    for (int i = 0; i < list.Count; i++)
                    {
                        var idata = list[i];
                        if (idata.Word.Rank > max)
                        {
                            max = idata.Word.Rank;
                        }
                        r.Rank += GetFrequencyRank(idata.Atom.Frequency);
                        if (idata.Atom.Position < r.Position)
                        {
                            r.Position = idata.Atom.Position;
                        }

                        // Score every unordered pair of matched words once.
                        for (int j = i + 1; j < list.Count; j++)
                        {
                            var jdata = list[j];
                            // Gap between the two words in the query vs. the gap between their
                            // recorded positions in the document.
                            var wordRange = jdata.Word.Position - idata.Word.Position - idata.Word.Word.Length;
                            var docRange = jdata.Atom.Position - idata.Atom.Position - idata.Word.Word.Length;

                            // Identical gaps score 3; otherwise the score is shifted by a tenth
                            // of the gap difference and clamped to [0.3, 3.3].
                            float rank = 3;
                            if (wordRange != docRange)
                            {
                                rank = rank + (wordRange - docRange) / 10.0f;
                                if (rank < 0.3f)
                                {
                                    rank = 0.3f;
                                }
                                if (rank > 3.3f)
                                {
                                    rank = 3.3f;
                                }
                            }
                            rangeRank += rank;
                        }
                    }
                    r.Rank += max; // add the strongest query-word weight so it influences the ordering
                    r.Rank += rangeRank;
                }

                mResults.Add(r);
            }

            Page<string> result = new Page<string>();
            result.CurrentPage = page;
            // ThenBy keeps rank as the primary key; a second OrderBy would replace it and
            // reduce the computed rank to a tie-breaker.
            result.Items = mResults
                .OrderByDescending(d => d.Rank)
                .ThenBy(d => d.Position)
                .Skip(skip)
                .Take(size)
                .Select(d => d.DocId)
                .ToList();
            result.TotalItems = mResults.Count;
            result.PageSize = size;
            return result;
        }