Exemple #1
0
        /// <summary>
        /// Searches post fragments for <paramref name="query"/> and returns at most five results,
        /// one per question, ordered by descending score.
        /// </summary>
        /// <remarks>
        /// Fragments are fetched from the <c>PostFragment</c> table in windows of 50 steps. Fetching stops
        /// after a window if more than one second has elapsed or fragments from at least 1000 distinct
        /// questions have been collected. A fragment's score is <c>GetFragmentScore(text, query)</c> plus,
        /// for every space-separated query word (a repeated word counts once per occurrence), the value
        /// stored for the fragment id in that word's <c>WordTfIdfData</c> entry. For each question only the
        /// best-scoring fragment is kept. When no fragment matches, the result of
        /// <c>SearchLogic.SearchSubQueries</c> is returned instead.
        /// </remarks>
        /// <param name="db">Database holding the <c>PostFragment</c> and <c>WholePost</c> tables.</param>
        /// <param name="db_words">Database holding the <c>WordTfIdfData</c> table.</param>
        /// <param name="query">Search text; also split on spaces into words for the per-word scoring.</param>
        /// <returns>Up to five result items, or the sub-query search result when nothing was found.</returns>
        public static List <ResultItem> SearchFragments(Db db, Db db_words, string query)
        {
            const int stepWindow   = 50;   // number of steps searched per Search call
            const int timeBudgetMs = 1000; // stop fetching once this much time has elapsed
            const int maxQuestions = 1000; // stop fetching once this many distinct questions were hit
            const int maxResults   = 5;

            var interm   = new List <IntermResult>();
            int max_step = db.Table <PostFragment>().LastStep();
            var timer    = Stopwatch.StartNew();

            for (int i = 0; i <= max_step; i += stepWindow)
            {
                var window = db.Table <PostFragment>().Search(f => f.Text, query, i, stepWindow)
                             .Select(f => new { f.Id, f.Text, f.QuestionId })
                             .Select(f => new IntermResult {
                    Id = f.Id, Text = f.Text, QuestionId = f.QuestionId
                });
                interm.AddRange(window);

                // Counting distinct ids gives the same number as counting groups, without building the groups.
                if (timer.ElapsedMilliseconds > timeBudgetMs ||
                    interm.Select(f => f.QuestionId).Distinct().Count() >= maxQuestions)
                {
                    break;
                }
            }

            foreach (var inter in interm)
            {
                inter.Score += GetFragmentScore(inter.Text, query);
            }

            // tf-idf: tokenize the query once instead of once per fragment.
            string[] words = query.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var      dic   = new Dictionary <string, Dictionary <int, short> >();

            foreach (var w in words)
            {
                // A repeated query word needs only one database lookup.
                if (dic.ContainsKey(w))
                {
                    continue;
                }
                var d = db_words.Table <WordTfIdfData>().Where(f => f.Word == w).SelectEntity().FirstOrDefault();
                if (d == null)
                {
                    continue;
                }
                dic[w] = Utils.TfIdfFromData(d.Data).Item1;
            }

            // One entry per word occurrence, so a repeated word still contributes once per occurrence.
            var scoresPerOccurrence = new List <Dictionary <int, short> >();
            foreach (var w in words)
            {
                Dictionary <int, short> wordScores;
                if (dic.TryGetValue(w, out wordScores))
                {
                    scoresPerOccurrence.Add(wordScores);
                }
            }

            foreach (var inter in interm)
            {
                foreach (var wordScores in scoresPerOccurrence)
                {
                    short bonus;
                    if (wordScores.TryGetValue(inter.Id, out bonus))
                    {
                        inter.Score += bonus;
                    }
                }
            }

            // Pick the best fragment of every question once, then rank the questions by that fragment.
            // OrderByDescending is stable, so ties keep the order in which the questions were first seen.
            var bestPerQuestion = interm.GroupBy(f => f.QuestionId)
                                  .Select(g => g.OrderByDescending(z => z.Score).First())
                                  .OrderByDescending(b => b.Score)
                                  .Take(maxResults);

            var results = new List <ResultItem>();
            foreach (var q in bestPerQuestion)
            {
                // Copied to a local so the query expression captures a plain variable.
                var first_id = q.QuestionId;
                var item     = new ResultItem()
                {
                    Fragment = q.Text,
                    Id       = q.QuestionId,
                    // First() throws if the question has no WholePost row.
                    Title    = db.Table <WholePost>().Where(f => f.Id == first_id).Select(f => new { f.Title }).First().Title,
                    Score    = q.Score
                };
                results.Add(item);
            }

            if (results.Count == 0)
            {
                return(SearchLogic.SearchSubQueries(db, db_words, query));
            }

            return(results);
        }
        /// <summary>
        /// Searches post titles for <paramref name="query"/> and returns at most five results with
        /// distinct titles, ordered by descending score.
        /// </summary>
        /// <remarks>
        /// Up to 200 candidates are requested from the <c>WholePost</c> table, ordered by votes descending.
        /// Every candidate starts at a score of 100 and gains <c>TitleScore(title)</c>,
        /// <c>GetFragmentScore(title, query)</c> and, for every space-separated query word (a repeated
        /// word counts once per occurrence), the value stored for the post id in that word's
        /// <c>WordTfIdfData</c> entry. Candidates sharing a title are collapsed to the first one in
        /// candidate order. The <c>AnswerFragment</c> text is looked up only for the items actually
        /// returned, because it does not take part in scoring.
        /// </remarks>
        /// <param name="db">Database holding the <c>WholePost</c> and <c>AnswerFragment</c> tables.</param>
        /// <param name="db_words">Database holding the <c>WordTfIdfData</c> table.</param>
        /// <param name="query">Search text; also split on spaces into words for the per-word scoring.</param>
        /// <returns>Up to five result items; empty when no title matched.</returns>
        public static List <ResultItem> SearchTitles(Db db, Db db_words, string query)
        {
            const int maxCandidates = 200;
            const int baseScore     = 100;
            const int maxResults    = 5;

            // 'count' is only needed to satisfy the Select overload's out parameter.
            int count = 0;
            var res   = db.Table <WholePost>().Search(f => f.Title, query).OrderByDescending(f => f.Votes).Take(maxCandidates).Select(f => new { f.Id, f.Title, f.Votes }, out count);

            var result_items = new List <ResultItem>();
            foreach (var r in res)
            {
                result_items.Add(new ResultItem()
                {
                    Title = r.Title,
                    Id    = r.Id,
                    Score = baseScore
                });
            }

            // NOTE(review): merged from two separate passes; assumes both scoring helpers are side-effect free.
            foreach (var inter in result_items)
            {
                inter.Score += TitleScore(inter.Title);
                inter.Score += GetFragmentScore(inter.Title, query);
            }

            // tf-idf: tokenize the query once instead of once per candidate.
            string[] words = query.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            var      dic   = new Dictionary <string, Dictionary <int, short> >();

            foreach (var w in words)
            {
                // A repeated query word needs only one database lookup.
                if (dic.ContainsKey(w))
                {
                    continue;
                }
                var d = db_words.Table <WordTfIdfData>().Where(f => f.Word == w).SelectEntity().FirstOrDefault();
                if (d == null)
                {
                    continue;
                }
                dic[w] = Utils.TfIdfFromData(d.Data).Item1;
            }

            // One entry per word occurrence, so a repeated word still contributes once per occurrence.
            var scoresPerOccurrence = new List <Dictionary <int, short> >();
            foreach (var w in words)
            {
                Dictionary <int, short> wordScores;
                if (dic.TryGetValue(w, out wordScores))
                {
                    scoresPerOccurrence.Add(wordScores);
                }
            }

            foreach (var inter in result_items)
            {
                foreach (var wordScores in scoresPerOccurrence)
                {
                    short bonus;
                    if (wordScores.TryGetValue(inter.Id, out bonus))
                    {
                        inter.Score += bonus;
                    }
                }
            }

            var top = result_items.GroupBy(f => f.Title)
                      .Select(g => g.First())
                      .OrderByDescending(f => f.Score)
                      .Take(maxResults)
                      .ToList();

            // Fetch the answer fragment only for the items that are returned.
            foreach (var item in top)
            {
                var id  = item.Id;
                var afr = db.Table <AnswerFragment>().Where(f => f.Id == id).Select(f => new { f.Text }).FirstOrDefault();
                item.Fragment = afr != null ? Encoding.UTF8.GetString(afr.Text) : "";
            }

            return(top);
        }