Exemplo n.º 1
0
        /// <summary>
        /// Accumulates link-based similarity between categories and prints, for every
        /// category with at least one resulting group, a summary line followed by its
        /// best links.
        /// </summary>
        /// <remarks>
        /// For each article, every feature name is resolved through
        /// <c>articlesByNameDict</c>. For every pair (category of the article, category
        /// of the resolved article) with different ids, the feature value is added to a
        /// running sum and a hit counter is incremented in <c>SimilarCategories2</c> of
        /// the first category. The lookup uses the dictionary indexer, so a feature name
        /// without an entry in <c>articlesByNameDict</c> throws.
        /// </remarks>
        static void ByLinks()
        {
            // Pairs whose accumulated value divided by hit count is below this are dropped.
            const double minAverageValue = 0.09;
            // Upper bound on the number of entries printed per category.
            const int maxPrinted = 7;

            // Compute category similarity based on links.
            foreach (var art in articles.Values)
            {
                var sourceCats = art.Categories;
                foreach (var f in art.Features.Values)
                {
                    var art2 = articlesByNameDict[f.Name];
                    if (art.Id == art2.Id)
                    {
                        // The feature resolves back to the same article; nothing to relate.
                        continue;
                    }

                    var targetCats = art2.Categories;
                    foreach (var cat1 in sourceCats)
                    {
                        foreach (var cat2 in targetCats)
                        {
                            if (cat1.Id == cat2.Id)
                            {
                                continue;
                            }

                            // Single lookup, then overwrite in place: tuples are immutable,
                            // so an update means storing a fresh tuple under the same key.
                            Tuple<Category, double, int> previous;
                            var updated = cat1.SimilarCategories2.TryGetValue(cat2.Id, out previous)
                                ? new Tuple<Category, double, int>(cat2, previous.Item2 + f.Value, previous.Item3 + 1)
                                : new Tuple<Category, double, int>(cat2, f.Value, 1);
                            cat1.SimilarCategories2[cat2.Id] = updated;
                        }
                    }
                }
            }

            // Filter the relations and print the best ones per category.
            foreach (var cat in cats.Values)
            {
                var strongEnough = cat.SimilarCategories2.Values.Where(x => x.Item2 / x.Item3 >= minAverageValue);
                var thisGroupSimilar = groupSimilarLinks(cat, strongEnough, int.MaxValue, realCatMap);
                if (!thisGroupSimilar.Any())
                {
                    continue;
                }

                var thisGroupBest = new GroupBest(thisGroupSimilar, cat);
                Console.WriteLine("{0}", thisGroupBest.ToStringSummary());
                foreach (var groupBest in thisGroupBest.best.OrderByDescending(x => x.val).Take(maxPrinted))
                {
                    Console.WriteLine("\t{0}", thisGroupBest.ToStringLink(groupBest));
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Scores every category against every other category using the words of its
        /// name, then prints a summary and the best links for each category that ends up
        /// with at least one group.
        /// </summary>
        /// <remarks>
        /// The category name is split on spaces and each part is lower-cased and stemmed.
        /// For another category, each stemmed part is scored as the sum of the matching
        /// feature values over that category's articles; the pair's score is the minimum
        /// over all parts. Progress is written to the console every 100 categories.
        /// </remarks>
        static void ByWords()
        {
            // Pairs scoring below this are dropped before grouping.
            const double minScore = 5;
            // Upper bound on the number of entries printed per category.
            const int maxPrinted = 7;
            // A progress line is printed once per this many categories.
            const int progressInterval = 100;

            var total      = cats.Values.Count;
            var step       = 0;
            var stemmer    = new TextStemmerEN();
            var categories = cats.Values.Select(c => new CetegoryWordSimilarity()
            {
                category = c, SimilarCategories = new List<Tuple<Category, double>>()
            });

            foreach (var cat in categories)
            {
                ++step;
                if (step % progressInterval == 0)
                {
                    Console.WriteLine("Progress: {0:0.00}%", (double)step / total * 100.0);
                }

                // Stem the name parts once per category instead of once per compared category.
                // NOTE(review): assumes the stemmer's output for a word does not depend on
                // words stemmed earlier - confirm against TextStemmerEN.
                var stemmedParts = new List<string>();
                foreach (var part in cat.category.Name.Split(' '))
                {
                    // Invariant lower-casing: the result is used as a dictionary key, so it
                    // must not vary with the current culture.
                    var lowered = part.ToLowerInvariant();
                    stemmer.add(lowered.ToCharArray(), lowered.Length);
                    stemmer.stem();
                    stemmedParts.Add(stemmer.ToString());
                }

                foreach (var otherCat in cats.Values)
                {
                    if (otherCat.Id == cat.category.Id)
                    {
                        continue;
                    }

                    // The weakest name part decides the score of the pair.
                    double min = double.MaxValue;
                    foreach (var stemmedPart in stemmedParts)
                    {
                        double sum = 0;
                        foreach (var art in otherCat.Articles)
                        {
                            if (art.Features.ContainsKey(stemmedPart))
                            {
                                sum += art.Features[stemmedPart].Value;
                            }
                        }
                        if (sum < min)
                        {
                            min = sum;
                        }
                    }
                    cat.SimilarCategories.Add(new Tuple<Category, double>(otherCat, min));
                }

                var thisGroupSimilar = groupSimilarWords(cat.category, cat.SimilarCategories.Where(x => x.Item2 >= minScore), int.MaxValue, realCatMap);
                if (thisGroupSimilar.Any())
                {
                    var thisGroupBest = new GroupBest(thisGroupSimilar, cat.category);
                    Console.WriteLine("{0}", thisGroupBest.ToStringSummary());
                    foreach (var groupBest in thisGroupBest.best.OrderByDescending(x => x.val).Take(maxPrinted))
                    {
                        Console.WriteLine("\t{0}", thisGroupBest.ToStringLink(groupBest));
                    }
                }

                // Release the list; it is not used after this point.
                cat.SimilarCategories.Clear();
            }
        }