/// <summary>
/// Scores category-to-category similarity from article features treated as links,
/// then prints, for every category that has qualifying relations, a summary line
/// followed by its strongest links.
/// </summary>
/// <remarks>
/// Phase 1 fills each category's <c>SimilarCategories2</c> map: for every feature of
/// an article that resolves (by name) to a different article, every pair of distinct
/// categories (one from each article) accumulates the feature value and a hit count.
/// Phase 2 keeps relations whose average value (sum / count) is at least 0.09, groups
/// them through <c>groupSimilarLinks</c>, and writes the results to the console.
/// </remarks>
static void ByLinks()
{
    // A relation survives filtering only if its average feature value reaches this.
    const double minAverageWeight = 0.09;
    // Number of top-ranked links printed under each category summary.
    const int maxLinksPrinted = 7;

    // Phase 1: compute category similarity based on links.
    foreach (var art in articles.Values)
    {
        var art1_cats = art.Categories;
        foreach (var f in art.Features.Values)
        {
            // NOTE(review): plain indexer lookup - presumably throws when the feature
            // name does not match any article; confirm every feature is a valid link.
            var art2 = articlesByNameDict[f.Name];
            if (art.Id == art2.Id)
            {
                continue; // self-links carry no cross-article information
            }

            var art2_cats = art2.Categories;
            foreach (var cat1 in art1_cats)
            {
                foreach (var cat2 in art2_cats)
                {
                    if (cat1.Id == cat2.Id)
                    {
                        continue; // a category is not compared with itself
                    }

                    // Single lookup, then overwrite in place: tuples are immutable,
                    // so an updated (sum, count) pair needs a fresh instance.
                    Tuple<Category, double, int> existing;
                    if (cat1.SimilarCategories2.TryGetValue(cat2.Id, out existing))
                    {
                        cat1.SimilarCategories2[cat2.Id] = new Tuple<Category, double, int>(
                            cat2, existing.Item2 + f.Value, existing.Item3 + 1);
                    }
                    else
                    {
                        cat1.SimilarCategories2.Add(
                            cat2.Id, new Tuple<Category, double, int>(cat2, f.Value, 1));
                    }
                }
            }
        }
    }

    // Phase 2: filter the relations and print the best ones per category.
    foreach (var cat in cats.Values)
    {
        var thisGroupSimilar = groupSimilarLinks(
            cat,
            cat.SimilarCategories2.Values.Where(x => x.Item2 / x.Item3 >= minAverageWeight),
            int.MaxValue,
            realCatMap);
        if (!thisGroupSimilar.Any())
        {
            continue;
        }

        var thisGroupBest = new GroupBest(thisGroupSimilar, cat);
        Console.WriteLine("{0}", thisGroupBest.ToStringSummary());
        foreach (var groupBest in thisGroupBest.best.OrderByDescending(x => x.val).Take(maxLinksPrinted))
        {
            Console.WriteLine("\t{0}", thisGroupBest.ToStringLink(groupBest));
        }
    }
}
/// <summary>
/// Scores category-to-category similarity from the words of each category's name and
/// prints, for every category that has qualifying relations, a summary line followed
/// by its strongest links.
/// </summary>
/// <remarks>
/// For a category, its name is split on spaces and every part is lower-cased and
/// stemmed. The score against another category is the minimum, over those stemmed
/// parts, of the summed feature values of that part across the other category's
/// articles. Relations scoring at least 5 are grouped through
/// <c>groupSimilarWords</c> and written to the console. Progress is reported every
/// 100 categories.
/// </remarks>
static void ByWords()
{
    // A relation survives filtering only if its score reaches this.
    const double minWordScore = 5;
    // Number of top-ranked links printed under each category summary.
    const int maxLinksPrinted = 7;

    var total = cats.Values.Count;
    var step = 0;
    var stemmer = new TextStemmerEN();

    foreach (var current in cats.Values)
    {
        ++step;
        if (step % 100 == 0)
        {
            Console.WriteLine("Progress: {0:0.00}%", (double)step / total * 100.0);
        }

        var cat = new CetegoryWordSimilarity()
        {
            category = current,
            SimilarCategories = new List<Tuple<Category, double>>()
        };

        // Stem the name parts once per category instead of once per compared category:
        // the stems depend only on this category's name.
        // NOTE(review): assumes the stemmer's output depends only on the word just
        // added (no state carried between words) - confirm against TextStemmerEN.
        var parts = cat.category.Name.Split(' ');
        var stemmedParts = new List<string>(parts.Length);
        foreach (var part in parts)
        {
            var lowered = part.ToLower().ToCharArray();
            stemmer.add(lowered, lowered.Length);
            stemmer.stem();
            stemmedParts.Add(stemmer.ToString());
        }

        foreach (var otherCat in cats.Values)
        {
            if (otherCat.Id == cat.category.Id)
            {
                continue;
            }

            // The weakest-matching name part decides the score, so one part that is
            // absent from the other category's articles drives the score to its sum (0
            // when no article has it).
            double min = double.MaxValue;
            foreach (var stemmedPart in stemmedParts)
            {
                double sum = 0;
                foreach (var art in otherCat.Articles)
                {
                    if (art.Features.ContainsKey(stemmedPart))
                    {
                        sum += art.Features[stemmedPart].Value;
                    }
                }

                if (sum < min)
                {
                    min = sum;
                }
            }

            cat.SimilarCategories.Add(new Tuple<Category, double>(otherCat, min));
        }

        var thisGroupSimilar = groupSimilarWords(
            cat.category,
            cat.SimilarCategories.Where(x => x.Item2 >= minWordScore),
            int.MaxValue,
            realCatMap);
        if (thisGroupSimilar.Any())
        {
            var thisGroupBest = new GroupBest(thisGroupSimilar, cat.category);
            Console.WriteLine("{0}", thisGroupBest.ToStringSummary());
            foreach (var groupBest in thisGroupBest.best.OrderByDescending(x => x.val).Take(maxLinksPrinted))
            {
                Console.WriteLine("\t{0}", thisGroupBest.ToStringLink(groupBest));
            }
        }

        // Free the memory: this list is not used after the category is reported.
        cat.SimilarCategories.Clear();
    }
}