/// <summary>
/// Creates a partitioner whose distance metrics are built from the supplied delegates.
/// </summary>
/// <param name="comparator">
/// Function that takes two items and returns a <see cref="double"/>; it is handed
/// unchanged to the <c>DistanceMetrics</c> constructor.
/// </param>
/// <param name="keyLookup">
/// Function that takes two items and returns a <see cref="string"/>; it is handed
/// unchanged to the <c>DistanceMetrics</c> constructor.
/// </param>
public Partitioner(Func<T, T, double> comparator, Func<T, T, string> keyLookup)
{
    // Bundle both delegates into a single metrics object and keep it on the instance.
    var distanceMetrics = new DistanceMetrics<T>(comparator, keyLookup);
    metrics = distanceMetrics;
}
/// <summary>
/// Loads up to 20 published articles, encodes each one as a 0/1 vector over the tags
/// that are shared by more than one article, clusters those vectors with k-means and
/// renders the "ArticlePartitions" view with the resulting partitions.
/// </summary>
/// <param name="partitionCount">Number of clusters (k) to build; must be at least 1.</param>
/// <returns>The "ArticlePartitions" view bound to an <c>ArticlePartitionModel</c>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="partitionCount"/> is less than 1.
/// </exception>
public IActionResult Partition2(int partitionCount)
{
    // Fail fast, before any database work: a cluster count below 1 can never
    // yield a partitioning.
    if (partitionCount < 1)
    {
        throw new ArgumentOutOfRangeException(
            nameof(partitionCount),
            partitionCount,
            "partitionCount must be at least 1.");
    }

    using (var db = new FusekiContext())
    {
        // NOTE(review): no ordering is applied before Take(20), so which 20 articles
        // are selected is left to the database - confirm this is intended.
        var articles = db.Articles
            .Where(el => el.Published == true)
            .Include(el => el.Tags)
            .Take(20)
            .ToList();

        // Collect every tag name that appears on the loaded articles.
        var candidateTags = new HashSet<string>();
        foreach (var article in articles)
        {
            foreach (var tag in article.Tags)
            {
                candidateTags.Add(tag.Name);
            }
        }

        // Drop tags that are carried by only a single article. The count is taken
        // against the whole Articles set (not just the loaded ones) and issues one
        // query per tag.
        // TODO: consider batching these per-tag counts into a single query.
        var sharedTags = new HashSet<string>();
        foreach (var tagName in candidateTags)
        {
            var relatedArticles = db.Articles
                .Where(el => el.Tags.Select(tag => tag.Name).Contains(tagName));
            if (relatedArticles.Count() > 1)
            {
                sharedTags.Add(tagName);
            }
        }

        // Materialize the sorted tag list once; leaving it as a deferred OrderBy
        // would re-sort the whole set for every article below.
        var orderedTags = sharedTags.OrderBy(el => el).ToList();

        // Build one tag-presence vector per article (1 = article has the tag, 0 = it does not).
        var observations = new List<double[]>(articles.Count);
        foreach (var article in articles)
        {
            // Hash set gives constant-time membership checks instead of re-running
            // the Select projection for every tag.
            var articleTagNames = new HashSet<string>(article.Tags.Select(el => el.Name));

            var vector = new double[orderedTags.Count];
            for (var column = 0; column < orderedTags.Count; column++)
            {
                vector[column] = articleTagNames.Contains(orderedTags[column]) ? 1 : 0;
            }

            observations.Add(vector);
        }

        var vecvec = observations.ToArray();

        var kmeans = new KMeans(k: partitionCount);
        var clusters = kmeans.Learn(vecvec);

        // Diagnostics about the clustering run, passed along with the partitions.
        var dict = new Dictionary<string, object>();
        dict["Kmeans Error"] = kmeans.Error;
        dict["dimensionality"] = kmeans.Dimension;
        dict["Iterations"] = kmeans.Iterations;
        dict["MaxIterations"] = kmeans.MaxIterations;
        dict["Tolerance"] = kmeans.Tolerance;

        // labels[i] is the partition number assigned to articles[i].
        int[] labels = clusters.Decide(vecvec);

        // The metrics object is only a placeholder here.
        // TODO: refactor PartitionSet to be dumber - no need to have comparators etc.
        var dm = new DistanceMetrics<Article>(
            (a, b) => Comparators.GetTagCommonality(a, b),
            (a, b) => Comparators.ArticleKeyLookup(a, b));

        // One partition set per cluster, numbered from 0.
        // NOTE(review): assumes the cluster labels are zero-based (0..k-1) - TODO confirm.
        var psets = new List<PartitionSet<Article>>(partitionCount);
        for (var number = 0; number < partitionCount; number++)
        {
            psets.Add(new PartitionSet<Article>(dm, number));
        }

        // Place each article in the partition set its label points to.
        for (var i = 0; i < labels.Length; i++)
        {
            psets[labels[i]].Add(articles[i]);
        }

        var partitiondata = new PartitionData<Article>(psets, dict);
        var model = new ArticlePartitionModel(partitiondata);
        return View("ArticlePartitions", model);
    }
}
/// <summary>
/// Creates an empty partition set with the given metrics object and number.
/// </summary>
/// <param name="dm">Distance metrics instance stored in <c>DistanceMetrics</c>.</param>
/// <param name="number">Value stored in <c>Number</c>.</param>
public PartitionSet(DistanceMetrics<T> dm, int number)
{
    DistanceMetrics = dm;

    // Start with no members; items are added after construction.
    Items = new List<T>();

    Number = number;
}