Example #1
0
 /// <summary>
 /// Creates a partitioner whose distance metrics are built from the supplied delegates.
 /// </summary>
 /// <param name="comparator">Delegate taking two items and returning a <c>double</c>; forwarded to <c>DistanceMetrics&lt;T&gt;</c>.</param>
 /// <param name="keyLookup">Delegate taking two items and returning a <c>string</c>; forwarded to <c>DistanceMetrics&lt;T&gt;</c>.</param>
 public Partitioner(Func<T, T, double> comparator, Func<T, T, string> keyLookup)
 {
     // Build the metrics object first, then store it on the instance.
     var distanceMetrics = new DistanceMetrics<T>(comparator, keyLookup);
     metrics = distanceMetrics;
 }
        /// <summary>
        /// Clusters up to 20 published articles into <paramref name="partitionCount"/> groups by
        /// running k-means over binary tag-membership vectors, then renders the result with the
        /// "ArticlePartitions" view.
        /// </summary>
        /// <param name="partitionCount">
        /// Number of clusters (k) handed to <c>KMeans</c>; the same number of
        /// <c>PartitionSet&lt;Article&gt;</c> buckets is created.
        /// </param>
        /// <returns>The "ArticlePartitions" view bound to an <c>ArticlePartitionModel</c>.</returns>
        public IActionResult Partition2(int partitionCount)
        {
            using (var db = new FusekiContext())
            {
                var articles = db.Articles.Where(el => el.Published == true)
                               .Include(el => el.Tags).Take(20).ToList();

                // Distinct tag names used by the sampled articles.
                var candidateTags = new HashSet<string>();
                foreach (var article in articles)
                {
                    foreach (var tag in article.Tags)
                    {
                        candidateTags.Add(tag.Name);
                    }
                }

                // Drop tags carried by only one article. The count is taken over every
                // article in the database, not only the sampled ones.
                // NOTE(review): this issues one query per tag; consider a single grouped query.
                var sharedTags = new HashSet<string>();
                foreach (var t in candidateTags)
                {
                    var relatedArticles = db.Articles.Where(el => el.Tags.Select(tag => tag.Name).Contains(t));
                    if (relatedArticles.Count() > 1)
                    {
                        sharedTags.Add(t);
                    }
                }

                // Materialize the ordering once: a deferred OrderBy would be re-sorted
                // every time it is enumerated in the per-article loop below.
                var allTagsOrdered = sharedTags.OrderBy(el => el).ToList();

                // One binary vector per article: 1 where the article carries the tag, 0 otherwise.
                // Vector positions follow allTagsOrdered; rows follow the order of `articles`.
                var vecvec = new double[articles.Count][];
                for (var row = 0; row < articles.Count; row++)
                {
                    // Hash set gives constant-time membership instead of re-projecting and
                    // scanning the article's tags for every dimension.
                    var articleTags = new HashSet<string>(articles[row].Tags.Select(el => el.Name));
                    var vector      = new double[allTagsOrdered.Count];
                    for (var col = 0; col < allTagsOrdered.Count; col++)
                    {
                        vector[col] = articleTags.Contains(allTagsOrdered[col]) ? 1 : 0;
                    }
                    vecvec[row] = vector;
                }

                var kmeans   = new KMeans(k: partitionCount);
                var clusters = kmeans.Learn(vecvec);

                // Diagnostics about the clustering run, passed through to the view model.
                var dict = new Dictionary<string, object>();
                dict["Kmeans Error"]   = kmeans.Error;
                dict["dimensionality"] = kmeans.Dimension;
                dict["Iterations"]     = kmeans.Iterations;
                dict["MaxIterations"]  = kmeans.MaxIterations;
                dict["Tolerance"]      = kmeans.Tolerance;

                // labels[i] is the partition number assigned to articles[i]
                // (indexed by position in `articles`, not by article id).
                int[] labels = clusters.Decide(vecvec);

                //this is totally fake. TODO: refactor these to be dumber - no need to have comparators etc.
                var dm = new DistanceMetrics<Article>((a, b) => Comparators.GetTagCommonality(a, b), (a, b) => Comparators.ArticleKeyLookup(a, b));

                // One bucket per cluster, numbered 0..partitionCount-1.
                // TODO: confirm Accord's labels are zero-based; the indexing below assumes so.
                var psets = new List<PartitionSet<Article>>();
                for (var number = 0; number < partitionCount; number++)
                {
                    psets.Add(new PartitionSet<Article>(dm, number));
                }

                // Drop each article into the bucket named by its label.
                for (var i = 0; i < labels.Length; i++)
                {
                    psets[labels[i]].Add(articles[i]);
                }

                var partitiondata = new PartitionData<Article>(psets, dict);

                var model = new ArticlePartitionModel(partitiondata);
                return View("ArticlePartitions", model);
            }
        }
Example #3
0
 /// <summary>
 /// Creates an empty partition set with the given number and distance metrics.
 /// </summary>
 /// <param name="dm">Distance metrics stored on this set.</param>
 /// <param name="number">Number stored on this set.</param>
 public PartitionSet(DistanceMetrics<T> dm, int number)
 {
     Number = number;
     DistanceMetrics = dm;

     // Start with no items.
     Items = new List<T>();
 }