/// <summary>
        /// Clusters up to 20 published articles into <paramref name="partitionCount"/> groups by running
        /// k-means over binary tag-membership vectors, and renders the result with the
        /// "ArticlePartitions" view.
        /// </summary>
        /// <param name="partitionCount">
        /// Number of clusters (k). Must be at least 1 and no larger than the number of articles loaded.
        /// </param>
        /// <returns>The "ArticlePartitions" view bound to an <c>ArticlePartitionModel</c>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="partitionCount"/> is less than 1 or exceeds the number of articles loaded.
        /// </exception>
        public IActionResult Partition2(int partitionCount)
        {
            // Reject nonsensical counts before touching the database or the clustering library.
            if (partitionCount < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(partitionCount), partitionCount,
                    "At least one partition is required.");
            }

            using (var db = new FusekiContext())
            {
                // Take(20) has no ordering, so which 20 published articles are used is up to the provider.
                var articles = db.Articles.Where(el => el.Published == true)
                               .Include(el => el.Tags).Take(20).ToList();

                if (articles.Count < partitionCount)
                {
                    throw new ArgumentOutOfRangeException(nameof(partitionCount), partitionCount,
                        $"Cannot split {articles.Count} article(s) into {partitionCount} partitions.");
                }

                // Candidate vocabulary: every distinct tag name used by the sampled articles.
                var candidateTags = new HashSet<string>(
                    articles.SelectMany(article => article.Tags).Select(tag => tag.Name));

                // Keep only tags attached to more than one article. The count runs against ALL
                // articles in the database (not just the sample, and regardless of Published),
                // one query per tag.
                var sharedTags = new List<string>();
                foreach (var t in candidateTags)
                {
                    var relatedArticles = db.Articles.Where(el => el.Tags.Select(tag => tag.Name).Contains(t));
                    if (relatedArticles.Count() > 1)
                    {
                        sharedTags.Add(t);
                    }
                }

                // Fix the column order once; every article vector uses the same tag order.
                var orderedTags = sharedTags.OrderBy(el => el).ToList();

                // One row per article: 1 where the article carries the tag, 0 otherwise.
                // NOTE(review): if no tag is shared, every row is empty (dimension 0) - confirm
                // how the k-means implementation handles that.
                var vecvec = articles.Select(article =>
                {
                    var ownTags = new HashSet<string>(article.Tags.Select(el => el.Name));
                    return orderedTags.Select(tag => ownTags.Contains(tag) ? 1.0 : 0.0).ToArray();
                }).ToArray();

                var kmeans   = new KMeans(k: partitionCount);
                var clusters = kmeans.Learn(vecvec);

                // Diagnostics handed to the view alongside the partitions.
                var dict = new Dictionary<string, object>();
                dict["Kmeans Error"]   = kmeans.Error;
                dict["dimensionality"] = kmeans.Dimension;
                dict["Iterations"]     = kmeans.Iterations;
                dict["MaxIterations"]  = kmeans.MaxIterations;
                dict["Tolerance"]      = kmeans.Tolerance;

                // labels[i] is the cluster assigned to articles[i].
                int[] labels = clusters.Decide(vecvec);

                //this is totally fake. TODO: refactor these to be dumber - no need to have comparators etc.
                var dm = new DistanceMetrics<Article>((a, b) => Comparators.GetTagCommonality(a, b), (a, b) => Comparators.ArticleKeyLookup(a, b));

                // One (initially empty) partition set per cluster.
                // Assumes cluster labels are zero-based indices below partitionCount - TODO confirm.
                var psets = new List<PartitionSet<Article>>(partitionCount);
                for (var clusterId = 0; clusterId < partitionCount; clusterId++)
                {
                    psets.Add(new PartitionSet<Article>(dm, clusterId));
                }

                for (var i = 0; i < labels.Length; i++)
                {
                    psets[labels[i]].Add(articles[i]);
                }

                var partitiondata = new PartitionData<Article>(psets, dict);
                var model         = new ArticlePartitionModel(partitiondata);
                return View("ArticlePartitions", model);
            }
        }
        // ---- Example #3 (separate code excerpt) ----
        /// <summary>
        /// Splits <paramref name="Elements"/> into <paramref name="partitionCount"/> sets: each element is
        /// first placed into the set chosen by <c>FindBestPartition</c>, then the assignment is refined
        /// by repeatedly moving one misplaced element at a time.
        /// </summary>
        /// <remarks>
        /// Design notes:
        /// it is obviously impossible to test every possible assignment, so this iterates instead
        /// (possibly a form of local hill climbing); element order matters for all of that.
        /// Title length has weird transitivity; don't use it, because that won't apply for other metrics.
        /// Possible improvement: run 100 times with random starts, adding each member to the set it is
        /// closest to, then as a final step test each element to see if it belongs better in another set.
        /// Known problem: moves favor the individual element, not the overall quality of the matches -
        /// e.g. a linking element may be happier in a dedicated set while removing it hurts the set it left.
        /// </remarks>
        /// <param name="partitionCount">Number of sets to create; must be between 2 and 100 inclusive.</param>
        /// <param name="Elements">The elements to distribute across the sets.</param>
        /// <returns>
        /// The resulting sets together with a statistics dictionary (initial quality, quality after
        /// every refinement pass, pass count, last move count and final quality).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="partitionCount"/> is below 2 or above 100.
        /// </exception>
        /// <exception cref="ArgumentNullException"><paramref name="Elements"/> is null.</exception>
        public PartitionData<T> GetPartitions(int partitionCount, List<T> Elements)
        {
            if (partitionCount < 2 || partitionCount > 100)
            {
                throw new ArgumentOutOfRangeException(nameof(partitionCount), partitionCount,
                    "Are you sure you want to generate that many partitions?");
            }
            if (Elements == null)
            {
                throw new ArgumentNullException(nameof(Elements));
            }

            // Upper bound on refinement passes; each pass applies at most one move.
            const int maxPasses = 200;

            var sets = new List<PartitionSet<T>>(partitionCount);
            for (var setId = 0; setId < partitionCount; setId++)
            {
                sets.Add(new PartitionSet<T>(metrics, setId));
            }

            // Initial greedy assignment, in input order.
            foreach (var el in Elements)
            {
                FindBestPartition(el, sets).Add(el);
            }

            var stats = new Dictionary<string, object>();
            stats["InitialQuality"] = FindQuality(sets);

            // Refinement: repeat until a full scan finds nothing to move, or maxPasses is reached.
            var loopCt = 0;
            var moveCt = 0;
            while (loopCt < maxPasses)
            {
                moveCt = 0;

                // (element, current set, better set) for the first misplaced element found, if any.
                // The move is applied only after the scan so no set is modified while being enumerated.
                Tuple<T, PartitionSet<T>, PartitionSet<T>> pendingMove = null;

                foreach (var set in sets)
                {
                    foreach (var el in set.Items)
                    {
                        // NOTE: the element is still a member of its current set while it is evaluated.
                        var targetSet = FindBestPartition(el, sets);
                        if (targetSet != set)
                        {
                            pendingMove = new Tuple<T, PartitionSet<T>, PartitionSet<T>>(el, set, targetSet);
                            moveCt = 1;
                            break;
                        }
                    }
                    if (pendingMove != null)
                    {
                        break;
                    }
                }

                if (pendingMove != null)
                {
                    pendingMove.Item2.Remove(pendingMove.Item1);
                    pendingMove.Item3.Add(pendingMove.Item1);
                }

                loopCt++;
                stats[$"quality:{loopCt} moved:{moveCt}"] = FindQuality(sets);

                // A scan without any move means the assignment is stable.
                if (moveCt == 0)
                {
                    break;
                }
            }

            stats["moveCt"]        = moveCt;
            stats["loopCt"]        = loopCt;
            stats["Final quality"] = FindQuality(sets);

            return new PartitionData<T>(sets, stats);
        }