/// <summary>
/// Picks the partition set whose <c>Test</c> result for <paramref name="item"/> is the smallest.
/// </summary>
/// <param name="item">The element being scored against every set.</param>
/// <param name="sets">Candidate sets, examined in list order.</param>
/// <returns>
/// The first set with the lowest <c>Test</c> value, or <c>null</c> when no set scores strictly
/// below <see cref="double.MaxValue"/> (which includes the case of an empty list).
/// </returns>
private static PartitionSet<T> FindBestPartition(T item, List<PartitionSet<T>> sets)
{
    PartitionSet<T> winner = null;
    var lowestScore = double.MaxValue;

    for (var index = 0; index < sets.Count; index++)
    {
        var contender = sets[index];
        var score = contender.Test(item);

        // Only a strictly smaller score displaces the current winner, so ties keep the earlier set.
        // Written as a negated '<' so that a NaN score is skipped rather than accepted.
        if (!(score < lowestScore))
        {
            continue;
        }

        lowestScore = score;
        winner = contender;
    }

    return winner;
}
/// <summary>
/// Distributes <paramref name="Elements"/> across <paramref name="partitionCount"/> partition sets:
/// a greedy first pass followed by repeated single-element relocation passes.
/// </summary>
/// <remarks>
/// Design notes:
/// it is obviously impossible to test every possible partitioning. Iterating or local hill
/// climbing might be possible, but order matters for all of that. Title length has weird
/// transitivity; it is not relied on because it would not apply to other metrics. One idea is to
/// run the whole thing 100 times with random starts, adding each member to the set it is closest
/// to, and then as a final step test each element to see whether it belongs better in another set.
/// What is implemented here is a single run of that idea without the random restarts.
/// Known problem: moves favour the individual element, not the overall quality of the partitioning.
/// For example, a linking article may be happier in a dedicated set even though removing it hurts
/// the set it came from; that trade-off is not handled.
/// </remarks>
/// <param name="partitionCount">Number of sets to build; must be between 2 and 100 inclusive.</param>
/// <param name="Elements">The elements to distribute. Must not be null.</param>
/// <returns>
/// A <see cref="PartitionData{T}"/> built from the final sets and a statistics dictionary holding
/// "InitialQuality", one "quality:N moved:M" entry per pass, "moveCt", "loopCt" and "Final quality".
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="partitionCount"/> is below 2 or above 100.
/// </exception>
/// <exception cref="ArgumentNullException"><paramref name="Elements"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// No set could be chosen for an element during the initial assignment.
/// </exception>
public PartitionData<T> GetPartitions(int partitionCount, List<T> Elements)
{
    // Upper bound on relocation passes; each pass relocates at most one element.
    const int maxPasses = 200;

    if (partitionCount < 2 || partitionCount > 100)
    {
        throw new ArgumentOutOfRangeException(
            nameof(partitionCount),
            partitionCount,
            "Are you sure you want to generate that many partitions?");
    }

    if (Elements == null)
    {
        throw new ArgumentNullException(nameof(Elements));
    }

    var sets = new List<PartitionSet<T>>(partitionCount);
    for (var index = 0; index < partitionCount; index++)
    {
        sets.Add(new PartitionSet<T>(metrics, index));
    }

    // NOTE(review): db is never used inside this block. It is kept only because the side effects
    // of constructing a FusekiContext are not visible from here - confirm and remove if unneeded.
    using (var db = new FusekiContext())
    {
        foreach (var el in Elements)
        {
            var targetSet = FindBestPartition(el, sets);
            if (ReferenceEquals(targetSet, null))
            {
                // FindBestPartition yields null when no set scores below double.MaxValue.
                throw new InvalidOperationException(
                    "No partition could be selected for an element during the initial assignment.");
            }

            targetSet.Add(el);
        }
    }

    // Initial assignment done.
    // Now look for an element that scores better in a different set and move it, repeating until
    // a pass finds nothing to move (stability) or the pass limit is reached.
    var stats = new Dictionary<string, object>();
    stats["InitialQuality"] = FindQuality(sets);

    var loopCt = 0;
    var moveCt = 0;
    while (loopCt < maxPasses)
    {
        moveCt = 0;
        var movedItem = default(T);
        PartitionSet<T> source = null;
        PartitionSet<T> destination = null;

        foreach (var set in sets)
        {
            foreach (var el in set.Items)
            {
                // NOTE(review): the original intent was to remove the element first so it has a
                // free choice; as written, it is still a member of its current set when scored.
                var targetSet = FindBestPartition(el, sets);

                // A null target means no set scored below double.MaxValue; leave the element
                // where it is instead of removing it and then failing to re-add it.
                if (!ReferenceEquals(targetSet, null) && targetSet != set)
                {
                    movedItem = el;
                    source = set;
                    destination = targetSet;
                    moveCt++;
                    break;
                }
            }

            if (moveCt > 0)
            {
                break;
            }
        }

        // The move is applied only after both loops have exited, so set.Items is never modified
        // while it is being enumerated.
        if (moveCt > 0)
        {
            source.Remove(movedItem);
            destination.Add(movedItem);
        }

        loopCt++;
        stats[$"quality:{loopCt} moved:{moveCt}"] = FindQuality(sets);

        if (moveCt == 0)
        {
            break;
        }
    }

    stats["moveCt"] = moveCt;
    stats["loopCt"] = loopCt;
    stats["Final quality"] = FindQuality(sets);

    return new PartitionData<T>(sets, stats);
}