/// <summary>
/// Collapses the search results in <paramref name="loaded"/> into one synthetic row per cluster.
/// Query/hit pairs are treated as undirected edges, every patch is passed to <c>Visit</c>
/// (defined elsewhere in this file; presumably it fills <c>imgColors</c> with a cluster id
/// per patch - confirm), and clusters whose number of distinct images falls inside the
/// given thresholds are turned into rows of a new <see cref="AllResults"/>.
/// </summary>
/// <param name="loaded">Source results; its encodings are reused by the returned object.</param>
/// <param name="imagePerCandidateMaxTreshold">Exclusive upper bound on distinct images per kept cluster.</param>
/// <param name="imagesPerCandidateMinTreshold">Inclusive lower bound on distinct images per kept cluster.</param>
/// <returns>A new <see cref="AllResults"/> with one row per kept cluster and a refreshed reference map.</returns>
public static AllResults GroupIntoClusters(AllResults loaded, int imagePerCandidateMaxTreshold = 2000, int imagesPerCandidateMinTreshold = 4)
{
    // Reset the shared state used by Visit.
    imgColors.Clear();
    totalColors = 0;

    // Every (query, hit) pair is one directed edge; adding the reversed pairs makes the graph undirected.
    var directional = loaded.Rows
        .SelectMany(x => x.Hits.Select(y => new { x.Query, y.Hit }))
        .Distinct()
        .ToList();
    neighbours = directional
        .Union(directional.Select(a => new { Query = a.Hit, Hit = a.Query }))
        .ToLookup(x => x.Query, x => x.Hit);

    stack = new Stack<Patch>(neighbours.Select(x => x.Key));
    while (stack.Count > 0)
    {
        Visit(stack.Pop());
    }

    // Materialized once: the query is consumed twice below (count + patch set),
    // and the distinct-image count is needed for both threshold checks.
    var stats = imgColors
        .GroupBy(x => x.Value)
        .Where(x =>
        {
            var imageCount = x.Select(z => z.Key.ImageId).Distinct().Count();
            return imageCount < imagePerCandidateMaxTreshold
                && imageCount >= imagesPerCandidateMinTreshold;
        })
        .ToList();
    Console.WriteLine("Having " + stats.Count + " nice clusters");

    var rareImgs = new HashSet<Patch>(stats.SelectMany(x => x.Select(y => y.Key)));
    var artificalResults = new AllResults
    {
        ImageEncoding = loaded.ImageEncoding,
        PatchEncoding = loaded.PatchEncoding
    };

    var colorGroups = loaded.Rows
        .Where(r => rareImgs.Contains(r.Query))
        .GroupBy(x => imgColors[x.Query]);

    foreach (var g in colorGroups)
    {
        // Distance assigned to the query patches themselves when they are added as hits:
        // the largest "best hit" distance in the cluster. Computed once per cluster rather
        // than once per row. NOTE(review): Min throws if any row has no hits, as before.
        var selfDistance = g.Max(row => row.Hits.Min(h => h.Distance));

        artificalResults.Rows.Add(new ResultsRow
        {
            Query = g.First().Query,
            Hits = g.SelectMany(x => x.Hits.Concat(new[] { new SearchHit { Hit = x.Query, Distance = selfDistance } }))
                .GroupBy(x => x.Hit)
                // Keep the smallest distance seen for each distinct hit patch.
                .Select(x => new SearchHit { Hit = x.Key, Distance = x.Min(y => y.Distance) })
                .Distinct()
                .ToArray()
        });
    }

    artificalResults.RefreshReferenceMap();
    return artificalResults;
}
/// <summary>
/// Repeatedly refreshes the reference map of <paramref name="loadedFile"/> and applies
/// <c>RefBasedShrink</c> to it, then saves the result under a name derived from
/// <paramref name="file"/>.
/// </summary>
/// <param name="loadedFile">Results object that is shrunk in place and then saved.</param>
/// <param name="file">Original file name; each ".bin" occurrence is replaced to form the new name.</param>
/// <returns>The file name the shrunk results were saved under.</returns>
private static string Filter(AllResults loadedFile, string file)
{
    // Iterations are numbered 0..30 inclusive; the last index also ends up in the file name.
    const int lastIteration = 30;

    var iteration = 0;
    while (iteration <= lastIteration)
    {
        Console.WriteLine($"iteration {iteration} starting");
        loadedFile.RefreshReferenceMap();
        loadedFile.RefBasedShrink();
        iteration++;
    }

    var suffix = "-refShrink" + lastIteration + ".bin";
    var newName = file.Replace(".bin", suffix);
    loadedFile.Save(newName);
    return newName;
}
/// <summary>
/// Runs average-linkage agglomerative clustering over all rows of <paramref name="loaded"/>,
/// writes the clustering to a CSV file, and renders the sufficiently tight and small
/// clusters as an HTML report. Both output paths are hard-coded.
/// </summary>
/// <param name="loaded">Results whose rows are clustered; its reference map is refreshed as a side effect.</param>
public static void AgglomerativeClustering(AllResults loaded)
{
    // A cluster is emitted as one row once it is at most this dissimilar and
    // smaller than this size; otherwise it is split into its two parents.
    const double maxDissimilarity = 0.70;
    const int maxClusterSize = 50;

    // Open question carried over from the original author: cluster all rows together
    // (and include the query in the dissimilarity function), or go product by product,
    // filter down to big elements, and transitively load more?
    // All rows are clustered for now. The earlier lookup of a hard-coded image id was
    // removed: its result was unused, yet it could fail on data sets without that image.
    var relevantRows = loaded.Rows.ToList();

    var metric = new ResultsRowSetBasedDistance();
    var linkage = new AverageLinkage<ResultsRow>(metric);
    var algorithm = new AgglomerativeClusteringAlgorithm<ResultsRow>(linkage);
    var clusters = algorithm.GetClustering(new HashSet<ResultsRow>(relevantRows));
    clusters.SaveToCsv(@"G:\siret\zoot\protobuf\clustertest.csv");

    var dummyResults = new AllResults
    {
        ImageEncoding = loaded.ImageEncoding,
        PatchEncoding = loaded.PatchEncoding
    };

    // Walk the hierarchy top-down, starting from the single root cluster.
    var clusterQueue = new Queue<Cluster<ResultsRow>>(new[] { clusters.SingleCluster });
    while (clusterQueue.Count > 0)
    {
        var item = clusterQueue.Dequeue();
        if (item.Dissimilarity <= maxDissimilarity && item.Count < maxClusterSize)
        {
            dummyResults.Rows.Add(new ResultsRow
            {
                Query = item.First().Query,
                Hits = item.SelectMany(x => x.Hits)
                    .GroupBy(x => x.Hit)
                    // Keep the smallest distance seen for each distinct hit patch.
                    .Select(x => new SearchHit { Hit = x.Key, Distance = x.Min(y => y.Distance) })
                    // The member queries themselves are appended as hits with distance -1.
                    .Concat(item.Select(i => new SearchHit { Hit = i.Query, Distance = -1 }))
                    .ToArray()
            });
        }
        else
        {
            // NOTE(review): assumes every cluster failing the test above has two parents
            // (i.e. leaves always pass) - confirm against the clustering library.
            clusterQueue.Enqueue(item.Parent1);
            clusterQueue.Enqueue(item.Parent2);
        }
    }

    loaded.RefreshReferenceMap();
    // Give every key in the reference map an entry for itself.
    foreach (var k in AllResults.ReferenceMap.Keys)
    {
        AllResults.ReferenceMap[k][k] = 1;
    }

    using (var sw = new StreamWriter(@"G:\siret\zoot\protobuf\clusteringTestMega.html", append: false))
    {
        dummyResults.Render(sw);
    }
}