private static void Print(string newName, AllResults loadedFile)
{
    // Render the loaded results as an HTML report next to the source .bin file.
    using (var sw = new StreamWriter(newName.Replace(".bin", ".html"), append: false))
    {
        loadedFile.Render(sw);
    }
}
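// Hedged example of a call site for Print. It assumes the .bin files are
// protobuf-net-serialized AllResults instances (suggested by the \protobuf\
// paths used below); the repo's actual loader may differ.
private static void LoadAndPrintSketch(string binPath)
{
    using (var fs = System.IO.File.OpenRead(binPath))
    {
        var loaded = ProtoBuf.Serializer.Deserialize<AllResults>(fs);
        Print(binPath, loaded);
    }
}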
public static void AgglomerativeClustering(AllResults loaded)
{
    var imgName = "241666.jpg"; //"159161.jpg";
    var imgId = loaded.ImageEncoding[imgName];
    var relevantRows = loaded.Rows
        //.Where(r => r.Query.ImageId == imgId)
        .ToList();

    // Cluster all of them together? Then the query must be included in the
    // dissimilarity function. Or go product by product, filter down to the big
    // elements, and offer to transitively load more and more?
    var metric = new ResultsRowSetBasedDistance();
    var linkage = new AverageLinkage<ResultsRow>(metric);
    var algorithm = new AgglomerativeClusteringAlgorithm<ResultsRow>(linkage);
    var clusters = algorithm.GetClustering(new HashSet<ResultsRow>(relevantRows));
    clusters.SaveToCsv(@"G:\siret\zoot\protobuf\clustertest.csv");
    //RenderData();

    var dummyResults = new AllResults
    {
        ImageEncoding = loaded.ImageEncoding,
        PatchEncoding = loaded.PatchEncoding
    };

    // Walk the dendrogram top-down from the root cluster: accept a cluster once it
    // is both tight (dissimilarity <= 0.70) and small (< 50 rows); otherwise split
    // it into its two parent clusters and keep descending.
    var clusterQueue = new Queue<Cluster<ResultsRow>>(new[] { clusters.SingleCluster });
    while (clusterQueue.Count > 0)
    {
        var item = clusterQueue.Dequeue();
        if (item.Dissimilarity <= 0.70 && item.Count < 50)
        {
            // Collapse the accepted cluster into one synthetic row: union the
            // members' hits (keeping the best distance per hit) and append the
            // member queries themselves with Distance = -1 so they render too.
            dummyResults.Rows.Add(new ResultsRow
            {
                Query = item.First().Query,
                Hits = item.SelectMany(x => x.Hits)
                    .GroupBy(x => x.Hit)
                    .Select(x => new SearchHit { Hit = x.Key, Distance = x.Min(y => y.Distance) })
                    .Concat(item.Select(i => new SearchHit { Hit = i.Query, Distance = -1 }))
                    .ToArray()
            });
        }
        else
        {
            clusterQueue.Enqueue(item.Parent1);
            clusterQueue.Enqueue(item.Parent2);
        }
    }

    loaded.RefreshReferenceMap();
    // Ensure every patch references itself in the reference map.
    foreach (var k in AllResults.ReferenceMap.Keys)
    {
        AllResults.ReferenceMap[k][k] = 1;
    }

    using (var sw = new StreamWriter(@"G:\siret\zoot\protobuf\clusteringTestMega.html", append: false))
    {
        dummyResults.Render(sw);
    }
}
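// Hedged sketch, not the repo's actual metric: ResultsRowSetBasedDistance is used
// above but defined elsewhere. A plausible set-based dissimilarity between two
// result rows is 1 - Jaccard overlap of their hit sets, so rows that retrieve the
// same patches score near 0 and cluster together. The interface name and the
// Calculate signature come from Aglomera; note this version ignores the query
// itself, which matches the open question in the comment above.
private sealed class SetBasedDistanceSketch : IDissimilarityMetric<ResultsRow>
{
    public double Calculate(ResultsRow a, ResultsRow b)
    {
        // Compare the sets of hit patches using default equality.
        var common = a.Hits.Select(h => h.Hit).Intersect(b.Hits.Select(h => h.Hit)).Count();
        var union = a.Hits.Select(h => h.Hit).Union(b.Hits.Select(h => h.Hit)).Count();
        // Two rows with no hits at all are treated as identical.
        return union == 0 ? 0.0 : 1.0 - (double)common / union;
    }
}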