private static string Filter(AllResults loadedFile, string file)
{
    // 31 passes of reference-map refresh followed by reference-based shrinking.
    for (int i = 0; i < 31; i++)
    {
        Console.WriteLine($"iteration {i} starting");
        loadedFile.RefreshReferenceMap();
        loadedFile.RefBasedShrink();
    }

    var newName = file.Replace(".bin", "-refShrink30.bin");
    loadedFile.Save(newName);
    return newName;
}
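// Minimal usage sketch for Filter. "conv3-local.bin" is only a placeholder path taken from the
// file names used elsewhere in this class; the method name FilterExample is illustrative and not
// part of the original program.
private static void FilterExample()
{
    var inputPath = "conv3-local.bin";            // hypothetical serialized k-NN dump
    var results = AllResults.Load(inputPath);     // load the protobuf-serialized results
    var shrunkPath = Filter(results, inputPath);  // run the fixed shrink passes and save a copy
    Console.WriteLine($"Shrunk copy written to {shrunkPath}");
}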
public static void CreateProtobufFile()
{
    Console.WriteLine("Enter name of .csv file");
    var path = Console.ReadLine();
    Console.WriteLine("Enter number of items to fetch");
    var topN = Int32.Parse(Console.ReadLine() ?? "50");
    Console.WriteLine("Enter number of neighbours to show");
    var localTake = Int32.Parse(Console.ReadLine() ?? "100");

    var parseInfo = new AllResults();
    int rowsProcessed = 0;
    foreach (var line in File.ReadLines(path).Take(topN))
    {
        var parts = line.Split(';');
        var hits = parts
            .Skip(1)          // drop the query column
            .Take(localTake)
            .Skip(1)          // also drop the first neighbour (presumably the query's self-match)
            .Select<string, SearchHit>(x => NameToHit(x, parseInfo))
            .ToArray();
        var queryObject = NameToPatch(parts[0], parseInfo);
        parseInfo.Rows.Add(new ResultsRow { Hits = hits, Query = queryObject });
        if (rowsProcessed++ % 1000 == 0)
        {
            Console.WriteLine(rowsProcessed);
        }
    }

    parseInfo.Rows.Sort((first, second) => first.Query.CompareTo(second.Query));
    Console.WriteLine(
        $"Images = {parseInfo.ImageEncoding.Count}, Patches = {parseInfo.PatchEncoding.Count}, Rows = {parseInfo.Rows.Count}");

    var name = Path.GetFileNameWithoutExtension(path) + ".bin";
    parseInfo.Save(name);
    Console.WriteLine("Done, press enter");
    Console.ReadLine();
}
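// The parser above implies one query per line, semicolon-separated, with the query patch name
// first and its nearest neighbours after it. A non-interactive wrapper could look like the sketch
// below; ParseKnnCsv is an illustrative name and not part of the original program, and it assumes
// NameToHit/NameToPatch behave exactly as they are used above (registering names in parseInfo's
// encodings and returning SearchHit/Patch instances).
private static AllResults ParseKnnCsv(string path, int topN, int localTake)
{
    var parseInfo = new AllResults();
    foreach (var line in File.ReadLines(path).Take(topN))
    {
        var parts = line.Split(';');
        parseInfo.Rows.Add(new ResultsRow
        {
            Query = NameToPatch(parts[0], parseInfo),
            Hits = parts
                .Skip(1)
                .Take(localTake)
                .Skip(1)
                .Select<string, SearchHit>(x => NameToHit(x, parseInfo))
                .ToArray()
        });
    }
    parseInfo.Rows.Sort((first, second) => first.Query.CompareTo(second.Query));
    return parseInfo;
}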
private static void CalculateStats(string filename)
{
    using (var file = new StreamWriter("filtering-statistics-selection.csv", append: true))
    using (var sw = new CompositionWriter(new[] { file, Console.Out }))
    {
        AllResults loadedFile;
        void PrintStats(string stepName) => loadedFile.PrintStats(filename, stepName, sw);

        // First pass: drop unusable candidate rows and cache the cleaned file so reruns can skip this step.
        var smallerFileName = filename.Replace(".bin", "-tresholdBasedCleaned.bin");
        if (!File.Exists(smallerFileName))
        {
            Console.WriteLine("Starting from scratch, no previous save point");
            loadedFile = AllResults.Load(filename);
            PrintStats("Default-all");

            loadedFile.Rows.RemoveAll(r => r.HasNearDuplicates());
            GC.Collect();
            PrintStats("Near-duplicate-candidates-removed");

            loadedFile.Rows.RemoveAll(r => r.HasTooManyCloseMatches());
            PrintStats("Too-large-candidates-removed");

            loadedFile.Rows.RemoveAll(r => r.IsTooEquidistant());
            PrintStats("Equidistant-candidates-removed");

            loadedFile.Save(smallerFileName);
        }

        // Hand-picked parameter combinations per input file:
        // distance-derivative ratio and cluster size bounds (Max/Min images per cluster).
        var combinations = new[]
        {
            new { File = "conv3-local.bin", Ratio = 0.91, Max = 400, Min = 10 },
            new { File = "conv3-local.bin", Ratio = 0.88, Max = 400, Min = 8 },
            new { File = "conv3-local.bin", Ratio = 0.96, Max = 50, Min = 10 },
            new { File = "conv4-local.bin", Ratio = 0.91, Max = 800, Min = 12 },
            new { File = "conv4-local.bin", Ratio = 0.89, Max = 800, Min = 8 },
            new { File = "conv4-local.bin", Ratio = 0.94, Max = 400, Min = 12 },
            new { File = "conv5-local.bin", Ratio = 0.8, Max = 800, Min = 8 },
            new { File = "conv5-local.bin", Ratio = 0.76, Max = 50, Min = 6 },
            new { File = "conv5-local.bin", Ratio = 0.92, Max = 200, Min = 8 },
        }.ToLookup(x => x.File); // new[]{5,6,7,9,10,11,13,14,15}

        var bigFile = AllResults.Load(filename);
        Console.WriteLine(filename + " was big-loaded.");

        foreach (var c in combinations[filename])
        {
            var ratio = c.Ratio;
            loadedFile = AllResults.Load(smallerFileName);
            loadedFile.Rows.ForEach(r => r.FilterNeigbhoursUsingDistanceDerivative(ratio));
            loadedFile.RefreshReferenceMap();
            loadedFile.RefBasedShrink();
            loadedFile.RefreshReferenceMap();

            // Keep shrinking until a pass removes nothing, capped at 30 further iterations.
            for (int i = 1; i < 31; i++)
            {
                var removed = loadedFile.RefBasedShrink();
                loadedFile.RefreshReferenceMap();
                if (removed == 0)
                {
                    Console.WriteLine($"Nothing removed at iteration {i}, stopping ref-based shrink for {ratio}");
                    break;
                }
            }

            // foreach (var maxImagesTreshold in new[]{25,50,100,200,400,800,1600})
            // foreach (var minImagesTreshold in new[]{2,4,6,8,10,12})
            var clustered = ClusterDecomposition.GroupIntoClusters(loadedFile, c.Max, c.Min);
            clustered.PrintStats(filename, $"After-clustering;{ratio};{c.Max};{c.Min}", sw);

            var clusterName = filename.Replace(".bin", $"deriv_{ratio}-max_{c.Max}-min_{c.Min}.bin");
            clustered.Save(clusterName);
            Console.WriteLine(clusterName + " was saved.");

            using (var htmlw = new StreamWriter(clusterName.Replace(".bin", ".html")))
            {
                clustered.Render(htmlw);
            }
            Console.WriteLine(clusterName + " was rendered.");

            // Translate the clustered (re-encoded) patches back into the encoding of the full k-NN file
            // and keep only the rows whose query patch survived clustering.
            var wlie = clustered.ImageEncoding.Reverse();
            var wlpe = clustered.PatchEncoding.Reverse();
            var interestingImagePatches = clustered
                .Rows.SelectMany(r => r.Hits.Select(h => h.Hit).Concat(new[] { r.Query }))
                .Distinct()
                .Select(p => new Patch
                {
                    ImageId = bigFile.ImageEncoding[wlie[p.ImageId]],
                    PatchId = bigFile.PatchEncoding[wlpe[p.PatchId]]
                })
                .ToLookup(x => x);

            var newBigFile = new AllResults
            {
                ImageEncoding = bigFile.ImageEncoding,
                PatchEncoding = bigFile.PatchEncoding,
                Rows = bigFile.Rows.Where(rr => interestingImagePatches.Contains(rr.Query)).ToList()
            };
            Console.WriteLine(clusterName + "'s essential knn was shrunk.");
            newBigFile.Save(clusterName.Replace(".bin", "-essential-knn.bin"));

            Console.WriteLine(
                "After filtering of {2} = {0} rows remaining, {1} were removed",
                newBigFile.Rows.Count,
                bigFile.Rows.Count - newBigFile.Rows.Count,
                clusterName);
        }
    }

    Console.WriteLine(filename + " Done");
}