Beispiel #1
0
        /// <summary>
        /// Runs a fixed number of refresh-reference-map / reference-based-shrink passes over
        /// <paramref name="loadedFile"/> and saves the result under a name derived from
        /// <paramref name="file"/>.
        /// </summary>
        /// <param name="loadedFile">Result set the refresh, shrink and save calls are invoked on.</param>
        /// <param name="file">Source file name; each ".bin" occurrence is replaced to build the output name.</param>
        /// <returns>The file name the result set was saved under.</returns>
        private static string Filter(AllResults loadedFile, string file)
        {
            // Passes are numbered 0..30 (31 passes in total); the last index also appears in the output name.
            const int lastIteration = 30;

            int iteration = 0;
            while (iteration <= lastIteration)
            {
                Console.WriteLine($"iteration {iteration} starting");
                loadedFile.RefreshReferenceMap();
                loadedFile.RefBasedShrink();
                iteration++;
            }

            string suffix = "-refShrink" + lastIteration + ".bin";
            string outputName = file.Replace(".bin", suffix);
            loadedFile.Save(outputName);

            return outputName;
        }
Beispiel #2
0
        /// <summary>
        /// Interactively converts a semicolon-separated .csv file into an <c>AllResults</c> object
        /// and saves it as a .bin file.
        /// </summary>
        /// <remarks>
        /// Prompts on the console for the .csv path, the number of lines to read (default 50) and
        /// the number of neighbours to take per line (default 100). The first field of every line
        /// is the query; the following fields are turned into hits. The output is written to the
        /// current directory as the input's file name with the extension replaced by ".bin".
        /// </remarks>
        /// <exception cref="FormatException">A non-blank numeric answer is not a valid integer.</exception>
        public static void CreateProtobufFile()
        {
            // Console.ReadLine() returns "" (not null) when the user just presses enter, so a plain
            // "?? default" never applies interactively and Int32.Parse("") throws. Treat blank
            // input like missing input; anything else is still parsed strictly.
            int ReadIntOrDefault(int defaultValue)
            {
                var input = Console.ReadLine();
                return string.IsNullOrWhiteSpace(input) ? defaultValue : Int32.Parse(input);
            }

            Console.WriteLine("Enter name of .csv file");
            var path = Console.ReadLine();

            Console.WriteLine("Enter number of items to fetch");
            var topN = ReadIntOrDefault(50);

            Console.WriteLine("Enter number of neighbours to show");
            var localTake = ReadIntOrDefault(100);

            var parseInfo     = new AllResults();
            int rowsProcessed = 0;

            foreach (var line in File.ReadLines(path).Take(topN))
            {
                var parts = line.Split(';');

                // Field 0 is the query. Of the next localTake fields the first one is dropped too
                // NOTE(review): presumably because the nearest neighbour is the query itself - confirm.
                var hits = parts
                           .Skip(1)
                           .Take(localTake)
                           .Skip(1)
                           .Select <string, SearchHit>(x => NameToHit(x, parseInfo))
                           .ToArray();

                // Split always yields at least one element, so index 0 is safe.
                // The query is converted after the hits, as before.
                var queryObject = NameToPatch(parts[0], parseInfo);

                parseInfo.Rows.Add(new ResultsRow {
                    Hits = hits, Query = queryObject
                });

                // Progress output: the counter is tested before and printed after the increment,
                // so this prints 1, 1001, 2001, ...
                if (rowsProcessed++ % 1000 == 0)
                {
                    Console.WriteLine(rowsProcessed);
                }
            }

            parseInfo.Rows.Sort((first, second) => first.Query.CompareTo(second.Query));

            Console.WriteLine(
                $"Images = {parseInfo.ImageEncoding.Count}, Patches = {parseInfo.PatchEncoding.Count}, Rows = {parseInfo.Rows.Count}");
            var name = Path.GetFileNameWithoutExtension(path) + ".bin";

            parseInfo.Save(name);

            Console.WriteLine("Done, press enter");
            Console.ReadLine();
        }
Beispiel #3
0
        /// <summary>
        /// Runs the filtering and clustering pipeline for one result file and records statistics
        /// after each step.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Unless a "-tresholdBasedCleaned.bin" save point already exists, load
        ///    <paramref name="filename"/>, remove rows flagged by the three row predicates and
        ///    save the save point.
        /// 2. For every parameter combination registered for <paramref name="filename"/>, reload
        ///    the save point, filter neighbours by the distance-derivative ratio, repeat the
        ///    reference-based shrink until nothing is removed (at most 30 further passes), cluster,
        ///    then save and render the clusters.
        /// 3. Save a reduced copy of the full file that keeps only rows whose query occurs in the
        ///    clustered result ("-essential-knn.bin").
        /// Statistics go through a <c>CompositionWriter</c> built over the appended
        /// "filtering-statistics-selection.csv" and the console.
        /// </remarks>
        /// <param name="filename">
        /// Name of the .bin file to process. It is also the key into the parameter table, so only
        /// the exact names listed there trigger step 2 and 3.
        /// </param>
        private static void CalculateStats(string filename)
        {
            using (var file = new StreamWriter("filtering-statistics-selection.csv", append: true))
            using (var sw = new CompositionWriter(new[] { file, Console.Out }))
            {
                AllResults loadedFile;
                void PrintStats(string stepName)
                {
                    loadedFile.PrintStats(filename, stepName, sw);
                }

                var smallerFileName = filename.Replace(".bin", "-tresholdBasedCleaned.bin");
                if (!File.Exists(smallerFileName))
                {
                    Console.WriteLine("Starting from scatch, no previous save point");
                    loadedFile = AllResults.Load(filename);
                    PrintStats("Default-all");

                    loadedFile.Rows.RemoveAll(r => r.HasNearDuplicates());
                    GC.Collect();
                    PrintStats("Near-duplicate-candidates-removed");

                    loadedFile.Rows.RemoveAll(r => r.HasTooManyCloseMatches());
                    PrintStats("Too-large-candidates-removed");

                    loadedFile.Rows.RemoveAll(r => r.IsTooEquidistant());
                    PrintStats("Equidistant-candidates-removed");

                    loadedFile.Save(smallerFileName);
                }

                // Parameter sets to evaluate, grouped by the input file they apply to.
                var combinations = new[]
                {
                    new { File = "conv3-local.bin", Ratio = 0.91, Max = 400, Min = 10 },
                    new { File = "conv3-local.bin", Ratio = 0.88, Max = 400, Min = 8 },
                    new { File = "conv3-local.bin", Ratio = 0.96, Max = 50, Min = 10 },

                    new { File = "conv4-local.bin", Ratio = 0.91, Max = 800, Min = 12 },
                    new { File = "conv4-local.bin", Ratio = 0.89, Max = 800, Min = 8 },
                    new { File = "conv4-local.bin", Ratio = 0.94, Max = 400, Min = 12 },

                    new { File = "conv5-local.bin", Ratio = 0.8, Max = 800, Min = 8 },
                    new { File = "conv5-local.bin", Ratio = 0.76, Max = 50, Min = 6 },
                    new { File = "conv5-local.bin", Ratio = 0.92, Max = 200, Min = 8 },
                }.ToLookup(x => x.File);
                // new[]{5,6,7,9,10,11,13,14,15}

                // The unfiltered file is the source for the "essential knn" extract of every combination.
                var bigFile = AllResults.Load(filename);
                Console.WriteLine(filename + " was big-loaded.");
                foreach (var c in combinations[filename])
                {
                    var ratio = c.Ratio;

                    // Each combination starts again from the save point.
                    loadedFile = AllResults.Load(smallerFileName);
                    loadedFile.Rows.ForEach(r => r.FilterNeigbhoursUsingDistanceDerivative(ratio));
                    loadedFile.RefreshReferenceMap();
                    loadedFile.RefBasedShrink();
                    loadedFile.RefreshReferenceMap();

                    // Further shrink passes until one removes nothing; capped at 30 passes.
                    for (int i = 1; i < 31; i++)
                    {
                        var removed = loadedFile.RefBasedShrink();
                        loadedFile.RefreshReferenceMap();
                        if (removed == 0)
                        {
                            Console.WriteLine($"Nothing removed at iteration {i}, stopping ref-based shrink for {ratio}");
                            break;
                        }
                    }

                    // foreach (var maxImagesTreshold in new[]{25,50,100,200,400,800,1600})
                    //  foreach (var minImagesTreshold in new[]{2,4,6,8,10,12})
                    var clustered = ClusterDecomposition.GroupIntoClusters(loadedFile, c.Max, c.Min);
                    clustered.PrintStats(filename, $"After-clustering;{ratio};{c.Max};{c.Min}", sw);
                    var clusterName = filename.Replace(".bin", $"deriv_{ratio}-max_{c.Max}-min_{c.Min}.bin");
                    clustered.Save(clusterName);
                    Console.WriteLine(clusterName + " was saved.");

                    // Swap only the extension. Replace("bin", ".html") rewrote every "bin" substring
                    // and kept the existing dot, which produced names like "x..html".
                    using (var htmlw = new StreamWriter(Path.ChangeExtension(clusterName, ".html")))
                    {
                        clustered.Render(htmlw);
                    }
                    Console.WriteLine(clusterName + " was rendered.");

                    // Translate the clustered file's image/patch ids into the ids used by bigFile.
                    // NOTE(review): Reverse() is presumably a project extension that inverts the
                    // encoding map (it is indexed by id below) - confirm.
                    var wlie = clustered.ImageEncoding.Reverse();
                    var wlpe = clustered.PatchEncoding.Reverse();
                    var interestingImagePatches = clustered
                                                  .Rows.SelectMany(r => r.Hits.Select(h => h.Hit).Concat(new[] { r.Query }))
                                                  .Distinct()
                                                  .Select(p => new Patch
                                                  {
                                                      ImageId = bigFile.ImageEncoding[wlie[p.ImageId]],
                                                      PatchId = bigFile.PatchEncoding[wlpe[p.PatchId]]
                                                  })
                                                  .ToLookup(x => x);

                    // Keep only the rows of the full file whose query appears in the clustered result.
                    var newBigFile = new AllResults
                    {
                        ImageEncoding = bigFile.ImageEncoding,
                        PatchEncoding = bigFile.PatchEncoding,
                        Rows          = bigFile.Rows.Where(rr => interestingImagePatches.Contains(rr.Query)).ToList()
                    };
                    Console.WriteLine(clusterName + " 's essential knn was shrinked.");
                    newBigFile.Save(clusterName.Replace(".bin", "-essential-knn.bin"));

                    Console.WriteLine("After filtering of {2} = {0} rows remaining, {1} was removed", newBigFile.Rows.Count, bigFile.Rows.Count - newBigFile.Rows.Count, clusterName);
                }
            }

            Console.WriteLine(filename + " Done");
        }