Example #1
        public static int Estimate(QualityOptions opts)
        {
            Random rnd = new Random(0xf00d);

            // Do a warm-up learning call and ignore the result;
            // the first PROSE learning call always takes longer for some reason.
            Synthesizer.Learn(1, Synthesizer.StringToState(">)#*$&"), Synthesizer.StringToState("969dvb"));

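            // One log file per parameter configuration: clustering parameters Mu x Theta and the profiling fraction.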
            var log_path = Path.Combine(Utils.Paths.Root, "logs", $"Quality.FlashProfile.{opts.Mu,4:F2}x{opts.Theta,4:F2}.{opts.ProfileFraction:F2}.log");

            File.WriteAllText(log_path, "");

            var datasets_dir = Path.Combine(Utils.Paths.Root, "logs", $"datasets.{opts.ProfileFraction}");

            if (!Directory.Exists(datasets_dir))
            {
                Directory.CreateDirectory(datasets_dir);
            }

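            // Report header: match rate (+VE), mismatch rate (-VE), their difference Δ, and the running average of Δ.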
            Console.WriteLine($"  {"FILENAME",50} => (+VE) - (-VE) =   Δ   | Avg Δ");
            Console.WriteLine(new string('-', 128));

            double final_result = 0, count = 0, precision = 0, recall = 0;

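            // For each dataset: learn a profile from a random fraction of its strings, then score it
            // against held-out strings from the same dataset (should match) and strings sampled from
            // every other dataset (should mismatch).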
            for (int i = 0; i < Utils.Paths.DomainsDatasets.Length; ++i)
            {
                var file_path       = Utils.Paths.DomainsDatasets[i];
                var file_name       = Path.GetFileNameWithoutExtension(file_path);
                var short_file_path = file_path.Replace(Utils.Paths.Root, "");
                // Materialize the shuffle once: OrderBy over rnd.Next() is lazy, so re-enumerating
                // `inputs` below would otherwise produce a different order each time.
                var inputs          = TestCase.LoadNonEmptyData(file_path).OrderBy(s => rnd.Next()).ToList();

                Console.Write($"\r[+] ({i,2} / {Utils.Paths.DomainsDatasets.Length,2}) {short_file_path} ... ");

                int profile_data_size = Convert.ToInt32(inputs.Count() * opts.ProfileFraction);
                if (profile_data_size < 8)
                {
                    Console.WriteLine($"\r> {short_file_path,50} => ignore: dataset too small");
                    continue;
                }

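                // Learn a profile over the first `profile_data_size` shuffled strings, constrained to the
                // default atoms and the configured clustering parameters (Mu, Theta).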
                var profile_data = inputs.Take(profile_data_size).ToList();
                var constraints  = profile_data.Select(s => Learner.Instance.BuildPositiveConstraint(s, true, false))
                                   .Append(new AllowedTokens <string, bool>(Utils.Default.Atoms))
                                   .Append(new ClusteringParameters <string, bool>(opts.Mu, opts.Theta));
                var program = Learner.Instance.Learn(constraints);

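                // Build the evaluation sets: the remaining strings from this dataset should match the
                // learned profile; an equally sized sample drawn from the other datasets should not.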
                var should_match_data         = inputs.Skip(profile_data_size);
                int should_mismatch_data_size = Math.Max(1, should_match_data.Count()
                                                         / (Utils.Paths.DomainsDatasets.Length - 1));
                var should_mismatch_data
                    = Utils.Paths.DomainsDatasets.Where(s => s != file_path)
                      .SelectMany(src => TestCase.LoadNonEmptyData(src).OrderBy(s => rnd.Next())
                                  .Take(should_mismatch_data_size)).ToList();
                should_match_data = should_match_data.Take(should_mismatch_data.Count);

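                // Count acceptances in each set; a null program (failed learning) accepts nothing.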
                double mismatch = should_mismatch_data.Count(s => program?.Run(s) ?? false);
                double match    = should_match_data.Count(s => program?.Run(s) ?? false);

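                // Accumulate precision (accepted matches over all acceptances) and recall
                // (accepted matches over the size of the evaluation sets, which were truncated to be equal).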
                precision += match / (match + mismatch);
                recall    += match / should_mismatch_data.Count;

                match    /= should_mismatch_data.Count;
                mismatch /= should_mismatch_data.Count;
                double result = match - mismatch;
                final_result += result;
                Console.WriteLine($"\r> {short_file_path,50} => {match:F3} - {mismatch:F3} = {result:F3} | {final_result / ++count:F3}");

                // Since `file_name` may be the same for two files within test/*, we prepend `i`.
                File.WriteAllText(Path.Combine(datasets_dir, $"{i}_{file_name}.profiled"), string.Join("\n", profile_data));
                File.WriteAllText(Path.Combine(datasets_dir, $"{i}_{file_name}.match"), string.Join("\n", should_match_data));
                File.WriteAllText(Path.Combine(datasets_dir, $"{i}_{file_name}.mismatch"), string.Join("\n", should_mismatch_data));

                File.AppendAllText(log_path, $"> {short_file_path}\n\n");
                File.AppendAllText(log_path, $"  * Profiled Subset of Data:\n    {string.Join("\n    ", profile_data)}\n\n");
                File.AppendAllText(log_path, $"  * Default Profile:\n    {string.Join("\n    ", program?.Description() ?? Enumerable.Empty<string>())}\n\n");
                File.AppendAllText(log_path, $"  * Result:\n    + Match = {match}\n    + Mismatch = {mismatch}\n    + Score = {result}\n\n- - - - - - - -\n\n");
            }

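            // Average the per-dataset precision/recall over the datasets actually processed, then compute F1.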
            precision /= count;
            recall    /= count;
            double f1 = 2.0 * precision * recall / (precision + recall);

            File.AppendAllText(log_path, $"> Summary:");
            File.AppendAllText(log_path, $"\n  * Average Score = {final_result / count}");
            File.AppendAllText(log_path, $"\n  * Precision = {precision}");
            File.AppendAllText(log_path, $"\n  * Recall = {recall}");
            File.AppendAllText(log_path, $"\n  * F1 = {f1}");

            Console.WriteLine($"\n> Processed = {count}   :   Precision = {precision}  |  Recall = {recall}  |  F1 = {f1}");
            return 0;
        }
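
A minimal sketch of how this estimator might be driven as a parameter sweep (illustrative only: the sweep values are made up, and it assumes `QualityOptions` exposes settable `Mu`, `Theta`, and `ProfileFraction` properties):

        // Hypothetical driver, not part of the original harness.
        public static void SweepExample()
        {
            foreach (var mu in new[] { 0.25, 0.50 })
            foreach (var theta in new[] { 0.50, 1.00 })
            foreach (var fraction in new[] { 0.10, 0.20 })
            {
                Estimate(new QualityOptions { Mu = mu, Theta = theta, ProfileFraction = fraction });
            }
        }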