Esempio n. 1
0
        /// <summary>
        /// Runs leave-one-out validation for every language and returns the text of a Python
        /// (matplotlib) script that draws one box plot per language from the validation's
        /// third result list (assigned to <c>errors</c> below).
        /// </summary>
        /// <param name="languages">Languages to validate; <c>languages[i]</c> is paired with <c>corpusDirs[i]</c>.</param>
        /// <param name="corpusDirs">Corpus directory for each language, in the same order as <paramref name="languages"/>.</param>
        /// <param name="imageFileName">File name inserted into the script's <c>fig.savefig('images/...')</c> call.</param>
        /// <returns>The generated Python source.</returns>
        public static string testAllLanguages(LangDescriptor[] languages, string[] corpusDirs, string imageFileName)
        {
            // Names of the Python variables that will hold each language's error list.
            IList<string> languageNames = BuffUtils.map(languages, l => l.name + "_err");

            IDictionary<string, int> corpusSizes = new Dictionary<string, int>();
            for (int i = 0; i < languages.Length; i++)
            {
                LangDescriptor language = languages[i];
                IList<string> filenames = Tool.getFilenames(corpusDirs[i], language.fileRegex);
                corpusSizes[language.name] = filenames.Count;
            }

            // Quoted x-axis labels; "\\n" stays an escape sequence inside the Python string literal.
            IList<string> languageNamesAsStr = BuffUtils.map(languages, l => '"' + l.name + "\\nn=" + corpusSizes[l.name] + '"');

            StringBuilder data = new StringBuilder();
            for (int i = 0; i < languages.Length; i++)
            {
                LangDescriptor language = languages[i];
                LeaveOneOutValidator validator = new LeaveOneOutValidator(corpusDirs[i], language);
                IList<float> errors = validator.validateDocuments(true, "/tmp").c;

                // Concatenating the list itself would emit its .NET type name, so render it explicitly.
                data.Append(language.name + "_err = " + formatPythonFloatList(errors) + "\n");
            }

            // Composite-format template: string.Format understands {n}, not Java's %s / %%.
            // The template contains no literal braces, so nothing needs escaping.
            string python =
                "#\n" +
                "# AUTO-GENERATED FILE. DO NOT EDIT\n" +
                "# CodeBuff {0} '{1}'\n" +
                "#\n" +
                "import numpy as np\n" +
                "import pylab\n" +
                "import matplotlib.pyplot as plt\n\n" +
                "{2}\n" +
                "language_data = {3}\n" +
                "labels = {4}\n" +
                "fig = plt.figure()\n" +
                "ax = plt.subplot(111)\n" +
                "ax.boxplot(language_data,\n" +
                "           whis=[10, 90], # 10 and 90 % whiskers\n" +
                "           widths=.35,\n" +
                "           labels=labels,\n" +
                "           showfliers=False)\n" +
                "ax.set_xticklabels(labels, rotation=60, fontsize=18)\n" +
                "ax.tick_params(axis='both', which='major', labelsize=18)\n" +
                "plt.xticks(range(1,len(labels)+1), labels, rotation=60, fontsize=18)\n" +
                "pylab.ylim([0,.28])\n" +
                "ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)\n" +
                "ax.set_xlabel(\"Grammar and corpus size\", fontsize=20)\n" +
                "ax.set_ylabel(\"Misclassification Error Rate\", fontsize=20)\n" +
                "# ax.set_title(\"Leave-one-out Validation Using Error Rate\\nBetween Formatted and Original File\")\n" +
                "plt.tight_layout()\n" +
                "fig.savefig('images/{5}', format='pdf')\n" +
                "plt.show()\n";

            string languageDataList = "[" + string.Join(", ", languageNames) + "]";
            string labelList = "[" + string.Join(", ", languageNamesAsStr) + "]";

            return(string.Format(python, Tool.version, DateTime.Now, data, languageDataList, labelList, imageFileName));
        }

        /// <summary>
        /// Renders the values as a bracketed, comma-separated list, e.g. <c>[0.1, 0.25]</c>.
        /// The invariant culture is used so the decimal separator is always '.'.
        /// </summary>
        private static string formatPythonFloatList(IEnumerable<float> values)
        {
            StringBuilder text = new StringBuilder("[");
            bool first = true;

            foreach (float value in values)
            {
                if (!first)
                {
                    text.Append(", ");
                }
                text.Append(value.ToString(System.Globalization.CultureInfo.InvariantCulture));
                first = false;
            }

            return text.Append(']').ToString();
        }
Esempio n. 2
0
        /// <summary>
        /// Validates the language's corpus with output directory "/tmp/stability/1", then for each
        /// stage 1..STAGES validates the directory "/tmp/stability/{stage}" with output directory
        /// "/tmp/stability/{stage + 1}".
        /// </summary>
        /// <param name="language">Language descriptor whose corpus directory seeds the first run.</param>
        /// <returns>
        /// The median of the first run's third result list, followed by one entry per stage: the
        /// median normalized Levenshtein distance between that stage's formatter outputs and the
        /// first run's formatter outputs.
        /// </returns>
        public static IList <float?> checkStability(LangDescriptor language)
        {
            // First pass over the original corpus; its formatters are the reference for every later stage.
            LeaveOneOutValidator firstValidator = new LeaveOneOutValidator(language.corpusDir, language);
            Triple <IList <Formatter>, IList <float>, IList <float> > firstResults = firstValidator.validateDocuments(false, "/tmp/stability/1");
            IList <Formatter> referenceFormatters = firstResults.a;

            IList <float?> medians = new List <float?>();
            medians.Add(BuffUtils.median(firstResults.c));

            // Re-run validation stage after stage, each one reading the previous stage's directory.
            int stage = 1;
            while (stage <= STAGES)
            {
                string sourceDir = "/tmp/stability/" + stage;
                string targetDir = "/tmp/stability/" + (stage + 1);

                LeaveOneOutValidator stageValidator = new LeaveOneOutValidator(sourceDir, language);
                Triple <IList <Formatter>, IList <float>, IList <float> > stageResults = stageValidator.validateDocuments(false, targetDir);
                IList <Formatter> stageFormatters = stageResults.a;

                IList <float?> stageDistances = new List <float?>();
                for (int doc = 0; doc < stageFormatters.Count; doc++)
                {
                    Formatter reference = referenceFormatters[doc];
                    Formatter current   = stageFormatters[doc];
                    float     distance  = Dbg.normalizedLevenshteinDistance(current.Output, reference.Output);
                    stageDistances.Add(distance);
                }

                medians.Add(BuffUtils.median(stageDistances));
                stage++;
            }

            return medians;
        }
Esempio n. 3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public static void main(String[] args) throws Exception
        /// <summary>
        /// Runs leave-one-out validation over the JAVA8_DESCR corpus (passing "output" as the
        /// second argument) and logs the second and third result lists.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            LeaveOneOutValidator validator = new LeaveOneOutValidator(JAVA8_DESCR.corpusDir, JAVA8_DESCR);
            Triple <IList <Formatter>, IList <float?>, IList <float?> > results = validator.validateDocuments(false, "output");

            // A .NET collection's default ToString() is its type name, so join the elements
            // to log the actual values (presumably what Log.WriteLine was meant to show).
            Log.WriteLine("[" + string.Join(", ", results.b) + "]");
            Log.WriteLine("[" + string.Join(", ", results.c) + "]");
        }
Esempio n. 4
0
        /// <summary>
        /// Runs leave-one-out validation over the ANTLR4 corpus (passing "output" as the second
        /// argument) and prints the second and third result lists to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            LeaveOneOutValidator validator = new LeaveOneOutValidator(Tool.ANTLR4_DESCR.corpusDir, Tool.ANTLR4_DESCR);
            Triple <IList <Formatter>, IList <float>, IList <float> > results = validator.validateDocuments(false, "output");

            // Console.WriteLine(object) calls ToString(), which for a collection is only its
            // type name; join the elements so the values themselves are printed.
            Console.WriteLine("[" + string.Join(", ", results.b) + "]");
            Console.WriteLine("[" + string.Join(", ", results.c) + "]");
        }
        /// <summary>
        /// Generates the leave-one-out Python script for the three Java descriptors and writes it
        /// to "python/src/all_java_leave_one_out.py".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            LangDescriptor[] javaLanguages = { JAVA_DESCR, JAVA8_DESCR, JAVA_GUAVA_DESCR };

            // One corpus directory per descriptor, in the same order as the descriptors.
            IList <string> corpusDirList = BuffUtils.map(javaLanguages, l => l.corpusDir);
            string script = LeaveOneOutValidator.testAllLanguages(javaLanguages, corpusDirList.ToArray(), "all_java_leave_one_out.pdf");

            string outputPath = "python/src/all_java_leave_one_out.py";
            org.antlr.codebuff.misc.Utils.writeFile(outputPath, script);
            Log.WriteLine("wrote python code to " + outputPath);
        }
        /// <summary>
        /// Runs leave-one-out validation for each listed language against its own corpus
        /// directory, passing "output" as the second argument. The results are discarded.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            LangDescriptor[] allLanguages =
            {
                QUORUM_DESCR,
                JAVA_DESCR,
                JAVA8_DESCR,
                JAVA_GUAVA_DESCR,
                ANTLR4_DESCR,
                SQLITE_CLEAN_DESCR,
                TSQL_CLEAN_DESCR
            };

            // Validation is run for each language in turn; its return value is not used.
            foreach (LangDescriptor descriptor in allLanguages)
            {
                new LeaveOneOutValidator(descriptor.corpusDir, descriptor).validateDocuments(false, "output");
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Validates the language's corpus with the given feature sets and collects the
        /// alignment error rate of every resulting formatter.
        /// </summary>
        /// <param name="language">Language descriptor supplying the corpus directory.</param>
        /// <param name="injectWSFeatures">Feature set passed as the first argument to validation.</param>
        /// <param name="alignmentFeatures">Feature set passed as the second argument to validation.</param>
        /// <returns>One alignment error rate per formatter, in formatter order.</returns>
        public static IList <float> getAlignmentErrorRates(LangDescriptor language, FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures)
        {
            LeaveOneOutValidator corpusValidator = new LeaveOneOutValidator(language.corpusDir, language);
            Triple <IList <Formatter>, IList <float>, IList <float> > validation = corpusValidator.validateDocuments(injectWSFeatures, alignmentFeatures, false, null);

            IList <float> rates = new List <float>();
            IList <Formatter> perDocument = validation.a;

            // Each formatter is analysed on its own; only the alignment error rate is kept.
            for (int doc = 0; doc < perDocument.Count; doc++)
            {
                Formatter current = perDocument[doc];
                ClassificationAnalysis stats = new ClassificationAnalysis(current.testDoc, current.AnalysisPerToken);
                rates.Add(stats.AlignmentErrorRate);
            }

            return rates;
        }
Esempio n. 8
0
        /// <summary>
        /// Compares formatter output between two descriptor pairs (JAVA_DESCR vs JAVA8_DESCR and
        /// JAVA_GUAVA_DESCR vs JAVA8_GUAVA_DESCR) and prints the median normalized Levenshtein
        /// distance for each pair.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // we need to get all of the results in order so that we can compare
            LeaveOneOutValidator.FORCE_SINGLE_THREADED = true;

            // JAVA
            LeaveOneOutValidator javaValidator  = new LeaveOneOutValidator(Tool.JAVA_DESCR.corpusDir, JAVA_DESCR);
            LeaveOneOutValidator java8Validator = new LeaveOneOutValidator(JAVA8_DESCR.corpusDir, JAVA8_DESCR);
            float javaMedian = medianEditDistance(javaValidator, java8Validator);

            // JAVA GUAVA
            LeaveOneOutValidator javaGuavaValidator  = new LeaveOneOutValidator(JAVA_GUAVA_DESCR.corpusDir, JAVA_GUAVA_DESCR);
            LeaveOneOutValidator java8GuavaValidator = new LeaveOneOutValidator(JAVA8_GUAVA_DESCR.corpusDir, JAVA8_GUAVA_DESCR);
            float javaGuavaMedian = medianEditDistance(javaGuavaValidator, java8GuavaValidator);

            // NOTE(review): the previous version also printed a "clean SQLite vs TSQL" median from a
            // local that was never assigned (a compile error). No SQL comparison is computed in this
            // method, so that line is omitted; restore it together with the computation if needed.
            Console.WriteLine("Java vs Java8 edit distance info median=" + javaMedian);
            Console.WriteLine("Java vs Java8 guava edit distance info median=" + javaGuavaMedian);
        }

        /// <summary>
        /// Runs both validators, pairs their formatters by index and returns the median of the
        /// normalized Levenshtein distances between the paired outputs.
        /// </summary>
        /// <param name="first">Validator whose formatter count drives the comparison.</param>
        /// <param name="second">Validator whose formatters are compared against the first's.</param>
        /// <returns>
        /// The element at index <c>n / 2</c> of the sorted distances (the upper median for even
        /// <c>n</c>), or <see cref="float.NaN"/> when the first validator produced no formatters.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The second validator produced fewer formatters than the first, so index pairing is impossible.
        /// </exception>
        private static float medianEditDistance(LeaveOneOutValidator first, LeaveOneOutValidator second)
        {
            IList <Formatter> firstFormatters  = first.validateDocuments(false, null).a;
            IList <Formatter> secondFormatters = second.validateDocuments(false, null).a;

            if (secondFormatters.Count < firstFormatters.Count)
            {
                throw new InvalidOperationException(
                    "Cannot pair formatters by index: first run produced " + firstFormatters.Count +
                    " but second run produced only " + secondFormatters.Count + ".");
            }

            // A concrete List<float> is required: IList<T> has no Sort() method.
            List <float> distances = new List <float>(firstFormatters.Count);
            for (int i = 0; i < firstFormatters.Count; i++)
            {
                distances.Add(Dbg.normalizedLevenshteinDistance(firstFormatters[i].Output, secondFormatters[i].Output));
            }

            if (distances.Count == 0)
            {
                return float.NaN;
            }

            distances.Sort();
            return distances[distances.Count / 2];
        }