/// <summary>
/// Runs leave-one-out validation for every language/corpus pair and returns the text of a
/// Python (matplotlib) script that box-plots the resulting per-language error lists.
/// </summary>
/// <param name="languages">Languages to validate; entry <c>i</c> is paired with <c>corpusDirs[i]</c>.</param>
/// <param name="corpusDirs">Corpus directory for each language, parallel to <paramref name="languages"/>.</param>
/// <param name="imageFileName">File name substituted into the script's <c>fig.savefig('images/...')</c> call.</param>
/// <returns>The generated Python source.</returns>
public static string testAllLanguages(LangDescriptor[] languages, string[] corpusDirs, string imageFileName)
{
    // Names of the Python variables that hold each language's error list.
    IList<string> languageNames = BuffUtils.map(languages, l => l.name + "_err");

    // Number of corpus files per language; shown in the x-axis labels as "n=<count>".
    IDictionary<string, int> corpusSizes = new Dictionary<string, int>();
    for (int i = 0; i < languages.Length; i++)
    {
        LangDescriptor language = languages[i];
        IList<string> filenames = Tool.getFilenames(corpusDirs[i], language.fileRegex);
        corpusSizes[language.name] = filenames.Count;
    }

    // Quoted Python string literals; "\\nn=" emits a Python "\n" escape followed by "n=".
    IList<string> languageNamesAsStr = BuffUtils.map(languages, l => "\"" + l.name + "\\nn=" + corpusSizes[l.name] + "\"");

    // One "<name>_err = [ ... ]" assignment per language.
    StringBuilder data = new StringBuilder();
    for (int i = 0; i < languages.Length; i++)
    {
        LangDescriptor language = languages[i];
        LeaveOneOutValidator validator = new LeaveOneOutValidator(corpusDirs[i], language);
        Triple<IList<Formatter>, IList<float>, IList<float>> results = validator.validateDocuments(true, "/tmp");
        IList<float> errors = results.c;
        // Concatenating the IList itself would emit its .NET type name, so render the elements.
        data.Append(language.name).Append("_err = ").Append(toPythonFloatList(errors)).Append("\n");
    }

    // .NET composite-format template: {n} placeholders (Java's %s is not substituted by
    // string.Format) and a plain '%' (Java's "%%" escape is not needed here).
    string python =
        "#\n" +
        "# AUTO-GENERATED FILE. DO NOT EDIT\n" +
        "# CodeBuff {0} '{1}'\n" +
        "#\n" +
        "import numpy as np\n" +
        "import pylab\n" +
        "import matplotlib.pyplot as plt\n\n" +
        "{2}\n" +
        "language_data = {3}\n" +
        "labels = {4}\n" +
        "fig = plt.figure()\n" +
        "ax = plt.subplot(111)\n" +
        "ax.boxplot(language_data,\n" +
        " whis=[10, 90], # 10 and 90 % whiskers\n" +
        " widths=.35,\n" +
        " labels=labels,\n" +
        " showfliers=False)\n" +
        "ax.set_xticklabels(labels, rotation=60, fontsize=18)\n" +
        "ax.tick_params(axis='both', which='major', labelsize=18)\n" +
        "plt.xticks(range(1,len(labels)+1), labels, rotation=60, fontsize=18)\n" +
        "pylab.ylim([0,.28])\n" +
        "ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)\n" +
        "ax.set_xlabel(\"Grammar and corpus size\", fontsize=20)\n" +
        "ax.set_ylabel(\"Misclassification Error Rate\", fontsize=20)\n" +
        "# ax.set_title(\"Leave-one-out Validation Using Error Rate\\nBetween Formatted and Original File\")\n" +
        "plt.tight_layout()\n" +
        "fig.savefig('images/{5}', format='pdf')\n" +
        "plt.show()\n";

    return string.Format(
        System.Globalization.CultureInfo.InvariantCulture,
        python,
        Tool.version,
        DateTime.Now,
        data,
        toPythonList(languageNames),
        toPythonList(languageNamesAsStr),
        imageFileName);
}

/// <summary>Renders already-formatted items as a Python list literal: <c>[a, b, c]</c>.</summary>
private static string toPythonList(IEnumerable<string> items)
{
    return "[" + string.Join(", ", items) + "]";
}

/// <summary>
/// Renders floats as a Python list literal, using the invariant culture so the decimal
/// separator is always '.' regardless of the machine's locale.
/// </summary>
private static string toPythonFloatList(IEnumerable<float> values)
{
    StringBuilder text = new StringBuilder("[");
    bool first = true;
    foreach (float value in values)
    {
        if (!first)
        {
            text.Append(", ");
        }
        text.Append(value.ToString("R", System.Globalization.CultureInfo.InvariantCulture));
        first = false;
    }
    return text.Append("]").ToString();
}
/// <summary>
/// Formats the corpus of <paramref name="language"/> once into "/tmp/stability/1" and then
/// re-formats the previous stage's output <c>STAGES</c> more times, collecting one median per pass.
/// </summary>
/// <param name="language">Language whose corpus directory is formatted.</param>
/// <returns>
/// First the median of the initial pass's third result list (<c>results.c</c>), then, for each
/// stage, the median normalized Levenshtein distance between that stage's formatter outputs and
/// the initial pass's formatter outputs.
/// </returns>
public static IList<float?> checkStability(LangDescriptor language)
{
    IList<float?> medians = new List<float?>();

    // Initial pass: original corpus -> stage-1 directory.
    LeaveOneOutValidator initialValidator = new LeaveOneOutValidator(language.corpusDir, language);
    Triple<IList<Formatter>, IList<float>, IList<float>> initialResults =
        initialValidator.validateDocuments(false, "/tmp/stability/1");
    medians.Add(BuffUtils.median(initialResults.c));
    IList<Formatter> initialFormatters = initialResults.a;

    // Each stage reads directory <stage> and writes directory <stage + 1>.
    int stage = 1;
    while (stage <= STAGES)
    {
        string sourceDir = "/tmp/stability/" + stage;
        string targetDir = "/tmp/stability/" + (stage + 1);

        LeaveOneOutValidator stageValidator = new LeaveOneOutValidator(sourceDir, language);
        Triple<IList<Formatter>, IList<float>, IList<float>> stageResults =
            stageValidator.validateDocuments(false, targetDir);
        IList<Formatter> stageFormatters = stageResults.a;

        // Documents are paired by position with the initial pass.
        IList<float?> stageDistances = new List<float?>();
        for (int doc = 0; doc < stageFormatters.Count; doc++)
        {
            Formatter initial = initialFormatters[doc];
            Formatter current = stageFormatters[doc];
            float distance = Dbg.normalizedLevenshteinDistance(current.Output, initial.Output);
            stageDistances.Add(distance);
        }
        medians.Add(BuffUtils.median(stageDistances));

        stage++;
    }

    return medians;
}
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public static void main(String[] args) throws Exception
/// <summary>
/// Runs leave-one-out validation over the JAVA8 corpus, writing into "output", and logs the
/// second and third result lists (<c>results.b</c> and <c>results.c</c>).
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    LeaveOneOutValidator validator = new LeaveOneOutValidator(JAVA8_DESCR.corpusDir, JAVA8_DESCR);
    Triple<IList<Formatter>, IList<float?>, IList<float?>> results = validator.validateDocuments(false, "output");

    // Log the elements; handing the IList itself to the logger would presumably print only
    // the collection's type name (unlike Java's List.toString()).
    Log.WriteLine("[" + string.Join(", ", results.b) + "]");
    Log.WriteLine("[" + string.Join(", ", results.c) + "]");
}
/// <summary>
/// Runs leave-one-out validation over the ANTLR4 corpus, writing into "output", and prints the
/// second and third result lists (<c>results.b</c> and <c>results.c</c>) to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    LeaveOneOutValidator validator = new LeaveOneOutValidator(Tool.ANTLR4_DESCR.corpusDir, Tool.ANTLR4_DESCR);
    Triple<IList<Formatter>, IList<float>, IList<float>> results = validator.validateDocuments(false, "output");

    // Console.WriteLine(object) on an IList prints the collection's type name, not its
    // contents, so join the elements into a bracketed list first.
    Console.WriteLine("[" + string.Join(", ", results.b) + "]");
    Console.WriteLine("[" + string.Join(", ", results.c) + "]");
}
/// <summary>
/// Generates the leave-one-out plotting script for the three Java descriptors and writes it
/// to "python/src/all_java_leave_one_out.py".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    LangDescriptor[] javaVariants = new LangDescriptor[]
    {
        JAVA_DESCR,
        JAVA8_DESCR,
        JAVA_GUAVA_DESCR
    };

    // testAllLanguages takes the corpus directories as an array parallel to the descriptors.
    IList<string> corpusDirList = BuffUtils.map(javaVariants, d => d.corpusDir);
    string[] corpusDirArray = corpusDirList.ToArray();

    string script = LeaveOneOutValidator.testAllLanguages(javaVariants, corpusDirArray, "all_java_leave_one_out.pdf");

    string scriptPath = "python/src/all_java_leave_one_out.py";
    org.antlr.codebuff.misc.Utils.writeFile(scriptPath, script);
    Log.WriteLine("wrote python code to " + scriptPath);
}
/// <summary>
/// Runs leave-one-out validation for every listed language against its own corpus directory,
/// writing into "output". The validation results are discarded.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    LangDescriptor[] allLanguages = new LangDescriptor[]
    {
        QUORUM_DESCR,
        JAVA_DESCR,
        JAVA8_DESCR,
        JAVA_GUAVA_DESCR,
        ANTLR4_DESCR,
        SQLITE_CLEAN_DESCR,
        TSQL_CLEAN_DESCR
    };

    // Walk each corpus and generate output; the first argument is false, which presumably
    // skips the edit-distance computation (per the original comment) - confirm against
    // validateDocuments.
    foreach (LangDescriptor descriptor in allLanguages)
    {
        LeaveOneOutValidator runner = new LeaveOneOutValidator(descriptor.corpusDir, descriptor);
        runner.validateDocuments(false, "output");
    }
}
/// <summary>
/// Validates the corpus of <paramref name="language"/> with the given feature sets and returns
/// the alignment error rate of each resulting formatter.
/// </summary>
/// <param name="language">Language whose corpus directory is validated.</param>
/// <param name="injectWSFeatures">Feature metadata passed through as the first argument of <c>validateDocuments</c>.</param>
/// <param name="alignmentFeatures">Feature metadata passed through as the second argument of <c>validateDocuments</c>.</param>
/// <returns>One <c>AlignmentErrorRate</c> per formatter, in formatter order.</returns>
public static IList<float> getAlignmentErrorRates(LangDescriptor language, FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures)
{
    LeaveOneOutValidator runner = new LeaveOneOutValidator(language.corpusDir, language);
    Triple<IList<Formatter>, IList<float>, IList<float>> outcome =
        runner.validateDocuments(injectWSFeatures, alignmentFeatures, false, null);

    IList<Formatter> perDocument = outcome.a;
    IList<float> rates = new List<float>();

    // Only the alignment error rate of each per-token analysis is collected; the lists in
    // outcome.b / outcome.c are not used here.
    for (int doc = 0; doc < perDocument.Count; doc++)
    {
        Formatter current = perDocument[doc];
        ClassificationAnalysis stats = new ClassificationAnalysis(current.testDoc, current.AnalysisPerToken);
        rates.Add(stats.AlignmentErrorRate);
    }

    return rates;
}
/// <summary>
/// Formats the same corpora under two grammar variants (Java vs Java8, and the Guava
/// equivalents) and prints the median normalized edit distance between the paired outputs.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    // we need to get all of the results in order so that we can compare
    LeaveOneOutValidator.FORCE_SINGLE_THREADED = true;

    float java_st_median = medianEditDistance(JAVA_DESCR, JAVA8_DESCR);
    float java_guava_median = medianEditDistance(JAVA_GUAVA_DESCR, JAVA8_GUAVA_DESCR);

    // NOTE(review): the "clean SQLite vs TSQL" line was removed. Nothing in this method
    // computed that median, and reading the never-assigned local was a compile error
    // (CS0165). Re-add it together with the comparison that produces the value.
    Console.WriteLine("Java vs Java8 edit distance info median=" + java_st_median);
    Console.WriteLine("Java vs Java8 guava edit distance info median=" + java_guava_median);
}

/// <summary>
/// Validates both descriptors' corpora, pairs the resulting formatters by position, and
/// returns the median normalized Levenshtein distance between each pair's output.
/// </summary>
/// <param name="first">Descriptor whose formatter count drives the pairing.</param>
/// <param name="second">Descriptor compared against <paramref name="first"/>, document by document.</param>
/// <returns>The element at index <c>n / 2</c> of the sorted distances (upper middle for even <c>n</c>).</returns>
/// <exception cref="InvalidOperationException">No documents were produced, so there is no median.</exception>
private static float medianEditDistance(LangDescriptor first, LangDescriptor second)
{
    LeaveOneOutValidator firstValidator = new LeaveOneOutValidator(first.corpusDir, first);
    LeaveOneOutValidator secondValidator = new LeaveOneOutValidator(second.corpusDir, second);
    Triple<IList<Formatter>, IList<float?>, IList<float?>> firstResults = firstValidator.validateDocuments(false, null);
    Triple<IList<Formatter>, IList<float?>, IList<float?>> secondResults = secondValidator.validateDocuments(false, null);
    IList<Formatter> firstFormatters = firstResults.a;
    IList<Formatter> secondFormatters = secondResults.a;

    // A concrete List<float> is required: IList<T> exposes no Sort(), and the distances
    // are never null so the nullable wrapper is unnecessary.
    List<float> distances = new List<float>(firstFormatters.Count);
    for (int i = 0; i < firstFormatters.Count; i++)
    {
        Formatter a = firstFormatters[i];
        Formatter b = secondFormatters[i];
        distances.Add(Dbg.normalizedLevenshteinDistance(a.Output, b.Output));
    }

    if (distances.Count == 0)
    {
        throw new InvalidOperationException("No documents were formatted; cannot compute a median edit distance.");
    }

    distances.Sort();
    return distances[distances.Count / 2];
}