Example 1
        public virtual Triple <Formatter, float, float> validate(LangDescriptor language, IList <InputDocument> documents, string fileToExclude, int k, FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures, string outputDir, bool computeEditDistance, bool collectAnalysis)
        {
            string path = System.IO.Path.GetFullPath(fileToExclude);
            IList <InputDocument> others   = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
            IList <InputDocument> excluded = BuffUtils.filter(documents, d => d.fileName.Equals(path));

            Debug.Assert(others.Count == documents.Count - 1);
            //		kNNClassifier.resetCache();
            if (excluded.Count == 0)
            {
                Console.Error.WriteLine("Doc not in corpus: " + path);
                return(null);
            }
            InputDocument testDoc = excluded[0];
            DateTime      start   = System.DateTime.Now;
            Corpus        corpus  = new Corpus(others, language);

            corpus.train();
            DateTime      stop         = System.DateTime.Now;
            Formatter     formatter    = new Formatter(corpus, language.indentSize, k, injectWSFeatures, alignmentFeatures);
            InputDocument originalDoc  = testDoc;
            DateTime      format_start = System.DateTime.Now;
            string        output       = formatter.format(testDoc, collectAnalysis);
            DateTime      format_stop  = System.DateTime.Now;
            float         editDistance = 0;

            if (computeEditDistance)
            {
                editDistance = Dbg.normalizedLevenshteinDistance(testDoc.content, output);
            }
            ClassificationAnalysis analysis = new ClassificationAnalysis(originalDoc, formatter.AnalysisPerToken);

            Console.WriteLine(testDoc.fileName + ": edit distance = " + editDistance + ", error rate = " + analysis.ErrorRate);
            if (!string.ReferenceEquals(outputDir, null))
            {
                string dir = outputDir + "/" + language.name + "/" + Tool.version;
                if (!System.IO.Directory.Exists(dir))
                {
                    System.IO.Directory.CreateDirectory(dir);
                }
                org.antlr.codebuff.misc.Utils.writeFile(dir + "/" + System.IO.Path.GetFileName(testDoc.fileName), output);
            }
            var tms = stop - start;
            var fms = format_stop - format_start;

            // use TotalMilliseconds: TimeSpan.Milliseconds is only the 0-999 ms component
            trainingTimes.Add(tms.TotalMilliseconds);
            float tokensPerMS = testDoc.tokens.Size / (float)fms.TotalMilliseconds;

            formattingTokensPerMS.Add((double)tokensPerMS);
            Console.Write("Training time = {0:F0} ms, formatting {1:F0} ms, {2,5:F3} tokens/ms ({3:D} tokens)\n",
                          tms.TotalMilliseconds, fms.TotalMilliseconds, tokensPerMS, testDoc.tokens.Size);
            //		System.out.printf("classify calls %d, hits %d rate %f\n",
            //		                  kNNClassifier.nClassifyCalls, kNNClassifier.nClassifyCacheHits,
            //		                  kNNClassifier.nClassifyCacheHits/(float) kNNClassifier.nClassifyCalls);
            //		System.out.printf("kNN calls %d, hits %d rate %f\n",
            //						  kNNClassifier.nNNCalls, kNNClassifier.nNNCacheHits,
            //						  kNNClassifier.nNNCacheHits/(float) kNNClassifier.nNNCalls);
            return(new Triple <Formatter, float, float>(formatter, editDistance, analysis.ErrorRate));
        }
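A minimal usage sketch of this leave-one-out call (hypothetical: the validator instance and the "corpus/T.java" path are stand-ins; k and the feature arrays are the defaults seen in the other examples):

            // leave one file out of the corpus, train on the rest, then format it
            Triple <Formatter, float, float> r = validator.validate(language, documents, "corpus/T.java",
                                                                    Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS,
                                                                    null /* outputDir */, true /* computeEditDistance */, false /* collectAnalysis */);
            Console.WriteLine("edit distance = " + r.b + ", error rate = " + r.c);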
Example 2
        /// <summary>
        /// Select one document at random, then n others w/o replacement as corpus </summary>
        public virtual org.antlr.codebuff.misc.Pair <InputDocument, IList <InputDocument> > selectSample(IList <InputDocument> documents, int n)
        {
            int                   i            = random.Next(documents.Count);
            InputDocument         testDoc      = documents[i];
            IList <InputDocument> others       = BuffUtils.filter(documents, d => d != testDoc);
            IList <InputDocument> corpusSubset = getRandomDocuments(others, n);

            return(new org.antlr.codebuff.misc.Pair <InputDocument, IList <InputDocument> >(testDoc, corpusSubset));
        }
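A minimal usage sketch (hypothetical call site; assumes documents was loaded via Tool.load as in the other examples):

            // pick one test document at random plus a 10-document training subset
            org.antlr.codebuff.misc.Pair <InputDocument, IList <InputDocument> > sample = validator.selectSample(documents, 10);
            InputDocument         testDoc      = sample.a;
            IList <InputDocument> corpusSubset = sample.b;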
Example 3
        public static void Main(string[] args)
        {
            string         langname     = args[0].Substring(1);
            string         testFilename = args[1];
            LangDescriptor language     = null;

            for (int i = 0; i < languages.Length; i++)
            {
                if (languages[i].name.Equals(langname))
                {
                    language = languages[i];
                    break;
                }
            }
            if (language == null)
            {
                Log.WriteLine("Language " + langname + " unknown");
                return;
            }

            // load all files up front
            DateTime              load_start = System.DateTime.Now;
            IList <string>        allFiles   = Tool.getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents  = Tool.load(allFiles, language);
            DateTime              load_stop  = System.DateTime.Now;
            TimeSpan              load_time  = load_stop - load_start;

            Log.Write("Loaded {0:D} files in {1:F0}ms\n", documents.Count, load_time.TotalMilliseconds);

            string path = System.IO.Path.GetFullPath(testFilename);
            IList <InputDocument> others   = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
            IList <InputDocument> excluded = BuffUtils.filter(documents, d => d.fileName.Equals(path));

            Debug.Assert(others.Count == documents.Count - 1);
            if (excluded.Count == 0)
            {
                Log.WriteLine("Doc not in corpus: " + path);
                return;
            }
            InputDocument testDoc = excluded[0];

            IList <int> training   = new List <int>();
            IList <int> formatting = new List <int>();

            for (int i = 1; i <= TRIALS; i++)
            {
                org.antlr.codebuff.misc.Pair <int, int> timing = test(language, others, testDoc);
                training.Add(timing.a);
                formatting.Add(timing.b);
            }
            // drop the first five (warm-up) trials; IList<T> has no subList, so use LINQ (System.Linq)
            training   = training.Skip(5).ToList();
            formatting = formatting.Skip(5).ToList();
            Log.Write("median of [5:{0:D}] training {1:D}ms\n", TRIALS - 1, BuffUtils.median(training));
            Log.Write("median of [5:{0:D}] formatting {1:D}ms\n", TRIALS - 1, BuffUtils.median(formatting));
        }
Example 4
        public virtual Triple <IList <Formatter>, IList <float>, IList <float> > validateDocuments(FeatureMetaData[] injectWSFeatures, FeatureMetaData[] alignmentFeatures, bool computeEditDistance, string outputDir)
        {
            IList <Formatter> formatters = new List <Formatter>();
            IList <float>     distances  = new List <float>();
            IList <float>     errors     = new List <float>();

            System.DateTime start = System.DateTime.Now;
            try
            {
                IList <string>        allFiles          = Tool.getFilenames(rootDir, language.fileRegex);
                IList <InputDocument> documents         = Tool.load(allFiles, language);
                IList <InputDocument> parsableDocuments = BuffUtils.filter(documents, d => d.tree != null);
                System.DateTime       stop = System.DateTime.Now;
                //Console.Write("Load/parse all docs from {0} time {1:D} ms\n", rootDir, (stop - start) / 1000000);

                // ncpu is computed but never used: the loop below runs sequentially
                int ncpu = 1;
                if (FORCE_SINGLE_THREADED)
                {
                    ncpu = 2;
                }

                for (int i = 0; i < parsableDocuments.Count; i++)
                {
                    string fileName = parsableDocuments[i].fileName;
                    try
                    {
                        Triple <Formatter, float, float> results = validate(language, parsableDocuments, fileName,
                                                                            Formatter.DEFAULT_K, injectWSFeatures, alignmentFeatures, outputDir, computeEditDistance, false);
                        formatters.Add(results.a);
                        float editDistance = results.b;
                        distances.Add(editDistance);
                        float errorRate = results.c;
                        errors.Add(errorRate);
                    }
                    catch (Exception t)
                    {
                        // keep going when a single document fails
                        System.Console.WriteLine(t.StackTrace);
                    }
                }
            }
            finally
            {
                DateTime final_stop            = System.DateTime.Now;
                double   medianTrainingTime    = BuffUtils.median(trainingTimes);
                double   medianFormattingPerMS = BuffUtils.median(formattingTokensPerMS);
                Console.Write("Total time {0:D}ms\n", final_stop - start);
                Console.Write("Median training time {0:D}ms\n", medianTrainingTime);
                Console.Write("Median formatting time tokens per ms {0,5:F4}ms, min {1,5:F4} max {2,5:F4}\n", medianFormattingPerMS, BuffUtils.min(formattingTokensPerMS), BuffUtils.max(formattingTokensPerMS));
            }
            return(new Triple <IList <Formatter>, IList <float>, IList <float> >(formatters, distances, errors));
        }
Example 5
        public static IList <Tree> getSeparators <T1>(ParserRuleContext ctx, IList <T1> siblings)
            where T1 : Antlr4.Runtime.ParserRuleContext
        {
            ParserRuleContext first = siblings[0] as ParserRuleContext;
            ParserRuleContext last  = siblings[siblings.Count - 1] as ParserRuleContext;
            int start = BuffUtils.indexOf(ctx, first);
            int end   = BuffUtils.indexOf(ctx, last);
            IEnumerable <ITree> children = Trees.GetChildren(ctx).Where((n, i) => i >= start && i < end + 1);
            IList <Tree>        elements = children.ToList();

            return(BuffUtils.filter(elements, c => c is TerminalNode));
        }
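In effect, getSeparators keeps only the TerminalNode children of ctx that lie between the first and last sibling, i.e., the separator tokens (commas and the like) of a list-style rule.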
Example 6
        public static void runCaptureForOneLanguage(LangDescriptor language)
        {
            IList <string>        filenames = Tool.getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = Tool.load(filenames, language);

            foreach (string fileName in filenames)
            {
                // Examine info for this file in isolation
                Corpus fileCorpus = new Corpus(fileName, language);
                fileCorpus.train();
                Console.WriteLine(fileName);
                //			examineCorpus(corpus);
                ArrayListMultiMap <FeatureVectorAsObject, int> ws   = getWSContextCategoryMap(fileCorpus);
                ArrayListMultiMap <FeatureVectorAsObject, int> hpos = getHPosContextCategoryMap(fileCorpus);

                // Compare with corpus minus this file
                string path = fileName;
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                Corpus corpus = new Corpus(others, language);
                corpus.train();
                //			examineCorpus(corpus);
                ArrayListMultiMap <FeatureVectorAsObject, int> corpus_ws   = getWSContextCategoryMap(corpus);
                ArrayListMultiMap <FeatureVectorAsObject, int> corpus_hpos = getHPosContextCategoryMap(corpus);

                foreach (FeatureVectorAsObject x in ws.Keys)
                {
                    HashBag <int> fwsCats   = getCategoriesBag(ws[x]);
                    IList <float> fwsRatios = getCategoryRatios(fwsCats.Values);
                    HashBag <int> wsCats    = getCategoriesBag(corpus_ws[x]);
                    IList <float> wsRatios  = getCategoryRatios(wsCats.Values);
                    // compare file predictions with corpus predictions
                    if (!fwsRatios.SequenceEqual(wsRatios))
                    {
                        Console.WriteLine(fwsRatios + " vs " + wsRatios);
                    }

                    // hpos category bags are collected here but, unlike the ws bags above, never compared
                    HashBag <int> fhposCats = getCategoriesBag(hpos[x]);
                    HashBag <int> hposCats  = getCategoriesBag(corpus_hpos[x]);
                }

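                // NOTE: only the first corpus file is examined because of this break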
                break;
            }
        }
Example 7
        public static void writePython(LangDescriptor[] languages, IList <int?> ks, float[][] medians)
        {
            StringBuilder data = new StringBuilder();
            StringBuilder plot = new StringBuilder();

            for (int i = 0; i < languages.Length; i++)
            {
                LangDescriptor language        = languages[i];
                // medians[i] is float[], whose elements cannot be null, so no null filter is needed
                IList <float>  filteredMedians = medians[i].ToList();
                data.Append(language.name + "=[" + string.Join(", ", filteredMedians) + "]\n"); // emit a Python list literal
                plot.Append(string.Format("ax.plot(ks, {0}, label=\"{1}\", marker='{2}', color='{3}')\n", language.name, language.name, nameToGraphMarker[language.name], nameToGraphColor[language.name]));
            }

            string python = "#\n" + "# AUTO-GENERATED FILE. DO NOT EDIT\n" + "# CodeBuff %s '%s'\n" + "#\n" + "import numpy as np\n" + "import matplotlib.pyplot as plt\n\n" + "%s\n" + "ks = %s\n" + "fig = plt.figure()\n" + "ax = plt.subplot(111)\n" + "%s" + "ax.tick_params(axis='both', which='major', labelsize=18)\n" + "ax.set_xlabel(\"$k$ nearest neighbors\", fontsize=20)\n" + "ax.set_ylabel(\"Median error rate\", fontsize=20)\n" + "#ax.set_title(\"k Nearest Neighbors vs\\nLeave-one-out Validation Error Rate\")\n" + "plt.legend(fontsize=18)\n\n" + "fig.savefig('images/vary_k.pdf', format='pdf')\n" + "plt.show()\n";
            string code   = string.format(python, Tool.version, DateTime.Now, data, ks, plot);

            string fileName = "python/src/vary_k.py";

            org.antlr.codebuff.misc.Utils.writeFile(fileName, code);
            Log.WriteLine("wrote python code to " + fileName);
        }
Example 8
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.Error.WriteLine("Dbg [-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] test-file");
                return;
            }

            int    arg             = 0;
            bool   leaveOneOut     = true;
            bool   collectAnalysis = true;
            string language        = args[arg++];

            language = language.Substring(1);
            string        testFilename = args[arg];
            string        output       = "???";
            InputDocument testDoc      = null;
            IList <TokenPositionAnalysis> analysisPerToken = null;

            org.antlr.codebuff.misc.Pair <string, IList <TokenPositionAnalysis> > results;
            LangDescriptor lang = null;

            System.DateTime start, stop;
            for (int i = 0; i < Tool.languages.Length; i++)
            {
                if (Tool.languages[i].name.Equals(language))
                {
                    lang = Tool.languages[i];
                    break;
                }
            }
            if (lang != null)
            {
                start = System.DateTime.Now;
                LeaveOneOutValidator             validator = new LeaveOneOutValidator(lang.corpusDir, lang);
                Triple <Formatter, float, float> val       = validator.validateOneDocument(testFilename, null, collectAnalysis);
                testDoc = Tool.parse(testFilename, lang);
                stop    = System.DateTime.Now;
                Formatter formatter = val.a;
                output = formatter.Output;
                Console.WriteLine("output len = " + output.Length);
                float editDistance = normalizedLevenshteinDistance(testDoc.content, output);
                Console.WriteLine("normalized Levenshtein distance: " + editDistance);
                analysisPerToken = formatter.AnalysisPerToken;

                Regex             rex             = new Regex("^\\s+$");
                CommonTokenStream original_tokens = Tool.tokenize(testDoc.content, lang.lexerClass);
                IList <Token>     wsTokens        = BuffUtils.filter(original_tokens.GetTokens(), t => rex.IsMatch(t.Text));
                string            originalWS      = tokenText(wsTokens);
                Console.WriteLine("origin ws tokens len: " + originalWS.Length);
                CommonTokenStream formatted_tokens = Tool.tokenize(output, lang.lexerClass);
                wsTokens = BuffUtils.filter(formatted_tokens.GetTokens(), t => rex.IsMatch(t.Text));
                string formattedWS = tokenText(wsTokens);
                Console.WriteLine("formatted ws tokens len: " + formattedWS.Length);
                editDistance  = levenshteinDistance(originalWS, formattedWS);
                editDistance /= Math.Max(testDoc.content.Length, output.Length);
                Console.WriteLine("Levenshtein distance of ws normalized to output len: " + editDistance);

                ClassificationAnalysis analysis = new ClassificationAnalysis(testDoc, analysisPerToken);
                Console.WriteLine(analysis);
            }

            if (lang != null)
            {
                //            GUIController controller;
                //            controller = new GUIController(analysisPerToken, testDoc, output, lang.lexerClass);
                //controller.show();
                //			System.out.println(output);
                //Console.Write("formatting time {0:D}s\n", (stop - start) / 1000000);
                Console.Write("classify calls {0:D}, hits {1:D} rate {2:F}\n", kNNClassifier.nClassifyCalls, kNNClassifier.nClassifyCacheHits, kNNClassifier.nClassifyCacheHits / (float)kNNClassifier.nClassifyCalls);
                Console.Write("kNN calls {0:D}, hits {1:D} rate {2:F}\n", kNNClassifier.nNNCalls, kNNClassifier.nNNCacheHits, kNNClassifier.nNNCacheHits / (float)kNNClassifier.nNNCalls);
            }
        }
Example 9
        public static string Main(object[] args)
        {
            Log.Reset();
            try
            {
                if (args.Length < 7)
                {
                    Log.WriteLine("org.antlr.codebuff.Tool -g grammar-name -rule start-rule -corpus root-dir-of-samples \\\n" + "   [-files file-extension] [-indent num-spaces] \\" + "   [-comment line-comment-name] [-o output-file] file-to-format");
                    return(Log.Message());
                }

                formatted_output = null;
                string outputFileName  = "";
                string grammarName     = null;
                string startRule       = null;
                string corpusDir       = null;
                string indentS         = "4";
                string commentS        = null;
                string input_file_name = null;
                string fileExtension   = null;
                int    i           = 0;
                Type   parserClass = null;
                Type   lexerClass  = null;
                while (i < args.Length && ((string)args[i]).StartsWith("-", StringComparison.Ordinal))
                {
                    switch (args[i])
                    {
                    case "-g":
                        i++;
                        grammarName = (string)args[i++];
                        break;

                    case "-lexer":
                        i++;
                        lexerClass = (Type)args[i++];
                        break;

                    case "-parser":
                        i++;
                        parserClass = (Type)args[i++];
                        break;

                    case "-rule":
                        i++;
                        startRule = (string)args[i++];
                        break;

                    case "-corpus":
                        i++;
                        corpusDir = (string)args[i++];
                        break;

                    case "-files":
                        i++;
                        fileExtension = (string)args[i++];
                        break;

                    case "-indent":
                        i++;
                        indentS = (string)args[i++];
                        break;

                    case "-comment":
                        i++;
                        commentS = (string)args[i++];
                        break;

                    case "-o":
                        i++;
                        outputFileName = (string)args[i++];
                        break;

                    case "-inoutstring":
                        i++;
                        formatted_output = "";
                        outputFileName   = null;
                        break;
                    }
                }
                input_file_name = (string)args[i]; // must be last

                Log.WriteLine("gramm: " + grammarName);
                string parserClassName = grammarName + "Parser";
                string lexerClassName  = grammarName + "Lexer";
                Lexer  lexer           = null;
                if (lexerClass == null || parserClass == null)
                {
                    Log.WriteLine("You must specify a lexer and parser.");
                    return(Log.Message());
                }
                int indentSize            = int.Parse(indentS);
                int singleLineCommentType = -1;
                if (!string.ReferenceEquals(commentS, null))
                {
                    try
                    {
                        lexer = getLexer(lexerClass, null);
                    }
                    catch (Exception e)
                    {
                        Log.WriteLine("Can't instantiate lexer " + lexerClassName);
                        Log.WriteLine(e.StackTrace);
                    }
                    if (lexer == null)
                    {
                        return(Log.Message());
                    }
                    IDictionary <string, int> tokenTypeMap = lexer.TokenTypeMap;
                    if (tokenTypeMap.ContainsKey(commentS))
                    {
                        singleLineCommentType = tokenTypeMap[commentS];
                    }
                }
                string fileRegex = null;
                if (!string.ReferenceEquals(fileExtension, null))
                {
                    var pattern            = "";
                    var allowable_suffices = fileExtension.Split(';').ToList <string>();
                    foreach (var s in allowable_suffices)
                    {
                        var no_dot = s.Substring(s.IndexOf('.') + 1);
                        pattern = pattern == "" ? ("(" + no_dot) : (pattern + "|" + no_dot);
                    }
                    pattern   = pattern + ")";
                    fileRegex = ".*\\." + pattern;
                }
                LangDescriptor language = new LangDescriptor(grammarName, corpusDir, fileRegex, lexerClass, parserClass, startRule, indentSize, singleLineCommentType);

                ////////
                // load all corpus files up front
                IList <string>        allFiles  = getFilenames(language.corpusDir, language.fileRegex);
                IList <InputDocument> documents = load(allFiles, language);

                // Handle formatting of document if it's passed as a string or not.
                if (unformatted_input == null)
                {
                    // Don't include file to format in corpus itself.
                    string path = System.IO.Path.GetFullPath(input_file_name);
                    IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                    // Perform training of formatter.
                    Corpus corpus = new Corpus(others, language);
                    corpus.train();

                    // Parse code contained in file.
                    InputDocument unformatted_document = parse(input_file_name, language);

                    // Format document.
                    Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                    formatted_output = formatter.format(unformatted_document, false);
                }
                else
                {
                    // Perform training of formatter.
                    Corpus corpus = new Corpus(documents, language);
                    corpus.train();

                    // Parse code that was represented as a string.
                    InputDocument unformatted_document = parse(input_file_name, unformatted_input, language);

                    // Format document.
                    Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                    formatted_output = formatter.format(unformatted_document, false);
                }
                ///////
                if (outputFileName != null && outputFileName == "")
                {
                    Log.WriteLine(formatted_output);
                }
                else if (!string.IsNullOrEmpty(outputFileName))
                {
                    org.antlr.codebuff.misc.Utils.writeFile(outputFileName, formatted_output);
                }
            }
            catch (Exception)
            {
                throw; // rethrow, preserving the original stack trace
            }

            return(formatted_output);
        }
Example 10
        /// <summary>
        /// Return a new map from rulename to List of (a,b) pairs stripped of
        ///  tuples (a,b) where a or b is in rule repeated token set.
        ///  E.g., before removing repeated token ',', we see:
        ///
        ///  elementValueArrayInitializer: 4:'{',',' 1:'{','}' 4:',','}'
        ///
        ///  After removing tuples containing repeated tokens, we get:
        ///
        ///  elementValueArrayInitializer: 1:'{','}'
        /// </summary>
        protected internal virtual IDictionary <RuleAltKey, IList <org.antlr.codebuff.misc.Pair <int, int> > > stripPairsWithRepeatedTokens()
        {
            IDictionary <RuleAltKey, IList <org.antlr.codebuff.misc.Pair <int, int> > > ruleToPairsWoRepeats = new Dictionary <RuleAltKey, IList <org.antlr.codebuff.misc.Pair <int, int> > >();

            // For each rule
            foreach (RuleAltKey ruleAltKey in ruleToPairsBag.Keys)
            {
                ISet <int> ruleRepeatedTokens = null;
                ruleToRepeatedTokensSet.TryGetValue(ruleAltKey, out ruleRepeatedTokens);
                ISet <org.antlr.codebuff.misc.Pair <int, int> > pairsBag = null;
                ruleToPairsBag.TryGetValue(ruleAltKey, out pairsBag);
                // If there are repeated tokens for this rule
                if (ruleRepeatedTokens != null)
                {
                    // Remove all (a,b) for b in repeated token set
                    IList <org.antlr.codebuff.misc.Pair <int, int> > pairsWoRepeats = BuffUtils.filter(pairsBag, p => !ruleRepeatedTokens.Contains(p.a) && !ruleRepeatedTokens.Contains(p.b));
                    ruleToPairsWoRepeats[ruleAltKey] = pairsWoRepeats;
                }
                else
                {
                    ruleToPairsWoRepeats[ruleAltKey] = new List <org.antlr.codebuff.misc.Pair <int, int> >(pairsBag);
                }
            }
            return(ruleToPairsWoRepeats);
        }
Example 11
        public static void Main(string[] args)
        {
            if (args.Length < 7)
            {
                Console.Error.WriteLine("org.antlr.codebuff.Tool -g grammar-name -rule start-rule -corpus root-dir-of-samples \\\n" + "   [-files file-extension] [-indent num-spaces] \\" + "   [-comment line-comment-name] [-o output-file] file-to-format");
                return;
            }

            formatted_output = null;
            string outputFileName  = "";
            string grammarName     = null;
            string startRule       = null;
            string corpusDir       = null;
            string indentS         = "4";
            string commentS        = null;
            string input_file_name = null;
            string fileExtension   = null;
            int    i = 0;

            while (i < args.Length && args[i].StartsWith("-", StringComparison.Ordinal))
            {
                switch (args[i])
                {
                case "-g":
                    i++;
                    grammarName = args[i++];
                    break;

                case "-rule":
                    i++;
                    startRule = args[i++];
                    break;

                case "-corpus":
                    i++;
                    corpusDir = args[i++];
                    break;

                case "-files":
                    i++;
                    fileExtension = args[i++];
                    break;

                case "-indent":
                    i++;
                    indentS = args[i++];
                    break;

                case "-comment":
                    i++;
                    commentS = args[i++];
                    break;

                case "-o":
                    i++;
                    outputFileName = args[i++];
                    break;

                case "-inoutstring":
                    i++;
                    formatted_output = "";
                    outputFileName   = null;
                    break;
                }
            }
            input_file_name = args[i];             // must be last

            Console.WriteLine("gramm: " + grammarName);
            string parserClassName = grammarName + "Parser";
            string lexerClassName  = grammarName + "Lexer";
            Type   parserClass     = null;
            Type   lexerClass      = null;
            Lexer  lexer           = null;

            try
            {
                parserClass = (Type)Type.GetType(parserClassName);
                lexerClass  = (Type)Type.GetType(lexerClassName);
            }
            catch (Exception e)
            {
                Console.Error.WriteLine("Can't load " + parserClassName + " or maybe " + lexerClassName);
                Console.Error.WriteLine("Make sure they are generated by ANTLR, compiled, and in CLASSPATH");
                System.Console.WriteLine(e.StackTrace);
            }
            if (parserClass == null || lexerClass == null)
            {
                return;                 // don't return from catch!
            }
            int indentSize            = int.Parse(indentS);
            int singleLineCommentType = -1;

            if (!string.ReferenceEquals(commentS, null))
            {
                try
                {
                    lexer = getLexer(lexerClass, null);
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("Can't instantiate lexer " + lexerClassName);
                    System.Console.WriteLine(e.StackTrace);
                }
                if (lexer == null)
                {
                    return;
                }
                IDictionary <string, int> tokenTypeMap = lexer.TokenTypeMap;
                if (tokenTypeMap.ContainsKey(commentS))
                {
                    singleLineCommentType = tokenTypeMap[commentS];
                }
            }
            string fileRegex = null;

            if (!string.ReferenceEquals(fileExtension, null))
            {
                fileRegex = ".*\\." + fileExtension;
            }
            LangDescriptor language = new LangDescriptor(grammarName, corpusDir, fileRegex, lexerClass, parserClass, startRule, indentSize, singleLineCommentType);

            ////////
            // load all corpus files up front
            IList <string>        allFiles  = getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = load(allFiles, language);

            // Handle formatting of document if it's passed as a string or not.
            if (unformatted_input == null)
            {
                // Don't include file to format in corpus itself.
                string path = System.IO.Path.GetFullPath(input_file_name);
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                // Perform training of formatter.
                Corpus corpus = new Corpus(others, language);
                corpus.train();

                // Parse code contained in file.
                InputDocument unformatted_document = parse(input_file_name, language);

                // Format document.
                Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                formatted_output = formatter.format(unformatted_document, false);
            }
            else
            {
                // Perform training of formatter.
                Corpus corpus = new Corpus(documents, language);
                corpus.train();

                // Parse code that was represented as a string.
                InputDocument unformatted_document = parse(input_file_name, unformatted_input, language);

                // Format document.
                Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
                formatted_output = formatter.format(unformatted_document, false);
            }
            ///////
            if (outputFileName != null && outputFileName == "")
            {
                System.Console.WriteLine(formatted_output);
            }
            else if (!string.IsNullOrEmpty(outputFileName))
            {
                org.antlr.codebuff.misc.Utils.writeFile(outputFileName, formatted_output);
            }
        }