Beispiel #1
0
 /// <summary>
 /// Drops references held by this instance once a format() run is done so
 /// they no longer keep memory alive. The analysis results and testDoc are
 /// deliberately not touched here because they are still used for results.
 /// </summary>
 public virtual void releaseMemory()
 {
     // Classifiers.
     hposClassifier = null;
     wsClassifier   = null;

     // Token lists and token-to-node lookup tables.
     tokenToListInfo        = null;
     originalTokenToNodeMap = null;
     tokenToNodeMap         = null;
     originalTokens         = null;
     realTokens             = null;

     // Training corpus reference.
     corpus = null;
 }
Beispiel #2
0
        /// <summary>
        /// Creates a classifier over <paramref name="corpus"/> using the given
        /// feature descriptions and the list <paramref name="Y"/>.
        /// </summary>
        /// <param name="corpus">Corpus stored on this instance.</param>
        /// <param name="FEATURES">Feature metadata; asserted (debug builds) to contain at most Trainer.NUM_FEATURES entries.</param>
        /// <param name="Y">List stored on this instance as <c>Y</c>.</param>
        public kNNClassifier(Corpus corpus, FeatureMetaData[] FEATURES, IList <int> Y)
        {
            this.corpus   = corpus;
            this.FEATURES = FEATURES;
            Debug.Assert(FEATURES.Length <= Trainer.NUM_FEATURES);

            // maxDistanceCount is the sum of every feature's mismatch cost,
            // each one converted to int before it is added.
            int totalMismatchCost = 0;
            for (int idx = 0; idx < FEATURES.Length; idx++)
            {
                totalMismatchCost += (int)FEATURES[idx].mismatchCost;
            }

            maxDistanceCount = totalMismatchCost;
            this.Y           = Y;
        }
Beispiel #3
0
 /// <summary>
 /// Builds a formatter by delegating to the (corpus, indentSize) constructor
 /// and then recording the neighbor count and the two feature sets.
 /// </summary>
 /// <param name="corpus">Forwarded to the two-argument constructor.</param>
 /// <param name="indentSize">Forwarded to the two-argument constructor.</param>
 /// <param name="k">Value stored in <c>k</c>.</param>
 /// <param name="wsFeatures">Feature metadata stored in <c>wsFeatures</c>.</param>
 /// <param name="hposFeatures">Feature metadata stored in <c>hposFeatures</c>.</param>
 public Formatter(Corpus corpus, int indentSize, int k, FeatureMetaData[] wsFeatures, FeatureMetaData[] hposFeatures)
     : this(corpus, indentSize)
 {
     this.hposFeatures = hposFeatures;
     this.wsFeatures   = wsFeatures;
     this.k            = k;
 }
Beispiel #4
0
        /// <summary>
        /// Programmatic entry point. Parses the option list, loads the corpus files,
        /// trains on them, formats the input and returns the formatted text.
        /// </summary>
        /// <remarks>
        /// Recognized options: -g, -lexer, -parser, -rule, -corpus, -files, -indent,
        /// -comment, -o (each followed by a value) and -inoutstring (no value).
        /// -lexer and -parser values must be <see cref="Type"/> objects; all other
        /// values must be strings. The last argument is the name of the file to format.
        /// If the static <c>unformatted_input</c> is non-null it is formatted instead of
        /// the file's contents and the whole corpus is used for training; otherwise the
        /// input file is excluded from the training documents.
        /// Exceptions raised while loading, training or formatting propagate to the
        /// caller with their original stack trace.
        /// </remarks>
        /// <param name="args">Options followed by the file-to-format argument.</param>
        /// <returns>
        /// The formatted output, or the result of <c>Log.Message()</c> when the
        /// arguments are unusable (too few, unknown option, missing value, missing
        /// lexer/parser, or the lexer cannot be instantiated).
        /// </returns>
        public static string Main(object[] args)
        {
            Log.Reset();
            if (args.Length < 7)
            {
                Log.WriteLine("org.antlr.codebuff.Tool -g grammar-name -rule start-rule -corpus root-dir-of-samples \\\n" + "   [-files file-extension] [-indent num-spaces] \\" + "   [-comment line-comment-name] [-o output-file] file-to-format");
                return(Log.Message());
            }

            formatted_output = null;
            string outputFileName  = "";
            string grammarName     = null;
            string startRule       = null;
            string corpusDir       = null;
            string indentS         = "4";
            string commentS        = null;
            string input_file_name = null;
            string fileExtension   = null;
            int    i           = 0;
            Type   parserClass = null;
            Type   lexerClass  = null;

            while (i < args.Length && args[i] is string option && option.StartsWith("-", StringComparison.Ordinal))
            {
                // Value that would belong to this option, if one is present.
                object value      = i + 1 < args.Length ? args[i + 1] : null;
                bool   takesValue = true;

                switch (option)
                {
                case "-g":
                    grammarName = (string)value;
                    break;

                case "-lexer":
                    lexerClass = (Type)value;
                    break;

                case "-parser":
                    parserClass = (Type)value;
                    break;

                case "-rule":
                    startRule = (string)value;
                    break;

                case "-corpus":
                    corpusDir = (string)value;
                    break;

                case "-files":
                    fileExtension = (string)value;
                    break;

                case "-indent":
                    indentS = (string)value;
                    break;

                case "-comment":
                    commentS = (string)value;
                    break;

                case "-o":
                    outputFileName = (string)value;
                    break;

                case "-inoutstring":
                    takesValue       = false;
                    formatted_output = "";
                    outputFileName   = null;
                    break;

                default:
                    // Without this branch an unrecognized option never advanced i
                    // and the loop spun forever.
                    Log.WriteLine("Unknown option: " + option);
                    return(Log.Message());
                }

                if (takesValue && i + 1 >= args.Length)
                {
                    Log.WriteLine("Missing value for option " + option);
                    return(Log.Message());
                }
                i += takesValue ? 2 : 1;
            }

            if (i >= args.Length)
            {
                Log.WriteLine("Missing file-to-format argument.");
                return(Log.Message());
            }
            input_file_name = (string)args[i]; // must be last

            Log.WriteLine("gramm: " + grammarName);
            string lexerClassName = grammarName + "Lexer";
            if (lexerClass == null || parserClass == null)
            {
                Log.WriteLine("You must specify a lexer and parser.");
                return(Log.Message());
            }

            int indentSize            = int.Parse(indentS);
            int singleLineCommentType = -1;
            if (commentS != null)
            {
                Lexer lexer = null;
                try
                {
                    lexer = getLexer(lexerClass, null);
                }
                catch (Exception e)
                {
                    Log.WriteLine("Can't instantiate lexer " + lexerClassName);
                    Log.WriteLine(e.StackTrace);
                }
                if (lexer == null)
                {
                    return(Log.Message());
                }
                IDictionary <string, int> tokenTypeMap = lexer.TokenTypeMap;
                // Single lookup; the type stays -1 when the name is not in the map.
                if (tokenTypeMap.TryGetValue(commentS, out int commentTokenType))
                {
                    singleLineCommentType = commentTokenType;
                }
            }

            string fileRegex = null;
            if (fileExtension != null)
            {
                // "-files" may list several suffixes separated by ';'. Everything up to
                // and including the first '.' of each suffix is dropped, and the
                // remainders become alternatives: ".cs;.java" -> ".*\.(cs|java)".
                string[] suffixes = fileExtension.Split(';');
                for (int s = 0; s < suffixes.Length; s++)
                {
                    suffixes[s] = suffixes[s].Substring(suffixes[s].IndexOf('.') + 1);
                }
                fileRegex = ".*\\." + "(" + string.Join("|", suffixes) + ")";
            }
            LangDescriptor language = new LangDescriptor(grammarName, corpusDir, fileRegex, lexerClass, parserClass, startRule, indentSize, singleLineCommentType);

            // Load all corpus files up front.
            IList <string>        allFiles  = getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = load(allFiles, language);

            // Train, then parse the input; the input comes either from the file or
            // from the string held in unformatted_input.
            Corpus        corpus;
            InputDocument unformatted_document;
            if (unformatted_input == null)
            {
                // Don't include file to format in corpus itself.
                string path = System.IO.Path.GetFullPath(input_file_name);
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                corpus = new Corpus(others, language);
                corpus.train();
                unformatted_document = parse(input_file_name, language);
            }
            else
            {
                corpus = new Corpus(documents, language);
                corpus.train();
                unformatted_document = parse(input_file_name, unformatted_input, language);
            }

            Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
            formatted_output = formatter.format(unformatted_document, false);

            // "" (the default) -> write to the log; null (-inoutstring) -> write
            // nowhere; anything else is treated as an output file name.
            if (outputFileName == "")
            {
                Log.WriteLine(formatted_output);
            }
            else if (outputFileName != null)
            {
                org.antlr.codebuff.misc.Utils.writeFile(outputFileName, formatted_output);
            }

            return(formatted_output);
        }
Beispiel #5
0
        /// <summary>
        /// Refers to both X (independent) and Y (dependent/predictor) variables.
        /// </summary>
        public readonly int corpusVectorIndex;

        /// <summary>
        /// Records a neighbor: the corpus, a distance, and the corpus vector index.
        /// </summary>
        /// <param name="corpus">Value stored in <c>corpus</c>.</param>
        /// <param name="distance">Value stored in <c>distance</c>.</param>
        /// <param name="corpusVectorIndex">Value stored in <see cref="corpusVectorIndex"/>.</param>
        public Neighbor(Corpus corpus, double distance, int corpusVectorIndex)
        {
            this.corpusVectorIndex = corpusVectorIndex;
            this.distance          = distance;
            this.corpus            = corpus;
        }
Beispiel #6
0
        /// <summary>
        /// Command-line entry point. Parses the options, resolves the lexer and parser
        /// types named "&lt;grammar&gt;Lexer" / "&lt;grammar&gt;Parser", loads the corpus
        /// files, trains on them, formats the input and prints or writes the result.
        /// </summary>
        /// <remarks>
        /// Recognized options: -g, -rule, -corpus, -files, -indent, -comment, -o (each
        /// followed by a value) and -inoutstring (no value). The last argument is the
        /// file to format. If the static <c>unformatted_input</c> is non-null it is
        /// formatted instead of the file's contents and the whole corpus is used for
        /// training; otherwise the input file is excluded from the training documents.
        /// Usage and argument problems are reported on standard error and the method
        /// returns without formatting.
        /// </remarks>
        /// <param name="args">Options followed by the file-to-format argument.</param>
        public static void Main(string[] args)
        {
            if (args.Length < 7)
            {
                Console.Error.WriteLine("org.antlr.codebuff.Tool -g grammar-name -rule start-rule -corpus root-dir-of-samples \\\n" + "   [-files file-extension] [-indent num-spaces] \\" + "   [-comment line-comment-name] [-o output-file] file-to-format");
                return;
            }

            formatted_output = null;
            string outputFileName  = "";
            string grammarName     = null;
            string startRule       = null;
            string corpusDir       = null;
            string indentS         = "4";
            string commentS        = null;
            string input_file_name = null;
            string fileExtension   = null;
            int    i = 0;

            while (i < args.Length && args[i].StartsWith("-", StringComparison.Ordinal))
            {
                string option = args[i];
                // Value that would belong to this option, if one is present.
                string value      = i + 1 < args.Length ? args[i + 1] : null;
                bool   takesValue = true;

                switch (option)
                {
                case "-g":
                    grammarName = value;
                    break;

                case "-rule":
                    startRule = value;
                    break;

                case "-corpus":
                    corpusDir = value;
                    break;

                case "-files":
                    fileExtension = value;
                    break;

                case "-indent":
                    indentS = value;
                    break;

                case "-comment":
                    commentS = value;
                    break;

                case "-o":
                    outputFileName = value;
                    break;

                case "-inoutstring":
                    takesValue       = false;
                    formatted_output = "";
                    outputFileName   = null;
                    break;

                default:
                    // Without this branch an unrecognized option never advanced i
                    // and the loop spun forever.
                    Console.Error.WriteLine("Unknown option: " + option);
                    return;
                }

                if (takesValue && i + 1 >= args.Length)
                {
                    Console.Error.WriteLine("Missing value for option " + option);
                    return;
                }
                i += takesValue ? 2 : 1;
            }

            if (i >= args.Length)
            {
                Console.Error.WriteLine("Missing file-to-format argument.");
                return;
            }
            input_file_name = args[i];             // must be last

            Console.WriteLine("gramm: " + grammarName);
            string parserClassName = grammarName + "Parser";
            string lexerClassName  = grammarName + "Lexer";
            Type   parserClass     = null;
            Type   lexerClass      = null;

            // Type.GetType(string) returns null when a type cannot be found, so the
            // failure report has to cover both the null result and a thrown exception.
            Exception loadError = null;
            try
            {
                parserClass = Type.GetType(parserClassName);
                lexerClass  = Type.GetType(lexerClassName);
            }
            catch (Exception e)
            {
                loadError = e;
            }
            if (parserClass == null || lexerClass == null)
            {
                Console.Error.WriteLine("Can't load " + parserClassName + " or maybe " + lexerClassName);
                Console.Error.WriteLine("Make sure they are generated by ANTLR, compiled, and in CLASSPATH");
                if (loadError != null)
                {
                    System.Console.WriteLine(loadError.StackTrace);
                }
                return;
            }

            int indentSize            = int.Parse(indentS);
            int singleLineCommentType = -1;

            if (commentS != null)
            {
                Lexer lexer = null;
                try
                {
                    lexer = getLexer(lexerClass, null);
                }
                catch (Exception e)
                {
                    Console.Error.WriteLine("Can't instantiate lexer " + lexerClassName);
                    System.Console.WriteLine(e.StackTrace);
                }
                if (lexer == null)
                {
                    return;
                }
                IDictionary <string, int> tokenTypeMap = lexer.TokenTypeMap;
                // Single lookup; the type stays -1 when the name is not in the map.
                if (tokenTypeMap.TryGetValue(commentS, out int commentTokenType))
                {
                    singleLineCommentType = commentTokenType;
                }
            }

            string fileRegex = null;
            if (fileExtension != null)
            {
                fileRegex = ".*\\." + fileExtension;
            }
            LangDescriptor language = new LangDescriptor(grammarName, corpusDir, fileRegex, lexerClass, parserClass, startRule, indentSize, singleLineCommentType);

            // Load all corpus files up front.
            IList <string>        allFiles  = getFilenames(language.corpusDir, language.fileRegex);
            IList <InputDocument> documents = load(allFiles, language);

            // Train, then parse the input; the input comes either from the file or
            // from the string held in unformatted_input.
            Corpus        corpus;
            InputDocument unformatted_document;
            if (unformatted_input == null)
            {
                // Don't include file to format in corpus itself.
                string path = System.IO.Path.GetFullPath(input_file_name);
                IList <InputDocument> others = BuffUtils.filter(documents, d => !d.fileName.Equals(path));
                corpus = new Corpus(others, language);
                corpus.train();
                unformatted_document = parse(input_file_name, language);
            }
            else
            {
                corpus = new Corpus(documents, language);
                corpus.train();
                unformatted_document = parse(input_file_name, unformatted_input, language);
            }

            Formatter formatter = new Formatter(corpus, language.indentSize, Formatter.DEFAULT_K, Trainer.FEATURES_INJECT_WS, Trainer.FEATURES_HPOS);
            formatted_output = formatter.format(unformatted_document, false);

            // "" (the default) -> print to standard output; null (-inoutstring) ->
            // write nowhere; anything else is treated as an output file name.
            if (outputFileName == "")
            {
                System.Console.WriteLine(formatted_output);
            }
            else if (outputFileName != null)
            {
                org.antlr.codebuff.misc.Utils.writeFile(outputFileName, formatted_output);
            }
        }