Example #1
0
        /// <summary>
        /// Look into the originalTokens stream to get the hidden tokens to the left of the
        ///  given token. If any of them is a comment, emit all of them (whitespace and
        ///  comments) except for whitespace at the end, as we'll inject that per
        ///  newline prediction.
        ///
        ///  We are able to see the original input stream for comment purposes only. With all
        ///  whitespace removed, we can't emit this stuff properly. This
        ///  is the only place that examines the original token stream during formatting.
        /// </summary>
        /// <param name="tokenIndexInStream">Index of the token whose left-hand hidden tokens are looked up in originalTokens.</param>
        /// <param name="injectNL_WS">The injection category predicted so far; only its low byte is inspected here.</param>
        /// <returns>
        /// <paramref name="injectNL_WS"/> unchanged, unless the last emitted hidden token has the
        /// language's single-line comment type and the low byte of <paramref name="injectNL_WS"/>
        /// is not Trainer.CAT_INJECT_NL; in that case Trainer.nlcat(1) is returned instead.
        /// </returns>
        public virtual int emitCommentsToTheLeft(int tokenIndexInStream, int injectNL_WS)
        {
            // Patterns are passed to the static Regex.IsMatch so no Regex object is
            // constructed per loop iteration.
            const string whitespaceOnlyPattern = "^\\s+$";
            const string newlinesOnlyPattern   = "^\\n+$";

            IList <Token> hiddenTokensToLeft = originalTokens.GetHiddenTokensToLeft(tokenIndexInStream);

            // if at least one is not whitespace, assume it's a comment and print all hidden stuff including whitespace
            if (hiddenTokensToLeft == null || !Trainer.hasCommentToken(hiddenTokensToLeft))
            {
                return(injectNL_WS);
            }

            // avoid whitespace at end of sequence as we'll inject that:
            // find the index of the right-most token that is not pure whitespace
            int last = -1;
            for (int i = hiddenTokensToLeft.Count - 1; i >= 0; i--)
            {
                if (!Regex.IsMatch(hiddenTokensToLeft[i].Text, whitespaceOnlyPattern))
                {
                    last = i;
                    break;
                }
            }
            if (last < 0)
            {
                // Every hidden token is pure whitespace, so there is nothing to emit.
                // Without this guard the indexing below would throw on index -1.
                return(injectNL_WS);
            }

            Token commentToken = hiddenTokensToLeft[last];
            for (int i = 0; i <= last; i++)
            {
                string hiddenText = hiddenTokensToLeft[i].Text;
                output.Append(hiddenText);
                // NOTE: only tokens made up solely of '\n' advance the line counter here.
                // A "\r\n" token does not match and is counted as characters on the
                // current line instead (suspected to be wrong for Windows line endings).
                if (Regex.IsMatch(hiddenText, newlinesOnlyPattern))
                {
                    line         += Tool.count(hiddenText, '\n');
                    charPosInLine = 0;
                }
                else
                {
                    // if a comment or plain ' ', must count char position
                    charPosInLine += hiddenText.Length;
                }
            }

            // failsafe. make sure single-line comments have \n on the end.
            // If not predicted, must override and inject one
            if (commentToken.Type == corpus.language.singleLineCommentType && (injectNL_WS & 0xFF) != Trainer.CAT_INJECT_NL)
            {
                return(Trainer.nlcat(1));                        // force formatter to predict newline then trigger alignment
            }

            return(injectNL_WS);            // send same thing back out unless we trigger failsafe
        }
Example #2
0
        /// <summary>
        /// Compute a document difference metric 0-1.0 between two documents that
        ///  are identical other than (likely) the whitespace and comments.
        ///
        ///  1.0 means the docs are maximally different and 0 means docs are identical.
        ///
        ///  The Levenshtein distance between the docs counts only
        ///  whitespace diffs as the non-WS content is identical.
        ///  Levenshtein distance is bounded by 0..max(len(doc1),len(doc2)) so
        ///  we normalize the distance by dividing by max WS count.
        ///
        ///  TODO: can we simplify this to a simple walk with two
        ///  cursors through the original vs formatted counting
        ///  mismatched whitespace? real text are like anchors.
        /// </summary>
        /// <param name="original">Text of the original document.</param>
        /// <param name="formatted">Text of the formatted document.</param>
        /// <param name="lexerClass">Lexer type handed to Tool.tokenize for both documents.</param>
        /// <returns>
        /// The summed whitespace edit distance divided by the larger of the two whitespace
        /// character counts, or 0 when no whitespace was counted in either document.
        /// </returns>
        public static double docDiff(string original, string formatted, Type lexerClass)
        {
            // Grammar must strip all but real tokens and whitespace (and put that on hidden channel)
            CodeBuffTokenStream original_tokens  = Tool.tokenize(original, lexerClass);
            CodeBuffTokenStream formatted_tokens = Tool.tokenize(formatted, lexerClass);

            // walk token streams and examine whitespace in between tokens
            int i            = -1;
            int ws_distance  = 0;
            int original_ws  = 0;
            int formatted_ws = 0;

            while (true)
            {
                Token ot = original_tokens.LT(i);                 // TODO: FIX THIS! can't use LT()
                if (ot == null || ot.Type == TokenConstants.EOF)
                {
                    break;
                }
                // hidden text to the left of the original token, computed once and reused
                string owsText = tokenText(original_tokens.GetHiddenTokensToLeft(ot.TokenIndex));
                original_ws += owsText.Length;

                Token ft = formatted_tokens.LT(i);                 // TODO: FIX THIS! can't use LT()
                if (ft == null || ft.Type == TokenConstants.EOF)
                {
                    break;
                }
                string fwsText = tokenText(formatted_tokens.GetHiddenTokensToLeft(ft.TokenIndex));
                formatted_ws += fwsText.Length;

                ws_distance += whitespaceEditDistance(owsText, fwsText);
                i++;
            }
            // it's probably ok to ignore ws diffs after last real token

            int max_ws = Math.Max(original_ws, formatted_ws);
            if (max_ws == 0)
            {
                // No whitespace was counted on either side. Dividing would give
                // 0f / 0 == NaN, so report "identical" instead.
                return(0.0);
            }

            // float arithmetic is kept so existing (non-degenerate) results are unchanged
            double normalized_ws_distance = ((float)ws_distance) / max_ws;

            return(normalized_ws_distance);
        }
Example #3
0
        /// <summary>
        /// Collects statistics for a list of sibling nodes joined by a separator token.
        /// The list counts as "split" when the first hidden token immediately before or
        /// after the separator contains a '\n'. The sibling length is appended to
        /// splitListInfo (split) or listInfo (not split) under a key built from
        /// <paramref name="ctx"/>, the first sibling and the separator type. For split
        /// lists the Trainer.listform value is also appended to splitListForm. Finally the
        /// per-token info from getInfoAboutListTokens is copied into tokenToListInfo for
        /// tokens that have no entry yet.
        /// </summary>
        /// <param name="ctx">Parent context of the sibling list.</param>
        /// <param name="siblings">The sibling nodes; the first and last are read as ParserRuleContext.</param>
        /// <param name="separator">The separator token between siblings.</param>
        public override void visitNonSingletonWithSeparator <T1>(ParserRuleContext ctx, IList <T1> siblings, IToken separator)
        {
            const int newline = '\n';

            ParserRuleContext headSibling = siblings[0] as Antlr4.Runtime.ParserRuleContext;
            ParserRuleContext tailSibling = siblings[siblings.Count - 1] as Antlr4.Runtime.ParserRuleContext;

            // hidden-channel tokens around the whole list and around the separator
            IList<Token> beforeList = tokens.GetHiddenTokensToLeft(headSibling.Start.TokenIndex);
            IList<Token> beforeSep  = tokens.GetHiddenTokensToLeft(separator.TokenIndex);
            IList<Token> afterSep   = tokens.GetHiddenTokensToRight(separator.TokenIndex);
            IList<Token> afterList  = tokens.GetHiddenTokensToRight(tailSibling.Stop.TokenIndex);

            Token leadingHidden  = beforeList == null ? null : beforeList[0];
            Token trailingHidden = afterList == null ? null : afterList[0];

            // One slot per position: before list, before sep, after sep, after last element.
            // A slot holds '\n' when the first hidden token at that position contains a '\n', else 0.
            // NOTE: only a bare '\n' is looked for, which is not platform independent.
            int[] newlineSlots = new int[4];
            newlineSlots[0] = (leadingHidden != null && Tool.count(leadingHidden.Text, '\n') > 0) ? newline : 0;
            newlineSlots[1] = (beforeSep != null && Tool.count(beforeSep[0].Text, '\n') > 0) ? newline : 0;
            newlineSlots[2] = (afterSep != null && Tool.count(afterSep[0].Text, '\n') > 0) ? newline : 0;
            newlineSlots[3] = (trailingHidden != null && Tool.count(trailingHidden.Text, '\n') > 0) ? newline : 0;

            bool isSplitList = newlineSlots[1] == newline || newlineSlots[2] == newline;

            // now track length of parent:alt,child:alt list or split-list
            ParentSiblingListKey key = new ParentSiblingListKey(ctx, headSibling, separator.Type);
            IDictionary<ParentSiblingListKey, IList<int> > lengthsByKey = isSplitList ? splitListInfo : listInfo;

            IList<int> lengths;
            if (!lengthsByKey.TryGetValue(key, out lengths) || lengths == null)
            {
                lengths           = new List<int>();
                lengthsByKey[key] = lengths;
            }
            lengths.Add(Trainer.getSiblingsLength(siblings));

            // track the form split lists take for debugging
            if (isSplitList)
            {
                int listForm = Trainer.listform(newlineSlots);

                IList<int> recordedForms;
                if (!splitListForm.TryGetValue(key, out recordedForms) || recordedForms == null)
                {
                    recordedForms      = new List<int>();
                    splitListForm[key] = recordedForms;
                }
                recordedForms.Add(listForm);                 // track where we put newlines for this list
            }

            IDictionary<Token, org.antlr.codebuff.misc.Pair<bool, int> > perTokenInfo =
                getInfoAboutListTokens(ctx, tokens, tokenToNodeMap, siblings, isSplitList);

            // copy sibling list info for associated tokens into overall list
            // but don't overwrite existing so that most general (largest construct)
            // list information is used/retained (i.e., not overwritten).
            foreach (KeyValuePair<Token, org.antlr.codebuff.misc.Pair<bool, int> > entry in perTokenInfo)
            {
                if (tokenToListInfo.ContainsKey(entry.Key))
                {
                    continue;
                }
                tokenToListInfo[entry.Key] = entry.Value;
            }
        }