/**
 * Builds a string-literal token from a macro argument.
 *
 * The argument is flattened into text with concat(); that text is passed
 * through escape() and wrapped in double quotes to form the token text.
 * The flattened, unescaped text is stored as the token value.
 *
 * @param pos token supplying the line and column of the new token.
 * @param arg the macro argument to turn into a string.
 * @return a new Token.STRING token.
 */
private Token stringify(Token pos, Argument arg) {
    StringBuilder buf = new StringBuilder();
    concat(buf, arg);

    /* Use the standard StringBuilder.Append / ToString members throughout,
     * rather than a mix of Java-style and .NET-style spellings. */
    StringBuilder str = new StringBuilder("\"");
    escape(str, buf.ToString());
    str.Append("\"");

    return new Token(Token.STRING,
            pos.getLine(), pos.getColumn(),
            str.ToString(), buf.ToString());
}
/**
 * Reports a pragma as unknown by raising a warning at the pragma's
 * name token. The value tokens are not inspected.
 *
 * @param name  the token naming the pragma.
 * @param value the remaining tokens of the pragma (unused here).
 */
protected void pragma(Token name, List<Token> value) {
    String message = "Unknown #" + "pragma: " + name.getText();
    warning(name, message);
}
/**
 * Reports an error at the position of a token.
 *
 * The token's line and column are forwarded, with the message, to
 * the positional overload. As noted for that overload: if a
 * PreprocessorListener is installed it receives the error,
 * otherwise an exception is thrown.
 *
 * @see #error(int, int, String)
 */
protected void error(Token tok, String msg) {
    int line = tok.getLine();
    int column = tok.getColumn();
    error(line, column, msg);
}
/**
 * Produces the next preprocessed token.
 *
 * Loops over tokens from source_token() until one can be returned:
 *  - while the current conditional state is inactive, everything except
 *    directives, newlines, EOF, whitespace and comments is skipped a line
 *    at a time;
 *  - identifiers naming a macro are expanded through macro(), after which
 *    the loop continues so that the expansion is read back;
 *  - a Token.HASH starts a directive, which is dispatched on the
 *    directive name looked up in ppcmds.
 *
 * @return the next token to hand to the caller.
 */
private Token _token() {
    for (;;) {
        Token tok;
        if (!isActive()) {
            try {
                /* XXX Tell lexer to ignore warnings. */
                source.setActive(false);
                tok = source_token();
            } finally {
                /* XXX Tell lexer to stop ignoring warnings. */
                source.setActive(true);
            }
            switch (tok.getType()) {
                case Token.HASH:
                case Token.NL:
                case Token.EOF:
                    /* The preprocessor has to take action here. */
                    break;
                case Token.WHITESPACE:
                    return tok;
                case Token.CCOMMENT:
                case Token.CPPCOMMENT:
                    // Patch up to preserve whitespace.
                    if (getFeature(Feature.KEEPALLCOMMENTS))
                        return tok;
                    if (!isActive())
                        return toWhitespace(tok);
                    if (getFeature(Feature.KEEPCOMMENTS))
                        return tok;
                    return toWhitespace(tok);
                default:
                    // Return Token.NL to preserve whitespace.
                    /* XXX This might lose a comment. */
                    return source_skipline(false);
            }
        } else {
            tok = source_token();
        }

        switch (tok.getType()) {
            case Token.EOF:
                /* Pop the stacks. */
                return tok;

            case Token.WHITESPACE:
            case Token.NL:
                return tok;

            case Token.CCOMMENT:
            case Token.CPPCOMMENT:
                return tok;

            case '!': case '%': case '&':
            case '(': case ')': case '*':
            case '+': case ',': case '-':
            case '/': case ':': case ';':
            case '<': case '=': case '>':
            case '?': case '[': case ']':
            case '^': case '{': case '|':
            case '}': case '~': case '.':

            /* From Olivier Chafik for Objective C? */
            case '@':
            /* The one remaining ASCII, might as well. */
            case '`':

            // case '#':

            case Token.AND_EQ:
            case Token.ARROW:
            case Token.CHARACTER:
            case Token.DEC:
            case Token.DIV_EQ:
            case Token.ELLIPSIS:
            case Token.EQ:
            case Token.GE:
            case Token.HEADER:  /* Should only arise from include() */
            case Token.INC:
            case Token.LAND:
            case Token.LE:
            case Token.LOR:
            case Token.LSH:
            case Token.LSH_EQ:
            case Token.SUB_EQ:
            case Token.MOD_EQ:
            case Token.MULT_EQ:
            case Token.NE:
            case Token.OR_EQ:
            case Token.PLUS_EQ:
            case Token.RANGE:
            case Token.RSH:
            case Token.RSH_EQ:
            case Token.STRING:
            case Token.XOR_EQ:
                return tok;

            case Token.INTEGER:
                return tok;

            case Token.IDENTIFIER:
                /* m stays null when the identifier is not a macro name. */
                Macro m;
                macros.TryGetValue(tok.getText(), out m);
                if (m == null)
                    return tok;
                if (source.isExpanding(m))
                    return tok;
                /* On success the expansion has been pushed as a new
                 * source; go round the loop to read it back. */
                if (macro(m, tok))
                    break;
                return tok;

            case Token.P_LINE:
                if (getFeature(Feature.LINEMARKERS))
                    return tok;
                break;

            case Token.INVALID:
                if (getFeature(Feature.CSYNTAX))
                    error(tok, (String)tok.getValue());
                return tok;

            default:
                throw new Exception("Bad token " + tok);

            case Token.HASH:
                tok = source_token_nonwhite();
                switch (tok.getType()) {
                    case Token.NL:
                        /* Some code has #\n */
                        continue;
                    case Token.IDENTIFIER:
                        break;
                    default:
                        error(tok,
                            "Preprocessor directive not a word " +
                            tok.getText());
                        return source_skipline(false);
                }

                /* Look the directive up without the indexer: an int can
                 * never compare equal to null, so a missing key must be
                 * detected through TryGetValue for the error below to be
                 * reachable. */
                int ppcmd;
                if (!ppcmds.TryGetValue(tok.getText(), out ppcmd)) {
                    error(tok,
                        "Unknown preprocessor directive " +
                        tok.getText());
                    return source_skipline(false);
                }

                switch (ppcmd) {

                    case PP_DEFINE:
                        if (!isActive())
                            return source_skipline(false);
                        else
                            return define();

                    case PP_UNDEF:
                        if (!isActive())
                            return source_skipline(false);
                        else
                            return undef();

                    case PP_INCLUDE:
                        if (!isActive())
                            return source_skipline(false);
                        else
                            return include(false, false);

                    case PP_INCLUDE_NEXT:
                        if (!isActive())
                            return source_skipline(false);
                        if (!getFeature(Feature.INCLUDENEXT)) {
                            error(tok,
                                "Directive include_next not enabled"
                                );
                            return source_skipline(false);
                        }
                        return include(true, false);

                    case PP_WARNING:
                    case PP_ERROR:
                        if (!isActive())
                            return source_skipline(false);
                        else
                            error(tok, ppcmd == PP_ERROR);
                        break;

                    case PP_IF:
                        push_state();
                        if (!isActive()) {
                            return source_skipline(false);
                        }
                        _expr_token = null;
                        states.Peek().setActive(expr(0) != 0);
                        tok = expr_token(); /* unget */
                        if (tok.getType() == Token.NL)
                            return tok;
                        return source_skipline(true);

                    case PP_ELIF:
                        State state = states.Peek();
                        /* XXX No check here that an 'if' is open. */
                        if (state.sawElse()) {
                            error(tok,
                                "#elif after #" + "else");
                            return source_skipline(false);
                        } else if (!state.isParentActive()) {
                            /* Nested in skipped 'if' */
                            return source_skipline(false);
                        } else if (state.isActive()) {
                            /* The 'if' part got executed. */
                            state.setParentActive(false);
                            /* This is like # else # if but with
                             * only one # end. */
                            state.setActive(false);
                            return source_skipline(false);
                        } else {
                            _expr_token = null;
                            state.setActive(expr(0) != 0);
                            tok = expr_token(); /* unget */
                            if (tok.getType() == Token.NL)
                                return tok;
                            return source_skipline(true);
                        }

                    case PP_ELSE:
                        state = states.Peek();
                        /* XXX No check here that an 'if' is open. */
                        if (state.sawElse()) {
                            error(tok,
                                "#" + "else after #" + "else");
                            return source_skipline(false);
                        } else {
                            state.setSawElse();
                            state.setActive(!state.isActive());
                            return source_skipline(warnings.HasFlag(Warning.ENDIF_LABELS));
                        }

                    case PP_IFDEF:
                        push_state();
                        if (!isActive()) {
                            return source_skipline(false);
                        } else {
                            tok = source_token_nonwhite();
                            if (tok.getType() != Token.IDENTIFIER) {
                                error(tok,
                                    "Expected identifier, not " +
                                    tok.getText());
                                return source_skipline(false);
                            } else {
                                String text = tok.getText();
                                bool exists =
                                    macros.ContainsKey(text);
                                states.Peek().setActive(exists);
                                return source_skipline(true);
                            }
                        }

                    case PP_IFNDEF:
                        push_state();
                        if (!isActive()) {
                            return source_skipline(false);
                        } else {
                            tok = source_token_nonwhite();
                            if (tok.getType() != Token.IDENTIFIER) {
                                error(tok,
                                    "Expected identifier, not " +
                                    tok.getText());
                                return source_skipline(false);
                            } else {
                                String text = tok.getText();
                                bool exists =
                                    macros.ContainsKey(text);
                                states.Peek().setActive(!exists);
                                return source_skipline(true);
                            }
                        }

                    case PP_ENDIF:
                        pop_state();
                        return source_skipline(warnings.HasFlag(Warning.ENDIF_LABELS));

                    case PP_LINE:
                        return source_skipline(false);

                    case PP_PRAGMA:
                        if (!isActive())
                            return source_skipline(false);
                        return pragma();

                    case PP_IMPORT:
                        if (!isActive())
                            return source_skipline(false);
                        else
                            return import();

                    default:
                        /* Actual unknown directives are
                         * processed above. If we get here,
                         * we succeeded the map lookup but
                         * failed to handle it. Therefore,
                         * this is (unconditionally?) fatal. */
                        throw new Exception(
                            "Internal error: Unknown directive " + tok);
                }

                /* Only #warning / #error reach this point; carry on
                 * with the next token. */
                break;
        }
    }
}
/**
 * Replaces a token by a whitespace token that keeps only its line breaks.
 *
 * The text of the given token is scanned for line terminators and the
 * result is a Token.WHITESPACE token, at the same line and column, whose
 * text is one '\n' per line break found. A "\r\n" pair counts as a single
 * line break.
 *
 * @param tok the token to blank out.
 * @return a whitespace token made only of '\n' characters (possibly empty).
 */
private Token toWhitespace(Token tok) {
    String text = tok.getText();
    bool cr = false;    /* true only directly after a '\r' */
    int nls = 0;

    foreach (char c in text) {
        switch (c) {
            case '\r':
                cr = true;
                nls++;
                break;
            case '\n':
                /* The '\n' of a "\r\n" pair was already counted. */
                if (!cr)
                    nls++;
                cr = false;
                break;
            case '\u2028':
            case '\u2029':
            case '\u000B':
            case '\u000C':
            case '\u0085':
                cr = false;
                nls++;
                break;
            default:
                /* Any other character ends a pending '\r', so that a
                 * later '\n' is counted as its own line break. */
                cr = false;
                break;
        }
    }

    return new Token(Token.WHITESPACE,
            tok.getLine(), tok.getColumn(),
            new String('\n', nls));
}
/**
 * Pushes one token back so that the next source_token() call returns it.
 *
 * Only a single token can be held; a second push-back before the first
 * has been consumed raises InvalidOperationException.
 */
private void source_untoken(Token tok) {
    bool slotTaken = this._source_token != null;
    if (slotTaken) {
        throw new InvalidOperationException("Cannot return two tokens");
    }
    this._source_token = tok;
}
/**
 * Returns the next raw token from the source stack.
 *
 * A token previously handed to source_untoken() is returned first.
 * Otherwise the current source is read; when there is no current source
 * the next queued input is pushed, and when an auto-popping source hits
 * EOF it is popped and reading resumes from the source below it. With
 * Feature.LINEMARKERS enabled, entering an input or returning from a
 * numbered source yields a line-marker token instead.
 *
 * @return the next token, or a Token.EOF token once all inputs are used up.
 */
private Token source_token() {
    Token pushedBack = _source_token;
    if (pushedBack != null) {
        _source_token = null;
        if (getFeature(Feature.DEBUG)) {
            System.Console.Error.WriteLine("Returning unget token " + pushedBack);
        }
        return pushedBack;
    }

    while (true) {
        Source current = getSource();

        if (current == null) {
            /* Nothing on the stack: start the next queued input, if any. */
            if (inputs.Count == 0) {
                return new Token(Token.EOF);
            }
            Source next = inputs[0];
            inputs.RemoveAt(0);
            push_source(next, true);
            if (getFeature(Feature.LINEMARKERS)) {
                return line_token(next.getLine(), next.getName(), " 1");
            }
            continue;
        }

        Token fresh = current.token();

        /* XXX Refactor with skipline() */
        bool exhausted = fresh.getType() == Token.EOF && current.isAutopop();
        if (!exhausted) {
            if (getFeature(Feature.DEBUG)) {
                System.Console.Error.WriteLine("Returning fresh token " + fresh);
            }
            return fresh;
        }

        pop_source();
        Source resumed = getSource();
        if (getFeature(Feature.LINEMARKERS)
                && current.isNumbered()
                && resumed != null) {
            /* What is really wanted is 'did the nested source contain
             * a newline token'; isNumbered() approximates that. Not
             * perfect, but it works. */
            return line_token(resumed.getLine() + 1, resumed.getName(), " 2");
        }
    }
}
/**
 * Adds a "paste" operator to the expansion of this macro.
 *
 * The paste token is stored in front of the most recently added token,
 * so the token added next ends up pasted to that previous token when
 * the macro is expanded. It is an error for a macro to end with a
 * paste token.
 */
public void addPaste(Token tok) {
    /*
     * tok0 ## tok1          is stored as  M_PASTE, tok0, tok1
     * tok0 ## tok1 ## tok2  is stored as  M_PASTE, tok0, M_PASTE, tok1, tok2
     * i.e. the operator precedes its operands, as in a stack language.
     */
    int beforeLast = tokens.Count - 1;
    this.tokens.Insert(beforeLast, tok);
}
/**
 * Tells whether a token is treated as white space: plain whitespace,
 * a C comment or a C++ comment.
 */
private bool isWhite(Token tok) {
    switch (tok.getType()) {
        case Token.WHITESPACE:
        case Token.CCOMMENT:
        case Token.CPPCOMMENT:
            return true;
        default:
            return false;
    }
}
/**
 * Pushes one expression token back so that the next expr_token() call
 * returns it. Only one token can be held at a time; a second push-back
 * throws.
 */
private void expr_untoken(Token tok) {
    if (_expr_token == null) {
        _expr_token = tok;
        return;
    }
    throw new Exception("Cannot unget two expression tokens.");
}
/**
 * Returns the next token of a directive expression.
 *
 * A token pushed back with expr_untoken() is returned first. Otherwise
 * the next macro-expanded, non-white token is read. The identifier
 * "defined" is evaluated here: its operand (optionally parenthesised)
 * is read unexpanded and replaced by an integer token 1 or 0.
 */
private Token expr_token() {
    Token pending = _expr_token;
    if (pending != null) {
        _expr_token = null;
        return pending;
    }

    Token tok = expanded_token_nonwhite();
    if (tok.getType() != Token.IDENTIFIER || tok.getText() != "defined") {
        return tok;
    }

    /* defined NAME  or  defined ( NAME ) */
    Token la = source_token_nonwhite();
    bool paren = la.getType() == '(';
    if (paren) {
        la = source_token_nonwhite();
    }

    bool isDefined;
    if (la.getType() != Token.IDENTIFIER) {
        error(la,
            "defined() needs identifier, not " +
            la.getText());
        isDefined = false;
    } else {
        String name = la.getText();
        /* Besides real macros, these three names always count as defined. */
        isDefined = macros.ContainsKey(name)
                || name == "__has_include_next"
                || name == "__has_include"
                || name == "__has_feature";
    }

    Token result = isDefined
            ? new Token(Token.INTEGER, la.getLine(), la.getColumn(), "1", 1)
            : new Token(Token.INTEGER, la.getLine(), la.getColumn(), "0", 0);

    if (paren) {
        la = source_token_nonwhite();
        if (la.getType() != ')') {
            /* Hand the unexpected token back before complaining. */
            expr_untoken(la);
            error(la, "Missing ) in defined()");
        }
    }

    return result;
}
/**
 * Returns the binding priority of a binary (or '?') operator token.
 * Higher numbers bind tighter; 0 means the token is not a recognised
 * operator.
 */
private int expr_priority(Token op) {
    int priority;
    switch (op.getType()) {
        case '*':
        case '/':
        case '%':
            priority = 11;
            break;
        case '+':
        case '-':
            priority = 10;
            break;
        case Token.LSH:
        case Token.RSH:
            priority = 9;
            break;
        case '<':
        case '>':
        case Token.LE:
        case Token.GE:
            priority = 8;
            break;
        case Token.EQ:
        case Token.NE:
            priority = 7;
            break;
        case '&':
            priority = 6;
            break;
        case '^':
            priority = 5;
            break;
        case '|':
            priority = 4;
            break;
        case Token.LAND:
            priority = 3;
            break;
        case Token.LOR:
            priority = 2;
            break;
        case '?':
            priority = 1;
            break;
        default:
            priority = 0;
            break;
    }
    return priority;
}
/**
 * Lexes and returns the next token from the character stream.
 *
 * One character is read and dispatched on; multi-character operators,
 * comments, numbers, strings and digraphs are recognised by reading
 * ahead and pushing back with unread() what does not belong to the token.
 * Anything the switch leaves undecided is classified afterwards as
 * whitespace, a decimal number, an identifier, or a single-character
 * token.
 *
 * The bol flag is set after a newline and cleared by the first token that
 * is neither whitespace nor a C comment; a '#' is returned as Token.HASH
 * only while bol is set.
 *
 * @return the next token, carrying the line and column at which it began.
 */
public override Token token() {
    Token tok = null;

    int _l = line;
    int _c = column;

    int c = read();
    int d;

    switch (c) {
        case '\n':
            if (ppvalid) {
                bol = true;
                if (include) {
                    tok = new Token(Token.NL, _l, _c, "\n");
                } else {
                    /* Collapse a run of newlines into one NL token. */
                    int nls = 0;
                    do {
                        nls++;
                        d = read();
                    } while (d == '\n');
                    unread(d);
                    char[] text = new char[nls];
                    for (int i = 0; i < text.Length; i++)
                        text[i] = '\n';
                    // Skip the bol = false below.
                    tok = new Token(Token.NL, _l, _c, new String(text));
                }
                if (DEBUG)
                    System.Console.Error.WriteLine("lx: Returning NL: " + tok);
                return tok;
            }
            /* Let it be handled as whitespace. */
            break;

        case '!':
            tok = cond('=', Token.NE, '!');
            break;

        case '#':
            if (bol)
                tok = new Token(Token.HASH);
            else
                tok = cond('#', Token.PASTE, '#');
            break;

        case '+':
            d = read();
            if (d == '+')
                tok = new Token(Token.INC);
            else if (d == '=')
                tok = new Token(Token.PLUS_EQ);
            else
                unread(d);
            break;

        case '-':
            d = read();
            if (d == '-')
                tok = new Token(Token.DEC);
            else if (d == '=')
                tok = new Token(Token.SUB_EQ);
            else if (d == '>')
                tok = new Token(Token.ARROW);
            else
                unread(d);
            break;

        case '*':
            tok = cond('=', Token.MULT_EQ, '*');
            break;

        case '/':
            d = read();
            if (d == '*')
                tok = ccomment();
            else if (d == '/')
                tok = cppcomment();
            else if (d == '=')
                tok = new Token(Token.DIV_EQ);
            else
                unread(d);
            break;

        case '%':
            d = read();
            if (d == '=')
                tok = new Token(Token.MOD_EQ);
            else if (digraphs && d == '>')
                tok = new Token('}');   // digraph
            else if (digraphs && d == ':') {
                /* "%:" is the digraph for '#', "%:%:" the one for "##".
                 * The two look-ahead checks must be nested: once "%:" is
                 * known not to be followed by '%', nothing more may be
                 * read or pushed back. */
                d = read();
                if (d != '%') {
                    unread(d);
                    tok = new Token('#');   // digraph
                } else {
                    d = read();
                    if (d == ':') {
                        tok = new Token(Token.PASTE);   // digraph
                    } else {
                        unread(d);  // Unread 2 chars here.
                        unread('%');
                        tok = new Token('#');   // digraph
                    }
                }
            }
            else
                unread(d);
            break;

        case ':':
            /* :: */
            d = read();
            if (digraphs && d == '>')
                tok = new Token(']');   // digraph
            else
                unread(d);
            break;

        case '<':
            if (include) {
                tok = String('<', '>');
            } else {
                d = read();
                if (d == '=')
                    tok = new Token(Token.LE);
                else if (d == '<')
                    tok = cond('=', Token.LSH_EQ, Token.LSH);
                else if (digraphs && d == ':')
                    tok = new Token('[');   // digraph
                else if (digraphs && d == '%')
                    tok = new Token('{');   // digraph
                else
                    unread(d);
            }
            break;

        case '=':
            tok = cond('=', Token.EQ, '=');
            break;

        case '>':
            d = read();
            if (d == '=')
                tok = new Token(Token.GE);
            else if (d == '>')
                tok = cond('=', Token.RSH_EQ, Token.RSH);
            else
                unread(d);
            break;

        case '^':
            tok = cond('=', Token.XOR_EQ, '^');
            break;

        case '|':
            d = read();
            if (d == '=')
                tok = new Token(Token.OR_EQ);
            else if (d == '|')
                tok = cond('=', Token.LOR_EQ, Token.LOR);
            else
                unread(d);
            break;

        case '&':
            d = read();
            if (d == '&')
                tok = cond('=', Token.LAND_EQ, Token.LAND);
            else if (d == '=')
                tok = new Token(Token.AND_EQ);
            else
                unread(d);
            break;

        case '.':
            d = read();
            if (d == '.')
                tok = cond('.', Token.ELLIPSIS, Token.RANGE);
            else
                unread(d);
            /* XXX decimal fraction */
            break;

        case '0':
            /* octal or hex */
            d = read();
            if (d == 'x' || d == 'X')
                tok = number_hex((char)d);
            else {
                unread(d);
                tok = number_octal();
            }
            break;

        case '\'':
            tok = character();
            break;

        case '"':
            tok = String('"', '"');
            break;

        case -1:
            close();
            tok = new Token(Token.EOF, _l, _c, "<eof>");
            break;
    }

    /* Nothing above produced a token: classify the character itself. */
    if (tok == null) {
        if (char.IsWhiteSpace((char)c)) {
            tok = whitespace(c);
        } else if (char.IsDigit((char)c)) {
            tok = number_decimal(c);
        } else if (isJavaIdentifierStart(c)) {
            tok = identifier(c);
        } else {
            tok = new Token(c);
        }
    }

    /* Whitespace and C comments do not end the "beginning of line". */
    if (bol) {
        switch (tok.getType()) {
            case Token.WHITESPACE:
            case Token.CCOMMENT:
                break;
            default:
                bol = false;
                break;
        }
    }

    tok.setLocation(_l, _c);
    if (DEBUG)
        System.Console.WriteLine("lx: Returning " + tok);
    return tok;
}
/*
 * Performs a token paste. On entry the first M_PASTE has already been
 * consumed; ptok is that paste token.
 *
 * The operands are taken from the token stream, their text is joined in
 * a buffer, and the buffer is then lexed again through a
 * StringLexerSource whose tokens become the current argument iterator.
 *
 * @see Macro#addPaste(Token)
 */
private void paste(Token ptok) {
    StringBuilder buf = new StringBuilder();

    /* We know here that arg is null or expired,
     * since we cannot paste an expanded arg. */

    /* Number of tokens still to be taken from the stream. */
    int remaining = 2;
    while (remaining > 0) {
        if (!tokens.hasNext()) {
            /* XXX This one really should throw. */
            error(ptok.getLine(), ptok.getColumn(),
                "Paste at end of expansion");
            buf.append(' ').append(ptok.getText());
            break;
        }

        Token next = tokens.next();
        int type = next.getType();
        if (type == Token.M_PASTE) {
            /* One extra to paste, plus one because the
             * paste token didn't count. */
            remaining += 2;
            ptok = next;
        } else if (type == Token.M_ARG) {
            int idx = (int)next.getValue();
            concat(buf, args.get(idx));
        } else if (type == Token.CCOMMENT || type == Token.CPPCOMMENT) {
            /* XXX Test this. Comments contribute nothing to the paste. */
        } else {
            buf.append(next.getText());
        }

        remaining--;
    }

    /* Push and re-lex. The buffer is used as is, without escaping. */
    StringLexerSource sl = new StringLexerSource(buf.toString());

    /* XXX Check that concatenation produces a valid token. */

    arg = new SourceIterator(sl);
}
/**
 * Reports a warning at the position of a token.
 *
 * The token's line and column are forwarded, with the message, to
 * the positional overload. As noted for that overload: if a
 * PreprocessorListener is installed it receives the warning,
 * otherwise an exception is thrown.
 *
 * @see #warning(int, int, String)
 */
protected void warning(Token tok, String msg) {
    int line = tok.getLine();
    int column = tok.getColumn();
    warning(line, column, msg);
}
/**
 * Processes and expands a macro.
 *
 * For a function-like macro the invocation's argument list is read from
 * the source first; then a new source producing the expansion is pushed
 * with push_source().
 *
 * Returns false, without pushing anything, when:
 *  - a function-like macro is not followed by '(' (the token that was
 *    found instead is handed back with source_untoken()),
 *  - EOF is met inside the argument list, or
 *  - the number of arguments does not match the macro's parameter count.
 *
 * @param m    the macro to expand.
 * @param orig the token that named the macro; its line and column are
 *             used for the tokens generated for __LINE__, __FILE__ and
 *             __COUNTER__.
 * @return true if an expansion source was pushed, false otherwise.
 */
private bool macro(Macro m, Token orig) {
    Token tok;
    List<Argument> args;

    // System.out.println("pp: expanding " + m);

    if (m.isFunctionLike()) {
        /* Skip white space, comments and newlines looking for the '('. */
        for (;;) {
            tok = source_token();
            // System.out.println("pp: open: token is " + tok);
            switch (tok.getType()) {
                case Token.WHITESPACE: /* XXX Really? */
                case Token.CCOMMENT:
                case Token.CPPCOMMENT:
                case Token.NL:
                    break; /* continue */
                case '(':
                    goto BREAK_OPEN;
                default:
                    /* Not an invocation: give the token back. */
                    source_untoken(tok);
                    return false;
            }
        }
        BREAK_OPEN:

        // tok = expanded_token_nonwhite();
        tok = source_token_nonwhite();

        /* We either have, or we should have args.
         * This deals elegantly with the case that we have
         * one empty arg. */
        if (tok.getType() != ')' || m.getArgs() > 0) {
            args = new List<Argument>();

            Argument arg = new Argument();
            /* Nesting level of parentheses inside the current argument. */
            int depth = 0;
            /* Set when white space was seen since the last real token. */
            bool space = false;

            ARGS:
            for (;;) {
                // System.out.println("pp: arg: token is " + tok);
                switch (tok.getType()) {
                    case Token.EOF:
                        error(tok, "EOF in macro args");
                        return false;

                    case ',':
                        if (depth == 0) {
                            if (m.isVariadic() &&
                                /* We are building the last arg. */
                                args.Count == m.getArgs() - 1) {
                                /* Just add the comma. */
                                arg.addToken(tok);
                            } else {
                                args.Add(arg);
                                arg = new Argument();
                            }
                        } else {
                            arg.addToken(tok);
                        }
                        space = false;
                        break;

                    case ')':
                        if (depth == 0) {
                            /* Closing paren of the invocation. */
                            args.Add(arg);
                            goto BREAK_ARGS;
                        } else {
                            depth--;
                            arg.addToken(tok);
                        }
                        space = false;
                        break;

                    case '(':
                        depth++;
                        arg.addToken(tok);
                        space = false;
                        break;

                    case Token.WHITESPACE:
                    case Token.CCOMMENT:
                    case Token.CPPCOMMENT:
                        /* Avoid duplicating spaces. */
                        space = true;
                        break;

                    default:
                        /* Do not put space on the beginning of
                         * an argument token. */
                        if (space && arg.Count != 0)
                            arg.addToken(Token.space);
                        arg.addToken(tok);
                        space = false;
                        break;
                }
                // tok = expanded_token();
                tok = source_token();
            }
            BREAK_ARGS:

            /* A variadic macro given too few arguments gets one empty
             * argument appended. */
            if(m.isVariadic() && args.Count < m.getArgs()) {
                args.Add(new Argument());
            }
            /* space may still be true here, thus trailing space
             * is stripped from arguments. */

            if (args.Count != m.getArgs()) {
                error(tok,
                    "macro " + m.getName() +
                    " has " + m.getArgs() + " parameters " +
                    "but given " + args.Count + " args");
                /* We could replay the arg tokens, but I
                 * note that GNU cpp does exactly what we do,
                 * i.e. output the macro name and chew the args.
                 */
                return false;
            }

            /*
            for (Argument a : args)
                a.expand(this);
            */

            for (int i = 0; i < args.Count; i++) {
                args[i].expand(this);
            }

            // System.out.println("Macro " + m + " args " + args);
        } else {
            /* nargs == 0 and we (correctly) got () */
            args = null;
        }
    } else {
        /* Macro without args. */
        args = null;
    }

    if (m == __LINE__) {
        /* Expands to the line number of the invoking token. */
        push_source(new FixedTokenSource(
            new Token[] { new Token(Token.INTEGER,
                orig.getLine(), orig.getColumn(),
                orig.getLine().ToString(),
                orig.getLine()) }
            ), true);
    } else if (m == __FILE__) {
        /* Expands to the current source's name as a string literal,
         * with backslashes and double quotes escaped. */
        StringBuilder buf = new StringBuilder("\"");
        String name = getSource().getName();
        if (name == null)
            name = "<no file>";
        for (int i = 0; i < name.Length; i++) {
            char c = name[i];
            switch (c) {
                case '\\':
                    buf.Append("\\\\");
                    break;
                case '"':
                    buf.Append("\\\"");
                    break;
                default:
                    buf.Append(c);
                    break;
            }
        }
        buf.Append("\"");
        String text = buf.ToString();
        push_source(new FixedTokenSource(
            new Token[] { new Token(Token.STRING,
                orig.getLine(), orig.getColumn(),
                text, text) }
            ), true);
    } else if (m == __COUNTER__) {
        /* This could equivalently have been done by adding
         * a special Macro subclass which overrides getTokens(). */
        /* The counter is incremented on every expansion. */
        int value = this.counter++;
        push_source(new FixedTokenSource(
            new Token[] { new Token(Token.INTEGER,
                orig.getLine(), orig.getColumn(),
                value.ToString(),
                value) }
            ), true);
    } else {
        /* Ordinary macro: expansion comes from its token list and args. */
        push_source(new MacroTokenSource(m, args), true);
    }

    return true;
}
/*
 * Handles the #error and #warning directives.
 *
 * The message is '#', the directive name, one space, and then the text
 * of every token up to the end of the line; white space directly after
 * the directive name is dropped. The terminating NL or EOF token is
 * consumed. The message is reported through error() when is_error is
 * true and through warning() otherwise.
 */
private void error(Token pptok, bool is_error) {
    StringBuilder buf = new StringBuilder();
    buf.Append('#');
    buf.Append(pptok.getText());
    buf.Append(' ');

    /* Start with a non-white read to ditch the first whitespace. */
    Token tok = source_token_nonwhite();
    while (tok.getType() != Token.NL && tok.getType() != Token.EOF) {
        buf.Append(tok.getText());
        tok = source_token();
    }

    String message = buf.ToString();
    if (is_error) {
        error(pptok, message);
    } else {
        warning(pptok, message);
    }
}
/**
 * Appends a token to the end of this macro's expansion.
 *
 * @param tok the token to append.
 */
public void addToken(Token tok) {
    tokens.Add(tok);
}