/**
 * Factory method that creates a new token. Subclasses may
 * override this method to return a specialized Token subclass
 * instead of the default implementation.
 *
 * @param pattern        the token pattern
 * @param image          the token image (i.e. characters)
 * @param line           the line number of the first character
 * @param column         the column number of the first character
 *
 * @return the token created
 *
 * @since 1.5
 */
protected virtual Token NewToken(TokenPattern pattern,
                                 string image,
                                 int line,
                                 int column) {

    return new Token(pattern, image, line, column);
}
/// <summary>
/// Adds a string match to this automaton. Any states and
/// transitions required to recognize the given string are
/// created on demand.
/// </summary>
/// <param name="str">The string to match</param>
/// <param name="ignoreCase">The case-insensitive match flag</param>
/// <param name="value">The match value</param>
public void AddTextMatch(string str, bool ignoreCase, TokenPattern value) {
    char      first = str[0];
    NFAState  current;

    // Case-sensitive ASCII first characters go through the direct
    // lookup table; everything else starts from the initial state.
    if (!ignoreCase && first < 128) {
        current = this.initialChar[first];
        if (current == null) {
            current = this.initialChar[first] = new NFAState();
        }
    } else {
        current = this.initial.AddOut(first, ignoreCase, null);
    }

    // Extend the automaton with one transition per remaining character.
    for (int pos = 1; pos < str.Length; pos++) {
        current = current.AddOut(str[pos], ignoreCase, null);
    }
    current.Value = value;
}
/**
 * Adds a string match to this automaton. Any states and
 * transitions required to recognize the given string are
 * created on demand.
 *
 * @param str             the string to match
 * @param caseInsensitive the case-insensitive flag
 * @param value           the match value
 */
public void AddMatch(string str, bool caseInsensitive, TokenPattern value) {
    int       pos = 0;
    char      first = str[0];
    DFAState  current;

    if (caseInsensitive) {
        first = Char.ToLower(first);
    }

    // ASCII first characters use the direct lookup table and are
    // consumed here; non-ASCII ones start at the shared state.
    if (first < 128) {
        current = ascii[first];
        if (current == null) {
            current = ascii[first] = new DFAState();
        }
        pos++;
    } else {
        current = nonAscii;
    }

    // Walk (and extend) the transition tree for the remaining chars.
    while (pos < str.Length) {
        DFAState next = current.tree.Find(str[pos], caseInsensitive);
        if (next == null) {
            next = new DFAState();
            current.tree.Add(str[pos], caseInsensitive, next);
        }
        current = next;
        pos++;
    }
    current.value = value;
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    // Patterns are registered in declaration order.
    AddPattern(new TokenPattern((int) GrammarConstants.NUMBER,
            "NUMBER", TokenPattern.PatternType.REGEXP, "[0-9]+"));
    AddPattern(new TokenPattern((int) GrammarConstants.CHARACTER,
            "CHARACTER", TokenPattern.PatternType.REGEXP, "[a-zA-Z]+"));
    AddPattern(new TokenPattern((int) GrammarConstants.WHITESPACE,
            "WHITESPACE", TokenPattern.PatternType.REGEXP, "[ \\t\\n\\r]+"));
    AddPattern(new TokenPattern((int) GrammarConstants.EVERYTHING,
            "EVERYTHING", TokenPattern.PatternType.REGEXP, "."));
}
/**
 * Searches for matching token patterns at the start of the
 * input stream. If a match is found, the token match object
 * is updated.
 *
 * @param buffer         the input buffer to check
 * @param match          the token match to update
 *
 * @throws IOException if an I/O error occurred
 */
public override void Match(ReaderBuffer buffer, TokenMatch match) {
    TokenPattern res = automaton.Match(buffer, ignoreCase);

    if (res != null) {
        // NOTE(review): the match length reported here is the length
        // of the pattern string itself (res.Pattern.Length), which is
        // only equal to the matched input length for fixed-string
        // patterns -- confirm this matcher never reports
        // variable-length regexp matches through this path.
        match.Update(res.Pattern.Length, res);
    }
}
/**
 * Adds a regular expression match to this automaton. New states
 * and transitions will be added to extend this automaton to
 * support the specified string. Note that this method only
 * supports a subset of the full regular expression syntax, so
 * a more complete regular expression library must also be
 * provided.
 *
 * @param pattern        the regular expression string
 * @param ignoreCase     the case-insensitive match flag
 * @param value          the match value
 *
 * @throws RegExpException if the regular expression parsing
 *             failed
 */
public void AddRegExpMatch(string pattern,
                           bool ignoreCase,
                           TokenPattern value) {

    TokenRegExpParser parser = new TokenRegExpParser(pattern, ignoreCase);
    string debug = "DFA regexp; " + parser.GetDebugInfo();
    bool isAscii;

    // Decide whether the parsed start state can be exposed through the
    // ASCII lookup table: each ASCII char must be claimed by at most one
    // outgoing transition, and must not collide with an existing entry.
    isAscii = parser.start.IsAsciiOutgoing();
    for (int i = 0; isAscii && i < 128; i++) {
        bool match = false;
        for (int j = 0; j < parser.start.outgoing.Length; j++) {
            if (parser.start.outgoing[j].Match((char)i)) {
                if (match) {
                    // Two transitions match the same char: ambiguous,
                    // so the lookup-table shortcut cannot be used.
                    isAscii = false;
                    break;
                }
                match = true;
            }
        }
        if (match && initialChar[i] != null) {
            // Slot already taken by another pattern.
            isAscii = false;
        }
    }

    // Attach the parsed sub-automaton to this automaton, choosing the
    // cheapest safe strategy (recorded in the debug string).
    if (parser.start.incoming.Length > 0) {
        // The start state is a loop target; it must be kept distinct,
        // so link it via an epsilon transition.
        initial.AddOut(new NFAEpsilonTransition(parser.start));
        debug += ", uses initial epsilon";
    } else if (isAscii && !ignoreCase) {
        // Publish each outgoing target directly in the ASCII table.
        for (int i = 0; isAscii && i < 128; i++) {
            for (int j = 0; j < parser.start.outgoing.Length; j++) {
                if (parser.start.outgoing[j].Match((char)i)) {
                    initialChar[i] = parser.start.outgoing[j].state;
                }
            }
        }
        debug += ", uses ASCII lookup";
    } else {
        // Fall back to merging the start state into the shared one.
        parser.start.MergeInto(initial);
        debug += ", uses initial state";
    }
    parser.end.value = value;
    value.DebugInfo = debug;
}
/// <summary>
/// Checks if the automaton matches an input stream. The
/// matching will be performed from a specified position. This
/// method will not read any characters from the stream, just
/// peek ahead. The comparison can be done either in
/// case-sensitive or case-insensitive mode.
/// </summary>
/// <param name="buffer">The buffer to read from</param>
/// <param name="caseInsensitive">The case-insensitive flag</param>
/// <returns>The match value, or null if no match found</returns>
/// <exception cref="System.IO.IOException">
/// If an I/O error occurred
/// </exception>
public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive) {
    TokenPattern  best = null;
    DFAState      current;
    int           offset = 0;
    int           ch = buffer.Peek(0);

    if (ch < 0) {
        return null;
    }
    if (caseInsensitive) {
        ch = char.ToLower((char)ch);
    }

    // The first character selects the entry state: ASCII chars via the
    // lookup table (consumed here), others via the shared state.
    if (ch < 128) {
        current = this.ascii[ch];
        if (current == null) {
            return null;
        }
        if (current.Value != null) {
            best = current.Value;
        }
        offset++;
    } else {
        current = this.nonAscii;
    }

    // Follow transitions as long as input remains, remembering the
    // value of the last accepting state (longest match wins).
    while ((ch = buffer.Peek(offset)) >= 0) {
        current = current.Tree.Find((char)ch, caseInsensitive);
        if (current == null) {
            break;
        }
        if (current.Value != null) {
            best = current.Value;
        }
        offset++;
    }
    return best;
}
/**
 * Adds a token pattern to this matcher. String patterns are fed
 * to the automaton as literal text, all others as regular
 * expressions.
 *
 * @param pattern        the pattern to add
 *
 * @throws Exception if the pattern couldn't be added to the matcher
 */
public override void AddPattern(TokenPattern pattern) {
    bool isString = pattern.Type == TokenPattern.PatternType.STRING;

    if (isString) {
        automaton.AddTextMatch(pattern.Pattern, ignoreCase, pattern);
    } else {
        automaton.AddRegExpMatch(pattern.Pattern, ignoreCase, pattern);
    }
    base.AddPattern(pattern);
}
/// <summary>
/// Adds a token pattern to this matcher. String patterns are fed
/// to the automaton as literal text, all others as regular
/// expressions.
/// </summary>
/// <param name="pattern">The pattern to add</param>
/// <exception cref="Exception">
/// If the pattern couldn't be added to the matcher
/// </exception>
public override void AddPattern(TokenPattern pattern) {
    bool isString = pattern.Type == TokenPattern.PatternType.String;

    if (isString) {
        this.automaton.AddTextMatch(pattern.Pattern, this.IgnoreCase, pattern);
    } else {
        this.automaton.AddRegExpMatch(pattern.Pattern, this.IgnoreCase, pattern);
    }
    base.AddPattern(pattern);
}
/**
 * Creates a new token. The end line and column are derived from
 * the token image: each newline in the image advances the end
 * line and resets the end column to the length of the trailing
 * text.
 *
 * @param pattern        the token pattern
 * @param image          the token image (i.e. characters)
 * @param line           the line number of the first character
 * @param col            the column number of the first character
 */
public Token(TokenPattern pattern, string image, int line, int col) {
    this.pattern = pattern;
    this.image = image;
    this.startLine = line;
    this.startColumn = col;
    this.endLine = line;
    this.endColumn = col + image.Length - 1;

    int pos = 0;
    int newline;
    while ((newline = image.IndexOf('\n', pos)) >= 0) {
        pos = newline + 1;
        this.endLine++;
        this.endColumn = image.Length - pos;
    }
}
/// <summary>
/// Adds a new token pattern to the tokenizer. The pattern will be
/// added last in the list, choosing a previous token pattern in
/// case two matches the same string.
/// </summary>
/// <param name="pattern">The pattern to add</param>
/// <exception cref="ParserCreationException">
/// If the pattern couldn't be added to the tokenizer
/// </exception>
public void AddPattern(TokenPattern pattern) {
    if (pattern.Type == TokenPattern.PatternType.String) {
        try {
            this.stringDfaMatcher.AddPattern(pattern);
        } catch (Exception e) {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.InvalidToken,
                pattern.Name,
                "error adding string token: " + e.Message);
        }
    } else if (pattern.Type == TokenPattern.PatternType.RegExp) {
        // Prefer the internal NFA matcher; fall back to the full
        // regular-expression matcher if the NFA subset is exceeded.
        try {
            this.nfaMatcher.AddPattern(pattern);
        } catch (Exception) {
            try {
                this.regExpMatcher.AddPattern(pattern);
            } catch (Exception e) {
                throw new ParserCreationException(
                    ParserCreationException.ErrorType.InvalidToken,
                    pattern.Name,
                    "regular expression contains error(s): " + e.Message);
            }
        }
    } else {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.InvalidToken,
            pattern.Name,
            "pattern type " + pattern.Type + " is undefined");
    }
}
/**
 * Adds a regular expression token pattern to this matcher. The
 * internal regular expression engine is attempted first, falling
 * back to the native .NET engine on failure.
 *
 * @param pattern        the pattern to add
 *
 * @throws Exception if the pattern couldn't be added to the matcher
 */
public override void AddPattern(TokenPattern pattern) {
    REHandler re;

    try {
        re = new GrammaticaRE(pattern.Pattern, ignoreCase);
        pattern.DebugInfo = "Grammatica regexp\n" + re;
    } catch (Exception) {
        re = new SystemRE(pattern.Pattern, ignoreCase);
        pattern.DebugInfo = "native .NET regexp";
    }

    // Append the handler to the internal array.
    int count = regExps.Length;
    Array.Resize(ref regExps, count + 1);
    regExps[count] = re;
    base.AddPattern(pattern);
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    // Patterns are registered in declaration order.
    AddPattern(new TokenPattern((int) CommandGrammarConstants.STRING,
            "STRING", TokenPattern.PatternType.REGEXP,
            "\"([^\"\\\\;\\[\\]]|\"\"|\\\\.)*\""));
    AddPattern(new TokenPattern((int) CommandGrammarConstants.TEXT,
            "TEXT", TokenPattern.PatternType.REGEXP,
            "[^ \\t\\n\\r\"$;\\[\\]]+"));
    AddPattern(new TokenPattern((int) CommandGrammarConstants.PARAMETER,
            "PARAMETER", TokenPattern.PatternType.REGEXP, "\\$[1-9]"));
    AddPattern(new TokenPattern((int) CommandGrammarConstants.COMMAND_START,
            "COMMAND_START", TokenPattern.PatternType.STRING, "["));
    AddPattern(new TokenPattern((int) CommandGrammarConstants.COMMAND_END,
            "COMMAND_END", TokenPattern.PatternType.STRING, "]"));
    AddPattern(new TokenPattern((int) CommandGrammarConstants.COMMAND_SEPARATOR,
            "COMMAND_SEPARATOR", TokenPattern.PatternType.STRING, ";"));

    // Layout is tokenized but ignored by the parser.
    TokenPattern layout = new TokenPattern((int) CommandGrammarConstants.LAYOUT,
            "LAYOUT", TokenPattern.PatternType.REGEXP, "[ \\t\\n\\r]+");
    layout.Ignore = true;
    AddPattern(layout);
}
/**
 * Adds a string match to this automaton. Any states and
 * transitions required to recognize the given string are
 * created on demand.
 *
 * @param str            the string to match
 * @param ignoreCase     the case-insensitive match flag
 * @param value          the match value
 */
public void AddTextMatch(string str, bool ignoreCase, TokenPattern value) {
    char      first = str[0];
    NFAState  current;

    // Case-sensitive ASCII first characters go through the direct
    // lookup table; everything else starts from the initial state.
    if (!ignoreCase && first < 128) {
        current = initialChar[first];
        if (current == null) {
            current = initialChar[first] = new NFAState();
        }
    } else {
        current = initial.AddOut(first, ignoreCase, null);
    }

    // Extend the automaton with one transition per remaining character.
    for (int pos = 1; pos < str.Length; pos++) {
        current = current.AddOut(str[pos], ignoreCase, null);
    }
    current.value = value;
}
/// <summary>
/// Adds a string match to this automaton. Any states and
/// transitions required to recognize the given string are
/// created on demand.
/// </summary>
/// <param name="str">The string to match</param>
/// <param name="caseInsensitive">The case-insensitive flag</param>
/// <param name="value">The match value</param>
public void AddMatch(string str, bool caseInsensitive, TokenPattern value) {
    int       pos = 0;
    char      first = str[0];
    DFAState  current;

    if (caseInsensitive) {
        first = char.ToLower(first);
    }

    // ASCII first characters use the direct lookup table and are
    // consumed here; non-ASCII ones start at the shared state.
    if (first < 128) {
        current = this.ascii[first];
        if (current == null) {
            current = this.ascii[first] = new DFAState();
        }
        pos++;
    } else {
        current = this.nonAscii;
    }

    // Walk (and extend) the transition tree for the remaining chars.
    while (pos < str.Length) {
        DFAState next = current.Tree.Find(str[pos], caseInsensitive);
        if (next == null) {
            next = new DFAState();
            current.Tree.Add(str[pos], caseInsensitive, next);
        }
        current = next;
        pos++;
    }
    current.Value = value;
}
/**
 * Adds a new token pattern to the tokenizer. The pattern will be
 * added last in the list, choosing a previous token pattern in
 * case two matches the same string.
 *
 * @param pattern        the pattern to add
 *
 * @throws ParserCreationException if the pattern couldn't be
 *             added to the tokenizer
 */
public void AddPattern(TokenPattern pattern) {
    if (pattern.Type == TokenPattern.PatternType.STRING) {
        try {
            stringDfaMatcher.AddPattern(pattern);
        } catch (Exception e) {
            throw new ParserCreationException(
                ParserCreationException.ErrorType.INVALID_TOKEN,
                pattern.Name,
                "error adding string token: " + e.Message);
        }
    } else if (pattern.Type == TokenPattern.PatternType.REGEXP) {
        // Prefer the internal NFA matcher; fall back to the full
        // regular-expression matcher if the NFA subset is exceeded.
        try {
            nfaMatcher.AddPattern(pattern);
        } catch (Exception) {
            try {
                regExpMatcher.AddPattern(pattern);
            } catch (Exception e) {
                throw new ParserCreationException(
                    ParserCreationException.ErrorType.INVALID_TOKEN,
                    pattern.Name,
                    "regular expression contains error(s): " + e.Message);
            }
        }
    } else {
        throw new ParserCreationException(
            ParserCreationException.ErrorType.INVALID_TOKEN,
            pattern.Name,
            "pattern type " + pattern.Type + " is undefined");
    }
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    TokenPattern pattern;

    // Arithmetic and comparison operators.
    pattern = new TokenPattern((int) ExpressionConstants.ADD,
            "ADD", TokenPattern.PatternType.STRING, "+");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.SUB,
            "SUB", TokenPattern.PatternType.STRING, "-");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.MUL,
            "MUL", TokenPattern.PatternType.STRING, "*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.DIV,
            "DIV", TokenPattern.PatternType.STRING, "/");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.POWER,
            "POWER", TokenPattern.PatternType.STRING, "^");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.MOD,
            "MOD", TokenPattern.PatternType.STRING, "%");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.LEFT_PAREN,
            "LEFT_PAREN", TokenPattern.PatternType.STRING, "(");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.RIGHT_PAREN,
            "RIGHT_PAREN", TokenPattern.PatternType.STRING, ")");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.LEFT_BRACE,
            "LEFT_BRACE", TokenPattern.PatternType.STRING, "[");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.RIGHT_BRACE,
            "RIGHT_BRACE", TokenPattern.PatternType.STRING, "]");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.EQ,
            "EQ", TokenPattern.PatternType.STRING, "=");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.LT,
            "LT", TokenPattern.PatternType.STRING, "<");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.GT,
            "GT", TokenPattern.PatternType.STRING, ">");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.LTE,
            "LTE", TokenPattern.PatternType.STRING, "<=");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.GTE,
            "GTE", TokenPattern.PatternType.STRING, ">=");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.NE,
            "NE", TokenPattern.PatternType.STRING, "<>");
    AddPattern(pattern);

    // Logical operators and keywords.
    pattern = new TokenPattern((int) ExpressionConstants.AND,
            "AND", TokenPattern.PatternType.STRING, "AND");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.OR,
            "OR", TokenPattern.PatternType.STRING, "OR");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.XOR,
            "XOR", TokenPattern.PatternType.STRING, "XOR");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.NOT,
            "NOT", TokenPattern.PatternType.STRING, "NOT");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.IN,
            "IN", TokenPattern.PatternType.STRING, "in");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.DOT,
            "DOT", TokenPattern.PatternType.STRING, ".");
    AddPattern(pattern);

    // The argument separator is a custom pattern class initialized
    // with the expression context.
    var seperatorpattern = new ArgumentSeparatorPattern();
    seperatorpattern.Initialize((int)ExpressionConstants.ARGUMENT_SEPARATOR,
            "ARGUMENT_SEPARATOR", TokenPattern.PatternType.STRING, ",",
            _expressionContext);
    AddPattern(seperatorpattern);

    pattern = new TokenPattern((int) ExpressionConstants.ARRAY_BRACES,
            "ARRAY_BRACES", TokenPattern.PatternType.STRING, "[]");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.LEFT_SHIFT,
            "LEFT_SHIFT", TokenPattern.PatternType.STRING, "<<");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.RIGHT_SHIFT,
            "RIGHT_SHIFT", TokenPattern.PatternType.STRING, ">>");
    AddPattern(pattern);

    // Whitespace is tokenized but ignored by the parser.
    pattern = new TokenPattern((int) ExpressionConstants.WHITESPACE,
            "WHITESPACE", TokenPattern.PatternType.REGEXP, "\\s+");
    pattern.Ignore = true;
    AddPattern(pattern);

    // Literals.
    pattern = new TokenPattern((int) ExpressionConstants.INTEGER,
            "INTEGER", TokenPattern.PatternType.REGEXP,
            "\\d+(u|l|ul|lu|f|m)?");
    AddPattern(pattern);

    // NOTE(review): the REAL regexp contains "{0}"/"{1}" composite-format
    // placeholders; presumably RealPattern.Initialize substitutes them
    // (e.g. with the culture's decimal separator) -- confirm against
    // the RealPattern implementation.
    var customRealPattern = new RealPattern();
    customRealPattern.Initialize((int)ExpressionConstants.REAL,
            "REAL", TokenPattern.PatternType.REGEXP,
            "\\d{0}\\{1}\\d+([eE][+-]\\d+)?(d|f|m)?", _expressionContext);
    AddPattern(customRealPattern);

    pattern = new TokenPattern((int)ExpressionConstants.STRING_LITERAL,
            "STRING_LITERAL", TokenPattern.PatternType.REGEXP,
            "\"([^\"\\r\\n\\\\]|\\\\u[0-9a-f][0-9a-f][0-9a-f][0-9a-f]|\\\\[\\\\\"'trn])*\"");
    AddPattern(pattern);
    pattern = new TokenPattern((int)ExpressionConstants.CHAR_LITERAL,
            "CHAR_LITERAL", TokenPattern.PatternType.REGEXP,
            "'([^'\\r\\n\\\\]|\\\\u[0-9a-f][0-9a-f][0-9a-f][0-9a-f]|\\\\[\\\\\"'trn])*'");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.TRUE,
            "TRUE", TokenPattern.PatternType.STRING, "True");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.FALSE,
            "FALSE", TokenPattern.PatternType.STRING, "False");
    AddPattern(pattern);
    pattern = new TokenPattern((int)ExpressionConstants.IDENTIFIER,
            "IDENTIFIER", TokenPattern.PatternType.REGEXP, "[a-z_]\\w*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.HEX_LITERAL,
            "HEX_LITERAL", TokenPattern.PatternType.REGEXP,
            "0x[0-9a-f]+(u|l|ul|lu)?");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.NULL_LITERAL,
            "NULL_LITERAL", TokenPattern.PatternType.STRING, "null");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.TIMESPAN,
            "TIMESPAN", TokenPattern.PatternType.REGEXP,
            "##(\\d+\\.)?\\d\\d:\\d\\d(:\\d\\d(\\.\\d*)?)?#");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.DATETIME,
            "DATETIME", TokenPattern.PatternType.REGEXP, "#[^#]+#");
    AddPattern(pattern);

    // Conditional / cast keywords.
    pattern = new TokenPattern((int) ExpressionConstants.IF,
            "IF", TokenPattern.PatternType.STRING, "if");
    AddPattern(pattern);
    pattern = new TokenPattern((int) ExpressionConstants.CAST,
            "CAST", TokenPattern.PatternType.STRING, "cast");
    AddPattern(pattern);
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    TokenPattern pattern;

    // Punctuation.
    pattern = new TokenPattern((int) IrfConstants.LT,
            "LT", TokenPattern.PatternType.STRING, "<");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.GT,
            "GT", TokenPattern.PatternType.STRING, ">");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.EQUAL,
            "EQUAL", TokenPattern.PatternType.STRING, "=");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.QUOTE,
            "QUOTE", TokenPattern.PatternType.REGEXP, "[\"]");
    AddPattern(pattern);

    // Keyword tags. The longer names are registered before their
    // prefixes (PLAYER_NOTES / PLAYER_NOTE_SET before PLAYER_NOTE).
    pattern = new TokenPattern((int) IrfConstants.PLAYER_NOTES,
            "PLAYER_NOTES", TokenPattern.PatternType.STRING, "playernotes");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.PLAYER_NOTE_SET,
            "PLAYER_NOTE_SET", TokenPattern.PatternType.STRING, "playernoteset");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.USER_NAME,
            "USER_NAME", TokenPattern.PatternType.REGEXP, "username=[^\\n]*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.PLAYER_NOTE,
            "PLAYER_NOTE", TokenPattern.PatternType.STRING, "playernote");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.PLAYER_NAME,
            "PLAYER_NAME", TokenPattern.PatternType.STRING, "playername");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.NOTE_TEXT,
            "NOTE_TEXT", TokenPattern.PatternType.STRING, "notetext");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.TIMESTAMP,
            "TIMESTAMP", TokenPattern.PatternType.STRING, "timestamp");
    AddPattern(pattern);
    pattern = new TokenPattern((int) IrfConstants.CLASSIFICATION,
            "CLASSIFICATION", TokenPattern.PatternType.STRING,
            "classificationindex");
    AddPattern(pattern);

    // NOTE(review): "[0-9]*" can match zero characters; presumably the
    // grammar intends "[0-9]+" -- confirm against the grammar file.
    pattern = new TokenPattern((int) IrfConstants.NUMBER,
            "NUMBER", TokenPattern.PatternType.REGEXP, "[0-9]*");
    AddPattern(pattern);

    // Whitespace is tokenized but ignored by the parser.
    pattern = new TokenPattern((int) IrfConstants.WHITESPACE,
            "WHITESPACE", TokenPattern.PatternType.REGEXP, "[ \\t\\n\\r]+");
    pattern.Ignore = true;
    AddPattern(pattern);

    pattern = new TokenPattern((int) IrfConstants.QUOTED_STRING,
            "QUOTED_STRING", TokenPattern.PatternType.REGEXP,
            "(\"([^\"\\\\]|\\\\.)*\")");
    AddPattern(pattern);

    // NOTE(review): "[A-Za-z0-9_]*" can also match zero characters --
    // see the NUMBER note above.
    pattern = new TokenPattern((int) IrfConstants.IDENTIFIER,
            "IDENTIFIER", TokenPattern.PatternType.REGEXP, "[A-Za-z0-9_]*");
    AddPattern(pattern);
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    TokenPattern pattern;

    // Instruction mnemonics.
    pattern = new TokenPattern((int) PicoConstants.MOV,
            "MOV", TokenPattern.PatternType.STRING, "mov");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.ADD,
            "ADD", TokenPattern.PatternType.STRING, "add");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.SUB,
            "SUB", TokenPattern.PatternType.STRING, "sub");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.MUL,
            "MUL", TokenPattern.PatternType.STRING, "mul");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.DIV,
            "DIV", TokenPattern.PatternType.STRING, "div");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.BEQ,
            "BEQ", TokenPattern.PatternType.STRING, "beq");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.BGT,
            "BGT", TokenPattern.PatternType.STRING, "bgt");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.IN,
            "IN", TokenPattern.PatternType.STRING, "in");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.OUT,
            "OUT", TokenPattern.PatternType.STRING, "out");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.JSR,
            "JSR", TokenPattern.PatternType.STRING, "jsr");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.RTS,
            "RTS", TokenPattern.PatternType.STRING, "rts");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.STOP,
            "STOP", TokenPattern.PatternType.STRING, "stop");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.ORG,
            "ORG", TokenPattern.PatternType.STRING, "org");
    AddPattern(pattern);

    // Punctuation.
    pattern = new TokenPattern((int) PicoConstants.EQUALS,
            "EQUALS", TokenPattern.PatternType.STRING, "=");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.LEFT_PAREN,
            "LEFT_PAREN", TokenPattern.PatternType.STRING, "(");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.RIGHT_PAREN,
            "RIGHT_PAREN", TokenPattern.PatternType.STRING, ")");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.HASH,
            "HASH", TokenPattern.PatternType.STRING, "#");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.COLON,
            "COLON", TokenPattern.PatternType.STRING, ":");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.COMMA,
            "COMMA", TokenPattern.PatternType.STRING, ",");
    AddPattern(pattern);

    // Operands.
    pattern = new TokenPattern((int) PicoConstants.SIGN,
            "SIGN", TokenPattern.PatternType.REGEXP, "[+-]");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.NUMBER,
            "NUMBER", TokenPattern.PatternType.REGEXP, "[0-9]+");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.IDENTIFIER,
            "IDENTIFIER", TokenPattern.PatternType.REGEXP, "[a-z][a-z0-9_]*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.ENTER,
            "ENTER", TokenPattern.PatternType.REGEXP, "[\\n\\r]+");
    AddPattern(pattern);

    // Comments and intra-line whitespace are tokenized but ignored.
    pattern = new TokenPattern((int) PicoConstants.SINGLE_LINE_COMMENT,
            "SINGLE_LINE_COMMENT", TokenPattern.PatternType.REGEXP, ";.*");
    pattern.Ignore = true;
    AddPattern(pattern);
    pattern = new TokenPattern((int) PicoConstants.WHITESPACE,
            "WHITESPACE", TokenPattern.PatternType.REGEXP, "[ \\t]+");
    pattern.Ignore = true;
    AddPattern(pattern);
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    TokenPattern pattern;

    // Keyword tags.
    pattern = new TokenPattern((int) DokiScriptConstants.WORLD,
            "WORLD", TokenPattern.PatternType.STRING, "world");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.BACKGROUND,
            "BACKGROUND", TokenPattern.PatternType.STRING, "background");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.WEATHER,
            "WEATHER", TokenPattern.PatternType.STRING, "weather");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.SOUND,
            "SOUND", TokenPattern.PatternType.STRING, "sound");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.BGM,
            "BGM", TokenPattern.PatternType.STRING, "bgm");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.VIDEO,
            "VIDEO", TokenPattern.PatternType.STRING, "video");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.MOVE,
            "MOVE", TokenPattern.PatternType.STRING, "move");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.POSTURE,
            "POSTURE", TokenPattern.PatternType.STRING, "posture");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.VOICE,
            "VOICE", TokenPattern.PatternType.STRING, "voice");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.ROLE,
            "ROLE", TokenPattern.PatternType.STRING, "role");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.OTHER,
            "OTHER", TokenPattern.PatternType.STRING, "other");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.SRC,
            "SRC", TokenPattern.PatternType.STRING, "src");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.TRANSITION,
            "TRANSITION", TokenPattern.PatternType.STRING, "transition");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.TIME,
            "TIME", TokenPattern.PatternType.STRING, "time");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.TYPE,
            "TYPE", TokenPattern.PatternType.STRING, "type");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.LEVEL,
            "LEVEL", TokenPattern.PatternType.STRING, "level");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.MODE,
            "MODE", TokenPattern.PatternType.STRING, "mode");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.POSITION,
            "POSITION", TokenPattern.PatternType.STRING, "position");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.NAME,
            "NAME", TokenPattern.PatternType.STRING, "name");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.ANCHOR,
            "ANCHOR", TokenPattern.PatternType.STRING, "anchor");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.TAG_PARAMETER,
            "TAG_PARAMETER", TokenPattern.PatternType.STRING, "tag");
    AddPattern(pattern);

    // Numbered key parameters.
    pattern = new TokenPattern((int) DokiScriptConstants.KEY1,
            "KEY1", TokenPattern.PatternType.STRING, "key1");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY2,
            "KEY2", TokenPattern.PatternType.STRING, "key2");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY3,
            "KEY3", TokenPattern.PatternType.STRING, "key3");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY4,
            "KEY4", TokenPattern.PatternType.STRING, "key4");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY5,
            "KEY5", TokenPattern.PatternType.STRING, "key5");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY6,
            "KEY6", TokenPattern.PatternType.STRING, "key6");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY7,
            "KEY7", TokenPattern.PatternType.STRING, "key7");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY8,
            "KEY8", TokenPattern.PatternType.STRING, "key8");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.KEY9,
            "KEY9", TokenPattern.PatternType.STRING, "key9");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.LIVE2D,
            "LIVE2D", TokenPattern.PatternType.STRING, "live2d");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.ZOOM,
            "ZOOM", TokenPattern.PatternType.STRING, "zoom");
    AddPattern(pattern);

    // Punctuation.
    pattern = new TokenPattern((int) DokiScriptConstants.BRACKET_LEFT,
            "BRACKET_LEFT", TokenPattern.PatternType.STRING, "{");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.BRACKET_RIGHT,
            "BRACKET_RIGHT", TokenPattern.PatternType.STRING, "}");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.SQUARE_BRACKET_LEFT,
            "SQUARE_BRACKET_LEFT", TokenPattern.PatternType.STRING, "[");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.SQUARE_BRACKET_RIGHT,
            "SQUARE_BRACKET_RIGHT", TokenPattern.PatternType.STRING, "]");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.PARENTHESE_LEFT,
            "PARENTHESE_LEFT", TokenPattern.PatternType.STRING, "(");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.PARENTHESE_RIGHT,
            "PARENTHESE_RIGHT", TokenPattern.PatternType.STRING, ")");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.ANGLE_BRACKET_LEFT,
            "ANGLE_BRACKET_LEFT", TokenPattern.PatternType.STRING, "<");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.DOUBLE_QUOTE,
            "DOUBLE_QUOTE", TokenPattern.PatternType.STRING, "\"");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.PERIOD,
            "PERIOD", TokenPattern.PatternType.STRING, ".");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.COMMA,
            "COMMA", TokenPattern.PatternType.STRING, ",");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.SEMICOLON,
            "SEMICOLON", TokenPattern.PatternType.STRING, ";");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.EQUAL,
            "EQUAL", TokenPattern.PatternType.STRING, "=");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.CLICK,
            "CLICK", TokenPattern.PatternType.STRING, ">");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.CLICK_NEXT_DIALOGUE_PAGE,
            "CLICK_NEXT_DIALOGUE_PAGE", TokenPattern.PatternType.STRING, ">>");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.OR,
            "OR", TokenPattern.PatternType.STRING, "|");
    AddPattern(pattern);

    // Layout and free-form content. Note that none of these are
    // flagged Ignore, so the parser sees whitespace tokens too.
    pattern = new TokenPattern((int) DokiScriptConstants.TAB,
            "TAB", TokenPattern.PatternType.REGEXP, "\\t+");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.RETURN,
            "RETURN", TokenPattern.PatternType.REGEXP, "[\\n\\r]+");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.SPACE,
            "SPACE", TokenPattern.PatternType.REGEXP, " +");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.IDENTIFIER,
            "IDENTIFIER", TokenPattern.PatternType.REGEXP,
            "[a-zA-Z_][0-9a-zA-Z_]*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.DECIMAL,
            "DECIMAL", TokenPattern.PatternType.REGEXP, "\\d+\\.?\\d*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.TEXT,
            "TEXT", TokenPattern.PatternType.REGEXP, ">.*");
    AddPattern(pattern);
    pattern = new TokenPattern((int) DokiScriptConstants.QUOTED_TEXT,
            "QUOTED_TEXT", TokenPattern.PatternType.REGEXP, "\"[^\"]*\"");
    AddPattern(pattern);
}
/// <summary>
/// Adds a string token pattern to this matcher. The pattern is
/// first registered with the string-match automaton and then
/// stored via the base class implementation.
/// </summary>
/// <param name="pattern">The pattern to add</param>
public override void AddPattern(TokenPattern pattern) {
    automaton.AddMatch(pattern.Pattern, ignoreCase, pattern);
    base.AddPattern(pattern);
}
/// <summary>
/// Factory method for creating a new token. Override this method
/// to provide a token implementation other than the default one.
/// </summary>
/// <param name="pattern">The token pattern</param>
/// <param name="image">The token image (i.e. characters)</param>
/// <param name="line">The line number of the first character</param>
/// <param name="column">The column number of the first character</param>
/// <returns>The token created</returns>
protected virtual Token NewToken(TokenPattern pattern,
                                 string image,
                                 int line,
                                 int column) {

    return new Token(pattern, image, line, column);
}
/// <summary>
/// Adds a string token pattern to this matcher. The backing
/// array is grown by one slot and the pattern appended at the end.
/// </summary>
/// <param name="pattern">The pattern to add</param>
/// <exception cref="Exception">If the pattern couldn't be added
/// to the matcher</exception>
public virtual void AddPattern(TokenPattern pattern) {
    int count = patterns.Length;

    Array.Resize(ref patterns, count + 1);
    patterns[count] = pattern;
}
/**
 * Adds a regular expression match to this automaton. New states
 * and transitions will be added to extend this automaton to
 * support the specified string. Note that this method only
 * supports a subset of the full regular expression syntax, so
 * a more complete regular expression library must also be
 * provided.
 *
 * @param pattern the regular expression string
 * @param ignoreCase the case-insensitive match flag
 * @param value the match value
 *
 * @throws RegExpException if the regular expression parsing
 * failed
 */
public void AddRegExpMatch(string pattern, bool ignoreCase, TokenPattern value) {
    TokenRegExpParser parser = new TokenRegExpParser(pattern, ignoreCase);
    string debug = "DFA regexp; " + parser.GetDebugInfo();
    bool isAscii;

    isAscii = parser.start.IsAsciiOutgoing();
    // Verify that each ASCII character is matched by at most one
    // outgoing transition from the parser start state, and that it
    // does not collide with an entry already present in the
    // initialChar lookup table. Either condition disables the fast
    // ASCII-lookup path by clearing isAscii (which also ends this
    // outer loop early).
    for (int i = 0; isAscii && i < 128; i++) {
        bool match = false;
        for (int j = 0; j < parser.start.outgoing.Length; j++) {
            if (parser.start.outgoing[j].Match((char) i)) {
                if (match) {
                    // Second transition matching the same character:
                    // the start state is ambiguous for table lookup.
                    isAscii = false;
                    break;
                }
                match = true;
            }
        }
        if (match && initialChar[i] != null) {
            // Table slot already claimed by a previous pattern.
            isAscii = false;
        }
    }
    if (parser.start.incoming.Length > 0) {
        // The start state has incoming transitions, so it must be
        // preserved as a distinct state; connect it with an epsilon
        // transition instead of merging it away.
        initial.AddOut(new NFAEpsilonTransition(parser.start));
        debug += ", uses initial epsilon";
    } else if (isAscii && !ignoreCase) {
        // Fast path: route ASCII characters straight to the successor
        // states via the lookup table, bypassing the start state.
        for (int i = 0; isAscii && i < 128; i++) {
            for (int j = 0; j < parser.start.outgoing.Length; j++) {
                if (parser.start.outgoing[j].Match((char) i)) {
                    initialChar[i] = parser.start.outgoing[j].state;
                }
            }
        }
        debug += ", uses ASCII lookup";
    } else {
        // General case: fold the parsed start state into the
        // automaton's initial state.
        parser.start.MergeInto(initial);
        debug += ", uses initial state";
    }
    parser.end.value = value;
    value.DebugInfo = debug;
}
/**
 * <summary>Initializes the tokenizer by creating all the token
 * patterns.</summary>
 *
 * <exception cref='ParserCreationException'>if the tokenizer
 * couldn't be initialized correctly</exception>
 */
private void CreatePatterns() {
    // Gherkin keywords (the trailing colon is optional on section
    // headers; step keywords accept either capitalized bare form or
    // lower-case form with a colon, per the generated grammar).
    AddPattern(new TokenPattern((int) GherkinConstants.T_FEATURE,
                                "T_FEATURE",
                                TokenPattern.PatternType.REGEXP,
                                "Feature:?"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_BACKGROUND,
                                "T_BACKGROUND",
                                TokenPattern.PatternType.REGEXP,
                                "Background:?"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_SCENARIO,
                                "T_SCENARIO",
                                TokenPattern.PatternType.REGEXP,
                                "Scenario:?"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_SCENARIO_OUTLINE,
                                "T_SCENARIO_OUTLINE",
                                TokenPattern.PatternType.REGEXP,
                                "Scenario Outline:?"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_EXAMPLES,
                                "T_EXAMPLES",
                                TokenPattern.PatternType.REGEXP,
                                "Examples:?"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_GIVEN,
                                "T_GIVEN",
                                TokenPattern.PatternType.REGEXP,
                                "Given|given:"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_WHEN,
                                "T_WHEN",
                                TokenPattern.PatternType.REGEXP,
                                "When|when:"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_THEN,
                                "T_THEN",
                                TokenPattern.PatternType.REGEXP,
                                "Then|then:"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_AND,
                                "T_AND",
                                TokenPattern.PatternType.REGEXP,
                                "And|and:"));
    AddPattern(new TokenPattern((int) GherkinConstants.T_BUT,
                                "T_BUT",
                                TokenPattern.PatternType.REGEXP,
                                "But|but:"));

    // Whitespace and structural tokens.
    AddPattern(new TokenPattern((int) GherkinConstants.HORIZONTAL_WHITESPACE,
                                "HORIZONTAL_WHITESPACE",
                                TokenPattern.PatternType.REGEXP,
                                "[\\t ]"));
    AddPattern(new TokenPattern((int) GherkinConstants.EOL,
                                "EOL",
                                TokenPattern.PatternType.REGEXP,
                                "\\r?\\n"));

    // Comments are recognized but ignored by the tokenizer.
    TokenPattern comment = new TokenPattern((int) GherkinConstants.COMMENT,
                                            "COMMENT",
                                            TokenPattern.PatternType.REGEXP,
                                            "[\\t ]*#[^\\r^\\n]*\\r?\\n");
    comment.Ignore = true;
    AddPattern(comment);

    // Any character that is not whitespace, pipe, '@', ':' (or '^',
    // which the generated character class also excludes).
    AddPattern(new TokenPattern((int) GherkinConstants.TEXT_CHAR,
                                "TEXT_CHAR",
                                TokenPattern.PatternType.REGEXP,
                                "[^\\r^\\n^\\|^\\t^ ^@^:]"));
    AddPattern(new TokenPattern((int) GherkinConstants.PIPE,
                                "PIPE",
                                TokenPattern.PatternType.STRING,
                                "|"));
    AddPattern(new TokenPattern((int) GherkinConstants.COLON,
                                "COLON",
                                TokenPattern.PatternType.STRING,
                                ":"));
    AddPattern(new TokenPattern((int) GherkinConstants.AT,
                                "AT",
                                TokenPattern.PatternType.STRING,
                                "@"));
}
/// <summary>
/// Adds a string token pattern to this matcher by appending it
/// to the pattern list.
/// </summary>
/// <param name="pattern">The pattern to add</param>
/// <exception cref="Exception">If the pattern couldn't be added
/// to the matcher</exception>
public virtual void AddPattern(TokenPattern pattern) {
    this.patterns.Add(pattern);
}
/// <summary>
/// Adds a regular expression match to this automaton. New states
/// and transitions will be added to extend this automaton to
/// support the specified string. Note that this method only
/// supports a subset of the full regular expression syntax, so
/// a more complete regular expression library must also be
/// provided.
/// </summary>
/// <param name="pattern">The regular expression string</param>
/// <param name="ignoreCase">The case-insensitive match flag</param>
/// <param name="value">The match value</param>
/// <exception cref="Grammatica.RE.RegExpException">If the regular expression parsing
/// failed</exception>
public void AddRegExpMatch(
    string pattern,
    bool ignoreCase,
    TokenPattern value) {

    TokenRegExpParser parser = new TokenRegExpParser(pattern, ignoreCase);
    string debug = "DFA regexp; " + parser.DebugInfo;
    bool isAscii;

    isAscii = parser.Start.IsAsciiOutgoing;
    // Verify that each ASCII character is matched by at most one
    // outgoing transition from the parser start state, and that it
    // does not collide with an entry already present in the
    // initialChar lookup table. Either condition disables the fast
    // ASCII-lookup path by clearing isAscii (which also ends this
    // outer loop early).
    for (int i = 0; isAscii && i < 128; i++) {
        bool match = false;
        foreach (var outTrans in parser.Start.Outgoing) {
            if (outTrans.Match((char)i)) {
                if (match) {
                    // Second transition matching the same character:
                    // the start state is ambiguous for table lookup.
                    isAscii = false;
                    break;
                }
                match = true;
            }
        }
        if (match && this.initialChar[i] != null) {
            // Table slot already claimed by a previous pattern.
            isAscii = false;
        }
    }

    if (parser.Start.Incoming.Count > 0) {
        // The start state has incoming transitions, so it must be
        // preserved as a distinct state; connect it with an epsilon
        // transition instead of merging it away.
        this.initial.AddOut(new NFAEpsilonTransition(parser.Start));
        debug += ", uses initial epsilon";
    } else if (isAscii && !ignoreCase) {
        // Fast path: route ASCII characters straight to the successor
        // states via the lookup table, bypassing the start state.
        for (int i = 0; isAscii && i < 128; i++) {
            foreach (var outTrans in parser.Start.Outgoing) {
                if (outTrans.Match((char)i)) {
                    this.initialChar[i] = outTrans.State;
                }
            }
        }
        debug += ", uses ASCII lookup";
    } else {
        // General case: fold the parsed start state into the
        // automaton's initial state.
        parser.Start.MergeInto(this.initial);
        debug += ", uses initial state";
    }

    parser.End.Value = value;
    value.DebugInfo = debug;
}