// Lexer round-trip test: a grammar with a Text rule ([a-zA-Z0-9]*) and a NewLine rule
// whose callback bumps state.LineNumber is run over "Line1\nLine2\n".
// Expects 5 tokens (presumably Text, NewLine, Text, NewLine plus a trailing empty
// Text — TODO confirm against Lexer.GetTokens) and LineNumber == 3 afterwards
// (which implies LineNumber starts at 1 — verify LexerState's default).
public void NonEmptyGrammarNonEmptyText() { LexerGrammar <LexerState> grammar = new LexerGrammar <LexerState>( new List <LexerTokenRule <LexerState> >() { new LexerTokenRule <LexerState>(1, "Text", "[a-zA-Z0-9]*"), new LexerTokenRule <LexerState>( 2, "NewLine", "\n", (LexerState state, string lexem) => { state.LineNumber++; return(LexerRuleReturnDecision.ReturnToken); }), }, new List <LexerDynamicRule>()); Lexer <LexerState> lexer = new Lexer <LexerState>(grammar); var s = new LexerState(); var tokens = lexer.GetTokens("Line1\nLine2\n", s).Tokens.ToArray(); Assert.Equal(5, tokens.Count()); Assert.Equal(3, s.LineNumber); }
/// <summary>
/// Scans forward from the current position and returns the next non-skip token.
/// </summary>
/// <returns>
/// The next accepted token, or an empty token when the input was fully consumed
/// while the lexer was in the Ready state.
/// </returns>
/// <exception cref="LexerException">
/// Thrown when the input ends in the middle of an unfinished token.
/// </exception>
public Token NextToken()
{
    State = LexerState.Ready;
    while (StringEnd != Input.Length)
    {
        char current = read();
        switchState(current);
        if (State != LexerState.Accepted)
        {
            continue;
        }
        Token accepted = Accept(AcceptType);
        // Skip tokens (whitespace etc.) are swallowed; keep scanning.
        if (accepted.Type != TokenType.Token_SKIP)
        {
            return accepted;
        }
    }
    if (State != LexerState.Ready)
    {
        throw new LexerException("End of file reached");
    }
    Token empty = new Token();
    empty.Lexeme = "";
    return empty;
}
/// <summary>
/// Creates a DFA node from its state type, terminal flag and outgoing
/// character transitions.
/// </summary>
// @formatter:on
public Node(LexerState type, bool terminal, Dictionary<char, Pair<LexerState, int>> transitions)
{
    // @formatter:off
    Type = type;
    Terminal = terminal;
    Transitions = transitions;
    // @formatter:on
}
/// <summary>
/// Initializes the lexer over the given source text with all scan state reset:
/// no tokens yet, indices before the first character, state undetermined.
/// </summary>
public Lexer(string source)
{
    Source = source;
    Tokens = new List<Token>();
    CurrentLine = 0;
    StartIndex = -1;
    CurrentIndex = -1;
    CurrentState = LexerState.Undetermined;
}
/// <summary>
/// Reset reader for parsing again: position back before the first character,
/// line counter to 1, and whitespace/escape configuration to defaults.
/// </summary>
public void Reset()
{
    var freshState = new LexerState();
    freshState.Pos = -1;          // before the first character
    freshState.Line = 1;          // line numbers are 1-based
    freshState.Text = string.Empty;
    _pos = freshState;
    _whiteSpaceChars = new Dictionary<char, char>();
    _escapeChar = '\\';
}
/// <summary>
/// Switches the lexer into <paramref name="state"/>; if that state requests it,
/// arms indentation tracking for the next line (the flag is only ever set here,
/// never cleared).
/// </summary>
void EnterState(LexerState state)
{
    currentState = state;
    shouldTrackNextIndentation = shouldTrackNextIndentation || state.setTrackNextIndentation;
}
/// <summary>
/// Returns the cached <see cref="LexerState"/> for the given index, creating
/// and caching a fresh one on first request.
/// </summary>
private LexerState GetLexerState(int State)
{
    LexerState cached = (LexerState)this.lexerStates[State];
    if (cached != null)
    {
        return cached;
    }
    cached = new LexerState();
    this.lexerStates[State] = cached;
    return cached;
}
/// <summary>
/// Lexes the SingleType production: skips whitespace, then consumes a QName
/// (if one starts here) and moves to the Operator state. Does nothing at EOF
/// or when the next character cannot start a name.
/// </summary>
private void SingleTypeState()
{
    SkipWhitespace();
    char next = Peek(0);
    if (next == 0)
    {
        return;
    }
    if (!XmlCharType.Instance.IsNameChar(next))
    {
        return;
    }
    ConsumeQName();
    m_state = LexerState.Operator;
}
/// <summary>
/// Finalizes the lexeme between StringBegin and StringEnd as a token of the
/// given type, reclassifying reserved words, records it in TokensList, and
/// resets the lexer to Ready.
/// </summary>
protected Token Accept(TokenType Type)
{
    string lexeme = Input.Substring(StringBegin, StringEnd - StringBegin);
    StringBegin = StringEnd;
    State = LexerState.Ready;

    Token token = new Token();
    token.Lexeme = lexeme;
    token.Type = Type;

    // Reserved words, including the variants that captured a leading CR/LF pair.
    switch (lexeme)
    {
        case "if":
        case "while":
        case "\r\nif":
        case "\r\nwhile":
            token.Type = TokenType.Token_RESERVEDWORD;
            break;
    }

    TokensList.Add(token);
    return token;
}
/// <summary>
/// Runs the lexer over the editor text and appends each token's string form
/// to the lexer-output box, one per line, until EOF is reached.
/// </summary>
private void ShowLexerOutput()
{
    var state = new LexerState(tbEditor.Text);
    state.Reset();
    for (var token = Lexer.GetToken(state);
         token.Type != TokenType.EOF;
         token = Lexer.GetToken(state))
    {
        tbLexerOutput.AppendText(token.ToString());
        tbLexerOutput.AppendText("\r\n");
    }
}
/// <summary>
/// Builds a token for the given lexeme. "true"/"false" (case-insensitive)
/// become a BooleanToken; otherwise the token type mapped from the lexer state
/// selects a constructor. Returns null when the state has no mapped token type.
/// </summary>
private static Token GetToken(int line, int column, string lexeme, LexerState lexerState)
{
    // Lowercase once instead of twice.
    var lowered = lexeme.ToLower();
    if (lowered == "true" || lowered == "false")
    {
        return new BooleanToken(line, column, lexeme);
    }
    // Single dictionary lookup instead of ContainsKey followed by the indexer.
    if (LexerStateTypeToTokenType.TryGetValue(lexerState, out var tokenType))
    {
        return Token.TokenConstructors[tokenType](line, column, lexeme);
    }
    return null;
}
/// <summary>
/// Two MetaGeneratorLexerState instances are equal when the base state matches
/// and both bracket/parenthesis counters agree. Any other LexerState compares
/// unequal.
/// </summary>
public override bool Equals(LexerState obj)
{
    if (!base.Equals(obj))
    {
        return false;
    }
    var other = obj as MetaGeneratorLexerState;
    return other != null
        && this.templateBrackets == other.templateBrackets
        && this.templateParenthesis == other.templateParenthesis;
}
/// <summary>
/// Lexes inside a KindTest production (the parenthesized part of element(...),
/// schema-element(...), etc.): '{' pushes Operator and enters Default; ')'
/// pops the previously saved state; '*' moves to CloseKindTest; a nested
/// "element(" or "schema-element(" emits its keyword token and pushes KindTest
/// again; any other name character is consumed as a QName, leading to
/// CloseKindTest. Does nothing at EOF.
/// </summary>
private void KindTestState() { SkipWhitespace(); if (Peek(0) == 0) { return; } BeginToken(); char c = Peek(0); if (c == '{') { ConsumeChar(Read()); m_states.Push(LexerState.Operator); m_state = LexerState.Default; } else if (c == ')') { ConsumeChar(Read()); m_state = m_states.Pop(); } else if (c == '*') { ConsumeChar(Read()); m_state = LexerState.CloseKindTest; } else if (MatchIdentifer("element", "(")) { EndToken("element"); ConsumeToken(Token.ELEMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.KindTest); } else if (MatchIdentifer("schema-element", "(")) { EndToken("schema-element"); ConsumeToken(Token.SCHEMA_ELEMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.KindTest); } else if (XmlCharType.Instance.IsNameChar(c)) { ConsumeQName(); m_state = LexerState.CloseKindTest; } }
/// <summary>
/// Handles the character after a backslash inside a string literal: 'n' maps
/// to a newline, anything else is taken verbatim; hitting end-of-line here is
/// an error. Always returns to the String state.
/// </summary>
private LexerState StateStringEscape(char ch, LexerState guess)
{
    if (ch != 'n' && guess == LexerState.EndOfLine)
    {
        throw new TokenizationException("EOL during string literal");
    }
    Append(ch == 'n' ? '\n' : ch);
    return LexerState.String;
}
/// <summary>
/// Finalizes the lexeme between StringBegin and StringEnd as a token, promoting
/// reserved words (including CR/LF-prefixed forms) to Token_RESERVEDWORD, adds
/// it to TokensList and resets the lexer to Ready.
/// </summary>
protected Token Accept(TokenType Type)
{
    int length = StringEnd - StringBegin;
    string lexeme = Input.Substring(StringBegin, length);
    StringBegin = StringEnd;

    Token result = new Token();
    result.Lexeme = lexeme;
    bool isReserved = lexeme == "if" || lexeme == "while"
        || lexeme == "\r\nif" || lexeme == "\r\nwhile";
    result.Type = isReserved ? TokenType.Token_RESERVEDWORD : Type;

    State = LexerState.Ready;
    TokensList.Add(result);
    return result;
}
/// <summary>
/// Leaves the current lexer mode: flushes any pending text between the saved
/// position and the current position as a token (if the factory produces one),
/// then restores the previous mode from the mode stack.
/// </summary>
private void _LeaveMode()
{
    if (this._position > 0)
    {
        var length = this._position - this._savedPosition;
        var text = this._templateString.Substring(this._savedPosition, length);
        var token = this._CreateToken(text);
        if (token != null)
        {
            this._tokens.Add(token);
            this._Save();
        }
    }
    // Return to whatever mode was active before the current one was entered.
    this._currentState = this._lexerModes.Pop();
}
// Verifies scanner error recovery: with RecoverErrors enabled, the invalid run
// ("!@#$!@") in the source is reported through the CompilationErrorManager as
// error id 101 while the scanner keeps yielding the surrounding valid ID/NUM
// lexemes. WHITESPACE is registered as a trivia token so it never surfaces as
// a lexeme, and no errors are expected before the invalid run is reached.
public void ErrorRecoveryTest() { Lexicon lexicon = new Lexicon(); LexerState global = lexicon.DefaultLexer; var ID = global.DefineToken(RE.Range('a', 'z').Concat( (RE.Range('a', 'z') | RE.Range('0', '9')).Many())); var NUM = global.DefineToken(RE.Range('0', '9').Many1()); var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many()); ScannerInfo info = lexicon.CreateScannerInfo(); PeekableScanner scanner = new PeekableScanner(info); string source = "asdf04a 1107 !@#$!@ Z if vvv xmlns 772737"; StringReader sr = new StringReader(source); scanner.SetSource(new SourceReader(sr)); scanner.SetTriviaTokens(WHITESPACE.Index); scanner.RecoverErrors = true; CompilationErrorManager em = new CompilationErrorManager(); em.DefineError(101, 0, CompilationStage.Scanning, "Invalid token: {0}"); scanner.ErrorManager = em; scanner.LexicalErrorId = 101; Lexeme l1 = scanner.Read(); Assert.AreEqual(ID.Index, l1.TokenIndex); Lexeme l2 = scanner.Read(); Assert.AreEqual(NUM.Index, l2.TokenIndex); Assert.AreEqual(0, em.Errors.Count); Lexeme l3 = scanner.Read(); Assert.AreEqual(ID.Index, l3.TokenIndex); Assert.IsTrue(em.Errors.Count > 0); Assert.AreEqual(101, em.Errors[0].Info.Id); }
/// <summary>
/// Emits a "," separator token. Arriving from a word-like state (Internal,
/// KeywordOrIdent, Number, String) the current character is pushed back for
/// re-reading; arriving from whitespace it is not. Any other source state is
/// an invalid transition.
/// </summary>
private LexerState StateSeparator(char ch, LexerState guess)
{
    if (guess == LexerState.Whitespace)
    {
        AddToken(TokenType.Separator, ",");
        return afterWord;
    }
    bool fromWord = guess == LexerState.Internal
        || guess == LexerState.KeywordOrIdent
        || guess == LexerState.Number
        || guess == LexerState.String;
    if (!fromWord)
    {
        throw new TokenizationException($"Invalid transition: {State} => {guess}");
    }
    AddToken(TokenType.Separator, ",");
    Rewind();
    return afterWord;
}
/// <summary>
/// Consumes one character of a string literal: backslash enters escape mode,
/// a closing quote finishes the token, end-of-line is an error, and any other
/// character is appended to the literal in progress.
/// </summary>
private LexerState StateString(char ch, LexerState guess)
{
    if (ch == '\\')
    {
        return LexerState.StringEscape;
    }
    if (ch == '"')
    {
        AddToken(TokenType.String);
        return afterWord;
    }
    if (guess == LexerState.EndOfLine)
    {
        throw new TokenizationException("EOL during string literal");
    }
    Append(ch);
    return State;
}
/// <summary>
/// Lexes one character inside an argument list. Word-like guesses append the
/// character and switch to that guessed state; Reference/Internal switch
/// without appending; whitespace is ignored. Parentheses are balanced via
/// parensDepth: '(' and a matched ')' become paren tokens, while an unmatched
/// ')' (depth 0) is treated as the operator closing the argument list and
/// resets afterWord to None. Any other character is a tokenization error.
/// Note: afterWord is set to ArgumentList on entry so word states return here.
/// </summary>
private LexerState StateArgumentList(char ch, LexerState guess) { afterWord = LexerState.ArgumentList; switch (guess) { case LexerState.KeywordOrIdent: case LexerState.Number: case LexerState.Operator: case LexerState.Separator: Append(ch); return(guess); case LexerState.Reference: case LexerState.Internal: return(guess); case LexerState.Whitespace: return(State); case LexerState.LeftParens: parensDepth++; AddToken(TokenType.LeftParens, "("); return(State); case LexerState.RightParens: if (parensDepth > 0) { parensDepth--; AddToken(TokenType.RightParens, ")"); return(State); } Append(ch); AddOperatorToken(); afterWord = LexerState.None; return(afterWord); default: throw new TokenizationException($"Invalid character in argument list: {ch}"); } }
/// <summary>
/// Emits a float constant token for the text accumulated in CurrentToken.
/// Constants are deduplicated by value (tolerance 1e-5): a match reuses a
/// clone of the existing token, otherwise a new ConstantToken is created with
/// TokenIndex = ConstIndex and registered in Constants. The token's Line is
/// stamped before handing it to ReturnToken with the triggering symbol.
/// NOTE(review): on a match only the clone gets its Line set — confirm Clone()
/// is deep enough that entries stored in Constants are never mutated here.
/// </summary>
private void ReturnConst(LexerState state, Symbol trigger) { Log(LogEventLevel.Information, "Found a constant"); var value = ConstantToken <float> .Parse(CurrentToken.ToString()); var con = Constants.FirstOrDefault(x => Math.Abs(x.Value - value) < 1E-5)?.Clone() as ConstantToken <float>; if (con == null) { con = new ConstantToken <float>(CurrentToken.ToString()) { TokenIndex = ConstIndex, Substring = CurrentToken.ToString() }; Constants.Add(con); } else { Log(LogEventLevel.Information, "The constant is already processed"); } con.Line = Line; ReturnToken(con, trigger); }
/// <summary>
/// Lexes the body of processing-instruction(...): ')' pops back to the saved
/// state, an NCName or a quoted literal names the target PI, and EOF does
/// nothing.
/// </summary>
private void KindTestForPiState()
{
    SkipWhitespace();
    if (Peek(0) == 0)
    {
        return;
    }
    char next = Peek(0);
    BeginToken();
    if (next == ')')
    {
        ConsumeChar(Read());
        m_state = m_states.Pop();
        return;
    }
    if (XmlCharType.Instance.IsNCNameChar(next))
    {
        ConsumeNCName();
        return;
    }
    if (next == '\'' || next == '"')
    {
        ConsumeLiteral();
    }
}
/// <summary>
/// Builds the lexer's state machine: first the token regex table shared by all
/// states, then one LexerState per mode (base, shortcut-option, command,
/// command-or-expression, assignment, expression, link, link-destination)
/// wired together via AddTransition/AddTextRule; "base" becomes defaultState.
/// Pattern registration order matters: longer operators (e.g. "<=", "+=") are
/// added before their single-character prefixes so they win the match.
/// </summary>
void CreateStates() { var patterns = new Dictionary <TokenType, string> (); patterns[TokenType.Text] = ".*"; patterns[TokenType.Number] = @"\-?[0-9]+(\.[0-9+])?"; patterns[TokenType.String] = @"""([^""\\]*(?:\\.[^""\\]*)*)"""; patterns[TokenType.LeftParen] = @"\("; patterns[TokenType.RightParen] = @"\)"; patterns[TokenType.EqualTo] = @"(==|is(?!\w)|eq(?!\w))"; patterns[TokenType.EqualToOrAssign] = @"(=|to(?!\w))"; patterns[TokenType.NotEqualTo] = @"(\!=|neq(?!\w))"; patterns[TokenType.GreaterThanOrEqualTo] = @"\>="; patterns[TokenType.GreaterThan] = @"\>"; patterns[TokenType.LessThanOrEqualTo] = @"\<="; patterns[TokenType.LessThan] = @"\<"; patterns[TokenType.AddAssign] = @"\+="; patterns[TokenType.MinusAssign] = @"\-="; patterns[TokenType.MultiplyAssign] = @"\*="; patterns[TokenType.DivideAssign] = @"\/="; patterns[TokenType.Add] = @"\+"; patterns[TokenType.Minus] = @"\-"; patterns[TokenType.Multiply] = @"\*"; patterns[TokenType.Divide] = @"\/"; patterns [TokenType.And] = @"(\&\&|and(?!\w))"; patterns [TokenType.Or] = @"(\|\||or(?!\w))"; patterns [TokenType.Xor] = @"(\^|xor(?!\w))"; patterns [TokenType.Not] = @"(\!|not(?!\w))"; patterns[TokenType.Variable] = @"\$([A-Za-z0-9_\.])+"; patterns[TokenType.Comma] = @","; patterns[TokenType.True] = @"true(?!\w)"; patterns[TokenType.False] = @"false(?!\w)"; patterns[TokenType.Null] = @"null(?!\w)"; patterns[TokenType.BeginCommand] = @"\<\<"; patterns[TokenType.EndCommand] = @"\>\>"; patterns[TokenType.OptionStart] = @"\[\["; patterns[TokenType.OptionEnd] = @"\]\]"; patterns[TokenType.OptionDelimit] = @"\|"; patterns[TokenType.Identifier] = @"[a-zA-Z0-9_:\.]+"; patterns[TokenType.If] = @"if(?!\w)"; patterns[TokenType.Else] = @"else(?!\w)"; patterns[TokenType.ElseIf] = @"elseif(?!\w)"; patterns[TokenType.EndIf] = @"endif(?!\w)"; patterns[TokenType.Set] = @"set(?!\w)"; patterns[TokenType.ShortcutOption] = @"\-\>"; states = new Dictionary <string, LexerState> (); states ["base"] = new LexerState(patterns); states 
["base"].AddTransition(TokenType.BeginCommand, "command", delimitsText: true); states ["base"].AddTransition(TokenType.OptionStart, "link", delimitsText: true); states ["base"].AddTransition(TokenType.ShortcutOption, "shortcut-option"); states ["base"].AddTextRule(TokenType.Text); states ["shortcut-option"] = new LexerState(patterns); states ["shortcut-option"].setTrackNextIndentation = true; states ["shortcut-option"].AddTransition(TokenType.BeginCommand, "expression", delimitsText: true); states ["shortcut-option"].AddTextRule(TokenType.Text, "base"); states ["command"] = new LexerState(patterns); states ["command"].AddTransition(TokenType.If, "expression"); states ["command"].AddTransition(TokenType.Else); states ["command"].AddTransition(TokenType.ElseIf, "expression"); states ["command"].AddTransition(TokenType.EndIf); states ["command"].AddTransition(TokenType.Set, "assignment"); states ["command"].AddTransition(TokenType.EndCommand, "base", delimitsText: true); states ["command"].AddTransition(TokenType.Identifier, "command-or-expression"); states ["command"].AddTextRule(TokenType.Text); states ["command-or-expression"] = new LexerState(patterns); states ["command-or-expression"].AddTransition(TokenType.LeftParen, "expression"); states ["command-or-expression"].AddTransition(TokenType.EndCommand, "base", delimitsText: true); states ["command-or-expression"].AddTextRule(TokenType.Text); states ["assignment"] = new LexerState(patterns); states ["assignment"].AddTransition(TokenType.Variable); states ["assignment"].AddTransition(TokenType.EqualToOrAssign, "expression"); states ["assignment"].AddTransition(TokenType.AddAssign, "expression"); states ["assignment"].AddTransition(TokenType.MinusAssign, "expression"); states ["assignment"].AddTransition(TokenType.MultiplyAssign, "expression"); states ["assignment"].AddTransition(TokenType.DivideAssign, "expression"); states ["expression"] = new LexerState(patterns); states 
["expression"].AddTransition(TokenType.EndCommand, "base"); states ["expression"].AddTransition(TokenType.Number); states ["expression"].AddTransition(TokenType.String); states ["expression"].AddTransition(TokenType.LeftParen); states ["expression"].AddTransition(TokenType.RightParen); states ["expression"].AddTransition(TokenType.EqualTo); states ["expression"].AddTransition(TokenType.EqualToOrAssign); states ["expression"].AddTransition(TokenType.NotEqualTo); states ["expression"].AddTransition(TokenType.GreaterThanOrEqualTo); states ["expression"].AddTransition(TokenType.GreaterThan); states ["expression"].AddTransition(TokenType.LessThanOrEqualTo); states ["expression"].AddTransition(TokenType.LessThan); states ["expression"].AddTransition(TokenType.Add); states ["expression"].AddTransition(TokenType.Minus); states ["expression"].AddTransition(TokenType.Multiply); states ["expression"].AddTransition(TokenType.Divide); states ["expression"].AddTransition(TokenType.And); states ["expression"].AddTransition(TokenType.Or); states ["expression"].AddTransition(TokenType.Xor); states ["expression"].AddTransition(TokenType.Not); states ["expression"].AddTransition(TokenType.Variable); states ["expression"].AddTransition(TokenType.Comma); states ["expression"].AddTransition(TokenType.True); states ["expression"].AddTransition(TokenType.False); states ["expression"].AddTransition(TokenType.Null); states ["expression"].AddTransition(TokenType.Identifier); states ["link"] = new LexerState(patterns); states ["link"].AddTransition(TokenType.OptionEnd, "base", delimitsText: true); states ["link"].AddTransition(TokenType.OptionDelimit, "link-destination", delimitsText: true); states ["link"].AddTextRule(TokenType.Text); states ["link-destination"] = new LexerState(patterns); states ["link-destination"].AddTransition(TokenType.Identifier); states ["link-destination"].AddTransition(TokenType.OptionEnd, "base"); defaultState = states ["base"]; }
/// <summary>
/// DFA transition: returns the lexer state reached from this state on input
/// character <paramref name="ch"/>, building it on demand from the NFA.
/// The per-state cache (dfc) memoizes by character, and nf.dfashare interns
/// structurally-equal states so identical DFA states are shared.
/// </summary>
/// <remarks>
/// nstates is a bitmask over NFA node indices, 32 nodes per int; for every set
/// bit, each outgoing edge whose condition accepts ch is followed, then the
/// resulting node set is closed via nf.Close before being interned and cached.
/// </remarks>
public LexerState Next(NFA nf, int ch) { LexerState l; if (dfc.TryGetValue(ch, out l)) return l; l = new LexerState(nf); for (int i = 0; i < nstates.Length; i++) { int bm = nstates[i]; for (int j = 0; j < 32; j++) { if ((bm & (1 << j)) == 0) continue; foreach (NFA.Edge e in nf.nodes[32*i + j].edges) { if (e.when != null && e.when.Accepts(ch)) l.Add(e.to); } } } nf.Close(l); LexerState cl; if (!nf.dfashare.TryGetValue(l, out cl)) { nf.dfashare[l] = cl = l; } dfc[ch] = cl; return cl; }
/// <summary>
/// Lexes the SingleType production: after skipping whitespace, a name
/// character starts a QName which is consumed, moving the lexer to the
/// Operator state. Does nothing at EOF. (The disabled block below would have
/// routed "(:" into comment handling — kept for reference.)
/// </summary>
private void SingleTypeState() { SkipWhitespace(); if (Peek(0) == 0) return; if (XmlCharType.Instance.IsNameChar(Peek(0))) { ConsumeQName(); m_state = LexerState.Operator; } //else if (MatchText("(:")) //{ // m_states.Push(m_state); // m_state = LexerState.ExprComment; // ExprCommentState(); //} }
// Parses a Mustache-like template string (with at most one "{{param}}" parameter)
// and prints it to the console, rendering the parameter in accentedColor and the
// rest in basicColor.
public static void Write(string str, ConsoleColor basicColor, ConsoleColor accentedColor)
{
    string before = "";
    string accented = "";
    string after = "";
    LexerState ps = LexerState.BEFORE;
    foreach (char c in str)
    {
        switch (ps)
        {
            case LexerState.BEFORE:
                if (c != '{') { before += c; }
                else { ps = LexerState.FIRST_OPENING_BRACE; }
                break;
            case LexerState.FIRST_OPENING_BRACE:
                if (c == '{') { ps = LexerState.SECOND_OPENING_BRACE; }
                else
                {
                    // Lone '{': not a parameter — restore it and the current char.
                    ps = LexerState.BEFORE;
                    before += '{';
                    before += c;
                }
                break;
            case LexerState.SECOND_OPENING_BRACE:
                if (c == '}') { ps = LexerState.FIRST_CLOSING_BRACE; }
                else { ps = LexerState.ACCENTED; accented += c; }
                break;
            case LexerState.ACCENTED:
                if (c == '}') { ps = LexerState.FIRST_CLOSING_BRACE; }
                else { accented += c; }
                break;
            case LexerState.FIRST_CLOSING_BRACE:
                if (c == '}') { ps = LexerState.SECOND_CLOSING_BRACE; }
                else
                {
                    // Lone '}' inside the parameter: keep it and return to
                    // ACCENTED. (Bug fix: the state was previously left at
                    // FIRST_CLOSING_BRACE, so a later single '}' wrongly
                    // terminated the parameter — e.g. "{{a}b}}" mis-parsed.)
                    ps = LexerState.ACCENTED;
                    accented += '}';
                    accented += c;
                }
                break;
            case LexerState.SECOND_CLOSING_BRACE:
                ps = LexerState.AFTER;
                after += c;
                break;
            case LexerState.AFTER:
                after += c;
                break;
        }
    }
    Console2.Write(before, basicColor);
    Console2.Write(accented, accentedColor);
    Console2.WriteLine(after, basicColor);
}
/// <summary>
/// Lexes the ItemType production of the XQuery grammar. Sequence-type keywords
/// (empty-sequence(), item(), element(...), attribute(...), document-node(...),
/// processing-instruction(...), comment(), text(), node(), schema-element(...),
/// schema-attribute(...)) emit their token, push OccurrenceIndicator, and enter
/// the appropriate kind-test state; operator keywords (and, or, div, eq, cast
/// as, instance of, ...) emit their token and mostly return to Default; a plain
/// QName leads to OccurrenceIndicator. '$' starts a variable name; comparison
/// operators and brackets route back to Default.
/// NOTE(review): MatchIdentifer("external") emits Token.EXCEPT — this looks
/// like a copy/paste slip (Token.EXTERNAL expected?); confirm against the
/// parser. Also "at" is matched twice in this if-chain; the second, plain arm
/// is unreachable — verify intent.
/// </summary>
private void ItemTypeState() { SkipWhitespace(); if (Peek(0) == 0) return; BeginToken(); char c = Peek(0); if (c == '$') { ConsumeChar(Read()); m_state = LexerState.VarName; } else if (MatchIdentifer("empty-sequence", "(", ")")) { EndToken(); ConsumeToken(Token.EMPTY_SEQUENCE); m_state = LexerState.Operator; } //else if (MatchText("(:")) //{ // m_states.Push(m_state); // m_state = LexerState.ExprComment; // ExprCommentState(); //} else if (MatchIdentifer("element", "(")) { EndToken("element"); ConsumeToken(Token.ELEMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("attribute", "(")) { EndToken("attribute"); ConsumeToken(Token.ATTRIBUTE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("schema-element", "(")) { EndToken("schema-element"); ConsumeToken(Token.SCHEMA_ELEMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("schema-attribute", "(")) { EndToken("schema-attribute"); ConsumeToken(Token.SCHEMA_ATTRIBUTE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("comment", "(")) { EndToken("comment"); ConsumeToken(Token.COMMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("text", "(")) { EndToken("text"); ConsumeToken(Token.TEXT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("node", "(")) { EndToken("node"); ConsumeToken(Token.NODE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } 
else if (MatchIdentifer("document-node", "(")) { EndToken("document-node"); ConsumeToken(Token.DOCUMENT_NODE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTest; } else if (MatchIdentifer("processing-instruction", "(")) { EndToken("processing-instruction"); ConsumeToken(Token.PROCESSING_INSTRUCTION); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.OccurrenceIndicator); m_state = LexerState.KindTestForPi; } else if (MatchIdentifer("item", "(", ")")) { EndToken(); ConsumeToken(Token.ITEM); m_state = LexerState.OccurrenceIndicator; } else if (c == ';') { ConsumeChar(Read()); m_state = LexerState.Default; } else if (MatchIdentifer("then")) { EndToken(); ConsumeToken(Token.THEN); m_state = LexerState.Default; } else if (MatchIdentifer("else")) { EndToken(); ConsumeToken(Token.ELSE); m_state = LexerState.Default; } else if (MatchIdentifer("at")) { EndToken(); ConsumeToken(Token.AT); SkipWhitespace(); c = Peek(0); if (c == '\'' || c == '"') { ConsumeLiteral(); m_state = LexerState.NamespaceDecl; } else m_state = LexerState.Default; } else if (c == '=' || c == '(' || c == '[' || c == '|') { ConsumeChar(Read()); if (c == '[') m_states.Push(m_state); m_state = LexerState.Default; } else if (c == ':' && Peek(1) == '=') { ConsumeChar(Read()); BeginToken(); ConsumeChar(Read()); m_state = LexerState.Default; } else if (c == '!' 
&& Peek(1) == '=') { ConsumeChar(Read()); BeginToken(); ConsumeChar(Read()); m_state = LexerState.Default; } else if (c == '>') { ConsumeChar(Read()); if (Peek(0) == '=' || Peek(0) == '>') { BeginToken(); ConsumeChar(Read()); } m_state = LexerState.Default; } else if (c == '<') { ConsumeChar(Read()); if (Peek(0) == '=' || Peek(0) == '<') { BeginToken(); ConsumeChar(Read()); } m_state = LexerState.Default; } else if (c == ')') { ConsumeChar(Read()); SkipWhitespace(); BeginToken(); if (MatchIdentifer("as")) { EndToken(); ConsumeToken(Token.AS); m_state = LexerState.ItemType; } } else if (MatchIdentifer("external")) { EndToken(); ConsumeToken(Token.EXCEPT); m_state = LexerState.Default; } else if (MatchIdentifer("and")) { EndToken(); ConsumeToken(Token.AND); m_state = LexerState.Default; } else if (MatchIdentifer("at")) { EndToken(); ConsumeToken(Token.AT); m_state = LexerState.Default; } else if (MatchIdentifer("div")) { EndToken(); ConsumeToken(Token.DIV); m_state = LexerState.Default; } else if (MatchIdentifer("except")) { EndToken(); ConsumeToken(Token.EXCEPT); m_state = LexerState.Default; } else if (MatchIdentifer("eq")) { EndToken(); ConsumeToken(Token.EQ); m_state = LexerState.Default; } else if (MatchIdentifer("ge")) { EndToken(); ConsumeToken(Token.GE); m_state = LexerState.Default; } else if (MatchIdentifer("gt")) { EndToken(); ConsumeToken(Token.GT); m_state = LexerState.Default; } else if (MatchIdentifer("le")) { EndToken(); ConsumeToken(Token.LE); m_state = LexerState.Default; } else if (MatchIdentifer("lt")) { EndToken(); ConsumeToken(Token.LT); m_state = LexerState.Default; } else if (MatchIdentifer("ne")) { EndToken(); ConsumeToken(Token.NE); m_state = LexerState.Default; } else if (MatchIdentifer("idiv")) { EndToken(); ConsumeToken(Token.IDIV); m_state = LexerState.Default; } else if (MatchIdentifer("intersect")) { EndToken(); ConsumeToken(Token.INTERSECT); m_state = LexerState.Default; } else if (MatchIdentifer("mod")) { EndToken(); 
ConsumeToken(Token.MOD); m_state = LexerState.Default; } else if (MatchIdentifer("order", "by")) { EndToken(); ConsumeToken(Token.ORDER_BY); m_state = LexerState.Default; } else if (MatchIdentifer("stable", "order", "by")) { EndToken(); ConsumeToken(Token.STABLE_ORDER_BY); m_state = LexerState.Default; } else if (MatchIdentifer("or")) { EndToken(); ConsumeToken(Token.OR); m_state = LexerState.Default; } else if (MatchIdentifer("return")) { EndToken(); ConsumeToken(Token.RETURN); m_state = LexerState.Default; } else if (MatchIdentifer("satisfies")) { EndToken(); ConsumeToken(Token.SATISFIES); m_state = LexerState.Default; } else if (MatchIdentifer("to")) { EndToken(); ConsumeToken(Token.TO); m_state = LexerState.Default; } else if (MatchIdentifer("union")) { EndToken(); ConsumeToken(Token.UNION); m_state = LexerState.Default; } else if (MatchIdentifer("where")) { EndToken(); ConsumeToken(Token.WHERE); m_state = LexerState.Default; } else if (MatchIdentifer("castable", "as")) { EndToken(); ConsumeToken(Token.CASTABLE_AS); m_state = LexerState.SingleType; } else if (MatchIdentifer("cast", "as")) { EndToken(); ConsumeToken(Token.CAST_AS); m_state = LexerState.SingleType; } else if (MatchIdentifer("instance", "of")) { EndToken(); ConsumeToken(Token.INSTANCE_OF); } else if (MatchIdentifer("treat", "as")) { EndToken(); ConsumeToken(Token.TREAT_AS); } else if (MatchIdentifer("case")) { EndToken(); ConsumeToken(Token.CASE); } else if (MatchIdentifer("as")) { EndToken(); ConsumeToken(Token.AS); } else if (MatchIdentifer("in")) { EndToken(); ConsumeToken(Token.IN); m_state = LexerState.Default; } else if (MatchIdentifer("is")) { EndToken(); ConsumeToken(Token.IS); m_state = LexerState.Default; } else if (XmlCharType.Instance.IsNameChar(c)) { ConsumeQName(); m_state = LexerState.OccurrenceIndicator; } }
/// <summary>
/// Lexes after the name inside a kind test: ')' pops the saved state, ','
/// continues with another KindTest, '{' pushes Operator and enters Default,
/// and an optional '?' occurrence marker is simply consumed. Does nothing at
/// EOF. (The disabled block would have routed "(:" into comment handling.)
/// </summary>
private void CloseKindTestState() { SkipWhitespace(); if (Peek(0) == 0) return; char c = Peek(0); BeginToken(); if (c == ')') { ConsumeChar(Read()); m_state = m_states.Pop(); } else if (c == ',') { ConsumeChar(Read()); m_state = LexerState.KindTest; } else if (c == '{') { ConsumeChar(Read()); m_states.Push(LexerState.Operator); m_state = LexerState.Default; } else if (c == '?') ConsumeChar(Read()); //else if (MatchText("(:")) //{ // m_states.Push(m_state); // m_state = LexerState.ExprComment; // ExprCommentState(); //} }
/// <summary>
/// Tokenizes the script line by line through the state machine: each character
/// is classified (Guess) and dispatched via stateMachine[State], then every
/// line is finished with a synthetic EndOfLine step so pending tokens flush.
/// An "Include" token followed by a string filename is replaced by the tokens
/// of the referenced ScriptSource (recursively tokenized with a fresh
/// Tokenizer); otherwise an EOL token terminates the line. Empty lines are
/// skipped entirely — no EOL token is emitted for them (presumably
/// intentional; confirm against the parser).
/// </summary>
public void Tokenize(IEnumerable <string> lines) { afterWord = LexerState.None; parensDepth = 0; Line = 0; foreach (string line in lines) { Line++; if (line.Length == 0) { continue; } CurrentLine = line; Column = 0; State = LexerState.None; endOfLine = false; currentToken = string.Empty; while (!endOfLine) { char ch = Next(); LexerState guess = Guess(ch); State = stateMachine[State](ch, guess); if (!stateMachine.ContainsKey(State)) { throw new TokenizationException($"Unknown tokenizer state: {State}"); } } // process end of line in case something is missing stateMachine[State]('\n', LexerState.EndOfLine); // special handling for 'Include' if (Tokens.Count >= 2 && Tokens[Tokens.Count - 2].Value == "Include") { Token filename = Tokens[Tokens.Count - 1]; Tokens.RemoveRange(Tokens.Count - 2, 2); Tokenizer jt = new Tokenizer(Context); if (filename.Type != TokenType.String) { throw new ArgumentTypeException("Can only Include a (filename) string"); } ScriptSource include = Context.Djn.FindByName <ScriptSource>(filename.Value); if (include == null) { throw new MissingResourceException($"Unknown Source resource: {filename.Value}"); } jt.Tokenize(include.Source.Split('\n')); Tokens.AddRange(jt.Tokens); } else { Tokens.Add(new Token { Type = TokenType.EOL }); } } }
/// <summary>
/// Main (expression) lexer state for the XQuery grammar. Dispatches on the
/// next character or keyword phrase to emit punctuation, prolog/FLWOR
/// keywords, kind tests, direct constructors, axis specifiers, literals and
/// QNames, switching <c>m_state</c> / pushing <c>m_states</c> as the XQuery
/// lexical rules require.
/// </summary>
private void DefaultState()
{
    SkipWhitespace();
    BeginToken();
    char c = Peek(0);
    if (c == '\0')
        ConsumeToken(0); // EOF
    //else if (MatchText("(:"))
    //{
    //    m_states.Push(LexerState.Default);
    //    m_state = LexerState.ExprComment;
    //    ExprCommentState();
    //}
    else if (MatchText("(#"))
    {
        EndToken();
        ConsumeToken(Token.PRAGMA_BEGIN);
        m_state = LexerState.Pragma;
    }
    else if (c == '.')
    {
        // "..", a decimal like ".5", or the single context-item '.'.
        if (Peek(1) == '.')
        {
            Read(); Read();
            EndToken();
            ConsumeToken(Token.DOUBLE_PERIOD);
        }
        else if (XmlCharType.Instance.IsDigit(Peek(1)))
            ConsumeNumber();
        else
            ConsumeChar(Read());
        m_state = LexerState.Operator;
    }
    else if (c == ')')
    {
        ConsumeChar(Read());
        SkipWhitespace();
        BeginToken();
        // ") as" introduces a sequence-type annotation.
        if (MatchIdentifer("as"))
        {
            EndToken();
            ConsumeToken(Token.AS);
            m_state = LexerState.ItemType;
        }
        else
            m_state = LexerState.Operator;
    }
    else if (c == '*')
    {
        ConsumeChar(Read());
        // "*:NCName" wildcard form.
        if (Peek(0) == ':')
        {
            BeginToken();
            ConsumeChar(Read());
            c = Peek(0);
            if (c != 0 && XmlCharType.Instance.IsStartNCNameChar(c))
                ConsumeNCName();
            else
                throw new XQueryException(Properties.Resources.ExpectedNCName);
        }
        m_state = LexerState.Operator;
    }
    else if (c == ';' || c == ',' || c == '(' || c == '-' || c == '+' || c == '@' || c == '~')
        ConsumeChar(Read());
    else if (c == '/')
    {
        // "//" (descendant-or-self shorthand) or a single '/'.
        if (Peek(1) == '/')
        {
            Read(); Read();
            EndToken();
            ConsumeToken(Token.DOUBLE_SLASH);
        }
        else
            ConsumeChar(Read());
    }
    else if (MatchIdentifer("if", "("))
    {
        EndToken("if");
        ConsumeToken(Token.IF);
        BeginToken(m_bookmark[1]);
        ConsumeChar('(');
    }
    // --- Prolog declarations ---
    else if (MatchIdentifer("declare", "construction")) { EndToken(); ConsumeToken(Token.DECLARE_CONSTRUCTION); m_state = LexerState.Operator; }
    else if (MatchIdentifer("declare", "default", "order")) { EndToken(); ConsumeToken(Token.DECLARE_DEFAULT_ORDER); m_state = LexerState.Operator; }
    else if (MatchIdentifer("declare", "default", "collation")) { EndToken(); ConsumeToken(Token.DECLARE_DEFAULT_COLLATION); m_state = LexerState.NamespaceDecl; }
    else if (MatchIdentifer("declare", "namespace")) { EndToken(); ConsumeToken(Token.DECLARE_NAMESPACE); m_state = LexerState.NamespaceDecl; }
    else if (MatchIdentifer("module", "namespace")) { EndToken(); ConsumeToken(Token.MODULE_NAMESPACE); m_state = LexerState.NamespaceDecl; }
    else if (MatchIdentifer("declare", "base-uri")) { EndToken(); ConsumeToken(Token.DECLARE_BASE_URI); m_state = LexerState.NamespaceDecl; }
    else if (MatchIdentifer("declare", "default", "element")) { EndToken(); ConsumeToken(Token.DECLARE_DEFAULT_ELEMENT); m_state = LexerState.NamespaceKeyword; }
    else if (MatchIdentifer("declare", "default", "function")) { EndToken(); ConsumeToken(Token.DECLARE_DEFAULT_FUNCTION); m_state = LexerState.NamespaceKeyword; }
    else if (MatchIdentifer("import", "schema")) { EndToken(); ConsumeToken(Token.IMPORT_SCHEMA); m_state = LexerState.NamespaceKeyword; }
    else if (MatchIdentifer("import", "module")) { EndToken(); ConsumeToken(Token.IMPORT_MODULE); m_state = LexerState.NamespaceKeyword; }
    else if (MatchIdentifer("declare", "copy-namespaces")) { EndToken(); ConsumeToken(Token.DECLARE_COPY_NAMESPACES); m_state = LexerState.NamespaceKeyword; }
    // --- FLWOR / quantifier / variable keywords: each must be followed by '$' ---
    else if (MatchIdentifer("for")) { EndToken(); ConsumeToken(Token.FOR); SkipWhitespace(); BeginToken(); if (Peek(0) == '$') ConsumeChar(Read()); else throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "for"); m_state = LexerState.VarName; }
    else if (MatchIdentifer("parallel", "for")) { EndToken(); ConsumeToken(Token.PFOR); SkipWhitespace(); BeginToken(); if (Peek(0) == '$') ConsumeChar(Read()); else throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "for"); m_state = LexerState.VarName; }
    else if (MatchIdentifer("let")) { EndToken(); ConsumeToken(Token.LET); SkipWhitespace(); BeginToken(); if (Peek(0) == '$') ConsumeChar(Read()); else throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "let"); m_state = LexerState.VarName; }
    else if (MatchIdentifer("some")) { EndToken(); ConsumeToken(Token.SOME); SkipWhitespace(); BeginToken(); if (Peek(0) == '$') ConsumeChar(Read()); else throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "some"); m_state = LexerState.VarName; }
    else if (MatchIdentifer("every")) { EndToken(); ConsumeToken(Token.EVERY); SkipWhitespace(); BeginToken(); if (Peek(0) == '$') ConsumeChar(Read()); else throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "every"); m_state = LexerState.VarName; }
    else if (MatchIdentifer("declare", "variable")) { EndToken(); ConsumeToken(Token.DECLARE_VARIABLE); SkipWhitespace(); BeginToken(); if (Peek(0) == '$') ConsumeChar(Read()); else throw new XQueryException(Properties.Resources.ExpectedVariablePrefix, "declare variable"); m_state = LexerState.VarName; }
    else if (c == '$')
    {
        ConsumeChar(Read());
        m_state = LexerState.VarName;
    }
    // --- Kind tests: "name(" enters KindTest with the return state pushed ---
    else if (MatchIdentifer("element", "(")) { EndToken("element"); ConsumeToken(Token.ELEMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("attribute", "(")) { EndToken("attribute"); ConsumeToken(Token.ATTRIBUTE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("schema-element", "(")) { EndToken("schema-element"); ConsumeToken(Token.SCHEMA_ELEMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("schema-attribute", "(")) { EndToken("schema-attribute"); ConsumeToken(Token.SCHEMA_ATTRIBUTE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("comment", "(")) { EndToken("comment"); ConsumeToken(Token.COMMENT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("text", "(")) { EndToken("text"); ConsumeToken(Token.TEXT); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("node", "(")) { EndToken("node"); ConsumeToken(Token.NODE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("document-node", "(")) { EndToken("document-node"); ConsumeToken(Token.DOCUMENT_NODE); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTest; }
    else if (MatchIdentifer("processing-instruction", "(")) { EndToken("processing-instruction"); ConsumeToken(Token.PROCESSING_INSTRUCTION); BeginToken(m_bookmark[1]); ConsumeChar('('); m_states.Push(LexerState.Operator); m_state = LexerState.KindTestForPi; }
    // --- Direct XML constructors ---
    else if (MatchText("<!--")) { EndToken(); ConsumeToken(Token.COMMENT_BEGIN); m_states.Push(LexerState.Operator); m_state = LexerState.XmlComment; }
    else if (MatchText("<?")) { EndToken(); ConsumeToken(Token.PI_BEGIN); m_states.Push(LexerState.Operator); m_state = LexerState.ProcessingInstruction; }
    else if (MatchText("<![CDATA[")) { EndToken(); ConsumeToken(Token.CDATA_BEGIN); m_states.Push(LexerState.Operator); m_state = LexerState.CDataSection; }
    else if (c == '<') { Read(); EndToken(); ConsumeToken(Token.BeginTag); m_states.Push(LexerState.Operator); m_state = LexerState.StartTag; }
    else if (MatchIdentifer("declare", "boundary-space")) { EndToken(); ConsumeToken(Token.DECLARE_BOUNDARY_SPACE); m_state = LexerState.XmlSpace_Decl; }
    else if (c == '}')
    {
        // Close an enclosed expression; resume the saved state.
        ConsumeChar(Read());
        m_state = m_states.Pop();
    }
    else if (c == '{')
    {
        ConsumeChar(Read());
        m_states.Push(LexerState.Operator);
    }
    // --- Block-style expression keywords ---
    else if (MatchIdentifer("validate", "{")) { EndToken("validate"); ConsumeToken(Token.VALIDATE); BeginToken(m_bookmark[1]); ConsumeChar('{'); m_states.Push(LexerState.Operator); }
    else if (MatchIdentifer("validate", "lax")) { EndToken("validate"); ConsumeToken(Token.VALIDATE); BeginToken(m_bookmark[1]); ConsumeToken(Token.LAX); m_states.Push(LexerState.Operator); }
    else if (MatchIdentifer("validate", "strict")) { EndToken("validate"); ConsumeToken(Token.VALIDATE); BeginToken(m_bookmark[1]); ConsumeToken(Token.STRICT); m_states.Push(LexerState.Operator); }
    else if (MatchIdentifer("typeswitch", "(")) { EndToken("typeswitch"); ConsumeToken(Token.TYPESWITCH); BeginToken(m_bookmark[1]); ConsumeChar('('); }
    else if (MatchIdentifer("document", "{")) { EndToken("document"); ConsumeToken(Token.DOCUMENT); BeginToken(m_bookmark[1]); ConsumeChar('{'); m_states.Push(LexerState.Operator); }
    else if (MatchIdentifer("text", "{")) { EndToken("text"); ConsumeToken(Token.TEXT); BeginToken(m_bookmark[1]); ConsumeChar('{'); m_states.Push(LexerState.Operator); }
    else if (MatchIdentifer("comment", "{")) { EndToken("comment"); ConsumeToken(Token.COMMENT); BeginToken(m_bookmark[1]); ConsumeChar('{'); m_states.Push(LexerState.Operator); }
    else if (MatchIdentifer("declare", "function")) { EndToken(); ConsumeToken(Token.DECLARE_FUNCTION); }
    else if (MatchIdentifer("ordered", "{")) { EndToken("ordered"); ConsumeToken(Token.ORDERED); BeginToken(m_bookmark[1]); ConsumeChar('{'); m_states.Push(LexerState.Default); }
    else if (MatchIdentifer("unordered", "{")) { EndToken("unordered"); ConsumeToken(Token.UNORDERED); BeginToken(m_bookmark[1]); ConsumeChar('{'); m_states.Push(LexerState.Default); }
    else if (MatchIdentifer("declare", "ordering")) { EndToken(); ConsumeToken(Token.DECLARE_ORDERING); m_state = LexerState.DeclareOrdering; }
    else if (MatchIdentifer("xquery", "version")) { EndToken(); ConsumeToken(Token.XQUERY_VERSION); m_state = LexerState.XQueryVersion; }
    // NOTE(review): unreachable — "(#" is already handled at the top of this method.
    else if (MatchText("(#")) { EndToken(); ConsumeToken(Token.PRAGMA_BEGIN); m_state = LexerState.Pragma; }
    else if (MatchIdentifer("declare", "option")) { EndToken(); ConsumeToken(Token.DECLARE_OPTION); m_state = LexerState.Option; }
    // --- Axis specifiers (longer names before their prefixes) ---
    else if (MatchIdentifer("ancestor-or-self", "::")) { EndToken(); ConsumeToken(Token.AXIS_ANCESTOR_OR_SELF); }
    else if (MatchIdentifer("ancestor", "::")) { EndToken(); ConsumeToken(Token.AXIS_ANCESTOR); }
    else if (MatchIdentifer("attribute", "::")) { EndToken(); ConsumeToken(Token.AXIS_ATTRIBUTE); }
    else if (MatchIdentifer("child", "::")) { EndToken(); ConsumeToken(Token.AXIS_CHILD); }
    else if (MatchIdentifer("descendant-or-self", "::")) { EndToken(); ConsumeToken(Token.AXIS_DESCENDANT_OR_SELF); }
    else if (MatchIdentifer("descendant", "::")) { EndToken(); ConsumeToken(Token.AXIS_DESCENDANT); }
    else if (MatchIdentifer("following-sibling", "::")) { EndToken(); ConsumeToken(Token.AXIS_FOLLOWING_SIBLING); }
    else if (MatchIdentifer("following", "::")) { EndToken(); ConsumeToken(Token.AXIS_FOLLOWING); }
    else if (MatchIdentifer("parent", "::")) { EndToken(); ConsumeToken(Token.AXIS_PARENT); }
    else if (MatchIdentifer("preceding-sibling", "::")) { EndToken(); ConsumeToken(Token.AXIS_PRECEDING_SIBLING); }
    else if (MatchIdentifer("preceding", "::")) { EndToken(); ConsumeToken(Token.AXIS_PRECEDING); }
    else if (MatchIdentifer("self", "::")) { EndToken(); ConsumeToken(Token.AXIS_SELF); }
    else if (MatchIdentifer("namespace", "::")) { EndToken(); ConsumeToken(Token.AXIS_NAMESPACE); }
    else if (MatchIdentifer("at"))
    {
        // "at" is a keyword before a URI literal, otherwise an ordinary QName.
        EndToken();
        SkipWhitespace();
        if (Peek(0) == '"' || Peek(0) == '\'')
        {
            ConsumeToken(Token.AT);
            ConsumeLiteral();
            m_state = LexerState.NamespaceDecl;
        }
        else
        {
            ConsumeToken(Token.QName, new Qname("at"));
            if (Peek(0) != ')')
                m_state = LexerState.Operator;
        }
    }
    else if (c == '"' || c == '\'')
    {
        ConsumeLiteral();
        m_state = LexerState.Operator;
    }
    else if (XmlCharType.Instance.IsDigit(c))
    {
        ConsumeNumber();
        m_state = LexerState.Operator;
    }
    else if (XmlCharType.Instance.IsStartNameChar(c))
    {
        // General name: NCName, prefixed wildcard "prefix:*", QName, or the
        // computed-constructor keywords element/attribute/processing-instruction.
        StringBuilder sb = new StringBuilder();
        while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNCNameChar(c))
            sb.Append(Read());
        if (Peek(0) == ':')
        {
            if (Peek(1) == '*')
            {
                // "prefix:*" wildcard: NCName token, then ':' and '*'.
                EndToken();
                ConsumeToken(Token.NCName, new Qname(sb.ToString()));
                BeginToken();
                ConsumeChar(Read());
                BeginToken();
                ConsumeChar(Read());
                m_state = LexerState.Operator;
            }
            else
            {
                // Prefixed QName: keep reading the local part.
                while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
                    sb.Append(Read());
                EndToken();
                ConsumeToken(Token.QName, new Qname(sb.ToString()));
                SkipWhitespace();
                // A following '(' means a function call; stay in Default.
                if (Peek(0) != '(')
                    m_state = LexerState.Operator;
            }
        }
        else
        {
            EndToken();
            int anchor = m_anchor;
            int length = m_length;
            string ncname = sb.ToString();
            if (ncname == "element" || ncname == "attribute")
            {
                // Computed element/attribute constructor: "element {" or
                // "element Name {".
                SkipWhitespace();
                if (Peek(0) == '{')
                {
                    if (ncname == "element")
                        ConsumeToken(Token.ELEMENT, anchor, length);
                    else
                        ConsumeToken(Token.ATTRIBUTE, anchor, length);
                    BeginToken();
                    ConsumeChar(Read());
                    m_states.Push(LexerState.Operator);
                    return;
                }
                else if (XmlCharType.Instance.IsStartNameChar(Peek(0)))
                {
                    BeginToken();
                    sb = new StringBuilder();
                    while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
                        sb.Append(Read());
                    EndToken();
                    int anchor2 = m_anchor;
                    int length2 = m_length;
                    SkipWhitespace();
                    if (Peek(0) == '{')
                    {
                        if (ncname == "element")
                            ConsumeToken(Token.ELEMENT, anchor, length);
                        else
                            ConsumeToken(Token.ATTRIBUTE, anchor, length);
                        ConsumeToken(Token.QName, new Qname(sb.ToString()), anchor2, length2);
                        BeginToken();
                        ConsumeChar(Read());
                        m_states.Push(LexerState.Operator);
                        return;
                    }
                    else
                        throw new XQueryException(Properties.Resources.ExpectedBlockStart, ncname, sb.ToString());
                }
            }
            else if (ncname == "processing-instruction")
            {
                // Computed PI constructor: "processing-instruction {" or
                // "processing-instruction Target {".
                SkipWhitespace();
                if (Peek(0) == '{')
                {
                    ConsumeToken(Token.PROCESSING_INSTRUCTION, anchor, length);
                    BeginToken();
                    ConsumeChar(Read());
                    m_states.Push(LexerState.Operator);
                    return;
                }
                else if (XmlCharType.Instance.IsStartNameChar(Peek(0)))
                {
                    sb = new StringBuilder();
                    BeginToken();
                    while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
                        sb.Append(Read());
                    EndToken();
                    int anchor2 = m_anchor;
                    int length2 = m_length;
                    SkipWhitespace();
                    if (Peek(0) == '{')
                    {
                        ConsumeToken(Token.PROCESSING_INSTRUCTION, anchor, length);
                        ConsumeToken(Token.NCName, new Qname(sb.ToString()), anchor2, length2);
                        BeginToken();
                        ConsumeChar(Read());
                        m_states.Push(LexerState.Operator);
                        return;
                    }
                    else
                        throw new XQueryException(Properties.Resources.ExpectedBlockStart, ncname, sb.ToString());
                }
            }
            // Plain unprefixed QName (or a function call if '(' follows).
            ConsumeToken(Token.QName, new Qname(ncname));
            SkipWhitespace();
            if (Peek(0) != '(')
                m_state = LexerState.Operator;
        }
    }
}
/// <summary>
/// Lexes the interior of a closing tag: optional whitespace, the tag's
/// QName, and the terminating '&gt;' which restores the previous state.
/// </summary>
private void EndTagState()
{
    char ch = Peek(0);
    if (ch == 0)
        return;
    BeginToken();
    if (ch == '>')
    {
        // Closing tag done; resume whatever state pushed us here.
        ConsumeChar(Read());
        m_state = m_states.Pop();
        return;
    }
    if (XmlCharType.Instance.IsWhiteSpace(ch))
    {
        ConsumeS();
    }
    else if (XmlCharType.Instance.IsStartNameChar(ch))
    {
        ConsumeQName();
    }
}
/// <summary>
/// Restores the lexer from a snapshot previously captured as a
/// <c>TokenizerState</c>: the current state, the state stack, and the
/// queue of pending tokens (both collections are copied, not shared).
/// </summary>
public void RevertToState(object data)
{
    var snapshot = (TokenizerState)data;
    m_state = snapshot.current;
    m_states = new Stack<LexerState>(snapshot.states);
    m_token = new Queue<CurrentToken>(snapshot.tokens);
}
/// <summary>
/// Returns the next token code for the parser, refilling the pending-token
/// queue on demand via <c>EnterState()</c>. Publishes the token's value,
/// anchor, length and state as a side effect; yields
/// <c>Token.yyErrorCode</c> when no further token can be produced.
/// </summary>
public int token()
{
    if (m_token.Count == 0)
    {
        // Queue exhausted: run the state machine to produce more tokens.
        EnterState();
        if (m_token.Count == 0)
        {
            // Still nothing — report a scan error to the parser.
            m_value = null;
            return Token.yyErrorCode;
        }
    }
    CurrentToken next = m_token.Dequeue();
    m_value = next.value;
    CurrentPos = next.anchor;
    CurrentLength = next.length;
    CurrentState = next.state;
    return next.token;
}
/// <summary>
/// Lexes the content of an apostrophe-quoted attribute value: the closing
/// quote, the doubled-quote escape (''), the {{ / }} brace escapes,
/// enclosed expressions, predefined entity references, character
/// references, and plain character runs.
/// </summary>
private void AposAttributeContentState()
{
    if (Peek(0) == 0)
        return;
    char c = Peek(0);
    BeginToken();
    if (c == '\'' && Peek(1) != '\'')
    {
        // A lone apostrophe closes the attribute value.
        Read();
        EndToken();
        ConsumeToken(Token.Apos);
        m_state = LexerState.AttributeState;
    }
    else if (MatchText("{{"))
    {
        // "{{" escapes a literal '{'.
        ConsumeChar('{');
        m_anchor++;
        ConsumeChar('{');
    }
    else if (MatchText("}}"))
    {
        // "}}" escapes a literal '}'.
        ConsumeChar('}');
        m_anchor++;
        ConsumeChar('}');
    }
    else if (c == '{')
    {
        // Enclosed expression: lex in Default state, then return here.
        ConsumeChar(Read());
        m_states.Push(m_state);
        m_state = LexerState.Default;
    }
    // Predefined entity references. BUG FIX: these literals had been
    // HTML-unescaped (e.g. MatchText(">") emitting PredefinedEntityRef(">"),
    // and an unterminated """ string for the quote case, which does not
    // compile); restored to the XQuery entity spellings.
    else if (MatchText("&gt;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&gt;"));
    }
    else if (MatchText("&lt;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&lt;"));
    }
    else if (MatchText("&amp;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&amp;"));
    }
    else if (MatchText("&quot;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&quot;"));
    }
    else if (MatchText("&apos;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&apos;"));
    }
    else if (MatchText("&#x"))
        ConsumeCharRefHex();
    else if (MatchText("&#"))
        ConsumeCharRef();
    else if (c == '\'' && Peek(1) == '\'')
    {
        // '' inside the value is an escaped apostrophe.
        Read();
        Read();
        EndToken();
        ConsumeToken(Token.EscapeApos);
    }
    else
    {
        // Plain character run up to the next markup-significant character.
        StringBuilder sb = new StringBuilder();
        while ((c = Peek(0)) != 0 && c != '{' && c != '&' && c != '\'')
            sb.Append(Read());
        EndToken();
        ConsumeToken(Token.Char, new Literal(sb.ToString()));
    }
}
/// <summary>
/// Collects everything up to the "]]&gt;" terminator as one string literal,
/// then emits CDATA_END and returns to the state that opened the section.
/// Returns silently (token left open) if the input ends first.
/// </summary>
private void CDataSectionState()
{
    StringBuilder sb = new StringBuilder();
    char c;
    BeginToken();
    // Accumulate raw characters until "]]>"; bail out on EOF.
    while (!((c = Peek(0)) == ']' && Peek(1) == ']' && Peek(2) == '>'))
    {
        if (Peek(0) == 0)
            return;
        sb.Append(Read());
    }
    EndToken();
    ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
    BeginToken();
    Read(); // ]
    Read(); // ]
    Read(); // >
    EndToken();
    ConsumeToken(Token.CDATA_END);
    m_state = m_states.Pop();
}
/// <summary>
/// Collects the body of a processing instruction up to the "?&gt;"
/// terminator as one string literal, then emits PI_END and returns to the
/// state that opened the PI. Returns silently if the input ends first.
/// </summary>
private void ProcessingInstructionContentState()
{
    if (Peek(0) == 0)
        return;
    StringBuilder sb = new StringBuilder();
    char c;
    BeginToken();
    // Accumulate raw characters until "?>"; bail out on EOF.
    while (!((c = Peek(0)) == '?' && Peek(1) == '>'))
    {
        if (Peek(0) == 0)
            return;
        sb.Append(Read());
    }
    EndToken();
    ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
    BeginToken();
    Read(); // ?
    Read(); // >
    EndToken();
    ConsumeToken(Token.PI_END);
    m_state = m_states.Pop();
}
/// <summary>
/// Lexes the command line, using the same rules as <see cref="Environment.GetCommandLineArgs"/>.
/// </summary>
/// <param name="commandLine">The command line to parse.</param>
/// <returns>The lexed command line.</returns>
/// <remarks>
/// Implemented as a four-state machine (Default, Argument, Quoted,
/// EndQuotedArgument) driven one character at a time; arguments are
/// accumulated in <c>Buffer</c> and yielded lazily as they complete.
/// </remarks>
public static IEnumerable <string> Lex(this string commandLine)
{
    Contract.Requires(commandLine != null);
    Contract.Ensures(Contract.Result <IEnumerable <string> >() != null);
    // The MSDN information for <see cref="Environment.GetCommandLineArgs"/> is incomplete.
    // This blog post fills in the gaps: http://www.hardtoc.com/archives/162 (webcite: http://www.webcitation.org/62LHTVelJ )
    LexerState state = LexerState.Default;
    Buffer buffer = new Buffer();
    foreach (var ch in commandLine)
    {
        switch (state)
        {
            case LexerState.Default:
                // Between arguments: whitespace is skipped, anything else
                // starts a new argument.
                if (ch == '"')
                {
                    // Enter the quoted state, without placing anything in the buffer.
                    state = LexerState.Quoted;
                    break;
                }
                // Whitespace is ignored.
                if (ch == ' ' || ch == '\t')
                {
                    break;
                }
                buffer.AppendChar(ch);
                state = LexerState.Argument;
                break;
            case LexerState.Argument:
                // We have an argument started, though it may be just an empty string for now.
                if (ch == '"')
                {
                    // Handle the special rules for any backslashes preceding a double-quote.
                    if (buffer.AppendQuote())
                    {
                        // An even number of backslashes means that this is a normal double-quote.
                        state = LexerState.Quoted;
                    }
                    break;
                }
                if (ch == ' ' || ch == '\t')
                {
                    // Whitespace ends this argument, so publish it and restart in the default state.
                    yield return(buffer.Consume());
                    state = LexerState.Default;
                    break;
                }
                // Count backslashes; put other characters directly into the buffer.
                buffer.AppendChar(ch);
                break;
            case LexerState.Quoted:
                // We are within quotes, but may already have characters in the argument buffer.
                if (ch == '"')
                {
                    // Handle the special rules for any backslashes preceding a double-quote.
                    if (buffer.AppendQuote())
                    {
                        // An even number of backslashes means that this is a normal double-quote.
                        state = LexerState.EndQuotedArgument;
                    }
                    break;
                }
                // Any non-quote character (including whitespace) is appended to the argument buffer.
                buffer.AppendChar(ch);
                break;
            case LexerState.EndQuotedArgument:
                // This is a special state that is treated like Argument or Quoted depending on whether the next character is a quote. It's not possible to stay in this state.
                if (ch == '"')
                {
                    // We just read a double double-quote within a quoted context, so we add the quote to the buffer and re-enter the quoted state.
                    buffer.AppendNormalChar(ch);
                    state = LexerState.Quoted;
                }
                else if (ch == ' ' || ch == '\t')
                {
                    // In this case, the double-quote we just read did in fact end the quotation, so we publish the argument and restart in the default state.
                    yield return(buffer.Consume());
                    state = LexerState.Default;
                }
                else
                {
                    // If the double-quote is followed by a non-quote, non-whitespace character, then it's considered a continuation of the argument (leaving the quoted state).
                    buffer.AppendChar(ch);
                    state = LexerState.Argument;
                }
                break;
        }
    }
    // If we end in the middle of an argument (or even a quotation), then we just publish what we have.
    if (state != LexerState.Default)
    {
        yield return(buffer.Consume());
    }
}
/// <summary>
/// Lexes text inside an element: closing tags, enclosed expressions,
/// brace escapes, nested comments / PIs / CDATA / child elements,
/// predefined entity references, character references, and plain
/// character runs.
/// </summary>
private void ElementContentState()
{
    BeginToken();
    char c = Peek(0);
    if (c == 0)
        ConsumeToken(0); // EOF
    else if (MatchText("</"))
    {
        // Closing tag begins: '<' and '/' are consumed as separate chars.
        ConsumeChar('<');
        m_anchor++;
        ConsumeChar('/');
        m_state = LexerState.EndTag;
    }
    else if (MatchText("{{"))
    {
        // "{{" escapes a literal '{'.
        ConsumeChar('{');
        m_anchor++;
        ConsumeChar('{');
    }
    else if (MatchText("}}"))
    {
        // "}}" escapes a literal '}'.
        ConsumeChar('}');
        m_anchor++;
        ConsumeChar('}');
    }
    else if (c == '{')
    {
        // Enclosed expression: lex in Default state, then return here.
        ConsumeChar(Read());
        m_states.Push(m_state);
        m_state = LexerState.Default;
    }
    else if (MatchText("<!--"))
    {
        EndToken();
        ConsumeToken(Token.COMMENT_BEGIN);
        m_states.Push(m_state);
        m_state = LexerState.XmlComment;
    }
    else if (MatchText("<?"))
    {
        EndToken();
        ConsumeToken(Token.PI_BEGIN);
        m_states.Push(m_state);
        m_state = LexerState.ProcessingInstruction;
    }
    else if (MatchText("<![CDATA["))
    {
        EndToken();
        ConsumeToken(Token.CDATA_BEGIN);
        m_states.Push(m_state);
        m_state = LexerState.CDataSection;
    }
    else if (c == '<')
    {
        // Nested child element.
        Read();
        EndToken();
        ConsumeToken(Token.BeginTag);
        m_states.Push(m_state);
        m_state = LexerState.StartTag;
    }
    // Predefined entity references. BUG FIX: these literals had been
    // HTML-unescaped (e.g. MatchText(">") emitting PredefinedEntityRef(">"),
    // and an unterminated """ string for the quote case, which does not
    // compile); restored to the XQuery entity spellings.
    else if (MatchText("&gt;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&gt;"));
    }
    else if (MatchText("&lt;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&lt;"));
    }
    else if (MatchText("&amp;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&amp;"));
    }
    else if (MatchText("&quot;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&quot;"));
    }
    else if (MatchText("&apos;"))
    {
        EndToken();
        ConsumeToken(Token.PredefinedEntityRef, new PredefinedEntityRef("&apos;"));
    }
    else if (MatchText("&#x"))
        ConsumeCharRefHex();
    else if (MatchText("&#"))
        ConsumeCharRef();
    else
    {
        // Plain character run up to the next markup-significant character.
        StringBuilder sb = new StringBuilder();
        while ((c = Peek(0)) != 0 && c != '<' && c != '&' && c != '{' && c != '}')
            sb.Append(Read());
        EndToken();
        if (sb.Length == 0)
            return;
        ConsumeToken(Token.Char, new Literal(sb.ToString()));
    }
}
/// <summary>
/// Lexes the target name of a processing instruction after the PI opener:
/// whitespace switches to content mode, "?&gt;" closes the PI immediately,
/// otherwise the target name is read. The reserved target "xml" is rejected.
/// </summary>
private void ProcessingInstructionState()
{
    if (Peek(0) == 0)
        return;
    BeginToken();
    char c = Peek(0);
    if (XmlCharType.Instance.IsWhiteSpace(c))
    {
        ConsumeS();
        m_state = LexerState.ProcessingInstructionContent;
    }
    else if (MatchText("?>"))
    {
        // Empty PI; restore the state that opened it.
        EndToken();
        ConsumeToken(Token.PI_END);
        m_state = m_states.Pop();
    }
    else if (XmlCharType.Instance.IsStartNameChar(c))
    {
        StringBuilder sb = new StringBuilder();
        while ((c = Peek(0)) != 0 && XmlCharType.Instance.IsNameChar(c))
            sb.Append(Read());
        EndToken();
        // "xml" is reserved as a PI target by the XML spec.
        if (sb.ToString() == "xml")
            throw new XQueryException(Properties.Resources.InvalidPITarget);
        ConsumeToken(Token.StringLiteral, new Literal(sb.ToString()));
    }
}
/// <summary>
/// Verifies that trivia tokens (whitespace and invalid characters) are
/// skipped by the scanner but collected as prefix trivia on the following
/// lexeme, and that peeking/reading past end of input reports EOF with a
/// zero-width span at the end of the source.
/// </summary>
public void SkipTokenTest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();
    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));
    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);
    string source = "asdf04a 1107 else Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));
    scanner.SetTriviaTokens(WHITESPACE.Index, ERROR.Index);
    // Use the innermost state so IF/ELSE/XMLNS are all active.
    info.LexerStateIndex = xml.Index;

    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);
    Assert.AreEqual("asdf04a", l1.Value);
    Assert.AreEqual(0, l1.PrefixTrivia.Count);

    Lexeme l2 = scanner.Read();
    Assert.AreEqual(NUM.Index, l2.TokenIndex);
    Assert.AreEqual("1107", l2.Value);
    Assert.AreEqual(1, l2.PrefixTrivia.Count);

    Lexeme l3 = scanner.Read();
    Assert.AreEqual(ELSE.Index, l3.TokenIndex);
    Assert.AreEqual("else", l3.Value);
    // BUG FIX: previously re-asserted l2.PrefixTrivia.Count (copy/paste);
    // the intent is l3's single leading space.
    Assert.AreEqual(1, l3.PrefixTrivia.Count);

    Lexeme l4 = scanner.Read();
    Assert.AreEqual(IF.Index, l4.TokenIndex);
    Assert.AreEqual("if", l4.Value);
    // " Z " => space + invalid 'Z' + space, all skipped as trivia.
    Assert.AreEqual(3, l4.PrefixTrivia.Count);

    int p1 = scanner.Peek();
    Assert.AreEqual(ID.Index, p1);
    // Peek further ahead without consuming; 4 tokens ahead is EOF.
    int p2 = scanner.Peek2();
    int p3 = scanner.Peek(3);
    int peof = scanner.Peek(4);
    Assert.AreEqual(info.EndOfStreamTokenIndex, peof);

    Lexeme l6 = scanner.Read();
    Lexeme l7 = scanner.Read();
    Assert.AreEqual(XMLNS.Index, l7.TokenIndex);
    Lexeme l8 = scanner.Read();
    Assert.AreEqual(NUM.Index, l8.TokenIndex);

    Lexeme leof = scanner.Read();
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
    // EOF lexeme is zero-width and positioned at the end of the source.
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
}
/// <summary>
/// Lexes the argument of a processing-instruction(...) kind test: either
/// the closing ')', an NCName naming the target, or a quoted string
/// literal naming the target.
/// </summary>
private void KindTestForPiState()
{
    SkipWhitespace();
    char ch = Peek(0);
    if (ch == 0)
        return;
    BeginToken();
    if (ch == ')')
    {
        // Kind test closed; resume the saved state.
        ConsumeChar(Read());
        m_state = m_states.Pop();
        return;
    }
    //else if (MatchText("(:"))
    //{
    //    m_states.Push(m_state);
    //    m_state = LexerState.ExprComment;
    //    ExprCommentState();
    //}
    if (XmlCharType.Instance.IsNCNameChar(ch))
    {
        ConsumeNCName();
    }
    else if (ch == '\'' || ch == '"')
    {
        ConsumeLiteral();
    }
}
/// <summary>
/// Verifies DFA construction from a lexicon with nested lexer states:
/// tokens defined in a parent state remain visible in sub-states, keyword
/// tokens shadow the identifier token only in the state (or sub-state)
/// that defines them, and unmatched input stops the automaton.
/// </summary>
public void LexerStateToDFATest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();
    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));

    // Exercise DFA construction and table compression end to end.
    DFAModel dfa = DFAModel.Create(lexicon);
    CompressedTransitionTable tc = CompressedTransitionTable.Compress(dfa);
    ScannerInfo si = lexicon.CreateScannerInfo();
    FiniteAutomationEngine engine = new FiniteAutomationEngine(si.TransitionTable, si.CharClassTable);

    // In the global state, keywords are not active: "if" is a plain ID.
    engine.InputString("if");
    Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    engine.InputString("12345");
    Assert.AreEqual(NUM.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    engine.InputString("asdf12dd");
    Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    // A single out-of-alphabet char matches the catch-all ERROR token.
    engine.InputString("A");
    Assert.AreEqual(ERROR.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    // No token spans multiple invalid chars or trailing whitespace.
    engine.InputString("AAA");
    Assert.IsTrue(engine.IsAtStoppedState);
    engine.Reset();
    engine.InputString("if ");
    Assert.IsTrue(engine.IsAtStoppedState);
    engine.Reset();

    // In the keywords state, IF/ELSE shadow ID; XMLNS is still inactive.
    si.LexerStateIndex = keywords.Index;
    engine.InputString("if");
    Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    engine.InputString("else");
    Assert.AreEqual(ELSE.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    engine.InputString("xmlns");
    Assert.AreEqual(ID.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();

    // In the xml sub-state, inherited keywords and XMLNS are all active.
    si.LexerStateIndex = xml.Index;
    engine.InputString("if");
    Assert.AreEqual(IF.Index, si.GetTokenIndex(engine.CurrentState));
    engine.Reset();
    // "xml" is only a prefix of "xmlns"; the engine has not stopped yet.
    engine.InputString("xml");
    Assert.IsFalse(engine.IsAtStoppedState);
    engine.Reset();
    engine.InputString("xmlns");
    Assert.AreEqual(XMLNS.Index, si.GetTokenIndex(engine.CurrentState));
}
/// <summary>
/// Lexes the interior of a kind test such as element(...) / attribute(...):
/// a wildcard '*', a QName, a nested element()/schema-element() test, an
/// enclosed expression, or the closing ')'.
/// </summary>
private void KindTestState()
{
    SkipWhitespace();
    if (Peek(0) == 0)
        return;
    BeginToken();
    char c = Peek(0);
    if (c == '{')
    {
        // Enclosed expression: lex in Default, return to Operator after.
        ConsumeChar(Read());
        m_states.Push(LexerState.Operator);
        m_state = LexerState.Default;
    }
    else if (c == ')')
    {
        // Kind test closed; resume the saved state.
        ConsumeChar(Read());
        m_state = m_states.Pop();
    }
    else if (c == '*')
    {
        ConsumeChar(Read());
        m_state = LexerState.CloseKindTest;
    }
    else if (MatchIdentifer("element", "("))
    {
        // Nested element(...) test.
        EndToken("element");
        ConsumeToken(Token.ELEMENT);
        BeginToken(m_bookmark[1]);
        ConsumeChar('(');
        m_states.Push(LexerState.KindTest);
    }
    else if (MatchIdentifer("schema-element", "("))
    {
        EndToken("schema-element");
        ConsumeToken(Token.SCHEMA_ELEMENT);
        BeginToken(m_bookmark[1]);
        ConsumeChar('(');
        m_states.Push(LexerState.KindTest);
    }
    else if (XmlCharType.Instance.IsNameChar(c))
    {
        ConsumeQName();
        m_state = LexerState.CloseKindTest;
    }
    //else if (MatchText("(:"))
    //{
    //    m_states.Push(m_state);
    //    m_state = LexerState.ExprComment;
    //    ExprCommentState();
    //}
}
/// <summary>
/// Lexes the keyword tail of namespace-related prolog declarations
/// (copy-namespaces, default element/function namespace, import clauses):
/// URI literals, inherit / no-inherit, preserve / no-preserve,
/// "namespace", "default element" and list commas.
/// </summary>
private void NamespaceKeywordState()
{
    SkipWhitespace();
    if (Peek(0) == 0)
        return;
    BeginToken();
    char c = Peek(0);
    if (c == '\'' || c == '"')
    {
        // A URI literal; continue with the namespace declaration.
        ConsumeLiteral();
        m_state = LexerState.NamespaceDecl;
    }
    else if (MatchIdentifer("inherit"))
    {
        EndToken();
        ConsumeToken(Token.INHERIT);
        m_state = LexerState.Default;
    }
    else if (MatchIdentifer("no-inherit"))
    {
        EndToken();
        ConsumeToken(Token.NO_INHERIT);
        m_state = LexerState.Default;
    }
    else if (MatchIdentifer("namespace"))
    {
        EndToken();
        ConsumeToken(Token.NAMESPACE);
        m_state = LexerState.NamespaceDecl;
    }
    //else if (MatchText("(:"))
    //{
    //    m_states.Push(m_state);
    //    m_state = LexerState.ExprComment;
    //    ExprCommentState();
    //}
    else if (MatchIdentifer("default", "element"))
    {
        EndToken();
        ConsumeToken(Token.DEFAULT_ELEMENT);
        m_state = LexerState.NamespaceKeyword;
    }
    else if (MatchIdentifer("preserve"))
    {
        EndToken();
        ConsumeToken(Token.PRESERVE);
        m_state = LexerState.NamespaceKeyword;
    }
    else if (MatchIdentifer("no-preserve"))
    {
        EndToken();
        ConsumeToken(Token.NO_PRESERVE);
        m_state = LexerState.NamespaceKeyword;
    }
    else if (c == ',')
    {
        // Separator in a keyword list; stay in this state.
        ConsumeChar(Read());
        m_state = LexerState.NamespaceKeyword;
    }
}
/// <summary>
/// Loads a new input string and resets the scan window and lexer state so
/// tokenization starts from the beginning of the input.
/// </summary>
public void initialize(string InputFromInitialization)
{
    Input = InputFromInitialization;
    // Collapse the scan window back to the start of the input.
    StringBegin = 0;
    StringEnd = 0;
    State = LexerState.Ready;
}
/// <summary>
/// Lexes the attribute region of a start tag: the "/&gt;" empty-element
/// close, the '&gt;' that opens element content, a '[' mapping-extension
/// block, quoted attribute values, '=', whitespace, and attribute QNames.
/// </summary>
private void TagAttributeState()
{
    if (Peek(0) == 0)
        return;
    char c = Peek(0);
    BeginToken();
    if (MatchText("/>"))
    {
        // Empty element: '/' and '>' emitted as separate characters.
        ConsumeChar('/');
        m_anchor++;
        ConsumeChar('>');
        m_state = m_states.Pop();
    }
    else if (c == '>')
    {
        ConsumeChar(Read());
        m_state = LexerState.ElementContent;
    }
    else if (c == '[') // Mapping extensions
    {
        ConsumeChar(Read());
        m_states.Push(LexerState.AttributeState);
        m_state = LexerState.Default;
    }
    else if (c == '"')
    {
        // NOTE(review): the double quote is emitted as a plain char while
        // the single quote below gets a dedicated Apos token — looks
        // intentional (Quot vs Apos handling differs) but worth confirming.
        ConsumeChar(Read());
        m_state = LexerState.QuotAttributeContent;
    }
    else if (c == '\'')
    {
        Read();
        EndToken();
        ConsumeToken(Token.Apos);
        m_state = LexerState.AposAttributeContent;
    }
    else if (c == '=')
        ConsumeChar(Read());
    else if (XmlCharType.Instance.IsWhiteSpace(c))
        ConsumeS();
    else if (XmlCharType.Instance.IsStartNameChar(c))
        ConsumeQName();
}
// Builds a lexer over the given NFA scratch pad for the alternatives in
// 'alts'. Each alternative gets its own final NFA node (its "fate");
// alternatives are added in an order derived from QueryLiteral so that,
// after Complete(), NFA node indices reflect the intended tiebreak order.
// The initial DFA state is the epsilon closure of the NFA root.
public Lexer(NFA pad, string tag, LAD[] alts)
{
    this.pad = pad;
    this.alts = alts;
    this.tag = tag;
    int root = pad.AddNode();
    // Sort alternative indices by descending QueryLiteral length 'j'
    // (longer literal prefixes first); ties keep declaration order.
    int[] alt_shuffle = new int[alts.Length];
    for (int i = 0; i < alts.Length; i++) alt_shuffle[i] = i;
    Array.Sort(alt_shuffle, delegate (int i1, int i2) {
        int j1, j2;
        bool c1, c2;
        alts[i1].QueryLiteral(pad, out j1, out c1);
        alts[i2].QueryLiteral(pad, out j2, out c2);
        return (j1 != j2) ? (j2 - j1) : (i1 - i2);
    });
    // Compile each alternative into the NFA, tagging its paths with the
    // alternative's original index as the fate.
    for (int ix = 0; ix < alts.Length; ix++)
    {
        pad.curfate = alt_shuffle[ix];
        int target = pad.AddNode();
        pad.nodes_l[target].final = true;
        alts[alt_shuffle[ix]].ToNFA(pad, root, target);
    }
    nfates = alts.Length;
    // Fate buffer: two slots per fate plus a 2-slot header, all cleared
    // to -1 except the header.
    fatebuffer = new int[nfates*2+2];
    for (int i = 0; i < nfates*2+2; i++)
        fatebuffer[i] = -1;
    fatebuffer[0] = fatebuffer[1] = 0;
    pad.Complete();
    // now the NFA nodes are all in tiebreak order by lowest index
    if (LtmTrace)
    {
        Dump();
    }
    // Initial DFA state = epsilon closure of the root node.
    start = new LexerState(pad);
    start.Add(0);
    pad.Close(start);
    // 'nil' is the shared dead state; both are interned in dfashare.
    nil = new LexerState(pad);
    pad.dfashare[nil] = nil;
    pad.dfashare[start] = start;
}
// Epsilon-closure: expands ls.nstates (a bitset of NFA state indices,
// 32 states per int) in place so it also contains every state reachable
// through unconditioned edges (e.when == null), using greybuf as an
// explicit work stack.
public void Close(LexerState ls)
{
    int ngrey = 0;
    // Seed the work stack with every state already set in the bitmap.
    for (int i = 0; i < ls.nstates.Length; i++)
    {
        int bm = ls.nstates[i];
        for (int j = 0; j < 32; j++)
        {
            if ((bm & (1 << j)) != 0)
                greybuf[ngrey++] = 32*i + j;
        }
    }
    // Propagate: any state reached by an epsilon edge that is not yet in
    // the bitmap is added and queued for further expansion.
    while (ngrey != 0)
    {
        int val = greybuf[--ngrey];
        foreach (NFA.Edge e in nodes[val].edges)
        {
            if (e.when == null)
            {
                int ix = e.to >> 5;       // word index (state / 32)
                int m = 1 << (e.to & 31); // bit within the word
                if ((ls.nstates[ix] & m) == 0)
                {
                    ls.nstates[ix] |= m;
                    greybuf[ngrey++] = e.to;
                }
            }
        }
    }
}
// Handles the position immediately inside a start tag: recognizes an
// empty-element close "/>", a plain '>', leading whitespace, or the
// element QName (which moves us on to attribute scanning).
private void StartTagState()
{
    char next = Peek(0);
    if (next == 0)
    {
        return; // end of input
    }
    BeginToken();
    if (MatchText("/>"))
    {
        // "/>": empty element — restore the state that opened this tag.
        ConsumeChar('/');
        m_anchor++;
        ConsumeChar('>');
        m_state = m_states.Pop();
        return;
    }
    if (next == '>')
    {
        ConsumeChar(Read());
        m_state = LexerState.ElementContent;
    }
    else if (XmlCharType.Instance.IsWhiteSpace(next))
    {
        ConsumeS();
    }
    else if (XmlCharType.Instance.IsStartNameChar(next))
    {
        ConsumeQName();
        m_state = LexerState.AttributeState;
    }
}
/// <summary>
/// Tokenizes the grammar text held by <paramref name="tokenFactory"/> as a
/// lazy character-at-a-time state machine, yielding tokens as they complete.
/// Handles identifiers, single-quoted strings, punctuation, "//" and "/*"
/// comments, and @annotations / {annotation values} embedded in multi-line
/// comments. Throws G4ParseFailureException on malformed input.
/// </summary>
private static IEnumerable <Token> LexImpl(TokenTextIndex tokenFactory)
{
    string text = tokenFactory.Text;
    LexerState state = LexerState.SkipWhitespace;
    int tokenStart = 0;             // start index of the token being collected
    int multiLineCommentStart = 0;  // for the unclosed-comment error location
    int valueLeftBraceDepth = 0;    // nesting of '{' inside an annotation value
    for (int idx = 0; idx < text.Length; ++idx)
    {
        char ch = text[idx];
        // Note: The "error detection" cases are later in the switch because we
        // expect them to be visited less often. (and the C# compiler emits the branches
        // in order)
        switch (state)
        {
        case LexerState.SkipWhitespace:
            // Putting Default first because we expect most of the time to be skipping
            // whitespace.
            tokenStart = idx;
            switch (ch)
            {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\u2028':
            case '\u2029':
                // Skip whitespace
                break;
            case '\'':
                state = LexerState.CollectingString;
                break;
            case '/':
                state = LexerState.CommentCandidate;
                break;
            case '|':
                yield return (tokenFactory.Token(idx, TokenKind.Pipe));
                break;
            case ':':
                yield return (tokenFactory.Token(idx, TokenKind.Colon));
                break;
            case ';':
                yield return (tokenFactory.Token(idx, TokenKind.Semicolon));
                break;
            case '.':
                state = LexerState.DotsCandidate;
                break;
            case '(':
                yield return (tokenFactory.Token(idx, TokenKind.Lparen));
                break;
            case ')':
                yield return (tokenFactory.Token(idx, TokenKind.Rparen));
                break;
            case '*':
                yield return (tokenFactory.Token(idx, TokenKind.Star));
                break;
            case '+':
                yield return (tokenFactory.Token(idx, TokenKind.Plus));
                break;
            case '?':
                yield return (tokenFactory.Token(idx, TokenKind.Question));
                break;
            default:
                // Anything unrecognized starts an identifier.
                state = LexerState.CollectingIdentifier;
                break;
            }
            break;
        case LexerState.CollectingString:
            // Strings run to the next single quote, inclusive.
            if (ch == '\'')
            {
                yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.String));
                state = LexerState.SkipWhitespace;
            }
            break;
        case LexerState.SkipSingleLineComment:
            // "//" comment: discard until a line terminator.
            switch (ch)
            {
            case '\r':
            case '\n':
            case '\u2028':
            case '\u2029':
                state = LexerState.SkipWhitespace;
                break;
            }
            break;
        case LexerState.CommentCandidate:
            // We saw one '/': decide between "//", "/*", or an error.
            switch (ch)
            {
            case '/':
                state = LexerState.SkipSingleLineComment;
                break;
            case '*':
                state = LexerState.MultiLineComment;
                multiLineCommentStart = idx - 1; // position of the '/'
                break;
            default:
                throw new G4ParseFailureException(tokenFactory.Location(idx - 1), Strings.UnrecognizedForwardSlash);
            }
            break;
        case LexerState.MultiLineComment:
            switch (ch)
            {
            case '*':
                state = LexerState.MultiLineCommentStar; // possible "*/"
                break;
            case '@':
                state = LexerState.CollectingAnnotation;
                tokenStart = idx;
                break;
            case '{':
                state = LexerState.CollectingAnnotationValue;
                tokenStart = idx;
                break;
            }
            break;
        case LexerState.MultiLineCommentStar:
            // We saw a '*' inside a multi-line comment.
            switch (ch)
            {
            case '*':
                // Do nothing, e.g. in case *****/
                break;
            case '@':
                state = LexerState.CollectingAnnotation;
                tokenStart = idx;
                break;
            case '{':
                state = LexerState.CollectingAnnotationValue;
                tokenStart = idx;
                break;
            case '/':
                // "*/" closes the comment.
                state = LexerState.SkipWhitespace;
                break;
            default:
                state = LexerState.MultiLineComment;
                break;
            }
            break;
        case LexerState.CollectingAnnotation:
            // "@name" inside a comment; ends at whitespace, '*', or '{'.
            switch (ch)
            {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\u2028':
            case '\u2029':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));
                state = LexerState.MultiLineComment;
                break;
            case '*':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));
                state = LexerState.MultiLineCommentStar;
                break;
            case '{':
                // Annotation immediately followed by its value.
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Annotation));
                valueLeftBraceDepth = 0;
                state = LexerState.CollectingAnnotationValue;
                tokenStart = idx;
                break;
            case '@':
                throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnrecognizedAtInAnnotation);
            }
            break;
        case LexerState.CollectingAnnotationValue:
            // "{...}" with balanced inner braces.
            switch (ch)
            {
            case '{':
                valueLeftBraceDepth++;
                break;
            case '}':
                if (valueLeftBraceDepth > 0)
                {
                    valueLeftBraceDepth--;
                }
                else
                {
                    yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue));
                    state = LexerState.MultiLineComment;
                }
                break;
            case '*':
                // Might be the '*' of a premature "*/".
                state = LexerState.CollectingAnnotationValueStar;
                break;
            }
            break;
        case LexerState.CollectingIdentifier:
            // An identifier ends at whitespace or any punctuation; the
            // punctuation is tokenized (or re-dispatched) in the same step.
            switch (ch)
            {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\u2028':
            case '\u2029':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                state = LexerState.SkipWhitespace;
                break;
            case '\'':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                tokenStart = idx;
                state = LexerState.CollectingString;
                break;
            case '/':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                state = LexerState.CommentCandidate;
                break;
            case '|':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Pipe));
                state = LexerState.SkipWhitespace;
                break;
            case ':':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Colon));
                state = LexerState.SkipWhitespace;
                break;
            case ';':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Semicolon));
                state = LexerState.SkipWhitespace;
                break;
            case '.':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                tokenStart = idx;
                state = LexerState.DotsCandidate;
                break;
            case '(':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Lparen));
                state = LexerState.SkipWhitespace;
                break;
            case ')':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Rparen));
                state = LexerState.SkipWhitespace;
                break;
            case '*':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Star));
                state = LexerState.SkipWhitespace;
                break;
            case '+':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Plus));
                state = LexerState.SkipWhitespace;
                break;
            case '?':
                yield return (tokenFactory.Token(tokenStart, idx, TokenKind.Identifier));
                yield return (tokenFactory.Token(idx, TokenKind.Question));
                state = LexerState.SkipWhitespace;
                break;
            }
            break;
        case LexerState.CollectingAnnotationValueStar:
            // '*' seen inside an annotation value.
            switch (ch)
            {
            case '}':
                yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.AnnotationValue));
                state = LexerState.MultiLineComment;
                break;
            case '/':
                // "*/" would close the comment with the value still open.
                throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedAnnotation);
            default:
                state = LexerState.CollectingAnnotationValue;
                break;
            }
            break;
        case LexerState.DotsCandidate:
            // A single '.' is only valid as the first half of "..".
            switch (ch)
            {
            case '.':
                yield return (tokenFactory.Token(tokenStart, idx + 1, TokenKind.Dots));
                state = LexerState.SkipWhitespace;
                break;
            default:
                throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.SingleDot);
            }
            break;
        }
    }
    // End of input: flush or reject depending on the state we ended in.
    switch (state)
    {
    case LexerState.CollectingIdentifier:
        yield return (tokenFactory.Token(tokenStart, text.Length, TokenKind.Identifier));
        break;
    case LexerState.MultiLineComment:
    case LexerState.MultiLineCommentStar:
    case LexerState.CollectingAnnotation:
    case LexerState.CollectingAnnotationValue:
    case LexerState.CollectingAnnotationValueStar:
        throw new G4ParseFailureException(tokenFactory.Location(multiLineCommentStart), Strings.UnclosedMultiLineComment);
    case LexerState.CommentCandidate:
        throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.UnrecognizedForwardSlash);
    case LexerState.CollectingString:
        throw new G4ParseFailureException(tokenFactory.Location(tokenStart), Strings.UnclosedString);
    case LexerState.DotsCandidate:
        throw new G4ParseFailureException(tokenFactory.Location(text.Length), Strings.SingleDot);
    case LexerState.SkipWhitespace:
    case LexerState.SkipSingleLineComment:
        // OK (do nothing)
        break;
    }
}
// Skips leading whitespace and, if the next character can start a name,
// consumes a QName and drops back to the Default lexer state. Does nothing
// at end of input or on a non-name character.
private void OptionState()
{
    SkipWhitespace();
    char next = Peek(0);
    if (next == 0 || !XmlCharType.Instance.IsStartNameChar(next))
    {
        return;
    }
    ConsumeQName();
    m_state = LexerState.Default;
}
// Lexes the value of an xml:space declaration: either "preserve" or
// "strip". On a match, emits the corresponding token and returns to the
// Default state; otherwise leaves the state unchanged.
private void XmlSpace_DeclState()
{
    SkipWhitespace();
    if (Peek(0) == 0)
    {
        return; // end of input
    }
    BeginToken();
    if (MatchIdentifer("preserve"))
    {
        EndToken();
        ConsumeToken(Token.PRESERVE);
        m_state = LexerState.Default;
        return;
    }
    if (MatchIdentifer("strip"))
    {
        EndToken();
        ConsumeToken(Token.STRIP);
        m_state = LexerState.Default;
    }
}
// Lexes an optional occurrence indicator ('*', '+' or '?') after a type,
// then unconditionally hands control to the Operator state.
private void OccurrenceIndicatorState()
{
    SkipWhitespace();
    // NOTE(review): unlike sibling states (OptionState, XmlSpace_DeclState)
    // there is no Peek(0) == 0 early return here; at end of input we still
    // fall through to OperatorState — confirm that is intentional.
    BeginToken();
    //if (MatchText("(:"))
    //{
    //    m_states.Push(m_state);
    //    m_state = LexerState.ExprComment;
    //    ExprCommentState();
    //}
    //else
    {
        char c = Peek(0);
        if (c == '*')
        {
            //if (!(XmlCharType.Instance.IsNameChar(Peek(1)) || XmlCharType.Instance.IsDigit)
            //{
            Read();
            EndToken();
            ConsumeToken(Token.Indicator1); // '*'
        }
        else if (c == '+')
        {
            Read();
            EndToken();
            ConsumeToken(Token.Indicator2); // '+'
        }
        else if (c == '?')
        {
            Read();
            EndToken();
            ConsumeToken(Token.Indicator3); // '?'
        }
        // Whether or not an indicator was present, continue in the
        // Operator state immediately.
        m_state = LexerState.Operator;
        OperatorState();
    }
}
// End-to-end scanner test: builds a three-level lexicon (global -> keywords
// -> xml), scans a fixed source string with a PeekableScanner, and checks
// token indices, lexeme values, span columns, multi-step Peek behavior, and
// repeated end-of-stream reads.
public void ScannerTest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();
    // Token definitions: ID = [a-z][a-z0-9]*, NUM = [0-9]+, WHITESPACE = ' '*,
    // ERROR = any single char in [\0..\xFF] (catch-all fallback).
    var ID = global.DefineToken(RE.Range('a', 'z').Concat(
        (RE.Range('a', 'z') | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var ERROR = global.DefineToken(RE.Range(Char.MinValue, (char)255));
    // Keyword/XML tokens live in sub-states; the scanner below uses the
    // default (global) state, so "if"/"xmlns" lex as plain IDs here.
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));
    ScannerInfo info = lexicon.CreateScannerInfo();
    PeekableScanner scanner = new PeekableScanner(info);
    string source = "asdf04a 1107 else Z if vvv xmlns 772737";
    StringReader sr = new StringReader(source);
    scanner.SetSource(new SourceReader(sr));
    // "asdf04a": identifier spanning columns 0..6.
    Lexeme l1 = scanner.Read();
    Assert.AreEqual(ID.Index, l1.TokenIndex);
    Assert.AreEqual("asdf04a", l1.Value);
    Assert.AreEqual(0, l1.Span.StartLocation.Column);
    Assert.AreEqual(6, l1.Span.EndLocation.Column);
    Lexeme l2 = scanner.Read();
    Assert.AreEqual(WHITESPACE.Index, l2.TokenIndex);
    Assert.AreEqual(" ", l2.Value);
    Lexeme l3 = scanner.Read();
    Assert.AreEqual(NUM.Index, l3.TokenIndex);
    Assert.AreEqual("1107", l3.Value);
    Lexeme l4 = scanner.Read();
    Assert.AreEqual(WHITESPACE.Index, l4.TokenIndex);
    Lexeme l5 = scanner.Read(); // ID:else (global state, not keyword)
    Assert.AreEqual(ID.Index, l5.TokenIndex);
    // Multi-step lookahead: 'Z' is uppercase, outside ID's [a-z] range, so
    // it falls through to the ERROR catch-all at Peek2.
    int p1 = scanner.Peek();
    Assert.AreEqual(WHITESPACE.Index, p1);
    int p2 = scanner.Peek2();
    Assert.AreEqual(ERROR.Index, p2);
    int p3 = scanner.Peek(3);
    Assert.AreEqual(WHITESPACE.Index, p3);
    int p4 = scanner.Peek(4);
    Assert.AreEqual(ID.Index, p4);
    int p5 = scanner.Peek(5);
    Assert.AreEqual(WHITESPACE.Index, p5);
    Lexeme l6 = scanner.Read();
    Lexeme l7 = scanner.Read();
    Assert.AreEqual(ERROR.Index, l7.TokenIndex);
    // After reading past the peeked lexemes, Peek() must agree with the
    // earlier Peek(3) result.
    int p3_2 = scanner.Peek();
    Assert.AreEqual(p3, p3_2);
    Lexeme l8 = scanner.Read(); // whitespace
    Lexeme l9 = scanner.Read(); // ID:if
    Lexeme l10 = scanner.Read(); // whitespace
    Lexeme l11 = scanner.Read(); // ID:vvv
    Lexeme l12 = scanner.Read(); // whitespace
    Lexeme l13 = scanner.Read(); // ID:xmlns
    Lexeme l14 = scanner.Read(); // whitespace
    Lexeme l15 = scanner.Read(); // NUM:772737
    Lexeme leof = scanner.Read(); // eof
    // EOF lexeme: zero-width span positioned at the end of the source.
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof.TokenIndex);
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof.Span.EndLocation.CharIndex);
    Assert.AreEqual(source.Length, leof.Span.StartLocation.CharIndex);
    Lexeme leof2 = scanner.Read(); //after eof, should return eof again
    Assert.AreEqual(info.EndOfStreamTokenIndex, leof2.TokenIndex);
    Assert.AreEqual(leof.Span.StartLocation.CharIndex, leof2.Span.StartLocation.CharIndex);
}
// Collects the body of an XML comment up to (but not including) the "-->"
// terminator, emitting it as a string literal followed by a COMMENT_END
// token, then pops back to the previous lexer state. If the input ends
// before "-->" is found, returns without emitting anything.
private void XmlCommentState()
{
    BeginToken();
    var content = new StringBuilder();
    while (true)
    {
        char c = Peek(0);
        if (c == '-' && Peek(1) == '-' && Peek(2) == '>')
        {
            break; // reached "-->"
        }
        if (c == 0)
        {
            return; // unterminated comment: end of input
        }
        content.Append(Read());
    }
    EndToken();
    ConsumeToken(Token.StringLiteral, new Literal(content.ToString()));
    // Emit the terminator as its own token.
    BeginToken();
    Read(); // '-'
    Read(); // '-'
    Read(); // '>'
    EndToken();
    ConsumeToken(Token.COMMENT_END);
    m_state = m_states.Pop();
}
// Tests Unicode-category-based character sets: IDs built from all "letter"
// categories must lex CJK identifiers, and scanning in the xml sub-state
// must recognize the "xmlns" keyword while whitespace is skipped as trivia.
public void CompactCharSetTest()
{
    Lexicon lexicon = new Lexicon();
    LexerState global = lexicon.DefaultLexer;
    LexerState keywords = global.CreateSubState();
    LexerState xml = keywords.CreateSubState();
    // All Unicode "letter" general categories — matches far more than ASCII.
    var lettersCategories = new[]
    {
        UnicodeCategory.LetterNumber,
        UnicodeCategory.LowercaseLetter,
        UnicodeCategory.ModifierLetter,
        UnicodeCategory.OtherLetter,
        UnicodeCategory.TitlecaseLetter,
        UnicodeCategory.UppercaseLetter
    };
    var RE_IDCHAR = RE.CharsOf(c => lettersCategories.Contains(Char.GetUnicodeCategory(c)));
    // ID = letter (letter | digit)*, NUM = digit+, WHITESPACE = ' '*.
    var ID = global.DefineToken(RE_IDCHAR.Concat(
        (RE_IDCHAR | RE.Range('0', '9')).Many()));
    var NUM = global.DefineToken(RE.Range('0', '9').Many1());
    var WHITESPACE = global.DefineToken(RE.Symbol(' ').Many());
    var IF = keywords.DefineToken(RE.Literal("if"));
    var ELSE = keywords.DefineToken(RE.Literal("else"));
    var XMLNS = xml.DefineToken(RE.Literal("xmlns"));
    var scannerInfo = lexicon.CreateScannerInfo();
    // Scan in the deepest sub-state so both keywords and xmlns are active.
    scannerInfo.LexerStateIndex = xml.Index;
    Scanner s = new Scanner(scannerInfo);
    string source = "xmlns 你好吗1 123 蘏臦囧綗 ABCD if";
    SourceReader sr = new SourceReader(new StringReader(source));
    s.SetSource(sr);
    // Whitespace is trivia: Read() silently skips it.
    s.SetTriviaTokens(WHITESPACE.Index);
    var l1 = s.Read();
    Assert.AreEqual(XMLNS.Index, l1.TokenIndex); // keyword wins over ID
    var l2 = s.Read();
    Assert.AreEqual(ID.Index, l2.TokenIndex);    // CJK identifier with digit
    var l3 = s.Read();
    Assert.AreEqual(NUM.Index, l3.TokenIndex);
    var l4 = s.Read();
    Assert.AreEqual(ID.Index, l4.TokenIndex);    // CJK identifier
    var l5 = s.Read();
    Assert.AreEqual(ID.Index, l5.TokenIndex);    // uppercase ASCII identifier
    var l6 = s.Read();
    Assert.AreEqual(IF.Index, l6.TokenIndex);    // keyword active in this state
}
// Returns the DFA successor of this state on input character ch, building
// and memoizing it on first use. Successor construction: for every NFA
// state bit set here, follow each edge that accepts ch, collect the targets,
// epsilon-close the result, then hash-cons it via nf.dfashare so equal
// state sets share a single LexerState object.
public LexerState Next(NFA nf, int ch)
{
    LexerState l;
    // Fast path: per-state transition cache keyed by character.
    if (dfc.TryGetValue(ch, out l))
        return l;
    l = new LexerState(nf);
    for (int i = 0; i < nstates.Length; i++)
    {
        int bm = nstates[i];
        for (int j = 0; j < 32; j++)
        {
            if ((bm & (1 << j)) == 0)
                continue;
            int ei = 0, eimax = 0;
            var es = nf.EdgesOf(32*i + j, ref ei, ref eimax);
            while (ei != eimax)
            {
                var e = es[ei++];
                // Edge fires on an exact character match, or — when
                // e.when == -1 — when its character class accepts ch.
                // (&& binds tighter than ||, so the class test only runs
                // for when == -1 edges.)
                if (e.when == ch || e.when == -1 && e.when_cc.Accepts(ch))
                    l.Add(e.to);
            }
        }
    }
    nf.Close(l);
    // Hash-consing: reuse an existing identical state if one is registered.
    LexerState cl;
    if (!nf.dfashare.TryGetValue(l, out cl))
    {
        nf.dfashare[l] = cl = l;
    }
    dfc[ch] = cl;
    return cl;
}
// Epsilon-closure over the flat edge array: expands the bitset in 'ls' to
// include every node reachable via edges with when == -2 (the epsilon
// marker in this representation). Mirrors the NFA.Edge-based Close above
// but iterates edges through EdgesOf's ref-range protocol.
public void Close(LexerState ls)
{
    int ngrey = 0;
    // Seed the worklist with every state bit already set (32 states per word).
    for (int i = 0; i < ls.nstates.Length; i++)
    {
        int bm = ls.nstates[i];
        for (int j = 0; j < 32; j++)
        {
            if ((bm & (1 << j)) != 0)
                greybuf[ngrey++] = 32*i + j;
        }
    }
    while (ngrey != 0)
    {
        int val = greybuf[--ngrey];
        // EdgesOf fills [eix, lix) with the edge range for node 'val'.
        int eix = 0, lix = 0;
        EdgesOf(val, ref eix, ref lix);
        while (eix != lix)
        {
            Edge e = edges[eix++];
            // when == -2 marks an epsilon edge in this encoding.
            if (e.when == -2)
            {
                int ix = e.to >> 5;       // word index
                int m = 1 << (e.to & 31); // bit within word
                if ((ls.nstates[ix] & m) == 0)
                {
                    ls.nstates[ix] |= m;
                    greybuf[ngrey++] = e.to; // newly reached node: revisit
                }
            }
        }
    }
}
// After a URI position: skips whitespace, and if the next character opens a
// quoted literal (single or double quote), consumes the literal and moves
// to the Operator state. Does nothing at end of input or on other chars.
private void UriToOperatorState()
{
    SkipWhitespace();
    switch (Peek(0))
    {
        case '\0':
            return; // end of input
        case '\'':
        case '"':
            ConsumeLiteral();
            m_state = LexerState.Operator;
            break;
    }
}