/// <summary>
/// Gets the tokens in the match context by executing a different lexer
/// </summary>
/// <param name="context">The context of the current lexing process</param>
/// <param name="value">The string to lex</param>
/// <returns>The tokens produced by the delegate lexer, re-based to absolute input positions</returns>
public override IEnumerable<Token> GetTokens(RegexLexerContext context, string value)
{
    // The delegate lexer sees only <paramref name="value"/>, so its token indices
    // are relative to that substring. Re-base them onto the absolute position of
    // the group within the outer input, consistent with how LexerAction.Execute
    // offsets delegated tokens via Token.Offset.
    int start = context.Position;

    // Materialize eagerly so the position advance below and the token offsets
    // stay in sync regardless of when the caller enumerates the result.
    var tokens = new List<Token>();
    foreach (var token in Lexer.GetTokens(value))
    {
        tokens.Add(token.Offset(start));
    }

    context.Position += value.Length;
    return tokens;
}
/// <summary>
/// When overridden in a child class, gets all the <see cref="Token"/>s for the given string
/// </summary>
/// <param name="text">The string to tokenize</param>
/// <returns>A sequence of <see cref="Token"/> structs</returns>
protected override IEnumerable<Token> GetTokensUnprocessed(string text)
{
    var rules = GetStateRules();
    int pos = 0;

    // State-machine driver: the stack of state names selects the active rule set.
    // 50 is only a capacity hint; the stack grows beyond it if needed.
    var stateStack = new Stack<string>(50);
    stateStack.Push("root");
    var currentStateRules = rules[stateStack.Peek()];

    while (true)
    {
        bool found = false;

        // Try each rule of the current state in declared order; first match wins.
        foreach (var rule in currentStateRules)
        {
            var m = rule.Regex.Match(text, pos);
            if (m.Success)
            {
                // Context is shared with the action: it may emit tokens, advance
                // Position, and push/pop entries on the state stack.
                var context = new RegexLexerContext(pos, m, stateStack, rule.TokenType);

                // Rules are expected to be anchored so they match exactly at pos.
                Debug.Assert(m.Index == pos, $"Regex \"{rule.Regex}\" should have matched at position {pos} but matched at {m.Index}");

                var tokens = rule.Action.Execute(context);
                foreach (var token in tokens)
                {
                    yield return (token);
                }

                // The action owns position advancement; pick up where it left off.
                pos = context.Position;
                // The action may have pushed/popped states; re-resolve the rule set.
                currentStateRules = rules[stateStack.Peek()];
                found = true;
                break;
            }
        }

        if (!found)
        {
            if (pos >= text.Length)
            {
                break;
            }

            // Unmatched newline: reset to the root state and emit it as plain text
            // so a bad line cannot corrupt the state for the rest of the input.
            if (text[pos] == '\n')
            {
                stateStack.Clear();
                stateStack.Push("root");
                currentStateRules = rules["root"];
                yield return (new Token(pos, TokenTypes.Text, "\n"));
                pos++;
                continue;
            }

            // Skip one unmatched character, surfacing it as an error token.
            yield return (new Token(pos, TokenTypes.Error, text[pos].ToString()));
            pos++;
        }
    }
}
/// <summary>
/// Yields a token and applies configured actions against the stack
/// </summary>
/// <param name="context">The current lexer context</param>
/// <returns>A sequence of tokens</returns>
public override IEnumerable<Token> Execute(RegexLexerContext context)
{
    var matchedText = context.Match.Value;

    // A zero-length match emits no token, but the state transition still runs.
    if (matchedText.Length > 0)
    {
        yield return new Token(context.Position, context.RuleTokenType, matchedText);
    }

    Apply(context.StateStack);
    context.Position += context.Match.Length;
}
/// <summary>
/// When overridden in a child class, gets all the <see cref="Token"/>s for the given string
/// </summary>
/// <param name="text">The string to tokenize</param>
/// <returns>A sequence of <see cref="Token"/> structs</returns>
protected override IEnumerable<Token> GetTokensUnprocessed(string text)
{
    var rules = GetStateRules();
    int pos = 0;

    // The stack of state names drives which rule set is currently active;
    // 50 is just a pre-size hint, not a limit.
    var stateStack = new Stack<string>(50);
    stateStack.Push("root");
    var currentStateRules = rules[stateStack.Peek()];

    while (true)
    {
        bool found = false;

        // First matching rule of the active state wins.
        foreach (var rule in currentStateRules)
        {
            var m = rule.Regex.Match(text, pos);
            if (m.Success)
            {
                // The context is handed to the rule action, which may emit tokens,
                // advance Position, and mutate the shared state stack.
                var context = new RegexLexerContext(pos, m, stateStack, rule.TokenType);

                // Rules should be anchored so they cannot match past pos.
                Debug.Assert(m.Index == pos, $"Regex \"{rule.Regex}\" should have matched at position {pos} but matched at {m.Index}");

                var tokens = rule.Action.Execute(context);
                foreach (var token in tokens)
                    yield return token;

                // Resume from wherever the action moved the cursor.
                pos = context.Position;
                // Re-resolve the rule set in case the action changed state.
                currentStateRules = rules[stateStack.Peek()];
                found = true;
                break;
            }
        }

        if (!found)
        {
            if (pos >= text.Length)
                break;

            // Error recovery on newline: drop back to the root state and emit
            // the newline as plain text so one bad line doesn't poison the rest.
            if (text[pos] == '\n')
            {
                stateStack.Clear();
                stateStack.Push("root");
                currentStateRules = rules["root"];
                yield return new Token(pos, TokenTypes.Text, "\n");
                pos++;
                continue;
            }

            // Otherwise skip a single character, flagged as an error token.
            yield return new Token(pos, TokenTypes.Error, text[pos].ToString());
            pos++;
        }
    }
}
/// <summary>
/// Executes the action against the lexer state
/// </summary>
/// <param name="context">The lexer state</param>
/// <returns>A list of tokens to emit</returns>
public override IEnumerable<Token> Execute(RegexLexerContext context)
{
    // Tokens from the delegate lexer are relative to the matched substring,
    // so each one is shifted by the absolute position of the match.
    int basePosition = context.Position;

    foreach (var token in Lexer.GetTokens(context.Match.Value))
    {
        yield return token.Offset(basePosition);
    }

    context.Position += context.Match.Length;
}
/// <summary>
/// Executes the action against the lexer state
/// </summary>
/// <param name="context">The lexer state</param>
/// <returns>A list of tokens to emit</returns>
public override IEnumerable<Token> Execute(RegexLexerContext context)
{
    var groups = context.Match.Groups;

    // Group 0 is the whole match, so N processors can service at most N + 1 groups.
    if (groups.Count > Processors.Count + 1)
    {
        throw new InvalidOperationException("Regex had more match groups than processors");
    }

    // Hand each captured group (skipping group 0) to its corresponding processor.
    for (int groupIndex = 1; groupIndex < groups.Count; groupIndex++)
    {
        var processor = Processors[groupIndex - 1];
        foreach (var token in processor.GetTokens(context, groups[groupIndex].Value))
        {
            yield return token;
        }
    }

    Action.Apply(context.StateStack);
}
/// <summary>
/// Executes the action against the lexer state
/// </summary>
/// <param name="context">The lexer state</param>
/// <returns>A list of tokens to emit</returns>
public override IEnumerable<Token> Execute(RegexLexerContext context)
{
    int groupCount = context.Match.Groups.Count;

    // There must be a processor for every capture group (group 0 is the full match).
    if (groupCount > Processors.Count + 1)
    {
        throw new InvalidOperationException("Regex had more match groups than processors");
    }

    int i = 1;
    while (i < groupCount)
    {
        // Processor i-1 handles capture group i.
        var groupValue = context.Match.Groups[i].Value;
        foreach (var token in Processors[i - 1].GetTokens(context, groupValue))
        {
            yield return token;
        }
        i++;
    }

    Action.Apply(context.StateStack);
}
/// <summary>
/// Executes the action against the lexer state
/// </summary>
/// <param name="context">The lexer state</param>
/// <returns>A list of tokens to emit</returns>
/// <remarks>
/// Implementations typically advance the context's position and may push or pop
/// entries on its state stack — callers should re-read both after enumeration.
/// </remarks>
public abstract IEnumerable<Token> Execute(RegexLexerContext context);
/// <summary>
/// Executes the action against the lexer state
/// </summary>
/// <param name="context">The lexer state</param>
/// <returns>A list of tokens to emit</returns>
/// <remarks>
/// Implementations are expected to advance <c>context.Position</c> past the match
/// and may mutate the context's state stack as a side effect.
/// </remarks>
public abstract IEnumerable<Token> Execute(RegexLexerContext context);
/// <summary>
/// Yields a token and applies configured actions against the stack
/// </summary>
/// <param name="context">The current lexer context</param>
/// <returns>A sequence of tokens</returns>
public override IEnumerable<Token> Execute(RegexLexerContext context)
{
    var match = context.Match;

    // Emit a token only when the regex actually consumed characters.
    if (match.Value.Length != 0)
    {
        yield return new Token(context.Position, context.RuleTokenType, match.Value);
    }

    // State transitions and cursor advancement happen even for empty matches.
    Apply(context.StateStack);
    context.Position += match.Length;
}
/// <summary>
/// Executes the action against the lexer state
/// </summary>
/// <param name="context">The lexer state</param>
/// <returns>A list of tokens to emit</returns>
public override IEnumerable<Token> Execute(RegexLexerContext context)
{
    int start = context.Position;
    var nested = Lexer.GetTokens(context.Match.Value);

    // Re-base every nested token from substring-relative to absolute coordinates.
    foreach (var t in nested)
    {
        yield return t.Offset(start);
    }

    context.Position += context.Match.Length;
}
/// <summary>
/// Processes a match group and yields a single token for the value
/// </summary>
/// <param name="context">The context of the lexer</param>
/// <param name="value">The group value that should be turned into a token</param>
/// <returns>A single-element sequence containing the token for the group</returns>
public override IEnumerable<Token> GetTokens(RegexLexerContext context, string value)
{
    // One token per group: anchored at the current cursor, typed by this processor.
    var token = new Token(context.Position, Type, value);
    yield return token;

    // Advance the cursor past the group only after the token has been consumed,
    // matching the iterator semantics callers observe via context.Position.
    context.Position += value.Length;
}
/// <summary>
/// Gets the tokens for a matched group value
/// </summary>
/// <param name="context">The lexer context</param>
/// <param name="value">The matched group value to process</param>
/// <returns>A sequence of tokens produced from the group value</returns>
/// <remarks>
/// Implementations are expected to advance <c>context.Position</c> by the length
/// of <paramref name="value"/> so subsequent groups are positioned correctly.
/// </remarks>
public abstract IEnumerable<Token> GetTokens(RegexLexerContext context, string value);