Пример #1
0
        /// <summary>
        /// Gets the tokens for a matched group value by running it through a different lexer
        /// </summary>
        /// <remarks>
        /// The nested lexer only sees <paramref name="value"/>, so every token it produces is
        /// passed through <c>Token.Offset</c> with the position the context had when enumeration
        /// started. The context position is advanced by the length of <paramref name="value"/>
        /// once the nested lexer's tokens have all been yielded.
        /// </remarks>
        /// <param name="context">The context of the current lexing process</param>
        /// <param name="value">The string to lex</param>
        /// <returns>The nested lexer's tokens, offset by the context position at the start of the value</returns>
        public override IEnumerable<Token> GetTokens(RegexLexerContext context, string value)
        {
            // Capture the start position before the context is moved forward.
            int offset = context.Position;

            foreach (var token in Lexer.GetTokens(value))
            {
                yield return token.Offset(offset);
            }

            context.Position += value.Length;
        }
Пример #2
0
        /// <summary>
        /// Tokenizes the given string by repeatedly applying the rules of the current lexer state
        /// </summary>
        /// <remarks>
        /// Lexing starts in the "root" state. The rules of the state on top of the stack are tried
        /// in order and the first one whose regex matches has its action executed. When no rule
        /// matches, lexing stops at the end of the text; a newline resets the stack to "root" and
        /// is emitted as a text token; any other character is emitted as an error token.
        /// </remarks>
        /// <param name="text">The string to tokenize</param>
        /// <returns>A sequence of <see cref="Token"/> structs</returns>
        protected override IEnumerable <Token> GetTokensUnprocessed(string text)
        {
            var rulesByState = GetStateRules();
            var states       = new Stack <string>(50);

            states.Push("root");
            var activeRules = rulesByState[states.Peek()];
            int cursor      = 0;

            for (;;)
            {
                bool matched = false;

                foreach (var candidate in activeRules)
                {
                    var match = candidate.Regex.Match(text, cursor);
                    if (!match.Success)
                    {
                        continue;
                    }

                    var context = new RegexLexerContext(cursor, match, states, candidate.TokenType);
                    Debug.Assert(match.Index == cursor, $"Regex \"{candidate.Regex}\" should have matched at position {cursor} but matched at {match.Index}");

                    foreach (var token in candidate.Action.Execute(context))
                    {
                        yield return(token);
                    }

                    // The action may have moved the position and changed the state stack.
                    cursor      = context.Position;
                    activeRules = rulesByState[states.Peek()];
                    matched     = true;
                    break;
                }

                if (matched)
                {
                    continue;
                }

                if (cursor >= text.Length)
                {
                    yield break;
                }

                if (text[cursor] == '\n')
                {
                    // No rule matched at a newline: fall back to the root state.
                    states.Clear();
                    states.Push("root");
                    activeRules = rulesByState["root"];
                    yield return(new Token(cursor, TokenTypes.Text, "\n"));
                }
                else
                {
                    yield return(new Token(cursor, TokenTypes.Error, text[cursor].ToString()));
                }

                cursor++;
            }
        }
Пример #3
0
        /// <summary>
        /// Emits a token for the matched text (if any) and then applies the configured stack action
        /// </summary>
        /// <param name="context">The current lexer context</param>
        /// <returns>A sequence holding one token, or no tokens when the match is empty</returns>
        public override IEnumerable <Token> Execute(RegexLexerContext context)
        {
            string matchedText = context.Match.Value;

            // An empty match emits nothing but still runs the stack action below.
            if (matchedText.Length > 0)
            {
                yield return(new Token(context.Position, context.RuleTokenType, matchedText));
            }

            Apply(context.StateStack);

            context.Position += context.Match.Length;
        }
Пример #4
0
        /// <summary>
        /// Produces all the <see cref="Token"/>s for the given string by walking it with the state rules
        /// </summary>
        /// <remarks>
        /// The state stack starts with "root". On each pass the rules of the top-most state are tried
        /// in order; the first successful match runs its action, whose tokens are yielded. If nothing
        /// matches, the loop ends at the end of the text, resets to "root" on a newline (emitted as
        /// text), or emits the offending character as an error token.
        /// </remarks>
        /// <param name="text">The string to tokenize</param>
        /// <returns>A sequence of <see cref="Token"/> structs</returns>
        protected override IEnumerable<Token> GetTokensUnprocessed(string text)
        {
            var stateRules = GetStateRules();
            var stack = new Stack<string>(50);
            stack.Push("root");

            var candidates = stateRules[stack.Peek()];
            int index = 0;

            while (true)
            {
                bool ruleMatched = false;

                foreach (var rule in candidates)
                {
                    var match = rule.Regex.Match(text, index);
                    if (!match.Success)
                    {
                        continue;
                    }

                    var context = new RegexLexerContext(index, match, stack, rule.TokenType);
                    Debug.Assert(match.Index == index, $"Regex \"{rule.Regex}\" should have matched at position {index} but matched at {match.Index}");

                    foreach (var token in rule.Action.Execute(context))
                    {
                        yield return token;
                    }

                    // Pick up whatever position and state the action left behind.
                    index = context.Position;
                    candidates = stateRules[stack.Peek()];
                    ruleMatched = true;
                    break;
                }

                if (ruleMatched)
                {
                    continue;
                }

                if (index >= text.Length)
                {
                    break;
                }

                char unmatched = text[index];
                if (unmatched == '\n')
                {
                    // Unmatched newline: start over from the root state.
                    stack.Clear();
                    stack.Push("root");
                    candidates = stateRules["root"];
                    yield return new Token(index, TokenTypes.Text, "\n");
                }
                else
                {
                    yield return new Token(index, TokenTypes.Error, unmatched.ToString());
                }

                index++;
            }
        }
Пример #5
0
        /// <summary>
        /// Runs the matched text through the nested lexer and emits its tokens
        /// </summary>
        /// <remarks>
        /// Each nested token is passed through <c>Token.Offset</c> with the context position at the
        /// start of the match. The context position is advanced by the match length after the last
        /// token has been yielded.
        /// </remarks>
        /// <param name="context">The lexer state</param>
        /// <returns>The nested lexer's tokens, offset by the position of the match</returns>
        public override IEnumerable <Token> Execute(RegexLexerContext context)
        {
            int    matchStart  = context.Position;
            string matchedText = context.Match.Value;

            foreach (var nested in Lexer.GetTokens(matchedText))
            {
                yield return(nested.Offset(matchStart));
            }

            context.Position += context.Match.Length;
        }
Пример #6
0
        /// <summary>
        /// Feeds each match group to its processor, emits the resulting tokens, then applies the stack action
        /// </summary>
        /// <remarks>
        /// Group 0 (the whole match) is skipped; group <c>i</c> is handled by processor <c>i - 1</c>.
        /// </remarks>
        /// <param name="context">The lexer state</param>
        /// <returns>The tokens produced by the group processors, in group order</returns>
        /// <exception cref="InvalidOperationException">The regex has more groups than there are processors</exception>
        public override IEnumerable<Token> Execute(RegexLexerContext context)
        {
            var groups = context.Match.Groups;

            if (groups.Count > Processors.Count + 1)
            {
                throw new InvalidOperationException("Regex had more match groups than processors");
            }

            for (int groupIndex = 1; groupIndex < groups.Count; groupIndex++)
            {
                string groupText = groups[groupIndex].Value;
                var processor = Processors[groupIndex - 1];

                foreach (var token in processor.GetTokens(context, groupText))
                {
                    yield return token;
                }
            }

            Action.Apply(context.StateStack);
        }
Пример #7
0
        /// <summary>
        /// Hands every capture group of the match to the processor in the same slot and emits the tokens
        /// </summary>
        /// <remarks>
        /// Processor <c>n</c> receives the value of group <c>n + 1</c>, because group 0 is the whole
        /// match. The stack action is applied after all groups have been processed.
        /// </remarks>
        /// <param name="context">The lexer state</param>
        /// <returns>The tokens produced by the group processors, in group order</returns>
        /// <exception cref="InvalidOperationException">The regex has more groups than there are processors</exception>
        public override IEnumerable <Token> Execute(RegexLexerContext context)
        {
            var matchGroups = context.Match.Groups;

            if (matchGroups.Count > Processors.Count + 1)
            {
                throw new InvalidOperationException("Regex had more match groups than processors");
            }

            for (int slot = 0; slot + 1 < matchGroups.Count; slot++)
            {
                var captured = matchGroups[slot + 1];

                foreach (var token in Processors[slot].GetTokens(context, captured.Value))
                {
                    yield return(token);
                }
            }

            Action.Apply(context.StateStack);
        }
Пример #8
0
 /// <summary>
 /// Executes the action against the lexer state
 /// </summary>
 /// <remarks>
 /// Abstract: each concrete action supplies its own implementation.
 /// </remarks>
 /// <param name="context">The lexer state the action runs against</param>
 /// <returns>The tokens to emit for this action</returns>
 public abstract IEnumerable <Token> Execute(RegexLexerContext context);
Пример #9
0
 /// <summary>
 /// Executes the action against the lexer state
 /// </summary>
 /// <remarks>
 /// Abstract: every derived action must provide the implementation.
 /// </remarks>
 /// <param name="context">The lexer state the action operates on</param>
 /// <returns>The sequence of tokens the action emits</returns>
 public abstract IEnumerable<Token> Execute(RegexLexerContext context);
Пример #10
0
        /// <summary>
        /// Yields a token for the matched text, unless it is empty, and applies the configured actions against the stack
        /// </summary>
        /// <param name="context">The current lexer context</param>
        /// <returns>A sequence with a single token, or an empty sequence for an empty match</returns>
        public override IEnumerable<Token> Execute(RegexLexerContext context)
        {
            string text = context.Match.Value;
            bool hasText = text != string.Empty;

            if (hasText)
            {
                yield return new Token(context.Position, context.RuleTokenType, text);
            }

            // The stack action runs even when the match was empty.
            Apply(context.StateStack);
            context.Position += context.Match.Length;
        }
Пример #11
0
        /// <summary>
        /// Lexes the matched text with the nested lexer and emits the resulting tokens
        /// </summary>
        /// <remarks>
        /// Tokens are passed through <c>Token.Offset</c> using the context position at the start of
        /// the match; the context position is moved past the match once all tokens have been yielded.
        /// </remarks>
        /// <param name="context">The lexer state</param>
        /// <returns>The nested lexer's tokens, offset by the position of the match</returns>
        public override IEnumerable<Token> Execute(RegexLexerContext context)
        {
            int start = context.Position;
            var nestedTokens = Lexer.GetTokens(context.Match.Value);

            foreach (var nestedToken in nestedTokens)
            {
                yield return nestedToken.Offset(start);
            }

            context.Position += context.Match.Length;
        }
Пример #12
0
 /// <summary>
 /// Processes a match group and yields a single token for the value
 /// </summary>
 /// <param name="context">The context of the lexer</param>
 /// <param name="value">The group value that should be turned into a token</param>
 /// <returns>A sequence containing exactly one token built from <paramref name="value"/></returns>
 public override IEnumerable<Token> GetTokens(RegexLexerContext context, string value)
 {
     // The token is placed at the current position; the position moves on only after it is consumed.
     var groupToken = new Token(context.Position, Type, value);
     yield return groupToken;

     context.Position += value.Length;
 }
Пример #13
0
 /// <summary>
 /// Gets the tokens for a matched group value
 /// </summary>
 /// <remarks>
 /// Abstract: each concrete group processor decides how the value is turned into tokens.
 /// </remarks>
 /// <param name="context">The lexer context</param>
 /// <param name="value">The matched group value to process</param>
 /// <returns>The tokens produced for <paramref name="value"/></returns>
 public abstract IEnumerable<Token> GetTokens(RegexLexerContext context, string value);