Пример #1
0
        /// <summary>
        /// Adds a string match to this automaton, creating any states that
        /// are missing along the path spelled out by the string.
        /// </summary>
        /// <param name="str">
        /// The string to match. Its first character is read unconditionally.
        /// </param>
        /// <param name="caseInsensitive">
        /// When set, the first character is lower-cased before the lookup,
        /// and the flag is passed on to every transition tree lookup and
        /// insertion.
        /// </param>
        /// <param name="value">The token pattern stored on the final state.</param>
        public void AddMatch(string str, bool caseInsensitive, TokenPattern value)
        {
            char first = caseInsensitive ? Char.ToLower(str[0]) : str[0];
            DFAState current;
            int index;

            if (first >= 128)
            {
                // Non-ASCII first character: start from the shared state and
                // let the transition tree consume the character as well.
                current = _nonAscii;
                index = 0;
            }
            else
            {
                // First character below 128: it is consumed by the direct
                // lookup table, so the walk continues from the second one.
                current = _ascii[first];
                if (current == null)
                {
                    current = new DFAState();
                    _ascii[first] = current;
                }
                index = 1;
            }

            while (index < str.Length)
            {
                char symbol = str[index];
                DFAState target = current.Tree.Find(symbol, caseInsensitive);

                if (target == null)
                {
                    target = new DFAState();
                    current.Tree.Add(symbol, caseInsensitive, target);
                }
                current = target;
                index++;
            }

            current.Value = value;
        }
Пример #2
0
 /// <summary>
 /// Factory method that creates a new token. It is virtual, so
 /// subclasses can override how tokens are created.
 /// </summary>
 /// <param name="pattern">The token pattern passed to the token.</param>
 /// <param name="image">The token image passed to the token.</param>
 /// <param name="line">The line value passed to the token.</param>
 /// <param name="column">The column value passed to the token.</param>
 /// <returns>The newly created token.</returns>
 protected virtual Token NewToken(TokenPattern pattern,
                                  string image,
                                  int line,
                                  int column)
 {
     Token created = new Token(pattern, image, line, column);

     return created;
 }
Пример #3
0
 /// <summary>
 /// Records a new match if it is strictly longer than the one
 /// currently stored; otherwise leaves the stored match untouched.
 /// </summary>
 /// <param name="length">The length of the candidate match.</param>
 /// <param name="pattern">The token pattern of the candidate match.</param>
 public void Update(int length, TokenPattern pattern)
 {
     if (length <= _length)
     {
         // Not longer than what is already stored: keep the current match.
         return;
     }

     _pattern = pattern;
     _length = length;
 }
Пример #4
0
        /// <summary>
        /// Adds a token pattern to the matcher that handles its type:
        /// string patterns go to the string DFA matcher, regular expression
        /// patterns go to the NFA matcher or the regular expression matcher.
        /// </summary>
        /// <param name="pattern">The token pattern to add.</param>
        /// <param name="nfa">
        /// True to first attempt the pattern as an NFA pattern for regular
        /// expressions. This handles most things except the complex
        /// repeats, i.e. {1,4}. If the NFA matcher throws, or if this is
        /// false, the regular expression matcher is used instead.
        /// </param>
        /// <exception cref="ParserCreationException">
        /// Thrown if the string matcher or the regular expression matcher
        /// rejects the pattern, or if the pattern type is neither STRING
        /// nor REGEXP.
        /// </exception>
        public void AddPattern(TokenPattern pattern, bool nfa = true)
        {
            var kind = pattern.Type;

            if (kind == TokenPattern.PatternType.STRING)
            {
                try
                {
                    _stringDfaMatcher.AddPattern(pattern);
                }
                catch (Exception e)
                {
                    throw new ParserCreationException(
                              ParserCreationException.ErrorType.INVALID_TOKEN,
                              pattern.Name,
                              "error adding string token: " + e.Message);
                }
                return;
            }

            if (kind != TokenPattern.PatternType.REGEXP)
            {
                throw new ParserCreationException(
                          ParserCreationException.ErrorType.INVALID_TOKEN,
                          pattern.Name,
                          "pattern type " + kind + " is undefined");
            }

            // Regular expression: optionally try the NFA matcher first.
            bool addedToNfa = false;

            if (nfa)
            {
                try
                {
                    _nfaMatcher.AddPattern(pattern);
                    addedToNfa = true;
                }
                catch (Exception)
                {
                    // Deliberately ignored: a failure here only means the
                    // pattern is handed to the regular expression matcher.
                }
            }
            if (addedToNfa)
            {
                return;
            }

            try
            {
                _regExpMatcher.AddPattern(pattern);
            }
            catch (Exception e)
            {
                throw new ParserCreationException(
                          ParserCreationException.ErrorType.INVALID_TOKEN,
                          pattern.Name,
                          "regular expression contains error(s): " + e.Message);
            }
        }
Пример #5
0
        /// <summary>
        /// Runs the automaton against the buffer and, if it yields a token
        /// pattern, reports it to the token match together with the length
        /// of that pattern's pattern string.
        /// </summary>
        /// <param name="buffer">The input buffer handed to the automaton.</param>
        /// <param name="match">The token match to update on success.</param>
        public override void Match(ReaderBuffer buffer, TokenMatch match)
        {
            TokenPattern found = _automaton.Match(buffer, IgnoreCase);

            if (found == null)
            {
                return;
            }

            match.Update(found.Pattern.Length, found);
        }
Пример #6
0
        /// <summary>
        /// Adds a regular expression match to this automaton. The pattern is
        /// parsed with a TokenRegExpParser and the parsed start state is then
        /// attached in one of three ways: through an epsilon transition from
        /// the initial state, through the per-character _initialChar table,
        /// or by merging the start state into the initial state.
        /// </summary>
        /// <param name="pattern">The regular expression string to parse.</param>
        /// <param name="ignoreCase">
        /// The case-insensitive flag; passed to the parser, and when set it
        /// rules out the per-character table.
        /// </param>
        /// <param name="value">
        /// The token pattern stored on the parser's end state; its DebugInfo
        /// is overwritten with a description of how the pattern was attached.
        /// </param>
        public void AddRegExpMatch(string pattern,
                                   bool ignoreCase,
                                   TokenPattern value)
        {
            TokenRegExpParser parser = new TokenRegExpParser(pattern, ignoreCase);
            string            debug  = "DFA regexp; " + parser.GetDebugInfo();

            var isAscii = parser.Start.IsAsciiOutgoing();

            // For each of the 128 character codes, check how many of the
            // start state's outgoing transitions match it. The table lookup
            // is ruled out (isAscii cleared) when two transitions match the
            // same character or when a matching character already has an
            // entry in _initialChar.
            for (int i = 0; isAscii && i < 128; i++)
            {
                bool match = false;
                for (int j = 0; j < parser.Start.Outgoing.Length; j++)
                {
                    if (parser.Start.Outgoing[j].Match((char)i))
                    {
                        if (match)
                        {
                            // Second transition matching the same character.
                            // The break only leaves the inner loop; the outer
                            // loop stops through its isAscii condition.
                            isAscii = false;
                            break;
                        }
                        match = true;
                    }
                }
                if (match && _initialChar[i] != null)
                {
                    isAscii = false;
                }
            }
            if (parser.Start.Incoming.Length > 0)
            {
                // The start state has incoming transitions, so it is linked
                // from the initial state with an epsilon transition.
                _initial.AddOut(new NFAEpsilonTransition(parser.Start));
                debug += ", uses initial epsilon";
            }
            else if (isAscii && !ignoreCase)
            {
                // Register the target state of every matching transition in
                // the per-character table. isAscii is not modified inside
                // this loop, so its test in the loop condition stays true.
                for (int i = 0; isAscii && i < 128; i++)
                {
                    for (int j = 0; j < parser.Start.Outgoing.Length; j++)
                    {
                        if (parser.Start.Outgoing[j].Match((char)i))
                        {
                            _initialChar[i] = parser.Start.Outgoing[j].State;
                        }
                    }
                }
                debug += ", uses ASCII lookup";
            }
            else
            {
                // Fallback: merge the parsed start state into the initial state.
                parser.Start.MergeInto(_initial);
                debug += ", uses initial state";
            }
            // Store the token pattern on the end state and record how the
            // pattern was attached.
            parser.End.Value = value;
            value.DebugInfo  = debug;
        }
Пример #7
0
        /// <summary>
        /// Adds a token pattern to the matcher that handles its type.
        /// String patterns go to the string DFA matcher. Regular expression
        /// patterns are offered to the regular expression matcher first and
        /// to the NFA matcher only if that fails.
        /// </summary>
        /// <param name="pattern">The token pattern to add.</param>
        /// <exception cref="ParserCreationException">
        /// Thrown if the string matcher rejects the pattern, if both regular
        /// expression matchers reject it, or if the pattern type is neither
        /// STRING nor REGEXP.
        /// </exception>
        public void AddPattern(TokenPattern pattern)
        {
            var type = pattern.Type;

            if (type == TokenPattern.PatternType.STRING)
            {
                try
                {
                    _stringDfaMatcher.AddPattern(pattern);
                }
                catch (Exception e)
                {
                    throw new ParserCreationException(
                              ParserCreationException.ErrorType.INVALID_TOKEN,
                              pattern.Name,
                              "error adding string token: " + e.Message);
                }
            }
            else if (type == TokenPattern.PatternType.REGEXP)
            {
                // Because of a bug in the NFA matcher's treatment of repeat
                // specifiers we resort to effectively always using the
                // regExp matcher. Under some conditions, throwing the
                // exception from the NFA matcher and catching it incurs an
                // unacceptable slowdown in performance.
                bool acceptedByRegExp = false;

                try
                {
                    _regExpMatcher.AddPattern(pattern);
                    acceptedByRegExp = true;
                }
                catch (Exception)
                {
                    // Deliberately ignored: the NFA matcher is tried next.
                }

                if (!acceptedByRegExp)
                {
                    try
                    {
                        _nfaMatcher.AddPattern(pattern);
                    }
                    catch (Exception e)
                    {
                        throw new ParserCreationException(
                                  ParserCreationException.ErrorType.INVALID_TOKEN,
                                  pattern.Name,
                                  "regular expression contains error(s): " + e.Message);
                    }
                }
            }
            else
            {
                throw new ParserCreationException(
                          ParserCreationException.ErrorType.INVALID_TOKEN,
                          pattern.Name,
                          "pattern type " + type + " is undefined");
            }
        }
Пример #8
0
 /// <summary>
 /// Adds a token pattern to the automaton, as a text match for string
 /// patterns and as a regular expression match for every other type,
 /// and then registers the pattern with the base class.
 /// </summary>
 /// <param name="pattern">The token pattern to add.</param>
 public override void AddPattern(TokenPattern pattern)
 {
     bool isText = pattern.Type == TokenPattern.PatternType.STRING;

     if (!isText)
     {
         _automaton.AddRegExpMatch(pattern.Pattern, IgnoreCase, pattern);
     }
     else
     {
         _automaton.AddTextMatch(pattern.Pattern, IgnoreCase, pattern);
     }

     base.AddPattern(pattern);
 }
Пример #9
0
 /// <summary>
 /// Records a new match if it is better than the one currently stored.
 /// A candidate wins when it is strictly longer, or when it has the
 /// same length and its pattern has a lower Id than the stored pattern.
 /// </summary>
 /// <remarks>
 /// An equal-length candidate is ignored while no pattern is stored
 /// (for example a zero-length candidate right after Clear()). Without
 /// that guard the tie-break would dereference a null stored pattern.
 /// </remarks>
 /// <param name="length">The length of the candidate match.</param>
 /// <param name="pattern">The token pattern of the candidate match.</param>
 public void Update(int length, TokenPattern pattern)
 {
     bool longer = this._length < length;

     // The tie-break only applies when a pattern is already stored;
     // short-circuit evaluation keeps _pattern.Id from being read on null.
     bool winsTie = this._length == length
                    && this._pattern != null
                    && this._pattern.Id > pattern.Id;

     if (longer || winsTie)
     {
         this._length  = length;
         this._pattern = pattern;
     }
 }
Пример #10
0
        /// <summary>
        /// Adds a token pattern to the matcher that handles its type.
        /// String patterns go to the string DFA matcher. Regular expression
        /// patterns are offered to the regular expression matcher first and
        /// to the NFA matcher only if that fails.
        /// </summary>
        /// <param name="pattern">The token pattern to add.</param>
        /// <exception cref="ParserCreationException">
        /// Thrown if the string matcher rejects the pattern, if both regular
        /// expression matchers reject it, or if the pattern type is neither
        /// STRING nor REGEXP.
        /// </exception>
        public void AddPattern(TokenPattern pattern)
        {
            var type = pattern.Type;
            bool isString = type == TokenPattern.PatternType.STRING;
            bool isRegExp = type == TokenPattern.PatternType.REGEXP;

            if (!isString && !isRegExp)
            {
                throw new ParserCreationException(
                          ParserCreationException.ErrorType.INVALID_TOKEN,
                          pattern.Name,
                          "pattern type " + type + " is undefined");
            }

            if (isString)
            {
                try
                {
                    _stringDfaMatcher.AddPattern(pattern);
                }
                catch (Exception e)
                {
                    throw new ParserCreationException(
                              ParserCreationException.ErrorType.INVALID_TOKEN,
                              pattern.Name,
                              "error adding string token: " + e.Message);
                }
                return;
            }

            try
            {
                //See comments in NetStandard version for why the regExp matcher is tried first
                _regExpMatcher.AddPattern(pattern);
                return;
            }
            catch (Exception)
            {
                // Deliberately ignored: the NFA matcher is tried below.
            }

            try
            {
                _nfaMatcher.AddPattern(pattern);
            }
            catch (Exception e)
            {
                throw new ParserCreationException(
                          ParserCreationException.ErrorType.INVALID_TOKEN,
                          pattern.Name,
                          "regular expression contains error(s): " + e.Message);
            }
        }
Пример #11
0
 /// <summary>
 /// Creates a new token and derives its end position from the image.
 /// Without a newline in the image the token ends on its start line at
 /// column <c>col + image.Length - 1</c>. Each newline advances the end
 /// line by one, and the end column becomes the number of characters
 /// that follow the last newline.
 /// </summary>
 /// <param name="pattern">The token pattern stored on the token.</param>
 /// <param name="image">The token image; scanned for '\n' characters.</param>
 /// <param name="line">The start line stored on the token.</param>
 /// <param name="col">The start column stored on the token.</param>
 public Token(TokenPattern pattern, string image, int line, int col)
 {
     _pattern     = pattern;
     _image       = image;
     _startLine   = line;
     _startColumn = col;

     int lastLine   = line;
     int lastColumn = col + image.Length - 1;
     int newline    = image.IndexOf('\n');

     while (newline >= 0)
     {
         int afterNewline = newline + 1;

         lastLine++;
         lastColumn = image.Length - afterNewline;
         newline = image.IndexOf('\n', afterNewline);
     }

     _endLine   = lastLine;
     _endColumn = lastColumn;
 }
Пример #12
0
        /// <summary>
        /// Walks the automaton over the characters peeked from the buffer
        /// and returns the value of the last state on that walk that has
        /// one.
        /// </summary>
        /// <param name="buffer">The input buffer; only peeked, starting at offset 0.</param>
        /// <param name="caseInsensitive">
        /// When set, the first character is lower-cased before the lookup,
        /// and the flag is passed on to every transition tree lookup.
        /// </param>
        /// <returns>
        /// The token pattern of the last valued state reached, or null if
        /// the buffer peek is negative at offset 0, the first character has
        /// no table entry, or no visited state carries a value.
        /// </returns>
        public TokenPattern Match(ReaderBuffer buffer, bool caseInsensitive)
        {
            var first = buffer.Peek(0);

            if (first < 0)
            {
                return null;
            }
            if (caseInsensitive)
            {
                first = Char.ToLower((char)first);
            }

            TokenPattern longest = null;
            DFAState     current;
            int          offset;

            if (first >= 128)
            {
                // Non-ASCII first character: it is consumed by the
                // transition tree of the shared state in the loop below.
                current = _nonAscii;
                offset = 0;
            }
            else
            {
                current = _ascii[first];
                if (current == null)
                {
                    return null;
                }
                longest = current.Value;
                offset = 1;
            }

            while (true)
            {
                var next = buffer.Peek(offset);

                if (next < 0)
                {
                    break;
                }
                current = current.Tree.Find((char)next, caseInsensitive);
                if (current == null)
                {
                    break;
                }
                if (current.Value != null)
                {
                    longest = current.Value;
                }
                offset++;
            }

            return longest;
        }
Пример #13
0
        /// <summary>
        /// Creates a regular expression handler for the pattern, appends it
        /// to the handler array and registers the pattern with the base
        /// class. A GrammaticaRE handler is tried first; if creating it or
        /// recording its debug info throws, a SystemRE handler is used
        /// instead.
        /// </summary>
        /// <param name="pattern">
        /// The token pattern to add; its DebugInfo is set to describe which
        /// handler was chosen.
        /// </param>
        public override void AddPattern(TokenPattern pattern)
        {
            REHandler handler;

            try
            {
                handler = new GrammaticaRE(pattern.Pattern, IgnoreCase);
                pattern.DebugInfo = "Grammatica regexp\n" + handler;
            }
            catch (Exception)
            {
                // Any failure above falls back to the SystemRE handler.
                handler = new SystemRE(pattern.Pattern, IgnoreCase);
                pattern.DebugInfo = "native .NET regexp";
            }

            // Grow the handler array by one slot and store the handler last.
            int slot = _regExps.Length;

            Array.Resize(ref _regExps, slot + 1);
            _regExps[slot] = handler;

            base.AddPattern(pattern);
        }
Пример #14
0
        /// <summary>
        /// Adds a string match to this automaton by chaining one outgoing
        /// transition per character and storing the token pattern on the
        /// last state of the chain.
        /// </summary>
        /// <param name="str">
        /// The string to match. Its first character is read unconditionally.
        /// </param>
        /// <param name="ignoreCase">
        /// The case-insensitive flag; when set, the per-character start
        /// table is bypassed and the flag is passed to every transition.
        /// </param>
        /// <param name="value">The token pattern stored on the final state.</param>
        public void AddTextMatch(string str, bool ignoreCase, TokenPattern value)
        {
            char     first = str[0];
            NFAState current;

            if (ignoreCase || first >= 128)
            {
                // Case-insensitive or non-ASCII start: branch off the
                // initial state with an ordinary transition.
                current = _initial.AddOut(first, ignoreCase, null);
            }
            else
            {
                // Case-sensitive start below 128: use (or create) the entry
                // of the per-character start table.
                current = _initialChar[first];
                if (current == null)
                {
                    current = new NFAState();
                    _initialChar[first] = current;
                }
            }

            int index = 1;

            while (index < str.Length)
            {
                current = current.AddOut(str[index], ignoreCase, null);
                index++;
            }

            current.Value = value;
        }
Пример #15
0
 /// <summary>
 /// Resets this match: the stored pattern is set to null and the
 /// stored length to zero.
 /// </summary>
 public void Clear()
 {
     this._pattern = null;
     this._length  = 0;
 }
Пример #16
0
 /// <summary>
 /// Adds the pattern's string to the automaton as a match for the
 /// pattern, then registers the pattern with the base class.
 /// </summary>
 /// <param name="pattern">The token pattern to add.</param>
 public override void AddPattern(TokenPattern pattern)
 {
     var text = pattern.Pattern;

     _automaton.AddMatch(text, IgnoreCase, pattern);
     base.AddPattern(pattern);
 }
Пример #17
0
 /// <summary>
 /// Appends a token pattern to the Patterns array, growing the array
 /// by one element.
 /// </summary>
 /// <param name="pattern">The token pattern to append.</param>
 public virtual void AddPattern(TokenPattern pattern)
 {
     int slot = Patterns.Length;

     Array.Resize(ref Patterns, slot + 1);
     Patterns[slot] = pattern;
 }