/// <summary>
/// Registers a pattern matcher at the end of the character path spelled by
/// <paramref name="text"/>, creating child and sibling nodes on the way down.
/// </summary>
/// <param name="text">Remaining prefix; its first character becomes this node's <c>Char</c>.</param>
/// <param name="caseSensitive">When false, this node is flagged as containing case-insensitive data.</param>
/// <param name="needSeparators">Forwarded unchanged to the recursive calls; not otherwise used here.</param>
/// <param name="matcher">Matcher wrapped in a <c>PatternMatchReference</c> on the last node of the path.</param>
/// <param name="tag">Tag stored on the created reference.</param>
public void AddExpression(string text, bool caseSensitive, bool needSeparators, IPatternMatcher matcher,
            object tag)
        {
            if (StringUtils.IsNullOrEmpty(text))
            {
                throw new ArgumentException("text may not be empty", "text"); // do not localize
            }

            Char = text[0];

            if (!caseSensitive)
            {
                ContainsCaseInsensitiveData = true;
            }

            // Last character of the prefix: push the matcher onto the front of
            // this node's expression list and stop.
            if (text.Length == 1)
            {
                PatternMatchReference reference = new PatternMatchReference(matcher);
                reference.NextSibling = FirstExpression;
                reference.Tag = tag;
                FirstExpression = reference;
                return;
            }

            string rest = text.Substring(1);
            char nextChar = rest[0];
            // Only the low 8 bits select the slot; characters that share a slot
            // are chained through NextSibling.
            int slot = nextChar & 0xff;

            TokenTreeNode current = ChildNodes[slot];
            if (current == null)
            {
                // Empty slot: the new node becomes the head of the chain.
                TokenTreeNode created = new TokenTreeNode();
                ChildNodes[slot] = created;
                created.AddExpression(rest, caseSensitive, needSeparators, matcher, tag);

                if (created.Char == ' ')
                {
                    // A whitespace node is linked to itself so that a run of
                    // spaces ("end         sub") still follows the "end sub" path. // do not localize
                    created.ChildNodes[(int) ' '] = created;
                }

                return;
            }

            // Walk the chain until the matching character or the last sibling.
            while (current.NextSibling != null && current.Char != nextChar)
            {
                current = current.NextSibling;
            }

            if (current.Char == nextChar)
            {
                current.AddExpression(rest, caseSensitive, needSeparators, matcher, tag);
                return;
            }

            // No sibling carries this character: append a new one at the tail.
            TokenTreeNode appended = new TokenTreeNode();
            current.NextSibling = appended;
            appended.AddExpression(rest, caseSensitive, needSeparators, matcher, tag);
        }
        /// <summary>
        /// Creates an empty token tree: allocates the per-character root table,
        /// assigns the default separator set, builds the character translation
        /// table (identity, except that tab maps to space) and creates the root node.
        /// </summary>
        public TokenTree()
        {
            // One slot for every possible UTF-16 code unit.
            const int charCount = 65536;

            nodes = new TokenTreeNode[charCount];
            Separators = ".,;:<>[](){}!\"#¤%&/=?*+-/\\ \t\n\r";

            textLookup = new char[charCount];
            int code = 0;
            while (code < charCount)
            {
                textLookup[code] = (char) code;
                code++;
            }

            // Tabs are looked up as if they were spaces.
            textLookup['\t'] = ' ';

            root = new TokenTreeNode();
        }
Beispiel #3
0
        //this is wicked fast
        //do not refactor extract methods from this if you want to keep the speed
        /// <summary>
        /// Scans <paramref name="text"/> forward from <paramref name="startIndex"/> and
        /// returns the match found at the first position where anything matches.
        /// At each position the prefixless matchers on the root node are tried first,
        /// then the token tree is walked character by character, trying the matchers
        /// and end-of-token markers of every node on the way.
        /// </summary>
        /// <param name="text">Text to scan; must not be null or empty.</param>
        /// <param name="startIndex">Position at which scanning starts.</param>
        /// <returns>
        /// The recorded match (its Text, Index, Length, Found and Tag are set here),
        /// or <c>MatchResult.NoMatch</c> when nothing matched.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when <c>StringUtils.IsNullOrEmpty(text)</c> is true.</exception>
        // NOTE(review): startIndex is not range-checked in this method. A value >= text.Length
        // simply skips the loop and yields NoMatch; a negative value reaches text[currentIndex] - confirm callers never pass one.
        public MatchResult Match(string text, int startIndex)
        {
            if (StringUtils.IsNullOrEmpty(text))
            {
                throw new ArgumentException("text may not be empty", "text"); // do not localize
            }
            MatchResult lastMatch = new MatchResult();

            lastMatch.Text = text;
            int textLength = text.Length;

            for (int currentIndex = startIndex; currentIndex < textLength; currentIndex++)
            {
                //call any prefixless patternmatchers

                #region HasExpressions

                if (root.FirstExpression != null)
                {
                    //begin with the first expression of the _root node_
                    PatternMatchReference patternMatcherReference = root.FirstExpression;
                    while (patternMatcherReference != null)
                    {
                        // The matcher's return value is used as a match length below (despite the "Index" name).
                        int expressionMatchIndex = patternMatcherReference.Matcher.Match(text, currentIndex);
                        // Only a strictly longer result replaces the current best match.
                        if (expressionMatchIndex > 0 && expressionMatchIndex > lastMatch.Length)
                        {
                            lastMatch.Index  = currentIndex;
                            lastMatch.Length = expressionMatchIndex;
                            lastMatch.Found  = true;
                            lastMatch.Tag    = patternMatcherReference.Tag;
                        }

                        patternMatcherReference = patternMatcherReference.NextSibling;
                    }
                }

                #endregion

                //lookup the first token tree node
                // The root table is indexed by the raw character (no textLookup / lower-casing at this level).
                TokenTreeNode node = nodes[(int)text[currentIndex]];
                if (node == null)
                {
                    // No token starts with this character: stop if a prefixless matcher
                    // already matched here, otherwise move on to the next position.
                    if (lastMatch.Found)
                    {
                        break;
                    }
                    else
                    {
                        continue;
                    }
                }


                // matchIndex is the index just past the characters consumed so far,
                // so matchIndex - currentIndex is the length of the prefix walked.
                for (int matchIndex = currentIndex + 1; matchIndex <= textLength; matchIndex++)
                {
                    //call patternmatchers for the current prefix

                    #region HasExpressions

                    if (node.FirstExpression != null)
                    {
                        //begin with the first expression of the _current node_
                        PatternMatchReference patternMatcherReference = node.FirstExpression;
                        while (patternMatcherReference != null)
                        {
                            int expressionMatchIndex = patternMatcherReference.Matcher.Match(text, matchIndex);
                            // NOTE(review): the comparison uses the matcher's result alone, while the stored
                            // length below also includes the prefix (matchIndex - currentIndex) - confirm intended.
                            if (expressionMatchIndex > 0 && expressionMatchIndex > lastMatch.Length)
                            {
                                lastMatch.Index  = currentIndex;
                                lastMatch.Length = expressionMatchIndex + matchIndex - currentIndex;
                                lastMatch.Found  = true;
                                lastMatch.Tag    = patternMatcherReference.Tag;
                            }

                            patternMatcherReference = patternMatcherReference.NextSibling;
                        }
                    }

                    #endregion

                    #region IsEndNode

                    // A token ends on this node; it is accepted when it is at least as long as the best match so far.
                    if (node.IsEnd && matchIndex - currentIndex >= lastMatch.Length)
                    {
                        // The start and the end of the text count as separators.
                        bool leftIsSeparator  = currentIndex == 0 ? true : separatorCharLookup[text[currentIndex - 1]];
                        bool rightIsSeparator = matchIndex == textLength ? true : separatorCharLookup[text[matchIndex]];

                        if (!node.NeedSeparators || (leftIsSeparator && rightIsSeparator))
                        {
                            //either this node does not require separators, or both sides are separators
                            lastMatch.Index  = currentIndex;
                            lastMatch.Tag    = node.Tag;
                            lastMatch.Found  = true;
                            lastMatch.Length = matchIndex - currentIndex;
                            //TODO:perform case test here , case sensitive words might be matched even if they have incorrect case
                            // The match reaches the end of the text, so nothing longer can follow.
                            if (currentIndex + lastMatch.Length == textLength)
                            {
                                break;
                            }
                        }
                    }

                    #endregion

                    // No character left to descend on.
                    if (matchIndex >= textLength)
                    {
                        break;
                    }
                    //try fetch a node at this index
                    // The slot is the low 8 bits of the translated character (textLookup),
                    // lower-cased first when the current node holds case-insensitive data.
                    node =
                        node.ChildNodes[
                            node.ContainsCaseInsensitiveData
                                ? (int)CharUtils.ToLower(textLookup[(int)text[matchIndex]]) & 0xff
                                : (int)textLookup[(int)text[matchIndex]] & 0xff];


                    //walk the sibling chain until a node's Char equals the current character;
                    //the comparison is case-insensitive for nodes flagged ContainsCaseInsensitiveData, case-sensitive otherwise
                    while (node != null
                               ?
                           (node.ContainsCaseInsensitiveData
                                ? (node.Char != CharUtils.ToLower(textLookup[(int)text[matchIndex]]))
                                : (node.Char != textLookup[(int)text[matchIndex]]))
                               : false)
                    {
                        node = node.NextSibling;
                    }

                    //we found no node on the lookupindex or none of the siblingnodes at that index matched the current char
                    if (node == null)
                    {
                        break; // continue with the next character
                    }
                }

                //return last match
                // Scanning stops at the first start position that produced any match.
                if (lastMatch.Found)
                {
                    return(lastMatch);
                }
            }

            // Reached when the loop ran out of text or was left through the break above.
            if (lastMatch.Found)
            {
                return(lastMatch);
            }
            else
            {
                //no match was found
                return(MatchResult.NoMatch);
            }
        }
        /// <summary>
        /// Registers <paramref name="matcher"/> under the exact spelling of
        /// <paramref name="prefix"/>, creating the root-table node for its first
        /// character when that slot is still empty.
        /// </summary>
        private void AddExpressionWithCaseSensitivePrefix(string prefix, bool needSeparators, IPatternMatcher matcher,
            object tag)
        {
            // The first character of the prefix selects the slot in the root table.
            int slot = prefix[0];

            TokenTreeNode entry = nodes[slot];
            if (entry == null)
            {
                entry = new TokenTreeNode();
                nodes[slot] = entry;
            }

            entry.AddExpression(prefix, true, needSeparators, matcher, tag);
        }
        /// <summary>
        /// Registers <paramref name="matcher"/> under a case-insensitive prefix.
        /// The prefix is lower-cased and added twice: once as-is and once with an
        /// upper-case first character, because the root table is indexed by the
        /// exact first character.
        /// </summary>
        /// <param name="prefix">Prefix text; must contain at least one character.</param>
        /// <param name="needSeparators">Forwarded to <c>AddExpression</c>.</param>
        /// <param name="matcher">Matcher to register at the end of the prefix path.</param>
        /// <param name="tag">Tag stored with the registered matcher.</param>
        private void AddExpressionWithCaseInsensitivePrefix(string prefix, bool needSeparators, IPatternMatcher matcher,
            object tag)
        {
            //make a lowercase string and add it as a token
            prefix = prefix.ToLower();
            char lowerStart = prefix[0];
            int startIndex = (int) lowerStart;
            if (nodes[startIndex] == null)
                nodes[startIndex] = new TokenTreeNode();

            nodes[startIndex].AddExpression(prefix, false, needSeparators, matcher, tag);

            // When the first character has no distinct upper-case form (digit,
            // punctuation, ...) the second spelling equals the first one. Adding it
            // again would follow the same path and put a second reference to the
            // same matcher on the final node, so the matcher would be invoked twice
            // for every candidate position.
            char upperStart = char.ToUpper(lowerStart);
            if (upperStart == lowerStart)
                return;

            //make a lowercase string with a uppercase start char and add it as a token
            prefix = upperStart + prefix.Substring(1);
            startIndex = (int) upperStart;
            if (nodes[startIndex] == null)
                nodes[startIndex] = new TokenTreeNode();

            nodes[startIndex].AddExpression(prefix, false, needSeparators, matcher, tag);
        }
        /// <summary>
        /// Adds <paramref name="text"/> as a token under its exact spelling,
        /// creating the root-table node for its first character when needed.
        /// </summary>
        private void AddCaseSensitiveToken(string text, bool needSeparators, object tag)
        {
            // The first character of the token selects the slot in the root table.
            int slot = text[0];

            TokenTreeNode entry = nodes[slot];
            if (entry == null)
            {
                entry = new TokenTreeNode();
                nodes[slot] = entry;
            }

            entry.AddToken(text, true, needSeparators, tag);
        }
        /// <summary>
        /// Adds <paramref name="text"/> as a case-insensitive token. Two spellings
        /// are inserted - all lower-case, and lower-case with an upper-case first
        /// character - each under the root-table slot of its own first character.
        /// </summary>
        private void AddCaseInsensitiveToken(string text, bool needSeparators, object tag)
        {
            string lowered = text.ToLower();
            string capitalized = char.ToUpper(lowered[0]) + lowered.Substring(1);

            // Lower-case spelling first, then the capitalized one.
            string[] spellings = new string[] { lowered, capitalized };
            foreach (string spelling in spellings)
            {
                int slot = spelling[0];

                TokenTreeNode entry = nodes[slot];
                if (entry == null)
                {
                    entry = new TokenTreeNode();
                    nodes[slot] = entry;
                }

                entry.AddToken(spelling, false, needSeparators, tag);
            }
        }
        /// <summary>
        /// Adds the token spelled by <paramref name="text"/> below this node,
        /// creating child and sibling nodes as needed, and marks the node of the
        /// last character as the end of a token.
        /// </summary>
        /// <param name="text">Remaining token text; its first character becomes this node's <c>Char</c>.</param>
        /// <param name="caseSensitive">When false, every node on the path is flagged as containing case-insensitive data.</param>
        /// <param name="needSeparators">Stored on the end node.</param>
        /// <param name="tag">Stored on the end node.</param>
        /// <exception cref="System.ArgumentException">Thrown when <paramref name="text"/> is null or empty.</exception>
        public void AddToken(string text, bool caseSensitive, bool needSeparators, object tag)
        {
            // Same contract as AddExpression: reject null/empty input explicitly
            // instead of failing on text[0] below.
            if (text == null || text.Length == 0)
                throw new System.ArgumentException("text may not be empty", "text"); // do not localize

            Char = text[0];

            if (!caseSensitive)
                ContainsCaseInsensitiveData = true;

            if (text.Length == 1)
            {
                // Last character: this node terminates the token.
                IsEnd = true;
                Tag = tag;
                NeedSeparators = needSeparators;
                CaseSensitive = caseSensitive;
            }
            else
            {
                string leftovers = text.Substring(1);
                char childChar = leftovers[0];
                int childIndex = (int) childChar & 0xff;
                    //make a lookupindex (dont mind if unicode chars end up as siblings as ascii)

                TokenTreeNode node = ChildNodes[childIndex];
                if (node == null)
                {
                    // Empty slot: the new node becomes the head of the sibling chain.
                    TokenTreeNode child = new TokenTreeNode();
                    ChildNodes[childIndex] = child;
                    child.AddToken(leftovers, caseSensitive, needSeparators, tag);

                    if (child.Char == ' ')
                    {
                        // if the node contains " " (whitespace)
                        // then add the node as a childnode of itself.
                        // thus allowing it to parse things like
                        // "end         sub" even if the pattern is "end sub" // do not localize
                        child.ChildNodes[(int) ' '] = child;
                    }
                }
                else
                {
                    // Walk the chain until the matching character or the last sibling.
                    while (node.NextSibling != null && node.Char != childChar)
                    {
                        node = node.NextSibling;
                    }

                    if (node.Char != childChar)
                    {
                        // No sibling carries this character: append a new one at the tail.
                        TokenTreeNode child = new TokenTreeNode();
                        node.NextSibling = child;
                        child.AddToken(leftovers, caseSensitive, needSeparators, tag);
                    }
                    else
                    {
                        node.AddToken(leftovers, caseSensitive, needSeparators, tag);
                    }
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Attaches <paramref name="matcher"/> to the node reached by following the
        /// characters of <paramref name="text"/>, building any missing nodes.
        /// </summary>
        /// <param name="text">Remaining prefix; must not be null or empty.</param>
        /// <param name="caseSensitive">When false, this node is flagged as containing case-insensitive data.</param>
        /// <param name="needSeparators">Passed along to the recursive calls only.</param>
        /// <param name="matcher">Matcher to attach at the end of the path.</param>
        /// <param name="tag">Tag stored next to the matcher.</param>
        public void AddExpression(string text, bool caseSensitive, bool needSeparators, IPatternMatcher matcher,
                                  object tag)
        {
            if (StringUtils.IsNullOrEmpty(text))
                throw new ArgumentException("text may not be empty", "text"); // do not localize

            Char = text[0];

            if (!caseSensitive)
                ContainsCaseInsensitiveData = true;

            if (text.Length == 1)
            {
                // End of the prefix: the new reference becomes the head of the
                // expression list, in front of any earlier ones.
                PatternMatchReference added = new PatternMatchReference(matcher);
                added.NextSibling = FirstExpression;
                added.Tag = tag;
                FirstExpression = added;
                return;
            }

            string tail = text.Substring(1);
            char tailStart = tail[0];
            int bucket = tailStart & 0xff; // low byte picks the child slot

            TokenTreeNode head = ChildNodes[bucket];
            if (head == null)
            {
                TokenTreeNode fresh = new TokenTreeNode();
                ChildNodes[bucket] = fresh;
                fresh.AddExpression(tail, caseSensitive, needSeparators, matcher, tag);

                // A whitespace node points back at itself, so repeated spaces
                // ("end         sub") are consumed by the single space of "end sub". // do not localize
                if (fresh.Char == ' ')
                    fresh.ChildNodes[(int)' '] = fresh;
            }
            else
            {
                // Find the sibling carrying this character, or stop on the last one.
                TokenTreeNode cursor = head;
                for (; cursor.NextSibling != null && cursor.Char != tailStart; cursor = cursor.NextSibling)
                {
                }

                TokenTreeNode target = cursor;
                if (cursor.Char != tailStart)
                {
                    // Not present in the chain: append a new sibling and descend into it.
                    target = new TokenTreeNode();
                    cursor.NextSibling = target;
                }

                target.AddExpression(tail, caseSensitive, needSeparators, matcher, tag);
            }
        }