Ejemplo n.º 1
0
        /// <summary>
        /// Registers a pattern matcher behind a case-sensitive prefix. The top-level
        /// node for the prefix's first character is created on demand.
        /// </summary>
        /// <param name="prefix">Literal text that introduces the pattern; its first character is read without a length check.</param>
        /// <param name="matcher">Matcher handed to the node together with the prefix.</param>
        /// <param name="needSeparators">Forwarded unchanged to the node's <c>AddPattern</c>.</param>
        /// <param name="tags">Forwarded unchanged to the node's <c>AddPattern</c>.</param>
        private void AddPatternWithCaseSensitivePrefix(string prefix, IPatternMatcher matcher, bool needSeparators, object[] tags)
        {
            // The first character's code unit doubles as the slot in the top-level node table.
            int slot = prefix[0];

            var entry = nodes[slot];
            if (entry == null)
            {
                entry = new TokenTreeNode();
                nodes[slot] = entry;
            }

            entry.AddPattern(prefix, true, needSeparators, matcher, tags);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Registers a case-sensitive token. The top-level node for the token's
        /// first character is created on demand.
        /// </summary>
        /// <param name="text">Token text; its first character is read without a length check.</param>
        /// <param name="needSeparators">Forwarded unchanged to the node's <c>AddToken</c>.</param>
        /// <param name="tags">Forwarded unchanged to the node's <c>AddToken</c>.</param>
        private void AddCaseSensitiveToken(string text, bool needSeparators, object[] tags)
        {
            // The first character's code unit doubles as the slot in the top-level node table.
            int slot = text[0];

            var entry = nodes[slot];
            if (entry == null)
            {
                entry = new TokenTreeNode();
                nodes[slot] = entry;
            }

            entry.AddToken(text, true, needSeparators, tags);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates an empty token tree: allocates the per-character node table,
        /// assigns the default separator characters, builds the character lookup
        /// table and creates the root node.
        /// </summary>
        public TokenTree()
        {
            // One slot for every possible char value.
            const int charCount = 65536;

            nodes = new TokenTreeNode[charCount];
            Separators = ".,;:<>[](){}!\"#¤%&/=?*+-/\\ \t\n\r";

            // Every character maps to itself ...
            var lookup = new char[charCount];
            for (int code = 0; code < lookup.Length; code++)
            {
                lookup[code] = (char) code;
            }

            // ... except tab, which is looked up as a plain space.
            lookup['\t'] = ' ';
            textLookup = lookup;

            root = new TokenTreeNode();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Adds <paramref name="prefix"/> to the tree below this node and attaches
        /// <paramref name="matcher"/> to the node that ends the prefix.
        /// </summary>
        /// <param name="prefix">Literal text that introduces the pattern. Must not be null or empty.</param>
        /// <param name="caseSensitive">Passed to <c>AddTokenInternal</c> when the prefix path is built.</param>
        /// <param name="needSeparators">Stored on the created pattern reference.</param>
        /// <param name="matcher">Matcher wrapped by the created pattern reference.</param>
        /// <param name="tags">Stored on the created pattern reference.</param>
        /// <exception cref="ArgumentNullException"><paramref name="prefix"/> is null or empty.</exception>
        public void AddPattern(string prefix, bool caseSensitive, bool needSeparators, IPatternMatcher matcher,
                               object[] tags)
        {
            if (string.IsNullOrEmpty(prefix))
            {
                throw new ArgumentNullException("prefix");
            }

            TokenTreeNode node = AddTokenInternal(prefix, caseSensitive);

            var patternMatcherReference = new PatternMatchReference(matcher)
            {
                // Chain onto the list of the node that receives the reference.
                // Linking to this node's own FirstExpression would drop the matchers
                // already registered on 'node' and splice in an unrelated list
                // whenever the prefix is longer than one character.
                NextSibling    = node.FirstExpression,
                Tags           = tags,
                NeedSeparators = needSeparators
            };

            // The new reference becomes the head of the target node's list.
            node.FirstExpression = patternMatcherReference;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Registers a pattern matcher behind a case-insensitive prefix. The prefix is
        /// added twice: once fully lower-cased and once lower-cased with an upper-case
        /// first character, so that both spellings of the first character have a
        /// top-level node.
        /// </summary>
        /// <param name="prefix">Literal text that introduces the pattern; its first character is read without a length check.</param>
        /// <param name="matcher">Matcher handed to the nodes together with the prefix.</param>
        /// <param name="needSeparators">Forwarded unchanged to the node's <c>AddPattern</c>.</param>
        /// <param name="tags">Forwarded unchanged to the node's <c>AddPattern</c>.</param>
        private void AddPatternWithCaseInsensitivePrefix(string prefix, IPatternMatcher matcher, bool needSeparators, object[] tags)
        {
            //make a lowercase string and add it as a token
            //invariant casing is used so the registered text does not depend on the
            //current culture (e.g. "I" lower-cases to a dotless i under tr-TR)
            prefix = prefix.ToLowerInvariant();
            char startChar = prefix[0];
            int startIndex = startChar;
            if (nodes[startIndex] == null)
            {
                nodes[startIndex] = new TokenTreeNode();
            }

            nodes[startIndex].AddPattern(prefix, false, needSeparators, matcher, tags);

            //make a lowercase string with a uppercase start char and add it as a token
            prefix = char.ToUpperInvariant(startChar) + prefix.Substring(1);
            startChar = prefix[0];
            startIndex = startChar;
            if (nodes[startIndex] == null)
            {
                nodes[startIndex] = new TokenTreeNode();
            }

            nodes[startIndex].AddPattern(prefix, false, needSeparators, matcher, tags);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Registers a case-insensitive token. The token is added twice: once fully
        /// lower-cased and once lower-cased with an upper-case first character, so
        /// that both spellings of the first character have a top-level node.
        /// </summary>
        /// <param name="text">Token text; its first character is read without a length check.</param>
        /// <param name="needSeparators">Forwarded unchanged to the node's <c>AddToken</c>.</param>
        /// <param name="tags">Forwarded unchanged to the node's <c>AddToken</c>.</param>
        private void AddCaseInsensitiveToken(string text, bool needSeparators, object[] tags)
        {
            //make a lowercase string and add it as a token
            //invariant casing is used so the registered text does not depend on the
            //current culture (e.g. "I" lower-cases to a dotless i under tr-TR)
            text = text.ToLowerInvariant();
            char startChar = text[0];
            int startIndex = startChar;

            if (nodes[startIndex] == null)
            {
                nodes[startIndex] = new TokenTreeNode();
            }

            nodes[startIndex].AddToken(text, false, needSeparators, tags);

            //make a lowercase string with a uppercase start char and add it as a token
            text = char.ToUpperInvariant(startChar) + text.Substring(1);
            startChar = text[0];
            startIndex = startChar;
            if (nodes[startIndex] == null)
            {
                nodes[startIndex] = new TokenTreeNode();
            }

            nodes[startIndex].AddToken(text, false, needSeparators, tags);
        }
        /// <summary>
        /// Stores the first character of <paramref name="token"/> on this node and
        /// recursively adds the remaining characters below it.
        /// </summary>
        /// <param name="token">Text to add; its first character is read without a length check.</param>
        /// <param name="caseSensitive">When false, this node is flagged as containing case-insensitive data.</param>
        /// <returns>The node that holds the last character of <paramref name="token"/>.</returns>
        public TokenTreeNode AddTokenInternal(string token, bool caseSensitive)
        {
            Char = token[0];

            if (!caseSensitive)
            {
                ContainsCaseInsensitiveData = true;
            }

            // A single character ends on this very node.
            if (token.Length == 1)
            {
                return this;
            }

            string rest = token.Substring(1);
            char   next = rest[0];

            // Only the low byte selects the bucket; characters that share a low byte
            // share the bucket and are told apart through the sibling chain.
            int bucket = next & 0xff;

            TokenTreeNode existing = ChildNodes[bucket];
            if (existing != null)
            {
                TokenTreeNode match = GetMatchingNode(next, existing);
                return match.AddTokenInternal(rest, caseSensitive);
            }

            // Empty bucket: start it with a fresh node and continue below it.
            var created = new TokenTreeNode();
            ChildNodes[bucket] = created;
            TokenTreeNode leaf = created.AddTokenInternal(rest, caseSensitive);

            MakeRepeatingWS(created);

            return leaf;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Stores the first character of <paramref name="token"/> on this node and
        /// recursively adds the remaining characters below it.
        /// </summary>
        /// <param name="token">Text to add; its first character is read without a length check.</param>
        /// <param name="caseSensitive">When false, this node is flagged as containing case-insensitive data.</param>
        /// <returns>The node that holds the last character of <paramref name="token"/>.</returns>
        public TokenTreeNode AddTokenInternal(string token, bool caseSensitive)
        {
            Char = token[0];

            if (!caseSensitive)
            {
                ContainsCaseInsensitiveData = true;
            }

            // A single character ends on this very node.
            if (token.Length == 1)
            {
                return this;
            }

            string tail = token.Substring(1);
            char head = tail[0];

            // Only the low byte selects the bucket; characters that share a low byte
            // share the bucket and are told apart through the sibling chain.
            int slot = head & 0xff;

            TokenTreeNode bucketHead = ChildNodes[slot];
            if (bucketHead != null)
            {
                return GetMatchingNode(head, bucketHead).AddTokenInternal(tail, caseSensitive);
            }

            // Empty bucket: start it with a fresh node and continue below it.
            var fresh = new TokenTreeNode();
            ChildNodes[slot] = fresh;
            TokenTreeNode last = fresh.AddTokenInternal(tail, caseSensitive);

            MakeRepeatingWS(fresh);

            return last;
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Makes a node holding a space character its own child for the space
 /// character, so that a run of spaces in the input keeps returning to the
 /// same node (e.g. "end         sub" against the pattern "end sub").
 /// Nodes holding any other character are left untouched.
 /// </summary>
 /// <param name="child">Node to inspect and, if it holds a space, link to itself.</param>
 private static void MakeRepeatingWS(TokenTreeNode child)
 {
     if (child.Char != ' ')
     {
         return;
     }

     // Self reference: following a space from this node lands on this node again.
     child.ChildNodes[' '] = child;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Walks the sibling chain starting at <paramref name="node"/> and returns the
        /// node whose <c>Char</c> equals <paramref name="childChar"/>. If the chain has
        /// no such node, a new node is appended to the end of the chain and returned.
        /// </summary>
        /// <param name="childChar">Character to look for.</param>
        /// <param name="node">First node of the sibling chain; must not be null.</param>
        /// <returns>The matching node, or the newly appended node (whose <c>Char</c> is not set here).</returns>
        private static TokenTreeNode GetMatchingNode(char childChar, TokenTreeNode node)
        {
            TokenTreeNode current = node;

            while (current.Char != childChar)
            {
                if (current.NextSibling == null)
                {
                    // End of the chain without a hit: append a fresh node.
                    var appended = new TokenTreeNode();
                    current.NextSibling = appended;
                    return appended;
                }

                current = current.NextSibling;
            }

            return current;
        }
Ejemplo n.º 11
0
        //this is wicked fast
        //do not refactor extract methods from this if you want to keep the speed
        /// <summary>
        /// Scans <paramref name="text"/> from <paramref name="startIndex"/> and returns
        /// the first position at which a registered token or pattern matches. At that
        /// position the longest candidate seen is reported.
        /// </summary>
        /// <param name="text">Text to scan. Must not be null or empty.</param>
        /// <param name="startIndex">Index of the first character to consider.</param>
        /// <returns>
        /// A <c>MatchResult</c> with <c>Found</c> set, describing index, length and tags of
        /// the match, or <c>MatchResult.NoMatch</c> when nothing matched.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null or empty.</exception>
        public MatchResult Match(string text, int startIndex)
        {
            if (string.IsNullOrEmpty(text))
            {
                //the constructor argument is the parameter name, not the parameter value
                throw new ArgumentNullException("text");
            }

            var lastMatch = new MatchResult {
                Text = text
            };
            int textLength = text.Length;

            for (int currentIndex = startIndex; currentIndex < textLength; currentIndex++)
            {
                //call any prefixless patternmatchers

                #region HasExpressions

                if (root.FirstExpression != null)
                {
                    //begin with the first expression of the _root node_
                    PatternMatchReference patternMatcherReference = root.FirstExpression;
                    while (patternMatcherReference != null)
                    {
                        //the matcher result is used as the length of the match starting at currentIndex
                        int patternMatchIndex = patternMatcherReference.Matcher.Match(text, currentIndex);
                        if (patternMatchIndex > 0 && patternMatchIndex > lastMatch.Length)
                        {
                            bool leftIsSeparator  = currentIndex == 0 ? true : separatorCharLookup[text[currentIndex - 1]];
                            bool rightIsSeparator = (currentIndex + patternMatchIndex) == textLength ? true : separatorCharLookup[text[currentIndex + patternMatchIndex]];

                            if (!patternMatcherReference.NeedSeparators || (leftIsSeparator && rightIsSeparator))
                            {
                                lastMatch.Index  = currentIndex;
                                lastMatch.Length = patternMatchIndex;
                                lastMatch.Found  = true;
                                lastMatch.Tags   = patternMatcherReference.Tags;
                            }
                        }

                        patternMatcherReference = patternMatcherReference.NextSibling;
                    }
                }

                #endregion

                //lookup the first token tree node
                TokenTreeNode node = nodes[text[currentIndex]];
                if (node == null)
                {
                    if (lastMatch.Found)
                    {
                        break;
                    }

                    continue;
                }


                for (int matchIndex = currentIndex + 1; matchIndex <= textLength; matchIndex++)
                {
                    //call patternmatchers for the current prefix

                    #region HasExpressions

                    if (node.FirstExpression != null)
                    {
                        //begin with the first expression of the _current node_
                        PatternMatchReference patternMatcherReference = node.FirstExpression;
                        while (patternMatcherReference != null)
                        {
                            //the pattern starts right after the prefix, i.e. at matchIndex
                            int patternMatchIndex = patternMatcherReference.Matcher.Match(text, matchIndex);
                            //NOTE(review): this compares the pattern length alone, not prefix + pattern,
                            //against the best length so far - confirm that this is intended
                            if (patternMatchIndex > 0 && patternMatchIndex > lastMatch.Length)
                            {
                                //index of the first character after prefix + pattern;
                                //adding currentIndex as well would count the prefix start twice
                                int patternEndIndex = matchIndex + patternMatchIndex;

                                bool leftIsSeparator  = currentIndex == 0 ? true : separatorCharLookup[text[currentIndex - 1]];
                                bool rightIsSeparator = patternEndIndex == textLength ? true : separatorCharLookup[text[patternEndIndex]];

                                if (!patternMatcherReference.NeedSeparators || (leftIsSeparator && rightIsSeparator))
                                {
                                    lastMatch.Index  = currentIndex;
                                    lastMatch.Length = patternEndIndex - currentIndex;
                                    lastMatch.Found  = true;
                                    lastMatch.Tags   = patternMatcherReference.Tags;
                                }
                            }

                            patternMatcherReference = patternMatcherReference.NextSibling;
                        }
                    }

                    #endregion

                    #region IsEndNode

                    if (node.IsEnd && matchIndex - currentIndex >= lastMatch.Length)
                    {
                        bool leftIsSeparator  = currentIndex == 0 ? true : separatorCharLookup[text[currentIndex - 1]];
                        bool rightIsSeparator = matchIndex == textLength ? true : separatorCharLookup[text[matchIndex]];

                        if (!node.NeedSeparators || (leftIsSeparator && rightIsSeparator))
                        {
                            lastMatch.Index  = currentIndex;
                            lastMatch.Tags   = node.Tags;
                            lastMatch.Found  = true;
                            lastMatch.Length = matchIndex - currentIndex;
                            //TODO:perform case test here , case sensitive words might be matched even if they have incorrect case
                            if (currentIndex + lastMatch.Length == textLength)
                            {
                                break;
                            }
                        }
                    }

                    #endregion

                    //the whole text is consumed, so there is no character left to follow;
                    //without this check text[matchIndex] below reads past the end whenever
                    //the text stops inside a longer token or the end node was rejected
                    if (matchIndex == textLength)
                    {
                        break;
                    }

                    //try fetch a node at this index
                    node = node.GetNextNode(textLookup[text[matchIndex]]);

                    //we found no node on the lookupindex or none of the siblingnodes at that index matched the current char
                    if (node == null)
                    {
                        break; // continue with the next character
                    }
                }

                //return last match
                if (lastMatch.Found)
                {
                    return(lastMatch);
                }
            }

            if (lastMatch.Found)
            {
                return(lastMatch);
            }

            //no match was found
            return(MatchResult.NoMatch);
        }