Пример #1
0
        // Scans `text` left to right, starting at `startIndex`, and returns the first match found.
        //
        // For every start position the candidates are, in order:
        //   1. the pattern matchers attached to the root node (patterns without a literal prefix),
        //   2. the token tree, walked one character at a time; every node on the path may carry
        //      pattern matchers (patterns with a literal prefix) and may be an end node (a complete token).
        // The scan stops at the first start position that produced any candidate. Among the candidates
        // of that position the longest one wins; a pattern only replaces a strictly shorter candidate,
        // while a token end node also replaces an equally long one.
        //
        // text:       the text to scan; an ArgumentException is thrown when StringUtils.IsNullOrEmpty(text) is true.
        // startIndex: index of the first character to try as the start of a match.
        // returns:    a MatchResult with Text, Index, Length, Tag and Found filled in,
        //             or MatchResult.NoMatch when nothing matched.
        //
        //this is wicked fast
        //do not refactor extract methods from this if you want to keep the speed
        public MatchResult Match(string text, int startIndex)
        {
            if (StringUtils.IsNullOrEmpty(text))
            {
                throw new ArgumentException("text may not be empty", "text"); // do not localize
            }

            MatchResult lastMatch = new MatchResult();
            lastMatch.Text = text;
            int textLength = text.Length;

            for (int currentIndex = startIndex; currentIndex < textLength; currentIndex++)
            {
                //call any prefixless patternmatchers

                #region HasExpressions

                if (root.FirstExpression != null)
                {
                    //begin with the first expression of the _root node_
                    PatternMatchReference patternMatcherReference = root.FirstExpression;
                    while (patternMatcherReference != null)
                    {
                        //the matcher result is used as the length of the match starting at currentIndex
                        int expressionMatchIndex = patternMatcherReference.Matcher.Match(text, currentIndex);
                        if (expressionMatchIndex > 0 && expressionMatchIndex > lastMatch.Length)
                        {
                            lastMatch.Index  = currentIndex;
                            lastMatch.Length = expressionMatchIndex;
                            lastMatch.Found  = true;
                            lastMatch.Tag    = patternMatcherReference.Tag;
                        }

                        patternMatcherReference = patternMatcherReference.NextSibling;
                    }
                }

                #endregion

                //lookup the first token tree node, indexed by the character code at currentIndex
                TokenTreeNode node = nodes[(int)text[currentIndex]];
                if (node == null)
                {
                    //no token starts with this character:
                    //stop if a root pattern already matched here, otherwise try the next start position
                    if (lastMatch.Found)
                    {
                        break;
                    }
                    else
                    {
                        continue;
                    }
                }

                //matchIndex is the index just past the characters consumed so far,
                //so matchIndex - currentIndex is the length of the prefix matched by the tree
                for (int matchIndex = currentIndex + 1; matchIndex <= textLength; matchIndex++)
                {
                    //call patternmatchers for the current prefix

                    #region HasExpressions

                    if (node.FirstExpression != null)
                    {
                        //begin with the first expression of the _current node_
                        PatternMatchReference patternMatcherReference = node.FirstExpression;
                        while (patternMatcherReference != null)
                        {
                            int expressionMatchIndex = patternMatcherReference.Matcher.Match(text, matchIndex);

                            //the candidate spans the literal prefix plus the expression part, so it is the
                            //total length that has to beat the best match so far (lastMatch.Length is always
                            //a total length measured from currentIndex); comparing only the expression part
                            //would discard candidates that are longer overall
                            int candidateLength = expressionMatchIndex + matchIndex - currentIndex;
                            if (expressionMatchIndex > 0 && candidateLength > lastMatch.Length)
                            {
                                lastMatch.Index  = currentIndex;
                                lastMatch.Length = candidateLength;
                                lastMatch.Found  = true;
                                lastMatch.Tag    = patternMatcherReference.Tag;
                            }

                            patternMatcherReference = patternMatcherReference.NextSibling;
                        }
                    }

                    #endregion

                    #region IsEndNode

                    //an end node replaces the current best match when it is at least as long
                    if (node.IsEnd && matchIndex - currentIndex >= lastMatch.Length)
                    {
                        //the start and the end of the text count as separators
                        bool leftIsSeparator  = currentIndex == 0 || separatorCharLookup[text[currentIndex - 1]];
                        bool rightIsSeparator = matchIndex == textLength || separatorCharLookup[text[matchIndex]];

                        if (!node.NeedSeparators || (leftIsSeparator && rightIsSeparator))
                        {
                            //either this node does not require separators on the sides, or both sides have one
                            lastMatch.Index  = currentIndex;
                            lastMatch.Tag    = node.Tag;
                            lastMatch.Found  = true;
                            lastMatch.Length = matchIndex - currentIndex;
                            //TODO:perform case test here , case sensitive words might be matched even if they have incorrect case
                            if (currentIndex + lastMatch.Length == textLength)
                            {
                                //the match reaches the end of the text, nothing longer is possible
                                break;
                            }
                        }
                    }

                    #endregion

                    if (matchIndex >= textLength)
                    {
                        //no character left to descend with
                        break;
                    }

                    //try fetch a node at this index:
                    //the child bucket is selected by the low 8 bits of the character after it went through
                    //textLookup (and through CharUtils.ToLower when the node holds case insensitive data)
                    node =
                        node.ChildNodes[
                            node.ContainsCaseInsensitiveData
                                ? (int)CharUtils.ToLower(textLookup[(int)text[matchIndex]]) & 0xff
                                : (int)textLookup[(int)text[matchIndex]] & 0xff];

                    //several nodes can share a bucket: walk the sibling chain until a node matches the
                    //current char, using the case insensitive or the case sensitive comparison per node
                    while (node != null &&
                           (node.ContainsCaseInsensitiveData
                                ? node.Char != CharUtils.ToLower(textLookup[(int)text[matchIndex]])
                                : node.Char != textLookup[(int)text[matchIndex]]))
                    {
                        node = node.NextSibling;
                    }

                    //we found no node on the lookupindex or none of the siblingnodes at that index matched the current char
                    if (node == null)
                    {
                        break; // continue with the next character
                    }
                }

                //return last match
                if (lastMatch.Found)
                {
                    return lastMatch;
                }
            }

            //reached when the text ran out, or when the scan stopped on a root pattern match
            if (lastMatch.Found)
            {
                return lastMatch;
            }
            else
            {
                //no match was found
                return MatchResult.NoMatch;
            }
        }
Пример #2
0
        // Finds the first match in `text`, trying start positions from `startIndex` onwards.
        //
        // At each start position the root node's pattern matchers (patterns without a literal prefix)
        // are tried first, then the token tree is walked character by character; each node on the path
        // can contribute pattern matchers (patterns behind a literal prefix) and can be an end node
        // (a complete token). The first start position that yields a candidate ends the scan, and the
        // longest candidate at that position is returned. Patterns replace only strictly shorter
        // candidates; token end nodes also replace candidates of equal length.
        //
        // text:       text to scan; ArgumentException is thrown when StringUtils.IsNullOrEmpty(text) is true.
        // startIndex: index of the first character considered as the start of a match.
        // returns:    a MatchResult carrying Text, Index, Length, Tag and Found,
        //             or MatchResult.NoMatch when nothing matched.
        //
        //this is wicked fast
        //do not refactor extract methods from this if you want to keep the speed
        public MatchResult Match(string text, int startIndex)
        {
            if (StringUtils.IsNullOrEmpty(text))
                throw new ArgumentException("text may not be empty", "text"); // do not localize

            MatchResult lastMatch = new MatchResult();
            lastMatch.Text = text;
            int textLength = text.Length;

            for (int currentIndex = startIndex; currentIndex < textLength; currentIndex ++)
            {
                //call any prefixless patternmatchers

                #region HasExpressions

                if (root.FirstExpression != null)
                {
                    //begin with the first expression of the _root node_
                    PatternMatchReference patternMatcherReference = root.FirstExpression;
                    while (patternMatcherReference != null)
                    {
                        //the value returned by the matcher is used as the match length from currentIndex
                        int expressionMatchIndex = patternMatcherReference.Matcher.Match(text, currentIndex);
                        if (expressionMatchIndex > 0 && expressionMatchIndex > lastMatch.Length)
                        {
                            lastMatch.Index = currentIndex;
                            lastMatch.Length = expressionMatchIndex;
                            lastMatch.Found = true;
                            lastMatch.Tag = patternMatcherReference.Tag;
                        }

                        patternMatcherReference = patternMatcherReference.NextSibling;
                    }
                }

                #endregion

                //lookup the first token tree node (table indexed by the character code at currentIndex)
                TokenTreeNode node = nodes[(int) text[currentIndex]];
                if (node == null)
                {
                    //no token begins with this character: finish if a root pattern matched here,
                    //otherwise move on to the next start position
                    if (lastMatch.Found)
                        break;
                    else
                        continue;
                }

                //matchIndex points just past the consumed characters;
                //matchIndex - currentIndex is the length of the prefix matched through the tree
                for (int matchIndex = currentIndex + 1; matchIndex <= textLength; matchIndex++)
                {
                    //call patternmatchers for the current prefix

                    #region HasExpressions

                    if (node.FirstExpression != null)
                    {
                        //begin with the first expression of the _current node_
                        PatternMatchReference patternMatcherReference = node.FirstExpression;
                        while (patternMatcherReference != null)
                        {
                            int expressionMatchIndex = patternMatcherReference.Matcher.Match(text, matchIndex);

                            //lastMatch.Length always holds a total length measured from currentIndex, so the
                            //candidate must be compared by its total length (literal prefix + expression part);
                            //testing the expression part alone would reject candidates that are longer overall
                            int candidateLength = expressionMatchIndex + matchIndex - currentIndex;
                            if (expressionMatchIndex > 0 && candidateLength > lastMatch.Length)
                            {
                                lastMatch.Index = currentIndex;
                                lastMatch.Length = candidateLength;
                                lastMatch.Found = true;
                                lastMatch.Tag = patternMatcherReference.Tag;
                            }

                            patternMatcherReference = patternMatcherReference.NextSibling;
                        }
                    }

                    #endregion

                    #region IsEndNode

                    //a complete token replaces the best match so far when it is at least as long
                    if (node.IsEnd && matchIndex - currentIndex >= lastMatch.Length)
                    {
                        //both ends of the text are treated as separators
                        bool leftIsSeparator = currentIndex == 0 || separatorCharLookup[text[currentIndex - 1]];
                        bool rightIsSeparator = matchIndex == textLength || separatorCharLookup[text[matchIndex]];

                        if (!node.NeedSeparators || (leftIsSeparator && rightIsSeparator))
                        {
                            //the node needs no separators on the sides, or it has one on each side
                            lastMatch.Index = currentIndex;
                            lastMatch.Tag = node.Tag;
                            lastMatch.Found = true;
                            lastMatch.Length = matchIndex - currentIndex;
                            //TODO:perform case test here , case sensitive words might be matched even if they have incorrect case
                            //a match that reaches the end of the text cannot be extended
                            if (currentIndex + lastMatch.Length == textLength)
                                break;
                        }
                    }

                    #endregion

                    //no character left to descend with
                    if (matchIndex >= textLength)
                        break;

                    //try fetch a node at this index: the bucket is chosen by the low 8 bits of the character
                    //after textLookup (and CharUtils.ToLower when the node holds case insensitive data)
                    node =
                        node.ChildNodes[
                            node.ContainsCaseInsensitiveData
                                ? (int) CharUtils.ToLower(textLookup[(int) text[matchIndex]]) & 0xff
                                : (int) textLookup[(int) text[matchIndex]] & 0xff];

                    //nodes sharing a bucket are chained as siblings: advance until one matches the current
                    //char, comparing case insensitively or case sensitively depending on each node
                    while (node != null &&
                           (node.ContainsCaseInsensitiveData
                                ? node.Char != CharUtils.ToLower(textLookup[(int) text[matchIndex]])
                                : node.Char != textLookup[(int) text[matchIndex]]))
                        node = node.NextSibling;

                    //we found no node on the lookupindex or none of the siblingnodes at that index matched the current char
                    if (node == null)
                        break; // continue with the next character
                }

                //return last match
                if (lastMatch.Found)
                    return lastMatch;
            }

            //reached when the text is exhausted, or when the scan stopped on a root pattern match
            if (lastMatch.Found)
            {
                return lastMatch;
            }
            else
            {
                //no match was found
                return MatchResult.NoMatch;
            }
        }