Beispiel #1
0
        /// <summary>
        /// Parses <c>_pattern</c> into a node tree and stores its root in <c>node</c>.
        /// Does nothing when <c>node</c> is already set.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the pattern is null or does not begin with '^', when
        /// <c>parseSubpattern</c> rejects it, or when parsing completes without
        /// <c>_finished</c> having been set.
        /// </exception>
        public void parsePattern()
        {
            if (node != null)
            {
                // A tree already exists; nothing to do.
                return;
            }

            // Ordinal comparison: '^' is a syntax character, not linguistic text, so the
            // current culture must not influence the check. A null pattern is reported
            // the same way as any other pattern that lacks the leading anchor.
            if (_pattern == null || !_pattern.StartsWith("^", StringComparison.Ordinal))
            {
                throw new Exception("Invalid pattern start");
            }

            _finished = false;

            // These regexes are only needed while parseSubpattern runs (it uses them to
            // decode "{n}" and "{n,m}" quantifiers), so they are created here and dropped
            // again below.
            _exactQuantifierRegex = new Regex("^\\{\\s*(\\d+)\\s*\\}$");
            _rangeQuantifierRegex = new Regex("^\\{\\s*(\\d*)\\s*,\\s*(\\d*)\\s*\\}$");

            try
            {
                // Skip the leading '^' (index 0) and parse the remainder as an unenclosed group.
                node = parseSubpattern(_pattern, new NSRange(1, _pattern.Length - 1), false);
            }
            finally
            {
                // Release the helpers even when parsing throws.
                _exactQuantifierRegex = null;
                _rangeQuantifierRegex = null;
            }

            if (!_finished)
            {
                throw new Exception("Invalid pattern end");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Wraps <paramref name="nodes"/> in a <c>WTReGroup</c> and links each child to
        /// its parent and to its next sibling.
        /// </summary>
        /// <param name="nodes">Child nodes in pattern order; may be empty.</param>
        /// <param name="enclosed">Value used for the group's <c>capturing</c> flag.</param>
        /// <returns>
        /// The single element of <paramref name="nodes"/> when it already is a
        /// <c>WTReGroup</c> (its <c>capturing</c> flag is OR-ed with
        /// <paramref name="enclosed"/>); otherwise a new group holding a copy of the list.
        /// </returns>
        public WTReGroup groupFromNodes(List <WTReNode> nodes, bool enclosed)
        {
            // A lone group does not need a second wrapper around it.
            if (nodes.Count == 1)
            {
                WTReGroup single = nodes[0] as WTReGroup;
                if (single != null)
                {
                    single.capturing |= enclosed;
                    return(single);
                }
            }

            WTReGroup g = new WTReGroup();

            g.children  = new List <WTReNode>(nodes);
            g.capturing = enclosed;

            // setup links; iterating (instead of indexing element 0 up front) keeps an
            // empty node list from raising ArgumentOutOfRangeException
            WTReNode prev = null;

            foreach (WTReNode curr in g.children)
            {
                curr.parent = g;
                if (prev != null)
                {
                    prev.nextSibling = curr;
                }
                prev = curr;
            }

            return(g);
        }
Beispiel #3
0
 /// <summary>
 /// Creates a parser for <paramref name="pattern"/> and parses it immediately.
 /// </summary>
 /// <param name="pattern">Pattern text handed to <c>parsePattern</c>.</param>
 /// <param name="ignoreCase">Stored in <c>_ignoreCase</c>.</param>
 public WTReParser(string pattern, bool ignoreCase)
 {
     // Start without a tree so that parsePattern actually runs.
     node        = null;
     _ignoreCase = ignoreCase;
     _pattern    = pattern;

     parsePattern();
 }
Beispiel #4
0
        /// <summary>
        /// Recursively expands <paramref name="node"/> into states and transitions that
        /// start at <paramref name="state"/>.
        /// </summary>
        /// <param name="node">Pattern node to expand.</param>
        /// <param name="state">State the generated transitions are attached to.</param>
        /// <param name="length">Upper bound applied to the optional repetitions of a quantifier.</param>
        /// <returns>
        /// The state reached after <paramref name="node"/>, or <c>null</c> when the node
        /// type is not handled here.
        /// </returns>
        WTState processNode(WTReNode node, WTState state, int length)
        {
            // End-of-string is tested before the generic character case and additionally
            // marks the state it leads to as final.
            if (node is WTReEndOfString)
            {
                WTState terminal = new WTState();
                terminal.isFinal = true;

                WTTransition edge = new WTTransition();
                edge.nextState = terminal;
                edge.node      = (WTReCharacterBase)node;
                state.transitions.Add(edge);

                return terminal;
            }

            // Any other character node: one transition into a fresh state.
            if (node is WTReCharacterBase)
            {
                WTState target = new WTState();

                WTTransition edge = new WTTransition();
                edge.nextState = target;
                edge.node      = (WTReCharacterBase)node;
                state.transitions.Add(edge);

                return target;
            }

            if (node is WTReQuantifier)
            {
                WTReQuantifier quantifier = (WTReQuantifier)node;

                // Mandatory repetitions are chained one after another.
                WTState cursor = state;
                int     repeat = 0;
                while (repeat < quantifier.countFrom)
                {
                    cursor = processNode(quantifier.child, cursor, length);
                    repeat++;
                }

                // strict quantifier: no optional part to build
                if (quantifier.countFrom == quantifier.countTo)
                {
                    return cursor;
                }

                WTState exit = new WTState();

                // Optional repetitions, capped by 'length'. Every intermediate state gets
                // a node-less transition to the shared exit state.
                int optionalLimit = Math.Min(quantifier.countTo, length);
                for (int n = quantifier.countFrom; n < optionalLimit; n++)
                {
                    // Expand the child first so its transitions are already on 'cursor'.
                    WTState afterChild = processNode(quantifier.child, cursor, length);

                    WTTransition exitEdge = new WTTransition();
                    exitEdge.nextState = exit;
                    exitEdge.node      = null;

                    // Greedy: the exit goes after the child's transitions.
                    // Non-greedy: the exit goes in front of them.
                    if (!quantifier.greedy)
                    {
                        cursor.transitions.Insert(0, exitEdge);
                    }
                    else
                    {
                        cursor.transitions.Add(exitEdge);
                    }

                    cursor = afterChild;
                }

                // The last state reached always leads to the exit as well.
                WTTransition lastEdge = new WTTransition();
                lastEdge.nextState = exit;
                lastEdge.node      = null;
                cursor.transitions.Add(lastEdge);

                return exit;
            }

            if (node is WTReGroup)
            {
                WTReGroup group = (WTReGroup)node;

                // Children are expanded in sequence, each starting where the previous ended.
                WTState tail = state;
                foreach (WTReNode child in group.children)
                {
                    tail = processNode(child, tail, length);
                }

                // A non-capturing group holding exactly one literal also gets a node-less
                // transition from the entry state that records the literal as bypassNode.
                bool singleLiteral = group.children.Count == 1 && (group.children[0] is WTReLiteral);
                if (!group.capturing && singleLiteral)
                {
                    WTTransition bypass = new WTTransition();
                    bypass.node       = null;
                    bypass.bypassNode = (WTReLiteral)group.children[0];
                    bypass.nextState  = tail;
                    state.transitions.Add(bypass);
                }

                return tail;
            }

            if (node is WTReAlteration)
            {
                WTReAlteration alteration = (WTReAlteration)node;

                // Every branch starts at 'state' and ends with a node-less transition
                // into one shared join state.
                WTState join = new WTState();

                foreach (WTReNode branch in alteration.children)
                {
                    WTState branchEnd = processNode(branch, state, length);

                    WTTransition joinEdge = new WTTransition();
                    joinEdge.nextState = join;
                    joinEdge.node      = null;
                    branchEnd.transitions.Add(joinEdge);
                }

                return join;
            }

            // Unknown node type.
            return null;
        }
Beispiel #5
0
        /// <summary>
        /// Parses the slice of <paramref name="pattern"/> described by
        /// <paramref name="range"/> into a group node. Nested "(...)" groups are parsed
        /// recursively; "[...]" sets, escapes and plain characters are delegated to
        /// <c>parseCharset</c>.
        /// </summary>
        /// <param name="pattern">Full pattern text.</param>
        /// <param name="range">Location and length of the slice to parse.</param>
        /// <param name="enclosed">
        /// True when the slice is the content of a parenthesised group. Enables parsing
        /// of a leading "?" group modifier and is used as the group's capturing flag
        /// (cleared by the "?:" modifier).
        /// </param>
        /// <returns>
        /// The group for the slice. When the slice contains '|', the returned group has
        /// a single <c>WTReAlteration</c> child holding one group per alternative.
        /// </returns>
        /// <exception cref="Exception">Thrown for every kind of malformed input.</exception>
        /// <remarks>Sets <c>_finished</c> once the end-of-string marker '$' is consumed.</remarks>
        WTReGroup parseSubpattern(string pattern, NSRange range, bool enclosed)
        {
            List <WTReNode> nodes = new List <WTReNode>(range.length);

            // Groups of the alternatives completed so far; stays null until the first '|'.
            List <WTReNode> alternations = null;
            // Offsets (relative to range.location) of the alternative being parsed.
            int             startPos = 0, endPos = range.length;

            bool     escape = false;
            // Node a following quantifier would apply to; null when none is allowed.
            WTReNode lastnode = null;

            for (int i = 0; i < range.length; i++)
            {
                if (_finished)
                {
                    // '$' was consumed by a nested call, yet characters remain.
                    throw new Exception("Found pattern end in the middle of string");
                }

                char c = pattern[range.location + i];

                if (enclosed && i == 0 && c == '?')
                {
                    // group modifiers are present

                    if (range.length < 3)
                    {
                        throw new Exception("Invalid group found in pattern");
                    }


                    char d = pattern[range.location + i + 1];
                    if (d == '<')
                    {
                        // tagged group (?<style1>…)
                        bool tagClosed = false;

                        for (int j = i + 2; j < range.length; j++)
                        {
                            d = pattern[range.location + j];

                            if (d == '<')
                            {
                                throw new Exception("Invalid group tag found in pattern");
                            }
                            else if (d == '>')
                            {
                                if (j == i + 2)
                                {
                                    throw new Exception("Empty group tag found in pattern");
                                }
                                // resume right after the closing '>'
                                i         = j;
                                tagClosed = true;
                                break;
                            }
                            else if (!char.IsLetterOrDigit(d))
                            {
                                // Only letters and digits may appear in a tag, exactly as
                                // in the (?'style2'…) branch below.
                                throw new Exception("Group tag contains invalid chars");
                            }
                        }

                        if (!tagClosed)
                        {
                            // Without this check the tag text would silently be parsed
                            // as ordinary pattern characters.
                            throw new Exception("Unclosed group tag found in pattern");
                        }
                    }
                    else if (d == '\'')
                    {
                        // tagged group (?'style2'…)
                        bool tagClosed = false;

                        for (int j = i + 2; j < range.length; j++)
                        {
                            d = pattern[range.location + j];

                            if (d == '\'')
                            {
                                if (j == i + 2)
                                {
                                    throw new Exception("Empty group tag found in pattern");
                                }
                                // resume right after the closing quote
                                i         = j;
                                tagClosed = true;
                                break;
                            }
                            else if (!char.IsLetterOrDigit(d))
                            {
                                throw new Exception("Group tag contains invalid chars");
                            }
                        }

                        if (!tagClosed)
                        {
                            throw new Exception("Unclosed group tag found in pattern");
                        }
                    }
                    else if (d == ':')
                    {
                        // non-capturing group
                        enclosed = false;
                        i++;
                    }
                    else
                    {
                        throw new Exception("Unknown group modifier");
                    }

                    continue;
                }

                if (c == '\\' && !escape)
                {
                    // Remember the backslash; the next character completes the escape.
                    escape = true;
                    continue;
                }

                if (escape)
                {
                    if (!isValidEscapedChar(c, false) || i == 0)
                    {
                        throw new Exception("Invalid escape sequence");
                    }

                    // Hand over both the backslash and the escaped character.
                    lastnode = this.parseCharset(pattern, new NSRange(range.location + i - 1, 2), false);
                    nodes.Add(lastnode);

                    escape = false;
                }
                else if (c == '(')
                {
                    // Scan for the matching ')' while tracking nesting depth and escapes.
                    int  brackets = 1;
                    // Must start as false: the character right after '(' is not escaped.
                    // Starting as true skipped it, which broke "((a))" and "(\))".
                    bool escape2  = false;

                    for (int j = i + 1; j < range.length; j++)
                    {
                        char d = pattern[range.location + j];

                        if (escape2)
                        {
                            escape2 = false;
                        }
                        else if (d == '\\')
                        {
                            escape2 = true;
                        }
                        else if (d == '(')
                        {
                            brackets++;
                        }
                        else if (d == ')')
                        {
                            brackets--;

                            if (brackets == 0)
                            {
                                // Parse the text between the brackets as an enclosed group.
                                lastnode = this.parseSubpattern(pattern, new NSRange(range.location + i + 1, j - i - 1), true);
                                nodes.Add(lastnode);
                                i = j;
                                break;
                            }
                        }
                    }

                    if (brackets != 0)
                    {
                        throw new Exception("Unclosed group bracket");
                    }
                }
                else if (c == ')')
                {
                    throw new Exception("Unopened group bracket");
                }
                else if (c == '[')
                {
                    // Scan for the closing ']' of the character set.
                    bool escape2 = false;
                    bool valid   = false;

                    for (int j = i + 1; j < range.length; j++)
                    {
                        char d = pattern[range.location + j];

                        if (escape2)
                        {
                            escape2 = false;
                        }
                        else if (d == '\\')
                        {
                            escape2 = true;
                        }
                        else if (d == '[' || d == '(' || d == ')')
                        {
                            // invalid character
                            break;
                        }
                        else if (d == ']')
                        {
                            // Pass only the text between the brackets.
                            lastnode = this.parseCharset(pattern, new NSRange(range.location + i + 1, j - i - 1), true);
                            nodes.Add(lastnode);

                            i     = j;
                            valid = true;
                            break;
                        }
                    }

                    if (!valid)
                    {
                        throw new Exception("Unclosed character set bracket");
                    }
                }
                else if (c == ']')
                {
                    throw new Exception("Unopened character set bracket");
                }
                else if (c == '{')
                {
                    // A quantifier needs a preceding node that is not itself a quantifier.
                    if (lastnode == null || lastnode is WTReQuantifier)
                    {
                        throw new Exception("Invalid quantifier usage");
                    }

                    bool valid = false;

                    for (int j = i + 1; j < range.length; j++)
                    {
                        char d = pattern[range.location + j];

                        if (d == '}')
                        {
                            string from, to;

                            // str covers the braces: "{n}" or "{n,m}".
                            string str = pattern.Substring(range.location + i, j + 1 - i);
                            Match  m   = _exactQuantifierRegex.Match(str);

                            if (m.Success)
                            {
                                from = m.Groups[1].Value;
                                to   = from;
                            }
                            else
                            {
                                m = _rangeQuantifierRegex.Match(str);
                                if (!m.Success)
                                {
                                    throw new Exception("Invalid quantifier format");
                                }
                                else
                                {
                                    from = m.Groups[1].Value;
                                    to   = m.Groups[2].Value;
                                }
                            }

                            WTReQuantifier qtf = new WTReQuantifier();

                            // A missing lower bound means 0.
                            if (from == null || from.Equals(""))
                            {
                                qtf.countFrom = 0;
                            }
                            else
                            {
                                qtf.countFrom = int.Parse(from);
                            }

                            // A missing upper bound means "unbounded".
                            if (to == null || to.Equals(""))
                            {
                                qtf.countTo = int.MaxValue;
                            }
                            else
                            {
                                qtf.countTo = int.Parse(to);
                            }

                            if (qtf.countFrom > qtf.countTo)
                            {
                                throw new Exception("Invalid quantifier range");
                            }

                            // Replace the last node by a quantifier that wraps it.
                            nodes.RemoveAt(nodes.Count - 1); //removeLastObject
                            qtf.child       = lastnode;
                            lastnode.parent = qtf;
                            lastnode        = qtf;
                            nodes.Add(lastnode);

                            i     = j;
                            valid = true;
                            break;
                        }
                    }

                    if (!valid)
                    {
                        throw new Exception("Unclosed quantifier bracket");
                    }
                }
                else if (c == '}')
                {
                    throw new Exception("Unopened qualifier bracket");
                }
                else if (c == '*')
                {
                    if (lastnode == null || lastnode is WTReQuantifier)
                    {
                        throw new Exception("Invalid quantifier usage");
                    }

                    // zero or more of the previous node
                    nodes.RemoveAt(nodes.Count - 1);
                    WTReQuantifier qtf = new WTReQuantifier(0, int.MaxValue);
                    qtf.child       = lastnode;
                    lastnode.parent = qtf;
                    lastnode        = qtf;
                    nodes.Add(lastnode);
                }
                else if (c == '+')
                {
                    if (lastnode == null || lastnode is WTReQuantifier)
                    {
                        throw new Exception("Invalid quantifier usage");
                    }

                    // one or more of the previous node
                    nodes.RemoveAt(nodes.Count - 1);
                    WTReQuantifier qtf = new WTReQuantifier(1, int.MaxValue);
                    qtf.child       = lastnode;
                    lastnode.parent = qtf;
                    lastnode        = qtf;
                    nodes.Add(lastnode);
                }
                else if (c == '?')
                {
                    if (lastnode == null)
                    {
                        throw new Exception("Invalid quantifier usage");
                    }

                    if (lastnode is WTReQuantifier)
                    {
                        // '?' after a quantifier makes that quantifier non-greedy
                        ((WTReQuantifier)lastnode).greedy = false;
                    }
                    else
                    {
                        // zero or one of the previous node
                        nodes.RemoveAt(nodes.Count - 1);
                        WTReQuantifier qtf = new WTReQuantifier(0, 1);
                        qtf.child       = lastnode;
                        lastnode.parent = qtf;
                        lastnode        = qtf;
                        nodes.Add(lastnode);
                    }

                    // Nothing further may be attached to the node after a '?'.
                    lastnode = null;
                }
                else if (c == '.')
                {
                    // any character
                    lastnode = new WTReAnyCharacter();
                    nodes.Add(lastnode);
                }
                else if (c == '|')
                {
                    // alternation
                    if (alternations == null)
                    {
                        alternations = new List <WTReNode>(2);
                    }

                    // Close the current alternative and start collecting the next one.
                    WTReGroup gr = groupFromNodes(nodes, enclosed);

                    gr.sourceRange = new NSRange(range.location + startPos, i - startPos);
                    startPos       = i + 1;

                    alternations.Add(gr);
                    nodes.Clear();
                    lastnode = null;
                }
                else if (c == '$')
                {
                    if (alternations != null && enclosed)
                    {
                        throw new Exception("End of string shouldn't be inside alternation");
                    }

                    // '$' is only accepted as the very last character of the whole pattern.
                    if (range.location + i + 1 < pattern.Length)
                    {
                        throw new Exception("Unexpected end of string");
                    }

                    lastnode = new WTReEndOfString();
                    nodes.Add(lastnode);

                    endPos    = i + 1;
                    _finished = true;
                    break;
                }
                else
                {
                    // plain single character
                    lastnode = this.parseCharset(pattern, new NSRange(range.location + i, 1), false);
                    nodes.Add(lastnode);
                }
            }

            if (escape)
            {
                // The slice ended right after a backslash.
                throw new Exception("Invalid group ending");
            }

            WTReGroup g = groupFromNodes(nodes, enclosed);

            g.sourceRange = new NSRange(range.location + startPos, endPos - startPos);
            // NOTE(review): groupFromNodes may hand back an existing child group; this
            // assignment then overwrites that group's capturing flag - confirm intended.
            g.capturing   = enclosed;

            if (alternations != null)
            {
                // build alternation and enclose it into group
                alternations.Add(g);

                WTReAlteration a = new WTReAlteration();
                a.children    = alternations;
                a.sourceRange = new NSRange(range.location, endPos);

                // setup links
                WTReNode prev = alternations[0];
                prev.parent = a;

                for (int i = 1; i < alternations.Count; i++)
                {
                    WTReNode curr = alternations[i];
                    curr.parent      = a;
                    prev.nextSibling = curr;
                    prev             = curr;
                }

                g          = new WTReGroup();
                g.children = new List <WTReNode>();
                g.children.Add(a);
                g.capturing   = enclosed;
                g.sourceRange = a.sourceRange;

                a.parent = g;
            }
            return(g);
        }