コード例 #1
0
        /// <summary>
        /// Builds a <c>QuantifierToken</c> directly from each supported quantifier spelling
        /// and checks the parsed minimum/maximum occurrence counts and the laziness flag.
        /// </summary>
        public void QuantifierConstructorTest( )
        {
            // an unbounded upper limit is expected to be reported as int.MaxValue
            var goodPatterns = new[] {
                new { Pattern = "?", MinOccurrences = 0, MaxOccurrences = 1, IsLazy = false },
                new { Pattern = "+", MinOccurrences = 1, MaxOccurrences = int.MaxValue, IsLazy = false },
                new { Pattern = "+?", MinOccurrences = 1, MaxOccurrences = int.MaxValue, IsLazy = true },
                new { Pattern = "*", MinOccurrences = 0, MaxOccurrences = int.MaxValue, IsLazy = false },
                new { Pattern = "*?", MinOccurrences = 0, MaxOccurrences = int.MaxValue, IsLazy = true },
                new { Pattern = "{1,2}", MinOccurrences = 1, MaxOccurrences = 2, IsLazy = false },
                new { Pattern = "{,2}", MinOccurrences = 0, MaxOccurrences = 2, IsLazy = false },
                new { Pattern = "{1,}", MinOccurrences = 1, MaxOccurrences = int.MaxValue, IsLazy = false },
                new { Pattern = "{3}", MinOccurrences = 3, MaxOccurrences = 3, IsLazy = false },
            };

            foreach (var p in goodPatterns)
            {
                // no target token is supplied: only the parsing of the quantifier text is under test
                var node = new QuantifierToken(p.Pattern, null);

                // Assert.AreEqual takes (expected, actual, message); keeping that order makes
                // the framework's own "Expected/Actual" failure text agree with our message
                Assert.AreEqual(p.MinOccurrences, node.MinOccurrences,
                                string.Format("pattern: {0}, expected: {1}, actual: {2}",
                                              p.Pattern, p.MinOccurrences, node.MinOccurrences));

                Assert.AreEqual(p.MaxOccurrences, node.MaxOccurrences,
                                string.Format("pattern: {0}, expected: {1}, actual: {2}",
                                              p.Pattern, p.MaxOccurrences, node.MaxOccurrences));

                Assert.AreEqual(p.IsLazy, node.IsLazy,
                                string.Format("pattern: {0}, expected: {1}, actual: {2}",
                                              p.Pattern, p.IsLazy, node.IsLazy));
            }
        }
コード例 #2
0
        /// <summary>
        /// Tokenizes patterns of the form "ba" followed by a quantifier and checks that the
        /// resulting tree is a group holding the literal "b" and a <c>QuantifierToken</c>
        /// that targets the literal "a" with the expected occurrence bounds and laziness.
        /// </summary>
        public void TokenizeQuantifierTest( )
        {
            // an unbounded upper limit is expected to be reported as int.MaxValue
            var patterns = new[] {
                new { Pattern = "ba?", MinOccurrences = 0, MaxOccurrences = 1, Lazy = false },
                new { Pattern = "ba+", MinOccurrences = 1, MaxOccurrences = int.MaxValue, Lazy = false },
                new { Pattern = "ba+?", MinOccurrences = 1, MaxOccurrences = int.MaxValue, Lazy = true },
                new { Pattern = "ba*", MinOccurrences = 0, MaxOccurrences = int.MaxValue, Lazy = false },
                new { Pattern = "ba*?", MinOccurrences = 0, MaxOccurrences = int.MaxValue, Lazy = true },
                new { Pattern = "ba{1,2}", MinOccurrences = 1, MaxOccurrences = 2, Lazy = false },
                new { Pattern = "ba{,2}", MinOccurrences = 0, MaxOccurrences = 2, Lazy = false },
                new { Pattern = "ba{1,}", MinOccurrences = 1, MaxOccurrences = int.MaxValue, Lazy = false },
            };

            foreach (var p in patterns)
            {
                Token root = Token.Tokenize(p.Pattern);
                Assert.IsInstanceOfType(root, typeof(GroupToken));

                // the type was asserted just above, so a direct cast is safe and, unlike
                // "as", cannot turn a wrong type into a NullReferenceException further down
                var rootGroup = (GroupToken)root;

                // the quantifier must bind to "a" only, leaving "b" as a plain literal
                Token first = rootGroup.Content[0];
                Assert.AreEqual(Token.TokenType.Literal, first.Type);
                Assert.AreEqual("b", first.Text);

                Token result = rootGroup.Content[1];
                Assert.IsInstanceOfType(result, typeof(QuantifierToken));
                var quantifier = (QuantifierToken)result;

                Assert.AreEqual(Token.TokenType.Literal, quantifier.Target.Type);
                Assert.AreEqual("a", quantifier.Target.Text);

                // Assert.AreEqual takes (expected, actual, message)
                Assert.AreEqual(p.MinOccurrences, quantifier.MinOccurrences,
                                string.Format("pattern: {0}, expected: {1}, actual: {2}",
                                              p.Pattern, p.MinOccurrences, quantifier.MinOccurrences));

                Assert.AreEqual(p.MaxOccurrences, quantifier.MaxOccurrences,
                                string.Format("pattern: {0}, expected: {1}, actual: {2}",
                                              p.Pattern, p.MaxOccurrences, quantifier.MaxOccurrences));

                Assert.AreEqual(p.Lazy, quantifier.IsLazy,
                                string.Format("pattern: {0}, expected: {1}, actual: {2}",
                                              p.Pattern, p.Lazy, quantifier.IsLazy));
            }
        }
コード例 #3
0
ファイル: Tokenize.cs プロジェクト: wyb314/regexer
        /** Transform a plain sequence of Token into a tree-like structure, wrapping
         *  tokens into the appropriate subclass where needed.
         *
         *  Tokens as returned by findTokens() are not of much use on their own because
         *  they often refer to other tokens located near them, either before or after.
         *  Also, regexes do have a structure, given by round brackets; in order to
         *  recognise and preserve this structure some further processing is needed
         *  on the stream of tokens.
         *
         *  A tree is the perfect data type to represent the grammar and the structure
         *  of a regex, where node types specify the meaning and node children represent
         *  the "arguments" of each component.
         *
         *  The sequence is scanned once; an explicit stack remembers the enclosing groups
         *  so that each closing parenthesis can return to its parent. The root group is
         *  created with an empty text and index 0; every GroupStart token takes the next
         *  index.
         *
         *  \param tokens A sequence of tokens.
         *  \return An organised tree, rooted at the implicit outermost group.
         *  \throw ParsingException if two groups share the same name, if lookarounds are
         *         nested, if a quantifier has nothing to apply to, or if parentheses are
         *         not balanced.
         */
        private static GroupToken regroupTokens(IEnumerable <Token> tokens)
        {
            int  groupCount       = 0;
            var  groups           = new Stack <GroupToken>( );
            var  names            = new HashSet <string>( ); // groups with the same name are not allowed
            bool insideLookaround = false;                   // nested lookarounds are not allowed

            var current = new GroupToken(string.Empty, groupCount++);

            /* the root stays at the bottom of the stack for the whole scan: while inside *
             * N open groups the stack holds N + 1 entries                                */
            groups.Push(current);

            foreach (Token t in tokens)
            {
                switch (t.Type)
                {
                case TokenType.GroupStart:
                    var newGroup = new GroupToken(t.Text, groupCount++);
                    if (newGroup.Name != null)
                    {
                        // HashSet.Add returns false when the name is already present
                        if (!names.Add(newGroup.Name))
                        {
                            throw new ParsingException("multiple groups with the same name are not allowed");
                        }
                    }

                    current.Content.Add(newGroup);
                    groups.Push(current);

                    current = newGroup;
                    break;


                case TokenType.GroupEnd:
                    /* only the root is left: this parenthesis closes nothing. Popping here *
                     * would empty the stack and either crash on the final Pop() or let an  *
                     * unbalanced pattern through if another group is opened afterwards     */
                    if (groups.Count < 2)
                    {
                        throw new ParsingException("unbalanced parenthesis");
                    }

                    if (current.Index == lookaheadIndex)
                    {
                        // the lookahead token was already added to its parent when it was opened
                        insideLookaround = false;
                        current          = groups.Pop( );
                    }
                    else if (current.Index == lookbehindIndex)
                    {
                        // the lookbehind is built only now that all its content has been collected
                        insideLookaround = false;
                        var lookbehind = new LookbehindToken(current.Text, current);
                        current = groups.Pop( );
                        current.Content.Add(lookbehind);
                    }
                    else
                    {
                        current = groups.Pop( );
                    }
                    break;


                case TokenType.Quantifier:
                    // a quantifier applies to the element immediately before it in the same group
                    Token target = current.Content.LastOrDefault( );
                    if (target == null)
                    {
                        throw new ParsingException("quantifier has nothing to repeat");
                    }
                    current.Content.Remove(target);

                    var quantifier = new QuantifierToken(t.Text, target);
                    current.Content.Add(quantifier);
                    break;


                case TokenType.Lookahead:
                    if (insideLookaround)
                    {
                        throw new ParsingException("nested lookarounds are not allowed");
                    }
                    insideLookaround = true;

                    /* mark the target as belonging to a lookahead, this will allow us to *
                     * update insideLookaround when we find the corresponding GroupEnd    */
                    var lookahead = new LookaheadToken(t.Text,
                                                       new GroupToken {
                        Index = lookaheadIndex
                    });

                    current.Content.Add(lookahead);
                    groups.Push(current);

                    current = ( GroupToken )lookahead.Target;
                    break;


                case TokenType.Lookbehind:
                    if (insideLookaround)
                    {
                        throw new ParsingException("nested lookarounds are not allowed");
                    }
                    insideLookaround = true;

                    // the actual lookbehind will be created once all its content has been collected
                    var group = new GroupToken(t.Text, lookbehindIndex);
                    groups.Push(current);

                    current = group;
                    break;


                default:
                    current.Content.Add(t);
                    break;
                }
            }

            // more than the root left on the stack means some group was never closed
            if (groups.Count > 1)
            {
                throw new ParsingException("unbalanced parenthesis");
            }

            return(groups.Pop( ));
        }