Пример #1
0
        /// <summary>
        /// Constructs the pattern objects that describe the PEG syntax itself and stores them
        /// in the corresponding fields/properties (<c>Definition</c>, <c>Grammar</c>, <c>Expression</c>, ...).
        /// </summary>
        /// <remarks>
        /// Patterns are created bottom-up: lexical helpers first (end of file/line, spacing, comments),
        /// then tokens, then character/literal/identifier rules, and finally the expression rules.
        /// <c>primary</c> is allocated early without data and filled in near the end, because the
        /// expression rules refer to it before its alternatives are available.
        /// </remarks>
        protected virtual void Build()
        {
            // End of input: succeeds only where no further character can be consumed.
            endOfFile = new Pattern(nameof(endOfFile))
            {
                Data = new Not(new Any())
            };

            // Named after itself (previously mislabelled with the name of endOfFile).
            endOfLine = new Pattern(nameof(endOfLine))
            {
                // "\r\n" is listed first so a CRLF pair is consumed as a single line ending.
                Data = new PrioritizedChoice("\r\n", '\n', '\r')
            };

            space = new Pattern(nameof(space))
            {
                Data = new PrioritizedChoice(new CharacterClass(' ', '\t'), endOfLine)
            };

            // '#' followed by everything up to, and including, the next line ending.
            comment = new Pattern(nameof(comment))
            {
                Data = new Sequence(
                    '#',
                    new ZeroOrMore(new Sequence(new Not(endOfLine), new Any())),
                    endOfLine
                    )
            };

            // Any run of whitespace and comments; may match nothing.
            spacing = new Pattern(nameof(spacing))
            {
                Data = new ZeroOrMore(new PrioritizedChoice(space, comment))
            };

            DOT = new Pattern(".")
            {
                Data = new CaptureGroup((int)CaptureType.Any, new Sequence('.', spacing))
            };

            empty = new Pattern(nameof(empty))
            {
                Data = new CaptureGroup((int)CaptureType.Empty, new Sequence('e', spacing))
            };

            // Single-character tokens. SingleCharacterWithSpacing is defined elsewhere;
            // presumably it matches the character followed by spacing - confirm against its definition.
            OPEN     = SingleCharacterWithSpacing('(');
            CLOSE    = SingleCharacterWithSpacing(')');
            PLUS     = SingleCharacterWithSpacing('+');
            STAR     = SingleCharacterWithSpacing('*');
            QUESTION = SingleCharacterWithSpacing('?');
            NOT      = SingleCharacterWithSpacing('!');
            AND      = SingleCharacterWithSpacing('&');
            SLASH    = SingleCharacterWithSpacing('/');

            LEFTARROW = new Pattern("<-")
            {
                Data = new Sequence("<-", spacing)
            };

            // TODO: Properly parse special chars
            // Alternatives, in priority order: a backslash escape from a fixed set, a three-digit
            // octal escape, a one- or two-digit octal escape, or any character that is not a backslash.
            character = new Pattern(nameof(character))
            {
                Data = new CaptureGroup((int)CaptureType.Character, new PrioritizedChoice(
                                            new Sequence('\\', new CharacterClass('n', 'r', 't', '\'', '"', '[', ']', '\\', '-')),
                                            new Sequence('\\', CharacterClass.Range('0', '2'), CharacterClass.Range('0', '7'), CharacterClass.Range('0', '7')),
                                            new Sequence('\\', CharacterClass.Range('0', '7'), Operator.Optional(CharacterClass.Range('0', '7'))),
                                            new Sequence(new Not(new CharacterClass('\\')), new Any())
                                            )
                                        )
            };

            // Either "a-b" or a lone character; the two-sided form is tried first.
            range = new Pattern(nameof(range))
            {
                Data = new CaptureGroup((int)CaptureType.CharacterClassRange, new PrioritizedChoice(new Sequence(character, new CharacterClass('-'), character), character))
            };

            // '[' ranges ']' followed by spacing; the Not(']') guard stops the loop at the closing bracket.
            characterClass = new Pattern(nameof(characterClass))
            {
                Data = new CaptureGroup((int)CaptureType.CharacterClass,
                                        new Sequence(
                                            '[',
                                            new ZeroOrMore(new Sequence(new Not(new CharacterClass(']')), range)),
                                            ']',
                                            spacing
                                            )
                                        )
            };

            // Single- or double-quoted string; trailing spacing sits outside the capture group.
            literal = new Pattern(nameof(literal))
            {
                Data = new Sequence(
                    new CaptureGroup((int)CaptureType.Literal,
                                     new PrioritizedChoice(
                                         new Sequence(
                                             '\'',
                                             new ZeroOrMore(new Sequence(new Not('\''), character)),
                                             '\''
                                             ),
                                         new Sequence(
                                             '"',
                                             new ZeroOrMore(new Sequence(new Not('"'), character)),
                                             '"'
                                             )
                                         )
                                     ),
                    spacing
                    )
            };

            // Allocated without data; its Data is assigned after internalExpression exists.
            primary    = new Pattern();

            // [A-Za-z_][A-Za-z0-9_]* with trailing spacing outside the capture group.
            identifier = new Pattern(nameof(identifier))
            {
                Data = new Sequence(new CaptureGroup((int)CaptureType.Identifier,
                                                     new Sequence(
                                                         new CharacterClass(new CharacterRange('A', 'Z'), new CharacterRange('a', 'z'), new CharacterRange('_', '_')),
                                                         new ZeroOrMore(new CharacterClass(new CharacterRange('A', 'Z'), new CharacterRange('a', 'z'), new CharacterRange('0', '9'), new CharacterRange('_', '_')))
                                                         )
                                                     ), spacing)
            };

            identifierDeclaration = new Pattern(nameof(identifierDeclaration))
            {
                Data = identifier
            };

            // primary optionally followed by '?', '*' or '+'.
            suffix = new Pattern(nameof(suffix))
            {
                Data = new PrioritizedChoice(
                    new CaptureGroup((int)CaptureType.Optional, new Sequence(primary, QUESTION)),
                    new CaptureGroup((int)CaptureType.ZeroOrMore, new Sequence(primary, STAR)),
                    new CaptureGroup((int)CaptureType.OneOrMore, new Sequence(primary, PLUS)),
                    primary
                    )
            };

            // suffix optionally preceded by '&' or '!'.
            prefix = new Pattern(nameof(prefix))
            {
                Data = new PrioritizedChoice(
                    new CaptureGroup((int)CaptureType.And, new Sequence(AND, suffix)),
                    new CaptureGroup((int)CaptureType.Not, new Sequence(NOT, suffix)),
                    suffix
                    )
            };

            // Zero or more prefixed items; note that this can match an empty sequence.
            sequence = new Pattern(nameof(sequence))
            {
                Data = new CaptureGroup((int)CaptureType.Sequence, new ZeroOrMore(prefix))
            };

            // One sequence followed by any number of "/ sequence" alternatives.
            internalExpression = new Pattern(nameof(internalExpression))
            {
                Data = new CaptureGroup((int)CaptureType.PrioritizedChoice, new Sequence(sequence, new ZeroOrMore(new Sequence(SLASH, sequence))))
            };

            // GetPrimaryData is defined elsewhere and supplies the alternatives for primary.
            primary.Data = new PrioritizedChoice(GetPrimaryData());

            this.Definition = new Pattern(nameof(Definition))
            {
                Data = new CaptureGroup((int)CaptureType.Definition, new Sequence(identifierDeclaration, LEFTARROW, internalExpression))
            };

            // A whole grammar: leading spacing, at least one definition, then end of input.
            this.Grammar = new Pattern(nameof(Grammar))
            {
                Data = new Sequence(spacing, Operator.OneOrMore(Definition), endOfFile)
            };

            // A stand-alone expression: uses internalExpression's Data directly (not the Pattern wrapper),
            // followed by end of input.
            this.Expression = new Pattern(nameof(Expression))
            {
                Data = new Sequence(internalExpression.Data, endOfFile)
            };
        }
Пример #2
0
        /// <summary>
        /// Assembles the operator tree that describes regular-expression syntax and compiles
        /// its root pattern ("RE") into <c>Parser</c>.
        /// </summary>
        /// <param name="patternCompiler">Compiler whose <c>Compile</c> method is invoked on the root pattern.</param>
        public RegexGrammar(PatternCompiler patternCompiler)
        {
            // Created without data first: the grammar is recursive (groups and unions refer back to "RE").
            var regex         = new Pattern("RE");
            var concatenation = new Pattern("SimpleRE");

            // Everything that counts as a meta character and therefore cannot be a plain character.
            var reservedToken = new Pattern("metaCharacter");
            reservedToken.Data = new PrioritizedChoice(
                new CharacterClass('*', '+', '^', '$', '|', '(', ')', '[', ']'),
                new Sequence(
                    new CharacterClass('\\'),
                    new CharacterClass('t', 'n', 'r', 'f', 'b', 'B', 'd', 'D', 's', 'S', 'w', 'W', 'Q', 'U', 'L')),
                CharacterClass.String(@"*?"),
                CharacterClass.String(@"+?"),
                CharacterClass.String(@"$`"),
                CharacterClass.String(@"$'"),
                CharacterClass.String(@"$&"),
                CharacterClass.String(@"\cX"),
                new Sequence(new CharacterClass('\\', '$'), CharacterClass.Range('0', '9')),
                new Sequence(
                    new CharacterClass('\\'),
                    CharacterClass.Range('0', '7'),
                    CharacterClass.Range('0', '7'),
                    CharacterClass.Range('0', '7')));

            // Same alternatives as above minus the bare single-character operators, wrapped in a capture.
            var capturedMeta = new Pattern("allowedMetaCharacter");
            var capturedMetaChoices = new PrioritizedChoice(
                new Sequence(
                    new CharacterClass('\\'),
                    new CharacterClass('t', 'n', 'r', 'f', 'b', 'B', 'd', 'D', 's', 'S', 'w', 'W', 'Q', 'U', 'L')),
                CharacterClass.String(@"*?"),
                CharacterClass.String(@"+?"),
                CharacterClass.String(@"$`"),
                CharacterClass.String(@"$'"),
                CharacterClass.String(@"$&"),
                CharacterClass.String(@"\cX"),
                new Sequence(new CharacterClass('\\', '$'), CharacterClass.Range('0', '9')),
                new Sequence(
                    new CharacterClass('\\'),
                    CharacterClass.Range('0', '7'),
                    CharacterClass.Range('0', '7'),
                    CharacterClass.Range('0', '7')));
            capturedMeta.Data = new CaptureGroup((int)CaptureType.MetaCharacter, capturedMetaChoices);

            // A character is either a backslash-escaped meta character or anything that is not a meta character.
            var singleChar  = new Pattern("character");
            var escapedChar = new Sequence(new CharacterClass('\\'), reservedToken);
            var plainChar   = new Sequence(new Not(reservedToken), new Any());
            singleChar.Data = new CaptureGroup((int)CaptureType.Char, new PrioritizedChoice(escapedChar, plainChar));

            // Bracket sets: "[...]" and "[^...]", each holding one or more ranges or characters.
            var charRange = new CaptureGroup(
                (int)CaptureType.Range,
                new Sequence(singleChar, new CharacterClass('-'), singleChar));
            var setMember = new PrioritizedChoice(charRange, singleChar);
            var setBody   = new Pattern();
            setBody.Data  = Operator.OneOrMore(setMember);

            var inclusiveSet = new CaptureGroup(
                (int)CaptureType.PositiveSet,
                new Sequence(new CharacterClass('['), setBody, new CharacterClass(']')));
            var exclusiveSet = new CaptureGroup(
                (int)CaptureType.NegativeSet,
                new Sequence(CharacterClass.String("[^"), setBody, new CharacterClass(']')));

            // The negated form is tried first so that "[^" is not read as a positive set starting with '^'.
            var charSet  = new Pattern("set");
            charSet.Data = new PrioritizedChoice(exclusiveSet, inclusiveSet);

            var endAnchor     = new CaptureGroup((int)CaptureType.Eos, new CharacterClass('$'));
            var wildcard      = new CaptureGroup((int)CaptureType.Any, new CharacterClass('.'));
            var parenthesized = new Sequence(new CharacterClass('('), regex, new CharacterClass(')'));

            var atom  = new Pattern("elementaryRE");
            atom.Data = new PrioritizedChoice(parenthesized, wildcard, endAnchor, charSet, singleChar, capturedMeta);

            // "{n}" or "{n,m}"; only the part between the braces is captured.
            var digits        = Operator.OneOrMore(CharacterClass.Range('0', '9'));
            var optionalUpper = Operator.Optional(new Sequence(new CharacterClass(','), digits));
            var bounds        = new Sequence(
                new CharacterClass('{'),
                new CaptureGroup((int)CaptureType.RepeatRange, new Sequence(digits, optionalUpper)),
                new CharacterClass('}'));

            // Quantified forms of an atom.
            var oneOrMore  = new Pattern("plus");
            oneOrMore.Data = new CaptureGroup((int)CaptureType.Plus, new Sequence(atom, new CharacterClass('+')));

            var zeroOrMore  = new Pattern("star");
            zeroOrMore.Data = new CaptureGroup((int)CaptureType.Star, new Sequence(atom, new CharacterClass('*')));

            var counted  = new Pattern("repeat");
            counted.Data = new CaptureGroup((int)CaptureType.Repeat, new Sequence(atom, bounds));

            // Quantified alternatives come before the bare atom so the quantifier is not left unconsumed.
            var quantifiedOrAtom = new PrioritizedChoice(zeroOrMore, oneOrMore, counted, atom);

            concatenation.Data = new CaptureGroup((int)CaptureType.Concatenation, Operator.OneOrMore(quantifiedOrAtom));

            // Union: a concatenation followed by any number of "| RE" alternatives.
            var alternatives = new ZeroOrMore(new Sequence(new CharacterClass('|'), regex));
            regex.Data = new CaptureGroup((int)CaptureType.Union, new Sequence(concatenation, alternatives));

            Parser = patternCompiler.Compile(regex);
        }
Пример #3
0
        /// <summary>
        /// Converts a single capture into the operator it represents.
        /// </summary>
        /// <param name="intKey">Capture key; interpreted as a <see cref="CaptureType"/> value.</param>
        /// <param name="captureData">Text of the capture; used for identifiers and characters.</param>
        /// <param name="parameters">Operators supplied for this capture's children, in order.</param>
        /// <returns>The operator corresponding to the capture.</returns>
        /// <exception cref="PegParsingException">
        /// A character-class range has a multi-character bound, or the first parameter of a
        /// definition is not a <see cref="Pattern"/>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="intKey"/> is not a handled capture type.</exception>
        protected virtual Operator BuildTreeNode(int intKey, string captureData, IReadOnlyList<Operator> parameters)
        {
            var key = (CaptureType)intKey;

            switch (key)
            {
            case CaptureType.And:
                return Operator.And(parameters[0]);

            case CaptureType.Any:
                return new Any();

            case CaptureType.CharacterClass:
                // Merge the ranges of every CharacterClass child into one class.
                var rangeParams = parameters.OfType<CharacterClass>();
                return new CharacterClass(rangeParams.SelectMany(p => p.Ranges));

            case CaptureType.CharacterClassRange:
                if (parameters.Count == 1)
                {
                    return parameters[0];
                }
                else
                {
                    var min = (CharacterClass)parameters[0];
                    var max = (CharacterClass)parameters[1];

                    // Both bounds must be single characters.
                    if (min.NumChars > 1 || max.NumChars > 1)
                    {
                        throw new PegParsingException($"Cannot create range from {min} and {max}");
                    }

                    return CharacterClass.Range(min.Value.First(), max.Value.Last());
                }

            case CaptureType.Definition:
                // Fail with a descriptive error instead of a NullReferenceException when the
                // first child is not a Pattern.
                var identifier = parameters[0] as Pattern;
                if (identifier == null)
                {
                    throw new PegParsingException($"Cannot create definition: {parameters[0]} is not a pattern");
                }

                identifier.Data = parameters[1];

                return identifier;

            case CaptureType.Empty:
                return new Empty();

            case CaptureType.Identifier:
                return new UnresolvedPatternReference(captureData);

            case CaptureType.Character:
                // A single captured char is taken literally; anything longer is an escape sequence.
                if (captureData.Length == 1)
                {
                    return new CharacterClass(captureData[0]);
                }
                else
                {
                    return new CharacterClass(TranslateEscapeCharacter(captureData));
                }

            case CaptureType.Literal:
                if (parameters.Count <= 0)
                {
                    return new Empty();
                }
                else if (parameters.Count == 1)
                {
                    return parameters[0];
                }
                else
                {
                    return new Sequence(parameters);
                }

            case CaptureType.Not:
                return new Not(parameters[0]);

            case CaptureType.OneOrMore:
                return Operator.OneOrMore(parameters[0]);

            case CaptureType.Optional:
                return Operator.Optional(parameters[0]);

            case CaptureType.ZeroOrMore:
                return new ZeroOrMore(parameters[0]);

            case CaptureType.PrioritizedChoice:
                if (parameters.Count <= 1)
                {
                    return parameters[0];
                }

                // A choice made up solely of character classes collapses into one class.
                if (parameters.All(item => item is CharacterClass))
                {
                    return new CharacterClass(parameters.Cast<CharacterClass>().SelectMany(item => item.Ranges));
                }

                return new PrioritizedChoice(parameters);

            case CaptureType.Sequence:
                // A sequence may have no elements; treat it as Empty (as an empty literal is)
                // rather than indexing into an empty list.
                if (parameters.Count == 0)
                {
                    return new Empty();
                }

                if (parameters.Count == 1)
                {
                    return parameters[0];
                }

                return new Sequence(parameters);

            default:
                // Use the (paramName, actualValue, message) overload; the single-string overload
                // would treat the text as the parameter name.
                throw new ArgumentOutOfRangeException(nameof(intKey), intKey, $"Unrecognised CaptureType {key}");
            }
        }