/// <summary>
/// Constructs every pattern of the PEG meta-grammar and assigns the public entry points
/// <c>Definition</c>, <c>Grammar</c> and <c>Expression</c>.
/// </summary>
/// <remarks>
/// Statement order matters: a pattern must be constructed before another pattern's data refers to it.
/// <c>primary</c> is created empty up front so that <c>suffix</c> can reference it, and its data is
/// filled in only after <c>internalExpression</c> exists.
/// </remarks>
protected virtual void Build()
{
    // --- Lexical building blocks -------------------------------------------------------------

    // End of input: succeeds only when no further character can be consumed.
    endOfFile = new Pattern(nameof(endOfFile)) { Data = new Not(new Any()) };

    // Named after itself (not after endOfFile) so the two patterns are distinguishable by name.
    endOfLine = new Pattern(nameof(endOfLine)) { Data = new PrioritizedChoice("\r\n", '\n', '\r') };

    space = new Pattern(nameof(space))
    {
        Data = new PrioritizedChoice(new CharacterClass(' ', '\t'), endOfLine)
    };

    // A comment starts with '#' and runs up to and including the next end of line.
    comment = new Pattern(nameof(comment))
    {
        Data = new Sequence(
            '#',
            new ZeroOrMore(new Sequence(new Not(endOfLine), new Any())),
            endOfLine
        )
    };

    // Any run of whitespace and comments (possibly empty).
    spacing = new Pattern(nameof(spacing))
    {
        Data = new ZeroOrMore(new PrioritizedChoice(space, comment))
    };

    // --- Tokens (each consumes trailing spacing) ---------------------------------------------

    DOT = new Pattern(".")
    {
        Data = new CaptureGroup((int)CaptureType.Any, new Sequence('.', spacing))
    };

    // The letter 'e' followed by spacing is captured as CaptureType.Empty.
    empty = new Pattern(nameof(empty))
    {
        Data = new CaptureGroup((int)CaptureType.Empty, new Sequence('e', spacing))
    };

    OPEN = SingleCharacterWithSpacing('(');
    CLOSE = SingleCharacterWithSpacing(')');
    PLUS = SingleCharacterWithSpacing('+');
    STAR = SingleCharacterWithSpacing('*');
    QUESTION = SingleCharacterWithSpacing('?');
    NOT = SingleCharacterWithSpacing('!');
    AND = SingleCharacterWithSpacing('&');
    SLASH = SingleCharacterWithSpacing('/');
    LEFTARROW = new Pattern("<-") { Data = new Sequence("<-", spacing) };

    // --- Characters, classes and literals ----------------------------------------------------

    // TODO: Properly parse special chars
    // Alternatives are tried in order: a backslash plus one of the listed escape characters,
    // a backslash plus three digits ([0-2][0-7][0-7]), a backslash plus one or two digits ([0-7]),
    // and finally any single character that is not a backslash.
    character = new Pattern(nameof(character))
    {
        Data = new CaptureGroup((int)CaptureType.Character,
            new PrioritizedChoice(
                new Sequence('\\', new CharacterClass('n', 'r', 't', '\'', '"', '[', ']', '\\', '-')),
                new Sequence('\\', CharacterClass.Range('0', '2'), CharacterClass.Range('0', '7'), CharacterClass.Range('0', '7')),
                new Sequence('\\', CharacterClass.Range('0', '7'), Operator.Optional(CharacterClass.Range('0', '7'))),
                new Sequence(new Not(new CharacterClass('\\')), new Any())
            )
        )
    };

    // Either "character - character" or a single character.
    range = new Pattern(nameof(range))
    {
        Data = new CaptureGroup((int)CaptureType.CharacterClassRange,
            new PrioritizedChoice(
                new Sequence(character, new CharacterClass('-'), character),
                character
            )
        )
    };

    // '[' followed by ranges up to the first ']', then spacing.
    characterClass = new Pattern(nameof(characterClass))
    {
        Data = new CaptureGroup((int)CaptureType.CharacterClass,
            new Sequence(
                '[',
                new ZeroOrMore(new Sequence(new Not(new CharacterClass(']')), range)),
                ']',
                spacing
            )
        )
    };

    // A single- or double-quoted run of characters; the trailing spacing is outside the capture.
    literal = new Pattern(nameof(literal))
    {
        Data = new Sequence(
            new CaptureGroup((int)CaptureType.Literal,
                new PrioritizedChoice(
                    new Sequence(
                        '\'',
                        new ZeroOrMore(new Sequence(new Not('\''), character)),
                        '\''
                    ),
                    new Sequence(
                        '"',
                        new ZeroOrMore(new Sequence(new Not('"'), character)),
                        '"'
                    )
                )
            ),
            spacing
        )
    };

    // --- Expression structure ----------------------------------------------------------------

    // Created without data here; the data is assigned further down from GetPrimaryData().
    primary = new Pattern();

    // A letter or underscore followed by letters, digits or underscores; spacing is not captured.
    identifier = new Pattern(nameof(identifier))
    {
        Data = new Sequence(
            new CaptureGroup((int)CaptureType.Identifier,
                new Sequence(
                    new CharacterClass(new CharacterRange('A', 'Z'), new CharacterRange('a', 'z'), new CharacterRange('_', '_')),
                    new ZeroOrMore(new CharacterClass(new CharacterRange('A', 'Z'), new CharacterRange('a', 'z'), new CharacterRange('0', '9'), new CharacterRange('_', '_')))
                )
            ),
            spacing
        )
    };

    identifierDeclaration = new Pattern(nameof(identifierDeclaration)) { Data = identifier };

    // primary followed by an optional '?', '*' or '+'.
    suffix = new Pattern(nameof(suffix))
    {
        Data = new PrioritizedChoice(
            new CaptureGroup((int)CaptureType.Optional, new Sequence(primary, QUESTION)),
            new CaptureGroup((int)CaptureType.ZeroOrMore, new Sequence(primary, STAR)),
            new CaptureGroup((int)CaptureType.OneOrMore, new Sequence(primary, PLUS)),
            primary
        )
    };

    // suffix optionally preceded by '&' or '!'.
    prefix = new Pattern(nameof(prefix))
    {
        Data = new PrioritizedChoice(
            new CaptureGroup((int)CaptureType.And, new Sequence(AND, suffix)),
            new CaptureGroup((int)CaptureType.Not, new Sequence(NOT, suffix)),
            suffix
        )
    };

    // Zero or more prefixes, so a sequence may be empty.
    sequence = new Pattern(nameof(sequence))
    {
        Data = new CaptureGroup((int)CaptureType.Sequence, new ZeroOrMore(prefix))
    };

    // One sequence followed by any number of "/ sequence" alternatives.
    internalExpression = new Pattern(nameof(internalExpression))
    {
        Data = new CaptureGroup((int)CaptureType.PrioritizedChoice,
            new Sequence(sequence, new ZeroOrMore(new Sequence(SLASH, sequence))))
    };

    // Now that the expression patterns exist, complete the pattern that suffix refers to.
    primary.Data = new PrioritizedChoice(GetPrimaryData());

    // --- Entry points --------------------------------------------------------------------------

    this.Definition = new Pattern(nameof(Definition))
    {
        Data = new CaptureGroup((int)CaptureType.Definition,
            new Sequence(identifierDeclaration, LEFTARROW, internalExpression))
    };

    // A whole grammar: leading spacing, at least one definition, then end of input.
    this.Grammar = new Pattern(nameof(Grammar))
    {
        Data = new Sequence(spacing, Operator.OneOrMore(Definition), endOfFile)
    };

    // A stand-alone expression: reuses the data of internalExpression, then requires end of input.
    this.Expression = new Pattern(nameof(Expression))
    {
        Data = new Sequence(internalExpression.Data, endOfFile)
    };
}
/// <summary>
/// Assembles the regular-expression grammar and stores the compiled root pattern in <c>Parser</c>.
/// </summary>
/// <param name="patternCompiler">Compiler that turns the root "RE" pattern into the parser.</param>
public RegexGrammar(PatternCompiler patternCompiler)
{
    // Declared up front and completed at the bottom; the root is referenced recursively by groups.
    var regexRoot = new Pattern("RE");
    var concatenation = new Pattern("SimpleRE");

    // Everything that is treated as a metacharacter, tried in the order listed.
    var metaCharacter = new Pattern("metaCharacter");
    var reservedSymbol = new CharacterClass('*', '+', '^', '$', '|', '(', ')', '[', ']');
    var metaLetterEscape = new Sequence(
        new CharacterClass('\\'),
        new CharacterClass('t', 'n', 'r', 'f', 'b', 'B', 'd', 'D', 's', 'S', 'w', 'W', 'Q', 'U', 'L'));
    var metaStarQuestion = CharacterClass.String(@"*?");
    var metaPlusQuestion = CharacterClass.String(@"+?");
    var metaDollarBacktick = CharacterClass.String(@"$`");
    var metaDollarQuote = CharacterClass.String(@"$'");
    var metaDollarAmpersand = CharacterClass.String(@"$&");
    var metaControl = CharacterClass.String(@"\cX");
    var metaDigitReference = new Sequence(
        new CharacterClass('\\', '$'),
        CharacterClass.Range('0', '9'));
    var metaThreeDigitEscape = new Sequence(
        new CharacterClass('\\'),
        CharacterClass.Range('0', '7'),
        CharacterClass.Range('0', '7'),
        CharacterClass.Range('0', '7'));
    metaCharacter.Data = new PrioritizedChoice(
        reservedSymbol,
        metaLetterEscape,
        metaStarQuestion,
        metaPlusQuestion,
        metaDollarBacktick,
        metaDollarQuote,
        metaDollarAmpersand,
        metaControl,
        metaDigitReference,
        metaThreeDigitEscape);

    // Same alternatives as above minus the reserved single symbols, captured as MetaCharacter.
    var allowedMetaCharacters = new Pattern("allowedMetaCharacter");
    var allowedLetterEscape = new Sequence(
        new CharacterClass('\\'),
        new CharacterClass('t', 'n', 'r', 'f', 'b', 'B', 'd', 'D', 's', 'S', 'w', 'W', 'Q', 'U', 'L'));
    var allowedStarQuestion = CharacterClass.String(@"*?");
    var allowedPlusQuestion = CharacterClass.String(@"+?");
    var allowedDollarBacktick = CharacterClass.String(@"$`");
    var allowedDollarQuote = CharacterClass.String(@"$'");
    var allowedDollarAmpersand = CharacterClass.String(@"$&");
    var allowedControl = CharacterClass.String(@"\cX");
    var allowedDigitReference = new Sequence(
        new CharacterClass('\\', '$'),
        CharacterClass.Range('0', '9'));
    var allowedThreeDigitEscape = new Sequence(
        new CharacterClass('\\'),
        CharacterClass.Range('0', '7'),
        CharacterClass.Range('0', '7'),
        CharacterClass.Range('0', '7'));
    allowedMetaCharacters.Data = new CaptureGroup(
        (int)CaptureType.MetaCharacter,
        new PrioritizedChoice(
            allowedLetterEscape,
            allowedStarQuestion,
            allowedPlusQuestion,
            allowedDollarBacktick,
            allowedDollarQuote,
            allowedDollarAmpersand,
            allowedControl,
            allowedDigitReference,
            allowedThreeDigitEscape));

    // A character is either a backslash-escaped metacharacter or any one non-metacharacter.
    var character = new Pattern("character");
    var escapedMeta = new Sequence(new CharacterClass('\\'), metaCharacter);
    var ordinaryChar = new Sequence(new Not(metaCharacter), new Any());
    character.Data = new CaptureGroup(
        (int)CaptureType.Char,
        new PrioritizedChoice(escapedMeta, ordinaryChar));

    // Bracketed sets: one or more members, each a "character - character" range or a character.
    var charRange = new CaptureGroup(
        (int)CaptureType.Range,
        new Sequence(character, new CharacterClass('-'), character));
    var setMember = new PrioritizedChoice(charRange, character);
    var setBody = new Pattern();
    setBody.Data = Operator.OneOrMore(setMember);

    var positiveSet = new CaptureGroup(
        (int)CaptureType.PositiveSet,
        new Sequence(new CharacterClass('['), setBody, new CharacterClass(']')));
    var negativeSet = new CaptureGroup(
        (int)CaptureType.NegativeSet,
        new Sequence(CharacterClass.String("[^"), setBody, new CharacterClass(']')));

    // The negated form is tried first because both forms start with '['.
    var set = new Pattern("set");
    set.Data = new PrioritizedChoice(negativeSet, positiveSet);

    var dollar = new CaptureGroup((int)CaptureType.Eos, new CharacterClass('$'));
    var dot = new CaptureGroup((int)CaptureType.Any, new CharacterClass('.'));
    var parenthesized = new Sequence(new CharacterClass('('), regexRoot, new CharacterClass(')'));

    var elementaryRE = new Pattern("elementaryRE");
    elementaryRE.Data = new PrioritizedChoice(parenthesized, dot, dollar, set, character, allowedMetaCharacters);

    // "{n}" or "{n,m}"; only the numbers (and comma) are inside the RepeatRange capture.
    var digits = Operator.OneOrMore(CharacterClass.Range('0', '9'));
    var openBrace = new CharacterClass('{');
    var repeatBounds = new CaptureGroup(
        (int)CaptureType.RepeatRange,
        new Sequence(digits, Operator.Optional(new Sequence(new CharacterClass(','), digits))));
    var closeBrace = new CharacterClass('}');
    var repeatRange = new Sequence(openBrace, repeatBounds, closeBrace);

    // Quantified forms of an elementary expression.
    var plus = new Pattern("plus");
    plus.Data = new CaptureGroup(
        (int)CaptureType.Plus,
        new Sequence(elementaryRE, new CharacterClass('+')));

    var star = new Pattern("star");
    star.Data = new CaptureGroup(
        (int)CaptureType.Star,
        new Sequence(elementaryRE, new CharacterClass('*')));

    var repeat = new Pattern("repeat");
    repeat.Data = new CaptureGroup(
        (int)CaptureType.Repeat,
        new Sequence(elementaryRE, repeatRange));

    // Quantified alternatives come before the bare elementary expression.
    var basicRE = new PrioritizedChoice(star, plus, repeat, elementaryRE);

    concatenation.Data = new CaptureGroup((int)CaptureType.Concatenation, Operator.OneOrMore(basicRE));

    // A concatenation optionally followed by '|' and another (recursive) regular expression.
    var alternativeTail = new ZeroOrMore(new Sequence(new CharacterClass('|'), regexRoot));
    regexRoot.Data = new CaptureGroup(
        (int)CaptureType.Union,
        new Sequence(concatenation, alternativeTail));

    Parser = patternCompiler.Compile(regexRoot);
}
/// <summary>
/// Maps one capture of the PEG meta-grammar to the operator it denotes.
/// </summary>
/// <param name="intKey">Capture key; interpreted as a <c>CaptureType</c> value.</param>
/// <param name="captureData">
/// String data of the capture. It is used as the referenced name for identifiers and as the
/// (possibly escaped) character text for characters.
/// </param>
/// <param name="parameters">
/// Operators that belong to this capture (presumably the results for its nested captures).
/// </param>
/// <returns>The operator that represents the capture.</returns>
/// <exception cref="PegParsingException">
/// A character-class range is built from operands holding more than one character, or the first
/// operand of a definition is not a <c>Pattern</c>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="intKey"/> is not a capture type handled here.
/// </exception>
protected virtual Operator BuildTreeNode(int intKey, string captureData, IReadOnlyList<Operator> parameters)
{
    var key = (CaptureType)intKey;
    switch (key)
    {
        case CaptureType.And:
            return Operator.And(parameters[0]);

        case CaptureType.Any:
            return new Any();

        case CaptureType.CharacterClass:
        {
            // Merge the ranges of every character-class operand into a single class.
            var rangeParams = parameters.OfType<CharacterClass>();
            return new CharacterClass(rangeParams.SelectMany(p => p.Ranges));
        }

        case CaptureType.CharacterClassRange:
        {
            // A lone operand is a single character, not a range.
            if (parameters.Count == 1)
            {
                return parameters[0];
            }

            var min = (CharacterClass)parameters[0];
            var max = (CharacterClass)parameters[1];
            if (min.NumChars > 1 || max.NumChars > 1)
            {
                throw new PegParsingException($"Cannot create range from {min} and {max}");
            }

            return CharacterClass.Range(min.Value.First(), max.Value.Last());
        }

        case CaptureType.Definition:
        {
            // Fail with a parsing error instead of a NullReferenceException when the
            // declared name did not come through as a pattern.
            var identifier = parameters[0] as Pattern;
            if (identifier == null)
            {
                throw new PegParsingException($"Cannot create definition: {parameters[0]} is not a pattern");
            }

            identifier.Data = parameters[1];
            return identifier;
        }

        case CaptureType.Empty:
            return new Empty();

        case CaptureType.Identifier:
            return new UnresolvedPatternReference(captureData);

        case CaptureType.Character:
            // Anything longer than one character is an escape sequence that needs translating.
            if (captureData.Length == 1)
            {
                return new CharacterClass(captureData[0]);
            }

            return new CharacterClass(TranslateEscapeCharacter(captureData));

        case CaptureType.Literal:
            if (parameters.Count <= 0)
            {
                return new Empty();
            }

            if (parameters.Count == 1)
            {
                return parameters[0];
            }

            return new Sequence(parameters);

        case CaptureType.Not:
            return new Not(parameters[0]);

        case CaptureType.OneOrMore:
            return Operator.OneOrMore(parameters[0]);

        case CaptureType.Optional:
            return Operator.Optional(parameters[0]);

        case CaptureType.ZeroOrMore:
            return new ZeroOrMore(parameters[0]);

        case CaptureType.PrioritizedChoice:
            if (parameters.Count <= 1)
            {
                return parameters[0];
            }

            // A choice made up solely of character classes collapses into one class.
            if (parameters.All(item => item is CharacterClass))
            {
                return new CharacterClass(parameters.Cast<CharacterClass>().SelectMany(item => item.Ranges));
            }

            return new PrioritizedChoice(parameters);

        case CaptureType.Sequence:
            // An empty sequence has no element to index; treat it like an empty literal.
            if (parameters.Count == 0)
            {
                return new Empty();
            }

            if (parameters.Count == 1)
            {
                return parameters[0];
            }

            return new Sequence(parameters);

        default:
            // Use the three-argument overload: the single-string overload takes a parameter
            // name, so the text would not be reported as the exception message.
            throw new ArgumentOutOfRangeException(nameof(intKey), intKey, $"Unrecognised CaptureType {key}");
    }
}