/// <summary>
/// Parses <paramref name="expression"/> with the shared <c>grammar</c> instance, wraps the
/// result in a pattern named "Expression" and compiles that pattern.
/// </summary>
/// <param name="patternCompiler">Compiler used to turn the pattern into a runner.</param>
/// <param name="expression">Expression text handed to <c>grammar.ParseExpression</c>.</param>
/// <returns>The runner produced by <c>patternCompiler.Compile</c>.</returns>
public static IRunner CompileExpression(this PatternCompiler patternCompiler, string expression)
{
    // The pattern is created before the text is parsed, matching the
    // evaluation order of an object initializer.
    var expressionPattern = new Pattern("Expression");
    expressionPattern.Data = grammar.ParseExpression(expression);

    return patternCompiler.Compile(expressionPattern);
}
/// <summary>
/// Compiles an unnamed pattern whose body is
/// <c>ZeroOrMore(PrioritizedChoice(CaptureGroup(0, expression), Any))</c>.
/// </summary>
/// <param name="patternCompiler">Compiler used to produce the runner.</param>
/// <param name="expression">Operator placed inside capture group 0.</param>
/// <returns>The runner produced by <c>patternCompiler.Compile</c>.</returns>
private static IRunner BuildRunnerFromExpression(PatternCompiler patternCompiler, Operator expression)
{
    var scanPattern = new Pattern();

    // First alternative: the caller's expression, recorded under capture key 0.
    var capturedExpression = new CaptureGroup(0, expression);
    // Second alternative: an Any operator.
    var fallback = new Any();

    scanPattern.Data = new ZeroOrMore(new PrioritizedChoice(capturedExpression, fallback));

    return patternCompiler.Compile(scanPattern);
}
/// <summary>
/// Compiles <paramref name="p"/> with an interpreter-based pattern compiler, runs it over
/// <paramref name="data"/>, asserts that the run succeeded and returns the text of every capture.
/// </summary>
/// <param name="p">Operator to compile and run.</param>
/// <param name="data">Input text.</param>
/// <param name="optimize">
/// When true a <c>DefaultOptimizer</c> is passed to the compiler; when false <c>null</c> is passed.
/// </param>
/// <returns>
/// One substring of <paramref name="data"/> per recorded capture, in the order the captures were recorded.
/// </returns>
private static List<string> Match(Operator p, string data, bool optimize = true)
{
    var patternCompiler = new PatternCompiler(
        new Compiler(),
        optimize ? new DefaultOptimizer() : null,
        new InterpreterJitter());
    var runner = patternCompiler.Compile(new Pattern() { Data = p });

    var captures = new List<Capture>();
    var outcome = runner.Run(data, captures);
    Assert.IsTrue(outcome.IsSuccessful);

    // Slice each capture's [StartPosition, EndPosition) span out of the input.
    var capturedTexts = new List<string>();
    foreach (var capture in captures)
    {
        var length = capture.EndPosition - capture.StartPosition;
        capturedTexts.Add(data.Substring(capture.StartPosition, length));
    }

    return capturedTexts;
}
/// <summary>
/// Builds the PEG grammar, compiles it with an <c>ILJitter</c> that has both memoization
/// switches enabled, runs it over <c>StringData</c> and checks that the run succeeds and that
/// at least one pattern name is reported for the position at the end of the input.
/// </summary>
public void TestPegGrammar()
{
    var pegGrammar = new PegGrammar();
    pegGrammar.EnsureGrammarBuilt();

    // Collaborators are created in the same order as the original argument list.
    var compiler = new Compiler();
    var optimizer = new DefaultOptimizer();
    var jitter = new ILJitter
    {
        EnableMemoization = true,
        EnableCaptureMemoization = true
    };

    var runner = new PatternCompiler(compiler, optimizer, jitter).Compile(pegGrammar.Grammar);

    var outcome = runner.Run(StringData);
    // Materialized before the assertions run, as in the original.
    var triedAtEnd = runner.GetPatternsTriedAt(StringData.Length).ToList();

    Assert.AreEqual(true, outcome.IsSuccessful);
    Assert.IsTrue(triedAtEnd.Count > 0);
}
/// <summary>
/// Writes a listing of <paramref name="image"/> to the console, one instruction per line,
/// starting at <paramref name="pc"/> and continuing until the index reaches the end of the array.
/// </summary>
/// <param name="image">Encoded program words.</param>
/// <param name="pc">Index of the first instruction to print.</param>
/// <param name="depth">Indent level; each level adds two spaces before the instruction text.</param>
public static void DisassembleBlock(ushort[] image, int pc, int depth)
{
    int cursor = pc;
    for (;;)
    {
        if (cursor >= image.Length)
        {
            return;
        }

        PatternCompiler.DecodeOp(image[cursor], out OpCode opcode, out OpFlags _);

        Console.Write(FormatAddress(cursor) + ": ");
        Console.Write(new string(' ', depth * 2));
        Console.Write(DisassembleOp(image, cursor));
        Console.WriteLine();

        cursor += WordCount(opcode, cursor);
    }

    // Number of words occupied by the instruction at index `at` (opcode word included).
    int WordCount(OpCode opcode, int at)
    {
        switch (opcode)
        {
            case OpCode.False:
            case OpCode.True:
            case OpCode.Until:
                return 1;

            case OpCode.Position:
            case OpCode.Reference:
            case OpCode.Character:
            case OpCode.Category:
            case OpCode.NotCategory:
            case OpCode.In:
            case OpCode.Open:
            case OpCode.Close:
            case OpCode.Sub:
            case OpCode.Branch:
            case OpCode.Jump:
                return 2;

            case OpCode.Range:
            case OpCode.Balance:
            case OpCode.IfDefined:
            case OpCode.Test:
            case OpCode.Anchor:
                return 3;

            case OpCode.Repeat:
            case OpCode.FastRepeat:
            case OpCode.Info:
                return 4;

            // Variable-length instructions: the size is derived from a word stored in the image.
            case OpCode.String:
                return image[at + 1] + 2;
            case OpCode.Set:
                return image[at + 2] + 3;

            default:
                return 1;
        }
    }
}
/// <summary>
/// Formats the single instruction at index <paramref name="pc"/> of <paramref name="image"/>:
/// the opcode name, the flags in square brackets when any are set, then opcode-specific operands.
/// </summary>
/// <param name="image">Encoded program words.</param>
/// <param name="pc">Index of the instruction's opcode word.</param>
/// <returns>The formatted instruction text (no trailing newline).</returns>
public static string DisassembleOp(ushort[] image, int pc)
{
    PatternCompiler.DecodeOp(image[pc], out OpCode opcode, out OpFlags flags);

    string head = opcode.ToString();
    if (flags != 0)
    {
        head += "[" + flags.ToString("f") + "]";
    }

    // Operand word at pc + offset.
    ushort Word(int offset) => image[pc + offset];
    // " :<address>" where the address is pc plus the operand word at pc + offset.
    string Target(int offset) => " :" + FormatAddress(pc + image[pc + offset]);
    // " '<char>'" for the operand word at pc + offset.
    string Quoted(int offset) => " '" + FormatChar((char)image[pc + offset]) + "'";

    switch (opcode)
    {
        case OpCode.Info:
            return $"{head} {Word(1)} ({Word(2)}, {Word(3)})";

        case OpCode.Character:
            return head + Quoted(1);

        case OpCode.Category:
        case OpCode.NotCategory:
            return $"{head} /{(Category)Word(1)}";

        case OpCode.Range:
            // Yields "'a',  'b'" with two spaces after the comma, exactly as before.
            return head + Quoted(1) + ", " + Quoted(2);

        case OpCode.Set:
            return $"{head} {FormatSet(image, pc + 1)}";

        case OpCode.String:
            return $"{head} '{ReadString(image, pc + 1)}'";

        case OpCode.Position:
            return $"{head} /{(Position)Word(1)}";

        case OpCode.Reference:
        case OpCode.Open:
        case OpCode.Close:
            return $"{head} {Word(1)}";

        case OpCode.Balance:
            return $"{head} {Word(1)} {Word(2)}";

        case OpCode.IfDefined:
        case OpCode.Anchor:
            return head + Target(1) + " " + Word(2);

        case OpCode.In:
        case OpCode.Sub:
        case OpCode.Branch:
        case OpCode.Jump:
            return head + Target(1);

        case OpCode.Test:
            return head + Target(1) + "," + Target(2);

        case OpCode.Repeat:
        case OpCode.FastRepeat:
        {
            string jump = Target(1);
            ushort lower = Word(2);
            ushort upper = Word(3);
            // An upper bound of ushort.MaxValue is printed as "Inf".
            string upperText = (upper == ushort.MaxValue) ? "Inf" : upper.ToString();
            return head + jump + " (" + lower + ", " + upperText + ")";
        }

        default:
            return head;
    }
}
/// <summary>
/// Creates an <see cref="ExtendedPegGrammar"/>; the only work done here is forwarding
/// <paramref name="patternCompiler"/> to the base-class constructor.
/// </summary>
/// <param name="patternCompiler">Compiler passed unchanged to the base constructor.</param>
public ExtendedPegGrammar(PatternCompiler patternCompiler) : base(patternCompiler) { }
/// <summary>
/// Builds the pattern graph for the regular-expression grammar and compiles its root pattern
/// (<c>RE</c>) with <paramref name="patternCompiler"/>, storing the result in <c>Parser</c>.
/// </summary>
/// <param name="patternCompiler">Compiler used to compile the root pattern.</param>
public RegexGrammar(PatternCompiler patternCompiler) {
// RE is referenced by `group` and by its own Data further down, so the two top-level
// patterns are created first and only receive their Data at the end of the constructor.
var RE = new Pattern("RE");
var simpleRE = new Pattern("SimpleRE");
// metaCharacter: a choice between a class of single punctuation characters, a backslash
// followed by one of the listed letters, several fixed strings, a backslash or dollar
// followed by a '0'..'9' range, and a backslash followed by three '0'..'7' ranges.
// NOTE(review): the single-character class containing '*', '+' and '$' is listed before the
// strings "*?", "+?", "$`", "$'", "$&" and before the '$' + digit sequence. If
// PrioritizedChoice commits to the first alternative that succeeds, those later alternatives
// cannot be selected from this pattern — confirm this is intended.
// NOTE(review): @"\cX" is the verbatim text backslash, 'c', 'X'; presumably
// CharacterClass.String matches it literally rather than "\c" plus any letter — confirm.
var metaCharacter = new Pattern("metaCharacter") { Data = new PrioritizedChoice(
new CharacterClass('*', '+', '^', '$', '|', '(', ')', '[', ']'),
new Sequence(new CharacterClass('\\'), new CharacterClass('t', 'n', 'r', 'f', 'b', 'B', 'd', 'D', 's', 'S', 'w', 'W', 'Q', 'U', 'L')),
CharacterClass.String(@"*?"),
CharacterClass.String(@"+?"),
CharacterClass.String(@"$`"),
CharacterClass.String(@"$'"),
CharacterClass.String(@"$&"),
CharacterClass.String(@"\cX"),
new Sequence(new CharacterClass('\\', '$'), CharacterClass.Range('0', '9')),
new Sequence(new CharacterClass('\\'), CharacterClass.Range('0', '7'), CharacterClass.Range('0', '7'), CharacterClass.Range('0', '7'))
) };
// allowedMetaCharacters: the same alternatives as metaCharacter except for the leading
// single-punctuation class, wrapped in a capture keyed by CaptureType.MetaCharacter.
// The pattern name string is "allowedMetaCharacter" (singular), unlike the variable name.
var allowedMetaCharacters = new Pattern("allowedMetaCharacter") { Data = new CaptureGroup((int)CaptureType.MetaCharacter, new PrioritizedChoice(
new Sequence(new CharacterClass('\\'), new CharacterClass('t', 'n', 'r', 'f', 'b', 'B', 'd', 'D', 's', 'S', 'w', 'W', 'Q', 'U', 'L')),
CharacterClass.String(@"*?"),
CharacterClass.String(@"+?"),
CharacterClass.String(@"$`"),
CharacterClass.String(@"$'"),
CharacterClass.String(@"$&"),
CharacterClass.String(@"\cX"),
new Sequence(new CharacterClass('\\', '$'), CharacterClass.Range('0', '9')),
new Sequence(new CharacterClass('\\'), CharacterClass.Range('0', '7'), CharacterClass.Range('0', '7'), CharacterClass.Range('0', '7'))
)
) };
// character: either a backslash followed by metaCharacter, or Any guarded by
// Not(metaCharacter); captured under CaptureType.Char.
var character = new Pattern("character") { Data = new CaptureGroup((int)CaptureType.Char, new PrioritizedChoice(
new Sequence(
new CharacterClass('\\'),
metaCharacter
),
new Sequence(
new Not(metaCharacter),
new Any()
)
)
) };
// Set contents: `range` (character '-' character) is tried before a lone character.
var range = new CaptureGroup((int)CaptureType.Range, new Sequence(character, new CharacterClass('-'), character));
var setItem = new PrioritizedChoice(range, character);
var setItems = new Pattern() { Data = Operator.OneOrMore(setItem) };
// Bracketed sets; in `set` the "[^" form is listed before the plain "[" form.
var positiveSet = new CaptureGroup((int)CaptureType.PositiveSet, new Sequence(new CharacterClass('['), setItems, new CharacterClass(']')));
var negativeSet = new CaptureGroup((int)CaptureType.NegativeSet, new Sequence(CharacterClass.String("[^"), setItems, new CharacterClass(']')));
var set = new Pattern("set") { Data = new PrioritizedChoice(negativeSet, positiveSet) };
// Single-character captures for '$' (Eos) and '.' (Any).
var eos = new CaptureGroup((int)CaptureType.Eos, new CharacterClass('$'));
var any = new CaptureGroup((int)CaptureType.Any, new CharacterClass('.'));
// Parenthesised sub-expression; this reference to RE is what makes the grammar recursive.
var group = new Sequence(new CharacterClass('('), RE, new CharacterClass(')'));
var elementaryRE = new Pattern("elementaryRE") { Data = new PrioritizedChoice(group, any, eos, set, character, allowedMetaCharacters) };
// Repetition suffixes: "{number}" or "{number,number}", '+' and '*'.
var number = Operator.OneOrMore(CharacterClass.Range('0', '9'));
var repeatRange = new Sequence(new CharacterClass('{'), new CaptureGroup((int)CaptureType.RepeatRange, new Sequence(number, Operator.Optional(new Sequence(new CharacterClass(','), number)))), new CharacterClass('}'));
var plus = new Pattern("plus") { Data = new CaptureGroup((int)CaptureType.Plus, new Sequence(elementaryRE, new CharacterClass('+'))) };
var star = new Pattern("star") { Data = new CaptureGroup((int)CaptureType.Star, new Sequence(elementaryRE, new CharacterClass('*'))) };
var repeat = new Pattern("repeat") { Data = new CaptureGroup((int)CaptureType.Repeat, new Sequence(elementaryRE, repeatRange)) };
// The suffixed forms are listed before the bare elementaryRE.
var basicRE = new PrioritizedChoice(star, plus, repeat, elementaryRE);
// Fill in the two patterns declared at the top: a concatenation of one or more basicRE,
// and a union of simpleRE followed by zero or more "'|' RE".
simpleRE.Data = new CaptureGroup((int)CaptureType.Concatenation, Operator.OneOrMore(basicRE));
RE.Data = new CaptureGroup((int)CaptureType.Union, new Sequence(simpleRE, new ZeroOrMore(new Sequence(new CharacterClass('|'), RE))));
// Compile the root pattern once and keep the result for later use.
Parser = patternCompiler.Compile(RE);
}
/// <summary>
/// Creates a grammar that keeps <paramref name="compilerFactory"/> for later use and starts
/// out flagged as not built.
/// </summary>
/// <param name="compilerFactory">Pattern compiler stored in <c>patternCompiler</c>.</param>
public PegGrammar(PatternCompiler compilerFactory)
{
    patternCompiler = compilerFactory;
    m_IsBuilt = false;
}
/// <summary>
/// Console entry point. Converts the regular expression <c>([A-Za-z]awyer|[A-Za-z]inn)\s</c>
/// into a pattern, compiles it through <c>CompileAndWritePatternToFile</c>, prints how long that
/// took, runs the compiled pattern over a short sample text and reports a full-length match.
/// Waits for a key press before returning.
/// </summary>
/// <param name="args">Command-line arguments (not read).</param>
static void Main(string[] args)
{
    var compiler = new PatternCompiler(new Compiler(), null, new ILJitter());
    var lazyRegexGrammar = new Lazy<RegexGrammar>(() => new RegexGrammar(compiler));
    var regexConverter = new RegexConverter();
    var pegHelper = new PegHelper(compiler);
    pegHelper.EnsureExpressionBuilt();

    // Not read by any live statement below; the call is kept so the work done here is unchanged.
    var input = GenerateInputData(1 << 20);

    // Search pattern: capture the converted regex under key 0, otherwise take the Any
    // alternative; the choice is wrapped in ZeroOrMore.
    var parsedRegex = lazyRegexGrammar.Value.ParseExpression("([A-Za-z]awyer|[A-Za-z]inn)\\s");
    var convertedRegex = regexConverter.Convert(parsedRegex);
    var searchOperator = new ZeroOrMore(new PrioritizedChoice(new CaptureGroup(0, convertedRegex), new Any()));

    // Timing starts after the regex has been parsed and converted.
    var compileTimer = Stopwatch.StartNew();

    var peg = new Pattern("SimpleMatch");
    peg.Data = searchOperator;

    var compiledPattern = CompileAndWritePatternToFile("SimpleMatch", peg);
    Console.WriteLine($"Saved ({compileTimer.ElapsedMilliseconds}ms)");

    var text = "Tom..Huckleberry Finn Tom Tom Huck\nFinn,";
    var captures = new List<Capture>();
    var runResult = compiledPattern.Run(text, captures);

    // Only a run that succeeds and stops exactly at the end of the text is reported.
    bool consumedWholeText = runResult.IsSuccessful && runResult.InputPosition == text.Length;
    if (consumedWholeText)
    {
        Console.WriteLine($"Successful match on '{text}'");
    }

    Console.ReadKey();
}
/// <summary>
/// Parses <paramref name="grammarStr"/> with the shared <c>grammar</c> instance and compiles
/// the last pattern in the parse result.
/// </summary>
/// <param name="patternCompiler">Compiler used to produce the runner.</param>
/// <param name="grammarStr">Grammar text handed to <c>grammar.ParseGrammar</c>.</param>
/// <returns>The runner produced by <c>patternCompiler.Compile</c>.</returns>
public static IRunner CompileGrammar(this PatternCompiler patternCompiler, string grammarStr)
{
    var parsedPatterns = grammar.ParseGrammar(grammarStr);
    var lastPattern = parsedPatterns.Last();

    return patternCompiler.Compile(lastPattern);
}
/// <summary>
/// Creates a <see cref="PegHelper"/>; the only work done here is forwarding
/// <paramref name="patternCompiler"/> to the base-class constructor.
/// </summary>
/// <param name="patternCompiler">Compiler passed unchanged to the base constructor.</param>
public PegHelper(PatternCompiler patternCompiler) : base(patternCompiler) { }