/// <summary>
/// Builds a fully wired extractor from the current settings.
/// </summary>
/// <remarks>
/// Grammar sources are collected (the inline main grammar first, then the repository
/// contents), compiled into rules, wrapped with start terminals and handed to an
/// Earley parser, which the resulting extractor uses.
/// </remarks>
/// <returns>A new <see cref="Extractor"/> exposed as <see cref="IExtractor"/>.</returns>
public IExtractor CreateExtractor() {
    var tokenizer = Tokenizer;
    var loggers = _loggerFactory;
    var repository = GrammarRepository;
    var compiler = new GrammarCompiler(tokenizer, _morph, _extensions);
    var startCreator = new StartTerminalsCreator(_settings);

    var sources = new List<(string key, string src)>();

    // The inline grammar from the settings is registered under the fixed key "main".
    if (!string.IsNullOrWhiteSpace(_settings.MainGrammar)) {
        sources.Add(("main", _settings.MainGrammar));
    }

    // The repository is only consulted when a grammars directory is configured.
    if (!string.IsNullOrWhiteSpace(_settings.GrammarsDirPath)) {
        sources.AddRange(repository.GetAll());
    }

    var compiledRules = compiler.Compile(
        _getGrammars(loggers, sources, _settings.Language),
        _settings.Variables);
    var startRules = startCreator.Create(compiledRules);

    // The logger factory may be null, in which case the parser and extractor get null loggers.
    var parser = new EarleyParser(startRules, loggers?.CreateLogger<IChartParser>());

    return new Extractor(tokenizer, _morph, parser, _settings, loggers?.CreateLogger<IExtractor>());
}
/// <summary>
/// Parses <paramref name="tokens"/> with an Earley parser built from the start rules
/// found in <paramref name="rules"/>.
/// </summary>
/// <param name="rules">Rules to search; only those with <c>IsStart</c> set are used.</param>
/// <param name="tokens">Token sequence to parse.</param>
/// <returns>The chart returned by the parser.</returns>
private static Chart _createChart(IEnumerable<Rule> rules, IEnumerable<Token> tokens) {
    var startRules = rules.Where(rule => rule.IsStart);
    var startTerminals = StartTerminal.Create(startRules).ToArray();

    // No logger is supplied to the parser.
    return new EarleyParser(startTerminals, null).Parse(tokens);
}
/// <summary>
/// Readme-style walkthrough: builds the weighted grammar S -> aSa | bSb | ε, queries it
/// with an Earley parser, prints generated sentences, and finally prints a randomly
/// generated grammar.
/// </summary>
public static void Do() {
    // S -> aSa | bSb | ε
    var productions = new List<Production>();

    // A production can be constructed by passing arguments...
    productions.Add(new Production(
        lhs: Nonterminal.Of("S"),
        rhs: new Sentence { Terminal.Of("a"), Nonterminal.Of("S"), Terminal.Of("a") },
        weight: 20
    ));

    // ...or parsed from a string (the bracketed number is the weight).
    productions.Add(CFGParser.Production(@"<S> -> 'b' <S> 'b' [10]"));
    productions.Add(CFGParser.Production(@"<S> -> ε [5]"));

    var cfg = new Grammar(productions, Nonterminal.Of("S"));

    // Only the Earley parser is demonstrated; the CNF conversion and CYK parser are not used here.
    var ep = new EarleyParser(cfg);

    // Does this grammar accept the string "aabb"?
    var aabbProbability = ep.ParseGetProbability(Sentence.FromLetters("aabb"));
    Console.WriteLine("aabb: {0}", aabbProbability);

    // How about "abba"?
    var abbaProbability = ep.ParseGetProbability(Sentence.FromLetters("abba"));
    Console.WriteLine("abba: {0}", abbaProbability);

    var abbaForest = ep.ParseGetForest(Sentence.FromLetters("abba"));
    Console.WriteLine(abbaForest);

    // Print five randomly produced sentences.
    int remaining = 5;
    while (remaining > 0) {
        Console.WriteLine(cfg.ProduceRandom().AsTerminals());
        remaining--;
    }

    // Print every sentence returned by ProduceToDepth(3).
    foreach (var produced in cfg.ProduceToDepth(3)) {
        Console.WriteLine(produced.Value.AsTerminals());
    }

    // Generate a random grammar over the terminals 'a' and 'b' (generator constructed with 1).
    var generator = new GrammarGenerator(1);
    var terminals = new List<Terminal> { Terminal.Of("a"), Terminal.Of("b") };
    var randomGrammar = generator.NextCFG(
        numNonterminals: 4,
        numProductions: 10,
        maxProductionLength: 4,
        terminals: terminals
    );
    Console.WriteLine(randomGrammar);
}
/// <summary>
/// Checks that <c>EarleyParser</c> and <c>EarleyParser2</c> assign nearly equal
/// probabilities to <paramref name="input"/> under grammar <paramref name="g"/>.
/// </summary>
/// <param name="g">Grammar both parsers are built from.</param>
/// <param name="input">Text converted to a sentence via <c>Sentence.FromWords</c>.</param>
private static void ExecuteTest(Grammar g, string input) {
    var oldParser = new EarleyParser(g);
    var newParser = new EarleyParser2(g);
    var words = Sentence.FromWords(input);

    var oldProbability = oldParser.ParseGetProbability(words);
    var newProbability = newParser.ParseGetProbability(words);

    Helpers.AssertNear(oldProbability, newProbability);
}
/// <summary>
/// Checks, for every sentence, that the CYK parser (run on the CNF form of
/// <paramref name="g"/>) and the Earley parser (run on <paramref name="g"/> itself)
/// report nearly equal probabilities.
/// </summary>
/// <param name="g">Grammar under test.</param>
/// <param name="sentences">Sentences to parse with both parsers.</param>
private static void ExecuteTest(Grammar g, List<Sentence> sentences) {
    CNFGrammar cnf = g.ToCNF();
    var earleyParser = new EarleyParser(g);
    var cykParser = new CykParser(cnf);

    foreach (var candidate in sentences) {
        var cykProbability = cykParser.ParseGetProbability(candidate);
        var earleyProbability = earleyParser.ParseGetProbability(candidate);
        Helpers.AssertNear(cykProbability, earleyProbability);
    }
}
/// <summary>
/// Parses each sentence with a CYK parser (built from the CNF conversion of
/// <paramref name="g"/>) and an Earley parser (built from <paramref name="g"/>),
/// asserting that the two probabilities are near each other.
/// </summary>
/// <param name="g">Grammar under test.</param>
/// <param name="sentences">Sentences to compare the parsers on.</param>
private static void ExecuteTest(Grammar g, List<Sentence> sentences) {
    CNFGrammar normalForm = g.ToCNF();
    var viaEarley = new EarleyParser(g);
    var viaCyk = new CykParser(normalForm);

    foreach (var item in sentences) {
        // CYK is queried first, then Earley, matching the argument order of AssertNear.
        var fromCyk = viaCyk.ParseGetProbability(item);
        Helpers.AssertNear(fromCyk, viaEarley.ParseGetProbability(item));
    }
}
/// <summary>
/// Annotates <paramref name="g"/> with identity actions, parses <paramref name="input"/>
/// into a forest with both Earley implementations, and runs the traversal check on each forest.
/// </summary>
/// <param name="g">Grammar to annotate and parse with.</param>
/// <param name="input">Text converted to a sentence via <c>Sentence.FromWords</c>.</param>
private static void ExecuteTest(Grammar g, string input) {
    var annotated = IdentityActions.Annotate(g);
    var oldParser = new EarleyParser(annotated);
    var newParser = new EarleyParser2(annotated);
    var words = Sentence.FromWords(input);

    // Both forests are produced before either is checked.
    var oldForest = oldParser.ParseGetForest(words);
    var newForest = newParser.ParseGetForest(words);

    CheckTraversal(annotated, words, oldForest);
    CheckTraversal(annotated, words, newForest);
}
/// <summary>
/// Parses <paramref name="sentence"/> against the BNF grammar with both Earley
/// implementations, asserting that their probabilities are near each other and positive.
/// </summary>
/// <param name="sentence">Sentence expected to be accepted by the BNF grammar.</param>
private static void BnfParse(Sentence sentence) {
    var bnf = Bnf.Grammar();

    // The CYK cross-check is left out: converting this grammar to CNF used too much memory.
    var oldParser = new EarleyParser(bnf);
    var newParser = new EarleyParser2(bnf);

    var oldProbability = oldParser.ParseGetProbability(sentence);
    var newProbability = newParser.ParseGetProbability(sentence);

    Helpers.AssertNear(oldProbability, newProbability);
    Assert.IsTrue(oldProbability > 0.0);
}
// Grammar taken from http://dx.doi.org/10.1016/j.entcs.2008.03.044
/// <summary>
/// Parses "abbb" with both Earley implementations using the paper's example grammar
/// and renders each resulting forest through <c>DotRunner</c>.
/// </summary>
private static void PaperExamples() {
    var productions = new List<Production> {
        CFGParser.Production("<S> → <A> <T>"),
        CFGParser.Production("<S> → 'a' <T>"),
        CFGParser.Production("<A> → 'a'"),
        CFGParser.Production("<A> → <B> <A>"),
        CFGParser.Production("<B> → ε"),
        CFGParser.Production("<T> → 'b' 'b' 'b'"),
    };
    var ex3 = new Grammar(productions, Nonterminal.Of("S"));
    var letters = Sentence.FromLetters("abbb");

    // Both forests are built before anything is rendered.
    var oldForest = new EarleyParser(ex3).ParseGetForest(letters);
    var newForest = new EarleyParser2(ex3).ParseGetForest(letters);

    var oldDot = DotBuilder.GetRawDot(oldForest);
    DotRunner.Run(oldDot, "example3_old");

    var newDot = DotBuilder.GetRawDot(newForest);
    DotRunner.Run(newDot, "example3_new");
}
/// <summary>
/// Debugging aid: parses "1 + 1 + 1" with the addition grammar using both Earley
/// implementations, renders each raw forest through <c>DotRunner</c>, and finally asks
/// the second parser for the sentence probability.
/// </summary>
private static void DebugGrammar() {
    // Placeholder grammar (S → ε); it is replaced by the addition grammar before any parsing.
    var placeholderProductions = new List<Production> {
        CFGParser.Production("<S> → ε"),
    };
    BaseGrammar active = new Grammar(placeholderProductions, Nonterminal.Of("S"));

    var input = Sentence.FromWords("1 + 1 + 1");

    // The action formats the first and third arguments' payloads as a parenthesised sum.
    active = AdditionGrammar(args => string.Format("({0} + {1})", args[0].Payload, args[2].Payload));

    var oldParser = new EarleyParser(active);
    var newParser = new EarleyParser2(active);

    var oldForest = oldParser.ParseGetForest(input);
    DotRunner.Run(DotBuilder.GetRawDot(oldForest), "testEarleyOld");

    var newForest = newParser.ParseGetForest(input);
    DotRunner.Run(DotBuilder.GetRawDot(newForest), "testEarleyNew");

    // The value is not used afterwards; it is kept so it can be inspected in a debugger.
    var newProbability = newParser.ParseGetProbability(input);
}
/// <summary>
/// Cross-checks the Earley parser against the CYK parser on randomly generated grammars.
/// </summary>
/// <remarks>
/// Every sequence returned by <c>CombinationsWithRepetition</c> for lengths 0 through
/// <paramref name="_maxInputLength"/> is parsed with both parsers for every grammar.
/// On the first mismatch (or any parser failure) the grammar and sentence are passed to
/// <c>Report</c> and the exception is rethrown.
/// </remarks>
/// <param name="_numGrammars">Number of random grammars to generate and test.</param>
/// <param name="_numNonterminals">Forwarded to <c>GrammarGenerator.NextCFG</c>.</param>
/// <param name="_numTerminals">Number of terminals, named "x0", "x1", ...</param>
/// <param name="_numProductions">Forwarded to <c>GrammarGenerator.NextCFG</c>.</param>
/// <param name="_maxProductionLength">Forwarded to <c>GrammarGenerator.NextCFG</c>.</param>
/// <param name="_maxInputLength">Longest input sentence length to try (inclusive).</param>
/// <param name="seed">Value passed to the <c>GrammarGenerator</c> constructor.</param>
/// <exception cref="InvalidOperationException">
/// The two parsers report probabilities that are not near each other.
/// </exception>
public void RandomParsingTest(
    int _numGrammars = 10000,
    int _numNonterminals = 10,
    int _numTerminals = 5,
    int _numProductions = 30,
    int _maxProductionLength = 8,
    int _maxInputLength = 6,
    int seed = 0
) {
    var printStatus = true;
    var range = Enumerable.Range(0, _numTerminals);
    var terminals = new List<Terminal>(range.Select((x) => Terminal.Of("x" + x)));

    Console.WriteLine("Preparing sentences");
    var preparedSentences = new List<Sentence>();
    for (int length = 0; length <= _maxInputLength; length++) {
        var combinations = CFGLibTest.Helpers.CombinationsWithRepetition(terminals, length);
        foreach (var target in combinations) {
            preparedSentences.Add(new Sentence(target));
        }
    }

    var randg = new GrammarGenerator(seed);
    var preparedGrammars = new List<Grammar>(_numGrammars);
    var preparedGrammarsCNF = new List<CNFGrammar>(_numGrammars);

    Console.WriteLine("Preparing grammars");
    for (int i = 0; i < _numGrammars; i++) {
        // Keep generating until the grammar has at least one production.
        // NOTE(review): the meaning of the trailing `true` argument is not visible here — confirm.
        Grammar g;
        do {
            g = randg.NextCFG(_numNonterminals, _numProductions, _maxProductionLength, terminals, true);
        } while (!g.Productions.Any());

        // Conversion to CNF happens here so it stays outside the timed section below.
        var h = g.ToCNF();
        preparedGrammars.Add(g);
        preparedGrammarsCNF.Add(h);
    }

    Console.WriteLine("starting");
    var sw = Stopwatch.StartNew();
    for (int grammarIndex = 0; grammarIndex < _numGrammars; grammarIndex++) {
        if (printStatus) {
            Console.WriteLine("---------------{0}/{1}---------------", grammarIndex.ToString("D5"), _numGrammars.ToString("D5"));
        }

        var g = preparedGrammars[grammarIndex];
        var h = preparedGrammarsCNF[grammarIndex];
        var earley = new EarleyParser(g);
        var cyk = new CykParser(h);

        var accepts = 0;
        foreach (var sentence in preparedSentences) {
            try {
                var p1 = earley.ParseGetProbability(sentence);
                var p2 = cyk.ParseGetProbability(sentence);
                if (!Helpers.IsNear(p2, p1)) {
                    // Carry both values so the failure is diagnosable from the exception alone.
                    throw new InvalidOperationException(
                        string.Format("Earley probability ({0}) and CYK probability ({1}) differ.", p1, p2));
                }
                // Acceptance is counted from the CYK result.
                if (p2 > 0) {
                    accepts++;
                }
            } catch (Exception) {
                // Report the offending grammar/sentence pair, then let the failure propagate.
                Report(g, sentence);
                throw;
            }
        }

        if (printStatus) {
            Console.WriteLine("Accepted {0} / {1}", accepts, preparedSentences.Count);
        }
    }
    sw.Stop();

    Console.WriteLine();
    Console.WriteLine("inner Elapsed: {0}s", sw.Elapsed.TotalMilliseconds / 1000.0);
}
/// <summary>
/// Scratch entry point: parses "0 + 0 + 0" with an ambiguous addition grammar, prints the
/// resulting forest, renders it through <c>DotRunner</c> in two forms, and waits for input.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args) {
    // The test fixtures are instantiated but none of their tests are invoked here
    // (e.g. TestParsing21, TestWeirdSppf06/07 on TestParsing remain disabled).
    new TestCFGToCNF();
    new TestCFGToCNFEmptyProb();
    new RegressionTests();
    new TestParsing();

    // Other grammars previously tried in place of the one below:
    //   <S> → 'x' <X> | <X> 'x' | 'x' 'x' ;  <X> → 'x'      (input "x x")
    //   <S> → <S> <S> <S> | 'x' | ε
    //   <S> → <S> <S> | 'b' | ε                             (input "b")
    // Disabled extra productions for the active grammar:
    //   <S> → <S> '*' <S>,  <S> → [0-9]+,  <S> → '2'
    var productions = new List<Production> {
        CFGParser.Production("<S> → <S> '+' <S>"),
        CFGParser.Production("<S> → '0'"),
    };
    var grammar = new Grammar(productions, Nonterminal.Of("S"));

    var parser = new EarleyParser(grammar);
    var forest = parser.ParseGetForest(Sentence.FromWords("0 + 0 + 0"));
    Console.WriteLine(forest);

    // Render the raw forest first, then the regular dot output.
    DotRunner.Run(forest.GetRawDot(), "rawGraph");
    DotRunner.Run(forest.ToDot(), "addition");

    // Disabled experiments: shared-node dot output ("additionShared"), PrettyPrinter,
    // Readme.Do(), and a timed RandomTests.RandomParsingTest(1, 10, 5, 50, 8, 6) run.

    Console.WriteLine("Finished!");
    Console.Read();
}
/// <summary>
/// Cross-checks the Earley parser against the CYK parser on randomly generated grammars.
/// </summary>
/// <remarks>
/// Every sequence returned by <c>CombinationsWithRepetition</c> for lengths 0 through
/// <paramref name="_maxInputLength"/> is parsed with both parsers for every grammar.
/// On the first mismatch (or any parser failure) the grammar and sentence are passed to
/// <c>Report</c> and the exception is rethrown.
/// </remarks>
/// <param name="_numGrammars">Number of random grammars to generate and test.</param>
/// <param name="_numNonterminals">Forwarded to <c>GrammarGenerator.NextCFG</c>.</param>
/// <param name="_numTerminals">Number of terminals, named "x0", "x1", ...</param>
/// <param name="_numProductions">Forwarded to <c>GrammarGenerator.NextCFG</c>.</param>
/// <param name="_maxProductionLength">Forwarded to <c>GrammarGenerator.NextCFG</c>.</param>
/// <param name="_maxInputLength">Longest input sentence length to try (inclusive).</param>
/// <param name="seed">Value passed to the <c>GrammarGenerator</c> constructor.</param>
/// <exception cref="InvalidOperationException">
/// The two parsers report probabilities that are not near each other.
/// </exception>
public void RandomParsingTest(
    int _numGrammars = 10000,
    int _numNonterminals = 10,
    int _numTerminals = 5,
    int _numProductions = 30,
    int _maxProductionLength = 8,
    int _maxInputLength = 6,
    int seed = 0
) {
    var printStatus = true;
    var range = Enumerable.Range(0, _numTerminals);
    var terminals = new List<Terminal>(range.Select((x) => Terminal.Of("x" + x)));

    Console.WriteLine("Preparing sentences");
    var preparedSentences = new List<Sentence>();
    for (int length = 0; length <= _maxInputLength; length++) {
        var combinations = CFGLibTest.Helpers.CombinationsWithRepetition(terminals, length);
        foreach (var target in combinations) {
            preparedSentences.Add(new Sentence(target));
        }
    }

    var randg = new GrammarGenerator(seed);
    var preparedGrammars = new List<Grammar>(_numGrammars);
    var preparedGrammarsCNF = new List<CNFGrammar>(_numGrammars);

    Console.WriteLine("Preparing grammars");
    for (int i = 0; i < _numGrammars; i++) {
        // Keep generating until the grammar has at least one production.
        // NOTE(review): the meaning of the trailing `true` argument is not visible here — confirm.
        Grammar g;
        do {
            g = randg.NextCFG(_numNonterminals, _numProductions, _maxProductionLength, terminals, true);
        } while (!g.Productions.Any());

        // Conversion to CNF happens here so it stays outside the timed section below.
        var h = g.ToCNF();
        preparedGrammars.Add(g);
        preparedGrammarsCNF.Add(h);
    }

    Console.WriteLine("starting");
    var sw = Stopwatch.StartNew();
    for (int grammarIndex = 0; grammarIndex < _numGrammars; grammarIndex++) {
        if (printStatus) {
            Console.WriteLine("---------------{0}/{1}---------------", grammarIndex.ToString("D5"), _numGrammars.ToString("D5"));
        }

        var g = preparedGrammars[grammarIndex];
        var h = preparedGrammarsCNF[grammarIndex];
        var earley = new EarleyParser(g);
        var cyk = new CykParser(h);

        var accepts = 0;
        foreach (var sentence in preparedSentences) {
            try {
                var p1 = earley.ParseGetProbability(sentence);
                var p2 = cyk.ParseGetProbability(sentence);
                if (!Helpers.IsNear(p2, p1)) {
                    // Carry both values so the failure is diagnosable from the exception alone.
                    throw new InvalidOperationException(
                        string.Format("Earley probability ({0}) and CYK probability ({1}) differ.", p1, p2));
                }
                // Acceptance is counted from the CYK result.
                if (p2 > 0) {
                    accepts++;
                }
            } catch (Exception) {
                // Report the offending grammar/sentence pair, then let the failure propagate.
                Report(g, sentence);
                throw;
            }
        }

        if (printStatus) {
            Console.WriteLine("Accepted {0} / {1}", accepts, preparedSentences.Count);
        }
    }
    sw.Stop();

    Console.WriteLine();
    Console.WriteLine("inner Elapsed: {0}s", sw.Elapsed.TotalMilliseconds / 1000.0);
}
/// <summary>
/// Readme-style walkthrough: builds the weighted grammar S -> aSa | bSb | ε, queries it
/// with an Earley parser, prints generated sentences, and finally prints a randomly
/// generated grammar.
/// </summary>
public static void Do() {
    // S -> aSa | bSb | ε
    var productions = new List<Production>();

    // A production can be constructed by passing arguments...
    productions.Add(new Production(
        lhs: Nonterminal.Of("S"),
        rhs: new Sentence { Terminal.Of("a"), Nonterminal.Of("S"), Terminal.Of("a") },
        weight: 20
    ));

    // ...or parsed from a string (the bracketed number is the weight).
    productions.Add(CFGParser.Production(@"<S> -> 'b' <S> 'b' [10]"));
    productions.Add(CFGParser.Production(@"<S> -> ε [5]"));

    var cfg = new Grammar(productions, Nonterminal.Of("S"));

    // Only the Earley parser is demonstrated; the CNF conversion and CYK parser are not used here.
    var ep = new EarleyParser(cfg);

    // Does this grammar accept the string "aabb"?
    var aabbProbability = ep.ParseGetProbability(Sentence.FromLetters("aabb"));
    Console.WriteLine("aabb: {0}", aabbProbability);

    // How about "abba"?
    var abbaProbability = ep.ParseGetProbability(Sentence.FromLetters("abba"));
    Console.WriteLine("abba: {0}", abbaProbability);

    var abbaForest = ep.ParseGetForest(Sentence.FromLetters("abba"));
    Console.WriteLine(abbaForest);

    // Print five randomly produced sentences.
    int remaining = 5;
    while (remaining > 0) {
        Console.WriteLine(cfg.ProduceRandom().AsTerminals());
        remaining--;
    }

    // Print every sentence returned by ProduceToDepth(3).
    foreach (var produced in cfg.ProduceToDepth(3)) {
        Console.WriteLine(produced.Value.AsTerminals());
    }

    // Generate a random grammar over the terminals 'a' and 'b' (generator constructed with 1).
    var generator = new GrammarGenerator(1);
    var terminals = new List<Terminal> { Terminal.Of("a"), Terminal.Of("b") };
    var randomGrammar = generator.NextCFG(
        numNonterminals: 4,
        numProductions: 10,
        maxProductionLength: 4,
        terminals: terminals
    );
    Console.WriteLine(randomGrammar);
}
/// <summary>
/// Scratch entry point: parses "0 + 0 + 0" with an ambiguous addition grammar, prints the
/// resulting forest, renders it through <c>DotRunner</c> in two forms, and waits for input.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args) {
    // The test fixtures are instantiated but none of their tests are invoked here
    // (e.g. TestParsing21, TestWeirdSppf06/07 on TestParsing remain disabled).
    new TestCFGToCNF();
    new TestCFGToCNFEmptyProb();
    new RegressionTests();
    new TestParsing();

    // Other grammars previously tried in place of the one below:
    //   <S> → 'x' <X> | <X> 'x' | 'x' 'x' ;  <X> → 'x'      (input "x x")
    //   <S> → <S> <S> <S> | 'x' | ε
    //   <S> → <S> <S> | 'b' | ε                             (input "b")
    // Disabled extra productions for the active grammar:
    //   <S> → <S> '*' <S>,  <S> → [0-9]+,  <S> → '2'
    var productions = new List<Production> {
        CFGParser.Production("<S> → <S> '+' <S>"),
        CFGParser.Production("<S> → '0'"),
    };
    var grammar = new Grammar(productions, Nonterminal.Of("S"));

    var parser = new EarleyParser(grammar);
    var forest = parser.ParseGetForest(Sentence.FromWords("0 + 0 + 0"));
    Console.WriteLine(forest);

    // Render the raw forest first, then the regular dot output.
    DotRunner.Run(forest.GetRawDot(), "rawGraph");
    DotRunner.Run(forest.ToDot(), "addition");

    // Disabled experiments: shared-node dot output ("additionShared"), PrettyPrinter,
    // Readme.Do(), and a timed RandomTests.RandomParsingTest(1, 10, 5, 50, 8, 6) run.

    Console.WriteLine("Finished!");
    Console.Read();
}
/// <summary>
/// Obtains one grammar from <c>NextGrammar</c> and cross-checks <c>EarleyParser</c>,
/// <c>EarleyParser2</c> and <c>CykParser</c> against each other on a set of sentences.
/// </summary>
/// <remarks>
/// The sentences are every sequence returned by <c>CombinationsWithRepetition</c> for
/// lengths 0 through <c>_maxInputLength</c>, plus whatever <c>AddRandomSentences</c>
/// appends, de-duplicated by their <c>AsTerminals()</c> text. Checking stops at the
/// first failure.
/// </remarks>
/// <returns>
/// <c>true</c> if a failure was passed to <c>Report</c>; <c>false</c> if every sentence passed.
/// </returns>
private bool ProcessOneGrammar() {
    var (g, terminals) = NextGrammar();
    var h = g.ToCNF();

    var preparedSentences = new List<Sentence>();
    for (int length = 0; length <= _maxInputLength; length++) {
        var combinations = CFGLibTest.Helpers.CombinationsWithRepetition(terminals, length);
        foreach (var target in combinations) {
            preparedSentences.Add(new Sentence(target));
        }
    }
    AddRandomSentences(preparedSentences, terminals);

    // De-duplicate by terminal text; a later duplicate replaces an earlier one.
    var uniquifySentences = new Dictionary<string, Sentence>();
    foreach (var sentence in preparedSentences) {
        uniquifySentences[sentence.AsTerminals()] = sentence;
    }
    preparedSentences = uniquifySentences.Values.ToList();

    EarleyParser earley1;
    EarleyParser2 earley2;
    CykParser cyk;
    try {
        earley1 = new EarleyParser(g);
        earley2 = new EarleyParser2(g);
        cyk = new CykParser(h);
    } catch (Exception e) {
        // A parser that cannot even be constructed counts as a reported failure.
        Report(g, e);
        return true;
    }

    foreach (var sentence in preparedSentences) {
        try {
            var sppf1 = earley1.ParseGetForest(sentence);
            var sppf2 = earley2.ParseGetForest(sentence);

            // A null forest from exactly one of the two parsers is a mismatch.
            if ((sppf1 == null) != (sppf2 == null)) {
                throw new InvalidOperationException(
                    sppf1 == null
                        ? "EarleyParser returned no forest but EarleyParser2 returned one."
                        : "EarleyParser2 returned no forest but EarleyParser returned one.");
            }

            var p1 = earley1.ProbabilityOfSppf(sppf1);
            var p2 = earley2.ProbabilityOfSppf(sppf2);
            var p3 = cyk.ParseGetProbability(sentence);
            if (!Helpers.IsNear(p1, p2)) {
                throw new InvalidOperationException(
                    $"EarleyParser probability ({p1}) differs from EarleyParser2 probability ({p2}).");
            }
            if (!Helpers.IsNear(p1, p3)) {
                throw new InvalidOperationException(
                    $"EarleyParser probability ({p1}) differs from CykParser probability ({p3}).");
            }

            // TraversalLoopException is deliberately tolerated for both forests.
            // NOTE(review): presumably looping forests are expected for some grammars — confirm.
            try {
                TestTraversal.CheckTraversal(g, sentence, sppf1);
            } catch (TraversalLoopException) {
            }
            try {
                TestTraversal.CheckTraversal(g, sentence, sppf2);
            } catch (TraversalLoopException) {
            }
        } catch (Exception e) {
            // Report the first failing sentence and stop processing this grammar.
            Report(g, e, sentence);
            return true;
        }
    }

    return false;
}
/// <summary>
/// Builds an AST from a token sequence using a newly created Earley parser.
/// </summary>
/// <param name="tokens">Token sequence to parse.</param>
/// <returns>Root node of the AST.</returns>
/// <exception cref="ArgumentNullException" />
/// <exception cref="ParserException" />
/// <exception cref="SequenceSyntaxException" />
/// <exception cref="SyntaxException" />
public Node Parse(IEnumerable<Token> tokens) {
    // Delegates to the overload that accepts an explicit parser instance.
    return Parse(tokens, new EarleyParser());
}