/// <summary>
/// Prepares the fixtures shared by the tests: an empty grammar, a rule whose
/// right-hand side has two symbols, and a rule whose right-hand side has three.
/// </summary>
public void setUp()
{
    const float ruleProbability = 0.50f;

    gEmpty = new ProbCNFGrammar();

    // A -> Y X
    var validLhs = CollectionFactory.CreateQueue<string>(new[] { "A" });
    var validRhs = CollectionFactory.CreateQueue<string>(new[] { "Y", "X" });
    validR = new Rule(validLhs, validRhs, ruleProbability);

    // A -> Y X Z (too many RHS variables)
    var invalidLhs = CollectionFactory.CreateQueue<string>(new[] { "A" });
    var invalidRhs = CollectionFactory.CreateQueue<string>(new[] { "Y", "X", "Z" });
    invalidR = new Rule(invalidLhs, invalidRhs, ruleProbability);
}
/// <summary>
/// Demo driver: builds the trivial example grammar, runs the CYK parser on a
/// fixed five-word sentence and prints the resulting probability table.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void main(params string[] args)
{
    System.Console.WriteLine("Running...");

    ProbCNFGrammar grammar = ProbCNFGrammarExamples.buildTrivialGrammar();
    CYK parser = new CYK();

    string[] sentence = new[] { "the", "man", "liked", "a", "woman" };
    ICollection<string> words = CollectionFactory.CreateQueue<string>(sentence);

    // Fill the table first, then hand it to the printer together with its inputs.
    float[,,] chart = parser.parse(words, grammar);
    parser.printProbTable(chart, words, grammar);

    System.Console.WriteLine("Done!");
}
/// <summary>
/// Fills the CYK probability table for a sentence under a probabilistic
/// Chomsky-Normal-Form grammar.
/// </summary>
/// <param name="words">The sentence, one word per element.</param>
/// <param name="grammar">Grammar whose <c>vars</c> and <c>rules</c> drive the parse.</param>
/// <returns>
/// A table <c>P[v, s, l]</c> sized [number of variables, number of words, number of words].
/// <c>v</c> is the index of a variable in <c>grammar.vars</c>, <c>s</c> is a 0-based start
/// position and <c>l</c> is the span length minus one. Each cell holds the highest
/// probability found for that variable over that span; cells never written stay at 0.
/// </returns>
public float[,,] parse(ICollection<string> words, ProbCNFGrammar grammar)
{
    int wordCount = length(words);
    int varCount = grammar.vars.Size();
    float[,,] table = new float[varCount, wordCount, wordCount]; // every cell starts at 0.0

    // Base case: for each rule of the form X -> word_i [p], record p for the
    // one-word span at position i. The span length 1 is stored at index 0
    // because the table is 0-based.
    for (int pos = 0; pos < wordCount; ++pos)
    {
        for (int ruleIdx = 0; ruleIdx < grammar.rules.Size(); ruleIdx++)
        {
            Rule lexical = grammar.rules.Get(ruleIdx);
            if (!lexical.derives(words.Get(pos)))
            {
                continue;
            }

            // Index of the rule's LHS variable.
            int head = grammar.vars.IndexOf(lexical.lhs.Get(0));
            table[head, pos, 0] = lexical.PROB;
        }
    }

    // Recursive case: build longer spans out of two adjacent shorter ones.
    for (int span = 2; span <= wordCount; span++)
    {
        // 'begin' is the 0-based index of the first word of the span.
        for (int begin = 0; begin <= wordCount - span; begin++)
        {
            // N.B. the split point ranges up to span - 1 (the book incorrectly has N - 1).
            for (int leftLen = 1; leftLen < span; leftLen++)
            {
                int rightLen = span - leftLen;

                // Consider every rule of the form X -> Y Z, with Y and Z grammar variables.
                foreach (Rule binary in grammar.rules)
                {
                    if (binary.rhs.Size() != 2)
                    {
                        continue;
                    }

                    // Indices of the rule's variables X, Y and Z.
                    int head = grammar.vars.IndexOf(binary.lhs.Get(0));
                    int left = grammar.vars.IndexOf(binary.rhs.Get(0));
                    int right = grammar.vars.IndexOf(binary.rhs.Get(1));

                    float candidate = table[left, begin, leftLen - 1]
                                      * table[right, begin + leftLen, rightLen - 1]
                                      * binary.PROB;

                    // Keep the best probability seen so far for X over this span.
                    table[head, begin, span - 1] =
                        System.Math.Max(table[head, begin, span - 1], candidate);
                }
            }
        }
    }

    return table;
}
/// <summary>
/// Builds a more restrictive phrase-structure grammar, used in testing and
/// demonstrating the CYK algorithm. Its terminal rules come from the trivial
/// lexicon built by <c>LexiconExamples.buildTrivialLexicon()</c>.
/// </summary>
/// <returns>
/// The populated grammar, or <c>null</c> when the grammar's <c>addRules</c> call
/// reports failure.
/// </returns>
public static ProbCNFGrammar buildTrivialGrammar()
{
    ProbCNFGrammar grammar = new ProbCNFGrammar();
    ICollection<Rule> productions = CollectionFactory.CreateQueue<Rule>();

    // Phrase-structure rules.
    productions.Add(new Rule("S", "NP,VP", 1.0f));
    productions.Add(new Rule("NP", "ARTICLE,NOUN", 0.50f));
    productions.Add(new Rule("NP", "PRONOUN,ADVERB", 0.5f));
    productions.Add(new Rule("VP", "VERB,NP", 1.0f));

    // Terminal rules, copied out of the trivial lexicon.
    Lexicon lexicon = LexiconExamples.buildTrivialLexicon();
    ICollection<Rule> terminalRules = CollectionFactory.CreateQueue<Rule>(lexicon.getAllTerminalRules());
    productions.AddAll(terminalRules);

    // Load everything into the grammar; a failed load yields null.
    return grammar.addRules(productions) ? grammar : null;
}
/// <summary>
/// Builds an elementary Chomsky-Normal-Form grammar for simple testing and
/// demonstration. This kind of grammar is seen more in computing-theory classes
/// and does not mimic a subset of English phrase structure.
/// </summary>
/// <returns>
/// The populated grammar, or <c>null</c> when the grammar's <c>addRules</c> call
/// reports failure.
/// </returns>
public static ProbCNFGrammar buildExampleGrammarOne()
{
    ProbCNFGrammar grammar = new ProbCNFGrammar();

    // Listed in the order they are handed to the grammar.
    Rule[] productions =
    {
        // Start rules
        new Rule("S", "Y,Z", 0.10f),
        new Rule("B", "B,D", 0.10f),
        new Rule("B", "G,D", 0.10f),
        new Rule("C", "E,C", 0.10f),
        new Rule("C", "E,H", 0.10f),
        new Rule("E", "M,N", 0.10f),
        new Rule("D", "M,N", 0.10f),
        new Rule("Y", "E,C", 0.10f),
        new Rule("Z", "E,C", 0.10f),

        // Terminal rules
        new Rule("M", "m", 1.0f),
        new Rule("N", "n", 1.0f),
        new Rule("B", "a", 0.25f),
        new Rule("B", "b", 0.25f),
        new Rule("B", "c", 0.25f),
        new Rule("B", "d", 0.25f),
        new Rule("G", "a", 0.50f),
        new Rule("G", "d", 0.50f),
        new Rule("C", "x", 0.20f),
        new Rule("C", "y", 0.20f),
        new Rule("C", "z", 0.60f),
        new Rule("H", "u", 0.50f),
        new Rule("H", "z", 0.50f)
    };

    ICollection<Rule> rules = CollectionFactory.CreateQueue<Rule>();
    foreach (Rule production in productions)
    {
        rules.Add(production);
    }

    // Load everything into the grammar; a failed load yields null.
    if (grammar.addRules(rules))
    {
        return grammar;
    }
    return null;
}