/// <summary>
/// Tries to record one unary rule as an arc in a transducer graph taken from <paramref name="graphs"/>.
/// </summary>
/// <param name="rule">the unary rule whose parent and child states are looked up in stateIndex</param>
/// <param name="graphs">dictionary of graphs keyed by string, accessed through GetGraphFromMap</param>
/// <returns>true when an arc was added (the parent or the child state is synthetic); false otherwise</returns>
protected internal virtual bool AddOneUnaryRule(UnaryRule rule, IDictionary <string, TransducerGraph> graphs)
{
    string parentName = stateIndex.Get(rule.parent);
    string childName = stateIndex.Get(rule.child);

    if (IsSyntheticState(parentName))
    {
        // Synthetic parent: the arc leaves the start node of the graph selected by
        // the parent's top category, and carries the child as its input.
        string topCategory = GetTopCategoryOfSyntheticState(parentName);
        TransducerGraph categoryGraph = GetGraphFromMap(graphs, topCategory);
        double arcWeight = SmartNegate(rule.Score());
        categoryGraph.AddArc(categoryGraph.GetStartNode(), parentName, childName, arcWeight);
        return true;
    }

    if (!IsSyntheticState(childName))
    {
        // Neither side is synthetic: nothing is added to any graph.
        return false;
    }

    // Synthetic child: add an End arc from the synthetic state to the parent,
    // and mark the parent as the end node of the parent's graph.
    TransducerGraph parentGraph = GetGraphFromMap(graphs, parentName);
    double endWeight = SmartNegate(rule.Score());
    parentGraph.AddArc(childName, parentName, End, endWeight);
    parentGraph.SetEndNode(parentName);
    return true;
}
/// <summary>
/// Tallies the rule rooted at a local tree: a unary rule when the node has exactly one child,
/// otherwise a binary rule built from its first two children.
/// </summary>
/// <param name="lt">the local tree whose label and child labels define the rule</param>
/// <param name="weight">the amount added to the symbol counter and to the rule counter</param>
protected internal override void TallyInternalNode(Tree lt, double weight)
{
    Tree[] kids = lt.Children();

    // The parent label is indexed first, then the children from left to right.
    int parentState = stateIndex.AddToIndex(lt.Label().Value());

    if (kids.Length == 1)
    {
        int onlyChildState = stateIndex.AddToIndex(kids[0].Label().Value());
        UnaryRule unary = new UnaryRule(parentState, onlyChildState);

        symbolCounter.IncrementCount(stateIndex.Get(unary.parent), weight);
        unaryRuleCounter.IncrementCount(unary, weight);
        unaryRules.Add(unary);
        return;
    }

    // Any other child count is handled as a binary rule over children 0 and 1.
    int leftState = stateIndex.AddToIndex(kids[0].Label().Value());
    int rightState = stateIndex.AddToIndex(kids[1].Label().Value());
    BinaryRule binary = new BinaryRule(parentState, leftState, rightState);

    symbolCounter.IncrementCount(stateIndex.Get(binary.parent), weight);
    binaryRuleCounter.IncrementCount(binary, weight);
    binaryRules.Add(binary);
}
/// <summary>
/// For every unary rule whose child is the state of <paramref name="edge"/>, fills the tempEdge
/// member with the corresponding parent-state edge and calls RelaxTempEdge().
/// </summary>
/// <param name="edge">the edge whose state is used as the child of the unary rules</param>
protected internal virtual void ProjectUnaries(Edge edge)
{
    IEnumerator ruleIterator = ug.RuleIteratorByChild(edge.state);
    while (ruleIterator.MoveNext())
    {
        UnaryRule unary = (UnaryRule)ruleIterator.Current;

        // Rules that rewrite a state to itself are skipped.
        if (unary.child != unary.parent)
        {
            // Same span, head and tag as the source edge; only the state changes.
            tempEdge.start = edge.start;
            tempEdge.end = edge.end;
            tempEdge.head = edge.head;
            tempEdge.tag = edge.tag;
            tempEdge.state = unary.parent;

            // Back-pointer to the edge this one was derived from; no hook is involved.
            tempEdge.backEdge = edge;
            tempEdge.backHook = null;

            tempEdge.iScore = edge.iScore + unary.score;
            RelaxTempEdge();
        }
    }
}
/// <summary>
/// Renumbers the given rules against a fresh state index, converts every arc of the given
/// transducer graphs into a unary or binary rule, and packs all rules into a new grammar pair.
/// </summary>
/// <remarks>
/// Side effects visible in this method: the newStateIndex member is replaced by a new index;
/// the rules in <paramref name="unaryRules"/> and <paramref name="binaryRules"/> are re-indexed
/// in place; the rules derived from graph arcs are added to those same collections; and, when
/// outputType is RawCounts, every rule score is overwritten with a log relative frequency.
/// </remarks>
/// <param name="graphs">the TransducerGraph objects whose arcs are converted into rules</param>
/// <param name="unaryRules">the UnaryRule objects to re-index; arc-derived unary rules are added to it</param>
/// <param name="binaryRules">the BinaryRule objects to re-index; arc-derived binary rules are added to it</param>
/// <returns>a new Pair of UnaryGrammar, BinaryGrammar</returns>
/// <exception cref="InvalidOperationException">
/// an arc that must become a binary rule has an input that is empty or does not end in
/// one of '&lt;', '[', '&gt;' or ']'
/// </exception>
protected internal virtual Pair <UnaryGrammar, BinaryGrammar> ConvertGraphsToGrammar(ICollection <TransducerGraph> graphs, ICollection <UnaryRule> unaryRules, ICollection <BinaryRule> binaryRules)
{
    // first go through all the existing rules and number them with the new index
    newStateIndex = new HashIndex <string>();
    foreach (UnaryRule rule in unaryRules)
    {
        string parent = stateIndex.Get(rule.parent);
        rule.parent = newStateIndex.AddToIndex(parent);
        string child = stateIndex.Get(rule.child);
        rule.child = newStateIndex.AddToIndex(child);
    }
    foreach (BinaryRule rule in binaryRules)
    {
        string parent = stateIndex.Get(rule.parent);
        rule.parent = newStateIndex.AddToIndex(parent);
        string leftChild = stateIndex.Get(rule.leftChild);
        rule.leftChild = newStateIndex.AddToIndex(leftChild);
        string rightChild = stateIndex.Get(rule.rightChild);
        rule.rightChild = newStateIndex.AddToIndex(rightChild);
    }

    // now go through the graphs and add the rules
    foreach (TransducerGraph graph in graphs)
    {
        object startNode = graph.GetStartNode();
        foreach (TransducerGraph.Arc arc in graph.GetArcs())
        {
            // TODO: make sure these are the strings we're looking for
            string source = arc.GetSourceNode().ToString();
            string target = arc.GetTargetNode().ToString();
            object input = arc.GetInput();
            string inputString = input.ToString();
            double output = ((double)arc.GetOutput());

            if (source.Equals(startNode))
            {
                // arc out of the start node: unary rule built from (target, input)
                UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                unaryRules.Add(ur);
            }
            else if (inputString.Equals(End) || inputString.Equals(Epsilon))
            {
                // End/Epsilon arc: unary rule built from (target, source)
                UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), SmartNegate(output));
                unaryRules.Add(ur);
            }
            else
            {
                // make a BinaryRule; the last character of the input tells
                // whether the input was generated on the left or on the right
                int length = inputString.Length;
                if (length == 0)
                {
                    // An empty input has no direction marker; report it like any other
                    // malformed input instead of failing on the index below.
                    throw new InvalidOperationException("Arc input is in unexpected format: " + arc);
                }
                char leftOrRight = inputString[length - 1];
                inputString = Sharpen.Runtime.Substring(inputString, 0, length - 1);

                // The order of the AddToIndex calls determines state numbering, so each
                // branch keeps its own argument order.
                BinaryRule br;
                switch (leftOrRight)
                {
                    case '<':
                    case '[':
                    {
                        // input is the left child, source the right child
                        br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), newStateIndex.AddToIndex(source), SmartNegate(output));
                        break;
                    }

                    case '>':
                    case ']':
                    {
                        // source is the left child, input the right child
                        br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                        break;
                    }

                    default:
                    {
                        throw new InvalidOperationException("Arc input is in unexpected format: " + arc);
                    }
                }
                binaryRules.Add(br);
            }
        }
    }

    // by now, the unaryRules and binaryRules collections hold the old (re-indexed) rules
    // and the new arc-derived rules, all with scores
    bool scoresAreCounts = outputType == RawCounts;
    ClassicCounter <string> parentCounts = new ClassicCounter <string>();
    if (scoresAreCounts)
    {
        // the scores of the rules we are given are actually counts,
        // so we total them per parent symbol
        foreach (UnaryRule rule in unaryRules)
        {
            parentCounts.IncrementCount(newStateIndex.Get(rule.parent), rule.score);
        }
        foreach (BinaryRule rule in binaryRules)
        {
            parentCounts.IncrementCount(newStateIndex.Get(rule.parent), rule.score);
        }
    }

    // now we put the rules in the grammars
    int numRules = 0;
    UnaryGrammar unaryGrammar = new UnaryGrammar(newStateIndex);
    BinaryGrammar binaryGrammar = new BinaryGrammar(newStateIndex);
    foreach (UnaryRule rule in unaryRules)
    {
        if (scoresAreCounts)
        {
            double count = parentCounts.GetCount(newStateIndex.Get(rule.parent));
            rule.score = (float)Math.Log(rule.score / count);
        }
        unaryGrammar.AddRule(rule);
        numRules++;
    }
    foreach (BinaryRule rule in binaryRules)
    {
        if (scoresAreCounts)
        {
            // binary counts are reduced by the configured rule discount before normalizing
            double count = parentCounts.GetCount(newStateIndex.Get(rule.parent));
            rule.score = (float)Math.Log((rule.score - op.trainOptions.ruleDiscount) / count);
        }
        binaryGrammar.AddRule(rule);
        numRules++;
    }

    if (verbose)
    {
        System.Console.Out.WriteLine("Number of minimized rules: " + numRules);
        System.Console.Out.WriteLine("Number of minimized states: " + newStateIndex.Size());
    }

    unaryGrammar.PurgeRules();
    binaryGrammar.SplitRules();
    return(new Pair <UnaryGrammar, BinaryGrammar>(unaryGrammar, binaryGrammar));
}
/// <summary>
/// Scores one local tree by chaining grammar rules from left to right: first the unary rule
/// found for the synthetic state of the leftmost child, then one binary rule per remaining child.
/// </summary>
/// <param name="localTree">the local tree; its value and its children's values are looked up in the index</param>
/// <param name="stateIndex">index used to map category strings to state numbers</param>
/// <param name="pd">parser whose ug and bg grammars supply the rules</param>
/// <returns>
/// the sum of the scores of the rules used, or negative infinity when no matching binary rule
/// is found for some child or a NoSuchElementException is raised during the lookups
/// </returns>
private static double ComputeLocalTreeScore(Tree localTree, IIndex <string> stateIndex, LexicalizedParser pd)
{
    try
    {
        string parentLabel = localTree.Value();
        int parentState = stateIndex.IndexOf(parentLabel);
        Tree[] kids = localTree.Children();

        // Start from the synthetic state naming the parent and its leftmost child
        // (a left-to-right grammar is assumed), and take the first unary rule with that parent.
        string firstChildLabel = kids[0].Value();
        string syntheticLabel = "@" + parentLabel + "| [ [" + firstChildLabel + "] ";
        int state = stateIndex.IndexOf(syntheticLabel);
        IList <UnaryRule> startRules = pd.ug.RulesByParent(state);
        UnaryRule startRule = startRules[0];
        double total = startRule.Score();

        // Extend the current state with each remaining child in turn.
        int lastIndex = kids.Length - 1;
        for (int i = 1; i <= lastIndex; i++)
        {
            string childLabel = kids[i].Value();
            int childState = stateIndex.IndexOf(childLabel);
            IList <BinaryRule> candidates = pd.bg.RuleListByLeftChild(state);

            // Only the rule consuming the last child may rewrite to the parent of the
            // whole local tree; every earlier step must rewrite to something else.
            bool mustReachParent = i == lastIndex;
            BinaryRule match = null;
            foreach (BinaryRule candidate in candidates)
            {
                bool reachesParent = candidate.parent == parentState;
                if (candidate.rightChild == childState && reachesParent == mustReachParent)
                {
                    match = candidate;
                    break;
                }
            }

            if (match == null)
            {
                // No rule extends the current state with this child.
                return(double.NegativeInfinity);
            }

            state = match.parent;
            total += match.score;
        }

        return(total);
    }
    catch (NoSuchElementException)
    {
        // we couldn't find a state for one of the needed categories
        return(double.NegativeInfinity);
    }
}