/// <summary>
/// Registers a newly built edge with the chart and relaxes the hooks that real edges
/// already in the chart can project now that this edge exists.
/// </summary>
/// <remarks>
/// Each candidate hook is assembled in the shared <c>tempHook</c> instance and handed to
/// <c>RelaxTempHook</c>, once per (binary rule, partner edge) combination.
/// </remarks>
/// <param name="edge">The edge that has just been built.</param>
protected internal virtual void TriggerAllHooks(Edge edge)
{
    // Both "already built" answers are sampled before the edge is registered
    // (presumably registration would change them - confirm against the chart).
    bool isNewOnLeft = !chart.IsBuiltL(edge.state, edge.start, edge.head, edge.tag);
    bool isNewOnRight = !chart.IsBuiltR(edge.state, edge.end, edge.head, edge.tag);
    chart.RegisterEdgeIndexes(edge);

    if (isNewOnRight)
    {
        // PRE HOOKS: the new edge is the left child of the rule; partners are the real
        // edges in the rule's right-child state looked up at the new edge's end position.
        IEnumerator<BinaryRule> rulesByLeft = bg.RuleIteratorByLeftChild(edge.state);
        while (rulesByLeft.MoveNext())
        {
            BinaryRule rule = rulesByLeft.Current;
            ICollection<Edge> partners = chart.GetRealEdgesWithL(rule.rightChild, edge.end);
            foreach (Edge partner in partners)
            {
                // The hook covers the partner's span but carries the new edge's head and tag.
                tempHook.start = partner.start;
                tempHook.end = partner.end;
                tempHook.state = rule.parent;
                tempHook.subState = rule.leftChild;
                tempHook.head = edge.head;
                tempHook.tag = edge.tag;
                tempHook.backEdge = partner;
                tempHook.iScore = partner.iScore + rule.score
                    + dparser.headScore[dparser.binDistance[edge.head][edge.end]][edge.head][dg.TagBin(edge.tag)][partner.head][dg.TagBin(partner.tag)]
                    + dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.start]
                    + dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.end];
                RelaxTempHook();
            }
        }
    }

    if (isNewOnLeft)
    {
        // POST HOOKS: the mirror image - the new edge is the right child of the rule and
        // partners are the real edges in the left-child state looked up at the new edge's start.
        IEnumerator rulesByRight = bg.RuleIteratorByRightChild(edge.state);
        while (rulesByRight.MoveNext())
        {
            BinaryRule rule = (BinaryRule)rulesByRight.Current;
            ICollection<Edge> partners = chart.GetRealEdgesWithR(rule.leftChild, edge.start);
            foreach (Edge partner in partners)
            {
                tempHook.start = partner.start;
                tempHook.end = partner.end;
                tempHook.state = rule.parent;
                tempHook.subState = rule.rightChild;
                tempHook.head = edge.head;
                tempHook.tag = edge.tag;
                tempHook.backEdge = partner;
                tempHook.iScore = partner.iScore + rule.score
                    + dparser.headScore[dparser.binDistance[edge.head][edge.start]][edge.head][dg.TagBin(edge.tag)][partner.head][dg.TagBin(partner.tag)]
                    + dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.start]
                    + dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.end];
                RelaxTempHook();
            }
        }
    }
}
/// <summary>
/// Adds one binary rule as an arc to the transducer graph that belongs to the top
/// category of the rule's synthetic child.
/// </summary>
/// <remarks>
/// The arc runs from the synthetic child state to the parent state. Its input is the
/// other child's name followed by a one-character marker: "&gt;" when the synthetic child
/// is on the left, "&lt;" when it is on the right, or the last character of the parent
/// name when <c>op.trainOptions.markFinalStates</c> is set.
/// </remarks>
/// <param name="rule">The binary rule to encode; its parent has to be synthetic.</param>
/// <param name="graphs">Graphs keyed by top category, passed through to <c>GetGraphFromMap</c>.</param>
/// <returns>
/// <c>true</c> when an arc was added; <c>false</c> when neither child is a synthetic
/// state, in which case nothing is added.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// No top category could be determined for the synthetic child state.
/// </exception>
protected internal virtual bool AddOneBinaryRule(BinaryRule rule, IDictionary<string, TransducerGraph> graphs)
{
    // parent has to be synthetic in BinaryRule
    string parentString = stateIndex.Get(rule.parent);
    string leftString = stateIndex.Get(rule.leftChild);
    string rightString = stateIndex.Get(rule.rightChild);

    string bracket = null;
    if (op.trainOptions.markFinalStates)
    {
        // The final character of the parent name replaces the default direction marker.
        bracket = Sharpen.Runtime.Substring(parentString, parentString.Length - 1, parentString.Length);
    }

    string source;
    string input;
    // the below test is not necessary with left to right grammars
    if (IsSyntheticState(leftString))
    {
        source = leftString;
        input = rightString + (bracket == null ? ">" : bracket);
    }
    else if (IsSyntheticState(rightString))
    {
        source = rightString;
        input = leftString + (bracket == null ? "<" : bracket);
    }
    else
    {
        // we don't know what to do with this rule
        return false;
    }

    string target = parentString;
    // makes it a real 0 <= k <= infty
    double output = SmartNegate(rule.Score());
    string topcat = GetTopCategoryOfSyntheticState(source);
    if (topcat == null)
    {
        // A specific exception type instead of the bare System.Exception; handlers that
        // catch Exception still see it.
        throw new InvalidOperationException("can't have null topcat");
    }

    TransducerGraph graph = GetGraphFromMap(graphs, topcat);
    graph.AddArc(source, target, input, output);
    return true;
}
/// <summary>
/// Tallies the rule rooted at one internal tree node: a unary rule when the node has
/// exactly one child, otherwise a binary rule over its first two children.
/// </summary>
/// <param name="lt">The local tree whose root label and child labels define the rule.</param>
/// <param name="weight">Amount added to the rule's count and to the parent symbol's count.</param>
protected internal override void TallyInternalNode(Tree lt, double weight)
{
    Tree[] kids = lt.Children();

    // States are indexed parent first, then children left to right.
    int parentState = stateIndex.AddToIndex(lt.Label().Value());
    int firstChildState = stateIndex.AddToIndex(kids[0].Label().Value());

    if (kids.Length == 1)
    {
        UnaryRule unary = new UnaryRule(parentState, firstChildState);
        symbolCounter.IncrementCount(stateIndex.Get(unary.parent), weight);
        unaryRuleCounter.IncrementCount(unary, weight);
        unaryRules.Add(unary);
        return;
    }

    int secondChildState = stateIndex.AddToIndex(kids[1].Label().Value());
    BinaryRule binary = new BinaryRule(parentState, firstChildState, secondChildState);
    symbolCounter.IncrementCount(stateIndex.Get(binary.parent), weight);
    binaryRuleCounter.IncrementCount(binary, weight);
    binaryRules.Add(binary);
}
/// <summary>
/// Builds a unary/binary grammar pair from the rules passed in plus the rules read back
/// off the arcs of the given transducer graphs.
/// </summary>
/// <remarks>
/// Side effects: <c>newStateIndex</c> is replaced by a fresh index, every rule already in
/// <paramref name="unaryRules"/> and <paramref name="binaryRules"/> is renumbered in place
/// against it, and the rules recovered from the graphs are added to those same collections.
/// When <c>outputType</c> is <c>RawCounts</c> the rule scores are treated as counts and
/// replaced by the log of the count divided by the parent symbol's total; binary rules are
/// first reduced by <c>op.trainOptions.ruleDiscount</c>.
/// </remarks>
/// <param name="graphs">the TransducerGraph objects whose arcs are converted back into rules</param>
/// <param name="unaryRules">is a Set of UnaryRule objects that we need to add</param>
/// <param name="binaryRules">is a Set of BinaryRule objects that we need to add</param>
/// <returns>a new Pair of UnaryGrammar, BinaryGrammar</returns>
/// <exception cref="System.ArgumentException">
/// A binary arc's input is empty or does not end in one of the four direction markers.
/// </exception>
protected internal virtual Pair<UnaryGrammar, BinaryGrammar> ConvertGraphsToGrammar(ICollection<TransducerGraph> graphs, ICollection<UnaryRule> unaryRules, ICollection<BinaryRule> binaryRules)
{
    // first go through all the existing rules and number them with new numberer
    newStateIndex = new HashIndex<string>();
    foreach (UnaryRule rule in unaryRules)
    {
        string parent = stateIndex.Get(rule.parent);
        rule.parent = newStateIndex.AddToIndex(parent);
        string child = stateIndex.Get(rule.child);
        rule.child = newStateIndex.AddToIndex(child);
    }
    foreach (BinaryRule rule_1 in binaryRules)
    {
        string parent = stateIndex.Get(rule_1.parent);
        rule_1.parent = newStateIndex.AddToIndex(parent);
        string leftChild = stateIndex.Get(rule_1.leftChild);
        rule_1.leftChild = newStateIndex.AddToIndex(leftChild);
        string rightChild = stateIndex.Get(rule_1.rightChild);
        rule_1.rightChild = newStateIndex.AddToIndex(rightChild);
    }

    // now go through the graphs and add the rules
    foreach (TransducerGraph graph in graphs)
    {
        object startNode = graph.GetStartNode();
        foreach (TransducerGraph.Arc arc in graph.GetArcs())
        {
            // TODO: make sure these are the strings we're looking for
            string source = arc.GetSourceNode().ToString();
            string target = arc.GetTargetNode().ToString();
            object input = arc.GetInput();
            string inputString = input.ToString();
            double output = ((double)arc.GetOutput());
            if (source.Equals(startNode))
            {
                // an arc leaving the start node becomes a UnaryRule: target -> input
                UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                unaryRules.Add(ur);
            }
            else if (inputString.Equals(End) || inputString.Equals(Epsilon))
            {
                // an End/Epsilon arc becomes a UnaryRule: target -> source
                UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), SmartNegate(output));
                unaryRules.Add(ur);
            }
            else
            {
                // make a BinaryRule
                // figure out whether the input was generated on the left or right
                int length = inputString.Length;
                if (length == 0)
                {
                    // Without this check the marker lookup below would fail with an
                    // index error instead of the format error used for bad markers.
                    throw new ArgumentException("Arc input is in unexpected format: " + arc);
                }
                char leftOrRight = inputString[length - 1];
                inputString = Sharpen.Runtime.Substring(inputString, 0, length - 1);
                BinaryRule br;
                if (leftOrRight == '<' || leftOrRight == '[')
                {
                    // the input symbol is the left child, the source state the right child
                    br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), newStateIndex.AddToIndex(source), SmartNegate(output));
                }
                else if (leftOrRight == '>' || leftOrRight == ']')
                {
                    // the source state is the left child, the input symbol the right child
                    br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                }
                else
                {
                    throw new ArgumentException("Arc input is in unexpected format: " + arc);
                }
                binaryRules.Add(br);
            }
        }
    }

    // by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
    ClassicCounter<string> symbolCounter = new ClassicCounter<string>();
    if (outputType == RawCounts)
    {
        // now we take the sets of rules and turn them into grammars
        // the scores of the rules we are given are actually counts
        // so we count parent symbol occurrences
        foreach (UnaryRule rule_2 in unaryRules)
        {
            symbolCounter.IncrementCount(newStateIndex.Get(rule_2.parent), rule_2.score);
        }
        foreach (BinaryRule rule_3 in binaryRules)
        {
            symbolCounter.IncrementCount(newStateIndex.Get(rule_3.parent), rule_3.score);
        }
    }

    // now we put the rules in the grammars
    int numRules = 0;
    UnaryGrammar ug = new UnaryGrammar(newStateIndex);
    BinaryGrammar bg = new BinaryGrammar(newStateIndex);
    foreach (UnaryRule rule_4 in unaryRules)
    {
        if (outputType == RawCounts)
        {
            double count = symbolCounter.GetCount(newStateIndex.Get(rule_4.parent));
            rule_4.score = (float)Math.Log(rule_4.score / count);
        }
        ug.AddRule(rule_4);
        numRules++;
    }
    foreach (BinaryRule rule_5 in binaryRules)
    {
        if (outputType == RawCounts)
        {
            double count = symbolCounter.GetCount(newStateIndex.Get(rule_5.parent));
            // only binary rules have the rule discount subtracted from their count
            rule_5.score = (float)Math.Log((rule_5.score - op.trainOptions.ruleDiscount) / count);
        }
        bg.AddRule(rule_5);
        numRules++;
    }
    if (verbose)
    {
        System.Console.Out.WriteLine("Number of minimized rules: " + numRules);
        System.Console.Out.WriteLine("Number of minimized states: " + newStateIndex.Size());
    }
    ug.PurgeRules();
    bg.SplitRules();
    return new Pair<UnaryGrammar, BinaryGrammar>(ug, bg);
}
/// <summary>
/// Scores one local tree (a node and its immediate children) by following the parser's
/// grammar rules through the children from left to right and summing the rule scores.
/// </summary>
/// <remarks>
/// The walk starts at the synthetic state named <c>"@" + parent + "| [ [" + firstChild + "] "</c>
/// and takes the score of the first unary rule listed for that state. Every further child
/// must be matched by a binary rule whose left child is the current state and whose right
/// child is that child's state; only the rule for the last child may rewrite to the
/// tree's own parent state.
/// </remarks>
/// <param name="localTree">The tree whose root and immediate children are scored.</param>
/// <param name="stateIndex">Index used to turn category names into state numbers.</param>
/// <param name="pd">Parser supplying the unary grammar <c>ug</c> and binary grammar <c>bg</c>.</param>
/// <returns>
/// The summed rule scores, or <see cref="double.NegativeInfinity"/> when a required state
/// or rule cannot be found.
/// </returns>
private static double ComputeLocalTreeScore(Tree localTree, IIndex<string> stateIndex, LexicalizedParser pd)
{
    try
    {
        string parent = localTree.Value();
        int parentState = stateIndex.IndexOf(parent);
        Tree[] children = localTree.Children();

        // let's find the unary to kick things off with the left child (since we assume a left to right grammar)
        // first we create the synthetic parent of the leftmost child
        string nextChild = children[0].Value();
        string current = "@" + parent + "| [ [" + nextChild + "] ";
        int currentState = stateIndex.IndexOf(current);
        if (currentState < 0)
        {
            // NOTE(review): assumes IndexOf reports an unknown name with a negative
            // value - confirm. A negative number cannot name a state either way.
            return double.NegativeInfinity;
        }

        IList<UnaryRule> rules = pd.ug.RulesByParent(currentState);
        if (rules == null || rules.Count == 0)
        {
            // No unary rule to start from: report it the same way as any other missing
            // rule instead of failing on rules[0].
            return double.NegativeInfinity;
        }
        UnaryRule ur = rules[0];
        double localTreeScore = ur.Score();

        // go through rest of rules
        for (int i = 1; i < children.Length; i++)
        {
            // find rules in BinaryGrammar that can extend this state
            nextChild = children[i].Value();
            int childState = stateIndex.IndexOf(nextChild);
            IList<BinaryRule> candidates = pd.bg.RuleListByLeftChild(currentState);

            // The last child needs the rewrite to the parent of the whole local tree;
            // every earlier child needs a rewrite that does NOT go to that parent.
            bool isLastChild = (i == children.Length - 1);
            BinaryRule foundBR = null;
            foreach (BinaryRule br in candidates)
            {
                if (br.rightChild == childState && (br.parent == parentState) == isLastChild)
                {
                    foundBR = br;
                    break;
                }
            }
            if (foundBR == null)
            {
                // we never found a matching rule!
                return double.NegativeInfinity;
            }
            currentState = foundBR.parent;
            localTreeScore += foundBR.score;
        }
        return localTreeScore;
    }
    catch (NoSuchElementException)
    {
        // we couldn't find a state for one of the needed categories
        return double.NegativeInfinity;
    }
}