/// <summary>
/// Registers <paramref name="edge"/> in the chart and projects hook candidates from it.
/// For every binary rule that has the edge's state as a child, each matching real edge
/// returned by the chart is written into the shared <c>tempHook</c> and passed to
/// <c>RelaxTempHook()</c>.
/// </summary>
/// <param name="edge">the edge that has just been built</param>
protected internal virtual void TriggerAllHooks(Edge edge)
        {
            // The "already built" flags are sampled first and the edge is registered afterwards;
            // a newly built edge may enable some older edges to project hooks.
            bool firstOnLeft  = !chart.IsBuiltL(edge.state, edge.start, edge.head, edge.tag);
            bool firstOnRight = !chart.IsBuiltR(edge.state, edge.end, edge.head, edge.tag);

            chart.RegisterEdgeIndexes(edge);

            if (firstOnRight)
            {
                // PRE HOOKS: rules whose left child is this edge's state, paired with real
                // edges fetched for the rule's right child at this edge's end position.
                IEnumerator <BinaryRule> byLeftChild = bg.RuleIteratorByLeftChild(edge.state);
                while (byLeftChild.MoveNext())
                {
                    BinaryRule         rule     = byLeftChild.Current;
                    ICollection <Edge> partners = chart.GetRealEdgesWithL(rule.rightChild, edge.end);
                    foreach (Edge partner in partners)
                    {
                        tempHook.start    = partner.start;
                        tempHook.end      = partner.end;
                        tempHook.state    = rule.parent;
                        tempHook.subState = rule.leftChild;
                        tempHook.head     = edge.head;
                        tempHook.tag      = edge.tag;
                        tempHook.backEdge = partner;

                        // Individual score terms, looked up in the same order as they are summed.
                        var attachment  = dparser.headScore[dparser.binDistance[edge.head][edge.end]][edge.head][dg.TagBin(edge.tag)][partner.head][dg.TagBin(partner.tag)];
                        var stopAtStart = dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.start];
                        var stopAtEnd   = dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.end];
                        tempHook.iScore = partner.iScore + rule.score + attachment + stopAtStart + stopAtEnd;
                        RelaxTempHook();
                    }
                }
            }
            if (firstOnLeft)
            {
                // POST HOOKS: rules whose right child is this edge's state, paired with real
                // edges fetched for the rule's left child at this edge's start position.
                IEnumerator byRightChild = bg.RuleIteratorByRightChild(edge.state);
                while (byRightChild.MoveNext())
                {
                    BinaryRule         rule     = (BinaryRule)byRightChild.Current;
                    ICollection <Edge> partners = chart.GetRealEdgesWithR(rule.leftChild, edge.start);
                    foreach (Edge partner in partners)
                    {
                        tempHook.start    = partner.start;
                        tempHook.end      = partner.end;
                        tempHook.state    = rule.parent;
                        tempHook.subState = rule.rightChild;
                        tempHook.head     = edge.head;
                        tempHook.tag      = edge.tag;
                        tempHook.backEdge = partner;

                        // Same terms as the pre-hook case, but the distance bin uses edge.start.
                        var attachment  = dparser.headScore[dparser.binDistance[edge.head][edge.start]][edge.head][dg.TagBin(edge.tag)][partner.head][dg.TagBin(partner.tag)];
                        var stopAtStart = dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.start];
                        var stopAtEnd   = dparser.headStop[partner.head][dg.TagBin(partner.tag)][partner.end];
                        tempHook.iScore = partner.iScore + rule.score + attachment + stopAtStart + stopAtEnd;
                        RelaxTempHook();
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Adds one binary rule as an arc to the transducer graph selected by the top category
        /// of the rule's synthetic child.
        /// </summary>
        /// <param name="rule">the binary rule to convert; one of its children must be a synthetic state</param>
        /// <param name="graphs">map from which the target graph is obtained via <c>GetGraphFromMap</c></param>
        /// <returns><c>true</c> if an arc was added; <c>false</c> if neither child is a synthetic state</returns>
        /// <exception cref="Exception">when no top category can be determined for the synthetic child</exception>
        protected internal virtual bool AddOneBinaryRule(BinaryRule rule, IDictionary <string, TransducerGraph> graphs)
        {
            // parent has to be synthetic in BinaryRule
            string parentName = stateIndex.Get(rule.parent);
            string leftName   = stateIndex.Get(rule.leftChild);
            string rightName  = stateIndex.Get(rule.rightChild);

            // When final states are marked, the last character of the parent name replaces
            // the default direction marker on the arc input.
            string finalMarker = null;
            if (op.trainOptions.markFinalStates)
            {
                int parentLength = parentName.Length;
                finalMarker = Sharpen.Runtime.Substring(parentName, parentLength - 1, parentLength);
            }

            string fromState;
            string arcInput;
            // the left-child test is not necessary with left to right grammars
            if (IsSyntheticState(leftName))
            {
                fromState = leftName;
                arcInput  = rightName + (finalMarker ?? ">");
            }
            else if (IsSyntheticState(rightName))
            {
                fromState = rightName;
                arcInput  = leftName + (finalMarker ?? "<");
            }
            else
            {
                // we don't know what to do with this rule
                return false;
            }

            // makes it a real  0 <= k <= infty
            double arcOutput = SmartNegate(rule.Score());

            string topCategory = GetTopCategoryOfSyntheticState(fromState);
            if (topCategory == null)
            {
                throw new Exception("can't have null topcat");
            }

            // The arc always ends at the rule's parent state.
            TransducerGraph targetGraph = GetGraphFromMap(graphs, topCategory);
            targetGraph.AddArc(fromState, parentName, arcInput, arcOutput);
            return true;
        }
Example #3
0
 /// <summary>
 /// Tallies the rule expressed by one local tree: a unary rule when the node has exactly
 /// one child, otherwise a binary rule built from its first two children.
 /// </summary>
 /// <param name="lt">the local tree (a node and its children) to tally</param>
 /// <param name="weight">the amount added to the symbol and rule counters</param>
 protected internal override void TallyInternalNode(Tree lt, double weight)
 {
     bool isUnary = lt.Children().Length == 1;

     // The parent label is always indexed before any child label.
     var parentId = stateIndex.AddToIndex(lt.Label().Value());

     if (!isUnary)
     {
         var leftId  = stateIndex.AddToIndex(lt.Children()[0].Label().Value());
         var rightId = stateIndex.AddToIndex(lt.Children()[1].Label().Value());

         BinaryRule binary = new BinaryRule(parentId, leftId, rightId);
         symbolCounter.IncrementCount(stateIndex.Get(binary.parent), weight);
         binaryRuleCounter.IncrementCount(binary, weight);
         binaryRules.Add(binary);
         return;
     }

     var onlyChildId = stateIndex.AddToIndex(lt.Children()[0].Label().Value());

     UnaryRule unary = new UnaryRule(parentId, onlyChildId);
     symbolCounter.IncrementCount(stateIndex.Get(unary.parent), weight);
     unaryRuleCounter.IncrementCount(unary, weight);
     unaryRules.Add(unary);
 }
Example #4
0
        /// <summary>
        /// Builds a unary/binary grammar pair from the supplied rules plus the rules encoded by
        /// the arcs of the given transducer graphs, numbering all states in a freshly created
        /// <c>newStateIndex</c>.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method: <c>newStateIndex</c> is replaced by a new index;
        /// every rule already in <paramref name="unaryRules"/> and <paramref name="binaryRules"/>
        /// has its state numbers rewritten in place; rules derived from graph arcs are added to
        /// those same collections; and, when <c>outputType == RawCounts</c>, rule scores are
        /// overwritten with log relative frequencies.
        /// </remarks>
        /// <param name="graphs">the TransducerGraph objects whose arcs are converted into rules</param>
        /// <param name="unaryRules">the existing UnaryRule objects; re-indexed in place and extended with the unary rules read from the graphs</param>
        /// <param name="binaryRules">the existing BinaryRule objects; re-indexed in place and extended with the binary rules read from the graphs</param>
        /// <returns>a new Pair of UnaryGrammar, BinaryGrammar, both constructed over <c>newStateIndex</c></returns>
        /// <exception cref="Exception">
        /// when an arc that yields a binary rule has an input whose last character is not one of
        /// '&lt;', '[', '&gt;' or ']'
        /// </exception>
        protected internal virtual Pair <UnaryGrammar, BinaryGrammar> ConvertGraphsToGrammar(ICollection <TransducerGraph> graphs, ICollection <UnaryRule> unaryRules, ICollection <BinaryRule> binaryRules)
        {
            // first go through all the existing rules and number them with new numberer
            // (each state number is mapped old index -> name -> new index, mutating the rule)
            newStateIndex = new HashIndex <string>();
            foreach (UnaryRule rule in unaryRules)
            {
                string parent = stateIndex.Get(rule.parent);
                rule.parent = newStateIndex.AddToIndex(parent);
                string child = stateIndex.Get(rule.child);
                rule.child = newStateIndex.AddToIndex(child);
            }
            foreach (BinaryRule rule_1 in binaryRules)
            {
                string parent = stateIndex.Get(rule_1.parent);
                rule_1.parent = newStateIndex.AddToIndex(parent);
                string leftChild = stateIndex.Get(rule_1.leftChild);
                rule_1.leftChild = newStateIndex.AddToIndex(leftChild);
                string rightChild = stateIndex.Get(rule_1.rightChild);
                rule_1.rightChild = newStateIndex.AddToIndex(rightChild);
            }
            // now go through the graphs and add the rules
            foreach (TransducerGraph graph in graphs)
            {
                object startNode = graph.GetStartNode();
                foreach (TransducerGraph.Arc arc in graph.GetArcs())
                {
                    // TODO: make sure these are the strings we're looking for
                    string source      = arc.GetSourceNode().ToString();
                    string target      = arc.GetTargetNode().ToString();
                    object input       = arc.GetInput();
                    string inputString = input.ToString();
                    double output      = ((double)arc.GetOutput());
                    // NOTE(review): source is a string and startNode an object, so this comparison
                    // can only succeed when the start node is itself a string - confirm that
                    // graph nodes are stored as strings.
                    if (source.Equals(startNode))
                    {
                        // make a UnaryRule
                        // (arc leaves the start node: target is the parent, the arc input is the child)
                        UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                        unaryRules.Add(ur);
                    }
                    else
                    {
                        if (inputString.Equals(End) || inputString.Equals(Epsilon))
                        {
                            // make a UnaryRule
                            // (End/Epsilon input: target is the parent, the source state is the child)
                            UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), SmartNegate(output));
                            unaryRules.Add(ur);
                        }
                        else
                        {
                            // make a BinaryRule
                            // figure out whether the input was generated on the left or right
                            // (the last character of the input is the direction marker; it is
                            // stripped before the remainder is indexed as a state name)
                            int  length      = inputString.Length;
                            char leftOrRight = inputString[length - 1];
                            // presumably Java-style (begin, end) substring, i.e. everything but the last character - TODO confirm
                            inputString = Sharpen.Runtime.Substring(inputString, 0, length - 1);
                            BinaryRule br;
                            if (leftOrRight == '<' || leftOrRight == '[')
                            {
                                // input is passed as the left child, source as the right child
                                br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), newStateIndex.AddToIndex(source), SmartNegate(output));
                            }
                            else
                            {
                                if (leftOrRight == '>' || leftOrRight == ']')
                                {
                                    // source is passed as the left child, input as the right child
                                    br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                                }
                                else
                                {
                                    throw new Exception("Arc input is in unexpected format: " + arc);
                                }
                            }
                            binaryRules.Add(br);
                        }
                    }
                }
            }
            // by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
            // (symbolCounter stays empty unless outputType == RawCounts)
            ClassicCounter <string> symbolCounter = new ClassicCounter <string>();

            if (outputType == RawCounts)
            {
                // now we take the sets of rules and turn them into grammars
                // the scores of the rules we are given are actually counts
                // so we count parent symbol occurrences
                foreach (UnaryRule rule_2 in unaryRules)
                {
                    symbolCounter.IncrementCount(newStateIndex.Get(rule_2.parent), rule_2.score);
                }
                foreach (BinaryRule rule_3 in binaryRules)
                {
                    symbolCounter.IncrementCount(newStateIndex.Get(rule_3.parent), rule_3.score);
                }
            }
            // now we put the rules in the grammars
            // NOTE(review): numStates is never read again in this method.
            int numStates = newStateIndex.Size();
            // this should be smaller than last one
            int           numRules = 0;
            UnaryGrammar  ug       = new UnaryGrammar(newStateIndex);
            BinaryGrammar bg       = new BinaryGrammar(newStateIndex);

            foreach (UnaryRule rule_4 in unaryRules)
            {
                if (outputType == RawCounts)
                {
                    // replace the raw count by log(count / total count of the parent symbol)
                    double count = symbolCounter.GetCount(newStateIndex.Get(rule_4.parent));
                    rule_4.score = (float)Math.Log(rule_4.score / count);
                }
                ug.AddRule(rule_4);
                numRules++;
            }
            foreach (BinaryRule rule_5 in binaryRules)
            {
                if (outputType == RawCounts)
                {
                    // same conversion as for unary rules, except that ruleDiscount is subtracted
                    // from the count first.
                    // NOTE(review): only binary rules are discounted - confirm this is intended.
                    double count = symbolCounter.GetCount(newStateIndex.Get(rule_5.parent));
                    rule_5.score = (float)Math.Log((rule_5.score - op.trainOptions.ruleDiscount) / count);
                }
                bg.AddRule(rule_5);
                numRules++;
            }
            if (verbose)
            {
                System.Console.Out.WriteLine("Number of minimized rules: " + numRules);
                System.Console.Out.WriteLine("Number of minimized states: " + newStateIndex.Size());
            }
            // post-processing steps of the grammar classes, run once all rules have been added
            ug.PurgeRules();
            bg.SplitRules();
            return(new Pair <UnaryGrammar, BinaryGrammar>(ug, bg));
        }
Example #5
0
 /// <summary>
 /// Scores one local tree by chaining grammar rules from left to right: the first unary
 /// rule found for the synthetic state of the leftmost child, then one binary rule per
 /// remaining child.
 /// </summary>
 /// <param name="localTree">the node whose expansion into its children is scored</param>
 /// <param name="stateIndex">index used to look up state numbers by name</param>
 /// <param name="pd">parser whose unary grammar (<c>ug</c>) and binary grammar (<c>bg</c>) are consulted</param>
 /// <returns>
 /// the sum of the scores of the rules used, or <c>double.NegativeInfinity</c> when no
 /// matching binary rule exists for some child or a <c>NoSuchElementException</c> is raised
 /// </returns>
 private static double ComputeLocalTreeScore(Tree localTree, IIndex <string> stateIndex, LexicalizedParser pd)
 {
     try
     {
         string parentLabel = localTree.Value();
         int    parentState = stateIndex.IndexOf(parentLabel);
         Tree[] kids        = localTree.Children();

         // A left to right grammar is assumed, so the chain starts from the synthetic
         // parent state of the leftmost child.
         string firstLabel    = kids[0].Value();
         string syntheticName = "@" + parentLabel + "| [ [" + firstLabel + "] ";
         int    state         = stateIndex.IndexOf(syntheticName);

         IList <UnaryRule> openers = pd.ug.RulesByParent(state);
         double            total   = openers[0].Score();

         int lastIndex = kids.Length - 1;
         for (int k = 1; k <= lastIndex; k++)
         {
             string childLabel = kids[k].Value();
             int    childState = stateIndex.IndexOf(childLabel);

             // Only the rule that consumes the last child may rewrite to the parent of the
             // whole local tree; every earlier step must rewrite to some other state.
             bool mustReachParent = k == lastIndex;

             IList <BinaryRule> candidates = pd.bg.RuleListByLeftChild(state);
             BinaryRule         match      = null;
             foreach (BinaryRule candidate in candidates)
             {
                 if (candidate.rightChild == childState && (candidate.parent == parentState) == mustReachParent)
                 {
                     match = candidate;
                     break;
                 }
             }
             if (match == null)
             {
                 // we never found a matching rule!
                 return double.NegativeInfinity;
             }
             state  = match.parent;
             total += match.score;
         }
         return total;
     }
     catch (NoSuchElementException)
     {
         // we couldn't find a state for one of the needed categories
         return double.NegativeInfinity;
     }
 }