Ejemplo n.º 1
0
        /// <summary>
        /// Turns a unary rule into an arc of a transducer graph when either its parent or its
        /// child is a synthetic state.
        /// </summary>
        /// <param name="rule">the unary rule whose parent and child states are looked up in <c>stateIndex</c></param>
        /// <param name="graphs">the map handed to <c>GetGraphFromMap</c> to find the graph to extend</param>
        /// <returns>true if an arc was added; false if neither the parent nor the child is synthetic</returns>
        protected internal virtual bool AddOneUnaryRule(UnaryRule rule, IDictionary <string, TransducerGraph> graphs)
        {
            string parentName = stateIndex.Get(rule.parent);
            string childName  = stateIndex.Get(rule.child);

            if (IsSyntheticState(parentName))
            {
                // synthetic parent: the arc is attached to the start node of the graph
                // registered under the parent's top category
                TransducerGraph topGraph  = GetGraphFromMap(graphs, GetTopCategoryOfSyntheticState(parentName));
                double          arcOutput = SmartNegate(rule.Score());
                topGraph.AddArc(topGraph.GetStartNode(), parentName, childName, arcOutput);
                return true;
            }

            if (!IsSyntheticState(childName))
            {
                // nothing synthetic on either side, so this rule contributes no arc
                return false;
            }

            // synthetic child: need to add an arc from the synthetic state to the end state
            TransducerGraph parentGraph = GetGraphFromMap(graphs, parentName);
            double          endOutput   = SmartNegate(rule.Score());
            parentGraph.AddArc(childName, parentName, End, endOutput);
            // parentName should be the same as the end state
            parentGraph.SetEndNode(parentName);
            return true;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Records the rule rooted at an internal tree node: a unary rule when the node has
 /// exactly one child, otherwise a binary rule built from its first two children.
 /// </summary>
 /// <param name="lt">the local tree whose label and child labels are added to <c>stateIndex</c></param>
 /// <param name="weight">the amount added to the parent symbol count and to the rule count</param>
 protected internal override void TallyInternalNode(Tree lt, double weight)
 {
     Tree[] kids    = lt.Children();
     bool   isUnary = kids.Length == 1;

     // both rule kinds index the parent first and the first child second
     int parentState     = stateIndex.AddToIndex(lt.Label().Value());
     int firstChildState = stateIndex.AddToIndex(kids[0].Label().Value());

     if (isUnary)
     {
         UnaryRule unary = new UnaryRule(parentState, firstChildState);
         symbolCounter.IncrementCount(stateIndex.Get(unary.parent), weight);
         unaryRuleCounter.IncrementCount(unary, weight);
         unaryRules.Add(unary);
         return;
     }

     int        secondChildState = stateIndex.AddToIndex(kids[1].Label().Value());
     BinaryRule binary           = new BinaryRule(parentState, firstChildState, secondChildState);
     symbolCounter.IncrementCount(stateIndex.Get(binary.parent), weight);
     binaryRuleCounter.IncrementCount(binary, weight);
     binaryRules.Add(binary);
 }
 /// <summary>
 /// For every unary rule in <c>ug</c> whose child is the state of <paramref name="edge"/>,
 /// fills <c>tempEdge</c> with the projected parent edge and calls <c>RelaxTempEdge</c>.
 /// Rules whose parent equals their child are skipped.
 /// </summary>
 /// <param name="edge">the edge to project upward; it becomes the back edge of each projection</param>
 protected internal virtual void ProjectUnaries(Edge edge)
 {
     IEnumerator ruleIter = ug.RuleIteratorByChild(edge.state);
     while (ruleIter.MoveNext())
     {
         UnaryRule unary = (UnaryRule)ruleIter.Current;
         if (unary.child != unary.parent)
         {
             // same span, head and tag as the source edge ...
             tempEdge.start = edge.start;
             tempEdge.end   = edge.end;
             tempEdge.head  = edge.head;
             tempEdge.tag   = edge.tag;
             // ... but relabelled with the rule's parent and linked back to the source edge
             tempEdge.state    = unary.parent;
             tempEdge.backEdge = edge;
             tempEdge.backHook = null;
             tempEdge.iScore   = edge.iScore + unary.score;
             RelaxTempEdge();
         }
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a unary and a binary grammar from the given rules plus the rules encoded by the
        /// arcs of the given transducer graphs, numbering every state in a freshly created
        /// <c>newStateIndex</c>.
        /// </summary>
        /// <remarks>
        /// Both rule collections are modified by this call: the rules already in them are
        /// renumbered in place, and the rules derived from the graph arcs are added to them.
        /// When <c>outputType</c> is <c>RawCounts</c> the rule scores are treated as counts and are
        /// replaced by the log of the count divided by the total count of the rule's parent symbol
        /// (binary rules first have <c>op.trainOptions.ruleDiscount</c> subtracted).
        /// </remarks>
        /// <param name="graphs">the TransducerGraph objects whose arcs are converted into rules</param>
        /// <param name="unaryRules">is a Set of UnaryRule objects that we need to add</param>
        /// <param name="binaryRules">is a Set of BinaryRule objects that we need to add</param>
        /// <returns>a new Pair of UnaryGrammar, BinaryGrammar</returns>
        /// <exception cref="System.InvalidOperationException">
        /// an arc that encodes a binary rule has an input that is empty or does not end in one of
        /// the direction markers <c>&lt;</c>, <c>[</c>, <c>&gt;</c> or <c>]</c>
        /// </exception>
        protected internal virtual Pair <UnaryGrammar, BinaryGrammar> ConvertGraphsToGrammar(ICollection <TransducerGraph> graphs, ICollection <UnaryRule> unaryRules, ICollection <BinaryRule> binaryRules)
        {
            // first go through all the existing rules and number them with the new index
            // NOTE(review): the rules are mutated while they sit in the collections; if those
            // collections hash on parent/child this could invalidate lookups — confirm.
            newStateIndex = new HashIndex <string>();
            foreach (UnaryRule rule in unaryRules)
            {
                rule.parent = newStateIndex.AddToIndex(stateIndex.Get(rule.parent));
                rule.child  = newStateIndex.AddToIndex(stateIndex.Get(rule.child));
            }
            foreach (BinaryRule rule_1 in binaryRules)
            {
                rule_1.parent     = newStateIndex.AddToIndex(stateIndex.Get(rule_1.parent));
                rule_1.leftChild  = newStateIndex.AddToIndex(stateIndex.Get(rule_1.leftChild));
                rule_1.rightChild = newStateIndex.AddToIndex(stateIndex.Get(rule_1.rightChild));
            }
            // now go through the graphs and add the rules
            foreach (TransducerGraph graph in graphs)
            {
                object startNode = graph.GetStartNode();
                foreach (TransducerGraph.Arc arc in graph.GetArcs())
                {
                    // TODO: make sure these are the strings we're looking for
                    string source      = arc.GetSourceNode().ToString();
                    string target      = arc.GetTargetNode().ToString();
                    string inputString = arc.GetInput().ToString();
                    double output      = ((double)arc.GetOutput());
                    if (source.Equals(startNode))
                    {
                        // arc leaving the start node: make a UnaryRule target -> input
                        UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                        unaryRules.Add(ur);
                    }
                    else if (inputString.Equals(End) || inputString.Equals(Epsilon))
                    {
                        // End/Epsilon arc: make a UnaryRule target -> source
                        UnaryRule ur = new UnaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), SmartNegate(output));
                        unaryRules.Add(ur);
                    }
                    else
                    {
                        // make a BinaryRule
                        // figure out whether the input was generated on the left or right;
                        // the last character of the input is the direction marker
                        int length = inputString.Length;
                        if (length == 0)
                        {
                            // an empty input has no marker to read; report it like any other
                            // malformed input instead of failing on the index below
                            throw new System.InvalidOperationException("Arc input is in unexpected format: " + arc);
                        }
                        char leftOrRight = inputString[length - 1];
                        inputString = Sharpen.Runtime.Substring(inputString, 0, length - 1);
                        BinaryRule br;
                        if (leftOrRight == '<' || leftOrRight == '[')
                        {
                            br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(inputString), newStateIndex.AddToIndex(source), SmartNegate(output));
                        }
                        else if (leftOrRight == '>' || leftOrRight == ']')
                        {
                            br = new BinaryRule(newStateIndex.AddToIndex(target), newStateIndex.AddToIndex(source), newStateIndex.AddToIndex(inputString), SmartNegate(output));
                        }
                        else
                        {
                            throw new System.InvalidOperationException("Arc input is in unexpected format: " + arc);
                        }
                        binaryRules.Add(br);
                    }
                }
            }
            // by now, the unaryRules and binaryRules Sets have old untouched and new rules with scores
            ClassicCounter <string> symbolCounter = new ClassicCounter <string>();

            if (outputType == RawCounts)
            {
                // now we take the sets of rules and turn them into grammars
                // the scores of the rules we are given are actually counts
                // so we count parent symbol occurrences
                foreach (UnaryRule rule_2 in unaryRules)
                {
                    symbolCounter.IncrementCount(newStateIndex.Get(rule_2.parent), rule_2.score);
                }
                foreach (BinaryRule rule_3 in binaryRules)
                {
                    symbolCounter.IncrementCount(newStateIndex.Get(rule_3.parent), rule_3.score);
                }
            }
            // now we put the rules in the grammars
            int           numRules = 0;
            UnaryGrammar  ug       = new UnaryGrammar(newStateIndex);
            BinaryGrammar bg       = new BinaryGrammar(newStateIndex);

            foreach (UnaryRule rule_4 in unaryRules)
            {
                if (outputType == RawCounts)
                {
                    // convert the raw count into a log relative frequency
                    double count = symbolCounter.GetCount(newStateIndex.Get(rule_4.parent));
                    rule_4.score = (float)Math.Log(rule_4.score / count);
                }
                ug.AddRule(rule_4);
                numRules++;
            }
            foreach (BinaryRule rule_5 in binaryRules)
            {
                if (outputType == RawCounts)
                {
                    // same conversion, but binary counts are discounted first
                    double count = symbolCounter.GetCount(newStateIndex.Get(rule_5.parent));
                    rule_5.score = (float)Math.Log((rule_5.score - op.trainOptions.ruleDiscount) / count);
                }
                bg.AddRule(rule_5);
                numRules++;
            }
            if (verbose)
            {
                System.Console.Out.WriteLine("Number of minimized rules: " + numRules);
                System.Console.Out.WriteLine("Number of minimized states: " + newStateIndex.Size());
            }
            ug.PurgeRules();
            bg.SplitRules();
            return(new Pair <UnaryGrammar, BinaryGrammar>(ug, bg));
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Scores a local tree (a node and its immediate children) by chaining grammar rules left to
 /// right: one unary rule for the synthetic state built from the leftmost child, then one
 /// binary rule per remaining child.
 /// </summary>
 /// <param name="localTree">the tree whose value and children's values name the categories to look up</param>
 /// <param name="stateIndex">the index used to turn category names into state numbers</param>
 /// <param name="pd">the parser whose <c>ug</c> and <c>bg</c> grammars supply the rules</param>
 /// <returns>
 /// the sum of the scores of the rules used, or <c>double.NegativeInfinity</c> when a needed
 /// state or rule cannot be found
 /// </returns>
 private static double ComputeLocalTreeScore(Tree localTree, IIndex <string> stateIndex, LexicalizedParser pd)
 {
     try
     {
         string parent      = localTree.Value();
         int    parentState = stateIndex.IndexOf(parent);
         Tree[] children    = localTree.Children();
         // let's find the unary to kick things off with the left child (since we assume a left to right grammar)
         // first we create the synthetic parent of the leftmost child
         string nextChild    = children[0].Value();
         string current      = "@" + parent + "| [ [" + nextChild + "] ";
         int    currentState = stateIndex.IndexOf(current);
         if (currentState < 0)
         {
             // NOTE(review): assumes a negative index means the synthetic state is unknown — confirm against IIndex
             return(double.NegativeInfinity);
         }
         IList <UnaryRule> rules = pd.ug.RulesByParent(currentState);
         if (rules == null || rules.Count == 0)
         {
             // no unary rule starts the chain; treat it like any other missing rule
             // instead of failing on rules[0]
             return(double.NegativeInfinity);
         }
         double localTreeScore = rules[0].Score();
         // go through rest of rules
         for (int i = 1; i < children.Length; i++)
         {
             // find rules in BinaryGrammar that can extend this state
             nextChild = children[i].Value();
             int childState = stateIndex.IndexOf(nextChild);
             // only the rule for the last child may rewrite to the parent of the whole
             // local tree; every earlier rule must rewrite to something else
             bool       mustReachParent = (i == children.Length - 1);
             BinaryRule foundBR         = null;
             foreach (BinaryRule br in pd.bg.RuleListByLeftChild(currentState))
             {
                 if (br.rightChild == childState && (br.parent == parentState) == mustReachParent)
                 {
                     foundBR = br;
                     break;
                 }
             }
             if (foundBR == null)
             {
                 // we never found a matching rule!
                 return(double.NegativeInfinity);
             }
             currentState    = foundBR.parent;
             localTreeScore += foundBR.score;
         }
         // end loop through children
         return(localTreeScore);
     }
     catch (NoSuchElementException)
     {
         // we couldn't find a state for one of the needed categories
         return(double.NegativeInfinity);
     }
 }