예제 #1
0
        /// <summary>
        /// Tries to record a unary rule as an arc in one of the per-category graphs.
        /// </summary>
        /// <param name="rule">the unary rule whose parent/child states are looked up in the state index</param>
        /// <param name="graphs">map of graphs, accessed through GetGraphFromMap</param>
        /// <returns>
        /// true when either the parent or the child state is synthetic and an arc was added;
        /// false when neither state is synthetic (nothing is added).
        /// </returns>
        protected internal virtual bool AddOneUnaryRule(UnaryRule rule, IDictionary <string, TransducerGraph> graphs)
        {
            string parentName = stateIndex.Get(rule.parent);
            string childName  = stateIndex.Get(rule.child);

            if (IsSyntheticState(parentName))
            {
                // Synthetic parent: the arc leaves the start node of the graph
                // belonging to the synthetic state's top category.
                TransducerGraph topGraph    = GetGraphFromMap(graphs, GetTopCategoryOfSyntheticState(parentName));
                double          startWeight = SmartNegate(rule.Score());
                topGraph.AddArc(topGraph.GetStartNode(), parentName, childName, startWeight);
                return true;
            }

            if (!IsSyntheticState(childName))
            {
                // Neither side is synthetic; this rule is not represented in any graph.
                return false;
            }

            // Synthetic child: add an arc from the synthetic state to the end state.
            TransducerGraph parentGraph = GetGraphFromMap(graphs, parentName);
            double          endWeight   = SmartNegate(rule.Score());
            parentGraph.AddArc(childName, parentName, End, endWeight);
            // parentName should be the same as the graph's end state
            parentGraph.SetEndNode(parentName);
            return true;
        }
예제 #2
0
        /// <summary>
        /// Compacts a graph by quasi-determinizing it, folding outputs into inputs,
        /// minimizing the resulting automaton, and then splitting outputs back out.
        /// When <c>saveGraphs</c> is set, the graph is dumped via WriteFile before and
        /// after compaction, named after the graph's first end node.
        /// </summary>
        /// <param name="graph">the graph to compact</param>
        /// <param name="l1">not used by this implementation</param>
        /// <param name="l3">not used by this implementation</param>
        /// <returns>the compacted graph</returns>
        protected internal override TransducerGraph DoCompaction(TransducerGraph graph, IList l1, IList l3)
        {
            TransducerGraph result = graph;

            if (saveGraphs)
            {
                WriteFile(result, "unminimized", FirstEndNodeName(result));
            }
            result = quasiDeterminizer.ProcessGraph(result);
            // combine outputs into inputs
            result = new TransducerGraph(result, ocp);
            // minimize the thing
            result = minimizer.MinimizeFA(result);
            //result = new  TransducerGraph(graph, otsp); // for debugging
            // pull out strings from sets returned by minimizer
            result = new TransducerGraph(result, ntsp);
            // split outputs from inputs
            result = new TransducerGraph(result, isp);
            if (saveGraphs)
            {
                WriteFile(result, "exactminimized", FirstEndNodeName(result));
            }
            // for debugging do comparison of the paths accepted by graph and result
            //log.info(TransducerGraph.testGraphPaths(graph, result, 100));
            return(result);
        }

        /// <summary>
        /// Returns the first end node of <paramref name="graph"/> as a string, or null
        /// when the graph has no end nodes.
        /// </summary>
        /// <remarks>
        /// An enumerator's Current is undefined until MoveNext() has been called, so the
        /// element must be fetched by actually advancing the enumeration; foreach does
        /// that and also disposes the enumerator where applicable.
        /// </remarks>
        private static string FirstEndNodeName(TransducerGraph graph)
        {
            foreach (object endNode in graph.GetEndNodes())
            {
                return (string)endNode;
            }
            return null;
        }
예제 #3
0
        /// <summary>Compacts the grammar specified by the Pair.</summary>
        /// <remarks>
        /// The grammar is converted into one graph per category, each graph is compacted
        /// with DoCompaction, and the compacted graphs are converted back into a grammar.
        /// Entries are removed from <paramref name="allTrainPaths"/> and
        /// <paramref name="allTestPaths"/> as each category is processed (to save memory),
        /// so both maps are modified by this call.
        /// </remarks>
        /// <param name="grammar">a Pair of grammars, ordered UnaryGrammar BinaryGrammar.</param>
        /// <param name="allTrainPaths">a Map from String passive constituents to Lists of paths</param>
        /// <param name="allTestPaths">a Map from String passive constituents to Lists of paths</param>
        /// <param name="originalStateIndex">the state index of the input grammar; stored in <c>stateIndex</c> for the duration of the compaction</param>
        /// <returns>a Triple of the new state index, the compacted UnaryGrammar and the compacted BinaryGrammar.</returns>
        public virtual Triple <IIndex <string>, UnaryGrammar, BinaryGrammar> CompactGrammar(Pair <UnaryGrammar, BinaryGrammar> grammar, IDictionary <string, IList <IList <string> > > allTrainPaths, IDictionary <string, IList <IList <string> > > allTestPaths, IIndex
                                                                                            <string> originalStateIndex)
        {
            // computed once for the whole grammar
            inputPrior = ComputeInputPrior(allTrainPaths);
            this.stateIndex = originalStateIndex;
            ICollection <UnaryRule>  unaryRules          = Generics.NewHashSet();
            ICollection <BinaryRule> binaryRules         = Generics.NewHashSet();
            IDictionary <string, TransducerGraph> graphs = ConvertGrammarToGraphs(grammar, unaryRules, binaryRules);

            compactedGraphs = Generics.NewHashSet();
            if (verbose)
            {
                System.Console.Out.WriteLine("There are " + graphs.Count + " categories to compact.");
            }
            int i = 0;

            // Iterate over a snapshot of the keys: .NET enumerators have no Remove(), and
            // removing from a dictionary while enumerating it is not safe, so each entry is
            // removed from the map itself once its graph has been compacted.
            foreach (string cat in new List <string>(graphs.Keys))
            {
                TransducerGraph graph = graphs[cat];
                if (verbose)
                {
                    System.Console.Out.WriteLine("About to compact grammar for " + cat + " with numNodes=" + graph.GetNodes().Count);
                }
                // remove (rather than just read) the paths to save memory
                IList <IList <string> > trainPaths = Sharpen.Collections.Remove(allTrainPaths, cat);
                if (trainPaths == null)
                {
                    trainPaths = new List <IList <string> >();
                }
                // remove (rather than just read) the paths to save memory
                IList <IList <string> > testPaths = Sharpen.Collections.Remove(allTestPaths, cat);
                if (testPaths == null)
                {
                    testPaths = new List <IList <string> >();
                }
                TransducerGraph compactedGraph = DoCompaction(graph, trainPaths, testPaths);
                i++;
                if (verbose)
                {
                    System.Console.Out.WriteLine(i + ". Compacted grammar for " + cat + " from " + graph.GetArcs().Count + " arcs to " + compactedGraph.GetArcs().Count + " arcs.");
                }
                // to save memory, drop the uncompacted graph we just processed
                graphs.Remove(cat);
                compactedGraphs.Add(compactedGraph);
            }
            Pair <UnaryGrammar, BinaryGrammar> ugbg = ConvertGraphsToGrammar(compactedGraphs, unaryRules, binaryRules);

            return(new Triple <IIndex <string>, UnaryGrammar, BinaryGrammar>(newStateIndex, ugbg.First(), ugbg.Second()));
        }
예제 #4
0
        /// <summary>
        /// Returns the graph stored under key <paramref name="o"/>, creating it first if absent.
        /// </summary>
        /// <remarks>
        /// A newly created graph gets <paramref name="o"/> as its end node and is stored
        /// in the map before being returned. The lookup uses TryGetValue because the
        /// dictionary indexer throws KeyNotFoundException for a missing key instead of
        /// returning null.
        /// </remarks>
        /// <param name="m">map from key to graph; may be modified by this call</param>
        /// <param name="o">the key to look up, also used as the end node of a new graph</param>
        /// <returns>the existing or newly created graph for <paramref name="o"/></returns>
        protected internal static TransducerGraph GetGraphFromMap(IDictionary <string, TransducerGraph> m, string o)
        {
            TransducerGraph graph;

            // A key mapped to null is treated the same as a missing key.
            if (!m.TryGetValue(o, out graph) || graph == null)
            {
                graph = new TransducerGraph();
                graph.SetEndNode(o);
                m[o] = graph;
            }
            return(graph);
        }
예제 #5
0
        /// <summary>
        /// Tries to record a binary rule as an arc in the graph of the top category
        /// of its synthetic child state.
        /// </summary>
        /// <param name="rule">the binary rule; its parent is expected to be a synthetic state</param>
        /// <param name="graphs">map of graphs, accessed through GetGraphFromMap</param>
        /// <returns>
        /// true when an arc was added; false when neither child is a synthetic state,
        /// in which case the rule is left alone.
        /// </returns>
        protected internal virtual bool AddOneBinaryRule(BinaryRule rule, IDictionary <string, TransducerGraph> graphs)
        {
            // parent has to be synthetic in BinaryRule
            string parentName = stateIndex.Get(rule.parent);
            string leftName   = stateIndex.Get(rule.leftChild);
            string rightName  = stateIndex.Get(rule.rightChild);

            // When final states are marked, the arc label is suffixed with a piece of the
            // parent name (presumably its last character) instead of a direction marker.
            string marker = null;
            if (op.trainOptions.markFinalStates)
            {
                marker = Sharpen.Runtime.Substring(parentName, parentName.Length - 1, parentName.Length);
            }

            string fromState;
            string label;

            // the left-child test is not necessary with left to right grammars
            if (IsSyntheticState(leftName))
            {
                fromState = leftName;
                label     = rightName + (marker ?? ">");
            }
            else if (IsSyntheticState(rightName))
            {
                fromState = rightName;
                label     = leftName + (marker ?? "<");
            }
            else
            {
                // we don't know what to do with this rule
                return false;
            }

            // makes it a real  0 <= k <= infty
            double weight = SmartNegate(rule.Score());
            string topCategory = GetTopCategoryOfSyntheticState(fromState);
            if (topCategory == null)
            {
                throw new Exception("can't have null topcat");
            }

            GetGraphFromMap(graphs, topCategory).AddArc(fromState, parentName, label, weight);
            return true;
        }
예제 #6
0
 /// <summary>
 /// Writes the DOT representation of <paramref name="graph"/> to the file
 /// <c>name + ".dot"</c> inside directory <paramref name="dir"/>, creating the
 /// directory if it does not exist.
 /// </summary>
 /// <param name="graph">the graph whose AsDOTString() output is written</param>
 /// <param name="dir">target directory; created (with parents) when missing</param>
 /// <param name="name">file name without the ".dot" extension</param>
 /// <returns>
 /// true when the file was written; false when <paramref name="dir"/> exists but is
 /// not a directory, cannot be created, or any exception occurs while writing.
 /// </returns>
 public static bool WriteFile(TransducerGraph graph, string dir, string name)
 {
     try
     {
         File baseDir = new File(dir);
         if (baseDir.Exists())
         {
             if (!baseDir.IsDirectory())
             {
                 return(false);
             }
         }
         else
         {
             if (!baseDir.Mkdirs())
             {
                 return(false);
             }
         }
         File        file = new File(baseDir, name + ".dot");
         PrintWriter w    = null;
         try
         {
             w = new PrintWriter(new FileWriter(file));
             string dotString = graph.AsDOTString();
             w.Print(dotString);
             w.Flush();
         }
         catch (FileNotFoundException)
         {
             log.Info("Failed to open file in writeToDOTfile: " + file);
             return(false);
         }
         catch (IOException)
         {
             log.Info("Failed to open file in writeToDOTfile: " + file);
             return(false);
         }
         finally
         {
             // Close on every path so the file handle is not leaked when
             // building or writing the DOT string fails part-way.
             if (w != null)
             {
                 w.Close();
             }
         }
         return(true);
     }
     catch (Exception e)
     {
         // Best effort: any other failure is reported and turned into a false result.
         Sharpen.Runtime.PrintStackTrace(e);
         return(false);
     }
 }
예제 #7
0
 /// <summary>
 /// Compacts a single graph; concrete subclasses supply the compaction strategy.
 /// </summary>
 /// <param name="graph">the graph to compact</param>
 /// <param name="trainPaths">a list of paths, each path a list of strings (presumably training paths for this graph's category — confirm against the caller)</param>
 /// <param name="testPaths">a list of paths, each path a list of strings (presumably held-out/test paths for this graph's category — confirm against the caller)</param>
 /// <returns>the compacted graph</returns>
 protected internal abstract TransducerGraph DoCompaction(TransducerGraph graph, IList <IList <string> > trainPaths, IList <IList <string> > testPaths);