/// <summary>
/// Tries to record one unary rule as an arc in the per-category transducer graphs.
/// </summary>
/// <param name="rule">the unary rule; its parent and child are looked up in <c>stateIndex</c>.</param>
/// <param name="graphs">map from category name to graph; a graph is fetched (or created) via <c>GetGraphFromMap</c>.</param>
/// <returns>
/// <c>true</c> when the parent or the child is a synthetic state and an arc was added;
/// <c>false</c> when neither is synthetic and the rule was left untouched.
/// </returns>
protected internal virtual bool AddOneUnaryRule(UnaryRule rule, IDictionary <string, TransducerGraph> graphs)
{
    string parentName = stateIndex.Get(rule.parent);
    string childName = stateIndex.Get(rule.child);

    if (IsSyntheticState(parentName))
    {
        // Synthetic parent: the arc leaves the start node of the graph that
        // belongs to the parent's top category.
        TransducerGraph categoryGraph = GetGraphFromMap(graphs, GetTopCategoryOfSyntheticState(parentName));
        double cost = SmartNegate(rule.Score());
        categoryGraph.AddArc(categoryGraph.GetStartNode(), parentName, childName, cost);
        return true;
    }

    if (!IsSyntheticState(childName))
    {
        // Neither side is synthetic; nothing to add.
        return false;
    }

    // Synthetic child: add the arc from the synthetic state to the end state.
    TransducerGraph parentGraph = GetGraphFromMap(graphs, parentName);
    double weight = SmartNegate(rule.Score());
    parentGraph.AddArc(childName, parentName, End, weight);
    // parentName should be the same as the graph's end state
    parentGraph.SetEndNode(parentName);
    return true;
}
/// <summary>
/// Compacts a graph by quasi-determinizing it, folding outputs into inputs,
/// minimizing it, and then unpacking the minimizer's node sets and splitting
/// outputs from inputs again.
/// </summary>
/// <param name="graph">the graph to compact.</param>
/// <param name="l1">not used by this implementation.</param>
/// <param name="l3">not used by this implementation.</param>
/// <returns>the compacted graph.</returns>
/// <remarks>
/// When <c>saveGraphs</c> is set, the graph is written via <c>WriteFile</c> before and
/// after compaction, named after the graph's first end node.
/// </remarks>
protected internal override TransducerGraph DoCompaction(TransducerGraph graph, IList l1, IList l3)
{
    TransducerGraph result = graph;
    if (saveGraphs)
    {
        WriteFile(result, "unminimized", FirstEndNodeName(result));
    }
    result = quasiDeterminizer.ProcessGraph(result);
    result = new TransducerGraph(result, ocp);  // combine outputs into inputs
    result = minimizer.MinimizeFA(result);      // minimize the thing
    result = new TransducerGraph(result, ntsp); // pull out strings from sets returned by minimizer
    result = new TransducerGraph(result, isp);  // split outputs from inputs
    if (saveGraphs)
    {
        WriteFile(result, "exactminimized", FirstEndNodeName(result));
    }
    // for debugging, compare the paths accepted by graph and result:
    // log.info(TransducerGraph.testGraphPaths(graph, result, 100));
    return(result);
}

/// <summary>
/// Returns the first end node of <paramref name="g"/> as a string.
/// </summary>
/// <remarks>
/// Reading <c>GetEnumerator().Current</c> without first calling <c>MoveNext()</c> is
/// undefined in .NET, so the collection is iterated properly here.
/// </remarks>
/// <exception cref="System.InvalidOperationException">the graph has no end node.</exception>
private static string FirstEndNodeName(TransducerGraph g)
{
    foreach (object endNode in g.GetEndNodes())
    {
        return (string)endNode;
    }
    throw new System.InvalidOperationException("graph has no end node to name the output file after");
}
/// <summary>Compacts the grammar specified by the Pair.</summary>
/// <param name="grammar">a Pair of grammars, ordered UnaryGrammar BinaryGrammar.</param>
/// <param name="allTrainPaths">
/// a Map from String passive constituents to Lists of paths; the entry for each
/// category is removed from this map as that category is processed (to save memory).
/// </param>
/// <param name="allTestPaths">
/// a Map from String passive constituents to Lists of paths; the entry for each
/// category is removed from this map as that category is processed (to save memory).
/// </param>
/// <param name="originalStateIndex">the state index of the input grammar; stored in <c>stateIndex</c>.</param>
/// <returns>a Triple of <c>newStateIndex</c>, the compacted UnaryGrammar and the compacted BinaryGrammar.</returns>
public virtual Triple <IIndex <string>, UnaryGrammar, BinaryGrammar> CompactGrammar(Pair <UnaryGrammar, BinaryGrammar> grammar, IDictionary <string, IList <IList <string> > > allTrainPaths, IDictionary <string, IList <IList <string> > > allTestPaths, IIndex <string> originalStateIndex)
{
    inputPrior = ComputeInputPrior(allTrainPaths); // computed once for the whole grammar
    this.stateIndex = originalStateIndex;

    ICollection <UnaryRule> unaryRules = Generics.NewHashSet();
    ICollection <BinaryRule> binaryRules = Generics.NewHashSet();
    IDictionary <string, TransducerGraph> graphs = ConvertGrammarToGraphs(grammar, unaryRules, binaryRules);
    compactedGraphs = Generics.NewHashSet();
    if (verbose)
    {
        System.Console.Out.WriteLine("There are " + graphs.Count + " categories to compact.");
    }

    // Iterate over a snapshot of the keys: IEnumerator has no Remove(), and a
    // dictionary must not be modified while it is being enumerated.
    IList <string> categories = new List <string>(graphs.Keys);
    int i = 0;
    foreach (string cat in categories)
    {
        TransducerGraph graph = graphs[cat];
        if (verbose)
        {
            System.Console.Out.WriteLine("About to compact grammar for " + cat + " with numNodes=" + graph.GetNodes().Count);
        }

        IList <IList <string> > trainPaths = Sharpen.Collections.Remove(allTrainPaths, cat); // to save memory
        if (trainPaths == null)
        {
            trainPaths = new List <IList <string> >();
        }
        IList <IList <string> > testPaths = Sharpen.Collections.Remove(allTestPaths, cat); // to save memory
        if (testPaths == null)
        {
            testPaths = new List <IList <string> >();
        }

        TransducerGraph compactedGraph = DoCompaction(graph, trainPaths, testPaths);
        i++;
        if (verbose)
        {
            System.Console.Out.WriteLine(i + ". Compacted grammar for " + cat + " from " + graph.GetArcs().Count + " arcs to " + compactedGraph.GetArcs().Count + " arcs.");
        }

        graphs.Remove(cat); // to save memory, drop the uncompacted graph once it has been processed
        compactedGraphs.Add(compactedGraph);
    }

    Pair <UnaryGrammar, BinaryGrammar> ugbg = ConvertGraphsToGrammar(compactedGraphs, unaryRules, binaryRules);
    return(new Triple <IIndex <string>, UnaryGrammar, BinaryGrammar>(newStateIndex, ugbg.First(), ugbg.Second()));
}
/// <summary>
/// Returns the graph stored under <paramref name="o"/>, creating and registering a
/// new one when the map has no (or a null) entry for that key.
/// </summary>
/// <param name="m">map from category name to graph; gains an entry when a graph is created.</param>
/// <param name="o">the key; also set as the end node of a newly created graph.</param>
/// <returns>the existing or newly created graph; never <c>null</c>.</returns>
protected internal static TransducerGraph GetGraphFromMap(IDictionary <string, TransducerGraph> m, string o)
{
    TransducerGraph graph;
    // The indexer throws KeyNotFoundException on a missing key (unlike Java's
    // Map.get, which returns null), so probe with TryGetValue instead.
    if (!m.TryGetValue(o, out graph) || graph == null)
    {
        graph = new TransducerGraph();
        graph.SetEndNode(o);
        m[o] = graph;
    }
    return(graph);
}
/// <summary>
/// Tries to record one binary rule as an arc in the per-category transducer graphs.
/// The arc runs from whichever child is a synthetic state to the rule's parent and
/// is labelled with the other child plus a direction/final-state marker.
/// </summary>
/// <param name="rule">the binary rule; parent and children are looked up in <c>stateIndex</c>.</param>
/// <param name="graphs">map from category name to graph; a graph is fetched (or created) via <c>GetGraphFromMap</c>.</param>
/// <returns><c>true</c> when an arc was added; <c>false</c> when neither child is synthetic.</returns>
/// <exception cref="Exception">the synthetic source state has no top category.</exception>
protected internal virtual bool AddOneBinaryRule(BinaryRule rule, IDictionary <string, TransducerGraph> graphs)
{
    // parent has to be synthetic in BinaryRule
    string parentName = stateIndex.Get(rule.parent);
    string leftName = stateIndex.Get(rule.leftChild);
    string rightName = stateIndex.Get(rule.rightChild);

    // When final states are marked, the last character of the parent replaces
    // the default direction marker in the arc label.
    string finalMarker = null;
    if (op.trainOptions.markFinalStates)
    {
        int parentLength = parentName.Length;
        finalMarker = Sharpen.Runtime.Substring(parentName, parentLength - 1, parentLength);
    }

    string fromState;
    string label;
    // the below test is not necessary with left to right grammars
    if (IsSyntheticState(leftName))
    {
        fromState = leftName;
        label = rightName + (finalMarker ?? ">");
    }
    else if (IsSyntheticState(rightName))
    {
        fromState = rightName;
        label = leftName + (finalMarker ?? "<");
    }
    else
    {
        // we don't know what to do with this rule
        return false;
    }

    double weight = SmartNegate(rule.Score()); // makes it a real 0 <= k <= infty
    string topCategory = GetTopCategoryOfSyntheticState(fromState);
    if (topCategory == null)
    {
        throw new Exception("can't have null topcat");
    }

    TransducerGraph categoryGraph = GetGraphFromMap(graphs, topCategory);
    categoryGraph.AddArc(fromState, parentName, label, weight);
    return true;
}
/// <summary>
/// Writes the DOT representation of <paramref name="graph"/> to
/// <c>&lt;dir&gt;/&lt;name&gt;.dot</c>, creating the directory if it does not exist.
/// </summary>
/// <param name="graph">the graph whose <c>AsDOTString()</c> output is written.</param>
/// <param name="dir">target directory; created (including parents) when missing.</param>
/// <param name="name">file name without the <c>.dot</c> extension.</param>
/// <returns>
/// <c>true</c> when the file was written; <c>false</c> when <paramref name="dir"/> exists
/// but is not a directory, cannot be created, or any error occurs while writing.
/// </returns>
public static bool WriteFile(TransducerGraph graph, string dir, string name)
{
    try
    {
        File baseDir = new File(dir);
        if (baseDir.Exists())
        {
            if (!baseDir.IsDirectory())
            {
                return(false);
            }
        }
        else
        {
            if (!baseDir.Mkdirs())
            {
                return(false);
            }
        }
        File file = new File(baseDir, name + ".dot");
        PrintWriter w = null;
        try
        {
            w = new PrintWriter(new FileWriter(file));
            string dotString = graph.AsDOTString();
            w.Print(dotString);
            w.Flush();
        }
        catch (FileNotFoundException)
        {
            log.Info("Failed to open file in writeToDOTfile: " + file);
            return(false);
        }
        catch (IOException)
        {
            log.Info("Failed to open file in writeToDOTfile: " + file);
            return(false);
        }
        finally
        {
            // Close on every path so the file handle is not leaked when
            // rendering or writing the graph throws.
            if (w != null)
            {
                w.Close();
            }
        }
        return(true);
    }
    catch (Exception e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
        return(false);
    }
}
/// <summary>
/// Compacts a single transducer graph. <c>CompactGrammar</c> calls this once per
/// category graph and collects the returned graphs into <c>compactedGraphs</c>.
/// </summary>
/// <param name="graph">the graph for one category.</param>
/// <param name="trainPaths">that category's entry from the training paths map, or an empty list when it has none.</param>
/// <param name="testPaths">that category's entry from the test paths map, or an empty list when it has none.</param>
/// <returns>the compacted graph for the category.</returns>
protected internal abstract TransducerGraph DoCompaction(TransducerGraph graph, IList <IList <string> > trainPaths, IList <IList <string> > testPaths);