/// <summary>
/// First pass over the FST file: reads every "T" line and makes sure a
/// grammar node exists in <c>_nodes</c> (keyed as "G" + destination ID) for the
/// destination of each transition that carries two word tokens.
/// </summary>
/// <param name="path">The path of the FST file to read.</param>
/// <returns>
/// The highest destination node ID found on any "T" line, or 0 if there is none.
/// </returns>
private int CreateNodes(String path)
{
    ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);
    int maxNodeId = 0;

    // try/finally so the tokenizer is closed even if a read inside the loop throws.
    try
    {
        while (!tok.IsEOF())
        {
            tok.Skipwhite();
            String token = tok.GetString();
            if (token == null)
            {
                break;
            }

            // Only "T" lines are of interest in this pass; everything else is skipped.
            if (!token.Equals("T"))
            {
                continue;
            }

            tok.GetInt("src id"); // toss source node
            int id = tok.GetInt("dest id"); // dest node number
            if (id > maxNodeId)
            {
                maxNodeId = id;
            }

            String word1 = tok.GetString(); // first word
            if (word1 == null)
            {
                // No word tokens on this line: nothing to create for it here.
                continue;
            }

            // NOTE(review): word2 is dereferenced below without a null check, so a
            // "T" line with only one word token would throw - confirm that the file
            // format always supplies the second word.
            String word2 = tok.GetString(); // second word
            tok.GetString(); // toss probability

            String nodeName = "G" + id;
            GrammarNode node = _nodes.Get(nodeName);
            if (node == null)
            {
                // "," as the second word creates the node through the (id, false)
                // overload; any other word is passed to the node being created.
                node = word2.Equals(",")
                    ? CreateGrammarNode(id, false)
                    : CreateGrammarNode(id, word2);
                _nodes.Put(nodeName, node);
            }
            else if (!word2.Equals(","))
            {
                // A node seen before must carry the same word as this line.
                Debug.Assert(word2.Equals(GetWord(node)));
            }
        }
    }
    finally
    {
        tok.Close();
    }

    return maxNodeId;
}
/// <summary>
/// Creates the grammar from the FST file at <c>_path</c> in two passes: the first
/// pass creates the nodes, the second pass re-reads the file and adds the arcs
/// described by its "I" (initial), "T" (transition) and "F" (final) lines.
/// </summary>
/// <returns>
/// The initial node for the grammar. The presence of an "I" line is only checked
/// with <c>Debug.Assert</c>, so the result is null when the file has no such line
/// and assertions are not enforced.
/// </returns>
protected override GrammarNode CreateGrammar()
{
    GrammarNode initialNode = null;

    // first pass create the FST nodes
    int maxNodeId = CreateNodes(_path);

    // create the final node:
    GrammarNode finalNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
    finalNode.SetFinalNode(true);

    // replace each word node with a pair of nodes, which
    // consists of the word node and a new dummy end node, which is
    // for adding null or backoff transitions
    maxNodeId = ExpandWordNodes(maxNodeId);

    ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(_path, true);

    // try/finally so the tokenizer is closed even if a read inside the loop throws.
    try
    {
        // Second pass, add all of the arcs
        while (!tok.IsEOF())
        {
            tok.Skipwhite();
            String token = tok.GetString();
            if (token == null)
            {
                break;
            }

            if (token.Equals("I"))
            {
                // Only one initial node is expected per file.
                Debug.Assert(initialNode == null);
                int initialID = tok.GetInt("initial ID");
                String nodeName = "G" + initialID;

                // TODO: FlatLinguist requires the initial grammar node
                // to contain a single silence. We'll do that for now,
                // but once the FlatLinguist is fixed, this should be
                // returned to its former method of creating an empty
                // initial grammar node:
                // initialNode = createGrammarNode(initialID, false);
                initialNode = CreateGrammarNode(initialID, IDictionary.SilenceSpelling);
                _nodes.Put(nodeName, initialNode);

                // optionally add a silence node that loops with the initial node
                if (_addInitialSilenceNode)
                {
                    GrammarNode silenceNode =
                        CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
                    initialNode.Add(silenceNode, LogMath.LogOne);
                    silenceNode.Add(initialNode, LogMath.LogOne);
                }
            }
            else if (token.Equals("T"))
            {
                int thisID = tok.GetInt("this id");
                int nextID = tok.GetInt("next id");

                GrammarNode thisNode = Get(thisID);
                GrammarNode nextNode = Get(nextID);

                // if the source node is an FSTGrammarNode, we want
                // to join the endNode to the destination node
                if (HasEndNode(thisNode))
                {
                    thisNode = GetEndNode(thisNode);
                }

                float lnProb = 0f; // negative natural log
                String output = tok.GetString();

                if (output == null || output.Equals(","))
                {
                    // these are epsilon (meaning backoff) transitions
                    if (output != null)
                    {
                        // output is "," here: skip the word, then read the probability
                        tok.GetString();
                        lnProb = tok.GetFloat("probability");
                    }

                    // if the destination node has been expanded
                    // we actually want to add the backoff transition
                    // to the endNode
                    if (HasEndNode(nextNode))
                    {
                        nextNode = GetEndNode(nextNode);
                    }
                }
                else
                {
                    String word = tok.GetString(); // second word of the pair
                    lnProb = tok.GetFloat("probability");

                    if (_ignoreUnknownTransitions && word.Equals("<unknown>"))
                    {
                        // drop this transition entirely
                        continue;
                    }

                    // A word transition must lead to a node that has a word.
                    Debug.Assert(HasWord(nextNode));
                }

                thisNode.Add(nextNode, ConvertProbability(lnProb));
            }
            else if (token.Equals("F"))
            {
                int thisID = tok.GetInt("this id");
                float lnProb = tok.GetFloat("probability");

                GrammarNode thisNode = Get(thisID);
                if (HasEndNode(thisNode))
                {
                    thisNode = GetEndNode(thisNode);
                }

                // "F" lines connect the node to the single final node created above.
                thisNode.Add(finalNode, ConvertProbability(lnProb));
            }
        }
    }
    finally
    {
        tok.Close();
    }

    Debug.Assert(initialNode != null);
    return initialNode;
}