/// <summary>
/// Reads the FST file at the given path and creates a grammar node for the
/// destination of every transition ("T") line that carries at least one word.
/// Nodes are registered in <c>_nodes</c> under the key "G" + destination id.
/// </summary>
/// <param name="path">The path of the FST file to read.</param>
/// <returns>
/// The highest destination node id found on any "T" line, or 0 when the file
/// contains no such line.
/// </returns>
private int CreateNodes(String path)
{
    ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);
    int maxNodeId = 0;

    // Close the tokenizer even when a malformed line makes parsing throw.
    try
    {
        while (!tok.IsEOF())
        {
            tok.Skipwhite();
            String token = tok.GetString();
            if (token == null)
            {
                break;
            }

            // Only transition lines contribute nodes in this pass.
            if (!token.Equals("T"))
            {
                continue;
            }

            tok.GetInt("src id"); // source node id is not needed here
            int id = tok.GetInt("dest id");
            if (id > maxNodeId)
            {
                maxNodeId = id;
            }

            String word1 = tok.GetString();
            if (word1 == null)
            {
                // Transition without words: nothing to create for it.
                continue;
            }

            String word2 = tok.GetString();
            tok.GetString(); // probability is not needed here

            String nodeName = "G" + id;
            GrammarNode node = _nodes.Get(nodeName);
            if (node == null)
            {
                // "," as the second word produces a node created with 'false'
                // instead of a word.
                node = word2.Equals(",")
                    ? CreateGrammarNode(id, false)
                    : CreateGrammarNode(id, word2);
                _nodes.Put(nodeName, node);
            }
            else if (!word2.Equals(","))
            {
                // Every word transition into an existing node must agree
                // with the word already stored for that node.
                Debug.Assert(word2.Equals(GetWord(node)));
            }
        }
    }
    finally
    {
        tok.Close();
    }

    return maxNodeId;
}
/// <summary>
/// Reads a model definition from the given stream and registers one
/// <c>SenoneHMM</c> in <c>HmmManager</c> for every base phone and, when
/// requested, for every context dependent phone listed in it.
/// </summary>
/// <param name="useCDUnits">
/// If true, HMMs for the context dependent units are created as well. If
/// false, their entries are still read (and checked) but no HMM is created.
/// </param>
/// <param name="inputStream">The open input stream to read the definition from.</param>
/// <param name="path">The path of the file; used only in the log message.</param>
/// <exception cref="IOException">
/// Thrown when no HMM for the silence unit was found among the base phones.
/// </exception>
protected void LoadHMMPool(Boolean useCDUnits, Stream inputStream, string path)
{
    var est = new ExtendedStreamTokenizer(inputStream, '#', false);

    // Close the tokenizer even when parsing fails or the silence check throws.
    try
    {
        this.LogInfo("Loading HMM file from: " + path);

        // Header: version string followed by "<count> <label>" pairs.
        est.ExpectString(ModelVersion);

        var numBase = est.GetInt("numBase");
        est.ExpectString("n_base");

        var numTri = est.GetInt("numTri");
        est.ExpectString("n_tri");

        var numStateMap = est.GetInt("numStateMap");
        est.ExpectString("n_state_map");

        var numTiedState = est.GetInt("numTiedState");
        est.ExpectString("n_tied_state");

        var numContextIndependentTiedState = est.GetInt("numContextIndependentTiedState");
        est.ExpectString("n_tied_ci_state");

        var numTiedTransitionMatrices = est.GetInt("numTiedTransitionMatrices");
        est.ExpectString("n_tied_tmat");

        var numStatePerHMM = numStateMap / (numTri + numBase);

        Debug.Assert(numTiedState == MixtureWeightsPool.StatesNum);
        Debug.Assert(numTiedTransitionMatrices == MatrixPool.Size);

        // Load the base phones
        for (var i = 0; i < numBase; i++)
        {
            var name = est.GetString();
            var left = est.GetString();
            var right = est.GetString();
            var position = est.GetString();
            var attribute = est.GetString();
            var tmat = est.GetInt("tmat");

            // Each entry lists numStatePerHMM - 1 state ids, then an "N" marker.
            var stid = new int[numStatePerHMM - 1];
            for (var j = 0; j < numStatePerHMM - 1; j++)
            {
                stid[j] = est.GetInt("j");
                Debug.Assert(stid[j] >= 0 && stid[j] < numContextIndependentTiedState);
            }
            est.ExpectString("N");

            Debug.Assert(left.Equals("-"));
            Debug.Assert(right.Equals("-"));
            Debug.Assert(position.Equals("-"));
            Debug.Assert(tmat < numTiedTransitionMatrices);

            var unit = _unitManager.GetUnit(name, attribute.Equals(Filler));
            ContextIndependentUnits.Put(unit.Name, unit);

            // The first filler
            if (unit.IsFiller && unit.Name.Equals(SilenceCiphone))
            {
                unit = UnitManager.Silence;
            }

            var transitionMatrix = MatrixPool.Get(tmat);
            var ss = GetSenoneSequence(stid);

            IHMM hmm = new SenoneHMM(unit, ss, transitionMatrix, GetHMMPosition(position));
            HmmManager.Put(hmm);
        }

        if (HmmManager.Get(HMMPosition.Undefined, UnitManager.Silence) == null)
        {
            throw new IOException("Could not find SIL unit in acoustic model");
        }

        // Load the context dependent phones. If the useCDUnits
        // property is false, the CD phones will not be created, but
        // the values still need to be read in from the file.
        var lastUnitName = "";
        Unit lastUnit = null;
        int[] lastStid = null;
        SenoneSequence lastSenoneSequence = null;

        for (var i = 0; i < numTri; i++)
        {
            var name = est.GetString();
            var left = est.GetString();
            var right = est.GetString();
            var position = est.GetString();
            var attribute = est.GetString();
            var tmat = est.GetInt("tmat");

            var stid = new int[numStatePerHMM - 1];
            for (var j = 0; j < numStatePerHMM - 1; j++)
            {
                stid[j] = est.GetInt("j");
                Debug.Assert(stid[j] >= numContextIndependentTiedState && stid[j] < numTiedState);
            }
            est.ExpectString("N");

            Debug.Assert(!left.Equals("-"));
            Debug.Assert(!right.Equals("-"));
            Debug.Assert(!position.Equals("-"));
            Debug.Assert(attribute.Equals("n/a"));
            Debug.Assert(tmat < numTiedTransitionMatrices);

            if (!useCDUnits)
            {
                continue;
            }

            // Consecutive entries with the same name and context reuse the
            // unit created for the previous entry.
            Unit unit;
            var unitName = (name + ' ' + left + ' ' + right);
            if (unitName.Equals(lastUnitName))
            {
                unit = lastUnit;
            }
            else
            {
                var leftContext = new Unit[1];
                leftContext[0] = ContextIndependentUnits.Get(left);

                var rightContext = new Unit[1];
                rightContext[0] = ContextIndependentUnits.Get(right);

                Context context = LeftRightContext.Get(leftContext, rightContext);
                unit = _unitManager.GetUnit(name, false, context);
            }
            lastUnitName = unitName;
            lastUnit = unit;

            var transitionMatrix = MatrixPool.Get(tmat);

            // Likewise reuse the previous senone sequence when the state ids match.
            var ss = lastSenoneSequence;
            if (ss == null || !SameSenoneSequence(stid, lastStid))
            {
                ss = GetSenoneSequence(stid);
            }
            lastSenoneSequence = ss;
            lastStid = stid;

            IHMM hmm = new SenoneHMM(unit, ss, transitionMatrix, GetHMMPosition(position));
            HmmManager.Put(hmm);
        }
    }
    finally
    {
        est.Close();
    }
}
/// <summary>
/// Reads the model definition named by <c>Model</c> (resolved against
/// <c>Location.Path</c>) and fills <c>Senone2Ci</c>: for every state id listed
/// in an entry, the slot at that index receives the entry's "tmat" value.
/// Also stores the number of base phones in <c>_numBase</c>.
/// </summary>
/// <exception cref="IOException">
/// Thrown when the model definition stream cannot be obtained.
/// </exception>
private void GetSenoneToCIPhone()
{
    // Number of string fields that precede "tmat" in every entry.
    // NOTE(review): presumably name, left, right, position and attribute - confirm against the format.
    const int LeadingFieldCount = 5;

    var inputStream = GetDataStream(Path.Combine(Location.Path, Model));
    if (inputStream == null)
    {
        throw new IOException("can't find modelDef " + Model);
    }

    var est = new ExtendedStreamTokenizer(inputStream, '#', false);

    // Close the tokenizer even when a malformed file makes parsing throw.
    try
    {
        this.LogInfo("Loading HMM file from: " + Model);

        // Header: version string followed by "<count> <label>" pairs.
        est.ExpectString(ModelVersion);

        _numBase = est.GetInt("numBase");
        est.ExpectString("n_base");

        var numTri = est.GetInt("numTri");
        est.ExpectString("n_tri");

        var numStateMap = est.GetInt("numStateMap");
        est.ExpectString("n_state_map");

        var numTiedState = est.GetInt("numTiedState");
        est.ExpectString("n_tied_state");

        Senone2Ci = new int[numTiedState];

        // The context independent state count is read only to advance the stream.
        est.GetInt("numContextIndependentTiedState");
        est.ExpectString("n_tied_ci_state");

        var numTiedTransitionMatrices = est.GetInt("numTiedTransitionMatrices");
        est.ExpectString("n_tied_tmat");

        var numStatePerHMM = numStateMap / (numTri + _numBase);

        Debug.Assert(numTiedState == MixtureWeightsPool.StatesNum);
        Debug.Assert(numTiedTransitionMatrices == MatrixPool.Size);

        // Walk every entry, base phones and context dependent phones alike.
        var entryCount = _numBase + numTri;
        for (var i = 0; i < entryCount; i++)
        {
            for (var j = 0; j < LeadingFieldCount; j++)
            {
                est.GetString();
            }

            var tmat = est.GetInt("tmat");

            // Each entry lists numStatePerHMM - 1 state ids, then an "N" marker.
            for (var j = 0; j < numStatePerHMM - 1; j++)
            {
                Senone2Ci[est.GetInt("j")] = tmat;
            }
            est.ExpectString("N");

            Debug.Assert(tmat < numTiedTransitionMatrices);
        }
    }
    finally
    {
        est.Close();
    }
}
/// <summary>
/// Creates the grammar from the FST file at <c>_path</c>. A first pass
/// (<c>CreateNodes</c>) creates the nodes, the word nodes are then expanded,
/// and a second pass over the same file adds the arcs described by the
/// "I" (initial node), "T" (transition) and "F" (final) lines.
/// </summary>
/// <returns>The initial node for the grammar.</returns>
protected override GrammarNode CreateGrammar()
{
    GrammarNode initialNode = null;

    // first pass create the FST nodes
    int maxNodeId = CreateNodes(_path);

    // create the final node:
    GrammarNode finalNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
    finalNode.SetFinalNode(true);

    // replace each word node with a pair of nodes, which
    // consists of the word node and a new dummy end node, which is
    // for adding null or backoff transitions
    maxNodeId = ExpandWordNodes(maxNodeId);

    ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(_path, true);

    // Second pass, add all of the arcs. The tokenizer is closed even when
    // a malformed line makes parsing throw.
    try
    {
        while (!tok.IsEOF())
        {
            tok.Skipwhite();
            String token = tok.GetString();

            if (token == null)
            {
                break;
            }
            else if (token.Equals("I"))
            {
                Debug.Assert(initialNode == null);
                int initialID = tok.GetInt("initial ID");
                String nodeName = "G" + initialID;

                // TODO: FlatLinguist requires the initial grammar node
                // to contain a single silence. We'll do that for now,
                // but once the FlatLinguist is fixed, this should be
                // returned to its former method of creating an empty
                // initial grammar node
                // initialNode = createGrammarNode(initialID, false);
                initialNode = CreateGrammarNode(initialID, IDictionary.SilenceSpelling);
                _nodes.Put(nodeName, initialNode);

                // optionally add a silence node
                if (_addInitialSilenceNode)
                {
                    GrammarNode silenceNode =
                        CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
                    initialNode.Add(silenceNode, LogMath.LogOne);
                    silenceNode.Add(initialNode, LogMath.LogOne);
                }
            }
            else if (token.Equals("T"))
            {
                int thisID = tok.GetInt("this id");
                int nextID = tok.GetInt("next id");

                GrammarNode thisNode = Get(thisID);
                GrammarNode nextNode = Get(nextID);

                // if the source node is an FSTGrammarNode, we want
                // to join the endNode to the destination node
                if (HasEndNode(thisNode))
                {
                    thisNode = GetEndNode(thisNode);
                }

                float lnProb = 0f; // negative natural log
                String output = tok.GetString();

                if (output == null || output.Equals(","))
                {
                    // these are epsilon (meaning backoff) transitions
                    if (output != null)
                    {
                        // output is "," here: a word and a probability follow
                        tok.GetString(); // skip the word
                        lnProb = tok.GetFloat("probability");
                    }

                    // if the destination node has been expanded
                    // we actually want to add the backoff transition
                    // to the endNode
                    if (HasEndNode(nextNode))
                    {
                        nextNode = GetEndNode(nextNode);
                    }
                }
                else
                {
                    String word = tok.GetString();
                    lnProb = tok.GetFloat("probability");

                    if (_ignoreUnknownTransitions && word.Equals("<unknown>"))
                    {
                        continue;
                    }

                    Debug.Assert(HasWord(nextNode));
                }

                thisNode.Add(nextNode, ConvertProbability(lnProb));
            }
            else if (token.Equals("F"))
            {
                int thisID = tok.GetInt("this id");
                float lnProb = tok.GetFloat("probability");

                GrammarNode thisNode = Get(thisID);
                GrammarNode nextNode = finalNode;

                if (HasEndNode(thisNode))
                {
                    thisNode = GetEndNode(thisNode);
                }

                thisNode.Add(nextNode, ConvertProbability(lnProb));
            }
        }
    }
    finally
    {
        tok.Close();
    }

    Debug.Assert(initialNode != null);
    return initialNode;
}