Пример #1
0
        /// <summary>
        /// Reads the FST file at the given path and creates a grammar node for the
        /// destination of each "T" record that does not already have an entry in
        /// <c>_nodes</c> (keyed by "G" followed by the node ID).
        /// </summary>
        /// <param name="path">The path of the FST file to read.</param>
        /// <returns>
        /// The highest destination node ID found in the "T" records, or 0 when the
        /// file contains none.
        /// </returns>
        private int CreateNodes(String path)
        {
            ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);
            int maxNodeId = 0;

            // Close the tokenizer in a finally block so it is released even if one
            // of the reads below throws on a malformed record.
            try
            {
                while (!tok.IsEOF())
                {
                    tok.Skipwhite();
                    String token = tok.GetString();
                    if (token == null)
                    {
                        break;
                    }

                    if (!token.Equals("T"))
                    {
                        // Only "T" records are of interest in this pass.
                        continue;
                    }

                    tok.GetInt("src id");           // toss source node
                    int id = tok.GetInt("dest id"); // dest node number
                    if (id > maxNodeId)
                    {
                        maxNodeId = id;
                    }

                    String word1 = tok.GetString(); // first word of the record
                    if (word1 == null)
                    {
                        // Record carries no words: nothing to create for it.
                        continue;
                    }

                    String word2 = tok.GetString(); // second word of the record
                    tok.GetString();                // toss probability

                    String      nodeName = "G" + id;
                    GrammarNode node     = _nodes.Get(nodeName);
                    if (node == null)
                    {
                        // A "," in the second word position yields a node created
                        // with the (id, false) overload instead of a word node.
                        if (word2.Equals(","))
                        {
                            node = CreateGrammarNode(id, false);
                        }
                        else
                        {
                            node = CreateGrammarNode(id, word2);
                        }
                        _nodes.Put(nodeName, node);
                    }
                    else if (!word2.Equals(","))
                    {
                        // An already known node must carry the same word.
                        Debug.Assert(word2.Equals(GetWord(node)));
                    }
                }
            }
            finally
            {
                tok.Close();
            }

            return maxNodeId;
        }
Пример #2
0
        /// <summary>
        /// Reads a model definition from the given stream and registers one
        /// <c>SenoneHMM</c> per unit with <c>HmmManager</c>.
        /// </summary>
        /// <remarks>
        /// The stream must start with <c>ModelVersion</c> followed by six
        /// "count label" pairs (n_base, n_tri, n_state_map, n_tied_state,
        /// n_tied_ci_state, n_tied_tmat). After the header come <c>numBase</c> base
        /// phone records and <c>numTri</c> context dependent records; each record is
        /// name, left, right, position, attribute, tmat, a list of state IDs and a
        /// terminating "N".
        /// </remarks>
        /// <param name="useCDUnits">
        /// If true, HMMs are also created for the context dependent units; if false
        /// their records are still read from the stream but discarded.
        /// </param>
        /// <param name="inputStream">The open input stream to read from.</param>
        /// <param name="path">
        /// The path of the model definition; only used in the log message.
        /// </param>
        /// <exception cref="IOException">
        /// Thrown when no HMM for <c>UnitManager.Silence</c> was registered after
        /// the base phones have been loaded.
        /// </exception>
        protected void LoadHMMPool(Boolean useCDUnits, Stream inputStream,
                                   string path)
        {
            var est = new ExtendedStreamTokenizer(inputStream,
                                                  '#', false);

            // Close the tokenizer in a finally block: both a malformed file and the
            // missing-SIL check below leave this method by exception.
            try
            {
                this.LogInfo("Loading HMM file from: " + path);

                est.ExpectString(ModelVersion);

                var numBase = est.GetInt("numBase");

                est.ExpectString("n_base");

                var numTri = est.GetInt("numTri");

                est.ExpectString("n_tri");

                var numStateMap = est.GetInt("numStateMap");

                est.ExpectString("n_state_map");

                var numTiedState = est.GetInt("numTiedState");

                est.ExpectString("n_tied_state");

                var numContextIndependentTiedState = est
                                                     .GetInt("numContextIndependentTiedState");

                est.ExpectString("n_tied_ci_state");

                var numTiedTransitionMatrices = est.GetInt("numTiedTransitionMatrices");

                est.ExpectString("n_tied_tmat");

                // Each record lists numStatePerHMM - 1 state IDs before the "N".
                var numStatePerHMM = numStateMap / (numTri + numBase);

                Debug.Assert(numTiedState == MixtureWeightsPool.StatesNum);
                Debug.Assert(numTiedTransitionMatrices == MatrixPool.Size);

                // Load the base phones
                for (var i = 0; i < numBase; i++)
                {
                    var name      = est.GetString();
                    var left      = est.GetString();
                    var right     = est.GetString();
                    var position  = est.GetString();
                    var attribute = est.GetString();
                    var tmat      = est.GetInt("tmat");

                    var stid = new int[numStatePerHMM - 1];

                    for (var j = 0; j < numStatePerHMM - 1; j++)
                    {
                        stid[j] = est.GetInt("j");
                        Debug.Assert(stid[j] >= 0 && stid[j] < numContextIndependentTiedState);
                    }
                    est.ExpectString("N");

                    // Base phones carry no context and no position.
                    Debug.Assert(left.Equals("-"));
                    Debug.Assert(right.Equals("-"));
                    Debug.Assert(position.Equals("-"));
                    Debug.Assert(tmat < numTiedTransitionMatrices);

                    var unit = _unitManager.GetUnit(name, attribute.Equals(Filler));
                    ContextIndependentUnits.Put(unit.Name, unit);

                    // The silence filler is represented by the shared
                    // UnitManager.Silence instance in the HMM itself.
                    if (unit.IsFiller && unit.Name.Equals(SilenceCiphone))
                    {
                        unit = UnitManager.Silence;
                    }

                    var transitionMatrix = MatrixPool.Get(tmat);
                    var ss = GetSenoneSequence(stid);

                    IHMM hmm = new SenoneHMM(unit, ss, transitionMatrix, GetHMMPosition(position));
                    HmmManager.Put(hmm);
                }

                if (HmmManager.Get(HMMPosition.Undefined, UnitManager.Silence) == null)
                {
                    throw new IOException("Could not find SIL unit in acoustic model");
                }

                // Load the context dependent phones. If the useCDUnits
                // property is false, the CD phones will not be created, but
                // the values still need to be read in from the file.

                var  lastUnitName = "";
                Unit lastUnit     = null;

                int[]          lastStid           = null;
                SenoneSequence lastSenoneSequence = null;

                for (var i = 0; i < numTri; i++)
                {
                    var name      = est.GetString();
                    var left      = est.GetString();
                    var right     = est.GetString();
                    var position  = est.GetString();
                    var attribute = est.GetString();
                    var tmat      = est.GetInt("tmat");

                    var stid = new int[numStatePerHMM - 1];

                    for (var j = 0; j < numStatePerHMM - 1; j++)
                    {
                        stid[j] = est.GetInt("j");
                        Debug.Assert(stid[j] >= numContextIndependentTiedState &&
                                     stid[j] < numTiedState);
                    }
                    est.ExpectString("N");

                    Debug.Assert(!left.Equals("-"));
                    Debug.Assert(!right.Equals("-"));
                    Debug.Assert(!position.Equals("-"));
                    Debug.Assert(attribute.Equals("n/a"));
                    Debug.Assert(tmat < numTiedTransitionMatrices);

                    if (useCDUnits)
                    {
                        Unit unit;
                        var  unitName = (name + ' ' + left + ' ' + right);

                        // Consecutive records with the same name and context
                        // reuse the unit created for the previous record.
                        if (unitName.Equals(lastUnitName))
                        {
                            unit = lastUnit;
                        }
                        else
                        {
                            var leftContext = new Unit[1];
                            leftContext[0] = ContextIndependentUnits.Get(left);

                            var rightContext = new Unit[1];
                            rightContext[0] = ContextIndependentUnits.Get(right);

                            Context context = LeftRightContext.Get(leftContext,
                                                                   rightContext);
                            unit = _unitManager.GetUnit(name, false, context);
                        }
                        lastUnitName = unitName;
                        lastUnit     = unit;

                        var transitionMatrix = MatrixPool.Get(tmat);

                        // Likewise reuse the previous senone sequence when the
                        // state IDs are unchanged.
                        var ss = lastSenoneSequence;
                        if (ss == null || !SameSenoneSequence(stid, lastStid))
                        {
                            ss = GetSenoneSequence(stid);
                        }
                        lastSenoneSequence = ss;
                        lastStid           = stid;

                        IHMM hmm = new SenoneHMM(unit, ss, transitionMatrix, GetHMMPosition(position));
                        HmmManager.Put(hmm);
                    }
                }
            }
            finally
            {
                est.Close();
            }
        }
Пример #3
0
        /// <summary>
        /// Reads the model definition named by <c>Model</c> under
        /// <c>Location.Path</c> and fills <c>Senone2Ci</c>: for every record, each
        /// listed state ID is used as an index and the record's <c>tmat</c> value is
        /// stored there. Also sets <c>_numBase</c> from the header.
        /// </summary>
        /// <exception cref="IOException">
        /// Thrown when the model definition stream cannot be obtained.
        /// </exception>
        private void GetSenoneToCIPhone()
        {
            // Every record starts with this many string fields that are skipped
            // here (name, left, right, position, attribute) before the tmat value.
            const int leadingStringFields = 5;

            var inputStream = GetDataStream(Path.Combine(Location.Path, Model));

            if (inputStream == null)
            {
                throw new IOException("can't find modelDef " + Model);
            }

            var est = new ExtendedStreamTokenizer(inputStream, '#', false);

            // Close the tokenizer in a finally block so it is released even when a
            // malformed file makes one of the reads throw.
            try
            {
                this.LogInfo("Loading HMM file from: " + Model);

                est.ExpectString(ModelVersion);

                _numBase = est.GetInt("numBase");
                est.ExpectString("n_base");

                var numTri = est.GetInt("numTri");

                est.ExpectString("n_tri");

                var numStateMap = est.GetInt("numStateMap");

                est.ExpectString("n_state_map");

                var numTiedState = est.GetInt("numTiedState");

                est.ExpectString("n_tied_state");

                Senone2Ci = new int[numTiedState];

                // The CI tied state count is not needed here, but must be consumed.
                est.GetInt("numContextIndependentTiedState");
                est.ExpectString("n_tied_ci_state");

                var numTiedTransitionMatrices = est.GetInt("numTiedTransitionMatrices");

                est.ExpectString("n_tied_tmat");

                // Each record lists numStatePerHMM - 1 state IDs before the "N".
                var numStatePerHMM = numStateMap / (numTri + _numBase);

                Debug.Assert(numTiedState == MixtureWeightsPool.StatesNum);
                Debug.Assert(numTiedTransitionMatrices == MatrixPool.Size);

                // Walk all records: base phones first, then context dependent ones.
                for (var i = 0; i < _numBase + numTri; i++)
                {
                    for (var j = 0; j < leadingStringFields; j++)
                    {
                        est.GetString();
                    }
                    var tmat = est.GetInt("tmat");

                    for (var j = 0; j < numStatePerHMM - 1; j++)
                    {
                        Senone2Ci[est.GetInt("j")] = tmat;
                    }
                    est.ExpectString("N");

                    Debug.Assert(tmat < numTiedTransitionMatrices);
                }
            }
            finally
            {
                est.Close();
            }
        }
Пример #4
0
        /// <summary>
        /// Produces the next element of the cepstrum stream: a start signal on the
        /// first call, then one frame of cepstrum values per call, then an end
        /// signal, and nothing afterwards.
        /// </summary>
        /// <returns>
        /// The next Data object, or null once the end signal has been delivered.
        /// </returns>
        /// <exception cref="System.Exception">
        /// IOException closing cepstrum stream
        /// or
        /// IOException reading from cepstrum stream
        /// </exception>
        public override IData GetData()
        {
            // Nothing emitted yet: open the stream with a start signal.
            if (_curPoint == -1)
            {
                IData startSignal = new DataStartSignal(_sampleRate);
                _curPoint++;
                return startSignal;
            }

            // The end signal has already been delivered.
            if (_curPoint > _numPoints)
            {
                return null;
            }

            // All points consumed: close the source and emit the end signal.
            if (_curPoint == _numPoints)
            {
                if (_numPoints > 0)
                {
                    _firstSampleNumber =
                        (_firstSampleNumber - _frameShift + _frameSize - 1);
                }

                var frameCount  = _curPoint / _cepstrumLength;
                var sampleCount = (frameCount - 1) * _frameShift + _frameSize;
                var millis      = (long)
                                  ((sampleCount / (double)_sampleRate) * 1000.0);

                IData endSignal = new DataEndSignal(millis);

                try
                {
                    if (!_binary)
                    {
                        _est.Close();
                    }
                    else
                    {
                        _binaryStream.Close();
                    }
                    _curPoint++;
                }
                catch (IOException ioe)
                {
                    throw new Exception("IOException closing cepstrum stream", ioe);
                }

                return endSignal;
            }

            // Regular case: read one frame worth of cepstrum values.
            var frame = new double[_cepstrumLength];

            try
            {
                for (var index = 0; index < frame.Length; index++)
                {
                    if (!_binary)
                    {
                        frame[index] = _est.GetFloat("cepstrum data");
                    }
                    else if (_bigEndian)
                    {
                        frame[index] = _binaryStream.ReadFloat();
                    }
                    else
                    {
                        frame[index] = Utilities.ReadLittleEndianFloat(_binaryStream);
                    }

                    // Count every value as soon as it has been read.
                    _curPoint++;
                }
            }
            catch (IOException ioe)
            {
                throw new Exception("IOException reading from cepstrum stream", ioe);
            }

            IData frameData = new DoubleData(frame, _sampleRate, _firstSampleNumber);
            _firstSampleNumber += _frameShift;
            return frameData;
        }
Пример #5
0
        /// <summary>
        /// Creates the grammar from the FST file at <c>_path</c> in two passes: the
        /// first pass (<c>CreateNodes</c> and <c>ExpandWordNodes</c>) builds the
        /// nodes, the second pass re-reads the file and adds the arcs described by
        /// its "I" (initial), "T" (transition) and "F" (final) records.
        /// </summary>
        /// <returns>The initial node for the grammar.</returns>
        protected override GrammarNode CreateGrammar()
        {
            GrammarNode initialNode = null;

            // first pass create the FST nodes
            int maxNodeId = CreateNodes(_path);

            // create the final node:
            GrammarNode finalNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
            finalNode.SetFinalNode(true);

            // replace each word node with a pair of nodes, which
            // consists of the word node and a new dummy end node, which is
            // for adding null or backoff transitions
            maxNodeId = ExpandWordNodes(maxNodeId);

            ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(_path, true);

            // Second pass, add all of the arcs. The tokenizer is closed in a
            // finally block so it is released even if a malformed record makes
            // one of the reads throw.
            try
            {
                while (!tok.IsEOF())
                {
                    tok.Skipwhite();
                    String token = tok.GetString();

                    if (token == null)
                    {
                        break;
                    }
                    else if (token.Equals("I"))
                    {
                        Debug.Assert(initialNode == null);
                        int    initialID = tok.GetInt("initial ID");
                        String nodeName  = "G" + initialID;

                        // TODO: FlatLinguist requires the initial grammar node
                        // to contain a single silence. We'll do that for now,
                        // but once the FlatLinguist is fixed, this should be
                        // returned to its former method of creating an empty
                        // initial grammar node (CreateGrammarNode(initialID, false)).
                        initialNode = CreateGrammarNode(initialID, IDictionary.SilenceSpelling);
                        _nodes.Put(nodeName, initialNode);

                        // optionally add a silence node
                        if (_addInitialSilenceNode)
                        {
                            GrammarNode silenceNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
                            initialNode.Add(silenceNode, LogMath.LogOne);
                            silenceNode.Add(initialNode, LogMath.LogOne);
                        }
                    }
                    else if (token.Equals("T"))
                    {
                        int thisID = tok.GetInt("this id");
                        int nextID = tok.GetInt("next id");

                        GrammarNode thisNode = Get(thisID);
                        GrammarNode nextNode = Get(nextID);

                        // if the source node is an FSTGrammarNode, we want
                        // to join the endNode to the destination node
                        if (HasEndNode(thisNode))
                        {
                            thisNode = GetEndNode(thisNode);
                        }

                        float  lnProb = 0f;       // negative natural log
                        String output = tok.GetString();

                        if (output == null || output.Equals(","))
                        {
                            // these are epsilon (meaning backoff) transitions

                            if (output != null)
                            {
                                // output is "," here: the record still carries
                                // a word and a probability.
                                tok.GetString(); // skip the word
                                lnProb = tok.GetFloat("probability");
                            }

                            // if the destination node has been expanded
                            // we actually want to add the backoff transition
                            // to the endNode
                            if (HasEndNode(nextNode))
                            {
                                nextNode = GetEndNode(nextNode);
                            }
                        }
                        else
                        {
                            String word = tok.GetString();
                            lnProb = tok.GetFloat("probability");

                            if (_ignoreUnknownTransitions && word.Equals("<unknown>"))
                            {
                                continue;
                            }

                            Debug.Assert(HasWord(nextNode));
                        }

                        thisNode.Add(nextNode, ConvertProbability(lnProb));
                    }
                    else if (token.Equals("F"))
                    {
                        int   thisID = tok.GetInt("this id");
                        float lnProb = tok.GetFloat("probability");

                        GrammarNode thisNode = Get(thisID);

                        if (HasEndNode(thisNode))
                        {
                            thisNode = GetEndNode(thisNode);
                        }

                        // "F" records link their node to the shared final node.
                        thisNode.Add(finalNode, ConvertProbability(lnProb));
                    }
                }
            }
            finally
            {
                tok.Close();
            }

            Debug.Assert(initialNode != null);

            return initialNode;
        }