Example #1
0
        /// <summary>
        /// Builds a word-list grammar from the whitespace-separated words found in the file
        /// at <c>_path</c>. The resulting graph is
        /// <c>"&lt;sil&gt;" -> branch -> (one node per word) -> "&lt;sil&gt;" (final)</c>, with every
        /// word reachable from the branch node with the same score. When <c>_isLooping</c> is
        /// set, each word node also links back to the branch node.
        /// </summary>
        /// <remarks>
        /// If the file does not exist, the tokenizer is created over a null reader and the
        /// grammar contains no word nodes (assuming such a tokenizer reports EOF immediately -
        /// TODO confirm against ExtendedStreamTokenizer).
        /// </remarks>
        /// <returns>The initial node of the grammar.</returns>
        protected GrammarNode CreateGrammar()
        {
            // Only a tokenizer opened on a real file owns something that has to be released;
            // the null-reader fallback is left alone so Close() is never invoked on it.
            bool fileBacked = File.Exists(_path);
            ExtendedStreamTokenizer tok = fileBacked
                ? new ExtendedStreamTokenizer(_path, true)
                : new ExtendedStreamTokenizer((StreamReader)null, true);

            List<GrammarNode> wordGrammarNodes = new List<GrammarNode>();
            GrammarNode initialNode;
            GrammarNode branchNode;
            GrammarNode finalNode;

            try
            {
                // Keep the creation order (initial, branch, final, then words): the node
                // factory may hand out ids sequentially.
                initialNode = CreateGrammarNode("<sil>");
                branchNode  = CreateGrammarNode(false);
                finalNode   = CreateGrammarNode("<sil>");
                finalNode.SetFinalNode(true);

                while (!tok.IsEOF())
                {
                    string word;
                    while ((word = tok.GetString()) != null)
                    {
                        wordGrammarNodes.Add(CreateGrammarNode(word));
                    }
                }
            }
            finally
            {
                // Release the underlying file even when tokenizing or node creation throws.
                if (fileBacked)
                {
                    tok.Close();
                }
            }

            // now connect all the GrammarNodes together
            initialNode.Add(branchNode, LogMath.LogOne);
            if (wordGrammarNodes.Count != 0)
            {
                // Uniform distribution over the words, expressed in the log domain.
                float branchScore = _logMath.LinearToLog(1.0 / wordGrammarNodes.Count);
                foreach (GrammarNode wordNode in wordGrammarNodes)
                {
                    branchNode.Add(wordNode, branchScore);
                    wordNode.Add(finalNode, LogMath.LogOne);
                    if (_isLooping)
                    {
                        // Allow another word to follow by returning to the branch node.
                        wordNode.Add(branchNode, LogMath.LogOne);
                    }
                }
            }
            return initialNode;
        }
Example #2
0
        /// <summary>
        /// Creates the grammar from the FST description stored at <c>_path</c>.
        /// The file is processed twice: <c>CreateNodes</c> builds the nodes, then this method
        /// re-reads the file and adds the arcs. Records are dispatched on their first token:
        /// <c>I</c> declares the initial node, <c>T</c> a transition between two nodes and
        /// <c>F</c> a transition from a node to the final (silence) node.
        /// </summary>
        /// <remarks>
        /// The tokenizer is closed in a <c>finally</c> block so that a malformed file (for
        /// example one on which <c>GetInt</c>/<c>GetFloat</c> throws) does not leak it.
        /// </remarks>
        /// <returns>The initial node for the grammar.</returns>
        protected override GrammarNode CreateGrammar()
        {
            GrammarNode initialNode = null;

            // first pass create the FST nodes
            int maxNodeId = CreateNodes(_path);

            // create the final node:
            GrammarNode finalNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
            finalNode.SetFinalNode(true);

            // replace each word node with a pair of nodes, which
            // consists of the word node and a new dummy end node, which is
            // for adding null or backoff transitions
            maxNodeId = ExpandWordNodes(maxNodeId);

            ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(_path, true);

            try
            {
                // Second pass, add all of the arcs
                while (!tok.IsEOF())
                {
                    tok.Skipwhite();
                    string token = tok.GetString();

                    if (token == null)
                    {
                        break;
                    }
                    else if (token.Equals("I"))
                    {
                        // Only one initial-node record is expected per file.
                        Debug.Assert(initialNode == null);
                        int    initialID = tok.GetInt("initial ID");
                        string nodeName  = "G" + initialID;

                        // TODO: FlatLinguist requires the initial grammar node
                        // to contain a single silence. We'll do that for now,
                        // but once the FlatLinguist is fixed, this should be
                        // returned to its former method of creating an empty
                        // initial grammar node

                        initialNode = CreateGrammarNode(initialID, IDictionary.SilenceSpelling);
                        _nodes.Put(nodeName, initialNode);

                        // optionally add a silence node
                        if (_addInitialSilenceNode)
                        {
                            GrammarNode silenceNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
                            initialNode.Add(silenceNode, LogMath.LogOne);
                            silenceNode.Add(initialNode, LogMath.LogOne);
                        }
                    }
                    else if (token.Equals("T"))
                    {
                        int thisID = tok.GetInt("this id");
                        int nextID = tok.GetInt("next id");

                        GrammarNode thisNode = Get(thisID);
                        GrammarNode nextNode = Get(nextID);

                        // if the source node is an FSTGrammarNode, we want
                        // to join the endNode to the destination node
                        if (HasEndNode(thisNode))
                        {
                            thisNode = GetEndNode(thisNode);
                        }

                        float  lnProb = 0f;       // negative natural log
                        string output = tok.GetString();

                        if (output == null || output.Equals(","))
                        {
                            // these are epsilon (meaning backoff) transitions;
                            // a non-null output here can only be ",", in which case
                            // a word and a probability follow on the same record
                            if (output != null)
                            {
                                tok.GetString(); // skip the word
                                lnProb = tok.GetFloat("probability");
                            }

                            // if the destination node has been expanded
                            // we actually want to add the backoff transition
                            // to the endNode
                            if (HasEndNode(nextNode))
                            {
                                nextNode = GetEndNode(nextNode);
                            }
                        }
                        else
                        {
                            string word = tok.GetString();     // skip words
                            lnProb = tok.GetFloat("probability");

                            // Drop the arc entirely when unknown-word transitions are ignored.
                            if (_ignoreUnknownTransitions && word.Equals("<unknown>"))
                            {
                                continue;
                            }

                            Debug.Assert(HasWord(nextNode));
                        }

                        thisNode.Add(nextNode, ConvertProbability(lnProb));
                    }
                    else if (token.Equals("F"))
                    {
                        int   thisID = tok.GetInt("this id");
                        float lnProb = tok.GetFloat("probability");

                        GrammarNode thisNode = Get(thisID);

                        // As for "T" records, arcs leave from the end node of an expanded node.
                        if (HasEndNode(thisNode))
                        {
                            thisNode = GetEndNode(thisNode);
                        }

                        thisNode.Add(finalNode, ConvertProbability(lnProb));
                    }
                }
            }
            finally
            {
                // Always release the tokenizer, including when parsing throws.
                tok.Close();
            }

            Debug.Assert(initialNode != null);

            return initialNode;
        }