示例#1
0
        /// <summary>
        /// First pass over the FST file: reads every "T" (transition) record and
        /// makes sure a grammar node exists for the record's destination node.
        /// </summary>
        /// <param name="path">The path of the FST file to read.</param>
        /// <returns>
        /// The highest destination node ID found in any "T" record, or 0 when no
        /// record carries a larger ID.
        /// </returns>
        private int CreateNodes(String path)
        {
            ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(path, true);
            int maxNodeId = 0;

            // The tokenizer must be closed even when reading a record throws;
            // without the finally block it was leaked on every error path.
            try
            {
                while (!tok.IsEOF())
                {
                    tok.Skipwhite();
                    String token = tok.GetString();
                    if (token == null)
                    {
                        break;
                    }

                    // Only transition records contribute nodes in this pass.
                    if (!token.Equals("T"))
                    {
                        continue;
                    }

                    tok.GetInt("src id");           // toss source node
                    int id = tok.GetInt("dest id"); // dest node number
                    if (id > maxNodeId)
                    {
                        maxNodeId = id;
                    }

                    String word1 = tok.GetString(); // first word of the record
                    if (word1 == null)
                    {
                        // Record carries no words, so there is no node to create.
                        continue;
                    }

                    // NOTE(review): word2 is dereferenced below without a null check,
                    // exactly as before; a record with only one word will throw here.
                    String word2 = tok.GetString(); // second word of the record
                    tok.GetString();                // toss probability

                    String      nodeName = "G" + id;
                    GrammarNode node     = _nodes.Get(nodeName);
                    if (node == null)
                    {
                        // A "," in the second word position yields a node created
                        // without a word; anything else is used as the node's word.
                        if (word2.Equals(","))
                        {
                            node = CreateGrammarNode(id, false);
                        }
                        else
                        {
                            node = CreateGrammarNode(id, word2);
                        }
                        _nodes.Put(nodeName, node);
                    }
                    else if (!word2.Equals(","))
                    {
                        // The node was already registered by an earlier record:
                        // check that this record names the same word.
                        Debug.Assert(word2.Equals(GetWord(node)));
                    }
                }
            }
            finally
            {
                tok.Close();
            }

            return maxNodeId;
        }
示例#2
0
        /// <summary>
        /// Creates the grammar from the FST file at <c>_path</c>.
        /// </summary>
        /// <remarks>
        /// The file is read twice. <c>CreateNodes</c> performs the first pass and
        /// creates the nodes; this method then creates the final node, expands the
        /// word nodes, and re-reads the file to add the arcs described by the
        /// "I" (initial), "T" (transition) and "F" (final) records.
        /// </remarks>
        /// <returns>
        /// The initial node for the grammar. This is <c>null</c> when the file
        /// contains no "I" record; that condition is only checked by a debug assertion.
        /// </returns>
        protected override GrammarNode CreateGrammar()
        {
            GrammarNode initialNode = null;

            // first pass create the FST nodes
            int maxNodeId = CreateNodes(_path);

            // create the final node:
            GrammarNode finalNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
            finalNode.SetFinalNode(true);

            // replace each word node with a pair of nodes, which
            // consists of the word node and a new dummy end node, which is
            // for adding null or backoff transitions
            maxNodeId = ExpandWordNodes(maxNodeId);

            ExtendedStreamTokenizer tok = new ExtendedStreamTokenizer(_path, true);

            // Second pass, add all of the arcs. The tokenizer is closed in a
            // finally block so it is not leaked when a record fails to parse.
            try
            {
                while (!tok.IsEOF())
                {
                    tok.Skipwhite();
                    String token = tok.GetString();

                    if (token == null)
                    {
                        break;
                    }
                    else if (token.Equals("I"))
                    {
                        // Only one initial record is expected per file.
                        Debug.Assert(initialNode == null);
                        int    initialID = tok.GetInt("initial ID");
                        String nodeName  = "G" + initialID;

                        // TODO: FlatLinguist requires the initial grammar node
                        // to contain a single silence. We'll do that for now,
                        // but once the FlatLinguist is fixed, this should be
                        // returned to its former method of creating an empty
                        // initial grammar node (CreateGrammarNode(initialID, false)).
                        initialNode = CreateGrammarNode(initialID, IDictionary.SilenceSpelling);
                        _nodes.Put(nodeName, initialNode);

                        // optionally add a silence node, linked to the initial
                        // node in both directions
                        if (_addInitialSilenceNode)
                        {
                            GrammarNode silenceNode = CreateGrammarNode(++maxNodeId, IDictionary.SilenceSpelling);
                            initialNode.Add(silenceNode, LogMath.LogOne);
                            silenceNode.Add(initialNode, LogMath.LogOne);
                        }
                    }
                    else if (token.Equals("T"))
                    {
                        int thisID = tok.GetInt("this id");
                        int nextID = tok.GetInt("next id");

                        GrammarNode thisNode = Get(thisID);
                        GrammarNode nextNode = Get(nextID);

                        // if the source node is an FSTGrammarNode, we want
                        // to join the endNode to the destination node
                        if (HasEndNode(thisNode))
                        {
                            thisNode = GetEndNode(thisNode);
                        }

                        float  lnProb = 0f;       // negative natural log
                        String output = tok.GetString();

                        if (output == null || output.Equals(","))
                        {
                            // these are epsilon (meaning backoff) transitions

                            if (output != null)
                            {
                                // output is ",": the record still carries a word
                                // and a probability
                                tok.GetString(); // skip the word
                                lnProb = tok.GetFloat("probability");
                            }

                            // if the destination node has been expanded
                            // we actually want to add the backoff transition
                            // to the endNode
                            if (HasEndNode(nextNode))
                            {
                                nextNode = GetEndNode(nextNode);
                            }
                        }
                        else
                        {
                            String word = tok.GetString();
                            lnProb = tok.GetFloat("probability");

                            // Drop the arc entirely when unknown-word transitions
                            // are configured to be ignored.
                            if (_ignoreUnknownTransitions && word.Equals("<unknown>"))
                            {
                                continue;
                            }

                            // A word transition must lead to a node that has a word.
                            Debug.Assert(HasWord(nextNode));
                        }

                        thisNode.Add(nextNode, ConvertProbability(lnProb));
                    }
                    else if (token.Equals("F"))
                    {
                        int   thisID = tok.GetInt("this id");
                        float lnProb = tok.GetFloat("probability");

                        // Final records are arcs from the given node (or its end
                        // node, if it has one) to the single final node.
                        GrammarNode thisNode = Get(thisID);
                        if (HasEndNode(thisNode))
                        {
                            thisNode = GetEndNode(thisNode);
                        }

                        thisNode.Add(finalNode, ConvertProbability(lnProb));
                    }
                }
            }
            finally
            {
                tok.Close();
            }

            Debug.Assert(initialNode != null);

            return initialNode;
        }