/// <summary>
/// Links a word node for the given pronunciation as a successor of this node and returns it.
/// If this node already has a successor for the pronunciation, that node is reused and its
/// unigram probability is raised to <paramref name="probability"/> when that value is larger.
/// Otherwise the node is taken from <paramref name="wordNodeMap"/> (being created and
/// registered there first if the map has no entry) and linked as a successor.
/// </summary>
/// <param name="pronunciation">the pronunciation to add</param>
/// <param name="probability">the unigram probability offered for the pronunciation</param>
/// <param name="wordNodeMap">map from pronunciation to the word node created for it</param>
/// <returns>the node that holds the pronunciation (new or old)</returns>
public WordNode AddSuccessor(Pronunciation pronunciation, float probability, HashMap<Pronunciation, WordNode> wordNodeMap)
{
    WordNode alreadyLinked = (WordNode)GetSuccessor(pronunciation);
    if (alreadyLinked != null)
    {
        // Already a successor of this node: only keep the larger unigram probability.
        if (alreadyLinked.UnigramProbability < probability)
        {
            alreadyLinked.UnigramProbability = probability;
        }

        return alreadyLinked;
    }

    // Not linked yet: reuse the node registered in the map, or create and register one.
    WordNode sharedNode = wordNodeMap.Get(pronunciation);
    if (sharedNode == null)
    {
        sharedNode = new WordNode(pronunciation, probability);
        wordNodeMap.Put(pronunciation, sharedNode);
    }

    PutSuccessor(pronunciation, sharedNode);
    return sharedNode;
}
/// <summary>
/// Gets the set of HMM nodes associated with the given end node. The result is looked up in
/// the end-node cache by the end node's key; on a miss it is built from every entry-point
/// unit (used as right context) and stored in the cache before being returned.
/// </summary>
/// <param name="endNode">the end node</param>
/// <returns>an array of associated HMM nodes</returns>
public HMMNode[] GetHMMNodes(EndNode endNode)
{
    HMMNode[] cachedNodes = _endNodeMap.Get(endNode.Key);
    if (cachedNodes != null)
    {
        return cachedNodes;
    }

    // One HMM node per distinct HMM; several right contexts may share the same HMM.
    HashMap<IHMM, HMMNode> nodesByHmm = new HashMap<IHMM, HMMNode>();
    Unit baseUnit = endNode.BaseUnit;
    Unit leftContext = endNode.LeftContext;

    foreach (Unit rightContext in EntryPoints)
    {
        IHMM hmm = HMMPool.GetHMM(baseUnit, leftContext, rightContext, HMMPosition.End);

        HMMNode nodeForHmm = nodesByHmm.Get(hmm);
        if (nodeForHmm == null)
        {
            nodeForHmm = new HMMNode(hmm, LogMath.LogOne);
            nodesByHmm.Add(hmm, nodeForHmm);
        }

        nodeForHmm.AddRC(rightContext);

        // Every successor of the end node also becomes a successor of the HMM node.
        foreach (Node successor in endNode.GetSuccessors())
        {
            nodeForHmm.AddSuccessor((WordNode)successor);
        }
    }

    // cache it
    HMMNode[] builtNodes = nodesByHmm.Values.ToArray();
    _endNodeMap.Add(endNode.Key, builtNodes);
    return builtNodes;
}
/// <summary>
/// Adds the given pronunciation to the lex tree. The probability is added to the entry point
/// of the first unit. A single-unit pronunciation is registered directly on that entry point;
/// a longer one is threaded through internal HMM successors, then an end node for the last
/// unit, then a word node. If the resulting word is the sentence-end word, its node is
/// remembered in <c>sentenceEndWordNode</c>.
/// </summary>
/// <param name="pronunciation">the pronunciation</param>
/// <param name="probability">the unigram probability</param>
private void AddPronunciation(Pronunciation pronunciation, float probability)
{
    Unit[] units = pronunciation.Units;
    Unit firstUnit = units[0];

    EntryPoint entryPoint = _entryPointTable.GetEntryPoint(firstUnit);
    entryPoint.AddProbability(probability);

    if (units.Length <= 1)
    {
        entryPoint.AddSingleUnitWord(pronunciation);
        return;
    }

    Node cursor = entryPoint.Node;
    Unit leftContext = firstUnit;
    int lastIndex = units.Length - 1;

    // Interior units: everything strictly between the first and the last unit.
    for (int i = 1; i < lastIndex; i++)
    {
        Unit current = units[i];
        Unit rightContext = units[i + 1];

        IHMM hmm = HMMPool.GetHMM(current, leftContext, rightContext, HMMPosition.Internal);
        if (hmm != null)
        {
            cursor = cursor.AddSuccessor(hmm, probability);
        }
        else if (_debug)
        {
            // A missing HMM is only reported; the unit is skipped and the cursor stays put.
            Trace.TraceError("Missing HMM for unit " + current.Name + " with lc=" + leftContext.Name + " rc=" + rightContext.Name);
        }

        // next left context is this unit
        leftContext = current;
    }

    // now add the last unit as an end unit, followed by the word node itself
    EndNode endNode = new EndNode(units[lastIndex], leftContext, probability);
    cursor = cursor.AddSuccessor(endNode, probability);

    WordNode wordNode = cursor.AddSuccessor(pronunciation, probability, WordNodeMap);
    if (wordNode.GetWord().IsSentenceEndWord)
    {
        sentenceEndWordNode = wordNode;
    }
}
/// <summary>
/// Constructs a LexTreeWordState. The word node, word sequence, smear values and parent
/// linguist are forwarded to the base constructor; the last node and the language
/// probability are stored on this instance.
/// </summary>
/// <param name="wordNode">the word node</param>
/// <param name="lastNode">the HMM node recorded as this state's last node</param>
/// <param name="wordSequence">the sequence of words</param>
/// <param name="smearTerm">the smear term passed to the base state</param>
/// <param name="smearProb">the smear probability passed to the base state</param>
/// <param name="languageProbability">the probability of this word</param>
/// <param name="_parent">the linguist passed to the base state</param>
public LexTreeWordState(
    WordNode wordNode,
    HMMNode lastNode,
    WordSequence wordSequence,
    float smearTerm,
    float smearProb,
    float languageProbability,
    LexTreeLinguist _parent)
    : base(wordNode, wordSequence, smearTerm, smearProb, _parent)
{
    _logLanguageProbability = languageProbability;
    _lastNode = lastNode;
}
/// <summary>
/// Returns the list of successors to this state. Cached arcs are returned when available.
/// Otherwise, for the sentence-end word the empty arc array is used; for any other word one
/// unit-state arc is created per entry point reachable from the last unit of the word (for
/// every right context of the last node), followed by one arc to the sentence-end word node.
/// The result is cached before being returned.
/// </summary>
/// <returns>an array of search state arcs</returns>
public override ISearchStateArc[] GetSuccessors()
{
    ISearchStateArc[] cachedArcs = GetCachedArcs();
    if (cachedArcs != null)
    {
        return cachedArcs;
    }

    ISearchStateArc[] successors = LexTreeLinguist.EmptyArc;
    WordNode wordNode = (WordNode)GetNode();

    if (wordNode.GetWord() != Parent.SentenceEndWord)
    {
        List<Node> entryNodes = new List<Node>();
        Unit[] rightContexts = _lastNode.GetRC();
        Unit leftUnit = wordNode.LastUnit;

        // Collect the entry points for each (left unit, right context) pair.
        foreach (Unit rightContext in rightContexts)
        {
            Node[] entryPoints = Parent.HMMTree.GetEntryPoint(leftUnit, rightContext);
            for (int i = 0; i < entryPoints.Length; i++)
            {
                entryNodes.Add(entryPoints[i]);
            }
        }

        // add a link to every possible entry point as well
        // as a link to the </s> node, which takes the final slot
        int entryCount = entryNodes.Count;
        successors = new ISearchStateArc[entryCount + 1];
        for (int slot = 0; slot < entryCount; slot++)
        {
            successors[slot] = CreateUnitStateArc((HMMNode)entryNodes[slot], this);
        }

        successors[entryCount] = CreateWordStateArc(Parent.HMMTree.SentenceEndWordNode, _lastNode, this);
    }

    PutCachedArcs(successors);
    return successors;
}
/// <summary>
/// Creates a word search state for the given word node. A filler word other than the
/// sentence-end word keeps the current word sequence and the previous state's smear term.
/// Any other word extends the word sequence, takes its weighted probability and smear term
/// from the language model, and subtracts the previous state's smear probability. The
/// sentence-end word yields a <c>LexTreeEndWordState</c>; everything else a
/// <c>LexTreeWordState</c>.
/// </summary>
/// <param name="wordNode">the word node</param>
/// <param name="lastUnit">the HMM node passed to the new state as its last node</param>
/// <param name="previous">the state supplying the previous smear term and smear probability</param>
/// <returns>the search state for the word node</returns>
protected ISearchStateArc CreateWordStateArc(WordNode wordNode, HMMNode lastUnit, LexTreeState previous)
{
    float fillerLanguageProbability = Parent.LogOne;
    Word nextWord = wordNode.GetWord();
    float inheritedSmearTerm = previous.SmearTerm;

    if (nextWord.IsFiller && !Equals(nextWord, Parent.SentenceEndWord))
    {
        return new LexTreeWordState(
            wordNode,
            lastUnit,
            _wordSequence,
            inheritedSmearTerm,
            Parent.LogOne,
            fillerLanguageProbability,
            Parent);
    }

    WordSequence extendedSequence = _wordSequence.AddWord(nextWord, Parent.MaxDepth);
    float weightedProbability = Parent.LanguageModel.GetProbability(extendedSequence) * Parent.LanguageWeight;
    float modelSmearTerm = Parent.GetSmearTermFromLanguageModel(extendedSequence);

    // subtract off the previously applied smear probability
    float languageProbability = weightedProbability - previous.SmearProb;

    bool endsSentence = Equals(nextWord, Parent.SentenceEndWord);
    WordSequence trimmedSequence = extendedSequence.Trim(Parent.MaxDepth - 1);

    if (endsSentence)
    {
        return new LexTreeEndWordState(
            wordNode,
            lastUnit,
            trimmedSequence,
            modelSmearTerm,
            Parent.LogOne,
            languageProbability,
            Parent);
    }

    return new LexTreeWordState(
        wordNode,
        lastUnit,
        trimmedSequence,
        modelSmearTerm,
        Parent.LogOne,
        languageProbability,
        Parent);
}
/// <summary>
/// Adds a word node as a successor of this node. The same word node is handed to
/// <c>PutSuccessor</c> as both of its arguments.
/// </summary>
/// <param name="wordNode">the word node to add as a successor</param>
public void AddSuccessor(WordNode wordNode)
{
    PutSuccessor(wordNode, wordNode);
}