/// <summary>
/// Accumulates the backoff weights that can be found for the context of
/// <paramref name="wordSequence"/>, i.e. the words preceding the last one.
/// </summary>
/// <remarks>
/// The word at index <c>Size - 2</c> is read unconditionally, so the sequence
/// must contain at least two words. Its unigram backoff is included only when
/// <c>curDepth</c> is 1. Earlier words are then visited from index
/// <c>Size - 3</c> down to 0, each one queried through
/// <c>trie.readNgramBackoff</c> with an increasing order index.
/// </remarks>
/// <param name="wordSequence">Word sequence whose context backoff is summed.</param>
/// <returns>The sum of the backoff weights that were found.</returns>
private float GetAvailableBackoff(WordSequence wordSequence)
{
    int size = wordSequence.Size;
    int contextWordId = unigramIDMap.Get(wordSequence.GetWord(size - 2));
    TrieRange range = new TrieRange(unigrams[contextWordId].next, unigrams[contextWordId + 1].next);

    float total = 0.0f;
    if (curDepth == 1)
    {
        total += unigrams[contextWordId].backoff;
    }

    int order = 0;
    for (int idx = size - 3; idx >= 0; idx--)
    {
        int id = unigramIDMap.Get(wordSequence.GetWord(idx));
        float weight = trie.readNgramBackoff(id, order, range, quant);

        // A weight is only counted when the lookup reported a hit.
        if (!range.getFound())
        {
            break;
        }
        total += weight;

        // Stop once the range says it cannot be searched any further.
        if (!range.isSearchable())
        {
            break;
        }
        order++;
    }

    return total;
}
/// <summary>
/// Refines <paramref name="prob"/> with the probabilities of higher-order
/// n-grams, as long as they can be found through <c>trie.readNgramProb</c>.
/// </summary>
/// <remarks>
/// Words are visited from index <c>Size - 2</c> down to 0. Every successful
/// lookup replaces the running probability and increments the
/// <c>curDepth</c> field. The walk ends when the order index plus one equals
/// <c>MaxDepth</c>, when the range reports no hit, or when the range is no
/// longer searchable.
/// </remarks>
/// <param name="wordSequence">Word sequence being scored.</param>
/// <param name="range">Trie range to search; updated by the trie lookups.</param>
/// <param name="prob">Probability to return if no higher-order n-gram is found.</param>
/// <returns>The probability from the last successful lookup, or <paramref name="prob"/> unchanged.</returns>
private float GetAvailableProb(WordSequence wordSequence, TrieRange range, float prob)
{
    if (range.isSearchable())
    {
        int order = 0;
        for (int idx = wordSequence.Size - 2; idx >= 0; idx--, order++)
        {
            if (order + 1 == MaxDepth)
            {
                break;
            }

            int id = unigramIDMap.Get(wordSequence.GetWord(idx));
            float candidate = trie.readNgramProb(id, order, range, quant);
            if (!range.getFound())
            {
                break;
            }

            // Lookup succeeded: keep the new value and record the extra depth.
            prob = candidate;
            curDepth++;

            if (!range.isSearchable())
            {
                break;
            }
        }
    }

    return prob;
}
/// <summary>
/// Fills <paramref name="range"/> with two values read from the bit array:
/// <c>begin</c> from the record at <paramref name="ngramIdx"/> and <c>end</c>
/// from the same field of the record that follows it.
/// </summary>
/// <remarks>
/// The field is located by skipping <c>wordBits</c> and
/// <c>getQuantBits()</c> bits from the start of a record of
/// <c>totalBits</c> bits.
/// </remarks>
/// <param name="ngramIdx">Index of the record whose field gives <c>range.begin</c>.</param>
/// <param name="range">Range object that receives <c>begin</c> and <c>end</c>.</param>
internal void readNextRange(int ngramIdx, TrieRange range)
{
    int fieldOffset = ngramIdx * totalBits + wordBits + getQuantBits();
    range.begin = _parent.bitArr.readInt(memPtr, fieldOffset, nextMask);

    // The same field of the following record is one record width further on.
    range.end = _parent.bitArr.readInt(memPtr, fieldOffset + totalBits, nextMask);
}
/// <summary>
/// Computes the probability of <paramref name="wordSequence"/> directly from
/// the unigram table and the trie.
/// </summary>
/// <remarks>
/// Starts from the unigram probability of the last word and resets the
/// <c>curDepth</c> field to 1. For sequences longer than one word the value
/// is refined by <c>GetAvailableProb</c>. If <c>curDepth</c> is still below
/// the sequence length afterwards, the result of <c>GetAvailableBackoff</c>
/// is added. The last word is read unconditionally, so the sequence must
/// contain at least one word.
/// </remarks>
/// <param name="wordSequence">Word sequence to score.</param>
/// <returns>The probability, including any backoff that was added.</returns>
private float GetProbabilityRaw(WordSequence wordSequence)
{
    int wordsNum = wordSequence.Size;
    int wordId = unigramIDMap.Get(wordSequence.GetWord(wordsNum - 1));
    TrieRange range = new TrieRange(unigrams[wordId].next, unigrams[wordId + 1].next);
    float prob = unigrams[wordId].prob;
    curDepth = 1;
    if (wordsNum == 1)
    {
        return prob;
    }

    // Find the probability of higher-order n-grams, if any.
    prob = GetAvailableProb(wordSequence, range, prob);
    if (curDepth < wordsNum)
    {
        // Use backoff for the rest of the n-gram.
        prob += GetAvailableBackoff(wordSequence);
    }

    return prob;
}