Beispiel #1
0
        /// <summary>
        /// Sums the backoff values that can be located for the words preceding
        /// the last word of <paramref name="wordSequence"/>.
        /// </summary>
        /// <remarks>
        /// The walk starts at the second-to-last word and moves towards the start
        /// of the sequence, stopping as soon as the trie lookup reports nothing
        /// found or the range can no longer be searched.
        /// NOTE(review): the second-to-last word is looked up unconditionally, so
        /// the sequence presumably must contain at least two words - confirm
        /// against callers.
        /// </remarks>
        /// <param name="wordSequence">Sequence whose context backoff is accumulated.</param>
        /// <returns>The accumulated backoff value.</returns>
        private float GetAvailableBackoff(WordSequence wordSequence)
        {
            int       length    = wordSequence.Size;
            int       contextId = unigramIDMap.Get(wordSequence.GetWord(length - 2));
            TrieRange range     = new TrieRange(unigrams[contextId].next, unigrams[contextId + 1].next);
            float     total     = 0.0f;

            // The unigram backoff is only added while curDepth is still 1
            // (presumably meaning no higher-order n-gram was matched - confirm).
            if (curDepth == 1)
            {
                total += unigrams[contextId].backoff;
            }

            // Walk the remaining context words right-to-left; the n-gram level
            // passed to the trie grows by one for every step taken.
            int firstIdx = length - 3;
            for (int idx = firstIdx; idx >= 0; idx--)
            {
                int   level       = firstIdx - idx;
                int   contextWord = unigramIDMap.Get(wordSequence.GetWord(idx));
                float found       = trie.readNgramBackoff(contextWord, level, range, quant);
                if (!range.getFound())
                {
                    // Nothing found at this level: discard the value just read.
                    break;
                }
                total += found;
                if (!range.isSearchable())
                {
                    break;
                }
            }
            return total;
        }
Beispiel #2
0
 /// <summary>
 /// Replaces <paramref name="prob"/> with the value read from the trie for
 /// successively longer n-grams, for as long as the lookups keep succeeding.
 /// </summary>
 /// <remarks>
 /// Increments <c>curDepth</c> once for every level that was found.
 /// </remarks>
 /// <param name="wordSequence">Sequence being scored; context words are read from its second-to-last word backwards.</param>
 /// <param name="range">Trie range to search; it is passed to every trie read and re-checked afterwards.</param>
 /// <param name="prob">Value returned unchanged if no further level is found.</param>
 /// <returns>The value read at the last level found, or <paramref name="prob"/> if none was.</returns>
 private float GetAvailableProb(WordSequence wordSequence, TrieRange range, float prob)
 {
     if (range.isSearchable())
     {
         int lastContextIdx = wordSequence.Size - 2;

         // level counts up from 0 while the word index moves towards the
         // start of the sequence.
         for (int level = 0; level <= lastContextIdx; level++)
         {
             if (level + 1 == MaxDepth)
             {
                 break;
             }
             int   contextWord = unigramIDMap.Get(wordSequence.GetWord(lastContextIdx - level));
             float candidate   = trie.readNgramProb(contextWord, level, range, quant);
             if (!range.getFound())
             {
                 // Lookup failed: keep the value from the previous level.
                 break;
             }
             prob = candidate;
             curDepth++;
             if (!range.isSearchable())
             {
                 break;
             }
         }
     }
     return prob;
 }
        /// <summary>
        /// Fills <paramref name="range"/> with two values read from the bit array:
        /// <c>begin</c> from the record at <paramref name="ngramIdx"/> and
        /// <c>end</c> from the record immediately after it.
        /// </summary>
        /// <param name="ngramIdx">Index of the record; multiplied by <c>totalBits</c> to obtain its bit offset.</param>
        /// <param name="range">Range object whose <c>begin</c> and <c>end</c> are overwritten.</param>
        internal void readNextRange(int ngramIdx, TrieRange range)
        {
            // Skip the word bits and the quantisation bits of the record to
            // reach the position the value is read from.
            int nextFieldOffset = ngramIdx * totalBits + wordBits + getQuantBits();

            range.begin = _parent.bitArr.readInt(memPtr, nextFieldOffset, nextMask);
            // The same position one full record further on supplies the end.
            range.end   = _parent.bitArr.readInt(memPtr, nextFieldOffset + totalBits, nextMask);
        }
Beispiel #4
0
        /// <summary>
        /// Computes the raw score of <paramref name="wordSequence"/>: starts from
        /// the unigram value of its last word, refines it with whatever
        /// longer n-grams are found, and adds backoff if not every word was covered.
        /// </summary>
        /// <remarks>
        /// Resets <c>curDepth</c> to 1 before searching; <c>GetAvailableProb</c>
        /// then raises it for each further level it finds.
        /// </remarks>
        /// <param name="wordSequence">Sequence to score.</param>
        /// <returns>The resulting value for the sequence.</returns>
        private float GetProbabilityRaw(WordSequence wordSequence)
        {
            int       length = wordSequence.Size;
            int       lastId = unigramIDMap.Get(wordSequence.GetWord(length - 1));
            TrieRange range  = new TrieRange(unigrams[lastId].next, unigrams[lastId + 1].next);
            float     score  = unigrams[lastId].prob;

            curDepth = 1;
            if (length != 1)
            {
                // Look for n-grams of higher order, if any.
                score = GetAvailableProb(wordSequence, range, score);
                if (curDepth < length)
                {
                    // Not every word was matched: add backoff for the rest.
                    score += GetAvailableBackoff(wordSequence);
                }
            }
            return score;
        }