/**
 * Fetches the NGramBuffer associated with a word sequence, loading it
 * through LoadNGramBuffer when no cached buffer is found.
 *
 * @param wordSequence the word sequence whose buffer is requested
 * @return the buffer for the word sequence, or null when LoadNGramBuffer
 *         produces none
 */
private NGramBuffer GetNGramBuffer(WordSequence wordSequence)
{
    var order = wordSequence.Size;

    // The cache is only consulted for sequences of more than one word.
    NGramBuffer cached = null;
    if (order > 1)
    {
        cached = _loadedNGramBuffers[order - 1].Get(wordSequence);
    }

    if (cached != null)
    {
        return cached;
    }

    var loaded = LoadNGramBuffer(wordSequence);
    if (loaded == null)
    {
        // Misses are not remembered; storing an 'empty' buffer here would
        // be a possible optimization.
        return null;
    }

    Java.Put(_loadedNGramBuffers[order - 1], wordSequence, loaded);
    return loaded;
}
/**
 * Loads into a buffer all the NGram followers of the given N-1Gram.
 *
 * @param ws the N-1Gram to find followers
 *
 * @return a NGramBuffer of all the NGram followers of the given sequence,
 *         or null when the sequence has no followers (no bigram starts with
 *         its first word, the buffer of its prefix is unavailable, its last
 *         word is not found in that buffer, or the follower range is empty)
 *
 * Throws Exception (wrapping the original IOException) when the loader
 * fails to read the buffer.
 */
private NGramBuffer LoadNGramBuffer(WordSequence ws)
{
    var firstWordID = GetWordID(ws.GetWord(0));
    var orderBuffer = ws.Size + 1;

    var firstCurrentNGramEntry = _unigrams[firstWordID].FirstBigramEntry;
    var numberNGrams = GetNumberBigramFollowers(firstWordID) + 1;

    // 1 means that there is no bigram starting with firstWordID
    if (numberNGrams == 1)
    {
        return null;
    }

    var isMaxDepth = _loader.MaxDepth == orderBuffer;

    if (orderBuffer != 2) // only for ws.Size >= 2
    {
        var lastWordId = GetWordID(ws.GetWord(ws.Size - 1));

        // GetNGramBuffer returns null when nothing could be loaded for the
        // shorter sequence; without this guard the next call would throw a
        // NullReferenceException instead of reporting "no followers".
        var nMinus1Buffer = GetNGramBuffer(ws.GetOldest());
        if (nMinus1Buffer == null)
        {
            return null;
        }

        var index = nMinus1Buffer.FindNGramIndex(lastWordId);
        if (index == -1)
        {
            return null;
        }

        var firstNMinus1GramEntry = nMinus1Buffer.FirstNGramEntry;
        firstCurrentNGramEntry = GetFirstNGramEntry(
            nMinus1Buffer.GetNGramProbability(index), firstNMinus1GramEntry, orderBuffer);
        var firstNextNGramEntry = GetFirstNGramEntry(
            nMinus1Buffer.GetNGramProbability(index + 1), firstNMinus1GramEntry, orderBuffer);

        numberNGrams = firstNextNGramEntry - firstCurrentNGramEntry;
        if (numberNGrams == 0)
        {
            return null;
        }

        // NOTE(review): below the maximum depth one extra entry is loaded;
        // presumably it serves as the "index + 1" entry used above when the
        // next order is loaded - confirm against NGramBuffer.
        if (!isMaxDepth)
        {
            numberNGrams++;
        }
    }

    // Entries at the maximum depth use a different record size than the others.
    var bytesPerEntry = isMaxDepth ? BytesPerNmaxgram : BytesPerNgram;
    int size = numberNGrams * bytesPerEntry * _loader.BytesPerField;
    // The entry offset is computed in 64-bit arithmetic to avoid int overflow.
    long position = _loader.GetNGramOffset(orderBuffer)
                    + firstCurrentNGramEntry * (long)bytesPerEntry * _loader.BytesPerField;

    try
    {
        var buffer = _loader.LoadBuffer(position, size);
        if (isMaxDepth)
        {
            return new NMaxGramBuffer(buffer, numberNGrams, _loader.GetBigEndian(), Is32Bits(),
                orderBuffer, firstCurrentNGramEntry);
        }

        return new NGramBuffer(buffer, numberNGrams, _loader.GetBigEndian(), Is32Bits(),
            orderBuffer, firstCurrentNGramEntry);
    }
    catch (IOException ioe)
    {
        Trace.TraceError(ioe.ToString());
        // Keep the I/O failure as the inner exception so the root cause
        // is not lost to callers.
        throw new Exception("Error loading " + orderBuffer + "-Grams.", ioe);
    }
}