/// <summary>
/// Creates an n-gram set whose entries carry, besides the bits handled by the
/// base class, <paramref name="quantBits"/> bits plus the number of bits
/// <c>NgramTrie.requiredBits</c> reports for <paramref name="maxNext"/>.
/// </summary>
/// <param name="parent">Trie this set belongs to; forwarded to the base constructor and stored locally.</param>
/// <param name="memPtr">Forwarded unchanged to the base constructor.</param>
/// <param name="quantBits">Bit count added to the bits required for <paramref name="maxNext"/> to form the base constructor's remaining-bits argument.</param>
/// <param name="entries">Entry count of this set; only used for the size limit check.</param>
/// <param name="maxVocab">Forwarded unchanged to the base constructor.</param>
/// <param name="maxNext">Largest "next" value; determines the width of <c>nextMask</c> and is size-checked.</param>
/// <exception cref="Error">
/// Thrown when <c>entries + 1</c> or <paramref name="maxNext"/> reaches 2^25.
/// </exception>
public MiddleNgramSet(NgramTrie parent, int memPtr, int quantBits, int entries, int maxVocab, int maxNext)
    : base(parent, memPtr, maxVocab, quantBits + NgramTrie.requiredBits(maxNext))
{
    // Upper bound (exclusive) on entries + 1 and on maxNext.
    const int limit = 1 << 25;

    _parent = parent;

    // Mask with one set bit for every bit needed to encode maxNext.
    int nextBits = NgramTrie.requiredBits(maxNext);
    nextMask = (1 << nextBits) - 1;

    // "entries + 1" is kept in this exact form so integer wrap-around behaves as before.
    bool tooManyEntries = entries + 1 >= limit;
    bool nextTooLarge = maxNext >= limit;
    if (tooManyEntries || nextTooLarge)
    {
        throw new Error("Sorry, current implementation doesn't support more than " + limit + " n-grams of particular order");
    }
}
/// <summary>
/// Loads the binary n-gram language model from <c>location</c>: verifies the
/// header, reads the n-gram counts, the quantization data and trie memory
/// (only when more than one order is used), the unigrams and the word strings,
/// then builds the unigram id map and creates the n-gram probability cache.
/// </summary>
/// <remarks>
/// The loader is closed and the "Load LM" timer is stopped even when one of the
/// read steps throws, so a failed load does not leave the loader open or the
/// timer running. Exceptions from the load itself are propagated to the caller.
/// </remarks>
public override void Allocate()
{
    TimerPool.GetTimer(this, "Load LM").Start();
    try
    {
        this.LogInfo("Loading n-gram language model from: " + location);

        // create the log file if specified
        if (ngramLogFile != null)
        {
            logFile = new StreamWriter(ngramLogFile);
        }

        // NOTE(review): this compares location.Path (not a protocol/scheme) with
        // "file", and a null Path is passed straight to FileInfo - looks like a
        // mis-ported protocol check; confirm against the type of `location`.
        // The previous code caught any exception here and retried the identical
        // expression, which could not recover anything, so the failure is now
        // reported on the first attempt.
        BinaryLoader loader;
        if (location.Path == null || location.Path.Equals("file"))
        {
            loader = new BinaryLoader(new FileInfo(location.Path));
        }
        else
        {
            loader = new BinaryLoader(location);
        }

        try
        {
            loader.verifyHeader();
            counts = loader.readCounts();

            // A non-positive or too large MaxDepth means "use every order in the file".
            if (MaxDepth <= 0 || MaxDepth > counts.Length)
            {
                MaxDepth = counts.Length;
            }

            // Quantization data is only read when orders above unigram are used.
            if (MaxDepth > 1)
            {
                quant = loader.readQuant(MaxDepth);
            }

            unigrams = loader.readUnigrams(counts[0]);

            // The trie is only built when orders above unigram are used.
            if (MaxDepth > 1)
            {
                trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize());
                loader.readTrieByteArr(trie.getMem());
            }

            //string words can be read here
            words = loader.readWords(counts[0]);
            BuildUnigramIDMap();
            ngramProbCache = new LRUCache <WordSequence, Float>(ngramCacheSize);
        }
        finally
        {
            // Release the loader whether or not reading succeeded.
            loader.close();
        }
    }
    finally
    {
        TimerPool.GetTimer(this, "Load LM").Stop();
    }
}
/// <summary>
/// Initializes the bit layout shared by n-gram sets: the word field width is
/// the number of bits <c>NgramTrie.requiredBits</c> reports for
/// <paramref name="maxVocab"/>, and the total entry width is that plus
/// <paramref name="remainingBits"/>.
/// </summary>
/// <param name="parent">Trie this set belongs to; stored as-is.</param>
/// <param name="memPtr">Stored as-is in <c>memPtr</c>.</param>
/// <param name="maxVocab">Largest word index; stored and used to derive the word field width.</param>
/// <param name="remainingBits">Bits per entry in addition to the word field.</param>
/// <exception cref="Error">Thrown when the word field would need more than 25 bits.</exception>
protected NgramSet(NgramTrie parent, int memPtr, int maxVocab, int remainingBits)
{
    // Widest supported word field, in bits.
    const int maxWordBits = 25;

    this._parent = parent;
    this.maxVocab = maxVocab;
    this.memPtr = memPtr;

    wordBits = NgramTrie.requiredBits(maxVocab);
    if (wordBits > maxWordBits)
    {
        int indexLimit = 1 << maxWordBits;
        throw new Error("Sorry, word indices more than" + indexLimit + " are not implemented");
    }

    // Low wordBits bits set: selects the word index out of a packed entry.
    wordMask = (1 << wordBits) - 1;
    totalBits = wordBits + remainingBits;

    // No entries have been inserted yet.
    insertIdx = 0;
}