/// <summary>
/// Allocates the dictionary resources: loads the word dictionary, any custom
/// addenda dictionaries, and the filler dictionary, and optionally creates a
/// G2P converter for out-of-vocabulary words.
/// (non-Javadoc) @see edu.cmu.sphinx.linguist.dictionary.Dictionary#allocate()
/// </summary>
public override void Allocate()
{
    if (allocated)
    {
        return;
    }

    dictionary = new HashMap<String, String>();
    wordDictionary = new HashMap<String, Word>();
    fillerWords = new HashSet<String>();

    Timer loadTimer = TimerPool.GetTimer(this, "Load Dictionary");
    loadTimer.Start();

    this.LogInfo("Loading dictionary from: " + wordDictionaryFile);
    LoadDictionary(wordDictionaryFile.OpenStream(), false);
    LoadCustomDictionaries(addendaUrlList);

    this.LogInfo("Loading filler dictionary from: " + fillerDictionaryFile);
    LoadDictionary(fillerDictionaryFile.OpenStream(), true);

    // Only build a G2P (grapheme-to-phoneme) decoder when a model is configured.
    if (g2pModelFile != null && !g2pModelFile.Path.Equals(""))
    {
        g2pDecoder = new G2PConverter(g2pModelFile);
    }

    loadTimer.Stop();

    // BUG FIX: the guard above tested 'allocated' but the flag was never set,
    // so repeated Allocate() calls reloaded every dictionary. Mark allocation
    // complete, mirroring the acoustic-model loader's '_loaded = true'.
    allocated = true;
}
/// <summary>
/// Loads the acoustic model if it has not been loaded already: sets up the
/// HMM manager and the context-independent unit map, nulls out the unused
/// transformation pools, then reads the actual model files.
/// </summary>
public void Load()
{
    if (_loaded)
    {
        return;
    }

    TimerPool.GetTimer(this, "Load AM").Start();

    HmmManager = new HMMManager();
    ContextIndependentUnits = new LinkedHashMap<String, Unit>();

    // Dummy pools for these elements — this loader does not use them.
    MeansTransformationMatrixPool = null;
    MeansTransformationVectorPool = null;
    VarianceTransformationMatrixPool = null;
    VarianceTransformationVectorPool = null;
    TransformMatrix = null;

    // Perform the actual acoustic model loading.
    try
    {
        LoadModelFiles(Model);
    }
    catch (UriFormatException e)
    {
        this.LogInfo(e.Message);
        throw new RuntimeException(e);
    }

    _loaded = true;
    TimerPool.GetTimer(this, "Load AM").Stop();
}
/// <summary>
/// Creates the grammar: allocates the dictionary, resets grammar state, and
/// times the construction of the initial grammar node.
/// </summary>
/// <exception cref="IOException">If the grammar cannot be loaded.</exception>
public void Allocate()
{
    Dictionary.Allocate();
    NewGrammar();

    var grammarLoadTimer = TimerPool.GetTimer(this, "grammarLoad");
    grammarLoadTimer.Start();
    InitialNode = CreateGrammar();
    grammarLoadTimer.Stop();
}
/// <summary>
/// Loads the trie-based n-gram language model: opens the optional n-gram log
/// file, reads the binary LM (counts, quantization tables, unigrams, trie
/// bytes and word strings), builds the unigram id map and the n-gram cache.
/// </summary>
public override void Allocate()
{
    TimerPool.GetTimer(this, "Load LM").Start();
    this.LogInfo("Loading n-gram language model from: " + location);

    // Create the log file if specified.
    if (ngramLogFile != null)
    {
        logFile = new StreamWriter(ngramLogFile);
    }

    // BUG FIX: the original took the file branch when Path was null or equal
    // to "file" and then unconditionally called new FileInfo(location.Path),
    // which throws ArgumentNullException for a null path; its catch block
    // re-ran the identical constructor, so the exception escaped anyway.
    // A null path now falls through to the URL-based loader, and the no-op
    // try/catch is removed.
    // NOTE(review): comparing Path to the literal "file" looks like it was
    // meant to test the URL *protocol/scheme* (cf. the sibling loader's
    // "Java's URL.getProtocol()==null" comment) — confirm against upstream.
    BinaryLoader loader;
    if (location.Path != null && location.Path.Equals("file"))
    {
        loader = new BinaryLoader(new FileInfo(location.Path));
    }
    else
    {
        loader = new BinaryLoader(location);
    }

    loader.verifyHeader();
    counts = loader.readCounts();

    // Clamp the configured depth to what the model actually contains.
    if (MaxDepth <= 0 || MaxDepth > counts.Length)
    {
        MaxDepth = counts.Length;
    }
    if (MaxDepth > 1)
    {
        quant = loader.readQuant(MaxDepth);
    }

    unigrams = loader.readUnigrams(counts[0]);

    if (MaxDepth > 1)
    {
        trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize());
        loader.readTrieByteArr(trie.getMem());
    }

    // String words can be read here.
    words = loader.readWords(counts[0]);
    BuildUnigramIDMap();
    ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);

    loader.close();
    TimerPool.GetTimer(this, "Load LM").Stop();
}
/// <summary>
/// Creates the HMMTree.
/// </summary>
/// <param name="pool">The pool of HMMs and units.</param>
/// <param name="dictionary">The dictionary containing the pronunciations.</param>
/// <param name="lm">The source of the set of words to add to the lex tree.</param>
/// <param name="addFillerWords">Controls whether filler words are added.
/// (NOTE(review): the original doc read "if false add filler words", which
/// looks inverted — confirm against Compile()'s use of the flag.)</param>
/// <param name="languageWeight">The language weight.</param>
public HMMTree(HMMPool pool, IDictionary dictionary, LanguageModel lm, Boolean addFillerWords, float languageWeight)
{
    HMMPool = pool;
    Dictionary = dictionary;
    _lm = lm;
    _addFillerWords = addFillerWords;
    _languageWeight = languageWeight;
    _endNodeMap = new HashMap<Object, HMMNode[]>();
    WordNodeMap = new HashMap<Pronunciation, WordNode>();

    // Building the tree is the expensive part, so it is timed.
    TimerPool.GetTimer(this, "Create HMM Tree").Start();
    Compile();
    TimerPool.GetTimer(this, "Create HMM Tree").Stop();
}
/// <summary>
/// Compiles the n-gram into a lex tree that is used during the search.
/// </summary>
private void CompileGrammar()
{
    TimerPool.GetTimer(this, "Compile").Start();

    SentenceEndWord = Dictionary.GetSentenceEndWord();
    _sentenceStartWordArray = new Word[] { Dictionary.GetSentenceStartWord() };
    MaxDepth = LanguageModel.MaxDepth;

    GenerateHmmTree();

    TimerPool.GetTimer(this, "Compile").Stop();

    // With compilation done, build the search graph from the initial state.
    _searchGraph = new LexTreeSearchGraph(GetInitialSearchState());
}
/// <summary>
/// Runs a micro-benchmark of HMM lookups: performs one million GetHMM calls
/// cycling through the known ids/positions and reports how many lookups
/// returned null.
/// </summary>
void Benchmark()
{
    const int iterations = 1000000;
    var misses = 0;

    this.LogInfo("benchmarking ...");
    TimerPool.GetTimer(this, "hmmPoolBenchmark").Start();

    for (var n = 0; n < iterations; n++)
    {
        var hmm = GetHMM(Ids[n % Ids.Length], Pos[n % Pos.Length]);
        if (hmm == null)
        {
            misses++;
        }
    }

    TimerPool.GetTimer(this, "hmmPoolBenchmark").Stop();
    this.LogInfo("null count " + misses);
}
/// <summary>
/// Wires up the processing chain: links each data processor to its
/// predecessor, remembers the first and last processors in the chain,
/// then runs common initialization.
/// </summary>
private void Init()
{
    _timer = TimerPool.GetTimer(this, "FrontEnd");

    LastDataProcessor = null;
    foreach (IDataProcessor processor in _frontEndList)
    {
        Debug.Assert(processor != null);

        if (LastDataProcessor != null)
        {
            processor.Predecessor = LastDataProcessor;
        }
        if (_first == null)
        {
            _first = processor;
        }
        LastDataProcessor = processor;
    }

    Initialize();
}
/// <summary>
/// Allocates search manager resources: the per-phase timers, the scoring and
/// pruning statistics, and the linguist/pruner/scorer components.
/// @see Search.SearchManager#allocate()
/// </summary>
public override void Allocate()
{
    // Timers for the three main search phases.
    _scoreTimer = TimerPool.GetTimer(this, "Score");
    _pruneTimer = TimerPool.GetTimer(this, "Prune");
    GrowTimer = TimerPool.GetTimer(this, "Grow");

    // Statistics gathered while scoring and pruning tokens.
    _totalTokensScored = StatisticsVariable.GetStatisticsVariable("totalTokensScored");
    _tokensPerSecond = StatisticsVariable.GetStatisticsVariable("tokensScoredPerSecond");
    _curTokensScored = StatisticsVariable.GetStatisticsVariable("curTokensScored");
    TokensCreated = StatisticsVariable.GetStatisticsVariable("tokensCreated");
    _viterbiPruned = StatisticsVariable.GetStatisticsVariable("viterbiPruned");
    _beamPruned = StatisticsVariable.GetStatisticsVariable("beamPruned");

    try
    {
        Linguist.Allocate();
        _pruner.Allocate();
        _scorer.Allocate();
    }
    catch (IOException e)
    {
        throw new SystemException("Allocation of search manager resources failed", e);
    }
}
/// <summary>
/// Allocates search manager resources: the scoring statistics, the per-phase
/// timers, and the linguist/pruner/scorer components.
/// </summary>
public override void Allocate()
{
    // Statistics gathered while scoring tokens.
    TotalTokensScored = StatisticsVariable.GetStatisticsVariable("totalTokensScored");
    CurTokensScored = StatisticsVariable.GetStatisticsVariable("curTokensScored");
    TokensCreated = StatisticsVariable.GetStatisticsVariable("tokensCreated");

    // Timers for the three main search phases.
    ScoreTimer = TimerPool.GetTimer(this, "Score");
    PruneTimer = TimerPool.GetTimer(this, "Prune");
    GrowTimer = TimerPool.GetTimer(this, "Grow");

    try
    {
        Linguist.Allocate();
        Pruner.Allocate();
        Scorer.Allocate();
    }
    catch (IOException e)
    {
        throw new SystemException("Allocation of search manager resources failed", e);
    }
}
/// <summary>
/// Compiles the grammar into a sentence HMM. A GState is created for every
/// grammar node; the gstates then go through four timed phases — context
/// collection, expansion, and connection — before the flat search graph is
/// built from the starting state.
/// </summary>
/// <returns>The set of all sentence HMM states reachable from the initial state.</returns>
protected HashSet<SentenceHMMState> CompileGrammar()
{
    InitialGrammarState = Grammar.InitialNode;
    NodeStateMap = new HashMap<GrammarNode, GState>();
    ArcPool = new Cache<SentenceHMMStateArc>();

    TimerPool.GetTimer(this, "Compile").Start();

    // Phase 1: create a GState for every grammar node.
    TimerPool.GetTimer(this, "Create States").Start();
    var states = new List<GState>();
    foreach (var node in Grammar.GrammarNodes)
    {
        states.Add(CreateGState(node));
    }
    TimerPool.GetTimer(this, "Create States").Stop();

    AddStartingPath(); // ensures an initial path to the start state

    // Phase 2: gather contexts so each gstate knows its surroundings.
    TimerPool.GetTimer(this, "Collect Contexts").Start();
    foreach (var state in states)
    {
        state.CollectContexts();
    }
    TimerPool.GetTimer(this, "Collect Contexts").Stop();

    // Phase 3: with contexts known, expand every gstate fully.
    TimerPool.GetTimer(this, "Expand States").Start();
    foreach (var state in states)
    {
        state.Expand();
    }
    TimerPool.GetTimer(this, "Expand States").Stop();

    // Phase 4: connect the fully expanded states together.
    TimerPool.GetTimer(this, "Connect Nodes").Start();
    foreach (var state in states)
    {
        state.Connect();
    }
    TimerPool.GetTimer(this, "Connect Nodes").Stop();

    var initialState = FindStartingState();

    // Optionally branch into a CI phone loop for out-of-grammar utterances.
    if (AddOutOfGrammarBranch)
    {
        var phoneLoop = new CIPhoneLoop(PhoneLoopAcousticModel, LogPhoneInsertionProbability);
        var firstBranchState = (SentenceHMMState)phoneLoop.GetSearchGraph().InitialState;
        initialState.Connect(GetArc(firstBranchState, LogOne, LogOutOfGrammarBranchProbability));
    }

    _searchGraph = new FlatSearchGraph(initialState);
    TimerPool.GetTimer(this, "Compile").Stop();

    // Dump diagnostic information about the compiled gstates if requested.
    if (_dumpGStates)
    {
        foreach (var node in Grammar.GrammarNodes)
        {
            GetGState(node).DumpInfo();
        }
    }

    // Release compilation scratch structures.
    NodeStateMap = null;
    ArcPool = null;

    return SentenceHMMState.CollectStates(initialState);
}
/// <summary>
/// Loads the n-gram language model
/// (see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate):
/// chooses a file- or stream-based binary loader, reads the n-gram tables for
/// every depth, builds the unigram id map and, when FullSmear is enabled, the
/// smear information.
/// </summary>
public override void Allocate()
{
    TimerPool.GetTimer(this, "Load LM").Start();
    this.LogInfo("Loading n-gram language model from: " + Location);

    // Create the log file if specified.
    if (NgramLogFile != null)
    {
        _logFile = new StreamWriter(NgramLogFile);
    }

    // Java's URL.getProtocol()==null
    if (!String.IsNullOrEmpty(Location.Path))
    {
        // BUG FIX: the original wrapped this constructor in a try/catch whose
        // catch block executed the *identical* constructor call, so a failure
        // was simply retried once and the second exception escaped anyway.
        // The no-op handler is removed; construction failures now propagate
        // directly with their original stack trace.
        _loader = new BinaryLoader(new FileInfo(Location.Path), Format, ApplyLanguageWeightAndWip, LanguageWeight, Wip, UnigramWeight);
    }
    else
    {
        // NOTE(review): Location.Path is known null/empty on this branch, yet
        // it is what gets passed to BinaryStreamLoader — this looks like it
        // should be the Location itself (as in the Java original). Confirm
        // before changing; behavior is kept as-is.
        _loader = new BinaryStreamLoader(Location.Path, Format, ApplyLanguageWeightAndWip, LanguageWeight, Wip, UnigramWeight);
    }

    _unigramIDMap = new HashMap<Word, UnigramProbability>();
    _unigrams = _loader.Unigrams;

    // Per-depth tables: index i-1 holds the data for the i-grams.
    _loadedNGramBuffers = new HashMap<WordSequence, NGramBuffer>[_loader.MaxDepth];
    _ngramProbTable = new float[_loader.MaxDepth][];
    _ngramBackoffTable = new float[_loader.MaxDepth][];
    _ngramSegmentTable = new int[_loader.MaxDepth][];

    for (var i = 1; i <= _loader.MaxDepth; i++)
    {
        _loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
        if (i >= 2)
        {
            _ngramProbTable[i - 1] = _loader.GetNGramProbabilities(i);
        }
        if (i > 2)
        {
            _ngramBackoffTable[i - 1] = _loader.GetNGramBackoffWeights(i);
            _ngramSegmentTable[i - 1] = _loader.GetNGramSegments(i);
        }
    }

    _ngramDepthCache = new LRUCache<WordSequence, Float>(NgramCacheSize);

    // Prefer the dictionary-based id mapping when a dictionary is available.
    if (Dictionary != null)
    {
        BuildUnigramIDMap(Dictionary);
    }
    else
    {
        BuildUnigramIDMap();
    }

    _loadedBigramBuffers = new NGramBuffer[_unigrams.Length];

    // Clamp the configured depth to what the loaded model supports.
    if (MaxDepth <= 0 || MaxDepth > _loader.MaxDepth)
    {
        MaxDepth = _loader.MaxDepth;
    }

    for (var i = 1; i <= _loader.MaxDepth; i++)
    {
        this.LogInfo(i + "-grams: " + _loader.GetNumberNGrams(i));
    }

    if (FullSmear)
    {
        this.LogInfo("Full Smear");
        try
        {
            this.LogInfo("... Reading ...");
            ReadSmearInfo("smear.dat");
            this.LogInfo("... Done ");
        }
        catch (IOException e)
        {
            // Cached smear data unavailable — compute it from scratch.
            this.LogInfo("... " + e);
            this.LogInfo("... Calculating");
            BuildSmearInfo();
            this.LogInfo("... Writing");
            // writeSmearInfo("smear.dat");
            this.LogInfo("... Done");
        }
    }

    TimerPool.GetTimer(this, "Load LM").Stop();
}
/// <summary>
/// Initializes a new instance of the <see cref="HMMPool"/> class.
/// </summary>
/// <param name="model">The model to use for the pool.</param>
/// <param name="unitManager">The unit manager.</param>
/// <exception cref="System.Exception">
/// LexTreeLinguist: Unsupported left context size
/// or
/// LexTreeLinguist: Unsupported right context size
/// </exception>
public HMMPool(AcousticModel model, UnitManager unitManager)
{
    this.Model = model;
    this._unitManager = unitManager;

    TimerPool.GetTimer(this, "Build HMM Pool").Start();

    // Only single-phone left/right contexts are supported.
    if (model.GetLeftContextSize() != 1)
    {
        throw new Exception("LexTreeLinguist: Unsupported left context size");
    }
    if (model.GetRightContextSize() != 1)
    {
        throw new Exception("LexTreeLinguist: Unsupported right context size");
    }

    // Find the highest base id among the context-independent units.
    var maxCiUnits = 0;
    var ciUnits = model.GetContextIndependentUnitIterator();
    while (ciUnits.MoveNext())
    {
        var ciUnit = ciUnits.Current;
        if (ciUnit.BaseID > maxCiUnits)
        {
            maxCiUnits = ciUnit.BaseID;
        }
    }
    NumCiUnits = maxCiUnits + 1;

    // One slot per (unit, left, right) combination.
    _unitTable = new Unit[NumCiUnits * NumCiUnits * NumCiUnits];

    var hmmIterator = model.GetHMMIterator();
    while (hmmIterator.MoveNext())
    {
        var hmmUnit = hmmIterator.Current.Unit;
        _unitTable[GetId(hmmUnit)] = hmmUnit;
    }

    // Build the hmm table to allow quick access to the hmms.
    _hmmTable = new Dictionary<HMMPosition, IHMM[]>();
    foreach (HMMPosition position in Enum.GetValues(typeof(HMMPosition)))
    {
        var hmms = new IHMM[_unitTable.Length];
        Java.Put(_hmmTable, position, hmms);
        for (var index = 1; index < _unitTable.Length; index++)
        {
            // Synthesize units that were not seen directly in the model.
            var unit = _unitTable[index] ?? SynthesizeUnit(index);
            if (unit != null)
            {
                hmms[index] = model.LookupNearestHMM(unit, position, false);
                Debug.Assert(hmms[index] != null);
            }
        }
    }

    TimerPool.GetTimer(this, "Build HMM Pool").Stop();
}