Example #1
0
        /// <summary>
        /// Loads the word and filler dictionaries, timing the whole operation.
        /// Does nothing when the dictionary has already been allocated.
        /// (ported from edu.cmu.sphinx.linguist.dictionary.Dictionary#allocate())
        /// </summary>
        public override void Allocate()
        {
            if (allocated)
            {
                return;
            }

            dictionary     = new HashMap<String, String>();
            wordDictionary = new HashMap<String, Word>();
            fillerWords    = new HashSet<String>();

            Timer loadTimer = TimerPool.GetTimer(this, "Load Dictionary");
            loadTimer.Start();

            this.LogInfo("Loading dictionary from: " + wordDictionaryFile);
            LoadDictionary(wordDictionaryFile.OpenStream(), false);
            LoadCustomDictionaries(addendaUrlList);

            this.LogInfo("Loading filler dictionary from: " + fillerDictionaryFile);
            LoadDictionary(fillerDictionaryFile.OpenStream(), true);

            // Build a G2P decoder only when a model file was actually configured.
            if (g2pModelFile != null && !g2pModelFile.Path.Equals(""))
            {
                g2pDecoder = new G2PConverter(g2pModelFile);
            }

            loadTimer.Stop();
        }
Example #2
0
        /// <summary>
        /// Loads the acoustic model, timing the load; does nothing when the
        /// model has already been loaded.
        /// </summary>
        public void Load()
        {
            if (_loaded)
            {
                return;
            }

            var loadTimer = TimerPool.GetTimer(this, "Load AM");
            loadTimer.Start();

            HmmManager = new HMMManager();
            ContextIndependentUnits = new LinkedHashMap<String, Unit>();

            // Dummy pools for these elements.
            MeansTransformationMatrixPool    = null;
            MeansTransformationVectorPool    = null;
            VarianceTransformationMatrixPool = null;
            VarianceTransformationVectorPool = null;
            TransformMatrix = null;

            // Do the actual acoustic model loading.
            try
            {
                LoadModelFiles(Model);
            }
            catch (UriFormatException e)
            {
                this.LogInfo(e.Message);
                throw new RuntimeException(e);
            }

            // Done.
            _loaded = true;
            loadTimer.Stop();
        }
Example #3
0
        /// <summary>
        /// Creates the grammar: allocates the dictionary, resets grammar state,
        /// and builds the initial grammar node, timing the construction.
        /// (original Javadoc declared: @throws java.io.IOException)
        /// </summary>
        public void Allocate()
        {
            Dictionary.Allocate();
            NewGrammar();

            var grammarTimer = TimerPool.GetTimer(this, "grammarLoad");
            grammarTimer.Start();
            InitialNode = CreateGrammar();
            grammarTimer.Stop();
        }
Example #4
0
        /// <summary>
        /// Loads the trie-based n-gram language model, timing the load: reads
        /// the header, counts, quantization tables, unigrams, the trie byte
        /// array and the word list, then builds the unigram ID map and the
        /// n-gram probability cache.
        /// </summary>
        public override void Allocate()
        {
            TimerPool.GetTimer(this, "Load LM").Start();

            this.LogInfo("Loading n-gram language model from: " + location);

            // create the log file if specified
            if (ngramLogFile != null)
            {
                logFile = new StreamWriter(ngramLogFile);
            }

            BinaryLoader loader;

            // NOTE(review): this mirrors Java's URL.getProtocol() check, but the
            // port compares Path (not the scheme) against "file", and a null Path
            // makes new FileInfo(location.Path) throw — confirm against callers.
            if (location.Path == null || location.Path.Equals("file"))
            {
                // The original wrapped this in a catch(Exception) that retried the
                // exact same constructor call, discarding the first exception; a
                // single call is equivalent and keeps the real failure visible.
                loader = new BinaryLoader(new FileInfo(location.Path));
            }
            else
            {
                loader = new BinaryLoader(location);
            }

            loader.verifyHeader();
            counts = loader.readCounts();

            // Clamp the configured depth to what the model actually provides.
            if (MaxDepth <= 0 || MaxDepth > counts.Length)
            {
                MaxDepth = counts.Length;
            }

            // Quantization tables and the trie itself only exist beyond unigrams.
            if (MaxDepth > 1)
            {
                quant = loader.readQuant(MaxDepth);
            }
            unigrams = loader.readUnigrams(counts[0]);
            if (MaxDepth > 1)
            {
                trie = new NgramTrie(counts, quant.getProbBoSize(), quant.getProbSize());
                loader.readTrieByteArr(trie.getMem());
            }
            //string words can be read here
            words = loader.readWords(counts[0]);
            BuildUnigramIDMap();
            ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
            loader.close();
            TimerPool.GetTimer(this, "Load LM").Stop();
        }
Example #5
0
        /// <summary>
        /// Creates the HMMTree and compiles it immediately, timing the build.
        /// </summary>
        /// <param name="pool">the pool of HMMs and units</param>
        /// <param name="dictionary">the dictionary containing the pronunciations</param>
        /// <param name="lm">the source of the set of words to add to the lex tree</param>
        /// <param name="addFillerWords">if <code>false</code> add filler words (wording as in the original Javadoc — verify)</param>
        /// <param name="languageWeight">the languageWeight</param>
        public HMMTree(HMMPool pool, IDictionary dictionary, LanguageModel lm, Boolean addFillerWords, float languageWeight)
        {
            HMMPool         = pool;
            Dictionary      = dictionary;
            _lm             = lm;
            _addFillerWords = addFillerWords;
            _languageWeight = languageWeight;
            _endNodeMap     = new HashMap<Object, HMMNode[]>();
            WordNodeMap     = new HashMap<Pronunciation, WordNode>();

            TimerPool.GetTimer(this, "Create HMM Tree").Start();
            Compile();
            TimerPool.GetTimer(this, "Create HMM Tree").Stop();
        }
        /// <summary>Compiles the n-gram into a lex tree that is used during the search.</summary>
        private void CompileGrammar()
        {
            var compileTimer = TimerPool.GetTimer(this, "Compile");
            compileTimer.Start();

            SentenceEndWord = Dictionary.GetSentenceEndWord();
            _sentenceStartWordArray = new[] { Dictionary.GetSentenceStartWord() };
            MaxDepth = LanguageModel.MaxDepth;

            GenerateHmmTree();

            compileTimer.Stop();

            // Now that we are all done, dump out some interesting
            // information about the process
            _searchGraph = new LexTreeSearchGraph(GetInitialSearchState());
        }
Example #7
0
        /// <summary>
        /// Exercises GetHMM one million times with cycling id/position pairs
        /// and logs how many lookups returned null, timing the run.
        /// </summary>
        void Benchmark()
        {
            var misses = 0;

            this.LogInfo("benchmarking ...");
            var benchTimer = TimerPool.GetTimer(this, "hmmPoolBenchmark");
            benchTimer.Start();

            for (var n = 0; n < 1000000; n++)
            {
                var unitId   = Ids[n % Ids.Length];
                var position = Pos[n % Pos.Length];
                if (GetHMM(unitId, position) == null)
                {
                    misses++;
                }
            }

            benchTimer.Stop();
            this.LogInfo("null count " + misses);
        }
Example #8
0
        /// <summary>
        /// Chains the processors in _frontEndList together (each processor's
        /// Predecessor becomes the one before it), remembers the first and the
        /// last processor in the chain, then calls Initialize().
        /// </summary>
        private void Init()
        {
            _timer = TimerPool.GetTimer(this, "FrontEnd");

            LastDataProcessor = null;
            foreach (IDataProcessor processor in _frontEndList)
            {
                Debug.Assert(processor != null);

                // Link this processor behind the previous one, if any.
                if (LastDataProcessor != null)
                {
                    processor.Predecessor = LastDataProcessor;
                }

                // Remember the head of the chain.
                if (_first == null)
                {
                    _first = processor;
                }

                LastDataProcessor = processor;
            }
            Initialize();
        }
Example #9
0
        /// <summary>
        /// @see Search.SearchManager#allocate()
        /// Looks up the shared scoring/pruning statistics, allocates the
        /// linguist, pruner and scorer, then creates the decode-loop timers.
        /// </summary>
        public override void Allocate()
        {
            // Shared statistics variables, looked up (or created) by name.
            _totalTokensScored = StatisticsVariable.GetStatisticsVariable("totalTokensScored");
            _tokensPerSecond   = StatisticsVariable.GetStatisticsVariable("tokensScoredPerSecond");
            _curTokensScored   = StatisticsVariable.GetStatisticsVariable("curTokensScored");
            TokensCreated      = StatisticsVariable.GetStatisticsVariable("tokensCreated");
            _viterbiPruned     = StatisticsVariable.GetStatisticsVariable("viterbiPruned");
            _beamPruned        = StatisticsVariable.GetStatisticsVariable("beamPruned");

            try
            {
                // Allocate the major components; IOException here indicates a
                // resource (model/grammar) could not be loaded.
                Linguist.Allocate();
                _pruner.Allocate();
                _scorer.Allocate();
            }
            catch (IOException e)
            {
                throw new SystemException("Allocation of search manager resources failed", e);
            }

            // Timers used by the score/prune/grow phases of the decode loop.
            _scoreTimer = TimerPool.GetTimer(this, "Score");
            _pruneTimer = TimerPool.GetTimer(this, "Prune");
            GrowTimer   = TimerPool.GetTimer(this, "Grow");
        }
        /// <summary>
        /// Allocates the search manager: creates the score/prune/grow timers,
        /// looks up the token statistics, and allocates the linguist, pruner
        /// and scorer.
        /// </summary>
        public override void Allocate()
        {
            // tokenTracker = new TokenTracker();
            // tokenTypeTracker = new TokenTypeTracker();

            // Timers used by the score/prune/grow phases of the decode loop.
            ScoreTimer = TimerPool.GetTimer(this, "Score");
            PruneTimer = TimerPool.GetTimer(this, "Prune");
            GrowTimer  = TimerPool.GetTimer(this, "Grow");

            // Shared statistics variables, looked up (or created) by name.
            TotalTokensScored = StatisticsVariable.GetStatisticsVariable("totalTokensScored");
            CurTokensScored   = StatisticsVariable.GetStatisticsVariable("curTokensScored");
            TokensCreated     = StatisticsVariable.GetStatisticsVariable("tokensCreated");

            try
            {
                // Allocate the major components; IOException here indicates a
                // resource (model/grammar) could not be loaded.
                Linguist.Allocate();
                Pruner.Allocate();
                Scorer.Allocate();
            }
            catch (IOException e)
            {
                throw new SystemException("Allocation of search manager resources failed", e);
            }
        }
Example #11
0
        /// <summary>
        /// Compiles the grammar into a sentence HMM. A GrammarJob is created for the
        /// initial grammar node and added to the GrammarJob queue. While there are
        /// jobs left on the grammar job queue, a job is removed from the queue and
        /// the associated grammar node is expanded and attached to the tails.
        /// GrammarJobs for the successors are added to the grammar job queue.
        /// </summary>
        /// <returns>The set of sentence HMM states reachable from the initial state.</returns>
        protected HashSet <SentenceHMMState> CompileGrammar()
        {
            InitialGrammarState = Grammar.InitialNode;

            NodeStateMap = new HashMap <GrammarNode, GState>();
            // NOTE: created here rather than in the declaration section (22.12.2014)

            ArcPool = new Cache <SentenceHMMStateArc>();

            var gstateList = new List <GState>();

            TimerPool.GetTimer(this, "Compile").Start();

            // Phase 1: get the nodes from the grammar and create states
            // for them. Add the non-empty gstates to the gstate list.
            TimerPool.GetTimer(this, "Create States").Start();
            foreach (var grammarNode in Grammar.GrammarNodes)
            {
                var gstate = CreateGState(grammarNode);
                gstateList.Add(gstate);
            }
            TimerPool.GetTimer(this, "Create States").Stop();
            AddStartingPath();

            // ensures an initial path to the start state.
            // Phase 2: prep all the gstates, by gathering all of the contexts up;
            // this allows each gstate to know about its surrounding contexts.
            TimerPool.GetTimer(this, "Collect Contexts").Start();
            foreach (var gstate in gstateList)
            {
                gstate.CollectContexts();
            }
            TimerPool.GetTimer(this, "Collect Contexts").Stop();

            // Phase 3: now all gstates know all about their contexts, we can expand them fully.
            TimerPool.GetTimer(this, "Expand States").Start();
            foreach (var gstate in gstateList)
            {
                gstate.Expand();
            }
            TimerPool.GetTimer(this, "Expand States").Stop();

            // Phase 4: now that all states are expanded fully, we can connect all the states up.
            TimerPool.GetTimer(this, "Connect Nodes").Start();
            foreach (var gstate in gstateList)
            {
                gstate.Connect();
            }
            TimerPool.GetTimer(this, "Connect Nodes").Stop();

            var initialState = FindStartingState();

            // Add an out-of-grammar branch if configured to do so.
            if (AddOutOfGrammarBranch)
            {
                var phoneLoop        = new CIPhoneLoop(PhoneLoopAcousticModel, LogPhoneInsertionProbability);
                var firstBranchState = (SentenceHMMState)phoneLoop.GetSearchGraph().InitialState;
                initialState.Connect(GetArc(firstBranchState, LogOne, LogOutOfGrammarBranchProbability));
            }

            _searchGraph = new FlatSearchGraph(initialState);
            TimerPool.GetTimer(this, "Compile").Stop();
            // Now that we are all done, dump out some interesting
            // information about the process.
            if (_dumpGStates)
            {
                foreach (var grammarNode in Grammar.GrammarNodes)
                {
                    var gstate = GetGState(grammarNode);
                    gstate.DumpInfo();
                }
            }
            // Release the compile-time working structures.
            NodeStateMap = null;
            ArcPool      = null;
            return(SentenceHMMState.CollectStates(initialState));
        }
Example #12
0
        /*
         * (non-Javadoc)
         * @see edu.cmu.sphinx.linguist.language.ngram.LanguageModel#allocate()
         *
         * Loads the binary n-gram model, builds the per-depth n-gram buffers,
         * probability/backoff/segment tables and the unigram ID map, optionally
         * reads or computes smear information, and times the whole load.
         */
        public override void Allocate()
        {
            TimerPool.GetTimer(this, "Load LM").Start();

            this.LogInfo("Loading n-gram language model from: " + Location);

            // create the log file if specified
            if (NgramLogFile != null)
            {
                _logFile = new StreamWriter(NgramLogFile);
            }
            //Java's URL.getProtocol()==null
            if (!String.IsNullOrEmpty(Location.Path))
            {
                // The original wrapped this in a catch(Exception) that retried the
                // exact same constructor call, discarding the first exception; a
                // single call is equivalent and keeps the real failure visible.
                _loader =
                    new BinaryLoader(new FileInfo(Location.Path), Format,
                                     ApplyLanguageWeightAndWip, LanguageWeight, Wip,
                                     UnigramWeight);
            }
            else
            {
                // NOTE(review): Location.Path is null/empty on this branch, yet it
                // is what gets passed to BinaryStreamLoader — confirm this is the
                // intended resource identifier.
                _loader =
                    new BinaryStreamLoader(Location.Path, Format,
                                           ApplyLanguageWeightAndWip, LanguageWeight, Wip,
                                           UnigramWeight);
            }

            _unigramIDMap       = new HashMap<Word, UnigramProbability>();
            _unigrams           = _loader.Unigrams;
            _loadedNGramBuffers = new HashMap<WordSequence, NGramBuffer> [_loader.MaxDepth];
            _ngramProbTable     = new float[_loader.MaxDepth][];
            _ngramBackoffTable  = new float[_loader.MaxDepth][];
            _ngramSegmentTable  = new int[_loader.MaxDepth][];

            for (var i = 1; i <= _loader.MaxDepth; i++)
            {
                _loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();

                // Probability tables exist for every order >= 2.
                if (i >= 2)
                {
                    _ngramProbTable[i - 1] = _loader.GetNGramProbabilities(i);
                }

                // NOTE(review): backoff/segment tables start at order 3 (i > 2)
                // while probabilities start at order 2 (i >= 2) — confirm the
                // asymmetry is intentional.
                if (i > 2)
                {
                    _ngramBackoffTable[i - 1] = _loader.GetNGramBackoffWeights(i);
                    _ngramSegmentTable[i - 1] = _loader.GetNGramSegments(i);
                }
            }

            _ngramDepthCache = new LRUCache<WordSequence, Float>(NgramCacheSize);

            // Prefer the dictionary-driven mapping when a dictionary is available.
            if (Dictionary != null)
            {
                BuildUnigramIDMap(Dictionary);
            }
            else
            {
                BuildUnigramIDMap();
            }
            _loadedBigramBuffers = new NGramBuffer[_unigrams.Length];

            // Clamp the configured depth to what the model actually provides.
            if (MaxDepth <= 0 || MaxDepth > _loader.MaxDepth)
            {
                MaxDepth = _loader.MaxDepth;
            }

            for (var i = 1; i <= _loader.MaxDepth; i++)
            {
                this.LogInfo(i + "-grams: " +
                             _loader.GetNumberNGrams(i));
            }

            if (FullSmear)
            {
                this.LogInfo("Full Smear");
                try {
                    this.LogInfo("... Reading ...");
                    ReadSmearInfo("smear.dat");
                    this.LogInfo("... Done ");
                } catch (IOException e) {
                    // Cached smear data missing or unreadable: rebuild from scratch.
                    this.LogInfo("... " + e);
                    this.LogInfo("... Calculating");
                    BuildSmearInfo();
                    this.LogInfo("... Writing");
                    // writeSmearInfo("smear.dat");
                    this.LogInfo("... Done");
                }
            }

            TimerPool.GetTimer(this, "Load LM").Stop();
        }
Example #13
0
        /// <summary>
        /// Initializes a new instance of the <see cref="HMMPool"/> class:
        /// counts the context-independent units, fills the unit table, and
        /// builds a per-position HMM lookup table, timing the whole build.
        /// </summary>
        /// <param name="model">The model to use for the pool</param>
        /// <param name="unitManager">The unit manager.</param>
        /// <exception cref="System.Exception">
        /// LexTreeLinguist: Unsupported left context size
        /// or
        /// LexTreeLinguist: Unsupported right context size
        /// </exception>
        public HMMPool(AcousticModel model, UnitManager unitManager)
        {
            this.Model        = model;
            this._unitManager = unitManager;
            TimerPool.GetTimer(this, "Build HMM Pool").Start();

            // Only single-unit left/right contexts are supported.
            if (model.GetLeftContextSize() != 1)
            {
                throw new Exception("LexTreeLinguist: Unsupported left context size");
            }
            if (model.GetRightContextSize() != 1)
            {
                throw new Exception("LexTreeLinguist: Unsupported right context size");
            }

            // Count CI units by tracking the highest base ID seen.
            var highestBaseId = 0;
            var ciUnits = model.GetContextIndependentUnitIterator();
            while (ciUnits.MoveNext())
            {
                var ciUnit = ciUnits.Current;
                //this.LogInfo("CI unit " + ciUnit);
                if (ciUnit.BaseID > highestBaseId)
                {
                    highestBaseId = ciUnit.BaseID;
                }
            }
            NumCiUnits = highestBaseId + 1;

            // One table slot per (base, left-context, right-context) triple.
            _unitTable = new Unit[NumCiUnits * NumCiUnits * NumCiUnits];

            var hmmIterator = model.GetHMMIterator();
            while (hmmIterator.MoveNext())
            {
                var hmmUnit = hmmIterator.Current.Unit;
                _unitTable[GetId(hmmUnit)] = hmmUnit;
                //this.LogInfo("Unit " + hmmUnit + " id " + GetId(hmmUnit));
            }

            // Build up the hmm table to allow quick access to the hmms.
            _hmmTable = new Dictionary<HMMPosition, IHMM[]>();
            foreach (HMMPosition position in Enum.GetValues(typeof(HMMPosition)))
            {
                var hmmsForPosition = new IHMM[_unitTable.Length];
                Java.Put(_hmmTable, position, hmmsForPosition);
                //hmmTable.Put(position, hmms);
                for (var id = 1; id < _unitTable.Length; id++)
                {
                    var unit = _unitTable[id];
                    if (unit == null)
                    {
                        // Fill gaps with a synthesized unit when possible.
                        unit = SynthesizeUnit(id);
                    }
                    if (unit != null)
                    {
                        hmmsForPosition[id] = model.LookupNearestHMM(unit, position, false);
                        Debug.Assert(hmmsForPosition[id] != null);
                    }
                }
            }
            TimerPool.GetTimer(this, "Build HMM Pool").Stop();
        }