Пример #1
0
        /// <summary>
        /// Copies the topic entries of a single word from another trainer's engine into this instance's engine.
        /// </summary>
        /// <param name="trainer">Trainer whose engine is read from.</param>
        /// <param name="wordId">Id of the word whose entries are transferred.</param>
        public void CopyModel(LdaSingleBox trainer, int wordId)
        {
            // Passed by reference: starts at NumTopic and is presumably overwritten with the
            // number of entries actually filled in -- verify against the native API.
            int entryCount = NumTopic;
            LdaInterface.GetWordTopic(trainer._engine, wordId, _topics, _probabilities, ref entryCount);

            // _topics / _probabilities act as the staging buffers between the two engines.
            LdaInterface.SetWordTopic(_engine, wordId, _topics, _probabilities, entryCount);
        }
Пример #2
0
        /// <summary>
        /// Runs <c>LdaInterface.TestOneDoc</c> on one document given as parallel term-id / term-value spans
        /// and returns the (topic, value) pairs the engine reports.
        /// </summary>
        /// <param name="termID">Term ids; the first <paramref name="termNum"/> entries are passed to the engine.</param>
        /// <param name="termVal">Term values; every entry is cast to <c>int</c> before being passed on.</param>
        /// <param name="termNum">Number of terms; must be positive and not exceed the length of either span.</param>
        /// <param name="numBurninIter">Forwarded unchanged to the engine.</param>
        /// <param name="reset">Forwarded unchanged to the engine.</param>
        /// <returns>One pair per reported topic: key is the topic id, value is the engine's integer output converted to float.</returns>
        public List<KeyValuePair<int, float>> TestDoc(ReadOnlySpan<int> termID, ReadOnlySpan<double> termVal, int termNum, int numBurninIter, bool reset)
        {
            Contracts.Check(termNum > 0);
            Contracts.Check(termVal.Length >= termNum);
            Contracts.Check(termID.Length >= termNum);

            // Only the leading termNum ids are handed over.
            int[] ids = termID.Slice(0, termNum).ToArray();

            // The whole value span is converted, not just its first termNum entries.
            int[] values = new int[termVal.Length];
            int cursor = 0;
            foreach (double raw in termVal)
            {
                values[cursor] = (int)raw;
                cursor++;
            }

            int[] topicIds = new int[NumTopic];
            int[] topicValues = new int[NumTopic];
            int returned = NumTopic;

            LdaInterface.TestOneDoc(_engine, ids, values, termNum, topicIds, topicValues, ref returned, numBurninIter, reset);

            // The count comes back by reference; guard against a value larger than the output
            // buffers so the loop below cannot read past their end.
            if (returned > NumTopic)
            {
                Contracts.Check(false);
                returned = NumTopic;
            }

            var result = new List<KeyValuePair<int, float>>();
            for (int t = 0; t < returned; t++)
            {
                result.Add(new KeyValuePair<int, float>(topicIds[t], topicValues[t]));
            }

            return result;
        }
Пример #3
0
        /// <summary>
        /// Runs <c>LdaInterface.Test</c> for the given number of iterations and copies the values it
        /// produces into <paramref name="logLikelihood"/>.
        /// </summary>
        /// <param name="numBurninIter">Number of iterations; must be non-negative. Also the size of the buffer handed to the engine.</param>
        /// <param name="logLikelihood">
        /// Caller-allocated array that receives the results. At most <paramref name="numBurninIter"/> values are
        /// written; a shorter array receives only as many values as fit, and <c>null</c> discards the results.
        /// </param>
        public void Test(int numBurninIter, float[] logLikelihood)
        {
            Contracts.Check(numBurninIter >= 0);
            var results = new float[numBurninIter];

            LdaInterface.Test(_engine, numBurninIter, results);

            // The array reference is passed by value, so assigning a new array to the parameter
            // would never be visible to the caller. Write into the caller's array instead.
            if (logLikelihood != null)
            {
                Array.Copy(results, logLikelihood, Math.Min(results.Length, logLikelihood.Length));
            }
        }
Пример #4
0
 /// <summary>
 /// Validates that all sizes are non-negative and forwards them to <c>LdaInterface.AllocateModelMemory</c>.
 /// </summary>
 /// <param name="numTopic">Topic count; must be non-negative.</param>
 /// <param name="numVocab">Vocabulary size; must be non-negative.</param>
 /// <param name="tableSize">Must be non-negative; forwarded unchanged.</param>
 /// <param name="aliasTableSize">Must be non-negative; forwarded unchanged.</param>
 public void AllocateModelMemory(int numTopic, int numVocab, long tableSize, long aliasTableSize)
 {
     Contracts.Check(numTopic >= 0);
     Contracts.Check(numVocab >= 0);
     Contracts.Check(tableSize >= 0);
     Contracts.Check(aliasTableSize >= 0);
     // NOTE(review): numVocab is passed before numTopic here, the reverse of this method's own
     // parameter order -- confirm this matches the order LdaInterface.AllocateModelMemory expects.
     LdaInterface.AllocateModelMemory(_engine, numVocab, numTopic, tableSize, aliasTableSize);
 }
Пример #5
0
 /// <summary>
 /// Stages the given topic entries in the instance buffers and hands them to
 /// <c>LdaInterface.SetWordTopic</c> for the given term.
 /// </summary>
 /// <param name="termID">Term id; must be non-negative.</param>
 /// <param name="topicID">Source of topic ids; the first <paramref name="topicNum"/> entries are used.</param>
 /// <param name="topicProb">Source of the per-topic integer values; the first <paramref name="topicNum"/> entries are used.</param>
 /// <param name="topicNum">Number of entries to transfer; must not exceed <c>NumTopic</c>.</param>
 public void SetModel(int termID, int[] topicID, int[] topicProb, int topicNum)
 {
     Contracts.Check(termID >= 0);
     Contracts.Check(topicNum <= NumTopic);

     // Copy the leading topicNum entries into the instance buffers that are passed to the engine.
     Array.Copy(topicID, 0, _topics, 0, topicNum);
     Array.Copy(topicProb, 0, _probabilities, 0, topicNum);

     LdaInterface.SetWordTopic(_engine, termID, _topics, _probabilities, topicNum);
 }
Пример #6
0
 /// <summary>
 /// Destroys the engine the first time it is called; subsequent calls do nothing.
 /// </summary>
 public void Dispose()
 {
     if (!_isDisposed)
     {
         // Set the flag first, then destroy the engine and clear the stored pointer.
         _isDisposed = true;
         LdaInterface.DestroyEngine(_engine);
         _engine.Ptr = IntPtr.Zero;
     }
 }
Пример #7
0
 /// <summary>
 /// Calls <c>LdaInterface.Train</c> on the engine.
 /// </summary>
 /// <param name="trainOutput">
 /// Forwarded to the engine as-is, except that null, empty and whitespace-only values are all passed as <c>null</c>.
 /// </param>
 public void Train(string trainOutput)
 {
     string output = string.IsNullOrWhiteSpace(trainOutput) ? null : trainOutput;
     LdaInterface.Train(_engine, output);
 }
Пример #8
0
        /// <summary>
        /// Queries <c>LdaInterface.GetTopicSummary</c> for one topic and packs the result into an array of pairs.
        /// </summary>
        /// <param name="topicId">Topic id forwarded to the engine.</param>
        /// <returns>
        /// An array whose length is the count reported back by the engine; each pair holds an entry of
        /// <c>_summaryTerm</c> as key and the matching entry of <c>_summaryTermProb</c> as value.
        /// </returns>
        public KeyValuePair<int, float>[] GetTopicSummary(int topicId)
        {
            // Starts at the buffer capacity and is updated by reference by the engine call.
            int count = _numSummaryTerms;
            LdaInterface.GetTopicSummary(_engine, topicId, _summaryTerm, _summaryTermProb, ref count);

            var summary = new KeyValuePair<int, float>[count];
            int pos = 0;
            while (pos < count)
            {
                summary[pos] = new KeyValuePair<int, float>(_summaryTerm[pos], _summaryTermProb[pos]);
                pos++;
            }

            return summary;
        }
Пример #9
0
        /// <summary>
        /// Queries <c>LdaInterface.GetWordTopic</c> for one word and packs the result into an array of pairs.
        /// </summary>
        /// <param name="wordId">Word id forwarded to the engine.</param>
        /// <returns>
        /// An array whose length is the count reported back by the engine; each pair holds an entry of
        /// <c>_topics</c> as key and the matching entry of <c>_probabilities</c> as value.
        /// </returns>
        public KeyValuePair<int, int>[] GetModel(int wordId)
        {
            // Starts at NumTopic (the buffer size) and is updated by reference by the engine call.
            int count = NumTopic;
            LdaInterface.GetWordTopic(_engine, wordId, _topics, _probabilities, ref count);

            var entries = new KeyValuePair<int, int>[count];
            int pos = 0;
            while (pos < count)
            {
                entries[pos] = new KeyValuePair<int, int>(_topics[pos], _probabilities[pos]);
                pos++;
            }

            return entries;
        }
Пример #10
0
        /// <summary>
        /// Converts a dense vector of term values to integers and feeds it to the engine through
        /// <c>LdaInterface.FeedInDataDense</c>.
        /// </summary>
        /// <param name="termVal">Term values; every entry is cast to <c>int</c> before being passed on.</param>
        /// <param name="termNum">Must be positive and not exceed <c>termVal.Length</c>; forwarded unchanged.</param>
        /// <param name="numVocab">Must equal <c>NumVocab</c>.</param>
        /// <returns>The value returned by <c>LdaInterface.FeedInDataDense</c>.</returns>
        public int LoadDocDense(ReadOnlySpan<double> termVal, int termNum, int numVocab)
        {
            Contracts.Check(numVocab == NumVocab);
            Contracts.Check(termNum > 0);
            Contracts.Check(termVal.Length >= termNum);

            // The dense call takes no term-id array, so only the values need a managed buffer.
            // The whole span is converted, not just its first termNum entries.
            int[] pVal = new int[termVal.Length];
            for (int i = 0; i < termVal.Length; i++)
            {
                pVal[i] = (int)termVal[i];
            }

            return LdaInterface.FeedInDataDense(_engine, pVal, termNum, NumVocab);
        }
Пример #11
0
        /// <summary>
        /// Feeds one document, given as parallel term-id / term-value spans, to the engine through
        /// <c>LdaInterface.FeedInData</c>.
        /// </summary>
        /// <param name="termID">Term ids; the first <paramref name="termNum"/> entries are passed on.</param>
        /// <param name="termVal">Term values; every entry is cast to <c>int</c> before being passed on.</param>
        /// <param name="termNum">Number of terms; must be positive and not exceed the length of either span.</param>
        /// <param name="numVocab">Must equal <c>NumVocab</c>.</param>
        /// <returns>The value returned by <c>LdaInterface.FeedInData</c>.</returns>
        public int LoadDoc(ReadOnlySpan<int> termID, ReadOnlySpan<double> termVal, int termNum, int numVocab)
        {
            Contracts.Check(numVocab == NumVocab);
            Contracts.Check(termNum > 0);
            Contracts.Check(termID.Length >= termNum);
            Contracts.Check(termVal.Length >= termNum);

            // Only the leading termNum ids are handed over.
            int[] ids = termID.Slice(0, termNum).ToArray();

            // The whole value span is converted, not just its first termNum entries.
            int[] values = new int[termVal.Length];
            int cursor = 0;
            foreach (double raw in termVal)
            {
                values[cursor] = (int)raw;
                cursor++;
            }

            return LdaInterface.FeedInData(_engine, ids, values, termNum, NumVocab);
        }
Пример #12
0
        /// <summary>
        /// Stores the configuration, allocates the reusable exchange buffers and creates the engine
        /// through <c>LdaInterface.CreateEngine</c>.
        /// </summary>
        /// <param name="numTopic">Stored in <c>NumTopic</c>; also the size of the topic exchange buffers.</param>
        /// <param name="numVocab">Stored in <c>NumVocab</c>.</param>
        /// <param name="alpha">Stored and forwarded to the engine.</param>
        /// <param name="beta">Stored and forwarded to the engine.</param>
        /// <param name="numIter">Forwarded to the engine only; not stored on this instance.</param>
        /// <param name="likelihoodInterval">Stored and forwarded to the engine.</param>
        /// <param name="numThread">Stored and forwarded to the engine.</param>
        /// <param name="mhstep">Stored and forwarded to the engine.</param>
        /// <param name="numSummaryTerms">Stored; also the size of the topic-summary buffers.</param>
        /// <param name="denseOutput">Stored; read by <c>GetDocTopicVector</c> to choose dense or sparse output.</param>
        /// <param name="maxDocToken">Forwarded to the engine only; not stored on this instance.</param>
        public LdaSingleBox(int numTopic, int numVocab, float alpha,
                            float beta, int numIter, int likelihoodInterval, int numThread,
                            int mhstep, int numSummaryTerms, bool denseOutput, int maxDocToken)
        {
            // Model dimensions.
            NumTopic = numTopic;
            NumVocab = numVocab;

            // Hyper-parameters and run settings kept on the instance.
            _alpha = alpha;
            _beta = beta;
            _mhStep = mhstep;
            _likelihoodInterval = likelihoodInterval;
            _numThread = numThread;

            // Output settings.
            _numSummaryTerms = numSummaryTerms;
            _denseOutput = denseOutput;

            // Reusable buffers for exchanging per-word / per-document topic data with the engine.
            _topics = new int[numTopic];
            _probabilities = new int[numTopic];

            // Reusable buffers for topic summaries.
            _summaryTerm = new int[_numSummaryTerms];
            _summaryTermProb = new float[_numSummaryTerms];

            // Created last, after all buffer allocations have succeeded.
            _engine = LdaInterface.CreateEngine(numTopic, numVocab, alpha, beta, numIter, likelihoodInterval, numThread, mhstep, maxDocToken);
        }
Пример #13
0
        /// <summary>
        /// Fetches the topic entries of one document through <c>LdaInterface.GetDocTopic</c> and returns them
        /// as (topic id, value) pairs.
        /// </summary>
        /// <remarks>
        /// With <c>_denseOutput</c> off, the entries are returned as reported, converted to float.
        /// With it on, <c>_alpha</c> is added to every reported value and every topic id that was not
        /// reported (below <c>NumTopic</c>) is emitted with the value <c>_alpha</c>.
        /// </remarks>
        /// <param name="docID">Document id forwarded to the engine.</param>
        /// <returns>The list of (topic id, value) pairs.</returns>
        public List<KeyValuePair<int, float>> GetDocTopicVector(int docID)
        {
            int returned = NumTopic;
            LdaInterface.GetDocTopic(_engine, docID, _topics, _probabilities, ref returned);

            var result = new List<KeyValuePair<int, float>>();

            if (!_denseOutput)
            {
                // Sparse output: pass the reported entries through unchanged.
                for (int i = 0; i < returned; i++)
                {
                    result.Add(new KeyValuePair<int, float>(_topics[i], (float)_probabilities[i]));
                }

                return result;
            }

            // Dense output: smooth every topic with _alpha so each one gets an entry
            // (the smoothing value is usually set to 0.1).
            // NOTE(review): the gap filling presumably relies on _topics being in ascending order -- confirm.
            int nextTopic = 0;
            for (int i = 0; i < returned; i++)
            {
                // Emit the topics skipped before the current reported one.
                for (; nextTopic < _topics[i]; nextTopic++)
                {
                    result.Add(new KeyValuePair<int, float>(nextTopic, (float)_alpha));
                }

                result.Add(new KeyValuePair<int, float>(_topics[i], _probabilities[i] + (float)_alpha));
                nextTopic++;
            }

            // Emit whatever topics remain after the last reported one.
            for (; nextTopic < NumTopic; nextTopic++)
            {
                result.Add(new KeyValuePair<int, float>(nextTopic, (float)_alpha));
            }

            return result;
        }
Пример #14
0
        /// <summary>
        /// Runs <c>LdaInterface.TestOneDocDense</c> on one document given as a dense vector of term values
        /// and returns the (topic, value) pairs the engine reports.
        /// </summary>
        /// <param name="termVal">Term values; every entry is cast to <c>int</c> before being passed on.</param>
        /// <param name="termNum">Must be positive and not exceed <c>termVal.Length</c>; forwarded unchanged.</param>
        /// <param name="numBurninIter">Must be positive; forwarded unchanged.</param>
        /// <param name="reset">Forwarded unchanged to the engine.</param>
        /// <returns>One pair per reported topic: key is the topic id, value is the engine's integer output converted to float.</returns>
        public List<KeyValuePair<int, float>> TestDocDense(ReadOnlySpan<double> termVal, int termNum, int numBurninIter, bool reset)
        {
            Contracts.Check(termNum > 0);
            Contracts.Check(numBurninIter > 0);
            Contracts.Check(termVal.Length >= termNum);

            // The whole value span is converted, not just its first termNum entries.
            int[] values = new int[termVal.Length];
            int cursor = 0;
            foreach (double raw in termVal)
            {
                values[cursor] = (int)raw;
                cursor++;
            }

            int[] topicIds = new int[NumTopic];
            int[] topicValues = new int[NumTopic];
            int returned = NumTopic;

            // There are two versions of the TestOneDoc interface:
            // (1) TestOneDoc
            // (2) TestOneDocRestart
            // The second is the same as the first except that it resets the state of the internal
            // random number generator, so that it yields reproducible results for the same input.
            LdaInterface.TestOneDocDense(_engine, values, termNum, topicIds, topicValues, ref returned, numBurninIter, reset);

            // The count comes back by reference; guard against a value larger than the output
            // buffers so the loop below cannot read past their end.
            if (returned > NumTopic)
            {
                Contracts.Check(false);
                returned = NumTopic;
            }

            var result = new List<KeyValuePair<int, float>>();
            for (int t = 0; t < returned; t++)
            {
                result.Add(new KeyValuePair<int, float>(topicIds[t], topicValues[t]));
            }

            return result;
        }
Пример #15
0
 /// <summary>
 /// Forwards <paramref name="averageDocLength"/> to <c>LdaInterface.SetAlphaSum</c> for this engine.
 /// </summary>
 /// <param name="averageDocLength">Value passed through unchanged.</param>
 public void SetAlphaSum(float averageDocLength)
     => LdaInterface.SetAlphaSum(_engine, averageDocLength);
Пример #16
0
 /// <summary>
 /// Calls <c>LdaInterface.InitializeBeforeTest</c> on this engine.
 /// </summary>
 public void InitializeBeforeTest()
     => LdaInterface.InitializeBeforeTest(_engine);
Пример #17
0
 /// <summary>
 /// Calls <c>LdaInterface.InitializeBeforeTrain</c> on this engine.
 /// </summary>
 public void InitializeBeforeTrain()
     => LdaInterface.InitializeBeforeTrain(_engine);
Пример #18
0
 /// <summary>
 /// Validates that both sizes are non-negative and forwards them to <c>LdaInterface.AllocateDataMemory</c>.
 /// </summary>
 /// <param name="docNum">Must be non-negative; forwarded unchanged.</param>
 /// <param name="corpusSize">Must be non-negative; forwarded unchanged.</param>
 public void AllocateDataMemory(int docNum, long corpusSize)
 {
     Contracts.Check(docNum >= 0);
     Contracts.Check(corpusSize >= 0);
     LdaInterface.AllocateDataMemory(_engine, docNum, corpusSize);
 }
Пример #19
0
 /// <summary>
 /// Calls <c>LdaInterface.CleanModel</c> on this engine.
 /// </summary>
 public void CleanModel()
     => LdaInterface.CleanModel(_engine);
Пример #20
0
 /// <summary>
 /// Calls <c>LdaInterface.CleanData</c> on this engine.
 /// </summary>
 public void CleanData()
     => LdaInterface.CleanData(_engine);
Пример #21
0
 /// <summary>
 /// Retrieves two size figures from the engine through <c>LdaInterface.GetModelStat</c>.
 /// </summary>
 /// <param name="memBlockSize">Receives the first value written by the engine call.</param>
 /// <param name="aliasMemBlockSize">Receives the second value written by the engine call.</param>
 public void GetModelStat(out long memBlockSize, out long aliasMemBlockSize)
     => LdaInterface.GetModelStat(_engine, out memBlockSize, out aliasMemBlockSize);