/// <summary>
/// Loads a previously trained LDA model into the native trainer so this state can be
/// used for prediction without re-training.
/// </summary>
/// <param name="model">Pretrained model whose per-term sparse topic vectors
/// (<c>LDA_Data[termID]</c> = list of (topicId, count) pairs) are copied into the
/// native trainer's topic table.</param>
internal void InitializePretrained(LDAModel model)
{
    // Size the native-side buffers to match the pretrained model before copying data in.
    _ldaTrainer.AllocateModelMemory(model.VocabularyBuckets, model.NumberOfTopics, model.MemBlockSize, model.AliasMemBlockSize);
    Debug.Assert(model.VocabularyBuckets == model.LDA_Data.Length);

    // Copy each term's sparse topic vector into the native trainer's topic table.
    for (int termID = 0; termID < model.VocabularyBuckets; termID++)
    {
        var kvs = model.LDA_Data[termID];
        var topicId = kvs.Select(kv => kv.Key).ToArray();
        var topicProb = kvs.Select(kv => kv.Value).ToArray();
        var termTopicNum = topicId.Length;
        _ldaTrainer.SetModel(termID, topicId, topicProb, termTopicNum);
    }

    // Prepare the native prediction tables. LdaSingleBox.InitializeBeforeTest() is not
    // thread-safe (see Output), so use proper double-checked locking: the flag must be
    // re-checked INSIDE the lock, otherwise two racing callers could both pass the outer
    // check and serially run the non-idempotent native initialization twice.
    if (!_predictionPreparationDone)
    {
        lock (_preparationSyncRoot)
        {
            if (!_predictionPreparationDone)
            {
                _ldaTrainer.InitializeBeforeTest();
                _predictionPreparationDone = true;
            }
        }
    }
}
/// <summary>
/// Deserializing constructor: rebuilds the LDA state (column options, the native
/// trainer, and the per-term topic table) from a model load context.
/// </summary>
/// <param name="ectx">Exception context used for decode-time validation.</param>
/// <param name="ctx">Model load context whose reader supplies the serialized bytes
/// in exactly the binary layout documented below.</param>
public LdaState(IExceptionContext ectx, ModelLoadContext ctx)
    : this()
{
    ectx.AssertValue(ctx);

    // *** Binary format ***
    // <ColInfoEx>
    // int: vocabnum
    // long: memblocksize
    // long: aliasMemBlockSize
    // (serializing term by term, for one term)
    // int: term_id, int: topic_num, KeyValuePair<int, int>[]: termTopicVector

    // Column options are serialized first; they carry the hyperparameters needed
    // to reconstruct the native trainer below.
    InfoEx = new ColInfoEx(ectx, ctx);

    _numVocab = ctx.Reader.ReadInt32();
    ectx.CheckDecode(_numVocab > 0);

    long memBlockSize = ctx.Reader.ReadInt64();
    ectx.CheckDecode(memBlockSize > 0);

    long aliasMemBlockSize = ctx.Reader.ReadInt64();
    ectx.CheckDecode(aliasMemBlockSize > 0);

    // Recreate the native trainer with the deserialized hyperparameters.
    _ldaTrainer = new LdaSingleBox(
        InfoEx.NumTopic,
        _numVocab, /* Need to set number of vocabulary here */
        InfoEx.AlphaSum,
        InfoEx.Beta,
        InfoEx.NumIter,
        InfoEx.LikelihoodInterval,
        InfoEx.NumThread,
        InfoEx.MHStep,
        InfoEx.NumSummaryTermPerTopic,
        false,
        InfoEx.NumMaxDocToken);

    _ldaTrainer.AllocateModelMemory(_numVocab, InfoEx.NumTopic, memBlockSize, aliasMemBlockSize);

    // Read one sparse topic vector per vocabulary term: term id, number of
    // (topic, count) pairs, then the pairs themselves interleaved as int32s.
    for (int i = 0; i < _numVocab; i++)
    {
        int termID = ctx.Reader.ReadInt32();
        ectx.CheckDecode(termID >= 0);
        int termTopicNum = ctx.Reader.ReadInt32();
        ectx.CheckDecode(termTopicNum >= 0);

        int[] topicId = new int[termTopicNum];
        int[] topicProb = new int[termTopicNum];

        for (int j = 0; j < termTopicNum; j++)
        {
            topicId[j] = ctx.Reader.ReadInt32();
            topicProb[j] = ctx.Reader.ReadInt32();
        }

        //set the topic into _ldaTrainer inner topic table
        _ldaTrainer.SetModel(termID, topicId, topicProb, termTopicNum);
    }

    //do the preparation
    // No lock is taken here: the instance is still being constructed and is not
    // yet visible to other threads, so the flag check cannot race.
    if (!_predictionPreparationDone)
    {
        _ldaTrainer.InitializeBeforeTest();
        _predictionPreparationDone = true;
    }
}
/// <summary>
/// Runs prediction for a single document: converts the term-frequency vector in
/// <paramref name="src"/> into a normalized topic-distribution vector in
/// <paramref name="dst"/> (length = number of topics).
/// </summary>
/// <param name="src">Term-frequency vector for one document (dense or sparse).</param>
/// <param name="dst">Receives the topic distribution; its existing Values/Indices
/// buffers are reused when large enough to reduce allocation.</param>
/// <param name="numBurninIter">Number of burn-in iterations passed to the native sampler.</param>
/// <param name="reset">Whether the native sampler should reset its state for this document.</param>
public void Output(ref VBuffer<Double> src, ref VBuffer<Float> dst, int numBurninIter, bool reset)
{
    // Prediction for a single document.
    // LdaSingleBox.InitializeBeforeTest() is NOT thread-safe.
    // Lazily build the native prediction tables with double-checked locking: the
    // flag is re-checked inside the lock so the non-idempotent native call runs once.
    if (!_predictionPreparationDone)
    {
        lock (_preparationSyncRoot)
        {
            if (!_predictionPreparationDone)
            {
                //do some preparation for building tables in native c++
                _ldaTrainer.InitializeBeforeTest();
                _predictionPreparationDone = true;
            }
        }
    }

    int len = InfoEx.NumTopic;
    // Capture dst's existing buffers so they can be reused for the result.
    var values = dst.Values;
    var indices = dst.Indices;

    // Empty document: emit an all-zero (empty sparse) topic vector.
    if (src.Count == 0)
    {
        dst = new VBuffer<Float>(len, 0, values, indices);
        return;
    }

    // Make sure all the frequencies are valid and truncate if the sum gets too large.
    int docSize = 0;
    int termNum = 0;
    for (int i = 0; i < src.Count; i++)
    {
        int termFreq = GetFrequency(src.Values[i]);
        if (termFreq < 0)
        {
            // REVIEW: Should this log a warning message? And what should it produce?
            // It currently produces a vbuffer of all NA values.
            // REVIEW: Need a utility method to do this...
            // Invalid frequency (e.g. non-integral value): the whole output becomes NaN.
            if (Utils.Size(values) < len)
            {
                values = new Float[len];
            }
            for (int k = 0; k < len; k++)
            {
                values[k] = Float.NaN;
            }
            dst = new VBuffer<Float>(len, values, indices);
            return;
        }

        // Stop accumulating terms once adding this frequency would exceed the
        // per-document token cap; terms beyond this point are ignored.
        if (docSize >= InfoEx.NumMaxDocToken - termFreq)
        {
            break;
        }

        docSize += termFreq;
        termNum++;
    }

    // REVIEW: Too much memory allocation here on each prediction.
    List<KeyValuePair<int, float>> retTopics;
    if (src.IsDense)
    {
        retTopics = _ldaTrainer.TestDocDense(src.Values, termNum, numBurninIter, reset);
    }
    else
    {
        // Sparse input: pass only the populated prefix of the index/value buffers.
        retTopics = _ldaTrainer.TestDoc(src.Indices.Take(src.Count).ToArray(), src.Values.Take(src.Count).ToArray(), termNum, numBurninIter, reset);
    }

    int count = retTopics.Count;
    Contracts.Assert(count <= len);
    // Grow the reused buffers only when too small. Indices are only needed when
    // the result is sparse (count < len).
    if (Utils.Size(values) < count)
    {
        values = new Float[count];
    }
    if (count < len && Utils.Size(indices) < count)
    {
        indices = new int[count];
    }

    // Copy the native result into the output buffers, accumulating the sum of
    // topic weights so the vector can be normalized to a distribution below.
    double normalizer = 0;
    for (int i = 0; i < count; i++)
    {
        int index = retTopics[i].Key;
        Float value = retTopics[i].Value;
        Contracts.Assert(value >= 0);
        Contracts.Assert(0 <= index && index < len);
        if (count < len)
        {
            // Sparse result: native topic ids must arrive in strictly increasing order.
            Contracts.Assert(i == 0 || indices[i - 1] < index);
            indices[i] = index;
        }
        else
        {
            // Dense result: topic ids are expected to be exactly 0..len-1 in order.
            Contracts.Assert(index == i);
        }

        values[i] = value;
        normalizer += value;
    }

    // Normalize to sum to 1 (skipped when all weights are zero to avoid divide-by-zero).
    if (normalizer > 0)
    {
        for (int i = 0; i < count; i++)
        {
            values[i] = (Float)(values[i] / normalizer);
        }
    }

    dst = new VBuffer<Float>(len, count, values, indices);
}