Пример #1
0
            /// <summary>
            /// Hands one document to the underlying LDA trainer, unless the document is too short.
            /// </summary>
            /// <param name="model">Supplies <c>MinimumTokenCountPerDocument</c> and <c>VocabularyBuckets</c>.</param>
            /// <param name="tokenIndices">Token indices forwarded unchanged to the trainer.</param>
            /// <param name="tokenCount">Token count compared against the model's minimum and forwarded to the trainer.</param>
            /// <param name="frequency">Per-token frequencies forwarded unchanged to the trainer.</param>
            /// <returns>
            /// 0 when <paramref name="tokenCount"/> is below <c>model.MinimumTokenCountPerDocument</c>;
            /// otherwise whatever <c>_ldaTrainer.LoadDoc</c> returns.
            /// </returns>
            internal int FeedTrain(LDAModel model, ReadOnlySpan <int> tokenIndices, int tokenCount, ReadOnlySpan <double> frequency)
            {
                // Documents shorter than the configured minimum are never passed to the trainer.
                return tokenCount < model.MinimumTokenCountPerDocument
                    ? 0
                    : _ldaTrainer.LoadDoc(tokenIndices, frequency, tokenCount, model.VocabularyBuckets);
            }
Пример #2
0
            /// <summary>
            /// Loads one document (a vector of term counts) into the underlying LDA trainer.
            /// </summary>
            /// <param name="ectx">Exception context used for the final consistency assertion; asserted to be a valid value.</param>
            /// <param name="input">Term-count vector for the document; dense and sparse inputs use different trainer entry points.</param>
            /// <returns>
            /// 0 when any term yields a negative frequency or when the accumulated document size is zero;
            /// otherwise the size reported by the trainer.
            /// </returns>
            public int FeedTrain(IExceptionContext ectx, ref VBuffer <Double> input)
            {
                Contracts.AssertValue(ectx);

                // REVIEW: This is the point where counts are fed to the trainer;
                // the method is invoked many times.

                int totalTokens = 0;
                int acceptedTerms = 0;

                int slot = 0;
                while (slot < input.Count)
                {
                    int frequency = GetFrequency(input.Values[slot]);

                    // A negative frequency invalidates the whole row.
                    if (frequency < 0)
                    {
                        return 0;
                    }

                    // Stop accepting terms once adding this one would reach the token cap.
                    if (InfoEx.NumMaxDocToken - frequency <= totalTokens)
                    {
                        break;
                    }

                    totalTokens += frequency;
                    acceptedTerms++;
                    slot++;
                }

                // Nothing accumulated: skip the document.
                if (totalTokens == 0)
                {
                    return 0;
                }

                // Dense vectors need no index array; sparse ones pass indices alongside values.
                int loadedSize = input.IsDense
                    ? _ldaTrainer.LoadDocDense(input.Values, acceptedTerms, input.Length)
                    : _ldaTrainer.LoadDoc(input.Indices, input.Values, acceptedTerms, input.Length);

                ectx.Assert(loadedSize == 2 * totalTokens + 1, string.Format("The doc size are distinct. Actual: {0}, Expected: {1}", loadedSize, 2 * totalTokens + 1));
                return loadedSize;
            }