/// <summary>
/// Hands a single tokenized document to the LDA trainer, unless the document is too short.
/// </summary>
/// <param name="model">Supplies <c>MinimumTokenCountPerDocument</c> and <c>VocabularyBuckets</c>.</param>
/// <param name="tokenIndices">Token indices, forwarded unchanged to the trainer.</param>
/// <param name="tokenCount">Token count of the document; compared against the model's minimum and forwarded to the trainer.</param>
/// <param name="frequency">Per-token frequencies, forwarded unchanged to the trainer.</param>
/// <returns>
/// 0 when <paramref name="tokenCount"/> is below <c>model.MinimumTokenCountPerDocument</c>;
/// otherwise whatever <c>_ldaTrainer.LoadDoc</c> returns.
/// </returns>
internal int FeedTrain(LDAModel model, ReadOnlySpan<int> tokenIndices, int tokenCount, ReadOnlySpan<double> frequency)
{
    // Documents shorter than the configured minimum are never passed to the trainer.
    return tokenCount < model.MinimumTokenCountPerDocument
        ? 0
        : _ldaTrainer.LoadDoc(tokenIndices, frequency, tokenCount, model.VocabularyBuckets);
}
/// <summary>
/// Feeds one row of term values to the LDA trainer.
/// </summary>
/// <remarks>
/// The first <c>input.Count</c> entries of <c>input.Values</c> are converted with <c>GetFrequency</c>
/// and summed. The whole row is dropped if any converted frequency is negative. Accumulation stops
/// early at the first term whose frequency would bring the running total to
/// <c>InfoEx.NumMaxDocToken</c> or beyond; only the terms accepted before that point are counted.
/// </remarks>
/// <param name="ectx">Exception context used for the final size assertion; asserted to be a valid value.</param>
/// <param name="input">The row to load. Its dense or sparse state selects which trainer entry point is used.</param>
/// <returns>
/// 0 when the row is ignored (a negative frequency was found, or the accumulated size is zero);
/// otherwise the size reported by the trainer's load call.
/// </returns>
public int FeedTrain(IExceptionContext ectx, ref VBuffer<Double> input)
{
    Contracts.AssertValue(ectx);

    // REVIEW: Input the counts to your trainer here. This
    // is called multiple times.
    int docSize = 0;
    int acceptedTerms = 0;
    int slot = 0;
    while (slot < input.Count)
    {
        int frequency = GetFrequency(input.Values[slot]);
        if (frequency < 0)
        {
            // A negative frequency invalidates the entire row.
            return 0;
        }

        if (docSize >= InfoEx.NumMaxDocToken - frequency)
        {
            // Accepting this term would hit the per-document token cap; stop here.
            break;
        }

        // The term is legal, so account for it.
        docSize += frequency;
        acceptedTerms++;
        slot++;
    }

    // Nothing to load for an empty document.
    if (docSize == 0)
    {
        return 0;
    }

    // Dense rows carry no index array, so they go through the dedicated dense loader.
    int loadedSize = input.IsDense
        ? _ldaTrainer.LoadDocDense(input.Values, acceptedTerms, input.Length)
        : _ldaTrainer.LoadDoc(input.Indices, input.Values, acceptedTerms, input.Length);

    // The trainer is expected to report 2 * docSize + 1 for the loaded document.
    ectx.Assert(loadedSize == 2 * docSize + 1, string.Format("The doc size are distinct. Actual: {0}, Expected: {1}", loadedSize, 2 * docSize + 1));
    return loadedSize;
}