/// <summary>
/// Fits one vectorizer configured with <c>MaxFeatures = 5</c> and obtains a second one from
/// <c>GetFittedVectorizer</c>, transforms the same training data with both, and asserts that
/// the largest <c>MaxFeatures</c> count values produced by each are equal.
/// </summary>
public void SortFeaturesByCountWhenApplyingCountLimit()
{
    // arrange
    var limitSettings = new CountVectorizerSettings { MaxFeatures = 5 };
    var limitedVectorizer = new CountVectorizer(limitSettings);
    var corpus = GetTrainingData();
    limitedVectorizer.Fit(corpus);

    // Reference vectorizer (presumably fitted without a feature limit; see GetFittedVectorizer).
    var referenceVectorizer = GetFittedVectorizer(corpus);

    // Flattens every count value from all dictionaries, orders them from largest to smallest
    // and keeps the first MaxFeatures of them. The returned sequence is evaluated lazily.
    IEnumerable<uint> TakeLargestCounts(IEnumerable<IDictionary<string, uint>> tokenCountMaps)
    {
        var allCounts = tokenCountMaps.SelectMany(map => map.Values);
        var largestFirst = allCounts.OrderByDescending(count => count);
        return largestFirst.Take((int)limitSettings.MaxFeatures);
    }

    var expectedLargest = TakeLargestCounts(referenceVectorizer.Transform(corpus));

    // act
    var transformed = limitedVectorizer.Transform(corpus);

    // assert
    var actualLargest = TakeLargestCounts(transformed);
    Assert.Equal(expectedLargest, actualLargest);
}
/// <summary>
/// Asserts that calling <c>Transform</c> on a newly constructed <c>CountVectorizer</c>,
/// without calling <c>Fit</c> first, throws <c>NotFittedException</c>.
/// </summary>
public void ThrowExceptionOnTransformIfIsNotFitted()
{
    // arrange
    CountVectorizer unfittedVectorizer = new CountVectorizer();

    // act & assert
    Assert.Throws<NotFittedException>(
        () => unfittedVectorizer.Transform("some test text"));
}