public void SortFeaturesByCountWhenApplyingCountLimit()
        {
            // Compares the largest counts produced by a vectorizer capped via MaxFeatures
            // against the largest counts produced by the vectorizer from GetFittedVectorizer
            // (NOTE(review): presumably fitted without a feature limit - confirm in the helper).

            // arrange
            var settings = new CountVectorizerSettings { MaxFeatures = 5 };
            var target   = new CountVectorizer(settings);
            var corpus   = GetTrainingData();

            target.Fit(corpus);

            var vectorizerWithoutLimit = GetFittedVectorizer(corpus);

            // Flattens the per-document count dictionaries into one sequence and keeps
            // only the MaxFeatures highest values, largest first. The result is lazy.
            IEnumerable<uint> TakeHighestCounts(IEnumerable<IDictionary<string, uint>> documents)
            {
                return documents
                    .SelectMany(document => document.Values)
                    .OrderByDescending(count => count)
                    .Take((int)settings.MaxFeatures);
            }

            var unlimitedVectors  = vectorizerWithoutLimit.Transform(corpus);
            var expectedTopCounts = TakeHighestCounts(unlimitedVectors);

            // act
            var limitedVectors = target.Transform(corpus);

            // assert
            var actualTopCounts = TakeHighestCounts(limitedVectors);

            Assert.Equal(expectedTopCounts, actualTopCounts);
        }
        public void CanLimitFeaturesCount()
        {
            // Verifies that fitting with MaxFeatures set leaves exactly that many
            // entries in the vectorizer's vocabulary.

            // arrange
            // Single source of truth for the limit, used by both the setting and the assertion.
            const int ExpectedFeatureCount = 5;

            var settings = new CountVectorizerSettings
            {
                MaxFeatures = ExpectedFeatureCount
            };

            var target       = new CountVectorizer(settings);
            var trainingData = GetTrainingData();

            // act
            target.Fit(trainingData);

            // assert
            // Assert.Equal reports the expected and actual counts on failure,
            // whereas Assert.True on a comparison only reports "False".
            Assert.Equal(ExpectedFeatureCount, target.Vocabulary.Count());
        }