/// <summary>
/// Exercises <see cref="CountVectorizer"/>: configures it through its setters, fits it
/// against a small in-memory DataFrame, reads the parameters back through the getters,
/// round-trips it through Save/Load, and checks that the descriptive helpers
/// (ExplainParams / ToString) produce non-empty output.
/// </summary>
public void TestCountVectorizer()
{
    // Expected parameter values. The numeric types are deliberate: they must match
    // the types returned by the corresponding getters for Assert.Equal.
    const string expectedInputCol = "input";
    const string expectedOutputCol = "output";
    const double expectedMinDf = 1;
    const double expectedMinTf = 10;
    const int expectedVocabSize = 10000;
    // Binary is never set below; the assertion checks the value reported when untouched.
    const bool expectedBinary = false;

    // 100 rows, each holding the same seven-token string array in column "input".
    DataFrame tokens = _spark.Sql(
        "SELECT array('hello', 'I', 'AM', 'a', 'string', 'TO', " +
        "'TOKENIZE') as input from range(100)");

    CountVectorizer vectorizer = new CountVectorizer();
    vectorizer
        .SetInputCol(expectedInputCol)
        .SetOutputCol(expectedOutputCol)
        .SetMinDF(expectedMinDf)
        .SetMinTF(expectedMinTf)
        .SetVocabSize(expectedVocabSize);

    // Fitting must yield a CountVectorizerModel.
    var fittedModel = vectorizer.Fit(tokens);
    Assert.IsType<CountVectorizerModel>(fittedModel);

    // Every configured value must be readable back from the estimator.
    Assert.Equal(expectedInputCol, vectorizer.GetInputCol());
    Assert.Equal(expectedOutputCol, vectorizer.GetOutputCol());
    Assert.Equal(expectedMinDf, vectorizer.GetMinDF());
    Assert.Equal(expectedMinTf, vectorizer.GetMinTF());
    Assert.Equal(expectedVocabSize, vectorizer.GetVocabSize());
    Assert.Equal(expectedBinary, vectorizer.GetBinary());

    // Persist to a temporary directory and reload; the reloaded instance must
    // report the same Uid as the one that was saved.
    using (TemporaryDirectory scratchDir = new TemporaryDirectory())
    {
        string modelPath = Path.Join(scratchDir.Path, "countVectorizer");
        vectorizer.Save(modelPath);

        CountVectorizer reloaded = CountVectorizer.Load(modelPath);
        string originalUid = vectorizer.Uid();
        string reloadedUid = reloaded.Uid();
        Assert.Equal(originalUid, reloadedUid);
    }

    Assert.NotEmpty(vectorizer.ExplainParams());
    Assert.NotEmpty(vectorizer.ToString());
}