/// <summary>
/// Reads the sentiment data file, turns its text column into word embeddings,
/// fits an <see cref="SdcaMultiClassTrainer"/> on the result, and passes the
/// fitted model to <c>_consumer</c>.
/// </summary>
public void TrainSentiment()
{
    // Describe the input file: field 0 is the numeric label, field 1 the raw text.
    var labelColumn = new TextLoader.Column()
    {
        Name = "Label",
        Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
        Type = DataKind.Num
    };
    var sentimentColumn = new TextLoader.Column()
    {
        Name = "SentimentText",
        Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
        Type = DataKind.Text
    };
    var loaderArgs = new TextLoader.Arguments()
    {
        Column = new TextLoader.Column[] { labelColumn, sentimentColumn },
        HasHeader = true,
        AllowQuoting = false,
        AllowSparse = false
    };
    var data = _env.Data.ReadFromTextFile(_sentimentDataPath, loaderArgs);

    // Text featurization: tokens are emitted, while the char and word extractors
    // and vector normalization are switched off.
    var textEstimator = new TextFeaturizingEstimator(_env, "SentimentText", "WordEmbeddings", settings =>
    {
        settings.OutputTokens = true;
        settings.KeepPunctuations = false;
        settings.UseStopRemover = true;
        settings.VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None;
        settings.UseCharExtractor = false;
        settings.UseWordExtractor = false;
    });
    var textTransformer = textEstimator.Fit(data);
    var tokenized = textTransformer.Transform(data);

    // Word-embedding extraction with the pretrained Sswe model kind.
    var embeddingEstimator = new WordEmbeddingsExtractingEstimator(
        _env,
        "WordEmbeddings_TransformedText",
        "Features",
        WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe);
    var embeddingTransformer = embeddingEstimator.Fit(tokenized);
    var featurized = embeddingTransformer.Transform(tokenized);

    // Fit the SDCA multiclass trainer on the "Label" / "Features" columns.
    var sdca = new SdcaMultiClassTrainer(_env, "Label", "Features", maxIterations: 20);
    var model = sdca.Fit(featurized);
    _consumer.Consume(model);
}
/// <summary>
/// Creates an <see cref="MLContext"/> with seed 1, reads the sentiment data file,
/// turns its text column into word embeddings, fits an
/// <see cref="SdcaMultiClassTrainer"/> on the result, and passes the fitted model
/// to <c>_consumer</c>.
/// </summary>
public void TrainSentiment()
{
    var mlContext = new MLContext(seed: 1);

    // Describe the input file: field 0 is the numeric label, field 1 the raw text.
    var labelColumn = new TextLoader.Column()
    {
        Name = "Label",
        Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
        Type = DataKind.Num
    };
    var sentimentColumn = new TextLoader.Column()
    {
        Name = "SentimentText",
        Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
        Type = DataKind.Text
    };
    var loaderArgs = new TextLoader.Arguments()
    {
        Column = new TextLoader.Column[] { labelColumn, sentimentColumn },
        HasHeader = true,
        AllowQuoting = false,
        AllowSparse = false
    };
    var data = mlContext.Data.ReadFromTextFile(_sentimentDataPath, loaderArgs);

    // Text featurization from "SentimentText" into "WordEmbeddings": tokens are
    // emitted, while both feature extractors are null and normalization is None.
    var featurizerArgs = new TextFeaturizingEstimator.Arguments()
    {
        Column = new TextFeaturizingEstimator.Column
        {
            Name = "WordEmbeddings",
            Source = new[] { "SentimentText" }
        },
        OutputTokens = true,
        KeepPunctuations = false,
        UsePredefinedStopWordRemover = true,
        VectorNormalizer = TextFeaturizingEstimator.TextNormKind.None,
        CharFeatureExtractor = null,
        WordFeatureExtractor = null,
    };
    var tokenized = TextFeaturizingEstimator.Create(mlContext, featurizerArgs, data);

    // Word-embedding extraction from the token column into "Features",
    // using the pretrained Sswe model kind.
    var embeddingArgs = new WordEmbeddingsExtractingTransformer.Arguments()
    {
        Column = new WordEmbeddingsExtractingTransformer.Column[1]
        {
            new WordEmbeddingsExtractingTransformer.Column
            {
                Name = "Features",
                Source = "WordEmbeddings_TransformedText"
            }
        },
        ModelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe,
    };
    var featurized = WordEmbeddingsExtractingTransformer.Create(mlContext, embeddingArgs, tokenized);

    // Fit the SDCA multiclass trainer on the "Label" / "Features" columns.
    var sdca = new SdcaMultiClassTrainer(mlContext, "Label", "Features", maxIterations: 20);
    var model = sdca.Fit(featurized);
    _consumer.Consume(model);
}