/// <summary>
/// Fetches a user's recent tweets, predicts a language label for each one and reports
/// how many predictions match the expected <paramref name="language"/>.
/// </summary>
/// <param name="engine">Prediction engine used to classify every tweet.</param>
/// <param name="userName">Screen name whose timeline is requested.</param>
/// <param name="language">Expected label; predictions equal to it are counted as hits.</param>
/// <param name="count">Number of tweets requested (the request excludes replies).</param>
/// <remarks>
/// One line per tweet is appended to "tweet.txt" in the form
/// expected label, predicted label, sentence (tab-separated).
/// </remarks>
public static void UsersTimelineTest(PredictionEngine<SentenceData, PredictionData> engine, string userName, string language, int count = 100)
{
    Console.WriteLine($"Getting @{userName}'s recent tweets..");
    var timeline = _token.Statuses.UserTimeline(screen_name: userName, count: count, exclude_replies: true);

    int lan = 0;
    List<SentenceData> testData = new List<SentenceData>();
    foreach (var item in timeline)
    {
        // Links carry no language signal, so drop them before predicting.
        var text = Regex.Replace(item.Text, @"http[^\s]+", "");
        var tweet = new SentenceData() { Sentence = text };
        var prediction = engine.Predict(tweet);

        // Null-safe ordinal comparison: a missing predicted label simply counts as a miss.
        if (string.Equals(prediction.PredictedLabel, language, StringComparison.Ordinal))
        {
            lan++;
        }

        // Label is reused to carry the predicted value into the report below.
        tweet.Label = prediction.PredictedLabel;
        testData.Add(tweet);
    }

    // Expected + Predicted + Sentence
    // Tabs and line breaks inside a tweet would break the one-record-per-line,
    // tab-separated layout, so they are flattened to single spaces in the output only.
    File.AppendAllLines(
        "tweet.txt",
        testData.Select(i => language + '\t' + i.Label + '\t' + Regex.Replace(i.Sentence, @"[\t\r\n]+", " ")));

    Console.WriteLine($"Last {lan} tweets from {timeline.Count} are in '{language}' class ");
}
/// <summary>
/// Trains a multiclass Naive Bayes text classifier from "train.txt", evaluates it on
/// "test.txt", prints the prediction for one sample sentence and then runs the
/// timeline test against three Twitter accounts.
/// </summary>
static void Train()
{
    const string trainingFile = "train.txt";
    const string evaluationFile = "test.txt";
    const int tweetsPerAccount = 400;

    var mlContext = new MLContext();

    // Both files are tab-separated: column 0 holds the label, column 1 the sentence.
    var loaderArguments = new TextLoader.Arguments()
    {
        Separator = "\t",
        Column = new[]
        {
            new TextLoader.Column("Label", DataKind.Text, 0),
            new TextLoader.Column("Sentence", DataKind.Text, 1)
        }
    };
    var reader = mlContext.Data.CreateTextReader(loaderArguments);
    var trainingView = reader.Read(trainingFile);
    var evaluationView = reader.Read(evaluationFile);

    // Individual pipeline stages, listed in the order they are chained below.
    // 1. Map the textual label to an ML.NET key type.
    var labelToKey = mlContext.Transforms.Conversion.MapValueToKey("Label");
    // 2. Normalize the raw sentence text.
    var normalizeText = mlContext.Transforms.Text.NormalizeText("Sentence", "NormalizedSentence");
    // 3. Turn the normalized text into the "Features" column.
    var featurizeText = mlContext.Transforms.Text.FeaturizeText("NormalizedSentence", "Features");
    // 4. The Naive Bayes multiclass trainer.
    var naiveBayes = mlContext.MulticlassClassification.Trainers.NaiveBayes();
    // 5. Map the predicted key back to its original label value.
    var keyToLabel = mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel");

    var pipeline = labelToKey
        .Append(normalizeText)
        .Append(featurizeText)
        .Append(naiveBayes)
        .Append(keyToLabel);

    // Fit on the training set, then score the held-out set and report its metrics.
    var trainedModel = pipeline.Fit(trainingView);
    var scoredEvaluation = trainedModel.Transform(evaluationView);
    Evaluate(mlContext, scoredEvaluation);

    // Single-item prediction engine, reused for the sample and for the tweet tests.
    var predictor = trainedModel.CreatePredictionEngine<SentenceData, PredictionData>(mlContext);

    // Sample sentence for a quick check of the trained model.
    var sample = new SentenceData();
    sample.Sentence = "Enfunda tu espada, saca tu baraja y prepárate para disfrutar con Hearthstone, un trepidante juego de cartas de estrategia, fácil de aprender y salvajemente divertido. Inicia una partida gratuita y utiliza tus mejores cartas para lanzar hechizos, invocar criaturas y dar órdenes a los héroes de Warcraft en épicos y estratégicos duelos.";

    var samplePrediction = predictor.Predict(sample);
    Console.WriteLine("Predicted language {0}", samplePrediction.PredictedLabel);

    // Each account is paired with the label its tweets are expected to receive.
    TweetTest.UsersTimelineTest(predictor, userName: "NetflixES", language: "es", count: tweetsPerAccount);
    TweetTest.UsersTimelineTest(predictor, userName: "netflix", language: "en", count: tweetsPerAccount);
    TweetTest.UsersTimelineTest(predictor, userName: "netflixturkiye", language: "tr", count: tweetsPerAccount);
}