Beispiel #1
0
        /// <summary>
        /// Requests the timeline of <paramref name="userName"/>, runs every returned status text
        /// through <paramref name="engine"/>, appends one line per status to "tweet.txt" and prints
        /// how many predictions were equal to <paramref name="language"/>.
        /// </summary>
        /// <param name="engine">Prediction engine used to label each status text.</param>
        /// <param name="userName">Screen name passed to the timeline request.</param>
        /// <param name="language">Expected label; written as the first column of every output line.</param>
        /// <param name="count">Value forwarded as the <c>count</c> argument of the timeline request.</param>
        public static void UsersTimelineTest(PredictionEngine <SentenceData, PredictionData> engine, string userName, string language, int count = 100)
        {
            Console.WriteLine($"Getting @{userName}'s recent tweets..");

            // Replies are excluded by the request itself (exclude_replies: true).
            var statuses = _token.Statuses.UserTimeline(screen_name: userName, count: count, exclude_replies: true);

            var labeledTweets = new List <SentenceData>();
            int matchCount    = 0;

            foreach (var status in statuses)
            {
                // Remove URLs ("http" up to the next whitespace) before predicting.
                var sample = new SentenceData();
                sample.Sentence = Regex.Replace(status.Text, @"http[^\s]+", "");

                var predictedLabel = engine.Predict(sample).PredictedLabel;
                if (predictedLabel.Equals(language))
                {
                    matchCount++;
                }

                // Keep the prediction on the sample so it can be written out below.
                sample.Label = predictedLabel;
                labeledTweets.Add(sample);
            }

            // One tab-separated line per status: Expected, Predicted, Sentence
            var outputLines = labeledTweets.Select(entry => language + '\t' + entry.Label + '\t' + entry.Sentence);
            File.AppendAllLines("tweet.txt", outputLines);

            Console.WriteLine($"Last {matchCount} tweets from {statuses.Count} are in '{language}' class ");
        }
        /// <summary>
        /// Trains a multiclass Naive Bayes text classifier from "train.txt", evaluates it against
        /// "test.txt", predicts the label of one sample sentence and then runs
        /// <c>TweetTest.UsersTimelineTest</c> for three accounts.
        /// </summary>
        static void Train()
        {
            string trainingFile   = "train.txt";
            string evaluationFile = "test.txt";

            var context = new MLContext();

            // Both files are tab-separated: column 0 is the label, column 1 is the sentence.
            var columns = new[] {
                new TextLoader.Column("Label", DataKind.Text, 0),
                new TextLoader.Column("Sentence", DataKind.Text, 1)
            };
            var reader = context.Data.CreateTextReader(new TextLoader.Arguments()
            {
                Separator = "\t",
                Column    = columns
            });

            var trainDataView = reader.Read(trainingFile);
            var testDataView  = reader.Read(evaluationFile);

            // Build the pipeline one stage at a time.
            // 1. Map the label value to an ML.NET key type.
            var labelToKey = context.Transforms.Conversion.MapValueToKey("Label");
            // 2. Normalize the raw sentence text.
            var withNormalizedText = labelToKey.Append(context.Transforms.Text.NormalizeText("Sentence", "NormalizedSentence"));
            // 3. Turn the normalized text into the "Features" column.
            var withFeatures = withNormalizedText.Append(context.Transforms.Text.FeaturizeText("NormalizedSentence", "Features"));
            // 4. Add the Naive Bayes trainer.
            var withTrainer = withFeatures.Append(context.MulticlassClassification.Trainers.NaiveBayes());
            // 5. Map the predicted key back to its original label value.
            var trainingPipeline = withTrainer.Append(context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            // Fit on the training data, then score the test data with the fitted model.
            var model           = trainingPipeline.Fit(trainDataView);
            var testPredictions = model.Transform(testDataView);

            // Hand the scored test data to Evaluate (defined elsewhere in the project).
            Evaluate(context, testPredictions);

            // Engine for predicting one sentence at a time.
            var predictionEngine = model.CreatePredictionEngine <SentenceData, PredictionData>(context);

            var sample = new SentenceData();
            sample.Sentence = "Enfunda tu espada, saca tu baraja y prepárate para disfrutar con Hearthstone, un trepidante juego de cartas de estrategia, fácil de aprender y salvajemente divertido. Inicia una partida gratuita y utiliza tus mejores cartas para lanzar hechizos, invocar criaturas y dar órdenes a los héroes de Warcraft en épicos y estratégicos duelos.";

            var result = predictionEngine.Predict(sample);

            Console.WriteLine("Predicted language {0}", result.PredictedLabel);

            // Accounts to check, each paired with the label its tweets are expected to get.
            const int tweetCount = 400;
            var accounts = new[]
            {
                new { ScreenName = "NetflixES", Expected = "es" },
                new { ScreenName = "netflix", Expected = "en" },
                new { ScreenName = "netflixturkiye", Expected = "tr" }
            };

            foreach (var account in accounts)
            {
                TweetTest.UsersTimelineTest(predictionEngine, account.ScreenName, account.Expected, tweetCount);
            }
        }