示例#1
0
        /// <summary>
        /// Downloads the training archive when needed, trains a binary classifier on it,
        /// prints the cross-validated AUC and hands four sample messages to
        /// <c>Program.ClassifyMessage</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Fetch and unpack the archive only when the training file is not on disk yet.
            if (!File.Exists(Program.TrainDataPath))
            {
                using (var webClient = new WebClient())
                {
                    webClient.DownloadFile(@"https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip", "spam.zip");
                }

                ZipFile.ExtractToDirectory("spam.zip", Program.DataDirectoryPath);
            }

            var context = new MLContext();

            // Tab-separated input: column 0 is read as "Label", column 1 as "Message".
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("Label", DataKind.Text, 0),
                    new TextLoader.Column("Message", DataKind.Text, 1)
                }
            };
            var loader = new TextLoader(context, loaderArgs);
            var data = loader.Read(new MultiFileSource(Program.TrainDataPath));

            // Pipeline stages: custom mapping -> text featurization -> SDCA binary trainer.
            var labelMapping = context.Transforms.CustomMapping<MyInput, MyOutput>(MyLambda.MyAction, "MyLambda");
            var featurizer = context.Transforms.Text.FeaturizeText("Message", "Features");
            var trainer = context.BinaryClassification.Trainers.StochasticDualCoordinateAscent();
            var estimator = labelMapping.Append(featurizer).Append(trainer);

            // Report the mean AUC over 5 cross-validation folds.
            var folds = context.BinaryClassification.CrossValidate(data, estimator, numFolds: 5);
            var meanAuc = folds.Select(fold => fold.metrics.Auc).Average();

            Console.WriteLine($"The AUC is {meanAuc}");

            // Train on the full data set.
            var model = estimator.Fit(data);

            // Every transformer except the last one; its output schema is the input
            // schema of the replacement prediction transformer built below.
            var leadingCount = model.Count() - 1;
            var upstream = new TransformerChain<ITransformer>(model.Take(leadingCount).ToArray());

            // Same trained predictor, re-wrapped with a 0.15 threshold on the probability column.
            var thresholded = new BinaryPredictionTransformer<IPredictorProducing<float>>(
                context,
                model.LastTransformer.Model,
                upstream.GetOutputSchema(data.Schema),
                model.LastTransformer.FeatureColumn,
                threshold: 0.15f,
                thresholdColumn: DefaultColumnNames.Probability);

            // Swap the final transformer of the trained chain for the re-thresholded one.
            ITransformer[] steps = model.ToArray();
            steps[steps.Length - 1] = thresholded;

            var adjustedModel = new TransformerChain<ITransformer>(steps);
            var predictor = adjustedModel.MakePredictionFunction<Input, Prediction>(context);

            var samples = new[]
            {
                "That's a great idea. It should work.",
                "Free medicine winner! Congratulations",
                "Yes we should meet over the weekend",
                "You win pills and free entry vouchers"
            };

            foreach (var sample in samples)
            {
                Program.ClassifyMessage(predictor, sample);
            }
        }
        /// <summary>
        /// Runs the model on features taken from <paramref name="passenger"/> and returns the
        /// first destination whose <c>Country</c> contains the predicted label.
        /// </summary>
        /// <param name="passenger">
        /// Source of the <c>Gender</c>, <c>Nationality</c> and <c>DateOfBirth</c> values fed to the model.
        /// </param>
        /// <param name="destinations">Candidate destinations, searched in order.</param>
        /// <returns>
        /// The first matching destination, or the default value when none matches.
        /// </returns>
        public Destination Predict(Passenger passenger, List<Destination> destinations)
        {
            // A new prediction function is built from Model on every call.
            var engine = Model.MakePredictionFunction<PassengerData, DestPrediction>(LocalEnvironment);

            // NOTE(review): DateOfBirth.Value is read unconditionally; presumably DateOfBirth is
            // nullable, in which case a missing value would throw here - confirm with callers.
            var features = new PassengerData()
            {
                Gender      = (float)passenger.Gender,
                Nationality = (float)passenger.Nationality,
                year        = passenger.DateOfBirth.Value.Year
            };

            var prediction = engine.Predict(features);

            return destinations.FirstOrDefault(candidate => candidate.Country.Contains(prediction.PredictedLabels));
        }
示例#3
0
        /// <summary>
        /// Returns the prediction function stored in <c>_predictor</c>, training the model and
        /// filling that field on the first call.
        /// </summary>
        /// <remarks>
        /// The null check on <c>_predictor</c> is not synchronized, so concurrent first calls
        /// may each train a model.
        /// </remarks>
        /// <returns>The cached <c>PredictionFunction</c> for spam classification.</returns>
        private static PredictionFunction<SpamInput, SpamPrediction> GetPredictor()
        {
            // Fast path: the model has already been trained.
            if (_predictor != null)
            {
                return _predictor;
            }

            // Set up the MLContext, which is a catalog of components in ML.NET.
            var mlContext = new MLContext();

            // Create the reader and define which columns from the file should be read.
            var reader = new TextLoader(mlContext, new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Text, 0),
                    new TextLoader.Column("Message", DataKind.Text, 1)
                }
            });

            var data = reader.Read(new MultiFileSource(TrainDataPath));

            // Create the estimator which converts the text label to boolean, featurizes the text,
            // and adds a linear trainer.
            var estimator = mlContext.Transforms.CustomMapping<MyInput, MyOutput>(MyLambda.MyAction, "MyLambda")
                            .Append(mlContext.Transforms.Text.FeaturizeText("Message", "Features"))
                            .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent());

            // No cross-validation here: the AUC values it produced were never read, so the
            // 5-fold run only delayed the first prediction.

            // Train the model on the full dataset.
            var model = estimator.Fit(data);

            // The dataset we have is skewed, as there are many more non-spam messages than spam messages.
            // While the model is relatively good at detecting the difference, this skewness leads it to
            // always say the message is not spam. We deal with this by lowering the threshold of the
            // predictor. In reality, it is useful to look at the precision-recall curve to identify the
            // best possible threshold.
            var inPipe = new TransformerChain<ITransformer>(model.Take(model.Count() - 1).ToArray());
            var lastTransformer = new BinaryPredictionTransformer<IPredictorProducing<float>>(
                mlContext,
                model.LastTransformer.Model,
                inPipe.GetOutputSchema(data.Schema),
                model.LastTransformer.FeatureColumn,
                threshold: 0.15f,
                thresholdColumn: DefaultColumnNames.Probability);

            // Replace the last transformer of the trained chain with the re-thresholded one.
            ITransformer[] parts = model.ToArray();
            parts[parts.Length - 1] = lastTransformer;
            var newModel = new TransformerChain<ITransformer>(parts);

            // Create a PredictionFunction from our model and cache it for later calls.
            _predictor = newModel.MakePredictionFunction<SpamInput, SpamPrediction>(mlContext);

            return _predictor;
        }
示例#4
0
        /// <summary>
        /// Trains a model and immediately uses it to predict a single input.
        /// Training data is obtained through <c>LoadTrainingData</c>, the pipeline through
        /// <c>GetLearningPipeline</c>, and the model through <c>TrainModel</c>.
        /// </summary>
        /// <typeparam name="TSrc">Input (feature) type; must be a reference type.</typeparam>
        /// <typeparam name="TDst">Prediction type; must be a reference type with a parameterless constructor.</typeparam>
        /// <param name="dataPath">Passed to <c>LoadTrainingData</c> (presumably the training file path - confirm).</param>
        /// <param name="parameters">Passed to <c>GetLearningPipeline</c> to build the learning pipeline.</param>
        /// <param name="inputData">Instance to predict on; also passed to <c>LoadTrainingData</c>.</param>
        /// <returns>The prediction produced for <paramref name="inputData"/>.</returns>
        public static TDst Predict<TSrc, TDst>(string dataPath, string[] parameters, TSrc inputData)
            where TSrc : class
            where TDst : class, new()
        {
            MLContext context = new MLContext();

            // Load the data, build the pipeline, then fit the pipeline to the data.
            IDataView trainingSet = context.LoadTrainingData<TSrc>(dataPath, inputData);
            EstimatorChain<KeyToValueMappingTransformer> pipeline = context.GetLearningPipeline(parameters);
            TransformerChain<KeyToValueMappingTransformer> trainedModel = trainingSet.TrainModel(pipeline);

            // Wrap the trained model in a prediction function and score the single input.
            PredictionFunction<TSrc, TDst> engine = trainedModel.MakePredictionFunction<TSrc, TDst>(context);

            return engine.Predict(inputData);
        }
        /// <summary>
        /// Trains an SDCA multi-class model on the comma-separated file at
        /// <paramref name="dataPath"/> and predicts the related article for one article ID.
        /// The model is retrained on every call.
        /// </summary>
        /// <param name="dataPath">Training file with a header row; column 0 is the current article ID, column 1 the label.</param>
        /// <param name="ArticleId">ID of the article to find a related article for.</param>
        /// <returns>The predicted related article ID, cast to <c>int</c>.</returns>
        public static int GetRelatedArticle(string dataPath, int ArticleId)
        {
            // Environment shared by the loader, the estimators and the prediction function.
            var env = new LocalEnvironment();

            // Describe how the training file is parsed: two R4 columns.
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("CurrentArticleId", DataKind.R4, 0),
                    new TextLoader.Column("Label", DataKind.R4, 1)
                }
            };
            var loader = new TextLoader(env, loaderArgs);

            IDataView trainingData = loader.Read(new MultiFileSource(dataPath));

            // Pipeline stages, in order: Term on "Label", Concat into "Features",
            // SDCA multi-class trainer, KeyToValue on "PredictedLabel".
            var labelTerm = new TermEstimator(env, "Label", "Label");
            var featureConcat = new ConcatEstimator(env, "Features", "CurrentArticleId");
            var sdcaTrainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments());
            var labelRestore = new KeyToValueEstimator(env, "PredictedLabel");

            var pipeline = labelTerm
                           .Append(featureConcat)
                           .Append(sdcaTrainer)
                           .Append(labelRestore);

            // Fit the pipeline to the training data.
            var model = pipeline.Fit(trainingData);

            // Score the requested article.
            var engine = model.MakePredictionFunction<RelatedArticleData, RelatedArticlesPrediction>(env);
            var query = new RelatedArticleData()
            {
                CurrentArticleId = (float)ArticleId
            };
            RelatedArticlesPrediction prediction = engine.Predict(query);

            return (int)prediction.PredictedRelatedArticle;
        }
示例#6
0
 /// <summary>
 /// Runs the model held in <c>_model</c> on a single sample.
 /// </summary>
 /// <param name="data">Sample to score.</param>
 /// <returns>The prediction produced for <paramref name="data"/>.</returns>
 public IrisPrediction Predict(IrisData data)
 {
     // A prediction function is created from _model on every call, then applied to the sample.
     var engine = _model.MakePredictionFunction<IrisData, IrisPrediction>(_env);

     return engine.Predict(data);
 }
示例#7
0
        /// <summary>
        /// Trains a binary classifier on the file at <c>DataPath</c>, prints the cross-validated
        /// AUC and then prints a prediction for each of four sample messages.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            DownloadTrainingData();

            // Create the ML.NET context.
            var mlContext = new MLContext();

            // Create the text loader: tab-separated, no header row, two text columns.
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = false,
                Column    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Text, 0),
                    new TextLoader.Column("Message", DataKind.Text, 1)
                }
            };
            TextLoader textLoader = mlContext.Data.TextReader(loaderArgs);

            // Read the data set.
            var fullData = textLoader.Read(DataPath);

            // Feature engineering and choice of training algorithm.
            var estimator = mlContext.Transforms.CustomMapping<MyInput, MyOutput>(MyLambda.MyAction, "MyLambda")
                            .Append(mlContext.Transforms.Text.FeaturizeText("Message", "Features"))
                            .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent());

            // Evaluate the model with 5-fold cross-validation and report the mean AUC.
            var folds = mlContext.BinaryClassification.CrossValidate(fullData, estimator, numFolds: 5);
            var meanAuc = folds.Select(fold => fold.metrics.Auc).Average();

            Console.WriteLine($"The AUC is {meanAuc}");

            // Train on the full data set.
            var model = estimator.Fit(fullData);

            // Everything except the last transformer; its output schema feeds the replacement below.
            var leading = model.Take(model.Count() - 1).ToArray();
            var inPipe = new TransformerChain<ITransformer>(leading);

            // Re-wrap the trained predictor with a 0.15 threshold.
            // NOTE(review): no thresholdColumn is passed, so the constructor's default column is
            // used - confirm that is the column the 0.15 threshold is meant to apply to.
            var rethresholded = new BinaryPredictionTransformer<IPredictorProducing<float>>(
                mlContext,
                model.LastTransformer.Model,
                inPipe.GetOutputSchema(fullData.Schema),
                model.LastTransformer.FeatureColumn,
                threshold: 0.15f);

            // Swap the last transformer of the trained chain for the re-thresholded one.
            ITransformer[] parts = model.ToArray();
            parts[parts.Length - 1] = rethresholded;
            var newModel = new TransformerChain<ITransformer>(parts);

            var predictor = newModel.MakePredictionFunction<SpamData, SpamPrediction>(mlContext);

            string[] testMsgs =
            {
                "That's a great idea. It should work.",
                "free medicine winner! congratulations",
                "Yes we should meet over the weekend!",
                "you win pills and free entry vouchers"
            };

            // Score each sample message and print the result.
            foreach (var message in testMsgs)
            {
                var input = new SpamData();
                input.Message = message;

                var prediction = predictor.Predict(input);

                Console.WriteLine("The message '{0}' is spam? {1}!", input.Message, prediction.IsSpam.ToString());
            }

            Console.WriteLine("Hello World!");
        }
示例#8
0
        /// <summary>
        /// Downloads the training archive when needed, trains a binary classifier, prints the
        /// cross-validated AUC, passes four sample messages to <c>ClassifyMessage</c> and then
        /// waits for a line of console input.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            const string archiveName = "spam.zip";

            // Only download and extract when the training file is missing.
            if (File.Exists(TrainDataPath) == false)
            {
                using (var downloader = new WebClient())
                {
                    downloader.DownloadFile("https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip", archiveName);
                }

                ZipFile.ExtractToDirectory(archiveName, DataDirectoryPath);
            }

            // The MLContext is the entry point to the ML.NET component catalog.
            var mlContext = new MLContext();

            // Reader for a tab-separated file: "Label" in column 0, "Message" in column 1.
            var readerOptions = new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Text, 0),
                    new TextLoader.Column("Message", DataKind.Text, 1)
                }
            };
            var reader = new TextLoader(mlContext, readerOptions);
            var data = reader.Read(new MultiFileSource(TrainDataPath));

            // Estimator chain: custom label mapping, text featurization, SDCA linear trainer.
            var estimator = mlContext.Transforms.CustomMapping<MyInput, MyOutput>(MyLambda.MyAction, "MyLambda")
                            .Append(mlContext.Transforms.Text.FeaturizeText("Message", "Features"))
                            .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent());

            // 5-fold cross-validation yields one set of metrics per fold; print the average AUC.
            var cvResults = mlContext.BinaryClassification.CrossValidate(data, estimator, numFolds: 5);

            Console.WriteLine("The AUC is {0}", cvResults.Select(fold => fold.metrics.Auc).Average());

            // Train the final model on the complete data set.
            var model = estimator.Fit(data);

            // The data set is skewed towards non-spam, which pushes the model to always answer
            // "not spam". Lowering the decision threshold on the probability column counters
            // that. In practice the precision-recall curve is the tool for picking the threshold.
            var parts = model.ToArray();
            var inPipe = new TransformerChain<ITransformer>(model.Take(model.Count() - 1).ToArray());

            parts[parts.Length - 1] = new BinaryPredictionTransformer<IPredictorProducing<float>>(
                mlContext,
                model.LastTransformer.Model,
                inPipe.GetOutputSchema(data.Schema),
                model.LastTransformer.FeatureColumn,
                threshold: 0.15f,
                thresholdColumn: DefaultColumnNames.Probability);

            var newModel = new TransformerChain<ITransformer>(parts);

            // Build the prediction function from the adjusted chain.
            var predictor = newModel.MakePredictionFunction<SpamInput, SpamPrediction>(mlContext);

            // Try a few examples.
            ClassifyMessage(predictor, "That's a great idea. It should work.");
            ClassifyMessage(predictor, "free medicine winner! congratulations");
            ClassifyMessage(predictor, "Yes we should meet over the weekend!");
            ClassifyMessage(predictor, "you win pills and free entry vouchers");

            // Block until a line is entered on the console.
            Console.ReadLine();
        }