public StocasticDualCoordianteAscent(double[][] inputs, double[] labels)
        {
            IDataView data_in = context.Data.LoadFromEnumerable <_data>(GetSampleData(inputs, labels));

            DataOperationsCatalog.TrainTestData partitions = context.Data.TrainTestSplit(data_in);

            // Append and AppendCacheCheckpoint return new estimator chains, so their results
            // must be captured; otherwise only the Concatenate step is fitted and Evaluate
            // later fails for lack of a score column.
            IEstimator<ITransformer> pipeline = context.Transforms.Concatenate("Features", nameof(_data.Features))
                .AppendCacheCheckpoint(context)
                .Append(context.MulticlassClassification.Trainers.SdcaNonCalibrated());

            ITransformer model = pipeline.Fit(partitions.TrainSet);

            //var engine = context.Model.CreatePredictionEngine<Digit, DigitPrediction>(model);
            Console.WriteLine("Evaluating model....");
            IDataView predictions = model.Transform(partitions.TestSet);

            // evaluate the predictions
            MulticlassClassificationMetrics metrics = context.MulticlassClassification.Evaluate(predictions);

            // show evaluation metrics
            Console.WriteLine($"Evaluation metrics");
            Console.WriteLine($"    MicroAccuracy:    {metrics.MicroAccuracy:0.###}");
            Console.WriteLine($"    MacroAccuracy:    {metrics.MacroAccuracy:0.###}");
            Console.WriteLine($"    LogLoss:          {metrics.LogLoss:#.###}");
            Console.WriteLine($"    LogLossReduction: {metrics.LogLossReduction:#.###}");
            Console.WriteLine();
        }
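The constructor above depends on a _data row class and a GetSampleData helper that are not shown. The sketch below is one plausible shape for them, assuming a fixed feature width of 4 and ten label classes (both placeholders, not from the original source); note that SdcaNonCalibrated expects a key-typed label column, and the helper would live in the same class as the constructor.

    using System.Collections.Generic;
    using System.Linq;
    using Microsoft.ML.Data;

    // Hypothetical input row: a fixed-width feature vector plus a key-typed label.
    // The vector width (4) and the class count (10) are placeholders.
    public class _data
    {
        [VectorType(4)]
        public float[] Features { get; set; }

        [KeyType(10)]
        public uint Label { get; set; }
    }

    // Hypothetical helper pairing each feature row with its label.
    static IEnumerable<_data> GetSampleData(double[][] inputs, double[] labels)
    {
        return inputs.Select((row, i) => new _data
        {
            Features = row.Select(v => (float)v).ToArray(),
            Label = (uint)labels[i] + 1   // key values are 1-based; 0 means "missing"
        });
    }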
Example #2
        private static ITransformer TrainModel(MLContext mlContext, IDataView dataView, string dataSetDir, ImageClassificationTrainer.Architecture architecture, int epochs, double testFraction)
        {
            IDataView shuffledData          = mlContext.Data.ShuffleRows(dataView);
            var       preprocessingPipeline = mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: "Label",
                                                                                            outputColumnName: "LabelKey")
                                              .Append(mlContext.Transforms.LoadRawImageBytes(outputColumnName: "Img",
                                                                                             imageFolder: dataSetDir,
                                                                                             inputColumnName: "ImgPath"));    // > InputData.cs
            IDataView preProcData = preprocessingPipeline.Fit(shuffledData).Transform(shuffledData);

            DataOperationsCatalog.TrainTestData trainSplit          = mlContext.Data.TrainTestSplit(data: preProcData, testFraction: testFraction);
            DataOperationsCatalog.TrainTestData validationTestSplit = mlContext.Data.TrainTestSplit(trainSplit.TestSet);

            IDataView trainSet      = trainSplit.TrainSet;
            IDataView validationSet = validationTestSplit.TrainSet;
            IDataView testSet       = validationTestSplit.TestSet;

            var classifierOptions = new ImageClassificationTrainer.Options
            {
                FeatureColumnName = "Img",
                LabelColumnName   = "LabelKey",
                ValidationSet     = validationSet,
                Arch            = architecture,
                MetricsCallback = metrics => Console.WriteLine(metrics),
                TestOnTrainSet  = false,
                ReuseTrainSetBottleneckCachedValues      = true,
                ReuseValidationSetBottleneckCachedValues = true,
                Epoch = epochs,
            };

            var trainingPipeline = mlContext.MulticlassClassification.Trainers.ImageClassification(classifierOptions)
                                   .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            return(trainingPipeline.Fit(trainSet));
        }
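TrainModel expects an IDataView with string columns named "ImgPath" and "Label" (see the InputData.cs reference in the comment). Below is a hedged sketch of that row type and an illustrative call; it assumes the caller sits where the private TrainModel is reachable, and the folder, architecture, and epoch count are placeholders.

    using System.Collections.Generic;
    using Microsoft.ML;
    using Microsoft.ML.Vision;

    // Hypothetical row type matching the column names used by the preprocessing pipeline.
    public class InputData
    {
        public string ImgPath { get; set; }
        public string Label { get; set; }
    }

    // Illustrative call site (values are placeholders).
    static ITransformer TrainFromFolder(IEnumerable<InputData> rows, string dataSetDir)
    {
        var mlContext = new MLContext(seed: 1);
        IDataView dataView = mlContext.Data.LoadFromEnumerable(rows);
        return TrainModel(mlContext, dataView, dataSetDir,
                          ImageClassificationTrainer.Architecture.ResnetV250,
                          epochs: 50, testFraction: 0.2);
    }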
Example #3
        static void Main(string[] args)
        {
            Console.WriteLine("Please enter a date for Prediction dd/MM/yyyy");
            _datePrediction = DateTime.ParseExact(Console.ReadLine(), "dd/MM/yyyy", null);

            //_includeDay = args.Contains("-d");
            //_includeMonth = args.Contains("-m");
            //_includeWeek = args.Contains("-w");


            _mlContext = new MLContext();
            Console.WriteLine("Loading Data set");
            var data = JsonToList();


            _trainingDataView = _mlContext.Data.LoadFromEnumerable <ResultatJsonFormat>(data);

            DataOperationsCatalog.TrainTestData dataSplit = _mlContext.Data.TrainTestSplit(_trainingDataView, 0.99);
            _trainData = dataSplit.TrainSet;
            _testData  = dataSplit.TestSet;

            Console.WriteLine("Prediction for all");
            MainTraitement(true, true, true);
            Console.WriteLine("Prediction for day-week");
            MainTraitement(true, true, false);
            Console.WriteLine("Prediction for week-month");
            MainTraitement(false, true, true);

            // DisplayNumberOccurences(data, _datePrediction);
        }
Example #4
        private static PredictionEngine <TestInput, BinaryTestPrediction> BuildAndTrainBinaryModel(
            MLContext context,
            DataOperationsCatalog.TrainTestData partitions,
            IEstimator <ITransformer> pipeline,
            string labelColumnName)
        {
            Console.WriteLine($"Training for {labelColumnName}");

            var modelPipeline = pipeline.Append(context.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: labelColumnName));
            var model         = modelPipeline.Fit(partitions.TrainSet);
            var predictions   = model.Transform(partitions.TestSet);
            var metrics       = context.BinaryClassification.Evaluate(
                data: predictions,
                labelColumnName: labelColumnName,
                scoreColumnName: "Score");

            Console.WriteLine(labelColumnName);
            Console.WriteLine($"  Accuracy:          {metrics.Accuracy:P2}");
            Console.WriteLine($"  Auc:               {metrics.AreaUnderRocCurve:P2}");
            Console.WriteLine($"  Auprc:             {metrics.AreaUnderPrecisionRecallCurve:P2}");
            Console.WriteLine($"  F1Score:           {metrics.F1Score:P2}");
            Console.WriteLine($"  LogLoss:           {metrics.LogLoss:0.##}");
            Console.WriteLine($"  LogLossReduction:  {metrics.LogLossReduction:0.##}");
            Console.WriteLine($"  PositivePrecision: {metrics.PositivePrecision:0.##}");
            Console.WriteLine($"  PositiveRecall:    {metrics.PositiveRecall:0.##}");
            Console.WriteLine($"  NegativePrecision: {metrics.NegativePrecision:0.##}");
            Console.WriteLine($"  NegativeRecall:    {metrics.NegativeRecall:0.##}");
            Console.WriteLine();


            return(context.Model.CreatePredictionEngine <TestInput, BinaryTestPrediction>(model));
        }
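A hedged sketch of how BuildAndTrainBinaryModel might be driven for several binary label columns over one shared split and featurization step. The TestInput text column, the file name, and the label names below are assumptions, and TestInput is presumed to carry LoadColumn attributes elsewhere.

    var context = new MLContext();

    // Shared split and featurization, reused across several binary targets.
    IDataView data = context.Data.LoadFromTextFile<TestInput>("comments.tsv", hasHeader: true);
    DataOperationsCatalog.TrainTestData partitions = context.Data.TrainTestSplit(data, testFraction: 0.2);
    IEstimator<ITransformer> pipeline = context.Transforms.Text.FeaturizeText("Features", "CommentText");

    // One prediction engine per binary label column (column names are placeholders).
    var toxicEngine  = BuildAndTrainBinaryModel(context, partitions, pipeline, "Toxic");
    var insultEngine = BuildAndTrainBinaryModel(context, partitions, pipeline, "Insult");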
Example #5
        static void Main(string[] args)
        {
            // Create MLContext
            mlContext = new MLContext();

            // Load Data
            var data = mlContext.Data.LoadFromTextFile <ModelInput>("Data/yelp_labelled.tsv", hasHeader: true);

            // Split data into training and test sets
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data);
            IDataView trainingData = dataSplit.TrainSet;
            IDataView testData     = dataSplit.TestSet;

            // Define training pipeline
            IEstimator <ITransformer> trainingPipeline = GetTrainingPipeline();

            // Train model using training pipeline
            ITransformer model = TrainModel(trainingData, trainingPipeline);

            var preview = model.Transform(testData).Preview();

            // Evaluate the model
            Evaluate(testData, model);

            // Save the model
            mlContext.Model.Save(model, trainingData.Schema, "MLModel.zip");

            Console.ReadKey();
        }
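Main above leans on GetTrainingPipeline, TrainModel, and Evaluate helpers that are not shown. A minimal sketch follows, under the assumption that ModelInput exposes a string column named "Text" and a boolean "Label" (both assumptions, not from the original source).

    static IEstimator<ITransformer> GetTrainingPipeline()
    {
        // Featurize the review text and train a binary classifier on it.
        return mlContext.Transforms.Text.FeaturizeText("Features", "Text")
            .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression());
    }

    static ITransformer TrainModel(IDataView trainingData, IEstimator<ITransformer> pipeline)
    {
        return pipeline.Fit(trainingData);
    }

    static void Evaluate(IDataView testData, ITransformer model)
    {
        IDataView predictions = model.Transform(testData);
        var metrics = mlContext.BinaryClassification.Evaluate(predictions);
        Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}  AUC: {metrics.AreaUnderRocCurve:P2}  F1: {metrics.F1Score:P2}");
    }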
Example #6
        public void CreateModel(string trainDataFilePath, string modelFilePath)
        {
            // Load Data
            IDataView trainingDataView = _mlContext.Data.LoadFromTextFile <ModelInput>(
                path: trainDataFilePath,
                hasHeader: true,
                separatorChar: '\t',
                allowQuoting: true,
                allowSparse: false);

            // Build training pipeline
            IEstimator <ITransformer> trainingPipeline = BuildTrainingPipeline(_mlContext);

            // Split data into training and test sets
            DataOperationsCatalog.TrainTestData dataSplit = _mlContext.Data.TrainTestSplit(trainingDataView);
            IDataView trainingData = dataSplit.TrainSet;
            IDataView testData     = dataSplit.TestSet;

            // Evaluate quality of Model
            Evaluate(_mlContext, trainingData, trainingPipeline);

            // Train Model
            ITransformer mlModel = TrainModel(trainingDataView, trainingPipeline);

            // Evaluate quality of Model
            EvaluateTestData(_mlContext, testData, mlModel);

            // Save model
            SaveModel(_mlContext, mlModel, modelFilePath, trainingDataView.Schema);
        }
Example #7
        public void Train(string trainingFileName)
        {
            //1. First, we check to make sure that the training data filename exists
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName}");

                return;
            }

            //2. Use the LoadFromTextFile helper method that ML.NET provides to assist with
            //the loading of text files into an IDataView object
            IDataView trainingDataView = MlContext.Data.LoadFromTextFile <RestaurantFeedback>(trainingFileName);

            //3. Use the TrainTestSplit method that ML.NET provides to create a test set from the
            //main training data
            DataOperationsCatalog.TrainTestData dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.2);

            //4. Firstly, we create the pipeline
            Microsoft.ML.Transforms.Text.TextFeaturizingEstimator dataProcessPipeline = MlContext.Transforms.Text.FeaturizeText(
                outputColumnName: "Features",
                inputColumnName: nameof(RestaurantFeedback.Text));

            //5. Next, we instantiate our Trainer class
            Microsoft.ML.Trainers.SdcaLogisticRegressionBinaryTrainer sdcaRegressionTrainer = MlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: nameof(RestaurantFeedback.Label),
                featureColumnName: "Features");

            //6. Then, we complete the pipeline by appending the trainer we instantiated
            //previously
            Microsoft.ML.Data.EstimatorChain <Microsoft.ML.Data.BinaryPredictionTransformer <Microsoft.ML.Calibrators.CalibratedModelParametersBase <Microsoft.ML.Trainers.LinearBinaryModelParameters, Microsoft.ML.Calibrators.PlattCalibrator> > > trainingPipeline = dataProcessPipeline.Append(sdcaRegressionTrainer);

            //7. Next, we train the model with the dataset we created earlier in the chapter
            ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

            //8. We save our newly created model to the filename specified, matching the
            //training set's schema
            MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

            //9. Now, we transform the test set we created earlier with our newly trained model
            IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);

            //10. Finally, we take the testSetTransform created previously and
            //pass it into the BinaryClassification class's Evaluate method.
            //This method allows us to generate model metrics. We then print the main metrics using the
            //trained model with the test set. We will dive into these properties specifically in the
            //Evaluating the Model section of this chapter.
            Microsoft.ML.Data.CalibratedBinaryClassificationMetrics modelMetrics = MlContext.BinaryClassification.Evaluate(
                data: testSetTransform,
                labelColumnName: nameof(RestaurantFeedback.Label),
                scoreColumnName: nameof(RestaurantPrediction.Score));

            Console.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}{Environment.NewLine}" +
                              $"Area Under Precision Recall Curve: {modelMetrics.AreaUnderPrecisionRecallCurve:P2}{Environment.NewLine}" +
                              $"Accuracy: {modelMetrics.Accuracy:P2}{Environment.NewLine}" +
                              $"F1Score: {modelMetrics.F1Score:P2}{Environment.NewLine}" +
                              $"Positive Recall: {modelMetrics.PositiveRecall:#.##}{Environment.NewLine}" +
                              $"Negative Recall: {modelMetrics.NegativeRecall:#.##}{Environment.NewLine}");
        }
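The Train method above references RestaurantFeedback and RestaurantPrediction classes defined elsewhere in the chapter. A plausible sketch is below; the LoadColumn ordering and the prediction members are assumptions.

    using Microsoft.ML.Data;

    public class RestaurantFeedback
    {
        [LoadColumn(0)]
        public bool Label { get; set; }

        [LoadColumn(1)]
        public string Text { get; set; }
    }

    public class RestaurantPrediction
    {
        [ColumnName("PredictedLabel")]
        public bool Prediction { get; set; }

        public float Probability { get; set; }

        public float Score { get; set; }
    }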
Example #8
        public static DataOperationsCatalog.TrainTestData LoadData(MLContext mlContext)
        {
            IDataView dataView = mlContext.Data.LoadFromTextFile <SentimentData>(_dataPath, hasHeader: false);

            DataOperationsCatalog.TrainTestData splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

            return(splitDataView);
        }
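LoadData assumes a SentimentData input class mapped onto the headerless text file. A typical shape would be the following; the column order and member names are assumptions, not taken from the original source.

    using Microsoft.ML.Data;

    public class SentimentData
    {
        [LoadColumn(0)]
        public string SentimentText;

        [LoadColumn(1), ColumnName("Label")]
        public bool Sentiment;
    }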
Example #9
 /// <summary>
 /// Loads the data
 /// </summary>
 /// <param name="list"></param>
 private void LoadData(IEnumerable <Goods> list)
 {
     _ml = new MLContext(0);
     // load the data
     _tranDataView = _ml.Data.LoadFromEnumerable(list);
     // split the dataset into training and test sets
     _dataView = _ml.Data.TrainTestSplit(_tranDataView, 0.2);
 }
Example #10
        /// <summary>
        /// Loads the data
        /// </summary>
        /// <param name="list"></param>
        private void LoadData(List <BGoods> list)
        {
            _ml = new MLContext();
            // load the data
            IDataView dataView = _ml.Data.LoadFromEnumerable(list);

            // split the dataset into training and test sets
            _dataView = _ml.Data.TrainTestSplit(dataView, 0.2);
        }
Example #11
        /// <summary>
        /// Loads the dataset and splits it into a training set and a test set
        /// 20% test set (constant)
        /// </summary>
        /// <returns></returns>
        private DataOperationsCatalog.TrainTestData LoadData()
        {
            var       path     = ResourcesPath + "yelp_labelled.txt";
            IDataView dataView = mlContext.Data.LoadFromTextFile <SentimentData>(path, hasHeader: false);

            DataOperationsCatalog.TrainTestData splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

            return(splitDataView);
        }
Example #12
        //Load data

        public Model(List <TemperatureData> data)
        {
            trainData      = context.Data.LoadFromEnumerable(data);
            testTrainSplit = context.Data.TrainTestSplit(trainData, testFraction: 0.30);
            pipeline       = context.Transforms.Concatenate("Features", new[] { "Stores", "AlarmItems", "TempMean", "Humidity", "Pressure", "TempMin", "TempMax" })
                             .Append(context.Regression.Trainers.FastTreeTweedie());
            model          = pipeline.Fit(testTrainSplit.TrainSet);
            predictionFunc = context.Model.CreatePredictionEngine <TemperatureData, AlarmPressurePrediction>(model);
        }
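The constructor above assumes TemperatureData and AlarmPressurePrediction classes that are not shown. A hedged sketch follows: the property names mirror the concatenated feature columns, while the label and score column mapping is an assumption.

    using Microsoft.ML.Data;

    public class TemperatureData
    {
        public float Label { get; set; }        // target the FastTreeTweedie regressor learns
        public float Stores { get; set; }
        public float AlarmItems { get; set; }
        public float TempMean { get; set; }
        public float Humidity { get; set; }
        public float Pressure { get; set; }
        public float TempMin { get; set; }
        public float TempMax { get; set; }
    }

    public class AlarmPressurePrediction
    {
        [ColumnName("Score")]
        public float Prediction { get; set; }
    }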
Example #13
    internal static async Task Main(string[] args)
    {
        if (args is null)
        {
            throw new ArgumentNullException(nameof(args));
        }

        IConfigurationRoot? configuration = new ConfigurationBuilder()
                                             .AddJsonFile("appsettings.json")
                                             .AddEnvironmentVariables()
                                             .AddUserSecrets(typeof(Program).Assembly)
                                             .Build();

        OwntracksSettings?         owntracksSettings = configuration.GetSection("Owntracks").Get<OwntracksSettings>();
        IEnumerable<ML.Location>?  locations         = await QueryDataAsync(owntracksSettings).ConfigureAwait(false);

        var mlContext = new MLContext(seed: 1);

        IDataView? dataView = mlContext.Data.LoadFromEnumerable(LocationsToTransitionsConverter.Convert(locations));

        DataOperationsCatalog.TrainTestData trainTestData = mlContext.Data.TrainTestSplit(dataView);

        EstimatorChain<Microsoft.ML.Calibrators.CalibratorTransformer<Microsoft.ML.Calibrators.PlattCalibrator>>? pipeline =
            mlContext.Transforms.Categorical.OneHotEncoding(new[]
            {
                new InputOutputColumnPair(nameof(Transition.User), nameof(Transition.User)),
                new InputOutputColumnPair(nameof(Transition.Device), nameof(Transition.Device)),
                new InputOutputColumnPair(nameof(Transition.FromDayOfWeek), nameof(Transition.FromDayOfWeek)),
                new InputOutputColumnPair(nameof(Transition.FromHours), nameof(Transition.FromHours)),
                new InputOutputColumnPair(nameof(Transition.FromGeohash), nameof(Transition.FromGeohash)),
                new InputOutputColumnPair(nameof(Transition.ToDayOfWeek), nameof(Transition.ToDayOfWeek)),
                new InputOutputColumnPair(nameof(Transition.ToHours), nameof(Transition.ToHours)),
                new InputOutputColumnPair(nameof(Transition.ToGeohash), nameof(Transition.ToGeohash))
            }, OneHotEncodingEstimator.OutputKind.Binary)
            .Append(mlContext.Transforms.Concatenate("Features",
                                                     nameof(Transition.User), nameof(Transition.Device),
                                                     nameof(Transition.FromDayOfWeek), nameof(Transition.FromHours), nameof(Transition.FromGeohash),
                                                     nameof(Transition.ToDayOfWeek), nameof(Transition.ToHours), nameof(Transition.ToGeohash)))
            .Append(mlContext.BinaryClassification.Trainers.LinearSvm())
            .Append(mlContext.BinaryClassification.Calibrators.Platt());

        Console.WriteLine("Training model...");
        TransformerChain<Microsoft.ML.Calibrators.CalibratorTransformer<Microsoft.ML.Calibrators.PlattCalibrator>>? model = pipeline.Fit(trainTestData.TrainSet);

        Console.WriteLine("Predicting...");

        // Now that the model is trained, we want to test its prediction results, which is done by using a test dataset
        IDataView? predictions = model.Transform(trainTestData.TestSet);

        // Now that we have the predictions, calculate the metrics of those predictions and output the results.
        CalibratedBinaryClassificationMetrics? metrics = mlContext.BinaryClassification.Evaluate(predictions);

        PrintBinaryClassificationMetrics(metrics);

        MLSettings? mlSettings = configuration.GetSection("ML").Get<MLSettings>();

        mlContext.Model.Save(model, dataView.Schema, mlSettings.ModelPath);
    }
Example #14
        //Loads the data
        public void LoadData()
        {
            //Read all the data
            IDataView allData = _mlContext.Data.LoadFromTextFile <TranscoderData>(path: _dataPath, hasHeader: true, separatorChar: ',');

            //split the data into test and training
            DataOperationsCatalog.TrainTestData splitData = _mlContext.Data.TrainTestSplit(allData, testFraction: 0.3, seed: 1);
            _trainData = splitData.TrainSet;
            _testData  = splitData.TestSet;
        }
Example #15
        public static IEnumerable <Eyeglasses> Calculate(string sourceData, int numberOfSimilar)
        {
            if (numberOfSimilar > 8 || numberOfSimilar < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(numberOfSimilar), "Number of similar items must be between 1 and 8");
            }

            // Load the data
            var eyeglassesType = DataExtractor.ParseEyeglassesType(sourceData);
            var parsedData     = DataExtractor.ParseEyeglassesData(sourceData, eyeglassesType).ToArray();
            var mlContext      = new MLContext(1);
            var data           = mlContext.Data.LoadFromEnumerable(parsedData);

            // Transformations
            var dataProcessPipeline = mlContext.Transforms.Categorical
                                      .OneHotEncoding(nameof(Eyeglasses.Sex), nameof(Eyeglasses.Sex))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(Eyeglasses.Shape)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(Eyeglasses.Material)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(Eyeglasses.Color)))
                                      .Append(mlContext.Transforms.Categorical.OneHotEncoding(nameof(Eyeglasses.RimGlasses)))
                                      .Append(mlContext.Transforms.Concatenate("Features", nameof(Eyeglasses.Sex), nameof(Eyeglasses.Shape), nameof(Eyeglasses.Material), nameof(Eyeglasses.Color), nameof(Eyeglasses.RimGlasses)));

            DataOperationsCatalog.TrainTestData trainTestData = mlContext.Data.TrainTestSplit(data);

            // Training
            int numberOfClusters = parsedData.Length / (numberOfSimilar * 6);
            var trainer          = mlContext.Clustering.Trainers.KMeans(featureColumnName: "Features", numberOfClusters: numberOfClusters);
            var trainingPipeline = dataProcessPipeline.Append(trainer);
            var trainedModel     = trainingPipeline.Fit(trainTestData.TrainSet);

            // Compute cluster assignments
            var predictionEngine = mlContext.Model.CreatePredictionEngine <Eyeglasses, Prediction>(trainedModel);

            var clusters = new Dictionary <uint, List <Eyeglasses> >();

            foreach (var eyeglasses in parsedData)
            {
                var result = predictionEngine.Predict(eyeglasses);

                if (!clusters.ContainsKey(result.SelectedClusterId))
                {
                    clusters[result.SelectedClusterId] = new List <Eyeglasses>();
                }

                eyeglasses.Cluster = result.SelectedClusterId;
                clusters[result.SelectedClusterId].Add(eyeglasses);
            }

            // Build the result set
            foreach (var eyeglasses in parsedData)
            {
                eyeglasses.SimilarEyeglasses = clusters[eyeglasses.Cluster].PickRandomSimilar(eyeglasses, numberOfSimilar).ToArray();
                yield return(eyeglasses);
            }
        }
Example #16
        private static void PreProcessData()
        {
            estimator = _context.Transforms.Conversion
                        .MapValueToKey("Encoded_UserID", nameof(InputModel.UserId))
                        .Append(_context.Transforms.Conversion.MapValueToKey("Encoded_Book",
                                                                             nameof(InputModel.ISBN)));

            var preProcessData = estimator.Fit(_dataView).Transform(_dataView);

            splitData = _context.Data.TrainTestSplit(preProcessData, 0.05);
        }
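PreProcessData only key-encodes the user and book IDs and splits the data. A common follow-on step, which is not shown in the original and is therefore only a hedged sketch, is a matrix-factorization recommender over those key columns; it requires the Microsoft.ML.Recommender package, and the "Rating" label column and the hyperparameter values are assumptions.

    using Microsoft.ML.Trainers;

    var options = new MatrixFactorizationTrainer.Options
    {
        MatrixColumnIndexColumnName = "Encoded_UserID",
        MatrixRowIndexColumnName    = "Encoded_Book",
        LabelColumnName             = "Rating",   // assumed property on InputModel
        NumberOfIterations          = 20,
        ApproximationRank           = 100
    };

    var trainer = _context.Recommendation().Trainers.MatrixFactorization(options);
    var model   = estimator.Append(trainer).Fit(splitData.TrainSet);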
Example #17
 public AnalyzesSentiment(string path = null)
 {
     if (path != null)
     {
         ResourcesPath = path;
     }
     mlContext = new MLContext();
     DataOperationsCatalog.TrainTestData splitDataView = LoadData();
     BuildAndTrainModel(splitDataView.TrainSet);
     Evaluate(splitDataView.TestSet);
 }
Example #18
        /// <summary>
        /// Loads the data
        /// </summary>
        private void LoadData()
        {
            _mlContext = new MLContext();
            // load the data
            IDataView dataView = _mlContext.Data
                                 .LoadFromTextFile <qingxubase>(_filepath
                                                                , hasHeader: false);

            // split the dataset for model training and testing
            _qingxuDataView = _mlContext.Data
                              .TrainTestSplit(dataView, testFraction: 0.2);
        }
Example #19
        private static void DiamondML()
        {
            var mlContext = new MLContext();

            DataRelativePath  = $"{BaseDatasetsRelativePath}/diamond_data.csv";
            DataPath          = GetAbsolutePath(DataRelativePath);
            ModelRelativePath = $"{BaseModelsRelativePath}/whitediamondModel.zip";
            ModelPath         = GetAbsolutePath(ModelRelativePath);

            // STEP 1: Common data loading configuration
            IDataView dataView = mlContext.Data.LoadFromTextFile <DiamondData>(DataPath, separatorChar: ',', hasHeader: true);

            DataOperationsCatalog.TrainTestData trainTestSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            IDataView trainingData = trainTestSplit.TrainSet;
            IDataView testData     = trainTestSplit.TestSet;

            // STEP 2: Common data process configuration with pipeline data transformations
            //var dataProcessPipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Features", inputColumnName: nameof(DiamondData.LumiStdHueMean));
            var dataProcessPipeline = mlContext.Transforms.Concatenate("Features", "LumiStd", "HueMean");

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            var trainer          = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label", featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // STEP 4: Train the model fitting to the DataSet
            ITransformer trainedModel = trainingPipeline.Fit(trainingData);

            // STEP 5: Evaluate the model and show accuracy stats
            var predictions = trainedModel.Transform(testData);
            var metrics     = mlContext.BinaryClassification.Evaluate(data: predictions, labelColumnName: "Label", scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            mlContext.Model.Save(trainedModel, trainingData.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            // TRY IT: Make a single test prediction, loading the model from .ZIP file
            DiamondData sampleDiamond = new DiamondData {
                LumiStd = 0.1169f, HueMean = 0.13599f
            };

            // Create prediction engine related to the loaded trained model
            var predEngine = mlContext.Model.CreatePredictionEngine <DiamondData, DiamondPrediction>(trainedModel);

            // Score
            var resultprediction = predEngine.Predict(sampleDiamond);

            Console.WriteLine($"=============== Single Prediction  ===============");
            Console.WriteLine($"{sampleDiamond.LumiStd}, {sampleDiamond.HueMean} | Prediction: {(Convert.ToBoolean(resultprediction.Prediction) ? "white diamond" : "Non white diamond")} Probability of being white diamond: {resultprediction.Probability} ");
            Console.WriteLine($"================End of Process.Hit any key to exit==================================");
        }
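DiamondML loads diamond_data.csv into a DiamondData class and scores with a DiamondPrediction class, neither of which is shown. The sketch below is a plausible shape; the column order and the extra prediction members are assumptions.

    using Microsoft.ML.Data;

    public class DiamondData
    {
        [LoadColumn(0)]
        public bool Label { get; set; }

        [LoadColumn(1)]
        public float LumiStd { get; set; }

        [LoadColumn(2)]
        public float HueMean { get; set; }
    }

    public class DiamondPrediction
    {
        [ColumnName("PredictedLabel")]
        public bool Prediction { get; set; }

        public float Probability { get; set; }

        public float Score { get; set; }
    }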
Example #20
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            IDataView dataView = mlContext.Data.LoadFromTextFile <SentimentIssue>(DataPath, hasHeader: true);

            DataOperationsCatalog.TrainTestData trainTestSplit =
                mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            IDataView trainingData = trainTestSplit.TrainSet;
            IDataView testData     = trainTestSplit.TestSet;

            // STEP 2: Common data process configuration with pipeline data transformations
            TextFeaturizingEstimator dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features",
                                                                                                   inputColumnName: nameof(SentimentIssue.Text));

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            ConsoleHelper.PeekDataViewInConsole(mlContext, dataView, dataProcessPipeline, 2);
            //Peek the transformed features column
            //ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", dataView, dataProcessPipeline, 1);

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            SdcaLogisticRegressionBinaryTrainer trainer =
                mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label",
                                                                               featureColumnName: "Features");
            EstimatorChain <BinaryPredictionTransformer <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > > trainingPipeline = dataProcessPipeline.Append(trainer);

            //Measure training time
            Stopwatch watch = Stopwatch.StartNew();

            // STEP 4: Train the model fitting to the DataSet
            Console.WriteLine("=============== Training the model ===============");
            ITransformer trainedModel = trainingPipeline.Fit(trainingData);

            //Stop measuring time
            watch.Stop();
            long elapsedMs = watch.ElapsedMilliseconds;

            Console.WriteLine($"***** Training time: {elapsedMs / 1000} seconds *****");

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            IDataView predictions = trainedModel.Transform(testData);
            CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(data: predictions, labelColumnName: "Label",
                                                                                                    scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            mlContext.Model.Save(trainedModel, trainingData.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return(trainedModel);
        }
Example #21
        public void Fit(string trainingFileName)
        {
            if (!File.Exists(trainingFileName))
            {
                throw new FileNotFoundException($"File {trainingFileName} doesn't exist.");
            }

            _dataSplit = LoadAndPrepareData(trainingFileName);
            var dataProcessPipeline = BuildDataProcessingPipeline();
            var trainingPipeline    = dataProcessPipeline.Append(_model);

            _trainedModel = trainingPipeline.Fit(_dataSplit.TrainSet);
        }
Example #22
        static void Main(string[] args)
        {
            MLContext mlContext = new MLContext();

            DataOperationsCatalog.TrainTestData splitDataView = LoadData(mlContext);
            ITransformer model = BuildAndTrainModel(mlContext, splitDataView.TrainSet);

            Evaluate(mlContext, model, splitDataView.TestSet);

            UseModelWithSingleItem(mlContext, model);

            //UseLoadedModelWithBatchItems(mlContext);
        }
Example #23
        double GetRSquared(IEstimator <ITransformer> pipeline)
        {
            DataOperationsCatalog.TrainTestData dataSplit = MlContext.Data.TrainTestSplit(AllData, testFraction: 0.2);
            IDataView trainData = dataSplit.TrainSet;
            IDataView testData  = dataSplit.TestSet;

            var model = pipeline.Fit(trainData);

            IDataView         transformedData = model.Transform(testData);
            RegressionMetrics metrics         = MlContext.Regression.Evaluate(transformedData);

            return(metrics.RSquared);
        }
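A hedged example of how GetRSquared might be used from the same class to compare candidate pipelines; the feature column names are placeholders, and because the TrainTestSplit inside GetRSquared is unseeded, each call draws a fresh 80/20 split.

    var baseline = MlContext.Transforms.Concatenate("Features", "Age", "Income")
                   .Append(MlContext.Regression.Trainers.Sdca());

    var normalized = MlContext.Transforms.Concatenate("Features", "Age", "Income")
                     .Append(MlContext.Transforms.NormalizeMinMax("Features"))
                     .Append(MlContext.Regression.Trainers.Sdca());

    Console.WriteLine($"Baseline R^2:   {GetRSquared(baseline):0.###}");
    Console.WriteLine($"Normalized R^2: {GetRSquared(normalized):0.###}");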
Example #24
        private ITransformer CreateModel(MLContext mlContext)
        {
            var workspacePath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString());

            Directory.CreateDirectory(workspacePath);


            IEnumerable <ImageData> images = LoadImagesFromDirectory(_options.TrainingPath, useFolderNameAsLabel: true);

            IDataView imageData    = mlContext.Data.LoadFromEnumerable(images);
            IDataView shuffledData = mlContext.Data.ShuffleRows(imageData);

            IDataView preProcessedData = GetPreprocessingPipeline(mlContext)
                                         .Fit(shuffledData)
                                         .Transform(shuffledData);

            DataOperationsCatalog.TrainTestData trainSplit =
                mlContext.Data.TrainTestSplit(data: preProcessedData, testFraction: 0.3);
            DataOperationsCatalog.TrainTestData validationTestSplit = mlContext.Data.TrainTestSplit(trainSplit.TestSet);

            IDataView trainSet      = trainSplit.TrainSet;
            IDataView validationSet = validationTestSplit.TrainSet;
            IDataView testSet       = validationTestSplit.TestSet;

            var classifierOptions = new ImageClassificationTrainer.Options()
            {
                Epoch             = 600,
                FeatureColumnName = "Image",
                LabelColumnName   = "LabelAsKey",
                ValidationSet     = validationSet,
                Arch           = ImageClassificationTrainer.Architecture.ResnetV2101,
                TestOnTrainSet = true,
                ReuseTrainSetBottleneckCachedValues      = true,
                ReuseValidationSetBottleneckCachedValues = true,
                WorkspacePath = workspacePath
            };

            classifierOptions.MetricsCallback = metrics => Serilog.Log.Information(metrics.ToString());

            var trainingPipeline = mlContext.MulticlassClassification.Trainers
                                   .ImageClassification(classifierOptions)
                                   .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            var trainedModel = trainingPipeline.Fit(trainSet);

            mlContext.Model.Save(trainedModel, imageData.Schema, _options.ModelPath);

            Directory.Delete(workspacePath, true);

            return(trainedModel);
        }
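CreateModel calls a GetPreprocessingPipeline helper that is not shown. Given the "Image" and "LabelAsKey" column names used in classifierOptions, it plausibly looks like the sketch below; the "ImagePath" input column name is an assumption.

    private IEstimator<ITransformer> GetPreprocessingPipeline(MLContext mlContext)
    {
        // Key-encode the folder-derived label and load the raw image bytes.
        return mlContext.Transforms.Conversion
            .MapValueToKey(outputColumnName: "LabelAsKey", inputColumnName: "Label")
            .Append(mlContext.Transforms.LoadRawImageBytes(
                outputColumnName: "Image",
                imageFolder: _options.TrainingPath,
                inputColumnName: "ImagePath"));
    }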
Example #25
        public static void TrainConveyor(AppConfig appConfig, string modelCreateQuery, string TrainerType)
        {
            string    _modelSaveBasePath = TrainerType + "Model.zip";
            string    _modelSaveFullPath = Path.Combine(Environment.CurrentDirectory, "Data", _modelSaveBasePath);
            MLContext mlContext          = new MLContext();
            IDataView dataView           = Program.DataExtract(mlContext, appConfig, modelCreateQuery);

            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            IDataView trainData    = dataSplit.TrainSet;
            IDataView testData     = dataSplit.TestSet;
            var       basePipeline = BasePipelineBuilder(mlContext, trainData);

            ITransformer waitTimeModel = null;

            switch (TrainerType)
            {
            case "LightGbm":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithLightGbm(mlContext, trainData, basePipeline);
                break;

            case "FastTree":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithFastTree(mlContext, trainData, basePipeline);
                break;

            case "LbfgsPoissonRegression":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithLbfgsPoissonRegression(mlContext, trainData, basePipeline);
                break;

            case "FastTreeTweedie":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithFastTreeTweedie(mlContext, trainData, basePipeline);
                break;

            case "FastForest":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithFastForest(mlContext, trainData, basePipeline);
                break;

            case "Sdca":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithSdca(mlContext, trainData, basePipeline);
                break;

            case "Ols":
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithOls(mlContext, trainData, basePipeline);
                break;

            default:
                waitTimeModel = ForecastTrainer.WaitTimeTrainWithLightGbm(mlContext, trainData, basePipeline);
                break;
            }
            mlContext.Model.Save(waitTimeModel, dataView.Schema, _modelSaveFullPath);
            EvaluateWaitTime(mlContext, waitTimeModel, testData, TrainerType);
        }
Example #26
        public static void Initialize()
        {
            Context = new MLContext(seed: 0);

            IDataView data_view_full = null;

            data_view_full = Context
                             .Data
                             .LoadFromTextFile <SomatotypeInputData>
                             (
                Data.File,
                hasHeader: true,
                separatorChar: ','
                             );

            Data.DataViewSchema   = data_view_full.Schema;
            Data.DataViewSplit    = Context.Data.TrainTestSplit(data_view_full, testFraction: 0.25);
            Data.DataViewTraining = Data.DataViewSplit.TrainSet;
            Data.DataViewTesting  = Data.DataViewSplit.TestSet;


            Data.TransformPipeline = Context.Transforms.CopyColumns
                                     (
                outputColumnName: "Label",
                inputColumnName: "EndomorphicComponent"
                                     );

            // Append returns a new estimator chain rather than mutating the existing pipeline,
            // so the result must be captured (this assumes Data.TransformPipeline is declared
            // as IEstimator<ITransformer>).
            Data.TransformPipeline = Data.TransformPipeline.Append
            (
                Context.Transforms.Concatenate
                (
                    "Features",
                    "Height",
                    "Mass",
                    "BreadthHumerus",
                    "BreadthFemur",
                    "GirthArmUpper",
                    "GirthCalfStanding",
                    "SkinfoldSubscapular",
                    "SkinfoldTriceps",
                    "SkinfoldSupraspinale",
                    "SkinfoldMedialCalf"
                )
            );

            // Fit the transform pipeline on the training split so Data.Transformer holds a
            // usable transformer instead of a null placeholder (assumes Data.Transformer is
            // typed as ITransformer).
            Data.Transformer = Data.TransformPipeline.Fit(Data.DataViewTraining);
        }
Example #27
        static void Main(string[] args)
        {
            Console.WriteLine("Hello World 'Microsoft.ML'!");

            List <FormulaData> pointsValues = Enumerable
                                              .Range(-1, 100)
                                              .Select(value => { return(new FormulaData(value, value * 2 - 1)); })
                                              .ToList();

            // Create MLContext
            var mlContext = new MLContext(1);

            // Load Data
            IDataView data = mlContext.Data.LoadFromEnumerable <FormulaData>(pointsValues);

            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
            IDataView trainData = dataSplit.TrainSet;
            IDataView testData  = dataSplit.TestSet;

            // Define trainer options.
            var options = new SdcaRegressionTrainer.Options
            {
                LabelColumnName   = "Label",    //nameof(FormulaData.Y),
                FeatureColumnName = "Features", //nameof(FormulaData.X),
                // Make the convergence tolerance tighter. It effectively leads to more training iterations.
                ConvergenceTolerance = 0.02f,
                // Increase the maximum number of passes over training data. Similar to ConvergenceTolerance,
                // this value specifies the hard iteration limit on the training algorithm.
                MaximumNumberOfIterations = 30,
                // Increase learning rate for bias.
                BiasLearningRate = 0.1f
            };

            // Define the StochasticDualCoordinateAscent (SDCA) regression estimator
            var sdcaEstimator = mlContext.Regression.Trainers.Sdca(options);

            // Build machine learning model
            var trainedModel = sdcaEstimator.Fit(trainData);

            // Use trained model to make inferences on test data
            IDataView testDataPredictions = trainedModel.Transform(testData);

            // Extract model metrics and get RSquared
            RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);
            double            rSquared            = trainedModelMetrics.RSquared;

            Console.WriteLine($"rSquared: {rSquared}");
        }
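The trainer options above name the "Label" and "Features" columns explicitly, so FormulaData presumably maps Y and X onto those columns. A minimal sketch follows; the attribute mapping is an assumption, not from the original source.

    using Microsoft.ML.Data;

    public class FormulaData
    {
        [ColumnName("Features"), VectorType(1)]
        public float[] X { get; set; }

        [ColumnName("Label")]
        public float Y { get; set; }

        public FormulaData(float x, float y)
        {
            X = new[] { x };
            Y = y;
        }
    }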
Example #28
        public void TrainModelIfNotExists()
        {
            try
            {
                GenerateDataset();
                string modelPath = Path.Combine(Environment.CurrentDirectory, "Data", "trainedModel.zip");
                if (File.Exists(modelPath))
                {
                    Logger.LogInformation($"Trained model found at {InputPath}. Skipping training.");
                    return;
                }

                var result     = GenerateNames();
                var textLoader = MLContext.Data.CreateTextLoader(result.Item1, hasHeader: true, separatorChar: ',');
                var data       = textLoader.Load(InputPath);
                DataOperationsCatalog.TrainTestData trainTestData = MLContext.Data.TrainTestSplit(data);
                var trainingDataView = trainTestData.TrainSet;
                var testingDataView  = trainTestData.TestSet;

                var options = new KMeansTrainer.Options
                {
                    NumberOfClusters          = NumberOfClusters,
                    OptimizationTolerance     = 1e-6f,
                    NumberOfThreads           = 1,
                    MaximumNumberOfIterations = 10,
                    FeatureColumnName         = "Features"
                };

                var dataProcessPipeline = MLContext
                                          .Transforms
                                          .Concatenate("Features", result.Item2)
                                          .Append(MLContext.Clustering.Trainers.KMeans(options));
                // Fit on the training split so the held-out test split stays unseen.
                var trainedModel = dataProcessPipeline.Fit(trainingDataView);

                IDataView predictions = trainedModel.Transform(testingDataView);
                var       metrics     = MLContext.Clustering.Evaluate(predictions, scoreColumnName: "Score", featureColumnName: "Features");

                // Save/persist the trained model to a .ZIP file
                MLContext.Model.Save(trainedModel, data.Schema, modelPath);

                Logger.LogInformation($"The model was saved to {modelPath}");
            }
            catch (Exception ex)
            {
                Logger.LogError(ex, "Model training operation failed.");
                throw;
            }
        }
Example #29
        public void Entrenar()
        {
            List <InputData> lista = ModeloAprendizaje.Estructuras2List();

            System.Console.WriteLine("Emprezando Entrenamiento: " + this.Descripcion());
            IDataView dataview = mlContext.Data.LoadFromEnumerable <InputData>(lista);


            // hold out a fraction of the data (_level) for testing
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(dataview, testFraction: _level);
            IDataView trainData = dataSplit.TrainSet;
            IDataView testData  = dataSplit.TestSet;


            IEstimator <ITransformer> dataPrepEstimator =
                mlContext.Transforms.NormalizeMinMax("Features");


            ITransformer dataPrepTransformer     = dataPrepEstimator.Fit(trainData);
            IDataView    transformedTrainingData = dataPrepTransformer.Transform(trainData);

            var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

            var trainedModel = sdcaEstimator.Fit(transformedTrainingData);


            // TESTING
            // Measure trained model performance
            // Apply data prep transformer to test data
            IDataView transformedTestData = dataPrepTransformer.Transform(testData);

            // Use trained model to make inferences on test data
            IDataView testDataPredictions = trainedModel.Transform(transformedTestData);

            // Extract model metrics and get RSquared
            RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);
            double            rSquared            = trainedModelMetrics.RSquared;

            System.Console.WriteLine("ERROR {0}", rSquared);
            string fichero = "Level" + (int)(_level * 100) + ".zip";

            mlContext.Model.Save(trainedModel, dataview.Schema, fichero);



            System.Console.WriteLine("Fin Entrenamiento: " + this.Descripcion());
        }
Example #30
        public void Train(string trainingFileName)
        {
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName}");

                return;
            }

            //The first change is the use of a comma to separate the data
            IDataView trainingDataView = MlContext.Data.LoadFromTextFile <EmploymentHistory>(trainingFileName, ',');

            DataOperationsCatalog.TrainTestData dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.4);

            Microsoft.ML.Data.EstimatorChain <Microsoft.ML.Data.TransformerChain <Microsoft.ML.Data.ColumnConcatenatingTransformer> > dataProcessPipeline = MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
                                                                                                                                                            .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.IsMarried)))
                                                                                                                                                            .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.BSDegree)))
                                                                                                                                                            .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.MSDegree)))
                                                                                                                                                            .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.YearsExperience))
                                                                                                                                                                    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.AgeAtHire)))
                                                                                                                                                                    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.HasKids)))
                                                                                                                                                                    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.WithinMonthOfVesting)))
                                                                                                                                                                    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.DeskDecorations)))
                                                                                                                                                                    .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.LongCommute)))
                                                                                                                                                                    .Append(MlContext.Transforms.Concatenate("Features",
                                                                                                                                                                                                             typeof(EmploymentHistory).ToPropertyList <EmploymentHistory>(nameof(EmploymentHistory.DurationInMonths)))));

            //We can then create the Sdca trainer using the default parameters
            Microsoft.ML.Trainers.SdcaRegressionTrainer trainer = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

            Microsoft.ML.Data.EstimatorChain <Microsoft.ML.Data.RegressionPredictionTransformer <Microsoft.ML.Trainers.LinearRegressionModelParameters> > trainingPipeline = dataProcessPipeline.Append(trainer);

            ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);

            MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

            IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);

            //Lastly, we call the Regression.Evaluate method to provide regression specific metrics
            Microsoft.ML.Data.RegressionMetrics modelMetrics = MlContext.Regression.Evaluate(testSetTransform);

            Console.WriteLine($"Loss Function: {modelMetrics.LossFunction:0.##}{Environment.NewLine}" +
                              $"Mean Absolute Error: {modelMetrics.MeanAbsoluteError:#.##}{Environment.NewLine}" +
                              $"Mean Squared Error: {modelMetrics.MeanSquaredError:#.##}{Environment.NewLine}" +
                              $"RSquared: {modelMetrics.RSquared:0.##}{Environment.NewLine}" +
                              $"Root Mean Squared Error: {modelMetrics.RootMeanSquaredError:#.##}");
        }