Example #1
        /// <summary>
        /// Train the model by fitting the pipeline to the dataset.
        /// The pipeline is trained on the dataset that has been loaded and transformed.
        /// </summary>
        /// <param name="trainingPipeline"></param>
        /// <param name="trainingDataView"></param>
        /// <returns></returns>
        private static TransformerChain <RegressionPredictionTransformer <LinearRegressionModelParameters> > GetTrainedModel(EstimatorChain <RegressionPredictionTransformer <LinearRegressionModelParameters> > trainingPipeline, IDataView trainingDataView)
        {
            Console.WriteLine("=============== Training the model ===============");
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            return(trainedModel);
        }
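A trained regression model like this is typically evaluated on held-out data before it is persisted. The following is a minimal sketch (not part of the original example), assuming a test IDataView and the default "Label"/"Score" column names:

        private static void EvaluateModel(MLContext mlContext, ITransformer trainedModel, IDataView testDataView)
        {
            // Score the held-out data with the trained pipeline.
            IDataView predictions = trainedModel.Transform(testDataView);

            // Compute standard regression metrics (label/score column names are assumptions).
            RegressionMetrics metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");

            Console.WriteLine($"R^2: {metrics.RSquared:0.###}, RMSE: {metrics.RootMeanSquaredError:#.###}");
        }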
Example #2
        public static void CreateModel()
        {
            MLContext mlContext = new MLContext();

            IDataView trainingDataView = mlContext.Data.LoadFromTextFile <ModelInput>(
                path: TrainDataFilePath,
                hasHeader: true,
                separatorChar: ',',
                allowQuoting: true,
                allowSparse: false);

            var dataProcessPipeline = mlContext.Transforms.Categorical.OneHotEncoding(
                new[]
            {
                new InputOutputColumnPair("District", "District"),
                new InputOutputColumnPair("Type", "Type"),
                new InputOutputColumnPair("BuildingType", "BuildingType")
            })
                                      .Append(mlContext.Transforms.Concatenate("Features", new[] { "District", "Type", "BuildingType", "Size", "Floor", "TotalFloors", "Year" }));

            FastTreeTweedieTrainer trainer = mlContext.Regression.Trainers.FastTreeTweedie(new FastTreeTweedieTrainer.Options()
            {
                NumberOfLeaves             = 20,
                MinimumExampleCountPerLeaf = 10,
                NumberOfTrees              = 500,
                LearningRate               = 0.07684207f,
                Shrinkage                  = 1.057825f,
                LabelColumnName            = "Price",
                FeatureColumnName          = "Features"
            });

            EstimatorChain <RegressionPredictionTransformer <FastTreeTweedieModelParameters> > trainingPipeline = dataProcessPipeline.Append(trainer);

            ITransformer model = trainingPipeline.Fit(trainingDataView);

            mlContext.Model.Save(model, trainingDataView.Schema, ModelFilePath);
        }
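Once saved, the model can be consumed through a PredictionEngine. A minimal sketch, assuming a ModelOutput class whose Score property carries the predicted price (that class is not shown in the original example):

        public static float PredictPrice(ModelInput input)
        {
            MLContext mlContext = new MLContext();

            // Load the model saved by CreateModel above.
            ITransformer model = mlContext.Model.Load(ModelFilePath, out DataViewSchema modelInputSchema);

            // ModelOutput is assumed to look like:
            // public class ModelOutput { [ColumnName("Score")] public float Score { get; set; } }
            var engine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(model);

            return engine.Predict(input).Score;
        }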
Example #3
        static void Main(string[] args)
        {
            var stopWatch = new Stopwatch();

            stopWatch.Start();

            var mlContext = new MLContext(seed: 1);

            Console.WriteLine($"Loading data from {DataPath}");
            var data = mlContext.Data.LoadFromTextFile <ModelInput>(DataPath, hasHeader: HasHeader, separatorChar: SeparatorChar);

            Console.WriteLine("Splitting the data");
            var trainTestSplit = mlContext.Data.TrainTestSplit(data);

            Console.WriteLine("Transforming the data");
            IEstimator <ITransformer> dataProcessPipeline = null; // placeholder: plug in the data transformations here

            Console.WriteLine("Training the model");
            IEstimator <ITransformer>     trainer          = null; // placeholder: plug in the trainer here
            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            ITransformer model = trainingPipeline.Fit(trainTestSplit.TrainSet);

            Console.WriteLine("Evaluating the model's performance");
            //Depends on Trainer

            stopWatch.Stop();
            Console.WriteLine($"Training finished in: {stopWatch.ElapsedMilliseconds} milliseconds");

            Console.WriteLine($"Saving the model to {ModelName}");
            mlContext.Model.Save(model, trainTestSplit.TrainSet.Schema, ModelName);
        }
        /// <summary>
        /// Run the pipeline to train the model, then save the model to the specified output folder path.
        /// </summary>
        public void RunPipeline()
        {
            // 1., 2., 3., 4.
            PrepareDataset(useValidationSet);

            // 5. Call pipeline
            EstimatorChain <KeyToValueMappingTransformer> pipeline = CreateCustomPipeline();

            // 6. Train/create the ML Model
            Console.WriteLine("*** Training the image classification model with DNN Transfer Learning on top of the selected pre-trained model/architecture ***");

            ////////// Begin training
            Stopwatch watch = Stopwatch.StartNew();

            trainedModel = pipeline.Fit(trainDataset);
            watch.Stop();
            ////////// End training

            long ms = watch.ElapsedMilliseconds;

            Console.WriteLine($"Training with transfer learning took: {ms / 1000} seconds");

            // 8->7. Save the model to assets/outputs ML.NET .zip model file and TF .pb model file
            mlContext.Model.Save(trainedModel, trainDataset.Schema, OutputModelFilePath);
            Console.WriteLine($"Model saved to: {OutputModelFilePath}");

            // 7->8. Get the quality metrics
            EvaluateModel();
        }
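EvaluateModel is not shown in this example; for an image classification pipeline like this one it typically scores a test set and reports multiclass metrics. A hedged sketch, assuming a testDataset field prepared alongside trainDataset:

        private void EvaluateModel()
        {
            Console.WriteLine("*** Evaluating the model on the test set ***");

            // Score the (assumed) test dataset with the trained chain.
            IDataView predictions = trainedModel.Transform(testDataset);

            // Default column names are assumed here; adjust them to whatever CreateCustomPipeline produces.
            MulticlassClassificationMetrics metrics = mlContext.MulticlassClassification.Evaluate(predictions);

            Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy:0.###}  MacroAccuracy: {metrics.MacroAccuracy:0.###}  LogLoss: {metrics.LogLoss:#.###}");
        }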
Example #5
        public async Task <ModelMetrics> GenerateModel(BaseDAL storage, string modelFileName)
        {
            if (storage == null)
            {
                Log.Error("Trainer::GenerateModel - BaseDAL is null");

                throw new ArgumentNullException(nameof(storage));
            }

            if (string.IsNullOrEmpty(modelFileName))
            {
                Log.Error("Trainer::GenerateModel - modelFileName is null");

                throw new ArgumentNullException(nameof(modelFileName));
            }

            if (!File.Exists(modelFileName))
            {
                Log.Error($"Trainer::GenerateModel - {modelFileName} does not exist");

                throw new FileNotFoundException(modelFileName);
            }

            var startTime = DateTime.Now;

            var options = new RandomizedPcaTrainer.Options
            {
                FeatureColumnName       = FEATURES,
                ExampleWeightColumnName = null,
                Rank           = 4,
                Oversampling   = 20,
                EnsureZeroMean = true,
                Seed           = Constants.ML_SEED
            };

            var(data, cleanRowCount, maliciousRowCount) = GetDataView(await storage.QueryPacketsAsync(a => a.IsClean), await storage.QueryPacketsAsync(a => !a.IsClean));

            IEstimator <ITransformer> dataProcessPipeline = _mlContext.Transforms.Concatenate(
                FEATURES,
                typeof(PayloadItem).ToPropertyList <PayloadItem>(nameof(PayloadItem.Label)));

            IEstimator <ITransformer> trainer = _mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);

            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            TransformerChain <ITransformer> trainedModel = trainingPipeline.Fit(data.TrainSet);

            _mlContext.Model.Save(trainedModel, data.TrainSet.Schema, modelFileName);

            var testSetTransform = trainedModel.Transform(data.TestSet);

            return(new ModelMetrics
            {
                Metrics = _mlContext.AnomalyDetection.Evaluate(testSetTransform),
                NumCleanRows = cleanRowCount,
                NumMaliciousRows = maliciousRowCount,
                Duration = DateTime.Now.Subtract(startTime)
            });
        }
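A usage sketch for GenerateModel; the storage instance and the model file name are placeholders:

        public async Task ReportModelQuality(BaseDAL storage)
        {
            // "model.zip" is a placeholder; note that GenerateModel expects the file to exist already.
            ModelMetrics result = await GenerateModel(storage, "model.zip");

            Console.WriteLine($"AUC: {result.Metrics.AreaUnderRocCurve:P2}, " +
                              $"clean rows: {result.NumCleanRows}, malicious rows: {result.NumMaliciousRows}, " +
                              $"training time: {result.Duration}");
        }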
        public PredictionEngine <CenterFaceImageInput, CenterFaceImageOutput> GetMlNetPredictionEngine()
        {
            EstimatorChain <OnnxTransformer> pipeline = CreatePipeline();
            IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
            TransformerChain <OnnxTransformer> transformer = pipeline.Fit(emptyFitData);

            return(mlContext.Model.CreatePredictionEngine <CenterFaceImageInput, CenterFaceImageOutput>(transformer));
        }
        public void SaveMLNetModel()
        {
            EstimatorChain <OnnxTransformer> pipeline = CreatePipeline();
            IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
            TransformerChain <OnnxTransformer> transformer = pipeline.Fit(emptyFitData);

            mlContext.Model.Save(transformer, null, _mlModelDestn);
        }
        public CompositeDataReader <TSource, TLastTransformer> Fit(TSource input)
        {
            var start = _start.Fit(input);
            var idv   = start.Read(input);

            var xfChain = _estimatorChain.Fit(idv);

            return(new CompositeDataReader <TSource, TLastTransformer>(start, xfChain));
        }
Example #9
        public void Create(string fileName)
        {
            string       modelPath        = AppDomain.CurrentDomain.BaseDirectory + $"./data/{fileName}";
            IDataView    trainingDataView = _context.Data.LoadFromEnumerable <Archive>(_trainingData);
            ITransformer model            = _pipeline.Fit(trainingDataView);

            _engine = _context.Model.CreatePredictionEngine <Archive, Output>(model);

            _context.Model.Save(model, trainingDataView.Schema, modelPath);
        }
Example #10
        private static void PreProcessData()
        {
            estimator = _context.Transforms.Conversion
                        .MapValueToKey("Encoded_UserID", nameof(InputModel.UserId))
                        .Append(_context.Transforms.Conversion.MapValueToKey("Encoded_Book",
                                                                             nameof(InputModel.ISBN)));

            var preProcessData = estimator.Fit(_dataView).Transform(_dataView);

            splitData = _context.Data.TrainTestSplit(preProcessData, 0.05);
        }
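The snippet stops after key-encoding and splitting; for a user/book rating scenario like this, the encoded columns are usually fed to the matrix factorization recommender. A minimal sketch, assuming a "Label" rating column and the Microsoft.ML.Recommender package:

        private static ITransformer TrainRecommender()
        {
            var options = new MatrixFactorizationTrainer.Options
            {
                MatrixColumnIndexColumnName = "Encoded_UserID",
                MatrixRowIndexColumnName    = "Encoded_Book",
                LabelColumnName             = "Label", // assumed rating column on InputModel
                NumberOfIterations          = 20,
                ApproximationRank           = 100
            };

            var trainer = _context.Recommendation().Trainers.MatrixFactorization(options);

            // splitData was produced by PreProcessData above.
            return trainer.Fit(splitData.TrainSet);
        }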
        public SignalClassifierController(string frameSize, string sensorType, string[] datasets, string[] labels)
        {
            mlContext = new MLContext();

            categories = labels;

            var reader = getFrameReader(frameSize, sensorType);

            var trainingDataView = reader.Load(datasets);

            var split = mlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.2);

            estimatorPipeline = mlContext.Transforms.Conversion.MapValueToKey("Label")
                                .Append(mlContext.Transforms.NormalizeMinMax("readings", fixZero: true))
                                .Append(mlContext.MulticlassClassification.Trainers
                                        .OneVersusAll(mlContext.BinaryClassification.Trainers
                                                      .FastTree(featureColumnName: "readings")));
            // .Append(mlContext.MulticlassClassification.Trainers
            //     .NaiveBayes(featureColumnName: "readings"));
            // .Append(mlContext.MulticlassClassification.Trainers
            //     .OneVersusAll(mlContext.BinaryClassification.Trainers
            //         .LbfgsLogisticRegression(featureColumnName: "readings")));
            // .Append(mlContext.MulticlassClassification.Trainers
            //         .OneVersusAll(mlContext.BinaryClassification.Trainers
            //             .LdSvm(featureColumnName: "readings")));

            transformer = estimatorPipeline.Fit(split.TrainSet);

            // var OVAEstimator = mlContext.MulticlassClassification.Trainers
            //     .OneVersusAll(mlContext.BinaryClassification.Trainers
            //         .LbfgsLogisticRegression(featureColumnName: "readings"));
            // var OVAEstimator = mlContext.MulticlassClassification.Trainers
            //     .OneVersusAll(mlContext.BinaryClassification.Trainers
            //         .LdSvm(featureColumnName: "readings"));
            // var NBEstimator = mlContext.MulticlassClassification.Trainers
            //     .NaiveBayes(featureColumnName: "readings");
            var OVAEstimator = mlContext.MulticlassClassification.Trainers
                               .OneVersusAll(mlContext.BinaryClassification.Trainers
                                             .FastTree(featureColumnName: "readings"));

            var transformedTrainingData = transformer.Transform(split.TrainSet);

            model = OVAEstimator.Fit(transformedTrainingData);
            // model = NBEstimator.Fit(transformedTrainingData);

            Console.WriteLine("Model fitted");

            var transformedTestData = transformer.Transform(split.TestSet);

            var testPredictions = model.Transform(transformedTestData);

            Console.WriteLine(mlContext.MulticlassClassification.Evaluate(testPredictions).ConfusionMatrix.GetFormattedConfusionTable());
        }
Example #12
        private void Train()
        {
            txtResult.Text += "=============== Create and Train the Model ===============" + Environment.NewLine;
            txtResult.Text += "Catalog: " + sCatalog + Environment.NewLine;
            txtResult.Text += "Trainer: " + sTrainer + Environment.NewLine;
            txtResult.Text += "Database: " + sDatabase + Environment.NewLine;

            //Train model
            _model = _trainingPipeline.Fit(_data);

            txtResult.Text += "==================== End of training =====================" + Environment.NewLine;
        }
Example #13
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Common data loading configuration
            IDataView dataView = mlContext.Data.LoadFromTextFile <SentimentIssue>(DataPath, hasHeader: true);

            DataOperationsCatalog.TrainTestData trainTestSplit =
                mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            IDataView trainingData = trainTestSplit.TrainSet;
            IDataView testData     = trainTestSplit.TestSet;

            // STEP 2: Common data process configuration with pipeline data transformations
            TextFeaturizingEstimator dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features",
                                                                                                   inputColumnName: nameof(SentimentIssue.Text));

            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            ConsoleHelper.PeekDataViewInConsole(mlContext, dataView, dataProcessPipeline, 2);
            //Peek at the transformed Features column
            //ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", dataView, dataProcessPipeline, 1);

            // STEP 3: Set the training algorithm, then create and config the modelBuilder
            SdcaLogisticRegressionBinaryTrainer trainer =
                mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label",
                                                                               featureColumnName: "Features");
            EstimatorChain <BinaryPredictionTransformer <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > > trainingPipeline = dataProcessPipeline.Append(trainer);

            //Measure training time
            Stopwatch watch = Stopwatch.StartNew();

            // STEP 4: Train the model by fitting it to the training dataset
            Console.WriteLine("=============== Training the model ===============");
            ITransformer trainedModel = trainingPipeline.Fit(trainingData);

            //Stop measuring time
            watch.Stop();
            long elapsedMs = watch.ElapsedMilliseconds;

            Console.WriteLine($"***** Training time: {elapsedMs / 1000} seconds *****");

            // STEP 5: Evaluate the model and show accuracy stats
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            IDataView predictions = trainedModel.Transform(testData);
            CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(data: predictions, labelColumnName: "Label",
                                                                                                    scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Save/persist the trained model to a .ZIP file
            mlContext.Model.Save(trainedModel, trainingData.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return(trainedModel);
        }
 public void Predict(string imagepath)
 {
     EstimatorChain <OnnxTransformer> pipeline = CreatePipeline();
     IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
     TransformerChain <OnnxTransformer> transformer = pipeline.Fit(emptyFitData);
     IDataView emptyTestData = mlContext.Data.LoadFromEnumerable(new List <CenterFaceImageInput>()
     {
         new CenterFaceImageInput()
         {
             Image = (Bitmap)Bitmap.FromFile(imagepath)
         }
     });
     var res = transformer.Transform(emptyTestData);
 }
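The transformed view res is not read here. One way to materialize the ONNX outputs is to map the scored rows back onto the output class, as in this sketch (PredictAll is a hypothetical helper; it assumes CenterFaceImageOutput mirrors the model's output columns, as it does for the prediction-engine path shown earlier):

 public IEnumerable <CenterFaceImageOutput> PredictAll(string imagepath)
 {
     EstimatorChain <OnnxTransformer> pipeline = CreatePipeline();
     IDataView emptyFitData = mlContext.Data.LoadFromEnumerable(CenterFaceImageInput.EmptyEnumerable);
     TransformerChain <OnnxTransformer> transformer = pipeline.Fit(emptyFitData);

     IDataView input = mlContext.Data.LoadFromEnumerable(new List <CenterFaceImageInput>()
     {
         new CenterFaceImageInput() { Image = (Bitmap)Bitmap.FromFile(imagepath) }
     });

     // Materialize the lazily evaluated output rows as CenterFaceImageOutput instances.
     IDataView scored = transformer.Transform(input);
     return mlContext.Data.CreateEnumerable <CenterFaceImageOutput>(scored, reuseRowObject: false);
 }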
Example #15
        //
        //  Training
        //

        public void StartTraining()
        {
            Console.WriteLine(" ");
            Console.WriteLine("[ML.Trainer] Starting training... ");

            DateTime started = DateTime.Now;

            trainedModel = pipeline.Fit(trainDataView);


            Console.WriteLine($"[ML.Trainer] Training ended in {(DateTime.Now - started).TotalSeconds} seconds");
            Console.WriteLine(" ");
            Console.WriteLine(" ");
        }
Example #16
        public static ITransformer TrainModel(MLContext mlContext, IDataView trainDataView)
        {
            // Get all the feature column names (All except the Label and the IdPreservationColumn)
            string[] featureColumnNames = trainDataView.Schema.AsQueryable()
                                          .Select(column => column.Name)                               // Get all the column names
                                          .Where(name => name != nameof(TransactionObservation.Label)) // Do not include the Label column
                                          .Where(name => name != "IdPreservationColumn")               // Do not include the IdPreservationColumn/StratificationColumn
                                          .Where(name => name != nameof(TransactionObservation.Time))  // Do not include the Time column. Not needed as feature column
                                          .ToArray();


            // Create the data process pipeline
            IEstimator <ITransformer> dataProcessPipeline = mlContext.Transforms.Concatenate("Features", featureColumnNames)
                                                            .Append(mlContext.Transforms.DropColumns(new string[] { nameof(TransactionObservation.Time) }))
                                                            .Append(mlContext.Transforms.NormalizeLpNorm(outputColumnName: "NormalizedFeatures", inputColumnName: "Features"));

            // In Anomaly Detection, the learner assumes all training examples have label 0, as it only learns from normal examples.
            // If any of the training examples has label 1, it is recommended to use a Filter transform to filter them out before training:
            IDataView normalTrainDataView = mlContext.Data.FilterRowsByColumn(trainDataView, columnName: nameof(TransactionObservation.Label), lowerBound: 0, upperBound: 1);


            // (OPTIONAL) Peek data (such as 2 records) in training DataView after applying the ProcessPipeline's transformations into "Features"
            ConsoleHelper.PeekDataViewInConsole(mlContext, normalTrainDataView, dataProcessPipeline, 2);
            ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "NormalizedFeatures", normalTrainDataView, dataProcessPipeline, 2);


            var options = new RandomizedPcaTrainer.Options
            {
                FeatureColumnName       = "NormalizedFeatures", // The name of the feature column. The column data must be a known-sized vector of Single.
                ExampleWeightColumnName = null,                 // The name of the example weight column (optional). To use the weight column, the column data must be of type Single.
                Rank           = 28,                            // The number of components in the PCA.
                Oversampling   = 20,                            // Oversampling parameter for randomized PCA training.
                EnsureZeroMean = true,                          // If enabled, data is centered to be zero mean.
                Seed           = 1                              // The seed for random number generation.
            };


            // Create an anomaly detector. Its underlying algorithm is randomized PCA.
            IEstimator <ITransformer> trainer = mlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);

            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            ConsoleHelper.ConsoleWriteHeader("=============== Training model ===============");

            TransformerChain <ITransformer> model = trainingPipeline.Fit(normalTrainDataView);

            ConsoleHelper.ConsoleWriteHeader("=============== End of training process ===============");

            return(model);
        }
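A companion evaluation sketch for the returned model (not part of the original example); the test IDataView is an assumption:

        public static void EvaluateModel(MLContext mlContext, ITransformer model, IDataView testDataView)
        {
            // Score the test data, including the anomalous (Label == 1) rows that were filtered out of training.
            IDataView predictions = model.Transform(testDataView);

            AnomalyDetectionMetrics metrics = mlContext.AnomalyDetection.Evaluate(predictions, labelColumnName: nameof(TransactionObservation.Label));

            Console.WriteLine($"Area Under ROC Curve: {metrics.AreaUnderRocCurve:P2}");
            Console.WriteLine($"Detection rate at false positive count: {metrics.DetectionRateAtFalsePositiveCount}");
        }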
Example #17
        private static void Main(string[] args)
        {
            MLContext context = new();

            IDataView data = context.Data.LoadFromTextFile <AppCriteriasDataModel>("Data/GPRanking.csv", hasHeader: true,
                                                                                   separatorChar: ';', allowQuoting: true);

            EstimatorChain <RegressionPredictionTransformer <LightGbmRegressionModelParameters> > pipeline =
                context.Transforms.Concatenate("Features",
                                               "GooglePlayRank",
                                               "Orientation",
                                               "Downloads",
                                               "Size",
                                               "Android")
                .Append(context.Regression.Trainers.LightGbm());

            TransformerChain <RegressionPredictionTransformer <LightGbmRegressionModelParameters> > model =
                pipeline.Fit(data);

            PredictionEngine <AppCriteriasDataModel, AppRunPredictionModel> predictor =
                context.Model.CreatePredictionEngine <AppCriteriasDataModel, AppRunPredictionModel>(model);

            AppRunPredictionModel prediction = predictor.Predict(new AppCriteriasDataModel
            {
                Android        = 12,
                Downloads      = 533334,
                Size           = 666,
                Orientation    = 1,
                GooglePlayRank = 8
            });

            if (prediction.Score < 0.25)
            {
                Console.WriteLine("Не запустится");
            }
            if (prediction.Score >= 0.25 && prediction.Score < 0.5)
            {
                Console.WriteLine("Запуск маловероятен");
            }
            if (prediction.Score >= 0.5 && prediction.Score < 0.75)
            {
                Console.WriteLine("Возможно, запустится");
            }
            if (prediction.Score >= 0.75)
            {
                Console.WriteLine("Высокая вероятность запуска");
            }
        }
Example #18
        private ITransformer GetModel(AlgorithmType?algorithmType, EstimatorChain <TransformerChain <ColumnConcatenatingTransformer> > pipeline)
        {
            if (_predictedColumn.IsAlphanumeric)
            {
                return(algorithmType != null
                                        ? pipeline.Append(GetAlgorithm(algorithmType.Value)).Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")).Fit(DataView)
                                        : pipeline.Append(MlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel")).Fit(DataView));
            }

            if (algorithmType != null)
            {
                return(pipeline.Append(GetAlgorithm(algorithmType.Value)).Fit(DataView));
            }

            return(pipeline.Fit(DataView));
        }
Example #19
        public void Train(string trainingFileName, string testingFileName)
        {
            if (!File.Exists(trainingFileName))
            {
                Console.WriteLine($"Failed to find training data file ({trainingFileName}");

                return;
            }

            if (!File.Exists(testingFileName))
            {
                Console.WriteLine($"Failed to find test data file ({testingFileName}");

                return;
            }

            var trainingDataView = GetDataView(trainingFileName);

            var options = new RandomizedPcaTrainer.Options
            {
                FeatureColumnName       = FEATURES,
                ExampleWeightColumnName = null,
                Rank           = 5,
                Oversampling   = 20,
                EnsureZeroMean = true,
                Seed           = 1
            };

            IEstimator <ITransformer> trainer = MlContext.AnomalyDetection.Trainers.RandomizedPca(options: options);

            EstimatorChain <ITransformer> trainingPipeline = trainingDataView.Transformer.Append(trainer);

            TransformerChain <ITransformer> trainedModel = trainingPipeline.Fit(trainingDataView.DataView);

            MlContext.Model.Save(trainedModel, trainingDataView.DataView.Schema, ModelPath);

            var testingDataView = GetDataView(testingFileName, true);

            var testSetTransform = trainedModel.Transform(testingDataView.DataView);

            var modelMetrics = MlContext.AnomalyDetection.Evaluate(testSetTransform);

            Console.WriteLine($"Area Under Curve: {modelMetrics.AreaUnderRocCurve:P2}{Environment.NewLine}" +
                              $"Detection at FP Count: {modelMetrics.DetectionRateAtFalsePositiveCount}");
        }
Example #20
        static async Task Main(string[] args)
        {
            var stopWatch = new Stopwatch();

            stopWatch.Start();

            (IMLOpsContext mlOpsContext, Run run) = await CreateRun("MyExperiment");

            var mlContext = new MLContext(seed: 1);

            Console.WriteLine($"Loading data from {DataPath}");
            var data = mlContext.Data.LoadFromTextFile <ModelInput>(DataPath, hasHeader: HasHeader, separatorChar: SeparatorChar);

            Console.WriteLine("Logging data");
            await mlOpsContext.Data.LogDataAsync(run.RunId, data);

            Console.WriteLine("Splitting the data");
            var trainTestSplit = mlContext.Data.TrainTestSplit(data);

            Console.WriteLine("Transforming the data");
            IEstimator <ITransformer> dataProcessPipeline = null; // placeholder: plug in the data transformations here

            Console.WriteLine("Training the model");
            IEstimator <ITransformer>     trainer          = null; // placeholder: plug in the trainer here
            EstimatorChain <ITransformer> trainingPipeline = dataProcessPipeline.Append(trainer);

            Console.WriteLine("Logging hyper-parameters");
            await mlOpsContext.Training.LogHyperParametersAsync(run.RunId, trainer);

            ITransformer model = trainingPipeline.Fit(trainTestSplit.TrainSet);

            Console.WriteLine("Evaluating the model's performance");
            //await mlOpsContext.Evaluation.LogMetricsAsync(run.RunId, metrics);

            stopWatch.Stop();
            Console.WriteLine($"Training finished in: {stopWatch.ElapsedMilliseconds} milliseconds");
            await mlOpsContext.LifeCycle.SetTrainingTimeAsync(run.RunId, stopWatch.Elapsed);

            Console.WriteLine($"Saving the model to {ModelName}");
            mlContext.Model.Save(model, trainTestSplit.TrainSet.Schema, ModelName);

            Console.WriteLine("Uploading model to model repository");
            await mlOpsContext.Model.UploadAsync(run.RunId, ModelName);
        }
Example #21
        // ===========================================================================================================

        public void BuildTrainingPipelineAndModel()
        {
            if (ErrorHasOccured)
            {
                return;
            }

            try
            {
                EstimatorChain <NormalizingTransformer> dataProcessPipeline = _mlContext.Transforms.Conversion.ConvertType(new[] { new InputOutputColumnPair("CentralAir", "CentralAir") })
                                                                              .Append(_mlContext.Transforms.Categorical.OneHotEncoding(new[] { new InputOutputColumnPair("MSZoning", "MSZoning"), new InputOutputColumnPair("Street", "Street"), new InputOutputColumnPair("Alley", "Alley"), new InputOutputColumnPair("LotShape", "LotShape"), new InputOutputColumnPair("LandContour", "LandContour"), new InputOutputColumnPair("Utilities", "Utilities"), new InputOutputColumnPair("LotConfig", "LotConfig"), new InputOutputColumnPair("LandSlope", "LandSlope"), new InputOutputColumnPair("Neighborhood", "Neighborhood"), new InputOutputColumnPair("Condition1", "Condition1"), new InputOutputColumnPair("Condition2", "Condition2"), new InputOutputColumnPair("BldgType", "BldgType"), new InputOutputColumnPair("HouseStyle", "HouseStyle"), new InputOutputColumnPair("RoofStyle", "RoofStyle"), new InputOutputColumnPair("RoofMatl", "RoofMatl"), new InputOutputColumnPair("Exterior1st", "Exterior1st"), new InputOutputColumnPair("Exterior2nd", "Exterior2nd"), new InputOutputColumnPair("MasVnrType", "MasVnrType"), new InputOutputColumnPair("ExterQual", "ExterQual"), new InputOutputColumnPair("ExterCond", "ExterCond"), new InputOutputColumnPair("Foundation", "Foundation"), new InputOutputColumnPair("BsmtQual", "BsmtQual"), new InputOutputColumnPair("BsmtCond", "BsmtCond"), new InputOutputColumnPair("BsmtExposure", "BsmtExposure"), new InputOutputColumnPair("BsmtFinType1", "BsmtFinType1"), new InputOutputColumnPair("BsmtFinType2", "BsmtFinType2"), new InputOutputColumnPair("Heating", "Heating"), new InputOutputColumnPair("HeatingQC", "HeatingQC"), new InputOutputColumnPair("Electrical", "Electrical"), new InputOutputColumnPair("KitchenQual", "KitchenQual"), new InputOutputColumnPair("Functional", "Functional"), new InputOutputColumnPair("FireplaceQu", "FireplaceQu"), new InputOutputColumnPair("GarageType", "GarageType"), new InputOutputColumnPair("GarageFinish", "GarageFinish"), new InputOutputColumnPair("GarageQual", "GarageQual"), new InputOutputColumnPair("GarageCond", "GarageCond"), new InputOutputColumnPair("PavedDrive", "PavedDrive"), new InputOutputColumnPair("PoolQC", "PoolQC"), new InputOutputColumnPair("Fence", "Fence"), new InputOutputColumnPair("MiscFeature", "MiscFeature"), new InputOutputColumnPair("SaleType", "SaleType"), new InputOutputColumnPair("SaleCondition", "SaleCondition") }))
                                                                              .Append(_mlContext.Transforms.IndicateMissingValues(new[] { new InputOutputColumnPair("LotFrontage_MissingIndicator", "LotFrontage"), new InputOutputColumnPair("MasVnrArea_MissingIndicator", "MasVnrArea"), new InputOutputColumnPair("GarageYrBlt_MissingIndicator", "GarageYrBlt") }))
                                                                              .Append(_mlContext.Transforms.Conversion.ConvertType(new[] { new InputOutputColumnPair("LotFrontage_MissingIndicator", "LotFrontage_MissingIndicator"), new InputOutputColumnPair("MasVnrArea_MissingIndicator", "MasVnrArea_MissingIndicator"), new InputOutputColumnPair("GarageYrBlt_MissingIndicator", "GarageYrBlt_MissingIndicator") }))
                                                                              .Append(_mlContext.Transforms.ReplaceMissingValues(new[] { new InputOutputColumnPair("LotFrontage", "LotFrontage"), new InputOutputColumnPair("MasVnrArea", "MasVnrArea"), new InputOutputColumnPair("GarageYrBlt", "GarageYrBlt") }))
                                                                              .Append(_mlContext.Transforms.Concatenate("Features", new[] { "CentralAir", "MSZoning", "Street", "Alley", "LotShape", "LandContour", "Utilities", "LotConfig", "LandSlope", "Neighborhood", "Condition1", "Condition2", "BldgType", "HouseStyle", "RoofStyle", "RoofMatl", "Exterior1st", "Exterior2nd", "MasVnrType", "ExterQual", "ExterCond", "Foundation", "BsmtQual", "BsmtCond", "BsmtExposure", "BsmtFinType1", "BsmtFinType2", "Heating", "HeatingQC", "Electrical", "KitchenQual", "Functional", "FireplaceQu", "GarageType", "GarageFinish", "GarageQual", "GarageCond", "PavedDrive", "PoolQC", "Fence", "MiscFeature", "SaleType", "SaleCondition", "LotFrontage_MissingIndicator", "MasVnrArea_MissingIndicator", "GarageYrBlt_MissingIndicator", "LotFrontage", "MasVnrArea", "GarageYrBlt", "Id", "MSSubClass", "LotArea", "OverallQual", "OverallCond", "YearBuilt", "YearRemodAdd", "BsmtFinSF1", "BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF", "1stFlrSF", "2ndFlrSF", "LowQualFinSF", "GrLivArea", "BsmtFullBath", "BsmtHalfBath", "FullBath", "HalfBath", "BedroomAbvGr", "KitchenAbvGr", "TotRmsAbvGrd", "Fireplaces", "GarageCars", "GarageArea", "WoodDeckSF", "OpenPorchSF", "EnclosedPorch", "3SsnPorch", "ScreenPorch", "PoolArea", "MiscVal", "MoSold", "YrSold" }))
                                                                              .Append(_mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                                                                              .AppendCacheCheckpoint(_mlContext);

                LbfgsPoissonRegressionTrainer trainer = _mlContext.Regression.Trainers.LbfgsPoissonRegression(new LbfgsPoissonRegressionTrainer.Options()
                {
                    L2Regularization          = 0.07404655f,
                    L1Regularization          = 0.2087761f,
                    OptimizationTolerance     = 0.0001f,
                    HistorySize               = 5,
                    MaximumNumberOfIterations = 462473459,
                    InitialWeightsDiameter    = 0.5613934f,
                    DenseOptimizer            = false,
                    LabelColumnName           = "SalePrice",
                    FeatureColumnName         = "Features"
                });

                EstimatorChain <RegressionPredictionTransformer <PoissonRegressionModelParameters> > trainingPipeline = dataProcessPipeline.Append(trainer);

                // TODO
                // Evaluate quality of Model
                // Evaluate(_mlContext, _trainingDataView, trainingPipeline);

                // Train Model
                _mlModel = trainingPipeline.Fit(_trainingDataView);

                // TODO
                // Save _mlModel
                // SaveModel(_mlContext, mlModel, ModelSettings.ModelFilePath, _trainingDataView.Schema);
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex.Message);
                ErrorHasOccured    = true;
                FailureInformation = ex.Message;
                return;
            }
        }
Example #22
        public List <NGramModel> GenerateNGrams(string[] inputStrings, int ngramLength = 3)
        {
            var retList = new List <NGramModel>();

            MLContext mlContext = new MLContext();
            EstimatorChain <NgramExtractingTransformer> textPipeline = mlContext.Transforms.Text.TokenizeIntoWords("Tokens", "Text")
                                                                       // 'ProduceNgrams' takes key type as input. Converting the tokens into key type using 'MapValueToKey'.
                                                                       .Append(mlContext.Transforms.Conversion.MapValueToKey("Tokens"))
                                                                       .Append(mlContext.Transforms.Text.ProduceNgrams("NgramFeatures", "Tokens",
                                                                                                                       ngramLength: ngramLength,
                                                                                                                       useAllLengths: false,
                                                                                                                       weighting: NgramExtractingEstimator.WeightingCriteria.Tf));

            var strings  = StringsToTextDataList(inputStrings);
            var dataview = mlContext.Data.LoadFromEnumerable(strings);

            var textTransformer     = textPipeline.Fit(dataview);
            var transformedDataView = textTransformer.Transform(dataview);
            var predictionEngine    = mlContext.Model.CreatePredictionEngine <TextData, TransformedTextData>(textTransformer);
            var prediction          = predictionEngine.Predict(strings[0]);
            VBuffer <ReadOnlyMemory <char> > slotNames = default;

            transformedDataView.Schema["NgramFeatures"].GetSlotNames(ref slotNames);
            // ReSharper disable once InconsistentNaming
            var NgramFeaturesColumn = transformedDataView.GetColumn <VBuffer <float> >(transformedDataView.Schema["NgramFeatures"]);
            var slots = slotNames.GetValues();

            foreach (var featureRow in NgramFeaturesColumn)
            {
                //Console.Write($"row:{rowCount}\t");
                foreach (var item in featureRow.Items())
                {
                    var ng = new NGramModel
                    {
                        NGramString  = slots[item.Key].ToString()
                        , NGramArray = slots[item.Key].ToString().Split('|')
                    };
                    retList.Add(ng);
                }
            }

            return(retList);
        }
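A short usage sketch for GenerateNGrams (the input strings are placeholders):

        public void PrintNGrams()
        {
            var ngrams = GenerateNGrams(new[] { "the quick brown fox", "jumps over the lazy dog" }, ngramLength: 2);

            // Each NGramModel holds the pipe-delimited n-gram and its split tokens.
            foreach (var ngram in ngrams)
            {
                Console.WriteLine(ngram.NGramString);
            }
        }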
Example #23
        public IEstimator <ITransformer> ToEstimator(IDataView trainset      = null,
                                                     IDataView validationSet = null)
        {
            IEstimator <ITransformer> pipeline = new EstimatorChain <ITransformer>();

            // Append each transformer to the pipeline
            foreach (var transform in Transforms)
            {
                if (transform.Estimator != null)
                {
                    pipeline = pipeline.Append(transform.Estimator);
                }
            }

            if (validationSet != null)
            {
                validationSet = pipeline.Fit(validationSet).Transform(validationSet);
            }

            // Get learner
            var learner = Trainer.BuildTrainer(validationSet);

            if (_cacheBeforeTrainer)
            {
                pipeline = pipeline.AppendCacheCheckpoint(_context);
            }

            // Append learner to pipeline
            pipeline = pipeline.Append(learner);

            // Append each post-trainer transformer to the pipeline
            foreach (var transform in TransformsPostTrainer)
            {
                if (transform.Estimator != null)
                {
                    pipeline = pipeline.Append(transform.Estimator);
                }
            }

            return(pipeline);
        }
Example #24
        public ITransformer TrainTransformer(IDataView trainData)
        {
            IEstimator <ITransformer> pipeline = new EstimatorChain <ITransformer>();

            // append each transformer to the pipeline
            foreach (var transform in Transforms)
            {
                if (transform.Estimator != null)
                {
                    pipeline = pipeline.Append(transform.Estimator);
                }
            }

            // get learner
            var learner = Trainer.BuildTrainer(_context);

            // append learner to pipeline
            pipeline = pipeline.Append(learner);

            return(pipeline.Fit(trainData));
        }
Example #25
        public static List <TaxiTrip> PeekDataViewInConsole(LocalEnvironment context, IDataView dataView, EstimatorChain <ITransformer> pipeline, int numberOfRows = 4)
        {
            string msg = string.Format("Show {0} rows with all the columns", numberOfRows.ToString());

            ConsoleWriteHeader(msg);

            //https://github.com/dotnet/machinelearning/blob/master/docs/code/MlNetCookBook.md#how-do-i-look-at-the-intermediate-data
            var transformedData = pipeline.Fit(dataView).Transform(dataView);

            // 'transformedData' is a 'promise' of data, lazy-loading. Let's actually read it.
            // Convert to an enumerable of user-defined type.
            var someRows = transformedData.AsEnumerable <TaxiTrip>(context, reuseRowObject: false)
                           //.Where(x => x.Count > 0)
                           // Take a couple values as an array.
                           .Take(numberOfRows)
                           .ToList();

            // print to console the peeked rows
            someRows.ForEach(row => { Console.WriteLine($"Label [FareAmount]: {row.FareAmount} || Features: [RateCode] {row.RateCode} [PassengerCount] {row.PassengerCount} [TripTime] {row.TripTime} [TripDistance] {row.TripDistance} [PaymentType] {row.PaymentType} "); });

            return(someRows);
        }
Example #26
        public static IActionResult RunTokenizer(
            [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
            ILogger log,
            ExecutionContext executionContext)
        {
            log.LogInformation("Tokenizer Custom Skill: C# HTTP trigger function processed a request.");

            string skillName = executionContext.FunctionName;
            IEnumerable <WebApiRequestRecord> requestRecords = WebApiSkillHelpers.GetRequestRecords(req);

            if (requestRecords == null)
            {
                return(new BadRequestObjectResult($"{skillName} - Invalid request record array."));
            }

            WebApiSkillResponse response = WebApiSkillHelpers.ProcessRequestRecords(skillName, requestRecords,
                                                                                    (inRecord, outRecord) =>
            {
                var text = new TextData {
                    Text = inRecord.Data["text"] as string
                };
                StopWordsRemovingEstimator.Language language =
                    MapToMlNetLanguage(inRecord.Data.TryGetValue("languageCode", out object languageCode) ? languageCode as string : "en");

                var mlContext           = new MLContext();
                IDataView emptyDataView = mlContext.Data.LoadFromEnumerable(new List <TextData>());
                EstimatorChain <StopWordsRemovingTransformer> textPipeline = mlContext.Transforms.Text
                                                                             .NormalizeText("Text", caseMode: TextNormalizingEstimator.CaseMode.Lower, keepDiacritics: true, keepPunctuations: false, keepNumbers: false)
                                                                             .Append(mlContext.Transforms.Text.TokenizeIntoWords("Words", "Text", separators: new[] { ' ' }))
                                                                             .Append(mlContext.Transforms.Text.RemoveDefaultStopWords("Words", language: language));
                TransformerChain <StopWordsRemovingTransformer> textTransformer   = textPipeline.Fit(emptyDataView);
                PredictionEngine <TextData, TransformedTextData> predictionEngine = mlContext.Model.CreatePredictionEngine <TextData, TransformedTextData>(textTransformer);

                outRecord.Data["words"] = predictionEngine.Predict(text).Words ?? Array.Empty <string>();
                return(outRecord);
            });

            return(new OkObjectResult(response));
        }
        public static IEnumerable <string> GetPreviewData <T>(EstimatorChain <T> pipeline, IDataView dataView, int maxRows = 10) where T : class, ITransformer
        {
            var transformer     = pipeline.Fit(dataView);
            var transformedData = transformer.Transform(dataView);
            var previewData     = transformedData.Preview(maxRows).RowView;

            var sparseVectorData = new Dictionary <string, List <float[]> >();

            foreach (var column in transformedData.Schema)
            {
                if (column.Type.RawType == typeof(VBuffer <float>))
                {
                    sparseVectorData.Add(column.Name, transformedData.GetColumn <float[]>(column).Take(maxRows).ToList());
                }
            }

            string SparseVector(IEnumerable <float> vector)
            {
                return("[" + string.Join(",", vector) + "]");
            }

            string GetValue(string key, object value, int i)
            {
                return(value is VBuffer <float> ? SparseVector(sparseVectorData[key][i]) : value.ToString());
            }

            for (var i = 0; i < previewData.Length; i++)
            {
                var data = new List <string>();

                foreach (var pair in previewData[i].Values)
                {
                    data.Add($"{pair.Key}: {GetValue(pair.Key, pair.Value, i)}");
                }

                yield return(string.Join(" | ", data));
            }
        }
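A usage sketch for GetPreviewData; the pipeline and data view are whatever the caller has already built:

        public static void PrintPreview<T>(EstimatorChain <T> pipeline, IDataView dataView) where T : class, ITransformer
        {
            // Print up to 10 transformed rows, one "column: value | ..." line per row.
            foreach (string row in GetPreviewData(pipeline, dataView, maxRows: 10))
            {
                Console.WriteLine(row);
            }
        }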
Example #28
        static void Main(string[] args)
        {
            // NOTE: this example appears to target an early pre-1.0 ML.NET API surface
            // (EstimatorChain.Add, MakePredictionFunction); the equivalent calls differ in current releases.
            TextLoader loader       = TextLoader.Create(typeof(IrisData), separator: ",");
            IDataView  trainingData = loader.Read("iris-data.txt");

            var pipeline = new EstimatorChain();

            pipeline.Add(new ValueToKeyMappingEstimator(nameof(IrisData.Label)));
            pipeline.Add(new ColumnConcatenatingEstimator(
                             inputColumns: new[]
            {
                nameof(IrisData.SepalLength),
                nameof(IrisData.SepalWidth),
                nameof(IrisData.PetalLength),
                nameof(IrisData.PetalWidth)
            },
                             outputColumn: DefaultColumnNames.Features));
            pipeline.Add(new SdcaMultiClassTrainer(
                             featureColumn: DefaultColumnNames.Features,
                             labelColumn: nameof(IrisData.Label),
                             predictedLabelColumn: nameof(IrisPrediction.PredictedLabel)));
            pipeline.Add(new KeyToValueMappingEstimator(nameof(IrisPrediction.PredictedLabel)));

            var model = pipeline.Fit(trainingData);

            IrisData newInput = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };
            IrisPrediction prediction = model
                                        .MakePredictionFunction <IrisData, IrisPrediction>()
                                        .Predict(newInput);

            Console.WriteLine($"Predicted flower type is: {prediction.PredictedLabel}");
        }
Example #29
        public static List <float[]> PeekFeaturesColumnDataInConsole(string columnName, LocalEnvironment mlcontext, IDataView dataView, EstimatorChain <ITransformer> pipeline, int numberOfRows = 4)
        {
            string msg = string.Format("Show {0} rows with just the '{1}' column", numberOfRows, columnName);

            ConsoleWriteHeader(msg);

            var transformedData = pipeline.Fit(dataView).Transform(dataView);
            // Extract the 'Features' column.

            var someColumnData = transformedData.GetColumn <float[]>(mlcontext, columnName)
                                 .Take(numberOfRows).ToList();

            // print to console the peeked rows
            someColumnData.ForEach(row => {
                String concatColumn = String.Empty;
                foreach (float f in row)
                {
                    concatColumn += f.ToString();
                }
                Console.WriteLine(concatColumn);
            });

            return(someColumnData);
        }
Example #30
        private static void TrainModel()
        {
            MLContext context = new MLContext(0);

            IDataView dataView = context.Data.LoadFromTextFile <LanguageSentence>(@"data.corpus");

            DataOperationsCatalog.TrainTestData data = context.Data.TrainTestSplit(dataView, 0.2D);

            EstimatorChain <KeyToValueMappingTransformer> pipeline = context.Transforms.Conversion.MapValueToKey("Label", nameof(LanguageSentence.Label))

                                                                     .Append(context.Transforms.Text.FeaturizeText("Features", nameof(LanguageSentence.Sentence)))

                                                                     .AppendCacheCheckpoint(context)

                                                                     .Append(context.MulticlassClassification.Trainers.SdcaMaximumEntropy())

                                                                     .Append(context.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            TransformerChain <KeyToValueMappingTransformer> model = pipeline.Fit(data.TrainSet);

            Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Starting time: {DateTime.Now.ToString()} ===============");

            MulticlassClassificationMetrics testMetrics = context.MulticlassClassification.Evaluate(model.Transform(data.TestSet));

            Console.WriteLine($"=============== Evaluating to get model's accuracy metrics - Ending time: {DateTime.Now.ToString(CultureInfo.InvariantCulture)} ===============");
            Console.WriteLine($"*************************************************************************************************************");
            Console.WriteLine($"*       Metrics for Multi-class Classification model - Test Data     ");
            Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
            Console.WriteLine($"*       MicroAccuracy:    {testMetrics.MicroAccuracy:0.###}");
            Console.WriteLine($"*       MacroAccuracy:    {testMetrics.MacroAccuracy:0.###}");
            Console.WriteLine($"*       LogLoss:          {testMetrics.LogLoss:#.###}");
            Console.WriteLine($"*       LogLossReduction: {testMetrics.LogLossReduction:#.###}");
            Console.WriteLine($"*************************************************************************************************************");

            context.Model.Save(model, data.TrainSet.Schema, @"language-detection.model");
        }
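The saved model can later be reloaded for single-sentence predictions. A minimal sketch, assuming a LanguagePrediction output class with a PredictedLabel property (not shown in the original example):

        private static string DetectLanguage(string sentence)
        {
            MLContext context = new MLContext(0);

            ITransformer model = context.Model.Load(@"language-detection.model", out DataViewSchema inputSchema);

            // LanguagePrediction is assumed to look like:
            // public class LanguagePrediction { [ColumnName("PredictedLabel")] public string PredictedLabel { get; set; } }
            var engine = context.Model.CreatePredictionEngine<LanguageSentence, LanguagePrediction>(model);

            return engine.Predict(new LanguageSentence { Sentence = sentence }).PredictedLabel;
        }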