/// <summary>
/// Runs permutation feature importance for a regression predictor and returns the
/// per-slot metric statistics as a data view.
/// </summary>
/// <param name="env">Host environment used to build the transformer and catalogs.</param>
/// <param name="predictor">Predictor to evaluate; it is passed on as an <c>IPredictorProducing&lt;float&gt;</c>.</param>
/// <param name="roleMappedData">Data together with the role mapping that supplies the feature and label column names.</param>
/// <param name="input">Arguments supplying the weight-filter flag, example limit and permutation count.</param>
/// <returns>A data view with one row per feature slot whose name is not blank.</returns>
private static IDataView GetRegressionMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            var roleNames = roleMappedData.Schema.GetColumnRoleNames();

            // Resolve the column names bound to the Feature and Label roles.
            var featureColumn = (from role in roleNames
                                 where role.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value
                                 select role.Value).First();
            var labelColumn = (from role in roleNames
                               where role.Key.Value == RoleMappedSchema.ColumnRole.Label.Value
                               select role.Value).First();

            var transformer = new RegressionPredictionTransformer<IPredictorProducing<float>>(
                env,
                predictor as IPredictorProducing<float>,
                roleMappedData.Data.Schema,
                featureColumn);

            var catalog = new RegressionCatalog(env);
            var importances = catalog.PermutationFeatureImportance(
                transformer,
                roleMappedData.Data,
                labelColumnName: labelColumn,
                useFeatureWeightFilter: input.UseFeatureWeightFilter,
                numberOfExamplesToUse: input.NumberOfExamplesToUse,
                permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == importances.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            var rows = new List<RegressionMetrics>();

            for (int slot = 0; slot < importances.Length; slot++)
            {
                var name = slotNames[slot];

                // Slots without a usable name are left out of the report.
                if (!string.IsNullOrWhiteSpace(name))
                {
                    var stats = importances[slot];
                    var row = new RegressionMetrics
                    {
                        FeatureName                = name,
                        MeanAbsoluteError          = stats.MeanAbsoluteError.Mean,
                        MeanAbsoluteErrorStdErr    = stats.MeanAbsoluteError.StandardError,
                        MeanSquaredError           = stats.MeanSquaredError.Mean,
                        MeanSquaredErrorStdErr     = stats.MeanSquaredError.StandardError,
                        RootMeanSquaredError       = stats.RootMeanSquaredError.Mean,
                        RootMeanSquaredErrorStdErr = stats.RootMeanSquaredError.StandardError,
                        LossFunction               = stats.LossFunction.Mean,
                        LossFunctionStdErr         = stats.LossFunction.StandardError,
                        RSquared                   = stats.RSquared.Mean,
                        RSquaredStdErr             = stats.RSquared.StandardError
                    };
                    rows.Add(row);
                }
            }

            return new DataOperationsCatalog(env).LoadFromEnumerable(rows);
        }
        /// <summary>
        /// Trains a 3-tree, 5-leaf FastForest regressor on generated samples and checks the
        /// structure of the learned ensemble (tree count, weights, bias) and of its last tree
        /// (children, split features, thresholds, leaf samples and sample weights).
        /// </summary>
        public void FastForestRegressionIntrospectiveTraining()
        {
            var ml       = new MLContext(seed: 1, conc: 1);
            var data     = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000);
            var dataView = ml.Data.ReadFromEnumerable(data);

            RegressionPredictionTransformer<FastForestRegressionModelParameters> pred = null;
            var trainer = ml.Regression.Trainers.FastForest(numLeaves: 5, numTrees: 3).WithOnFitDelegate(p => pred = p);

            // Train. The return value is not needed: the on-fit delegate captures the transformer in 'pred'.
            trainer.Fit(dataView);

            // Extract the learned RF model.
            var treeCollection = pred.Model.TrainedTreeEnsemble;

            // Inspect properties in the extracted model.
            Assert.Equal(3, treeCollection.Trees.Count);
            Assert.Equal(3, treeCollection.TreeWeights.Count);
            Assert.Equal(0, treeCollection.Bias);
            Assert.All(treeCollection.TreeWeights, weight => Assert.Equal(1.0, weight));

            // Inspect the last tree.
            var tree = treeCollection.Trees[2];

            Assert.Equal(5, tree.NumLeaves);
            Assert.Equal(4, tree.NumNodes);

            // Expected value goes first so that a failure message labels the two sides correctly.
            Assert.Equal(new int[] { -1, -2, -3, -4 }, tree.LteChild);
            Assert.Equal(new int[] { 1, 2, 3, -5 }, tree.GtChild);
            Assert.Equal(new int[] { 9, 0, 1, 8 }, tree.NumericalSplitFeatureIndexes);
            var expectedThresholds = new float[] { 0.208134219f, 0.198336035f, 0.202952743f, 0.205061346f };

            // Thresholds are compared to 6 decimal places rather than exactly.
            for (int i = 0; i < tree.NumNodes; ++i)
            {
                Assert.Equal(expectedThresholds[i], tree.NumericalSplitThresholds[i], 6);
            }
            Assert.All(tree.CategoricalSplitFlags, flag => Assert.False(flag));

            Assert.Equal(0, tree.GetCategoricalSplitFeaturesAt(0).Count);
            Assert.Equal(0, tree.GetCategoricalCategoricalSplitFeatureRangeAt(0).Count);

            var samples = new double[] { 0.97468354430379744, 1.0, 0.97727272727272729, 0.972972972972973, 0.26124197002141325 };

            // Each leaf is expected to expose exactly one sample value with weight 1.
            for (int i = 0; i < tree.NumLeaves; ++i)
            {
                var sample = tree.GetLeafSamplesAt(i);
                Assert.Single(sample);
                Assert.Equal(samples[i], sample[0], 6);
                var weight = tree.GetLeafSampleWeightsAt(i);
                Assert.Single(weight);
                Assert.Equal(1, weight[0]);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Fits a data-preparation pipeline and an SDCA regression trainer on three in-memory
        /// housing samples, then saves the pipeline and the trained model separately
        /// (to <c>DataPipelinePath</c> and <c>ModelPath</c>) before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Helper.PrintLine("使用的单独的数据准备和模型管道");
            var context = new MLContext();

            Helper.PrintLine("准备训练数据集...");
            var samples = new[]
            {
                new HousingData { Size = 600f,  HistoricalPrices = new[] { 100000f, 125000f, 122000f }, CurrentPrice = 170000f },
                new HousingData { Size = 1000f, HistoricalPrices = new[] { 200000f, 250000f, 230000f }, CurrentPrice = 225000f },
                new HousingData { Size = 1000f, HistoricalPrices = new[] { 126000f, 130000f, 200000f }, CurrentPrice = 195000f }
            };
            IDataView rawData = context.Data.LoadFromEnumerable(samples);

            Helper.PrintLine("准备数据处理管道...");
            // Concatenate the input columns into "Features", then min-max normalize that column.
            var concatenate = context.Transforms.Concatenate("Features", "Size", "HistoricalPrices");
            var normalize   = context.Transforms.NormalizeMinMax("Features");
            IEstimator<ITransformer> prepEstimator = concatenate.Append(normalize);

            Helper.PrintLine("训练数据处理管道...");
            ITransformer prepTransformer = prepEstimator.Fit(rawData);

            Helper.PrintLine("准备神经网络模型...");
            var trainer = context.Regression.Trainers.Sdca();

            Helper.PrintLine("训练神经网络模型...");
            IDataView preparedData = prepTransformer.Transform(rawData);
            var model = trainer.Fit(preparedData);

            Helper.PrintLine("训练神经网络完成");

            Helper.PrintLine("保存数据处理管道和神经网络模型...");
            // Each artifact is saved with the schema of the data it consumes.
            context.Model.Save(prepTransformer, rawData.Schema, DataPipelinePath);
            context.Model.Save(model, preparedData.Schema, ModelPath);

            Helper.Exit(0);
        }
Esempio n. 4
0
 /// <summary>
 /// Restores the X and Y regression transformers from the serialized model bytes held in
 /// <paramref name="data"/> and then calls <c>GenPredictors</c>.
 /// </summary>
 /// <param name="data">Configuration carrying the serialized models in <c>PredictorX</c> and <c>PredictorY</c>.</param>
 public RegressionPoiOnDisplayEstimator(RegressionPoiOnDisplayEstimatorConfiguration data)
 {
     mlContext = new MLContext(seed: 0);

     // Wrap the byte arrays directly instead of writing them into an empty stream:
     // this avoids a second copy of the model and leaves the stream positioned at
     // the start, so loading does not rely on the reader rewinding it.
     using (var s = new MemoryStream(data.PredictorX, writable: false)) {
         estimatorX = (RegressionPredictionTransformer<FastTreeRegressionModelParameters>)mlContext.Model.Load(s, out _);
     }
     using (var s = new MemoryStream(data.PredictorY, writable: false)) {
         estimatorY = (RegressionPredictionTransformer<FastTreeRegressionModelParameters>)mlContext.Model.Load(s, out _);
     }
     GenPredictors();
 }
Esempio n. 5
0
        /// <summary>
        /// Restores the X and Y regression transformers from the serialized model bytes in
        /// <paramref name="param"/> and then calls <c>GenPredictors</c>.
        /// </summary>
        /// <param name="param">Parameters to load; must be an <c>EndToEndConverterParameters</c> instance.</param>
        /// <exception cref="System.InvalidCastException">
        /// <paramref name="param"/> is not an <c>EndToEndConverterParameters</c>.
        /// </exception>
        public void Load(GazeToDisplayConverterParameters param)
        {
            var data = (EndToEndConverterParameters)param;

            mlContext = new MLContext(seed: 0);

            // Wrap the byte arrays directly instead of writing them into an empty stream:
            // this avoids a second copy of the model and leaves the stream positioned at
            // the start, so loading does not rely on the reader rewinding it.
            using (var s = new MemoryStream(data.PredictorX, writable: false)) {
                estimatorX = (RegressionPredictionTransformer<FastTreeRegressionModelParameters>)mlContext.Model.Load(s, out _);
            }
            using (var s = new MemoryStream(data.PredictorY, writable: false)) {
                estimatorY = (RegressionPredictionTransformer<FastTreeRegressionModelParameters>)mlContext.Model.Load(s, out _);
            }
            GenPredictors();
        }
Esempio n. 6
0
        /// <summary>
        /// Loads the saved data-preparation pipeline and regression model, retrains the model
        /// on three new housing samples with online gradient descent starting from the loaded
        /// parameters, and prints the old and new weights and bias side by side.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="System.InvalidCastException">
        /// The loaded model is not a single-feature prediction transformer, or its parameters
        /// are not <c>LinearRegressionModelParameters</c>.
        /// </exception>
        static void Main(string[] args)
        {
            Helper.PrintLine("重新训练模型项目");
            MLContext mlContext = new MLContext();

            Helper.PrintLine("加载数据处理管道和神经网络模型...");
            // The input schemas returned by Load are not used here, so they are discarded.
            ITransformer dataPrepPipeline = mlContext.Model.Load(DataPipelinePath, out DataViewSchema _);
            ITransformer trainedModel     = mlContext.Model.Load(ModelPath, out DataViewSchema _);

            // Direct cast instead of 'as': an unexpected model type now fails here, rather than
            // passing null to Fit as the initial model and hitting a NullReferenceException
            // when the parameters are printed.
            LinearRegressionModelParameters originalMP =
                (LinearRegressionModelParameters)((ISingleFeaturePredictionTransformer<object>)trainedModel).Model;

            Helper.PrintLine("重新训练神经网络...");
            HousingData[] housingData = new HousingData[]
            {
                new HousingData
                {
                    Size             = 850f,
                    HistoricalPrices = new float[] { 150000f, 175000f, 210000f },
                    CurrentPrice     = 205000f
                },
                new HousingData
                {
                    Size             = 900f,
                    HistoricalPrices = new float[] { 155000f, 190000f, 220000f },
                    CurrentPrice     = 210000f
                },
                new HousingData
                {
                    Size             = 550f,
                    HistoricalPrices = new float[] { 99000f, 98000f, 130000f },
                    CurrentPrice     = 180000f
                }
            };

            IDataView newData            = mlContext.Data.LoadFromEnumerable(housingData);
            IDataView transformedNewData = dataPrepPipeline.Transform(newData);

            RegressionPredictionTransformer<LinearRegressionModelParameters> retrainedModel =
                mlContext.Regression.Trainers.OnlineGradientDescent()
                .Fit(transformedNewData, originalMP);

            // Model is already typed as LinearRegressionModelParameters; no cast is needed.
            LinearRegressionModelParameters retrainedMP = retrainedModel.Model;

            // Pair each original parameter (weights followed by bias) with its retrained
            // counterpart and render one "old | new | delta" row per parameter.
            var rows = originalMP.Weights.Append(originalMP.Bias)
                .Zip(retrainedMP.Weights.Append(retrainedMP.Bias))
                .Select(weights => $"{weights.First:F2}\t|{weights.Second:F2}\t|{weights.Second - weights.First:F2}");
            string table = string.Join("\n\t", rows);

            Helper.PrintLine($"比较模型参数变化:\n\t源模型参数\t|更新模型参数\t|变化\n\t{table}");

            Helper.Exit(0);
        }
Esempio n. 7
0
        /// <summary>
        /// Trains the initial taxi-fare regression model from the cached data set named by the
        /// <c>InitialDataSet</c> app setting, saves the data-preparation transformer and the
        /// trained model, and records the model path in the cache.
        /// </summary>
        /// <param name="mlContext">ML.NET context used for loading, transforming, training and saving.</param>
        /// <returns>
        /// The trained regression transformer, or <c>null</c> when the <c>InitialDataSet</c>
        /// setting is missing or empty.
        /// </returns>
        private static ITransformer TrainModel(MLContext mlContext)
        {
            // Common data loading configuration
            string initialdataKey = ConfigurationManager.AppSettings["InitialDataSet"];

            if (string.IsNullOrEmpty(initialdataKey))
            {
                Console.WriteLine("Initial DataSet key value cannot be null or empty.");
                return null;
            }

            LoadInitialDataInCache(initialdataKey);
            var datasetTraining = Cache.DataTypeManager.GetList<TaxiTrip>(initialdataKey);
            var inputDataView   = mlContext.Data.LoadFromEnumerable(datasetTraining);

            // Drop extreme rows ("outliers"): fares above $150 or below $1 are treated as error data.
            // The row counts that were previously computed before and after this filter were never
            // used and each cost a full pass over the data, so they are no longer computed.
            IDataView trainingDataView = mlContext.Data.FilterRowsByColumn(inputDataView, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);

            // Define data preparation estimator
            // NOTE(review): "FareAmount" is part of the feature vector; if it is also the label
            // column this leaks the target into the features — confirm against TaxiTrip.
            IEstimator<ITransformer> dataPrepEstimator =
                mlContext.Transforms.Concatenate("Features", new string[] { "PassengerCount", "TripTime", "TripDistance", "FareAmount" })
                .Append(mlContext.Transforms.NormalizeMinMax("Features"));

            // Create data preparation transformer
            ITransformer dataPrepTransformer = dataPrepEstimator.Fit(trainingDataView);

            // Define StochasticDualCoordinateAscent regression algorithm estimator
            var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

            // Pre-process data using data prep operations
            IDataView transformedData = dataPrepTransformer.Transform(trainingDataView);

            // Train regression model
            RegressionPredictionTransformer<LinearRegressionModelParameters> trainedModel = sdcaEstimator.Fit(transformedData);

            Console.WriteLine("Initial model trained.");
            // Save Data Prep transformer
            mlContext.Model.Save(dataPrepTransformer, trainingDataView.Schema, ModelParamsPath);

            // Save Trained Model
            mlContext.Model.Save(trainedModel, transformedData.Schema, ModelPath);
            Console.WriteLine("The model is saved to {0}", ModelPath);
            ModelTrained = true;
            Cache.Add(ModelPath, ModelPath);
            return trainedModel;
        }
Esempio n. 8
0
        /// <summary>
        /// Performs the base update for <paramref name="atrain"/>, then fits a data-preparation
        /// pipeline and an SDCA regression model on <c>ChatData.Sample</c> and saves both to
        /// "data_preparation_pipeline.zip" and "model11.zip".
        /// </summary>
        /// <param name="atrain">Entity passed through to the base <c>Update</c>.</param>
        /// <returns>The result produced by the base <c>Update</c> call.</returns>
        public new ExecuteResult<Agency_Trainer> Update(Agency_Trainer atrain)
        {
            var updateResult = base.Update(atrain);

            var context = new MLContext();
            IDataView samples = context.Data.LoadFromEnumerable(ChatData.Sample);

            // Data preparation: concatenate the input columns into "Features", then min-max normalize.
            var concatenate = context.Transforms.Concatenate("Features", "Size", "HistoricalPrices");
            var normalize   = context.Transforms.NormalizeMinMax("Features");
            IEstimator<ITransformer> prepEstimator = concatenate.Append(normalize);

            // Fit the preparation pipeline and apply it to the same data.
            ITransformer prepTransformer = prepEstimator.Fit(samples);
            IDataView prepared = prepTransformer.Transform(samples);

            // Train the SDCA regression model with its default column names.
            var trainer = context.Regression.Trainers.Sdca();
            var model = trainer.Fit(prepared);

            // Save the preparation transformer and the model as separate artifacts,
            // each with the schema of the data it consumes.
            context.Model.Save(prepTransformer, samples.Schema, "data_preparation_pipeline.zip");
            context.Model.Save(model, prepared.Schema, "model11.zip");
            Console.WriteLine("预训数据 保存完成");

            return updateResult;
        }
Esempio n. 9
0
        /// <summary>
        /// Trains one FastTree regressor for the display X coordinate and one for the display Y
        /// coordinate on <paramref name="data"/>, regenerates the predictors, and evaluates both
        /// models on the same data they were trained on.
        /// </summary>
        /// <param name="data">Records to train on; each is converted to a <c>RegressionRecord</c>.</param>
        /// <returns>The R-squared of the X model and of the Y model, measured on the training data.</returns>
        public (double RSquaredX, double RSquaredY) Train(IList<Record> data)
        {
            mlContext = new MLContext(seed: 0);

            // Materialize the projection once. A deferred Select would rebuild every
            // RegressionRecord on each pass over the data view (two fits plus two evaluations),
            // and the stored view would change if the caller later mutated 'data'.
            var train = data.Select(r => new RegressionRecord(r)).ToList();

            dataView = mlContext.Data.LoadFromEnumerable(train);

            labelX = nameof(RegressionRecord.DisplayX);
            var pipelineX = mlContext.Regression.Trainers.FastTree(labelX, FEATURE_COLUMN_NAME);

            estimatorX = pipelineX.Fit(dataView);
            labelY     = nameof(RegressionRecord.DisplayY);
            var pipelineY = mlContext.Regression.Trainers.FastTree(labelY, FEATURE_COLUMN_NAME);

            estimatorY = pipelineY.Fit(dataView);
            GenPredictors();

            // Score the training data with each model and evaluate against its own label column.
            var transX = estimatorX.Transform(dataView);
            var evalX  = mlContext.Regression.Evaluate(transX, labelX);
            var transY = estimatorY.Transform(dataView);
            var evalY  = mlContext.Regression.Evaluate(transY, labelY);

            return (evalX.RSquared, evalY.RSquared);
        }