/// <summary>
/// Runs permutation feature importance for a regression predictor and packs the
/// per-feature results into a data view of <see cref="RegressionMetrics"/> rows.
/// </summary>
/// <param name="env">Host environment used to build the prediction transformer and the catalogs.</param>
/// <param name="predictor">Predictor under evaluation; it is handed on as <c>IPredictorProducing&lt;float&gt;</c>.</param>
/// <param name="roleMappedData">Data together with the role mapping that supplies the feature and label column names.</param>
/// <param name="input">Arguments carrying the feature-weight filter flag, example count and permutation count.</param>
/// <returns>A data view with one row for every feature slot whose name is not blank.</returns>
private static IDataView GetRegressionMetrics(
    IHostEnvironment env,
    IPredictor predictor,
    RoleMappedData roleMappedData,
    PermutationFeatureImportanceArguments input)
{
    // Resolve the names of the columns that play the Feature and Label roles.
    var columnRoles = roleMappedData.Schema.GetColumnRoleNames();
    var featureColumnName = columnRoles
        .Where(role => role.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value)
        .First()
        .Value;
    var labelColumnName = columnRoles
        .Where(role => role.Key.Value == RoleMappedSchema.ColumnRole.Label.Value)
        .First()
        .Value;

    // NOTE(review): the 'as' cast yields null when the predictor does not produce floats;
    // how the transformer constructor reacts to that is not visible here - confirm.
    var transformer = new RegressionPredictionTransformer<IPredictorProducing<float>>(
        env,
        predictor as IPredictorProducing<float>,
        roleMappedData.Data.Schema,
        featureColumnName);

    var catalog = new RegressionCatalog(env);
    var permutationMetrics = catalog.PermutationFeatureImportance(
        transformer,
        roleMappedData.Data,
        labelColumnName: labelColumnName,
        useFeatureWeightFilter: input.UseFeatureWeightFilter,
        numberOfExamplesToUse: input.NumberOfExamplesToUse,
        permutationCount: input.PermutationCount);

    var slotNames = GetSlotNames(roleMappedData.Schema);
    Contracts.Assert(slotNames.Length == permutationMetrics.Length,
        "Mismatch between number of feature slots and number of features permuted.");

    var rows = new List<RegressionMetrics>();
    for (int slot = 0; slot < permutationMetrics.Length; slot++)
    {
        // Slots without a usable name are left out of the report.
        if (!string.IsNullOrWhiteSpace(slotNames[slot]))
        {
            var slotMetric = permutationMetrics[slot];
            rows.Add(new RegressionMetrics
            {
                FeatureName = slotNames[slot],
                MeanAbsoluteError = slotMetric.MeanAbsoluteError.Mean,
                MeanAbsoluteErrorStdErr = slotMetric.MeanAbsoluteError.StandardError,
                MeanSquaredError = slotMetric.MeanSquaredError.Mean,
                MeanSquaredErrorStdErr = slotMetric.MeanSquaredError.StandardError,
                RootMeanSquaredError = slotMetric.RootMeanSquaredError.Mean,
                RootMeanSquaredErrorStdErr = slotMetric.RootMeanSquaredError.StandardError,
                LossFunction = slotMetric.LossFunction.Mean,
                LossFunctionStdErr = slotMetric.LossFunction.StandardError,
                RSquared = slotMetric.RSquared.Mean,
                RSquaredStdErr = slotMetric.RSquared.StandardError
            });
        }
    }

    return new DataOperationsCatalog(env).LoadFromEnumerable(rows);
}
/// <summary>
/// Trains a FastForest regression model with three trees of five leaves on generated
/// data and checks the structure of the learned ensemble, which is obtained from the
/// prediction transformer captured by the on-fit delegate.
/// </summary>
public void FastForestRegressionIntrospectiveTraining()
{
    var ml = new MLContext(seed: 1, conc: 1);
    var data = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000);
    var dataView = ml.Data.ReadFromEnumerable(data);

    RegressionPredictionTransformer<FastForestRegressionModelParameters> pred = null;
    var trainer = ml.Regression.Trainers.FastForest(numLeaves: 5, numTrees: 3).WithOnFitDelegate(p => pred = p);

    // Train. The fitted transformer is picked up through the delegate above, so the
    // return value of Fit is not needed.
    trainer.Fit(dataView);

    // Extract the learned RF model.
    var treeCollection = pred.Model.TrainedTreeEnsemble;

    // Inspect properties in the extracted model.
    Assert.Equal(3, treeCollection.Trees.Count);
    Assert.Equal(3, treeCollection.TreeWeights.Count);
    Assert.Equal(0, treeCollection.Bias);
    Assert.All(treeCollection.TreeWeights, weight => Assert.Equal(1.0, weight));

    // Inspect the last tree.
    var tree = treeCollection.Trees[2];

    Assert.Equal(5, tree.NumLeaves);
    Assert.Equal(4, tree.NumNodes);

    // Expected value goes first so that a failure report labels the two sides correctly.
    Assert.Equal(new int[] { -1, -2, -3, -4 }, tree.LteChild);
    Assert.Equal(new int[] { 1, 2, 3, -5 }, tree.GtChild);
    Assert.Equal(new int[] { 9, 0, 1, 8 }, tree.NumericalSplitFeatureIndexes);

    var expectedThresholds = new float[] { 0.208134219f, 0.198336035f, 0.202952743f, 0.205061346f };
    for (int i = 0; i < tree.NumNodes; ++i)
    {
        Assert.Equal(expectedThresholds[i], tree.NumericalSplitThresholds[i], 6);
    }

    Assert.All(tree.CategoricalSplitFlags, flag => Assert.False(flag));

    Assert.Equal(0, tree.GetCategoricalSplitFeaturesAt(0).Count);
    Assert.Equal(0, tree.GetCategoricalCategoricalSplitFeatureRangeAt(0).Count);

    var samples = new double[] { 0.97468354430379744, 1.0, 0.97727272727272729, 0.972972972972973, 0.26124197002141325 };
    for (int i = 0; i < tree.NumLeaves; ++i)
    {
        // Each leaf is expected to hold exactly one sample value with weight 1.
        var sample = tree.GetLeafSamplesAt(i);
        Assert.Single(sample);
        Assert.Equal(samples[i], sample[0], 6);

        var weight = tree.GetLeafSampleWeightsAt(i);
        Assert.Single(weight);
        Assert.Equal(1, weight[0]);
    }
}
/// <summary>
/// Entry point: fits a data preparation pipeline and an SDCA regression model on a
/// small in-memory housing data set, then saves the two artifacts separately to
/// <c>DataPipelinePath</c> and <c>ModelPath</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Helper.PrintLine("使用的单独的数据准备和模型管道");
    var mlContext = new MLContext();

    Helper.PrintLine("准备训练数据集...");
    var housingData = new[]
    {
        new HousingData
        {
            Size = 600f,
            HistoricalPrices = new[] { 100000f, 125000f, 122000f },
            CurrentPrice = 170000f
        },
        new HousingData
        {
            Size = 1000f,
            HistoricalPrices = new[] { 200000f, 250000f, 230000f },
            CurrentPrice = 225000f
        },
        new HousingData
        {
            Size = 1000f,
            HistoricalPrices = new[] { 126000f, 130000f, 200000f },
            CurrentPrice = 195000f
        }
    };
    IDataView data = mlContext.Data.LoadFromEnumerable(housingData);

    // Data preparation: concatenate the inputs into "Features", then min-max normalize.
    Helper.PrintLine("准备数据处理管道...");
    var concatenate = mlContext.Transforms.Concatenate("Features", new string[] { "Size", "HistoricalPrices" });
    IEstimator<ITransformer> dataPrepEstimator = concatenate.Append(mlContext.Transforms.NormalizeMinMax("Features"));

    Helper.PrintLine("训练数据处理管道...");
    ITransformer dataPrepTransformer = dataPrepEstimator.Fit(data);

    Helper.PrintLine("准备神经网络模型...");
    var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

    // The trainer is fitted on the prepared data, not on the raw input.
    Helper.PrintLine("训练神经网络模型...");
    IDataView transformedData = dataPrepTransformer.Transform(data);
    RegressionPredictionTransformer<LinearRegressionModelParameters> trainedModel = sdcaEstimator.Fit(transformedData);
    Helper.PrintLine("训练神经网络完成");

    // Each artifact is saved with the schema of the data it consumes.
    Helper.PrintLine("保存数据处理管道和神经网络模型...");
    mlContext.Model.Save(dataPrepTransformer, data.Schema, DataPipelinePath);
    mlContext.Model.Save(trainedModel, transformedData.Schema, ModelPath);

    Helper.Exit(0);
}
/// <summary>
/// Restores the X and Y estimators from the serialized models held in
/// <paramref name="data"/> and then calls <c>GenPredictors</c>.
/// </summary>
/// <param name="data">Configuration providing the serialized <c>PredictorX</c> and <c>PredictorY</c> byte arrays.</param>
public RegressionPoiOnDisplayEstimator(RegressionPoiOnDisplayEstimatorConfiguration data)
{
    mlContext = new MLContext(seed: 0);

    estimatorX = RestoreEstimator(data.PredictorX);
    estimatorY = RestoreEstimator(data.PredictorY);

    GenPredictors();

    // Copies the serialized model into a fresh in-memory stream and loads it from there.
    RegressionPredictionTransformer<FastTreeRegressionModelParameters> RestoreEstimator(byte[] serialized)
    {
        using (var buffer = new MemoryStream())
        {
            buffer.Write(serialized, 0, serialized.Length);

            // NOTE(review): the stream is not rewound after the write; presumably
            // Model.Load positions the stream itself - confirm.
            return (RegressionPredictionTransformer<FastTreeRegressionModelParameters>)mlContext.Model.Load(buffer, out _);
        }
    }
}
/// <summary>
/// Restores the X and Y estimators from serialized models and then calls
/// <c>GenPredictors</c>.
/// </summary>
/// <param name="param">
/// Converter parameters; cast to <c>EndToEndConverterParameters</c>, so any other
/// runtime type makes the cast throw.
/// </param>
public void Load(GazeToDisplayConverterParameters param)
{
    var endToEnd = (EndToEndConverterParameters)param;
    mlContext = new MLContext(seed: 0);

    estimatorX = ReadModel(endToEnd.PredictorX);
    estimatorY = ReadModel(endToEnd.PredictorY);

    GenPredictors();

    // Writes the model bytes into a temporary memory stream and loads the transformer from it.
    RegressionPredictionTransformer<FastTreeRegressionModelParameters> ReadModel(byte[] modelBytes)
    {
        using (var stream = new MemoryStream())
        {
            stream.Write(modelBytes, 0, modelBytes.Length);

            // NOTE(review): the position stays at the end of the stream after the write;
            // presumably Model.Load seeks on its own - confirm.
            var loaded = mlContext.Model.Load(stream, out _);
            return (RegressionPredictionTransformer<FastTreeRegressionModelParameters>)loaded;
        }
    }
}
/// <summary>
/// Entry point: loads the saved data preparation pipeline and the trained linear
/// regression model, re-trains the model on new housing samples with online gradient
/// descent starting from the original parameters, and prints the original and
/// re-trained weights and bias side by side.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Helper.PrintLine("重新训练模型项目");
    MLContext mlContext = new MLContext();

    Helper.PrintLine("加载数据处理管道和神经网络模型...");
    // The schemas returned by Load are not needed here, so they are discarded.
    ITransformer dataPrepPipeline = mlContext.Model.Load(DataPipelinePath, out DataViewSchema _);
    ITransformer trainedModel = mlContext.Model.Load(ModelPath, out DataViewSchema _);

    // Direct cast instead of 'as': a model of an unexpected type now fails right here
    // with an InvalidCastException instead of passing null on as the starting model
    // and crashing later with a NullReferenceException when the weights are printed.
    LinearRegressionModelParameters originalMP =
        (LinearRegressionModelParameters)((ISingleFeaturePredictionTransformer<object>)trainedModel).Model;

    Helper.PrintLine("重新训练神经网络...");
    HousingData[] housingData = new HousingData[]
    {
        new HousingData
        {
            Size = 850f,
            HistoricalPrices = new float[] { 150000f, 175000f, 210000f },
            CurrentPrice = 205000f
        },
        new HousingData
        {
            Size = 900f,
            HistoricalPrices = new float[] { 155000f, 190000f, 220000f },
            CurrentPrice = 210000f
        },
        new HousingData
        {
            Size = 550f,
            HistoricalPrices = new float[] { 99000f, 98000f, 130000f },
            CurrentPrice = 180000f
        }
    };
    IDataView newData = mlContext.Data.LoadFromEnumerable(housingData);

    // The new samples go through the same preparation pipeline as the original training data.
    IDataView transformedNewData = dataPrepPipeline.Transform(newData);

    RegressionPredictionTransformer<LinearRegressionModelParameters> retrainedModel =
        mlContext.Regression.Trainers.OnlineGradientDescent()
            .Fit(transformedNewData, originalMP);

    // Model is already typed as LinearRegressionModelParameters; no cast is required.
    LinearRegressionModelParameters retrainedMP = retrainedModel.Model;

    // One row per parameter (all weights, then the bias): original | re-trained | difference.
    var originalParameters = originalMP.Weights.Append(originalMP.Bias);
    var retrainedParameters = retrainedMP.Weights.Append(retrainedMP.Bias);
    var rows = originalParameters
        .Zip(retrainedParameters)
        .Select(weights => $"{weights.First:F2}\t|{weights.Second:F2}\t|{weights.Second - weights.First:F2}");
    var table = string.Join("\n\t", rows);

    Helper.PrintLine($"比较模型参数变化:\n\t源模型参数\t|更新模型参数\t|变化\n\t{table}");

    Helper.Exit(0);
}
/// <summary>
/// Trains the initial SDCA regression model on the taxi-trip data set obtained from the
/// cache, saves the data preparation transformer and the trained model, and records
/// the model path in the cache.
/// </summary>
/// <param name="mlContext">ML.NET context used for loading, transforming, training and saving.</param>
/// <returns>
/// The trained model, or <c>null</c> when the "InitialDataSet" application setting is
/// missing or empty.
/// </returns>
private static ITransformer TrainModel(MLContext mlContext)
{
    // Common data loading configuration
    string initialdataKey = ConfigurationManager.AppSettings["InitialDataSet"];
    if (String.IsNullOrEmpty(initialdataKey))
    {
        Console.WriteLine("Initial DataSet key value cannot be null or empty.");
        return null;
    }

    LoadInitialDataInCache(initialdataKey);
    var datasetTraining = Cache.DataTypeManager.GetList<TaxiTrip>(initialdataKey);
    var inputDataView = mlContext.Data.LoadFromEnumerable(datasetTraining);

    // Remove extreme fares ("outliers"): only rows whose FareAmount lies between 1 and 150
    // are kept. The row counts that used to be taken before and after the filter were
    // never read, and each one forced a full pass over the data, so they are gone.
    IDataView trainingDataView = mlContext.Data.FilterRowsByColumn(
        inputDataView,
        nameof(TaxiTrip.FareAmount),
        lowerBound: 1,
        upperBound: 150);

    // Define data preparation estimator.
    // NOTE(review): FareAmount is concatenated into Features; if it is also the label
    // column used by the trainer, the target leaks into the inputs - confirm.
    IEstimator<ITransformer> dataPrepEstimator =
        mlContext.Transforms.Concatenate("Features", new string[] { "PassengerCount", "TripTime", "TripDistance", "FareAmount" })
            .Append(mlContext.Transforms.NormalizeMinMax("Features"));

    // Create data preparation transformer
    ITransformer dataPrepTransformer = dataPrepEstimator.Fit(trainingDataView);

    // Define StochasticDualCoordinateAscent regression algorithm estimator
    var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

    // Pre-process data using data prep operations
    IDataView transformedData = dataPrepTransformer.Transform(trainingDataView);

    // Train regression model
    RegressionPredictionTransformer<LinearRegressionModelParameters> trainedModel = sdcaEstimator.Fit(transformedData);
    Console.WriteLine("Initial model trained.");

    // Save Data Prep transformer
    mlContext.Model.Save(dataPrepTransformer, trainingDataView.Schema, ModelParamsPath);

    // Save Trained Model
    mlContext.Model.Save(trainedModel, transformedData.Schema, ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    // Flag completion and publish the model path through the cache.
    ModelTrained = true;
    Cache.Add(ModelPath, ModelPath);

    return trainedModel;
}
/// <summary>
/// Performs the base update for <paramref name="atrain"/> and then fits and saves a
/// data preparation pipeline and an SDCA regression model built from
/// <c>ChatData.Sample</c>.
/// </summary>
/// <param name="atrain">Entity passed on to the base <c>Update</c>.</param>
/// <returns>The result returned by the base <c>Update</c> call.</returns>
public new ExecuteResult<Agency_Trainer> Update(Agency_Trainer atrain)
{
    var res = base.Update(atrain);

    var mlContext = new MLContext();

    // Training input comes from the sample data, not from the updated entity.
    IDataView data = mlContext.Data.LoadFromEnumerable(ChatData.Sample);

    // Preparation pipeline: concatenate the inputs into "Features", then min-max normalize.
    var concatenate = mlContext.Transforms.Concatenate("Features", new string[] { "Size", "HistoricalPrices" });
    IEstimator<ITransformer> dataPrepEstimator = concatenate.Append(mlContext.Transforms.NormalizeMinMax("Features"));

    // Fit the preparation pipeline and apply it to the same data.
    ITransformer dataPrepTransformer = dataPrepEstimator.Fit(data);
    IDataView transformedData = dataPrepTransformer.Transform(data);

    // Train the SDCA regression model with the trainer's default column names.
    var sdcaEstimator = mlContext.Regression.Trainers.Sdca();
    RegressionPredictionTransformer<LinearRegressionModelParameters> trainedModel = sdcaEstimator.Fit(transformedData);

    // Persist both artifacts, each with the schema of the data it consumes.
    mlContext.Model.Save(dataPrepTransformer, data.Schema, "data_preparation_pipeline.zip");
    mlContext.Model.Save(trainedModel, transformedData.Schema, "model11.zip");
    Console.WriteLine("预训数据 保存完成");

    return res;
}
/// <summary>
/// Fits one FastTree regression model for the display X coordinate and one for the
/// display Y coordinate, calls <c>GenPredictors</c>, and reports the R-squared of each
/// model measured on the same data it was trained on.
/// </summary>
/// <param name="data">Records that are converted into <c>RegressionRecord</c> training rows.</param>
/// <returns>The R-squared values of the X and Y models, in that order.</returns>
public (double RSquaredX, double RSquaredY) Train(IList<Record> data)
{
    mlContext = new MLContext(seed: 0);

    var rows = data.Select(record => new RegressionRecord(record));
    dataView = mlContext.Data.LoadFromEnumerable(rows);

    // X model: the label is the DisplayX column.
    labelX = nameof(RegressionRecord.DisplayX);
    estimatorX = mlContext.Regression.Trainers.FastTree(labelX, FEATURE_COLUMN_NAME).Fit(dataView);

    // Y model: the label is the DisplayY column.
    labelY = nameof(RegressionRecord.DisplayY);
    estimatorY = mlContext.Regression.Trainers.FastTree(labelY, FEATURE_COLUMN_NAME).Fit(dataView);

    GenPredictors();

    // Both scores are computed on the training data view itself.
    double rSquaredX = ScoreOnTrainingData(estimatorX, labelX);
    double rSquaredY = ScoreOnTrainingData(estimatorY, labelY);
    return (rSquaredX, rSquaredY);

    // Applies a fitted model to the training data and returns the R-squared of the evaluation.
    double ScoreOnTrainingData(ITransformer fitted, string label)
    {
        var scored = fitted.Transform(dataView);
        return mlContext.Regression.Evaluate(scored, label).RSquared;
    }
}