/// <summary>
/// Trains the taxi-fare price-prediction model: loads the training CSV, builds the
/// data-preparation pipeline, appends an SDCA regression trainer, fits the pipeline,
/// and prints the elapsed fitting time to the console.
/// </summary>
void IAiTest.Train()
{
    Console.WriteLine("=============== Regression task - Price Prediction ===============");

    // The training file has a header row and comma-separated columns.
    IDataView dataView = _context.Data.LoadFromTextFile<TaxiTrip>(
        $"{RootFolder}/{TrainDataFile}",
        hasHeader: true,
        separatorChar: ',');

    // Optional (not enabled): fares above $150 or below $1 may be erroneous "outlier" rows.
    // They could be dropped before training with something like:
    //   _context.Data.FilterRowsByColumn(view, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);

    // Stage 1: expose the fare as "Label" and one-hot encode the categorical columns.
    var labelAndCategoricals = _context.Transforms
        .CopyColumns(outputColumnName: "Label", inputColumnName: nameof(TaxiTrip.FareAmount))
        .Append(_context.Transforms.Categorical.OneHotEncoding(
            outputColumnName: "VendorIdEncoded",
            inputColumnName: nameof(TaxiTrip.VendorId)))
        .Append(_context.Transforms.Categorical.OneHotEncoding(
            outputColumnName: "RateCodeEncoded",
            inputColumnName: nameof(TaxiTrip.RateCode)))
        .Append(_context.Transforms.Categorical.OneHotEncoding(
            outputColumnName: "PaymentTypeEncoded",
            inputColumnName: nameof(TaxiTrip.PaymentType)));

    // Stage 2: mean-variance normalize the numeric columns in place.
    var withNormalizedNumerics = labelAndCategoricals
        .Append(_context.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.PassengerCount)))
        .Append(_context.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripTime)))
        .Append(_context.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripDistance)));

    // Stage 3: gather every prepared column into the single "Features" column.
    var dataProcessPipeline = withNormalizedNumerics
        .Append(_context.Transforms.Concatenate(
            "Features",
            "VendorIdEncoded",
            "RateCodeEncoded",
            "PaymentTypeEncoded",
            nameof(TaxiTrip.PassengerCount),
            nameof(TaxiTrip.TripTime),
            nameof(TaxiTrip.TripDistance)));

    _trainer = _context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(_trainer);

    var fitTimer = new Stopwatch();
    Console.WriteLine("=============== Create and Train the Model ===============");

    // Only the Fit call is timed.
    fitTimer.Start();
    _model = trainingPipeline.Fit(dataView);
    fitTimer.Stop();

    long elapsedMs = fitTimer.ElapsedMilliseconds;
    Console.WriteLine($" Total {elapsedMs} ms");
    Console.WriteLine("=============== End of training ===============");
    Console.WriteLine();
}
/// <summary>
/// Predict a target using a linear regression model trained with the SDCA trainer.
/// </summary>
/// <param name="ctx">The regression context trainer object.</param>
/// <param name="label">The label, or dependent variable. Must not be null.</param>
/// <param name="features">The features, or independent variables. Must not be null.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="l2Const">The L2 regularization hyperparameter. Must not be negative, if specified.</param>
/// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// Must not be negative, if specified.</param>
/// <param name="maxIterations">The maximum number of passes to perform over the data. Must be positive, if specified.</param>
/// <param name="loss">The custom loss, if unspecified will be <see cref="SquaredLoss"/>.</param>
/// <param name="advancedSettings">A delegate to set more settings.
/// The settings here will override the ones provided in the direct method signature,
/// if both are present and have different values.
/// The columns names, however need to be provided directly, not through the <paramref name="advancedSettings"/>.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained. Note that this action cannot change the result in any way; it is only a way
/// for the caller to be informed about what was learnt.</param>
/// <returns>The predicted output.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
/// ]]></format>
/// </example>
public static Scalar<float> Sdca(this RegressionContext.RegressionTrainers ctx,
    Scalar<float> label,
    Vector<float> features,
    Scalar<float> weights = null,
    float? l2Const = null,
    float? l1Threshold = null,
    int? maxIterations = null,
    ISupportSdcaRegressionLoss loss = null,
    Action<SdcaRegressionTrainer.Arguments> advancedSettings = null,
    Action<LinearRegressionModelParameters> onFit = null)
{
    // Argument validation; the check order determines which failure is reported first.
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    // The "!(x < 0)" form lets an unspecified (null) value pass the check.
    Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
    Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
    Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
    Contracts.CheckValueOrNull(loss);
    Contracts.CheckValueOrNull(advancedSettings);
    Contracts.CheckValueOrNull(onFit);

    var reconciler = new TrainerEstimatorReconciler.Regression(
        (environment, labelColumn, featuresColumn, weightsColumn) =>
        {
            var sdca = new SdcaRegressionTrainer(
                environment, labelColumn, featuresColumn, weightsColumn,
                loss, l2Const, l1Threshold, maxIterations, advancedSettings);

            // Without a callback the bare trainer is the estimator.
            if (onFit == null)
            {
                return sdca;
            }

            // Otherwise hand the trained model to the caller's callback after each fit.
            return sdca.WithOnFitDelegate(fitted => onFit(fitted.Model));
        },
        label, features, weights);

    return reconciler.Score;
}
/// <summary>
/// Predict a target using a linear regression model trained with the SDCA trainer.
/// </summary>
/// <param name="catalog">The regression catalog trainer object.</param>
/// <param name="label">The label, or dependent variable. Must not be null.</param>
/// <param name="features">The features, or independent variables. Must not be null.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="l2Regularization">The L2 regularization hyperparameter. Must not be negative, if specified.</param>
/// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
/// Must not be negative, if specified.</param>
/// <param name="numberOfIterations">The maximum number of passes to perform over the data. Must be positive, if specified.</param>
/// <param name="lossFunction">The custom loss, if unspecified will be <see cref="SquaredLoss"/>.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained. Note that this action cannot change the result in any way; it is only a way
/// for the caller to be informed about what was learnt.</param>
/// <returns>The predicted output.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
/// ]]></format>
/// </example>
public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catalog,
    Scalar<float> label,
    Vector<float> features,
    Scalar<float> weights = null,
    float? l2Regularization = null,
    float? l1Threshold = null,
    int? numberOfIterations = null,
    ISupportSdcaRegressionLoss lossFunction = null,
    Action<LinearRegressionModelParameters> onFit = null)
{
    // Argument validation; the check order determines which failure is reported first.
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    // The "!(x < 0)" form lets an unspecified (null) value pass the check.
    Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
    Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
    Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");
    Contracts.CheckValueOrNull(lossFunction);
    Contracts.CheckValueOrNull(onFit);

    var reconciler = new TrainerEstimatorReconciler.Regression(
        (environment, labelColumn, featuresColumn, weightsColumn) =>
        {
            var sdca = new SdcaRegressionTrainer(
                environment, labelColumn, featuresColumn, weightsColumn,
                lossFunction, l2Regularization, l1Threshold, numberOfIterations);

            // Without a callback the bare trainer is the estimator.
            if (onFit == null)
            {
                return sdca;
            }

            // Otherwise hand the trained model to the caller's callback after each fit.
            return sdca.WithOnFitDelegate(fitted => onFit(fitted.Model));
        },
        label, features, weights);

    return reconciler.Score;
}
/// <summary>
/// Predict a target using a linear regression model trained with the SDCA trainer.
/// </summary>
/// <param name="catalog">The regression catalog trainer object.</param>
/// <param name="label">The label, or dependent variable. Must not be null.</param>
/// <param name="features">The features, or independent variables. Must not be null.</param>
/// <param name="weights">The optional example weights.</param>
/// <param name="options">Advanced arguments to the algorithm. Must not be null. Its
/// <c>LabelColumnName</c> and <c>FeatureColumnName</c> are overwritten on the supplied instance
/// with the column names resolved for <paramref name="label"/> and <paramref name="features"/>.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained. Note that this action cannot change the result in any way; it is only a way for the caller to
/// be informed about what was learnt.</param>
/// <returns>The predicted output.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
/// ]]></format>
/// </example>
public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catalog,
    Scalar<float> label, Vector<float> features, Scalar<float> weights,
    SdcaRegressionTrainer.Options options,
    Action<LinearRegressionModelParameters> onFit = null)
{
    Contracts.CheckValue(label, nameof(label));
    Contracts.CheckValue(features, nameof(features));
    Contracts.CheckValueOrNull(weights);
    // options is dereferenced unconditionally inside the reconciler callback below, so a null
    // must be rejected here instead of surfacing later as a NullReferenceException.
    Contracts.CheckValue(options, nameof(options));
    Contracts.CheckValueOrNull(onFit);

    var rec = new TrainerEstimatorReconciler.Regression(
        (env, labelName, featuresName, weightsName) =>
        {
            // The column names always come from the pipeline, replacing whatever the caller put in options.
            options.LabelColumnName = labelName;
            options.FeatureColumnName = featuresName;
            // NOTE(review): weightsName is not forwarded to options, so the weights argument appears
            // to have no effect in this overload (the sibling overloads pass it to the trainer
            // constructor) - confirm the weight-column property on Options and set it here.

            var trainer = new SdcaRegressionTrainer(env, options);

            if (onFit != null)
            {
                return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
            }

            return trainer;
        },
        label, features, weights);

    return rec.Score;
}
/// <summary>
/// Runs <c>TestEstimatorCore</c> over three trainers (linear classification, SDCA regression and
/// SDCA multi-class), each configured through its <c>Arguments</c> object and fed the
/// "breast-cancer.txt" data set.
/// </summary>
public void SdcaWorkout()
{
    // Same convergence tolerance for all three trainers.
    const float convergenceTolerance = 1e-2f;

    var path = GetDataPath("breast-cancer.txt");

    // Column 0 is loaded as the label, columns 1..10 as the feature vector.
    var reader = TextLoader.CreateReader(Env,
        ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10)));
    var data = reader.Read(new MultiFileSource(path));

    IEstimator<ITransformer> classificationEstimator = new LinearClassificationTrainer(
        Env,
        new LinearClassificationTrainer.Arguments { ConvergenceTolerance = convergenceTolerance },
        "Features",
        "Label");
    TestEstimatorCore(classificationEstimator, data.AsDynamic);

    IEstimator<ITransformer> regressionEstimator = new SdcaRegressionTrainer(
        Env,
        new SdcaRegressionTrainer.Arguments { ConvergenceTolerance = convergenceTolerance },
        "Features",
        "Label");
    TestEstimatorCore(regressionEstimator, data.AsDynamic);

    IEstimator<ITransformer> multiClassEstimator = new SdcaMultiClassTrainer(
        Env,
        new SdcaMultiClassTrainer.Arguments { ConvergenceTolerance = convergenceTolerance },
        "Features",
        "Label");
    TestEstimatorCore(multiClassEstimator, data.AsDynamic);

    Done();
}
/// <summary>
/// Sample entry point: trains an SDCA regression model on the housing-price data and prints the
/// permutation feature importance (PFI) R-squared statistics of every feature.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Helper.PrintLine($"使用 PFI 解释模型...");
    var mlContext = new MLContext();

    // Load the comma-separated training data.
    Helper.PrintLine("加载训练数据集...");
    IDataView trainDataView = mlContext.Data.LoadFromTextFile<HousingPriceData>(TrainDataPath, separatorChar: ',');

    // Every schema column except "Label" is used as a feature.
    Helper.PrintLine("获取特征成员名称...");
    string[] featureColumnNames =
        (from column in trainDataView.Schema
         select column.Name into columnName
         where columnName != "Label"
         select columnName).ToArray();

    // Concatenate the feature columns into "Features" and min-max normalize the result.
    Helper.PrintLine("创建数据初始化对象...");
    var concatenateFeatures = mlContext.Transforms.Concatenate("Features", featureColumnNames);
    IEstimator<ITransformer> dataPrepEstimator =
        concatenateFeatures.Append(mlContext.Transforms.NormalizeMinMax("Features"));

    Helper.PrintLine("初始化数据...");
    ITransformer dataPrepTransformer = dataPrepEstimator.Fit(trainDataView);
    IDataView preprocessedTrainData = dataPrepTransformer.Transform(trainDataView);

    Helper.PrintLine("创建数据估算器对象...");
    SdcaRegressionTrainer sdcaEstimator = mlContext.Regression.Trainers.Sdca();

    Helper.PrintSplit();
    Helper.PrintLine($"开始训练神经网络...");
    var sdcaModel = sdcaEstimator.Fit(preprocessedTrainData);
    Helper.PrintLine($"训练神经网络完成");
    Helper.PrintSplit();

    // PFI is computed on the preprocessed training data with 3 permutations.
    ImmutableArray<RegressionMetricsStatistics> pfi =
        mlContext.Regression.PermutationFeatureImportance(
            sdcaModel,
            preprocessedTrainData,
            permutationCount: 3);

    // Pair each R-squared statistic with its position in the PFI result,
    // then order by the absolute value of the mean, largest first.
    Helper.PrintLine("按相关性排序特征...");
    var rankedFeatures = pfi
        .Select((metric, index) => new { index, metric.RSquared })
        .OrderByDescending(entry => Math.Abs(entry.RSquared.Mean))
        .ToArray();

    Helper.PrintSplit();

    // One report entry per feature; the position is used to look up the column name.
    var reportEntries = rankedFeatures.Select(entry =>
        $">>> {featureColumnNames[entry.index]}\n\tMean: {entry.RSquared.Mean:F6}\n\tStandardDeviation: {entry.RSquared.StandardDeviation:F6}\n\tStandardError: {entry.RSquared.StandardError:F6}");
    string report = string.Join("\n\t", reportEntries);
    Helper.PrintLine($"特征 PFI:\n\t{report}");

    Helper.Exit(0);
}
/// <summary>
/// Runs <c>TestEstimatorCore</c> over the SDCA binary, regression and multi-class trainers,
/// each configured through its <c>advancedSettings</c> delegate and fed the
/// "breast-cancer.txt" data set.
/// </summary>
public void SdcaWorkout()
{
    // Same convergence tolerance for all three trainers.
    const float convergenceTolerance = 1e-2f;

    var path = GetDataPath("breast-cancer.txt");

    // Column 0 is loaded as the label, columns 1..10 as the feature vector.
    var reader = TextLoader.CreateReader(Env,
        ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10)));
    var data = reader.Read(path);

    IEstimator<ITransformer> binaryEstimator = new SdcaBinaryTrainer(
        Env,
        "Features",
        "Label",
        advancedSettings: settings => settings.ConvergenceTolerance = convergenceTolerance);
    TestEstimatorCore(binaryEstimator, data.AsDynamic);

    IEstimator<ITransformer> regressionEstimator = new SdcaRegressionTrainer(
        Env,
        "Features",
        "Label",
        advancedSettings: settings => settings.ConvergenceTolerance = convergenceTolerance);
    TestEstimatorCore(regressionEstimator, data.AsDynamic);

    IEstimator<ITransformer> multiClassEstimator = new SdcaMultiClassTrainer(
        Env,
        "Features",
        "Label",
        advancedSettings: settings => settings.ConvergenceTolerance = convergenceTolerance);
    TestEstimatorCore(multiClassEstimator, data.AsDynamic);

    Done();
}
/// <summary>
/// Trains the salary-prediction model: loads the training CSV, maps the salary column to
/// "Label" and the years-of-experience column to "Features", fits an SDCA regression trainer,
/// and prints the elapsed fitting time to the console.
/// </summary>
void IAiTest.Train()
{
    Console.WriteLine("=============== Regression task - Salary Prediction ===============");

    // The training file has a header row and comma-separated columns.
    IDataView dataView = _context.Data.LoadFromTextFile<SalaryData>(
        $"{RootFolder}/{TrainDataFile}",
        hasHeader: true,
        separatorChar: ',');

    // Salary becomes the label; YearsExperience is the only feature.
    var copySalaryToLabel = _context.Transforms.CopyColumns(
        outputColumnName: "Label",
        inputColumnName: nameof(SalaryData.Salary));
    var dataProcessPipeline = copySalaryToLabel.Append(
        _context.Transforms.Concatenate("Features", nameof(SalaryData.YearsExperience)));

    _trainer = _context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(_trainer);

    var fitTimer = new Stopwatch();
    Console.WriteLine("=============== Create and Train the Model ===============");

    // Only the Fit call is timed.
    fitTimer.Start();
    _model = trainingPipeline.Fit(dataView);
    fitTimer.Stop();

    long elapsedMs = fitTimer.ElapsedMilliseconds;
    Console.WriteLine($" Total {elapsedMs} ms");
    Console.WriteLine("=============== End of training ===============");
    Console.WriteLine();
}
/// <summary>
/// Creates the SDCA regression trainer (label column "Label", feature column "Features") and
/// appends it to the data-processing pipeline to form the full training pipeline.
/// </summary>
/// <param name="mlContext">The ML context whose regression catalog supplies the trainer.</param>
/// <param name="dataProcessPipeline">The data-preparation estimator chain the trainer is appended to.</param>
/// <param name="trainer">Receives the created SDCA regression trainer.</param>
/// <param name="trainingPipeline">Receives <paramref name="dataProcessPipeline"/> with the trainer appended.</param>
private static void SetTrainingAlgorithm(
    MLContext mlContext,
    EstimatorChain<ColumnConcatenatingTransformer> dataProcessPipeline,
    out SdcaRegressionTrainer trainer,
    out EstimatorChain<RegressionPredictionTransformer<LinearRegressionModelParameters>> trainingPipeline)
{
    const string labelColumn = "Label";
    const string featureColumn = "Features";

    SdcaRegressionTrainer sdca = mlContext.Regression.Trainers.Sdca(
        labelColumnName: labelColumn,
        featureColumnName: featureColumn);

    trainer = sdca;
    trainingPipeline = dataProcessPipeline.Append(sdca);
}