Exemplo n.º 1
0
        /// <summary>
        /// Loads the taxi-trip training file, builds the feature-preparation pipeline,
        /// appends an SDCA regression trainer and fits the model, reporting the elapsed
        /// training time on the console.
        /// </summary>
        void IAiTest.Train()
        {
            Console.WriteLine("=============== Regression task - Price Prediction ===============");

            string trainingFilePath = $"{RootFolder}/{TrainDataFile}";
            IDataView trainingData = _context.Data.LoadFromTextFile<TaxiTrip>(trainingFilePath, hasHeader: true, separatorChar: ',');

            // Optional: rows with extreme fares (e.g. above $150 or below $1) may be error data
            // and could be filtered out before training, for example:
            // trainingData = _context.Data.FilterRowsByColumn(trainingData, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);

            var transforms = _context.Transforms;

            // The fare amount becomes the "Label" column; the three categorical columns are
            // one-hot encoded, the three numeric columns are normalized, and everything is
            // concatenated into the single "Features" column the trainer reads.
            var featurePipeline = transforms
                .CopyColumns(outputColumnName: "Label", inputColumnName: nameof(TaxiTrip.FareAmount))
                .Append(transforms.Categorical.OneHotEncoding(outputColumnName: "VendorIdEncoded", inputColumnName: nameof(TaxiTrip.VendorId)))
                .Append(transforms.Categorical.OneHotEncoding(outputColumnName: "RateCodeEncoded", inputColumnName: nameof(TaxiTrip.RateCode)))
                .Append(transforms.Categorical.OneHotEncoding(outputColumnName: "PaymentTypeEncoded", inputColumnName: nameof(TaxiTrip.PaymentType)))
                .Append(transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.PassengerCount)))
                .Append(transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripTime)))
                .Append(transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripDistance)))
                .Append(transforms.Concatenate(
                    "Features",
                    "VendorIdEncoded",
                    "RateCodeEncoded",
                    "PaymentTypeEncoded",
                    nameof(TaxiTrip.PassengerCount),
                    nameof(TaxiTrip.TripTime),
                    nameof(TaxiTrip.TripDistance)));

            _trainer = _context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
            var fullPipeline = featurePipeline.Append(_trainer);

            Console.WriteLine("=============== Create and Train the Model ===============");

            // Only the Fit call is timed.
            var timer = Stopwatch.StartNew();
            _model = fullPipeline.Fit(trainingData);
            timer.Stop();

            Console.WriteLine($" Total {timer.ElapsedMilliseconds} ms");
            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();
        }
        /// <summary>
        /// Predicts a target with a linear regression model trained by the SDCA trainer.
        /// </summary>
        /// <param name="ctx">The regression context trainer object this extension method hangs off.</param>
        /// <param name="label">The label (dependent variable) column. Required.</param>
        /// <param name="features">The features (independent variables) column. Required.</param>
        /// <param name="weights">Optional per-example weights.</param>
        /// <param name="l2Const">The L2 regularization hyperparameter. Must not be negative when specified.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter; higher values tend to give a sparser model.
        /// Must not be negative when specified.</param>
        /// <param name="maxIterations">The maximum number of passes over the data. Must be at least 1 when specified.</param>
        /// <param name="loss">The custom loss; when unspecified it will be <see cref="SquaredLoss"/>.</param>
        /// <param name="advancedSettings">A delegate for setting further options.
        /// Settings made here override those given through the direct method parameters
        /// when both are present and differ.
        /// Column names, however, must be supplied directly and not through <paramref name="advancedSettings"/>.</param>
        /// <param name="onFit">A delegate invoked every time
        /// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. It receives
        /// the linear model that was trained. The delegate cannot change the result; it only informs the caller
        /// about what was learnt.</param>
        /// <returns>The predicted output (the reconciler's score).</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
        /// ]]></format>
        /// </example>
        public static Scalar<float> Sdca(this RegressionContext.RegressionTrainers ctx,
                                         Scalar<float> label,
                                         Vector<float> features,
                                         Scalar<float> weights = null,
                                         float? l2Const = null,
                                         float? l1Threshold = null,
                                         int? maxIterations = null,
                                         ISupportSdcaRegressionLoss loss = null,
                                         Action<SdcaRegressionTrainer.Arguments> advancedSettings = null,
                                         Action<LinearRegressionModelParameters> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);

            // The range checks are written as negated comparisons on purpose: comparing a null
            // nullable yields false, so an unspecified hyperparameter passes the check.
            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");

            Contracts.CheckValueOrNull(loss);
            Contracts.CheckValueOrNull(advancedSettings);
            Contracts.CheckValueOrNull(onFit);

            var reconciler = new TrainerEstimatorReconciler.Regression(
                (environment, labelColumn, featuresColumn, weightsColumn) =>
                {
                    var sdcaTrainer = new SdcaRegressionTrainer(
                        environment, labelColumn, featuresColumn, weightsColumn,
                        loss, l2Const, l1Threshold, maxIterations, advancedSettings);

                    // Without a callback the plain trainer is handed back.
                    if (onFit == null)
                    {
                        return sdcaTrainer;
                    }

                    // Otherwise wrap it so the caller is given the trained model after each fit.
                    return sdcaTrainer.WithOnFitDelegate(transformer => onFit(transformer.Model));
                },
                label, features, weights);

            return reconciler.Score;
        }
        /// <summary>
        /// Predicts a target with a linear regression model trained by the SDCA trainer.
        /// </summary>
        /// <param name="catalog">The regression catalog trainer object this extension method hangs off.</param>
        /// <param name="label">The label (dependent variable) column. Required.</param>
        /// <param name="features">The features (independent variables) column. Required.</param>
        /// <param name="weights">Optional per-example weights.</param>
        /// <param name="l2Regularization">The L2 regularization hyperparameter. Must not be negative when specified.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter; higher values tend to give a sparser model.
        /// Must not be negative when specified.</param>
        /// <param name="numberOfIterations">The maximum number of passes over the data. Must be at least 1 when specified.</param>
        /// <param name="lossFunction">The custom loss; when unspecified it will be <see cref="SquaredLoss"/>.</param>
        /// <param name="onFit">A delegate invoked every time
        /// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. It receives
        /// the linear model that was trained. The delegate cannot change the result; it only informs the caller
        /// about what was learnt.</param>
        /// <returns>The predicted output (the reconciler's score).</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
        /// ]]></format>
        /// </example>
        public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catalog,
                                         Scalar<float> label,
                                         Vector<float> features,
                                         Scalar<float> weights = null,
                                         float? l2Regularization = null,
                                         float? l1Threshold = null,
                                         int? numberOfIterations = null,
                                         ISupportSdcaRegressionLoss lossFunction = null,
                                         Action<LinearRegressionModelParameters> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);

            // The range checks are written as negated comparisons on purpose: comparing a null
            // nullable yields false, so an unspecified hyperparameter passes the check.
            Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");

            Contracts.CheckValueOrNull(lossFunction);
            Contracts.CheckValueOrNull(onFit);

            var reconciler = new TrainerEstimatorReconciler.Regression(
                (environment, labelColumn, featuresColumn, weightsColumn) =>
                {
                    var sdcaTrainer = new SdcaRegressionTrainer(
                        environment, labelColumn, featuresColumn, weightsColumn,
                        lossFunction, l2Regularization, l1Threshold, numberOfIterations);

                    // Without a callback the plain trainer is handed back.
                    if (onFit == null)
                    {
                        return sdcaTrainer;
                    }

                    // Otherwise wrap it so the caller is given the trained model after each fit.
                    return sdcaTrainer.WithOnFitDelegate(transformer => onFit(transformer.Model));
                },
                label, features, weights);

            return reconciler.Score;
        }
        /// <summary>
        /// Predict a target using a linear regression model trained with the SDCA trainer,
        /// configured through an options object.
        /// </summary>
        /// <param name="catalog">The regression catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable. Required.</param>
        /// <param name="features">The features, or independent variables. Required.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm. Required: the label and feature
        /// column names are written into this instance when the trainer is created, so it must not be null
        /// and the caller's object is modified.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained.  Note that this action cannot change the result in any way; it is only a way for the caller to
        /// be informed about what was learnt.</param>
        /// <returns>The predicted output.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs)]
        /// ]]></format>
        /// </example>
        public static Scalar <float> Sdca(this RegressionCatalog.RegressionTrainers catalog,
                                          Scalar <float> label, Vector <float> features, Scalar <float> weights,
                                          SdcaRegressionTrainer.Options options,
                                          Action <LinearRegressionModelParameters> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            // options is dereferenced unconditionally in the factory below, so a null must be
            // rejected here rather than surfacing later as a NullReferenceException.
            Contracts.CheckValue(options, nameof(options));
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.Regression(
                (env, labelName, featuresName, weightsName) =>
            {
                // The column names resolved by the reconciler replace whatever the caller set on options.
                options.LabelColumnName   = labelName;
                options.FeatureColumnName = featuresName;
                // NOTE(review): weightsName is received but never applied to options, so the
                // weights column does not appear to reach the trainer from here - confirm intent.

                var trainer = new SdcaRegressionTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans => onFit(trans.Model)));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Score);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Runs the shared estimator test against the linear classification, SDCA regression and
        /// SDCA multi-class trainers, each built from an arguments object with a convergence
        /// tolerance of 1e-2 and fed the "breast-cancer.txt" data.
        /// </summary>
        public void SdcaWorkout()
        {
            var dataPath = GetDataPath("breast-cancer.txt");

            // Column 0 is loaded as the label, columns 1 through 10 as the features.
            var reader = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10)));
            var data = reader.Read(new MultiFileSource(dataPath));

            var classificationArgs = new LinearClassificationTrainer.Arguments { ConvergenceTolerance = 1e-2f };
            IEstimator<ITransformer> classificationEstimator = new LinearClassificationTrainer(Env, classificationArgs, "Features", "Label");
            TestEstimatorCore(classificationEstimator, data.AsDynamic);

            var regressionArgs = new SdcaRegressionTrainer.Arguments { ConvergenceTolerance = 1e-2f };
            IEstimator<ITransformer> regressionEstimator = new SdcaRegressionTrainer(Env, regressionArgs, "Features", "Label");
            TestEstimatorCore(regressionEstimator, data.AsDynamic);

            var multiClassArgs = new SdcaMultiClassTrainer.Arguments { ConvergenceTolerance = 1e-2f };
            IEstimator<ITransformer> multiClassEstimator = new SdcaMultiClassTrainer(Env, multiClassArgs, "Features", "Label");
            TestEstimatorCore(multiClassEstimator, data.AsDynamic);

            Done();
        }
Exemplo n.º 6
0
        /// <summary>
        /// Trains an SDCA regression model on the data at <c>TrainDataPath</c> and prints the
        /// permutation feature importance (PFI) of every non-label column, ordered by the
        /// absolute mean of its R-squared statistic.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            Helper.PrintLine($"使用 PFI 解释模型...");

            var mlContext = new MLContext();

            // Load the training set.
            Helper.PrintLine("加载训练数据集...");
            var trainDataView = mlContext.Data.LoadFromTextFile<HousingPriceData>(TrainDataPath, separatorChar: ',');

            // Every schema column except "Label" is treated as a feature.
            Helper.PrintLine("获取特征成员名称...");
            var featureColumnNames = trainDataView.Schema
                .Where(column => column.Name != "Label")
                .Select(column => column.Name)
                .ToArray();

            // Concatenate the feature columns into "Features" and min-max normalize that column.
            Helper.PrintLine("创建数据初始化对象...");
            IEstimator<ITransformer> dataPrepEstimator = mlContext.Transforms
                .Concatenate("Features", featureColumnNames)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"));

            // Fit the preparation step and apply it to the training data.
            Helper.PrintLine("初始化数据...");
            ITransformer dataPrepTransformer = dataPrepEstimator.Fit(trainDataView);
            IDataView preprocessedTrainData = dataPrepTransformer.Transform(trainDataView);

            // Create the SDCA regression trainer with its default settings.
            Helper.PrintLine("创建数据估算器对象...");
            var sdcaEstimator = mlContext.Regression.Trainers.Sdca();

            Helper.PrintSplit();
            Helper.PrintLine($"开始训练神经网络...");
            var sdcaModel = sdcaEstimator.Fit(preprocessedTrainData);

            Helper.PrintLine($"训练神经网络完成");
            Helper.PrintSplit();

            // Compute PFI on the preprocessed training data with three permutations.
            ImmutableArray<RegressionMetricsStatistics> pfi = mlContext.Regression.PermutationFeatureImportance(
                sdcaModel,
                preprocessedTrainData,
                permutationCount: 3);

            // Pair each R-squared statistic with its position so it can be mapped back to a
            // column name after sorting.
            Helper.PrintLine("按相关性排序特征...");
            var rankedFeatures = pfi
                .Select((metric, slot) => new { Slot = slot, metric.RSquared })
                .OrderByDescending(entry => Math.Abs(entry.RSquared.Mean))
                .ToArray();

            // One multi-line report entry per feature.
            var reportEntries = rankedFeatures.Select(entry => $">>> {featureColumnNames[entry.Slot]}\n\tMean: {entry.RSquared.Mean:F6}\n\tStandardDeviation: {entry.RSquared.StandardDeviation:F6}\n\tStandardError: {entry.RSquared.StandardError:F6}");

            Helper.PrintSplit();
            Helper.PrintLine($"特征 PFI:\n\t{string.Join("\n\t", reportEntries)}");

            Helper.Exit(0);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs the shared estimator test against the SDCA binary, regression and multi-class
        /// trainers, each configured through its advanced-settings delegate with a convergence
        /// tolerance of 1e-2 and fed the "breast-cancer.txt" data.
        /// </summary>
        public void SdcaWorkout()
        {
            var dataPath = GetDataPath("breast-cancer.txt");

            // Column 0 is loaded as the label, columns 1 through 10 as the features.
            var reader = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10)));
            var data = reader.Read(dataPath);

            IEstimator<ITransformer> binaryEstimator = new SdcaBinaryTrainer(
                Env, "Features", "Label",
                advancedSettings: settings => settings.ConvergenceTolerance = 1e-2f);
            TestEstimatorCore(binaryEstimator, data.AsDynamic);

            IEstimator<ITransformer> regressionEstimator = new SdcaRegressionTrainer(
                Env, "Features", "Label",
                advancedSettings: settings => settings.ConvergenceTolerance = 1e-2f);
            TestEstimatorCore(regressionEstimator, data.AsDynamic);

            IEstimator<ITransformer> multiClassEstimator = new SdcaMultiClassTrainer(
                Env, "Features", "Label",
                advancedSettings: settings => settings.ConvergenceTolerance = 1e-2f);
            TestEstimatorCore(multiClassEstimator, data.AsDynamic);

            Done();
        }
Exemplo n.º 8
0
        /// <summary>
        /// Loads the salary training file, builds the feature-preparation pipeline,
        /// appends an SDCA regression trainer and fits the model, reporting the elapsed
        /// training time on the console.
        /// </summary>
        void IAiTest.Train()
        {
            Console.WriteLine("=============== Regression task - Salary Prediction ===============");

            string trainingFilePath = $"{RootFolder}/{TrainDataFile}";
            IDataView trainingData = _context.Data.LoadFromTextFile<SalaryData>(trainingFilePath, hasHeader: true, separatorChar: ',');

            // The salary becomes the "Label" column and years of experience is the only
            // column placed in "Features".
            var transforms = _context.Transforms;
            var featurePipeline = transforms
                .CopyColumns(outputColumnName: "Label", inputColumnName: nameof(SalaryData.Salary))
                .Append(transforms.Concatenate("Features", nameof(SalaryData.YearsExperience)));

            _trainer = _context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
            var fullPipeline = featurePipeline.Append(_trainer);

            Console.WriteLine("=============== Create and Train the Model ===============");

            // Only the Fit call is timed.
            var timer = Stopwatch.StartNew();
            _model = fullPipeline.Fit(trainingData);
            timer.Stop();

            Console.WriteLine($" Total {timer.ElapsedMilliseconds} ms");
            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();
        }
Exemplo n.º 9
0
 /// <summary>
 /// Creates the SDCA regression trainer and appends it to the data-processing pipeline,
 /// handing both the trainer and the combined training pipeline back to the caller.
 /// </summary>
 /// <param name="mlContext">The ML context whose regression catalog supplies the SDCA trainer.</param>
 /// <param name="dataProcessPipeline">The data-processing estimator chain the trainer is appended to.</param>
 /// <param name="trainer">Receives the SDCA regression trainer, configured to read the "Label" and "Features" columns.</param>
 /// <param name="trainingPipeline">Receives <paramref name="dataProcessPipeline"/> with the trainer appended.</param>
 private static void SetTrainingAlgorithm(
     MLContext mlContext,
     EstimatorChain<ColumnConcatenatingTransformer> dataProcessPipeline,
     out SdcaRegressionTrainer trainer,
     out EstimatorChain<RegressionPredictionTransformer<LinearRegressionModelParameters>> trainingPipeline)
 {
     var sdcaTrainer = mlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

     trainer = sdcaTrainer;
     trainingPipeline = dataProcessPipeline.Append(sdcaTrainer);
 }