예제 #1
0
        /// <summary>
        /// Stores the host, calibrator trainer and optional predictor, and builds the schema columns
        /// (score, label, features, predicted label and, when requested, weight) used by the estimator.
        /// </summary>
        /// <param name="env">The host environment, stored as <c>Host</c>.</param>
        /// <param name="calibratorTrainer">The calibrator trainer, stored as <c>CalibratorTrainer</c>.</param>
        /// <param name="predictor">Optional predictor, stored as <c>Predictor</c>.</param>
        /// <param name="labelColumn">The name of the label column.</param>
        /// <param name="featureColumn">The name of the feature column.</param>
        /// <param name="weightColumn">The name of the weight column; when null, no weight column is configured.</param>
        protected CalibratorEstimatorBase(IHostEnvironment env,
                                          TCalibratorTrainer calibratorTrainer,
                                          IPredictor predictor = null,
                                          string labelColumn   = DefaultColumnNames.Label,
                                          string featureColumn = DefaultColumnNames.Features,
                                          string weightColumn  = null)
        {
            Host = env;
            CalibratorTrainer = calibratorTrainer;
            Predictor = predictor;

            // The score column always uses the default name; open question from the original author:
            // could it ever be named differently (renamed column), and is the metadata complete?
            ScoreColumn = TrainerUtils.MakeR4ScalarColumn(DefaultColumnNames.Score);
            LabelColumn = TrainerUtils.MakeBoolScalarLabel(labelColumn);
            FeatureColumn = TrainerUtils.MakeR4VecFeature(featureColumn);

            // Predicted label: a scalar boolean column carrying the trainer output metadata.
            var predictedLabelMetadata = new SchemaShape(MetadataUtils.GetTrainerOutputMetadata());
            PredictedLabel = new SchemaShape.Column(
                DefaultColumnNames.PredictedLabel,
                SchemaShape.Column.VectorKind.Scalar,
                BoolType.Instance,
                false,
                predictedLabelMetadata);

            // The weight column is optional; leave it untouched when no name was given.
            if (weightColumn == null)
            {
                return;
            }

            WeightColumn = TrainerUtils.MakeR4ScalarWeightColumn(weightColumn);
        }
예제 #2
0
 /// <summary>
 /// Initializes a new instance of <see cref="OrdinaryLeastSquaresRegressionTrainer"/>
 /// </summary>
 /// <param name="env">The host environment; checked with <c>Contracts.CheckRef</c> and registered under <c>LoadNameValue</c>.</param>
 /// <param name="options">Trainer options supplying the feature, label and example-weight column names,
 /// the L2 weight and the per-parameter significance setting. Must not be null.</param>
 internal OrdinaryLeastSquaresRegressionTrainer(IHostEnvironment env, Options options)
     // The constructor initializer is evaluated before the body, so options has to be validated at its
     // first use here; the check in the body would come too late and a null options would surface as
     // a NullReferenceException instead of a contract failure.
     : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(Contracts.CheckRef(options, nameof(options)).FeatureColumnName),
            TrainerUtils.MakeR4ScalarColumn(options.LabelColumnName), TrainerUtils.MakeR4ScalarWeightColumn(options.ExampleWeightColumnName))
 {
     Host.CheckValue(options, nameof(options));
     // A negative L2 regularization weight is rejected as a user-argument error.
     Host.CheckUserArg(options.L2Weight >= 0, nameof(options.L2Weight), "L2 regularization term cannot be negative");
     _l2Weight = options.L2Weight;
     _perParameterSignificance = options.PerParameterSignificance;
 }
예제 #3
0
 /// <summary>
 /// Initializes a new instance of <see cref="OlsLinearRegressionTrainer"/>
 /// </summary>
 /// <param name="env">The host environment; checked with <c>Contracts.CheckRef</c> and registered under <c>LoadNameValue</c>.</param>
 /// <param name="args">Trainer arguments supplying the feature, label and weight columns,
 /// the L2 weight and the per-parameter significance setting. Must not be null.</param>
 internal OlsLinearRegressionTrainer(IHostEnvironment env, Arguments args)
     // The constructor initializer is evaluated before the body, so args has to be validated at its
     // first use here; the check in the body would come too late and a null args would surface as
     // a NullReferenceException instead of a contract failure.
     : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(Contracts.CheckRef(args, nameof(args)).FeatureColumn),
            TrainerUtils.MakeR4ScalarColumn(args.LabelColumn), TrainerUtils.MakeR4ScalarWeightColumn(args.WeightColumn, args.WeightColumn.IsExplicit))
 {
     Host.CheckValue(args, nameof(args));
     // A negative L2 regularization weight is rejected as a user-argument error.
     Host.CheckUserArg(args.L2Weight >= 0, nameof(args.L2Weight), "L2 regularization term cannot be negative");
     _l2Weight = args.L2Weight;
     _perParameterSignificance = args.PerParameterSignificance;
 }
예제 #4
0
 /// <summary>
 /// Initializes a new instance of <see cref="RegressionGamTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor together with <c>LoadNameValue</c>; the label column
 /// name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>. The constructor body itself is empty.
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <param name="featureColumn">The name of the feature column.</param>
 /// <param name="weightColumn">The name for the column containing the initial weight.</param>
 /// <param name="numIterations">The number of iterations to use in learning the features.</param>
 /// <param name="learningRate">The learning rate. GAMs work best with a small learning rate.</param>
 /// <param name="maxBins">The maximum number of bins to use to approximate features</param>
 internal RegressionGamTrainer(IHostEnvironment env,
                               string labelColumn   = DefaultColumnNames.Label,
                               string featureColumn = DefaultColumnNames.Features,
                               string weightColumn  = null,
                               int numIterations    = GamDefaults.NumIterations,
                               double learningRate  = GamDefaults.LearningRates,
                               int maxBins          = GamDefaults.MaxBins)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weightColumn, numIterations, learningRate, maxBins)
 {
 }
예제 #5
0
 /// <summary>
 /// Initializes a new instance of <see cref="RegressionGamTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor together with <c>LoadNameValue</c>; the label column
 /// name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>. The constructor body itself is empty.
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumnName">The name of the label column.</param>
 /// <param name="featureColumnName">The name of the feature column.</param>
 /// <param name="rowGroupColumnName">The name for the column containing the example weight.
 /// NOTE(review): the parameter name suggests a row-group column while this description says example weight;
 /// confirm against the base constructor which one is actually expected.</param>
 /// <param name="numberOfIterations">The number of iterations to use in learning the features.</param>
 /// <param name="learningRate">The learning rate. GAMs work best with a small learning rate.</param>
 /// <param name="maximumBinCountPerFeature">The maximum number of bins to use to approximate features</param>
 internal RegressionGamTrainer(IHostEnvironment env,
                               string labelColumnName        = DefaultColumnNames.Label,
                               string featureColumnName      = DefaultColumnNames.Features,
                               string rowGroupColumnName     = null,
                               int numberOfIterations        = GamDefaults.NumberOfIterations,
                               double learningRate           = GamDefaults.LearningRate,
                               int maximumBinCountPerFeature = GamDefaults.MaximumBinCountPerFeature)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, rowGroupColumnName, numberOfIterations, learningRate, maximumBinCountPerFeature)
 {
 }
예제 #6
0
 /// <summary>
 /// Initializes a new instance of <see cref="FastTreeRegressionTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor; the label column name is first wrapped by
 /// <c>TrainerUtils.MakeR4ScalarColumn</c>. A literal <c>null</c> is passed right after the weight column
 /// (presumably the group-id column, which this trainer does not use; confirm against the base constructor).
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <param name="featureColumn">The name of the feature column.</param>
 /// <param name="weightColumn">The name for the column containing the initial weight.</param>
 /// <param name="learningRate">The learning rate.</param>
 /// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
 /// <param name="numLeaves">The max number of leaves in each regression tree.</param>
 /// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
 internal FastTreeRegressionTrainer(IHostEnvironment env,
                                    string labelColumn        = DefaultColumnNames.Label,
                                    string featureColumn      = DefaultColumnNames.Features,
                                    string weightColumn       = null,
                                    int numLeaves             = Defaults.NumLeaves,
                                    int numTrees              = Defaults.NumTrees,
                                    int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
                                    double learningRate       = Defaults.LearningRates)
     : base(env, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate)
 {
 }
예제 #7
0
 /// <summary>
 /// Initializes a new instance of <see cref="FastTreeRegressionTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor; the label column name is first wrapped by
 /// <c>TrainerUtils.MakeR4ScalarColumn</c>. A literal <c>null</c> is passed right after the example weight column
 /// (presumably the group-id column, which this trainer does not use; confirm against the base constructor).
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumnName">The name of the label column.</param>
 /// <param name="featureColumnName">The name of the feature column.</param>
 /// <param name="exampleWeightColumnName">The name for the column containing the example weight.</param>
 /// <param name="learningRate">The learning rate.</param>
 /// <param name="minimumExampleCountPerLeaf">The minimal number of examples allowed in a leaf of a regression tree, out of the subsampled data.</param>
 /// <param name="numberOfLeaves">The max number of leaves in each regression tree.</param>
 /// <param name="numberOfTrees">Total number of decision trees to create in the ensemble.</param>
 internal FastTreeRegressionTrainer(IHostEnvironment env,
                                    string labelColumnName         = DefaultColumnNames.Label,
                                    string featureColumnName       = DefaultColumnNames.Features,
                                    string exampleWeightColumnName = null,
                                    int numberOfLeaves             = Defaults.NumberOfLeaves,
                                    int numberOfTrees = Defaults.NumberOfTrees,
                                    int minimumExampleCountPerLeaf = Defaults.MinimumExampleCountPerLeaf,
                                    double learningRate            = Defaults.LearningRate)
     : base(env, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, numberOfTrees, minimumExampleCountPerLeaf, learningRate)
 {
 }
예제 #8
0
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRegressorTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor together with <c>LoadNameValue</c>; the label column
 /// name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>. A literal <c>null</c> is passed right after
 /// the weight column (presumably the group-id column; confirm against the base constructor).
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <param name="featureColumn">The name of the feature column.</param>
 /// <param name="weights">The name for the column containing the initial weight.</param>
 /// <param name="numLeaves">The number of leaves to use.</param>
 /// <param name="numBoostRound">Number of iterations.</param>
 /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
 /// <param name="learningRate">The learning rate.</param>
 internal LightGbmRegressorTrainer(IHostEnvironment env,
                                   string labelColumn   = DefaultColumnNames.Label,
                                   string featureColumn = DefaultColumnNames.Features,
                                   string weights       = null,
                                   int?numLeaves        = null,
                                   int?minDataPerLeaf   = null,
                                   double?learningRate  = null,
                                   int numBoostRound    = LightGBM.Options.Defaults.NumBoostRound)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound)
 {
 }
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRegressionTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor together with <c>LoadNameValue</c>; the label column
 /// name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>. A literal <c>null</c> is passed right after
 /// the example weight column (presumably the group-id column; confirm against the base constructor).
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumnName">The name of the label column.</param>
 /// <param name="featureColumnName">The name of the feature column.</param>
 /// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
 /// <param name="numberOfLeaves">The number of leaves to use.</param>
 /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
 /// <param name="learningRate">The learning rate.</param>
 /// <param name="numberOfIterations">Number of iterations.</param>
 internal LightGbmRegressionTrainer(IHostEnvironment env,
                                    string labelColumnName         = DefaultColumnNames.Label,
                                    string featureColumnName       = DefaultColumnNames.Features,
                                    string exampleWeightColumnName = null,
                                    int?numberOfLeaves             = null,
                                    int?minimumExampleCountPerLeaf = null,
                                    double?learningRate            = null,
                                    int numberOfIterations         = Trainers.LightGbm.Options.Defaults.NumberOfIterations)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
 {
 }
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRegressorTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value, including <paramref name="advancedSettings"/>, is forwarded to the base constructor together
 /// with <c>LoadNameValue</c>; the label column name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.
 /// A literal <c>null</c> is passed right after the weight column (presumably the group-id column; confirm
 /// against the base constructor).
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <param name="featureColumn">The name of the feature column.</param>
 /// <param name="weights">The name for the column containing the initial weight.</param>
 /// <param name="numLeaves">The number of leaves to use.</param>
 /// <param name="numBoostRound">Number of iterations.</param>
 /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
 /// <param name="learningRate">The learning rate.</param>
 /// <param name="advancedSettings">A delegate to set more settings.
 /// The settings here will override the ones provided in the direct signature,
 /// if both are present and have different values.
 /// The column names, however, need to be provided directly, not through the <paramref name="advancedSettings"/>.</param>
 public LightGbmRegressorTrainer(IHostEnvironment env,
                                 string labelColumn   = DefaultColumnNames.Label,
                                 string featureColumn = DefaultColumnNames.Features,
                                 string weights       = null,
                                 int?numLeaves        = null,
                                 int?minDataPerLeaf   = null,
                                 double?learningRate  = null,
                                 int numBoostRound    = LightGbmArguments.Defaults.NumBoostRound,
                                 Action <LightGbmArguments> advancedSettings = null)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings)
 {
 }
예제 #11
0
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRankingTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor together with <c>LoadNameValue</c>; the label column
 /// name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>. After the base constructor returns,
 /// <paramref name="groupId"/> is checked to be non-empty.
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumn">The name of the label column.</param>
 /// <param name="featureColumn">The name of the feature column.</param>
 /// <param name="groupId">The name of the column containing the group ID. Must not be empty.</param>
 /// <param name="weights">The name of the optional column containing the initial weights.</param>
 /// <param name="numLeaves">The number of leaves to use.</param>
 /// <param name="numBoostRound">Number of iterations.</param>
 /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
 /// <param name="learningRate">The learning rate.</param>
 internal LightGbmRankingTrainer(IHostEnvironment env,
                                 string labelColumn   = DefaultColumnNames.Label,
                                 string featureColumn = DefaultColumnNames.Features,
                                 string groupId       = DefaultColumnNames.GroupId,
                                 string weights       = null,
                                 int?numLeaves        = null,
                                 int?minDataPerLeaf   = null,
                                 double?learningRate  = null,
                                 int numBoostRound    = LightGBM.Options.Defaults.NumBoostRound)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound)
 {
     // Ranking requires a group column; this check runs only after the base constructor has completed.
     Host.CheckNonEmpty(groupId, nameof(groupId));
 }
예제 #12
0
        /// <summary>
        /// Initializes a new instance of <see cref="FastTreeTweedieTrainer"/>
        /// </summary>
        /// <remarks>
        /// Every value is forwarded to the base constructor; the label column name is first wrapped by
        /// <c>TrainerUtils.MakeR4ScalarColumn</c>. After the base constructor returns, the label and feature
        /// column names are checked to be non-empty and <c>Initialize()</c> is called.
        /// </remarks>
        /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="featureColumnName">The name of the feature column.</param>
        /// <param name="exampleWeightColumnName">The name for the column containing the example weight.</param>
        /// <param name="learningRate">The learning rate.</param>
        /// <param name="minimumExampleCountPerLeaf">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
        /// <param name="numberOfLeaves">The max number of leaves in each regression tree.</param>
        /// <param name="numberOfTrees">Total number of decision trees to create in the ensemble.</param>
        internal FastTreeTweedieTrainer(IHostEnvironment env,
                                        string labelColumnName         = DefaultColumnNames.Label,
                                        string featureColumnName       = DefaultColumnNames.Features,
                                        string exampleWeightColumnName = null,
                                        int numberOfLeaves             = Defaults.NumberOfLeaves,
                                        int numberOfTrees = Defaults.NumberOfTrees,
                                        int minimumExampleCountPerLeaf = Defaults.MinimumExampleCountPerLeaf,
                                        double learningRate            = Defaults.LearningRate)
            : base(env, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, numberOfTrees, minimumExampleCountPerLeaf, learningRate)
        {
            // These checks run after the base constructor has already received the column names.
            Host.CheckNonEmpty(labelColumnName, nameof(labelColumnName));
            Host.CheckNonEmpty(featureColumnName, nameof(featureColumnName));

            Initialize();
        }
        /// <summary>
        /// Initializes a new instance of <see cref="FastTreeTweedieTrainer"/>
        /// </summary>
        /// <remarks>
        /// Every value is forwarded to the base constructor; the label column name is first wrapped by
        /// <c>TrainerUtils.MakeR4ScalarColumn</c>. After the base constructor returns, the label and feature
        /// column names are checked to be non-empty and <c>Initialize()</c> is called.
        /// </remarks>
        /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
        /// <param name="labelColumn">The name of the label column.</param>
        /// <param name="featureColumn">The name of the feature column.</param>
        /// <param name="weightColumn">The name for the column containing the initial weight.</param>
        /// <param name="learningRate">The learning rate.</param>
        /// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
        /// <param name="numLeaves">The max number of leaves in each regression tree.</param>
        /// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
        internal FastTreeTweedieTrainer(IHostEnvironment env,
                                        string labelColumn        = DefaultColumnNames.Label,
                                        string featureColumn      = DefaultColumnNames.Features,
                                        string weightColumn       = null,
                                        int numLeaves             = Defaults.NumLeaves,
                                        int numTrees              = Defaults.NumTrees,
                                        int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
                                        double learningRate       = Defaults.LearningRates)
            : base(env, TrainerUtils.MakeR4ScalarColumn(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate)
        {
            // These checks run after the base constructor has already received the column names.
            Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
            Host.CheckNonEmpty(featureColumn, nameof(featureColumn));

            Initialize();
        }
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRankingTrainer"/>
 /// </summary>
 /// <remarks>
 /// Every value is forwarded to the base constructor together with <c>LoadNameValue</c>; the label column
 /// name is first wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>. After the base constructor returns,
 /// <paramref name="rowGroupdColumnName"/> is checked to be non-empty.
 /// </remarks>
 /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="labelColumnName">The name of the label column.</param>
 /// <param name="featureColumnName">The name of the feature column.</param>
 /// <param name="rowGroupdColumnName">The name of the column containing the group ID. Must not be empty.</param>
 /// <param name="weightsColumnName">The name of the optional column containing the initial weights.</param>
 /// <param name="numberOfLeaves">The number of leaves to use.</param>
 /// <param name="learningRate">The learning rate.</param>
 /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
 /// <param name="numberOfIterations">The number of iterations to use.</param>
 internal LightGbmRankingTrainer(IHostEnvironment env,
                                 string labelColumnName         = DefaultColumnNames.Label,
                                 string featureColumnName       = DefaultColumnNames.Features,
                                 string rowGroupdColumnName     = DefaultColumnNames.GroupId,
                                 string weightsColumnName       = null,
                                 int?numberOfLeaves             = null,
                                 int?minimumExampleCountPerLeaf = null,
                                 double?learningRate            = null,
                                 int numberOfIterations         = LightGBM.Options.Defaults.NumberOfIterations)
     : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName),
            featureColumnName, weightsColumnName, rowGroupdColumnName, numberOfLeaves,
            minimumExampleCountPerLeaf, learningRate, numberOfIterations)
 {
     // Ranking requires a group column; this check runs only after the base constructor has completed.
     Host.CheckNonEmpty(rowGroupdColumnName, nameof(rowGroupdColumnName));
 }
        /// <summary>
        /// Stores the host and calibrator trainer and builds the label, score and optional weight columns.
        /// </summary>
        /// <param name="env">The host environment, stored as <c>Host</c>.</param>
        /// <param name="calibratorTrainer">The calibrator trainer to use.</param>
        /// <param name="labelColumn">The name of the label column. May be null or whitespace only when
        /// <paramref name="calibratorTrainer"/> does not need training.</param>
        /// <param name="scoreColumn">The name of the score column.</param>
        /// <param name="weightColumn">The name of the weight column; when null, no weight column is configured.</param>
        private protected CalibratorEstimatorBase(IHostEnvironment env,
                                                  ICalibratorTrainer calibratorTrainer, string labelColumn, string scoreColumn, string weightColumn)
        {
            Host = env;
            _calibratorTrainer = calibratorTrainer;

            bool labelMissing = string.IsNullOrWhiteSpace(labelColumn);
            if (labelMissing)
            {
                // A label column can only be omitted when the calibrator does not need to be trained.
                env.CheckParam(!calibratorTrainer.NeedsTraining, nameof(labelColumn), "For trained calibrators, " + nameof(labelColumn) + " must be specified.");
            }
            else
            {
                LabelColumn = TrainerUtils.MakeBoolScalarLabel(labelColumn);
            }

            // Open question from the original author: could the score column ever be named
            // differently (renamed column), and is the metadata complete?
            ScoreColumn = TrainerUtils.MakeR4ScalarColumn(scoreColumn);

            // The weight column is optional; leave it untouched when no name was given.
            if (weightColumn == null)
            {
                return;
            }

            WeightColumn = TrainerUtils.MakeR4ScalarWeightColumn(weightColumn);
        }
예제 #16
0
 /// <summary>
 /// Initializes a new instance of <see cref="FastTreeTweedieTrainer"/> by using the legacy <see cref="Arguments"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="args"/> is dereferenced in the constructor initializer (to read <c>LabelColumn</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="args">The legacy arguments object; its <c>LabelColumn</c> is wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.</param>
 internal FastTreeTweedieTrainer(IHostEnvironment env, Arguments args)
     : base(env, args, TrainerUtils.MakeR4ScalarColumn(args.LabelColumn))
 {
     Initialize();
 }
 /// <summary>
 /// Initializes a new instance of <see cref="FastTreeTweedieTrainer"/> by using the <see cref="Options"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="options"/> is dereferenced in the constructor initializer (to read <c>LabelColumn</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// <c>Initialize()</c> is called once the base constructor has completed.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="options">Algorithm advanced settings.</param>
 internal FastTreeTweedieTrainer(IHostEnvironment env, Options options)
     : base(env, options, TrainerUtils.MakeR4ScalarColumn(options.LabelColumn))
 {
     Initialize();
 }
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRankingTrainer"/> from an <see cref="Options"/> object.
 /// </summary>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="options">Trainer options; <c>LabelColumnName</c> supplies the label column and
 /// <c>Sigmoid</c> must be greater than zero.</param>
 internal LightGbmRankingTrainer(IHostEnvironment env, Options options)
     : base(env, LoadNameValue, options, TrainerUtils.MakeR4ScalarColumn(options.LabelColumnName))
 {
     // Reject a sigmoid that is not strictly positive as a user-argument error.
     var sigmoid = options.Sigmoid;
     Contracts.CheckUserArg(sigmoid > 0, nameof(Options.Sigmoid), "must be > 0.");
 }
예제 #19
0
 /// <summary>
 /// Initializes a new instance of <see cref="OnlineGradientDescentTrainer"/> from an <see cref="Options"/> object.
 /// </summary>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>; also used to create the loss function component.</param>
 /// <param name="options">Trainer options; <c>LabelColumn</c> supplies the label column and
 /// <c>LossFunction</c> supplies the factory for the loss function.</param>
 internal OnlineGradientDescentTrainer(IHostEnvironment env, Options options)
     : base(options, env, UserNameValue, TrainerUtils.MakeR4ScalarColumn(options.LabelColumn))
 {
     // Materialize the loss function from the factory carried by the options.
     var lossFactory = options.LossFunction;
     LossFunction = lossFactory.CreateComponent(env);
 }
예제 #20
0
 /// <summary>
 /// Initializes a new instance of <see cref="FastTreeRegressionTrainer"/> by using the legacy <see cref="Arguments"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="args"/> is dereferenced in the constructor initializer (to read <c>LabelColumn</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="args">The legacy arguments object; its <c>LabelColumn</c> is wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.</param>
 internal FastTreeRegressionTrainer(IHostEnvironment env, Arguments args)
     : base(env, args, TrainerUtils.MakeR4ScalarColumn(args.LabelColumn))
 {
 }
예제 #21
0
 /// <summary>
 /// Initializes a new instance of <see cref="FastTreeRegressionTrainer"/> by using the <see cref="Options"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="options"/> is dereferenced in the constructor initializer (to read <c>LabelColumnName</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="options">Algorithm advanced settings.</param>
 internal FastTreeRegressionTrainer(IHostEnvironment env, Options options)
     : base(env, options, TrainerUtils.MakeR4ScalarColumn(options.LabelColumnName))
 {
 }
예제 #22
0
 /// <summary>
 /// Initializes a new instance of <see cref="RegressionGamTrainer"/> by using the <see cref="Arguments"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="args"/> is dereferenced in the constructor initializer (to read <c>LabelColumn</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="args">The arguments object; its <c>LabelColumn</c> is wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.</param>
 internal RegressionGamTrainer(IHostEnvironment env, Arguments args)
     : base(env, args, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(args.LabelColumn))
 {
 }
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRankingTrainer"/> by using the <see cref="Options"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="options"/> is dereferenced in the constructor initializer (to read <c>LabelColumnName</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="options">Trainer options; <c>LabelColumnName</c> is wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.</param>
 internal LightGbmRankingTrainer(IHostEnvironment env, Options options)
     : base(env, LoadNameValue, options, TrainerUtils.MakeR4ScalarColumn(options.LabelColumnName))
 {
 }
예제 #24
0
 /// <summary>
 /// Initializes a new instance of <see cref="RegressionGamTrainer"/> by using the <see cref="Options"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="options"/> is dereferenced in the constructor initializer (to read <c>LabelColumn</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="options">Trainer options; <c>LabelColumn</c> is wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.</param>
 internal RegressionGamTrainer(IHostEnvironment env, Options options)
     : base(env, options, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(options.LabelColumn))
 {
 }
 /// <summary>
 /// Initializes a new instance of <see cref="LightGbmRegressorTrainer"/> by using the <see cref="LightGbmArguments"/> class.
 /// </summary>
 /// <remarks>
 /// <paramref name="args"/> is dereferenced in the constructor initializer (to read <c>LabelColumn</c>),
 /// so it must be non-null before the base constructor gets a chance to validate it.
 /// </remarks>
 /// <param name="env">The instance of <see cref="IHostEnvironment"/>.</param>
 /// <param name="args">The arguments object; its <c>LabelColumn</c> is wrapped by <c>TrainerUtils.MakeR4ScalarColumn</c>.</param>
 internal LightGbmRegressorTrainer(IHostEnvironment env, LightGbmArguments args)
     : base(env, LoadNameValue, args, TrainerUtils.MakeR4ScalarColumn(args.LabelColumn))
 {
 }