Ejemplo n.º 1
0
        /// <summary>
        /// Trains a new booster on <paramref name="data"/>, replacing any booster held by this trainer,
        /// and returns managed and native predictors for the resulting model.
        /// </summary>
        /// <param name="data">Data sets to train on. <c>Training</c> is required; <c>Validation</c> is optional.</param>
        /// <param name="learningRateSchedule">Optional learning rate as a function of the zero-based iteration.</param>
        /// <returns>A <c>Predictors</c> pair wrapping the managed and the native predictor.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException">The training data set of <paramref name="data"/> is null.</exception>
        /// <exception cref="Exception">
        /// The number of classes is missing for multi-class classification, group information is missing
        /// for ranking, or the parameters read back from the trained model differ from the input parameters.
        /// </exception>
        public Predictors <TOutput> Train(Datasets data
                                          , Func <int, double> learningRateSchedule = null  // optional: learning rate as a function of iteration (zero-based)
                                          )
        {
            // Validate all inputs BEFORE touching any state: further down the current booster is disposed
            // and the metrics are cleared, so a bad argument must not be allowed to get that far.
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }
            if (data.Training == null)
            {
                throw new ArgumentException("Training data is required.", nameof(data));
            }

            // For multi class, the number of labels is required.
            if (PredictionKind == PredictionKind.MultiClassClassification && !(Objective.NumClass > 1))
            {
                throw new Exception("LightGBM requires the number of classes to be specified in the parameters for multi-class classification.");
            }

            if (PredictionKind == PredictionKind.Ranking)
            {
                if (data.Training.GetGroups() == null)
                {
                    throw new Exception("Require Groups training data for ObjectiveType.LambdaRank");
                }
                if (data.Validation != null && data.Validation.GetGroups() == null)
                {
                    throw new Exception("Require Groups validation data for ObjectiveType.LambdaRank");
                }
            }

            // Discard the results of any previous training run.
            TrainMetrics.Clear();
            ValidMetrics.Clear();
            Booster?.Dispose();
            Booster = null;

            Datasets = data;

            var args = GetParameters(data);

            Booster = Train(args, data.Training, data.Validation, TrainMetrics, ValidMetrics, learningRateSchedule);

            (var model, var argsout) = Booster.GetModel();
            TrainedEnsemble          = model;
            FeatureCount             = data.Training.NumFeatures;

            // Sanity check: the parameters read back from the model should match the ones passed in.
            // Fields that are expected to differ are aligned first, using the same rules as ContinueTraining,
            // so that both entry points accept/reject the same parameter sets.
            if (learningRateSchedule != null)
            {
                // a schedule overrides the configured learning rate during training
                argsout.Learning.LearningRate = args.Learning.LearningRate;
            }
            // if both ForceColWise and ForceRowWise are false on the input, LightGBM appears to set one of them to be true on the output?
            if (!args.Learning.ForceColWise && !args.Learning.ForceRowWise)
            {
                argsout.Learning.ForceColWise = false;
                argsout.Learning.ForceRowWise = false;
            }
            // this parameter does not appear to be returned in the output model
            argsout.Objective.MetricFreq = args.Objective.MetricFreq;

            var strIn  = args.ToString();
            var strOut = argsout.ToString();

            if (strIn != strOut)
            {
                throw new Exception($"Parameters differ:\n{strIn}\n{strOut}");
            }

            var managed = CreateManagedPredictor();
            var native  = CreateNativePredictor();

            return(new Predictors <TOutput>(managed, native));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Records one set of results by appending the mean of each supplied list to the matching history collection.
        /// </summary>
        /// <remarks>
        /// The training lists are always averaged, so they must not be empty. The validation lists are
        /// optional: an empty validation list is skipped and nothing is appended for it.
        /// </remarks>
        /// <param name="trainLoss">Training loss values to average.</param>
        /// <param name="trainMetric">Training metric values to average.</param>
        /// <param name="valLoss">Validation loss values to average; may be empty.</param>
        /// <param name="valMetric">Validation metric values to average; may be empty.</param>
        public void Add(List <double> trainLoss, List <double> trainMetric, List <double> valLoss, List <double> valMetric)
        {
            // Training results: always recorded, loss first and metric second.
            double meanTrainLoss = trainLoss.Average();
            TrainLosses.Add(meanTrainLoss);

            double meanTrainMetric = trainMetric.Average();
            TrainMetrics.Add(meanTrainMetric);

            // Validation results: recorded only when there is something to average.
            bool hasValLoss = valLoss.Count != 0;
            if (hasValLoss)
            {
                double meanValLoss = valLoss.Average();
                ValLosses.Add(meanValLoss);
            }

            bool hasValMetric = valMetric.Count != 0;
            if (hasValMetric)
            {
                double meanValMetric = valMetric.Average();
                ValMetrics.Add(meanValMetric);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Continue training the current model, optionally with a new training dataset.
        /// </summary>
        /// <remarks>
        /// Existing training and validation metrics are cleared. All checks run before the trainer's
        /// state is modified, so a failed check leaves the current data sets and booster untouched.
        /// </remarks>
        /// <param name="trainingData">
        /// Optional replacement training data. When null, the training data already held in <c>Datasets</c> is reused.
        /// </param>
        /// <param name="learningRateSchedule">Optional learning rate as a function of the zero-based iteration.</param>
        /// <returns>A <c>Predictors</c> pair wrapping the managed and the native predictor.</returns>
        /// <exception cref="Exception">
        /// There is no existing booster, data sets or training data; the number of classes is missing for
        /// multi-class classification; group information is missing for ranking; or the parameters read
        /// back from the trained model differ from the input parameters.
        /// </exception>
        public Predictors <TOutput> ContinueTraining(Dataset trainingData = null
                                                     , Func <int, double> learningRateSchedule = null  // optional: learning rate as a function of iteration (zero-based)
                                                     )
        {
            if (Booster == null)
            {
                throw new Exception("No existing booster to train.");
            }
            if (Datasets == null)
            {
                throw new Exception("No existing data sets.");
            }

            // The data set that training will actually run on. It is validated BEFORE it is installed,
            // so that a rejected data set does not end up half-applied to Datasets and the booster.
            var effectiveTraining = trainingData ?? Datasets.Training;
            if (effectiveTraining == null)
            {
                throw new Exception("No existing training data.");
            }

            // For multi class, the number of labels is required.
            if (PredictionKind == PredictionKind.MultiClassClassification && !(Objective.NumClass > 1))
            {
                throw new Exception("LightGBM requires the number of classes to be specified in the parameters for multi-class classification.");
            }

            if (PredictionKind == PredictionKind.Ranking)
            {
                if (effectiveTraining.GetGroups() == null)
                {
                    throw new Exception("Require Groups training data for ObjectiveType.LambdaRank");
                }
                if (Datasets.Validation != null && Datasets.Validation.GetGroups() == null)
                {
                    throw new Exception("Require Groups validation data for ObjectiveType.LambdaRank");
                }
            }

            // All checks passed: switch to the new training data, if one was supplied.
            if (trainingData != null)
            {
                Datasets.Training = trainingData;
                Booster.ResetTrainingData(trainingData);
            }

            // NOTE: existing metrics cleared
            TrainMetrics.Clear();
            ValidMetrics.Clear();

            var args = GetParameters(Datasets);

            // TODO: HOW TO RESET VALIDATION DATA???
            Train(args, Booster, (Datasets.Validation != null), TrainMetrics, ValidMetrics, learningRateSchedule);

            (var model, var argsout) = Booster.GetModel();
            TrainedEnsemble          = model;
            FeatureCount             = Datasets.Training.NumFeatures;

            // Sanity check: the parameters read back from the model should match the ones passed in.
            // Fields that are expected to differ are aligned first.
            if (learningRateSchedule != null)
            {
                // a schedule overrides the configured learning rate during training
                argsout.Learning.LearningRate = args.Learning.LearningRate;
            }
            // if both ForceColWise and ForceRowWise are false on the input, LightGBM appears to set one of them to be true on the output?
            if (!args.Learning.ForceColWise && !args.Learning.ForceRowWise)
            {
                argsout.Learning.ForceColWise = false;
                argsout.Learning.ForceRowWise = false;
            }
            // for some reason this parameter is not returned in the output model
            argsout.Objective.MetricFreq = args.Objective.MetricFreq;

            var strIn  = args.ToString();
            var strOut = argsout.ToString();

            if (strIn != strOut)
            {
                throw new Exception($"Parameters differ:\n{strIn}\n{strOut}");
            }

            var managed = CreateManagedPredictor();
            var native  = CreateNativePredictor();

            return(new Predictors <TOutput>(managed, native));
        }