/// <summary>
/// Fits a regression decision tree using a set of presorted indices for each feature.
/// </summary>
/// <param name="maximumTreeDepth">The maximal tree depth before a leaf is generated</param>
/// <param name="minimumSplitSize">The minimum size of a node before a split is attempted</param>
/// <param name="minimumInformationGain">The minimum improvement in information gain before a split is made</param>
/// <param name="featuresPrSplit">Number of features used at each split in the tree. 0 means all will be used</param>
/// <param name="loss">loss function used</param>
/// <param name="runParallel">Use multi threading to speed up execution</param>
/// <exception cref="ArgumentException">On non-positive depth, split size, or information gain, or negative featuresPrSplit</exception>
/// <exception cref="ArgumentNullException">If <paramref name="loss"/> is null</exception>
public GBMDecisionTreeLearner(int maximumTreeDepth, int minimumSplitSize,
    double minimumInformationGain, int featuresPrSplit, IGradientBoostLoss loss, bool runParallel)
{
    // Guard clauses: fail fast on invalid hyperparameters before any state is set.
    if (maximumTreeDepth <= 0) { throw new ArgumentException("maximum tree depth must be larger than 0"); }
    if (minimumInformationGain <= 0) { throw new ArgumentException("minimum information gain must be larger than 0"); }
    if (minimumSplitSize <= 0) { throw new ArgumentException("minimum split size must be larger than 0"); }
    // 0 is a valid sentinel meaning "use all features", so only negatives are rejected.
    if (featuresPrSplit < 0) { throw new ArgumentException("featuresPrSplit must be at least 0"); }

    m_maximumTreeDepth = maximumTreeDepth;
    m_minimumSplitSize = minimumSplitSize;
    m_minimumInformationGain = minimumInformationGain;
    m_featuresPrSplit = featuresPrSplit;
    m_runParallel = runParallel;
    // nameof keeps the parameter name refactor-safe (was the string literal "loss"),
    // matching the null-check style used elsewhere in this library.
    m_loss = loss ?? throw new ArgumentNullException(nameof(loss));
}
/// <summary>
/// Base regression gradient boost learner.
/// A series of regression trees are fitted stage wise on the residuals of the previous stage
/// </summary>
/// <param name="iterations">The number of iterations or stages</param>
/// <param name="learningRate">How much each iteration should contribute with</param>
/// <param name="maximumTreeDepth">The maximum depth of the tree models</param>
/// <param name="minimumSplitSize">minimum node split size in the trees 1 is default</param>
/// <param name="minimumInformationGain">The minimum improvement in information gain before a split is made</param>
/// <param name="subSampleRatio">ratio of observations sampled at each iteration. Default is 1.0.
/// If below 1.0 the algorithm changes to stochastic gradient boosting.
/// This reduces variance in the ensemble and can help counter overfitting</param>
/// <param name="featuresPrSplit">Number of features used at each split in the tree. 0 means all will be used</param>
/// <param name="loss">loss function used</param>
/// <param name="runParallel">Use multi threading to speed up execution</param>
/// <exception cref="ArgumentException">On out-of-range hyperparameters</exception>
/// <exception cref="ArgumentNullException">If <paramref name="loss"/> is null</exception>
public RegressionGradientBoostLearner(
    int iterations,
    double learningRate,
    int maximumTreeDepth,
    int minimumSplitSize,
    double minimumInformationGain,
    double subSampleRatio,
    int featuresPrSplit,
    IGradientBoostLoss loss,
    bool runParallel)
{
    if (iterations < 1) { throw new ArgumentException("Iterations must be at least 1"); }
    if (learningRate <= 0.0) { throw new ArgumentException("learning rate must be larger than 0"); }
    if (minimumSplitSize <= 0) { throw new ArgumentException("minimum split size must be larger than 0"); }
    // BUG FIX: condition was `maximumTreeDepth < 0`, which let 0 through even though the
    // message says "larger than 0" and GBMDecisionTreeLearner rejects 0 anyway.
    // Validate consistently here so the error surfaces at this constructor.
    if (maximumTreeDepth <= 0) { throw new ArgumentException("maximum tree depth must be larger than 0"); }
    if (minimumInformationGain <= 0) { throw new ArgumentException("minimum information gain must be larger than 0"); }
    if ((subSampleRatio <= 0.0) || (subSampleRatio > 1.0))
    {
        throw new ArgumentException("subSampleRatio must be larger than 0.0 and at max 1.0");
    }
    // 0 is a valid sentinel meaning "use all features", so only negatives are rejected.
    if (featuresPrSplit < 0) { throw new ArgumentException("featuresPrSplit must be at least 0"); }

    m_loss = loss ?? throw new ArgumentNullException(nameof(loss));
    m_iterations = iterations;
    m_learningRate = learningRate;
    m_subSampleRatio = subSampleRatio;
    // Tree hyperparameters are re-validated and held by the inner tree learner.
    m_learner = new GBMDecisionTreeLearner(maximumTreeDepth, minimumSplitSize,
        minimumInformationGain, featuresPrSplit, m_loss, runParallel);
}