/// <summary>
/// Learns a regression gradient boost model. A series of regression trees are
/// fitted stage wise on the residuals of the previous tree.
/// </summary>
/// <param name="observations">Observation matrix; rows are samples, columns are features</param>
/// <param name="targets">Target value for each observation row</param>
/// <param name="indices">Indices of the observations to use during training</param>
/// <returns>The fitted regression gradient boost model</returns>
public RegressionGradientBoostModel Learn(F64Matrix observations, double[] targets, int[] indices)
{
    Checks.VerifyObservationsAndTargets(observations, targets);
    Checks.VerifyIndices(indices, observations, targets);

    var rows = observations.RowCount;
    var orderedElements = CreateOrderedElements(observations, rows);

    // Mark which observations participate in training.
    var inSample = targets.Select(t => false).ToArray();
    indices.ForEach(i => inSample[i] = true);
    var workIndices = indices.ToArray();

    var trees = new GBMTree[m_iterations];

    // Start every prediction from the loss function's constant initial estimate.
    var initialLoss = m_loss.InitialLoss(targets, inSample);
    var predictions = targets.Select(t => initialLoss).ToArray();
    var residuals = new double[targets.Length];

    var predictWork = new double[observations.RowCount];
    for (int iteration = 0; iteration < m_iterations; iteration++)
    {
        m_loss.UpdateResiduals(targets, predictions, residuals, inSample);

        if (m_subSampleRatio != 1.0)
        {
            // Stochastic gradient boosting: fit this stage's tree on a random subsample.
            var sampleSize = (int)Math.Round(m_subSampleRatio * workIndices.Length);
            var currentInSample = Sample(sampleSize, workIndices, targets.Length);

            trees[iteration] = m_learner.Learn(observations, targets, residuals,
                predictions, orderedElements, currentInSample);
        }
        else
        {
            trees[iteration] = m_learner.Learn(observations, targets, residuals,
                predictions, orderedElements, inSample);
        }

        // Add the new tree's contribution, shrunk by the learning rate.
        trees[iteration].Predict(observations, predictWork);
        for (int i = 0; i < predictWork.Length; i++)
        {
            predictions[i] += m_learningRate * predictWork[i];
        }
    }

    return new RegressionGradientBoostModel(trees, m_learningRate, initialLoss,
        observations.ColumnCount);
}
/// <summary>
/// Learns a classification gradient boost model. A series of regression trees are
/// fitted stage wise on the residuals of the previous stage.
/// </summary>
/// <param name="observations">Observation matrix; rows are samples, columns are features</param>
/// <param name="targets">Class label for each observation row</param>
/// <param name="indices">Indices of the observations to use during training</param>
/// <returns>The fitted classification gradient boost model</returns>
public ClassificationGradientBoostModel Learn(F64Matrix observations, double[] targets, int[] indices)
{
    Checks.VerifyObservationsAndTargets(observations, targets);
    Checks.VerifyIndices(indices, observations, targets);

    var rows = observations.RowCount;
    var orderedElements = CreateOrderedElements(observations, rows);

    // Mark which observations participate in training.
    var inSample = targets.Select(t => false).ToArray();
    indices.ForEach(i => inSample[i] = true);
    var workIndices = indices.ToArray();

    var uniqueTargets = targets.Distinct().OrderBy(v => v).ToArray();
    var initialLoss = m_loss.InitialLoss(targets, inSample);

    double[][] oneVsAllTargets = null;
    double[][] predictions = null;
    double[][] residuals = null;
    GBMTree[][] trees = null;

    if (uniqueTargets.Length == 2)
    {
        // Binary case - only need to fit to one class and use (1.0 - probability)
        // for the other.
        trees = new GBMTree[][] { new GBMTree[m_iterations] };
        predictions = new double[][] { targets.Select(_ => initialLoss).ToArray() };
        residuals = new double[][] { new double[targets.Length] };

        oneVsAllTargets = new double[1][];
        var target = uniqueTargets[0];
        oneVsAllTargets[0] = targets.Select(t => t == target ? 1.0 : 0.0).ToArray();
    }
    else
    {
        // Multi-class case - use one-vs-all strategy and fit a probability
        // per class.
        trees = new GBMTree[uniqueTargets.Length][];
        predictions = uniqueTargets.Select(_ => targets.Select(t => initialLoss).ToArray())
            .ToArray();
        residuals = uniqueTargets.Select(_ => new double[targets.Length])
            .ToArray();

        oneVsAllTargets = new double[uniqueTargets.Length][];
        for (int i = 0; i < uniqueTargets.Length; i++)
        {
            var target = uniqueTargets[i];
            oneVsAllTargets[i] = targets.Select(t => t == target ? 1.0 : 0.0).ToArray();
            trees[i] = new GBMTree[m_iterations];
        }
    }

    var predictWork = new double[observations.RowCount];
    for (int iteration = 0; iteration < m_iterations; iteration++)
    {
        for (int itarget = 0; itarget < trees.Length; itarget++)
        {
            m_loss.UpdateResiduals(oneVsAllTargets[itarget], predictions[itarget],
                residuals[itarget], inSample);

            if (m_subSampleRatio != 1.0)
            {
                // Stochastic gradient boosting: fit this stage's tree on a
                // random subsample.
                var sampleSize = (int)Math.Round(m_subSampleRatio * workIndices.Length);
                var currentInSample = Sample(sampleSize, workIndices, targets.Length);

                trees[itarget][iteration] = m_learner.Learn(observations,
                    oneVsAllTargets[itarget], residuals[itarget],
                    predictions[itarget], orderedElements, currentInSample);
            }
            else
            {
                trees[itarget][iteration] = m_learner.Learn(observations,
                    oneVsAllTargets[itarget], residuals[itarget],
                    predictions[itarget], orderedElements, inSample);
            }

            // Add the new tree's contribution, shrunk by the learning rate.
            trees[itarget][iteration].Predict(observations, predictWork);
            for (int i = 0; i < predictWork.Length; i++)
            {
                predictions[itarget][i] += m_learningRate * predictWork[i];
            }
        }
    }

    return new ClassificationGradientBoostModel(trees, uniqueTargets, m_learningRate,
        initialLoss, observations.ColumnCount);
}