Example #1
0
        /// <summary>
        ///  Learns a gradient boost regression model.
        ///  A series of regression trees are fitted stage wise on the residuals of the previous tree.
        /// </summary>
        /// <param name="observations">Observation matrix, one row per sample.</param>
        /// <param name="targets">Target values, one per observation row.</param>
        /// <param name="indices">Indices of the observations to use for training.</param>
        /// <returns>The trained <see cref="RegressionGradientBoostModel"/>.</returns>
        public RegressionGradientBoostModel Learn(F64Matrix observations, double[] targets,
                                                  int[] indices)
        {
            Checks.VerifyObservationsAndTargets(observations, targets);
            Checks.VerifyIndices(indices, observations, targets);

            var rows            = observations.RowCount;
            var orderedElements = CreateOrderedElements(observations, rows);

            // Mark which observations participate in training.
            var inSample = new bool[targets.Length];
            foreach (var index in indices)
            {
                inSample[index] = true;
            }

            // Local copy so subsampling never mutates the caller's array.
            var workIndices = indices.ToArray();

            var trees = new GBMTree[m_iterations];

            // All predictions start from the loss-specific constant model.
            var initialLoss = m_loss.InitialLoss(targets, inSample);
            var predictions = targets.Select(_ => initialLoss).ToArray();
            var residuals   = new double[targets.Length];

            var predictWork = new double[rows];

            for (int iteration = 0; iteration < m_iterations; iteration++)
            {
                // Each stage fits the residuals (negative gradient) of the current ensemble.
                m_loss.UpdateResiduals(targets, predictions, residuals, inSample);

                if (m_subSampleRatio != 1.0)
                {
                    // Stochastic gradient boosting: fit this stage's tree on a random subsample.
                    var sampleSize      = (int)Math.Round(m_subSampleRatio * workIndices.Length);
                    var currentInSample = Sample(sampleSize, workIndices, targets.Length);

                    trees[iteration] = m_learner.Learn(observations, targets, residuals,
                                                       predictions, orderedElements, currentInSample);
                }
                else
                {
                    trees[iteration] = m_learner.Learn(observations, targets, residuals,
                                                       predictions, orderedElements, inSample);
                }

                // Add the new tree's contribution, shrunk by the learning rate.
                trees[iteration].Predict(observations, predictWork);
                for (int i = 0; i < predictWork.Length; i++)
                {
                    predictions[i] += m_learningRate * predictWork[i];
                }
            }

            return new RegressionGradientBoostModel(trees, m_learningRate, initialLoss,
                                                    observations.ColumnCount);
        }
Example #2
0
        /// <summary>
        ///  Learns a gradient boost classification model.
        ///  A series of regression trees are fitted stage wise on the residuals of the previous stage.
        /// </summary>
        /// <param name="observations">Observation matrix, one row per sample.</param>
        /// <param name="targets">Target class values, one per observation row.</param>
        /// <param name="indices">Indices of the observations to use for training.</param>
        /// <returns>The trained <see cref="ClassificationGradientBoostModel"/>.</returns>
        public ClassificationGradientBoostModel Learn(F64Matrix observations, double[] targets,
                                                      int[] indices)
        {
            Checks.VerifyObservationsAndTargets(observations, targets);
            Checks.VerifyIndices(indices, observations, targets);

            var rows            = observations.RowCount;
            var orderedElements = CreateOrderedElements(observations, rows);

            // Mark which observations participate in training.
            var inSample = new bool[targets.Length];
            foreach (var index in indices)
            {
                inSample[index] = true;
            }

            // Local copy so subsampling never mutates the caller's array.
            var workIndices = indices.ToArray();

            var uniqueTargets = targets.Distinct().OrderBy(v => v).ToArray();
            var initialLoss   = m_loss.InitialLoss(targets, inSample);

            // Both branches below assign all four locals, so no null initializers needed.
            double[][]  oneVsAllTargets;
            double[][]  predictions;
            double[][]  residuals;
            GBMTree[][] trees;

            if (uniqueTargets.Length == 2) // Binary case - only need to fit to one class and use (1.0 - probability)
            {
                trees       = new GBMTree[][] { new GBMTree[m_iterations] };
                predictions = new double[][] { targets.Select(_ => initialLoss).ToArray() };
                residuals   = new double[][] { new double[targets.Length] };

                oneVsAllTargets = new double[1][];
                var target = uniqueTargets[0];
                oneVsAllTargets[0] = targets.Select(t => t == target ? 1.0 : 0.0).ToArray();
            }
            else // multi-class case - use oneVsAll strategy and fit probability for each class
            {
                trees       = new GBMTree[uniqueTargets.Length][];
                predictions = uniqueTargets.Select(_ => targets.Select(_ => initialLoss).ToArray())
                              .ToArray();
                residuals = uniqueTargets.Select(_ => new double[targets.Length])
                            .ToArray();

                oneVsAllTargets = new double[uniqueTargets.Length][];
                for (int i = 0; i < uniqueTargets.Length; i++)
                {
                    // 1.0 where the row belongs to this class, 0.0 elsewhere.
                    var target = uniqueTargets[i];
                    oneVsAllTargets[i] = targets.Select(t => t == target ? 1.0 : 0.0).ToArray();
                    trees[i]           = new GBMTree[m_iterations];
                }
            }

            var predictWork = new double[rows];

            for (int iteration = 0; iteration < m_iterations; iteration++)
            {
                // One tree per class (one total in the binary case) per boosting stage.
                for (int itarget = 0; itarget < trees.Length; itarget++)
                {
                    m_loss.UpdateResiduals(oneVsAllTargets[itarget], predictions[itarget],
                                           residuals[itarget], inSample);

                    if (m_subSampleRatio != 1.0)
                    {
                        // Stochastic gradient boosting: fit this stage's tree on a random subsample.
                        var sampleSize      = (int)Math.Round(m_subSampleRatio * workIndices.Length);
                        var currentInSample = Sample(sampleSize, workIndices, targets.Length);

                        trees[itarget][iteration] = m_learner.Learn(observations, oneVsAllTargets[itarget],
                                                                    residuals[itarget], predictions[itarget], orderedElements, currentInSample);
                    }
                    else
                    {
                        trees[itarget][iteration] = m_learner.Learn(observations, oneVsAllTargets[itarget],
                                                                    residuals[itarget], predictions[itarget], orderedElements, inSample);
                    }

                    // Add the new tree's contribution, shrunk by the learning rate.
                    trees[itarget][iteration].Predict(observations, predictWork);
                    for (int i = 0; i < predictWork.Length; i++)
                    {
                        predictions[itarget][i] += m_learningRate * predictWork[i];
                    }
                }
            }

            return new ClassificationGradientBoostModel(trees, uniqueTargets, m_learningRate,
                                                        initialLoss, observations.ColumnCount);
        }