// Exemplo n.º 1
// 0
        public void GBMDecisionTreeLearner_Learn()
        {
            // Arrange: load the shared regression data set for decision tree tests.
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var rowCount   = observations.RowCount;
            var sampleMask = targets.Select(_ => true).ToArray();

            // The learner requires, per feature column, the row indices sorted by feature value.
            var sortedIndicesPerFeature = new int[observations.ColumnCount][];
            for (var col = 0; col < observations.ColumnCount; col++)
            {
                var columnValues = observations.Column(col);
                var rowIndices   = Enumerable.Range(0, rowCount).ToArray();
                columnValues.SortWith(rowIndices);
                sortedIndicesPerFeature[col] = rowIndices;
            }

            // Act: learn a single GBM tree of depth 10 and predict on the training data.
            var learner = new GBMDecisionTreeLearner(10);
            var model   = learner.Learn(observations, targets, targets, targets,
                sortedIndicesPerFeature, sampleMask);

            var metric = new MeanSquaredErrorRegressionMetric();
            var mse    = metric.Error(targets, model.Predict(observations));

            // Assert: in-sample mean squared error matches the recorded baseline.
            Assert.AreEqual(0.0046122425037232661, mse);
        }
        public void GBMDecisionTreeLearner_Learn()
        {
            // Arrange: parse observations (F1, F2) and targets (T) from the embedded CSV resource.
            var reader       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = reader.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = reader.EnumerateRows("T").ToF64Vector();

            var rowCount   = observations.RowCount;
            var sampleMask = targets.Select(_ => true).ToArray();

            // The learner requires, per feature column, the row indices sorted by feature value.
            var sortedIndicesPerFeature = new int[observations.ColumnCount][];
            for (var col = 0; col < observations.ColumnCount; col++)
            {
                var columnValues = observations.Column(col);
                var rowIndices   = Enumerable.Range(0, rowCount).ToArray();
                columnValues.SortWith(rowIndices);
                sortedIndicesPerFeature[col] = rowIndices;
            }

            // Act: learn a single GBM tree of depth 10 and predict on the training data.
            var learner = new GBMDecisionTreeLearner(10);
            var model   = learner.Learn(observations, targets, targets, targets,
                sortedIndicesPerFeature, sampleMask);

            var metric = new MeanSquaredErrorRegressionMetric();
            var mse    = metric.Error(targets, model.Predict(observations));

            // Assert: in-sample mean squared error matches the recorded baseline.
            Assert.AreEqual(0.0046122425037232661, mse);
        }
// Exemplo n.º 3
// 0
        public void GBMTree_AddRawFeatureImportances()
        {
            // Arrange: load the shared regression data set for decision tree tests.
            var (observations, targets) = DataSetUtilities.LoadDecisionTreeDataSet();

            var rowCount   = observations.RowCount;
            var sampleMask = targets.Select(_ => true).ToArray();

            // The learner requires, per feature column, the row indices sorted by feature value.
            var sortedIndicesPerFeature = new int[observations.ColumnCount][];
            for (var col = 0; col < observations.ColumnCount; col++)
            {
                var columnValues = observations.Column(col);
                var rowIndices   = Enumerable.Range(0, rowCount).ToArray();
                columnValues.SortWith(rowIndices);
                sortedIndicesPerFeature[col] = rowIndices;
            }

            // Act: learn a tree and accumulate its raw variable importances per feature.
            var learner = new GBMDecisionTreeLearner(10);
            var model   = learner.Learn(observations, targets, targets, targets,
                sortedIndicesPerFeature, sampleMask);

            var rawImportances = new double[observations.ColumnCount];
            model.AddRawVariableImportances(rawImportances);

            // Assert: only the second feature carries importance, within tolerance.
            var expected = new double[] { 0.0, 105017.48701572006 };

            Assert.AreEqual(expected.Length, rawImportances.Length);
            Assert.AreEqual(expected[0], rawImportances[0], 0.01);
            Assert.AreEqual(expected[1], rawImportances[1], 0.01);
        }
// Exemplo n.º 4
// 0
        public void GBMTree_AddRawFeatureImportances()
        {
            // Arrange: parse observations (F1, F2) and targets (T) from the embedded CSV resource.
            var reader       = new CsvParser(() => new StringReader(Resources.DecisionTreeData));
            var observations = reader.EnumerateRows("F1", "F2").ToF64Matrix();
            var targets      = reader.EnumerateRows("T").ToF64Vector();

            var rowCount   = observations.RowCount;
            var sampleMask = targets.Select(_ => true).ToArray();

            // The learner requires, per feature column, the row indices sorted by feature value.
            var sortedIndicesPerFeature = new int[observations.ColumnCount][];
            for (var col = 0; col < observations.ColumnCount; col++)
            {
                var columnValues = observations.Column(col);
                var rowIndices   = Enumerable.Range(0, rowCount).ToArray();
                columnValues.SortWith(rowIndices);
                sortedIndicesPerFeature[col] = rowIndices;
            }

            // Act: learn a tree and accumulate its raw variable importances per feature.
            var learner = new GBMDecisionTreeLearner(10);
            var model   = learner.Learn(observations, targets, targets, targets,
                sortedIndicesPerFeature, sampleMask);

            var rawImportances = new double[observations.ColumnCount];
            model.AddRawVariableImportances(rawImportances);

            // Assert: only the second feature carries importance, within tolerance.
            var expected = new double[] { 0.0, 105017.48701572006 };

            Assert.AreEqual(expected.Length, rawImportances.Length);
            Assert.AreEqual(expected[0], rawImportances[0], 0.01);
            Assert.AreEqual(expected[1], rawImportances[1], 0.01);
        }
// Exemplo n.º 5
// 0
        /// <summary>
        ///  Base regression gradient boost learner.
        ///  A series of regression trees are fitted stage wise on the residuals of the previous stage.
        /// </summary>
        /// <param name="iterations">The number of iterations or stages. Must be at least 1.</param>
        /// <param name="learningRate">How much each iteration should contribute with. Must be larger than 0.</param>
        /// <param name="maximumTreeDepth">The maximum depth of the tree models. Must be at least 0.</param>
        /// <param name="minimumSplitSize">Minimum node split size in the trees. 1 is default. Must be larger than 0.</param>
        /// <param name="minimumInformationGain">The minimum improvement in information gain before a split is made. Must be larger than 0.</param>
        /// <param name="subSampleRatio">Ratio of observations sampled at each iteration. Default is 1.0.
        /// If below 1.0 the algorithm changes to stochastic gradient boosting.
        /// This reduces variance in the ensemble and can help counter overfitting.</param>
        /// <param name="featuresPrSplit">Number of features used at each split in the tree. 0 means all will be used.</param>
        /// <param name="loss">Loss function used. Must not be null.</param>
        /// <param name="runParallel">Use multi threading to speed up execution.</param>
        /// <exception cref="ArgumentException">Thrown when a numeric argument is outside its valid range.</exception>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="loss"/> is null.</exception>
        public RegressionGradientBoostLearner(
            int iterations,
            double learningRate,
            int maximumTreeDepth,
            int minimumSplitSize,
            double minimumInformationGain,
            double subSampleRatio,
            int featuresPrSplit,
            IGradientBoostLoss loss,
            bool runParallel)
        {
            // Guard clauses: validate every hyper-parameter up front so a
            // misconfigured learner fails at construction, not mid-training.
            if (iterations < 1)
            {
                throw new ArgumentException("Iterations must be at least 1");
            }
            if (learningRate <= 0.0)
            {
                throw new ArgumentException("learning rate must be larger than 0");
            }
            if (minimumSplitSize <= 0)
            {
                throw new ArgumentException("minimum split size must be larger than 0");
            }
            if (maximumTreeDepth < 0)
            {
                // Depth 0 is accepted by the condition above, so the message must
                // match: only negative depths are invalid.
                throw new ArgumentException("maximum tree depth must be at least 0");
            }
            if (minimumInformationGain <= 0)
            {
                throw new ArgumentException("minimum information gain must be larger than 0");
            }
            if ((subSampleRatio <= 0.0) || (subSampleRatio > 1.0))
            {
                throw new ArgumentException("subSampleRatio must be larger than 0.0 and at max 1.0");
            }
            if (featuresPrSplit < 0)
            {
                throw new ArgumentException("featuresPrSplit must be at least 0");
            }
            m_loss = loss ?? throw new ArgumentNullException(nameof(loss));

            m_iterations     = iterations;
            m_learningRate   = learningRate;
            m_subSampleRatio = subSampleRatio;
            // The per-stage tree learner is configured once and reused for every boosting iteration.
            m_learner        = new GBMDecisionTreeLearner(maximumTreeDepth, minimumSplitSize,
                                                          minimumInformationGain, featuresPrSplit, m_loss, runParallel);
        }