Example #1
        public void ArrayExtensions_SortWith_Interval()
        {
            var values   = new int[] { 0, 1, 2, 3, 4, 5 };
            var keys     = new int[] { 5, 4, 3, 2, 1, 0 };
            var interval = Interval1D.Create(2, keys.Length);

            keys.SortWith(interval, values);
            var expectedKeys = new int[] { 5, 4, 0, 1, 2, 3 };

            CollectionAssert.AreEqual(expectedKeys, keys);

            var expectedValues = new int[] { 0, 1, 5, 4, 3, 2 };

            CollectionAssert.AreEqual(expectedValues, values);
        }
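For reference, the interval-restricted SortWith above behaves like the standard Array.Sort overload that takes a start index and a length; a minimal stand-alone sketch (plain .NET, no SharpLearning types) reproduces the expected arrays from the test:
        // Sort keys[2..6) and reorder the matching slice of values in lock-step,
        // mirroring keys.SortWith(Interval1D.Create(2, keys.Length), values).
        var values = new int[] { 0, 1, 2, 3, 4, 5 };
        var keys   = new int[] { 5, 4, 3, 2, 1, 0 };

        Array.Sort(keys, values, 2, keys.Length - 2);

        // keys   -> { 5, 4, 0, 1, 2, 3 }
        // values -> { 0, 1, 5, 4, 3, 2 }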
Example #2
 void SelectNextRandomIndices(int[] candidateModelIndices)
 {
     if (m_selectWithReplacement)
     {
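         // With replacement: each candidate index is drawn independently from [0, m_numberOfModelsToSelect).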
         for (int i = 0; i < candidateModelIndices.Length; i++)
         {
             candidateModelIndices[i] = m_random.Next(0, m_numberOfModelsToSelect);
         }
     }
     else
     {
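         // Without replacement: shuffle all indices and take the first m_numberOfModelsToSelect as candidates.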
         m_allIndices.Shuffle(m_random);
         m_allIndices.CopyTo(Interval1D.Create(0, m_numberOfModelsToSelect), candidateModelIndices);
     }
 }
        public void NaiveSinglePassVarianceEntropyMetric_Impurity_Interval()
        {
            var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
            var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };

            var sut      = new NaiveSinglePassVarianceImpurityMetric();
            var interval = Interval1D.Create(2, 7);

            var val1 = sut.Impurity(set1, interval);

            Assert.AreEqual(0.69999999999999929, val1);
            var val2 = sut.Impurity(set2, interval);

            Assert.AreEqual(0.29999999999999982, val2);
        }
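Over the interval [2, 7) the slice of set1 is { 2, 3, 4, 3, 2 }: the mean is 2.8, the summed squared deviations are 2.8, and dividing by n - 1 = 4 gives 0.7, the asserted value up to floating-point rounding. The slice of set2, { 1, 1, 2, 2, 2 }, gives 1.2 / 4 = 0.3 the same way, so the metric here matches the sample variance of the interval.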
        public void RegressionImpurityCalculator_NodeImpurity()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new RegressionImpurityCalculator();

            sut.Init(new double[0], values, new double[0], parentInterval);

            sut.UpdateIndex(50);
            var actual = sut.NodeImpurity();

            Assert.AreEqual(0.66666666666666674, actual, 0.000001);
        }
Example #5
        public void ArrayExtensions_IndexedCopy_ColumnView_Interval()
        {
            var values      = new double[] { 0, 10, 20, 30, 40, 50 };
            var matrix      = new F64Matrix(values, 6, 1);
            var indices     = new int[] { 1, 1, 2, 2, 2, 5 };
            var destination = new double[values.Length];
            var interval    = Interval1D.Create(1, 5);

            using (var ptr = matrix.GetPinnedPointer())
            {
                var view = ptr.View().ColumnView(0);
                indices.IndexedCopy(view, interval, destination);
                var expected = new double[] { 0, 10, 20, 20, 20, 0 };
                CollectionAssert.AreEqual(expected, destination);
            }
        }
        public void GiniClassificationImpurityCalculator_NodeImpurity()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var unique = values.Distinct().ToArray();

            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new GiniClassificationImpurityCalculator();

            sut.Init(unique, values, new double[0], parentInterval);

            sut.UpdateIndex(50);
            var actual = sut.NodeImpurity();

            Assert.AreEqual(0.66666666666666674, actual, 0.000001);
        }
        public void RegressionImpurityCalculator_LeafValue_Weighted()
        {
            var values         = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var weights        = values.Select(t => Weight(t)).ToArray();
            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new RegressionImpurityCalculator();

            sut.Init(new double[0], values, weights, parentInterval);

            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var actual = sut.LeafValue();

            Assert.AreEqual(1.75, actual, 0.000001);
        }
        public void RegressionImpurityCalculator_ChildImpurities()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new RegressionImpurityCalculator();

            sut.Init(new double[0], values, new double[0], parentInterval);
            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var actual   = sut.ChildImpurities();
            var expected = new ChildImpurities(0.0, -2.25);

            Assert.AreEqual(expected, actual);
        }
        public void GiniClassificationImpurityCalculator_LeafValue_Weighted()
        {
            var values         = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var unique         = values.Distinct().ToArray();
            var weights        = values.Select(t => Weight(t)).ToArray();
            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new GiniClassificationImpurityCalculator();

            sut.Init(unique, values, weights, parentInterval);

            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var actual = sut.LeafValue();

            Assert.AreEqual(2.0, actual, 0.000001);
        }
Example #10
        public void GiniImpurityMetric_Impurity_Weights()
        {
            var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
            var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
            var set3 = new double[] { 1, 1, 1, 1, 1, 1, 1, 1 };

            var weights = new double[] { 3, 1, 4, 1, 0.5, 1, 2, 1, 10 };
            var sut     = new GiniImpurityMetric();

            var val1 = sut.Impurity(set1, weights, Interval1D.Create(0, set1.Length));

            Assert.AreEqual(0.96921684019918519, val1);
            var val2 = sut.Impurity(set2, weights, Interval1D.Create(0, set2.Length));

            Assert.AreEqual(0.82441700960219477, val2);
            var val3 = sut.Impurity(set3, weights, Interval1D.Create(0, set3.Length));

            Assert.AreEqual(0.64883401920438954, val3);
        }
Example #11
        public void GiniImpurityMetric_Impurity_Interval()
        {
            var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
            var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
            var set3 = new double[] { 1, 1, 1, 1, 1, 1, 1, 1 };

            var sut      = new GiniImpurityMetric();
            var interval = Interval1D.Create(2, 7);

            var val1 = sut.Impurity(set1, interval);

            Assert.AreEqual(0.64, val1);
            var val2 = sut.Impurity(set2, interval);

            Assert.AreEqual(0.48, val2);
            var val3 = sut.Impurity(set3, interval);

            Assert.AreEqual(0.0, val3);
        }
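Over the interval [2, 7) the slice of set1 is { 2, 3, 4, 3, 2 }, so the class proportions are 2/5, 2/5 and 1/5 and the Gini impurity is 1 - (0.4² + 0.4² + 0.2²) = 0.64. The slice of set2, { 1, 1, 2, 2, 2 }, gives 1 - (0.4² + 0.6²) = 0.48, and the constant set3 is pure, hence 0.0.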
Example #12
        public void GiniImpurityMetric_Impurity_Weights_None()
        {
            var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
            var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
            var set3 = new double[] { 1, 1, 1, 1, 1, 1, 1, 1 };

            var weights = new double[] { 1, 1, 1, 1, 1, 1, 1, 1, 1 };
            var sut     = new GiniImpurityMetric();

            var val1 = sut.Impurity(set1, weights, Interval1D.Create(0, set1.Length));

            Assert.AreEqual(0.79012345679012341, val1);
            var val2 = sut.Impurity(set2, weights, Interval1D.Create(0, set2.Length));

            Assert.AreEqual(0.5, val2);
            var val3 = sut.Impurity(set3, weights, Interval1D.Create(0, set3.Length));

            Assert.AreEqual(0.0, val3);
        }
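With every weight equal to one, the weighted overload reduces to the plain Gini of the full range: for set1 the class counts over the nine elements are 2, 2, 2, 2 and 1, giving 1 - (4 · (2/9)² + (1/9)²) = 64/81 ≈ 0.7901, the asserted value.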
        public void GiniClassificationImpurityCalculator_ChildImpurities()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var unique = values.Distinct().ToArray();

            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new GiniClassificationImpurityCalculator();

            sut.Init(unique, values, new double[0], parentInterval);

            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var actual   = sut.ChildImpurities();
            var expected = new ChildImpurities(0.0, 0.5);

            Assert.AreEqual(expected, actual);
        }
        public void NaiveSinglePassVarianceEntropyMetric_Impurity_Interval_Weighted_2()
        {
            var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
            var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
            var set3 = new double[] { 1 };

            var weights = new double[] { 1, 2, 4, 7, 2, 3, 5, 8, 1 };

            var sut      = new NaiveSinglePassVarianceImpurityMetric();
            var interval = Interval1D.Create(2, 7);

            var val1 = sut.Impurity(set1, weights, Interval1D.Create(0, set1.Length));

            Assert.AreEqual(1.2969432314410481, val1);
            var val2 = sut.Impurity(set2, weights, Interval1D.Create(0, set2.Length));

            Assert.AreEqual(0.29577464788732394, val2);
            var val3 = sut.Impurity(set3, weights, Interval1D.Create(0, set3.Length));

            Assert.AreEqual(0.0, val3);
        }
        public void NaiveSinglePassVarianceEntropyMetric_Impurity_Interval_Weighted_1()
        {
            var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
            var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
            var set3 = new double[] { 1 };

            var weights = new double[] { 1, 1, 1, 1, 1, 1, 1, 1, 1 };

            var sut      = new NaiveSinglePassVarianceImpurityMetric();
            var interval = Interval1D.Create(2, 7);

            var val1 = sut.Impurity(set1, weights, Interval1D.Create(0, set1.Length));

            Assert.AreEqual(1.9444444444444446, val1);
            var val2 = sut.Impurity(set2, weights, Interval1D.Create(0, set2.Length));

            Assert.AreEqual(0.2857142857142857, val2);
            var val3 = sut.Impurity(set3, weights, Interval1D.Create(0, set3.Length));

            Assert.AreEqual(0.0, val3);
        }
        public void RegressionImpurityCalculator_ImpurityImprovement()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new RegressionImpurityCalculator();

            sut.Init(new double[0], values, new double[0], parentInterval);
            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var improvement1 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(75.0, improvement1, 0.000001);

            sut.UpdateIndex(96);
            var improvement2 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(69.473379629629648, improvement2, 0.000001);
        }
        public void RegressionImpurityCalculator_ImpurityImprovement_Weighted()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var weights        = values.Select(t => Weight(t)).ToArray();
            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new RegressionImpurityCalculator();

            sut.Init(new double[0], values, weights, parentInterval);
            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var improvement1 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(167.04545454545456, improvement1, 0.000001);

            sut.UpdateIndex(96);
            var improvement2 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(162.78860028860029, improvement2, 0.000001);
        }
        public void GiniClassificationImpurityCalculator_ImpurityImprovement()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var unique = values.Distinct().ToArray();

            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new GiniClassificationImpurityCalculator();

            sut.Init(unique, values, new double[0], parentInterval);

            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var improvement1 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(0.33333333333333343, improvement1, 0.000001);

            sut.UpdateIndex(96);
            var improvement2 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(0.28047839506172845, improvement2, 0.000001);
        }
        public void GiniClassificationImpurityCalculator_ImpurityImprovement_Weighted()
        {
            var values = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var unique = values.Distinct().ToArray();

            var weights        = values.Select(t => Weight(t)).ToArray();
            var parentInterval = Interval1D.Create(0, values.Length);

            var sut = new GiniClassificationImpurityCalculator();

            sut.Init(unique, values, weights, parentInterval);

            var impurity = sut.NodeImpurity();

            sut.UpdateIndex(50);
            var improvement1 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(0.14015151515151511, improvement1, 0.000001);

            sut.UpdateIndex(96);
            var improvement2 = sut.ImpurityImprovement(impurity);

            Assert.AreEqual(0.17358104858104859, improvement2, 0.000001);
        }
        public void RandomSplitSearcher_FindBestSplit()
        {
            var parser   = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var feature  = parser.EnumerateRows("AptitudeTestScore").ToF64Vector();
            var targets  = parser.EnumerateRows("Pass").ToF64Vector();
            var interval = Interval1D.Create(0, feature.Length);

            Array.Sort(feature, targets);

            var impurityCalculator = new GiniClassificationImpurityCalculator();

            impurityCalculator.Init(targets.Distinct().ToArray(), targets, new double[0], interval);
            var impurity = impurityCalculator.NodeImpurity();

            var sut = new RandomSplitSearcher(1, 42);

            var actual = sut.FindBestSplit(impurityCalculator, feature, targets,
                                           interval, impurity);

            var expected = new SplitResult(15, 3.6724258636461693, 0.037941545633853213,
                                           0.39111111111111119, 0.49586776859504134);

            Assert.AreEqual(expected, actual);
        }
        /// <summary>
        /// Builds a binary decision tree from the provided observations and targets,
        /// using only the rows referenced by indices and, optionally, individual sample weights.
        /// </summary>
        /// <param name="observations">Feature matrix view; rows are observations, columns are features</param>
        /// <param name="targets">Target values for all observations</param>
        /// <param name="indices">Indices of the observations to learn from</param>
        /// <param name="weights">Individual sample weights; pass an empty array for uniform weighting</param>
        /// <returns>The learned BinaryTree</returns>
        public BinaryTree Build(F64MatrixView observations, double[] targets, int[] indices, double[] weights)
        {
            Array.Clear(m_variableImportance, 0, m_variableImportance.Length);

            Array.Resize(ref m_workTargets, indices.Length);
            Array.Resize(ref m_workFeature, indices.Length);
            Array.Resize(ref m_workIndices, indices.Length);

            var numberOfFeatures = observations.ColumnCount;

            if (m_featuresPrSplit == 0)
            {
                m_featuresPrSplit = numberOfFeatures;
            }

            Array.Resize(ref m_bestSplitWorkIndices, indices.Length);
            m_bestSplitWorkIndices.Clear();
            Array.Resize(ref m_variableImportance, numberOfFeatures);
            Array.Resize(ref m_allFeatureIndices, numberOfFeatures);
            Array.Resize(ref m_featureCandidates, m_featuresPrSplit);

            m_featuresCandidatesSet = false;

            for (int i = 0; i < m_allFeatureIndices.Length; i++)
            {
                m_allFeatureIndices[i] = i;
            }

            var allInterval = Interval1D.Create(0, indices.Length);

            indices.CopyTo(allInterval, m_workIndices);
            m_workIndices.IndexedCopy(targets, allInterval, m_workTargets);

            if (weights.Length != 0)
            {
                Array.Resize(ref m_workWeights, indices.Length);
                m_workIndices.IndexedCopy(weights, allInterval, m_workWeights);
            }

            var targetNames = targets.Distinct().ToArray();

            m_impurityCalculator.Init(targetNames, m_workTargets, m_workWeights, allInterval);
            var rootImpurity = m_impurityCalculator.NodeImpurity();

            var nodes         = new List<Node>();
            var probabilities = new List<double[]>();

            var stack = new Stack<DecisionNodeCreationItem>(100);

            stack.Push(new DecisionNodeCreationItem(0, NodePositionType.Root, allInterval, rootImpurity, 0));

            var first                       = true;
            var currentNodeIndex            = 0;
            var currentLeafProbabilityIndex = 0;

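            // Depth-first construction: pop a work item, search for the best split among
            // the candidate features, and either emit a leaf or push the two child intervals.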
            while (stack.Count > 0)
            {
                var bestSplitResult  = SplitResult.Initial();
                var bestFeatureIndex = -1;
                var parentItem       = stack.Pop();

                var  parentInterval  = parentItem.Interval;
                var  parentNodeDepth = parentItem.NodeDepth;
                Node parentNode      = Node.Default();

                if (nodes.Count != 0)
                {
                    parentNode = nodes[parentItem.ParentIndex];
                }

                var parentNodePositionType = parentItem.NodeType;
                var parentImpurity         = parentItem.Impurity;

                if (first && parentNode.FeatureIndex != -1)
                {
                    nodes[0] = new Node(parentNode.FeatureIndex,
                                        parentNode.Value, -1, -1, parentNode.NodeIndex, parentNode.LeafProbabilityIndex);

                    first = false;
                }

                var isLeaf = (parentNodeDepth >= m_maximumTreeDepth);

                if (!isLeaf)
                {
                    SetNextFeatures(numberOfFeatures);

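                    // For each candidate feature: gather the feature column, sort the work indices
                    // by feature value within the parent interval, and search for the best split.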
                    foreach (var featureIndex in m_featureCandidates)
                    {
                        m_workIndices.IndexedCopy(observations.ColumnView(featureIndex), parentInterval, m_workFeature);
                        m_workFeature.SortWith(parentInterval, m_workIndices);
                        m_workIndices.IndexedCopy(targets, parentInterval, m_workTargets);

                        if (weights.Length != 0)
                        {
                            m_workIndices.IndexedCopy(weights, parentInterval, m_workWeights);
                        }

                        var splitResult = m_splitSearcher.FindBestSplit(m_impurityCalculator, m_workFeature,
                                                                        m_workTargets, parentInterval, parentImpurity);

                        if (splitResult.ImpurityImprovement > bestSplitResult.ImpurityImprovement)
                        {
                            bestSplitResult = splitResult;
                            m_workIndices.CopyTo(parentInterval, m_bestSplitWorkIndices);
                            bestFeatureIndex = featureIndex;
                        }
                    }

                    isLeaf = isLeaf || (bestSplitResult.SplitIndex < 0);
                    isLeaf = isLeaf || (bestSplitResult.ImpurityImprovement < m_minimumInformationGain);

                    m_bestSplitWorkIndices.CopyTo(parentInterval, m_workIndices);
                }

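                // Leaf: compute the leaf value (and class probabilities) from the targets in the parent interval.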
                if (isLeaf)
                {
                    m_bestSplitWorkIndices.IndexedCopy(targets, parentInterval, m_workTargets);

                    if (weights.Length != 0)
                    {
                        m_bestSplitWorkIndices.IndexedCopy(weights, parentInterval, m_workWeights);
                    }

                    m_impurityCalculator.UpdateInterval(parentInterval);
                    var value = m_impurityCalculator.LeafValue();

                    var leaf = new Node(-1, value, -1, -1,
                                        currentNodeIndex++, currentLeafProbabilityIndex++);

                    probabilities.Add(m_impurityCalculator.LeafProbabilities());

                    nodes.Add(leaf);
                    nodes.UpdateParent(parentNode, leaf, parentNodePositionType);
                }
                else
                {
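                    // Split node: record variable importance, add the split node, and push
                    // the right and left child intervals onto the stack.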
                    m_variableImportance[bestFeatureIndex] += bestSplitResult.ImpurityImprovement * parentInterval.Length / allInterval.Length;

                    var split = new Node(bestFeatureIndex, bestSplitResult.Threshold, -1, -1,
                                         currentNodeIndex++, -1);

                    nodes.Add(split);
                    nodes.UpdateParent(parentNode, split, parentNodePositionType);

                    var nodeDepth = parentNodeDepth + 1;

                    stack.Push(new DecisionNodeCreationItem(split.NodeIndex, NodePositionType.Right,
                                                            Interval1D.Create(bestSplitResult.SplitIndex, parentInterval.ToExclusive),
                                                            bestSplitResult.ImpurityRight, nodeDepth));

                    stack.Push(new DecisionNodeCreationItem(split.NodeIndex, NodePositionType.Left,
                                                            Interval1D.Create(parentInterval.FromInclusive, bestSplitResult.SplitIndex),
                                                            bestSplitResult.ImpurityLeft, nodeDepth));
                }
            }

            if (first) // No valid split found; return a single-leaf result
            {
                m_impurityCalculator.UpdateInterval(allInterval);

                var leaf = new Node(-1, m_impurityCalculator.LeafValue(), -1, -1,
                                    currentNodeIndex++, currentLeafProbabilityIndex++);

                probabilities.Add(m_impurityCalculator.LeafProbabilities());

                nodes.Clear();
                nodes.Add(leaf);
            }

            return new BinaryTree(nodes, probabilities, targetNames,
                                  m_variableImportance.ToArray());
        }
        /// <summary>
        /// Learns an AdaBoost regression model.
        /// </summary>
        /// <param name="observations">Feature matrix; rows are observations, columns are features</param>
        /// <param name="targets">Target values for all observations</param>
        /// <param name="indices">Indices of the observations to learn from</param>
        /// <returns>The learned RegressionAdaBoostModel</returns>
        public RegressionAdaBoostModel Learn(F64Matrix observations, double[] targets, int[] indices)
        {
            if (m_maximumTreeDepth == 0)
            {
                m_maximumTreeDepth = 3;
            }

            m_modelLearner = new RegressionDecisionTreeLearner(m_maximumTreeDepth, m_minimumSplitSize,
                                                               observations.ColumnCount, m_minimumInformationGain, 42);

            m_modelErrors.Clear();
            m_modelWeights.Clear();
            m_models.Clear();

            Array.Resize(ref m_sampleWeights, targets.Length);

            Array.Resize(ref m_workErrors, targets.Length);
            Array.Resize(ref m_indexedTargets, indices.Length);
            Array.Resize(ref m_sampleIndices, indices.Length);

            indices.IndexedCopy(targets, Interval1D.Create(0, indices.Length),
                                m_indexedTargets);

            var initialWeight = 1.0 / indices.Length;

            for (int i = 0; i < indices.Length; i++)
            {
                var index = indices[i];
                m_sampleWeights[index] = initialWeight;
            }

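            // Boosting rounds: stop early if a round fails, the ensemble error reaches zero,
            // a single model fits perfectly, or the remaining weight sum is non-positive.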
            for (int i = 0; i < m_iterations; i++)
            {
                if (!Boost(observations, targets, indices, i))
                {
                    break;
                }

                var ensembleError = ErrorEstimate(observations, indices);

                if (ensembleError == 0.0)
                {
                    break;
                }

                if (m_modelErrors[i] == 0.0)
                {
                    break;
                }

                var weightSum = m_sampleWeights.Sum(indices);
                if (weightSum <= 0.0)
                {
                    break;
                }

                if (i == m_iterations - 1)
                {
                    // Normalize weights
                    for (int j = 0; j < indices.Length; j++)
                    {
                        var index = indices[j];
                        m_sampleWeights[index] = m_sampleWeights[index] / weightSum;
                    }
                }
            }

            var featuresCount      = observations.ColumnCount;
            var variableImportance = VariableImportance(featuresCount);

            return new RegressionAdaBoostModel(m_models.ToArray(), m_modelWeights.ToArray(),
                                               variableImportance);
        }
Example #23
        public unsafe void F64MatrixView_SubView()
        {
            var matrix = Matrix();

            using (var pinnedMatrix = matrix.GetPinnedPointer())
            {
                var subView   = pinnedMatrix.View().View(Interval2D.Create(Interval1D.Create(0, 2), Interval1D.Create(0, 3)));
                var subMatrix = matrix.Rows(new int[] { 0, 1 });
                AssertMatrixView(subMatrix, subView);
            }
        }