// SortWith over an interval: only keys inside [2, keys.Length) are sorted, and the
// paired values are permuted the same way; entries before the interval stay put.
public void ArrayExtensions_SortWith_Interval()
{
    // Arrange
    var keys = new int[] { 5, 4, 3, 2, 1, 0 };
    var values = new int[] { 0, 1, 2, 3, 4, 5 };
    var sortRange = Interval1D.Create(2, keys.Length);

    // Act
    keys.SortWith(sortRange, values);

    // Assert
    CollectionAssert.AreEqual(new int[] { 5, 4, 0, 1, 2, 3 }, keys);
    CollectionAssert.AreEqual(new int[] { 0, 1, 5, 4, 3, 2 }, values);
}
// Fills candidateModelIndices with the next random choice of model indices.
//  - With replacement: every slot is drawn independently from the whole model pool.
//  - Without replacement: the pool (m_allIndices) is shuffled and its first
//    m_numberOfModelsToSelect entries are copied into candidateModelIndices.
void SelectNextRandomIndices(int[] candidateModelIndices)
{
    if (m_selectWithReplacement)
    {
        // Draw from the full pool. The previous upper bound (m_numberOfModelsToSelect)
        // restricted the draw to the first few indices, so models past that bound could
        // never be chosen.
        // NOTE(review): assumes m_allIndices holds the indices 0..Length-1 - confirm.
        var poolSize = m_allIndices.Length;

        for (int i = 0; i < candidateModelIndices.Length; i++)
        {
            candidateModelIndices[i] = m_random.Next(0, poolSize);
        }
    }
    else
    {
        m_allIndices.Shuffle(m_random);
        m_allIndices.CopyTo(Interval1D.Create(0, m_numberOfModelsToSelect), candidateModelIndices);
    }
}
// Variance impurity restricted to the half-open interval [2, 7) of two target sets.
public void NaiveSinglePassVarianceEntropyMetric_Impurity_Interval()
{
    var metric = new NaiveSinglePassVarianceImpurityMetric();
    var window = Interval1D.Create(2, 7);

    var peaked = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
    var twoLevel = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };

    var peakedImpurity = metric.Impurity(peaked, window);
    Assert.AreEqual(0.69999999999999929, peakedImpurity);

    var twoLevelImpurity = metric.Impurity(twoLevel, window);
    Assert.AreEqual(0.29999999999999982, twoLevelImpurity);
}
// Node impurity of the regression calculator over all 150 targets
// (queried after the split position has been moved to index 50).
public void RegressionImpurityCalculator_NodeImpurity()
{
    var targets = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };

    var calculator = new RegressionImpurityCalculator();
    calculator.Init(new double[0], targets, new double[0], Interval1D.Create(0, targets.Length));
    calculator.UpdateIndex(50);

    Assert.AreEqual(0.66666666666666674, calculator.NodeImpurity(), 0.000001);
}
// IndexedCopy from a pinned matrix column view, restricted to the interval [1, 5).
public void ArrayExtensions_IndexedCopy_ColumnView_Interval()
{
    var source = new double[] { 0, 10, 20, 30, 40, 50 };
    var lookup = new int[] { 1, 1, 2, 2, 2, 5 };
    var copied = new double[source.Length];
    var range = Interval1D.Create(1, 5);
    var columnMatrix = new F64Matrix(source, 6, 1);

    using (var pinned = columnMatrix.GetPinnedPointer())
    {
        var column = pinned.View().ColumnView(0);

        lookup.IndexedCopy(column, range, copied);

        // Slots 0 and 5 are outside the interval and are expected to keep their initial 0.
        CollectionAssert.AreEqual(new double[] { 0, 10, 20, 20, 20, 0 }, copied);
    }
}
// Node impurity of the Gini calculator over all 150 class labels
// (queried after the split position has been moved to index 50).
public void GiniClassificationImpurityCalculator_NodeImpurity()
{
    var labels = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var classNames = labels.Distinct().ToArray();

    var calculator = new GiniClassificationImpurityCalculator();
    calculator.Init(classNames, labels, new double[0], Interval1D.Create(0, labels.Length));
    calculator.UpdateIndex(50);

    Assert.AreEqual(0.66666666666666674, calculator.NodeImpurity(), 0.000001);
}
// Leaf value of the regression calculator when every sample carries a weight
// produced by the Weight helper.
public void RegressionImpurityCalculator_LeafValue_Weighted()
{
    var targets = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var sampleWeights = targets.Select(v => Weight(v)).ToArray();

    var calculator = new RegressionImpurityCalculator();
    calculator.Init(new double[0], targets, sampleWeights, Interval1D.Create(0, targets.Length));

    // Result is not needed; the call is kept so the calculator sees the same call sequence.
    calculator.NodeImpurity();
    calculator.UpdateIndex(50);

    Assert.AreEqual(1.75, calculator.LeafValue(), 0.000001);
}
// Left/right child impurities reported by the regression calculator for a split at index 50.
public void RegressionImpurityCalculator_ChildImpurities()
{
    var targets = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };

    var calculator = new RegressionImpurityCalculator();
    calculator.Init(new double[0], targets, new double[0], Interval1D.Create(0, targets.Length));

    // Result is not needed; the call is kept so the calculator sees the same call sequence.
    calculator.NodeImpurity();
    calculator.UpdateIndex(50);

    var expected = new ChildImpurities(0.0, -2.25);
    var actual = calculator.ChildImpurities();

    Assert.AreEqual(expected, actual);
}
// Leaf value of the Gini calculator when every sample carries a weight
// produced by the Weight helper.
public void GiniClassificationImpurityCalculator_LeafValue_Weighted()
{
    var labels = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var classNames = labels.Distinct().ToArray();
    var sampleWeights = labels.Select(v => Weight(v)).ToArray();

    var calculator = new GiniClassificationImpurityCalculator();
    calculator.Init(classNames, labels, sampleWeights, Interval1D.Create(0, labels.Length));

    // Result is not needed; the call is kept so the calculator sees the same call sequence.
    calculator.NodeImpurity();
    calculator.UpdateIndex(50);

    Assert.AreEqual(2.0, calculator.LeafValue(), 0.000001);
}
// Weighted Gini impurity; each target set is evaluated over Interval1D.Create(0, set.Length)
// with one shared weight vector.
public void GiniImpurityMetric_Impurity_Weights()
{
    var weights = new double[] { 3, 1, 4, 1, 0.5, 1, 2, 1, 10 };
    var metric = new GiniImpurityMetric();

    var peaked = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
    var twoLevel = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
    var constant = new double[] { 1, 1, 1, 1, 1, 1, 1, 1 };

    var peakedImpurity = metric.Impurity(peaked, weights, Interval1D.Create(0, peaked.Length));
    Assert.AreEqual(0.96921684019918519, peakedImpurity);

    var twoLevelImpurity = metric.Impurity(twoLevel, weights, Interval1D.Create(0, twoLevel.Length));
    Assert.AreEqual(0.82441700960219477, twoLevelImpurity);

    var constantImpurity = metric.Impurity(constant, weights, Interval1D.Create(0, constant.Length));
    Assert.AreEqual(0.64883401920438954, constantImpurity);
}
// Unweighted Gini impurity restricted to the half-open interval [2, 7) of each target set.
public void GiniImpurityMetric_Impurity_Interval()
{
    var metric = new GiniImpurityMetric();
    var window = Interval1D.Create(2, 7);

    var peaked = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
    var twoLevel = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
    var constant = new double[] { 1, 1, 1, 1, 1, 1, 1, 1 };

    var peakedImpurity = metric.Impurity(peaked, window);
    Assert.AreEqual(0.64, peakedImpurity);

    var twoLevelImpurity = metric.Impurity(twoLevel, window);
    Assert.AreEqual(0.48, twoLevelImpurity);

    var constantImpurity = metric.Impurity(constant, window);
    Assert.AreEqual(0.0, constantImpurity);
}
// Weighted Gini impurity with all weights equal to 1; each target set is evaluated
// over Interval1D.Create(0, set.Length).
public void GiniImpurityMetric_Impurity_Weights_None()
{
    var unitWeights = new double[] { 1, 1, 1, 1, 1, 1, 1, 1, 1 };
    var metric = new GiniImpurityMetric();

    var peaked = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
    var twoLevel = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
    var constant = new double[] { 1, 1, 1, 1, 1, 1, 1, 1 };

    var peakedImpurity = metric.Impurity(peaked, unitWeights, Interval1D.Create(0, peaked.Length));
    Assert.AreEqual(0.79012345679012341, peakedImpurity);

    var twoLevelImpurity = metric.Impurity(twoLevel, unitWeights, Interval1D.Create(0, twoLevel.Length));
    Assert.AreEqual(0.5, twoLevelImpurity);

    var constantImpurity = metric.Impurity(constant, unitWeights, Interval1D.Create(0, constant.Length));
    Assert.AreEqual(0.0, constantImpurity);
}
// Left/right child impurities reported by the Gini calculator for a split at index 50.
public void GiniClassificationImpurityCalculator_ChildImpurities()
{
    var labels = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var classNames = labels.Distinct().ToArray();

    var calculator = new GiniClassificationImpurityCalculator();
    calculator.Init(classNames, labels, new double[0], Interval1D.Create(0, labels.Length));

    // Result is not needed; the call is kept so the calculator sees the same call sequence.
    calculator.NodeImpurity();
    calculator.UpdateIndex(50);

    var expected = new ChildImpurities(0.0, .5);
    var actual = calculator.ChildImpurities();

    Assert.AreEqual(expected, actual);
}
// Weighted variance impurity with non-uniform weights. Each target set is evaluated
// over Interval1D.Create(0, set.Length); the single-element set is expected to give 0.
public void NaiveSinglePassVarianceEntropyMetric_Impurity_Interval_Weighted_2()
{
    var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
    var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
    var set3 = new double[] { 1 };
    var weights = new double[] { 1, 2, 4, 7, 2, 3, 5, 8, 1 };

    var sut = new NaiveSinglePassVarianceImpurityMetric();

    // The former unused local `interval` (Interval1D.Create(2, 7)) was removed: no call
    // below ever received it, so it only suggested a sub-interval was being tested.
    var val1 = sut.Impurity(set1, weights, Interval1D.Create(0, set1.Length));
    Assert.AreEqual(1.2969432314410481, val1);

    var val2 = sut.Impurity(set2, weights, Interval1D.Create(0, set2.Length));
    Assert.AreEqual(0.29577464788732394, val2);

    var val3 = sut.Impurity(set3, weights, Interval1D.Create(0, set3.Length));
    Assert.AreEqual(0.0, val3);
}
// Weighted variance impurity with all weights equal to 1. Each target set is evaluated
// over Interval1D.Create(0, set.Length); the single-element set is expected to give 0.
public void NaiveSinglePassVarianceEntropyMetric_Impurity_Interval_Weighted_1()
{
    var set1 = new double[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
    var set2 = new double[] { 1, 1, 1, 1, 2, 2, 2, 2 };
    var set3 = new double[] { 1 };
    var weights = new double[] { 1, 1, 1, 1, 1, 1, 1, 1, 1 };

    var sut = new NaiveSinglePassVarianceImpurityMetric();

    // The former unused local `interval` (Interval1D.Create(2, 7)) was removed: no call
    // below ever received it, so it only suggested a sub-interval was being tested.
    var val1 = sut.Impurity(set1, weights, Interval1D.Create(0, set1.Length));
    Assert.AreEqual(1.9444444444444446, val1);

    var val2 = sut.Impurity(set2, weights, Interval1D.Create(0, set2.Length));
    Assert.AreEqual(0.2857142857142857, val2);

    var val3 = sut.Impurity(set3, weights, Interval1D.Create(0, set3.Length));
    Assert.AreEqual(0.0, val3);
}
// Impurity improvement of the regression calculator for split positions 50 and 96,
// both measured against the impurity of the full node.
public void RegressionImpurityCalculator_ImpurityImprovement()
{
    var targets = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };

    var calculator = new RegressionImpurityCalculator();
    calculator.Init(new double[0], targets, new double[0], Interval1D.Create(0, targets.Length));
    var nodeImpurity = calculator.NodeImpurity();

    calculator.UpdateIndex(50);
    var improvementAt50 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(75.0, improvementAt50, 0.000001);

    calculator.UpdateIndex(96);
    var improvementAt96 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(69.473379629629648, improvementAt96, 0.000001);
}
// Impurity improvement of the regression calculator for split positions 50 and 96
// when every sample carries a weight produced by the Weight helper.
public void RegressionImpurityCalculator_ImpurityImprovement_Weighted()
{
    var targets = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var sampleWeights = targets.Select(v => Weight(v)).ToArray();

    var calculator = new RegressionImpurityCalculator();
    calculator.Init(new double[0], targets, sampleWeights, Interval1D.Create(0, targets.Length));
    var nodeImpurity = calculator.NodeImpurity();

    calculator.UpdateIndex(50);
    var improvementAt50 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(167.04545454545456, improvementAt50, 0.000001);

    calculator.UpdateIndex(96);
    var improvementAt96 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(162.78860028860029, improvementAt96, 0.000001);
}
// Impurity improvement of the Gini calculator for split positions 50 and 96,
// both measured against the impurity of the full node.
public void GiniClassificationImpurityCalculator_ImpurityImprovement()
{
    var labels = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var classNames = labels.Distinct().ToArray();

    var calculator = new GiniClassificationImpurityCalculator();
    calculator.Init(classNames, labels, new double[0], Interval1D.Create(0, labels.Length));
    var nodeImpurity = calculator.NodeImpurity();

    calculator.UpdateIndex(50);
    var improvementAt50 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.33333333333333343, improvementAt50, 0.000001);

    calculator.UpdateIndex(96);
    var improvementAt96 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.28047839506172845, improvementAt96, 0.000001);
}
// Impurity improvement of the Gini calculator for split positions 50 and 96
// when every sample carries a weight produced by the Weight helper.
public void GiniClassificationImpurityCalculator_ImpurityImprovement_Weighted()
{
    var labels = new double[]
    {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 2, 2, 1,
        1, 2, 2, 2, 1, 2, 2, 2, 1, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    };
    var classNames = labels.Distinct().ToArray();
    var sampleWeights = labels.Select(v => Weight(v)).ToArray();

    var calculator = new GiniClassificationImpurityCalculator();
    calculator.Init(classNames, labels, sampleWeights, Interval1D.Create(0, labels.Length));
    var nodeImpurity = calculator.NodeImpurity();

    calculator.UpdateIndex(50);
    var improvementAt50 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.14015151515151511, improvementAt50, 0.000001);

    calculator.UpdateIndex(96);
    var improvementAt96 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.17358104858104859, improvementAt96, 0.000001);
}
// RandomSplitSearcher on the aptitude data: the feature column is sorted (targets are
// reordered alongside it) before the searcher is asked for the best split.
public void RandomSplitSearcher_FindBestSplit()
{
    // Arrange: load the single feature and the targets from the embedded CSV resource.
    var csv = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var scores = csv.EnumerateRows("AptitudeTestScore").ToF64Vector();
    var passed = csv.EnumerateRows("Pass").ToF64Vector();
    var fullRange = Interval1D.Create(0, scores.Length);

    Array.Sort(scores, passed);

    var calculator = new GiniClassificationImpurityCalculator();
    calculator.Init(passed.Distinct().ToArray(), passed, new double[0], fullRange);
    var nodeImpurity = calculator.NodeImpurity();

    var searcher = new RandomSplitSearcher(1, 42);

    // Act
    var found = searcher.FindBestSplit(calculator, scores, passed, fullRange, nodeImpurity);

    // Assert
    var expected = new SplitResult(
        15,
        3.6724258636461693,
        0.037941545633853213,
        0.39111111111111119,
        0.49586776859504134);

    Assert.AreEqual(expected, found);
}
/// <summary>
/// Builds a binary decision tree from the observations selected by <paramref name="indices"/>.
/// The tree is grown iteratively with an explicit stack of pending nodes instead of recursion.
/// A node becomes a leaf when its depth has reached m_maximumTreeDepth, when no split index
/// was found, or when the best impurity improvement is below m_minimumInformationGain.
/// </summary>
/// <param name="observations">View of the feature matrix; one column per feature.</param>
/// <param name="targets">Target values, addressed through <paramref name="indices"/>.</param>
/// <param name="indices">Indices of the observations to build the tree from.</param>
/// <param name="weights">Per-observation weights addressed through <paramref name="indices"/>;
/// an empty array means no weights are copied.</param>
/// <returns>The tree: its nodes, the leaf probabilities, the distinct target values and a copy
/// of the accumulated variable importance.</returns>
public BinaryTree Build(F64MatrixView observations, double[] targets, int[] indices, double[] weights)
{
    Array.Clear(m_variableImportance, 0, m_variableImportance.Length);

    Array.Resize(ref m_workTargets, indices.Length);
    Array.Resize(ref m_workFeature, indices.Length);
    Array.Resize(ref m_workIndices, indices.Length);

    var numberOfFeatures = observations.ColumnCount;

    // 0 is treated as "use every feature at each split".
    if (m_featuresPrSplit == 0)
    {
        m_featuresPrSplit = numberOfFeatures;
    }

    Array.Resize(ref m_bestSplitWorkIndices, indices.Length);
    Array.Resize(ref m_variableImportance, numberOfFeatures);
    Array.Resize(ref m_allFeatureIndices, numberOfFeatures);
    Array.Resize(ref m_featureCandidates, m_featuresPrSplit);

    m_featuresCandidatesSet = false;

    for (int i = 0; i < m_allFeatureIndices.Length; i++)
    {
        m_allFeatureIndices[i] = i;
    }

    var allInterval = Interval1D.Create(0, indices.Length);

    indices.CopyTo(allInterval, m_workIndices);

    // Seed the best-split buffer with the incoming indices (it used to be cleared). The leaf
    // branch below reads targets/weights through this buffer; if the root never finds a split
    // (or the maximum depth is 0) nothing else writes to it, so the root leaf would otherwise
    // be computed from a cleared buffer rather than from the selected observations.
    indices.CopyTo(allInterval, m_bestSplitWorkIndices);

    m_workIndices.IndexedCopy(targets, allInterval, m_workTargets);

    if (weights.Length != 0)
    {
        Array.Resize(ref m_workWeights, indices.Length);
        m_workIndices.IndexedCopy(weights, allInterval, m_workWeights);
    }
    else
    {
        // Drop weights left over from an earlier weighted call so they are not
        // handed to the impurity calculator for this unweighted build.
        Array.Resize(ref m_workWeights, 0);
    }

    var targetNames = targets.Distinct().ToArray();

    m_impurityCalculator.Init(targetNames, m_workTargets, m_workWeights, allInterval);
    var rootImpurity = m_impurityCalculator.NodeImpurity();

    var nodes = new List<Node>();
    var probabilities = new List<double[]>();

    var stack = new Stack<DecisionNodeCreationItem>(100);
    stack.Push(new DecisionNodeCreationItem(0, NodePositionType.Root, allInterval, rootImpurity, 0));

    // Stays true until an item is popped whose parent node is a split node.
    var first = true;
    var currentNodeIndex = 0;
    var currentLeafProbabilityIndex = 0;

    while (stack.Count > 0)
    {
        var bestSplitResult = SplitResult.Initial();
        var bestFeatureIndex = -1;
        var parentItem = stack.Pop();

        var parentInterval = parentItem.Interval;
        var parentNodeDepth = parentItem.NodeDepth;
        Node parentNode = Node.Default();

        if (nodes.Count != 0)
        {
            parentNode = nodes[parentItem.ParentIndex];
        }

        var parentNodePositionType = parentItem.NodeType;
        var parentImpurity = parentItem.Impurity;

        if (first && parentNode.FeatureIndex != -1)
        {
            nodes[0] = new Node(parentNode.FeatureIndex, parentNode.Value, -1, -1,
                parentNode.NodeIndex, parentNode.LeafProbabilityIndex);

            first = false;
        }

        var isLeaf = (parentNodeDepth >= m_maximumTreeDepth);

        if (!isLeaf)
        {
            SetNextFeatures(numberOfFeatures);

            foreach (var featureIndex in m_featureCandidates)
            {
                // Sort the node's samples by this feature, carrying the indices along,
                // then re-gather targets (and weights) in the sorted order.
                m_workIndices.IndexedCopy(observations.ColumnView(featureIndex), parentInterval, m_workFeature);
                m_workFeature.SortWith(parentInterval, m_workIndices);
                m_workIndices.IndexedCopy(targets, parentInterval, m_workTargets);

                if (weights.Length != 0)
                {
                    m_workIndices.IndexedCopy(weights, parentInterval, m_workWeights);
                }

                var splitResult = m_splitSearcher.FindBestSplit(m_impurityCalculator, m_workFeature,
                    m_workTargets, parentInterval, parentImpurity);

                if (splitResult.ImpurityImprovement > bestSplitResult.ImpurityImprovement)
                {
                    bestSplitResult = splitResult;
                    // Remember the index ordering that produced the best split so far.
                    m_workIndices.CopyTo(parentInterval, m_bestSplitWorkIndices);
                    bestFeatureIndex = featureIndex;
                }
            }

            isLeaf = isLeaf || (bestSplitResult.SplitIndex < 0);
            isLeaf = isLeaf || (bestSplitResult.ImpurityImprovement < m_minimumInformationGain);

            // Restore the ordering of the best split; child intervals index into it.
            m_bestSplitWorkIndices.CopyTo(parentInterval, m_workIndices);
        }

        if (isLeaf)
        {
            m_bestSplitWorkIndices.IndexedCopy(targets, parentInterval, m_workTargets);

            if (weights.Length != 0)
            {
                m_bestSplitWorkIndices.IndexedCopy(weights, parentInterval, m_workWeights);
            }

            m_impurityCalculator.UpdateInterval(parentInterval);
            var value = m_impurityCalculator.LeafValue();

            var leaf = new Node(-1, value, -1, -1, currentNodeIndex++, currentLeafProbabilityIndex++);
            probabilities.Add(m_impurityCalculator.LeafProbabilities());

            nodes.Add(leaf);
            nodes.UpdateParent(parentNode, leaf, parentNodePositionType);
        }
        else
        {
            // Improvement scaled by the fraction of all samples that reach this node.
            m_variableImportance[bestFeatureIndex] +=
                bestSplitResult.ImpurityImprovement * parentInterval.Length / allInterval.Length;

            var split = new Node(bestFeatureIndex, bestSplitResult.Threshold, -1, -1, currentNodeIndex++, -1);

            nodes.Add(split);
            nodes.UpdateParent(parentNode, split, parentNodePositionType);

            var nodeDepth = parentNodeDepth + 1;

            // Right is pushed first so that the left child is popped and processed first.
            stack.Push(new DecisionNodeCreationItem(split.NodeIndex, NodePositionType.Right,
                Interval1D.Create(bestSplitResult.SplitIndex, parentInterval.ToExclusive),
                bestSplitResult.ImpurityRight, nodeDepth));

            stack.Push(new DecisionNodeCreationItem(split.NodeIndex, NodePositionType.Left,
                Interval1D.Create(parentInterval.FromInclusive, bestSplitResult.SplitIndex),
                bestSplitResult.ImpurityLeft, nodeDepth));
        }
    }

    if (first) // No valid split return single leaf result
    {
        m_impurityCalculator.UpdateInterval(allInterval);

        var leaf = new Node(-1, m_impurityCalculator.LeafValue(), -1, -1,
            currentNodeIndex++, currentLeafProbabilityIndex++);

        probabilities.Add(m_impurityCalculator.LeafProbabilities());

        nodes.Clear();
        nodes.Add(leaf);
    }

    return new BinaryTree(nodes, probabilities, targetNames, m_variableImportance.ToArray());
}
/// <summary>
/// Learns an adaboost regression model from the observations selected by
/// <paramref name="indices"/>. Boosting runs for at most m_iterations rounds and stops
/// early when a round fails, when the ensemble or model error reaches 0, or when the
/// sample weights sum to 0 or less.
/// </summary>
/// <param name="observations">Feature matrix; its column count is the number of features.</param>
/// <param name="targets">Target values, addressed through <paramref name="indices"/>.</param>
/// <param name="indices">Indices of the observations used for learning.</param>
/// <returns>A model built from the learned models, their weights and the variable importance.</returns>
public RegressionAdaBoostModel Learn(F64Matrix observations, double[] targets, int[] indices)
{
    // 0 is treated as "not set"; fall back to a depth of 3.
    if (m_maximumTreeDepth == 0)
    {
        m_maximumTreeDepth = 3;
    }

    // NOTE(review): the trailing 42 is presumably a fixed random seed - confirm against
    // the RegressionDecisionTreeLearner constructor.
    m_modelLearner = new RegressionDecisionTreeLearner(m_maximumTreeDepth, m_minimumSplitSize,
        observations.ColumnCount, m_minimumInformationGain, 42);

    m_modelErrors.Clear();
    m_modelWeights.Clear();
    m_models.Clear();

    Array.Resize(ref m_sampleWeights, targets.Length);
    Array.Resize(ref m_workErrors, targets.Length);
    Array.Resize(ref m_indexedTargets, indices.Length);
    Array.Resize(ref m_sampleIndices, indices.Length);

    indices.IndexedCopy(targets, Interval1D.Create(0, indices.Length), m_indexedTargets);

    // Every selected observation starts with the same weight.
    var initialWeight = 1.0 / indices.Length;
    for (int i = 0; i < indices.Length; i++)
    {
        var index = indices[i];
        m_sampleWeights[index] = initialWeight;
    }

    for (int i = 0; i < m_iterations; i++)
    {
        if (!Boost(observations, targets, indices, i))
        {
            break;
        }

        var ensembleError = ErrorEstimate(observations, indices);

        if (ensembleError == 0.0)
        {
            break;
        }

        if (m_modelErrors[i] == 0.0)
        {
            break;
        }

        var weightSum = m_sampleWeights.Sum(indices);
        if (weightSum <= 0.0)
        {
            break;
        }

        // Normalize the weights for the next boosting round. The previous condition
        // (i == m_iterations - 1) normalized only after the final round, where the
        // weights are not read again by this method, so it never affected a round.
        if (i < m_iterations - 1)
        {
            for (int j = 0; j < indices.Length; j++)
            {
                var index = indices[j];
                m_sampleWeights[index] = m_sampleWeights[index] / weightSum;
            }
        }
    }

    var featuresCount = observations.ColumnCount;
    var variableImportance = VariableImportance(featuresCount);

    return new RegressionAdaBoostModel(m_models.ToArray(), m_modelWeights.ToArray(), variableImportance);
}
// A sub view over rows [0, 2) and columns [0, 3) is compared, via AssertMatrixView,
// with the matrix formed from rows 0 and 1.
public unsafe void F64MatrixView_SubView()
{
    var source = Matrix();

    using (var pinned = source.GetPinnedPointer())
    {
        var rowRange = Interval1D.Create(0, 2);
        var columnRange = Interval1D.Create(0, 3);
        var window = pinned.View().View(Interval2D.Create(rowRange, columnRange));

        var expectedRows = source.Rows(new int[] { 0, 1 });

        AssertMatrixView(expectedRows, window);
    }
}