// Checks the node impurity reported by the Gini calculator for a fixed
// sequence of class labels (0, 1 and 2) when no sample weights are supplied.
public void GiniClasificationImpurityCalculator_NodeImpurity()
{
    // Arrange: label fixture covering the whole parent interval.
    var labels = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
    var fullInterval = Interval1D.Create(0, labels.Length);
    var distinctLabels = labels.Distinct().ToArray();
    var noWeights = new double[0];

    var calculator = new GiniClasificationImpurityCalculator();
    calculator.Init(distinctLabels, labels, noWeights, fullInterval);

    // Act: the split index is moved before the node impurity is queried,
    // exactly as in the original sequence of calls.
    calculator.UpdateIndex(50);
    var nodeImpurity = calculator.NodeImpurity();

    // Assert
    const double expectedImpurity = 0.66666666666666674;
    Assert.AreEqual(expectedImpurity, nodeImpurity, 0.000001);
}
// Checks the leaf value reported by the Gini calculator when every sample
// carries a weight produced by the Weight helper (defined outside this method).
public void GiniClasificationImpurityCalculator_LeafValue_Weighted()
{
    // Arrange: label fixture plus one weight per label.
    var labels = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
    var distinctLabels = labels.Distinct().ToArray();
    var sampleWeights = labels.Select(label => Weight(label)).ToArray();
    var fullInterval = Interval1D.Create(0, labels.Length);

    var calculator = new GiniClasificationImpurityCalculator();
    calculator.Init(distinctLabels, labels, sampleWeights, fullInterval);

    // The node impurity is queried before the index update, as in the
    // original call sequence; its value is not part of the assertion.
    var nodeImpurity = calculator.NodeImpurity();

    // Act
    calculator.UpdateIndex(50);
    var leafValue = calculator.LeafValue();

    // Assert
    Assert.AreEqual(2.0, leafValue, 0.000001);
}
// Checks the pair of child impurities reported by the Gini calculator after
// the split index has been moved to 50, with no sample weights supplied.
public void GiniClasificationImpurityCalculator_ChildImpurities()
{
    // Arrange: label fixture covering the whole parent interval.
    var labels = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
    var fullInterval = Interval1D.Create(0, labels.Length);
    var distinctLabels = labels.Distinct().ToArray();
    var noWeights = new double[0];

    var calculator = new GiniClasificationImpurityCalculator();
    calculator.Init(distinctLabels, labels, noWeights, fullInterval);

    // The node impurity is queried before the index update, as in the
    // original call sequence; its value is not part of the assertion.
    var nodeImpurity = calculator.NodeImpurity();

    // Act
    calculator.UpdateIndex(50);
    var childImpurities = calculator.ChildImpurities();

    // Assert
    var expectedImpurities = new ChildImpurities(0.0, .5);
    Assert.AreEqual(expectedImpurities, childImpurities);
}
// Checks the impurity improvement reported by the Gini calculator at two
// successive split positions (50, then 96), measured against the node
// impurity captured right after initialisation. No sample weights are used.
public void GiniClasificationImpurityCalculator_ImpurityImprovement()
{
    // Arrange: label fixture covering the whole parent interval.
    var labels = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
    var fullInterval = Interval1D.Create(0, labels.Length);
    var distinctLabels = labels.Distinct().ToArray();
    var noWeights = new double[0];

    var calculator = new GiniClasificationImpurityCalculator();
    calculator.Init(distinctLabels, labels, noWeights, fullInterval);
    var nodeImpurity = calculator.NodeImpurity();

    // Act + Assert: first split position.
    calculator.UpdateIndex(50);
    var improvementAt50 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.33333333333333343, improvementAt50, 0.000001);

    // Act + Assert: second split position, reached from the first one.
    calculator.UpdateIndex(96);
    var improvementAt96 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.28047839506172845, improvementAt96, 0.000001);
}
// Weighted variant of the impurity-improvement check: every sample carries a
// weight produced by the Weight helper (defined outside this method), and the
// improvement is verified at split positions 50 and then 96.
public void GiniClasificationImpurityCalculator_ImpurityImprovement_Weighted()
{
    // Arrange: label fixture plus one weight per label.
    var labels = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
    var distinctLabels = labels.Distinct().ToArray();
    var sampleWeights = labels.Select(label => Weight(label)).ToArray();
    var fullInterval = Interval1D.Create(0, labels.Length);

    var calculator = new GiniClasificationImpurityCalculator();
    calculator.Init(distinctLabels, labels, sampleWeights, fullInterval);
    var nodeImpurity = calculator.NodeImpurity();

    // Act + Assert: first split position.
    calculator.UpdateIndex(50);
    var improvementAt50 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.14015151515151511, improvementAt50, 0.000001);

    // Act + Assert: second split position, reached from the first one.
    calculator.UpdateIndex(96);
    var improvementAt96 = calculator.ImpurityImprovement(nodeImpurity);
    Assert.AreEqual(0.17358104858104859, improvementAt96, 0.000001);
}
// Checks the split that RandomSplitSearcher finds on the aptitude data set
// (feature column "AptitudeTestScore", target column "Pass") against a
// previously recorded SplitResult.
public void RandomSplitSearcher_FindBestSplit()
{
    // Arrange: load both columns from the embedded CSV resource.
    var csv = new CsvParser(() => new StringReader(Resources.AptitudeData));
    var scores = csv.EnumerateRows("AptitudeTestScore").ToF64Vector();
    var passed = csv.EnumerateRows("Pass").ToF64Vector();
    var fullInterval = Interval1D.Create(0, scores.Length);

    // Sort by feature value; Array.Sort(keys, items) reorders the targets
    // alongside their corresponding feature values.
    Array.Sort(scores, passed);

    // Distinct targets are taken after sorting, so their order follows the
    // sorted sequence, just as in the original inline expression.
    var distinctTargets = passed.Distinct().ToArray();
    var calculator = new GiniClasificationImpurityCalculator();
    calculator.Init(distinctTargets, passed, new double[0], fullInterval);
    var nodeImpurity = calculator.NodeImpurity();

    var searcher = new RandomSplitSearcher(1, 42);

    // Act
    var foundSplit = searcher.FindBestSplit(calculator, scores, passed, fullInterval, nodeImpurity);

    // Assert
    var expectedSplit = new SplitResult(
        15,
        3.6724258636461693,
        0.037941545633853213,
        0.39111111111111119,
        0.49586776859504134);
    Assert.AreEqual(expectedSplit, foundSplit);
}