// Unweighted case: initialise over the whole target array, move the split position to 50,
        // then compare NodeImpurity() with the stored reference value.
        public void GiniClasificationImpurityCalculator_NodeImpurity()
        {
            // Arrange: 150 targets; the first 50 entries are all 0, the rest are 1s and 2s.
            var targets = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var calculator = new GiniClasificationImpurityCalculator();

            // An empty weights array is passed, i.e. no per-sample weights are supplied.
            calculator.Init(targets.Distinct().ToArray(), targets, new double[0],
                            Interval1D.Create(0, targets.Length));

            // Act
            calculator.UpdateIndex(50);
            var nodeImpurity = calculator.NodeImpurity();

            // Assert
            const double expectedImpurity = 0.66666666666666674;
            Assert.AreEqual(expectedImpurity, nodeImpurity, 0.000001);
        }
        // Weighted case: per-sample weights come from Weight(target); after moving the split
        // position to 50 the test expects LeafValue() to be 2.0.
        public void GiniClasificationImpurityCalculator_LeafValue_Weighted()
        {
            // Arrange
            var targets = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var classLabels = targets.Distinct().ToArray();
            var sampleWeights = targets.Select(label => Weight(label)).ToArray();

            var calculator = new GiniClasificationImpurityCalculator();
            calculator.Init(classLabels, targets, sampleWeights,
                            Interval1D.Create(0, targets.Length));

            // The returned impurity is not used here; the call is kept so the calculator
            // receives the same sequence of calls before the split position is moved.
            calculator.NodeImpurity();

            // Act
            calculator.UpdateIndex(50);
            var leafValue = calculator.LeafValue();

            // Assert
            Assert.AreEqual(2.0, leafValue, 0.000001);
        }
        // Unweighted case: with the split position at 50, ChildImpurities() is expected to
        // equal ChildImpurities(0.0, 0.5).
        public void GiniClasificationImpurityCalculator_ChildImpurities()
        {
            // Arrange: the first 50 targets are all 0, so everything before index 50 is one class.
            var targets = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var calculator = new GiniClasificationImpurityCalculator();
            calculator.Init(targets.Distinct().ToArray(), targets, new double[0],
                            Interval1D.Create(0, targets.Length));

            // The returned impurity is not used here; the call is kept so the calculator
            // receives the same sequence of calls before the split position is moved.
            calculator.NodeImpurity();

            // Act
            calculator.UpdateIndex(50);
            var childImpurities = calculator.ChildImpurities();

            // Assert: equality is whatever ChildImpurities defines; no tolerance is passed.
            var expectedImpurities = new ChildImpurities(0.0, 0.5);
            Assert.AreEqual(expectedImpurities, childImpurities);
        }
        // Unweighted case: checks ImpurityImprovement(parent impurity) at two successive
        // split positions (50, then 96) against stored reference values.
        public void GiniClasificationImpurityCalculator_ImpurityImprovement()
        {
            // Arrange
            var targets = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };

            var calculator = new GiniClasificationImpurityCalculator();
            calculator.Init(targets.Distinct().ToArray(), targets, new double[0],
                            Interval1D.Create(0, targets.Length));

            // Impurity taken right after Init, before any UpdateIndex call.
            var parentImpurity = calculator.NodeImpurity();

            // Split positions and the improvement expected at each one, in call order.
            var splitPositions = new[] { 50, 96 };
            var expectedImprovements = new[] { 0.33333333333333343, 0.28047839506172845 };

            // Act + Assert: the position is advanced step by step on the same calculator,
            // and each improvement is verified before moving on to the next position.
            for (var step = 0; step < splitPositions.Length; step++)
            {
                calculator.UpdateIndex(splitPositions[step]);
                var improvement = calculator.ImpurityImprovement(parentImpurity);

                Assert.AreEqual(expectedImprovements[step], improvement, 0.000001);
            }
        }
        // Weighted case: per-sample weights come from Weight(target); checks
        // ImpurityImprovement(parent impurity) at split positions 50 and then 96.
        public void GiniClasificationImpurityCalculator_ImpurityImprovement_Weighted()
        {
            // Arrange
            var targets = new double[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, };
            var classLabels = targets.Distinct().ToArray();
            var sampleWeights = targets.Select(label => Weight(label)).ToArray();

            var calculator = new GiniClasificationImpurityCalculator();
            calculator.Init(classLabels, targets, sampleWeights,
                            Interval1D.Create(0, targets.Length));

            // Impurity taken right after Init, before any UpdateIndex call.
            var parentImpurity = calculator.NodeImpurity();

            // Split positions and the improvement expected at each one, in call order.
            var splitPositions = new[] { 50, 96 };
            var expectedImprovements = new[] { 0.14015151515151511, 0.17358104858104859 };

            // Act + Assert: the position is advanced step by step on the same calculator,
            // and each improvement is verified before moving on to the next position.
            for (var step = 0; step < splitPositions.Length; step++)
            {
                calculator.UpdateIndex(splitPositions[step]);
                var improvement = calculator.ImpurityImprovement(parentImpurity);

                Assert.AreEqual(expectedImprovements[step], improvement, 0.000001);
            }
        }
Esempio n. 6
0
        // Runs RandomSplitSearcher.FindBestSplit on the "AptitudeTestScore" feature and "Pass"
        // targets from Resources.AptitudeData and compares the result with a stored SplitResult.
        public void RandomSplitSearcher_FindBestSplit()
        {
            // Arrange: read the feature column first, then the target column.
            var csv = new CsvParser(() => new StringReader(Resources.AptitudeData));
            var scores = csv.EnumerateRows("AptitudeTestScore").ToF64Vector();
            var passed = csv.EnumerateRows("Pass").ToF64Vector();
            var fullInterval = Interval1D.Create(0, scores.Length);

            // Sort by feature value; Array.Sort(keys, items) reorders the targets in step.
            Array.Sort(scores, passed);

            // Distinct labels are collected after sorting, so their order follows the sorted targets.
            var giniCalculator = new GiniClasificationImpurityCalculator();
            giniCalculator.Init(passed.Distinct().ToArray(), passed, new double[0], fullInterval);
            var parentImpurity = giniCalculator.NodeImpurity();

            // NOTE(review): the meaning of the constructor arguments (1, 42) is not visible
            // here; 42 looks like a random seed. Confirm against RandomSplitSearcher.
            var searcher = new RandomSplitSearcher(1, 42);

            var expectedSplit = new SplitResult(15, 3.6724258636461693, 0.037941545633853213,
                                                0.39111111111111119, 0.49586776859504134);

            // Act
            var foundSplit = searcher.FindBestSplit(giniCalculator, scores, passed,
                                                    fullInterval, parentImpurity);

            // Assert: equality is whatever SplitResult defines; no tolerance is passed.
            Assert.AreEqual(expectedSplit, foundSplit);
        }