Пример #1
0
        public void learn_logistic_regression()
        {
            // Documentation sample (the region below) followed by regression checks
            // on the metrics and model weights of the boosted classifier it produces.

            #region doc_learn_lr
            // This example shows how to use AdaBoost to train more complex
            // models than a simple DecisionStump. For example, we will use
            // it to train a boosted Logistic Regression classifier.

            // Let's use some synthetic data for that: The Yin-Yang dataset is
            // a simple 2D binary non-linear decision problem where the points
            // belonging to each of the classes intertwine in a Yin-Yang shape:
            var dataset = new YinYang();
            double[][] inputs = dataset.Instances;
            int[] outputs = Classes.ToZeroOne(dataset.ClassLabels);

            // Create an AdaBoost for Logistic Regression as:
            var teacher = new AdaBoost<LogisticRegression>()
            {
                // Here we can specify how each regression should be learned:
                Learner = (param) => new IterativeReweightedLeastSquares<LogisticRegression>()
                {
                    ComputeStandardErrors = false,
                    MaxIterations = 50,
                    Tolerance = 0
                },

                // Train until:
                MaxIterations = 50,
                Tolerance = 1e-5,
            };

            // Now, we can use the Learn method to learn a boosted classifier
            Boost<LogisticRegression> classifier = teacher.Learn(inputs, outputs);

            // And we can test its performance using (error should be 0.11):
            ConfusionMatrix cm = ConfusionMatrix.Estimate(classifier, inputs, outputs);

            double error = cm.Error;    // should be 0.11
            double acc = cm.Accuracy;   // should be 0.89
            double kappa = cm.Kappa;    // should be 0.78

            // And compute a decision for a single data point using:
            bool y = classifier.Decide(inputs[0]); // result should be false
            #endregion

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(false, y);
            Assert.AreEqual(0.11, error, delta);
            Assert.AreEqual(0.89, acc, delta);
            Assert.AreEqual(0.78, kappa, delta);

            Assert.AreEqual(2, classifier.Models.Count);
            Assert.AreEqual(0.63576818449825168, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.36423181550174832, classifier.Models[1].Weight, delta);

            // Smoke check: Compute must run for every sample. The predictions are
            // not compared with the labels because the expected error is 0.11.
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }
        }
Пример #2
0
        public void ConstructorTest()
        {
            // Boosts decision stumps on a small 2D dataset and checks the reported
            // error, the model weights and the prediction for every training sample.
            double[][] inputs =
            {
                new double[] {  10, 42 },
                new double[] { 162, 96 },
                new double[] { 125, 20 },
                new double[] {  96,  6 },
                new double[] {   2, 73 },
                new double[] {  52, 51 },
                new double[] {  71, 49 },
            };

            // Class labels associated with each of the samples above
            int[] outputs =
            {
                -1, -1, +1, +1, -1, -1, +1
            };

            var classifier = new Boost<DecisionStump>();

            var teacher = new AdaBoost<DecisionStump>(classifier)
            {
                // Builds a stump from the weights supplied by the teacher
                Creation = (weights) =>
                {
                    var stump = new DecisionStump(2);
                    stump.Learn(inputs, outputs, weights);
                    return stump;
                },

                Iterations = 5,
                Tolerance = 1e-3
            };

            double error = teacher.Run(inputs, outputs);

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(0.0, error, delta);

            Assert.AreEqual(5, classifier.Models.Count);
            Assert.AreEqual(0.16684734250395147, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.22329026900109736, classifier.Models[1].Weight, delta);
            Assert.AreEqual(0.28350372170582383, classifier.Models[2].Weight, delta);
            Assert.AreEqual(0.16684734250395139, classifier.Models[3].Weight, delta);
            Assert.AreEqual(0.15951132428517592, classifier.Models[4].Weight, delta);

            // Every training sample must be classified as its label
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(outputs[i], actual[i]);
            }
        }
Пример #3
0
        public void learn_decision_trees()
        {
            // Documentation sample (the region below) followed by regression checks
            // on the error and model weights of the boosted classifier it produces.

            #region doc_learn_dt
            // This example shows how to use AdaBoost to train more complex
            // models than a simple DecisionStump. For example, we will use
            // it to train boosted Decision Trees.

            // Let's use some synthetic data for that: The Yin-Yang dataset is
            // a simple 2D binary non-linear decision problem where the points
            // belonging to each of the classes intertwine in a Yin-Yang shape:
            var dataset = new YinYang();
            double[][] inputs = dataset.Instances;
            int[] outputs = Classes.ToZeroOne(dataset.ClassLabels);

            // Create an AdaBoost for Decision Trees as:
            var teacher = new AdaBoost<DecisionTree>()
            {
                // Here we can specify how each tree should be learned:
                Learner = (param) => new C45Learning()
                {
                    // i.e.
                    // MaxHeight =
                    // MaxVariables =
                },

                // Train until:
                MaxIterations = 50,
                Tolerance = 1e-5,
            };

            // Now, we can use the Learn method to learn a boosted classifier
            Boost<DecisionTree> classifier = teacher.Learn(inputs, outputs);

            // And we can test its performance using (error should be 0):
            double error = ConfusionMatrix.Estimate(classifier, inputs, outputs).Error;

            // And compute a decision for a single data point using:
            bool y = classifier.Decide(inputs[0]); // result should be false
            #endregion

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(false, y);
            Assert.AreEqual(0.0, error, delta);

            // Only the weights of the first six of the 22 models are checked
            Assert.AreEqual(22, classifier.Models.Count);
            Assert.AreEqual(0.063497989403001331, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.081129615464770655, classifier.Models[1].Weight, delta);
            Assert.AreEqual(0.083062765085567689, classifier.Models[2].Weight, delta);
            Assert.AreEqual(0.050307480220333232, classifier.Models[3].Weight, delta);
            Assert.AreEqual(0.044287142080877882, classifier.Models[4].Weight, delta);
            Assert.AreEqual(0.042772219812778081, classifier.Models[5].Weight, delta);

            // Smoke check: Compute must run for every sample
            // (the predictions themselves are not asserted here).
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }
        }
Пример #4
0
        public void ConstructorTest2()
        {
            // Boosts logistic regressions (wrapped as weak classifiers) on the
            // Yin-Yang dataset and checks the reported error and model weights.
            var dataset = new YinYang();

            double[][] inputs = dataset.Instances;
            bool[] outputs2 = dataset.ClassLabels;

            // Convert the boolean class labels into 0/1 integers
            int[] outputs = outputs2.Apply(x => x ? 1 : 0);

            var classifier = new Boost<Weak<LogisticRegression>>();

            var teacher = new AdaBoost<Weak<LogisticRegression>>(classifier)
            {
                // Builds a weak classifier from the weights supplied by the teacher
                Creation = (weights) =>
                {
                    LogisticRegression reg = new LogisticRegression(2, intercept: 1);

                    IterativeReweightedLeastSquares irls = new IterativeReweightedLeastSquares(reg)
                    {
                        ComputeStandardErrors = false
                    };

                    // Run 50 passes of IRLS with the supplied weights
                    for (int i = 0; i < 50; i++)
                    {
                        irls.Run(inputs, outputs, weights);
                    }

                    // The weak decision is the sign of (regression output - 0.5)
                    return new Weak<LogisticRegression>(reg, (s, x) => Math.Sign(s.Compute(x) - 0.5));
                },

                Iterations = 50,
                Tolerance = 1e-5,
            };

            double error = teacher.Run(inputs, outputs);

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(0.11, error, delta);

            Assert.AreEqual(2, classifier.Models.Count);
            Assert.AreEqual(0.63576818449825168, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.36423181550174832, classifier.Models[1].Weight, delta);

            // Smoke check: Compute must run for every sample. The predictions are
            // deliberately not compared with the labels element by element, since
            // the expected training error asserted above is 0.11.
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }
        }
Пример #5
0
        public void ConstructorTest2()
        {
            // Boosts logistic regressions (wrapped as weak classifiers) on the
            // yinyang data and checks the reported error and model weights.

            // Columns 0 and 1 hold the inputs; column 2 holds the class labels
            double[][] inputs = LeastSquaresLearningTest.yinyang.GetColumns(0, 1).ToArray();
            int[] outputs = LeastSquaresLearningTest.yinyang.GetColumn(2).ToInt32();

            // Regression targets: 1.0 for positive labels, 0.0 otherwise
            var outputs2 = outputs.Apply(x => x > 0 ? 1.0 : 0.0);

            var classifier = new Boost<Weak<LogisticRegression>>();

            var teacher = new AdaBoost<Weak<LogisticRegression>>(classifier)
            {
                // Builds a weak classifier from the weights supplied by the teacher
                Creation = (weights) =>
                {
                    LogisticRegression reg = new LogisticRegression(2, intercept: 1);

                    IterativeReweightedLeastSquares irls = new IterativeReweightedLeastSquares(reg)
                    {
                        ComputeStandardErrors = false
                    };

                    // Run 50 passes of IRLS with the supplied weights
                    for (int i = 0; i < 50; i++)
                    {
                        irls.Run(inputs, outputs2, weights);
                    }

                    // The weak decision is the sign of (regression output - 0.5)
                    return new Weak<LogisticRegression>(reg, (s, x) => Math.Sign(s.Compute(x) - 0.5));
                },

                Iterations = 50,
                Tolerance = 1e-5,
            };

            double error = teacher.Run(inputs, outputs);

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(0.11, error, delta);

            Assert.AreEqual(2, classifier.Models.Count);
            Assert.AreEqual(0.63576818449825168, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.36423181550174832, classifier.Models[1].Weight, delta);

            // Smoke check: Compute must run for every sample. The predictions are
            // deliberately not compared with the labels element by element, since
            // the expected training error asserted above is 0.11.
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }
        }
Пример #6
0
        /// <summary>
        ///   Gets the fraction of samples for which the classifier's
        ///   output differs from the expected output, i.e. the count
        ///   of mismatches over the total number of input samples.
        /// </summary>
        ///
        /// <param name="inputs">The input samples to be classified.</param>
        /// <param name="outputs">The expected output for each sample in <paramref name="inputs"/>.</param>
        ///
        /// <returns>The number of mismatches divided by <c>inputs.Length</c>.</returns>
        ///
        public double ComputeError(double[][] inputs, int[] outputs)
        {
            int total = inputs.Length;
            int errors = 0;

            for (int index = 0; index < total; index++)
            {
                // The expected label is read before the classifier is invoked
                if (outputs[index] != classifier.Compute(inputs[index]))
                {
                    errors += 1;
                }
            }

            // Cast before dividing so the ratio is not truncated to an integer
            return (double)errors / total;
        }
Пример #7
0
        public void ConstructorTest()
        {
            // Boosts decision stumps on a small 2D dataset and checks the reported
            // error, the model weights and the prediction for every training sample.
            double[][] inputs =
            {
                new double[] { 10, 42 },
                new double[] { 162, 96 },
                new double[] { 125, 20 },
                new double[] { 96, 6 },
                new double[] { 2, 73 },
                new double[] { 52, 51 },
                new double[] { 71, 49 },
            };

            // Class labels associated with each of the samples above
            int[] outputs =
            {
                -1, -1, +1, +1, -1, -1, +1
            };

            var classifier = new Boost<DecisionStump>();

            var teacher = new AdaBoost<DecisionStump>(classifier)
            {
                // Builds a stump from the weights supplied by the teacher
                Creation = (weights) =>
                {
                    var stump = new DecisionStump(2);
                    stump.Learn(inputs, outputs, weights);
                    return stump;
                },

                Iterations = 5,
                Tolerance = 1e-3
            };

            double error = teacher.Run(inputs, outputs);

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(0.0, error, delta);

            Assert.AreEqual(5, classifier.Models.Count);
            Assert.AreEqual(0.16684734250395147, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.22329026900109736, classifier.Models[1].Weight, delta);
            Assert.AreEqual(0.28350372170582383, classifier.Models[2].Weight, delta);
            Assert.AreEqual(0.16684734250395139, classifier.Models[3].Weight, delta);
            Assert.AreEqual(0.15951132428517592, classifier.Models[4].Weight, delta);

            // Every training sample must be classified as its label
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(outputs[i], actual[i]);
            }
        }
Пример #8
0
        public void ConstructorTest2()
        {
            // Boosts logistic regressions (wrapped as weak classifiers) on the
            // yinyang data and checks the reported error and model weights.

            // Columns 0 and 1 hold the inputs; column 2 holds the class labels
            double[][] inputs = LeastSquaresLearningTest.yinyang.GetColumns(0, 1).ToArray();
            int[] outputs = LeastSquaresLearningTest.yinyang.GetColumn(2).ToInt32();

            // Regression targets: 1.0 for positive labels, 0.0 otherwise
            var outputs2 = outputs.Apply(x => x > 0 ? 1.0 : 0.0);

            var classifier = new Boost<Weak<LogisticRegression>>();

            var teacher = new AdaBoost<Weak<LogisticRegression>>(classifier)
            {
                // Builds a weak classifier from the weights supplied by the teacher
                Creation = (weights) =>
                {
                    LogisticRegression reg = new LogisticRegression(2, intercept: 1);

                    IterativeReweightedLeastSquares irls = new IterativeReweightedLeastSquares(reg)
                    {
                        ComputeStandardErrors = false
                    };

                    // Run 50 passes of IRLS with the supplied weights
                    for (int i = 0; i < 50; i++)
                    {
                        irls.Run(inputs, outputs2, weights);
                    }

                    // The weak decision is the sign of (regression output - 0.5)
                    return new Weak<LogisticRegression>(reg, (s, x) => Math.Sign(s.Compute(x) - 0.5));
                },

                Iterations = 50,
                Tolerance = 1e-5,
            };

            double error = teacher.Run(inputs, outputs);

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(0.11, error, delta);

            Assert.AreEqual(2, classifier.Models.Count);
            Assert.AreEqual(0.63576818449825168, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.36423181550174832, classifier.Models[1].Weight, delta);

            // Smoke check: Compute must run for every sample. The predictions are
            // deliberately not compared with the labels element by element, since
            // the expected training error asserted above is 0.11.
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }
        }
Пример #9
0
        public void learn_stump_classifier()
        {
            // Documentation sample (the region below) followed by regression checks
            // on the metrics, model weights and per-sample predictions.

            #region doc_learn
            // Let's say we want to classify the following 2-dimensional
            // data samples into 2 possible classes, either true or false:
            double[][] inputs =
            {
                new double[] {  10, 42 },
                new double[] { 162, 96 },
                new double[] { 125, 20 },
                new double[] {  96,  6 },
                new double[] {   2, 73 },
                new double[] {  52, 51 },
                new double[] {  71, 49 },
            };

            // And those are their associated class labels
            bool[] outputs =
            {
                false, false, true, true, false, false, true
            };

            // We can create an AdaBoost algorithm as:
            var learner = new AdaBoost<DecisionStump>()
            {
                Learner = (p) => new ThresholdLearning(),

                // Train until:
                MaxIterations = 5,
                Tolerance = 1e-3
            };

            // Now, we can use the Learn method to learn a boosted classifier
            Boost<DecisionStump> classifier = learner.Learn(inputs, outputs);

            // And we can test its performance using (error should be 0):
            ConfusionMatrix cm = ConfusionMatrix.Estimate(classifier, inputs, outputs);

            double error = cm.Error;    // should be 0.0
            double acc = cm.Accuracy;   // should be 1.0
            double kappa = cm.Kappa;    // should be 1.0

            // And compute a decision for a single data point using:
            bool y = classifier.Decide(inputs[0]); // result should be false
            #endregion

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(false, y);
            Assert.AreEqual(0.0, error, delta);
            Assert.AreEqual(1.0, acc, delta);
            Assert.AreEqual(1.0, kappa, delta);

            Assert.AreEqual(5, classifier.Models.Count);
            Assert.AreEqual(0.16684734250395147, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.22329026900109736, classifier.Models[1].Weight, delta);
            Assert.AreEqual(0.28350372170582383, classifier.Models[2].Weight, delta);
            Assert.AreEqual(0.16684734250395139, classifier.Models[3].Weight, delta);
            Assert.AreEqual(0.15951132428517592, classifier.Models[4].Weight, delta);

            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }

            // Compute is expected to give +1 for true labels and -1 for false ones
            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(outputs[i] ? 1 : -1, actual[i]);
            }
        }
Пример #10
0
        public void ConstructorTest()
        {
            // Let's say we want to classify the following 2-dimensional
            // data samples into 2 possible classes, either -1 or +1:
            double[][] inputs =
            {
                new double[] {  10, 42 },
                new double[] { 162, 96 },
                new double[] { 125, 20 },
                new double[] {  96,  6 },
                new double[] {   2, 73 },
                new double[] {  52, 51 },
                new double[] {  71, 49 },
            };

            // And those are their associated class labels
            int[] outputs =
            {
                -1, -1, +1, +1, -1, -1, +1
            };

            // First, we create a classifier using:
            var classifier = new Boost<DecisionStump>();

            // Now, we can create an AdaBoost learning algorithm as:
            var teacher = new AdaBoost<DecisionStump>(classifier)
            {
                // Builds a stump from the weights supplied by the teacher
                Creation = (weights) =>
                {
                    var stump = new DecisionStump(2);
                    stump.Learn(inputs, outputs, weights);
                    return stump;
                },

                // Train until:
                MaxIterations = 5,
                Tolerance = 1e-3
            };

            // Now, we can use the Run method to learn:
            double error = teacher.Run(inputs, outputs); // error should be zero.

            // Now, we can compute the model outputs for new samples using
            int y = classifier.Compute(new double[] { 71, 48 }); // should be 1

            // Floating-point results are compared with a small tolerance so the test
            // does not depend on the exact rounding of the last few bits.
            const double delta = 1e-10;

            Assert.AreEqual(1, y);

            Assert.AreEqual(0.0, error, delta);

            Assert.AreEqual(5, classifier.Models.Count);
            Assert.AreEqual(0.16684734250395147, classifier.Models[0].Weight, delta);
            Assert.AreEqual(0.22329026900109736, classifier.Models[1].Weight, delta);
            Assert.AreEqual(0.28350372170582383, classifier.Models[2].Weight, delta);
            Assert.AreEqual(0.16684734250395139, classifier.Models[3].Weight, delta);
            Assert.AreEqual(0.15951132428517592, classifier.Models[4].Weight, delta);

            // Every training sample must be classified as its label
            int[] actual = new int[outputs.Length];
            for (int i = 0; i < actual.Length; i++)
            {
                actual[i] = classifier.Compute(inputs[i]);
            }

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(outputs[i], actual[i]);
            }
        }