public void LargeCoefficientsTest()
        {
            double[,] data =
            {
                { 48, 1, 4.40, 0 },
                { 60, 0, 7.89, 1 },
                { 51, 0, 3.48, 0 },
                { 66, 0, 8.41, 1 },
                { 40, 1, 3.05, 0 },
                { 44, 1, 4.56, 0 },
                { 80, 0, 6.91, 1 },
                { 52, 0, 5.69, 0 },
                { 58, 0, 4.01, 0 },
                { 58, 0, 4.48, 0 },
                { 72, 1, 5.97, 0 },
                { 57, 0, 6.71, 1 },
                { 55, 1, 5.36, 0 },
                { 71, 0, 5.68, 0 },
                { 44, 1, 4.61, 0 },
                { 65, 1, 4.80, 0 },
                { 38, 0, 5.06, 0 },
                { 50, 0, 6.40, 0 },
                { 80, 0, 6.67, 1 },
                { 69, 1, 5.79, 0 },
                { 39, 0, 5.42, 0 },
                { 68, 0, 7.61, 1 },
                { 47, 1, 3.24, 0 },
                { 45, 1, 4.29, 0 },
                { 79, 1, 7.44, 1 },
                { 41, 1, 4.60, 0 },
                { 45, 0, 5.91, 0 },
                { 54, 0, 4.77, 0 },
                { 43, 1, 5.62, 0 },
                { 62, 1, 7.92, 1 },
                { 72, 1, 7.92, 1 },
                { 57, 1, 6.19, 0 },
                { 39, 1, 2.37, 0 },
                { 51, 0, 5.84, 0 },
                { 73, 1, 5.94, 0 },
                { 41, 1, 3.82, 0 },
                { 35, 0, 2.35, 0 },
                { 69, 0, 6.57, 1 },
                { 75, 1, 7.96, 1 },
                { 51, 1, 3.96, 0 },
                { 61, 1, 4.36, 0 },
                { 55, 0, 3.84, 0 },
                { 45, 1, 3.02, 0 },
                { 48, 0, 4.65, 0 },
                { 77, 0, 7.93, 1 },
                { 40, 1, 2.46, 0 },
                { 37, 1, 2.32, 0 },
                { 78, 0, 7.88, 1 },
                { 39, 1, 4.55, 0 },
                { 41, 0, 2.45, 0 },
                { 54, 1, 5.62, 0 },
                { 59, 1, 5.03, 0 },
                { 78, 0, 8.08, 1 },
                { 56, 1, 6.96, 1 },
                { 49, 1, 3.07, 0 },
                { 48, 0, 4.75, 0 },
                { 63, 1, 5.64, 0 },
                { 50, 0, 3.35, 0 },
                { 59, 1, 5.08, 0 },
                { 60, 0, 6.58, 1 },
                { 64, 0, 5.19, 0 },
                { 76, 1, 6.69, 1 },
                { 58, 0, 5.18, 0 },
                { 48, 1, 4.47, 0 },
                { 72, 0, 8.70, 1 },
                { 40, 1, 5.14, 0 },
                { 53, 0, 3.40, 0 },
                { 79, 0, 9.77, 1 },
                { 61, 1, 7.79, 1 },
                { 59, 0, 7.42, 1 },
                { 44, 0, 2.55, 0 },
                { 52, 1, 3.71, 0 },
                { 80, 1, 7.56, 1 },
                { 76, 0, 7.80, 1 },
                { 51, 0, 5.94, 0 },
                { 46, 1, 5.52, 0 },
                { 48, 0, 3.25, 0 },
                { 58, 1, 4.71, 0 },
                { 44, 1, 2.52, 0 },
                { 68, 0, 8.38, 1 },
            };

            double[][] input  = data.Submatrix(null, 0, 2).ToJagged();
            double[]   output = data.GetColumn(3);

            var regression = new LogisticRegression(3);

            var teacher = new IterativeReweightedLeastSquares(regression);

            teacher.Regularization = 1e-10;
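            // Note: with an essentially negligible regularization value, (nearly) separable data such
            // as the set above can drive the IRLS estimates to very large magnitudes, which is what
            // the coefficient assertions at the end of this test exercise.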

            var errors = new List <double>();

            for (int i = 0; i < 1000; i++)
            {
                errors.Add(teacher.Run(input, output));
            }

            double error = 0;

            for (int i = 0; i < output.Length; i++)
            {
                double expected = output[i];
                double actual   = System.Math.Round(regression.Compute(input[i]));

                if (expected != actual)
                {
                    error++;
                }
            }

            error /= output.Length;

            Assert.AreEqual(error, 0);
            Assert.AreEqual(-490.30977151704076, regression.Coefficients[0], 1e-7);
            Assert.AreEqual(1.7763049293456503, regression.Coefficients[1], 1e-7);
            Assert.AreEqual(-14.882619671822592, regression.Coefficients[2], 1e-7);
            Assert.AreEqual(60.5066623676452, regression.Coefficients[3], 1e-7);
        }
Example #2
        public void RegressTest()
        {
            double[,] inputGrouped =
            {
                { 1, 4, 5 }, // product 1 has four occurrences of class 1 and five  of class 0
                { 2, 1, 3 }, // product 2 has one  occurrence  of class 1 and three of class 0
            };

            double[,] inputGroupProb =
            {
                { 1, 4.0 / (4 + 5) }, // product 1 has 0.44 probability of belonging to class 1
                { 2, 1.0 / (1 + 3) }, // product 2 has 0.25 probability of belonging to class 1
            };


            double[,] inputExtended =
            {
                { 1, 1 }, // observation of product 1 in class 1
                { 1, 1 }, // observation of product 1 in class 1
                { 1, 1 }, // observation of product 1 in class 1
                { 1, 1 }, // observation of product 1 in class 1
                { 1, 0 }, // observation of product 1 in class 0
                { 1, 0 }, // observation of product 1 in class 0
                { 1, 0 }, // observation of product 1 in class 0
                { 1, 0 }, // observation of product 1 in class 0
                { 1, 0 }, // observation of product 1 in class 0
                { 2, 1 }, // observation of product 2 in class 1
                { 2, 0 }, // observation of product 2 in class 0
                { 2, 0 }, // observation of product 2 in class 0
                { 2, 0 }, // observation of product 2 in class 0
            };


            // Fit using extended data
            double[][]         inputs            = Matrix.ColumnVector(inputExtended.GetColumn(0)).ToArray();
            double[]           outputs           = inputExtended.GetColumn(1);
            LogisticRegression target            = new LogisticRegression(1);
            IterativeReweightedLeastSquares irls = new IterativeReweightedLeastSquares(target);

            irls.Run(inputs, outputs);

            // Fit using grouped data
            double[][]         inputs2            = Matrix.ColumnVector(inputGroupProb.GetColumn(0)).ToArray();
            double[]           outputs2           = inputGroupProb.GetColumn(1);
            LogisticRegression target2            = new LogisticRegression(1);
            IterativeReweightedLeastSquares irls2 = new IterativeReweightedLeastSquares(target2);

            irls2.Run(inputs2, outputs2);


            Assert.IsTrue(Matrix.IsEqual(target.Coefficients, target2.Coefficients, 0.000001));
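
            // (The two fits should agree: maximizing the Bernoulli log-likelihood of the expanded
            // 0/1 observations and maximizing the binomial log-likelihood of the grouped class
            // proportions lead to the same coefficient estimates.)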



            double[,] data = new double[, ]
            {
                { 1, 0 },
                { 2, 0 },
                { 3, 0 },
                { 4, 0 },
                { 5, 1 },
                { 6, 0 },
                { 7, 1 },
                { 8, 0 },
                { 9, 1 },
                { 10, 1 }
            };


            double[][] inputs3  = Matrix.ColumnVector(data.GetColumn(0)).ToArray();
            double[]   outputs3 = data.GetColumn(1);
            LogisticRegressionAnalysis analysis = new LogisticRegressionAnalysis(inputs3, outputs3);

            analysis.Compute();

            Assert.IsFalse(double.IsNaN(analysis.Deviance));
            Assert.IsFalse(double.IsNaN(analysis.ChiSquare.PValue));

            Assert.AreEqual(analysis.Deviance, 8.6202, 0.0005);
            Assert.AreEqual(analysis.ChiSquare.PValue, 0.0278, 0.0005);

            // Check intercept
            Assert.IsFalse(double.IsNaN(analysis.Coefficients[0].Value));
            Assert.AreEqual(analysis.Coefficients[0].Value, -4.3578, 0.0005);

            // Check coefficients
            Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].Value));
            Assert.AreEqual(analysis.Coefficients[1].Value, 0.6622, 0.0005);

            // Check statistics
            Assert.AreEqual(analysis.Coefficients[1].StandardError, 0.4001, 0.0005);
            Assert.AreEqual(analysis.Coefficients[1].Wald.PValue, 0.0979, 0.0005);

            Assert.AreEqual(analysis.Coefficients[1].OddsRatio, 1.9391, 0.0005);

            Assert.AreEqual(analysis.Coefficients[1].ConfidenceLower, 0.8852, 0.0005);
            Assert.AreEqual(analysis.Coefficients[1].ConfidenceUpper, 4.2478, 0.0005);


            Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].Wald.PValue));
            Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].StandardError));
            Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].OddsRatio));
            Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].ConfidenceLower));
            Assert.IsFalse(double.IsNaN(analysis.Coefficients[1].ConfidenceUpper));
        }
        public void learn_new_mechanism()
        {
            Accord.Math.Random.Generator.Seed = 0;

            #region doc_log_reg_1
            // Suppose we have the following data about some patients.
            // The first variable is continuous and represents patient
            // age. The second variable is dichotomous and gives whether
            // they smoke or not (This is completely fictional data).

            // We also know if they have had lung cancer or not, and
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).

            double[][] input =
            {                           // age, smokes?, had cancer?
                new double[] { 55, 0 }, // false - no cancer
                new double[] { 28, 0 }, // false
                new double[] { 65, 1 }, // false
                new double[] { 46, 0 }, // true  - had cancer
                new double[] { 86, 1 }, // true
                new double[] { 56, 1 }, // true
                new double[] { 85, 0 }, // false
                new double[] { 33, 0 }, // false
                new double[] { 21, 1 }, // false
                new double[] { 42, 1 }, // true
            };

            bool[] output = // Whether each patient had lung cancer or not
            {
                false, false, false, true, true, true, false, false, false, true
            };


            // To verify this hypothesis, we are going to create a logistic
            // regression model for those two inputs (age and smoking), learned
            // using a method called "Iteratively Reweighted Least Squares":

            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4, // Let's set some convergence parameters
                Iterations     = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            // Now, we can use the learner to finally estimate our model:
            LogisticRegression regression = learner.Learn(input, output);

            // At this point, we can compute the odds ratio of our variables.
            // In the model, the variable at 0 is always the intercept term,
            // with the other following in the sequence. Index 1 is the age
            // and index 2 is whether the patient smokes or not.

            // For the age variable, we have that individuals with
            //   higher age have 1.021 greater odds of getting lung
            //   cancer controlling for cigarette smoking.
            double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

            // For the smoking/non smoking category variable, however, we
            //   have that individuals who smoke have 5.858 greater odds
            //   of developing lung cancer compared to those who do not
            //   smoke, controlling for age (remember, this is completely
            //   fictional and for demonstration purposes only).
            double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

            // We can also obtain confidence intervals for the odds ratios:
            DoubleRange ageRange   = regression.GetConfidenceInterval(1); // { 0.955442466180864, 1.09075592717851 }
            DoubleRange smokeRange = regression.GetConfidenceInterval(2); // { 0.326598216009923, 105.088535240304 }
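
            // As a hypothetical cross-check (not part of the original example): for a logistic
            // model, the odds ratio of a variable is just the exponential of its coefficient, and
            // the confidence interval above is typically exp(coefficient +/- z * standardError).
            double manualAgeOdds = System.Math.Exp(regression.Coefficients[1]); // should be close to ageOdds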

            // If we would like to use the model to predict a probability for
            // each patient regarding whether they are at risk of cancer or not,
            // we can use the Probability function:

            double[] scores = regression.Probability(input);

            // Finally, if we would like to arrive at a conclusion regarding
            // each patient, we can use the Decide method, which will transform
            // the probabilities (from 0 to 1) into actual true/false values:

            bool[] actual = regression.Decide(input);
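            // (Decide should be equivalent to thresholding each predicted probability at 0.5.)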
            #endregion

            double[] expected =
            {
                0.21044171509541, 0.132425274863516, 0.657478034489772, 0.181224847711481, 0.747556618035989, 0.614500418479497, 0.331167053803838, 0.144741108525755, 0.436271096256738, 0.544193832738005
            };

            string str = scores.ToCSharp();
            for (int i = 0; i < scores.Length; i++)
            {
                Assert.AreEqual(expected[i], scores[i], 1e-8);
            }

            double[] transform = regression.Transform(input, scores);
            for (int i = 0; i < scores.Length; i++)
            {
                Assert.AreEqual(expected[i], transform[i], 1e-8);
            }

            Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
            Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);

            Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
            Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

            Assert.IsTrue(new[] { 0.955442466180864, 1.09075592717851 }.IsEqual(ageRange, atol: 1e-10));
            Assert.IsTrue(new[] { 0.326598216009923, 105.088535240304 }.IsEqual(smokeRange, atol: 1e-10));

            Assert.IsFalse(actual[0]);
            Assert.IsFalse(actual[1]);
            Assert.IsTrue(actual[2]);
            Assert.IsFalse(actual[3]);
            Assert.IsTrue(actual[4]);
            Assert.IsTrue(actual[5]);
            Assert.IsFalse(actual[6]);
            Assert.IsFalse(actual[7]);
            Assert.IsFalse(actual[8]);
            Assert.IsTrue(actual[9]);
        }
        public void scores_probabilities_test()
        {
            double[][] input =
            {
                new double[] { 55, 0 }, // 0 - no cancer
                new double[] { 28, 0 }, // 0
                new double[] { 65, 1 }, // 0
                new double[] { 46, 0 }, // 1 - have cancer

                new double[] { 86, 1 }, // 1
                new double[] { 86, 1 }, // 1
                new double[] { 56, 1 }, // 1
                new double[] { 85, 0 }, // 0

                new double[] { 33, 0 }, // 0
                new double[] { 21, 1 }, // 0
                new double[] { 42, 1 }, // 1
            };

            double[] output =
            {
                0, 0, 0, 1,
                1, 1, 1, 0,
                0, 0, 1
            };

            double[] weights =
            {
                1.0, 1.0, 1.0, 1.0,
                0.5, 0.5, 1.0, 1.0,
                1.0, 1.0, 1.0
            };


            var teacher = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Regularization = 0
            };

            var target = teacher.Learn(input, output, weights);

            LogitLinkFunction link = (LogitLinkFunction)target.Link;

            Assert.AreEqual(0, link.A);
            Assert.AreEqual(1, link.B);

            Assert.AreEqual(-2.4577464307294092, target.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, target.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, target.Coefficients[1], 1e-8);
            Assert.AreEqual(1.7678893101571855, target.Coefficients[2], 1e-8);

            // Test Scores, LogLikelihoods and Probability functions
            // https://github.com/accord-net/framework/issues/570

            double[][] scoresAllSamples         = target.Scores(input);
            double[][] logLikelihoodsAllSamples = target.LogLikelihoods(input);
            double[][] probabilitiesAllSamples  = target.Probabilities(input);
            Assert.IsTrue(scoresAllSamples.IsEqual(Matrix.Apply(probabilitiesAllSamples, link.Function), rtol: 1e-5));

            Assert.IsTrue(probabilitiesAllSamples.IsEqual(logLikelihoodsAllSamples.Exp()));
            Assert.IsTrue(probabilitiesAllSamples.Sum(dimension: 1).IsEqual(Vector.Ones(11), rtol: 1e-6));
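
            // In other words: Scores returns the linear predictor (the link function applied to each
            // probability), LogLikelihoods returns the log of the probabilities, and the per-class
            // probabilities of every sample sum to one.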


            bool[] decideAllSamples = target.Decide(input);
            double err = new ZeroOneLoss(output).Loss(decideAllSamples);

            Assert.AreEqual(0.18181818181818182, err, 1e-5);
            Assert.AreEqual(decideAllSamples, scoresAllSamples.ArgMax(dimension: 1).ToBoolean());
            Assert.AreEqual(decideAllSamples.ToInt32(), logLikelihoodsAllSamples.ArgMax(dimension: 1));
            Assert.AreEqual(decideAllSamples, probabilitiesAllSamples.ArgMax(dimension: 1).ToBoolean());

            double[] scoreAllSamples = target.Score(input);
            Assert.AreEqual(scoreAllSamples, scoresAllSamples.GetColumn(1));
            double[] logLikelihoodAllSamples = target.LogLikelihood(input);
            Assert.AreEqual(logLikelihoodAllSamples, logLikelihoodsAllSamples.GetColumn(1));
            double[] probabilityAllSamples = target.Probability(input);
            Assert.AreEqual(probabilityAllSamples, probabilitiesAllSamples.GetColumn(1));

            for (int i = 0; i < input.Length; i++)
            {
                double[] scoresOneSample = target.Scores(input[i]);
                Assert.AreEqual(scoresOneSample, scoresAllSamples[i]);

                double[] logLikelihoodsOneSample = target.LogLikelihoods(input[i]);
                Assert.AreEqual(logLikelihoodsOneSample, logLikelihoodsAllSamples[i]);

                double[] probabilitiesOneSample = target.Probabilities(input[i]);
                Assert.AreEqual(probabilitiesOneSample, probabilitiesAllSamples[i]);

                bool decideOneSample = target.Decide(input[i]);
                Assert.AreEqual(decideOneSample, decideAllSamples[i]);

                double scoreOneSample = target.Score(input[i]);
                Assert.AreEqual(scoreOneSample, scoreAllSamples[i]);

                double logLikelihoodOneSample = target.LogLikelihood(input[i]);
                Assert.AreEqual(logLikelihoodOneSample, logLikelihoodAllSamples[i]);

                double probabilityOneSample = target.Probability(input[i]);
                Assert.AreEqual(probabilityOneSample, probabilityAllSamples[i]);
            }

            bool[] decideScoresAllSamples         = null; target.Scores(input, ref decideScoresAllSamples);
            bool[] decideLogLikelihoodsAllSamples = null; target.LogLikelihoods(input, ref decideLogLikelihoodsAllSamples);
            Assert.AreEqual(decideScoresAllSamples, decideLogLikelihoodsAllSamples);
            bool[] decideProbabilitiesAllSamples = null; target.Probabilities(input, ref decideProbabilitiesAllSamples);
            Assert.AreEqual(decideScoresAllSamples, decideProbabilitiesAllSamples);

            bool[] decideScoreAllSamples = null; target.Score(input, ref decideScoreAllSamples);
            Assert.AreEqual(decideScoreAllSamples, decideScoresAllSamples);
            bool[] decideLogLikelihoodAllSamples = null; target.LogLikelihood(input, ref decideLogLikelihoodAllSamples);
            Assert.AreEqual(decideScoreAllSamples, decideLogLikelihoodAllSamples);
            bool[] decideProbabilityAllSamples = null; target.Probability(input, ref decideProbabilityAllSamples);
            Assert.AreEqual(decideScoreAllSamples, decideProbabilityAllSamples);


            for (int i = 0; i < input.Length; i++)
            {
                bool decideScoresOneSample; target.Scores(input[i], out decideScoresOneSample);
                Assert.AreEqual(decideScoresOneSample, decideScoresAllSamples[i]);

                bool decideLogLikelihoodsOneSample; target.LogLikelihoods(input[i], out decideLogLikelihoodsOneSample);
                Assert.AreEqual(decideLogLikelihoodsOneSample, decideLogLikelihoodsAllSamples[i]);

                bool decideProbabilitiesOneSample; target.Probabilities(input[i], out decideProbabilitiesOneSample);
                Assert.AreEqual(decideProbabilitiesOneSample, decideProbabilitiesAllSamples[i]);

                bool decideScoreOneSample; target.Score(input[i], out decideScoreOneSample);
                Assert.AreEqual(decideScoreOneSample, decideScoreAllSamples[i]);

                bool decideLogLikelihoodOneSample; target.LogLikelihood(input[i], out decideLogLikelihoodOneSample);
                Assert.AreEqual(decideLogLikelihoodOneSample, decideLogLikelihoodAllSamples[i]);

                bool decideProbabilityOneSample; target.Probability(input: input[i], decision: out decideProbabilityOneSample);
                Assert.AreEqual(decideProbabilityOneSample, decideProbabilityAllSamples[i]);
            }


            //bool[][] decidesScoresAllSamples = null; target.Scores(input, ref decidesScoresAllSamples);
            //bool[][] decidesLogLikelihoodsAllSamples = null; target.LogLikelihoods(input, ref decidesLogLikelihoodsAllSamples);
            //bool[][] decidesProbabilitiesAllSamples = null; target.Probabilities(input, ref decidesProbabilitiesAllSamples);


            //bool[][] decidesScoreAllSamples = null; target.Score(input, ref decidesScoreAllSamples);
            //bool[][] decidesLogLikelihoodAllSamples = null; target.LogLikelihood(input, ref decidesLogLikelihoodAllSamples);
            //bool[][] decidesProbabilityAllSamples = null; target.Probability(input, ref decidesProbabilityAllSamples);
        }
        /// <summary>
        ///   Computes the Logistic Regression Analysis.
        /// </summary>
        ///
        /// <remarks>The negative log-likelihood surface for
        ///   logistic regression learning is convex, so the likelihood has only one
        ///   peak. Any local maximum is also the global maximum.
        /// </remarks>
        ///
        /// <param name="limit">
        ///   The convergence threshold at which the algorithm should stop: iteration
        ///   ends once the largest absolute parameter change of the regression between
        ///   two iterations falls below this value. If not specified, the value of
        ///   10e-4 will be used.
        /// </param>
        ///
        /// <param name="maxIterations">
        ///   The maximum number of iterations to be performed by the regression
        ///   algorithm.
        /// </param>
        ///
        /// <returns>
        ///   True if the model converged, false otherwise.
        /// </returns>
        ///
        public bool Compute(double limit, int maxIterations)
        {
            double delta;
            int    iteration = 0;

            var learning = new IterativeReweightedLeastSquares(regression);

            do // learning iterations until convergence
            {
                delta = learning.Run(inputData, outputData);
                iteration++;
            } while (delta > limit && iteration < maxIterations);

            // Check if the full model has converged
            bool converged = iteration < maxIterations;


            // Store model information
            this.result        = regression.Compute(inputData);
            this.deviance      = regression.GetDeviance(inputData, outputData);
            this.logLikelihood = regression.GetLogLikelihood(inputData, outputData);
            this.chiSquare     = regression.ChiSquare(inputData, outputData);

            // Store coefficient information
            for (int i = 0; i < regression.Coefficients.Length; i++)
            {
                this.standardErrors[i] = regression.StandardErrors[i];

                this.waldTests[i]    = regression.GetWaldTest(i);
                this.coefficients[i] = regression.Coefficients[i];
                this.confidences[i]  = regression.GetConfidenceInterval(i);
                this.oddsRatios[i]   = regression.GetOddsRatio(i);
            }


            // Perform likelihood-ratio tests against diminished nested models
            LogisticRegression innerModel = new LogisticRegression(inputCount - 1);

            learning = new IterativeReweightedLeastSquares(innerModel);

            for (int i = 0; i < inputCount; i++)
            {
                // Create a diminished inner model without the current variable
                double[][] data = inputData.RemoveColumn(i);


                iteration = 0;

                do // learning iterations until convergence
                {
                    delta = learning.Run(data, outputData);
                    iteration++;
                } while (delta > limit && iteration < maxIterations);

                double ratio = 2.0 * (logLikelihood - innerModel.GetLogLikelihood(data, outputData));
                ratioTests[i + 1] = new ChiSquareTest(ratio, 1);
            }
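
            // (Each ratio above is the likelihood-ratio statistic 2 * (LL_full - LL_nested); under the
            // null hypothesis that the removed variable contributes nothing, it approximately follows
            // a chi-square distribution with one degree of freedom.)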



            // Returns true if the full model has converged, false otherwise.
            return(converged);
        }
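
        // A minimal usage sketch for the method above (hypothetical variable names; it assumes an
        // analysis instance built from jagged inputs and binary outputs, as in the tests shown earlier):
        //
        //   var analysis = new LogisticRegressionAnalysis(inputs, outputs);
        //   bool converged = analysis.Compute(limit: 1e-4, maxIterations: 100);
        //   double deviance = analysis.Deviance;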
Example #6
        public void ComputeTest()
        {
            // Example from http://bayes.bgsu.edu/bcwr/vignettes/probit_regression.pdf

            double[][] input =
            {
                new double[] { 525 },
                new double[] { 533 },
                new double[] { 545 },
                new double[] { 582 },
                new double[] { 581 },
                new double[] { 576 },
                new double[] { 572 },
                new double[] { 609 },
                new double[] { 559 },
                new double[] { 543 },
                new double[] { 576 },
                new double[] { 525 },
                new double[] { 574 },
                new double[] { 582 },
                new double[] { 574 },
                new double[] { 471 },
                new double[] { 595 },
                new double[] { 557 },
                new double[] { 557 },
                new double[] { 584 },
                new double[] { 599 },
                new double[] { 517 },
                new double[] { 649 },
                new double[] { 584 },
                new double[] { 463 },
                new double[] { 591 },
                new double[] { 488 },
                new double[] { 563 },
                new double[] { 553 },
                new double[] { 549 }
            };

            double[] output =
            {
                0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1
            };


            var regression = new GeneralizedLinearRegression(new ProbitLinkFunction(), inputs: 1);

            var teacher = new IterativeReweightedLeastSquares(regression);


            double delta = 0;

            do
            {
                // Perform an iteration
                delta = teacher.Run(input, output);
            } while (delta > 1e-6);
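
            // With the probit link used above, the fitted model is P(y = 1 | x) = Phi(b0 + b1 * x),
            // where Phi denotes the standard normal cumulative distribution function.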



            Assert.AreEqual(2, regression.Coefficients.Length);
            Assert.AreEqual(-17.6984, regression.Coefficients[0], 1e-4);
            Assert.AreEqual(0.03293, regression.Coefficients[1], 1e-4);

            Assert.AreEqual(2, regression.StandardErrors.Length);
            Assert.AreEqual(9.2731983954911374, regression.StandardErrors[0], 1e-5);
            Assert.AreEqual(0.016768779446085, regression.StandardErrors[1], 1e-6);
        }
Example #7
        public void learn_test()
        {
            #region doc_learn
            // The Bag-Of-Words model can be used to extract finite-length feature
            // vectors from sequences of arbitrary length, like for example, texts:


            string[] texts =
            {
                @"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas molestie malesuada 
                  nisi et placerat. Curabitur blandit porttitor suscipit. Nunc facilisis ultrices felis,
                  vitae luctus arcu semper in. Fusce ut felis ipsum. Sed faucibus tortor ut felis placerat
                  euismod. Vestibulum pharetra velit et dolor ornare quis malesuada leo aliquam. Aenean 
                  lobortis, tortor iaculis vestibulum dictum, tellus nisi vestibulum libero, ultricies 
                  pretium nisi ante in neque. Integer et massa lectus. Aenean ut sem quam. Mauris at nisl 
                  augue, volutpat tempus nisl. Suspendisse luctus convallis metus, vitae pretium risus 
                  pretium vitae. Duis tristique euismod aliquam",

                @"Sed consectetur nisl et diam mattis varius. Aliquam ornare tincidunt arcu eget adipiscing. 
                  Etiam quis augue lectus, vel sollicitudin lorem. Fusce lacinia, leo non porttitor adipiscing, 
                  mauris purus lobortis ipsum, id scelerisque erat neque eget nunc. Suspendisse potenti. Etiam 
                  non urna non libero pulvinar consequat ac vitae turpis. Nam urna eros, laoreet id sagittis eu,
                  posuere in sapien. Phasellus semper convallis faucibus. Nulla fermentum faucibus tellus in 
                  rutrum. Maecenas quis risus augue, eu gravida massa."
            };

            string[][] words = texts.Tokenize();

            // Create a new BoW with options:
            var codebook = new BagOfWords()
            {
                MaximumOccurance = 1 // the resulting vector will have only 0's and 1's
            };

            // Compute the codebook (note: this would have to be done only for the training set)
            codebook.Learn(words);


            // Now, we can use the learned codebook to extract fixed-length
            // representations of the different texts (paragraphs) above:

            // Extract a feature vector from the text 1:
            double[] bow1 = codebook.Transform(words[0]);

            // Extract a feature vector from the text 2:
            double[] bow2 = codebook.Transform(words[1]);

            // we could also have transformed everything at once, i.e.
            // double[][] bow = codebook.Transform(words);


            // Now, since we have finite length representations (both bow1 and bow2 should
            // have the same size), we can pass them to any classifier or machine learning
            // method. For example, we can pass them to a Logistic Regression Classifier to
            // discern between the first and second paragraphs

            // Let's create a Logistic classifier to separate the two paragraphs:
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4, // Let's set some convergence parameters
                Iterations     = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            // Now, we use the learning algorithm to learn the distinction between the two:
            LogisticRegression reg = learner.Learn(new[] { bow1, bow2 }, new[] { false, true });

            // Finally, we can predict using the classifier:
            bool c1 = reg.Decide(bow1); // Should be false
            bool c2 = reg.Decide(bow2); // Should be true
            #endregion

            Assert.AreEqual(bow1.Length, 99);
            Assert.AreEqual(bow2.Length, 99);

            Assert.AreEqual(bow1.Sum(), 67);
            Assert.AreEqual(bow2.Sum(), 63);

            Assert.IsFalse(c1);
            Assert.IsTrue(c2);
        }
Example #8
        public void ComputeTest()
        {
            // Suppose we have the following data about some patients.
            // The first variable is continuous and represents patient
            // age. The second variable is dichotomous and gives whether
            // they smoke or not (This is completely fictional data).
            double[][] input =
            {
                new double[] { 55, 0 }, // 0 - no cancer
                new double[] { 28, 0 }, // 0
                new double[] { 65, 1 }, // 0
                new double[] { 46, 0 }, // 1 - have cancer
                new double[] { 86, 1 }, // 1
                new double[] { 56, 1 }, // 1
                new double[] { 85, 0 }, // 0
                new double[] { 33, 0 }, // 0
                new double[] { 21, 1 }, // 0
                new double[] { 42, 1 }, // 1
            };

            // We also know if they have had lung cancer or not, and
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).
            double[] output =
            {
                0, 0, 0, 1, 1, 1, 0, 0, 0, 1
            };


            // To verify this hypothesis, we are going to create a logistic
            // regression model for those two inputs (age and smoking).
            LogisticRegression regression = new LogisticRegression(inputs: 2);

            // Next, we are going to estimate this model. For this, we
            // will use the Iteratively Reweighted Least Squares method.
            var teacher = new IterativeReweightedLeastSquares(regression);

            teacher.Regularization = 0;

            // Now, we will iteratively estimate our model. The Run method returns
            // the maximum relative change in the model parameters and we will use
            // it as the convergence criterion.

            double delta = 0;

            do
            {
                // Perform an iteration
                delta = teacher.Run(input, output);
            } while (delta > 0.001);

            // At this point, we can compute the odds ratio of our variables.
            // In the model, the variable at 0 is always the intercept term,
            // with the other following in the sequence. Index 1 is the age
            // and index 2 is whether the patient smokes or not.

            // For the age variable, we have that individuals with
            //   higher age have 1.021 greater odds of getting lung
            //   cancer controlling for cigarette smoking.
            double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

            // For the smoking/non smoking category variable, however, we
            //   have that individuals who smoke have 5.858 greater odds
            //   of developing lung cancer compared to those who do not
            //   smoke, controlling for age (remember, this is completely
            //   fictional and for demonstration purposes only).
            double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

            double[] actual = new double[output.Length];
            for (int i = 0; i < input.Length; i++)
            {
                actual[i] = regression.Compute(input[i]);
            }

            double[] expected =
            {
                0.21044171560168326,
                0.13242527535212373,
                0.65747803433771812,
                0.18122484822324372,
                0.74755661773156912,
                0.61450041841477232,
                0.33116705418194975,
                0.14474110902457912,
                0.43627109657399382,
                0.54419383282533118
            };

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i], actual[i]);
            }

            Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
            Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-8);
            Assert.IsFalse(double.IsNaN(ageOdds));
            Assert.IsFalse(double.IsNaN(smokeOdds));

            Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
            Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
        }
        public void ComputeTest()
        {
            // Suppose we have the following data about some patients.
            // The first variable is continuous and represents patient
            // age. The second variable is dichotomous and gives whether
            // they smoke or not (This is completely fictional data).
            double[][] input =
            {
                new double[] { 55, 0 }, // 0 - no cancer
                new double[] { 28, 0 }, // 0
                new double[] { 65, 1 }, // 0
                new double[] { 46, 0 }, // 1 - have cancer
                new double[] { 86, 1 }, // 1
                new double[] { 56, 1 }, // 1
                new double[] { 85, 0 }, // 0
                new double[] { 33, 0 }, // 0
                new double[] { 21, 1 }, // 0
                new double[] { 42, 1 }, // 1
            };

            // We also know if they have had lung cancer or not, and
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).
            double[] output =
            {
                0, 0, 0, 1, 1, 1, 0, 0, 0, 1
            };


            // To verify this hypothesis, we are going to create a logistic
            // regression model for those two inputs (age and smoking).
            LogisticRegression regression = new LogisticRegression(inputs: 2);

            // Next, we are going to estimate this model. For this, we
            // will use the Iteratively Reweighted Least Squares method.
            var teacher = new IterativeReweightedLeastSquares(regression);

            // Now, we will iteratively estimate our model. The Run method returns
            // the maximum relative change in the model parameters and we will use
            // it as the convergence criterion.

            double delta = 0;

            do
            {
                // Perform an iteration
                delta = teacher.Run(input, output);
            } while (delta > 0.001);

            // At this point, we can compute the odds ratio of our variables.
            // In the model, the variable at 0 is always the intercept term,
            // with the other following in the sequence. Index 1 is the age
            // and index 2 is whether the patient smokes or not.

            // For the age variable, we have that individuals with
            //   higher age have 1.021 greater odds of getting lung
            //   cancer controlling for cigarette smoking.
            double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

            // For the smoking/non smoking category variable, however, we
            //   have that individuals who smoke have 5.858 greater odds
            //   of developing lung cancer compared to those who do not
            //   smoke, controlling for age (remember, this is completely
            //   fictional and for demonstration purposes only).
            double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331


            Assert.AreEqual(1.0208597028836701, ageOdds);
            Assert.AreEqual(5.8584748789881331, smokeOdds);
        }
Example #10
        /// <summary>
        /// Uses data from the file given by <paramref name="fileName"/> to train a logistic regression model.
        /// </summary>
        /// <param name="fileName">The name of the data file.</param>
        /// <returns>A string to print giving information about the weights and odds ratios.</returns>
        public static string Learn(string fileName)
        {
            //Read all inputs and outputs from training file.
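            //Judging from the parsing below, each input line is expected to look roughly like
            //"<name>:<value1>&<value2>&...:<label>" -- a hypothetical illustration, not a documented format.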
            string[]   lines   = File.ReadAllLines("Logistic Regression Model/data/" + fileName + ".txt");
            double[][] inputs  = new double[lines.Length][];
            int[]      outputs = new int[lines.Length];

            for (int a = 0; a < lines.Length; a++)
            {
                string[] split = lines[a].Split(':');

                //Dynamically get variables from file.
                string[] scores = split[1].Split('&');
                inputs[a] = new double[scores.Length];
                for (int b = 0; b < scores.Length; b++)
                {
                    inputs[a][b] = double.Parse(scores[b]);
                }

                outputs[a] = int.Parse(split[2]);
            }

            //Set up Accord.NET learner.
            IterativeReweightedLeastSquares <LogisticRegression> learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4,
                MaxIterations  = 100,
                Regularization = 1e-10
            };

            //Shuffle the input and output pairs to eliminate some inherent bias from
            //training data.
            Dictionary <double[], int> map = inputs.Zip(outputs, (arg1, arg2) => new { arg1, arg2 }).ToDictionary(x => x.arg1, x => x.arg2);

            map.Shuffle();
            inputs  = map.Keys.ToArray();
            outputs = map.Values.ToArray();


            //Train Regression
            LogisticRegression regression = learner.Learn(inputs, outputs.ToBoolArray());



            //Save to a Model file.
            int counter = 0;

            while (File.Exists("Logistic Regression Model/models/Model-" + counter + ".txt"))
            {
                counter++;
            }

            //Create a file writer
            FileStream   fs     = File.Create("Logistic Regression Model/models/Model-" + counter + ".txt");
            StreamWriter writer = new StreamWriter(fs);

            //Print the weights
            string result = "Weights: " + regression.Weights.GetString() + "\n";

            //Write lines.
            writer.WriteLine(regression.Weights.Append(regression.Intercept).ToArray().GetString());
            for (int c = 0; c < regression.Weights.Length; c++)
            {
                writer.WriteLine(regression.GetOddsRatio(c));
                result += "Odds Ratio " + c + ": " + regression.GetOddsRatio(c) + "\n";
            }

            //Get Loss values.
            double[] actual   = new double[inputs.Length];
            double[] expected = new double[outputs.Length];
            for (int a = 0; a < actual.Length; a++)
            {
                actual[a]   = regression.Probability(inputs[a]);
                expected[a] = outputs[a];
            }

            //Calculate and print the square loss (with Mean and Root set below, this is the root-mean-square error).
            string loss = "Loss: " + new SquareLoss(expected)
            {
                Mean = true,
                Root = true
            }.Loss(actual);

            result += loss + "\n";
            writer.WriteLine(loss);

            Console.WriteLine("\n\n" + loss);


            //Calculate and print R-squared Loss
            string r2 = "R2: " + new RSquaredLoss(inputs[0].Length, expected).Loss(actual);

            result += r2;
            writer.WriteLine(r2);

            //Cleanup
            writer.Close();
            writer.Dispose();

            fs.Close();
            fs.Dispose();

            Console.WriteLine("Model trained successfully!");
            Console.WriteLine("\nEvaluating...\n");

            //Get the VIFs
            float[] VIFs = CalculateVIFs(inputs);

            //Log it
            for (int a = 0; a < VIFs.Length; a++)
            {
                Logger.Log("Variance Inflation Factor #" + a + ": " + VIFs[a]);
            }

            return(result);
        }
Example #11
        private void regression(List <Cell> samplePoints)
        {
            // Build the input and output data sets

            // Number of samples
            int COUNT = samplePoints.Count;

            // Build the input and output arrays
            double[][] inputs  = new double[COUNT][];
            bool[]     outputs = new bool[COUNT];
            for (int i = 0; i < COUNT; i++)
            {
                Cell cell = samplePoints[i];
                int  pos  = cell.row * width + cell.col;
                inputs[i] = (from buffer in driveBuffers
                             select buffer[pos]).ToArray <double>();
                outputs[i] = cell.type;
            }



            var learner = new IterativeReweightedLeastSquares <Accord.Statistics.Models.Regression.LogisticRegression>()
            {
                Tolerance             = 1e-8, // convergence tolerance
                Iterations            = 20,   // maximum number of iterations
                Regularization        = 0,
                ComputeStandardErrors = true
            };


            Accord.Statistics.Models.Regression.LogisticRegression regression = learner.Learn(inputs, outputs);


            // Print the odds ratios
            StringBuilder strb = new StringBuilder();

            for (int i = 0; i <= inputs[0].Length; i++)
            {
                strb.AppendLine(" " + i + " : " + regression.GetOddsRatio(i));
            }
            updateConsoleEvent(strb.ToString());

            // Print the weights
            StringBuilder strw = new StringBuilder();

            strw.AppendLine("Weight coefficients:");
            strw.AppendLine("Intercept: " + regression.Intercept.ToString());
            var weights = regression.Weights;

            for (int i = 0; i < weights.Length; i++)
            {
                strw.AppendLine("Weight " + (i + 1) + ": " + weights[i]);
            }
            updateConsoleEvent(strw.ToString());

            double[] result  = new double[width * height];
            double   minProp = double.MaxValue;

            double[] minInput = null;
            for (int row = 0; row < height; row++)
            {
                for (int col = 0; col < width; col++)
                {
                    int pos = row * width + col;
                    if (beginBuffer[pos] < 0 || !IsValid(pos))
                    {
                        result[pos] = this.landUse.NullInfo.LandUseTypeValue;
                        continue;
                    }
                    double[] input = (from buffer in driveBuffers
                                      select buffer[pos]).ToArray <double>();
                    double prop = regression.Probability(input);
                    if (prop < minProp)
                    {
                        minProp  = prop;
                        minInput = input;
                    }
                    result[pos] = prop;
                }
            }



            // Create a new GDAL dataset
            OSGeo.GDAL.Driver  driver  = OSGeo.GDAL.Gdal.GetDriverByName("GTIFF");
            OSGeo.GDAL.Dataset dataset = driver.Create(this.ResultLayerName, width, height, 1, OSGeo.GDAL.DataType.GDT_Float64, null);

            dataset.WriteRaster(0, 0, width, height, result, width, height, 1, new int[1] {
                1
            }, 0, 0, 0);
            dataset.FlushCache();
        }
Example #12
        public double[] GetResult()
        {
            // Draw the sample points
            List <Cell> samplePoints = getSample(this.NumberOfSample);

            // Number of samples
            int COUNT = samplePoints.Count;

            // Build the input and output arrays
            double[][] inputs  = new double[COUNT][];
            bool[]     outputs = new bool[COUNT];
            for (int i = 0; i < COUNT; i++)
            {
                Cell cell = samplePoints[i];
                int  pos  = cell.row * width + cell.col;
                inputs[i] = (from buffer in driveBuffers
                             select buffer[pos]).ToArray <double>();
                outputs[i] = cell.type;
            }



            var learner = new IterativeReweightedLeastSquares <Accord.Statistics.Models.Regression.LogisticRegression>()
            {
                Tolerance             = 1e-8, // convergence tolerance
                Iterations            = 20,   // maximum number of iterations
                Regularization        = 0,
                ComputeStandardErrors = true
            };


            Accord.Statistics.Models.Regression.LogisticRegression regression = learner.Learn(inputs, outputs);


            //// Print the odds ratios
            //StringBuilder strb = new StringBuilder();
            //for (int i = 0; i <= inputs[0].Length; i++)
            //{
            //    strb.AppendLine(" " + i + " : " + regression.GetOddsRatio(i));
            //}
            //updateConsoleEvent(strb.ToString());

            //// Print the weights
            //StringBuilder strw = new StringBuilder();
            //strw.AppendLine("Weight coefficients:");
            //strw.AppendLine("Intercept: " + regression.Intercept.ToString());
            //var weights = regression.Weights;
            //for (int i = 0; i < weights.Length; i++)
            //{
            //    strw.AppendLine("Weight " + (i + 1) + ": " + weights[i]);
            //}
            //updateConsoleEvent(strw.ToString());

            double[] result  = new double[width * height];
            double   minProp = double.MaxValue;

            double[] minInput = null;
            for (int row = 0; row < height; row++)
            {
                for (int col = 0; col < width; col++)
                {
                    int pos = row * width + col;
                    if (beginBuffer[pos] < 0 || !IsValid(pos))
                    {
                        result[pos] = this.landUse.NullInfo.LandUseTypeValue;
                        continue;
                    }
                    double[] input = (from buffer in driveBuffers
                                      select buffer[pos]).ToArray <double>();
                    double prop = regression.Probability(input);
                    if (prop < minProp)
                    {
                        minProp  = prop;
                        minInput = input;
                    }
                    result[pos] = prop;
                }
            }
            return(result);
        }
Example #13
        public void learn_new_mechanism()
        {
            Accord.Math.Random.Generator.Seed = 0;

            #region doc_log_reg_1
            // Suppose we have the following data about some patients.
            // The first variable is continuous and represents patient
            // age. The second variable is dichotomous and gives whether
            // they smoke or not (This is completely fictional data).

            // We also know if they have had lung cancer or not, and
            // we would like to know whether smoking has any connection
            // with lung cancer (This is completely fictional data).

            double[][] input =
            {                           // age, smokes?, had cancer?
                new double[] { 55, 0 }, // false - no cancer
                new double[] { 28, 0 }, // false
                new double[] { 65, 1 }, // false
                new double[] { 46, 0 }, // true  - had cancer
                new double[] { 86, 1 }, // true
                new double[] { 56, 1 }, // true
                new double[] { 85, 0 }, // false
                new double[] { 33, 0 }, // false
                new double[] { 21, 1 }, // false
                new double[] { 42, 1 }, // true
            };

            bool[] output = // Whether each patient had lung cancer or not
            {
                false, false, false, true, true, true, false, false, false, true
            };


            // To verify this hypothesis, we are going to create a logistic
            // regression model for those two inputs (age and smoking), learned
            // using a method called "Iteratively Reweighted Least Squares":

            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4, // Let's set some convergence parameters
                Iterations     = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            // Now, we can use the learner to finally estimate our model:
            LogisticRegression regression = learner.Learn(input, output);

            // At this point, we can compute the odds ratio of our variables.
            // In the model, the variable at 0 is always the intercept term,
            // with the other following in the sequence. Index 1 is the age
            // and index 2 is whether the patient smokes or not.

            // For the age variable, we have that individuals with
            //   higher age have 1.021 greater odds of getting lung
            //   cancer controlling for cigarette smoking.
            double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

            // For the smoking/non smoking category variable, however, we
            //   have that individuals who smoke have 5.858 greater odds
            //   of developing lung cancer compared to those who do not
            //   smoke, controlling for age (remember, this is completely
            //   fictional and for demonstration purposes only).
            double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

            // If we would like to use the model to predict a probability for
            // each patient regarding whether they are at risk of cancer or not,
            // we can use the Probability function:

            double[] scores = regression.Probability(input);

            // Finally, if we would like to arrive at a conclusion regarding
            // each patient, we can use the Decide method, which will transform
            // the probabilities (from 0 to 1) into actual true/false values:

            bool[] actual = regression.Decide(input);
            #endregion

            double[] expected =
            {
                0.26653094409723,  0.152638465629209,  1.91952079193046,
                0.221336525913065,  2.96128427776555,  1.59403653839456,
                0.495141657849358, 0.169236601885844, 0.773902301904016,
                1.1939150275367
            };

            string str = scores.ToCSharp();
            for (int i = 0; i < scores.Length; i++)
            {
                Assert.AreEqual(expected[i], scores[i], 1e-8);
            }

            double[] transform = regression.Transform(input, scores);
            for (int i = 0; i < scores.Length; i++)
            {
                Assert.AreEqual(expected[i], transform[i], 1e-8);
            }

            Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
            Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);

            Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
            Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

            Assert.IsFalse(actual[0]);
            Assert.IsFalse(actual[1]);
            Assert.IsTrue(actual[2]);
            Assert.IsFalse(actual[3]);
            Assert.IsTrue(actual[4]);
            Assert.IsTrue(actual[5]);
            Assert.IsFalse(actual[6]);
            Assert.IsFalse(actual[7]);
            Assert.IsFalse(actual[8]);
            Assert.IsTrue(actual[9]);
        }
Example #14
        public void ComputeTest3()
        {
            double[][] input =
            {
                new double[] { 55, 0 }, // 0 - no cancer
                new double[] { 28, 0 }, // 0
                new double[] { 65, 1 }, // 0
                new double[] { 46, 0 }, // 1 - have cancer

                new double[] { 86, 1 }, // 1
                new double[] { 86, 1 }, // 1
                new double[] { 56, 1 }, // 1
                new double[] { 85, 0 }, // 0

                new double[] { 33, 0 }, // 0
                new double[] { 21, 1 }, // 0
                new double[] { 42, 1 }, // 1
            };

            double[] output =
            {
                0, 0, 0, 1,
                1, 1, 1, 0,
                0, 0, 1
            };

            double[] weights =
            {
                1.0, 1.0, 1.0, 1.0,
                0.5, 0.5, 1.0, 1.0,
                1.0, 1.0, 1.0
            };


            LogisticRegression regression = new LogisticRegression(inputs: 2);

            var teacher = new IterativeReweightedLeastSquares(regression);

            teacher.Regularization = 0;


            double delta = 0;

            do
            {
                delta = teacher.Run(input, output, weights);
            } while (delta > 0.001);


            double ageOdds   = regression.GetOddsRatio(1);
            double smokeOdds = regression.GetOddsRatio(2);

            Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
            Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-8);
            Assert.IsFalse(double.IsNaN(ageOdds));
            Assert.IsFalse(double.IsNaN(smokeOdds));


            Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
            Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
            Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-8);
            Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
        }
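
The odds ratios asserted above are simply the exponentials of the corresponding coefficients, which is why the same coefficient and odds-ratio values appear together in this test and the previous one. A quick sketch using the values from the assertions:

    // Sketch: GetOddsRatio(i) corresponds to exp(Coefficients[i]) for a
    // logistic model. These values match the assertions in the test above.
    double ageCoefficient   = 0.020645118265359252;
    double smokeCoefficient = 1.7678893101571855;

    Console.WriteLine(Math.Exp(ageCoefficient));   // ≈ 1.0208597028836701
    Console.WriteLine(Math.Exp(smokeCoefficient)); // ≈ 5.8584748789881331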
Example #15
        public static void Execute()
        {
            double[][] input =
            {
                new double[] { 55, 0 },
                new double[] { 28, 0 },
                new double[] { 65, 0 },
                new double[] { 46, 0 },
                new double[] { 86, 0 },
                new double[] { 56, 0 },
                new double[] { 85, 0 },
                new double[] { 33, 0 },
                new double[] { 21, 0 },
                new double[] { 42, 0 },
            };

            double[] output =
            {
                0, 0, 0, 1, 1, 1, 0, 0, 0, 1
            };
            LogisticRegression regression = new LogisticRegression(2);
            var    trainer = new IterativeReweightedLeastSquares(regression);
            double delta   = 0;

            do
            {
                // Perform an iteration
                delta = trainer.Run(input, output);
            } while (delta > 0.001);

            var b1 = regression.Coefficients[1];
            var b2 = regression.Coefficients[2];
            var b0 = regression.Intercept;

            System.Console.WriteLine(b0);
            System.Console.WriteLine(b1);
            System.Console.WriteLine(b2);

            var func = new Func <double, double, double>((x1, x2) =>
            {
                var result = 1 / (1 + Math.Exp(-b0 - b1 * x1 - b2 * x2));
                return(result);
            });
            var age     = 79;
            var smoking = 0;
            var r       = func(age, smoking);

            System.Console.WriteLine("input x [age:{0}, smoking:{1}] is {2}", age, smoking, r);

            LogisticRegression LR = new LogisticRegression();

            LR.NumberOfInputs = 1; // note: learner.Learn below returns a newly fitted
                                   // model, so this instance (and this setting) is replaced
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                Tolerance      = 1e-4, // Let's set some convergence parameters
                Iterations     = 100,  // maximum number of iterations to perform
                Regularization = 0
            };

            LR = learner.Learn(input, output);
            System.Console.WriteLine(LR.Intercept);
            System.Console.WriteLine(LR.GetOddsRatio(1) - 1);
            System.Console.WriteLine(LR.GetOddsRatio(2) - 1);
            double[] test = new double[] { 79, 0 };
            System.Console.WriteLine(LR.Probability(test));
        }
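
The do/while pattern used in the examples above is the manual counterpart of the Tolerance and MaxIterations settings on the learner: Run performs one IRLS pass and returns a measure of how much the coefficients changed, and iteration stops once that change is small enough. A sketch with an explicit iteration cap (assumes a trainer, input and output set up as in the example above):

    // Sketch: manual convergence loop with an iteration cap, equivalent in
    // spirit to setting Tolerance/MaxIterations on the learner. Assumes
    // trainer, input and output are set up as in the example above.
    double change;
    int iteration = 0;
    do
    {
        change = trainer.Run(input, output); // one IRLS update pass
        iteration++;
    } while (change > 1e-4 && iteration < 100);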
Example #16
        public void LargeCoefficientsTest()
        {
            double[,] data =
            {
                { 48, 1, 4.40, 0 },
                { 60, 0, 7.89, 1 },
                { 51, 0, 3.48, 0 },
                { 66, 0, 8.41, 1 },
                { 40, 1, 3.05, 0 },
                { 44, 1, 4.56, 0 },
                { 80, 0, 6.91, 1 },
                { 52, 0, 5.69, 0 },
                { 58, 0, 4.01, 0 },
                { 58, 0, 4.48, 0 },
                { 72, 1, 5.97, 0 },
                { 57, 0, 6.71, 1 },
                { 55, 1, 5.36, 0 },
                { 71, 0, 5.68, 0 },
                { 44, 1, 4.61, 0 },
                { 65, 1, 4.80, 0 },
                { 38, 0, 5.06, 0 },
                { 50, 0, 6.40, 0 },
                { 80, 0, 6.67, 1 },
                { 69, 1, 5.79, 0 },
                { 39, 0, 5.42, 0 },
                { 68, 0, 7.61, 1 },
                { 47, 1, 3.24, 0 },
                { 45, 1, 4.29, 0 },
                { 79, 1, 7.44, 1 },
                { 41, 1, 4.60, 0 },
                { 45, 0, 5.91, 0 },
                { 54, 0, 4.77, 0 },
                { 43, 1, 5.62, 0 },
                { 62, 1, 7.92, 1 },
                { 72, 1, 7.92, 1 },
                { 57, 1, 6.19, 0 },
                { 39, 1, 2.37, 0 },
                { 51, 0, 5.84, 0 },
                { 73, 1, 5.94, 0 },
                { 41, 1, 3.82, 0 },
                { 35, 0, 2.35, 0 },
                { 69, 0, 6.57, 1 },
                { 75, 1, 7.96, 1 },
                { 51, 1, 3.96, 0 },
                { 61, 1, 4.36, 0 },
                { 55, 0, 3.84, 0 },
                { 45, 1, 3.02, 0 },
                { 48, 0, 4.65, 0 },
                { 77, 0, 7.93, 1 },
                { 40, 1, 2.46, 0 },
                { 37, 1, 2.32, 0 },
                { 78, 0, 7.88, 1 },
                { 39, 1, 4.55, 0 },
                { 41, 0, 2.45, 0 },
                { 54, 1, 5.62, 0 },
                { 59, 1, 5.03, 0 },
                { 78, 0, 8.08, 1 },
                { 56, 1, 6.96, 1 },
                { 49, 1, 3.07, 0 },
                { 48, 0, 4.75, 0 },
                { 63, 1, 5.64, 0 },
                { 50, 0, 3.35, 0 },
                { 59, 1, 5.08, 0 },
                { 60, 0, 6.58, 1 },
                { 64, 0, 5.19, 0 },
                { 76, 1, 6.69, 1 },
                { 58, 0, 5.18, 0 },
                { 48, 1, 4.47, 0 },
                { 72, 0, 8.70, 1 },
                { 40, 1, 5.14, 0 },
                { 53, 0, 3.40, 0 },
                { 79, 0, 9.77, 1 },
                { 61, 1, 7.79, 1 },
                { 59, 0, 7.42, 1 },
                { 44, 0, 2.55, 0 },
                { 52, 1, 3.71, 0 },
                { 80, 1, 7.56, 1 },
                { 76, 0, 7.80, 1 },
                { 51, 0, 5.94, 0 },
                { 46, 1, 5.52, 0 },
                { 48, 0, 3.25, 0 },
                { 58, 1, 4.71, 0 },
                { 44, 1, 2.52, 0 },
                { 68, 0, 8.38, 1 },
            };

            double[][] input  = data.Submatrix(null, 0, 2).ToArray();
            double[]   output = data.GetColumn(3);

            LogisticRegression regression = new LogisticRegression(3);

            var teacher = new IterativeReweightedLeastSquares(regression);

            teacher.Regularization = 1e-10;

            var errors = new List <double>();

            for (int i = 0; i < 1000; i++)
            {
                errors.Add(teacher.Run(input, output));
            }

            double error = 0;

            for (int i = 0; i < output.Length; i++)
            {
                double expected = output[i];
                double actual   = System.Math.Round(regression.Compute(input[i]));

                if (expected != actual)
                {
                    error++;
                }
            }

            error /= output.Length;

            Assert.AreEqual(0, error);
            Assert.AreEqual(-355.59378247276379, regression.Coefficients[0]);
            Assert.AreEqual(1.2646432605797491, regression.Coefficients[1]);
            Assert.AreEqual(-10.710529810144157, regression.Coefficients[2]);
            Assert.AreEqual(44.089493151268726, regression.Coefficients[3]);
        }
Example #17
        /// <summary>
        /// Run the lesson.
        /// </summary>
        public static void Run()
        {
            // get data
            Console.WriteLine("Loading data....");
            var path    = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
            var housing = Frame.ReadCsv(path, separators: ",");

            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // create the median_high_house_value feature
            housing.AddColumn("median_high_house_value",
                              housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

            // shuffle the frame
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            housing = housing.IndexRowsWith(indices).SortRowsByKey();

            // create training, validation, and test frames
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            // build the list of features we're going to use
            var columns = new string[] {
                "latitude",
                "longitude",
                "housing_median_age",
                "total_rooms",
                "total_bedrooms",
                "population",
                "households",
                "median_income"
            };

            // train the model using a logistic regressor
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations = 100
            };
            var regression = learner.Learn(
                training.Columns[columns].ToArray2D <double>().ToJagged(),
                training["median_high_house_value"].Values.ToArray());

            // get probabilities
            var features_validation = validation.Columns[columns].ToArray2D <double>().ToJagged();
            var label_validation    = validation["median_high_house_value"].Values.ToArray();
            var probabilities       = regression.Probability(features_validation);

            // calculate the histogram of probabilities
            var histogram = new Histogram();

            histogram.Compute(probabilities, 0.05);

            // draw the histogram
            Plot(histogram, "Probability histogram", "prediction", "count");

            // get predictions and actuals
            var predictions = regression.Decide(features_validation);
            var actuals     = label_validation.Select(v => v == 1.0 ? true : false).ToArray();

            // create confusion matrix
            var confusion = new ConfusionMatrix(predictions, actuals);

            // display classification scores
            Console.WriteLine($"True Positives:  {confusion.TruePositives}");
            Console.WriteLine($"True Negatives:  {confusion.TrueNegatives}");
            Console.WriteLine($"False Positives: {confusion.FalsePositives}");
            Console.WriteLine($"False Negatives: {confusion.FalseNegatives}");
            Console.WriteLine();

            // display accuracy, precision, and recall
            Console.WriteLine($"Accuracy:        {confusion.Accuracy}");
            Console.WriteLine($"Precision:       {confusion.Precision}");
            Console.WriteLine($"Recall:          {confusion.Recall}");
            Console.WriteLine();

            // display TPR and FPR
            Console.WriteLine($"TPR:             {confusion.Sensitivity}");
            Console.WriteLine($"FPR:             {confusion.FalsePositiveRate}");
            Console.WriteLine();

            // calculate roc curve (note: built here from the hard true/false predictions;
            // passing the probabilities instead would trace the full curve)
            var roc = new ReceiverOperatingCharacteristic(
                actuals,
                predictions.Select(v => v ? 1 : 0).ToArray());

            roc.Compute(100);

            // generate the scatter plot
            var rocPlot = roc.GetScatterplot(true);

            // show roc curve
            Plot(rocPlot);

            // show the auc
            Console.WriteLine($"AUC:             {roc.Area}");
        }
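
For reference, the accuracy, precision, recall, TPR and FPR printed above all derive directly from the four confusion-matrix counts. A quick sketch with placeholder counts (not output from this lesson):

    // Sketch: how the reported metrics derive from confusion-matrix counts.
    // The counts below are placeholders, not results from the lesson above.
    int tp = 480, tn = 1650, fp = 130, fn = 240;

    double accuracy  = (double)(tp + tn) / (tp + tn + fp + fn); // fraction of correct predictions
    double precision = (double)tp / (tp + fp);                  // correct fraction of predicted positives
    double recall    = (double)tp / (tp + fn);                  // fraction of actual positives found
    double tpr       = recall;                                  // sensitivity / true positive rate
    double fpr       = (double)fp / (fp + tn);                  // false positive rate

    Console.WriteLine($"{accuracy:0.000} {precision:0.000} {recall:0.000} {tpr:0.000} {fpr:0.000}");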
Example #18
        /// <summary>
        /// The main application entry point.
        /// </summary>
        /// <param name="args">Command line arguments.</param>
        public static void Main(string[] args)
        {
            // get data
            Console.WriteLine("Loading data....");
            var path    = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
            var housing = Frame.ReadCsv(path, separators: ",");

            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // shuffle the frame
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            housing = housing.IndexRowsWith(indices).SortRowsByKey();

            // create the median_high_house_value feature
            housing.AddColumn("median_high_house_value",
                              housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

            // create one-hot vectors for longitude and latitude
            Console.WriteLine("Binning longitude and latitude...");
            var vectors_long =
                from l in housing["longitude"].Values
                select Vector.Create <double>(
                    1,
                    (from b in Bin(-125, -114)
                     select l >= b.Min && l < b.Max).ToArray());

            var vectors_lat =
                from l in housing["latitude"].Values
                select Vector.Create <double>(
                    1,
                    (from b in Bin(32, 43)
                     select l >= b.Min && l < b.Max).ToArray());

            // multiply vectors and create columns
            Console.WriteLine("Creating longxlat feature cross...");
            var vectors_cross =
                vectors_long.Zip(vectors_lat, (lng, lat) => lng.Outer(lat));

            for (var i = 0; i < 12; i++)
            {
                for (var j = 0; j < 12; j++)
                {
                    housing.AddColumn($"location {i},{j}", from v in vectors_cross select v[i, j]);
                }
            }

            // set up model columns
            var columns = (from i in Enumerable.Range(0, 12)
                           from j in Enumerable.Range(0, 12)
                           select $"location {i},{j}").ToList();

            columns.Add("housing_median_age");
            columns.Add("total_rooms");
            columns.Add("total_bedrooms");
            columns.Add("population");
            columns.Add("households");
            columns.Add("median_income");

            // create training, validation, and test partitions
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            ////////////////////////////////////////////////////////////////////////
            // Without regularization
            ////////////////////////////////////////////////////////////////////////

            // train the model
            Console.WriteLine("Training model without regularization...");
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations  = 50,
                Regularization = 0
            };
            var regression = learner.Learn(
                training.Columns[columns].ToArray2D <double>().ToJagged(),
                training["median_high_house_value"].Values.ToArray());

            // display training results
            Console.WriteLine("TRAINING WITHOUT REGULARIZATION");
            Console.WriteLine($"Weights:     {regression.Weights.ToString<double>("0.00")}");
            Console.WriteLine($"Intercept:   {regression.Intercept}");
            Console.WriteLine();

            // plot a histogram of the nonzero weights
            var histogram = new Histogram();

            histogram.Compute(regression.Weights, 1.0); // set to 1.0 when regularization is disabled

            // draw the histogram
            Plot(histogram, "Without Regularization", "prediction", "count");

            ////////////////////////////////////////////////////////////////////////
            // With regularization
            ////////////////////////////////////////////////////////////////////////

            // train the model
            Console.WriteLine("Training model with regularization...");
            learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations  = 50,
                Regularization = 50
            };
            regression = learner.Learn(
                training.Columns[columns].ToArray2D <double>().ToJagged(),
                training["median_high_house_value"].Values.ToArray());

            // display training results
            Console.WriteLine("TRAINING WITH REGULARIZATION");
            Console.WriteLine($"Weights:     {regression.Weights.ToString<double>("0.00")}");
            Console.WriteLine($"Intercept:   {regression.Intercept}");
            Console.WriteLine();

            // plot a histogram of the nonzero weights
            histogram = new Histogram();
            histogram.Compute(regression.Weights, 0.1); // set to 1.0 when regularization is disabled

            // draw the histogram
            Plot(histogram, "With Regularization", "prediction", "count");

            Console.ReadLine();
        }
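
Beyond the two histograms, a simple way to compare the runs numerically is to count how many weights end up near zero, since L2 regularization shrinks the weights toward zero. A hypothetical helper (the 0.1 threshold is an arbitrary choice):

    // Sketch: count near-zero weights to compare the regularized and
    // unregularized models numerically. The 0.1 threshold is arbitrary.
    static int CountNearZero(double[] weights, double threshold = 0.1)
    {
        int count = 0;
        foreach (double w in weights)
            if (Math.Abs(w) < threshold)
                count++;
        return count;
    }

    // usage (after each Learn call above):
    // Console.WriteLine($"Near-zero weights: {CountNearZero(regression.Weights)}");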
    // Start is called before the first frame update
    void Start()
    {
        timeScale      = 1;
        timeScaleAnt   = timeScale;
        timesRessurect = 0;
        nLaps          = -1;
        timeLap        = 0;

        // Generating POD parameters
        CarInstantiate();

        // Initializing the logistic regression learners (one for thrust, one for steering)
        var teacherT = new IterativeReweightedLeastSquares <LogisticRegression>()
        {
            MaxIterations  = 100,
            Regularization = 1e-6
        };

        var teacherS = new IterativeReweightedLeastSquares <LogisticRegression>()
        {
            MaxIterations  = 100,
            Regularization = 1e-6
        };

        dataSizeSt = 1;
        dataSizeTh = 1;

        InpThrust    = new double[dataSizeTh][];
        InpThrust[0] = new double[4];
        OutThrust    = new int[1];
        OutThrust[0] = 1;

        InpSteer    = new double[dataSizeSt][];
        InpSteer[0] = new double[5];
        OutSteer    = new int[1];
        OutSteer[0] = 0;

        // Use the learning algorithm to induce the tree
        double[][] inputsT0 = new double[1][];
        inputsT0[0]    = new double[4];
        inputsT0[0][0] = 0.5f;
        inputsT0[0][1] = 10f;
        inputsT0[0][2] = 0.5f;
        inputsT0[0][3] = 65f;

        double[][] inputsS0 = new double[1][];
        inputsS0[0]    = new double[5];
        inputsS0[0][0] = 0.5f;
        inputsS0[0][1] = 10f;
        inputsS0[0][2] = 0.5f;
        inputsS0[0][3] = 65f;
        inputsS0[0][4] = 1f;

        int[] outputs0 = new int[1];
        outputs0[0] = 1;
        int[] outputs1 = new int[1];
        outputs1[0] = 0;

        for (int i = 0; i < 4; i++)
        {
            InpThrust[0][i] = i;
        }
        for (int i = 0; i < 5; i++)
        {
            InpSteer[0][i] = i;
        }
        decisionThrust = teacherT.Learn(inputsT0, outputs0);
        decisionSteer  = teacherS.Learn(inputsS0, outputs1);
    }