Esempio n. 1
0
        public void prediction_test()
        {
            // Example from http://www.real-statistics.com/multiple-regression/confidence-and-prediction-intervals/
            var dt = Accord.IO.CsvReader.FromText(Resources.linreg, true).ToTable();

            double[]   y = dt.Columns["Poverty"].ToArray();
            double[][] x = dt.ToArray("Infant Mort", "White", "Crime");

            // Use Ordinary Least Squares to learn the regression
            OrdinaryLeastSquares ols = new OrdinaryLeastSquares();

            // Use OLS to learn the multiple linear regression
            MultipleLinearRegression regression = ols.Learn(x, y);

            Assert.AreEqual(3, regression.NumberOfInputs);
            Assert.AreEqual(1, regression.NumberOfOutputs);

            Assert.AreEqual(0.443650703716698, regression.Intercept, 1e-5);
            Assert.AreEqual(1.2791842411083394, regression.Weights[0], 1e-5);
            Assert.AreEqual(0.036259242392669415, regression.Weights[1], 1e-5);
            Assert.AreEqual(0.0014225014835705938, regression.Weights[2], 1e-5);

            double rse = regression.GetStandardError(x, y);

            Assert.AreEqual(rse, 2.4703520840798507, 1e-5);


            double[][] im  = ols.GetInformationMatrix();
            double     mse = regression.GetStandardError(x, y);

            double[] se = regression.GetStandardErrors(mse, im);

            Assert.AreEqual(0.30063086032754965, se[0], 1e-10);
            Assert.AreEqual(0.033603448179240082, se[1], 1e-10);
            Assert.AreEqual(0.0022414548866296342, se[2], 1e-10);
            Assert.AreEqual(3.9879881671805824, se[3], 1e-10);

            double[] x0 = new double[] { 7, 80, 400 };
            double   y0 = regression.Transform(x0);

            Assert.AreEqual(y0, 12.867680376316864, 1e-5);

            double actual = regression.GetStandardError(x0, mse, im);

            Assert.AreEqual(0.35902764658470271, actual, 1e-10);

            DoubleRange ci = regression.GetConfidenceInterval(x0, mse, x.Length, im);

            Assert.AreEqual(ci.Min, 12.144995206616116, 1e-5);
            Assert.AreEqual(ci.Max, 13.590365546017612, 1e-5);

            actual = regression.GetPredictionStandardError(x0, mse, im);
            Assert.AreEqual(2.4963053239397244, actual, 1e-10);

            DoubleRange pi = regression.GetPredictionInterval(x0, mse, x.Length, im);

            Assert.AreEqual(pi.Min, 7.8428783761994554, 1e-5);
            Assert.AreEqual(pi.Max, 17.892482376434273, 1e-5);
        }
Esempio n. 2
0
        private void compute(double[][] x, double[] y)
        {
            int n = x.Length;
            int p = NumberOfInputs;

            SSt             = 0;
            SSe             = 0;
            outputMean      = 0.0;
            NumberOfSamples = x.Length;

            // Compute the regression
            OrdinaryLeastSquares.Token = Token;
            regression        = OrdinaryLeastSquares.Learn(x, y);
            informationMatrix = OrdinaryLeastSquares.GetInformationMatrix();

            // Calculate mean of the expected outputs
            outputMean = y.Mean();

            // Calculate actual outputs (results)
#pragma warning disable 612, 618
            results = regression.Transform(x);

            // Calculate SSe and SSt
            for (int i = 0; i < x.Length; i++)
            {
                double d;
                d    = y[i] - results[i];
                SSe += d * d;

                d    = y[i] - outputMean;
                SSt += d * d;
            }


            // Calculate SSr
            SSr = SSt - SSe;

            // Calculate R-Squared
            rSquared = (SSt != 0) ? 1.0 - (SSe / SSt) : 1.0;

            // Calculated Adjusted R-Squared
            if (rSquared == 1)
            {
                rAdjusted = 1;
            }
            else
            {
                if (n - p == 1)
                {
                    rAdjusted = double.NaN;
                }
                else
                {
                    rAdjusted = 1.0 - (1.0 - rSquared) * ((n - 1.0) / (n - p - 1.0));
                }
            }

            // Calculate Degrees of Freedom
            DFr = p;
            DFe = n - (p + 1);
            DFt = DFr + DFe;

            // Calculate Sum of Squares Mean
            MSe = SSe / DFe;
            MSr = SSr / DFr;
            MSt = SSt / DFt;

            // Calculate the F statistic
            ftest    = new FTest(MSr / MSe, DFr, DFe);
            stdError = Math.Sqrt(MSe);

            // Create the ANOVA table
            List <AnovaVariationSource> table = new List <AnovaVariationSource>();
            table.Add(new AnovaVariationSource(this, "Regression", SSr, DFr, MSr, ftest));
            table.Add(new AnovaVariationSource(this, "Error", SSe, DFe, MSe, null));
            table.Add(new AnovaVariationSource(this, "Total", SSt, DFt, MSt, null));
            this.anovaTable = new AnovaSourceCollection(table);

            // Compute coefficient standard errors;
            standardErrors = new double[NumberOfInputs + 1];
            for (int i = 0; i < informationMatrix.Length; i++)
            {
                standardErrors[i] = Math.Sqrt(MSe * informationMatrix[i][i]);
            }

            // Compute coefficient tests
            for (int i = 0; i < CoefficientValues.Length; i++)
            {
                double tStatistic = CoefficientValues[i] / standardErrors[i];

                ttests[i] = new TTest(estimatedValue: CoefficientValues[i],
                                      standardError: standardErrors[i], degreesOfFreedom: DFe);

                ftests[i] = new FTest(tStatistic * tStatistic, 1, DFe);

                confidences[i] = ttests[i].GetConfidenceInterval(confidencePercent);
            }


            // Compute model performance tests
            ttest         = new TTest(results, outputMean);
            ztest         = new ZTest(results, outputMean);
            chiSquareTest = new ChiSquareTest(y, results, n - p - 1);
#pragma warning restore 612, 618
        }
        public void prediction_test()
        {
            // Example from http://www.real-statistics.com/multiple-regression/confidence-and-prediction-intervals/
            var dt = Accord.IO.CsvReader.FromText(Resources.linreg, true).ToTable();

            double[][] y = dt.ToArray("Poverty");
            double[][] x = dt.ToArray("Infant Mort", "White", "Crime");

            // Use Ordinary Least Squares to learn the regression
            OrdinaryLeastSquares ols = new OrdinaryLeastSquares();

            // Use OLS to learn the multiple linear regression
            MultivariateLinearRegression regression = ols.Learn(x, y);

            Assert.AreEqual(3, regression.NumberOfInputs);
            Assert.AreEqual(1, regression.NumberOfOutputs);

            Assert.AreEqual(0.443650703716698, regression.Intercepts[0], 1e-5);
            Assert.AreEqual(1.2791842411083394, regression.Weights[0][0], 1e-5);
            Assert.AreEqual(0.036259242392669415, regression.Weights[1][0], 1e-5);
            Assert.AreEqual(0.0014225014835705938, regression.Weights[2][0], 1e-5);

            double rse = regression.GetStandardError(x, y)[0];
            Assert.AreEqual(rse, 2.4703520840798507, 1e-5);


            double[][] im = ols.GetInformationMatrix();
            double[] mse = regression.GetStandardError(x, y);
            double[][] se = regression.GetStandardErrors(mse, im);

            Assert.AreEqual(0.30063086032754965, se[0][0], 1e-10);
            Assert.AreEqual(0.033603448179240082, se[0][1], 1e-10);
            Assert.AreEqual(0.0022414548866296342, se[0][2], 1e-10);
            Assert.AreEqual(3.9879881671805824, se[0][3], 1e-10);

            double[] x0 = new double[] { 7, 80, 400 };
            double y0 = regression.Transform(x0)[0];
            Assert.AreEqual(y0, 12.867680376316864, 1e-5);

            double actual = regression.GetStandardError(x0, mse, im)[0];

            Assert.AreEqual(0.35902764658470271, actual, 1e-10);

            DoubleRange ci = regression.GetConfidenceInterval(x0, mse, x.Length, im)[0];
            Assert.AreEqual(ci.Min, 12.144995206616116, 1e-5);
            Assert.AreEqual(ci.Max, 13.590365546017612, 1e-5);

            actual = regression.GetPredictionStandardError(x0, mse, im)[0];
            Assert.AreEqual(2.4963053239397244, actual, 1e-10);

            DoubleRange pi = regression.GetPredictionInterval(x0, mse, x.Length, im)[0];
            Assert.AreEqual(pi.Min, 7.8428783761994554, 1e-5);
            Assert.AreEqual(pi.Max, 17.892482376434273, 1e-5);
        }
Esempio n. 4
0
        private static RegressionResult PerformOls(double[] yVal, string[] xVars, double[][] xVals)
        {
            var ols = new OrdinaryLeastSquares()
            {
                // intercept should represent the return if no impact from inputs. (mean daily return)
                // forcing to 0 as we assume that all explanation of price move should be due to independenet variables.
                // in reality there is likely an unexplainable drift. For example daily bleed (MER, funding etc).
                UseIntercept = false
            };

            MultipleLinearRegression regression = ols.Learn(xVals, yVal);

            double[] predicted = regression.Transform(xVals);

            RegressionResult r = new RegressionResult();

            r.ModelType     = "OLS";
            r.StandardError = regression.GetStandardError(xVals, yVal);
            // r.RSquared = new RSquaredLoss(xVals.Length, yVal, ).Loss(predicted);
            r.RSquared    = regression.CoefficientOfDetermination(xVals, yVal, false);
            r.AdjRSquared = regression.CoefficientOfDetermination(xVals, yVal, true);
            r.Betas       = SgtStringUtils.DoubleArrayToRoundedDelimitedString(regression.Weights);
            r.xVars       = string.Join(";", xVars);

            // TODO need to validate the below section. add p-values?
            double[] coeffStandardErrors = regression.GetStandardErrors(r.StandardError, ols.GetInformationMatrix());
            double[] coeffTScores        = DoubleDivide(regression.Weights, coeffStandardErrors);
            double[] coeffPVals          = new double[coeffTScores.Length];

            double df = xVals.Length - 1;

            for (int i = 0; i < coeffTScores.Length; i++)
            {
                TTest tTest = new TTest(coeffTScores[i], df, OneSampleHypothesis.ValueIsDifferentFromHypothesis);
                coeffPVals[i] = tTest.PValue;
            }

            r.CoeffStandardErrors = SgtStringUtils.DoubleArrayToRoundedDelimitedString(coeffStandardErrors);
            r.CoeffTScores        = SgtStringUtils.DoubleArrayToRoundedDelimitedString(coeffTScores);
            r.CoeffPVals          = SgtStringUtils.DoubleArrayToRoundedDelimitedString(coeffPVals);
            // /TODO

            r.xVarCount    = regression.NumberOfInputs;
            r.SamplesCount = yVal.Length;
            r.Mean         = regression.Intercept;

            return(r);
        }