        public MultivariateSample CreateMultivariateNormalSample(ColumnVector M, SymmetricMatrix C, int n)
        {
            int d = M.Dimension;

            MultivariateSample S = new MultivariateSample(d);

            SquareMatrix A = C.CholeskyDecomposition().SquareRootMatrix();
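            // A is the Cholesky square root of C (A * A^T == C), so M + A * V has mean M
            // and covariance C when V is a vector of independent standard normal deviates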

            Random rng = new Random(1);
            ContinuousDistribution normal = new NormalDistribution();


            for (int i = 0; i < n; i++)
            {
                // create a vector of normal deviates
                ColumnVector V = new ColumnVector(d);
                for (int j = 0; j < d; j++)
                {
                    double y = rng.NextDouble();
                    double z = normal.InverseLeftProbability(y);
                    V[j] = z;
                }

                // form the multivariate distributed vector
                ColumnVector X = M + A * V;

                // add it to the sample
                S.Add(X);
            }

            return(S);
        }
Example #2
        public static Tuple <Point, Vector> Compute(Point[] points)
        {
            Contract.Requires(points.Length >= 2);
            Contract.Ensures(Contract.Result <Tuple <Point, Vector> >() != null);

            if (points.Length == 2)
            {
                return(Tuple.Create(points[0], (points[1] - points[0]).Normalized()));
            }

            var avgX = points.Select(p => p.X).Average();
            var avgY = points.Select(p => p.Y).Average();

            var shifted  = points.Select(p => p - new Vector(avgX, avgY));
            var mvSample = new MultivariateSample(2);

            foreach (var p in shifted)
            {
                mvSample.Add(p.X, p.Y);
            }

            var pca = mvSample.PrincipalComponentAnalysis();
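            // the first principal component of the centered points is the direction of greatest
            // variance, i.e. the best-fit line direction through the centroid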
            var firstComponentVector = pca.Component(0).NormalizedVector();

            return(Tuple.Create(
                       new Point(avgX, avgY),
                       new Vector(firstComponentVector[0], firstComponentVector[1])));
        }
        public void MultivariateMoments()
        {
            // create a random sample
            MultivariateSample     M  = new MultivariateSample(3);
            ContinuousDistribution d0 = new NormalDistribution();
            ContinuousDistribution d1 = new ExponentialDistribution();
            ContinuousDistribution d2 = new UniformDistribution();
            Random rng = new Random(1);
            int    n   = 10;

            for (int i = 0; i < n; i++)
            {
                M.Add(d0.GetRandomValue(rng), d1.GetRandomValue(rng), d2.GetRandomValue(rng));
            }

            // test that moments agree
            for (int i = 0; i < 3; i++)
            {
                int[] p = new int[3];
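                // p is a multi-index of powers, one entry per column; e.g. p = (1, 0, 0) is the raw moment <x0>, the mean of column 0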
                p[i] = 1;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(M.Column(i).Mean, M.RawMoment(p)));
                p[i] = 2;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(M.Column(i).Variance, M.CentralMoment(p)));
                for (int j = 0; j < i; j++)
                {
                    int[] q = new int[3];
                    q[i] = 1;
                    q[j] = 1;
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(M.TwoColumns(i, j).Covariance, M.CentralMoment(q)));
                }
            }
        }
        public void MultivariateLinearRegressionBadInputTest()
        {
            // create a sample
            MultivariateSample sample = new MultivariateSample(3);

            sample.Add(1, 2, 3);
            sample.Add(2, 3, 4);

            // try to predict with too little data
            try {
                sample.LinearRegression(2);
                Assert.IsTrue(false);
            } catch (InvalidOperationException) {
                Assert.IsTrue(true);
            }

            // add enough data
            sample.Add(3, 4, 5);
            sample.Add(4, 5, 6);

            // try to predict a non-existent variable
            try {
                sample.LinearRegression(-1);
                Assert.IsTrue(false);
            } catch (ArgumentOutOfRangeException) {
                Assert.IsTrue(true);
            }

            try {
                sample.LinearRegression(3);
                Assert.IsTrue(false);
            } catch (ArgumentOutOfRangeException) {
                Assert.IsTrue(true);
            }
        }
        public void MultivariateLinearRegressionAgreement2()
        {
            // A multivariate linear regression with just one x-column should be the same as a bivariate linear regression.

            double intercept = 1.0;
            double slope     = -2.0;
            ContinuousDistribution yErrDist = new NormalDistribution(0.0, 3.0);
            UniformDistribution    xDist    = new UniformDistribution(Interval.FromEndpoints(-2.0, 3.0));
            Random rng = new Random(1111111);

            MultivariateSample multi = new MultivariateSample("x", "y");

            for (int i = 0; i < 10; i++)
            {
                double x = xDist.GetRandomValue(rng);
                double y = intercept + slope * x + yErrDist.GetRandomValue(rng);
                multi.Add(x, y);
            }

            // Old multi linear regression code.
            MultiLinearRegressionResult result1 = multi.LinearRegression(1);

            // Simple linear regression code.
            LinearRegressionResult result2 = multi.TwoColumns(0, 1).LinearRegression();

            Assert.IsTrue(TestUtilities.IsNearlyEqual(result1.Parameters["Intercept"].Estimate, result2.Parameters["Intercept"].Estimate));

            // New multi linear regression code.
            MultiLinearRegressionResult result3 = multi.Column(1).ToList().MultiLinearRegression(multi.Column(0).ToList());

            Assert.IsTrue(TestUtilities.IsNearlyEqual(result1.Parameters["Intercept"].Estimate, result3.Parameters["Intercept"].Estimate));
        }
        public void MultivariateLinearLogisticRegressionSimple()
        {
            // define logistic model: P(y = 1) = 1 / (1 + exp(-(a + b0 * x0 + b1 * x1)))
            double a  = 1.0;
            double b0 = -1.0 / 2.0;
            double b1 = 1.0 / 3.0;
            ContinuousDistribution x0distribution = new LaplaceDistribution();
            ContinuousDistribution x1distribution = new NormalDistribution();

            // draw a sample from the model
            Random             rng   = new Random(1);
            MultivariateSample old   = new MultivariateSample("y", "x0", "x1");
            FrameTable         table = new FrameTable();

            table.AddColumn <double>("x0");
            table.AddColumn <double>("x1");
            table.AddColumn <bool>("y");

            for (int i = 0; i < 100; i++)
            {
                double x0 = x0distribution.GetRandomValue(rng);
                double x1 = x1distribution.GetRandomValue(rng);
                double t  = a + b0 * x0 + b1 * x1;
                double p  = 1.0 / (1.0 + Math.Exp(-t));
                bool   y  = (rng.NextDouble() < p);
                old.Add(y ? 1.0 : 0.0, x0, x1);
                table.AddRow(x0, x1, y);
            }

            // do a linear logistic regression fit on the model
            MultiLinearLogisticRegressionResult oldResult = old.LogisticLinearRegression(0);
            MultiLinearLogisticRegressionResult newResult = table["y"].As <bool>().MultiLinearLogisticRegression(
                table["x0"].As <double>(), table["x1"].As <double>()
                );

            // the result should have the appropriate dimension
            Assert.IsTrue(newResult.Parameters.Count == 3);

            // The parameters should match the model
            Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
            Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
            Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

            // Our predictions should be better than chance.
            int correct = 0;

            for (int i = 0; i < table.Rows.Count; i++)
            {
                FrameRow row = table.Rows[i];
                double   x0  = (double)row["x0"];
                double   x1  = (double)row["x1"];
                double   p   = newResult.Predict(x0, x1).Value;
                bool     y   = (bool)row["y"];
                if ((y && p > 0.5) || (!y && p < 0.5))
                {
                    correct++;
                }
            }
            Assert.IsTrue(correct > 0.5 * table.Rows.Count);
        }
Example #7
        public void NormalFitCovariances()
        {
            NormalDistribution N = new NormalDistribution(-1.0, 2.0);

            // Create a bivariate sample to hold our fitted best mu and sigma values
            // so we can determine their covariance as well as their means and variances
            BivariateSample    parameters  = new BivariateSample();
            MultivariateSample covariances = new MultivariateSample(3);

            // A bunch of times, create a normal sample
            for (int i = 0; i < 128; i++)
            {
                // We use small samples so the variation in mu and sigma will be more substantial.
                Sample s = TestUtilities.CreateSample(N, 8, i);

                // Fit each sample to a normal distribution
                NormalFitResult fit = NormalDistribution.FitToSample(s);

                // and record the mu and sigma values from the fit into our bivariate sample
                parameters.Add(fit.Mean.Value, fit.StandardDeviation.Value);

                // also record the claimed covariances among these parameters
                covariances.Add(fit.Parameters.CovarianceMatrix[0, 0], fit.Parameters.CovarianceMatrix[1, 1], fit.Parameters.CovarianceMatrix[0, 1]);
            }

            // the mean fit values should agree with the population distribution
            Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.Mean));
            Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.StandardDeviation));

            // but also the covariances of those fit values should agree with the claimed covariances
            Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(2).Mean));
        }
        public void MultivariateLinearRegressionNullDistribution()
        {
            int d = 4;

            Random             rng = new Random(1);
            NormalDistribution n   = new NormalDistribution();

            Sample fs = new Sample();

            for (int i = 0; i < 64; i++)
            {
                MultivariateSample ms = new MultivariateSample(d);
                for (int j = 0; j < 8; j++)
                {
                    double[] x = new double[d];
                    for (int k = 0; k < d; k++)
                    {
                        x[k] = n.GetRandomValue(rng);
                    }
                    ms.Add(x);
                }
                RegressionResult r = ms.LinearRegression(0);
                fs.Add(r.F.Statistic);
            }

            // conduct a KS test to check that F follows the expected distribution
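            // each fit uses 8 points and 4 parameters (3 coefficients plus an intercept), so the null F statistic has (3, 4) degrees of freedom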
            TestResult ks = fs.KolmogorovSmirnovTest(new FisherDistribution(3, 4));

            Assert.IsTrue(ks.LeftProbability < 0.95);
        }
        public void MultivariateLinearRegressionAgreement()
        {
            Random rng = new Random(1);

            MultivariateSample SA = new MultivariateSample(2);

            for (int i = 0; i < 10; i++)
            {
                SA.Add(rng.NextDouble(), rng.NextDouble());
            }
            RegressionResult RA = SA.LinearRegression(0);
            ColumnVector     PA = RA.Parameters.Best;
            SymmetricMatrix  CA = RA.Parameters.Covariance;

            MultivariateSample SB = SA.Columns(1, 0);
            RegressionResult   RB = SB.LinearRegression(1);
            ColumnVector       PB = RB.Parameters.Best;
            SymmetricMatrix    CB = RB.Parameters.Covariance;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[0], PB[1]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[1], PB[0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 0], CB[1, 1]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 1], CB[1, 0]));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[1, 1], CB[0, 0]));

            BivariateSample  SC = SA.TwoColumns(1, 0);
            RegressionResult RC = SC.LinearRegression();
            ColumnVector     PC = RC.Parameters.Best;
            SymmetricMatrix  CC = RC.Parameters.Covariance;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(PA, PC));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(CA, CC));
        }
Example #10
        public void GumbelFit()
        {
            GumbelDistribution d = new GumbelDistribution(-1.0, 2.0);

            MultivariateSample parameters = new MultivariateSample(2);
            MultivariateSample variances  = new MultivariateSample(3);

            // Do a bunch of fits, record reported parameters and variances
            for (int i = 0; i < 32; i++)
            {
                Sample s = SampleTest.CreateSample(d, 64, i);

                GumbelFitResult r = GumbelDistribution.FitToSample(s);
                parameters.Add(r.Location.Value, r.Scale.Value);
                variances.Add(r.Parameters.CovarianceMatrix[0, 0], r.Parameters.CovarianceMatrix[1, 1], r.Parameters.CovarianceMatrix[0, 1]);

                Assert.IsTrue(r.GoodnessOfFit.Probability > 0.01);
            }

            // The reported parameters should agree with the underlying parameters
            Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(d.Location));
            Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(d.Scale));

            // The reported covariances should agree with the observed covariances
            Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Mean));
            Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Mean));
            Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Mean));
        }
        public void MeansClustering()
        {
            // Re-create the mouse test

            double[] x = new double[3];
            double[] y = new double[3];
            double[] s = new double[3];

            x[0] = 0.25;
            y[0] = 0.75;
            s[0] = 0.1;

            x[1] = 0.75;
            y[1] = 0.75;
            s[1] = 0.1;

            x[2] = 0.5;
            y[2] = 0.5;
            s[2] = 0.2;

            MultivariateSample points = new MultivariateSample(2);
            Random             rng    = new Random(1);
            NormalDistribution d      = new NormalDistribution();

            for (int i = 0; i < 100; i++)
            {
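                // pick one of the three cluster centers at random and scatter a point around it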
                int k = rng.Next(3);
                points.Add(x[k] + s[k] * d.GetRandomValue(rng), y[k] + s[k] * d.GetRandomValue(rng));
            }

            MeansClusteringResult result = points.MeansClustering(3);

            Assert.IsTrue(result.Count == 3);
            Assert.IsTrue(result.Dimension == 2);
        }
Example #12
        public static PCA2DResult Compute(IEnumerable <Point> points)
        {
            Contract.Requires(points != null);
            Contract.Requires(points.Any() && points.Skip(1).Any()); // at least two points

            var xAvg = points.Select(pnt => pnt.X).Average();
            var yAvg = points.Select(pnt => pnt.Y).Average();

            var shiftedPoints =
                from pnt in points
                select new Point(pnt.X - xAvg, pnt.Y - yAvg);

            var mvSample = new MultivariateSample(2);

            foreach (var pnt in shiftedPoints)
            {
                mvSample.Add(pnt.X, pnt.Y);
            }

            var pca    = mvSample.PrincipalComponentAnalysis();
            var first  = pca.Component(0).NormalizedVector();
            var second = pca.Component(1).NormalizedVector();

            return(new PCA2DResult(
                       new Point(xAvg, yAvg),
                       new Vector(first[0], first[1]),
                       new Vector(second[0], second[1])));
        }
Example #13
        public void FitDataToLineUncertaintyTest()
        {
            double[] xs = TestUtilities.GenerateUniformRealValues(0.0, 10.0, 10);
            Func <double, double> fv = delegate(double x) {
                return(2.0 * x - 1.0);
            };
            Func <double, double> fu = delegate(double x) {
                return(1.0 + x);
            };

            MultivariateSample sample     = new MultivariateSample(2);
            SymmetricMatrix    covariance = new SymmetricMatrix(2);

            // create a bunch of small data sets
            for (int i = 0; i < 100; i++)
            {
                UncertainMeasurementSample data = CreateDataSet(xs, fv, fu, i);
                FitResult fit = data.FitToLine();

                sample.Add(fit.Parameters);
                covariance = fit.CovarianceMatrix;
                // because it depends only on the x's and sigmas, the covariance is always the same

                Console.WriteLine("cov_00 = {0}", covariance[0, 0]);
            }

            // the measured covariances should agree with the claimed covariances
            //Assert.IsTrue(sample.PopulationCovariance(0,0).ConfidenceInterval(0.95).ClosedContains(covariance[0,0]));
            //Assert.IsTrue(sample.PopulationCovariance(0,1).ConfidenceInterval(0.95).ClosedContains(covariance[0,1]));
            //Assert.IsTrue(sample.PopulationCovariance(1,0).ConfidenceInterval(0.95).ClosedContains(covariance[1,0]));
            //Assert.IsTrue(sample.PopulationCovariance(1,1).ConfidenceInterval(0.95).ClosedContains(covariance[1,1]));
        }
Example #14
        public void WaldFit()
        {
            WaldDistribution wald = new WaldDistribution(3.5, 2.5);

            BivariateSample    parameters = new BivariateSample();
            MultivariateSample variances  = new MultivariateSample(3);

            for (int i = 0; i < 128; i++)
            {
                Sample s = SampleTest.CreateSample(wald, 16, i);

                FitResult r = WaldDistribution.FitToSample(s);
                parameters.Add(r.Parameters[0], r.Parameters[1]);
                variances.Add(r.Covariance(0, 0), r.Covariance(1, 1), r.Covariance(0, 1));

                Assert.IsTrue(r.GoodnessOfFit.Probability > 0.01);
            }

            Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Mean));
            Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Shape));

            Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Median));
            Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Median));
            Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Median));
        }
        public void MultivariateLinearRegressionTest()
        {
            // define model y = a + b0 * x0 + b1 * x1 + noise
            double a  = 1.0;
            double b0 = -2.0;
            double b1 = 3.0;
            ContinuousDistribution noise = new NormalDistribution(0.0, 10.0);

            // draw a sample from the model
            Random             rng    = new Random(1);
            MultivariateSample sample = new MultivariateSample(3);

            for (int i = 0; i < 100; i++)
            {
                double x0  = -10.0 + 20.0 * rng.NextDouble();
                double x1  = -10.0 + 20.0 * rng.NextDouble();
                double eps = noise.InverseLeftProbability(rng.NextDouble());
                double y   = a + b0 * x0 + b1 * x1 + eps;
                sample.Add(x0, x1, y);
            }

            // do a linear regression fit on the model
            ParameterCollection result = sample.LinearRegression(2).Parameters;

            // the result should have the appropriate dimension
            Assert.IsTrue(result.Count == 3);

            // the parameters should match the model
            Assert.IsTrue(result[0].Estimate.ConfidenceInterval(0.90).ClosedContains(b0));
            Assert.IsTrue(result[1].Estimate.ConfidenceInterval(0.90).ClosedContains(b1));
            Assert.IsTrue(result[2].Estimate.ConfidenceInterval(0.90).ClosedContains(a));
        }
        public void MultivariateLinearRegressionSimple()
        {
            // define model y = a + b0 * x0 + b1 * x1 + noise
            double a  = 1.0;
            double b0 = -2.0;
            double b1 = 3.0;
            ContinuousDistribution x0distribution = new CauchyDistribution(10.0, 5.0);
            ContinuousDistribution x1distribution = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0));
            ContinuousDistribution noise          = new NormalDistribution(0.0, 10.0);

            // draw a sample from the model
            Random             rng    = new Random(1);
            MultivariateSample sample = new MultivariateSample("x0", "x1", "y");
            FrameTable         table  = new FrameTable();

            table.AddColumns <double>("x0", "x1", "y");

            for (int i = 0; i < 100; i++)
            {
                double x0  = x0distribution.GetRandomValue(rng);
                double x1  = x1distribution.GetRandomValue(rng);
                double eps = noise.GetRandomValue(rng);
                double y   = a + b0 * x0 + b1 * x1 + eps;
                sample.Add(x0, x1, y);
                table.AddRow(x0, x1, y);
            }

            // do a linear regression fit on the model
            ParameterCollection         oldResult = sample.LinearRegression(2).Parameters;
            MultiLinearRegressionResult newResult = table["y"].As <double>().MultiLinearRegression(
                table["x0"].As <double>(), table["x1"].As <double>()
                );

            // the result should have the appropriate dimension
            Assert.IsTrue(oldResult.Count == 3);
            Assert.IsTrue(newResult.Parameters.Count == 3);

            // The parameters should match the model
            Assert.IsTrue(oldResult[0].Estimate.ConfidenceInterval(0.90).ClosedContains(b0));
            Assert.IsTrue(oldResult[1].Estimate.ConfidenceInterval(0.90).ClosedContains(b1));
            Assert.IsTrue(oldResult[2].Estimate.ConfidenceInterval(0.90).ClosedContains(a));

            Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
            Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
            Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

            // The residuals should be compatible with the model predictions
            for (int i = 0; i < table.Rows.Count; i++)
            {
                FrameRow row = table.Rows[i];
                double   x0  = (double)row["x0"];
                double   x1  = (double)row["x1"];
                double   yp  = newResult.Predict(x0, x1).Value;
                double   y   = (double)row["y"];
                Assert.IsTrue(TestUtilities.IsNearlyEqual(newResult.Residuals[i], y - yp));
            }
        }
Example #17
        public void Bug6391()
        {
            // this simple PCA caused a NonConvergenceException
            var mvSample = new MultivariateSample(2);

            mvSample.Add(0, 1);
            mvSample.Add(0, -1);
            var pca = mvSample.PrincipalComponentAnalysis();
        }
        private double GetTotalVariance(MultivariateSample sample)
        {
            double total = 0.0;

            for (int i = 0; i < sample.Dimension; i++)
            {
                total += sample.Column(i).Variance;
            }
            return(total);
        }
        public void TimeSeriesFitAR1()
        {
            double alpha = 0.3;
            double mu    = 0.2;
            double sigma = 0.4;
            int    n     = 20;

            // For our fit to AR(1), we have incorporated bias correction (at least
            // for the most important parameter alpha), so we can do a small-n test.

            MultivariateSample parameters  = new MultivariateSample(3);
            MultivariateSample covariances = new MultivariateSample(6);
            Sample             tests       = new Sample();

            for (int i = 0; i < 100; i++)
            {
                TimeSeries series = GenerateAR1TimeSeries(alpha, mu, sigma, n, i + 314159);

                FitResult result = series.FitToAR1();

                parameters.Add(result.Parameters);
                covariances.Add(
                    result.CovarianceMatrix[0, 0],
                    result.CovarianceMatrix[1, 1],
                    result.CovarianceMatrix[2, 2],
                    result.CovarianceMatrix[0, 1],
                    result.CovarianceMatrix[0, 2],
                    result.CovarianceMatrix[1, 2]
                    );

                tests.Add(result.GoodnessOfFit.Probability);
            }

            // Check that fit parameters agree with inputs
            Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(alpha));
            Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(mu));
            Assert.IsTrue(parameters.Column(2).PopulationMean.ConfidenceInterval(0.99).ClosedContains(sigma));

            // Check that reported variances agree with actual variances
            Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Median));
            Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Median));
            Assert.IsTrue(parameters.Column(2).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Median));
            Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(3).Mean));
            Assert.IsTrue(parameters.TwoColumns(0, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(4).Mean));
            Assert.IsTrue(parameters.TwoColumns(1, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(5).Mean));

            // For small n, the fitted alpha can vary considerably, and the formula for var(m) varies
            // quite strongly with alpha, so the computed var(m) have a very long tail. This pushes the
            // mean computed var(m) quite a bit higher than a typical value, so we use medians instead
            // of means for our best guess for the predicted variance.

            TestResult ks = tests.KolmogorovSmirnovTest(new UniformDistribution());

            Assert.IsTrue(ks.Probability > 0.05);
        }
Example #20
        public void TestMultivariateRegression()
        {
            double cz = 1.0;
            double cx = 0.0;
            double cy = 0.0;

            Random       rng           = new Random(1001110000);
            Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
            Distribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
            Distribution eDistribution = new NormalDistribution();

            Sample r2Sample = new Sample();

            for (int i = 0; i < 500; i++)
            {
                MultivariateSample xyzSample = new MultivariateSample(3);
                for (int k = 0; k < 12; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = yDistribution.GetRandomValue(rng);
                    double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
                    xyzSample.Add(x, y, z);
                }
                FitResult fit = xyzSample.LinearRegression(2);
                double    fcx = fit.Parameters[0];
                double    fcy = fit.Parameters[1];
                double    fcz = fit.Parameters[2];

                double ss2 = 0.0;
                double ss1 = 0.0;
                foreach (double[] xyz in xyzSample)
                {
                    ss2 += MoreMath.Sqr(xyz[2] - (fcx * xyz[0] + fcy * xyz[1] + fcz));
                    ss1 += MoreMath.Sqr(xyz[2] - xyzSample.Column(2).Mean);
                }
                double r2 = 1.0 - ss2 / ss1;
                r2Sample.Add(r2);
            }

            Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

            Distribution r2Distribution = new BetaDistribution((3 - 1) / 2.0, (12 - 3) / 2.0);
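            // under the null model (cx = cy = 0), r^2 for 2 regressors and 12 points follows Beta((3 - 1) / 2, (12 - 3) / 2)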

            //Distribution r2Distribution = new BetaDistribution((10 - 2) / 2.0, (2 - 1) / 2.0);
            Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Console.WriteLine(ks.RightProbability);
            Console.WriteLine(ks.Probability);
        }
Example #21
        public static MultivariateSample ToSample(this IEnumerable <Series> series)
        {
            var sl     = series.ToArray();
            var sample = new MultivariateSample(sl.Length);
            var row    = new double[sl.Length];
            var count  = sl.Select(s => s.Count).Max();

            // build one row per index j, taking the j-th value from each series
            // (assumes every series holds at least `count` values)
            for (int j = 0; j < count; j++)
            {
                for (int i = 0; i < sl.Length; i++)
                {
                    row[i] = sl[i].As <double>()[j];
                }
                sample.Add(row);
            }
            return(sample);
        }
Example #22
        public void TimeSeriesFitToMA1()
        {
            double beta  = -0.2;
            double mu    = 0.4;
            double sigma = 0.6;
            int    n     = 100;

            // If we are going to strictly test parameter values and variances,
            // we can't pick n too small, because the formulas we use are only
            // asymptotically unbiased.

            MultivariateSample parameters  = new MultivariateSample(3);
            MultivariateSample covariances = new MultivariateSample(6);
            Sample             tests       = new Sample("p");

            for (int i = 0; i < 64; i++)
            {
                TimeSeries series = GenerateMA1TimeSeries(beta, mu, sigma, n, n * i + 314159);

                Debug.Assert(series.Count == n);

                MA1FitResult result = series.FitToMA1();

                //Assert.IsTrue(result.Dimension == 3);
                parameters.Add(result.Parameters.ValuesVector);
                covariances.Add(
                    result.Parameters.CovarianceMatrix[0, 0],
                    result.Parameters.CovarianceMatrix[1, 1],
                    result.Parameters.CovarianceMatrix[2, 2],
                    result.Parameters.CovarianceMatrix[0, 1],
                    result.Parameters.CovarianceMatrix[0, 2],
                    result.Parameters.CovarianceMatrix[1, 2]
                    );
                tests.Add(result.GoodnessOfFit.Probability);
            }

            Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(mu));
            Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(beta));
            Assert.IsTrue(parameters.Column(2).PopulationMean.ConfidenceInterval(0.99).ClosedContains(sigma));

            Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters.Column(2).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
            Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(3).Mean));
            Assert.IsTrue(parameters.TwoColumns(0, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(4).Mean));
            Assert.IsTrue(parameters.TwoColumns(1, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(5).Mean));

            Assert.IsTrue(tests.KuiperTest(new UniformDistribution()).Probability > 0.01);
        }
Example #23
        public List <double> Remove(List <double> Data, ref Stack <ITransformation> Transforms)
        {
            double yestHigh = 0.0;
            double yestLow  = 0.0;
            double lastHigh = 0.0;
            double lastLow  = 0.0;

            MultivariateSample mvS = new MultivariateSample(3);

            for (int i = 0; i < Data.Count; i++)
            {
                yestHigh = lastHigh;
                yestLow  = lastLow;
                lastHigh = Highs[i];
                lastLow  = Lows[i];

                if (i > 0)
                {
                    mvS.Add(yestHigh, yestLow, Closes[i]);
                }
            }

            List <double> detrendedData = (List <double>)Utilities.DeepClone(Data);

            double[] parameters = mvS.LinearRegression(2).Parameters();
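            // parameters[0] and parameters[1] are the coefficients on the previous high and low; parameters[2] is the intercept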

            for (int i = 0; i < Data.Count; i++)
            {
                double regression;

                if (i > 0)
                {
                    regression =
                        (parameters[0] * Highs[i - 1])
                        + (parameters[1] * Lows[i - 1])
                        + parameters[2];
                }
                else
                {
                    regression = Closes[i];
                }

                detrendedData[i] -= regression;
            }

            Transforms.Push(new HighLowTransformation(yestHigh, yestLow, lastHigh, lastLow, parameters));

            return(detrendedData);
        }
Example #24
        public GeometricClass Classify(PointsSequence p)
        {
            var points = p.Points;
            var xAvg   = points.Select(pnt => pnt.X).Average();
            var yAvg   = points.Select(pnt => pnt.Y).Average();

            var shiftedPoints =
                from pnt in points
                select new Point(pnt.X - xAvg, pnt.Y - yAvg);

            var mvSample = new MultivariateSample(2);

            foreach (var pnt in shiftedPoints)
            {
                mvSample.Add(pnt.X, pnt.Y);
            }

            var pca        = mvSample.PrincipalComponentAnalysis();
            var firstSize  = pca.Component(0).ScaledVector().Norm();
            var secondSize = pca.Component(1).ScaledVector().Norm();

            var fraction = secondSize / firstSize;
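            // a small second-to-first component ratio means the points are nearly collinear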

            if (fraction < pcaFractionThreshold)
            {
                return(GeometricClass.Line);
            }
            else
            {
                var conicEllipse      = EllipseFitter.ConicFit(p.Points);
                var parametricEllipse = EllipseFitter.Fit(p.Points);

                var deviations =
                    from pnt in p.Points
                    select EllipseFitter.ComputeDeviation(conicEllipse, parametricEllipse, pnt);

                var avgDeviation = deviations.Average();

                if (avgDeviation < ellipseFitThreshold)
                {
                    return(GeometricClass.Ellipse);
                }
                else
                {
                    return(GeometricClass.Line);
                }
            }
        }
Example #25
        public void FitDataToPolynomialUncertaintiesTest()
        {
            // make sure the reported uncertainties in fit parameters really represent their standard deviations,
            // and that the reported off-diagonal elements really represent their correlations

            double[] xs = TestUtilities.GenerateUniformRealValues(-1.0, 2.0, 10);
            Func <double, double> fv = delegate(double x) {
                return(0.0 + 1.0 * x + 2.0 * x * x);
            };
            Func <double, double> fu = delegate(double x) {
                return(0.5);
            };

            // keep track of best-fit parameters and claimed parameter covariances
            MultivariateSample sample = new MultivariateSample(3);

            // generate 50 small data sets and fit each
            UncertainMeasurementFitResult[] fits = new UncertainMeasurementFitResult[50];
            for (int i = 0; i < fits.Length; i++)
            {
                UncertainMeasurementSample set = CreateDataSet(xs, fv, fu, 314159 + i);
                fits[i] = set.FitToPolynomial(2);
                sample.Add(fits[i].Parameters.ValuesVector);
            }

            // check that parameters agree
            for (int i = 0; i < 3; i++)
            {
                Console.WriteLine(sample.Column(i).PopulationMean);
            }

            // for each parameter, verify that the standard deviation of the reported values agrees with the (average) reported uncertainty
            double[] pMeans = new double[3];
            for (int i = 0; i <= 2; i++)
            {
                Sample values        = new Sample();
                Sample uncertainties = new Sample();
                for (int j = 0; j < fits.Length; j++)
                {
                    UncertainValue p = fits[j].Parameters[i].Estimate;
                    values.Add(p.Value);
                    uncertainties.Add(p.Uncertainty);
                }
                pMeans[i] = values.Mean;
                Assert.IsTrue(values.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.Mean));
            }
        }
Example #26
        public void BivariateNonlinearFitVariances()
        {
            // Verify that we can fit a non-linear function,
            // that the estimated parameters do cluster around the true values,
            // and that the estimated parameter covariances do reflect the actually observed covariances

            double a = 2.7;
            double b = 3.1;

            ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
            ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0);

            FrameTable parameters = new FrameTable();

            parameters.AddColumns <double>("a", "b");
            MultivariateSample covariances = new MultivariateSample(3);

            for (int i = 0; i < 64; i++)
            {
                BivariateSample sample = new BivariateSample();
                Random          rng    = new Random(i);
                for (int j = 0; j < 8; j++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a * Math.Pow(x, b) + eDistribution.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                NonlinearRegressionResult fit = sample.NonlinearRegression(
                    (IReadOnlyList <double> p, double x) => p[0] * Math.Pow(x, p[1]),
                    new double[] { 1.0, 1.0 }
                    );

                parameters.AddRow(fit.Parameters.ValuesVector);
                covariances.Add(fit.Parameters.CovarianceMatrix[0, 0], fit.Parameters.CovarianceMatrix[1, 1], fit.Parameters.CovarianceMatrix[0, 1]);
            }

            Assert.IsTrue(parameters["a"].As <double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(a));
            Assert.IsTrue(parameters["b"].As <double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(b));

            Assert.IsTrue(parameters["a"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(parameters["b"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
            Assert.IsTrue(parameters["a"].As <double>().PopulationCovariance(parameters["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
            Assert.IsTrue(Bivariate.PopulationCovariance(parameters["a"].As <double>(), parameters["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
        }
        public void OldMultivariateLinearRegressionTest()
        {
            MultivariateSample sample = new MultivariateSample(3);

            sample.Add(98322, 81449, 269465);
            sample.Add(65060, 31749, 121900);
            sample.Add(36052, 14631, 37004);
            sample.Add(31829, 27732, 91400);
            sample.Add(7101, 9693, 54900);
            sample.Add(41294, 4268, 16160);
            sample.Add(16614, 4697, 21500);
            sample.Add(3449, 4233, 9306);
            sample.Add(3386, 5293, 38300);
            sample.Add(6242, 2039, 13369);
            sample.Add(14036, 7893, 29901);
            sample.Add(2636, 3345, 10930);
            sample.Add(869, 1135, 5100);
            sample.Add(452, 727, 7653);

            /*
             * sample.Add(41.9, 29.1, 251.3);
             * sample.Add(43.4, 29.3, 251.3);
             * sample.Add(43.9, 29.5, 248.3);
             * sample.Add(44.5, 29.7, 267.5);
             * sample.Add(47.3, 29.9, 273.0);
             * sample.Add(47.5, 30.3, 276.5);
             * sample.Add(47.9, 30.5, 270.3);
             * sample.Add(50.2, 30.7, 274.9);
             * sample.Add(52.8, 30.8, 285.0);
             * sample.Add(53.2, 30.9, 290.0);
             * sample.Add(56.7, 31.5, 297.0);
             * sample.Add(57.0, 31.7, 302.5);
             * sample.Add(63.5, 31.9, 304.5);
             * sample.Add(65.3, 32.0, 309.3);
             * sample.Add(71.1, 32.1, 321.7);
             * sample.Add(77.0, 32.5, 330.7);
             * sample.Add(77.8, 32.9, 349.0);
             */

            Console.WriteLine(sample.Count);

            //sample.LinearRegression(0);
            sample.LinearRegression(0);
        }
Example #28
        public void TestMultivariateRegression()
        {
            // Collect r^2 values from multivariate linear regressions.

            double cz = 1.0;
            double cx = 0.0;
            double cy = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
            ContinuousDistribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            List <double> r2Sample = new List <double>();

            for (int i = 0; i < 500; i++)
            {
                MultivariateSample xyzSample = new MultivariateSample(3);
                for (int k = 0; k < 12; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = yDistribution.GetRandomValue(rng);
                    double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
                    xyzSample.Add(x, y, z);
                }
                MultiLinearRegressionResult fit = xyzSample.LinearRegression(2);
                double fcx = fit.Parameters.ValuesVector[0];
                double fcy = fit.Parameters.ValuesVector[1];
                double fcz = fit.Parameters.ValuesVector[2];

                r2Sample.Add(fit.RSquared);
            }

            // Under the null model (cx = cy = 0), r^2 should follow a Beta((3 - 1) / 2, (12 - 3) / 2) distribution.
            ContinuousDistribution r2Distribution = new BetaDistribution((3 - 1) / 2.0, (12 - 3) / 2.0);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
        public void MultivariateLinearRegressionTest()
        {
            // define model y = a + b0 * x0 + b1 * x1 + noise
            double       a     = 1.0;
            double       b0    = -2.0;
            double       b1    = 3.0;
            Distribution noise = new NormalDistribution(0.0, 10.0);

            // draw a sample from the model
            Random             rng    = new Random(1);
            MultivariateSample sample = new MultivariateSample(3);

            for (int i = 0; i < 100; i++)
            {
                double x0  = -10.0 + 20.0 * rng.NextDouble();
                double x1  = -10.0 + 20.0 * rng.NextDouble();
                double eps = noise.InverseLeftProbability(rng.NextDouble());
                double y   = a + b0 * x0 + b1 * x1 + eps;
                sample.Add(x0, x1, y);
            }

            // do a linear regression fit on the model
            FitResult result = sample.LinearRegression(2);

            // the result should have the appropriate dimension
            Assert.IsTrue(result.Dimension == 3);

            // the result should be significant
            Console.WriteLine("{0} {1}", result.GoodnessOfFit.Statistic, result.GoodnessOfFit.LeftProbability);
            Assert.IsTrue(result.GoodnessOfFit.LeftProbability > 0.95);

            // the parameters should match the model
            Console.WriteLine(result.Parameter(0));
            Assert.IsTrue(result.Parameter(0).ConfidenceInterval(0.90).ClosedContains(b0));
            Console.WriteLine(result.Parameter(1));
            Assert.IsTrue(result.Parameter(1).ConfidenceInterval(0.90).ClosedContains(b1));
            Console.WriteLine(result.Parameter(2));
            Assert.IsTrue(result.Parameter(2).ConfidenceInterval(0.90).ClosedContains(a));
        }
        public void MultivariateManipulations()
        {
            MultivariateSample S = new MultivariateSample(3);

            Assert.IsTrue(S.Dimension == 3);

            Assert.IsTrue(S.Count == 0);

            S.Add(1.1, 1.2, 1.3);
            S.Add(2.1, 2.2, 2.3);

            Assert.IsTrue(S.Count == 2);

            // check that an entry is there, remove it, check that it is not there
            Assert.IsTrue(S.Contains(1.1, 1.2, 1.3));
            Assert.IsTrue(S.Remove(1.1, 1.2, 1.3));
            Assert.IsFalse(S.Contains(1.1, 1.2, 1.3));

            // clear it and check that the count went to zero
            S.Clear();
            Assert.IsTrue(S.Count == 0);
        }