/// <summary>
/// Scatters 100 points around three centers in the plane and checks that a 3-means
/// clustering of them reports three clusters of dimension two.
/// </summary>
public void MeansClustering()
        {
            // Re-create the mouse test: every cluster has a center (x, y) and a spread.
            double[] centerX = { 0.25, 0.75, 0.5 };
            double[] centerY = { 0.75, 0.75, 0.5 };
            double[] spread  = { 0.1, 0.1, 0.2 };

            Random             random         = new Random(1);
            NormalDistribution standardNormal = new NormalDistribution();
            MultivariateSample cloud          = new MultivariateSample(2);

            for (int pointIndex = 0; pointIndex < 100; pointIndex++)
            {
                // Choose the cluster first, then draw the x offset before the y offset.
                int    cluster = random.Next(3);
                double px      = centerX[cluster] + spread[cluster] * standardNormal.GetRandomValue(random);
                double py      = centerY[cluster] + spread[cluster] * standardNormal.GetRandomValue(random);
                cloud.Add(px, py);
            }

            MeansClusteringResult clustering = cloud.MeansClustering(3);

            Assert.IsTrue(clustering.Count == 3);
            Assert.IsTrue(clustering.Dimension == 2);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Appends one synthetic row to <paramref name="table"/> for every entry in <paramref name="names"/>.
        /// </summary>
        /// <param name="table">Table that receives the rows; its row count at the time of insertion becomes the id of the new row.</param>
        /// <param name="names">Names to generate rows for, one row per name.</param>
        /// <param name="sex">Value written unchanged into every generated row.</param>
        /// <param name="meanHeight">Center of the generated heights (presumably centimeters, since the height is divided by 100 before squaring).</param>
        /// <param name="stddevHeight">Scale applied to the standard normal draw for the height.</param>
        /// <param name="meanBmi">Center of the generated BMI values.</param>
        /// <param name="stddevBmi">Scale applied to the standard normal draw for the BMI.</param>
        /// <param name="flag">Integer that shifts the logit of the boolean outcome by 0.8 per unit.</param>
        /// <param name="rng">Random number generator used for every draw.</param>
        private static void AddRows(FrameTable table, IReadOnlyList <string> names, string sex, double meanHeight, double stddevHeight, double meanBmi, double stddevBmi, int flag, Random rng)
        {
            NormalDistribution  standardNormal  = new NormalDistribution();
            UniformDistribution ageDistribution = new UniformDistribution(Interval.FromEndpoints(15.0, 75.0));

            for (int index = 0; index < names.Count; index++)
            {
                string name = names[index];

                // The draw order (height, BMI, outcome, age) is kept fixed so seeded runs stay reproducible.
                double heightScore = standardNormal.GetRandomValue(rng);
                double height      = meanHeight + stddevHeight * heightScore;

                double bmiScore = standardNormal.GetRandomValue(rng);
                double bmi      = meanBmi + stddevBmi * bmiScore;

                // weight = (height / 100)^2 * BMI
                double weight = MoreMath.Sqr(height / 100.0) * bmi;

                // Logistic model for the boolean outcome, driven by the BMI score and the flag.
                double logit       = -0.4 + 0.6 * bmiScore + 0.8 * flag;
                double probability = 1.0 / (1.0 + Math.Exp(-logit));
                bool   outcome     = rng.NextDouble() < probability;

                int rowId = table.Rows.Count;

                // Age in years is uniform on [15, 75]; the birthdate is truncated to a whole day.
                double   ageInYears = ageDistribution.GetRandomValue(rng);
                DateTime birthdate  = (DateTime.Now - TimeSpan.FromDays(365.24 * ageInYears)).Date;

                table.AddRow(rowId, name, sex, birthdate, height, weight, outcome);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Draws pairs of samples from one normal distribution and checks that the resulting Student t
        /// statistics are compatible with the distribution reported by the test itself.
        /// </summary>
        public void StudentTNullDistributionTest()
        {
            Random                 random     = new Random(1);
            ContinuousDistribution population = new NormalDistribution(-1.0, 2.0);

            foreach (int size in TestUtilities.GenerateIntegerValues(2, 32, 4))
            {
                Sample                 statistics       = new Sample();
                ContinuousDistribution nullDistribution = null;

                for (int trial = 0; trial < 128; trial++)
                {
                    Sample first  = new Sample();
                    Sample second = new Sample();

                    // Draws alternate between the two samples: first, second, first, ...
                    for (int draw = 0; draw < size; draw++)
                    {
                        first.Add(population.GetRandomValue(random));
                        second.Add(population.GetRandomValue(random));
                    }

                    TestResult outcome = Sample.StudentTTest(first, second);
                    statistics.Add(outcome.Statistic);

                    // Remember the claimed null distribution; the last trial's value is the one used below.
                    nullDistribution = outcome.Distribution;
                }

                // The collected statistics should look like draws from the claimed distribution.
                TestResult ks = statistics.KolmogorovSmirnovTest(nullDistribution);
                Assert.IsTrue(ks.Probability > 0.01);

                // Their mean and standard deviation should match as well.
                var meanInterval = statistics.PopulationMean.ConfidenceInterval(0.99);
                Assert.IsTrue(meanInterval.ClosedContains(nullDistribution.Mean));

                var deviationInterval = statistics.PopulationStandardDeviation.ConfidenceInterval(0.99);
                Assert.IsTrue(deviationInterval.ClosedContains(nullDistribution.StandardDeviation));
            }
        }
Exemplo n.º 4
0
        // For red noise, use AR(1) with mu = 0

        /// <summary>
        /// Builds a synthetic series in which each deviation is <paramref name="alpha"/> times the
        /// previous deviation plus a fresh normal shock.
        /// </summary>
        /// <param name="alpha">Factor applied to the previous deviation at every step.</param>
        /// <param name="mu">Constant added to each deviation before it is stored in the series.</param>
        /// <param name="sigma">Second constructor argument of the zero-centered normal distribution the shocks are drawn from.</param>
        /// <param name="count">Number of values to generate.</param>
        /// <param name="seed">Seed of the random number generator.</param>
        /// <returns>The generated series.</returns>
        private TimeSeries GenerateAR1TimeSeries(double alpha, double mu, double sigma, int count, int seed = 1)
        {
            Random             random = new Random(seed);
            NormalDistribution shocks = new NormalDistribution(0.0, sigma);
            TimeSeries         output = new TimeSeries();

            // Starting deviation: one shock scaled by 1 / sqrt(1 - alpha^2).
            double deviation = shocks.GetRandomValue(random) / Math.Sqrt(1.0 - alpha * alpha);

            int produced = 0;
            while (produced < count)
            {
                deviation = alpha * deviation + shocks.GetRandomValue(random);
                output.Add(mu + deviation);
                produced++;
            }

            return output;
        }
Exemplo n.º 5
0
        // For white noise, use MA(1) with beta = mu = 0

        /// <summary>
        /// Builds a synthetic series in which each value is <paramref name="mu"/> plus the current
        /// normal shock plus <paramref name="beta"/> times the previous shock.
        /// </summary>
        /// <param name="beta">Weight of the previous shock.</param>
        /// <param name="mu">Constant added to every value.</param>
        /// <param name="sigma">Second constructor argument of the zero-centered normal distribution the shocks are drawn from.</param>
        /// <param name="count">Number of values to generate.</param>
        /// <param name="seed">Seed of the random number generator.</param>
        /// <returns>The generated series.</returns>
        private TimeSeries GenerateMA1TimeSeries(double beta, double mu, double sigma, int count, int seed = 1)
        {
            Random             random = new Random(seed);
            NormalDistribution shocks = new NormalDistribution(0.0, sigma);
            TimeSeries         output = new TimeSeries();

            // One shock is drawn up front so the first value already has a predecessor.
            double lagged = shocks.GetRandomValue(random);

            for (int remaining = count; remaining > 0; remaining--)
            {
                double current = shocks.GetRandomValue(random);
                output.Add(mu + current + beta * lagged);
                lagged = current;
            }

            return output;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Checks that Pearson r values computed from small, uncorrelated normal samples are
        /// compatible with <c>PearsonRDistribution</c> for the same sample size.
        /// </summary>
        public void PearsonRDistribution()
        {
            // the sample variables must be normal, but their parameters are arbitrary
            NormalDistribution xSource = new NormalDistribution(1, 2);
            NormalDistribution ySource = new NormalDistribution(3, 4);
            Random             random  = new Random(1);

            // only small sizes are used so that the difference from the normal distribution is visible
            // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound"
            int[] sampleSizes = { 3, 4, 5, 6, 8 };

            foreach (int size in sampleSizes)
            {
                Console.WriteLine("n={0}", size);

                // collect 100 r values, each from a fresh bivariate sample with no cross-correlation
                Sample correlations = new Sample();
                for (int trial = 0; trial < 100; trial++)
                {
                    BivariateSample pairs = new BivariateSample();
                    for (int draw = 0; draw < size; draw++)
                    {
                        double xValue = xSource.GetRandomValue(random);
                        double yValue = ySource.GetRandomValue(random);
                        pairs.Add(xValue, yValue);
                    }
                    correlations.Add(pairs.PearsonRTest().Statistic);
                }

                // compare the collected r values with the expected distribution
                TestResult ks              = correlations.KolmogorovSmirnovTest(new PearsonRDistribution(size));
                var        leftProbability = ks.LeftProbability;
                Console.WriteLine("P={0}", leftProbability);
                Assert.IsTrue(leftProbability < 0.95);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Fits lines to uncorrelated normal data and checks that the goodness-of-fit statistics
        /// are compatible with a Fisher distribution with parameters (1, 5).
        /// </summary>
        public void BivariateLinearRegressionGoodnessOfFitDistribution()
        {
            // x and y are drawn independently, so no fit is expected to find a real relationship
            Random             random  = new Random(987654321);
            NormalDistribution xSource = new NormalDistribution(1.0, 2.0);
            NormalDistribution ySource = new NormalDistribution(-3.0, 4.0);

            Sample statistics = new Sample();

            for (int fitIndex = 0; fitIndex < 127; fitIndex++)
            {
                // each fit uses 7 freshly drawn points
                BivariateSample pairs = new BivariateSample();
                for (int pointIndex = 0; pointIndex < 7; pointIndex++)
                {
                    double xValue = xSource.GetRandomValue(random);
                    double yValue = ySource.GetRandomValue(random);
                    pairs.Add(xValue, yValue);
                }
                statistics.Add(pairs.LinearRegression().GoodnessOfFit.Statistic);
            }

            Distribution expected = new FisherDistribution(1, 5);
            Console.WriteLine("{0} v. {1}", statistics.PopulationMean, expected.Mean);

            TestResult ks = statistics.KolmogorovSmirnovTest(expected);
            Console.WriteLine(ks.LeftProbability);
            Assert.IsTrue(ks.LeftProbability < 0.95);
        }
        /// <summary>
        /// Regresses column 0 of purely random 4-column samples and checks that the resulting
        /// F statistics are compatible with a Fisher distribution with parameters (3, 4).
        /// </summary>
        public void MultivariateLinearRegressionNullDistribution()
        {
            int dimension = 4;

            NormalDistribution standardNormal = new NormalDistribution();
            Random             random         = new Random(1);

            Sample statistics = new Sample();

            for (int trial = 0; trial < 64; trial++)
            {
                // 8 rows of independent standard normal values
                MultivariateSample sample = new MultivariateSample(dimension);
                for (int row = 0; row < 8; row++)
                {
                    double[] entry = new double[dimension];
                    for (int column = 0; column < entry.Length; column++)
                    {
                        entry[column] = standardNormal.GetRandomValue(random);
                    }
                    sample.Add(entry);
                }

                RegressionResult fit = sample.LinearRegression(0);
                statistics.Add(fit.F.Statistic);
            }

            // a KS test checks that F follows the expected distribution
            FisherDistribution expected = new FisherDistribution(3, 4);
            TestResult         ks       = statistics.KolmogorovSmirnovTest(expected);

            Assert.IsTrue(ks.LeftProbability < 0.95);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Runs many linear regressions on data with no true slope and checks that the resulting
        /// r^2 values are compatible with a Beta distribution whose shape parameters are
        /// (p - 1) / 2 and (n - p) / 2, for n points per fit and p fitted parameters.
        /// </summary>
        public void TestBivariateRegression()
        {
            // Do a bunch of linear regressions. r^2 should be distributed as expected.

            // These constants drive both the data generation and the reference distribution,
            // so the two cannot drift apart.
            const int fitCount       = 500;
            const int pointsPerFit   = 10;
            const int parameterCount = 2; // intercept and slope

            double a0 = 1.0;
            double b0 = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            List<double> r2Sample = new List<double>(fitCount);

            for (int i = 0; i < fitCount; i++)
            {
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < pointsPerFit; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }

                // Only r^2 is examined; the fitted intercept and slope are not needed here.
                LinearRegressionResult fit = xySample.LinearRegression();
                r2Sample.Add(fit.RSquared);
            }

            ContinuousDistribution r2Distribution = new BetaDistribution((parameterCount - 1) / 2.0, (pointsPerFit - parameterCount) / 2.0);
            TestResult             ks             = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
        /// <summary>
        /// Draws a random percentage from the given distribution and applies it to <c>DepthPixel</c>.
        /// </summary>
        /// <param name="randomDerivationOffset">Distribution the percentage is drawn from.</param>
        /// <returns><c>DepthPixel</c> multiplied by the drawn percentage and divided by 100.</returns>
        private double GetRandomDerivationOffsetPixel(NormalDistribution randomDerivationOffset)
        {
            var percent = randomDerivationOffset.GetRandomValue();

            return DepthPixel * percent / 100;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Generates <paramref name="k"/> noise values, each drawn either from the outlier
        /// distribution or from the regular error distribution named by <paramref name="distr"/>.
        /// </summary>
        /// <param name="k">Number of values to generate.</param>
        /// <param name="distr">
        /// Name of the regular error distribution: "norm" or "stud3". For any other value a
        /// regular point keeps the array's initial value of 0.
        /// </param>
        /// <param name="outlier">
        /// Threshold compared with a draw from <c>outliersProbability</c>; a point is treated as
        /// outlying when the draw is below this threshold.
        /// </param>
        /// <returns>An array of length <paramref name="k"/> holding the generated values.</returns>
        public static double[] GenerateNoise(int k, string distr, double outlier)
        {
            double[] noise = new double[k];

            for (int index = 0; index < noise.Length; index++)
            {
                // one draw per point decides whether the point is outlying
                if (outliersProbability.GetRandomValue(rng) < outlier)
                {
                    noise[index] = outliersDistribution.GetRandomValue(rng);
                    continue;
                }

                // regular point: the value comes from the requested error distribution
                if (distr == "norm")
                {
                    noise[index] = EpsNormDistribution.GetRandomValue(rng);
                }
                else if (distr == "stud3")
                {
                    noise[index] = EpsStudDistribution.GetRandomValue(rng);
                }
            }

            return noise;
        }
        /// <summary>
        /// Checks that the raw and central moments of a three-column sample agree with the
        /// mean, variance and covariance computed from the individual columns.
        /// </summary>
        public void MultivariateMoments()
        {
            const int dimension = 3;
            const int count     = 10;

            // build a random sample whose columns come from three different distributions
            Random                 random      = new Random(1);
            ContinuousDistribution normal      = new NormalDistribution();
            ContinuousDistribution exponential = new ExponentialDistribution();
            ContinuousDistribution uniform     = new UniformDistribution();
            MultivariateSample     sample      = new MultivariateSample(dimension);

            for (int row = 0; row < count; row++)
            {
                double v0 = normal.GetRandomValue(random);
                double v1 = exponential.GetRandomValue(random);
                double v2 = uniform.GetRandomValue(random);
                sample.Add(v0, v1, v2);
            }

            // compare moments column by column
            for (int column = 0; column < dimension; column++)
            {
                // first raw moment of a single column is its mean
                int[] first = new int[dimension];
                first[column] = 1;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(column).Mean, sample.RawMoment(first)));

                // second central moment of a single column is its variance
                int[] second = new int[dimension];
                second[column] = 2;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(column).Variance, sample.CentralMoment(second)));

                // mixed (1, 1) central moment of two columns is their covariance
                for (int other = 0; other < column; other++)
                {
                    int[] mixed = new int[dimension];
                    mixed[column] = 1;
                    mixed[other]  = 1;
                    Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.TwoColumns(column, other).Covariance, sample.CentralMoment(mixed)));
                }
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Fits a logistic regression with two predictors to synthetic data, then checks the number
        /// of fitted parameters, their agreement with the generating model, and that the fitted
        /// model classifies more than half of the rows correctly.
        /// </summary>
        public void MultivariateLinearLogisticRegressionSimple()
        {
            // define model logit(p) = a + b0 * x0 + b1 * x1
            double a  = 1.0;
            double b0 = -1.0 / 2.0;
            double b1 = 1.0 / 3.0;
            ContinuousDistribution x0distribution = new LaplaceDistribution();
            ContinuousDistribution x1distribution = new NormalDistribution();

            // draw a sample from the model, stored both in the older sample type and in a table
            Random             rng   = new Random(1);
            MultivariateSample old   = new MultivariateSample("y", "x0", "x1");
            FrameTable         table = new FrameTable();

            table.AddColumn<double>("x0");
            table.AddColumn<double>("x1");
            table.AddColumn<bool>("y");

            for (int i = 0; i < 100; i++)
            {
                double x0 = x0distribution.GetRandomValue(rng);
                double x1 = x1distribution.GetRandomValue(rng);
                double t  = a + b0 * x0 + b1 * x1;
                double p  = 1.0 / (1.0 + Math.Exp(-t));
                bool   y  = (rng.NextDouble() < p);
                old.Add(y ? 1.0 : 0.0, x0, x1);
                table.AddRow(x0, x1, y);
            }

            // do a logistic regression fit on the model
            // (oldResult is not inspected below; the call only runs the older entry point)
            MultiLinearLogisticRegressionResult oldResult = old.LogisticLinearRegression(0);
            MultiLinearLogisticRegressionResult newResult = table["y"].As<bool>().MultiLinearLogisticRegression(
                table["x0"].As<double>(), table["x1"].As<double>()
                );

            // the result should have the appropriate dimension
            Assert.IsTrue(newResult.Parameters.Count == 3);

            // The parameters should match the model
            Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
            Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
            Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

            // Our predictions should be better than chance.
            int correct = 0;

            for (int i = 0; i < table.Rows.Count; i++)
            {
                FrameRow row = table.Rows[i];
                double   x0  = (double)row["x0"];
                double   x1  = (double)row["x1"];
                double   p   = newResult.Predict(x0, x1).Value;
                bool     y   = (bool)row["y"];

                // A prediction counts as correct when it falls on the same side of 0.5 as the outcome.
                // Both halves use the short-circuit && operator.
                if ((y && p > 0.5) || (!y && p < 0.5))
                {
                    correct++;
                }
            }
            Assert.IsTrue(correct > 0.5 * table.Rows.Count);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Checks that three ways of fitting a line to the same two-column data report the
        /// same intercept estimate.
        /// </summary>
        public void MultivariateLinearRegressionAgreement2()
        {
            // A multivariate linear regression with just one x-column should be the same as a bivariate linear regression.

            double trueIntercept = 1.0;
            double trueSlope     = -2.0;
            Random                 random      = new Random(1111111);
            UniformDistribution    xSource     = new UniformDistribution(Interval.FromEndpoints(-2.0, 3.0));
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 3.0);

            MultivariateSample points = new MultivariateSample("x", "y");

            for (int index = 0; index < 10; index++)
            {
                // x is drawn before the noise term
                double xValue = xSource.GetRandomValue(random);
                double yValue = trueIntercept + trueSlope * xValue + noiseSource.GetRandomValue(random);
                points.Add(xValue, yValue);
            }

            // Fit through the multivariate sample, with column 1 as the output.
            MultiLinearRegressionResult sampleFit = points.LinearRegression(1);
            var sampleIntercept = sampleFit.Parameters["Intercept"].Estimate;

            // Fit through the bivariate view of the same two columns.
            LinearRegressionResult bivariateFit = points.TwoColumns(0, 1).LinearRegression();

            Assert.IsTrue(TestUtilities.IsNearlyEqual(sampleIntercept, bivariateFit.Parameters["Intercept"].Estimate));

            // Fit through the list-based extension method.
            MultiLinearRegressionResult listFit = points.Column(1).ToList().MultiLinearRegression(points.Column(0).ToList());

            Assert.IsTrue(TestUtilities.IsNearlyEqual(sampleIntercept, listFit.Parameters["Intercept"].Estimate));
        }
Exemplo n.º 15
0
        /// <summary>
        /// Repeats a two-predictor linear fit 64 times and checks that the scatter of the fitted
        /// values across fits is compatible with the uncertainties each fit reports.
        /// </summary>
        public void MultivariateLinearRegressionVariances()
        {
            // model: y = intercept + slope0 * x0 + slope1 * x1 + noise
            double intercept = -3.0;
            double slope0    = 2.0;
            double slope1    = -1.0;
            ContinuousDistribution x0Source    = new LaplaceDistribution();
            ContinuousDistribution x1Source    = new CauchyDistribution();
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

            Random random = new Random(4);

            // one row per fit: estimates, reported uncertainties, and a prediction
            FrameTable fits = new FrameTable();
            fits.AddColumns<double>("a", "da", "b0", "db0", "b1", "db1", "ab1Cov", "p", "dp");

            for (int fitIndex = 0; fitIndex < 64; fitIndex++)
            {
                List<double> x0Values = new List<double>();
                List<double> x1Values = new List<double>();
                List<double> yValues  = new List<double>();

                // draw 16 points from the model; the order of draws is x0, x1, noise
                for (int pointIndex = 0; pointIndex < 16; pointIndex++)
                {
                    double x0    = x0Source.GetRandomValue(random);
                    double x1    = x1Source.GetRandomValue(random);
                    double noise = noiseSource.GetRandomValue(random);

                    x0Values.Add(x0);
                    x1Values.Add(x1);
                    yValues.Add(intercept + slope0 * x0 + slope1 * x1 + noise);
                }

                // fit the model to the drawn points
                Dictionary<string, IReadOnlyList<double>> predictors = new Dictionary<string, IReadOnlyList<double>>();
                predictors.Add("x0", x0Values);
                predictors.Add("x1", x1Values);

                MultiLinearRegressionResult result     = yValues.MultiLinearRegression(predictors);
                UncertainValue              prediction = result.Predict(-5.0, 6.0);

                var fittedIntercept = result.Intercept;
                var fittedSlope0    = result.CoefficientOf("x0");
                var fittedSlope1    = result.CoefficientOf("x1");

                fits.AddRow(
                    fittedIntercept.Value, fittedIntercept.Uncertainty,
                    fittedSlope0.Value, fittedSlope0.Uncertainty,
                    fittedSlope1.Value, fittedSlope1.Uncertainty,
                    result.Parameters.CovarianceOf("Intercept", "x1"),
                    prediction.Value, prediction.Uncertainty
                    );
            }

            var interceptEstimates  = fits["a"].As<double>();
            var slope0Estimates     = fits["b0"].As<double>();
            var slope1Estimates     = fits["b1"].As<double>();
            var predictionEstimates = fits["p"].As<double>();

            // The observed scatter of each estimate should agree with the uncertainty the fits claim.
            Assert.IsTrue(interceptEstimates.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(fits["da"].As<double>().Mean()));
            Assert.IsTrue(slope0Estimates.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(fits["db0"].As<double>().Mean()));
            Assert.IsTrue(slope1Estimates.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(fits["db1"].As<double>().Mean()));

            // The same holds for the intercept / x1 covariance.
            Assert.IsTrue(interceptEstimates.PopulationCovariance(slope1Estimates).ConfidenceInterval(0.99).ClosedContains(fits["ab1Cov"].As<double>().Mean()));

            // The prediction check compares against the median of the claimed uncertainties.
            Assert.IsTrue(predictionEstimates.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(fits["dp"].As<double>().Median()));
        }
Exemplo n.º 16
0
        /// <summary>
        /// Builds a 3 x 2 grid of normal samples whose means carry row and column shifts but no
        /// interaction, and checks that a two-way ANOVA reports exactly that pattern.
        /// </summary>
        public void TwoWayAnova()
        {
            // We will construct a 3 X 2 two factor model, with row and column effects
            // but no interaction effect. We should detect this with a two-way ANOVA.

            const int rowCount    = 3;
            const int columnCount = 2;

            // Shift applied to the base mean of 1.0 for each row and each column.
            double[] rowShift    = { 0.0, -3.0, 3.0 };
            double[] columnShift = { -2.0, 2.0 };

            Random random = new Random(1);

            Sample[,] cells = new Sample[rowCount, columnCount];
            for (int row = 0; row < rowCount; row++)
            {
                for (int column = 0; column < columnCount; column++)
                {
                    // The column shift is applied before the row shift.
                    double cellMean = 1.0 + columnShift[column] + rowShift[row];

                    NormalDistribution cellDistribution = new NormalDistribution(cellMean, 4.0);
                    Sample             cell             = new Sample();
                    for (int draw = 0; draw < 25; draw++)
                    {
                        cell.Add(cellDistribution.GetRandomValue(random));
                    }
                    cells[row, column] = cell;
                }
            }

            TwoWayAnovaResult anova = Sample.TwoWayAnovaTest(cells);

            // Row and column factors should be significant, the interaction should not.
            Assert.IsTrue(anova.RowFactor.Result.Probability < 0.05);
            Assert.IsTrue(anova.ColumnFactor.Result.Probability < 0.05);
            Assert.IsTrue(anova.Interaction.Result.Probability > 0.05);

            // The sums of squares of the parts should add up to the total.
            Assert.IsTrue(TestUtilities.IsNearlyEqual(
                              anova.RowFactor.SumOfSquares + anova.ColumnFactor.SumOfSquares + anova.Interaction.SumOfSquares + anova.Residual.SumOfSquares,
                              anova.Total.SumOfSquares
                              ));

            // The same holds for the degrees of freedom.
            Assert.IsTrue(TestUtilities.IsNearlyEqual(
                              anova.RowFactor.DegreesOfFreedom + anova.ColumnFactor.DegreesOfFreedom + anova.Interaction.DegreesOfFreedom + anova.Residual.DegreesOfFreedom,
                              anova.Total.DegreesOfFreedom
                              ));
        }
Exemplo n.º 17
0
        /// <summary>
        /// Fits lines to uncorrelated normal data and checks that the F and R statistics follow
        /// their reported distributions, that F equals the ANOVA statistic, and that the R and F
        /// tests report nearly the same probability.
        /// </summary>
        public void BivariateLinearRegressionNullDistribution()
        {
            // create uncorrelated x and y values
            // the distribution of F-test statistics returned by linear fits should follow the expected F-distribution

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            // A single sample of F statistics serves both the KS test and the Kuiper test below.
            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                rSample.Add(result.R.Statistic);
                rDistribution = result.R.Distribution;

                fSample.Add(result.F.Statistic);
                fDistribution = result.F.Distribution;

                // The F statistic and the ANOVA statistic are expected to be exactly equal.
                Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);

                // The R test and the F test should report the same probability up to rounding.
                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                                  result.R.Probability, result.F.Probability,
                                  new EvaluationSettings()
                {
                    RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16
                }
                                  ));
            }

            // Compare against an independently constructed Fisher distribution.
            ContinuousDistribution fd = new FisherDistribution(1, 5);

            Console.WriteLine("{0} v. {1}", fSample.PopulationMean, fd.Mean);
            TestResult t = fSample.KolmogorovSmirnovTest(fd);

            Console.WriteLine(t.LeftProbability);
            Assert.IsTrue(t.LeftProbability < 0.95);

            // Compare against the distributions reported by the fits themselves.
            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
        /// <summary>
        /// Fits a two-predictor linear model to synthetic data through two entry points and checks
        /// the parameter count, the agreement of the estimates with the generating model, and the
        /// consistency of the reported residuals with the predictions.
        /// </summary>
        public void MultivariateLinearRegressionSimple()
        {
            // model: y = intercept + slope0 * x0 + slope1 * x1 + noise
            double intercept = 1.0;
            double slope0    = -2.0;
            double slope1    = 3.0;
            ContinuousDistribution x0Source    = new CauchyDistribution(10.0, 5.0);
            ContinuousDistribution x1Source    = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0));
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 10.0);

            // the same 100 points are stored in a multivariate sample and in a table
            Random             random = new Random(1);
            MultivariateSample sample = new MultivariateSample("x0", "x1", "y");
            FrameTable         table  = new FrameTable();

            table.AddColumns<double>("x0", "x1", "y");

            for (int pointIndex = 0; pointIndex < 100; pointIndex++)
            {
                // draw order: x0, x1, noise
                double x0    = x0Source.GetRandomValue(random);
                double x1    = x1Source.GetRandomValue(random);
                double noise = noiseSource.GetRandomValue(random);
                double y     = intercept + slope0 * x0 + slope1 * x1 + noise;

                sample.Add(x0, x1, y);
                table.AddRow(x0, x1, y);
            }

            // fit through the sample (output column 2) and through the table columns
            ParameterCollection         sampleParameters = sample.LinearRegression(2).Parameters;
            MultiLinearRegressionResult tableFit         = table["y"].As<double>().MultiLinearRegression(
                table["x0"].As<double>(), table["x1"].As<double>()
                );

            // both fits should report three parameters
            Assert.IsTrue(sampleParameters.Count == 3);
            Assert.IsTrue(tableFit.Parameters.Count == 3);

            // the sample-based estimates should match the model
            Assert.IsTrue(sampleParameters[0].Estimate.ConfidenceInterval(0.90).ClosedContains(slope0));
            Assert.IsTrue(sampleParameters[1].Estimate.ConfidenceInterval(0.90).ClosedContains(slope1));
            Assert.IsTrue(sampleParameters[2].Estimate.ConfidenceInterval(0.90).ClosedContains(intercept));

            // so should the table-based estimates
            Assert.IsTrue(tableFit.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(slope0));
            Assert.IsTrue(tableFit.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(slope1));
            Assert.IsTrue(tableFit.Intercept.ConfidenceInterval(0.99).ClosedContains(intercept));

            // each reported residual should equal the observed y minus the predicted y
            for (int rowIndex = 0; rowIndex < table.Rows.Count; rowIndex++)
            {
                FrameRow row       = table.Rows[rowIndex];
                double   x0        = (double)row["x0"];
                double   x1        = (double)row["x1"];
                double   predicted = tableFit.Predict(x0, x1).Value;
                double   observed  = (double)row["y"];
                Assert.IsTrue(TestUtilities.IsNearlyEqual(tableFit.Residuals[rowIndex], observed - predicted));
            }
        }
Exemplo n.º 19
0
        /// <summary>
        /// Repeats a simple linear fit 128 times and checks that the scatter of the fitted
        /// intercept, slope and prediction across fits is compatible with the variances,
        /// covariance and uncertainty each fit reports.
        /// </summary>
        public void LinearRegressionVariances()
        {
            // do a set of linear regression fits
            // make sure not only that the fit parameters are what they should be, but that their variances/covariances are as returned

            Random rng = new Random(314159);

            // define line parameters
            double a0 = 2.0; double b0 = -1.0;

            // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
            // (the distributions are the same for every fit, so they are created once)
            ContinuousDistribution xd = new LogisticDistribution();
            ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

            // do a lot of fits, recording results of each
            FrameTable data = new FrameTable();

            data.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");

            for (int k = 0; k < 128; k++)
            {
                // generate a synthetic data set
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 12; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // do the regression
                LinearRegressionResult result = sample.LinearRegression();

                // record result
                UncertainValue p = result.Predict(12.0);
                data.AddRow(new Dictionary<string, object>()
                {
                    { "a", result.Intercept.Value },
                    { "va", result.Parameters.VarianceOf("Intercept") },
                    { "b", result.Slope.Value },
                    { "vb", result.Parameters.VarianceOf("Slope") },
                    { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
                    { "p", p.Value },
                    { "dp", p.Uncertainty }
                });
            }

            // variances of parameters should agree with predictions
            Assert.IsTrue(data["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As<double>().Median()));
            Assert.IsTrue(data["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As<double>().Median()));
            Assert.IsTrue(data["a"].As<double>().PopulationCovariance(data["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As<double>().Median()));

            // variance of prediction should agree with claim
            Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As<double>().Median()));
        }
Exemplo n.º 20
0
        /// <summary>
        /// Builds t-like ratios from standard normal draws and checks that they are compatible with
        /// a Student distribution with 5 degrees of freedom but not with a normal distribution of
        /// the same mean and standard deviation.
        /// </summary>
        public void StudentFromNormal()
        {
            // make sure Student t is consistent with its definition
            const int degreesOfFreedom = 5;

            Random                 random         = new Random(314159);
            ContinuousDistribution standardNormal = new NormalDistribution();

            // this sample is expected to end up t-distributed
            Sample ratios = new Sample();

            for (int trial = 0; trial < 10000; trial++)
            {
                // numerator: a single normal draw, taken before the draws for the denominator
                double numerator = standardNormal.GetRandomValue(random);

                // denominator: root of the mean of squared normal draws
                double sumOfSquares = 0.0;
                for (int term = 0; term < degreesOfFreedom; term++)
                {
                    double z = standardNormal.GetRandomValue(random);
                    sumOfSquares += z * z;
                }
                double meanSquare = sumOfSquares / degreesOfFreedom;

                ratios.Add(numerator / Math.Sqrt(meanSquare));
            }

            ContinuousDistribution student   = new StudentDistribution(degreesOfFreedom);
            TestResult             studentKs = ratios.KolmogorovSmirnovTest(student);

            Assert.IsTrue(studentKs.Probability > 0.05);

            // Distribution should be demonstrably non-normal
            ContinuousDistribution matchedNormal = new NormalDistribution(student.Mean, student.StandardDeviation);
            TestResult             normalKs      = ratios.KolmogorovSmirnovTest(matchedNormal);

            Assert.IsTrue(normalKs.Probability < 0.05);
        }
Exemplo n.º 21
0
        public void TestMultivariateRegression()
        {
            // Repeatedly fits z = fcx * x + fcy * y + fcz to small samples whose true x and y
            // coefficients are zero, computes r^2 for each fit by hand, and prints a
            // Kolmogorov-Smirnov comparison of the collected r^2 values against a beta distribution.
            // NOTE(review): this method only writes to the console; it asserts nothing.

            const int fitCount          = 500;
            const int pointsPerFit      = 12;
            const int fitParameterCount = 3;   // fcx, fcy, fcz

            double cz = 1.0;
            double cx = 0.0;
            double cy = 0.0;

            Random       rng           = new Random(1001110000);
            Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
            Distribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
            Distribution eDistribution = new NormalDistribution();

            Sample r2Sample = new Sample();

            for (int i = 0; i < fitCount; i++)
            {
                MultivariateSample xyzSample = new MultivariateSample(3);
                for (int k = 0; k < pointsPerFit; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = yDistribution.GetRandomValue(rng);
                    double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
                    xyzSample.Add(x, y, z);
                }
                FitResult fit = xyzSample.LinearRegression(2);
                double    fcx = fit.Parameters[0];
                double    fcy = fit.Parameters[1];
                double    fcz = fit.Parameters[2];

                // The mean of the z column does not change while the rows are enumerated,
                // so it is computed once per fit rather than once per row.
                double zMean = xyzSample.Column(2).Mean;

                double ss2 = 0.0;   // sum of squared residuals about the fitted plane
                double ss1 = 0.0;   // sum of squared deviations of z about its mean
                foreach (double[] xyz in xyzSample)
                {
                    ss2 += MoreMath.Sqr(xyz[2] - (fcx * xyz[0] + fcy * xyz[1] + fcz));
                    ss1 += MoreMath.Sqr(xyz[2] - zMean);
                }
                double r2 = 1.0 - ss2 / ss1;
                r2Sample.Add(r2);
            }

            Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

            // Reference distribution for r^2: beta with shape parameters
            // (parameters - 1) / 2 and (points - parameters) / 2.
            Distribution r2Distribution = new BetaDistribution((fitParameterCount - 1) / 2.0, (pointsPerFit - fitParameterCount) / 2.0);

            Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Console.WriteLine(ks.RightProbability);
            Console.WriteLine(ks.Probability);
        }
Exemplo n.º 22
0
        static Tuple <double, double> CalcCCRandom(List <Criteria> criterias, bool newLogic = false)
        {
            // For each criterion, draw one random source value and one random disturber value
            // from normal distributions built from that criterion's mean and sigma, then hand
            // the drawn pairs to CalcCC together with the newLogic flag.
            Dictionary<Criteria, Tuple<double, double>> drawnValues = new Dictionary<Criteria, Tuple<double, double>>();

            foreach (Criteria criterion in criterias)
            {
                // Both distributions are built before either value is drawn.
                NormalDistribution sourceDistribution    = new NormalDistribution(criterion.SourceMean, criterion.SourceSigma);
                NormalDistribution disturberDistribution = new NormalDistribution(criterion.DisturberMean, criterion.DisturberSigma);

                // The source value is drawn first, then the disturber value.
                double sourceValue    = sourceDistribution.GetRandomValue(MyMath.random);
                double disturberValue = disturberDistribution.GetRandomValue(MyMath.random);

                Tuple<double, double> pair = new Tuple<double, double>(sourceValue, disturberValue);
                drawnValues.Add(criterion, pair);
            }

            return CalcCC(drawnValues, newLogic);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Returns the actual score when one is set; otherwise draws a random score from a
        /// normal distribution centered on the projected score and rounds it to two decimals.
        /// </summary>
        /// <param name="standardDeviation">Second argument passed to the normal distribution that is built around the projected score.</param>
        /// <returns>The actual score, or a randomly drawn score rounded to two decimal places.</returns>
        /// <exception cref="InvalidOperationException">Neither an actual nor a projected score is set.</exception>
        public double GetScore(double standardDeviation)
        {
            // A recorded score always takes precedence over a simulated one.
            if (ActualScore.HasValue)
            {
                return ActualScore.Value;
            }

            if (!ProjectedScore.HasValue)
            {
                // A specific exception type instead of the bare System.Exception;
                // callers that catch Exception are unaffected.
                throw new InvalidOperationException("No defined score data");
            }

            NormalDistribution distribution = new NormalDistribution(ProjectedScore.Value, standardDeviation);

            return Math.Round(distribution.GetRandomValue(rnd), 2);
        }
Exemplo n.º 24
0
        public void TestBivariateRegression()
        {
            // Repeatedly fits a line to small samples whose true slope is zero, computes r^2
            // for each fit by hand, and prints a Kolmogorov-Smirnov comparison of the collected
            // r^2 values against a beta distribution.
            // NOTE(review): this method only writes to the console; it asserts nothing.

            const int fitCount          = 500;
            const int pointsPerFit      = 10;
            const int fitParameterCount = 2;   // a and b

            double a0 = 1.0;
            double b0 = 0.0;

            Random       rng           = new Random(1001110000);
            Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            Distribution eDistribution = new NormalDistribution();

            Sample r2Sample = new Sample();

            for (int i = 0; i < fitCount; i++)
            {
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < pointsPerFit; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }
                FitResult fit = xySample.LinearRegression();
                double    a   = fit.Parameters[0];
                double    b   = fit.Parameters[1];

                // The mean of y does not change while the points are enumerated,
                // so it is computed once per fit rather than once per point.
                double yMean = xySample.Y.Mean;

                double ss2 = 0.0;   // sum of squared residuals about the fitted line
                double ss1 = 0.0;   // sum of squared deviations of y about its mean
                foreach (XY xy in xySample)
                {
                    ss2 += MoreMath.Sqr(xy.Y - (a + b * xy.X));
                    ss1 += MoreMath.Sqr(xy.Y - yMean);
                }
                double r2 = 1.0 - ss2 / ss1;
                r2Sample.Add(r2);
            }

            Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

            // Reference distribution for r^2: beta with shape parameters
            // (parameters - 1) / 2 and (points - parameters) / 2.
            Distribution r2Distribution = new BetaDistribution((fitParameterCount - 1) / 2.0, (pointsPerFit - fitParameterCount) / 2.0);

            Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Console.WriteLine(ks.RightProbability);
            Console.WriteLine(ks.Probability);
        }
Exemplo n.º 25
0
        public void BivariateLinearRegressionNullDistribution()
        {
            // Fit lines to x and y values drawn independently of each other.
            // For every fit, the F statistic must equal the ANOVA statistic and the r-test and
            // F-test probabilities must agree; across all fits, the collected r and F statistics
            // must be consistent with the distributions the fit results claim for them.
            const int fitCount     = 127;
            const int pointsPerFit = 7;

            Random             rng     = new Random(987654321);
            NormalDistribution xSource = new NormalDistribution(1.0, 2.0);
            NormalDistribution ySource = new NormalDistribution(-3.0, 4.0);

            Sample                 rValues       = new Sample();
            Sample                 fValues       = new Sample();
            ContinuousDistribution rDistribution = null;
            ContinuousDistribution fDistribution = null;

            for (int fitIndex = 0; fitIndex < fitCount; fitIndex++)
            {
                BivariateSample sample = new BivariateSample();
                for (int pointIndex = 0; pointIndex < pointsPerFit; pointIndex++)
                {
                    // x is drawn before y
                    double xValue = xSource.GetRandomValue(rng);
                    double yValue = ySource.GetRandomValue(rng);
                    sample.Add(xValue, yValue);
                }

                LinearRegressionResult regression = sample.LinearRegression();

                // Record each statistic along with the distribution it is claimed to follow
                rValues.Add(regression.R.Statistic.Value);
                rDistribution = regression.R.Statistic.Distribution;

                fValues.Add(regression.F.Statistic.Value);
                fDistribution = regression.F.Statistic.Distribution;

                // The F statistic must match the ANOVA statistic exactly
                bool sameStatistic = regression.F.Statistic.Value == regression.Anova.Result.Statistic.Value;
                Assert.IsTrue(sameStatistic);

                // The two tests must report nearly the same probability
                EvaluationSettings tolerance = new EvaluationSettings()
                {
                    RelativePrecision = 1.0E-13,
                    AbsolutePrecision = 1.0E-16
                };
                bool sameProbability = TestUtilities.IsNearlyEqual(regression.R.Probability, regression.F.Probability, tolerance);
                Assert.IsTrue(sameProbability);
            }

            Assert.IsTrue(rValues.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fValues.KuiperTest(fDistribution).Probability > 0.05);
        }
Exemplo n.º 26
0
        public void BivariateNonlinearFitVariances()
        {
            // Fit the power law y = a * x^b to many noisy samples and check that
            //   - the fitted parameters scatter around the true values, and
            //   - the covariances reported by the fits match the scatter actually observed.

            double trueScale    = 2.7;
            double trueExponent = 3.1;

            const int fitCount     = 64;
            const int pointsPerFit = 8;

            ContinuousDistribution abscissaSource = new ExponentialDistribution(2.0);
            ContinuousDistribution noiseSource    = new NormalDistribution(0.0, 4.0);

            // Fitted (a, b) pairs, one row per fit
            FrameTable fitted = new FrameTable();
            fitted.AddColumns<double>("a", "b");

            // Reported var(a), var(b), cov(a, b), one entry per fit
            MultivariateSample reported = new MultivariateSample(3);

            Func<IReadOnlyList<double>, double, double> powerLaw =
                (IReadOnlyList<double> p, double u) => p[0] * Math.Pow(u, p[1]);

            for (int seed = 0; seed < fitCount; seed++)
            {
                // Each fit gets its own generator, seeded with the fit index
                Random          rng    = new Random(seed);
                BivariateSample sample = new BivariateSample();

                for (int pointIndex = 0; pointIndex < pointsPerFit; pointIndex++)
                {
                    double xValue = abscissaSource.GetRandomValue(rng);
                    double yValue = trueScale * Math.Pow(xValue, trueExponent) + noiseSource.GetRandomValue(rng);
                    sample.Add(xValue, yValue);
                }

                NonlinearRegressionResult fit = sample.NonlinearRegression(powerLaw, new double[] { 1.0, 1.0 });

                fitted.AddRow(fit.Parameters.ValuesVector);

                var covarianceMatrix = fit.Parameters.CovarianceMatrix;
                reported.Add(covarianceMatrix[0, 0], covarianceMatrix[1, 1], covarianceMatrix[0, 1]);
            }

            var aEstimates = fitted["a"].As<double>();
            var bEstimates = fitted["b"].As<double>();

            // Estimates cluster around the true parameter values
            Assert.IsTrue(aEstimates.PopulationMean().ConfidenceInterval(0.99).ClosedContains(trueScale));
            Assert.IsTrue(bEstimates.PopulationMean().ConfidenceInterval(0.99).ClosedContains(trueExponent));

            // Observed scatter agrees with the mean of the reported variances and covariance
            double meanReportedVarA  = reported.Column(0).Mean;
            double meanReportedVarB  = reported.Column(1).Mean;
            double meanReportedCovAB = reported.Column(2).Mean;

            Assert.IsTrue(aEstimates.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(meanReportedVarA));
            Assert.IsTrue(bEstimates.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(meanReportedVarB));

            // The covariance check is made through both the extension-method and the static entry point
            Assert.IsTrue(aEstimates.PopulationCovariance(bEstimates).ConfidenceInterval(0.99).ClosedContains(meanReportedCovAB));
            Assert.IsTrue(Bivariate.PopulationCovariance(aEstimates, bEstimates).ConfidenceInterval(0.99).ClosedContains(meanReportedCovAB));
        }
Exemplo n.º 27
0
        public void PearsonRNullDistribution()
        {
            // For several small sample sizes, collect Pearson r values from samples whose
            // x and y are drawn independently, and check them against the Pearson r distribution.
            const int rValuesPerSize = 128;

            Random rng = new Random(1111111);

            // The underlying variables must be normal; their parameters are arbitrary.
            NormalDistribution xSource = new NormalDistribution(1, 2);
            NormalDistribution ySource = new NormalDistribution(3, 4);

            // Sizes are kept small so the r distribution differs visibly from a normal one:
            // n = 3 peaks at the ends, n = 4 is uniform, n = 5 is a semi-circular mound,
            // n = 6 is a parabolic mound.
            int[] sampleSizes = new int[] { 3, 4, 5, 6, 8 };

            foreach (int n in sampleSizes)
            {
                Sample                 rValues         = new Sample();
                ContinuousDistribution claimedRLaw     = null;

                for (int trial = 0; trial < rValuesPerSize; trial++)
                {
                    // One r value comes from one bivariate sample of n uncorrelated points
                    BivariateSample pairs = new BivariateSample();
                    for (int pointIndex = 0; pointIndex < n; pointIndex++)
                    {
                        // x is drawn before y
                        double xValue = xSource.GetRandomValue(rng);
                        double yValue = ySource.GetRandomValue(rng);
                        pairs.Add(xValue, yValue);
                    }

                    TestResult rTest = pairs.PearsonRTest();
                    rValues.Add(rTest.Statistic);
                    claimedRLaw = rTest.Distribution;
                }

                // The r values should follow the Pearson r distribution for this n
                TestResult kuiper = rValues.KuiperTest(new PearsonRDistribution(n));
                Assert.IsTrue(kuiper.Probability > 0.01);

                // Sample moments should agree with those of the distribution reported by the test
                Assert.IsTrue(rValues.PopulationMean.ConfidenceInterval(0.95).ClosedContains(claimedRLaw.Mean));
                Assert.IsTrue(rValues.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(claimedRLaw.StandardDeviation));
            }
        }
Exemplo n.º 28
0
        public void TestMultivariateRegression()
        {
            // Collect r^2 values from multivariate linear regressions of data whose true
            // x and y coefficients are zero, and check that they follow the expected
            // beta distribution.

            const int fitCount          = 500;
            const int pointsPerFit      = 12;
            const int fitParameterCount = 3;   // x coefficient, y coefficient, and constant term

            double cz = 1.0;
            double cx = 0.0;
            double cy = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
            ContinuousDistribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            List<double> r2Sample = new List<double>();

            for (int i = 0; i < fitCount; i++)
            {
                MultivariateSample xyzSample = new MultivariateSample(3);
                for (int k = 0; k < pointsPerFit; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = yDistribution.GetRandomValue(rng);
                    double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
                    xyzSample.Add(x, y, z);
                }

                // Only r^2 is needed from the fit; the fitted coefficients themselves are not inspected.
                MultiLinearRegressionResult fit = xyzSample.LinearRegression(2);
                r2Sample.Add(fit.RSquared);
            }

            // r^2 values should be distributed as expected: beta with shape parameters
            // (parameters - 1) / 2 and (points - parameters) / 2.
            ContinuousDistribution r2Distribution = new BetaDistribution((fitParameterCount - 1) / 2.0, (pointsPerFit - fitParameterCount) / 2.0);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
Exemplo n.º 29
0
        public static void GenerateIndividualSet(string distr, double outlier, string magn, string iteration)
        {
            // Output folder layout: <rootFolder><magn>\<outlier>\<distr>\<iteration>
            // NOTE(review): separators are hard-coded backslashes and outlier is formatted with
            // the current culture; confirm that both are intended before running elsewhere.
            string outputFolder = rootFolder + magn;
            outputFolder = outputFolder + "\\" + outlier.ToString();
            outputFolder = outputFolder + "\\" + distr;
            outputFolder = outputFolder + "\\" + iteration;

            Directory.CreateDirectory(outputFolder);

            // Step 1: generate the noise for every response of the whole data set.
            // This happens before any predictor value is drawn.
            for (int row = 0; row < n; row++)
            {
                eps[row] = GenerateNoise(p, distr, outlier);
            }

            // Step 2: fill the n-by-m block X with draws from XDistribution, row by row.
            for (int row = 0; row < n; row++)
            {
                for (int column = 0; column < m; column++)
                {
                    X[row][column] = XDistribution.GetRandomValue(rng);
                }
            }

            // Step 3: reset the n-by-p block Y to zero.
            for (int row = 0; row < n; row++)
            {
                for (int response = 0; response < p; response++)
                {
                    Y[row][response] = 0;
                }
            }

            // Step 4: run the custom function for each row.
            for (int row = 0; row < n; row++)
            {
                CalculateCustomFunction(row, magn);
            }

            // Step 5: write the generated data into the output folder.
            string dataFile = outputFolder + "\\Data.txt";
            PrintGeneratedData(dataFile);
        }
Exemplo n.º 30
0
        public void BivariateNonlinearFitSimple()
        {
            // Fit a sinusoid with named "Period" and "Phase" parameters to noisy data, then
            // check that the true values are recovered and that the reported residuals equal
            // the observed values minus the predictions.
            double truePeriod = 3.0;
            double truePhase  = 1.0;

            ContinuousDistribution abscissaSource = new CauchyDistribution(0.0, 2.0);
            ContinuousDistribution noiseSource    = new NormalDistribution(0.0, 0.5);

            Random rng = new Random(5);

            // All 48 x values are drawn before the first noise value.
            List<double> x = TestUtilities.CreateDataSample(rng, abscissaSource, 48).ToList();

            List<double> y = new List<double>();
            foreach (double xi in x)
            {
                y.Add(Math.Sin(2.0 * Math.PI * xi / truePeriod + truePhase) + noiseSource.GetRandomValue(rng));
            }

            // Model: sin(2 pi z / Period + Phase), with parameters looked up by name
            Func<IReadOnlyDictionary<string, double>, double, double> model =
                (named, z) => Math.Sin(2.0 * Math.PI * z / named["Period"] + named["Phase"]);

            // Starting point of the fit, deliberately off the true values
            Dictionary<string, double> initialGuess = new Dictionary<string, double>();
            initialGuess.Add("Period", 2.5);
            initialGuess.Add("Phase", 1.5);

            NonlinearRegressionResult fit = y.NonlinearRegression(x, model, initialGuess);

            Assert.IsTrue(fit.Parameters["Period"].Estimate.ConfidenceInterval(0.99).ClosedContains(truePeriod));
            Assert.IsTrue(fit.Parameters["Phase"].Estimate.ConfidenceInterval(0.99).ClosedContains(truePhase));

            for (int index = 0; index < x.Count; index++)
            {
                double predicted = fit.Predict(x[index]);
                double expectedResidual = y[index] - predicted;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(fit.Residuals[index], expectedResidual));
            }
        }