Ejemplo n.º 1
0
        public void MultivariateLinearRegressionVariances()
        {
            // Generating model: y = intercept + slope0 * x0 + slope1 * x1 + noise.
            double intercept = -3.0;
            double slope0 = 2.0;
            double slope1 = -1.0;
            ContinuousDistribution x0Source = new LaplaceDistribution();
            ContinuousDistribution x1Source = new CauchyDistribution();
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

            // One row per fit: each estimate is stored next to the uncertainty the fit claims for it.
            FrameTable fits = new FrameTable();
            fits.AddColumns<double>("a", "da", "b0", "db0", "b1", "db1", "ab1Cov", "p", "dp");

            // A single seeded generator feeds every draw below.
            Random rng = new Random(4);

            for (int trial = 0; trial < 64; trial++)
            {
                List<double> x0s = new List<double>();
                List<double> x1s = new List<double>();
                List<double> responses = new List<double>();

                for (int draw = 0; draw < 16; draw++)
                {
                    // The order x0, x1, noise fixes how the random stream is consumed.
                    double x0 = x0Source.GetRandomValue(rng);
                    double x1 = x1Source.GetRandomValue(rng);
                    double noise = noiseSource.GetRandomValue(rng);

                    x0s.Add(x0);
                    x1s.Add(x1);
                    responses.Add(intercept + slope0 * x0 + slope1 * x1 + noise);
                }

                // Fit the responses against the two named regressors.
                var regressors = new Dictionary<string, IReadOnlyList<double>>();
                regressors.Add("x0", x0s);
                regressors.Add("x1", x1s);
                MultiLinearRegressionResult fit = responses.MultiLinearRegression(regressors);

                // Also record a prediction at a fixed point, with its claimed uncertainty.
                UncertainValue prediction = fit.Predict(-5.0, 6.0);

                var interceptEstimate = fit.Intercept;
                var x0Estimate = fit.CoefficientOf("x0");
                var x1Estimate = fit.CoefficientOf("x1");

                fits.AddRow(
                    interceptEstimate.Value, interceptEstimate.Uncertainty,
                    x0Estimate.Value, x0Estimate.Uncertainty,
                    x1Estimate.Value, x1Estimate.Uncertainty,
                    fit.Parameters.CovarianceOf("Intercept", "x1"),
                    prediction.Value, prediction.Uncertainty
                );
            }

            // Only the variances are checked here: the scatter observed across the fits
            // must be compatible, at 99% confidence, with what the fits themselves reported.
            var aValues = fits["a"].As<double>();
            var b0Values = fits["b0"].As<double>();
            var b1Values = fits["b1"].As<double>();
            var pValues = fits["p"].As<double>();

            double claimedDa = fits["da"].As<double>().Mean();
            Assert.IsTrue(aValues.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDa));

            double claimedDb0 = fits["db0"].As<double>().Mean();
            Assert.IsTrue(b0Values.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDb0));

            double claimedDb1 = fits["db1"].As<double>().Mean();
            Assert.IsTrue(b1Values.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDb1));

            double claimedCovariance = fits["ab1Cov"].As<double>().Mean();
            Assert.IsTrue(aValues.PopulationCovariance(b1Values).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));

            // The prediction uncertainty is summarized by its median rather than its mean.
            double claimedDp = fits["dp"].As<double>().Median();
            Assert.IsTrue(pValues.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDp));
        }
Ejemplo n.º 2
0
        public void WaldFit()
        {
            // Fit many small samples generated for a known Wald distribution and check that
            // the fitted parameters, and the variances the fit reports, match what is observed.
            WaldDistribution source = new WaldDistribution(3.5, 2.5);

            FrameTable fits = new FrameTable();
            fits.AddColumns<double>("Mean", "Shape", "MeanVariance", "ShapeVariance", "MeanShapeCovariance");

            for (int trial = 0; trial < 128; trial++)
            {
                Sample sample = SampleTest.CreateSample(source, 16, trial);
                WaldFitResult fit = WaldDistribution.FitToSample(sample);
                var fitted = fit.Parameters;

                // The named result properties must mirror the entries of the parameter collection.
                double meanFromVector = fitted.ValuesVector[fitted.IndexOf("Mean")];
                Assert.IsTrue(fit.Mean.Value == meanFromVector);
                double shapeFromVector = fitted.ValuesVector[fitted.IndexOf("Shape")];
                Assert.IsTrue(fit.Shape.Value == shapeFromVector);

                // Each reported variance must be the square of the matching uncertainty.
                double meanVariance = fitted.VarianceOf("Mean");
                Assert.IsTrue(TestUtilities.IsNearlyEqual(meanVariance, MoreMath.Sqr(fit.Mean.Uncertainty)));
                double shapeVariance = fitted.VarianceOf("Shape");
                Assert.IsTrue(TestUtilities.IsNearlyEqual(shapeVariance, MoreMath.Sqr(fit.Shape.Uncertainty)));

                fits.AddRow(
                    fit.Mean.Value, fit.Shape.Value,
                    fitted.VarianceOf("Mean"), fitted.VarianceOf("Shape"), fitted.CovarianceOf("Mean", "Shape")
                );
            }

            var means = fits["Mean"].As<double>();
            var shapes = fits["Shape"].As<double>();

            // The fitted values should scatter around the true parameters.
            Assert.IsTrue(means.PopulationMean().ConfidenceInterval(0.99).ClosedContains(source.Mean));
            Assert.IsTrue(shapes.PopulationMean().ConfidenceInterval(0.99).ClosedContains(source.Shape));

            // The observed (co)variances should agree with the median of the reported ones.
            double reportedMeanVariance = fits["MeanVariance"].As<double>().Median();
            Assert.IsTrue(means.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(reportedMeanVariance));

            double reportedShapeVariance = fits["ShapeVariance"].As<double>().Median();
            Assert.IsTrue(shapes.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(reportedShapeVariance));

            double reportedCovariance = fits["MeanShapeCovariance"].As<double>().Median();
            Assert.IsTrue(means.PopulationCovariance(shapes).ConfidenceInterval(0.99).ClosedContains(reportedCovariance));
        }
Ejemplo n.º 3
0
        public static void ConstructExampleData()
        {
            // Builds a small example table of people and writes it to "example.csv" and
            // "example.json" in the current working directory.
            FrameTable table = new FrameTable();

            table.AddColumn<int>("Id");
            table.AddColumn<string>("Name");
            table.AddColumn<string>("Sex");
            table.AddColumn<DateTime>("Birthdate");
            table.AddColumn<double>("Height");
            table.AddColumn<double?>("Weight");
            table.AddColumn<bool>("Result");

            // Fixed seed so the generated rows are the same on every run.
            Random rng = new Random(1000001);

            string[] maleNames = new string[] { "Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
            AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

            string[] femaleNames = new string[] { "Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
            AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

            // Add rows with nulls: one with a missing name, one with a missing weight.
            table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false);
            table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false);

            string path = @"example.csv";

            // File.CreateText truncates an existing file. File.OpenWrite would not, and would
            // leave stale trailing bytes whenever a previous, longer example.csv is overwritten.
            using (StreamWriter writer = File.CreateText(path))
            {
                table.ToCsv(writer);
            }
            Console.WriteLine(File.Exists(path));

            string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);

            File.WriteAllText("example.json", json);
        }
        public void MultivariateLinearRegressionSimple()
        {
            // Generating model: y = intercept + slope0 * x0 + slope1 * x1 + noise.
            double intercept = 1.0;
            double slope0 = -2.0;
            double slope1 = 3.0;
            ContinuousDistribution x0Source = new CauchyDistribution(10.0, 5.0);
            ContinuousDistribution x1Source = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0));
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 10.0);

            // The same observations go into both containers so that the sample-based and
            // the column-based regression APIs are fitted to identical data.
            Random rng = new Random(1);
            MultivariateSample sample = new MultivariateSample("x0", "x1", "y");
            FrameTable table = new FrameTable();
            table.AddColumns<double>("x0", "x1", "y");

            for (int n = 0; n < 100; n++)
            {
                double x0 = x0Source.GetRandomValue(rng);
                double x1 = x1Source.GetRandomValue(rng);
                double noise = noiseSource.GetRandomValue(rng);
                double y = intercept + slope0 * x0 + slope1 * x1 + noise;

                sample.Add(x0, x1, y);
                table.AddRow(x0, x1, y);
            }

            // Fit through the sample-based API first, then through the column-based one.
            ParameterCollection sampleFit = sample.LinearRegression(2).Parameters;

            var yColumn = table["y"].As<double>();
            MultiLinearRegressionResult columnFit = yColumn.MultiLinearRegression(
                table["x0"].As<double>(), table["x1"].As<double>()
            );

            // Two slopes plus an intercept give three parameters either way.
            Assert.IsTrue(sampleFit.Count == 3);
            Assert.IsTrue(columnFit.Parameters.Count == 3);

            // Sample-based fit: parameters are checked in the order slope0, slope1, intercept.
            Assert.IsTrue(sampleFit[0].Estimate.ConfidenceInterval(0.90).ClosedContains(slope0));
            Assert.IsTrue(sampleFit[1].Estimate.ConfidenceInterval(0.90).ClosedContains(slope1));
            Assert.IsTrue(sampleFit[2].Estimate.ConfidenceInterval(0.90).ClosedContains(intercept));

            // Column-based fit: coefficients are reachable by index and by name.
            Assert.IsTrue(columnFit.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(slope0));
            Assert.IsTrue(columnFit.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(slope1));
            Assert.IsTrue(columnFit.Intercept.ConfidenceInterval(0.99).ClosedContains(intercept));

            // Each stored residual must equal the observed y minus the predicted y.
            int rowCount = table.Rows.Count;
            for (int r = 0; r < rowCount; r++)
            {
                FrameRow row = table.Rows[r];
                double predicted = columnFit.Predict((double)row["x0"], (double)row["x1"]).Value;
                double observed = (double)row["y"];
                Assert.IsTrue(TestUtilities.IsNearlyEqual(columnFit.Residuals[r], observed - predicted));
            }
        }
Ejemplo n.º 5
0
        public void LinearRegressionVariances()
        {
            // Run many linear regression fits on synthetic data and check that the
            // variances and covariances each fit reports match the scatter actually
            // observed across the fits.

            Random rng = new Random(314159);

            // True line: y = trueIntercept + trueSlope * x.
            double trueIntercept = 2.0;
            double trueSlope = -1.0;

            // The x values may come from any distribution; the noise is normal.
            ContinuousDistribution xSource = new LogisticDistribution();
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 2.0);

            FrameTable fits = new FrameTable();
            fits.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");

            for (int trial = 0; trial < 128; trial++)
            {
                // Build a synthetic data set of 12 points.
                BivariateSample sample = new BivariateSample();
                for (int point = 0; point < 12; point++)
                {
                    double x = xSource.GetRandomValue(rng);
                    double y = trueIntercept + trueSlope * x + noiseSource.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                LinearRegressionResult fit = sample.LinearRegression();

                // Record the estimates, their claimed (co)variances, and a prediction at x = 12.
                UncertainValue prediction = fit.Predict(12.0);

                Dictionary<string, object> row = new Dictionary<string, object>();
                row.Add("a", fit.Intercept.Value);
                row.Add("va", fit.Parameters.VarianceOf("Intercept"));
                row.Add("b", fit.Slope.Value);
                row.Add("vb", fit.Parameters.VarianceOf("Slope"));
                row.Add("abCov", fit.Parameters.CovarianceOf("Slope", "Intercept"));
                row.Add("p", prediction.Value);
                row.Add("dp", prediction.Uncertainty);
                fits.AddRow(row);
            }

            var intercepts = fits["a"].As<double>();
            var slopes = fits["b"].As<double>();

            // Observed parameter (co)variances should agree with the median claimed values.
            double claimedInterceptVariance = fits["va"].As<double>().Median();
            Assert.IsTrue(intercepts.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(claimedInterceptVariance));

            double claimedSlopeVariance = fits["vb"].As<double>().Median();
            Assert.IsTrue(slopes.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(claimedSlopeVariance));

            double claimedCovariance = fits["abCov"].As<double>().Median();
            Assert.IsTrue(intercepts.PopulationCovariance(slopes).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));

            // Observed scatter of the prediction should agree with its claimed uncertainty.
            double claimedPredictionUncertainty = fits["dp"].As<double>().Median();
            Assert.IsTrue(fits["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedPredictionUncertainty));
        }
Ejemplo n.º 6
0
        public void LinearLogisticRegressionVariances()
        {
            // Generating model: P(y = true) = 1 / (1 + exp(-(intercept + slope * x))).
            double intercept = -2.0;
            double slope = 1.0;
            ContinuousDistribution xSource = new StudentDistribution(2.0);

            // One row per fit: each estimate next to the uncertainty the fit claims for it.
            FrameTable fits = new FrameTable();
            fits.AddColumns<double>("a", "da", "b", "db", "abcov", "p", "dp");

            // A single seeded generator feeds both the x draws and the outcome draws.
            Random rng = new Random(3);

            for (int trial = 0; trial < 32; trial++)
            {
                List<double> xs = new List<double>();
                List<bool> outcomes = new List<bool>();

                for (int draw = 0; draw < 32; draw++)
                {
                    double x = xSource.GetRandomValue(rng);
                    double logit = intercept + slope * x;
                    double probability = 1.0 / (1.0 + Math.Exp(-logit));

                    // The outcome is true with the model probability.
                    bool outcome = rng.NextDouble() < probability;

                    xs.Add(x);
                    outcomes.Add(outcome);
                }

                // Fit a logistic regression of the outcomes on x and predict at x = 1.
                LinearLogisticRegressionResult fit = outcomes.LinearLogisticRegression(xs);
                UncertainValue prediction = fit.Predict(1.0);

                var interceptEstimate = fit.Intercept;
                var slopeEstimate = fit.Slope;
                fits.AddRow(
                    interceptEstimate.Value, interceptEstimate.Uncertainty,
                    slopeEstimate.Value, slopeEstimate.Uncertainty,
                    fit.Parameters.CovarianceMatrix[0, 1],
                    prediction.Value, prediction.Uncertainty
                );
            }

            // Only the variances are checked: the scatter observed across the fits must be
            // compatible, at 99% confidence, with the mean of what the fits claimed.
            var aValues = fits["a"].As<double>();
            var bValues = fits["b"].As<double>();

            double claimedDa = fits["da"].As<double>().Mean();
            Assert.IsTrue(aValues.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDa));

            double claimedDb = fits["db"].As<double>().Mean();
            Assert.IsTrue(bValues.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDb));

            double claimedCovariance = fits["abcov"].As<double>().Mean();
            Assert.IsTrue(aValues.PopulationCovariance(bValues).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));

            double claimedDp = fits["dp"].As<double>().Mean();
            Assert.IsTrue(fits["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedDp));
        }
Ejemplo n.º 7
0
        public void BivariateNonlinearFitVariances()
        {
            // Fit the power law y = scale * x^exponent to many small noisy data sets, then check
            // that the estimates cluster around the true values and that the covariances the
            // fits report match the covariances actually observed across the fits.

            double scale = 2.7;
            double exponent = 3.1;

            ContinuousDistribution xSource = new ExponentialDistribution(2.0);
            ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

            // Fitted (a, b) per trial.
            FrameTable estimates = new FrameTable();
            estimates.AddColumns<double>("a", "b");

            // Reported var(a), var(b), cov(a, b) per trial, in that column order.
            MultivariateSample reported = new MultivariateSample(3);

            for (int trial = 0; trial < 64; trial++)
            {
                // Each trial uses its own generator, seeded with the trial number.
                BivariateSample sample = new BivariateSample();
                Random rng = new Random(trial);
                for (int point = 0; point < 8; point++)
                {
                    double x = xSource.GetRandomValue(rng);
                    double y = scale * Math.Pow(x, exponent) + noiseSource.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // Start the fit from (1, 1).
                NonlinearRegressionResult fit = sample.NonlinearRegression(
                    (IReadOnlyList<double> c, double t) => c[0] * Math.Pow(t, c[1]),
                    new double[] { 1.0, 1.0 }
                );

                var fitted = fit.Parameters;
                estimates.AddRow(fitted.ValuesVector);
                reported.Add(fitted.CovarianceMatrix[0, 0], fitted.CovarianceMatrix[1, 1], fitted.CovarianceMatrix[0, 1]);
            }

            var aValues = estimates["a"].As<double>();
            var bValues = estimates["b"].As<double>();

            // The estimates should scatter around the true parameters.
            Assert.IsTrue(aValues.PopulationMean().ConfidenceInterval(0.99).ClosedContains(scale));
            Assert.IsTrue(bValues.PopulationMean().ConfidenceInterval(0.99).ClosedContains(exponent));

            // The observed variances should agree with the mean reported variances.
            Assert.IsTrue(aValues.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(reported.Column(0).Mean));
            Assert.IsTrue(bValues.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(reported.Column(1).Mean));

            // The covariance is checked through both the extension-method and the static form.
            double reportedCovariance = reported.Column(2).Mean;
            Assert.IsTrue(aValues.PopulationCovariance(bValues).ConfidenceInterval(0.99).ClosedContains(reportedCovariance));
            Assert.IsTrue(Bivariate.PopulationCovariance(aValues, bValues).ConfidenceInterval(0.99).ClosedContains(reported.Column(2).Mean));
        }
Ejemplo n.º 8
0
        public void MultivariateLinearLogisticRegressionVariances()
        {
            // Checks that the uncertainties reported by a two-regressor logistic regression
            // agree with the scatter actually observed over many independent fits.

            // define model P(y = true) = 1 / (1 + exp(-(a + b0 * x0 + b1 * x1)))
            double a  = -3.0;
            double b0 = 2.0;
            double b1 = 1.0;
            ContinuousDistribution x0distribution = new ExponentialDistribution();
            ContinuousDistribution x1distribution = new LognormalDistribution();

            // one row per fit: each estimate next to the uncertainty the fit claims for it
            FrameTable data = new FrameTable();

            data.AddColumns <double>("a", "da", "b0", "db0", "b1", "db1", "p", "dp");

            // a single seeded generator feeds every draw below
            Random rng = new Random(2);

            for (int j = 0; j < 32; j++)
            {
                List <double> x0s = new List <double>();
                List <double> x1s = new List <double>();
                List <bool>   ys  = new List <bool>();

                // draw a sample from the model
                for (int i = 0; i < 32; i++)
                {
                    double x0 = x0distribution.GetRandomValue(rng);
                    double x1 = x1distribution.GetRandomValue(rng);
                    double t  = a + b0 * x0 + b1 * x1;
                    double p  = 1.0 / (1.0 + Math.Exp(-t));
                    bool   y  = (rng.NextDouble() < p);
                    x0s.Add(x0);
                    x1s.Add(x1);
                    ys.Add(y);
                }

                // do a logistic regression fit of the outcomes on the two named regressors
                MultiLinearLogisticRegressionResult result = ys.MultiLinearLogisticRegression(
                    new Dictionary <string, IReadOnlyList <double> > {
                    { "x0", x0s }, { "x1", x1s }
                }
                    );

                // also record a prediction at a fixed point, with its claimed uncertainty
                UncertainValue pp = result.Predict(0.0, 1.0);

                data.AddRow(
                    result.Intercept.Value, result.Intercept.Uncertainty,
                    result.CoefficientOf("x0").Value, result.CoefficientOf("x0").Uncertainty,
                    result.CoefficientOf("x1").Value, result.CoefficientOf("x1").Uncertainty,
                    pp.Value, pp.Uncertainty
                    );
            }

            // Only the variances are checked here; the estimates themselves are not compared to the model.

            // The variances of the estimates should agree with the claimed variances
            Assert.IsTrue(data["a"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["da"].As <double>().Mean()));
            Assert.IsTrue(data["b0"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db0"].As <double>().Mean()));
            Assert.IsTrue(data["b1"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db1"].As <double>().Mean()));
            Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Mean()));
        }
Ejemplo n.º 9
0
        public void FrameTableManipulation()
        {
            // Exercises the basic FrameTable life cycle: adding columns and rows, rejecting
            // malformed additions, adding stored and computed columns, editing a cell, and clearing.
            FrameTable table = new FrameTable();

            table.AddColumn <int>("Id");
            table.AddColumn <DateTime?>("Birthdate");
            table.AddColumns <string>("FirstName", "LastName");
            Assert.IsTrue(table.Columns.Count == 4);

            // Index lookup should work
            Assert.IsTrue(table.GetColumnIndex("Birthdate") >= 0);
            Assert.IsTrue(table.GetColumnIndex("None") < 0);

            // Add rows
            Assert.IsTrue(table.Rows.Count == 0);
            table.AddRow(1, DateTime.Parse("1990-01-01"), "a", "p");
            table.AddRow(2, DateTime.Parse("2000-02-02"), null, null);
            table.AddRow(new Dictionary <string, object>()
            {
                { "Id", 3 }, { "Birthdate", null }, { "FirstName", "c" }, { "LastName", "r" }
            });
            Assert.IsTrue(table.Rows.Count == 3);

            // Adding rows with the wrong types and/or entries should fail.
            //
            // Assert.Fail() must not be placed inside the try block: the exception it raises
            // would be swallowed by catch (Exception), so the check could never fail. Instead,
            // record whether the call threw and assert on that after the try/catch.
            //
            // Careful, some of these will leave the table in a bad state, so they stay disabled.
            // If re-enabled, they should use the same flag pattern as the active checks below.
            //try {
            //    table.AddRow(4, DateTime.Parse("2010-04-04"), 1.0, "s");
            //    Assert.Fail();
            //} catch (Exception) { }
            bool shortRowRejected = false;
            try {
                table.AddRow(4, DateTime.Parse("2010-04-04"));
            } catch (Exception) {
                shortRowRejected = true;
            }
            Assert.IsTrue(shortRowRejected);
            //try {
            //    table.AddRow(new Dictionary<string, object>() {
            //        {"Id", 4}, { "FirstName", "d" }, { "LastName", "r" }
            //    });
            //    Assert.Fail();
            //} catch (Exception) { }
            //try {
            //    table.AddRow(new Dictionary<string, object>() {
            //        {"Id", 4}, { "Birthdate", null }, { "FirstName", "d" }, { "LastName", "r" }, { "MiddleName", "u" }
            //    });
            //    Assert.Fail();
            //} catch (Exception) { }

            // Adding a new column with the wrong length should fail
            // (an empty column cannot join a table that already holds rows).
            bool emptyColumnRejected = false;
            try {
                table.AddColumn <double>("Score");
            } catch (Exception) {
                emptyColumnRejected = true;
            }
            Assert.IsTrue(emptyColumnRejected);
            Assert.IsTrue(table.GetColumnIndex("Score") < 0);

            // Adding a new column with the right length should work
            List <double> scores = new List <double>()
            {
                1.1, 1.2, 1.3
            };

            table.AddColumn("Score", scores);
            Assert.IsTrue(table.GetColumnIndex("Score") >= 0);

            // Adding a new computed column should work.
            // The reference time is captured once so that two evaluations of the column can
            // differ only because the underlying birthdate changed, not because the clock advanced.
            DateTime now = DateTime.Now;
            table.AddComputedColumn <TimeSpan?>("Age", r => {
                DateTime? b = (DateTime?)r["Birthdate"];
                if (b.HasValue)
                {
                    return(now - b.Value);
                }
                else
                {
                    return(null);
                }
            });
            Assert.IsTrue(table.GetColumnIndex("Age") >= 0);

            // Changing a value should change the result of the computed column that depends on it
            int      birthdateIndex = table.GetColumnIndex("Birthdate");
            int      ageIndex       = table.GetColumnIndex("Age");
            TimeSpan age1           = (TimeSpan)table[0, ageIndex];

            table[0, birthdateIndex] = DateTime.Parse("2010-01-01");
            TimeSpan age2 = (TimeSpan)table[0, ageIndex];

            Assert.IsTrue(age2 != age1);

            // Clearing a table should remove the rows but keep the columns
            table.Clear();
            Assert.IsTrue(table.Columns.Count > 0);
            Assert.IsTrue(table.Rows.Count == 0);
        }