// Fits the model y = a + b0 * x0 + b1 * x1 + noise to many independently generated
// data sets and verifies that the scatter of the fitted values across those fits is
// compatible with the uncertainties and covariance each individual fit reports.
// Note that only the variance claims are asserted here, not agreement of the fitted
// values with the true coefficients.
public void MultivariateLinearRegressionVariances() {

    // Coefficients of the generating model.
    const double intercept = -3.0;
    const double slope0 = 2.0;
    const double slope1 = -1.0;

    // Sources for the two regressors and for the additive noise.
    ContinuousDistribution x0Source = new LaplaceDistribution();
    ContinuousDistribution x1Source = new CauchyDistribution();
    ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

    // One row per fit: fitted values, their claimed uncertainties, the claimed
    // intercept/x1 covariance, and a prediction with its claimed uncertainty.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "da", "b0", "db0", "b1", "db1", "ab1Cov", "p", "dp");

    Random rng = new Random(4);
    for (int fitIndex = 0; fitIndex < 64; fitIndex++) {

        // Generate one synthetic data set of 16 points.
        List<double> x0Values = new List<double>();
        List<double> x1Values = new List<double>();
        List<double> yValues = new List<double>();
        for (int pointIndex = 0; pointIndex < 16; pointIndex++) {
            // The draw order (x0, x1, noise) fixes the generated data for the given seed.
            double x0 = x0Source.GetRandomValue(rng);
            double x1 = x1Source.GetRandomValue(rng);
            double noise = noiseSource.GetRandomValue(rng);
            double y = intercept + slope0 * x0 + slope1 * x1 + noise;
            x0Values.Add(x0);
            x1Values.Add(x1);
            yValues.Add(y);
        }

        // Fit the linear model to this data set.
        Dictionary<string, IReadOnlyList<double>> regressors = new Dictionary<string, IReadOnlyList<double>> {
            { "x0", x0Values },
            { "x1", x1Values }
        };
        MultiLinearRegressionResult result = yValues.MultiLinearRegression(regressors);

        // Record the fitted values together with their claimed uncertainties.
        var fittedIntercept = result.Intercept;
        var fittedSlope0 = result.CoefficientOf("x0");
        var fittedSlope1 = result.CoefficientOf("x1");
        UncertainValue prediction = result.Predict(-5.0, 6.0);
        data.AddRow(
            fittedIntercept.Value, fittedIntercept.Uncertainty,
            fittedSlope0.Value, fittedSlope0.Uncertainty,
            fittedSlope1.Value, fittedSlope1.Uncertainty,
            result.Parameters.CovarianceOf("Intercept", "x1"),
            prediction.Value, prediction.Uncertainty
        );
    }

    var interceptColumn = data["a"].As<double>();
    var slope0Column = data["b0"].As<double>();
    var slope1Column = data["b1"].As<double>();
    var predictionColumn = data["p"].As<double>();

    // The observed standard deviation of each fitted parameter should agree with
    // the mean of the uncertainties claimed by the fits.
    double claimedInterceptUncertainty = data["da"].As<double>().Mean();
    Assert.IsTrue(interceptColumn.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedInterceptUncertainty));

    double claimedSlope0Uncertainty = data["db0"].As<double>().Mean();
    Assert.IsTrue(slope0Column.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedSlope0Uncertainty));

    double claimedSlope1Uncertainty = data["db1"].As<double>().Mean();
    Assert.IsTrue(slope1Column.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedSlope1Uncertainty));

    // The observed intercept/x1 covariance should agree with the claimed covariance.
    double claimedCovariance = data["ab1Cov"].As<double>().Mean();
    Assert.IsTrue(interceptColumn.PopulationCovariance(slope1Column).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));

    // The scatter of the predictions is compared against the median claimed uncertainty.
    double claimedPredictionUncertainty = data["dp"].As<double>().Median();
    Assert.IsTrue(predictionColumn.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedPredictionUncertainty));
}
// Fits a Wald distribution to many samples drawn from a known Wald distribution and
// verifies that (1) the named result properties agree with the parameter collection,
// (2) the fitted parameters scatter around the true values, and (3) the observed
// variances and covariance of the fitted parameters match the claimed ones.
public void WaldFit() {

    WaldDistribution wald = new WaldDistribution(3.5, 2.5);

    // One row per fit: fitted values and the claimed (co)variances.
    FrameTable results = new FrameTable();
    results.AddColumns<double>("Mean", "Shape", "MeanVariance", "ShapeVariance", "MeanShapeCovariance");

    for (int trial = 0; trial < 128; trial++) {

        // The trial index is passed to the sample factory so each trial gets its own sample
        // (presumably it acts as the seed, and 16 as the sample size; see SampleTest.CreateSample).
        WaldFitResult fit = WaldDistribution.FitToSample(SampleTest.CreateSample(wald, 16, trial));
        var fitted = fit.Parameters;

        // The convenience properties must report exactly what the parameter collection holds.
        Assert.IsTrue(fit.Mean.Value == fitted.ValuesVector[fitted.IndexOf("Mean")]);
        Assert.IsTrue(fit.Shape.Value == fitted.ValuesVector[fitted.IndexOf("Shape")]);

        // The claimed variances must be the squares of the claimed uncertainties.
        var meanVariance = fitted.VarianceOf("Mean");
        var shapeVariance = fitted.VarianceOf("Shape");
        Assert.IsTrue(TestUtilities.IsNearlyEqual(meanVariance, MoreMath.Sqr(fit.Mean.Uncertainty)));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(shapeVariance, MoreMath.Sqr(fit.Shape.Uncertainty)));

        results.AddRow(
            fit.Mean.Value,
            fit.Shape.Value,
            meanVariance,
            shapeVariance,
            fitted.CovarianceOf("Mean", "Shape")
        );
    }

    var meanColumn = results["Mean"].As<double>();
    var shapeColumn = results["Shape"].As<double>();

    // Fitted parameters should cluster around the parameters of the source distribution.
    Assert.IsTrue(meanColumn.PopulationMean().ConfidenceInterval(0.99).ClosedContains(wald.Mean));
    Assert.IsTrue(shapeColumn.PopulationMean().ConfidenceInterval(0.99).ClosedContains(wald.Shape));

    // Observed variances and covariance should agree with the median claimed values.
    double claimedMeanVariance = results["MeanVariance"].As<double>().Median();
    Assert.IsTrue(meanColumn.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(claimedMeanVariance));

    double claimedShapeVariance = results["ShapeVariance"].As<double>().Median();
    Assert.IsTrue(shapeColumn.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(claimedShapeVariance));

    double claimedCovariance = results["MeanShapeCovariance"].As<double>().Median();
    Assert.IsTrue(meanColumn.PopulationCovariance(shapeColumn).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));
}
// Builds a small example table of people (id, name, sex, birthdate, height, weight,
// result flag), including two rows containing nulls, and writes it to "example.csv"
// and "example.json" in the current working directory.
public static void ConstructExampleData() {

    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    table.AddColumns<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Fixed seed so the generated example data is reproducible.
    Random rng = new Random(1000001);

    // The numeric arguments are forwarded unchanged to the AddRows helper defined
    // elsewhere; presumably they are height and body-mass statistics plus a result
    // flag -- confirm against AddRows.
    string[] maleNames = new string[] { "Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
    AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

    string[] femaleNames = new string[] { "Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
    AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

    // Add rows with nulls (a missing name and a missing weight).
    // Dates are constructed explicitly rather than parsed from strings, so the
    // values do not depend on the current culture's date parsing rules.
    table.AddRow(table.Rows.Count, null, "M", new DateTime(1970, 7, 27), 183.0, 74.0, false);
    table.AddRow(table.Rows.Count, "Zoey", "F", new DateTime(2007, 9, 17), 138.0, null, false);

    // File.Create truncates an existing file. File.OpenWrite does not, so a longer
    // file left over from a previous run would keep stale bytes after the new content.
    string path = @"example.csv";
    using (StreamWriter writer = new StreamWriter(File.Create(path))) {
        table.ToCsv(writer);
    }
    Console.WriteLine(File.Exists(path));

    string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);
    File.WriteAllText("example.json", json);
}
// Fits y = a + b0 * x0 + b1 * x1 + noise to a single synthetic data set through both
// the MultivariateSample API and the column-based API, and verifies the parameter
// count, agreement of the fitted parameters with the model, and the residuals.
public void MultivariateLinearRegressionSimple() {

    // Coefficients of the generating model.
    const double intercept = 1.0;
    const double slope0 = -2.0;
    const double slope1 = 3.0;

    ContinuousDistribution x0Source = new CauchyDistribution(10.0, 5.0);
    ContinuousDistribution x1Source = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0));
    ContinuousDistribution noiseSource = new NormalDistribution(0.0, 10.0);

    // Store the same 100 points in both containers so both APIs see identical data.
    Random rng = new Random(1);
    MultivariateSample sample = new MultivariateSample("x0", "x1", "y");
    FrameTable table = new FrameTable();
    table.AddColumns<double>("x0", "x1", "y");
    for (int pointIndex = 0; pointIndex < 100; pointIndex++) {
        double x0 = x0Source.GetRandomValue(rng);
        double x1 = x1Source.GetRandomValue(rng);
        double noise = noiseSource.GetRandomValue(rng);
        double y = intercept + slope0 * x0 + slope1 * x1 + noise;
        sample.Add(x0, x1, y);
        table.AddRow(x0, x1, y);
    }

    // Fit via the sample API (column 2 holds y) and via the column-based API.
    ParameterCollection sampleFit = sample.LinearRegression(2).Parameters;
    var yColumn = table["y"].As<double>();
    MultiLinearRegressionResult columnFit = yColumn.MultiLinearRegression(
        table["x0"].As<double>(), table["x1"].As<double>()
    );

    // Two coefficients plus an intercept.
    Assert.IsTrue(sampleFit.Count == 3);
    Assert.IsTrue(columnFit.Parameters.Count == 3);

    // Sample API: the parameters are checked in the order x0 coefficient, x1 coefficient, intercept.
    Assert.IsTrue(sampleFit[0].Estimate.ConfidenceInterval(0.90).ClosedContains(slope0));
    Assert.IsTrue(sampleFit[1].Estimate.ConfidenceInterval(0.90).ClosedContains(slope1));
    Assert.IsTrue(sampleFit[2].Estimate.ConfidenceInterval(0.90).ClosedContains(intercept));

    // Column API: coefficients are reachable both by index and by name.
    Assert.IsTrue(columnFit.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(slope0));
    Assert.IsTrue(columnFit.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(slope1));
    Assert.IsTrue(columnFit.Intercept.ConfidenceInterval(0.99).ClosedContains(intercept));

    // Each reported residual should equal the observed y minus the predicted y.
    int rowCount = table.Rows.Count;
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        FrameRow row = table.Rows[rowIndex];
        double observed = (double)row["y"];
        double predicted = columnFit.Predict((double)row["x0"], (double)row["x1"]).Value;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(columnFit.Residuals[rowIndex], observed - predicted));
    }
}
// Performs many straight-line fits to synthetic data and verifies that the observed
// variances and covariance of the fitted intercept and slope, and the observed scatter
// of a prediction, agree with the values each fit claims.
public void LinearRegressionVariances() {

    Random rng = new Random(314159);

    // True line: y = trueIntercept + trueSlope * x
    const double trueIntercept = 2.0;
    const double trueSlope = -1.0;

    // One row per fit.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");

    for (int fitIndex = 0; fitIndex < 128; fitIndex++) {

        // The x values may come from any distribution; the noise is normal.
        ContinuousDistribution xSource = new LogisticDistribution();
        ContinuousDistribution noiseSource = new NormalDistribution(0.0, 2.0);

        // Build a synthetic data set of 12 points.
        BivariateSample sample = new BivariateSample();
        for (int pointIndex = 0; pointIndex < 12; pointIndex++) {
            double x = xSource.GetRandomValue(rng);
            double y = trueIntercept + trueSlope * x + noiseSource.GetRandomValue(rng);
            sample.Add(x, y);
        }

        LinearRegressionResult result = sample.LinearRegression();
        UncertainValue prediction = result.Predict(12.0);

        // Record the fitted values and the claimed (co)variances for this fit.
        Dictionary<string, object> row = new Dictionary<string, object>();
        row["a"] = result.Intercept.Value;
        row["va"] = result.Parameters.VarianceOf("Intercept");
        row["b"] = result.Slope.Value;
        row["vb"] = result.Parameters.VarianceOf("Slope");
        row["abCov"] = result.Parameters.CovarianceOf("Slope", "Intercept");
        row["p"] = prediction.Value;
        row["dp"] = prediction.Uncertainty;
        data.AddRow(row);
    }

    var interceptColumn = data["a"].As<double>();
    var slopeColumn = data["b"].As<double>();

    // Observed parameter variances and covariance should agree with the median claimed values.
    double claimedInterceptVariance = data["va"].As<double>().Median();
    Assert.IsTrue(interceptColumn.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(claimedInterceptVariance));

    double claimedSlopeVariance = data["vb"].As<double>().Median();
    Assert.IsTrue(slopeColumn.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(claimedSlopeVariance));

    double claimedCovariance = data["abCov"].As<double>().Median();
    Assert.IsTrue(interceptColumn.PopulationCovariance(slopeColumn).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));

    // Observed scatter of the prediction should agree with the median claimed uncertainty.
    double claimedPredictionUncertainty = data["dp"].As<double>().Median();
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedPredictionUncertainty));
}
// Performs many single-regressor logistic fits to synthetic boolean outcomes and
// verifies that the observed scatter of the fitted intercept, slope, and a predicted
// probability agrees with the uncertainties and covariance each fit claims.
public void LinearLogisticRegressionVariances() {

    // Generating model: P(y = true) = 1 / (1 + exp(-(intercept + slope * x)))
    const double intercept = -2.0;
    const double slope = 1.0;
    ContinuousDistribution xSource = new StudentDistribution(2.0);

    // One row per fit.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "da", "b", "db", "abcov", "p", "dp");

    Random rng = new Random(3);
    for (int fitIndex = 0; fitIndex < 32; fitIndex++) {

        // Generate 32 (x, outcome) pairs.
        List<double> xValues = new List<double>();
        List<bool> outcomes = new List<bool>();
        for (int pointIndex = 0; pointIndex < 32; pointIndex++) {
            double x = xSource.GetRandomValue(rng);
            double logit = intercept + slope * x;
            double probability = 1.0 / (1.0 + Math.Exp(-logit));
            // The outcome is true with the model probability.
            bool outcome = (rng.NextDouble() < probability);
            xValues.Add(x);
            outcomes.Add(outcome);
        }

        // Fit the logistic model and record fitted values with their claimed uncertainties.
        LinearLogisticRegressionResult result = outcomes.LinearLogisticRegression(xValues);
        var fittedIntercept = result.Intercept;
        var fittedSlope = result.Slope;
        UncertainValue prediction = result.Predict(1.0);
        data.AddRow(
            fittedIntercept.Value, fittedIntercept.Uncertainty,
            fittedSlope.Value, fittedSlope.Uncertainty,
            result.Parameters.CovarianceMatrix[0, 1],
            prediction.Value, prediction.Uncertainty
        );
    }

    var interceptColumn = data["a"].As<double>();
    var slopeColumn = data["b"].As<double>();

    // Observed scatter of the fitted values should agree with the mean claimed values.
    double claimedInterceptUncertainty = data["da"].As<double>().Mean();
    Assert.IsTrue(interceptColumn.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedInterceptUncertainty));

    double claimedSlopeUncertainty = data["db"].As<double>().Mean();
    Assert.IsTrue(slopeColumn.PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedSlopeUncertainty));

    double claimedCovariance = data["abcov"].As<double>().Mean();
    Assert.IsTrue(interceptColumn.PopulationCovariance(slopeColumn).ConfidenceInterval(0.99).ClosedContains(claimedCovariance));

    double claimedPredictionUncertainty = data["dp"].As<double>().Mean();
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(claimedPredictionUncertainty));
}
// Fits the power law y = a * x^b to many small synthetic data sets and verifies that
// the fitted parameters cluster around the true values and that the claimed parameter
// variances and covariance match those actually observed across the fits.
public void BivariateNonlinearFitVariances() {

    // True power-law parameters.
    const double a = 2.7;
    const double b = 3.1;

    ContinuousDistribution xSource = new ExponentialDistribution(2.0);
    ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

    // Fitted (a, b) per fit, and claimed (var a, var b, cov ab) per fit.
    FrameTable parameters = new FrameTable();
    parameters.AddColumns<double>("a", "b");
    MultivariateSample covariances = new MultivariateSample(3);

    for (int fitIndex = 0; fitIndex < 64; fitIndex++) {

        // Each fit uses its own generator, seeded with the fit index.
        Random rng = new Random(fitIndex);
        BivariateSample sample = new BivariateSample();
        for (int pointIndex = 0; pointIndex < 8; pointIndex++) {
            double x = xSource.GetRandomValue(rng);
            double y = a * Math.Pow(x, b) + noiseSource.GetRandomValue(rng);
            sample.Add(x, y);
        }

        // Fit the power law, starting the search from (1, 1).
        NonlinearRegressionResult fit = sample.NonlinearRegression(
            (IReadOnlyList<double> c, double t) => c[0] * Math.Pow(t, c[1]),
            new double[] { 1.0, 1.0 }
        );

        parameters.AddRow(fit.Parameters.ValuesVector);
        var claimed = fit.Parameters.CovarianceMatrix;
        covariances.Add(claimed[0, 0], claimed[1, 1], claimed[0, 1]);
    }

    var aColumn = parameters["a"].As<double>();
    var bColumn = parameters["b"].As<double>();

    // Fitted parameters should cluster around the true values.
    Assert.IsTrue(aColumn.PopulationMean().ConfidenceInterval(0.99).ClosedContains(a));
    Assert.IsTrue(bColumn.PopulationMean().ConfidenceInterval(0.99).ClosedContains(b));

    // Observed variances should agree with the mean claimed variances.
    Assert.IsTrue(aColumn.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(bColumn.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));

    // Observed covariance should agree with the mean claimed covariance; this is
    // checked through both the extension-method and the static call forms.
    Assert.IsTrue(aColumn.PopulationCovariance(bColumn).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
    Assert.IsTrue(Bivariate.PopulationCovariance(parameters["a"].As<double>(), parameters["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
}
// Performs many two-regressor logistic fits to synthetic boolean outcomes and verifies
// that the observed scatter of the fitted intercept, coefficients, and a predicted
// probability agrees with the uncertainties each fit claims.
public void MultivariateLinearLogisticRegressionVariances() {

    // Generating model: P(y = true) = 1 / (1 + exp(-(a + b0 * x0 + b1 * x1)))
    double a = -3.0;
    double b0 = 2.0;
    double b1 = 1.0;
    ContinuousDistribution x0distribution = new ExponentialDistribution();
    ContinuousDistribution x1distribution = new LognormalDistribution();

    // One row per fit: fitted values and their claimed uncertainties.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "da", "b0", "db0", "b1", "db1", "p", "dp");

    Random rng = new Random(2);
    for (int j = 0; j < 32; j++) {

        // Draw one synthetic data set of 32 points from the model.
        // (A per-iteration FrameTable that was built here but never populated or
        // read has been removed; it had no effect on what this test checks.)
        List<double> x0s = new List<double>();
        List<double> x1s = new List<double>();
        List<bool> ys = new List<bool>();
        for (int i = 0; i < 32; i++) {
            double x0 = x0distribution.GetRandomValue(rng);
            double x1 = x1distribution.GetRandomValue(rng);
            double t = a + b0 * x0 + b1 * x1;
            double p = 1.0 / (1.0 + Math.Exp(-t));
            // The outcome is true with the model probability.
            bool y = (rng.NextDouble() < p);
            x0s.Add(x0);
            x1s.Add(x1);
            ys.Add(y);
        }

        // Do a logistic regression fit on the data set.
        MultiLinearLogisticRegressionResult result = ys.MultiLinearLogisticRegression(
            new Dictionary<string, IReadOnlyList<double>> {
                { "x0", x0s }, { "x1", x1s }
            }
        );
        UncertainValue pp = result.Predict(0.0, 1.0);

        data.AddRow(
            result.Intercept.Value, result.Intercept.Uncertainty,
            result.CoefficientOf("x0").Value, result.CoefficientOf("x0").Uncertainty,
            result.CoefficientOf("x1").Value, result.CoefficientOf("x1").Uncertainty,
            pp.Value, pp.Uncertainty
        );
    }

    // The observed standard deviations of the estimates should agree with the mean
    // claimed uncertainties.
    Assert.IsTrue(data["a"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["da"].As<double>().Mean()));
    Assert.IsTrue(data["b0"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db0"].As<double>().Mean()));
    Assert.IsTrue(data["b1"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db1"].As<double>().Mean()));
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As<double>().Mean()));
}
// Exercises basic FrameTable manipulation: adding columns and rows, rejecting
// malformed additions, adding stored and computed columns, updating a cell that a
// computed column depends on, and clearing the table.
public void FrameTableManipulation() {

    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<DateTime?>("Birthdate");
    table.AddColumns<string>("FirstName", "LastName");
    Assert.IsTrue(table.Columns.Count == 4);

    // Index lookup should work
    Assert.IsTrue(table.GetColumnIndex("Birthdate") >= 0);
    Assert.IsTrue(table.GetColumnIndex("None") < 0);

    // Add rows
    Assert.IsTrue(table.Rows.Count == 0);
    table.AddRow(1, DateTime.Parse("1990-01-01"), "a", "p");
    table.AddRow(2, DateTime.Parse("2000-02-02"), null, null);
    table.AddRow(new Dictionary<string, object>() {
        { "Id", 3 }, { "Birthdate", null }, { "FirstName", "c" }, { "LastName", "r" }
    });
    Assert.IsTrue(table.Rows.Count == 3);

    // Adding rows with the wrong types and/or entries should fail
    // Careful, some of these will leave the table in a bad state
    //try {
    //    table.AddRow(4, DateTime.Parse("2010-04-04"), 1.0, "s");
    //    Assert.Fail();
    //} catch (Exception) { }

    // The outcome is recorded in a flag and asserted outside the try block. Calling
    // Assert.Fail() inside the try would throw an exception that the catch (Exception)
    // clause swallows, so the check could never fail.
    bool shortRowRejected = false;
    try {
        table.AddRow(4, DateTime.Parse("2010-04-04"));
    } catch (Exception) {
        shortRowRejected = true;
    }
    Assert.IsTrue(shortRowRejected);

    //try {
    //    table.AddRow(new Dictionary<string, object>() {
    //        {"Id", 4}, { "FirstName", "d" }, { "LastName", "r" }
    //    });
    //    Assert.Fail();
    //} catch (Exception) { }
    //try {
    //    table.AddRow(new Dictionary<string, object>() {
    //        {"Id", 4}, { "Birthdate", null }, { "FirstName", "d" }, { "LastName", "r" }, { "MiddleName", "u" }
    //    });
    //    Assert.Fail();
    //} catch (Exception) { }

    // Adding a new column with the wrong length should fail
    // (an empty column cannot be added to a table that already has rows)
    bool emptyColumnRejected = false;
    try {
        table.AddColumn<double>("Score");
    } catch (Exception) {
        emptyColumnRejected = true;
    }
    Assert.IsTrue(emptyColumnRejected);
    Assert.IsTrue(table.GetColumnIndex("Score") < 0);

    // Adding a new column with the right length should work
    List<double> scores = new List<double>() { 1.1, 1.2, 1.3 };
    table.AddColumn("Score", scores);
    Assert.IsTrue(table.GetColumnIndex("Score") >= 0);

    // Adding a new computed column should work
    table.AddComputedColumn<TimeSpan?>("Age", r => {
        DateTime? b = (DateTime?)r["Birthdate"];
        if (b.HasValue) {
            return (DateTime.Now - b.Value);
        } else {
            return (null);
        }
    });
    Assert.IsTrue(table.GetColumnIndex("Age") >= 0);

    // Changing a value should change the result of the computed column that depends on it
    int birthdateIndex = table.GetColumnIndex("Birthdate");
    int ageIndex = table.GetColumnIndex("Age");
    TimeSpan age1 = (TimeSpan)table[0, ageIndex];
    table[0, birthdateIndex] = DateTime.Parse("2010-01-01");
    TimeSpan age2 = (TimeSpan)table[0, ageIndex];
    Assert.IsTrue(age2 != age1);

    // Clearing a table should remove the rows but keep the columns
    table.Clear();
    Assert.IsTrue(table.Columns.Count > 0);
    Assert.IsTrue(table.Rows.Count == 0);
}