// Draws n points from the multivariate normal distribution N(M, C).
// Uses the Cholesky factor A of C: if Z ~ N(0, I), then M + A Z ~ N(M, C).
// The rng seed is fixed so the sample is reproducible across test runs.
public MultivariateSample CreateMultivariateNormalSample(ColumnVector M, SymmetricMatrix C, int n) {
    int dim = M.Dimension;
    MultivariateSample sample = new MultivariateSample(dim);
    SquareMatrix sqrtC = C.CholeskyDecomposition().SquareRootMatrix();
    Random rng = new Random(1);
    ContinuousDistribution standardNormal = new NormalDistribution();
    for (int i = 0; i < n; i++) {
        // Generate a vector of independent standard normal deviates.
        ColumnVector z = new ColumnVector(dim);
        for (int j = 0; j < dim; j++) {
            z[j] = standardNormal.InverseLeftProbability(rng.NextDouble());
        }
        // Transform to the target mean and covariance and record the point.
        sample.Add(M + sqrtC * z);
    }
    return (sample);
}
// Computes a best-fit line through the given points, returned as a point on the
// line (the centroid) and a unit direction vector (the first principal component).
public static Tuple<Point, Vector> Compute(Point[] points) {
    Contract.Requires(points.Length >= 2);
    Contract.Ensures(Contract.Result<Tuple<Point, Vector>>() != null);

    // Two points determine the line exactly; no PCA required.
    if (points.Length == 2) {
        return (Tuple.Create(points[0], (points[1] - points[0]).Normalized()));
    }

    // Center the points on their centroid before running PCA.
    var meanX = points.Select(p => p.X).Average();
    var meanY = points.Select(p => p.Y).Average();
    var centered = points.Select(p => p - new Vector(meanX, meanY));

    var mvSample = new MultivariateSample(2);
    foreach (var c in centered) {
        mvSample.Add(c.X, c.Y);
    }

    // The dominant principal component gives the line's direction.
    var pca = mvSample.PrincipalComponentAnalysis();
    var direction = pca.Component(0).NormalizedVector();

    return (Tuple.Create(
        new Point(meanX, meanY),
        new Vector(direction[0], direction[1])));
}
public void MultivariateMoments() {
    // Draw a small trivariate sample with a different marginal distribution per column.
    MultivariateSample sample = new MultivariateSample(3);
    ContinuousDistribution[] marginals = {
        new NormalDistribution(),
        new ExponentialDistribution(),
        new UniformDistribution()
    };
    Random rng = new Random(1);
    for (int i = 0; i < 10; i++) {
        sample.Add(marginals[0].GetRandomValue(rng), marginals[1].GetRandomValue(rng), marginals[2].GetRandomValue(rng));
    }

    // Per-column moments must agree with the corresponding multivariate moments.
    for (int i = 0; i < 3; i++) {
        int[] p = new int[3];
        p[i] = 1;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(i).Mean, sample.RawMoment(p)));
        p[i] = 2;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(i).Variance, sample.CentralMoment(p)));
        // Pairwise covariances must agree with the mixed central moments.
        for (int j = 0; j < i; j++) {
            int[] q = new int[3];
            q[i] = 1;
            q[j] = 1;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.TwoColumns(i, j).Covariance, sample.CentralMoment(q)));
        }
    }
}
public void MultivariateLinearRegressionBadInputTest() {
    // Start with fewer rows than the regression has parameters.
    MultivariateSample data = new MultivariateSample(3);
    data.Add(1, 2, 3);
    data.Add(2, 3, 4);

    // With too little data, the regression should refuse to run.
    try {
        data.LinearRegression(2);
        Assert.IsTrue(false);
    } catch (InvalidOperationException) {
        Assert.IsTrue(true);
    }

    // Add enough rows that only the column index can now be invalid.
    data.Add(3, 4, 5);
    data.Add(4, 5, 6);

    // A negative output-column index is out of range.
    try {
        data.LinearRegression(-1);
        Assert.IsTrue(false);
    } catch (ArgumentOutOfRangeException) {
        Assert.IsTrue(true);
    }

    // An output-column index beyond the last column is out of range.
    try {
        data.LinearRegression(3);
        Assert.IsTrue(false);
    } catch (ArgumentOutOfRangeException) {
        Assert.IsTrue(true);
    }
}
public void MultivariateLinearRegressionAgreement2() {
    // A multivariate linear regression with a single x-column should agree with
    // a plain bivariate linear regression on the same data.
    double intercept = 1.0;
    double slope = -2.0;
    ContinuousDistribution yErrDist = new NormalDistribution(0.0, 3.0);
    UniformDistribution xDist = new UniformDistribution(Interval.FromEndpoints(-2.0, 3.0));
    Random rng = new Random(1111111);

    MultivariateSample multi = new MultivariateSample("x", "y");
    for (int i = 0; i < 10; i++) {
        double x = xDist.GetRandomValue(rng);
        multi.Add(x, intercept + slope * x + yErrDist.GetRandomValue(rng));
    }

    // Fit via the old multivariate regression entry point.
    MultiLinearRegressionResult result1 = multi.LinearRegression(1);

    // Fit via the simple bivariate regression entry point.
    LinearRegressionResult result2 = multi.TwoColumns(0, 1).LinearRegression();
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result1.Parameters["Intercept"].Estimate, result2.Parameters["Intercept"].Estimate));

    // Fit via the new multivariate regression entry point.
    MultiLinearRegressionResult result3 = multi.Column(1).ToList().MultiLinearRegression(multi.Column(0).ToList());
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result1.Parameters["Intercept"].Estimate, result3.Parameters["Intercept"].Estimate));
}
public void MultivariateLinearLogisticRegressionSimple() {
    // Generating model: logit p = a + b0 * x0 + b1 * x1; y ~ Bernoulli(p).
    double a = 1.0;
    double b0 = -1.0 / 2.0;
    double b1 = 1.0 / 3.0;
    ContinuousDistribution x0distribution = new LaplaceDistribution();
    ContinuousDistribution x1distribution = new NormalDistribution();

    // Draw one sample, stored both in the old MultivariateSample container and
    // in the new FrameTable container so both APIs see the same data.
    Random rng = new Random(1);
    MultivariateSample old = new MultivariateSample("y", "x0", "x1");
    FrameTable table = new FrameTable();
    table.AddColumn<double>("x0");
    table.AddColumn<double>("x1");
    table.AddColumn<bool>("y");
    for (int i = 0; i < 100; i++) {
        double x0 = x0distribution.GetRandomValue(rng);
        double x1 = x1distribution.GetRandomValue(rng);
        double t = a + b0 * x0 + b1 * x1;
        double p = 1.0 / (1.0 + Math.Exp(-t));
        bool y = (rng.NextDouble() < p);
        old.Add(y ? 1.0 : 0.0, x0, x1);
        table.AddRow(x0, x1, y);
    }

    // Fit the logistic model via both the old and the new API.
    MultiLinearLogisticRegressionResult oldResult = old.LogisticLinearRegression(0);
    MultiLinearLogisticRegressionResult newResult = table["y"].As<bool>().MultiLinearLogisticRegression(
        table["x0"].As<double>(), table["x1"].As<double>()
    );

    // The result should have one parameter per regressor plus an intercept.
    Assert.IsTrue(newResult.Parameters.Count == 3);

    // The fitted parameters should agree with the generating model.
    Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
    Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
    Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

    // Our predictions should be better than chance.
    int correct = 0;
    for (int i = 0; i < table.Rows.Count; i++) {
        FrameRow row = table.Rows[i];
        double x0 = (double)row["x0"];
        double x1 = (double)row["x1"];
        double p = newResult.Predict(x0, x1).Value;
        bool y = (bool)row["y"];
        // Fixed: use short-circuit && instead of the non-short-circuit & on booleans.
        if ((y && p > 0.5) || (!y && p < 0.5)) {
            correct++;
        }
    }
    Assert.IsTrue(correct > 0.5 * table.Rows.Count);
}
public void NormalFitCovariances() {
    NormalDistribution N = new NormalDistribution(-1.0, 2.0);

    // Fitted (mu, sigma) pairs, so we can measure their means, variances, and covariance.
    BivariateSample parameters = new BivariateSample();
    // Claimed var(mu), var(sigma), cov(mu, sigma) reported by each fit.
    MultivariateSample covariances = new MultivariateSample(3);

    // Fit many small samples; small n makes the variation in mu and sigma substantial.
    for (int i = 0; i < 128; i++) {
        Sample s = TestUtilities.CreateSample(N, 8, i);
        NormalFitResult fit = NormalDistribution.FitToSample(s);
        parameters.Add(fit.Mean.Value, fit.StandardDeviation.Value);
        covariances.Add(
            fit.Parameters.CovarianceMatrix[0, 0],
            fit.Parameters.CovarianceMatrix[1, 1],
            fit.Parameters.CovarianceMatrix[0, 1]
        );
    }

    // Mean fitted values should agree with the population values.
    Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.Mean));
    Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.StandardDeviation));

    // Observed scatter of fitted values should agree with the claimed covariances.
    Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(1).Mean));
    Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(2).Mean));
}
public void MultivariateLinearRegressionNullDistribution() {
    int d = 4;
    Random rng = new Random(1);
    NormalDistribution err = new NormalDistribution();

    // Collect F statistics from regressions of pure noise against pure noise.
    Sample fSample = new Sample();
    for (int i = 0; i < 64; i++) {
        MultivariateSample data = new MultivariateSample(d);
        for (int j = 0; j < 8; j++) {
            double[] row = new double[d];
            for (int k = 0; k < d; k++) {
                row[k] = err.GetRandomValue(rng);
            }
            data.Add(row);
        }
        RegressionResult r = data.LinearRegression(0);
        fSample.Add(r.F.Statistic);
    }

    // Under the null hypothesis, F should follow the expected Fisher distribution.
    TestResult ks = fSample.KolmogorovSmirnovTest(new FisherDistribution(3, 4));
    Assert.IsTrue(ks.LeftProbability < 0.95);
}
public void MultivariateLinearRegressionAgreement() {
    // Regressing column 0 on column 1 and column 1 on column 0 of the swapped
    // sample should produce the same parameters and covariances, just permuted.
    Random rng = new Random(1);
    MultivariateSample SA = new MultivariateSample(2);
    for (int i = 0; i < 10; i++) {
        SA.Add(rng.NextDouble(), rng.NextDouble());
    }

    RegressionResult RA = SA.LinearRegression(0);
    ColumnVector PA = RA.Parameters.Best;
    SymmetricMatrix CA = RA.Parameters.Covariance;

    // Swap the columns and regress the other way.
    MultivariateSample SB = SA.Columns(1, 0);
    RegressionResult RB = SB.LinearRegression(1);
    ColumnVector PB = RB.Parameters.Best;
    SymmetricMatrix CB = RB.Parameters.Covariance;

    Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[0], PB[1]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(PA[1], PB[0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 0], CB[1, 1]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[0, 1], CB[1, 0]));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(CA[1, 1], CB[0, 0]));

    // The bivariate regression on the same columns should agree exactly.
    BivariateSample SC = SA.TwoColumns(1, 0);
    RegressionResult RC = SC.LinearRegression();
    ColumnVector PC = RC.Parameters.Best;
    SymmetricMatrix CC = RC.Parameters.Covariance;

    Assert.IsTrue(TestUtilities.IsNearlyEqual(PA, PC));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(CA, CC));
}
public void GumbelFit() {
    GumbelDistribution d = new GumbelDistribution(-1.0, 2.0);

    MultivariateSample parameters = new MultivariateSample(2);
    MultivariateSample variances = new MultivariateSample(3);

    // Repeatedly fit samples drawn from the distribution, recording the fitted
    // parameters and their claimed variances/covariance.
    for (int i = 0; i < 32; i++) {
        Sample s = SampleTest.CreateSample(d, 64, i);
        GumbelFitResult r = GumbelDistribution.FitToSample(s);
        parameters.Add(r.Location.Value, r.Scale.Value);
        variances.Add(
            r.Parameters.CovarianceMatrix[0, 0],
            r.Parameters.CovarianceMatrix[1, 1],
            r.Parameters.CovarianceMatrix[0, 1]
        );
        Assert.IsTrue(r.GoodnessOfFit.Probability > 0.01);
    }

    // Mean fitted parameters should agree with the generating parameters.
    Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(d.Location));
    Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(d.Scale));

    // Observed scatter should agree with the claimed covariances.
    Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Mean));
    Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Mean));
    Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Mean));
}
public void MeansClustering() {
    // Re-create the classic "mouse" test: three Gaussian blobs, two small "ears"
    // and one larger "face".
    double[] centerX = { 0.25, 0.75, 0.5 };
    double[] centerY = { 0.75, 0.75, 0.5 };
    double[] spread = { 0.1, 0.1, 0.2 };

    MultivariateSample points = new MultivariateSample(2);
    Random rng = new Random(1);
    NormalDistribution d = new NormalDistribution();
    for (int i = 0; i < 100; i++) {
        // Pick a blob at random, then scatter around its center.
        int k = rng.Next(3);
        points.Add(
            centerX[k] + spread[k] * d.GetRandomValue(rng),
            centerY[k] + spread[k] * d.GetRandomValue(rng)
        );
    }

    MeansClusteringResult result = points.MeansClustering(3);

    Assert.IsTrue(result.Count == 3);
    Assert.IsTrue(result.Dimension == 2);
}
// Runs a 2D principal component analysis on the given points, returning the
// centroid and the two (unit-length) principal directions.
public static PCA2DResult Compute(IEnumerable<Point> points) {
    Contract.Requires(points != null);
    Contract.Requires(points.Any() && points.Skip(1).Any()); // at least two points

    // Compute the centroid and center the data on it.
    var xAvg = points.Select(pnt => pnt.X).Average();
    var yAvg = points.Select(pnt => pnt.Y).Average();
    var centered = points.Select(pnt => new Point(pnt.X - xAvg, pnt.Y - yAvg));

    var mvSample = new MultivariateSample(2);
    foreach (var pnt in centered) {
        mvSample.Add(pnt.X, pnt.Y);
    }

    var pca = mvSample.PrincipalComponentAnalysis();
    var major = pca.Component(0).NormalizedVector();
    var minor = pca.Component(1).NormalizedVector();

    return (new PCA2DResult(
                new Point(xAvg, yAvg),
                new Vector(major[0], major[1]),
                new Vector(minor[0], minor[1])));
}
public void FitDataToLineUncertaintyTest() {
    double[] xs = TestUtilities.GenerateUniformRealValues(0.0, 10.0, 10);
    Func<double, double> fv = x => 2.0 * x - 1.0;   // true line
    Func<double, double> fu = x => 1.0 + x;          // measurement uncertainty

    MultivariateSample sample = new MultivariateSample(2);
    SymmetricMatrix covariance = new SymmetricMatrix(2);

    // Fit many small data sets, recording the best-fit parameters each time.
    for (int i = 0; i < 100; i++) {
        UncertainMeasurementSample data = CreateDataSet(xs, fv, fu, i);
        FitResult fit = data.FitToLine();
        sample.Add(fit.Parameters);
        // The covariance depends only on the x's and sigmas, so it is the same for every fit.
        covariance = fit.CovarianceMatrix;
        Console.WriteLine("cov_00 = {0}", covariance[0, 0]);
    }

    // The measured covariances should agree with the claimed covariances.
    //Assert.IsTrue(sample.PopulationCovariance(0,0).ConfidenceInterval(0.95).ClosedContains(covariance[0,0]));
    //Assert.IsTrue(sample.PopulationCovariance(0,1).ConfidenceInterval(0.95).ClosedContains(covariance[0,1]));
    //Assert.IsTrue(sample.PopulationCovariance(1,0).ConfidenceInterval(0.95).ClosedContains(covariance[1,0]));
    //Assert.IsTrue(sample.PopulationCovariance(1,1).ConfidenceInterval(0.95).ClosedContains(covariance[1,1]));
}
public void WaldFit() {
    WaldDistribution wald = new WaldDistribution(3.5, 2.5);

    BivariateSample parameters = new BivariateSample();
    MultivariateSample variances = new MultivariateSample(3);

    // Fit many samples, recording fitted (mean, shape) pairs and claimed covariances.
    for (int i = 0; i < 128; i++) {
        Sample s = SampleTest.CreateSample(wald, 16, i);
        FitResult r = WaldDistribution.FitToSample(s);
        parameters.Add(r.Parameters[0], r.Parameters[1]);
        variances.Add(r.Covariance(0, 0), r.Covariance(1, 1), r.Covariance(0, 1));
        Assert.IsTrue(r.GoodnessOfFit.Probability > 0.01);
    }

    // Mean fitted parameters should agree with the generating distribution.
    Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Mean));
    Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Shape));

    // Observed scatter should agree with the (median) claimed covariances.
    Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Median));
    Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Median));
    Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Median));
}
public void MultivariateLinearRegressionTest() {
    // Generating model: y = a + b0 * x0 + b1 * x1 + noise
    double a = 1.0;
    double b0 = -2.0;
    double b1 = 3.0;
    ContinuousDistribution noise = new NormalDistribution(0.0, 10.0);

    // Draw a sample from the model.
    Random rng = new Random(1);
    MultivariateSample sample = new MultivariateSample(3);
    for (int i = 0; i < 100; i++) {
        double x0 = -10.0 + 20.0 * rng.NextDouble();
        double x1 = -10.0 + 20.0 * rng.NextDouble();
        double eps = noise.InverseLeftProbability(rng.NextDouble());
        sample.Add(x0, x1, a + b0 * x0 + b1 * x1 + eps);
    }

    // Regress the last column on the first two.
    ParameterCollection result = sample.LinearRegression(2).Parameters;

    // One parameter per regressor plus an intercept.
    Assert.IsTrue(result.Count == 3);

    // The fitted parameters should agree with the generating model.
    Assert.IsTrue(result[0].Estimate.ConfidenceInterval(0.90).ClosedContains(b0));
    Assert.IsTrue(result[1].Estimate.ConfidenceInterval(0.90).ClosedContains(b1));
    Assert.IsTrue(result[2].Estimate.ConfidenceInterval(0.90).ClosedContains(a));
}
public void MultivariateLinearRegressionSimple() {
    // Generating model: y = a + b0 * x0 + b1 * x1 + noise
    double a = 1.0;
    double b0 = -2.0;
    double b1 = 3.0;
    ContinuousDistribution x0Distribution = new CauchyDistribution(10.0, 5.0);
    ContinuousDistribution x1Distribution = new UniformDistribution(Interval.FromEndpoints(-10.0, 20.0));
    ContinuousDistribution noise = new NormalDistribution(0.0, 10.0);

    // Draw one sample, stored both in the old MultivariateSample container and
    // in the new FrameTable container so both APIs see identical data.
    Random rng = new Random(1);
    MultivariateSample sample = new MultivariateSample("x0", "x1", "y");
    FrameTable table = new FrameTable();
    table.AddColumns<double>("x0", "x1", "y");
    for (int i = 0; i < 100; i++) {
        double x0 = x0Distribution.GetRandomValue(rng);
        double x1 = x1Distribution.GetRandomValue(rng);
        double eps = noise.GetRandomValue(rng);
        double y = a + b0 * x0 + b1 * x1 + eps;
        sample.Add(x0, x1, y);
        table.AddRow(x0, x1, y);
    }

    // Fit via both the old and the new API.
    ParameterCollection oldResult = sample.LinearRegression(2).Parameters;
    MultiLinearRegressionResult newResult = table["y"].As<double>().MultiLinearRegression(
        table["x0"].As<double>(), table["x1"].As<double>()
    );

    // Both results should have one parameter per regressor plus an intercept.
    Assert.IsTrue(oldResult.Count == 3);
    Assert.IsTrue(newResult.Parameters.Count == 3);

    // The fitted parameters should agree with the generating model.
    Assert.IsTrue(oldResult[0].Estimate.ConfidenceInterval(0.90).ClosedContains(b0));
    Assert.IsTrue(oldResult[1].Estimate.ConfidenceInterval(0.90).ClosedContains(b1));
    Assert.IsTrue(oldResult[2].Estimate.ConfidenceInterval(0.90).ClosedContains(a));
    Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
    Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
    Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

    // Residuals should equal observed minus predicted values.
    for (int i = 0; i < table.Rows.Count; i++) {
        FrameRow row = table.Rows[i];
        double x0 = (double)row["x0"];
        double x1 = (double)row["x1"];
        double predicted = newResult.Predict(x0, x1).Value;
        double observed = (double)row["y"];
        Assert.IsTrue(TestUtilities.IsNearlyEqual(newResult.Residuals[i], observed - predicted));
    }
}
public void Bug6391() {
    // Regression test: PCA of this degenerate two-point sample used to throw
    // a NonConvergenceException. Success is simply that the call completes.
    var sample = new MultivariateSample(2);
    sample.Add(0, 1);
    sample.Add(0, -1);
    sample.PrincipalComponentAnalysis();
}
// Returns the total variance of the sample: the sum of the per-column variances
// (i.e. the trace of the covariance matrix).
private double GetTotalVariance(MultivariateSample sample) {
    double sum = 0.0;
    for (int c = 0; c < sample.Dimension; c++) {
        sum += sample.Column(c).Variance;
    }
    return (sum);
}
public void TimeSeriesFitAR1() {
    double alpha = 0.3;
    double mu = 0.2;
    double sigma = 0.4;
    int n = 20;

    // The AR(1) fit incorporates bias correction (at least for the most important
    // parameter, alpha), so a small-n test is feasible here.
    MultivariateSample parameters = new MultivariateSample(3);
    MultivariateSample covariances = new MultivariateSample(6);
    Sample tests = new Sample();
    for (int i = 0; i < 100; i++) {
        TimeSeries series = GenerateAR1TimeSeries(alpha, mu, sigma, n, i + 314159);
        FitResult result = series.FitToAR1();
        parameters.Add(result.Parameters);
        covariances.Add(
            result.CovarianceMatrix[0, 0], result.CovarianceMatrix[1, 1], result.CovarianceMatrix[2, 2],
            result.CovarianceMatrix[0, 1], result.CovarianceMatrix[0, 2], result.CovarianceMatrix[1, 2]
        );
        tests.Add(result.GoodnessOfFit.Probability);
    }

    // The fitted parameters should agree with the generating values.
    Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(alpha));
    Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(mu));
    Assert.IsTrue(parameters.Column(2).PopulationMean.ConfidenceInterval(0.99).ClosedContains(sigma));

    // Reported variances should agree with the observed parameter scatter.
    // For small n the fitted alpha varies considerably, and the formula for var(m)
    // varies strongly with alpha, so the computed var(m) has a long tail that pushes
    // its mean well above a typical value; medians are a better central estimate.
    Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Median));
    Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Median));
    Assert.IsTrue(parameters.Column(2).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Median));
    Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(3).Mean));
    Assert.IsTrue(parameters.TwoColumns(0, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(4).Mean));
    Assert.IsTrue(parameters.TwoColumns(1, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(5).Mean));

    // Goodness-of-fit probabilities should be uniformly distributed.
    TestResult ks = tests.KolmogorovSmirnovTest(new UniformDistribution());
    Assert.IsTrue(ks.Probability > 0.05);
}
public void TestMultivariateRegression() {
    // Null model: z depends only on an intercept (cx = cy = 0), so the r^2 of a
    // regression on x and y should follow the corresponding Beta distribution.
    double cz = 1.0;
    double cx = 0.0;
    double cy = 0.0;

    Random rng = new Random(1001110000);
    Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
    Distribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
    Distribution eDistribution = new NormalDistribution();

    Sample r2Sample = new Sample();
    for (int i = 0; i < 500; i++) {
        // Generate one small data set and regress z on x and y.
        MultivariateSample xyzSample = new MultivariateSample(3);
        for (int k = 0; k < 12; k++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = yDistribution.GetRandomValue(rng);
            double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
            xyzSample.Add(x, y, z);
        }
        FitResult fit = xyzSample.LinearRegression(2);
        double fcx = fit.Parameters[0];
        double fcy = fit.Parameters[1];
        double fcz = fit.Parameters[2];

        // Compute r^2 = 1 - SS_res / SS_tot by hand from the fitted parameters.
        double ss2 = 0.0;
        double ss1 = 0.0;
        foreach (double[] xyz in xyzSample) {
            ss2 += MoreMath.Sqr(xyz[2] - (fcx * xyz[0] + fcy * xyz[1] + fcz));
            ss1 += MoreMath.Sqr(xyz[2] - xyzSample.Column(2).Mean);
        }
        r2Sample.Add(1.0 - ss2 / ss1);
    }
    Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

    // Under the null model r^2 ~ Beta((p-1)/2, (n-p)/2) with p = 3 parameters, n = 12 points.
    Distribution r2Distribution = new BetaDistribution((3 - 1) / 2.0, (12 - 3) / 2.0);
    Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Console.WriteLine(ks.RightProbability);
    Console.WriteLine(ks.Probability);
}
// Converts a collection of series (one per column) into a MultivariateSample
// (one row per index position across the series).
// NOTE(review): assumes every series has the same Count — a shorter series would
// throw on indexing; confirm against callers.
public static MultivariateSample ToSample(this IEnumerable<Series> series) {
    var columns = series.ToArray();
    var sample = new MultivariateSample(columns.Length);
    var count = columns.Select(s => s.Count).Max();
    // BUG FIX: the old code looped over columns instead of rows, filling only one
    // component of the row buffer per added row (and adding only columns.Length
    // rows). Iterate over rows, filling every column of each row.
    for (int r = 0; r < count; r++) {
        var row = new double[columns.Length];
        for (int c = 0; c < columns.Length; c++) {
            row[c] = columns[c].As<double>()[r];
        }
        sample.Add(row);
    }
    return (sample);
}
public void TimeSeriesFitToMA1() {
    double beta = -0.2;
    double mu = 0.4;
    double sigma = 0.6;
    int n = 100;

    // The formulas used by the MA(1) fit are only asymptotically unbiased, so n
    // cannot be too small if we want to strictly test parameter values and variances.
    MultivariateSample parameters = new MultivariateSample(3);
    MultivariateSample covariances = new MultivariateSample(6);
    Sample tests = new Sample("p");
    for (int i = 0; i < 64; i++) {
        TimeSeries series = GenerateMA1TimeSeries(beta, mu, sigma, n, n * i + 314159);
        Debug.Assert(series.Count == n);
        MA1FitResult result = series.FitToMA1();
        parameters.Add(result.Parameters.ValuesVector);
        covariances.Add(
            result.Parameters.CovarianceMatrix[0, 0], result.Parameters.CovarianceMatrix[1, 1], result.Parameters.CovarianceMatrix[2, 2],
            result.Parameters.CovarianceMatrix[0, 1], result.Parameters.CovarianceMatrix[0, 2], result.Parameters.CovarianceMatrix[1, 2]
        );
        tests.Add(result.GoodnessOfFit.Probability);
    }

    // The fitted parameters should agree with the generating values.
    Assert.IsTrue(parameters.Column(0).PopulationMean.ConfidenceInterval(0.99).ClosedContains(mu));
    Assert.IsTrue(parameters.Column(1).PopulationMean.ConfidenceInterval(0.99).ClosedContains(beta));
    Assert.IsTrue(parameters.Column(2).PopulationMean.ConfidenceInterval(0.99).ClosedContains(sigma));

    // Reported covariances should agree with the observed parameter scatter.
    Assert.IsTrue(parameters.Column(0).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(parameters.Column(1).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
    Assert.IsTrue(parameters.Column(2).PopulationVariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
    Assert.IsTrue(parameters.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(3).Mean));
    Assert.IsTrue(parameters.TwoColumns(0, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(4).Mean));
    Assert.IsTrue(parameters.TwoColumns(1, 2).PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(covariances.Column(5).Mean));

    // Goodness-of-fit probabilities should be uniformly distributed.
    Assert.IsTrue(tests.KuiperTest(new UniformDistribution()).Probability > 0.01);
}
// Removes a high/low-based linear trend from Data.
// Fits Closes[i] ~ p0 * Highs[i-1] + p1 * Lows[i-1] + p2 over the series,
// subtracts the fitted value from each element of a copy of Data, and pushes
// the transformation (with the state needed to reverse it) onto Transforms.
// Returns the detrended copy; the caller's Data list is not modified.
public List<double> Remove(List<double> Data, ref Stack<ITransformation> Transforms) {
    double yestHigh = 0.0;   // previous bar's high (lag-1 regressor)
    double yestLow = 0.0;    // previous bar's low (lag-1 regressor)
    double lastHigh = 0.0;   // current bar's high
    double lastLow = 0.0;    // current bar's low
    // Columns: lagged high, lagged low, close (column 2 is the regression target).
    MultivariateSample mvS = new MultivariateSample(3);
    for (int i = 0; i < Data.Count; i++) {
        yestHigh = lastHigh;
        yestLow = lastLow;
        lastHigh = Highs[i];
        lastLow = Lows[i];
        // Skip i == 0: there is no previous bar to supply the lagged regressors.
        if (i > 0) {
            mvS.Add(yestHigh, yestLow, Closes[i]);
        }
    }
    // Work on a deep copy so the caller's list is left untouched.
    List<double> detrendedData = (List<double>)Utilities.DeepClone(Data);
    // Regress closes on the lagged highs/lows; parameters = { p0, p1, intercept }.
    double[] parameters = mvS.LinearRegression(2).Parameters();
    for (int i = 0; i < Data.Count; i++) {
        double regression;
        if (i > 0) {
            regression = (parameters[0] * Highs[i - 1]) + (parameters[1] * Lows[i - 1]) + parameters[2];
        } else {
            // No lagged data for the first point, so subtract Closes[0] instead.
            // NOTE(review): presumably Data parallels Closes so this zeroes the
            // first element — confirm against callers.
            regression = Closes[i];
        }
        detrendedData[i] -= regression;
    }
    // Record the final lagged values and fit parameters so the trend can be re-applied.
    Transforms.Push(new HighLowTransformation(yestHigh, yestLow, lastHigh, lastLow, parameters));
    return (detrendedData);
}
// Classifies a point sequence as a line or an ellipse using PCA elongation
// followed by an ellipse-fit deviation check.
public GeometricClass Classify(PointsSequence p) {
    var points = p.Points;

    // Center the points on their centroid and run a 2D PCA.
    var xAvg = points.Select(pnt => pnt.X).Average();
    var yAvg = points.Select(pnt => pnt.Y).Average();
    var centered = from pnt in points select new Point(pnt.X - xAvg, pnt.Y - yAvg);

    var mvSample = new MultivariateSample(2);
    foreach (var pnt in centered) {
        mvSample.Add(pnt.X, pnt.Y);
    }
    var pca = mvSample.PrincipalComponentAnalysis();

    // A very elongated point cloud (tiny second component) is a line.
    var firstSize = pca.Component(0).ScaledVector().Norm();
    var secondSize = pca.Component(1).ScaledVector().Norm();
    var fraction = secondSize / firstSize;
    if (fraction < pcaFractionThreshold) {
        return (GeometricClass.Line);
    }

    // Otherwise, accept an ellipse only if the points deviate little from the fit.
    var conicEllipse = EllipseFitter.ConicFit(p.Points);
    var parametricEllipse = EllipseFitter.Fit(p.Points);
    var deviations = from pnt in p.Points
                     select EllipseFitter.ComputeDeviation(conicEllipse, parametricEllipse, pnt);
    if (deviations.Average() < ellipseFitThreshold) {
        return (GeometricClass.Ellipse);
    } else {
        return (GeometricClass.Line);
    }
}
public void FitDataToPolynomialUncertaintiesTest() {
    // Verify that the reported uncertainties in fit parameters really represent
    // their standard deviations (off-diagonal covariance checks are not yet done here).
    double[] xs = TestUtilities.GenerateUniformRealValues(-1.0, 2.0, 10);
    Func<double, double> fv = delegate(double x) { return (0.0 + 1.0 * x + 2.0 * x * x); };
    Func<double, double> fu = delegate(double x) { return (0.5); };

    // Keep track of best-fit parameters across many fits.
    MultivariateSample sample = new MultivariateSample(3);

    // Generate 50 small data sets and fit each one to a quadratic.
    UncertainMeasurementFitResult[] fits = new UncertainMeasurementFitResult[50];
    for (int i = 0; i < fits.Length; i++) {
        UncertainMeasurementSample set = CreateDataSet(xs, fv, fu, 314159 + i);
        fits[i] = set.FitToPolynomial(2);
        sample.Add(fits[i].Parameters.ValuesVector);
    }

    // Print the mean fitted parameters for inspection.
    for (int i = 0; i < 3; i++) {
        Console.WriteLine(sample.Column(i).PopulationMean);
    }

    // For each parameter, verify that the standard deviation of the reported
    // values agrees with the (average) reported uncertainty.
    // (Fixed: removed the pMeans array, which was written but never read.)
    for (int i = 0; i <= 2; i++) {
        Sample values = new Sample();
        Sample uncertainties = new Sample();
        for (int j = 0; j < fits.Length; j++) {
            UncertainValue p = fits[j].Parameters[i].Estimate;
            values.Add(p.Value);
            uncertainties.Add(p.Uncertainty);
        }
        Assert.IsTrue(values.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.Mean));
    }
}
public void BivariateNonlinearFitVariances() {
    // Verify that we can fit a non-linear function, that the estimated parameters
    // cluster around the true values, and that the estimated parameter covariances
    // reflect the actually observed covariances.
    double a = 2.7;
    double b = 3.1;
    ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
    ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0);

    FrameTable parameters = new FrameTable();
    parameters.AddColumns<double>("a", "b");
    MultivariateSample covariances = new MultivariateSample(3);

    for (int i = 0; i < 64; i++) {
        // Draw a small sample from y = a * x^b + noise.
        BivariateSample sample = new BivariateSample();
        Random rng = new Random(i);
        for (int j = 0; j < 8; j++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = a * Math.Pow(x, b) + eDistribution.GetRandomValue(rng);
            sample.Add(x, y);
        }

        // Fit the power law and record fitted parameters and claimed covariances.
        NonlinearRegressionResult fit = sample.NonlinearRegression(
            (IReadOnlyList<double> p, double x) => p[0] * Math.Pow(x, p[1]),
            new double[] { 1.0, 1.0 }
        );
        parameters.AddRow(fit.Parameters.ValuesVector);
        covariances.Add(fit.Parameters.CovarianceMatrix[0, 0], fit.Parameters.CovarianceMatrix[1, 1], fit.Parameters.CovarianceMatrix[0, 1]);
    }

    // Mean fitted parameters should agree with the generating values.
    Assert.IsTrue(parameters["a"].As<double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(a));
    Assert.IsTrue(parameters["b"].As<double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(b));

    // Observed scatter should agree with the claimed covariances (checked via
    // both the member and the static covariance entry points).
    Assert.IsTrue(parameters["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(parameters["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
    Assert.IsTrue(parameters["a"].As<double>().PopulationCovariance(parameters["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
    Assert.IsTrue(Bivariate.PopulationCovariance(parameters["a"].As<double>(), parameters["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
}
public void OldMultivariateLinearRegressionTest() {
    // Regress column 0 on the others using a fixed real-world-style data set;
    // success is simply that the regression completes.
    MultivariateSample sample = new MultivariateSample(3);
    sample.Add(98322, 81449, 269465);
    sample.Add(65060, 31749, 121900);
    sample.Add(36052, 14631, 37004);
    sample.Add(31829, 27732, 91400);
    sample.Add(7101, 9693, 54900);
    sample.Add(41294, 4268, 16160);
    sample.Add(16614, 4697, 21500);
    sample.Add(3449, 4233, 9306);
    sample.Add(3386, 5293, 38300);
    sample.Add(6242, 2039, 13369);
    sample.Add(14036, 7893, 29901);
    sample.Add(2636, 3345, 10930);
    sample.Add(869, 1135, 5100);
    sample.Add(452, 727, 7653);

    Console.WriteLine(sample.Count);
    sample.LinearRegression(0);
}
public void TestMultivariateRegression() {
    // Collect r^2 values from multivariate linear regressions under the null
    // model (cx = cy = 0, so z depends only on the intercept plus noise).
    double cz = 1.0;
    double cx = 0.0;
    double cy = 0.0;

    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 8.0));
    ContinuousDistribution yDistribution = new UniformDistribution(Interval.FromEndpoints(-8.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();

    List<double> r2Sample = new List<double>();
    for (int i = 0; i < 500; i++) {
        // Generate one small data set and regress z on x and y.
        MultivariateSample xyzSample = new MultivariateSample(3);
        for (int k = 0; k < 12; k++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = yDistribution.GetRandomValue(rng);
            double z = cx * x + cy * y + cz + eDistribution.GetRandomValue(rng);
            xyzSample.Add(x, y, z);
        }
        MultiLinearRegressionResult fit = xyzSample.LinearRegression(2);
        // Fixed: removed unused fcx/fcy/fcz locals; only RSquared is needed.
        r2Sample.Add(fit.RSquared);
    }

    // Under the null model r^2 ~ Beta((p-1)/2, (n-p)/2) with p = 3 parameters, n = 12 points.
    ContinuousDistribution r2Distribution = new BetaDistribution((3 - 1) / 2.0, (12 - 3) / 2.0);
    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Assert.IsTrue(ks.Probability > 0.05);
}
public void MultivariateLinearRegressionTest() {
    // Generating model: y = a + b0 * x0 + b1 * x1 + noise
    double a = 1.0;
    double b0 = -2.0;
    double b1 = 3.0;
    Distribution noise = new NormalDistribution(0.0, 10.0);

    // Draw a sample from the model.
    Random rng = new Random(1);
    MultivariateSample sample = new MultivariateSample(3);
    for (int i = 0; i < 100; i++) {
        double x0 = -10.0 + 20.0 * rng.NextDouble();
        double x1 = -10.0 + 20.0 * rng.NextDouble();
        double eps = noise.InverseLeftProbability(rng.NextDouble());
        sample.Add(x0, x1, a + b0 * x0 + b1 * x1 + eps);
    }

    // Regress the last column on the first two.
    FitResult result = sample.LinearRegression(2);

    // One parameter per regressor plus an intercept.
    Assert.IsTrue(result.Dimension == 3);

    // The regression should be statistically significant.
    Console.WriteLine("{0} {1}", result.GoodnessOfFit.Statistic, result.GoodnessOfFit.LeftProbability);
    Assert.IsTrue(result.GoodnessOfFit.LeftProbability > 0.95);

    // The fitted parameters should agree with the generating model.
    Console.WriteLine(result.Parameter(0));
    Assert.IsTrue(result.Parameter(0).ConfidenceInterval(0.90).ClosedContains(b0));
    Console.WriteLine(result.Parameter(1));
    Assert.IsTrue(result.Parameter(1).ConfidenceInterval(0.90).ClosedContains(b1));
    Console.WriteLine(result.Parameter(2));
    Assert.IsTrue(result.Parameter(2).ConfidenceInterval(0.90).ClosedContains(a));
}
public void MultivariateManipulations() {
    MultivariateSample sample = new MultivariateSample(3);
    Assert.IsTrue(sample.Dimension == 3);
    Assert.IsTrue(sample.Count == 0);

    sample.Add(1.1, 1.2, 1.3);
    sample.Add(2.1, 2.2, 2.3);
    Assert.IsTrue(sample.Count == 2);

    // An added entry is present, can be removed, and is then absent.
    Assert.IsTrue(sample.Contains(1.1, 1.2, 1.3));
    Assert.IsTrue(sample.Remove(1.1, 1.2, 1.3));
    Assert.IsFalse(sample.Contains(1.1, 1.2, 1.3));

    // Clearing empties the sample.
    sample.Clear();
    Assert.IsTrue(sample.Count == 0);
}