public void NormalFitCovariances()
{
    // Fits many small samples drawn from a single normal distribution and checks that
    //   (a) the fitted parameters scatter around the population values, and
    //   (b) the covariances reported by each fit agree with the scatter actually observed.
    const int sampleCount = 128;
    const int sampleSize = 8;       // deliberately small, so mu and sigma vary noticeably from fit to fit
    const double confidence = 0.95;

    NormalDistribution population = new NormalDistribution(-1.0, 2.0);

    // One (mu, sigma) pair per fit; lets us measure their means, variances and covariance.
    BivariateSample fitted = new BivariateSample();

    // One row per fit: claimed var(mu), claimed var(sigma), claimed cov(mu, sigma).
    MultivariateSample claimed = new MultivariateSample(3);

    for (int seed = 0; seed < sampleCount; seed++)
    {
        Sample draw = TestUtilities.CreateSample(population, sampleSize, seed);
        FitResult result = NormalDistribution.FitToSample(draw);

        // Best-fit values of the two parameters.
        double mu = result.Parameter(0).Value;
        double sigma = result.Parameter(1).Value;
        fitted.Add(mu, sigma);

        // Covariances the fit itself claims for those parameters.
        double varMu = result.Covariance(0, 0);
        double varSigma = result.Covariance(1, 1);
        double covMuSigma = result.Covariance(0, 1);
        claimed.Add(varMu, varSigma, covMuSigma);
    }

    // The average fitted values must be compatible with the population parameters.
    Assert.IsTrue(fitted.X.PopulationMean.ConfidenceInterval(confidence).ClosedContains(population.Mean));
    Assert.IsTrue(fitted.Y.PopulationMean.ConfidenceInterval(confidence).ClosedContains(population.StandardDeviation));

    // The observed (co)variances of the fitted values must be compatible with the mean claimed ones.
    Assert.IsTrue(fitted.X.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(claimed.Column(0).Mean));
    Assert.IsTrue(fitted.Y.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(claimed.Column(1).Mean));
    Assert.IsTrue(fitted.PopulationCovariance.ConfidenceInterval(confidence).ClosedContains(claimed.Column(2).Mean));
}
public void FitDataToLinearFunctionTest()
{
    // Builds a data set whose values follow 2 cos(x) + 1 sin(x), fits it to a linear
    // combination of cos and sin, and checks the recovered coefficients and their errors.

    // Underlying value function and the function passed alongside it to CreateDataSet
    // (presumably the per-point uncertainty -- confirm against CreateDataSet).
    Interval r = Interval.FromEndpoints(-4.0, 6.0);
    Func<double, double> fv = x => 2.0 * Math.Cos(x) + 1.0 * Math.Sin(x);
    Func<double, double> fu = x => 0.1 + 0.1 * Math.Abs(x);
    UncertainMeasurementSample set = CreateDataSet(r, fv, fu, 20, 2);

    // Fit the data set to a linear combination of cosine and sine.
    Func<double, double>[] fs = new Func<double, double>[]
    {
        x => Math.Cos(x),
        x => Math.Sin(x)
    };
    FitResult result = set.FitToLinearFunction(fs);

    // The fit should have the right dimension: one parameter per basis function.
    Assert.IsTrue(result.Dimension == 2);

    // The coefficients should match the ones used to generate the data.
    Console.WriteLine(result.Parameter(0));
    Console.WriteLine(result.Parameter(1));
    Assert.IsTrue(result.Parameter(0).ConfidenceInterval(0.95).ClosedContains(2.0));
    Assert.IsTrue(result.Parameter(1).ConfidenceInterval(0.95).ClosedContains(1.0));

    // Diagonal covariances should match the reported parameter uncertainties.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(Math.Sqrt(result.Covariance(0, 0)), result.Parameter(0).Uncertainty));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(Math.Sqrt(result.Covariance(1, 1)), result.Parameter(1).Uncertainty));
}
public void WaldFit()
{
    // Fits many small Wald samples and verifies that the fitted parameters cluster around
    // the true ones and that the reported covariances match the observed scatter.
    const int fitCount = 128;
    const int sampleSize = 16;
    const double confidence = 0.99;

    WaldDistribution source = new WaldDistribution(3.5, 2.5);

    // One pair of fitted parameters per fit.
    BivariateSample estimates = new BivariateSample();

    // One row per fit: reported variance of each parameter and their covariance.
    MultivariateSample reported = new MultivariateSample(3);

    for (int seed = 0; seed < fitCount; seed++)
    {
        Sample draw = SampleTest.CreateSample(source, sampleSize, seed);
        FitResult fit = WaldDistribution.FitToSample(draw);

        estimates.Add(fit.Parameters[0], fit.Parameters[1]);
        reported.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));

        // Each individual fit must itself pass its goodness-of-fit test.
        Assert.IsTrue(fit.GoodnessOfFit.Probability > 0.01);
    }

    // Mean fitted parameters versus the distribution's mean and shape.
    Assert.IsTrue(estimates.X.PopulationMean.ConfidenceInterval(confidence).ClosedContains(source.Mean));
    Assert.IsTrue(estimates.Y.PopulationMean.ConfidenceInterval(confidence).ClosedContains(source.Shape));

    // Observed (co)variances versus the median of the reported ones.
    Assert.IsTrue(estimates.X.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(0).Median));
    Assert.IsTrue(estimates.Y.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(1).Median));
    Assert.IsTrue(estimates.PopulationCovariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(2).Median));
}
public void GumbelFit()
{
    // Fits a series of Gumbel samples and checks both the fitted parameters and the
    // covariances that the fits report for them.
    const int fitCount = 32;
    const int sampleSize = 64;
    const double confidence = 0.99;

    GumbelDistribution source = new GumbelDistribution(-1.0, 2.0);

    // One row of fitted parameters per fit.
    MultivariateSample estimates = new MultivariateSample(2);

    // One row per fit: reported variance of each parameter and their covariance.
    MultivariateSample reported = new MultivariateSample(3);

    // Do a bunch of fits, recording the reported parameters and variances.
    for (int seed = 0; seed < fitCount; seed++)
    {
        Sample draw = SampleTest.CreateSample(source, sampleSize, seed);
        FitResult fit = GumbelDistribution.FitToSample(draw);

        estimates.Add(fit.Parameters);
        reported.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));

        // Each individual fit must itself pass its goodness-of-fit test.
        Assert.IsTrue(fit.GoodnessOfFit.Probability > 0.01);
    }

    // The reported parameters should agree with the underlying location and scale.
    Assert.IsTrue(estimates.Column(0).PopulationMean.ConfidenceInterval(confidence).ClosedContains(source.Location));
    Assert.IsTrue(estimates.Column(1).PopulationMean.ConfidenceInterval(confidence).ClosedContains(source.Scale));

    // The reported covariances should agree with the observed covariances.
    Assert.IsTrue(estimates.Column(0).PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(0).Mean));
    Assert.IsTrue(estimates.Column(1).PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(1).Mean));
    Assert.IsTrue(estimates.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(2).Mean));
}
public void BivariateNonlinearFit()
{
    // Verifies that a non-linear model y = a * x^b can be fitted, that the estimated
    // parameters cluster around the true values, and that the estimated parameter
    // covariances reflect the covariances actually observed across fits.
    const int fitCount = 64;
    const int pointsPerFit = 8;
    const double confidence = 0.99;

    double a = 2.7;
    double b = 3.1;

    // x values and additive noise are drawn from these distributions.
    ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
    ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0);

    MultivariateSample estimates = new MultivariateSample("a", "b");
    MultivariateSample reported = new MultivariateSample(3);

    for (int seed = 0; seed < fitCount; seed++)
    {
        // Each data set gets its own generator, seeded by the fit index.
        Random rng = new Random(seed);
        BivariateSample data = new BivariateSample();

        for (int point = 0; point < pointsPerFit; point++)
        {
            // Draw x first, then the noise, so the random sequence is consumed in a fixed order.
            double x = xDistribution.GetRandomValue(rng);
            double noise = eDistribution.GetRandomValue(rng);
            double y = a * Math.Pow(x, b) + noise;
            data.Add(x, y);
        }

        // Fit p[0] * x^p[1], starting from (1, 1).
        FitResult fit = data.NonlinearRegression(
            (IList<double> p, double x) => p[0] * Math.Pow(x, p[1]),
            new double[] { 1.0, 1.0 }
        );

        estimates.Add(fit.Parameters);
        reported.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));
    }

    // Mean fitted parameters versus the true a and b.
    Assert.IsTrue(estimates.Column(0).PopulationMean.ConfidenceInterval(confidence).ClosedContains(a));
    Assert.IsTrue(estimates.Column(1).PopulationMean.ConfidenceInterval(confidence).ClosedContains(b));

    // Observed (co)variances versus the mean reported ones.
    Assert.IsTrue(estimates.Column(0).PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(0).Mean));
    Assert.IsTrue(estimates.Column(1).PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(1).Mean));
    Assert.IsTrue(estimates.TwoColumns(0, 1).PopulationCovariance.ConfidenceInterval(confidence).ClosedContains(reported.Column(2).Mean));
}
public void RayleighFit()
{
    // Fits many very small Rayleigh samples and checks the fitted scale and its reported variance.
    const int fitCount = 128;
    const int sampleSize = 8;   // quite small on purpose: the estimator is meant to be unbiased at finite n
    const double confidence = 0.99;

    RayleighDistribution source = new RayleighDistribution(3.2);

    Sample scales = new Sample();             // fitted scale, one entry per fit
    Sample reportedVariances = new Sample();  // variance the fit reports for that scale

    for (int seed = 0; seed < fitCount; seed++)
    {
        Sample draw = SampleTest.CreateSample(source, sampleSize, seed);
        FitResult fit = RayleighDistribution.FitToSample(draw);

        scales.Add(fit.Parameters[0]);
        reportedVariances.Add(fit.Covariance(0, 0));

        // Each individual fit must itself pass its goodness-of-fit test.
        Assert.IsTrue(fit.GoodnessOfFit.Probability > 0.01);
    }

    // Mean fitted scale versus the true scale.
    Assert.IsTrue(scales.PopulationMean.ConfidenceInterval(confidence).ClosedContains(source.Scale));

    // Observed variance of the fitted scale versus the median reported variance.
    Assert.IsTrue(scales.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(reportedVariances.Median));
}
public void FitDataToLineTest()
{
    // Fits data generated from y = 2x - 1 to a line, cross-checks the result against a
    // first-order polynomial fit, and confirms that a constant fit is rejected.
    Interval range = Interval.FromEndpoints(0.0, 10.0);
    Func<double, double> fv = x => 2.0 * x - 1.0;
    Func<double, double> fu = x => 1.0 + x;
    UncertainMeasurementSample data = CreateDataSet(range, fv, fu, 20);

    // Sanity check the data set.
    Assert.IsTrue(data.Count == 20);

    // Fit to a line: intercept is parameter 0, slope is parameter 1.
    FitResult line = data.FitToLine();
    Assert.IsTrue(line.Dimension == 2);
    Assert.IsTrue(line.Parameter(0).ConfidenceInterval(0.95).ClosedContains(-1.0));
    Assert.IsTrue(line.Parameter(1).ConfidenceInterval(0.95).ClosedContains(2.0));
    Assert.IsTrue(line.GoodnessOfFit.LeftProbability < 0.95);

    // The correlation coefficient should equal the covariance divided by both uncertainties.
    double correlation = line.CorrelationCoefficient(0, 1);
    double expectedCorrelation = line.Covariance(0, 1) / line.Parameter(0).Uncertainty / line.Parameter(1).Uncertainty;
    Assert.IsTrue(TestUtilities.IsNearlyEqual(correlation, expectedCorrelation));

    // A first-order polynomial fit must agree with the line fit in every respect.
    FitResult poly = data.FitToPolynomial(1);
    Assert.IsTrue(poly.Dimension == 2);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.Parameters, line.Parameters));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.CovarianceMatrix, line.CovarianceMatrix));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.GoodnessOfFit.Statistic, line.GoodnessOfFit.Statistic));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.GoodnessOfFit.LeftProbability, line.GoodnessOfFit.LeftProbability));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(poly.GoodnessOfFit.RightProbability, line.GoodnessOfFit.RightProbability));

    // Fit to a constant; the result should be poor.
    FitResult constant = data.FitToConstant();
    Assert.IsTrue(constant.GoodnessOfFit.LeftProbability > 0.95);
}
public void BivariateLinearRegression()
{
    // Runs a series of linear regression fits on synthetic data y = a0 + b0 * x + noise and
    // checks not only that the fitted parameters are what they should be, but also that
    // their variances/covariances match the ones the fits return.
    const int fitCount = 100;
    const int pointsPerFit = 25;
    const double confidence = 0.95;

    Random rng = new Random(314159);

    // True intercept and slope of the underlying line.
    double a0 = 2.0;
    double b0 = -1.0;

    // Fitted (intercept, slope) pairs, one per fit.
    BivariateSample fitted = new BivariateSample();

    // Running sums of the returned covariance estimates; these vary slightly from fit to
    // fit, so they are averaged after the loop.
    double sumVarA = 0.0;
    double sumVarB = 0.0;
    double sumCovAB = 0.0;

    // Test statistic returned by each fit.
    Sample statistics = new Sample();

    for (int fitIndex = 0; fitIndex < fitCount; fitIndex++)
    {
        // The x's may come from any distribution; the noise is drawn from a normal distribution.
        Distribution xd = new LogisticDistribution();
        Distribution nd = new NormalDistribution(0.0, 2.0);

        // Generate a synthetic data set (x is drawn before the noise for each point).
        BivariateSample data = new BivariateSample();
        for (int point = 0; point < pointsPerFit; point++)
        {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            data.Add(x, y);
        }

        // Do the regression.
        FitResult fit = data.LinearRegression();

        // Record the best-fit parameters.
        double a = fit.Parameter(0).Value;
        double b = fit.Parameter(1).Value;
        fitted.Add(a, b);

        // Accumulate the estimated covariances.
        sumVarA += fit.Covariance(0, 0);
        sumVarB += fit.Covariance(1, 1);
        sumCovAB += fit.Covariance(0, 1);

        // Record and print the fit statistic.
        statistics.Add(fit.GoodnessOfFit.Statistic);
        Console.WriteLine("F={0}", fit.GoodnessOfFit.Statistic);
    }

    // Turn the sums into averages over the number of recorded fits.
    sumVarA /= fitted.Count;
    sumVarB /= fitted.Count;
    sumCovAB /= fitted.Count;

    // The mean parameter estimates should be the underlying population parameters.
    Assert.IsTrue(fitted.X.PopulationMean.ConfidenceInterval(confidence).ClosedContains(a0));
    Assert.IsTrue(fitted.Y.PopulationMean.ConfidenceInterval(confidence).ClosedContains(b0));

    Console.WriteLine("{0} {1}", sumVarA, fitted.X.PopulationVariance);
    Console.WriteLine("{0} {1}", sumVarB, fitted.Y.PopulationVariance);

    // The observed parameter covariances should be the averaged reported estimates.
    Assert.IsTrue(fitted.X.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(sumVarA));
    Assert.IsTrue(fitted.Y.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(sumVarB));
    Assert.IsTrue(fitted.PopulationCovariance.ConfidenceInterval(confidence).ClosedContains(sumCovAB));

    // Compare the distribution of the fit statistic against a Fisher distribution.
    // NOTE(review): the result is only printed, never asserted, and the (2, 48) arguments do
    // not obviously correspond to 25-point, 2-parameter fits -- confirm before adding an assertion.
    Console.WriteLine(statistics.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
}
public void LinearLogisticRegression()
{
    // Runs a series of logistic regression fits on synthetic binary data and checks not
    // only that the fitted parameters are what they should be, but also that their
    // variances/covariances match the ones the fits return.
    const int fitCount = 50;
    const int pointsPerFit = 50;
    const double confidence = 0.95;

    Random rng = new Random(314159);

    // True logistic parameters: P(y = 1 | x) = e^z / (1 + e^z) with z = a0 + b0 * x.
    double a0 = 1.0;
    double b0 = -1.0 / 2.0;

    // Fitted (a, b) pairs, one per fit.
    BivariateSample fitted = new BivariateSample();

    // Running sums of the returned covariance estimates; these vary slightly from fit to
    // fit, so they are averaged after the loop.
    double sumVarA = 0.0;
    double sumVarB = 0.0;
    double sumCovAB = 0.0;

    for (int k = 0; k < fitCount; k++)
    {
        Console.WriteLine("k={0}", k);

        // Generate a synthetic data set: x uniform on [-1, 1), y is 1 with probability P, else 0.
        BivariateSample data = new BivariateSample();
        for (int point = 0; point < pointsPerFit; point++)
        {
            double x = 2.0 * rng.NextDouble() - 1.0;
            double ez = Math.Exp(a0 + b0 * x);
            double P = ez / (1.0 + ez);

            // A second uniform draw decides the outcome.
            double outcome = rng.NextDouble() < P ? 1.0 : 0.0;
            data.Add(x, outcome);
        }

        // Do the regression.
        FitResult fit = data.LinearLogisticRegression();

        // Record and print the best-fit parameters.
        double a = fit.Parameter(0).Value;
        double b = fit.Parameter(1).Value;
        fitted.Add(a, b);
        Console.WriteLine("{0}, {1}", a, b);

        // Accumulate the estimated covariances.
        sumVarA += fit.Covariance(0, 0);
        sumVarB += fit.Covariance(1, 1);
        sumCovAB += fit.Covariance(0, 1);
    }

    // Turn the sums into averages over the number of recorded fits.
    sumVarA /= fitted.Count;
    sumVarB /= fitted.Count;
    sumCovAB /= fitted.Count;

    // The mean parameter estimates should be the underlying population parameters.
    Assert.IsTrue(fitted.X.PopulationMean.ConfidenceInterval(confidence).ClosedContains(a0));
    Assert.IsTrue(fitted.Y.PopulationMean.ConfidenceInterval(confidence).ClosedContains(b0));

    // The observed parameter covariances should be the averaged reported estimates.
    Assert.IsTrue(fitted.X.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(sumVarA));
    Assert.IsTrue(fitted.Y.PopulationVariance.ConfidenceInterval(confidence).ClosedContains(sumVarB));
    Assert.IsTrue(fitted.PopulationCovariance.ConfidenceInterval(confidence).ClosedContains(sumCovAB));
}