// Checks that a standard normal wrapped in a TransformedDistribution with arguments (-2, 3)
// agrees with NormalDistribution(-2, 3): summary statistics, moments, density and
// cumulative probabilities, and inverse cumulative probabilities must all match.
public void TestMethod1()
{
    const double mu = -2.0;
    const double sigma = 3.0;

    Distribution transformed = new TransformedDistribution(new NormalDistribution(), mu, sigma);
    Distribution reference = new NormalDistribution(mu, sigma);

    // summary statistics
    Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.Mean, reference.Mean));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.Variance, reference.Variance));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.StandardDeviation, reference.StandardDeviation));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.Skewness, reference.Skewness));

    // raw and central moments of orders 0 through 7
    for (int order = 0; order < 8; order++)
    {
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.Moment(order), reference.Moment(order)));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.MomentAboutMean(order), reference.MomentAboutMean(order)));
    }

    // density and both tail probabilities at points taken from [-8, 8]
    foreach (double point in TestUtilities.GenerateUniformRealValues(-8.0, 8.0, 8))
    {
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.ProbabilityDensity(point), reference.ProbabilityDensity(point)));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.LeftProbability(point), reference.LeftProbability(point)));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.RightProbability(point), reference.RightProbability(point)));
    }

    // inverse tail probabilities at probabilities taken from [1.0E-4, 1.0]
    foreach (double probability in TestUtilities.GenerateRealValues(1.0E-4, 1.0, 4))
    {
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.InverseLeftProbability(probability), reference.InverseLeftProbability(probability)));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(transformed.InverseRightProbability(probability), reference.InverseRightProbability(probability)));
    }
}
// Runs many one-way ANOVAs on groups drawn from one normal population and checks,
// with a Kolmogorov-Smirnov test, that the resulting F statistics follow the
// expected Fisher distribution.
public void AnovaDistribution()
{
    const int anovaCount = 100;
    const int groupCount = 4;
    const int groupSize = 3;

    Distribution population = new NormalDistribution();
    Random random = new Random(1);
    Sample fStatistics = new Sample();

    for (int trial = 0; trial < anovaCount; trial++)
    {
        // build the groups for this ANOVA, all drawn from the same population
        List<Sample> groups = new List<Sample>();
        for (int g = 0; g < groupCount; g++)
        {
            Sample group = new Sample();
            for (int i = 0; i < groupSize; i++)
            {
                group.Add(population.GetRandomValue(random));
            }
            groups.Add(group);
        }

        OneWayAnovaResult anova = Sample.OneWayAnovaTest(groups);
        fStatistics.Add(anova.Factor.Result.Statistic);
    }

    // 4 groups of 3 points: 3 between-group and 8 within-group degrees of freedom
    Distribution expected = new FisherDistribution(3, 8);
    Console.WriteLine("m={0} s={1}", fStatistics.PopulationMean, fStatistics.PopulationStandardDeviation);

    TestResult ks = fStatistics.KolmogorovSmirnovTest(expected);
    Console.WriteLine(ks.LeftProbability);
    Assert.IsTrue(ks.LeftProbability < 0.95);
}
// Builds a sample of n vectors of the form M + A * V, where A is the square-root matrix of
// the Cholesky decomposition of C and V holds independent standard normal deviates.
// The random generator is seeded with 1, so repeated calls yield the same sample.
public MultivariateSample CreateMultivariateNormalSample(ColumnVector M, SymmetricMatrix C, int n)
{
    int dimension = M.Dimension;
    MultivariateSample result = new MultivariateSample(dimension);
    SquareMatrix root = C.CholeskyDecomposition().SquareRootMatrix();
    Random random = new Random(1);
    Distribution standardNormal = new NormalDistribution();

    for (int row = 0; row < n; row++)
    {
        // fill a vector with normal deviates obtained by inverting the CDF at uniform values
        ColumnVector deviates = new ColumnVector(dimension);
        for (int k = 0; k < dimension; k++)
        {
            deviates[k] = standardNormal.InverseLeftProbability(random.NextDouble());
        }

        // shift and correlate the deviates, then record the resulting vector
        result.Add(M + root * deviates);
    }

    return result;
}
/*
 * Draws an integer between minValue and maxValue by rejection sampling under a
 * Gaussian bell curve whose mean and deviation come from calcularMedia and
 * calcularDesvio.
 *
 * x: measures the time (candidate value, taken linearly between minValue and maxValue)
 * y: measures the amount entering the platform (candidate height under the envelope m)
 * m: the maximum of the Gaussian bell curve
 * r1, r2: random numbers (assumed to be uniform in [0, 1) -- TODO confirm against Rand)
 *
 * Throws System.ArgumentException when minValue >= maxValue or either bound is negative.
 */
public static int Normal(int minValue, int maxValue)
{
    if ((minValue >= maxValue) || (minValue < 0) || (maxValue < 0))
    {
        throw new System.ArgumentException("Valores incorrectos");
    }

    double mu = calcularMedia(minValue, maxValue);
    double sigma = calcularDesvio(minValue, maxValue);
    NormalDistribution f = new NormalDistribution(mu, sigma);

    // peak height of the N(mu, sigma) density; used as the flat envelope for rejection sampling
    double m = 1 / (sigma * Math.Sqrt(2 * Math.PI));

    double x, y, fx;
    do
    {
        double r1 = Rand();
        double r2 = Rand();
        x = minValue + (maxValue - minValue) * r1;
        y = m * r2;
        // f is already parameterized by mu and sigma, so it must be evaluated at x itself,
        // not at the standardized value (x - mu) / sigma
        fx = f.ProbabilityDensity(x);
        // keep drawing while the candidate point lies above the curve; a point is
        // accepted only once it falls on or under the density
    } while (y > fx);

    return Convert.ToInt32(x);
}
// Randomly perturbs the values of a chromosome in place. Each value is mutated when a
// uniform draw from [0, 1] is at most MUTATION_PROBABILITY; a second uniform draw compared
// with SELECTION_PROBABILITY picks SIGMA_1 or SIGMA_2 as the width of the normal
// perturbation (centered on MEAN) that is added to the value.
public static void Mutate(Chromosome chromosome, Random random)
{
    UniformDistribution unitInterval = new UniformDistribution(Interval.FromEndpoints(0, 1));

    for (int gene = 0; gene < chromosome.Values.Count; gene++)
    {
        bool mutate = unitInterval.GetRandomValue(random) <= MUTATION_PROBABILITY;
        if (mutate)
        {
            // the second draw happens only for values that are actually mutated
            bool useFirstSigma = unitInterval.GetRandomValue(random) <= SELECTION_PROBABILITY;
            NormalDistribution perturbation = useFirstSigma
                ? new NormalDistribution(MEAN, SIGMA_1)
                : new NormalDistribution(MEAN, SIGMA_2);

            chromosome.Values[gene] += perturbation.GetRandomValue(random);
        }
    }
}
// Simulates many two-sample Kolmogorov-Smirnov tests on samples of sizes n and m drawn from
// the same normal distribution, histograms the statistic scaled by LCM(n, m), and prints a
// chi-squared comparison of the counts against KolmogorovTwoSampleExactDistribution.
// Nothing is asserted; the output is for inspection.
public void TwoSampleKS2()
{
    int n = 2 * 3 * 3;
    int m = 2 * 2 * 3;
    var lcm = AdvancedIntegerMath.LCM(n, m);

    Random random = new Random(0);
    NormalDistribution source = new NormalDistribution();

    // one bin for each possible value of the scaled statistic, 0 through LCM(n, m)
    Histogram bins = new Histogram((int) lcm + 1);

    int trials = 1000;
    for (int trial = 0; trial < trials; trial++)
    {
        Sample first = new Sample();
        for (int j = 0; j < n; j++)
        {
            first.Add(source.GetRandomValue(random));
        }

        Sample second = new Sample();
        for (int j = 0; j < m; j++)
        {
            second.Add(source.GetRandomValue(random));
        }

        TestResult ksResult = Sample.KolmogorovSmirnovTest(first, second);
        int bin = (int) Math.Round(ksResult.Statistic * lcm);
        bins[bin].Increment();
    }

    KolmogorovTwoSampleExactDistribution exact = new KolmogorovTwoSampleExactDistribution(n, m);

    // hand-computed chi-squared over bins with an expected count above 4
    double chiSquared = 0.0;
    int usedBins = 0;
    for (int i = 0; i < bins.Count; i++)
    {
        double expectedCount = exact.ProbabilityMass(i) * trials;
        Console.WriteLine("{0} {1} {2}", i, bins[i].Counts, expectedCount);
        if (expectedCount > 4)
        {
            chiSquared += MoreMath.Sqr(bins[i].Counts - expectedCount) / expectedCount;
            usedBins++;
        }
    }
    Console.WriteLine("chi^2={0} dof={1}", chiSquared, usedBins);

    // the library's own chi-squared test of the histogram against the exact distribution
    TestResult libraryResult = bins.ChiSquaredTest(exact);
    ChiSquaredDistribution libraryDistribution = (ChiSquaredDistribution) libraryResult.Distribution;
    Console.WriteLine("chi^2={0} dof={1} P={2}", libraryResult.Statistic, libraryDistribution.DegreesOfFreedom, libraryResult.RightProbability);
}
// Fits samples drawn from several known distributions by maximum likelihood and checks
// that each fit has the expected dimension and that the confidence intervals of the fitted
// parameters contain the generating values. Where a closed-form estimate is available, the
// fitted value is also compared against it.
public void SampleMaximumLikelihoodFit()
{
    // ---- normal ----
    Console.WriteLine("normal");
    double normalMu = -1.0;
    double normalSigma = 2.0;
    Distribution normalSource = new NormalDistribution(normalMu, normalSigma);
    Sample normalSample = CreateSample(normalSource, 500);
    FitResult normalFit = normalSample.MaximumLikelihoodFit(
        (IList<double> p) => new NormalDistribution(p[0], p[1]),
        new double[] { normalMu + 1.0, normalSigma + 1.0 }
    );
    Console.WriteLine(normalFit.Parameter(0));
    Console.WriteLine(normalFit.Parameter(1));
    Assert.IsTrue(normalFit.Dimension == 2);
    Assert.IsTrue(normalFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(normalMu));
    Assert.IsTrue(normalFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(normalSigma));

    // the analytic fit is exercised as well; its result is not inspected here
    FitResult analyticNormalFit = NormalDistribution.FitToSample(normalSample);
    Console.WriteLine(normalFit.Covariance(0, 1));

    // the fitted location must match the sample mean; sigma is not compared with its analytic
    // expression because the ML value has a known bias at finite sample size
    Assert.IsTrue(TestUtilities.IsNearlyEqual(normalFit.Parameter(0).Value, normalSample.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));

    // ---- exponential ----
    Console.WriteLine("exponential");
    double exponentialMean = 3.0;
    Distribution exponentialSource = new ExponentialDistribution(exponentialMean);
    Sample exponentialSample = CreateSample(exponentialSource, 100);
    FitResult exponentialFit = exponentialSample.MaximumLikelihoodFit(
        (IList<double> p) => new ExponentialDistribution(p[0]),
        new double[] { exponentialMean + 1.0 }
    );
    Console.WriteLine(exponentialFit.Parameter(0));
    Assert.IsTrue(exponentialFit.Dimension == 1);
    Assert.IsTrue(exponentialFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(exponentialMean));

    // compare value and uncertainty with the analytic expressions
    Assert.IsTrue(TestUtilities.IsNearlyEqual(exponentialFit.Parameter(0).Value, exponentialSample.Mean, Math.Sqrt(TestUtilities.TargetPrecision)));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(exponentialFit.Parameter(0).Uncertainty, exponentialSample.Mean / Math.Sqrt(exponentialSample.Count), Math.Sqrt(Math.Sqrt(TestUtilities.TargetPrecision))));

    // ---- lognormal ----
    Console.WriteLine("lognormal");
    double lognormalFirst = -4.0;
    double lognormalSecond = 5.0;
    Distribution lognormalSource = new LognormalDistribution(lognormalFirst, lognormalSecond);
    Sample lognormalSample = CreateSample(lognormalSource, 100);
    FitResult lognormalFit = lognormalSample.MaximumLikelihoodFit(
        (IList<double> p) => new LognormalDistribution(p[0], p[1]),
        new double[] { lognormalFirst + 1.0, lognormalSecond + 1.0 }
    );
    Console.WriteLine(lognormalFit.Parameter(0));
    Console.WriteLine(lognormalFit.Parameter(1));
    Console.WriteLine(lognormalFit.Covariance(0, 1));
    Assert.IsTrue(lognormalFit.Dimension == 2);
    Assert.IsTrue(lognormalFit.Parameter(0).ConfidenceInterval(0.99).ClosedContains(lognormalFirst));
    Assert.IsTrue(lognormalFit.Parameter(1).ConfidenceInterval(0.99).ClosedContains(lognormalSecond));

    // ---- weibull ----
    Console.WriteLine("weibull");
    double weibullScale = 4.0;
    double weibullShape = 2.0;
    WeibullDistribution weibullSource = new WeibullDistribution(weibullScale, weibullShape);
    Sample weibullSample = CreateSample(weibullSource, 20);
    FitResult weibullFit = weibullSample.MaximumLikelihoodFit(
        (IList<double> p) => new WeibullDistribution(p[0], p[1]),
        new double[] { 2.0, 2.0 }
    );
    Console.WriteLine(weibullFit.Parameter(0));
    Console.WriteLine(weibullFit.Parameter(1));
    Console.WriteLine(weibullFit.Covariance(0, 1));
    Assert.IsTrue(weibullFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(weibullSource.ScaleParameter));
    Assert.IsTrue(weibullFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(weibullSource.ShapeParameter));

    // ---- logistic ----
    Console.WriteLine("logistic");
    double logisticFirst = -3.0;
    double logisticSecond = 2.0;
    Distribution logisticSource = new LogisticDistribution(logisticFirst, logisticSecond);
    Sample logisticSample = CreateSample(logisticSource, 100);
    FitResult logisticFit = logisticSample.MaximumLikelihoodFit(
        (IList<double> p) => new LogisticDistribution(p[0], p[1]),
        new double[] { 2.0, 3.0 }
    );
    Console.WriteLine(logisticFit.Parameter(0));
    Console.WriteLine(logisticFit.Parameter(1));
    Assert.IsTrue(logisticFit.Dimension == 2);
    Assert.IsTrue(logisticFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(logisticFirst));
    Assert.IsTrue(logisticFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(logisticSecond));

    // beta distribution: not covered yet
}
// Fits many small normal samples with NormalDistribution.FitToSample and checks that
// (a) the fitted mu and sigma average to the population values, and (b) the observed
// scatter of the fitted values agrees with the covariances the fits themselves report.
public void NormalFitUncertainties()
{
    NormalDistribution population = new NormalDistribution(-1.0, 2.0);

    // fitted (mu, sigma) pairs, kept together so their covariance can be measured
    BivariateSample fittedParameters = new BivariateSample();

    // running sums of the covariances claimed by each fit
    double claimedMuMu = 0.0;
    double claimedSigmaSigma = 0.0;
    double claimedMuSigma = 0.0;

    for (int trial = 0; trial < 64; trial++)
    {
        // small samples make the variation in the fitted parameters more substantial
        Sample draw = TestUtilities.CreateSample(population, 16, trial);
        FitResult fit = NormalDistribution.FitToSample(draw);

        fittedParameters.Add(fit.Parameter(0).Value, fit.Parameter(1).Value);

        claimedMuMu += fit.Covariance(0, 0);
        claimedSigmaSigma += fit.Covariance(1, 1);
        claimedMuSigma += fit.Covariance(0, 1);
    }

    // turn the sums into averages over all fits
    claimedMuMu /= fittedParameters.Count;
    claimedSigmaSigma /= fittedParameters.Count;
    claimedMuSigma /= fittedParameters.Count;

    // the average fitted values should agree with the population parameters
    Console.WriteLine("{0} {1}", fittedParameters.X.PopulationMean, population.Mean);
    Assert.IsTrue(fittedParameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(population.Mean));

    Console.WriteLine("{0} {1}", fittedParameters.Y.PopulationMean, population.StandardDeviation);
    Assert.IsTrue(fittedParameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(population.StandardDeviation));

    // the observed variances and covariance should agree with the claimed ones
    Console.WriteLine("{0} {1}", fittedParameters.X.PopulationVariance, claimedMuMu);
    Assert.IsTrue(fittedParameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(claimedMuMu));

    Console.WriteLine("{0} {1}", fittedParameters.Y.PopulationVariance, claimedSigmaSigma);
    Assert.IsTrue(fittedParameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(claimedSigmaSigma));

    Console.WriteLine("{0} {1}", fittedParameters.PopulationCovariance, claimedMuSigma);
    Assert.IsTrue(fittedParameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(claimedMuSigma));
}
// For several small sample sizes, computes Pearson's r on many uncorrelated bivariate normal
// samples and uses a Kolmogorov-Smirnov test to check that the r values follow
// PearsonRDistribution for that sample size.
public void PearsonRDistribution()
{
    Random random = new Random(1);

    // the underlying variables must be normal, but their parameters are arbitrary
    NormalDistribution xSource = new NormalDistribution(1, 2);
    NormalDistribution ySource = new NormalDistribution(3, 4);

    // small sizes are used so the distribution of r is visibly non-normal:
    // n = 3 peaks at the ends, n = 4 is uniform, n = 5 is a semi-circular mound, n = 6 a parabolic mound
    int[] sampleSizes = new int[] { 3, 4, 5, 6, 8 };
    foreach (int n in sampleSizes)
    {
        Console.WriteLine("n={0}", n);

        Sample rValues = new Sample();
        for (int trial = 0; trial < 100; trial++)
        {
            // each r comes from an n-point bivariate sample with independent x and y
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++)
            {
                pairs.Add(xSource.GetRandomValue(random), ySource.GetRandomValue(random));
            }
            rValues.Add(pairs.PearsonRTest().Statistic);
        }

        // the r values should be consistent with the expected null distribution
        TestResult ks = rValues.KolmogorovSmirnovTest(new PearsonRDistribution(n));
        Console.WriteLine("P={0}", ks.LeftProbability);
        Assert.IsTrue(ks.LeftProbability < 0.95);
    }
}
// Performs linear regressions on uncorrelated (x, y) data and checks, with a
// Kolmogorov-Smirnov test, that the goodness-of-fit F statistics follow the
// expected Fisher distribution.
public void BivariateLinearRegressionGoodnessOfFitDistribution()
{
    Random random = new Random(987654321);
    NormalDistribution xSource = new NormalDistribution(1.0, 2.0);
    NormalDistribution ySource = new NormalDistribution(-3.0, 4.0);

    Sample fStatistics = new Sample();
    for (int trial = 0; trial < 127; trial++)
    {
        // seven independent points per fit
        BivariateSample points = new BivariateSample();
        for (int j = 0; j < 7; j++)
        {
            points.Add(xSource.GetRandomValue(random), ySource.GetRandomValue(random));
        }
        fStatistics.Add(points.LinearRegression().GoodnessOfFit.Statistic);
    }

    // the statistics are compared against F(1, 5)
    Distribution expected = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fStatistics.PopulationMean, expected.Mean);

    TestResult ks = fStatistics.KolmogorovSmirnovTest(expected);
    Console.WriteLine(ks.LeftProbability);
    Assert.IsTrue(ks.LeftProbability < 0.95);
}
// Builds t values straight from the definition (a standard normal deviate divided by the
// root of the mean of five squared standard normal deviates) and prints the
// Kolmogorov-Smirnov left probability against StudentDistribution(5). Nothing is asserted.
public void StudentTest2()
{
    Sample tValues = new Sample();
    Distribution standardNormal = new NormalDistribution();

    for (int trial = 0; trial < 100000; trial++)
    {
        // a separately seeded generator is used for every t value
        Random random = new Random(314159 + trial);

        // numerator: one normal deviate
        double numerator = standardNormal.InverseLeftProbability(random.NextDouble());

        // denominator: mean of five squared normal deviates
        double sumOfSquares = 0.0;
        for (int j = 0; j < 5; j++)
        {
            double deviate = standardNormal.InverseLeftProbability(random.NextDouble());
            sumOfSquares += deviate * deviate;
        }
        double meanSquare = sumOfSquares / 5;

        tValues.Add(numerator / Math.Sqrt(meanSquare));
    }

    Distribution expected = new StudentDistribution(5);
    TestResult ks = tValues.KolmogorovSmirnovTest(expected);
    Console.WriteLine(ks.LeftProbability);
}
// Prints four estimates of the mean of the r-th order statistic of n standard normal
// deviates: a Gauss-Hermite integration, a general numeric integration over the whole
// real line, and the two NormalMeanOrderStatisticExpansion variants. Nothing is asserted.
public void TestNormalOrderStatistic()
{
    int n = 100;
    int r = 52;
    Distribution normal = new NormalDistribution();

    // n! / ((r - 1)! (n - r)!), evaluated through logarithms of the factorials
    double logCoefficient =
        AdvancedIntegerMath.LogFactorial(n)
        - AdvancedIntegerMath.LogFactorial(r - 1)
        - AdvancedIntegerMath.LogFactorial(n - r);
    double C = Math.Exp(logCoefficient);

    // the Gaussian weight is not part of this integrand (presumably supplied by the helper)
    double byGaussHermite = GaussHermiteIntegrate(
        x => C * MoreMath.Pow(normal.LeftProbability(x), r - 1) * MoreMath.Pow(normal.RightProbability(x), n - r) * x
    );

    // the same integrand with the normal density written out explicitly
    double byQuadrature = FunctionMath.Integrate(
        x => C * MoreMath.Pow(normal.LeftProbability(x), r - 1) * MoreMath.Pow(normal.RightProbability(x), n - r) * x * Math.Exp(-x * x / 2.0) / Math.Sqrt(2.0 * Math.PI),
        Interval.FromEndpoints(Double.NegativeInfinity, Double.PositiveInfinity)
    );

    Console.WriteLine(byGaussHermite);
    Console.WriteLine(byQuadrature);
    Console.WriteLine(NormalMeanOrderStatisticExpansion(r, n));
    Console.WriteLine(NormalMeanOrderStatisticExpansion2(r, n));
}
// Prints three approximations to the point x at which the regularized incomplete beta
// function with parameters (a, b) equals P, each followed by the value of
// AdvancedMath.LeftRegularizedBeta at that point. Approximations outside (0, 1) are skipped.
// Nothing is asserted.
public void TestBeta()
{
    double a = 200.0;
    double b = 200.0;
    double P = 1.0E-5;

    // series approximation applied directly
    double fromSeries = ApproximateInverseBetaSeries(a, b, P);
    if ((0.0 < fromSeries) && (fromSeries < 1.0))
    {
        Console.WriteLine("x1 {0} {1}", fromSeries, AdvancedMath.LeftRegularizedBeta(a, b, fromSeries));
    }

    // series approximation applied with a and b swapped and the complementary probability
    double fromReflectedSeries = 1.0 - ApproximateInverseBetaSeries(b, a, 1.0 - P);
    if ((0.0 < fromReflectedSeries) && (fromReflectedSeries < 1.0))
    {
        Console.WriteLine("x2 {0} {1}", fromReflectedSeries, AdvancedMath.LeftRegularizedBeta(a, b, fromReflectedSeries));
    }

    // normal approximation: mean and standard deviation of the beta distribution
    NormalDistribution standardNormal = new NormalDistribution();
    double betaMean = a / (a + b);
    double betaDeviation = Math.Sqrt(a * b / (a + b + 1.0)) / (a + b);
    double fromNormal = betaMean + betaDeviation * standardNormal.InverseLeftProbability(P);
    if ((0.0 < fromNormal) && (fromNormal < 1.0))
    {
        Console.WriteLine("x3 {0} {1}", fromNormal, AdvancedMath.LeftRegularizedBeta(a, b, fromNormal));
    }
}
// For a range of sample sizes, collects Kuiper test statistics of samples tested against the
// distribution they were drawn from, then checks that those statistics follow the null
// distribution the Kuiper test reports: by a Kolmogorov-Smirnov test and by comparing
// mean and variance.
public void KuiperNullDistributionTest()
{
    // which distribution is sampled does not matter for the null distribution
    Distribution source = new NormalDistribution();

    foreach (int n in TestUtilities.GenerateIntegerValues(2, 128, 16))
    {
        // the Kuiper statistics gathered for this sample size
        Sample kuiperStatistics = new Sample();

        // the null distribution claimed by the tests; taken from the most recent test result
        Distribution claimedNull = null;

        for (int trial = 0; trial < 256; trial++)
        {
            // samples have n + 1 points; the third argument varies with both n and the trial
            // index so that each sample gets different points
            Sample draw = TestUtilities.CreateSample(source, n + 1, 512 * n + trial + 2);

            TestResult kuiper = draw.KuiperTest(source);
            kuiperStatistics.Add(kuiper.Statistic);
            claimedNull = kuiper.Distribution;
        }

        // the collected statistics should be consistent with the claimed null distribution
        TestResult ks = kuiperStatistics.KolmogorovSmirnovTest(claimedNull);
        Console.WriteLine("{0} {1} {2}", n, ks.Statistic, ks.LeftProbability);
        Assert.IsTrue(ks.RightProbability > 0.01);

        // the first two moments should match as well
        Console.WriteLine(" {0} {1}", kuiperStatistics.PopulationMean, claimedNull.Mean);
        Console.WriteLine(" {0} {1}", kuiperStatistics.PopulationVariance, claimedNull.Variance);
        Assert.IsTrue(kuiperStatistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(claimedNull.Mean));
        Assert.IsTrue(kuiperStatistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(claimedNull.Variance));
    }
}
// Checks that the moments reported by a three-column MultivariateSample agree with the mean
// and variance of each column and with the covariance of each pair of columns.
public void MultivariateMoments()
{
    const int dimension = 3;

    // fill the sample with one normal, one exponential and one uniform column
    MultivariateSample sample = new MultivariateSample(dimension);
    Distribution first = new NormalDistribution();
    Distribution second = new ExponentialDistribution();
    Distribution third = new UniformDistribution();
    Random random = new Random(1);
    int rowCount = 10;
    for (int row = 0; row < rowCount; row++)
    {
        sample.Add(first.GetRandomValue(random), second.GetRandomValue(random), third.GetRandomValue(random));
    }

    for (int i = 0; i < dimension; i++)
    {
        // first raw moment of column i against the column mean
        int[] meanPowers = new int[dimension];
        meanPowers[i] = 1;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(i).Mean, sample.Moment(meanPowers)));

        // second central moment of column i against the column variance
        int[] variancePowers = new int[dimension];
        variancePowers[i] = 2;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Column(i).Variance, sample.MomentAboutMean(variancePowers)));

        // mixed central moment of columns i and j against their covariance
        for (int j = 0; j < i; j++)
        {
            int[] covariancePowers = new int[dimension];
            covariancePowers[i] = 1;
            covariancePowers[j] = 1;
            Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.TwoColumns(i, j).Covariance, sample.MomentAboutMean(covariancePowers)));
        }
    }
}
// Generates data from the model y = a + b0 * x0 + b1 * x1 + noise, fits it by multivariate
// linear regression, and checks the dimension, the significance and the recovered parameters.
public void MultivariateLinearRegressionTest()
{
    // model coefficients and noise
    double intercept = 1.0;
    double slope0 = -2.0;
    double slope1 = 3.0;
    Distribution noise = new NormalDistribution(0.0, 10.0);

    // draw 100 rows of (x0, x1, y) with x0 and x1 uniform on [-10, 10)
    Random random = new Random(1);
    MultivariateSample data = new MultivariateSample(3);
    for (int row = 0; row < 100; row++)
    {
        double x0 = -10.0 + 20.0 * random.NextDouble();
        double x1 = -10.0 + 20.0 * random.NextDouble();
        double error = noise.InverseLeftProbability(random.NextDouble());
        data.Add(x0, x1, intercept + slope0 * x0 + slope1 * x1 + error);
    }

    // regress on column 2, which holds y
    FitResult fit = data.LinearRegression(2);

    // one parameter per coefficient, including the intercept
    Assert.IsTrue(fit.Dimension == 3);

    // the fit should be significant
    Console.WriteLine("{0} {1}", fit.GoodnessOfFit.Statistic, fit.GoodnessOfFit.LeftProbability);
    Assert.IsTrue(fit.GoodnessOfFit.LeftProbability > 0.95);

    // parameters 0 and 1 are checked against the slopes, parameter 2 against the intercept
    Console.WriteLine(fit.Parameter(0));
    Assert.IsTrue(fit.Parameter(0).ConfidenceInterval(0.90).ClosedContains(slope0));
    Console.WriteLine(fit.Parameter(1));
    Assert.IsTrue(fit.Parameter(1).ConfidenceInterval(0.90).ClosedContains(slope1));
    Console.WriteLine(fit.Parameter(2));
    Assert.IsTrue(fit.Parameter(2).ConfidenceInterval(0.90).ClosedContains(intercept));
}
// Regresses pure-noise multivariate samples and checks, with a Kolmogorov-Smirnov test,
// that the goodness-of-fit F statistics follow the expected Fisher distribution.
public void MultivariateLinearRegressionNullDistribution()
{
    int dimension = 4;
    Random random = new Random(1);
    NormalDistribution standardNormal = new NormalDistribution();

    Sample fStatistics = new Sample();
    for (int trial = 0; trial < 64; trial++)
    {
        // eight rows of independent standard normal entries
        MultivariateSample noise = new MultivariateSample(dimension);
        for (int row = 0; row < 8; row++)
        {
            double[] entries = new double[dimension];
            for (int column = 0; column < dimension; column++)
            {
                entries[column] = standardNormal.GetRandomValue(random);
            }
            noise.Add(entries);
        }

        // regress on column 0 and keep the F statistic
        FitResult fit = noise.LinearRegression(0);
        fStatistics.Add(fit.GoodnessOfFit.Statistic);
    }

    // the F statistics are compared against F(3, 4)
    TestResult ks = fStatistics.KolmogorovSmirnovTest(new FisherDistribution(3, 4));
    Assert.IsTrue(ks.LeftProbability < 0.95);
}
/// <summary>
/// Finds the normal distribution that best describes the given sample.
/// </summary>
/// <param name="sample">The sample to fit.</param>
/// <returns>The best fit parameters.</returns>
/// <remarks>
/// <para>The fit parameters are returned in the order μ (<see cref="Mean"/>), σ (<see cref="StandardDeviation"/>).
/// This is the same set of parameters, in the same order, that the <see cref="NormalDistribution(double,double)"/>
/// constructor takes to specify a new normal distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than three values.</exception>
public static FitResult FitToSample(Sample sample)
{
    if (sample == null)
    {
        throw new ArgumentNullException("sample");
    }
    if (sample.Count < 3)
    {
        throw new InsufficientDataException();
    }

    // Maximum likelihood estimates are guaranteed to be asymptotically unbiased, but not
    // necessarily unbiased. The maximum likelihood estimate of the variance of a normal
    // distribution is the classic example: it lacks the N/(N-1) correction factor. Because
    // the bias is known, it is corrected here by using the population estimates.
    UncertainValue mean = sample.PopulationMean;
    UncertainValue deviation = sample.PopulationStandardDeviation;

    // goodness of fit: test the sample against the fitted distribution
    Distribution fitted = new NormalDistribution(mean.Value, deviation.Value);
    TestResult goodnessOfFit = sample.KolmogorovSmirnovTest(fitted);

    // The best-fit mu and sigma are uncorrelated: the mixed second derivative of the
    // log likelihood function vanishes at its extremum, so the covariance passed is zero.
    return new FitResult(mean.Value, mean.Uncertainty, deviation.Value, deviation.Uncertainty, 0.0, goodnessOfFit);
}
// Prints inverse tail probabilities of the standard normal distribution at extreme
// probabilities (1.0E-10, 1.0E-300) and at 1.0. Nothing is asserted.
public void TestNormal()
{
    NormalDistribution standardNormal = new NormalDistribution();

    // left and right inverses deep in the tails
    double[] tinyProbabilities = new double[] { 1.0E-10, 1.0E-300 };
    foreach (double probability in tinyProbabilities)
    {
        Console.WriteLine(standardNormal.InverseLeftProbability(probability));
        Console.WriteLine(standardNormal.InverseRightProbability(probability));
    }

    // left inverse at the upper boundary
    Console.WriteLine(standardNormal.InverseLeftProbability(1.0));
}
// Draws many small samples from a normal population, computes the one-sample Student t
// statistic of each against the true population mean, and checks that the t values are
// distributed as a Student t distribution: by mean, by standard deviation and by a
// Kolmogorov-Smirnov test.
public void TTestDistribution()
{
    const int sampleCount = 100;
    const int sampleSize = 9;

    // start with a normally distributed population
    Distribution xDistribution = new NormalDistribution(2.0, 3.0);
    Random rng = new Random(1);

    // draw the samples and compute the t statistic for each
    Sample tSample = new Sample();
    for (int i = 0; i < sampleCount; i++)
    {
        Sample xSample = new Sample();
        for (int j = 0; j < sampleSize; j++)
        {
            xSample.Add(xDistribution.GetRandomValue(rng));
        }

        // test against the true mean, so the null hypothesis holds
        TestResult tResult = xSample.StudentTTest(2.0);
        double t = tResult.Statistic;
        Console.WriteLine("t = {0}", t);
        tSample.Add(t);
    }

    // sanity check our sample of t's
    Assert.IsTrue(tSample.Count == sampleCount);

    // a one-sample t statistic computed from n values has n - 1 degrees of freedom,
    // so samples of 9 values must be compared against Student t with 8 degrees of freedom
    Distribution tDistribution = new StudentDistribution(sampleSize - 1);

    // check on the mean
    Console.WriteLine("m = {0} vs. {1}", tSample.PopulationMean, tDistribution.Mean);
    Assert.IsTrue(tSample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(tDistribution.Mean), String.Format("{0} vs. {1}", tSample.PopulationMean, tDistribution.Mean));

    // check on the standard deviation
    Console.WriteLine("s = {0} vs. {1}", tSample.PopulationStandardDeviation, tDistribution.StandardDeviation);
    Assert.IsTrue(tSample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(tDistribution.StandardDeviation));

    // do a KS test
    TestResult ksResult = tSample.KolmogorovSmirnovTest(tDistribution);
    Assert.IsTrue(ksResult.LeftProbability < 0.95);
    Console.WriteLine("D = {0}", ksResult.Statistic);
}
// Computes t statistics from many five-point samples of a normal distribution and prints the
// Kolmogorov-Smirnov left probabilities of those t values against StudentDistribution(4) and
// against a standard normal distribution. Nothing is asserted.
public void StudentTest()
{
    Sample tValues = new Sample();
    Distribution source = new NormalDistribution(1.0, 2.0);

    for (int trial = 0; trial < 200000; trial++)
    {
        // a small sample is used because the t distribution approaches the normal as the
        // sample grows; each sample gets its own generator, seeded with the trial index
        Random random = new Random(trial);
        Sample draw = new Sample();
        for (int j = 0; j < 5; j++)
        {
            draw.Add(source.InverseLeftProbability(random.NextDouble()));
        }

        // t = (sample mean - true mean) / (standard deviation estimate / sqrt(count))
        double deviationFromMean = draw.Mean - source.Mean;
        double standardError = draw.PopulationStandardDeviation.Value / Math.Sqrt(draw.Count);
        tValues.Add(deviationFromMean / standardError);
    }

    // the t values should be t-distributed
    Distribution expected = new StudentDistribution(4);
    TestResult ks = tValues.KolmogorovSmirnovTest(expected);
    Console.WriteLine(ks.LeftProbability);

    // and should be demonstrably not normally distributed
    Console.WriteLine(tValues.KolmogorovSmirnovTest(new NormalDistribution()).LeftProbability);
}
// Runs z-tests on many four-point samples from a normal population, using the true mean and
// standard deviation, and checks with a Kolmogorov-Smirnov test that the z statistics are
// standard normal. The check is two-sided: the left probability must lie strictly between
// 0.05 and 0.95.
public void ZTestDistribution()
{
    Random random = new Random(1);

    // a z-test requires a normal sampling population
    Distribution population = new NormalDistribution(2.0, 3.0);

    Sample zValues = new Sample();
    for (int trial = 0; trial < 100; trial++)
    {
        Sample draw = new Sample();
        for (int j = 0; j < 4; j++)
        {
            draw.Add(population.GetRandomValue(random));
        }

        TestResult zTest = draw.ZTest(population.Mean, population.StandardDeviation);
        zValues.Add(zTest.Statistic);
    }

    // the z statistics should follow a standard normal distribution
    TestResult ks = zValues.KolmogorovSmirnovTest(new NormalDistribution());
    Console.WriteLine("{0} {1}", ks.Statistic, ks.LeftProbability);
    Assert.IsTrue((ks.LeftProbability > 0.05) && (ks.LeftProbability < 0.95));
}
// For orders 1 through 10, prints the raw moment of the standard normal distribution next to
// the value MomentMath.RawMomentFromCumulants produces from an array whose entries are
// K[0] = 1, K[1] = 0, K[2] = 1 and zero elsewhere. Nothing is asserted.
public void MomentMapTest()
{
    Distribution standardNormal = new NormalDistribution();

    for (int order = 1; order <= 10; order++)
    {
        // entries 0..order; every entry not set below keeps its default of zero
        double[] cumulants = new double[order + 1];
        cumulants[0] = 1.0;
        cumulants[1] = 0.0;
        if (order >= 2)
        {
            cumulants[2] = 1.0;
        }

        double fromCumulants = MomentMath.RawMomentFromCumulants(cumulants);
        Console.WriteLine("{0} {1}", standardNormal.Moment(order), fromCumulants);
    }
}
// Fits a large normal sample twice, once through the general maximum likelihood fitter and
// once through NormalDistribution.FitToSample, and checks that the general fit recovers the
// generating parameters and agrees with the analytic fit.
public void MaximumLikelihoodFitToNormal()
{
    // generate the sample
    double trueMu = -1.0;
    double trueSigma = 2.0;
    Distribution source = new NormalDistribution(trueMu, trueSigma);
    Sample draw = CreateSample(source, 1024);

    // general maximum likelihood fit, started away from the true parameters
    FitResult numericFit = draw.MaximumLikelihoodFit(
        (IList<double> p) => new NormalDistribution(p[0], p[1]),
        new double[] { trueMu + 1.0, trueSigma + 1.0 }
    );

    // it should recover both parameters
    Assert.IsTrue(numericFit.Dimension == 2);
    Assert.IsTrue(numericFit.Parameter(0).ConfidenceInterval(0.99).ClosedContains(trueMu));
    Assert.IsTrue(numericFit.Parameter(1).ConfidenceInterval(0.99).ClosedContains(trueSigma));

    // analytic fit; the sigma values themselves are not compared
    FitResult analyticFit = NormalDistribution.FitToSample(draw);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(numericFit.Parameter(0).Value, analyticFit.Parameter(0).Value, 1.0E-4));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(numericFit.Parameter(0).Uncertainty, analyticFit.Parameter(0).Uncertainty, 1.0E-2));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(numericFit.Parameter(1).Uncertainty, analyticFit.Parameter(1).Uncertainty, 1.0E-2));
}
// Runs a series of polynomial regression fits on synthetic data and checks both that
// the fitted coefficients scatter around the true coefficients and that their observed
// variances/covariances are consistent with the covariances the fits report.
public void BivariatePolynomialRegression() {
    const int fitCount = 100;
    const int pointsPerFit = 20;

    // fixed seed so that every run draws the same data
    Random rng = new Random(271828);

    // true polynomial coefficients; a[i] multiplies x^i
    double[] a = new double[] { 0.0, -1.0, 2.0, -3.0 };

    // sample of the fitted coefficient vectors, one entry per fit
    MultivariateSample A = new MultivariateSample(a.Length);

    // running sum of the reported covariance matrices; these vary slightly
    // from fit to fit, so they are averaged after the loop
    SymmetricMatrix C = new SymmetricMatrix(a.Length);

    // goodness-of-fit statistics, one per fit
    Sample F = new Sample();

    for (int fit = 0; fit < fitCount; fit++) {

        // x's may come from any distribution; the noise must be normal
        // (the distributions are constructed afresh for every fit)
        Distribution xd = new CauchyDistribution();
        Distribution nd = new NormalDistribution(0.0, 4.0);

        // generate a synthetic data set: draw x, then the noise, then add the polynomial
        BivariateSample data = new BivariateSample();
        for (int point = 0; point < pointsPerFit; point++) {
            double x = xd.GetRandomValue(rng);
            double y = nd.GetRandomValue(rng);
            for (int power = 0; power < a.Length; power++) {
                y += a[power] * MoreMath.Pow(x, power);
            }
            data.Add(x, y);
        }

        // do the regression and record its outputs
        FitResult regression = data.PolynomialRegression(a.Length - 1);
        ColumnVector fitted = regression.Parameters;
        A.Add(fitted);
        C += regression.CovarianceMatrix;
        F.Add(regression.GoodnessOfFit.Statistic);
    }

    // turn the covariance sum into an average
    // (scalar multiplication is used in place of matrix division by a real number)
    C = (1.0 / A.Count) * C;

    // check that mean parameter estimates are what they should be:
    // the underlying population parameters
    for (int p = 0; p < A.Dimension; p++) {
        Console.WriteLine("{0} {1}", A.Column(p).PopulationMean, a[p]);
        Assert.IsTrue(A.Column(p).PopulationMean.ConfidenceInterval(0.95).ClosedContains(a[p]));
    }

    // check that parameter covariances are what they should be:
    // the reported covariance estimates
    for (int p = 0; p < A.Dimension; p++) {
        for (int q = p; q < A.Dimension; q++) {
            Console.WriteLine("{0} {1} {2} {3}", p, q, C[p, q], A.TwoColumns(p, q).PopulationCovariance);
            Assert.IsTrue(A.TwoColumns(p, q).PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(C[p, q]));
        }
    }

    // the collected F statistics are not checked against a distribution yet
}
// Fits a normally distributed sample to a normal distribution (which should succeed
// and recover the parameters) and to an exponential distribution (which should be
// rejected by the goodness-of-fit test).
public void NormalFit() {

    // pick mu >> sigma so that we get no negative values;
    // otherwise the attempt to fit to an exponential will fail
    Distribution source = new NormalDistribution(6.0, 2.0);
    Sample data = CreateSample(source, 100);

    // fit to normal should be good
    FitResult normalFit = NormalDistribution.FitToSample(data);
    Console.WriteLine("P_n = {0}", normalFit.GoodnessOfFit.LeftProbability);
    Assert.IsTrue(normalFit.GoodnessOfFit.LeftProbability < 0.95);

    // and the fitted parameters should be compatible with the true mean and standard deviation
    Assert.IsTrue(normalFit.Parameter(0).ConfidenceInterval(0.95).ClosedContains(source.Mean));
    Assert.IsTrue(normalFit.Parameter(1).ConfidenceInterval(0.95).ClosedContains(source.StandardDeviation));

    // fit to exponential should be bad
    FitResult exponentialFit = ExponentialDistribution.FitToSample(data);
    Console.WriteLine("P_e = {0}", exponentialFit.GoodnessOfFit.LeftProbability);
    Assert.IsTrue(exponentialFit.GoodnessOfFit.LeftProbability > 0.95);
}
// Runs a series of linear regression fits on synthetic data and checks both that the
// fitted intercept and slope scatter around the true values and that their observed
// variances/covariance are consistent with the values the fits report.
public void BivariateLinearRegression() {
    const int fitCount = 100;
    const int pointsPerFit = 25;

    // fixed seed so that every run draws the same data
    Random rng = new Random(314159);

    // true intercept and slope of the line
    double a0 = 2.0;
    double b0 = -1.0;

    // sample of fitted (intercept, slope) pairs, one entry per fit
    BivariateSample ps = new BivariateSample();

    // running sums of the reported covariance estimates; these vary slightly
    // from fit to fit, so they are averaged after the loop
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;

    // goodness-of-fit statistics, one per fit
    Sample fs = new Sample();

    for (int fit = 0; fit < fitCount; fit++) {

        // x's may come from any distribution; the noise must be normal
        // (the distributions are constructed afresh for every fit)
        Distribution xd = new LogisticDistribution();
        Distribution nd = new NormalDistribution(0.0, 2.0);

        // generate a synthetic data set: draw x first, then the noise
        BivariateSample data = new BivariateSample();
        for (int point = 0; point < pointsPerFit; point++) {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            data.Add(x, y);
        }

        // do the regression
        FitResult regression = data.LinearRegression();

        // record best fit parameters
        double intercept = regression.Parameter(0).Value;
        double slope = regression.Parameter(1).Value;
        ps.Add(intercept, slope);

        // record estimated covariances
        caa += regression.Covariance(0, 0);
        cbb += regression.Covariance(1, 1);
        cab += regression.Covariance(0, 1);

        // record the fit statistic
        fs.Add(regression.GoodnessOfFit.Statistic);
        Console.WriteLine("F={0}", regression.GoodnessOfFit.Statistic);
    }

    // turn the covariance sums into averages
    caa /= ps.Count;
    cbb /= ps.Count;
    cab /= ps.Count;

    // check that mean parameter estimates are what they should be:
    // the underlying population parameters
    Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

    Console.WriteLine("{0} {1}", caa, ps.X.PopulationVariance);
    Console.WriteLine("{0} {1}", cbb, ps.Y.PopulationVariance);

    // check that parameter covariances are what they should be:
    // the reported covariance estimates
    Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

    // the distribution of F is only printed, not asserted
    // NOTE(review): the (2, 48) degrees of freedom are not derived from anything in
    // this method (each fit uses 25 points) -- confirm they are the intended values
    Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
}
// Timing harness: generates ten million normal deviates and prints their sum followed
// by the elapsed time in milliseconds. Nothing is asserted.
public void TimeNormalGenerators() {
    const int deviateCount = 10000000;

    // fixed seed so that every run draws the same values
    Random rng = new Random(1);

    // Candidate generator under test. It is constructed but not used by the active
    // timing loop below, which times inversion of the normal CDF instead. Other
    // candidates that can be swapped in here: BoxMullerNormalGenerator,
    // PolarRejectionNormalDeviateGenerator, RatioOfUniformsNormalGenerator.
    IDeviateGenerator nRng = new LevaNormalGenerator();

    Distribution nrm = new NormalDistribution();

    // the sum is accumulated and printed so that the generated values are actually used
    double sum = 0.0;
    Stopwatch timer = Stopwatch.StartNew();
    for (int drawn = 0; drawn < deviateCount; drawn++) {
        double uniform = rng.NextDouble();
        sum += nrm.InverseLeftProbability(uniform);
        // to time the generator instead: sum += nRng.GetNext(rng);
    }
    timer.Stop();

    Console.WriteLine(sum);
    Console.WriteLine(timer.ElapsedMilliseconds);
}