// Verifies that the Wilcoxon signed-rank test statistic follows its claimed
// null distribution when the paired values are in fact unrelated.
public void WilcoxonNullDistribution() {
    // Pick a very non-normal distribution
    ContinuousDistribution d = new ExponentialDistribution();
    Random rng = new Random(271828);
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 4)) {
        // Collect the W statistic from many independent paired samples of size n.
        Sample wSample = new Sample();
        ContinuousDistribution wDistribution = null;
        for (int i = 0; i < 128; i++) {
            BivariateSample sample = new BivariateSample();
            for (int j = 0; j < n; j++) {
                double x = d.GetRandomValue(rng);
                double y = d.GetRandomValue(rng);
                sample.Add(x, y);
            }
            TestResult wilcoxon = sample.WilcoxonSignedRankTest();
            wSample.Add(wilcoxon.Statistic);
            // Claimed null distribution; the same object shape for every rep at fixed n.
            wDistribution = wilcoxon.Distribution;
        }
        // Observed statistics should be consistent with the claimed distribution,
        // both in overall shape (KS) and in low moments.
        TestResult ks = wSample.KolmogorovSmirnovTest(wDistribution);
        Assert.IsTrue(ks.Probability > 0.05);
        Assert.IsTrue(wSample.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wDistribution.Mean));
        Assert.IsTrue(wSample.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(wDistribution.StandardDeviation));
    }
}
// Verifies that, for independent x and y, the Kendall tau statistic follows
// the null distribution claimed by the test.
public void KendallNullDistributionTest() {
    // x and y come from independent distributions that need not be normal
    // and need not be related to one another.
    ContinuousDistribution xDistribution = new LogisticDistribution();
    ContinuousDistribution yDistribution = new ExponentialDistribution();
    Random rng = new Random(314159265);
    // Repeat for bivariate samples of several sizes.
    foreach (int n in TestUtilities.GenerateIntegerValues(8, 64, 4)) {
        Sample tauValues = new Sample();
        ContinuousDistribution claimedDistribution = null;
        for (int rep = 0; rep < 128; rep++) {
            BivariateSample sample = new BivariateSample();
            for (int k = 0; k < n; k++) {
                sample.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            TestResult result = sample.KendallTauTest();
            tauValues.Add(result.Statistic);
            claimedDistribution = result.Distribution;
        }
        // Compare the observed statistics to the claimed null distribution,
        // both by shape and by low moments.
        TestResult ksTest = tauValues.KolmogorovSmirnovTest(claimedDistribution);
        Assert.IsTrue(ksTest.RightProbability > 0.05);
        Assert.IsTrue(tauValues.PopulationMean.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Mean));
        Assert.IsTrue(tauValues.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Variance));
    }
}
// Inverts a 2x2 or 3x3 matrix of distributions using the explicit
// adjugate-over-determinant formulas; other sizes are rejected.
// Note: arithmetic is done with the distributions' own Times/Minus/Negate/DividedBy
// operations, so the results inherit whatever approximations those carry.
public static ContinuousDistribution[,] Inverse(ContinuousDistribution[,] xx) {
    if (xx.GetLength(0) == 2 && xx.GetLength(1) == 2) {
        // 2x2: swap diagonal, negate off-diagonal, divide by the determinant.
        ContinuousDistribution det = RandomMatrix.Determinant(xx);
        ContinuousDistribution[,] yy = new ContinuousDistribution[2, 2];
        yy[0, 0] = xx[1, 1].DividedBy(det);
        yy[0, 1] = xx[0, 1].Negate().DividedBy(det);
        yy[1, 0] = xx[1, 0].Negate().DividedBy(det);
        yy[1, 1] = xx[0, 0].DividedBy(det);
        return(yy);
    }
    if (xx.GetLength(0) == 3 && xx.GetLength(1) == 3) {
        // 3x3: cofactor formulas, from http://www.dr-lex.be/random/matrix_inv.html
        ContinuousDistribution det = RandomMatrix.Determinant(xx);
        ContinuousDistribution[,] yy = new ContinuousDistribution[3, 3];
        yy[0, 0] = xx[2, 2].Times(xx[1, 1]).Minus(xx[2, 1].Times(xx[1, 2])).DividedBy(det);
        yy[0, 1] = xx[2, 2].Times(xx[0, 1]).Minus(xx[2, 1].Times(xx[0, 2])).Negate().DividedBy(det);
        yy[0, 2] = xx[1, 2].Times(xx[0, 1]).Minus(xx[1, 1].Times(xx[0, 2])).DividedBy(det);
        yy[1, 0] = xx[2, 2].Times(xx[1, 0]).Minus(xx[2, 0].Times(xx[1, 2])).Negate().DividedBy(det);
        yy[1, 1] = xx[2, 2].Times(xx[0, 0]).Minus(xx[2, 0].Times(xx[0, 2])).DividedBy(det);
        yy[1, 2] = xx[1, 2].Times(xx[0, 0]).Minus(xx[1, 0].Times(xx[0, 2])).Negate().DividedBy(det);
        yy[2, 0] = xx[2, 1].Times(xx[1, 0]).Minus(xx[2, 0].Times(xx[1, 1])).DividedBy(det);
        yy[2, 1] = xx[2, 1].Times(xx[0, 0]).Minus(xx[2, 0].Times(xx[0, 1])).Negate().DividedBy(det);
        yy[2, 2] = xx[1, 1].Times(xx[0, 0]).Minus(xx[1, 0].Times(xx[0, 1])).DividedBy(det);
        return(yy);
    }
    throw new ArgumentException("Unknown matrix dimensions");
}
// Verifies that the Shapiro-Francia normality-test statistic, computed on
// genuinely normal samples, follows its claimed null distribution.
public void ShapiroFranciaNullDistribution() {
    Random rng = new Random(57721);
    foreach (int n in TestUtilities.GenerateIntegerValues(16, 128, 4)) {
        Sample vSample = new Sample();
        ContinuousDistribution vDistribution = null;
        // Any normal distribution should do; parameters are arbitrary.
        NormalDistribution zDistribution = new NormalDistribution(-2.0, 3.0);
        for (int i = 0; i < 256; i++) {
            // Seed with the rep index so every rep gets different data.
            Sample zSample = TestUtilities.CreateSample(zDistribution, n, i);
            TestResult sf = zSample.ShapiroFranciaTest();
            vSample.Add(sf.Statistic);
            vDistribution = sf.Distribution;
        }
        TestResult ks = vSample.KolmogorovSmirnovTest(vDistribution);
        Assert.IsTrue(ks.Probability > 0.01);
        // The returned SF null distribution is approximate, so we can't
        // make arbitrarily stringent P demands for arbitrarily large samples.
    }
}
// Inverts a 2x2 or 3x3 matrix of distributions using the explicit
// adjugate-over-determinant formulas; other sizes are rejected.
public static ContinuousDistribution[,] Inverse(ContinuousDistribution[,] xx) {
    if (xx.GetLength(0) == 2 && xx.GetLength(1) == 2) {
        // 2x2: swap diagonal, negate off-diagonal, divide by the determinant.
        ContinuousDistribution det = RandomMatrix.Determinant(xx);
        ContinuousDistribution[,] yy = new ContinuousDistribution[2, 2];
        yy[0, 0] = xx[1, 1].DividedBy(det);
        yy[0, 1] = xx[0, 1].Negate().DividedBy(det);
        yy[1, 0] = xx[1, 0].Negate().DividedBy(det);
        yy[1, 1] = xx[0, 0].DividedBy(det);
        return yy;
    }
    if (xx.GetLength(0) == 3 && xx.GetLength(1) == 3) {
        // 3x3: cofactor formulas, from http://www.dr-lex.be/random/matrix_inv.html
        ContinuousDistribution det = RandomMatrix.Determinant(xx);
        ContinuousDistribution[,] yy = new ContinuousDistribution[3, 3];
        yy[0, 0] = xx[2,2].Times(xx[1,1]).Minus(xx[2,1].Times(xx[1,2])).DividedBy(det);
        yy[0, 1] = xx[2,2].Times(xx[0,1]).Minus(xx[2,1].Times(xx[0,2])).Negate().DividedBy(det);
        yy[0, 2] = xx[1,2].Times(xx[0,1]).Minus(xx[1,1].Times(xx[0,2])).DividedBy(det);
        yy[1, 0] = xx[2,2].Times(xx[1,0]).Minus(xx[2,0].Times(xx[1,2])).Negate().DividedBy(det);
        yy[1, 1] = xx[2,2].Times(xx[0,0]).Minus(xx[2,0].Times(xx[0,2])).DividedBy(det);
        yy[1, 2] = xx[1,2].Times(xx[0,0]).Minus(xx[1,0].Times(xx[0,2])).Negate().DividedBy(det);
        yy[2, 0] = xx[2,1].Times(xx[1,0]).Minus(xx[2,0].Times(xx[1,1])).DividedBy(det);
        yy[2, 1] = xx[2,1].Times(xx[0,0]).Minus(xx[2,0].Times(xx[0,1])).Negate().DividedBy(det);
        yy[2, 2] = xx[1,1].Times(xx[0,0]).Minus(xx[1,0].Times(xx[0,1])).DividedBy(det);
        return yy;
    }
    throw new ArgumentException("Unknown matrix dimensions");
}
// Constructs a tester around a distribution produced by the supplied creator.
// NOTE(review): only a[0] and a[1] are passed to the creator here, yet the
// array is required to have exactly 6 entries — presumably the remaining
// entries are consumed elsewhere (e.g. expected moments); confirm against callers.
public ContDistTester(double[] a, ContDistCreator creator) {
    Assert.AreEqual(6, a.Length, "Unexpected length of parameter array");
    _a = a;
    _dist = creator(_a[0], _a[1]);
    Assert.IsNotNull(_dist);
}
// Verifies that the two-sample Student t statistic, computed on samples drawn
// from the same population, follows its claimed null distribution.
public void StudentTNullDistributionTest() {
    // Both samples come from the same normal population, so the null holds.
    ContinuousDistribution z = new NormalDistribution(-1.0, 2.0);
    Random rng = new Random(1);
    foreach (int n in TestUtilities.GenerateIntegerValues(2, 32, 4)) {
        Sample tSample = new Sample();
        ContinuousDistribution tDistribution = null;
        for (int j = 0; j < 128; j++) {
            Sample a = new Sample();
            Sample b = new Sample();
            for (int i = 0; i < n; i++) {
                a.Add(z.GetRandomValue(rng));
                b.Add(z.GetRandomValue(rng));
            }
            TestResult tResult = Sample.StudentTTest(a, b);
            tSample.Add(tResult.Statistic);
            tDistribution = tResult.Distribution;
        }
        // Observed t values should match the claimed distribution in shape and moments.
        TestResult ks = tSample.KolmogorovSmirnovTest(tDistribution);
        Assert.IsTrue(ks.Probability > 0.01);
        Assert.IsTrue(tSample.PopulationMean.ConfidenceInterval(0.99).ClosedContains(tDistribution.Mean));
        Assert.IsTrue(tSample.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(tDistribution.StandardDeviation));
    }
}
// Checks that a histogram of samples drawn from the distribution matches the
// bucket masses predicted by its cumulative distribution function.
private void TestContinuousDistributionShapeMatchesCumulativeDensity(
    ContinuousDistribution distribution,
    double min, double max,
    int numberOfBuckets, int avgSamplesPerBucket,
    double absoluteAccuracy, string message)
{
    // Expected mass of each bucket is the CDF increment across it.
    double bucketWidth = (max - min) / numberOfBuckets;
    double[] expected = new double[numberOfBuckets];
    double cdfBelow = distribution.CumulativeDistribution(min);
    // Mass below the histogram range.
    double underflow = cdfBelow;
    double edge = min;
    for (int bucket = 0; bucket < numberOfBuckets; bucket++) {
        edge += bucketWidth;
        double cdfHere = distribution.CumulativeDistribution(edge);
        expected[bucket] = cdfHere - cdfBelow;
        cdfBelow = cdfHere;
    }
    // Whatever mass remains above the last edge is overflow.
    double overflow = 1 - cdfBelow;
    TestContinuousDistributionShape(distribution, min, max, expected, underflow, overflow, avgSamplesPerBucket, absoluteAccuracy, message);
}
// Packages a named test statistic value together with its claimed distribution.
// Callers must supply non-null name and distribution (checked in debug builds only).
internal ContinuousTestStatistic(string name, double value, ContinuousDistribution distribution) {
    Debug.Assert(name != null);
    Debug.Assert(distribution != null);
    this.name = name;
    this.value = value;
    this.distribution = distribution;
}
public void BivariateLinearRegressionNullDistribution() {
    // create uncorrelated x and y values
    // the distribution of F-test statistics returned by linear fits should follow the expected F-distribution
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);
    // NOTE(review): fs and fSample accumulate the same F values; fs feeds the
    // explicit FisherDistribution check below while fSample is tested against
    // the distribution claimed by the regression itself.
    Sample fs = new Sample();
    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;
    for (int i = 0; i < 127; i++) {
        // Each fit uses 7 uncorrelated points.
        BivariateSample sample = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult result = sample.LinearRegression();
        double f = result.F.Statistic;
        fs.Add(f);
        rSample.Add(result.R.Statistic);
        rDistribution = result.R.Distribution;
        fSample.Add(result.F.Statistic);
        fDistribution = result.F.Distribution;
        // The regression's F test and its ANOVA's F test should agree exactly,
        // and the r test should give the same P value as the F test.
        Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            result.R.Probability, result.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
        ));
    }
    // With 7 points and 1 predictor, F should follow F(1, 5).
    ContinuousDistribution fd = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
    TestResult t = fs.KolmogorovSmirnovTest(fd);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
// Builds a matrix of Gaussian distributions from element-wise means and
// standard deviations; the sds are squared because the constructor takes a variance.
public static ContinuousDistribution[,] MakeGaussians(double[,] means, double[,] sds) {
    int rows = means.GetLength(0);
    int cols = means.GetLength(1);
    ContinuousDistribution[,] gaussians = new ContinuousDistribution[rows, cols];
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            double sd = sds[r, c];
            gaussians[r, c] = new GaussianDistribution(means[r, c], sd * sd);
        }
    }
    return gaussians;
}
// Constructs a test result that carries both a discrete statistic (e.g. an
// exact small-sample count) and its continuous counterpart/approximation.
internal TestResult(
    string discreteName, int discreteValue, DiscreteDistribution discreteDistribution,
    string continuousName, double continuousValue, ContinuousDistribution continuousDistribution,
    TestType type) {
    this.discreteStatistic = new DiscreteTestStatistic(discreteName, discreteValue, discreteDistribution);
    this.continuousStatistic = new ContinuousTestStatistic(continuousName, continuousValue, continuousDistribution);
    this.type = type;
}
// Adds two distributions, approximating the sum as normal (exact if the other
// is Gaussian). If the other operand is also clipped, the clip bounds add as well.
// Uses a pattern match instead of the original is-check followed by a cast.
public override ContinuousDistribution Plus(ContinuousDistribution two) {
    if (two is ClippedGaussianDistribution other) {
        // Sum of independent normals: means and variances add; bounds add too.
        return new ClippedGaussianDistribution(mean + two.Mean, variance + two.Variance, lower + other.lower, upper + other.upper);
    }
    return new GaussianDistribution(mean + two.Mean, variance + two.Variance);
}
// Returns the transpose of a matrix of distributions (shares the element
// references; does not copy the distributions themselves).
public static ContinuousDistribution[,] Transpose(ContinuousDistribution[,] xx) {
    int rows = xx.GetLength(0);
    int cols = xx.GetLength(1);
    ContinuousDistribution[,] transposed = new ContinuousDistribution[cols, rows];
    for (int c = 0; c < cols; c++) {
        for (int r = 0; r < rows; r++) {
            transposed[c, r] = xx[r, c];
        }
    }
    return transposed;
}
// Inverts the CDF by Newton-Raphson: finds x with dist.CumulativeDistribution(x) == p,
// starting from x = 1 and iterating until successive estimates differ by at most epsilon.
static double GetX(double p, ContinuousDistribution dist) {
    double current = 1;
    while (true) {
        // Newton step: x' = x - (F(x) - p) / f(x).
        double next = current - (dist.CumulativeDistribution(current) - p) / dist.ProbabilityDensity(current);
        if (Math.Abs(current - next) <= epsilon) {
            return next;
        }
        current = next;
    }
}
// Builds a matrix of Gaussian distributions from element-wise means and
// standard deviations; sds are squared because the constructor takes a variance.
public static ContinuousDistribution[,] MakeGaussians(double[,] means, double[,] sds) {
    ContinuousDistribution[,] result = new ContinuousDistribution[means.GetLength(0), means.GetLength(1)];
    for (int rr = 0; rr < means.GetLength(0); rr++) {
        for (int cc = 0; cc < means.GetLength(1); cc++) {
            result[rr, cc] = new GaussianDistribution(means[rr, cc], sds[rr, cc] * sds[rr, cc]);
        }
    }
    return(result);
}
// Sanity-checks a distribution's probability functions at a single point x:
// tail probabilities must be valid, complementary, and the density non-negative.
private void DistributionProbabilityTestHelper(ContinuousDistribution distribution, double x) {
    Console.WriteLine("{0} {1}", distribution.GetType().Name, x);
    double left = distribution.LeftProbability(x);
    double right = distribution.RightProbability(x);
    // Each tail probability must lie in [0, 1].
    Assert.IsTrue((0.0 <= left) && (left <= 1.0));
    Assert.IsTrue((0.0 <= right) && (right <= 1.0));
    // The two tails must account for all probability.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(left + right, 1.0));
    // The density must be non-negative.
    double density = distribution.ProbabilityDensity(x);
    Assert.IsTrue(density >= 0.0);
}
// Verifies the Wilcoxon signed-rank statistic against its claimed null
// distribution, handling both the exact discrete small-sample form (exposed via
// UnderlyingStatistic) and the continuous asymptotic form.
public void WilcoxonNullDistribution() {
    // Pick a very non-normal distribution
    ContinuousDistribution d = new ExponentialDistribution();
    Random rng = new Random(271828);
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 4)) {
        Sample wContinuousSample = new Sample();
        ContinuousDistribution wContinuousDistribution = null;
        List <int> wDiscreteSample = new List <int>();
        DiscreteDistribution wDiscreteDistribution = null;
        for (int i = 0; i < 256; i++) {
            BivariateSample sample = new BivariateSample();
            for (int j = 0; j < n; j++) {
                double x = d.GetRandomValue(rng);
                double y = d.GetRandomValue(rng);
                sample.Add(x, y);
            }
            TestResult wilcoxon = sample.WilcoxonSignedRankTest();
            // Small samples report an exact discrete statistic; large samples
            // report only the continuous (asymptotic) statistic.
            if (wilcoxon.UnderlyingStatistic != null) {
                wDiscreteSample.Add(wilcoxon.UnderlyingStatistic.Value);
                wDiscreteDistribution = wilcoxon.UnderlyingStatistic.Distribution;
            } else {
                wContinuousSample.Add(wilcoxon.Statistic.Value);
                wContinuousDistribution = wilcoxon.Statistic.Distribution;
            }
        }
        if (wDiscreteDistribution != null) {
            // Discrete case: chi-squared goodness of fit against the exact distribution.
            TestResult chi2 = wDiscreteSample.ChiSquaredTest(wDiscreteDistribution);
            Assert.IsTrue(chi2.Probability > 0.01);
        } else {
            // Continuous case: KS goodness of fit plus low-moment checks.
            TestResult ks = wContinuousSample.KolmogorovSmirnovTest(wContinuousDistribution);
            Assert.IsTrue(ks.Probability > 0.01);
            Assert.IsTrue(wContinuousSample.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wContinuousDistribution.Mean));
            Assert.IsTrue(wContinuousSample.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(wContinuousDistribution.StandardDeviation));
        }
    }
}
// Draws `count` values from the given distribution into a new Sample,
// using a deterministic RNG seeded with `seed` so results are reproducible.
public static Sample CreateSample(ContinuousDistribution distribution, int count, int seed) {
    Random rng = new Random(seed);
    Sample sample = new Sample();
    for (int i = 0; i < count; i++) {
        sample.Add(distribution.GetRandomValue(rng));
    }
    return sample;
}
// Fills `values` with random variates using the distribution's MKL stream.
// NOTE(review): this always calls FillNormals regardless of the concrete
// distribution type — looks like only normal distributions are expected here;
// confirm against callers before relying on other distribution types.
public override void FillRandom(ContinuousDistribution distribution, NArray values) {
    var stream = distribution.RandomNumberStream.InnerStream as IntelMKLRandomNumberStream;
    if (stream == null) {
        throw new ArgumentException("distribution must use MKL random generator", "distribution");
    }
    double[] aArray;
    int aStart;
    GetArray(values, out aArray, out aStart);
    // we use the random stream as appropriate
    IntelMathKernelLibraryRandom.FillNormals(aArray, stream, aStart, values.Length);
}
// With x and y independent, the Pearson r, Spearman rho, and Kendall tau
// statistics should each follow the null distribution its test claims.
public void BivariateNullAssociation() {
    Random rng = new Random(31415926);
    // Create a data structure to hold the results of Pearson, Spearman, and Kendall tests.
    FrameTable data = new FrameTable();
    data.AddColumn <double>("r");
    data.AddColumn <double>("ρ");
    data.AddColumn <double>("τ");
    // Create variables to hold the claimed distribution of each test statistic.
    ContinuousDistribution PRD = null;
    ContinuousDistribution SRD = null;
    ContinuousDistribution KTD = null;
    // Generate a large number of bivariate samples and conduct our three tests on each.
    // The marginals are deliberately heavy-tailed / skewed; independence is what matters.
    ContinuousDistribution xDistribution = new LognormalDistribution();
    ContinuousDistribution yDistribution = new CauchyDistribution();
    for (int j = 0; j < 100; j++) {
        List <double> x = new List <double>();
        List <double> y = new List <double>();
        for (int i = 0; i < 100; i++) {
            x.Add(xDistribution.GetRandomValue(rng));
            y.Add(yDistribution.GetRandomValue(rng));
        }
        TestResult PR = Bivariate.PearsonRTest(x, y);
        TestResult SR = Bivariate.SpearmanRhoTest(x, y);
        TestResult KT = Bivariate.KendallTauTest(x, y);
        PRD = PR.Statistic.Distribution;
        SRD = SR.Statistic.Distribution;
        KTD = KT.Statistic.Distribution;
        data.AddRow(new Dictionary <string, object>() {
            { "r", PR.Statistic.Value }, { "ρ", SR.Statistic.Value }, { "τ", KT.Statistic.Value }
        });
    }
    // Each statistic column should be consistent with its claimed null distribution.
    Assert.IsTrue(data["r"].As <double>().KolmogorovSmirnovTest(PRD).Probability > 0.05);
    Assert.IsTrue(data["ρ"].As <double>().KolmogorovSmirnovTest(SRD).Probability > 0.05);
    Assert.IsTrue(data["τ"].As <double>().KolmogorovSmirnovTest(KTD).Probability > 0.05);
}
// Matrix product of two distribution matrices: inner dimensions must match.
// NOTE(review): each dot product is seeded with a FlatDistribution, which is
// presumably the additive identity for Plus — confirm that assumption holds.
public static ContinuousDistribution[,] Multiply(ContinuousDistribution[,] xx, ContinuousDistribution[,] yy) {
    if (xx.GetLength(1) != yy.GetLength(0))
        throw new ArgumentException("Inside dimensions must match");
    ContinuousDistribution[,] zz = new ContinuousDistribution[xx.GetLength(0), yy.GetLength(1)];
    for (int rr = 0; rr < xx.GetLength(0); rr++)
        for (int cc = 0; cc < yy.GetLength(1); cc++) {
            // Accumulate the dot product of row rr of xx with column cc of yy.
            ContinuousDistribution sum = new FlatDistribution();
            for (int ii = 0; ii < xx.GetLength(1); ii++)
                sum = sum.Plus(xx[rr, ii].Times(yy[ii, cc]));
            zz[rr, cc] = sum;
        }
    return zz;
}
// With x and y drawn independently, the Pearson, Spearman, and Kendall
// statistics should each be consistent with their claimed null distributions.
public void BivariateNullAssociation() {
    Random rng = new Random(314159265);
    // Create sample sets for our three test statistics
    Sample PS = new Sample();
    Sample SS = new Sample();
    Sample KS = new Sample();
    // variables to hold the claimed distribution of each test statistic
    ContinuousDistribution PD = null;
    ContinuousDistribution SD = null;
    ContinuousDistribution KD = null;
    // generate a large number of bivariate samples and conduct our three tests on each
    for (int j = 0; j < 100; j++) {
        BivariateSample S = new BivariateSample();
        // sample size should be large so that asymptotic assumptions are justified
        for (int i = 0; i < 100; i++) {
            double x = rng.NextDouble();
            double y = rng.NextDouble();
            S.Add(x, y);
        }
        TestResult PR = S.PearsonRTest();
        PS.Add(PR.Statistic);
        PD = PR.Distribution;
        TestResult SR = S.SpearmanRhoTest();
        SS.Add(SR.Statistic);
        SD = SR.Distribution;
        TestResult KR = S.KendallTauTest();
        KS.Add(KR.Statistic);
        KD = KR.Distribution;
    }
    // do KS to test whether the samples follow the claimed distributions
    Assert.IsTrue(PS.KolmogorovSmirnovTest(PD).LeftProbability < 0.95);
    Assert.IsTrue(SS.KolmogorovSmirnovTest(SD).LeftProbability < 0.95);
    Assert.IsTrue(KS.KolmogorovSmirnovTest(KD).LeftProbability < 0.95);
}
// Determinant of a 1x1, 2x2, or 3x3 matrix of distributions, computed with the
// distributions' own arithmetic; other sizes are rejected.
public static ContinuousDistribution Determinant(ContinuousDistribution[,] xx) {
    // 1x1: the single entry is its own determinant.
    if (xx.GetLength(0) == 1 && xx.GetLength(1) == 1) {
        return xx[0, 0];
    }
    // 2x2: ad - bc.
    if (xx.GetLength(0) == 2 && xx.GetLength(1) == 2) {
        ContinuousDistribution ad = xx[0, 0].Times(xx[1, 1]);
        ContinuousDistribution bc = xx[0, 1].Times(xx[1, 0]);
        return ad.Minus(bc);
    }
    // 3x3: cofactor expansion down the first column.
    if (xx.GetLength(0) == 3 && xx.GetLength(1) == 3) {
        ContinuousDistribution first = xx[0, 0].Times(xx[2, 2].Times(xx[1, 1]).Minus(xx[2, 1].Times(xx[1, 2])));
        ContinuousDistribution second = xx[1, 0].Times(xx[2, 2].Times(xx[0, 1]).Minus(xx[2, 1].Times(xx[0, 2])));
        ContinuousDistribution third = xx[2, 0].Times(xx[1, 2].Times(xx[0, 1]).Minus(xx[1, 1].Times(xx[0, 2])));
        return first.Minus(second).Plus(third);
    }
    throw new ArgumentException("Unknown matrix dimensions");
}
// Collects Ljung-Box Q statistics from many MA(1) series generated with a zero
// MA coefficient (so the null of no autocorrelation holds) and verifies they
// follow the distribution claimed by the test.
public void LjungBoxNullDistribution() {
    Sample statistics = new Sample();
    ContinuousDistribution claimed = null;
    for (int i = 0; i < 100; i++) {
        TimeSeries series = GenerateMA1TimeSeries(0.0, 1.0, 2.0, 10, i);
        TestResult lbResult = series.LjungBoxTest(5);
        statistics.Add(lbResult.Statistic.Value);
        claimed = lbResult.Statistic.Distribution;
    }
    // Kuiper goodness-of-fit of the observed Q values against the claimed distribution.
    TestResult kResult = statistics.KuiperTest(claimed);
    Assert.IsTrue(kResult.Probability > 0.05);
}
public void BivariateLinearRegressionNullDistribution() {
    // Create uncorrelated x and y values and do a linear fit.
    // The r-tests and F-test statistics returned by the linear fits
    // should agree and both test statistics should follow their claimed
    // distributions.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);
    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;
    for (int i = 0; i < 127; i++) {
        // Each fit uses 7 uncorrelated points.
        BivariateSample sample = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult result = sample.LinearRegression();
        rSample.Add(result.R.Statistic.Value);
        rDistribution = result.R.Statistic.Distribution;
        fSample.Add(result.F.Statistic.Value);
        fDistribution = result.F.Statistic.Distribution;
        // The regression's F statistic and its ANOVA's F statistic must agree exactly,
        // and the r test must give (nearly) the same P value as the F test.
        Assert.IsTrue(result.F.Statistic.Value == result.Anova.Result.Statistic.Value);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            result.R.Probability, result.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
        ));
    }
    // The collected statistics should match the claimed null distributions.
    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
// Sanity-checks a distribution's probability functions at a single point x:
// tails valid and complementary, density non-negative, hazard = p/Q where defined.
private void DistributionProbabilityTestHelper(ContinuousDistribution distribution, double x) {
    double left = distribution.LeftProbability(x);
    double right = distribution.RightProbability(x);
    // Each tail probability must lie in [0, 1] and together they must sum to 1.
    Assert.IsTrue((0.0 <= left) && (left <= 1.0));
    Assert.IsTrue((0.0 <= right) && (right <= 1.0));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(left + right, 1.0));
    double density = distribution.ProbabilityDensity(x);
    Assert.IsTrue(density >= 0.0);
    double hazard = distribution.Hazard(x);
    // Hazard is density over right tail; only check where both are positive.
    if (density > 0.0 && right > 0.0) {
        Assert.IsTrue(TestUtilities.IsNearlyEqual(hazard, density / right));
    }
}
// Verifies that the two-sample KS statistic, computed on samples from the same
// population, is consistent with its claimed null distribution and moments.
public void TwoSampleKolmogorovNullDistributionTest() {
    ContinuousDistribution population = new ExponentialDistribution();
    int[] sizes = new int[] { 23, 30, 175 };
    foreach (int na in sizes) {
        foreach (int nb in sizes) {
            Console.WriteLine("{0} {1}", na, nb);
            Sample d = new Sample();
            ContinuousDistribution nullDistribution = null;
            for (int i = 0; i < 128; i++) {
                // Seeds vary with sample size and rep so every rep gets fresh data.
                Sample a = TestUtilities.CreateSample(population, na, 31415 + na + i);
                Sample b = TestUtilities.CreateSample(population, nb, 27182 + nb + i);
                TestResult r = Sample.KolmogorovSmirnovTest(a, b);
                d.Add(r.Statistic);
                nullDistribution = r.Distribution;
            }
            // Only do full KS test if the number of bins is larger than the sample size, otherwise we are going to fail
            // because the KS test detects the granularity of the distribution
            TestResult mr = d.KolmogorovSmirnovTest(nullDistribution);
            Console.WriteLine(mr.LeftProbability);
            if (AdvancedIntegerMath.LCM(na, nb) > d.Count) {
                Assert.IsTrue(mr.LeftProbability < 0.99);
            }
            // But always test that mean and standard deviation are as expected
            Console.WriteLine("{0} {1}", nullDistribution.Mean, d.PopulationMean.ConfidenceInterval(0.99));
            Assert.IsTrue(d.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
            Console.WriteLine("{0} {1}", nullDistribution.StandardDeviation, d.PopulationStandardDeviation.ConfidenceInterval(0.99));
            Assert.IsTrue(d.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
            Console.WriteLine("{0} {1}", nullDistribution.CentralMoment(3), d.PopulationCentralMoment(3).ConfidenceInterval(0.99));
        }
    }
}
// Fits a parametric continuous distribution to a sample by maximum likelihood.
// `factory` maps a parameter vector to a distribution; `start` and `names`
// give initial values and display names for the parameters (same length).
// Returns the fitted distribution, the parameter covariance (from the inverse
// Hessian at the maximum), and a KS goodness-of-fit test of the result.
internal static DistributionFitResult <ContinuousDistribution> MaximumLikelihoodFit(IReadOnlyList <double> sample, Func <IReadOnlyList <double>, ContinuousDistribution> factory, IReadOnlyList <double> start, IReadOnlyList <string> names) {
    Debug.Assert(sample != null);
    Debug.Assert(factory != null);
    Debug.Assert(start != null);
    Debug.Assert(names != null);
    Debug.Assert(start.Count == names.Count);
    // Define a log likelihood function
    Func <IReadOnlyList <double>, double> logL = (IReadOnlyList <double> a) => {
        ContinuousDistribution d = factory(a);
        double lnP = 0.0;
        foreach (double value in sample) {
            double P = d.ProbabilityDensity(value);
            // Zero density at an observed value makes log-likelihood -infinity;
            // treat it as a failed evaluation rather than returning garbage.
            if (P == 0.0) {
                throw new InvalidOperationException();
            }
            lnP += Math.Log(P);
        }
        return(lnP);
    };
    // Maximize it
    MultiExtremum maximum = MultiFunctionMath.FindLocalMaximum(logL, start);
    ColumnVector b = maximum.Location;
    // Parameter covariance is the inverse Hessian; a failed Cholesky means the
    // Hessian was not positive definite at the reported maximum.
    SymmetricMatrix C = maximum.HessianMatrix;
    CholeskyDecomposition CD = C.CholeskyDecomposition();
    if (CD == null) {
        throw new DivideByZeroException();
    }
    C = CD.Inverse();
    ContinuousDistribution distribution = factory(maximum.Location);
    // Goodness-of-fit of the fitted distribution against the sample.
    TestResult test = sample.KolmogorovSmirnovTest(distribution);
    return(new ContinuousDistributionFitResult(names, b, C, distribution, test));
}
// Verifies that the Pearson r statistic, computed on uncorrelated normal data,
// follows its claimed null distribution, especially at small sample sizes where
// that distribution is far from normal.
public void PearsonRNullDistribution() {
    Random rng = new Random(1111111);
    // Pick some underlying distributions for the sample variables,
    // which must be normal but can have any parameters.
    NormalDistribution xDistribution = new NormalDistribution(1, 2);
    NormalDistribution yDistribution = new NormalDistribution(3, 4);
    // Try this for several sample sizes, all low so that we see the difference from the normal distribution
    // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound".
    foreach (int n in new int[] { 3, 4, 5, 6, 8 }) {
        // find r values
        Sample rSample = new Sample();
        ContinuousDistribution rDistribution = null;
        for (int i = 0; i < 128; i++) {
            // to get each r value, construct a bivariate sample of the given size with no cross-correlation
            BivariateSample xySample = new BivariateSample();
            for (int j = 0; j < n; j++) {
                xySample.Add(
                    xDistribution.GetRandomValue(rng),
                    yDistribution.GetRandomValue(rng)
                );
            }
            TestResult rTest = xySample.PearsonRTest();
            rSample.Add(rTest.Statistic);
            rDistribution = rTest.Distribution;
        }
        // Check whether r is distributed as expected
        TestResult result = rSample.KuiperTest(new PearsonRDistribution(n));
        Assert.IsTrue(result.Probability > 0.01);
        Assert.IsTrue(rSample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(rDistribution.Mean));
        Assert.IsTrue(rSample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(rDistribution.StandardDeviation));
    }
}
// Verifies that the McNemar test statistic follows its claimed null
// distribution when the two diagnostic tests are (by construction) unbiased
// relative to one another, despite having different accuracies.
public void McNemarTestDistribution() {
    // Define a population and the accuracy of two tests for a condition
    double fractionPositive = 0.4;
    double aAccuracy = 0.2;
    double bAccuracy = 0.9;
    // Form a bunch of samples; we will run a McNemar test on each
    List <double> statistics = new List <double>();
    ContinuousDistribution distribution = null;
    Random rng = new Random(1);
    for (int i = 0; i < 32; i++) {
        // Run a and b tests on each person.
        List <bool> aResults = new List <bool>();
        List <bool> bResults = new List <bool>();
        for (int j = 0; j < 64; j++) {
            // Each test reports the true status with its own accuracy.
            bool isPositive = rng.NextDouble() < fractionPositive;
            bool aResult = rng.NextDouble() < aAccuracy ? isPositive : !isPositive;
            aResults.Add(aResult);
            bool bResult = rng.NextDouble() < bAccuracy ? isPositive : !isPositive;
            bResults.Add(bResult);
        }
        // Do a McNemar test to determine whether tests are differently weighted.
        // By our construction, they shouldn't be.
        ContingencyTable <bool, bool> table = Bivariate.Crosstabs(aResults, bResults);
        TestResult result = table.Binary.McNemarTest();
        statistics.Add(result.Statistic.Value);
        distribution = result.Statistic.Distribution;
    }
    // Since the null hypothesis is satisfied, the test statistic distribution should
    // match the claimed null distribution.
    TestResult test = statistics.KolmogorovSmirnovTest(distribution);
    Assert.IsTrue(test.Probability > 0.05);
}
// Draws avgSamplesPerBucket * buckets samples from the distribution, bins them,
// and asserts each bucket's observed fraction (plus under/overflow) matches the
// expected shape to within absoluteAccuracy.
private void TestContinuousDistributionShape(
    ContinuousDistribution distribution,
    double min, double max,
    double[] expectedShape, double expectedUnderflow, double expectedOverflow,
    int avgSamplesPerBucket, double absoluteAccuracy, string message)
{
    int bucketCount = expectedShape.Length;
    DistributionShape shape = DistributionShape.CreateMinMax(bucketCount, min, max);
    int sampleCount = bucketCount * avgSamplesPerBucket;
    for (int draw = 0; draw < sampleCount; draw++) {
        shape.Push(distribution.NextDouble());
    }
    // Normalize counts to fractions of the total sample.
    double scale = 1.0 / (avgSamplesPerBucket * bucketCount);
    Assert.AreEqual(expectedUnderflow, shape.Underflow * scale, absoluteAccuracy, message + " Underflow");
    Assert.AreEqual(expectedOverflow, shape.Overflow * scale, absoluteAccuracy, message + " Overflow");
    for (int bucket = 0; bucket < bucketCount; bucket++) {
        Assert.AreEqual(expectedShape[bucket], shape[bucket] * scale, absoluteAccuracy, message + " Bucket " + bucket.ToString());
    }
}
// Determinant of a 1x1, 2x2, or 3x3 matrix of distributions, computed with the
// distributions' own arithmetic; other sizes are rejected.
public static ContinuousDistribution Determinant(ContinuousDistribution[,] xx) {
    // 1x1: the single entry is its own determinant.
    if (xx.GetLength(0) == 1 && xx.GetLength(1) == 1) {
        return(xx[0, 0]);
    }
    // 2x2: ad - bc.
    if (xx.GetLength(0) == 2 && xx.GetLength(1) == 2) {
        return(xx[0, 0].Times(xx[1, 1]).Minus(xx[0, 1].Times(xx[1, 0])));
    }
    // 3x3: cofactor expansion down the first column.
    if (xx.GetLength(0) == 3 && xx.GetLength(1) == 3) {
        ContinuousDistribution one = xx[0, 0].Times(xx[2, 2].Times(xx[1, 1]).Minus(xx[2, 1].Times(xx[1, 2])));
        ContinuousDistribution two = xx[1, 0].Times(xx[2, 2].Times(xx[0, 1]).Minus(xx[2, 1].Times(xx[0, 2])));
        ContinuousDistribution three = xx[2, 0].Times(xx[1, 2].Times(xx[0, 1]).Minus(xx[1, 1].Times(xx[0, 2])));
        return(one.Minus(two).Plus(three));
    }
    throw new ArgumentException("Unknown matrix dimensions");
}
// Verifies that the Kuiper statistic, computed on samples actually drawn from
// the tested distribution, follows its claimed null distribution.
public void KuiperNullDistributionTest() {
    // The distribution is irrelevent; pick one at random
    ContinuousDistribution sampleDistribution = new NormalDistribution();
    // Loop over various sample sizes
    foreach (int n in TestUtilities.GenerateIntegerValues(2, 128, 8)) {
        // Create a sample to hold the KS statistics
        Sample testStatistics = new Sample();
        // and a variable to hold the claimed null distribution, which should be the same for each test
        ContinuousDistribution nullDistribution = null;
        // Create a bunch of samples, each with n+1 data points
        // We pick n+1 instead of n just to have different sample size values than in the KS test case
        for (int i = 0; i < 256; i++) {
            // Just use n+i as a seed in order to get different points each time
            Sample sample = TestUtilities.CreateSample(sampleDistribution, n + 1, 512 * n + i + 2);
            // Do a Kuiper test of the sample against the distribution each time
            TestResult r1 = sample.KuiperTest(sampleDistribution);
            // Record the test statistic value and the claimed null distribution
            testStatistics.Add(r1.Statistic);
            nullDistribution = r1.Distribution;
        }
        // Do a KS test of our sample of Kuiper statistics against the claimed null distribution
        // We could use a Kuiper test here instead, which would be way cool and meta, but we picked KS instead for variety
        TestResult r2 = testStatistics.KolmogorovSmirnovTest(nullDistribution);
        Assert.IsTrue(r2.Probability > 0.01);
        // Test moment matches, too
        Assert.IsTrue(testStatistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
        Assert.IsTrue(testStatistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Variance));
    }
}
// Verifies that the two-sample KS statistic, computed on samples from the same
// population, is consistent with its claimed null distribution and moments.
public void TwoSampleKolmogorovNullDistributionTest() {
    Random rng = new Random(4);
    ContinuousDistribution population = new ExponentialDistribution();
    int[] sizes = new int[] { 23, 30, 175 };
    foreach (int na in sizes) {
        foreach (int nb in sizes) {
            Sample d = new Sample();
            ContinuousDistribution nullDistribution = null;
            for (int i = 0; i < 128; i++) {
                List <double> a = TestUtilities.CreateDataSample(rng, population, na).ToList();
                List <double> b = TestUtilities.CreateDataSample(rng, population, nb).ToList();
                TestResult r = Univariate.KolmogorovSmirnovTest(a, b);
                d.Add(r.Statistic.Value);
                nullDistribution = r.Statistic.Distribution;
            }
            // Only do full KS test if the number of bins is larger than the sample size, otherwise we are going to fail
            // because the KS test detects the granularity of the distribution.
            TestResult mr = d.KolmogorovSmirnovTest(nullDistribution);
            if (AdvancedIntegerMath.LCM(na, nb) > d.Count) {
                Assert.IsTrue(mr.Probability > 0.01);
            }
            // But always test that mean and standard deviation are as expected
            Assert.IsTrue(d.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
            Assert.IsTrue(d.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
            // This test is actually a bit sensitive, probably because the discrete-ness of the underlying distribution
            // and the inaccuracy of the asymptotic approximation for intermediate sample size make strict comparisons iffy.
        }
    }
}
// Combines this Gaussian with another using precision weighting: the result's
// mean is the variance-weighted average and its variance is the harmonic
// combination v1*v2/(v1+v2). NOTE(review): this is the formula for the product
// of two Gaussian *densities* (a Bayesian update), not for the distribution of
// the product of two random variables — confirm that callers expect the former.
public override ContinuousDistribution Times(ContinuousDistribution two) {
    return new GaussianDistribution((mean * two.Variance + two.Mean * variance) / (variance + two.Variance), variance * two.Variance / (variance + two.Variance));
}
// Approximate the product of two independent random variables as normal
// (a poor approximation in general) using the first-order delta method:
//   E[XY] = mu_x * mu_y,  Var(XY) ~= mu_x^2 * var_y + mu_y^2 * var_x.
// The previous code weighted each variance by |mean| rather than mean^2,
// which has the wrong units (variance scales with the square of a factor).
public override ContinuousDistribution Times(ContinuousDistribution two) {
    return new GaussianDistribution(
        mean * two.Mean,
        mean * mean * two.Variance + two.Mean * two.Mean * variance);
}
// Approximate division as multiplication by an approximate reciprocal —
// a very poor approximation; assumes the divisor is far from 0.
// Delta method for the reciprocal: 1/Y ~= N(1/mu, var/mu^4).
// The previous code used var/mu^2, which has the wrong units.
public override ContinuousDistribution DividedBy(ContinuousDistribution two) {
    double m = two.Mean;
    return Times(new GaussianDistribution(1 / m, two.Variance / (m * m * m * m)));
}
// Approximate as normal (true if other is Gaussian)
// Sum of independent normals: means and variances add.
public override ContinuousDistribution Plus(ContinuousDistribution two) {
    return new GaussianDistribution(mean + two.Mean, variance + two.Variance);
}
// Multiplication by this distribution returns the other operand unchanged —
// presumably this type acts as the multiplicative identity (e.g. a flat/unit
// distribution); confirm against the enclosing class before relying on it.
public override ContinuousDistribution Times(ContinuousDistribution two) {
    return two;
}
// Returns the transpose of a matrix of distributions (shares element
// references; does not copy the distributions themselves).
public static ContinuousDistribution[,] Transpose(ContinuousDistribution[,] xx) {
    ContinuousDistribution[,] yy = new ContinuousDistribution[xx.GetLength(1), xx.GetLength(0)];
    for (int rr = 0; rr < xx.GetLength(0); rr++)
        for (int cc = 0; cc < xx.GetLength(1); cc++)
            yy[cc, rr] = xx[rr, cc];
    return yy;
}
// Estimates emotion scores for a text by looking up each word's
// valence/arousal/dominance (VAD) distributions and combining them with
// inverse-variance (precision) weighting, then mapping VAD through regression
// matrices to the remaining emotions.
// NOTE(review): if no word matches the lexicon, denom stays 0 and the final
// division yields NaN for every emotion — confirm callers handle that.
public double[] EstimateEmotions(string text) {
    List<string> words = StringUtilities.SplitWords(text.ToLower(), true);
    // 3. Look up each word in ANEWFileSource
    double[] numer = new double[(int) Emotions.COUNT], denom = new double[(int) Emotions.COUNT];
    for (int ii = 0; ii < (int) Emotions.COUNT; ii++)
        numer[ii] = denom[ii] = 0;
    foreach (string word in words) {
        // Skip whitespace-led tokens and very short words.
        if (word.StartsWith(" ") || word.Length <= 2)
            continue;
        ThreeTuple<ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> vad;
        if (!source.TryGetValue(word, out vad)) {
            // try stemmed word
            string stem = stemmer.stemTerm(word);
            if (stem == word || !source.TryGetValue(stem, out vad))
                continue;
        }
        // Precision-weighted accumulation: weight each word's mean by 1/variance.
        numer[(int) Emotions.Valence] += vad.one.Mean / vad.one.Variance;
        denom[(int) Emotions.Valence] += 1 / vad.one.Variance;
        numer[(int) Emotions.Arousal] += vad.two.Mean / vad.two.Variance;
        denom[(int) Emotions.Arousal] += 1 / vad.two.Variance;
        numer[(int) Emotions.Dominance] += vad.three.Mean / vad.three.Variance;
        denom[(int) Emotions.Dominance] += 1 / vad.three.Variance;
        // 4. Apply regressions from other paper
        // Choose the positive- or negative-valence regression matrix by the word's valence mean.
        ContinuousDistribution[,] vector = new ContinuousDistribution[,] {
            {vad.one}, {vad.two}, {vad.three}};
        ContinuousDistribution[,] emotions;
        if (vad.one.Mean >= .5)
            emotions = RandomMatrix.Multiply(positiveProduct, vector);
        else
            emotions = RandomMatrix.Multiply(negativeProduct, vector);
        // 5. Take mean within bounds and sum weighted by variance
        // Emotions beyond the first three come from the regression output,
        // transformed and clipped to [0, 1].
        // NOTE(review): the semantics of Transform(0, .1) are not visible here — confirm.
        for (int ii = 3; ii < (int) Emotions.COUNT; ii++) {
            ContinuousDistribution clipped = emotions[ii - 3, 0].Transform(0, .1).Clip(0, 1);
            numer[ii] += clipped.Mean / clipped.Variance;
            denom[ii] += 1 / clipped.Variance;
        }
    }
    // Normalize each precision-weighted sum into a weighted mean.
    for (int ii = 0; ii < (int) Emotions.COUNT; ii++)
        numer[ii] /= denom[ii];
    return numer;
}
// Wraps a base distribution under the affine map x -> shift + scale * x,
// storing the underlying distribution and the transform parameters.
public TransformedDistribution(ContinuousDistribution distribution, double shift, double scale) {
    this.distribution = distribution;
    this.shift = shift;
    this.scale = scale;
}