public void BivariateSampleManipulations() {
    // Exercise the Add overloads, X/Y transposition, removal, and containment lookup.
    BivariateSample sample = new BivariateSample();
    sample.Add(1.0, 9.0);
    sample.Add(new XY(2.0, 8.0));
    sample.Add(new double[] { 3.0, 4.0 }, new double[] { 7.0, 6.0 });
    sample.Add(new XY[] { new XY(5.0, 5.0), new XY(6.0, 4.0) });
    Assert.IsTrue(sample.Count == 6);

    // Transposing swaps the X and Y columns; transposing twice restores them.
    Assert.IsTrue(!sample.X.Contains(9.0));
    sample.TransposeXY();
    Assert.IsTrue(sample.X.Contains(9.0));
    sample.TransposeXY();

    // Removal succeeds only for points actually present.
    Assert.IsTrue(sample.Remove(2.0, 8.0));
    Assert.IsTrue(sample.Count == 5);
    Assert.IsFalse(sample.Remove(2.0, 8.0));
    Assert.IsTrue(sample.Count == 5);
    Assert.IsTrue(sample.Remove(new XY(6.0, 4.0)));
    Assert.IsTrue(sample.Count == 4);

    // Containment is order-sensitive: (x, y) is not (y, x).
    Assert.IsTrue(sample.Contains(1.0, 9.0));
    Assert.IsFalse(sample.Contains(9.0, 1.0));
    Assert.IsTrue(sample.Contains(new XY(4.0, 6.0)));

    sample.Clear();
    Assert.IsTrue(sample.Count == 0);
}
// Not fixing this bug; use Polynomial interpolation for this scenario instead
//[TestMethod]
public void Bug6392() {
    // The bug requests that we support regression with the number of points equal
    // to the number of fit parameters, i.e. a polynomial fit.
    var biSample = new BivariateSample();
    biSample.Add(0, 1);
    biSample.Add(1, -1);
    var fitResult = biSample.LinearRegression();
}
public void WaldFit() {
    // Repeatedly fit small Wald samples; the scatter of the fitted parameters
    // should agree with the covariances claimed by each fit.
    WaldDistribution wald = new WaldDistribution(3.5, 2.5);
    BivariateSample parameters = new BivariateSample();
    MultivariateSample variances = new MultivariateSample(3);
    for (int trial = 0; trial < 128; trial++) {
        Sample sample = SampleTest.CreateSample(wald, 16, trial);
        FitResult fit = WaldDistribution.FitToSample(sample);
        parameters.Add(fit.Parameters[0], fit.Parameters[1]);
        variances.Add(fit.Covariance(0, 0), fit.Covariance(1, 1), fit.Covariance(0, 1));
        Assert.IsTrue(fit.GoodnessOfFit.Probability > 0.01);
    }
    // Mean fitted parameters should cluster around the population values.
    Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Mean));
    Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wald.Shape));
    // Observed parameter variances and covariance should match the claimed ones.
    Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(0).Median));
    Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(1).Median));
    Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.99).ClosedContains(variances.Column(2).Median));
}
public void Bug6162() {
    // When UncertainMeasurementSample.FitToPolynomial used Cholesky inversion of (A^T A),
    // the inversion could fail when roundoff errors made the matrix non-positive-definite.
    // We have since changed to QR decomposition, which is more robust.

    // Real data that triggered the failure.
    double[] xValues = new double[] { 40270.65625, 40270.6569444444, 40270.6576388888, 40270.6583333332, 40270.6590277776, 40270.659722222, 40270.6604166669, 40270.6611111113, 40270.6618055557, 40270.6625000001 };
    double[] yValues = new double[] { 246.824996948242, 246.850006103516, 245.875, 246.225006103516, 246.975006103516, 247.024993896484, 246.949996948242, 246.875, 247.5, 247.100006103516 };

    UncertainMeasurementSample measurements = new UncertainMeasurementSample();
    for (int i = 0; i < 10; i++) {
        measurements.Add(xValues[i], yValues[i], 1);
    }
    UncertainMeasurementFitResult measurementFit = measurements.FitToPolynomial(3);

    // The same data via BivariateSample should also fit without throwing.
    BivariateSample pairs = new BivariateSample();
    for (int i = 0; i < 10; i++) {
        pairs.Add(xValues[i], yValues[i]);
    }
    PolynomialRegressionResult pairsFit = pairs.PolynomialRegression(3);
    foreach (Parameter p in pairsFit.Parameters) {
        Console.WriteLine(p);
    }
}
public void SpearmanNullDistributionTest() {
    // Pick independent distributions for x and y; they needn't be normal or related.
    Distribution xDistribution = new UniformDistribution();
    Distribution yDistribution = new CauchyDistribution();
    Random rng = new Random(1);

    // Generate bivariate samples of various sizes.
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) {
        Sample statistics = new Sample();
        Distribution claimedDistribution = null;
        for (int i = 0; i < 128; i++) {
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++) {
                pairs.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            TestResult result = pairs.SpearmanRhoTest();
            statistics.Add(result.Statistic);
            claimedDistribution = result.Distribution;
        }
        // The collected statistics should follow the claimed null distribution.
        TestResult r2 = statistics.KuiperTest(claimedDistribution);
        Console.WriteLine("n={0} P={1}", n, r2.LeftProbability);
        Assert.IsTrue(r2.RightProbability > 0.05);
        Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Mean));
        Assert.IsTrue(statistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Variance));
    }
}
public void BivariateLinearRegressionGoodnessOfFitDistribution() {
    // Create uncorrelated x and y values; the F-test statistics returned by
    // linear fits should then follow the expected F-distribution.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);

    Sample fStatistics = new Sample();
    for (int i = 0; i < 127; i++) {
        BivariateSample pairs = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            pairs.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        double statistic = pairs.LinearRegression().GoodnessOfFit.Statistic;
        fStatistics.Add(statistic);
    }

    // With 7 points and 2 parameters, the null statistic is F(1, 5).
    Distribution fd = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fStatistics.PopulationMean, fd.Mean);
    TestResult t = fStatistics.KolmogorovSmirnovTest(fd);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
}
public void TestBivariateRegression() {
    // Do a bunch of linear regressions on synthetic data; the resulting r^2 values
    // should be distributed as expected (Beta for uncorrelated normal noise).
    double a0 = 1.0;
    double b0 = 0.0;
    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();
    List<double> r2Sample = new List<double>();
    for (int i = 0; i < 500; i++) {
        BivariateSample xySample = new BivariateSample();
        for (int k = 0; k < 10; k++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
            xySample.Add(x, y);
        }
        LinearRegressionResult fit = xySample.LinearRegression();
        // Fix: the fitted intercept and slope were read into locals that were
        // never used; only the r^2 value participates in this test.
        r2Sample.Add(fit.RSquared);
    }
    // For n = 10 points and 2 fit parameters, r^2 ~ Beta((2-1)/2, (10-2)/2).
    ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Assert.IsTrue(ks.Probability > 0.05);
}
//Helper method for PerformMoleculeCovarianceNetworkAnalysis(). Takes paired fold changes for two
//molecules and calculates a Spearman's rho coefficient and associated two-tailed p-value.
//These data are returned in a Correlation data object, or null when either value is NaN or infinite.
public static Correlation GetSpearmanCorrelation(List<double> molecule_a_fold_changes, List<double> molecule_b_fold_changes, int molecule_a_identifier, int molecule_b_identifier) {
    Correlation new_correlation = new Correlation();
    new_correlation.mol_id_a = molecule_a_identifier;
    new_correlation.mol_id_b = molecule_b_identifier;
    // Fix: use the List<T>.Count property rather than the LINQ Count() extension (CA1829).
    new_correlation.data_points = molecule_a_fold_changes.Count;

    // Pair the fold changes element-by-element.
    // Fix: removed an unused running counter that duplicated the loop index.
    var bs = new BivariateSample("dataOne", "dataTwo");
    for (int i = 0; i < molecule_a_fold_changes.Count; i++) {
        bs.Add(molecule_a_fold_changes[i], molecule_b_fold_changes[i]);
    }

    var res = bs.SpearmanRhoTest();
    new_correlation.correlation = res.Statistic;
    // Two-tailed p-value: double the tail probability on the side of the observed statistic.
    // Fix: collapsed the duplicated branch bodies, which differed only in the tail used.
    new_correlation.p_value = res.Statistic < 0 ? 2 * res.LeftProbability : 2 * res.RightProbability;

    // Reject degenerate results (e.g. constant input produces NaN rho).
    if (!double.IsNaN(new_correlation.correlation) && !double.IsNaN(new_correlation.p_value) &&
        !double.IsInfinity(new_correlation.p_value) && !double.IsInfinity(new_correlation.correlation)) {
        return new_correlation;
    }
    return null;
}
public void NormalFitCovariances() {
    NormalDistribution population = new NormalDistribution(-1.0, 2.0);

    // A bivariate sample holds the fitted (mu, sigma) pairs so we can measure
    // their means, variances, and covariance across many fits.
    BivariateSample parameters = new BivariateSample();
    MultivariateSample covariances = new MultivariateSample(3);

    for (int trial = 0; trial < 128; trial++) {
        // Small samples make the variation in mu and sigma more substantial.
        Sample sample = TestUtilities.CreateSample(population, 8, trial);
        // Fit each sample to a normal distribution and record the fitted parameters
        // along with the covariances the fit claims for them.
        NormalFitResult fit = NormalDistribution.FitToSample(sample);
        parameters.Add(fit.Mean.Value, fit.StandardDeviation.Value);
        covariances.Add(fit.Parameters.CovarianceMatrix[0, 0], fit.Parameters.CovarianceMatrix[1, 1], fit.Parameters.CovarianceMatrix[0, 1]);
    }

    // The mean fit values should agree with the population distribution.
    Assert.IsTrue(parameters.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(population.Mean));
    Assert.IsTrue(parameters.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(population.StandardDeviation));
    // The observed scatter of the fit values should agree with the claimed covariances.
    Assert.IsTrue(parameters.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(parameters.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(1).Mean));
    Assert.IsTrue(parameters.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(covariances.Column(2).Mean));
}
public void PearsonRDistribution() {
    Random rng = new Random(1);

    // Underlying variables must be normal for the Pearson R null distribution,
    // but their parameters are arbitrary.
    NormalDistribution xDistribution = new NormalDistribution(1, 2);
    NormalDistribution yDistribution = new NormalDistribution(3, 4);

    // Use low sample sizes so we see the deviation from normality:
    // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound".
    foreach (int n in new int[] { 3, 4, 5, 6, 8 }) {
        Console.WriteLine("n={0}", n);
        Sample rValues = new Sample();
        for (int i = 0; i < 100; i++) {
            // Each r value comes from an uncorrelated bivariate sample of size n.
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++) {
                pairs.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            double r = pairs.PearsonRTest().Statistic;
            rValues.Add(r);
        }
        // Check whether r is distributed as expected.
        TestResult result = rValues.KolmogorovSmirnovTest(new PearsonRDistribution(n));
        Console.WriteLine("P={0}", result.LeftProbability);
        Assert.IsTrue(result.LeftProbability < 0.95);
    }
}
public void WilcoxonNullDistribution() {
    // Pick a very non-normal distribution; the signed-rank test is non-parametric.
    ContinuousDistribution d = new ExponentialDistribution();
    Random rng = new Random(271828);

    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 4)) {
        Sample statistics = new Sample();
        ContinuousDistribution claimedDistribution = null;
        for (int i = 0; i < 128; i++) {
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++) {
                double x = d.GetRandomValue(rng);
                double y = d.GetRandomValue(rng);
                pairs.Add(x, y);
            }
            TestResult wilcoxon = pairs.WilcoxonSignedRankTest();
            statistics.Add(wilcoxon.Statistic);
            claimedDistribution = wilcoxon.Distribution;
        }
        // The collected statistics should follow the claimed null distribution.
        TestResult ks = statistics.KolmogorovSmirnovTest(claimedDistribution);
        Assert.IsTrue(ks.Probability > 0.05);
        Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Mean));
        Assert.IsTrue(statistics.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.StandardDeviation));
    }
}
public void KendallNullDistributionTest() {
    // Pick independent distributions for x and y; they needn't be normal or related.
    ContinuousDistribution xDistribution = new LogisticDistribution();
    ContinuousDistribution yDistribution = new ExponentialDistribution();
    Random rng = new Random(314159265);

    // Generate bivariate samples of various sizes.
    foreach (int n in TestUtilities.GenerateIntegerValues(8, 64, 4)) {
        Sample tauValues = new Sample();
        ContinuousDistribution claimedDistribution = null;
        for (int i = 0; i < 128; i++) {
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++) {
                pairs.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            TestResult result = pairs.KendallTauTest();
            tauValues.Add(result.Statistic);
            claimedDistribution = result.Distribution;
        }
        // The collected statistics should follow the claimed null distribution.
        TestResult ksResult = tauValues.KolmogorovSmirnovTest(claimedDistribution);
        Assert.IsTrue(ksResult.RightProbability > 0.05);
        Assert.IsTrue(tauValues.PopulationMean.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Mean));
        Assert.IsTrue(tauValues.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(claimedDistribution.Variance));
    }
}
public void ContingencyTableProbabilitiesAndUncertainties() {
    // Start with an underlying population of cell probabilities.
    double[,] pp = new double[,] {
        { 1.0 / 45.0, 2.0 / 45.0, 3.0 / 45.0 },
        { 4.0 / 45.0, 5.0 / 45.0, 6.0 / 45.0 },
        { 7.0 / 45.0, 8.0 / 45.0, 9.0 / 45.0 }
    };

    // Form 50 contingency tables, each with N = 50 counts, and record each
    // estimated probability together with its claimed uncertainty.
    Random rng = new Random(314159);
    BivariateSample p22s = new BivariateSample();
    BivariateSample pr0s = new BivariateSample();
    BivariateSample pc1s = new BivariateSample();
    BivariateSample pr2c0s = new BivariateSample();
    BivariateSample pc1r2s = new BivariateSample();
    for (int i = 0; i < 50; i++) {
        ContingencyTable T = new ContingencyTable(3, 3);
        for (int j = 0; j < 50; j++) {
            int r, c;
            ChooseRandomCell(pp, rng.NextDouble(), out r, out c);
            T.Increment(r, c);
        }
        Assert.IsTrue(T.Total == 50);

        // For each contingency table, compute estimates of various population quantities.
        UncertainValue p22 = T.ProbabilityOf(2, 2);
        UncertainValue pr0 = T.ProbabilityOfRow(0);
        UncertainValue pc1 = T.ProbabilityOfColumn(1);
        UncertainValue pr2c0 = T.ProbabilityOfRowConditionalOnColumn(2, 0);
        UncertainValue pc1r2 = T.ProbabilityOfColumnConditionalOnRow(1, 2);
        p22s.Add(p22.Value, p22.Uncertainty);
        pr0s.Add(pr0.Value, pr0.Uncertainty);
        pc1s.Add(pc1.Value, pc1.Uncertainty);
        pr2c0s.Add(pr2c0.Value, pr2c0.Uncertainty);
        pc1r2s.Add(pc1r2.Value, pc1r2.Uncertainty);
    }

    // The estimated population mean of each probability should include the
    // corresponding probability in the underlying population.
    Assert.IsTrue(p22s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(9.0 / 45.0));
    Assert.IsTrue(pr0s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(6.0 / 45.0));
    Assert.IsTrue(pc1s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(15.0 / 45.0));
    Assert.IsTrue(pr2c0s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(7.0 / 12.0));
    Assert.IsTrue(pc1r2s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(8.0 / 24.0));

    // The claimed uncertainty for each estimate should match the standard deviation
    // observed across independent measurements; since the reported uncertainty
    // changes each time, we use its mean value for the comparison.
    Assert.IsTrue(p22s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(p22s.Y.Mean));
    Assert.IsTrue(pr0s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pr0s.Y.Mean));
    Assert.IsTrue(pc1s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pc1s.Y.Mean));
    Assert.IsTrue(pr2c0s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pr2c0s.Y.Mean));
    Assert.IsTrue(pc1r2s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pc1r2s.Y.Mean));
}
public void BivariateLinearPolynomialRegressionAgreement() {
    // A degree-1 polynomial fit should give the same answer as a linear fit.
    BivariateSample data = new BivariateSample();
    data.Add(0.0, 5.0);
    data.Add(3.0, 6.0);
    data.Add(1.0, 7.0);
    data.Add(4.0, 8.0);
    data.Add(2.0, 9.0);
    GeneralLinearRegressionResult polynomialFit = data.PolynomialRegression(1);
    GeneralLinearRegressionResult linearFit = data.LinearRegression();
    // Both parameter values and their covariances should agree.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters.ValuesVector, linearFit.Parameters.ValuesVector));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters.CovarianceMatrix, linearFit.Parameters.CovarianceMatrix));
}
public void BivariateLinearPolynomialRegressionAgreement() {
    // A degree-1 polynomial fit should give the same answer as a linear fit.
    BivariateSample data = new BivariateSample();
    data.Add(0.0, 5.0);
    data.Add(3.0, 6.0);
    data.Add(1.0, 7.0);
    data.Add(4.0, 8.0);
    data.Add(2.0, 9.0);
    FitResult polynomialFit = data.PolynomialRegression(1);
    FitResult linearFit = data.LinearRegression();
    // Parameters, covariances, and goodness-of-fit should all agree.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters, linearFit.Parameters));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.CovarianceMatrix, linearFit.CovarianceMatrix));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.GoodnessOfFit.Statistic, linearFit.GoodnessOfFit.Statistic));
}
public void BivariateLinearRegressionNullDistribution() {
    // Create uncorrelated x and y values; the F-test statistics returned by
    // linear fits should then follow the expected F-distribution, and the r-test
    // and F-test should agree with each other.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);

    Sample fs = new Sample();
    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;

    for (int i = 0; i < 127; i++) {
        BivariateSample pairs = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            pairs.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult fit = pairs.LinearRegression();
        double f = fit.F.Statistic;
        fs.Add(f);
        rSample.Add(fit.R.Statistic);
        rDistribution = fit.R.Distribution;
        fSample.Add(fit.F.Statistic);
        fDistribution = fit.F.Distribution;
        // The F statistic should be identical to the ANOVA result, and the r-test
        // and F-test probabilities should agree to high precision.
        Assert.IsTrue(fit.F.Statistic == fit.Anova.Result.Statistic);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            fit.R.Probability, fit.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
        ));
    }

    // With 7 points and 2 parameters, the null F statistic is F(1, 5).
    ContinuousDistribution fd = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
    TestResult t = fs.KolmogorovSmirnovTest(fd);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
public void BivariateSampleManipulations() {
    // Basic add/remove/contains/clear behavior of a BivariateSample.
    BivariateSample sample = new BivariateSample();
    sample.Add(1.0, 3.0);
    sample.Add(2.0, 2.0);
    sample.Add(3.0, 1.0);
    Assert.IsTrue(sample.Count == 3);

    // Removal succeeds only once per point.
    Assert.IsTrue(sample.Remove(2.0, 2.0));
    Assert.IsTrue(sample.Count == 2);
    Assert.IsFalse(sample.Remove(2.0, 2.0));

    Assert.IsTrue(sample.Contains(1.0, 3.0));
    Assert.IsFalse(sample.Contains(3.0, 3.0));

    sample.Clear();
    Assert.IsTrue(sample.Count == 0);
}
public void PairedStudentTTest() {
    // Run the paired t-test on a small fixed data set.
    BivariateSample data = new BivariateSample();
    data.Add(3, 5);
    data.Add(0, 1);
    data.Add(6, 5);
    data.Add(7, 7);
    data.Add(4, 10);
    data.Add(3, 9);
    data.Add(2, 7);
    data.Add(1, 11);
    data.Add(4, 8);
    TestResult result = data.PairedStudentTTest();
    // TODO(review): this currently only verifies the test runs without throwing;
    // maybe we should assert something about the result here?
}
public void LinearRegressionVariances() {
    // Do a set of linear regression fits; verify not only that the fit parameters
    // are what they should be, but that their variances/covariances agree with
    // the values the fits claim.
    Random rng = new Random(314159);

    // True line parameters.
    double a0 = 2.0;
    double b0 = -1.0;

    // Do a lot of fits, recording the results of each.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");
    for (int k = 0; k < 128; k++) {
        // x's may be drawn from any distribution; noise must be normal.
        ContinuousDistribution xd = new LogisticDistribution();
        ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

        // Generate a synthetic data set.
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 12; i++) {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            sample.Add(x, y);
        }

        // Do the regression and record the result, including a prediction.
        LinearRegressionResult result = sample.LinearRegression();
        UncertainValue p = result.Predict(12.0);
        data.AddRow(new Dictionary<string, object>() {
            { "a", result.Intercept.Value },
            { "va", result.Parameters.VarianceOf("Intercept") },
            { "b", result.Slope.Value },
            { "vb", result.Parameters.VarianceOf("Slope") },
            { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
            { "p", p.Value },
            { "dp", p.Uncertainty }
        });
    }

    // Observed variances of the parameters should agree with the claimed ones.
    Assert.IsTrue(data["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As<double>().Median()));
    Assert.IsTrue(data["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As<double>().Median()));
    Assert.IsTrue(data["a"].As<double>().PopulationCovariance(data["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As<double>().Median()));
    // Observed variance of the prediction should agree with its claimed uncertainty.
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As<double>().Median()));
}
public void WilcoxonNullDistribution() {
    // Pick a very non-normal distribution; the signed-rank test is non-parametric.
    ContinuousDistribution d = new ExponentialDistribution();
    Random rng = new Random(271828);

    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 4)) {
        // The test may report either a discrete (exact) or a continuous
        // (asymptotic) statistic; collect each kind separately.
        Sample wContinuousSample = new Sample();
        ContinuousDistribution wContinuousDistribution = null;
        List<int> wDiscreteSample = new List<int>();
        DiscreteDistribution wDiscreteDistribution = null;

        for (int i = 0; i < 256; i++) {
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++) {
                double x = d.GetRandomValue(rng);
                double y = d.GetRandomValue(rng);
                pairs.Add(x, y);
            }
            TestResult wilcoxon = pairs.WilcoxonSignedRankTest();
            if (wilcoxon.UnderlyingStatistic != null) {
                wDiscreteSample.Add(wilcoxon.UnderlyingStatistic.Value);
                wDiscreteDistribution = wilcoxon.UnderlyingStatistic.Distribution;
            } else {
                wContinuousSample.Add(wilcoxon.Statistic.Value);
                wContinuousDistribution = wilcoxon.Statistic.Distribution;
            }
        }

        // Whichever kind was produced, it should follow the claimed distribution.
        if (wDiscreteDistribution != null) {
            TestResult chi2 = wDiscreteSample.ChiSquaredTest(wDiscreteDistribution);
            Assert.IsTrue(chi2.Probability > 0.01);
        } else {
            TestResult ks = wContinuousSample.KolmogorovSmirnovTest(wContinuousDistribution);
            Assert.IsTrue(ks.Probability > 0.01);
            Assert.IsTrue(wContinuousSample.PopulationMean.ConfidenceInterval(0.99).ClosedContains(wContinuousDistribution.Mean));
            Assert.IsTrue(wContinuousSample.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(wContinuousDistribution.StandardDeviation));
        }
    }
}
public void PairedStudentTTest() {
    // Run the paired t-test on a small fixed data set and print the result.
    BivariateSample data = new BivariateSample();
    data.Add(3, 5);
    data.Add(0, 1);
    data.Add(6, 5);
    data.Add(7, 7);
    data.Add(4, 10);
    data.Add(3, 9);
    data.Add(2, 7);
    data.Add(1, 11);
    data.Add(4, 8);
    Console.WriteLine(data.Count);
    TestResult result = data.PairedStudentTTest();
    Console.WriteLine(result.Statistic);
    Console.WriteLine(result.LeftProbability);
}
// We see a reliable failure of KS or Kuiper tests for Beta distribution with small parameters, e.g. Beta(0.01,0.01).
// This appears to occur whether we use inverse CDF or x/(x+y) to generate beta deviates.
// Perhaps it indicates a problem with P computation for beta in this region?
private static List<ContinuousDistribution> CreateDistributions() {
    // A broad menagerie of distributions to run the generic distribution tests against.
    List<ContinuousDistribution> distributions = new List<ContinuousDistribution>(new ContinuousDistribution[] {
        new NoncentralChiSquaredDistribution(2, 3.0),
        new CauchyDistribution(1.0, 2.0),
        new UniformDistribution(Interval.FromEndpoints(-2.0, 1.0)),
        new UniformDistribution(Interval.FromEndpoints(7.0, 9.0)),
        new NormalDistribution(3.0, 2.0),
        new ExponentialDistribution(2.0),
        new ChiSquaredDistribution(3),
        new StudentDistribution(5),
        new LognormalDistribution(0.2, 0.4),
        new WeibullDistribution(2.0, 3.0),
        new LogisticDistribution(-4.0, 5.0),
        new FisherDistribution(4.0, 7.0),
        new KuiperDistribution(),
        new KolmogorovDistribution(),
        new TriangularDistribution(1.0, 2.0, 4.0),
        new BetaDistribution(0.5, 0.5),
        new BetaDistribution(0.5, 2.0),
        new BetaDistribution(2.0, 2.0),
        new ParetoDistribution(1.0, 3.0),
        new WaldDistribution(3.0, 1.0),
        new PearsonRDistribution(7),
        new GammaDistribution(0.8),
        new GammaDistribution(3.0, 5.0),
        new GammaDistribution(96.2),
        new GumbelDistribution(1.2, 2.3),
        new LaplaceDistribution(4.5, 6.0),
        new ChiDistribution(1),
        new ChiDistribution(4),
        new RayleighDistribution(3.0),
        new FrechetDistribution(2.9, 4.0),
        new NoncentralChiSquaredDistribution(2, 1.5),
        new TestDistribution()
    });

    // Add some distributions that come from tests.
    Sample small = TestUtilities.CreateSample(distributions[0], 7);
    Sample large = TestUtilities.CreateSample(distributions[1], 127);
    distributions.Add(small.KolmogorovSmirnovTest(distributions[2]).Statistic.Distribution);
    distributions.Add(large.KolmogorovSmirnovTest(distributions[3]).Statistic.Distribution);
    //distributions.Add(small.KuiperTest(distributions[4]).Distribution);
    //distributions.Add(large.KuiperTest(distributions[5]).Distribution);
    //distributions.Add(Sample.MannWhitneyTest(small, large).Distribution);
    BivariateSample two = new BivariateSample();
    two.Add(new double[] { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 }, new double[] { 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 });
    //distributions.Add(two.SpearmanRhoTest().Distribution);
    //distributions.Add(two.KendallTauTest().Distribution);

    return distributions;
}
public void BivariateSampleCopy() {
    // A copy must be independent of the original: adding to one should not
    // affect the other.
    BivariateSample original = new BivariateSample();
    original.Add(1.0, 2.0);
    BivariateSample copy = original.Copy();
    copy.Add(3.0, 4.0);
    Assert.IsTrue(original.Count == 1);
    Assert.IsTrue(copy.Count == 2);
}
public void TestBivariateRegression() {
    // Do many linear regressions on synthetic data; compute r^2 by hand from the
    // residuals of each fit and compare its distribution to the expected Beta.
    double a0 = 1.0;
    double b0 = 0.0;
    Random rng = new Random(1001110000);
    Distribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
    Distribution eDistribution = new NormalDistribution();

    Sample r2Sample = new Sample();
    for (int i = 0; i < 500; i++) {
        BivariateSample xySample = new BivariateSample();
        for (int k = 0; k < 10; k++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
            xySample.Add(x, y);
        }
        FitResult fit = xySample.LinearRegression();
        double a = fit.Parameters[0];
        double b = fit.Parameters[1];

        // r^2 = 1 - (residual sum of squares) / (total sum of squares)
        double ss2 = 0.0;
        double ss1 = 0.0;
        foreach (XY xy in xySample) {
            ss2 += MoreMath.Sqr(xy.Y - (a + b * xy.X));
            ss1 += MoreMath.Sqr(xy.Y - xySample.Y.Mean);
        }
        double r2 = 1.0 - ss2 / ss1;
        r2Sample.Add(r2);
    }

    Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

    // For n = 10 points and 2 fit parameters, r^2 ~ Beta((2-1)/2, (10-2)/2).
    Distribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
    //Distribution r2Distribution = new BetaDistribution((10 - 2) / 2.0, (2 - 1) / 2.0);
    Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Console.WriteLine(ks.RightProbability);
    Console.WriteLine(ks.Probability);
}
public void BivariateNullAssociation() {
    Random rng = new Random(314159265);

    // Sample sets for our three association test statistics, plus variables to
    // hold the claimed null distribution of each.
    Sample PS = new Sample();
    Sample SS = new Sample();
    Sample KS = new Sample();
    ContinuousDistribution PD = null;
    ContinuousDistribution SD = null;
    ContinuousDistribution KD = null;

    // Generate many bivariate samples and conduct all three tests on each.
    for (int j = 0; j < 100; j++) {
        // Sample size should be large so that asymptotic assumptions are justified.
        BivariateSample S = new BivariateSample();
        for (int i = 0; i < 100; i++) {
            double x = rng.NextDouble();
            double y = rng.NextDouble();
            S.Add(x, y);
        }
        TestResult PR = S.PearsonRTest();
        PS.Add(PR.Statistic);
        PD = PR.Distribution;
        TestResult SR = S.SpearmanRhoTest();
        SS.Add(SR.Statistic);
        SD = SR.Distribution;
        TestResult KR = S.KendallTauTest();
        KS.Add(KR.Statistic);
        KD = KR.Distribution;
    }

    // KS-test each statistic sample against its claimed null distribution.
    Assert.IsTrue(PS.KolmogorovSmirnovTest(PD).LeftProbability < 0.95);
    Assert.IsTrue(SS.KolmogorovSmirnovTest(SD).LeftProbability < 0.95);
    Assert.IsTrue(KS.KolmogorovSmirnovTest(KD).LeftProbability < 0.95);
}
public void BivariateSampleEnumerations() {
    // Enumerating a sample should yield exactly the points that were added.
    List<XY> points = new List<XY>(new XY[] { new XY(1.0, 2.0), new XY(2.0, 3.0), new XY(3.0, 4.0) });
    BivariateSample sample = new BivariateSample();
    sample.Add(points);
    Assert.IsTrue(sample.Count == points.Count);
    // Remove each enumerated point from the source list; all must be found.
    foreach (XY point in sample) {
        Assert.IsTrue(points.Remove(point));
    }
    Assert.IsTrue(points.Count == 0);
}
public void BivariateLinearRegressionNullDistribution() {
    // Create uncorrelated x and y values and do a linear fit. The r-test and
    // F-test statistics returned by the fits should agree with each other, and
    // both should follow their claimed null distributions.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);

    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;

    for (int i = 0; i < 127; i++) {
        BivariateSample pairs = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            pairs.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult fit = pairs.LinearRegression();
        rSample.Add(fit.R.Statistic.Value);
        rDistribution = fit.R.Statistic.Distribution;
        fSample.Add(fit.F.Statistic.Value);
        fDistribution = fit.F.Statistic.Distribution;
        // The F statistic should be identical to the ANOVA result, and the r-test
        // and F-test probabilities should agree to high precision.
        Assert.IsTrue(fit.F.Statistic.Value == fit.Anova.Result.Statistic.Value);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            fit.R.Probability, fit.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
        ));
    }

    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
public void BivariateNonlinearFitVariances() {
    // Verify that we can fit a non-linear function, that the estimated parameters
    // cluster around the true values, and that the estimated parameter covariances
    // reflect the actually observed covariances.
    double a = 2.7;
    double b = 3.1;
    ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
    ContinuousDistribution eDistribution = new NormalDistribution(0.0, 4.0);

    FrameTable parameters = new FrameTable();
    parameters.AddColumns<double>("a", "b");
    MultivariateSample covariances = new MultivariateSample(3);

    for (int i = 0; i < 64; i++) {
        // Generate a synthetic data set y = a * x^b + noise.
        BivariateSample sample = new BivariateSample();
        Random rng = new Random(i);
        for (int j = 0; j < 8; j++) {
            double x = xDistribution.GetRandomValue(rng);
            double y = a * Math.Pow(x, b) + eDistribution.GetRandomValue(rng);
            sample.Add(x, y);
        }
        // Fit to the same functional form, starting from a generic guess.
        NonlinearRegressionResult fit = sample.NonlinearRegression(
            (IReadOnlyList<double> p, double x) => p[0] * Math.Pow(x, p[1]),
            new double[] { 1.0, 1.0 }
        );
        parameters.AddRow(fit.Parameters.ValuesVector);
        covariances.Add(fit.Parameters.CovarianceMatrix[0, 0], fit.Parameters.CovarianceMatrix[1, 1], fit.Parameters.CovarianceMatrix[0, 1]);
    }

    // Fitted parameters should cluster around the true values.
    Assert.IsTrue(parameters["a"].As<double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(a));
    Assert.IsTrue(parameters["b"].As<double>().PopulationMean().ConfidenceInterval(0.99).ClosedContains(b));
    // Observed parameter variances and covariance should agree with the claimed ones.
    Assert.IsTrue(parameters["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(parameters["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));
    Assert.IsTrue(parameters["a"].As<double>().PopulationCovariance(parameters["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
    Assert.IsTrue(Bivariate.PopulationCovariance(parameters["a"].As<double>(), parameters["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
}
public void PearsonRNullDistribution() {
    Random rng = new Random(1111111);

    // Underlying variables must be normal for the Pearson R null distribution,
    // but their parameters are arbitrary.
    NormalDistribution xDistribution = new NormalDistribution(1, 2);
    NormalDistribution yDistribution = new NormalDistribution(3, 4);

    // Use low sample sizes so we see the deviation from normality:
    // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound".
    foreach (int n in new int[] { 3, 4, 5, 6, 8 }) {
        Sample rValues = new Sample();
        ContinuousDistribution rDistribution = null;
        for (int i = 0; i < 128; i++) {
            // Each r value comes from an uncorrelated bivariate sample of size n.
            BivariateSample pairs = new BivariateSample();
            for (int j = 0; j < n; j++) {
                pairs.Add(
                    xDistribution.GetRandomValue(rng),
                    yDistribution.GetRandomValue(rng)
                );
            }
            TestResult test = pairs.PearsonRTest();
            rValues.Add(test.Statistic);
            rDistribution = test.Distribution;
        }
        // Check whether r is distributed as expected.
        TestResult result = rValues.KuiperTest(new PearsonRDistribution(n));
        Assert.IsTrue(result.Probability > 0.01);
        Assert.IsTrue(rValues.PopulationMean.ConfidenceInterval(0.95).ClosedContains(rDistribution.Mean));
        Assert.IsTrue(rValues.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(rDistribution.StandardDeviation));
    }
}
public static TestResult RunPairedTest(List<double> x, List<double> y, PairedTest pt) {
    // Build a paired sample from the two series and dispatch to the requested test.
    BivariateSample sample = new BivariateSample();
    sample.Add(x, y);
    switch (pt) {
        case PairedTest.KendallTau:
            return sample.KendallTauTest();
        case PairedTest.PearsonR:
            return sample.PearsonRTest();
        case PairedTest.SpearmanRho:
            return sample.SpearmanRhoTest();
        case PairedTest.StudentT:
            return sample.PairedStudentTTest();
        default:
            // Unknown test selector: no result.
            return null;
    }
}