/// <summary>
/// Generates noisy data from a known three-coefficient polynomial, fits a polynomial
/// of the same degree, and checks that the fit is consistent with the generating model.
/// </summary>
public void BivariatePolynomialRegressionSimple() {

    // The model the data is drawn from.
    Polynomial model = Polynomial.FromCoefficients(3.0, -2.0, 1.0);
    var degree = model.Degree;

    // Abscissas come from a Cauchy distribution; ordinates are the model value plus normal noise.
    Random random = new Random(1);
    ContinuousDistribution abscissaDistribution = new CauchyDistribution(1.0, 2.0);
    ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 3.0);
    List<double> abscissas = new List<double>(TestUtilities.CreateDataSample(random, abscissaDistribution, 10));
    List<double> ordinates = new List<double>(
        abscissas.Select(value => model.Evaluate(value) + noiseDistribution.GetRandomValue(random))
    );

    PolynomialRegressionResult regression = Bivariate.PolynomialRegression(ordinates, abscissas, degree);

    // One fitted parameter per coefficient, each compatible with the true coefficient.
    Assert.IsTrue(regression.Parameters.Count == degree + 1);
    for (int order = 0; order <= degree; order++) {
        var interval = regression.Coefficient(order).ConfidenceInterval(0.99);
        Assert.IsTrue(interval.ClosedContains(model.Coefficient(order)));
    }

    // Reported residuals must equal observed minus predicted.
    int pointCount = abscissas.Count;
    Assert.IsTrue(regression.Residuals.Count == pointCount);
    for (int index = 0; index < pointCount; index++) {
        double predicted = regression.Predict(abscissas[index]).Value;
        double expectedResidual = ordinates[index] - predicted;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(expectedResidual, regression.Residuals[index]));
    }

    // The intercept is just another name for the order-zero coefficient.
    Assert.IsTrue(regression.Intercept == regression.Coefficient(0));
}
/// <summary>
/// Example: builds a 2x2 contingency table of test result ("P"/"N") against true
/// condition (true/false), then prints marginal totals, estimated probabilities,
/// conditional probabilities, the log odds ratio, and Pearson and Fisher test results
/// to the console.
/// </summary>
public static void ContingencyTable() {

    // Build the table by declaring the row and column values, then filling in each cell.
    ContingencyTable<string, bool> contingency = new ContingencyTable<string, bool>(
        new string[] { "P", "N" }, new bool[] { true, false }
    );
    contingency["P", true] = 35;
    contingency["P", false] = 65;
    contingency["N", true] = 4;
    contingency["N", false] = 896;

    // Alternative construction: cross-tabulate two parallel lists of observations.
    // The resulting table is not used further; it only illustrates the call.
    IReadOnlyList<string> x = new string[] { "N", "P", "N", "N", "P", "N", "N", "N", "P" };
    IReadOnlyList<bool> y = new bool[] { false, false, false, true, true, false, false, false, true };
    ContingencyTable<string, bool> contingencyFromLists = Bivariate.Crosstabs(x, y);

    // Marginal totals.
    foreach (string row in contingency.Rows) {
        Console.WriteLine($"Total count of {row}: {contingency.RowTotal(row)}");
    }
    foreach (bool column in contingency.Columns) {
        Console.WriteLine($"Total count of {column}: {contingency.ColumnTotal(column)}");
    }
    Console.WriteLine($"Total counts: {contingency.Total}");

    // Estimated marginal probabilities.
    foreach (string row in contingency.Rows) {
        UncertainValue probability = contingency.ProbabilityOfRow(row);
        Console.WriteLine($"Estimated probability of {row}: {probability}");
    }
    foreach (bool column in contingency.Columns) {
        UncertainValue probability = contingency.ProbabilityOfColumn(column);
        // Spelling fixed: this message previously read "probablity".
        Console.WriteLine($"Estimated probability of {column}: {probability}");
    }

    // Conditional probabilities.
    UncertainValue sensitivity = contingency.ProbabilityOfRowConditionalOnColumn("P", true);
    Console.WriteLine($"Chance of P result given true condition: {sensitivity}");
    UncertainValue precision = contingency.ProbabilityOfColumnConditionalOnRow(true, "P");
    Console.WriteLine($"Chance of true condition given P result: {precision}");

    // Measures and tests of association.
    UncertainValue logOddsRatio = contingency.Binary.LogOddsRatio;
    Console.WriteLine($"log(r) = {logOddsRatio}");

    TestResult pearson = contingency.PearsonChiSquaredTest();
    Console.WriteLine($"Pearson χ² = {pearson.Statistic.Value} has P = {pearson.Probability}.");

    TestResult fisher = contingency.Binary.FisherExactTest();
    Console.WriteLine($"Fisher exact test has P = {fisher.Probability}.");
}
/// <summary>
/// For several small sample sizes, repeatedly draws independent x and y samples, runs
/// the Spearman and Kendall tests, and checks via a chi-squared test that the observed
/// discrete statistics follow the null distribution each test reports.
/// </summary>
public void BivariateAssociationDiscreteNullDistribution() {

    Random random = new Random(1);

    // Deliberately very non-normal sources, since the tests under examination are non-parametric.
    ContinuousDistribution xSource = new FrechetDistribution(1.0);
    ContinuousDistribution ySource = new CauchyDistribution();

    // Small sample sizes, so that the tests can report exact (discrete) distributions.
    foreach (int sampleSize in TestUtilities.GenerateIntegerValues(4, 24, 4)) {

        // Observed statistic values, and the last distribution each test claimed for them.
        var rhoValues = new List<int>();
        var tauValues = new List<int>();
        DiscreteDistribution rhoClaimed = null;
        DiscreteDistribution tauClaimed = null;

        for (int run = 0; run < 512; run++) {

            // Draw x and y alternately so the random stream is consumed pairwise.
            var xs = new List<double>();
            var ys = new List<double>();
            for (int k = 0; k < sampleSize; k++) {
                xs.Add(xSource.GetRandomValue(random));
                ys.Add(ySource.GetRandomValue(random));
            }

            // Runs without an underlying discrete statistic are simply not recorded.
            DiscreteTestStatistic rho = Bivariate.SpearmanRhoTest(xs, ys).UnderlyingStatistic;
            if (rho != null) {
                rhoValues.Add(rho.Value);
                rhoClaimed = rho.Distribution;
            }

            DiscreteTestStatistic tau = Bivariate.KendallTauTest(xs, ys).UnderlyingStatistic;
            if (tau != null) {
                tauValues.Add(tau.Value);
                tauClaimed = tau.Distribution;
            }
        }

        // Compare the recorded statistics against the claimed distributions.
        if (rhoClaimed != null) {
            TestResult rhoCheck = rhoValues.ChiSquaredTest(rhoClaimed);
            Assert.IsTrue(rhoCheck.Probability > 0.01);
        }
        if (tauClaimed != null) {
            TestResult tauCheck = tauValues.ChiSquaredTest(tauClaimed);
            Assert.IsTrue(tauCheck.Probability > 0.01);
        }
    }
}
public static void bivariate_normal_cdf_values_test()
//****************************************************************************80
//
//  Purpose:
//
//    BIVARIATE_NORMAL_CDF_VALUES_TEST tests BIVARIATE_NORMAL_CDF_VALUES.
//
//  Discussion:
//
//    Repeatedly calls Bivariate.bivariate_normal_cdf_values, which advances
//    n_data and fills in X, Y, R and the CDF value, and prints one table
//    row per call until n_data comes back as 0.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license.
//
//  Modified:
//
//    23 May 2009
//
//  Author:
//
//    John Burkardt
//
{
    double fxy = 0;
    double r = 0;
    double x = 0;
    double y = 0;

    Console.WriteLine("");
    Console.WriteLine("BIVARIATE_NORMAL_CDF_VALUES_TEST:");
    Console.WriteLine(" BIVARIATE_NORMAL_CDF_VALUES stores values of");
    Console.WriteLine(" the bivariate normal CDF.");
    Console.WriteLine("");
    Console.WriteLine(" X Y R F(R)(X,Y)");
    Console.WriteLine("");

    // n_data == 0 both starts the iteration and signals that the data is exhausted.
    int n_data = 0;

    for (;;)
    {
        Bivariate.bivariate_normal_cdf_values(ref n_data, ref x, ref y, ref r, ref fxy);

        if (n_data == 0)
        {
            break;
        }

        // The CDF value is printed exactly once, with up to 16 decimals, using the
        // invariant culture like the other columns. The row previously appended a
        // second, culture-formatted copy of fxy directly after the padded value.
        Console.WriteLine(" "
                          + x.ToString(CultureInfo.InvariantCulture).PadLeft(12) + " "
                          + y.ToString(CultureInfo.InvariantCulture).PadLeft(12) + " "
                          + r.ToString(CultureInfo.InvariantCulture).PadLeft(12) + " "
                          + fxy.ToString("0.################", CultureInfo.InvariantCulture).PadLeft(24));
    }
}
/// <summary>
/// Runs the Pearson, Spearman, and Kendall tests on 100 samples of 100 independently
/// drawn (x, y) pairs and checks, with a Kolmogorov-Smirnov test, that each statistic
/// follows the distribution the corresponding test reports for it.
/// </summary>
public void BivariateNullAssociation() {

    Random random = new Random(31415926);

    // One column of observed statistic values per test.
    FrameTable statistics = new FrameTable();
    statistics.AddColumn<double>("r");
    statistics.AddColumn<double>("ρ");
    statistics.AddColumn<double>("τ");

    // The distribution most recently claimed by each test for its statistic.
    ContinuousDistribution pearsonClaimed = null;
    ContinuousDistribution spearmanClaimed = null;
    ContinuousDistribution kendallClaimed = null;

    // x and y are drawn from unrelated distributions.
    ContinuousDistribution xSource = new LognormalDistribution();
    ContinuousDistribution ySource = new CauchyDistribution();

    for (int sampleIndex = 0; sampleIndex < 100; sampleIndex++) {

        var xs = new List<double>();
        var ys = new List<double>();
        for (int pointIndex = 0; pointIndex < 100; pointIndex++) {
            xs.Add(xSource.GetRandomValue(random));
            ys.Add(ySource.GetRandomValue(random));
        }

        TestResult pearson = Bivariate.PearsonRTest(xs, ys);
        TestResult spearman = Bivariate.SpearmanRhoTest(xs, ys);
        TestResult kendall = Bivariate.KendallTauTest(xs, ys);

        pearsonClaimed = pearson.Statistic.Distribution;
        spearmanClaimed = spearman.Statistic.Distribution;
        kendallClaimed = kendall.Statistic.Distribution;

        var row = new Dictionary<string, object>();
        row.Add("r", pearson.Statistic.Value);
        row.Add("ρ", spearman.Statistic.Value);
        row.Add("τ", kendall.Statistic.Value);
        statistics.AddRow(row);
    }

    // Each column of observed statistics should be compatible with its claimed distribution.
    TestResult pearsonCheck = statistics["r"].As<double>().KolmogorovSmirnovTest(pearsonClaimed);
    Assert.IsTrue(pearsonCheck.Probability > 0.05);

    TestResult spearmanCheck = statistics["ρ"].As<double>().KolmogorovSmirnovTest(spearmanClaimed);
    Assert.IsTrue(spearmanCheck.Probability > 0.05);

    TestResult kendallCheck = statistics["τ"].As<double>().KolmogorovSmirnovTest(kendallClaimed);
    Assert.IsTrue(kendallCheck.Probability > 0.05);
}
/// <summary>
/// Fits the power law y = a * x^b to 64 small noisy samples and checks that the fitted
/// parameters cluster around the true (a, b), and that the covariances reported by the
/// fits agree with the covariances actually observed across the fitted parameters.
/// </summary>
public void BivariateNonlinearFitVariances() {

    // True parameter values of the generating model.
    double a = 2.7;
    double b = 3.1;

    ContinuousDistribution abscissaSource = new ExponentialDistribution(2.0);
    ContinuousDistribution noiseSource = new NormalDistribution(0.0, 4.0);

    // Fitted (a, b) from each run.
    FrameTable parameters = new FrameTable();
    parameters.AddColumns<double>("a", "b");

    // Reported var(a), var(b), cov(a, b) from each run, in that column order.
    MultivariateSample covariances = new MultivariateSample(3);

    for (int seed = 0; seed < 64; seed++) {

        // Each run gets its own generator, seeded by the run index.
        BivariateSample sample = new BivariateSample();
        Random random = new Random(seed);
        for (int point = 0; point < 8; point++) {
            double abscissa = abscissaSource.GetRandomValue(random);
            double ordinate = a * Math.Pow(abscissa, b) + noiseSource.GetRandomValue(random);
            sample.Add(abscissa, ordinate);
        }

        // Fit the same functional form, starting from (1, 1).
        NonlinearRegressionResult fit = sample.NonlinearRegression(
            (IReadOnlyList<double> theta, double t) => theta[0] * Math.Pow(t, theta[1]),
            new double[] { 1.0, 1.0 }
        );

        parameters.AddRow(fit.Parameters.ValuesVector);

        var reported = fit.Parameters.CovarianceMatrix;
        covariances.Add(reported[0, 0], reported[1, 1], reported[0, 1]);
    }

    var aFits = parameters["a"].As<double>();
    var bFits = parameters["b"].As<double>();

    // Fitted values should be centered on the true values.
    Assert.IsTrue(aFits.PopulationMean().ConfidenceInterval(0.99).ClosedContains(a));
    Assert.IsTrue(bFits.PopulationMean().ConfidenceInterval(0.99).ClosedContains(b));

    // Observed scatter of the fitted values should match the average reported variances.
    Assert.IsTrue(aFits.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
    Assert.IsTrue(bFits.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));

    // Same for the covariance, checked through both the extension-method and static forms.
    double meanReportedCovariance = covariances.Column(2).Mean;
    Assert.IsTrue(aFits.PopulationCovariance(bFits).ConfidenceInterval(0.99).ClosedContains(meanReportedCovariance));
    Assert.IsTrue(Bivariate.PopulationCovariance(aFits, bFits).ConfidenceInterval(0.99).ClosedContains(meanReportedCovariance));
}
/// <summary>
/// Example: runs the Pearson, Spearman, and Kendall association tests on one small
/// paired data set and prints each statistic and its P-value to the console.
/// </summary>
public static void Association() {

    double[] x = { -0.58, 0.92, 1.41, 1.62, 2.72, 3.14 };
    double[] y = { 1.00, 0.00, 2.00, 16.00, 18.0, 20.0 };

    // Each test is run and reported before the next one starts.
    Report("Pearson", Bivariate.PearsonRTest(x, y));
    Report("Spearman", Bivariate.SpearmanRhoTest(x, y));
    Report("Kendall", Bivariate.KendallTauTest(x, y));

    // Prints the two-line summary for one test result.
    void Report(string label, TestResult outcome) {
        Console.WriteLine($"{label} {outcome.Statistic.Name} = {outcome.Statistic.Value}");
        Console.WriteLine($"{outcome.Type} P = {outcome.Probability}");
    }
}
/// <summary>
/// Simulates two diagnostic tests applied to the same people, runs a McNemar test on
/// each of 32 simulated samples, and checks with a Kolmogorov-Smirnov test that the
/// collected statistics follow the null distribution reported by the McNemar test.
/// </summary>
public void McNemarTestDistribution() {

    // Population prevalence and the probability that each test reports the true state.
    double fractionPositive = 0.4;
    double aAccuracy = 0.2;
    double bAccuracy = 0.9;

    var observedStatistics = new List<double>();
    ContinuousDistribution claimedDistribution = null;

    Random random = new Random(1);
    for (int sampleIndex = 0; sampleIndex < 32; sampleIndex++) {

        // Apply both tests to each of 64 people.
        var outcomesA = new List<bool>();
        var outcomesB = new List<bool>();
        for (int person = 0; person < 64; person++) {

            // Draw order matters for reproducibility: condition, then test a, then test b.
            bool hasCondition = random.NextDouble() < fractionPositive;

            // A correct test reports the true state; an incorrect one reports its negation.
            bool aIsCorrect = random.NextDouble() < aAccuracy;
            outcomesA.Add(aIsCorrect == hasCondition);

            bool bIsCorrect = random.NextDouble() < bAccuracy;
            outcomesB.Add(bIsCorrect == hasCondition);
        }

        // The test treats the McNemar null hypothesis as holding for this construction.
        ContingencyTable<bool, bool> crosstab = Bivariate.Crosstabs(outcomesA, outcomesB);
        TestResult mcNemar = crosstab.Binary.McNemarTest();
        observedStatistics.Add(mcNemar.Statistic.Value);
        claimedDistribution = mcNemar.Statistic.Distribution;
    }

    // Under the null hypothesis the statistics should match the claimed distribution.
    TestResult agreement = observedStatistics.KolmogorovSmirnovTest(claimedDistribution);
    Assert.IsTrue(agreement.Probability > 0.05);
}
/*
 * Reports whether the conic sections el and el1 intersect.
 *
 * Both conics are given in bivariate form,
 *     a x^2 + b xy + c y^2 + d x + e y + f = 0.
 * The check is delegated to YIntersect, which is described as building a
 * quartic that must have a real solution if the conics intersect. It is
 * applied twice: once with the coefficients as given (real Y intersects)
 * and once with the roles of A/C and D/E exchanged (real X intersects).
 * Both checks must succeed; the second is skipped when the first fails.
 */
bool ConicsIntersect(Bivariate el, Bivariate el1)
{
    /* real y intersects */
    if (!YIntersect(el.A, el.B, el.C, el.D, el.E, el.F,
                    el1.A, el1.B, el1.C, el1.D, el1.E, el1.F))
    {
        return false;
    }

    /* real x intersects: same test with the x and y coefficients swapped */
    return YIntersect(el.C, el.B, el.A, el.E, el.D, el.F,
                      el1.C, el1.B, el1.A, el1.E, el1.D, el1.F);
}
/// <summary>
/// Builds a frame of (Group, Outcome) rows that include both reference nulls and
/// nullable-struct nulls, cross-tabulates it, and checks that the table's counts,
/// marginal totals, and inferred probabilities agree with the data and the
/// generating model.
/// </summary>
public void ContingencyTableProbabilities() {

    // Construct data where (i) there are both reference-nulls and nullable-struct-nulls,
    // (ii) all values of one column are equally likely, and
    // (iii) values of the other column depend on the value of the first column.
    List<string> groups = new List<string>() { "A", "B", "C", null };

    FrameTable data = new FrameTable();
    data.AddColumn<string>("Group");
    data.AddColumn<bool?>("Outcome");

    int n = 512;
    double pOutcomeNull = 0.05;
    // Chance of a true outcome before null-masking; it falls by 0.2 per group index.
    Func<int, double> pOutcome = groupIndex => 0.8 - 0.2 * groupIndex;
    Random rng = new Random(10101010);
    for (int i = 0; i < n; i++) {
        int groupIndex = rng.Next(0, groups.Count);
        string group = groups[groupIndex];
        bool? outcome = (rng.NextDouble() < pOutcome(groupIndex));
        // Independently of its value, mask the outcome as null with probability pOutcomeNull.
        if (rng.NextDouble() < pOutcomeNull) {
            outcome = null;
        }
        data.AddRow(group, outcome);
    }

    // Form a contingency table.
    ContingencyTable<string, bool?> table = Bivariate.Crosstabs(data["Group"].As<string>(), data["Outcome"].As<bool?>());

    // Total counts should match
    Assert.IsTrue(table.Total == n);

    // Every row value in the table should be one of the known groups
    foreach (string row in table.Rows) {
        Assert.IsTrue(groups.Contains(row));
    }

    // Counts in each cell and marginal totals should match
    foreach (string group in table.Rows) {
        int rowTotal = 0;
        foreach (bool? outcome in table.Columns) {
            FrameView view = data.Where(r => ((string)r["Group"] == group) && ((bool?)r["Outcome"] == outcome));
            Assert.IsTrue(table[group, outcome] == view.Rows.Count);
            rowTotal += view.Rows.Count;
        }
        Assert.IsTrue(rowTotal == table.RowTotal(group));
    }

    // Inferred probabilities should agree with model.
    // This null-outcome check was previously repeated verbatim after the loop below; once is enough.
    Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));

    // Groups are drawn uniformly, so each has probability 1 / (number of groups).
    double pGroup = 1.0 / groups.Count;
    for (int groupIndex = 0; groupIndex < groups.Count; groupIndex++) {
        string group = groups[groupIndex];
        Assert.IsTrue(table.ProbabilityOfRow(group).ConfidenceInterval(0.99).ClosedContains(pGroup));
        // A true outcome survives only when it was not masked as null.
        Assert.IsTrue(table.ProbabilityOfColumnConditionalOnRow(true, group).ConfidenceInterval(0.99).ClosedContains(pOutcome(groupIndex) * (1.0 - pOutcomeNull)));
    }

    // Pearson test should catch that rows and columns are correlated
    Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);
}
private static void test02()
//****************************************************************************80
//
//  Purpose:
//
//    TEST02 tests BIVNOR.
//
//  Discussion:
//
//    Each tabulated value supplied by Bivariate.bivariate_normal_cdf_values
//    is printed beside the value obtained from bivariatenormal.bivnor,
//    together with the absolute difference of the two.
//
//  Licensing:
//
//    This code is distributed under the GNU LGPL license.
//
//  Modified:
//
//    13 April 2012
//
//  Author:
//
//    John Burkardt
//
{
    double tabulated = 0;
    double rho = 0;
    double xValue = 0;
    double yValue = 0;
    bivariatenormal.BivnorData data = new();

    Console.WriteLine("");
    Console.WriteLine("TEST02");
    Console.WriteLine(" Compare BIVNOR with some tabulated data.");
    Console.WriteLine("");
    Console.WriteLine(" X Y "
                      + "R P P"
                      + " DIFF"
                      + " "
                      + " (Tabulated) (BIVNOR)");
    Console.WriteLine("");

    // An index of 0 starts the table walk and also marks its end.
    int index = 0;

    while (true)
    {
        Bivariate.bivariate_normal_cdf_values(ref index, ref xValue, ref yValue, ref rho, ref tabulated);

        if (index == 0)
        {
            break;
        }

        //
        //  BIVNOR computes the "tail" of the probability, and we want the
        //  initial part, hence the negated arguments.
        //
        double computed = bivariatenormal.bivnor(ref data, -xValue, -yValue, rho);

        string xText = xValue.ToString(CultureInfo.InvariantCulture).PadLeft(8);
        string yText = yValue.ToString(CultureInfo.InvariantCulture).PadLeft(8);
        string rText = rho.ToString(CultureInfo.InvariantCulture).PadLeft(8);
        string tabulatedText = tabulated.ToString(CultureInfo.InvariantCulture).PadLeft(24);
        string computedText = computed.ToString(CultureInfo.InvariantCulture).PadLeft(24);
        string differenceText = Math.Abs(tabulated - computed).ToString(CultureInfo.InvariantCulture).PadLeft(10);

        Console.WriteLine(" " + xText
                          + " " + yText
                          + " " + rText
                          + " " + tabulatedText
                          + " " + computedText
                          + " " + differenceText);
    }
}
/// <summary>
/// Example: downloads a CSV data set, loads it into a frame table, and walks through
/// column access, summary statistics, two-sample and normality tests, linear regression,
/// a contingency table, computed columns, logistic regression, and a Spearman test,
/// printing selected results to the console.
/// </summary>
public static void AnalyzingData() {

    // Download and parse the example data.
    Uri source = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest download = WebRequest.Create(source);
    FrameTable table;
    using (WebResponse response = download.GetResponse())
    using (StreamReader csv = new StreamReader(response.GetResponseStream())) {
        table = FrameTable.FromCsv(csv);
    }

    // Work only with rows that have no null entries.
    FrameView view = table.WhereNotNull();

    // Three ways to get at a column: by zero-based index, by name, or by name on the view itself.
    FrameColumn column4 = view.Columns[4];
    FrameColumn heightsColumn = view.Columns["Height"];
    FrameColumn alsoHeightsColumn = view["Height"];

    // Typed access to the column values.
    IReadOnlyList<double> heights = view["Height"].As<double>();

    SummaryStatistics summary = new SummaryStatistics(heights);
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

    // Compare heights between the sexes.
    FrameView males = view.Where<string>("Sex", s => s == "M");
    FrameView females = view.Where<string>("Sex", s => s == "F");
    IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
    IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");

    // Further tests, computed for illustration but not printed.
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = heights.ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Regress weight on height.
    IReadOnlyList<double> weights = view["Weight"].As<double>();
    LinearRegressionResult fit = weights.LinearRegression(heights);
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulate sex against result.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
    var maleIncidence = contingency.ProbabilityOfColumnConditionalOnRow(true, "M");
    Console.WriteLine($"Male incidence: {maleIncidence}");
    var femaleIncidence = contingency.ProbabilityOfColumnConditionalOnRow(true, "F");
    Console.WriteLine($"Female incidence: {femaleIncidence}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Columns derived from existing ones: BMI from weight and height, age in years from birthdate.
    view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
    view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

    // Logistic regression of result on BMI and a 0/1 indicator for sex.
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters) {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Rank association between age and result.
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}