/// <summary>
/// Checks that the probabilities estimated from a <c>ContingencyTable</c>, and the
/// uncertainties reported with them, are statistically consistent with a known population.
/// </summary>
/// <remarks>
/// A fixed 3 x 3 population is sampled with a seeded generator to fill 50 tables of 50 events
/// each. For every table five estimates are recorded together with their reported
/// uncertainties. Across tables, the mean estimate must be compatible with the true population
/// value, and the scatter of the estimates must be compatible with the mean reported uncertainty.
/// </remarks>
public void ContingencyTableProbabilitiesAndUncertainties() {

    const int tableCount = 50;
    const int eventsPerTable = 50;

    // Underlying population; the nine cell probabilities are 1/45 ... 9/45 and sum to one.
    double[,] population = new double[,] {
        { 1.0 / 45.0, 2.0 / 45.0, 3.0 / 45.0 },
        { 4.0 / 45.0, 5.0 / 45.0, 6.0 / 45.0 },
        { 7.0 / 45.0, 8.0 / 45.0, 9.0 / 45.0 }
    };

    Random rng = new Random(314159);

    // Each sample pairs an estimate (first coordinate) with its reported uncertainty (second coordinate).
    BivariateSample cellSample = new BivariateSample();               // P(row 2, column 2)
    BivariateSample rowSample = new BivariateSample();                // P(row 0)
    BivariateSample columnSample = new BivariateSample();             // P(column 1)
    BivariateSample rowGivenColumnSample = new BivariateSample();     // P(row 2 | column 0)
    BivariateSample columnGivenRowSample = new BivariateSample();     // P(column 1 | row 2)

    for (int tableIndex = 0; tableIndex < tableCount; tableIndex++) {

        // Fill one table by drawing cells from the population.
        ContingencyTable table = new ContingencyTable(3, 3);
        for (int eventIndex = 0; eventIndex < eventsPerTable; eventIndex++) {
            int row;
            int column;
            ChooseRandomCell(population, rng.NextDouble(), out row, out column);
            table.Increment(row, column);
        }
        Assert.IsTrue(table.Total == eventsPerTable);

        // Estimate several population quantities from this table alone.
        UncertainValue cellEstimate = table.ProbabilityOf(2, 2);
        UncertainValue rowEstimate = table.ProbabilityOfRow(0);
        UncertainValue columnEstimate = table.ProbabilityOfColumn(1);
        UncertainValue rowGivenColumnEstimate = table.ProbabilityOfRowConditionalOnColumn(2, 0);
        UncertainValue columnGivenRowEstimate = table.ProbabilityOfColumnConditionalOnRow(1, 2);

        cellSample.Add(cellEstimate.Value, cellEstimate.Uncertainty);
        rowSample.Add(rowEstimate.Value, rowEstimate.Uncertainty);
        columnSample.Add(columnEstimate.Value, columnEstimate.Uncertainty);
        rowGivenColumnSample.Add(rowGivenColumnEstimate.Value, rowGivenColumnEstimate.Uncertainty);
        columnGivenRowSample.Add(columnGivenRowEstimate.Value, columnGivenRowEstimate.Uncertainty);

    }

    // Samples and the true value of the quantity each one estimates, in matching order.
    BivariateSample[] samples = new BivariateSample[] {
        cellSample, rowSample, columnSample, rowGivenColumnSample, columnGivenRowSample
    };
    double[] trueValues = new double[] {
        9.0 / 45.0,     // cell (2, 2)
        6.0 / 45.0,     // row 0: (1 + 2 + 3) / 45
        15.0 / 45.0,    // column 1: (2 + 5 + 8) / 45
        7.0 / 12.0,     // row 2 given column 0: 7 / (1 + 4 + 7)
        8.0 / 24.0      // column 1 given row 2: 8 / (7 + 8 + 9)
    };

    // The estimated population mean of each probability should include the correct
    // probability in the underlying distribution.
    for (int k = 0; k < samples.Length; k++) {
        Assert.IsTrue(samples[k].X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(trueValues[k]));
    }

    // The estimated uncertainty for each population parameter should be the standard deviation
    // across independent measurements. Since the reported uncertainty changes each time, the
    // mean reported value is used for the comparison.
    foreach (BivariateSample sample in samples) {
        Assert.IsTrue(sample.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(sample.Y.Mean));
    }

}
/// <summary>
/// Example walk-through of the generic <c>ContingencyTable</c> API. It builds a 2 x 2 table of
/// test result ("P"/"N") against condition (true/false) and writes the marginal totals,
/// estimated probabilities, conditional probabilities, log odds ratio, and the Pearson and
/// Fisher test results to the console.
/// </summary>
public static void ContingencyTable() {

    // Declare the row and column categories up front, then fill in the four cell counts.
    string[] resultLabels = new string[] { "P", "N" };
    bool[] conditionLabels = new bool[] { true, false };
    ContingencyTable<string, bool> table = new ContingencyTable<string, bool>(resultLabels, conditionLabels);
    table["P", true] = 35;
    table["P", false] = 65;
    table["N", true] = 4;
    table["N", false] = 896;

    // Alternative construction: cross-tabulate two parallel lists of observations.
    // The resulting table is built for illustration only and is not used below.
    IReadOnlyList<string> observedResults = new string[] { "N", "P", "N", "N", "P", "N", "N", "N", "P" };
    IReadOnlyList<bool> observedConditions = new bool[] { false, false, false, true, true, false, false, false, true };
    ContingencyTable<string, bool> tableFromLists = Bivariate.Crosstabs(observedResults, observedConditions);

    // Marginal and grand totals.
    foreach (string result in table.Rows) {
        var rowCount = table.RowTotal(result);
        Console.WriteLine($"Total count of {result}: {rowCount}");
    }
    foreach (bool condition in table.Columns) {
        var columnCount = table.ColumnTotal(condition);
        Console.WriteLine($"Total count of {condition}: {columnCount}");
    }
    Console.WriteLine($"Total counts: {table.Total}");

    // Estimated marginal probabilities, each reported with an uncertainty.
    foreach (string result in table.Rows) {
        UncertainValue rowProbability = table.ProbabilityOfRow(result);
        Console.WriteLine($"Estimated probability of {result}: {rowProbability}");
    }
    foreach (bool condition in table.Columns) {
        UncertainValue columnProbability = table.ProbabilityOfColumn(condition);
        Console.WriteLine($"Estimated probablity of {condition}: {columnProbability}");
    }

    // Conditional probabilities in both directions.
    UncertainValue positiveGivenTrue = table.ProbabilityOfRowConditionalOnColumn("P", true);
    Console.WriteLine($"Chance of P result given true condition: {positiveGivenTrue}");
    UncertainValue trueGivenPositive = table.ProbabilityOfColumnConditionalOnRow(true, "P");
    Console.WriteLine($"Chance of true condition given P result: {trueGivenPositive}");

    // Measures and tests of association between rows and columns.
    UncertainValue logOdds = table.Binary.LogOddsRatio;
    Console.WriteLine($"log(r) = {logOdds}");

    TestResult pearsonResult = table.PearsonChiSquaredTest();
    Console.WriteLine($"Pearson χ² = {pearsonResult.Statistic.Value} has P = {pearsonResult.Probability}.");

    TestResult fisherResult = table.Binary.FisherExactTest();
    Console.WriteLine($"Fisher exact test has P = {fisherResult.Probability}.");

}
/// <summary>
/// Cross-tabulates randomly generated (group, outcome) data that contains null values in both
/// columns and checks the resulting table's counts, marginal totals, inferred probabilities,
/// and Pearson test against the model that generated the data.
/// </summary>
public void ContingencyTableProbabilities() {

    // Construct data where (i) there are both reference-nulls and nullable-struct-nulls,
    // (ii) all values of one column are equally likely, and (iii) values of the other column
    // depend on the value of the first column.
    List<string> groups = new List<string>() { "A", "B", "C", null };

    FrameTable data = new FrameTable();
    data.AddColumn<string>("Group");
    data.AddColumn<bool?>("Outcome");

    int n = 512;
    double pOutcomeNull = 0.05;
    // Probability of a true outcome (before nulls are applied) falls linearly with group index.
    Func<int, double> pOutcome = index => 0.8 - 0.2 * index;
    Random rng = new Random(10101010);
    for (int i = 0; i < n; i++) {
        int groupIndex = rng.Next(0, groups.Count);
        string group = groups[groupIndex];
        bool? outcome = (rng.NextDouble() < pOutcome(groupIndex));
        // Independently of group and outcome, blank out a fraction of the outcomes.
        if (rng.NextDouble() < pOutcomeNull) {
            outcome = null;
        }
        data.AddRow(group, outcome);
    }

    // Form a contingency table.
    ContingencyTable<string, bool?> table = Bivariate.Crosstabs(data["Group"].As<string>(), data["Outcome"].As<bool?>());

    // Total counts should match.
    Assert.IsTrue(table.Total == n);

    // All values should be represented: every row must be a known group, and the number of
    // rows must equal the number of groups so that no group (including null) is missing.
    int rowCount = 0;
    foreach (string row in table.Rows) {
        Assert.IsTrue(groups.Contains(row));
        rowCount++;
    }
    Assert.IsTrue(rowCount == groups.Count);

    // Counts in each cell and marginal totals should match a direct filter of the data.
    foreach (string group in table.Rows) {
        int rowTotal = 0;
        foreach (bool? outcome in table.Columns) {
            FrameView view = data.Where(r => ((string)r["Group"] == group) && ((bool?)r["Outcome"] == outcome));
            Assert.IsTrue(table[group, outcome] == view.Rows.Count);
            rowTotal += view.Rows.Count;
        }
        Assert.IsTrue(rowTotal == table.RowTotal(group));
    }

    // Inferred probabilities should agree with the model.
    Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));
    double pGroup = 1.0 / groups.Count;   // groups are drawn uniformly
    for (int groupIndex = 0; groupIndex < groups.Count; groupIndex++) {
        string group = groups[groupIndex];
        Assert.IsTrue(table.ProbabilityOfRow(group).ConfidenceInterval(0.99).ClosedContains(pGroup));
        // A true outcome survives only if it was not replaced by null.
        Assert.IsTrue(table.ProbabilityOfColumnConditionalOnRow(true, group).ConfidenceInterval(0.99).ClosedContains(pOutcome(groupIndex) * (1.0 - pOutcomeNull)));
    }

    // Pearson test should catch that rows and columns are correlated.
    Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);

}