// Example: builds a labeled 2x2 contingency table by hand, then prints its marginal
// totals, estimated row/column probabilities, two conditional probabilities, the log
// odds ratio, and the results of the Pearson chi-squared and Fisher exact tests.
public static void ContingencyTable()
{
    // Declare the row labels ("P", "N") and column labels (true, false) up front,
    // then fill in the four cell counts.
    ContingencyTable<string, bool> contingency = new ContingencyTable<string, bool>(
        new string[] { "P", "N" }, new bool[] { true, false }
    );
    contingency["P", true] = 35;
    contingency["P", false] = 65;
    contingency["N", true] = 4;
    contingency["N", false] = 896;

    // Alternative construction: cross-tabulate two paired lists of observations.
    // This second table is built for illustration only and is not used below.
    IReadOnlyList<string> x = new string[] { "N", "P", "N", "N", "P", "N", "N", "N", "P" };
    IReadOnlyList<bool> y = new bool[] { false, false, false, true, true, false, false, false, true };
    ContingencyTable<string, bool> contingencyFromLists = Bivariate.Crosstabs(x, y);

    // Marginal totals for each row and column, followed by the grand total.
    foreach (string row in contingency.Rows)
    {
        Console.WriteLine($"Total count of {row}: {contingency.RowTotal(row)}");
    }
    foreach (bool column in contingency.Columns)
    {
        Console.WriteLine($"Total count of {column}: {contingency.ColumnTotal(column)}");
    }
    Console.WriteLine($"Total counts: {contingency.Total}");

    // Estimated probability of each row and each column value.
    foreach (string row in contingency.Rows)
    {
        UncertainValue probability = contingency.ProbabilityOfRow(row);
        Console.WriteLine($"Estimated probability of {row}: {probability}");
    }
    foreach (bool column in contingency.Columns)
    {
        UncertainValue probability = contingency.ProbabilityOfColumn(column);
        // Fixed: the message previously read "probablity".
        Console.WriteLine($"Estimated probability of {column}: {probability}");
    }

    // Conditional probabilities in both directions (row given column, column given row).
    UncertainValue sensitivity = contingency.ProbabilityOfRowConditionalOnColumn("P", true);
    Console.WriteLine($"Chance of P result given true condition: {sensitivity}");
    UncertainValue precision = contingency.ProbabilityOfColumnConditionalOnRow(true, "P");
    Console.WriteLine($"Chance of true condition given P result: {precision}");

    // Operations that are only exposed through the table's Binary view.
    UncertainValue logOddsRatio = contingency.Binary.LogOddsRatio;
    Console.WriteLine($"log(r) = {logOddsRatio}");

    // Tests of association between rows and columns.
    TestResult pearson = contingency.PearsonChiSquaredTest();
    Console.WriteLine($"Pearson χ² = {pearson.Statistic.Value} has P = {pearson.Probability}.");
    TestResult fisher = contingency.Binary.FisherExactTest();
    Console.WriteLine($"Fisher exact test has P = {fisher.Probability}.");
}
// Checks a simulated 2x2 table with a built-in row/column association: the marginal
// totals must be consistent, the (log) odds ratio intervals must exclude the
// no-association value, both association tests must reject at the 5% level, and
// phi must equal the Pearson correlation of the expanded 0/1 data.
public void BinaryContingencyTest()
{
    // Column probabilities differ by row (0.50 versus 0.75), so an association exists.
    ContingencyTable experiment = CreateExperiment(0.50, 0.50, 0.75, 128);

    // Row totals and column totals must each sum to the grand total.
    Assert.IsTrue(experiment.RowTotal(0) + experiment.RowTotal(1) == experiment.Total);
    Assert.IsTrue(experiment.ColumnTotal(0) + experiment.ColumnTotal(1) == experiment.Total);

    // The 95% interval of the log odds ratio must exclude 0 ...
    UncertainValue logOddsRatio = experiment.Binary.LogOddsRatio;
    bool logIntervalHasZero = logOddsRatio.ConfidenceInterval(0.95).ClosedContains(0.0);
    Assert.IsTrue(!logIntervalHasZero);

    // ... and the 95% interval of the odds ratio itself must exclude 1.
    UncertainValue oddsRatio = experiment.Binary.OddsRatio;
    bool intervalHasOne = oddsRatio.ConfidenceInterval(0.95).ClosedContains(1.0);
    Assert.IsTrue(!intervalHasOne);

    // Both the chi-squared test and the Fisher exact test should detect the association.
    TestResult pearson = experiment.PearsonChiSquaredTest();
    Assert.IsTrue(pearson.Probability < 0.05);
    TestResult fisher = experiment.Binary.FisherExactTest();
    Assert.IsTrue(fisher.Probability < 0.05);

    // Expand every cell into that many (row, column) indicator pairs, visiting the
    // cells in the order [0,0], [0,1], [1,0], [1,1].
    List<double> rowIndicators = new List<double>();
    List<double> columnIndicators = new List<double>();
    for (int r = 0; r < 2; r++)
    {
        for (int c = 0; c < 2; c++)
        {
            for (int k = 0; k < experiment[r, c]; k++)
            {
                rowIndicators.Add(r);
                columnIndicators.Add(c);
            }
        }
    }

    // Phi should be the same as the Pearson correlation coefficient of the indicators.
    double correlation = rowIndicators.CorrelationCoefficient(columnIndicators);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(correlation, experiment.Binary.Phi));
}
// Names the rows and columns of a 2x3 table and then writes every cell through the
// name-based indexer. The method makes no assertions; it only exercises the calls.
public void ContingencyTableNamedOperations()
{
    string[] rowLabels = { "Male", "Female" };
    string[] columnLabels = { "Party 1", "Party 2", "Party 3" };
    // counts[r, c] is the value stored for rowLabels[r] and columnLabels[c].
    int[,] counts =
    {
        { 10, 20, 30 },
        { 30, 20, 10 }
    };

    ContingencyTable table = new ContingencyTable(2, 3);

    // Assign all names first, so the name-based indexer below can resolve them.
    for (int r = 0; r < rowLabels.Length; r++)
    {
        table.RowNames[r] = rowLabels[r];
    }
    for (int c = 0; c < columnLabels.Length; c++)
    {
        table.ColumnNames[c] = columnLabels[c];
    }

    // Fill the cells row by row.
    for (int r = 0; r < rowLabels.Length; r++)
    {
        for (int c = 0; c < columnLabels.Length; c++)
        {
            table[rowLabels[r], columnLabels[c]] = counts[r, c];
        }
    }
}
// Verifies the dimensions and totals of a freshly created 4x3 table, then checks
// how the totals respond after a single cell is set.
public void ContingencyTableOperations()
{
    ContingencyTable table = new ContingencyTable(4, 3);

    // Dimensions match the constructor arguments.
    Assert.IsTrue(table.RowCount == 4);
    Assert.IsTrue(table.ColumnCount == 3);

    // A new table reports zero for every total that is inspected here.
    Assert.IsTrue(table.RowTotal(2) == 0);
    Assert.IsTrue(table.ColumnTotal(1) == 0);
    Assert.IsTrue(table.Total == 0);

    // Put a count of 2 into the cell at row 1, column 1.
    table[1, 1] = 2;

    // Row 2 is untouched; column 1 and the grand total pick up the new count.
    Assert.IsTrue(table.RowTotal(2) == 0);
    Assert.IsTrue(table.ColumnTotal(1) == 2);
    Assert.IsTrue(table.Total == 2);
}
// Regression test: the Fisher exact test must give the same probability when the
// rows of the table are swapped.
public void Bug10()
{
    // Background: the Fisher exact probability is computed by iterating over all
    // contingency tables with the same marginal totals and accumulating the
    // probability of each one that is no more probable than the observed table.
    // In the symmetric case the "opposite" table has exactly the same probability,
    // but floating-point noise can make its computed probability infinitesimally
    // larger, so it was sometimes not counted. The fix was to special-case the
    // symmetric case.
    int[,] counts = { { 18, 16 }, { 12, 14 } };
    int[,] countsWithRowsSwapped = { { 12, 14 }, { 18, 16 } };

    ContingencyTable original = new ContingencyTable(counts);
    ContingencyTable permuted = new ContingencyTable(countsWithRowsSwapped);

    double originalProbability = original.Binary.FisherExactTest().Probability;
    double permutedProbability = permuted.Binary.FisherExactTest().Probability;

    Assert.IsTrue(TestUtilities.IsNearlyEqual(originalProbability, permutedProbability));
}
// Runs the McNemar test on 32 simulated samples of 64 paired results each and
// checks, with a Kolmogorov-Smirnov test, that the collected test statistics are
// compatible with the null distribution reported by the McNemar test.
public void McNemarTestDistribution()
{
    // Population model: fraction with the condition, and how often each of the
    // two tests (a and b) reports the true state.
    double positiveFraction = 0.4;
    double accuracyA = 0.2;
    double accuracyB = 0.9;

    // Fixed seed keeps the simulation reproducible.
    Random rng = new Random(1);
    List<double> statistics = new List<double>();
    ContinuousDistribution nullDistribution = null;

    for (int sample = 0; sample < 32; sample++)
    {
        List<bool> resultsA = new List<bool>();
        List<bool> resultsB = new List<bool>();

        for (int person = 0; person < 64; person++)
        {
            // Draw order is significant for reproducibility:
            // true state first, then test a, then test b.
            bool hasCondition = rng.NextDouble() < positiveFraction;

            bool aIsCorrect = rng.NextDouble() < accuracyA;
            resultsA.Add(aIsCorrect ? hasCondition : !hasCondition);

            bool bIsCorrect = rng.NextDouble() < accuracyB;
            resultsB.Add(bIsCorrect ? hasCondition : !hasCondition);
        }

        // Cross-tabulate the paired results and record the McNemar statistic,
        // together with the distribution the test claims for it.
        ContingencyTable<bool, bool> table = Bivariate.Crosstabs(resultsA, resultsB);
        TestResult mcNemar = table.Binary.McNemarTest();
        statistics.Add(mcNemar.Statistic.Value);
        nullDistribution = mcNemar.Statistic.Distribution;
    }

    // The test is written on the premise that the null hypothesis holds for this
    // construction, so the statistics should follow the claimed null distribution.
    TestResult kolmogorovSmirnov = statistics.KolmogorovSmirnovTest(nullDistribution);
    Assert.IsTrue(kolmogorovSmirnov.Probability > 0.05);
}
// Simulates N observations into a 2x2 table using a fixed random seed.
//   p  - probability that an observation falls in row 0 (otherwise row 1)
//   q0 - probability of column 0 for an observation in row 0
//   q1 - probability of column 0 for an observation in row 1
//   N  - number of observations to generate
// Returns the table holding the accumulated counts.
private static ContingencyTable CreateExperiment(double p, double q0, double q1, int N)
{
    ContingencyTable table = new ContingencyTable(2, 2);
    Random rng = new Random(1);

    for (int trial = 0; trial < N; trial++)
    {
        // First draw selects the row; the row decides which column probability applies.
        bool inFirstRow = rng.NextDouble() < p;
        int row = inFirstRow ? 0 : 1;
        double columnZeroProbability = inFirstRow ? q0 : q1;

        // Second draw selects the column.
        int column = rng.NextDouble() < columnZeroProbability ? 0 : 1;

        table[row, column] += 1;
    }

    return table;
}
// Builds a two-column data frame (string "Group", nullable bool "Outcome") from a
// seeded simulation, cross-tabulates it, and checks the table's counts, totals,
// and inferred probabilities against the data and the generating model.
public void ContingencyTableProbabilities()
{
    // The data is constructed so that
    // (i) it contains both reference-type nulls and nullable-struct nulls,
    // (ii) all values of the group column are equally likely, and
    // (iii) the outcome column depends on the value of the group column.
    List<string> groups = new List<string>() { "A", "B", "C", null };
    int n = 512;
    double pOutcomeNull = 0.05;
    Func<int, double> pOutcome = index => 0.8 - 0.2 * index;

    FrameTable data = new FrameTable();
    data.AddColumn<string>("Group");
    data.AddColumn<bool?>("Outcome");

    Random rng = new Random(10101010);
    for (int i = 0; i < n; i++)
    {
        // Draw order: group index, then outcome, then whether the outcome is missing.
        int drawnIndex = rng.Next(0, groups.Count);
        bool drawnOutcome = rng.NextDouble() < pOutcome(drawnIndex);
        bool isMissing = rng.NextDouble() < pOutcomeNull;
        bool? recordedOutcome = isMissing ? (bool?)null : drawnOutcome;
        data.AddRow(groups[drawnIndex], recordedOutcome);
    }

    // Form a contingency table from the two columns.
    IReadOnlyList<string> groupColumn = data["Group"].As<string>();
    IReadOnlyList<bool?> outcomeColumn = data["Outcome"].As<bool?>();
    ContingencyTable<string, bool?> table = Bivariate.Crosstabs(groupColumn, outcomeColumn);

    // The grand total should match the number of generated rows.
    Assert.IsTrue(table.Total == n);

    // Every row label of the table should be one of the generated group values.
    foreach (string rowLabel in table.Rows)
    {
        Assert.IsTrue(groups.Contains(rowLabel));
    }

    // Each cell count and each row total should match a direct count over the data.
    foreach (string rowLabel in table.Rows)
    {
        int countedRowTotal = 0;
        foreach (bool? columnLabel in table.Columns)
        {
            FrameView matches = data.Where(
                r => ((string)r["Group"] == rowLabel) && ((bool?)r["Outcome"] == columnLabel)
            );
            int matchCount = matches.Rows.Count;
            Assert.IsTrue(table[rowLabel, columnLabel] == matchCount);
            countedRowTotal += matchCount;
        }
        Assert.IsTrue(countedRowTotal == table.RowTotal(rowLabel));
    }

    // Inferred probabilities should agree with the generating model (99% intervals).
    UncertainValue nullOutcomeProbability = table.ProbabilityOfColumn(null);
    Assert.IsTrue(nullOutcomeProbability.ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));

    for (int g = 0; g < groups.Count; g++)
    {
        string label = groups[g];

        // Four equally likely groups, so each row should have probability 0.25.
        UncertainValue rowProbability = table.ProbabilityOfRow(label);
        Assert.IsTrue(rowProbability.ConfidenceInterval(0.99).ClosedContains(0.25));

        // A true outcome requires the modeled draw to succeed and the value not to be nulled.
        UncertainValue trueGivenRow = table.ProbabilityOfColumnConditionalOnRow(true, label);
        double expectedTrueGivenRow = pOutcome(g) * (1.0 - pOutcomeNull);
        Assert.IsTrue(trueGivenRow.ConfidenceInterval(0.99).ClosedContains(expectedTrueGivenRow));
    }

    // The null-outcome probability is checked a second time after the per-group checks.
    UncertainValue nullOutcomeProbabilityAgain = table.ProbabilityOfColumn(null);
    Assert.IsTrue(nullOutcomeProbabilityAgain.ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));

    // The Pearson test should catch that rows and columns are correlated.
    Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);
}
// Example: downloads a CSV file into a data frame, then prints summary statistics,
// a two-sample t-test, a linear regression, a contingency analysis, a logistic
// regression on computed columns, and a Spearman rank test.
public static void AnalyzingData()
{
    // Download the example data set and parse it as CSV.
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);
    FrameTable table;
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        table = FrameTable.FromCsv(reader);
    }

    // Work on the view returned by WhereNotNull for the rest of the example.
    FrameView view = table.WhereNotNull();

    // Three ways to reach a column: by (zero-based) index, by name through the
    // Columns collection, and by name through the view's own indexer.
    FrameColumn column4 = view.Columns[4];
    FrameColumn heightsColumn = view.Columns["Height"];
    FrameColumn alsoHeightsColumn = view["Height"];

    // A column exposed as a typed read-only list.
    IReadOnlyList<double> heights = view["Height"].As<double>();

    // Descriptive statistics of the heights.
    SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

    // Split the heights by the value of the "Sex" column and compare the two groups.
    FrameView males = view.Where<string>("Sex", s => s == "M");
    IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
    FrameView females = view.Where<string>("Sex", s => s == "F");
    IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");

    // Further tests whose results are computed but not printed in this example.
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Linear regression between the weight and height columns.
    IReadOnlyList<double> weightValues = view["Weight"].As<double>();
    LinearRegressionResult fit = weightValues.LinearRegression(view["Height"].As<double>());
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulate sex against result and report conditional incidences.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(
        view["Sex"].As<string>(), view["Result"].As<bool>()
    );
    UncertainValue maleIncidence = contingency.ProbabilityOfColumnConditionalOnRow(true, "M");
    Console.WriteLine($"Male incidence: {maleIncidence}");
    UncertainValue femaleIncidence = contingency.ProbabilityOfColumnConditionalOnRow(true, "F");
    Console.WriteLine($"Female incidence: {femaleIncidence}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Computed columns: "Bmi" from weight and height, "Age" from the birthdate.
    view.AddComputedColumn("Bmi", r =>
    {
        double weight = (double)r["Weight"];
        double scaledHeight = (double)r["Height"] / 100.0;
        return weight / MoreMath.Sqr(scaledHeight);
    });
    view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

    // Logistic regression of the result on Bmi and a 0/1 encoding of sex.
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters)
    {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Rank correlation test between age and result.
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}
// Requests the binary (2x2-specific) operations of a table that has 2 rows and 3 columns.
// NOTE(review): the test name suggests this access is expected to fail for a table
// that is not 2x2, but no expected-exception declaration is visible in this block —
// confirm against the attributes on this test.
public void BinaryContingencyInvalidConstructionTest()
{
    // The previously allocated, never-read int[2, 3] array has been removed.
    ContingencyTable table = new ContingencyTable(2, 3);

    // The local is never read; the assignment exists only to evaluate the Binary property.
    BinaryContingencyTableOperations binary = table.Binary;
}