コード例 #1
0
        // Builds many small 3 x 3 contingency tables from a known population and verifies that
        // (a) the probability estimates each table reports are consistent with the population, and
        // (b) the uncertainty each table quotes is consistent with the scatter actually observed
        //     across the independently generated tables.
        public void ContingencyTableProbabilitiesAndUncertainties()
        {
            const int tableCount = 50;       // number of independent tables generated
            const int countsPerTable = 50;   // number of increments applied to each table
            const double confidence = 0.95;  // confidence level used for every interval check

            // cell probabilities of the underlying population (weights 1 through 9, which sum to 45)
            double[,] population = new double[, ]
            {
                { 1.0 / 45.0, 2.0 / 45.0, 3.0 / 45.0 },
                { 4.0 / 45.0, 5.0 / 45.0, 6.0 / 45.0 },
                { 7.0 / 45.0, 8.0 / 45.0, 9.0 / 45.0 }
            };

            // true population value of each quantity, listed in the order the estimates are taken below
            double[] expected = new double[]
            {
                9.0 / 45.0,  // cell (2, 2)
                6.0 / 45.0,  // row 0: (1 + 2 + 3) / 45
                15.0 / 45.0, // column 1: (2 + 5 + 8) / 45
                7.0 / 12.0,  // row 2 given column 0: 7 / (1 + 4 + 7)
                8.0 / 24.0   // column 1 given row 2: 8 / (7 + 8 + 9)
            };

            Random rng = new Random(314159);

            // one sample per quantity; each Add call below passes the estimated value first
            // and the reported uncertainty second, so X holds values and Y holds uncertainties
            BivariateSample[] estimates = new BivariateSample[expected.Length];
            for (int q = 0; q < estimates.Length; q++)
            {
                estimates[q] = new BivariateSample();
            }

            for (int t = 0; t < tableCount; t++)
            {
                // fill a fresh table with randomly chosen cells
                // (ChooseRandomCell is defined elsewhere; presumably it maps the uniform deviate
                // to a cell according to the population probabilities -- confirm against the helper)
                ContingencyTable table = new ContingencyTable(3, 3);
                for (int draw = 0; draw < countsPerTable; draw++)
                {
                    int row, column;
                    ChooseRandomCell(population, rng.NextDouble(), out row, out column);
                    table.Increment(row, column);
                }

                Assert.IsTrue(table.Total == countsPerTable);

                // query the table for its estimate of each population quantity
                UncertainValue[] measured = new UncertainValue[]
                {
                    table.ProbabilityOf(2, 2),
                    table.ProbabilityOfRow(0),
                    table.ProbabilityOfColumn(1),
                    table.ProbabilityOfRowConditionalOnColumn(2, 0),
                    table.ProbabilityOfColumnConditionalOnRow(1, 2)
                };

                for (int q = 0; q < measured.Length; q++)
                {
                    estimates[q].Add(measured[q].Value, measured[q].Uncertainty);
                }
            }

            // the confidence interval for the mean of each estimate should contain the true population value
            for (int q = 0; q < estimates.Length; q++)
            {
                Assert.IsTrue(estimates[q].X.PopulationMean.ConfidenceInterval(confidence).ClosedContains(expected[q]));
            }

            // the scatter of each estimate across tables should agree with the uncertainty the tables reported;
            // the reported uncertainty differs from table to table, so its mean is used for the comparison
            for (int q = 0; q < estimates.Length; q++)
            {
                Assert.IsTrue(estimates[q].X.PopulationStandardDeviation.ConfidenceInterval(confidence).ClosedContains(estimates[q].Y.Mean));
            }
        }
コード例 #2
0
ファイル: Statistics.cs プロジェクト: zyzhu/metanumerics
        // Sample walk-through of the labeled contingency table API: builds a 2 x 2 table of
        // test result ("P"/"N") against condition (true/false), then writes its totals, estimated
        // probabilities, log odds ratio, and the results of two association tests to the console.
        public static void ContingencyTable()
        {
            // Build a table by declaring the row and column labels and then assigning cell counts.
            ContingencyTable<string, bool> contingency = new ContingencyTable<string, bool>(
                new string[] { "P", "N" }, new bool[] { true, false }
            );

            contingency["P", true] = 35;
            contingency["P", false] = 65;
            contingency["N", true] = 4;
            contingency["N", false] = 896;

            // Alternative construction: cross-tabulate two parallel lists of observations.
            // This table is created only to illustrate the call; it is not used below.
            IReadOnlyList<string> x = new string[] { "N", "P", "N", "N", "P", "N", "N", "N", "P" };
            IReadOnlyList<bool> y = new bool[] { false, false, false, true, true, false, false, false, true };
            ContingencyTable<string, bool> contingencyFromLists = Bivariate.Crosstabs(x, y);

            // Marginal and grand totals.
            foreach (string row in contingency.Rows)
            {
                Console.WriteLine($"Total count of {row}: {contingency.RowTotal(row)}");
            }
            foreach (bool column in contingency.Columns)
            {
                Console.WriteLine($"Total count of {column}: {contingency.ColumnTotal(column)}");
            }
            Console.WriteLine($"Total counts: {contingency.Total}");

            // Estimated marginal probabilities, each reported as a value with an uncertainty.
            foreach (string row in contingency.Rows)
            {
                UncertainValue probability = contingency.ProbabilityOfRow(row);
                Console.WriteLine($"Estimated probability of {row}: {probability}");
            }
            foreach (bool column in contingency.Columns)
            {
                UncertainValue probability = contingency.ProbabilityOfColumn(column);
                // Spelling corrected from "probablity" so this line matches the row message above.
                Console.WriteLine($"Estimated probability of {column}: {probability}");
            }

            // Conditional probabilities: P result given true condition, and true condition given P result.
            UncertainValue sensitivity = contingency.ProbabilityOfRowConditionalOnColumn("P", true);
            Console.WriteLine($"Chance of P result given true condition: {sensitivity}");

            UncertainValue precision = contingency.ProbabilityOfColumnConditionalOnRow(true, "P");
            Console.WriteLine($"Chance of true condition given P result: {precision}");

            // Quantities available through the table's Binary view.
            UncertainValue logOddsRatio = contingency.Binary.LogOddsRatio;
            Console.WriteLine($"log(r) = {logOddsRatio}");

            // Tests of association between rows and columns.
            TestResult pearson = contingency.PearsonChiSquaredTest();
            Console.WriteLine($"Pearson χ² = {pearson.Statistic.Value} has P = {pearson.Probability}.");

            TestResult fisher = contingency.Binary.FisherExactTest();
            Console.WriteLine($"Fisher exact test has P = {fisher.Probability}.");
        }
コード例 #3
0
        // Generates a two-column frame table from a known random model, cross-tabulates it, and
        // verifies that the resulting contingency table reproduces the raw counts and that the
        // probabilities it infers are consistent with the generating model.
        public void ContingencyTableProbabilities()
        {
            // Construct data where (i) there are both reference-nulls and nullable-struct-nulls,
            // (ii) all values of one column are equally likely, (iii) values of the other column
            // depend on the value of the first column.
            List<string> groups = new List<string>()
            {
                "A", "B", "C", null
            };

            FrameTable data = new FrameTable();

            data.AddColumn<string>("Group");
            data.AddColumn<bool?>("Outcome");

            int n = 512;
            double pOutcomeNull = 0.05;
            // Probability that the outcome is true before it is possibly replaced by null;
            // it falls by 0.2 for each successive group index (0.8, 0.6, 0.4, 0.2).
            Func<int, double> pOutcome = groupIndex => 0.8 - 0.2 * groupIndex;
            Random rng = new Random(10101010);

            for (int i = 0; i < n; i++)
            {
                // Pick a group uniformly, draw its outcome, then independently null the outcome
                // with probability pOutcomeNull.
                int groupIndex = rng.Next(0, groups.Count);
                string group = groups[groupIndex];
                bool? outcome = (rng.NextDouble() < pOutcome(groupIndex));
                if (rng.NextDouble() < pOutcomeNull)
                {
                    outcome = null;
                }
                data.AddRow(group, outcome);
            }

            // Form a contingency table.
            ContingencyTable<string, bool?> table = Bivariate.Crosstabs(data["Group"].As<string>(), data["Outcome"].As<bool?>());

            // Total counts should match
            Assert.IsTrue(table.Total == n);

            // Every row label in the table should be one of the generated groups
            foreach (string row in table.Rows)
            {
                Assert.IsTrue(groups.Contains(row));
            }

            // Counts in each cell and row totals should match a direct count over the raw data
            foreach (string group in table.Rows)
            {
                int rowTotal = 0;
                foreach (bool? outcome in table.Columns)
                {
                    FrameView view = data.Where(r => ((string)r["Group"] == group) && ((bool?)r["Outcome"] == outcome));
                    Assert.IsTrue(table[group, outcome] == view.Rows.Count);
                    rowTotal += view.Rows.Count;
                }
                Assert.IsTrue(rowTotal == table.RowTotal(group));
            }

            // Inferred probabilities should agree with model
            Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));

            // Groups are drawn uniformly, so each has probability 1 / (number of groups).
            double pGroup = 1.0 / groups.Count;
            for (int groupIndex = 0; groupIndex < groups.Count; groupIndex++)
            {
                string group = groups[groupIndex];
                Assert.IsTrue(table.ProbabilityOfRow(group).ConfidenceInterval(0.99).ClosedContains(pGroup));
                // An outcome stays true only if it was drawn true and then not nulled.
                Assert.IsTrue(table.ProbabilityOfColumnConditionalOnRow(true, group).ConfidenceInterval(0.99).ClosedContains(pOutcome(groupIndex) * (1.0 - pOutcomeNull)));
            }

            // Pearson test should catch that rows and columns are correlated
            Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);
        }