コード例 #1
0
ファイル: Statistics.cs プロジェクト: zyzhu/metanumerics
        /// <summary>
        /// Example walk-through of the contingency table API: fills a 2x2 table by hand,
        /// builds a second one from paired lists, and prints marginal totals, estimated
        /// probabilities, conditional probabilities, the log odds ratio, and the results
        /// of the Pearson chi-squared and Fisher exact tests to the console.
        /// </summary>
        public static void ContingencyTable()
        {
            // Rows are labelled "P"/"N" and columns true/false.
            ContingencyTable <string, bool> contingency = new ContingencyTable <string, bool>(
                new string[] { "P", "N" }, new bool[] { true, false }
                );

            // Fill in the four cell counts directly.
            contingency["P", true]  = 35;
            contingency["P", false] = 65;
            contingency["N", true]  = 4;
            contingency["N", false] = 896;

            // Alternative construction: cross-tabulate two equally long lists of paired
            // observations. The resulting table is shown for illustration only and is
            // not used below.
            IReadOnlyList <string>          x = new string[] { "N", "P", "N", "N", "P", "N", "N", "N", "P" };
            IReadOnlyList <bool>            y = new bool[] { false, false, false, true, true, false, false, false, true };
            ContingencyTable <string, bool> contingencyFromLists = Bivariate.Crosstabs(x, y);

            // Marginal totals for every row and column, then the grand total.
            foreach (string row in contingency.Rows)
            {
                Console.WriteLine($"Total count of {row}: {contingency.RowTotal(row)}");
            }
            foreach (bool column in contingency.Columns)
            {
                Console.WriteLine($"Total count of {column}: {contingency.ColumnTotal(column)}");
            }
            Console.WriteLine($"Total counts: {contingency.Total}");

            // Estimated marginal probabilities, reported as values with uncertainty.
            foreach (string row in contingency.Rows)
            {
                UncertainValue probability = contingency.ProbabilityOfRow(row);
                Console.WriteLine($"Estimated probability of {row}: {probability}");
            }
            foreach (bool column in contingency.Columns)
            {
                UncertainValue probability = contingency.ProbabilityOfColumn(column);
                Console.WriteLine($"Estimated probability of {column}: {probability}");
            }

            // Probability of row "P" given column true.
            UncertainValue sensitivity = contingency.ProbabilityOfRowConditionalOnColumn("P", true);

            Console.WriteLine($"Chance of P result given true condition: {sensitivity}");

            // Probability of column true given row "P".
            UncertainValue precision = contingency.ProbabilityOfColumnConditionalOnRow(true, "P");

            Console.WriteLine($"Chance of true condition given P result: {precision}");

            // Operations specific to 2x2 tables are reached through the Binary property.
            UncertainValue logOddsRatio = contingency.Binary.LogOddsRatio;

            Console.WriteLine($"log(r) = {logOddsRatio}");

            TestResult pearson = contingency.PearsonChiSquaredTest();

            Console.WriteLine($"Pearson χ² = {pearson.Statistic.Value} has P = {pearson.Probability}.");

            TestResult fisher = contingency.Binary.FisherExactTest();

            Console.WriteLine($"Fisher exact test has P = {fisher.Probability}.");
        }
コード例 #2
0
        /// <summary>
        /// Verifies that a simulated 2x2 table with built-in association is flagged as
        /// associated by the odds-ratio confidence intervals, the Pearson chi-squared test
        /// and the Fisher exact test, and that its phi coefficient equals the correlation
        /// coefficient of the equivalent 0/1 paired data.
        /// </summary>
        public void BinaryContingencyTest()
        {
            // Build a table with significant association.
            ContingencyTable experiment = CreateExperiment(0.50, 0.50, 0.75, 128);

            // Marginal totals must add up to the grand total in both directions.
            var sumOfRowTotals = experiment.RowTotal(0) + experiment.RowTotal(1);
            Assert.IsTrue(sumOfRowTotals == experiment.Total);
            var sumOfColumnTotals = experiment.ColumnTotal(0) + experiment.ColumnTotal(1);
            Assert.IsTrue(sumOfColumnTotals == experiment.Total);

            // No association corresponds to a log odds ratio of 0, so 0 must be excluded.
            UncertainValue logOddsRatio = experiment.Binary.LogOddsRatio;
            Assert.IsFalse(logOddsRatio.ConfidenceInterval(0.95).ClosedContains(0.0));

            // Equivalently, the odds ratio interval must exclude 1.
            UncertainValue oddsRatio = experiment.Binary.OddsRatio;
            Assert.IsFalse(oddsRatio.ConfidenceInterval(0.95).ClosedContains(1.0));

            // Chi square should detect association
            TestResult pearson = experiment.PearsonChiSquaredTest();
            Assert.IsTrue(pearson.Probability < 0.05);

            // Fisher exact should detect association
            TestResult fisher = experiment.Binary.FisherExactTest();
            Assert.IsTrue(fisher.Probability < 0.05);

            // Phi should be the same as Pearson correlation coefficient.
            // Expand each cell into that many (row index, column index) pairs, visiting
            // the cells in the order (0,0), (0,1), (1,0), (1,1).
            List <double> rowValues    = new List <double>();
            List <double> columnValues = new List <double>();

            for (int row = 0; row < 2; row++)
            {
                for (int column = 0; column < 2; column++)
                {
                    var cellCount = experiment[row, column];
                    for (int k = 0; k < cellCount; k++)
                    {
                        rowValues.Add(row);
                        columnValues.Add(column);
                    }
                }
            }

            double correlation = rowValues.CorrelationCoefficient(columnValues);

            Assert.IsTrue(TestUtilities.IsNearlyEqual(correlation, experiment.Binary.Phi));
        }
コード例 #3
0
        /// <summary>
        /// Exercises the name-based API of a 2x3 contingency table: assigns row and
        /// column names by index, then sets every cell through the name-keyed indexer.
        /// The method makes no assertions; it only checks that these calls complete.
        /// </summary>
        public void ContingencyTableNamedOperations()
        {
            string[] rowLabels    = { "Male", "Female" };
            string[] columnLabels = { "Party 1", "Party 2", "Party 3" };
            int[,]   counts       =
            {
                { 10, 20, 30 },
                { 30, 20, 10 }
            };

            ContingencyTable t = new ContingencyTable(2, 3);

            // Name the rows first, then the columns.
            for (int i = 0; i < rowLabels.Length; i++)
            {
                t.RowNames[i] = rowLabels[i];
            }
            for (int j = 0; j < columnLabels.Length; j++)
            {
                t.ColumnNames[j] = columnLabels[j];
            }

            // Populate the cells row by row, addressing each one by its names.
            for (int i = 0; i < rowLabels.Length; i++)
            {
                for (int j = 0; j < columnLabels.Length; j++)
                {
                    t[rowLabels[i], columnLabels[j]] = counts[i, j];
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Checks the basic bookkeeping of an index-addressed contingency table: the
        /// reported dimensions, the all-zero totals of a fresh table, and how the totals
        /// change after a single cell is set.
        /// </summary>
        public void ContingencyTableOperations()
        {
            const int rows    = 4;
            const int columns = 3;

            ContingencyTable table = new ContingencyTable(rows, columns);

            // Dimensions are those passed to the constructor.
            Assert.IsTrue(table.RowCount == rows);
            Assert.IsTrue(table.ColumnCount == columns);

            // A freshly constructed table holds no counts.
            Assert.IsTrue(table.RowTotal(2) == 0);
            Assert.IsTrue(table.ColumnTotal(1) == 0);
            Assert.IsTrue(table.Total == 0);

            // Setting cell (1, 1) changes column 1 and the grand total; row 2 stays empty.
            table[1, 1] = 2;

            Assert.IsTrue(table.RowTotal(2) == 0);
            Assert.IsTrue(table.ColumnTotal(1) == 2);
            Assert.IsTrue(table.Total == 2);
        }
コード例 #5
0
        /// <summary>
        /// Regression test: the Fisher exact test must give the same probability for a
        /// 2x2 table and for the same table with its rows swapped.
        /// </summary>
        public void Bug10()
        {
            // Background: the Fisher exact probability is computed by iterating over all
            // contingency tables with the same marginal totals and adding up the
            // probabilities of those that are no more probable than the observed table.
            // In the symmetric case the "opposite" table has exactly the same probability,
            // but floating point noise could make its computed probability infinitesimally
            // larger, so it was not counted. The fix was to special-case the symmetric case.

            int[,] observedCounts =
            {
                { 18, 16 }, { 12, 14 }
            };
            int[,] rowSwappedCounts =
            {
                { 12, 14 }, { 18, 16 }
            };

            ContingencyTable observed   = new ContingencyTable(observedCounts);
            ContingencyTable rowSwapped = new ContingencyTable(rowSwappedCounts);

            var observedP   = observed.Binary.FisherExactTest().Probability;
            var rowSwappedP = rowSwapped.Binary.FisherExactTest().Probability;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(observedP, rowSwappedP));
        }
コード例 #6
0
        /// <summary>
        /// Runs the McNemar test on 32 simulated samples of 64 subjects each and checks,
        /// with a Kolmogorov-Smirnov test, that the collected test statistics are
        /// compatible with the null distribution reported by the McNemar test itself.
        /// </summary>
        public void McNemarTestDistribution()
        {
            const int sampleCount = 32;
            const int sampleSize  = 64;

            // Population prevalence, and for each test the chance that it reports a
            // subject's true status (otherwise it reports the opposite).
            double prevalence = 0.4;
            double accuracyA  = 0.2;
            double accuracyB  = 0.9;

            // NOTE(review): with these values test A reports positive with probability
            // 0.4*0.2 + 0.6*0.8 = 0.56 and test B with 0.4*0.9 + 0.6*0.1 = 0.42, so the
            // marginal rates differ. Confirm that the null hypothesis assumed below
            // really holds for this construction.

            List <double>          mcNemarStatistics = new List <double>();
            ContinuousDistribution nullDistribution  = null;
            Random                 rng               = new Random(1);

            for (int sample = 0; sample < sampleCount; sample++)
            {
                // Apply both tests to every subject. The order of the three random
                // draws (status, test A, test B) determines the generated data.
                List <bool> outcomesA = new List <bool>();
                List <bool> outcomesB = new List <bool>();
                for (int subject = 0; subject < sampleSize; subject++)
                {
                    bool isPositive = rng.NextDouble() < prevalence;

                    // A correct test reports the true status; an incorrect one inverts it.
                    bool aIsCorrect = rng.NextDouble() < accuracyA;
                    bool bIsCorrect = rng.NextDouble() < accuracyB;

                    outcomesA.Add(aIsCorrect == isPositive);
                    outcomesB.Add(bIsCorrect == isPositive);
                }

                // Cross-tabulate the paired outcomes, run the McNemar test, and record
                // its statistic along with the distribution it claims for that statistic.
                ContingencyTable <bool, bool> crosstab = Bivariate.Crosstabs(outcomesA, outcomesB);
                TestResult mcNemar = crosstab.Binary.McNemarTest();
                mcNemarStatistics.Add(mcNemar.Statistic.Value);
                nullDistribution = mcNemar.Statistic.Distribution;
            }

            // The test statistics are expected to follow the claimed null distribution,
            // so the Kolmogorov-Smirnov test should not reject it.
            TestResult ks = mcNemarStatistics.KolmogorovSmirnovTest(nullDistribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
コード例 #7
0
        /// <summary>
        /// Simulates <paramref name="N"/> independent trials and tallies them in a 2x2
        /// contingency table, using a random generator seeded with 1 so the result is
        /// reproducible.
        /// </summary>
        /// <param name="p">Probability that a trial falls in row 0 (otherwise row 1).</param>
        /// <param name="q0">Probability of column 0 for a trial in row 0 (otherwise column 1).</param>
        /// <param name="q1">Probability of column 0 for a trial in row 1 (otherwise column 1).</param>
        /// <param name="N">Number of trials to simulate.</param>
        /// <returns>The 2x2 table of simulated counts.</returns>
        private static ContingencyTable CreateExperiment(double p, double q0, double q1, int N)
        {
            ContingencyTable counts = new ContingencyTable(2, 2);
            Random           random = new Random(1);

            for (int trial = 0; trial < N; trial++)
            {
                // First draw picks the row; second draw picks the column using the
                // column probability that belongs to the chosen row.
                int    row                 = random.NextDouble() < p ? 0 : 1;
                double columnZeroThreshold = (row == 0) ? q0 : q1;
                int    column              = random.NextDouble() < columnZeroThreshold ? 0 : 1;

                counts[row, column] += 1;
            }

            return counts;
        }
コード例 #8
0
        /// <summary>
        /// Builds a frame of randomly generated (group, outcome) rows containing both
        /// reference nulls and nullable-struct nulls, cross-tabulates it, and checks that
        /// the table's counts, marginal totals and inferred probabilities agree with the
        /// data and with the generating model.
        /// </summary>
        public void ContingencyTableProbabilities()
        {
            // Construct data where (i) there are both reference-nulls and nullable-struct-nulls,
            // (ii) all values of one column are equally likely, (iii) values of the other
            // column depend on the value of the first column.
            List <string> groups = new List <string>()
            {
                "A", "B", "C", null
            };

            FrameTable data = new FrameTable();

            data.AddColumn <string>("Group");
            data.AddColumn <bool?>("Outcome");

            int                n            = 512;
            double             pOutcomeNull = 0.05;
            Func <int, double> pOutcome     = groupIndex => 0.8 - 0.2 * groupIndex;
            Random             rng          = new Random(10101010);

            // Each group is chosen with equal probability, so the expected probability of
            // any one group follows from the number of groups.
            double pGroup = 1.0 / groups.Count;

            for (int i = 0; i < n; i++)
            {
                int    groupIndex = rng.Next(0, groups.Count);
                string group      = groups[groupIndex];
                bool?  outcome    = (rng.NextDouble() < pOutcome(groupIndex));

                // Independently of its value, blank out the outcome with probability pOutcomeNull.
                if (rng.NextDouble() < pOutcomeNull)
                {
                    outcome = null;
                }
                data.AddRow(group, outcome);
            }

            // Form a contingency table.
            ContingencyTable <string, bool?> table = Bivariate.Crosstabs(data["Group"].As <string>(), data["Outcome"].As <bool?>());

            // Total counts should match
            Assert.IsTrue(table.Total == n);

            // Every row label in the table should be one of the generated groups
            foreach (string row in table.Rows)
            {
                Assert.IsTrue(groups.Contains(row));
            }

            // Counts in each cell and marginal totals should match
            foreach (string group in table.Rows)
            {
                int rowTotal = 0;
                foreach (bool? outcome in table.Columns)
                {
                    // Count the matching rows directly in the source data.
                    FrameView view = data.Where(r => ((string)r["Group"] == group) && ((bool?)r["Outcome"] == outcome));
                    Assert.IsTrue(table[group, outcome] == view.Rows.Count);
                    rowTotal += view.Rows.Count;
                }
                Assert.IsTrue(rowTotal == table.RowTotal(group));
            }

            // Inferred probabilities should agree with model
            Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));
            for (int groupIndex = 0; groupIndex < groups.Count; groupIndex++)
            {
                string group = groups[groupIndex];
                Assert.IsTrue(table.ProbabilityOfRow(group).ConfidenceInterval(0.99).ClosedContains(pGroup));

                // A true outcome requires drawing true and then not being blanked out.
                Assert.IsTrue(table.ProbabilityOfColumnConditionalOnRow(true, group).ConfidenceInterval(0.99).ClosedContains(pOutcome(groupIndex) * (1.0 - pOutcomeNull)));
            }

            // Pearson test should catch that rows and columns are correlated
            Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);
        }
コード例 #9
0
ファイル: Data.cs プロジェクト: zyzhu/metanumerics
        /// <summary>
        /// Example data-analysis session: downloads a CSV file into a frame table, drops
        /// rows with nulls, and prints summary statistics, a Student t-test, a linear
        /// regression, a contingency analysis, a logistic regression and a Spearman rho
        /// test to the console.
        /// </summary>
        public static void AnalyzingData()
        {
            // Download the example CSV and parse it into a table.
            Uri        source     = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest webRequest = WebRequest.Create(source);

            FrameTable loaded;
            using (WebResponse webResponse = webRequest.GetResponse())
            using (StreamReader csvReader = new StreamReader(webResponse.GetResponseStream()))
            {
                loaded = FrameTable.FromCsv(csvReader);
            }

            // Work only with the rows returned by WhereNotNull.
            FrameView complete = loaded.WhereNotNull();

            // Three ways to reach a column: by zero-based index, by name through the
            // Columns collection, and by name through the view's own indexer.
            FrameColumn fifthColumn      = complete.Columns[4];
            FrameColumn heightByName     = complete.Columns["Height"];
            FrameColumn heightByIndexer  = complete["Height"];

            // A column can be exposed as a typed read-only list.
            IReadOnlyList <double> allHeights = complete["Height"].As <double>();

            // Descriptive statistics of the heights.
            SummaryStatistics heightStats = new SummaryStatistics(complete["Height"].As <double>());

            Console.WriteLine($"Count = {heightStats.Count}");
            Console.WriteLine($"Mean = {heightStats.Mean}");
            Console.WriteLine($"Standard Deviation = {heightStats.StandardDeviation}");
            Console.WriteLine($"Skewness = {heightStats.Skewness}");
            Console.WriteLine($"Estimated population mean = {heightStats.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {heightStats.PopulationStandardDeviation}");

            // Compare heights between the rows whose "Sex" is "M" and those whose "Sex" is "F".
            IReadOnlyList <double> heightsOfMen =
                complete.Where <string>("Sex", s => s == "M").Columns["Height"].As <double>();
            IReadOnlyList <double> heightsOfWomen =
                complete.Where <string>("Sex", s => s == "F").Columns["Height"].As <double>();
            TestResult tTest = Univariate.StudentTTest(heightsOfMen, heightsOfWomen);

            Console.WriteLine($"{tTest.Statistic.Name} = {tTest.Statistic.Value}");
            Console.WriteLine($"P = {tTest.Probability}");

            // Further tests, computed for illustration only; their results are not printed.
            TestResult normalityOfMen   = heightsOfMen.ShapiroFranciaTest();
            TestResult normalityOfAll   = complete["Height"].As <double>().ShapiroFranciaTest();
            TestResult menVersusWomenKs = Univariate.KolmogorovSmirnovTest(heightsOfMen, heightsOfWomen);

            // Linear regression relating weight to height.
            LinearRegressionResult weightModel =
                complete["Weight"].As <double>().LinearRegression(complete["Height"].As <double>());

            Console.WriteLine($"Model weight = ({weightModel.Slope}) * height + ({weightModel.Intercept}).");
            Console.WriteLine($"Model explains {weightModel.RSquared * 100.0}% of variation.");

            // Cross-tabulate sex against result.
            ContingencyTable <string, bool> sexByResult =
                Bivariate.Crosstabs(complete["Sex"].As <string>(), complete["Result"].As <bool>());

            Console.WriteLine($"Male incidence: {sexByResult.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {sexByResult.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {sexByResult.Binary.LogOddsRatio}");

            // Derived columns: "Bmi" is weight divided by the square of (height / 100);
            // "Age" is the time since the birthdate in days divided by 365.24.
            complete.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
            complete.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of the result on Bmi and a 1.0/0.0 indicator for "M".
            MultiLinearLogisticRegressionResult logistic =
                complete["Result"].As <bool>().MultiLinearLogisticRegression(
                    complete["Bmi"].As <double>(),
                    complete["Sex"].As <string, double>(s => s == "M" ? 1.0 : 0.0)
                    );

            foreach (Parameter fitted in logistic.Parameters)
            {
                Console.WriteLine($"{fitted.Name} = {fitted.Estimate}");
            }

            // Rank correlation between age and result.
            TestResult rhoTest = Bivariate.SpearmanRhoTest(complete["Age"].As <double>(), complete["Result"].As <double>());

            Console.WriteLine($"{rhoTest.Statistic.Name} = {rhoTest.Statistic.Value} P = {rhoTest.Probability}");
        }
コード例 #10
0
 /// <summary>
 /// Requests the binary (2x2-only) operations of a 2x3 contingency table.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the method name suggests the <c>Binary</c> access is expected to
 /// throw, presumably checked by an expected-exception attribute that is not visible
 /// here. Confirm against the test class.
 /// </remarks>
 public void BinaryContingencyInvalidConstructionTest()
 {
     ContingencyTable table = new ContingencyTable(2, 3);

     // The property access itself is the operation under test.
     BinaryContingencyTableOperations binary = table.Binary;
 }