public void BivariateAssociationDiscreteNullDistribution()
        {
            // Number of independent samples drawn for each sample size.
            const int trialCount = 512;

            // Fixed seed keeps the generated samples reproducible from run to run.
            var random = new Random(1);

            // Deliberately non-normal sources for x and y, since the tests under scrutiny are non-parametric.
            ContinuousDistribution xSource = new FrechetDistribution(1.0);
            ContinuousDistribution ySource = new CauchyDistribution();

            // Sample sizes are kept small so that exact distributions are reported.
            foreach (int sampleSize in TestUtilities.GenerateIntegerValues(4, 24, 4))
            {
                // Statistic values observed over all trials, plus the distribution
                // most recently claimed for each statistic.
                var rhoValues = new List<int>();
                var tauValues = new List<int>();
                DiscreteDistribution rhoClaimed = null;
                DiscreteDistribution tauClaimed = null;

                for (int trial = 0; trial < trialCount; trial++)
                {
                    // Draw x and y alternately from the shared generator.
                    var xs = new List<double>();
                    var ys = new List<double>();
                    for (int k = 0; k < sampleSize; k++)
                    {
                        xs.Add(xSource.GetRandomValue(random));
                        ys.Add(ySource.GetRandomValue(random));
                    }

                    // Only record a trial for a test when it exposes an underlying discrete statistic.
                    DiscreteTestStatistic rho = Bivariate.SpearmanRhoTest(xs, ys).UnderlyingStatistic;
                    if (rho != null)
                    {
                        rhoClaimed = rho.Distribution;
                        rhoValues.Add(rho.Value);
                    }

                    DiscreteTestStatistic tau = Bivariate.KendallTauTest(xs, ys).UnderlyingStatistic;
                    if (tau != null)
                    {
                        tauClaimed = tau.Distribution;
                        tauValues.Add(tau.Value);
                    }
                }

                // Chi-squared check: the observed statistics should be compatible with the claimed distribution.
                if (rhoClaimed != null)
                {
                    TestResult rhoFit = rhoValues.ChiSquaredTest(rhoClaimed);
                    Assert.IsTrue(rhoFit.Probability > 0.01);
                }

                if (tauClaimed != null)
                {
                    TestResult tauFit = tauValues.ChiSquaredTest(tauClaimed);
                    Assert.IsTrue(tauFit.Probability > 0.01);
                }
            }
        }
        public void BivariateNullAssociation()
        {
            // How many bivariate samples to generate, and how many points each one holds.
            const int sampleCount = 100;
            const int sampleSize = 100;

            var random = new Random(31415926);

            // One column per test statistic: Pearson r, Spearman ρ, Kendall τ.
            var results = new FrameTable();
            results.AddColumn<double>("r");
            results.AddColumn<double>("ρ");
            results.AddColumn<double>("τ");

            // The distribution each test claims for its statistic; overwritten on every iteration,
            // so the values from the final sample are the ones checked below.
            ContinuousDistribution pearsonClaimed = null;
            ContinuousDistribution spearmanClaimed = null;
            ContinuousDistribution kendallClaimed = null;

            // x and y are drawn separately from two different distributions.
            ContinuousDistribution xSource = new LognormalDistribution();
            ContinuousDistribution ySource = new CauchyDistribution();

            for (int sample = 0; sample < sampleCount; sample++)
            {
                var xs = new List<double>();
                var ys = new List<double>();
                for (int k = 0; k < sampleSize; k++)
                {
                    xs.Add(xSource.GetRandomValue(random));
                    ys.Add(ySource.GetRandomValue(random));
                }

                // Run all three association tests on the same sample.
                TestResult pearson = Bivariate.PearsonRTest(xs, ys);
                TestResult spearman = Bivariate.SpearmanRhoTest(xs, ys);
                TestResult kendall = Bivariate.KendallTauTest(xs, ys);

                pearsonClaimed = pearson.Statistic.Distribution;
                spearmanClaimed = spearman.Statistic.Distribution;
                kendallClaimed = kendall.Statistic.Distribution;

                // Record the three statistic values as one row.
                var row = new Dictionary<string, object>();
                row.Add("r", pearson.Statistic.Value);
                row.Add("ρ", spearman.Statistic.Value);
                row.Add("τ", kendall.Statistic.Value);
                results.AddRow(row);
            }

            // Each column of recorded statistics should pass a Kolmogorov-Smirnov test against its claimed distribution.
            TestResult pearsonFit = results["r"].As<double>().KolmogorovSmirnovTest(pearsonClaimed);
            Assert.IsTrue(pearsonFit.Probability > 0.05);

            TestResult spearmanFit = results["ρ"].As<double>().KolmogorovSmirnovTest(spearmanClaimed);
            Assert.IsTrue(spearmanFit.Probability > 0.05);

            TestResult kendallFit = results["τ"].As<double>().KolmogorovSmirnovTest(kendallClaimed);
            Assert.IsTrue(kendallFit.Probability > 0.05);
        }
Example #3
0
        public static void Association()
        {
            // Six paired observations used by all three association tests.
            double[] x = { -0.58, 0.92, 1.41, 1.62, 2.72, 3.14 };
            double[] y = { 1.00, 0.00, 2.00, 16.00, 18.0, 20.0 };

            // Pearson r: print the statistic, then the test type and probability.
            TestResult pearson = Bivariate.PearsonRTest(x, y);
            var r = pearson.Statistic;
            Console.WriteLine($"Pearson {r.Name} = {r.Value}");
            Console.WriteLine($"{pearson.Type} P = {pearson.Probability}");

            // Spearman rho, reported the same way.
            TestResult spearman = Bivariate.SpearmanRhoTest(x, y);
            var rho = spearman.Statistic;
            Console.WriteLine($"Spearman {rho.Name} = {rho.Value}");
            Console.WriteLine($"{spearman.Type} P = {spearman.Probability}");

            // Kendall tau, reported the same way.
            TestResult kendall = Bivariate.KendallTauTest(x, y);
            var tau = kendall.Statistic;
            Console.WriteLine($"Kendall {tau.Name} = {tau.Value}");
            Console.WriteLine($"{kendall.Type} P = {kendall.Probability}");
        }
Example #4
0
        public static void AnalyzingData()
        {
            // Fetch the example CSV over HTTP and parse it into a table.
            Uri source = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest download = WebRequest.Create(source);
            FrameTable table;

            using (WebResponse reply = download.GetResponse())
            using (StreamReader csv = new StreamReader(reply.GetResponseStream()))
            {
                table = FrameTable.FromCsv(csv);
            }

            // All further analysis runs on the view returned by WhereNotNull().
            FrameView view = table.WhereNotNull();

            // Three ways to reach a column: by zero-based index, by name via Columns,
            // and by name via the view's own indexer.
            FrameColumn fifthColumn = view.Columns[4];
            FrameColumn heightByName = view.Columns["Height"];
            FrameColumn heightByIndexer = view["Height"];

            // A column exposed as a typed, read-only list.
            IReadOnlyList<double> heights = view["Height"].As<double>();

            // Summary statistics of the Height column.
            SummaryStatistics stats = new SummaryStatistics(view["Height"].As<double>());
            Console.WriteLine($"Count = {stats.Count}");
            Console.WriteLine($"Mean = {stats.Mean}");
            Console.WriteLine($"Standard Deviation = {stats.StandardDeviation}");
            Console.WriteLine($"Skewness = {stats.Skewness}");
            Console.WriteLine($"Estimated population mean = {stats.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {stats.PopulationStandardDeviation}");

            // Split heights by the Sex column and compare the two groups with a Student t-test.
            var males = view.Where<string>("Sex", sex => sex == "M");
            IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
            var females = view.Where<string>("Sex", sex => sex == "F");
            IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

            TestResult tTest = Univariate.StudentTTest(maleHeights, femaleHeights);
            Console.WriteLine($"{tTest.Statistic.Name} = {tTest.Statistic.Value}");
            Console.WriteLine($"P = {tTest.Probability}");

            // Further tests, computed for illustration only; their results are not printed.
            TestResult maleNormality = maleHeights.ShapiroFranciaTest();
            TestResult overallNormality = view["Height"].As<double>().ShapiroFranciaTest();
            TestResult maleFemaleCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            // Linear regression of Weight on Height.
            var weights = view["Weight"].As<double>();
            var heightValues = view["Height"].As<double>();
            LinearRegressionResult regression = weights.LinearRegression(heightValues);
            Console.WriteLine($"Model weight = ({regression.Slope}) * height + ({regression.Intercept}).");
            Console.WriteLine($"Model explains {regression.RSquared * 100.0}% of variation.");

            // Cross-tabulate Sex against the boolean Result column.
            var sexes = view["Sex"].As<string>();
            var outcomes = view["Result"].As<bool>();
            ContingencyTable<string, bool> crosstab = Bivariate.Crosstabs(sexes, outcomes);
            Console.WriteLine($"Male incidence: {crosstab.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {crosstab.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {crosstab.Binary.LogOddsRatio}");

            // Computed columns: Bmi is Weight over the square of (Height / 100);
            // Age is the number of days since Birthdate divided by 365.24.
            view.AddComputedColumn("Bmi", row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0));
            view.AddComputedColumn("Age", row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of Result on Bmi and a 1.0/0.0 indicator for Sex == "M".
            var bmi = view["Bmi"].As<double>();
            var maleIndicator = view["Sex"].As<string, double>(sex => sex == "M" ? 1.0 : 0.0);
            MultiLinearLogisticRegressionResult logistic =
                view["Result"].As<bool>().MultiLinearLogisticRegression(bmi, maleIndicator);

            foreach (Parameter fitted in logistic.Parameters)
            {
                Console.WriteLine($"{fitted.Name} = {fitted.Estimate}");
            }

            // Rank correlation between Age and Result (read here as a double column).
            var ages = view["Age"].As<double>();
            var outcomeValues = view["Result"].As<double>();
            TestResult rho = Bivariate.SpearmanRhoTest(ages, outcomeValues);
            Console.WriteLine($"{rho.Statistic.Name} = {rho.Statistic.Value} P = {rho.Probability}");
        }