// Checks that, for independent x and y, the discrete statistics reported by the
// Spearman and Kendall tests follow the null distributions those tests claim.
// For each sample size, many independent samples are drawn, the reported integer
// statistics are collected, and a chi-squared test compares them to the claimed
// distribution.
public void BivariateAssociationDiscreteNullDistribution()
{
    const int trialsPerSize = 512;
    const double rejectionLevel = 0.01;

    // Fixed seed keeps the test reproducible.
    Random random = new Random(1);

    // Deliberately non-normal sources: the tests under scrutiny are non-parametric.
    ContinuousDistribution xSource = new FrechetDistribution(1.0);
    ContinuousDistribution ySource = new CauchyDistribution();

    // Small sample sizes, for which the tests are expected to report exact discrete distributions.
    foreach (int sampleSize in TestUtilities.GenerateIntegerValues(4, 24, 4))
    {
        // Observed statistic values, plus the most recently reported claimed distribution.
        List<int> observedSpearman = new List<int>();
        List<int> observedKendall = new List<int>();
        DiscreteDistribution claimedSpearman = null;
        DiscreteDistribution claimedKendall = null;

        for (int trial = 0; trial < trialsPerSize; trial++)
        {
            // Draw one bivariate sample; x and y draws alternate on the shared generator.
            List<double> xs = new List<double>();
            List<double> ys = new List<double>();
            for (int k = 0; k < sampleSize; k++)
            {
                xs.Add(xSource.GetRandomValue(random));
                ys.Add(ySource.GetRandomValue(random));
            }

            // A null underlying statistic means no discrete statistic was reported; skip it.
            DiscreteTestStatistic rho = Bivariate.SpearmanRhoTest(xs, ys).UnderlyingStatistic;
            if (rho != null)
            {
                observedSpearman.Add(rho.Value);
                claimedSpearman = rho.Distribution;
            }

            DiscreteTestStatistic tau = Bivariate.KendallTauTest(xs, ys).UnderlyingStatistic;
            if (tau != null)
            {
                observedKendall.Add(tau.Value);
                claimedKendall = tau.Distribution;
            }
        }

        // Compare the observed statistics against the claimed distributions.
        if (claimedSpearman != null)
        {
            TestResult spearmanFit = observedSpearman.ChiSquaredTest(claimedSpearman);
            Assert.IsTrue(spearmanFit.Probability > rejectionLevel);
        }

        if (claimedKendall != null)
        {
            TestResult kendallFit = observedKendall.ChiSquaredTest(claimedKendall);
            Assert.IsTrue(kendallFit.Probability > rejectionLevel);
        }
    }
}
// Checks that, for independent x and y, the statistics reported by the Pearson,
// Spearman, and Kendall tests are distributed according to the distributions the
// tests themselves report. Many samples are generated, each statistic is recorded
// in a frame table, and a Kolmogorov-Smirnov test compares each column to the
// corresponding claimed distribution.
public void BivariateNullAssociation()
{
    const int sampleCount = 100;
    const int sampleSize = 100;
    const double rejectionLevel = 0.05;

    // Fixed seed keeps the test reproducible.
    Random random = new Random(31415926);

    // One column per test statistic.
    FrameTable results = new FrameTable();
    results.AddColumn<double>("r");
    results.AddColumn<double>("ρ");
    results.AddColumn<double>("τ");

    // Claimed null distributions; overwritten on every iteration, so the last report wins.
    ContinuousDistribution claimedPearson = null;
    ContinuousDistribution claimedSpearman = null;
    ContinuousDistribution claimedKendall = null;

    // Independent sources for the two variables.
    ContinuousDistribution xSource = new LognormalDistribution();
    ContinuousDistribution ySource = new CauchyDistribution();

    for (int sample = 0; sample < sampleCount; sample++)
    {
        // Draw one bivariate sample; x and y draws alternate on the shared generator.
        List<double> xs = new List<double>();
        List<double> ys = new List<double>();
        for (int k = 0; k < sampleSize; k++)
        {
            xs.Add(xSource.GetRandomValue(random));
            ys.Add(ySource.GetRandomValue(random));
        }

        TestResult pearson = Bivariate.PearsonRTest(xs, ys);
        TestResult spearman = Bivariate.SpearmanRhoTest(xs, ys);
        TestResult kendall = Bivariate.KendallTauTest(xs, ys);

        claimedPearson = pearson.Statistic.Distribution;
        claimedSpearman = spearman.Statistic.Distribution;
        claimedKendall = kendall.Statistic.Distribution;

        // Record the three statistic values as one row.
        Dictionary<string, object> row = new Dictionary<string, object>();
        row.Add("r", pearson.Statistic.Value);
        row.Add("ρ", spearman.Statistic.Value);
        row.Add("τ", kendall.Statistic.Value);
        results.AddRow(row);
    }

    // Each column of recorded statistics must be compatible with its claimed distribution.
    var pearsonFit = results["r"].As<double>().KolmogorovSmirnovTest(claimedPearson);
    Assert.IsTrue(pearsonFit.Probability > rejectionLevel);

    var spearmanFit = results["ρ"].As<double>().KolmogorovSmirnovTest(claimedSpearman);
    Assert.IsTrue(spearmanFit.Probability > rejectionLevel);

    var kendallFit = results["τ"].As<double>().KolmogorovSmirnovTest(claimedKendall);
    Assert.IsTrue(kendallFit.Probability > rejectionLevel);
}
// Example: runs the Pearson, Spearman, and Kendall association tests on one small
// paired data set and writes each statistic and its P-value to the console.
public static void Association()
{
    // Six paired observations.
    double[] xValues = { -0.58, 0.92, 1.41, 1.62, 2.72, 3.14 };
    double[] yValues = { 1.00, 0.00, 2.00, 16.00, 18.0, 20.0 };

    // Pearson r.
    TestResult pearsonResult = Bivariate.PearsonRTest(xValues, yValues);
    var pearsonStatistic = pearsonResult.Statistic;
    Console.WriteLine($"Pearson {pearsonStatistic.Name} = {pearsonStatistic.Value}");
    Console.WriteLine($"{pearsonResult.Type} P = {pearsonResult.Probability}");

    // Spearman rho.
    TestResult spearmanResult = Bivariate.SpearmanRhoTest(xValues, yValues);
    var spearmanStatistic = spearmanResult.Statistic;
    Console.WriteLine($"Spearman {spearmanStatistic.Name} = {spearmanStatistic.Value}");
    Console.WriteLine($"{spearmanResult.Type} P = {spearmanResult.Probability}");

    // Kendall tau.
    TestResult kendallResult = Bivariate.KendallTauTest(xValues, yValues);
    var kendallStatistic = kendallResult.Statistic;
    Console.WriteLine($"Kendall {kendallStatistic.Name} = {kendallStatistic.Value}");
    Console.WriteLine($"{kendallResult.Type} P = {kendallResult.Probability}");
}
// Example walkthrough: downloads a CSV data set, loads it into a frame table, and
// demonstrates column access, summary statistics, two-sample and normality tests,
// linear regression, contingency tables, computed columns, logistic regression, and
// a rank-correlation test. Results are written to the console.
//
// Several locals (column4, heightsColumn, alsoHeightsColumn, heights, and the
// normality / compatibility test results) are intentionally unused: they exist only
// to illustrate the API calls.
public static void AnalyzingData()
{
    // Download the example data and parse it as CSV.
    FrameTable table;
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    {
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            table = FrameTable.FromCsv(reader);
        }
    }

    // Work on the view returned by WhereNotNull() for the rest of the example.
    FrameView view = table.WhereNotNull();

    // Get the column with (zero-based) index 4.
    FrameColumn column4 = view.Columns[4];
    // Get the column named "Height".
    FrameColumn heightsColumn = view.Columns["Height"];
    // Even easier way to get the column named "Height".
    FrameColumn alsoHeightsColumn = view["Height"];

    // Expose a column as a typed read-only list.
    IReadOnlyList<double> heights = view["Height"].As<double>();

    // Summary statistics of the heights.
    SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

    // Compare the heights of the rows with Sex "M" against those with Sex "F".
    IReadOnlyList<double> maleHeights = view.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
    IReadOnlyList<double> femaleHeights = view.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");

    // Further tests, shown for illustration; their results are not printed.
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Linear regression involving the Weight and Height columns.
    LinearRegressionResult fit = view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulate Sex against Result.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
    Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
    Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Computed columns: Bmi is Weight divided by the square of (Height / 100).
    view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));

    // Capture a single reference instant for the age computation. Reading DateTime.Now
    // inside the lambda would use a different instant on every evaluation, so rows with
    // identical birthdates could receive slightly different ages and break ties in the
    // rank test below.
    // NOTE(review): whether the column delegate is evaluated lazily per access is not
    // visible here; capturing the instant once is correct either way.
    DateTime referenceTime = DateTime.Now;
    view.AddComputedColumn("Age", r => (referenceTime - (DateTime)r["Birthdate"]).TotalDays / 365.24);

    // Logistic regression of Result on Bmi and an indicator that is 1.0 for Sex "M", else 0.0.
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters)
    {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Rank correlation between Age and Result (Result read as double).
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}