/// <summary>
/// Runs three two-sample tests (Student t, Mann-Whitney, Kolmogorov-Smirnov) on two
/// small hard-coded samples and writes each statistic and probability to the console.
/// </summary>
private static void CompareSamples()
{
    // Printing helpers shared by all three tests.
    Action<TestResult> writeStatistic =
        r => Console.WriteLine($"{r.Statistic.Name} = {r.Statistic.Value}");
    Action<TestResult> writeProbability =
        r => Console.WriteLine($"{r.Type} P = {r.Probability}");

    List<double> first = new List<double>() { 130.0, 140.0, 150.0, 150.0, 160.0, 190.0 };
    List<double> second = new List<double>() { 120.0, 150.0, 180.0, 170.0, 185.0, 175.0, 190.0, 200.0 };

    // Student t-test: report the probability for the result's initial test type,
    // then switch the same result to left-tailed and report again.
    TestResult tTest = Univariate.StudentTTest(first, second);
    writeStatistic(tTest);
    writeProbability(tTest);
    tTest.Type = TestType.LeftTailed;
    writeProbability(tTest);

    // Mann-Whitney test.
    TestResult uTest = Univariate.MannWhitneyTest(first, second);
    writeStatistic(uTest);
    writeProbability(uTest);

    // Two-sample Kolmogorov-Smirnov test.
    TestResult ksTest = Univariate.KolmogorovSmirnovTest(first, second);
    writeStatistic(ksTest);
    writeProbability(ksTest);
}
/// <summary>
/// Checks that the two-sample Kolmogorov-Smirnov statistic, computed repeatedly for pairs
/// of samples created from the same exponential population, is consistent with the null
/// distribution reported by the test result itself.
/// </summary>
/// <remarks>
/// Every combination of the sample sizes 23, 30 and 175 is exercised. For each combination
/// the statistic is collected over 128 sample pairs and compared against the reported
/// null distribution.
/// </remarks>
public void TwoSampleKolmogorovNullDistributionTest()
{
    // Fixed seed keeps the random sequence reproducible from run to run.
    Random rng = new Random(4);
    ContinuousDistribution population = new ExponentialDistribution();

    int[] sizes = new int[] { 23, 30, 175 };

    foreach (int na in sizes)
    {
        foreach (int nb in sizes)
        {
            // Values of the KS statistic observed for this (na, nb) combination.
            Sample d = new Sample();
            ContinuousDistribution nullDistribution = null;

            for (int i = 0; i < 128; i++)
            {
                List<double> a = TestUtilities.CreateDataSample(rng, population, na).ToList();
                List<double> b = TestUtilities.CreateDataSample(rng, population, nb).ToList();

                TestResult r = Univariate.KolmogorovSmirnovTest(a, b);
                d.Add(r.Statistic.Value);

                // Keep the null distribution reported by the most recent result.
                nullDistribution = r.Statistic.Distribution;
            }

            // Only do full KS test if the number of bins is larger than the sample size, otherwise we are going to fail
            // because the KS test detects the granularity of the distribution.
            // (The test result is computed unconditionally; only the assertion is gated.)
            TestResult mr = d.KolmogorovSmirnovTest(nullDistribution);
            if (AdvancedIntegerMath.LCM(na, nb) > d.Count)
            {
                Assert.IsTrue(mr.Probability > 0.01);
            }

            // But always test that mean and standard deviation are as expected.
            // This test is actually a bit sensitive, probably because the discrete-ness of the underlying distribution
            // and the inaccuracy of the asymptotic approximation for intermediate sample size make strict comparisons iffy.
            Assert.IsTrue(d.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
            Assert.IsTrue(d.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
        }
    }
}
/// <summary>
/// Sample walkthrough: downloads an example CSV file, loads it into a
/// <see cref="FrameTable"/>, and runs a series of analyses on its columns
/// (summary statistics, two-sample tests, linear regression, a contingency table,
/// logistic regression and a Spearman rho test), writing selected results to the console.
/// </summary>
/// <remarks>
/// Requires network access to fetch the CSV. Several locals are assigned but never read;
/// they are kept on purpose to illustrate the different ways of reaching a column or
/// running a test.
/// </remarks>
public static void AnalyzingData()
{
    // Download the example data and parse it as CSV.
    FrameTable table;
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    {
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            table = FrameTable.FromCsv(reader);
        }
    }

    // All subsequent analysis works on the view returned by WhereNotNull().
    FrameView view = table.WhereNotNull();

    // Get the column with (zero-based) index 4.
    FrameColumn column4 = view.Columns[4];
    // Get the column named "Height".
    FrameColumn heightsColumn = view.Columns["Height"];
    // Even easier way to get the column named "Height".
    FrameColumn alsoHeightsColumn = view["Height"];

    IReadOnlyList<double> heights = view["Height"].As<double>();

    // Descriptive statistics of the heights.
    SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

    // Split heights by the "Sex" column ("M" / "F") and compare the two groups.
    IReadOnlyList<double> maleHeights = view.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
    IReadOnlyList<double> femaleHeights = view.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");

    // Further tests whose results are computed but not printed.
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Linear regression between the "Weight" and "Height" columns.
    LinearRegressionResult fit = view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulate "Sex" against the boolean "Result" column.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
    Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
    Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Derived columns: "Bmi" is weight / (height / 100)^2, "Age" is the time since
    // "Birthdate" in days divided by 365.24.
    view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
    view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

    // Logistic regression of "Result" on "Bmi" and a 0/1 encoding of "Sex" ("M" -> 1.0).
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters)
    {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Spearman rho test between "Age" and "Result".
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}