Ejemplo n.º 1
0
        /// <summary>
        /// Runs three two-sample comparisons (Student t, Mann-Whitney, Kolmogorov-Smirnov)
        /// on a pair of small hard-coded samples and writes each test's statistic and
        /// probability to the console.
        /// </summary>
        private static void CompareSamples()
        {
            var firstSample = new List<double> { 130.0, 140.0, 150.0, 150.0, 160.0, 190.0 };
            var secondSample = new List<double> { 120.0, 150.0, 180.0, 170.0, 185.0, 175.0, 190.0, 200.0 };

            // Student t-test: print the probability for the result's initial test type,
            // then switch the same result object to left-tailed and print it again.
            TestResult tTest = Univariate.StudentTTest(firstSample, secondSample);
            var tStatistic = tTest.Statistic;
            Console.WriteLine($"{tStatistic.Name} = {tStatistic.Value}");
            Console.WriteLine($"{tTest.Type} P = {tTest.Probability}");
            tTest.Type = TestType.LeftTailed;
            Console.WriteLine($"{tTest.Type} P = {tTest.Probability}");

            // Mann-Whitney test on the same two samples.
            TestResult rankTest = Univariate.MannWhitneyTest(firstSample, secondSample);
            var rankStatistic = rankTest.Statistic;
            Console.WriteLine($"{rankStatistic.Name} = {rankStatistic.Value}");
            Console.WriteLine($"{rankTest.Type} P = {rankTest.Probability}");

            // Two-sample Kolmogorov-Smirnov test on the same two samples.
            TestResult ksTest = Univariate.KolmogorovSmirnovTest(firstSample, secondSample);
            var ksStatistic = ksTest.Statistic;
            Console.WriteLine($"{ksStatistic.Name} = {ksStatistic.Value}");
            Console.WriteLine($"{ksTest.Type} P = {ksTest.Probability}");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// For every pairing of the sample sizes 23, 30 and 175, repeatedly runs the two-sample
        /// Kolmogorov-Smirnov test on pairs of samples created from one exponential population,
        /// collects the resulting statistic values, and checks them against the null distribution
        /// reported by the test result.
        /// </summary>
        public void TwoSampleKolmogorovNullDistributionTest()
        {
            // Fixed seed keeps the generated samples reproducible.
            var rng = new Random(4);
            ContinuousDistribution population = new ExponentialDistribution();

            const int trials = 128;
            int[] sizes = { 23, 30, 175 };

            foreach (int na in sizes)
            {
                foreach (int nb in sizes)
                {
                    var statistics = new Sample();
                    ContinuousDistribution expected = null;

                    for (int trial = 0; trial < trials; trial++)
                    {
                        // Both samples come from the same population; the first is created before the second.
                        List<double> first = TestUtilities.CreateDataSample(rng, population, na).ToList();
                        List<double> second = TestUtilities.CreateDataSample(rng, population, nb).ToList();

                        TestResult comparison = Univariate.KolmogorovSmirnovTest(first, second);
                        statistics.Add(comparison.Statistic.Value);

                        // Keep the distribution reported by the most recent trial.
                        expected = comparison.Statistic.Distribution;
                    }

                    // The fit of the collected statistics to the null distribution is always computed,
                    // but its probability is only asserted when LCM(na, nb) exceeds the number of collected
                    // values; otherwise the KS test would pick up the granularity of the distribution and fail.
                    TestResult fit = statistics.KolmogorovSmirnovTest(expected);
                    bool fineGrained = AdvancedIntegerMath.LCM(na, nb) > statistics.Count;
                    if (fineGrained)
                    {
                        Assert.IsTrue(fit.Probability > 0.01);
                    }

                    // The mean and standard deviation are checked in every case.
                    // These checks are somewhat sensitive, probably because the discreteness of the underlying
                    // distribution and the inaccuracy of the asymptotic approximation at intermediate sample
                    // sizes make strict comparisons iffy.
                    var meanInterval = statistics.PopulationMean.ConfidenceInterval(0.99);
                    Assert.IsTrue(meanInterval.ClosedContains(expected.Mean));

                    var deviationInterval = statistics.PopulationStandardDeviation.ConfidenceInterval(0.99);
                    Assert.IsTrue(deviationInterval.ClosedContains(expected.StandardDeviation));
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Downloads the example CSV file from the Meta.Numerics GitHub repository, loads it into a
        /// <c>FrameTable</c>, and walks through a series of analyses on it (summary statistics,
        /// two-sample and normality tests, linear regression, a contingency table, computed columns,
        /// logistic regression and a Spearman test), writing selected results to the console.
        /// </summary>
        public static void AnalyzingData()
        {
            Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest request = WebRequest.Create(url);

            // Read the CSV response straight into a table; the reader and response are disposed afterwards.
            FrameTable table;
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                table = FrameTable.FromCsv(reader);
            }

            // All further work uses the view returned by WhereNotNull()
            // (presumably the rows without null entries -- confirm against the library docs).
            FrameView view = table.WhereNotNull();

            // Three ways of getting at a column: by zero-based index, by name via Columns,
            // and by name via the view's own indexer.
            FrameColumn fifthColumn = view.Columns[4];
            FrameColumn heightByColumns = view.Columns["Height"];
            FrameColumn heightByIndexer = view["Height"];

            // Typed, read-only access to the "Height" values.
            IReadOnlyList<double> heights = view["Height"].As<double>();

            SummaryStatistics heightSummary = new SummaryStatistics(heights);
            Console.WriteLine($"Count = {heightSummary.Count}");
            Console.WriteLine($"Mean = {heightSummary.Mean}");
            Console.WriteLine($"Standard Deviation = {heightSummary.StandardDeviation}");
            Console.WriteLine($"Skewness = {heightSummary.Skewness}");
            Console.WriteLine($"Estimated population mean = {heightSummary.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {heightSummary.PopulationStandardDeviation}");

            // Split the heights by the "Sex" column and compare the two groups.
            var males = view.Where<string>("Sex", sex => sex == "M");
            IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
            var females = view.Where<string>("Sex", sex => sex == "F");
            IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

            TestResult heightTTest = Univariate.StudentTTest(maleHeights, femaleHeights);
            Console.WriteLine($"{heightTTest.Statistic.Name} = {heightTTest.Statistic.Value}");
            Console.WriteLine($"P = {heightTTest.Probability}");

            // Further tests whose results are computed but not printed.
            TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
            TestResult totalHeightNormality = heights.ShapiroFranciaTest();
            TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            // Linear regression of weight on height.
            IReadOnlyList<double> weights = view["Weight"].As<double>();
            LinearRegressionResult regression = weights.LinearRegression(heights);
            Console.WriteLine($"Model weight = ({regression.Slope}) * height + ({regression.Intercept}).");
            Console.WriteLine($"Model explains {regression.RSquared * 100.0}% of variation.");

            // Cross-tabulate "Sex" against the boolean "Result" column.
            ContingencyTable<string, bool> crosstab =
                Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
            Console.WriteLine($"Male incidence: {crosstab.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {crosstab.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {crosstab.Binary.LogOddsRatio}");

            // Computed columns: "Bmi" is weight divided by the square of (height / 100);
            // "Age" is the time elapsed since "Birthdate", in days divided by 365.24,
            // measured against DateTime.Now at the moment the lambda is evaluated.
            view.AddComputedColumn("Bmi", row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0));
            view.AddComputedColumn("Age", row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of "Result" on "Bmi" and a 0/1 encoding of "Sex" (1.0 for "M").
            MultiLinearLogisticRegressionResult logistic =
                view["Result"].As<bool>().MultiLinearLogisticRegression(
                    view["Bmi"].As<double>(),
                    view["Sex"].As<string, double>(sex => sex == "M" ? 1.0 : 0.0));

            foreach (Parameter fitted in logistic.Parameters)
            {
                Console.WriteLine($"{fitted.Name} = {fitted.Estimate}");
            }

            // Spearman test between "Age" and "Result" (the latter read as double).
            TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
            Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
        }