示例#1
0
        /// <summary>
        /// Generates noisy samples of a known polynomial, fits a polynomial of the same degree,
        /// and verifies that the fitted coefficients, residuals, and intercept are consistent.
        /// </summary>
        public void BivariatePolynomialRegressionSimple()
        {
            // The generating model, built from the coefficient list (3.0, -2.0, 1.0).
            Polynomial truth = Polynomial.FromCoefficients(3.0, -2.0, 1.0);

            // Draw ten abscissas, then add normal noise to the model value at each one.
            // A single seeded generator feeds both steps, so the draw order is significant.
            Random rng = new Random(1);
            ContinuousDistribution abscissaDistribution = new CauchyDistribution(1.0, 2.0);
            ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 3.0);

            List<double> xs = new List<double>(TestUtilities.CreateDataSample(rng, abscissaDistribution, 10));
            List<double> ys = new List<double>();
            foreach (double x in xs)
            {
                ys.Add(truth.Evaluate(x) + noiseDistribution.GetRandomValue(rng));
            }

            PolynomialRegressionResult fit = Bivariate.PolynomialRegression(ys, xs, truth.Degree);

            // There is one fitted parameter per coefficient, and the 99% confidence
            // interval of each fitted coefficient must contain the generating coefficient.
            Assert.IsTrue(fit.Parameters.Count == truth.Degree + 1);
            for (int order = 0; order <= truth.Degree; order++)
            {
                var interval = fit.Coefficient(order).ConfidenceInterval(0.99);
                Assert.IsTrue(interval.ClosedContains(truth.Coefficient(order)));
            }

            // Each reported residual must equal observed y minus the fit's prediction at x.
            Assert.IsTrue(fit.Residuals.Count == xs.Count);
            for (int i = 0; i < xs.Count; i++)
            {
                double predicted = fit.Predict(xs[i]).Value;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(ys[i] - predicted, fit.Residuals[i]));
            }

            // The intercept is the same quantity as the coefficient of x^0.
            Assert.IsTrue(fit.Intercept == fit.Coefficient(0));
        }
示例#2
0
        /// <summary>
        /// Demonstrates building a contingency table, reading its totals and estimated
        /// probabilities, and running association tests on it. Every result is written
        /// to the console.
        /// </summary>
        public static void ContingencyTable()
        {
            // Rows are labelled "P"/"N" and columns true/false; the console messages below
            // describe rows as test results and columns as the actual condition.
            ContingencyTable <string, bool> contingency = new ContingencyTable <string, bool>(
                new string[] { "P", "N" }, new bool[] { true, false }
                );

            // Fill in the four cell counts directly.
            contingency["P", true]  = 35;
            contingency["P", false] = 65;
            contingency["N", true]  = 4;
            contingency["N", false] = 896;

            // Alternative construction: tabulate two parallel lists of observations.
            // The resulting table is shown for illustration only and is not used below.
            IReadOnlyList <string>          x = new string[] { "N", "P", "N", "N", "P", "N", "N", "N", "P" };
            IReadOnlyList <bool>            y = new bool[] { false, false, false, true, true, false, false, false, true };
            ContingencyTable <string, bool> contingencyFromLists = Bivariate.Crosstabs(x, y);

            // Marginal and grand totals.
            foreach (string row in contingency.Rows)
            {
                Console.WriteLine($"Total count of {row}: {contingency.RowTotal(row)}");
            }
            foreach (bool column in contingency.Columns)
            {
                Console.WriteLine($"Total count of {column}: {contingency.ColumnTotal(column)}");
            }
            Console.WriteLine($"Total counts: {contingency.Total}");

            // Estimated marginal probabilities, reported with their uncertainties.
            foreach (string row in contingency.Rows)
            {
                UncertainValue probability = contingency.ProbabilityOfRow(row);
                Console.WriteLine($"Estimated probability of {row}: {probability}");
            }
            foreach (bool column in contingency.Columns)
            {
                UncertainValue probability = contingency.ProbabilityOfColumn(column);
                // Message text corrected: the original misspelled "probability" here.
                Console.WriteLine($"Estimated probability of {column}: {probability}");
            }

            // Conditional probabilities in both directions.
            UncertainValue sensitivity = contingency.ProbabilityOfRowConditionalOnColumn("P", true);

            Console.WriteLine($"Chance of P result given true condition: {sensitivity}");
            UncertainValue precision = contingency.ProbabilityOfColumnConditionalOnRow(true, "P");

            Console.WriteLine($"Chance of true condition given P result: {precision}");

            // Measures and tests of association between rows and columns.
            UncertainValue logOddsRatio = contingency.Binary.LogOddsRatio;

            Console.WriteLine($"log(r) = {logOddsRatio}");

            TestResult pearson = contingency.PearsonChiSquaredTest();

            Console.WriteLine($"Pearson χ² = {pearson.Statistic.Value} has P = {pearson.Probability}.");

            TestResult fisher = contingency.Binary.FisherExactTest();

            Console.WriteLine($"Fisher exact test has P = {fisher.Probability}.");
        }
示例#3
0
        /// <summary>
        /// For small samples of independent x and y values, checks that the discrete Spearman
        /// and Kendall statistics are distributed according to the null distributions that the
        /// tests themselves report.
        /// </summary>
        public void BivariateAssociationDiscreteNullDistribution()
        {
            Random rng = new Random(1);

            // Deliberately non-normal sources, since the tests under study are non-parametric.
            ContinuousDistribution xSource = new FrechetDistribution(1.0);
            ContinuousDistribution ySource = new CauchyDistribution();

            // Small sample sizes are used so that the tests report exact (discrete) distributions.
            foreach (int n in TestUtilities.GenerateIntegerValues(4, 24, 4))
            {
                // Statistic values observed at this sample size, and the last claimed null distributions.
                List<int> spearmanValues = new List<int>();
                List<int> kendallValues = new List<int>();
                DiscreteDistribution spearmanNull = null;
                DiscreteDistribution kendallNull = null;

                for (int trial = 0; trial < 512; trial++)
                {
                    // Build one sample; draws alternate x, y, x, y... from the shared generator.
                    List<double> x = new List<double>();
                    List<double> y = new List<double>();
                    for (int j = 0; j < n; j++)
                    {
                        x.Add(xSource.GetRandomValue(rng));
                        y.Add(ySource.GetRandomValue(rng));
                    }

                    // A null UnderlyingStatistic means no discrete statistic was reported; skip it.
                    DiscreteTestStatistic rho = Bivariate.SpearmanRhoTest(x, y).UnderlyingStatistic;
                    if (rho != null)
                    {
                        spearmanValues.Add(rho.Value);
                        spearmanNull = rho.Distribution;
                    }

                    DiscreteTestStatistic tau = Bivariate.KendallTauTest(x, y).UnderlyingStatistic;
                    if (tau != null)
                    {
                        kendallValues.Add(tau.Value);
                        kendallNull = tau.Distribution;
                    }
                }

                // Compare observed statistics with the claimed distribution via a chi-squared test.
                if (spearmanNull != null)
                {
                    TestResult spearmanCheck = spearmanValues.ChiSquaredTest(spearmanNull);
                    Assert.IsTrue(spearmanCheck.Probability > 0.01);
                }
                if (kendallNull != null)
                {
                    TestResult kendallCheck = kendallValues.ChiSquaredTest(kendallNull);
                    Assert.IsTrue(kendallCheck.Probability > 0.01);
                }
            }
        }
示例#4
0
    public static void bivariate_normal_cdf_values_test( )
    //****************************************************************************80
    //
    //  Purpose:
    //
    //    BIVARIATE_NORMAL_CDF_VALUES_TEST tests BIVARIATE_NORMAL_CDF_VALUES.
    //
    //  Discussion:
    //
    //    Prints every tabulated (X, Y, R, F) entry as one row of a table.
    //    The tabulated routine is called repeatedly; it signals the end of
    //    the data by setting N_DATA back to 0.
    //
    //  Licensing:
    //
    //    This code is distributed under the GNU LGPL license.
    //
    //  Modified:
    //
    //    23 May 2009
    //
    //  Author:
    //
    //    John Burkardt
    //
    {
        double fxy = 0;
        double r   = 0;
        double x   = 0;
        double y   = 0;

        Console.WriteLine("");
        Console.WriteLine("BIVARIATE_NORMAL_CDF_VALUES_TEST:");
        Console.WriteLine("  BIVARIATE_NORMAL_CDF_VALUES stores values of");
        Console.WriteLine("  the bivariate normal CDF.");
        Console.WriteLine("");
        Console.WriteLine("      X            Y            R            F(R)(X,Y)");
        Console.WriteLine("");

        // N_DATA = 0 asks the routine to start from the first tabulated entry.
        int n_data = 0;

        for (;;)
        {
            Bivariate.bivariate_normal_cdf_values(ref n_data, ref x, ref y, ref r, ref fxy);

            // A returned N_DATA of 0 means the table has been exhausted.
            if (n_data == 0)
            {
                break;
            }

            // Each value is printed exactly once, right-aligned in its column.
            // The function value uses up to 16 decimals and, like the other columns,
            // the invariant culture so the output does not depend on the machine locale.
            Console.WriteLine("  "
                              + x.ToString(CultureInfo.InvariantCulture).PadLeft(12) + "  "
                              + y.ToString(CultureInfo.InvariantCulture).PadLeft(12) + "  "
                              + r.ToString(CultureInfo.InvariantCulture).PadLeft(12) + "  "
                              + fxy.ToString("0.################", CultureInfo.InvariantCulture).PadLeft(24));
        }
    }
        /// <summary>
        /// Runs the Pearson, Spearman, and Kendall tests on many samples of independently drawn
        /// x and y values and checks that each statistic follows the distribution its test claims.
        /// </summary>
        public void BivariateNullAssociation()
        {
            Random rng = new Random(31415926);

            // One column per test statistic; one row per generated sample.
            FrameTable data = new FrameTable();
            data.AddColumn<double>("r");
            data.AddColumn<double>("ρ");
            data.AddColumn<double>("τ");

            // The claimed distribution of each statistic, as last reported by its test.
            ContinuousDistribution pearsonNull = null;
            ContinuousDistribution spearmanNull = null;
            ContinuousDistribution kendallNull = null;

            // x and y come from unrelated distributions.
            ContinuousDistribution xDistribution = new LognormalDistribution();
            ContinuousDistribution yDistribution = new CauchyDistribution();

            for (int trial = 0; trial < 100; trial++)
            {
                // Build a 100-point sample; draws alternate x, y from the shared generator.
                List<double> x = new List<double>();
                List<double> y = new List<double>();
                for (int point = 0; point < 100; point++)
                {
                    x.Add(xDistribution.GetRandomValue(rng));
                    y.Add(yDistribution.GetRandomValue(rng));
                }

                TestResult pearson = Bivariate.PearsonRTest(x, y);
                TestResult spearman = Bivariate.SpearmanRhoTest(x, y);
                TestResult kendall = Bivariate.KendallTauTest(x, y);

                pearsonNull = pearson.Statistic.Distribution;
                spearmanNull = spearman.Statistic.Distribution;
                kendallNull = kendall.Statistic.Distribution;

                // Record the three statistic values for this sample, keyed by column name.
                Dictionary<string, object> row = new Dictionary<string, object>();
                row.Add("r", pearson.Statistic.Value);
                row.Add("ρ", spearman.Statistic.Value);
                row.Add("τ", kendall.Statistic.Value);
                data.AddRow(row);
            }

            // Kolmogorov-Smirnov comparison of observed statistics against each claimed distribution.
            TestResult pearsonCheck = data["r"].As<double>().KolmogorovSmirnovTest(pearsonNull);
            Assert.IsTrue(pearsonCheck.Probability > 0.05);

            TestResult spearmanCheck = data["ρ"].As<double>().KolmogorovSmirnovTest(spearmanNull);
            Assert.IsTrue(spearmanCheck.Probability > 0.05);

            TestResult kendallCheck = data["τ"].As<double>().KolmogorovSmirnovTest(kendallNull);
            Assert.IsTrue(kendallCheck.Probability > 0.05);
        }
示例#6
0
        /// <summary>
        /// Fits the model y = a * x^b to many small noisy samples and verifies that the estimates
        /// cluster around the true parameters and that the reported parameter covariances are
        /// compatible with the scatter actually observed across fits.
        /// </summary>
        public void BivariateNonlinearFitVariances()
        {
            // True parameter values of the generating model.
            double trueA = 2.7;
            double trueB = 3.1;

            ContinuousDistribution xDistribution = new ExponentialDistribution(2.0);
            ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 4.0);

            // Best-fit (a, b) from each trial.
            FrameTable parameters = new FrameTable();
            parameters.AddColumns<double>("a", "b");

            // Reported (var a, var b, cov ab) from each trial.
            MultivariateSample covariances = new MultivariateSample(3);

            for (int trial = 0; trial < 64; trial++)
            {
                // Each trial gets its own generator, seeded by the trial index.
                Random rng = new Random(trial);
                BivariateSample sample = new BivariateSample();
                for (int point = 0; point < 8; point++)
                {
                    // Draw x first, then the noise term, from the same generator.
                    double x = xDistribution.GetRandomValue(rng);
                    double noise = noiseDistribution.GetRandomValue(rng);
                    sample.Add(x, trueA * Math.Pow(x, trueB) + noise);
                }

                // Fit the same functional form, starting from (1, 1).
                NonlinearRegressionResult fit = sample.NonlinearRegression(
                    (IReadOnlyList<double> theta, double t) => theta[0] * Math.Pow(t, theta[1]),
                    new double[] { 1.0, 1.0 }
                    );

                parameters.AddRow(fit.Parameters.ValuesVector);

                var reported = fit.Parameters.CovarianceMatrix;
                covariances.Add(reported[0, 0], reported[1, 1], reported[0, 1]);
            }

            var aEstimates = parameters["a"].As<double>();
            var bEstimates = parameters["b"].As<double>();

            // Estimates should be centered on the true values.
            Assert.IsTrue(aEstimates.PopulationMean().ConfidenceInterval(0.99).ClosedContains(trueA));
            Assert.IsTrue(bEstimates.PopulationMean().ConfidenceInterval(0.99).ClosedContains(trueB));

            // Observed scatter of the estimates should agree with the mean reported (co)variances.
            Assert.IsTrue(aEstimates.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(0).Mean));
            Assert.IsTrue(bEstimates.PopulationVariance().ConfidenceInterval(0.99).ClosedContains(covariances.Column(1).Mean));

            // The covariance is checked through both the extension-method and the static call form.
            Assert.IsTrue(aEstimates.PopulationCovariance(bEstimates).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
            Assert.IsTrue(Bivariate.PopulationCovariance(aEstimates, bEstimates).ConfidenceInterval(0.99).ClosedContains(covariances.Column(2).Mean));
        }
示例#7
0
        /// <summary>
        /// Runs the Pearson, Spearman, and Kendall association tests on one small paired data set
        /// and writes each test's statistic and P-value to the console.
        /// </summary>
        public static void Association()
        {
            double[] x = { -0.58, 0.92, 1.41, 1.62, 2.72, 3.14 };
            double[] y = { 1.00, 0.00, 2.00, 16.00, 18.0, 20.0 };

            // Writes the two output lines for one test: the named statistic, then the P-value.
            void Report(string label, TestResult result)
            {
                Console.WriteLine($"{label} {result.Statistic.Name} = {result.Statistic.Value}");
                Console.WriteLine($"{result.Type} P = {result.Probability}");
            }

            // Each test is evaluated immediately before its lines are printed.
            Report("Pearson", Bivariate.PearsonRTest(x, y));
            Report("Spearman", Bivariate.SpearmanRhoTest(x, y));
            Report("Kendall", Bivariate.KendallTauTest(x, y));
        }
示例#8
0
        /// <summary>
        /// Simulates two diagnostic tests applied to the same people, runs a McNemar test on each
        /// simulated sample, and checks the collected statistics against the claimed null distribution.
        /// </summary>
        public void McNemarTestDistribution()
        {
            // Population prevalence and the probability that each test reports the true status.
            double fractionPositive = 0.4;
            double aAccuracy = 0.2;
            double bAccuracy = 0.9;

            // NOTE(review): with these values the chance of a positive report works out to
            // 0.4*0.2 + 0.6*0.8 = 0.56 for test a and 0.4*0.9 + 0.6*0.1 = 0.42 for test b, so the
            // "null hypothesis is satisfied" premise stated below looks doubtful. Confirm intent.

            Random rng = new Random(1);
            List<double> statistics = new List<double>();
            ContinuousDistribution distribution = null;

            for (int trial = 0; trial < 32; trial++)
            {
                // Apply both tests to each of 64 people.
                List<bool> aResults = new List<bool>();
                List<bool> bResults = new List<bool>();
                for (int person = 0; person < 64; person++)
                {
                    // Draw order is fixed: true status, then test a's outcome, then test b's.
                    bool isPositive = rng.NextDouble() < fractionPositive;

                    bool aCorrect = rng.NextDouble() < aAccuracy;
                    aResults.Add(aCorrect ? isPositive : !isPositive);

                    bool bCorrect = rng.NextDouble() < bAccuracy;
                    bResults.Add(bCorrect ? isPositive : !isPositive);
                }

                // Cross-tabulate the paired results and run McNemar's test on the 2x2 table.
                // The original author's intent: by construction the tests should not be
                // differently weighted.
                ContingencyTable<bool, bool> table = Bivariate.Crosstabs(aResults, bResults);
                TestResult mcNemar = table.Binary.McNemarTest();
                statistics.Add(mcNemar.Statistic.Value);
                distribution = mcNemar.Statistic.Distribution;
            }

            // Under the null hypothesis the statistics should match the claimed distribution.
            TestResult agreement = statistics.KolmogorovSmirnovTest(distribution);

            Assert.IsTrue(agreement.Probability > 0.05);
        }
示例#9
0
 /*
  * Do two conic sections el and el1 intersect? Each is given in
  * bivariate form, ax^2 + bxy + cy^2 + dx + ey + f = 0.
  * The approach, as described by the original author: construct a
  * quartic that must have a real solution if the conics intersect.
  * YIntersect (defined elsewhere) is first asked about real Y
  * intersections; it is then called again with the roles of x and y
  * exchanged (A<->C, D<->E) to ask about real X intersections.
  * Both answers must be true for the conics to intersect.
  */
 bool ConicsIntersect(Bivariate el, Bivariate el1)
 {
     /* no real y intersection: the second check is skipped */
     if (!YIntersect(el.A, el.B, el.C, el.D, el.E, el.F, el1.A, el1.B, el1.C, el1.D, el1.E, el1.F))
     {
         return false;
     }

     /* same test with x and y swapped gives the real x intersection check */
     return YIntersect(el.C, el.B, el.A, el.E, el.D, el.F, el1.C, el1.B, el1.A, el1.E, el1.D, el1.F);
 }
示例#10
0
        /// <summary>
        /// Builds a contingency table from generated data that includes null labels in both
        /// dimensions, then verifies its counts, marginal totals, and inferred probabilities
        /// against the generating model.
        /// </summary>
        public void ContingencyTableProbabilities()
        {
            // Data design: (i) both reference nulls (string) and nullable-struct nulls (bool?) occur,
            // (ii) all group values are equally likely, (iii) the outcome depends on the group.
            List<string> groups = new List<string> { "A", "B", "C", null };

            FrameTable data = new FrameTable();
            data.AddColumn<string>("Group");
            data.AddColumn<bool?>("Outcome");

            int n = 512;
            double pOutcomeNull = 0.05;
            Random rng = new Random(10101010);

            // Probability of a true outcome for the group at the given index.
            double OutcomeProbability(int index) => 0.8 - 0.2 * index;

            for (int i = 0; i < n; i++)
            {
                // Draw order is fixed: group index, outcome, then whether the outcome is blanked.
                int pick = rng.Next(0, groups.Count);
                bool drawn = rng.NextDouble() < OutcomeProbability(pick);
                bool missing = rng.NextDouble() < pOutcomeNull;
                data.AddRow(groups[pick], missing ? (bool?)null : drawn);
            }

            // Form a contingency table from the two columns.
            ContingencyTable<string, bool?> table = Bivariate.Crosstabs(data["Group"].As<string>(), data["Outcome"].As<bool?>());

            // Every generated row is counted.
            Assert.IsTrue(table.Total == n);

            // Every row label of the table is one of the generating groups.
            foreach (string label in table.Rows)
            {
                Assert.IsTrue(groups.Contains(label));
            }

            // Each cell count matches a direct filter of the data, and cells sum to the row total.
            foreach (string group in table.Rows)
            {
                int cellSum = 0;
                foreach (bool? outcome in table.Columns)
                {
                    FrameView matching = data.Where(r => ((string)r["Group"] == group) && ((bool?)r["Outcome"] == outcome));
                    int count = matching.Rows.Count;
                    Assert.IsTrue(table[group, outcome] == count);
                    cellSum += count;
                }
                Assert.IsTrue(cellSum == table.RowTotal(group));
            }

            // Inferred probabilities should be compatible with the generating model.
            Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));
            for (int k = 0; k < groups.Count; k++)
            {
                string group = groups[k];

                // Four equally likely groups.
                Assert.IsTrue(table.ProbabilityOfRow(group).ConfidenceInterval(0.99).ClosedContains(0.25));

                // A true outcome requires the outcome draw to succeed and not be blanked.
                double expectedTrue = OutcomeProbability(k) * (1.0 - pOutcomeNull);
                Assert.IsTrue(table.ProbabilityOfColumnConditionalOnRow(true, group).ConfidenceInterval(0.99).ClosedContains(expectedTrue));
            }
            // The null-outcome probability is checked a second time, as in the original test.
            Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));

            // Pearson test should catch that rows and columns are correlated.
            Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);
        }
示例#11
0
    private static void test02()

    //****************************************************************************80
    //
    //  Purpose:
    //
    //    TEST02 tests BIVNOR.
    //
    //  Discussion:
    //
    //    Each tabulated bivariate normal CDF value is printed next to the
    //    value obtained from BIVNOR, together with their absolute difference.
    //
    //  Licensing:
    //
    //    This code is distributed under the GNU LGPL license.
    //
    //  Modified:
    //
    //    13 April 2012
    //
    //  Author:
    //
    //    John Burkardt
    //
    {
        double tabulated = 0;
        double r = 0;
        double x = 0;
        double y = 0;

        bivariatenormal.BivnorData data = new();

        // Column header, assembled from the same pieces as before and written as one line.
        string header = "      X          Y          "
                        + "R           P                         P"
                        + "                      DIFF"
                        + "                                "
                        + "       (Tabulated)               (BIVNOR)";

        Console.WriteLine("");
        Console.WriteLine("TEST02");
        Console.WriteLine("  Compare BIVNOR with some tabulated data.");
        Console.WriteLine("");
        Console.WriteLine(header);
        Console.WriteLine("");

        // N_DATA = 0 starts the table from its first entry; the routine sets it
        // back to 0 once the entries are exhausted.
        int n_data = 0;

        while (true)
        {
            Bivariate.bivariate_normal_cdf_values(ref n_data, ref x, ref y, ref r, ref tabulated);

            if (n_data == 0)
            {
                break;
            }

            //
            //  BIVNOR computes the "tail" of the probability, and we want the
            //  initial part, so it is called with X and Y negated.
            //
            double computed = bivariatenormal.bivnor(ref data, -x, -y, r);

            // Right-aligned columns, each preceded by two spaces.
            string[] columns =
            {
                x.ToString(CultureInfo.InvariantCulture).PadLeft(8),
                y.ToString(CultureInfo.InvariantCulture).PadLeft(8),
                r.ToString(CultureInfo.InvariantCulture).PadLeft(8),
                tabulated.ToString(CultureInfo.InvariantCulture).PadLeft(24),
                computed.ToString(CultureInfo.InvariantCulture).PadLeft(24),
                Math.Abs(tabulated - computed).ToString(CultureInfo.InvariantCulture).PadLeft(10)
            };

            Console.WriteLine("  " + string.Join("  ", columns));
        }
    }
示例#12
0
        /// <summary>
        /// End-to-end walkthrough: downloads an example CSV file, loads it into a table, and
        /// writes summary statistics, hypothesis tests, regressions, and contingency results
        /// for it to the console.
        /// </summary>
        public static void AnalyzingData()
        {
            // Download the example data set and parse it as CSV.
            Uri dataUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest request = WebRequest.Create(dataUrl);

            FrameTable table;
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                table = FrameTable.FromCsv(reader);
            }

            // All further analysis runs on the WhereNotNull view of the table.
            FrameView view = table.WhereNotNull();

            // Three ways to reach a column: by (zero-based) index, by name via Columns,
            // and by name via the view's own indexer.
            FrameColumn column4 = view.Columns[4];
            FrameColumn heightsColumn = view.Columns["Height"];
            FrameColumn alsoHeightsColumn = view["Height"];

            // A column viewed as typed data.
            IReadOnlyList<double> heights = view["Height"].As<double>();

            // Descriptive statistics of the heights.
            SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());

            Console.WriteLine($"Count = {summary.Count}");
            Console.WriteLine($"Mean = {summary.Mean}");
            Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
            Console.WriteLine($"Skewness = {summary.Skewness}");
            Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

            // Heights restricted to the rows whose "Sex" value equals the given label.
            IReadOnlyList<double> HeightsOf(string sex) =>
                view.Where<string>("Sex", s => s == sex).Columns["Height"].As<double>();

            IReadOnlyList<double> maleHeights = HeightsOf("M");
            IReadOnlyList<double> femaleHeights = HeightsOf("F");

            // Compare the two groups with a Student t test.
            TestResult heightTTest = Univariate.StudentTTest(maleHeights, femaleHeights);

            Console.WriteLine($"{heightTTest.Statistic.Name} = {heightTTest.Statistic.Value}");
            Console.WriteLine($"P = {heightTTest.Probability}");

            // Further tests are computed for illustration; their results are not printed.
            TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
            TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
            TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            // Linear regression of weight on height.
            LinearRegressionResult weightFit =
                view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());

            Console.WriteLine($"Model weight = ({weightFit.Slope}) * height + ({weightFit.Intercept}).");
            Console.WriteLine($"Model explains {weightFit.RSquared * 100.0}% of variation.");

            // Cross-tabulate sex against the boolean result.
            ContingencyTable<string, bool> contingency =
                Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());

            Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

            // Derived columns: weight over squared (height / 100), and age in years as of now.
            view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
            view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of the result on BMI and a 0/1 indicator for "M".
            MultiLinearLogisticRegressionResult logistic =
                view["Result"].As<bool>().MultiLinearLogisticRegression(
                    view["Bmi"].As<double>(),
                    view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
                    );

            foreach (Parameter parameter in logistic.Parameters)
            {
                Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
            }

            // Rank association between age and the result, with the result read as a double.
            TestResult ageAssociation = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());

            Console.WriteLine($"{ageAssociation.Statistic.Name} = {ageAssociation.Statistic.Value} P = {ageAssociation.Probability}");
        }