Example #1
0
        /// <summary>
        /// Performs a Kendall concordance test for association.
        /// </summary>
        /// <returns>The result of the test.</returns>
        /// <remarks>
        /// <para>Kendall's &#x3C4; is a non-parametric and robust test of association
        /// between two variables. It simply measures the number of cases where an increase
        /// in one variable is associated with an increase in the other (concordant pairs),
        /// compared with the number of cases where an increase in one variable is associated
        /// with a decrease in the other (discordant pairs).</para>
        /// <para>Because &#x3C4; depends only on the sign
        /// of a change and not its magnitude, it is not skewed by outliers exhibiting very large
        /// changes, nor by cases where the degree of change in one variable associated with
        /// a given change in the other changes over the range of the variables. Of course, it may
        /// still miss an association whose sign changes over the range of the variables. For example,
        /// if data points lie along a semi-circle in the plane, an increase in the first variable
        /// is associated with an increase in the second variable along the rising arc and a decrease in
        /// the second variable along the falling arc. No test that looks for single-signed correlation
        /// will catch this association.
        /// </para>
        /// <para>Because it examines all pairs of data points, the Kendall test requires
        /// O(N<sup>2</sup>) operations. It is thus impractical for very large data sets. While
        /// not quite as robust as the Kendall test, the Spearman test is a good fall-back in such cases.</para>
        /// </remarks>
        /// <exception cref="InsufficientDataException"><see cref="Count"/> is less than two.</exception>
        /// <seealso cref="PearsonRTest"/>
        /// <seealso cref="SpearmanRhoTest"/>
        /// <seealso href="http://en.wikipedia.org/wiki/Kendall_tau_test" />
        public TestResult KendallTauTest()
        {
            int n = xData.Count;

            // At least one pair is needed, otherwise tau is 0 / 0.
            if (n < 2)
            {
                throw new InsufficientDataException();
            }

            // Tally concordant and discordant pairs over all n(n-1)/2 distinct pairs (j < i).
            // The tallies are 64-bit because the number of pairs exceeds Int32.MaxValue
            // once n > 65536, which would silently wrap 32-bit counters and corrupt tau.
            long concordant = 0;
            long discordant = 0;

            for (int i = 1; i < n; i++)
            {
                for (int j = 0; j < i; j++)
                {
                    // Direction in which each variable changes between the two points.
                    int sx = Math.Sign(xData[i] - xData[j]);
                    int sy = Math.Sign(yData[i] - yData[j]);

                    // Ties are not treated specially, as is sometimes done: a pair tied in
                    // both variables (sx == sy == 0) is tallied as concordant, and a pair
                    // tied in only one variable is tallied as discordant.
                    if (sx == sy)
                    {
                        concordant++;
                    }
                    else
                    {
                        discordant++;
                    }
                }
            }

            // tau = (C - D) / (C + D); the denominator is the pair count, which is >= 1 here.
            double t = (double)(concordant - discordant) / (concordant + discordant);

            // Distribution of tau handed to the test result.
            Distribution tauDistribution;

            if (n <= 20)
            {
                // Small sample: exact discrete distribution, mapped onto the interval [-1, 1].
                tauDistribution = new DiscreteAsContinuousDistribution(new KendallExactDistribution(n), Interval.FromEndpoints(-1.0, 1.0));
            }
            else
            {
                // Larger sample: normal approximation with mean 0 and
                // standard deviation sqrt((4n + 10) / (9 n (n - 1))).
                double dt = Math.Sqrt((4.0 * n + 10.0) / 9.0 / n / (n - 1));
                tauDistribution = new NormalDistribution(0.0, dt);
            }

            return new TestResult(t, tauDistribution);
        }
Example #2
0
        /// <summary>
        /// Performs a Spearman rank-order test of association between the two variables.
        /// </summary>
        /// <returns>The result of the test.</returns>
        /// <remarks>
        /// <para>The Spearman rank-order test of association is a non-parametric test for association between
        /// two variables. The test statistic rho is the correlation coefficient of the <em>rank</em> of
        /// each entry in the sample. It is thus invariant over monotonic reparameterizations of the data,
        /// and will, for example, detect a quadratic or exponential association just as well as a linear
        /// association.</para>
        /// <para>The Spearman rank-order test requires O(N log N) operations.</para>
        /// </remarks>
        /// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
        /// <seealso cref="PearsonRTest"/>
        /// <seealso cref="KendallTauTest"/>
        /// <seealso href="http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient"/>
        public TestResult SpearmanRhoTest()
        {
            // Read the sample size once; every formula below uses the same value.
            int n = Count;

            if (n < 3)
            {
                throw new InsufficientDataException();
            }

            // Analytic expressions for the mean and variance of the ranks.
            // The variance is evaluated in floating point: the integer product
            // (n + 1) * (n - 1) overflows Int32 for n >= 46341, which previously
            // produced a wrong variance (and hence a wrong rho) for large samples.
            double M = (n - 1) / 2.0;
            double V = (n + 1.0) * (n - 1.0) / 12.0;

            // Covariance of the ranks of the two variables.
            int[]  rx = xData.GetRanks();
            int[]  ry = yData.GetRanks();
            double C  = 0.0;

            for (int i = 0; i < n; i++)
            {
                C += (rx[i] - M) * (ry[i] - M);
            }
            C = C / n;

            // rho is the correlation coefficient of the ranks.
            double rho = C / V;

            Distribution rhoDistribution;

            if (n <= 10)
            {
                // For small enough samples, use the exact distribution.
                // It would be nice to do this for at least slightly higher n, but computation time grows dramatically.
                // Would like to ensure return in less than 100ms; recorded timings: n=10 35ms, n=11 72ms, n=12 190ms.
                rhoDistribution = new DiscreteAsContinuousDistribution(new SpearmanExactDistribution(n), Interval.FromEndpoints(-1.0, 1.0));
            }
            else
            {
                // For larger samples, use the normal approximation with standard deviation 1 / sqrt(n - 1).
                // Would like to fit support and C_4 too; look into logit-normal.
                // An Edgeworth expansion can fit C_4 but distorts the tails badly, even giving negative probabilities.
                rhoDistribution = new NormalDistribution(0.0, 1.0 / Math.Sqrt(n - 1));
            }

            return new TestResult(rho, rhoDistribution);
        }
        /// <summary>
        /// Performs a Wilcoxon signed rank test.
        /// </summary>
        /// <returns>The result of the test.</returns>
        /// <remarks>
        /// <para>The Wilcoxon signed rank test is a non-parametric alternative to the
        /// paired t-test (<see cref="PairedStudentTTest"/>). Given two measurements on
        /// the same subjects, this method tests for changes in the distribution between
        /// the two measurements. It is sensitive primarily to shifts in the median.
        /// Note that the distributions of the individual measurements
        /// may be far from normal, and may be different for each subject.</para>
        /// </remarks>
        /// <exception cref="InsufficientDataException">There are fewer than two data points.</exception>
        /// <seealso href="https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test"/>
        public TestResult WilcoxonSignedRankTest()
        {
            int n = this.Count;
            if (n < 2)
            {
                throw new InsufficientDataException();
            }

            // Paired differences x - y, one per subject.
            double[] differences = new double[n];
            for (int k = 0; k < n; k++)
            {
                differences[k] = xData[k] - yData[k];
            }

            // Order the differences by absolute value, so that position + 1 is the rank.
            Array.Sort(differences, (a, b) => Math.Abs(a).CompareTo(Math.Abs(b)));

            // W is the sum of the ranks of the strictly positive differences.
            int W = 0;
            int rank = 0;
            foreach (double difference in differences)
            {
                rank++;
                if (difference > 0.0)
                {
                    W += rank;
                }
            }

            // Distribution of W handed to the test result.
            ContinuousDistribution nullDistribution;
            if (n >= 32)
            {
                // Normal approximation: mean n(n+1)/4, variance n(n+1)(2n+1)/24.
                double mu    = n * (n + 1.0) / 4.0;
                double sigma = Math.Sqrt(mu * (2.0 * n + 1.0) / 6.0);
                nullDistribution = new NormalDistribution(mu, sigma);
            }
            else
            {
                // Smaller samples use the discrete Wilcoxon distribution for n.
                DiscreteDistribution wilcoxon = new WilcoxonDistribution(n);
                nullDistribution = new DiscreteAsContinuousDistribution(wilcoxon);
            }

            return new TestResult("W", W, TestType.TwoTailed, nullDistribution);
        }
Example #4
0
        /// <summary>
        /// Performs a Kendall concordance test for association.
        /// </summary>
        /// <param name="x">The values of the first variable.</param>
        /// <param name="y">The values of the second variable.</param>
        /// <returns>The result of the test.</returns>
        /// <remarks>
        /// <para>Kendall's &#x3C4; is a non-parametric and robust test of association
        /// between two variables. It simply measures the number of cases where an increase
        /// in one variable is associated with an increase in the other (concordant pairs),
        /// compared with the number of cases where an increase in one variable is associated
        /// with a decrease in the other (discordant pairs).</para>
        /// <para>Because &#x3C4; depends only on the sign
        /// of the difference and not its magnitude, it is not skewed by outliers exhibiting very large
        /// changes, nor by cases where the degree of difference
        /// changes over the ranges of the variables. Of course, it may
        /// still miss an association whose sign changes over the range of the variables. For example,
        /// if data points lie along a semi-circle in the plane, an increase in the first variable
        /// is associated with an increase in the second variable along the rising arc and a decrease in
        /// the second variable along the falling arc.
        /// </para>
        /// <para>Because it examines all pairs of data points, the Kendall test requires
        /// O(N<sup>2</sup>) operations. It is thus impractical for very large data sets. While
        /// not quite as robust as the Kendall test, the Spearman test is a good fall-back in such cases.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="x"/> or <paramref name="y"/> is <see langword="null"/>.</exception>
        /// <exception cref="DimensionMismatchException"><paramref name="x"/> and <paramref name="y"/> do not contain the same number of entries.</exception>
        /// <exception cref="InsufficientDataException">There are fewer than two entries in the sample.</exception>
        /// <seealso cref="PearsonRTest(IReadOnlyList{double},IReadOnlyList{double})"/>
        /// <seealso cref="SpearmanRhoTest(IReadOnlyList{double},IReadOnlyList{double})"/>
        /// <seealso href="http://en.wikipedia.org/wiki/Kendall_tau_test" />
        public static TestResult KendallTauTest(IReadOnlyList <double> x, IReadOnlyList <double> y)
        {
            if (x == null)
            {
                throw new ArgumentNullException(nameof(x));
            }
            if (y == null)
            {
                throw new ArgumentNullException(nameof(y));
            }
            if (x.Count != y.Count)
            {
                throw new DimensionMismatchException();
            }

            int n = x.Count;

            // At least one pair is needed, otherwise tau is 0 / 0.
            if (n < 2)
            {
                throw new InsufficientDataException();
            }

            // Tally concordant and discordant pairs over all n(n-1)/2 distinct pairs (j < i).
            // The tallies are 64-bit because the number of pairs exceeds Int32.MaxValue
            // once n > 65536, which would silently wrap 32-bit counters and corrupt tau.
            long concordant = 0;
            long discordant = 0;

            for (int i = 1; i < n; i++)
            {
                for (int j = 0; j < i; j++)
                {
                    // Direction in which each variable changes between the two points.
                    int sx = Math.Sign(x[i] - x[j]);
                    int sy = Math.Sign(y[i] - y[j]);

                    // Ties are not treated specially, as is sometimes done: a pair tied in
                    // both variables (sx == sy == 0) is tallied as concordant, and a pair
                    // tied in only one variable is tallied as discordant.
                    if (sx == sy)
                    {
                        concordant++;
                    }
                    else
                    {
                        discordant++;
                    }
                }
            }

            // Concordant and discordant counts should sum to total pairs.
            // The expected total is computed in 64-bit; n * (n - 1) overflows Int32 for n > 46341.
            Debug.Assert(concordant + discordant == (long)n * (n - 1) / 2);

            double tau = (double)(concordant - discordant) / (concordant + discordant);

            // Compute null distribution.
            if (n <= 20)
            {
                // Small sample: report the discordant count with its exact discrete distribution,
                // alongside tau with the same distribution mapped onto [-1, +1].
                // With n <= 20 there are at most 190 pairs, so narrowing to int is lossless.
                DiscreteDistribution   dDistribution   = new KendallExactDistribution(n);
                ContinuousDistribution tauDistribution = new DiscreteAsContinuousDistribution(dDistribution, Interval.FromEndpoints(-1.0, +1.0));
                return new TestResult("D", (int)discordant, dDistribution, "τ", tau, tauDistribution, TestType.TwoTailed);
            }
            else
            {
                // Larger sample: normal approximation with mean 0 and
                // standard deviation sqrt((4n + 10) / (9 n (n - 1))).
                double dTau = Math.Sqrt((4.0 * n + 10.0) / 9.0 / n / (n - 1));
                ContinuousDistribution tauDistribution = new NormalDistribution(0.0, dTau);
                return new TestResult("τ", tau, tauDistribution, TestType.TwoTailed);
            }
        }
Example #5
0
        /// <summary>
        /// Performs a Spearman rank-order test of association between the two variables.
        /// </summary>
        /// <param name="x">The values of the first variable.</param>
        /// <param name="y">The values of the second variable.</param>
        /// <returns>The result of the test.</returns>
        /// <remarks>
        /// <para>The Spearman rank-order test of association is a non-parametric test for association between
        /// two variables. The test statistic rho is the correlation coefficient of the <em>rank</em> of
        /// each entry in the sample. It is thus invariant over monotonic re-parameterizations of the data,
        /// and will, for example, detect a quadratic or exponential association just as well as a linear
        /// association.</para>
        /// <para>The Spearman rank-order test requires O(N log N) operations.</para>
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="x"/> or <paramref name="y"/> is <see langword="null"/>.</exception>
        /// <exception cref="DimensionMismatchException"><paramref name="x"/> and <paramref name="y"/> do not contain the same number of entries.</exception>
        /// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
        /// <seealso cref="PearsonRTest(IReadOnlyList{double},IReadOnlyList{double})"/>
        /// <seealso cref="KendallTauTest"/>
        /// <seealso href="http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient"/>
        public static TestResult SpearmanRhoTest(IReadOnlyList <double> x, IReadOnlyList <double> y)
        {
            if (x == null)
            {
                throw new ArgumentNullException(nameof(x));
            }
            if (y == null)
            {
                throw new ArgumentNullException(nameof(y));
            }
            if (x.Count != y.Count)
            {
                throw new DimensionMismatchException();
            }

            int n = x.Count;

            if (n < 3)
            {
                throw new InsufficientDataException();
            }

            // Find the ranks.
            int[] rx = Univariate.GetRanks(x);
            int[] ry = Univariate.GetRanks(y);

            // Use analytic expressions for the mean M and variance V of the ranks.
            // C is the covariance of the ranks; rho is the corresponding correlation coefficient.
            // V is evaluated in floating point: the integer product (n + 1) * (n - 1)
            // overflows Int32 for n >= 46341, which previously gave a wrong rho for large samples.
            double M = (n - 1) / 2.0;
            double V = (n + 1.0) * (n - 1.0) / 12.0;
            double C = 0.0;

            for (int i = 0; i < n; i++)
            {
                C += (rx[i] - M) * (ry[i] - M);
            }
            C = C / n;
            double rho = C / V;

            // Compute the null distribution.
            if (n < 12)
            {
                // S encodes the same information as rho, but as an integer that varies in steps
                // of one, so its null distribution can be described by a DiscreteDistribution.
                // Statisticians define S using 1-based ranks, so add 1 to each rank.
                // S is computed only in this branch, the only place it is used; for large n the
                // 32-bit sum of rank products would overflow.
                int S = 0;
                for (int i = 0; i < n; i++)
                {
                    S += (rx[i] + 1) * (ry[i] + 1);
                }

                // For small enough samples, use the exact distribution.
                // It would be nice to do this for at least slightly higher n, but the time to compute the exact
                // distribution grows dramatically with n; the goal is to return in less than about 100ms.
                // Recorded timings are n = 10 35ms, n = 11 72ms, n = 12 190ms.
                // Build the exact distribution once and share it between the S and rho views
                // rather than constructing it twice.
                DiscreteDistribution   sDistribution   = new SpearmanExactDistribution(n);
                ContinuousDistribution rhoDistribution = new DiscreteAsContinuousDistribution(sDistribution, Interval.FromEndpoints(-1.0, 1.0));
                return new TestResult("s", S, sDistribution, "ρ", rho, rhoDistribution, TestType.TwoTailed);
            }
            else
            {
                // For larger samples, use the normal approximation with standard deviation 1 / sqrt(n - 1).
                // It would be nice to fit support and/or fourth cumulant.
                // An Edgeworth expansion can fit the fourth cumulant, but distorts the tails badly, even giving
                // negative probabilities for extreme values, which are quite likely for null-violating samples.
                // Look into bounded quasi-normal distributions such as the logit-normal and truncated normal.
                ContinuousDistribution rhoDistribution = new NormalDistribution(0.0, 1.0 / Math.Sqrt(n - 1));
                return new TestResult("ρ", rho, rhoDistribution, TestType.TwoTailed);
            }
        }