/// <summary>
/// Computes a quantile of <paramref name="list"/> by first ordering it with
/// <c>ToolsCollection.Sort</c> and then delegating to <c>QuantileSorted</c>.
/// </summary>
/// <typeparam name="RealType">Element type handled by <paramref name="algebra"/>.</typeparam>
/// <param name="algebra">Algebra passed both to the sort and to the quantile evaluation.</param>
/// <param name="list">Values to take the quantile of; need not be sorted on entry.</param>
/// <param name="quantile">Requested quantile, forwarded unchanged to <c>QuantileSorted</c>.</param>
/// <returns>Whatever <c>QuantileSorted</c> returns for the sorted values.</returns>
/// <remarks>
/// NOTE(review): the "IP" suffix suggests the caller's list is sorted in place; whether
/// <c>ToolsCollection.Sort</c> mutates its argument or returns a copy is not visible here - confirm.
/// </remarks>
public static RealType QuantileIP <RealType>(
     IAlgebraReal <RealType> algebra,
     IList <RealType> list,
     float quantile)
 {
     // Explicitly typed so QuantileSorted sees an IList<RealType> argument.
     IList <RealType> ordered = ToolsCollection.Sort(list, algebra);

     RealType result = QuantileSorted(algebra, ordered, quantile);
     return result;
 }
示例#2
0
        /// <summary>
        /// Shapiro-Wilk / Shapiro-Francia parametric hypothesis test of composite normality.
        /// Returns the p-value for the null hypothesis "X is normal with unspecified mean and variance".
        /// </summary>
        /// <remarks>
        /// Port of the MATLAB routine SWTEST, see
        /// http://nl.mathworks.com/matlabcentral/fileexchange/13964-shapiro-wilk-and-shapiro-francia-normality-tests/content/swtest.m
        ///
        /// This is an omnibus test, and is generally considered relatively powerful against a
        /// variety of alternatives. The Shapiro-Wilk test is better than the Shapiro-Francia test
        /// for platykurtic samples; conversely, the Shapiro-Francia test is better for leptokurtic
        /// samples. Accordingly:
        ///   - if the sample kurtosis is greater than 3, the Shapiro-Francia test is performed;
        ///   - otherwise the Shapiro-Wilk test is performed.
        ///
        /// The reference implementation is 1-indexed; every index below is 0-based, so the
        /// reference's weights(1) is weights[0] and weights(n) is weights[n - 1].
        ///
        /// Copyright(c) 17 March 2009 by Ahmed Ben Sada,
        /// Department of Finance, IHEC Sousse - Tunisia.
        /// Email: [email protected]
        /// $ Revision 3.0 $ Date: 18 Juin 2014 $
        ///
        /// References:
        ///  - Royston P. "Remark AS R94", Applied Statistics (1995), Vol. 44, No. 4, pp. 547-551.
        ///    AS R94 calculates the Shapiro-Wilk normality test and p-value for sample sizes
        ///    3 &lt;= n &lt;= 5000. Handles censored or uncensored data. Corrects AS 181, which was
        ///    found to be inaccurate for n &gt; 50.
        ///    Subroutine can be found at: http://lib.stat.cmu.edu/apstat/R94
        ///  - Royston P. "A pocket-calculator algorithm for the Shapiro-Francia test for
        ///    non-normality: An application to medicine", Statistics in Medicine (1993a),
        ///    Vol. 12, pp. 181-184.
        ///  - Royston P. "A Toolkit for Testing Non-Normality in Complete and Censored Samples",
        ///    Journal of the Royal Statistical Society Series D (1993b), Vol. 42, No. 1, pp. 37-43.
        ///  - Royston P. "Approximating the Shapiro-Wilk W-test for non-normality",
        ///    Statistics and Computing (1992), Vol. 2, pp. 117-119.
        ///  - Royston P. "An Extension of Shapiro and Wilk's W Test for Normality to Large
        ///    Samples", Journal of the Royal Statistical Society Series C (1982a), Vol. 31,
        ///    No. 2, pp. 115-124.
        /// </remarks>
        /// <param name="sample">
        /// Deviates from an unknown distribution. The number of observations must be at least 3
        /// and at most 5000. The list is handed to <c>ToolsCollection.Sort</c>, so it may be
        /// reordered by this call.
        /// </param>
        /// <returns>
        /// The p-value, i.e. the probability of observing the given result by chance given that
        /// the null hypothesis is true. Small values cast doubt on the validity of the null
        /// hypothesis.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="sample"/> has fewer than 3 or more than 5000 observations.
        /// </exception>
        public static double TestStatic(IList <double> sample)
        {
            if (sample == null)
            {
                throw new ArgumentNullException("sample");
            }

            if (sample.Count < 3)
            {
                throw new ArgumentException("Sample vector  must have at least 3 valid observations.");
            }

            if (5000 < sample.Count)
            {
                throw new ArgumentException("Shapiro-Wilk test might be inaccurate due to large sample size ( > 5000).");
            }

            // Sort the vector X in ascending order.
            // NOTE(review): the return value is not used, so this relies on this overload of
            // ToolsCollection.Sort ordering 'sample' in place - confirm against its definition.
            ToolsCollection.Sort(sample);
            int n = sample.Count;

            // First, calculate the a's for weights as a function of the m's.
            // See Royston (1992, p. 117) and Royston (1993b, p. 38) for details
            // in the approximation.
            double[] mtilde            = new double[n];
            double   mtilde_in_product = 0.0; // mtilde' * mtilde
            for (int index = 0; index < n; index++)
            {
                mtilde[index]      = Normal.InvCDF(0.0, 1.0, ((index + 1) - (3.0 / 8.0)) / (n + (1.0 / 4.0)));
                mtilde_in_product += mtilde[index] * mtilde[index];
            }

            // weights = 1 / sqrt(mtilde' * mtilde) * mtilde
            // These are the Shapiro-Francia weights; the Shapiro-Wilk branch overwrites them.
            double   normalisation = 1.0 / Math.Sqrt(mtilde_in_product);
            double[] weights       = new double[n];
            for (int index = 0; index < n; index++)
            {
                weights[index] = normalisation * mtilde[index];
            }

            double sample_mean = ToolsMathStatistics.Mean(sample);
            // NOTE(review): the threshold of 3 assumes KurtosisPlain is the non-excess kurtosis
            // (3 for a normal distribution) - confirm.
            double kurtosis = ToolsMathStatistics.KurtosisPlain(sample);

            if (kurtosis > 3)
            {
                // The Shapiro-Francia test is better for leptokurtic samples.

                // The Shapiro-Francia statistic W' is calculated to avoid excessive
                // rounding errors for W' close to 1 (a potential problem in very
                // large samples).
                double sf_nom   = Inproduct(sample, weights);
                double sf_denom = 0.0;
                for (int index = 0; index < n; index++)
                {
                    sf_denom += (sample[index] - sample_mean) * (sample[index] - sample_mean);
                }
                double sfW = (sf_nom * sf_nom) / sf_denom;

                // Royston (1993a, p. 183):
                double nu      = Math.Log(n);
                double u1      = Math.Log(nu) - nu;
                double u2      = Math.Log(nu) + 2 / nu;
                double sfMu    = -1.2725 + (1.0521 * u1);
                double sfSigma = 1.0308 - (0.26758 * u2);

                double newSFstatistic = Math.Log(1 - sfW);

                // Compute the normalized Shapiro-Francia statistic and its p-value.
                double NormalSFstatistic = (newSFstatistic - sfMu) / sfSigma;

                // Upper tail of N(0, 1), Royston (1993a, p. 183).
                return(1 - Normal.CDF(0, 1, NormalSFstatistic));
            }

            // The Shapiro-Wilk test is better for platykurtic samples.
            double u = 1 / Math.Sqrt(n);

            // Royston (1992, p. 117) and Royston (1993b, p. 38).
            // The constant terms are the two largest Shapiro-Francia weights, so both arrays
            // must be built before weights[n - 1] and weights[n - 2] are overwritten below.
            double[] PolyCoef_1 = new double [] { -2.706056, 4.434685, -2.071190, -0.147981, 0.221157, weights[n - 1] };
            double[] PolyCoef_2 = new double [] { -3.582633, 5.682633, -1.752461, -0.293762, 0.042981, weights[n - 2] };

            // Royston (1992, p. 118) and Royston (1993b, p. 40, Table 1)
            double[] PolyCoef_3 = new double [] { -0.0006714, 0.0250540, -0.39978, 0.54400 };
            double[] PolyCoef_4 = new double [] { -0.0020322, 0.0627670, -0.77857, 1.38220 };
            double[] PolyCoef_5 = new double [] { 0.00389150, -0.083751, -0.31082, -1.5861 };
            double[] PolyCoef_6 = new double [] { 0.00303020, -0.082676, -0.48030 };

            double[] PolyCoef_7 = new double[] { 0.459, -2.273 };

            // Outermost pair: a_n from the polynomial, a_1 = -a_n by antisymmetry.
            weights[n - 1] = Polyval(PolyCoef_1, u);
            weights[0]     = -weights[n - 1];

            // Index of the first weight that is NOT set explicitly; the interior weights
            // occupy [firstInterior, n - 1 - firstInterior].
            int    firstInterior;
            double phi;
            if (n > 5)
            {
                // Second pair: a_(n-1) from the polynomial, a_2 = -a_(n-1).
                weights[n - 2] = Polyval(PolyCoef_2, u);
                weights[1]     = -weights[n - 2];
                firstInterior  = 2;
                phi            = (mtilde_in_product - 2 * Math.Pow(mtilde[n - 1], 2) - 2 * Math.Pow(mtilde[n - 2], 2)) / (1 - 2 * Math.Pow(weights[n - 1], 2) - 2 * Math.Pow(weights[n - 2], 2));
            }
            else
            {
                firstInterior = 1;
                phi           = (mtilde_in_product - 2 * Math.Pow(mtilde[n - 1], 2)) / (1 - 2 * Math.Pow(weights[n - 1], 2));
            }

            // Special attention when n = 3 (this is a special case).
            if (n == 3)
            {
                // Royston (1992, p. 117). The overall sign of the weights is immaterial
                // because W squares the inner product.
                weights[0]     = 1 / Math.Sqrt(2);
                weights[n - 1] = -weights[0];
                phi            = 1;
            }

            // The vector 'weights' obtained next corresponds to the same coefficients
            // listed by Shapiro-Wilk in their original test for small samples.
            // Every interior weight is rescaled; the explicitly set outer ones are left alone.
            double sqrtPhi = Math.Sqrt(phi);
            for (int index = firstInterior; index < n - firstInterior; index++)
            {
                weights[index] = mtilde[index] / sqrtPhi;
            }

            // The Shapiro-Wilk statistic W is calculated to avoid excessive rounding
            // errors for W close to 1 (a potential problem in very large samples).
            double[] residual = ToolsMathCollectionDouble.Subtract(sample, sample_mean);
            double   W        = Math.Pow(Inproduct(weights, sample), 2) / Inproduct(residual, residual);

            // Calculate the normalized W and its significance level (exact for
            // n = 3). Royston (1992, p. 118) and Royston (1993b, p. 40, Table 1).
            if (n == 3)
            {
                // Royston (1982a, p. 121). For n = 3, W lies in [3/4, 1]; clamp so that
                // rounding cannot push the result below 0 or make Asin return NaN.
                double boundedW = Math.Min(1.0, Math.Max(3.0 / 4.0, W));
                return(6 / Math.PI * (Math.Asin(Math.Sqrt(boundedW)) - Math.Asin(Math.Sqrt(3.0 / 4.0))));
            }

            double mu;
            double sigma;
            double newSWstatistic;
            if (n > 11)
            {
                // Large-sample normalisation: polynomials in log(n).
                double newn = Math.Log(n);
                mu             = Polyval(PolyCoef_5, newn);
                sigma          = Math.Exp(Polyval(PolyCoef_6, newn));
                newSWstatistic = Math.Log(1 - W);
            }
            else
            {
                // 4 <= n <= 11: polynomials in n itself.
                double gam = Polyval(PolyCoef_7, n);
                mu             = Polyval(PolyCoef_3, n);
                sigma          = Math.Exp(Polyval(PolyCoef_4, n));
                newSWstatistic = -Math.Log(gam - Math.Log(1 - W));
            }

            // Compute the normalized Shapiro-Wilk statistic and its p-value.
            double NormalSWstatistic = (newSWstatistic - mu) / sigma;

            // NormalSWstatistic is referred to the upper tail of N(0, 1),
            // Royston (1992, p. 119).
            return(1 - Normal.CDF(0, 1, NormalSWstatistic));
        }