/// <summary>
/// Friedman rank-sum test for equally sized samples (treatments) whose measurements are matched
/// by position (blocks).
/// </summary>
/// <param name="samples">One list per treatment; all lists must contain the same number of measurements.</param>
/// <returns>
/// The chi-squared cumulative probability of the Friedman statistic with (treatments - 1) degrees of
/// freedom. Note that this is the CDF value itself, not its complement.
/// </returns>
/// <exception cref="Exception">Thrown when the samples are not all of the same size.</exception>
public static double TestStatic(IList <IList <double> > samples) {
    int sample_size = samples[0].Count;
    foreach (IList <double> sample in samples) {
        if (sample.Count != sample_size) {
            throw new Exception("samples must be of equal size");
        }
    }

    double[,] measurement_ranks = ToolsMathStatistics.Ranks1(samples);
    // presumably one rank sum per treatment - confirm against ToolsMathCollection.Sums0
    double[] rank_sums = ToolsMathCollection.Sums0(measurement_ranks);

    double chi_square_statistic_part = 0.0;
    foreach (double rank_sum in rank_sums) {
        chi_square_statistic_part += ToolsMath.Sqr(rank_sum);
    }

    double treatment_count = samples.Count;
    double block_count = sample_size;
    double chi_square_statistic = ((12.0 / (block_count * treatment_count * (treatment_count + 1.0))) * chi_square_statistic_part) - (3 * block_count * (treatment_count + 1.0));

    // The Friedman statistic is referred to a chi-squared distribution with k - 1 degrees of
    // freedom (k = number of treatments); the previous code used k.
    return(ChiSquared.CDF(rank_sums.Length - 1, chi_square_statistic));
}
/// <summary>
/// One-way analysis of variance: compares the between-sample mean square with the
/// within-sample mean square.
/// </summary>
/// <param name="samples">One list of measurements per group.</param>
/// <returns>
/// The Fisher-Snedecor CDF evaluated at the F statistic with (groups - 1) and
/// (observations - groups) degrees of freedom.
/// </returns>
public static double TestStatic(IList <IList <double> > samples) {
    double group_count = samples.Count;
    double observation_count = ToolsCollection.CountElements(samples);
    double grand_mean = ToolsMathStatistics.MeanAll(samples);
    IList <double> group_means = ToolsMathStatistics.Means0(samples);

    // Accumulate the treatment (between) and error (within) sums of squares in a single pass.
    double between_sum_of_squares = 0.0;
    double within_sum_of_squares = 0.0;
    for (int group_index = 0; group_index < samples.Count; group_index++) {
        IList <double> group = samples[group_index];
        double group_mean = group_means[group_index];
        between_sum_of_squares += group.Count * ToolsMath.Sqr(group_mean - grand_mean);
        for (int value_index = 0; value_index < group.Count; value_index++) {
            within_sum_of_squares += ToolsMath.Sqr(group[value_index] - group_mean);
        }
    }

    // F transform
    double between_dof = (group_count - 1.0);
    double within_dof = (observation_count - group_count);
    double between_mean_square = between_sum_of_squares / between_dof;
    double within_mean_square = within_sum_of_squares / within_dof;
    return(FisherSnedecor.CDF(between_dof, within_dof, between_mean_square / within_mean_square));
}
/// <summary>
/// Computes a ROC AUC for one label value, scoring every instance by the ratio between the
/// likelihood of that label and the likelihood at the first position of the instance's ordering.
/// </summary>
/// <param name="label_value">The label value treated as the positive class.</param>
/// <returns>The value returned by <c>ToolsMathStatistics.ComputeROCAUCTrapeziod</c> for those scores.</returns>
public double GetAUC(LabelType label_value) {
    int label_index = this.Model.DataContext.GetLabelDescriptor(0).GetValueIndex(label_value);
    int instance_count = label_values.Length;
    double[] label_scores = new double[instance_count];
    double[] second_scores = new double[instance_count];
    bool[] labels = new bool[instance_count];

    for (int instance_index = 0; instance_index < instance_count; instance_index++) {
        label_scores[instance_index] = likelihoods[instance_index][label_index];

        int actual_index = this.Model.DataContext.GetLabelDescriptor(0).GetValueIndex(label_values[instance_index]);
        labels[instance_index] = (actual_index == label_index);

        // Both cases of the original branch pick the entry at ordering position 0: when that
        // position is the label itself the label's own likelihood is used, so the ratio is 1.
        // NOTE(review): the name "second_scores" suggests the best competing label was intended
        // in that case - confirm against the direction of ToolsMathCollection.Ordering.
        List <int> ordering = ToolsMathCollection.Ordering(likelihoods[instance_index]);
        second_scores[instance_index] = likelihoods[instance_index][ordering[0]];
    }

    double[] scores = ToolsMathCollection.DivideList(label_scores, second_scores);
    return(ToolsMathStatistics.ComputeROCAUCTrapeziod(labels, scores));
}
/// <summary>
/// Brown-Forsythe test for equality of variances: a one-way ANOVA F statistic computed on the
/// absolute deviations of every measurement from its own sample median.
/// </summary>
/// <param name="samples">One list of measurements per group.</param>
/// <returns>
/// The Fisher-Snedecor CDF evaluated at the statistic with (groups - 1) and
/// (observations - groups) degrees of freedom.
/// </returns>
public override double Test(IList <IList <double> > samples) {
    double total_count = ToolsCollection.CountElements(samples);

    // z_ij = |y_ij - median_i|. The previous implementation compared these deviations with the
    // mean of the raw data and built the numerator from raw means, which is not the
    // Brown-Forsythe statistic.
    double[][] deviations = new double[samples.Count][];
    double[] deviation_means = new double[samples.Count];
    double deviation_sum_all = 0.0;
    for (int sample_index = 0; sample_index < samples.Count; sample_index++) {
        IList <double> sample = samples[sample_index];
        double sample_median = ToolsMathStatistics.Quantile(sample, 0.5f);
        double[] sample_deviations = new double[sample.Count];
        double deviation_sum = 0.0;
        for (int measurement_index = 0; measurement_index < sample.Count; measurement_index++) {
            sample_deviations[measurement_index] = Math.Abs(sample[measurement_index] - sample_median);
            deviation_sum += sample_deviations[measurement_index];
        }
        deviations[sample_index] = sample_deviations;
        deviation_means[sample_index] = deviation_sum / sample.Count;
        deviation_sum_all += deviation_sum;
    }
    double pooled_deviation_mean = deviation_sum_all / total_count;

    double nominator_part = 0.0;   // sum_i N_i * (zbar_i - zbar)^2
    double denominator_part = 0.0; // sum_i sum_j (z_ij - zbar_i)^2
    for (int sample_index = 0; sample_index < samples.Count; sample_index++) {
        double group_offset = deviation_means[sample_index] - pooled_deviation_mean;
        nominator_part += deviations[sample_index].Length * group_offset * group_offset;
        foreach (double deviation in deviations[sample_index]) {
            double diff = deviation - deviation_means[sample_index];
            denominator_part += diff * diff;
        }
    }

    double degrees_of_freedom_0 = samples.Count - 1;
    double degrees_of_freedom_1 = total_count - samples.Count;
    double f_statistic = (degrees_of_freedom_1 * nominator_part) / (degrees_of_freedom_0 * denominator_part);

    // The CDF needs no random source, so no Random instance is allocated per call any more.
    return(FisherSnedecor.CDF(degrees_of_freedom_0, degrees_of_freedom_1, f_statistic));
}
//TODO implement 3 mean variants
//http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
/// <summary>
/// Levene-type test for equality of variances (median variant): a one-way ANOVA F statistic
/// computed on the absolute deviations of every measurement from its own sample median.
/// </summary>
/// <param name="samples">One list of measurements per group.</param>
/// <returns>
/// The Fisher-Snedecor CDF evaluated at the statistic with (groups - 1) and
/// (observations - groups) degrees of freedom.
/// </returns>
public static double TestStatic(IList <IList <double> > samples) {
    double total_count = ToolsCollection.CountElements(samples);
    double[] sample_medians = ToolsMathStatistics.Medians0(samples);

    // z_ij = |y_ij - median_i|. The previous implementation ran the analysis of variance on the
    // raw measurements instead of on these absolute deviations.
    double[][] deviations = new double[samples.Count][];
    double[] deviation_means = new double[samples.Count];
    double deviation_sum_all = 0.0;
    for (int sample_index = 0; sample_index < samples.Count; sample_index++) {
        IList <double> sample = samples[sample_index];
        double[] sample_deviations = new double[sample.Count];
        double deviation_sum = 0.0;
        for (int measurement_index = 0; measurement_index < sample.Count; measurement_index++) {
            sample_deviations[measurement_index] = Math.Abs(sample[measurement_index] - sample_medians[sample_index]);
            deviation_sum += sample_deviations[measurement_index];
        }
        deviations[sample_index] = sample_deviations;
        deviation_means[sample_index] = deviation_sum / sample.Count;
        deviation_sum_all += deviation_sum;
    }
    double total_deviation_mean = deviation_sum_all / total_count;

    double summed_varriance = 0.0; // sum_i N_i * (zbar_i - zbar)^2
    double total_variance = 0.0;   // sum_i sum_j (z_ij - zbar_i)^2
    for (int sample_index = 0; sample_index < samples.Count; sample_index++) {
        summed_varriance += deviations[sample_index].Length * ToolsMath.Sqr(deviation_means[sample_index] - total_deviation_mean);
        foreach (double deviation in deviations[sample_index]) {
            total_variance += ToolsMath.Sqr(deviation - deviation_means[sample_index]);
        }
    }

    double degrees_of_freedom_0 = samples.Count - 1;
    double degrees_of_freedom_1 = total_count - samples.Count;
    double f_statistic = (degrees_of_freedom_1 * summed_varriance) / (degrees_of_freedom_0 * total_variance);
    return(FisherSnedecor.CDF(degrees_of_freedom_0, degrees_of_freedom_1, f_statistic));
}
/// <summary>
/// Two-way analysis of variance for equally sized samples, producing one F test for the
/// samples (treatments) and one for the measurement positions (blocks).
/// </summary>
/// <param name="samples">One list per treatment; all lists must contain the same number of measurements.</param>
/// <returns>
/// Item1: Fisher-Snedecor CDF of the F statistic for the samples;
/// Item2: Fisher-Snedecor CDF of the F statistic for the measurement positions.
/// </returns>
/// <exception cref="Exception">Thrown when the samples are not all of the same size.</exception>
public static Tuple <double, double> TestStatic(IList <IList <double> > samples) {
    int sample_size = samples[0].Count;
    foreach (IList <double> sample in samples) {
        if (sample.Count != sample_size) {
            throw new Exception("samples must be of equal size");
        }
    }

    // Larsen Marx 4th edition P779
    double treatment_count = samples.Count;
    double observation_count = ToolsCollection.CountElements(samples);
    double grand_mean = ToolsMathStatistics.MeanAll(samples);
    double raw_sum_of_squares = ToolsMathStatistics.SumSquaredAll(samples);
    IList <double> treatment_sums = ToolsMathCollection.Sums0(samples);
    IList <double> block_sums = ToolsMathCollection.Sums1(samples);

    // Correction term C = (grand total)^2 / (number of observations)
    double correction = ToolsMath.Sqr(grand_mean * observation_count) / (sample_size * treatment_count);
    double total_sum_of_squares = raw_sum_of_squares - correction;

    double block_sum_of_squares = 0;
    for (int block_index = 0; block_index < sample_size; block_index++) {
        block_sum_of_squares += ToolsMath.Sqr(block_sums[block_index]) / treatment_count;
    }
    block_sum_of_squares -= correction;

    double treatment_sum_of_squares = 0.0;
    for (int treatment_index = 0; treatment_index < samples.Count; treatment_index++) {
        treatment_sum_of_squares += ToolsMath.Sqr(treatment_sums[treatment_index]) / sample_size;
    }
    treatment_sum_of_squares -= correction;

    double error_sum_of_squares = total_sum_of_squares - block_sum_of_squares - treatment_sum_of_squares;

    double treatment_dof = (treatment_count - 1.0);
    double block_dof = (sample_size - 1.0);
    double error_dof = treatment_dof * block_dof;
    double error_mean_square = error_sum_of_squares / error_dof;

    // F transforms: treatments first, then measurement positions
    double treatment_f = (treatment_sum_of_squares / treatment_dof) / error_mean_square;
    double block_f = (block_sum_of_squares / block_dof) / error_mean_square;

    return(Tuple.Create(
               FisherSnedecor.CDF(treatment_dof, error_dof, treatment_f),
               FisherSnedecor.CDF(block_dof, error_dof, block_f)));
}
/// <summary>
/// Tests a sample against a normal distribution whose mean and standard deviation are
/// estimated from that same sample, by delegating to the Kolmogorov-Smirnov goodness-of-fit test.
/// </summary>
/// <param name="sample">The measurements to test.</param>
/// <returns>The result of <c>TestGoodnessOfFitKolmogorovSmirnov.TestStatic</c> for the fitted distribution.</returns>
public static double TestStatic(IList <double> sample) {
    double sample_mean = ToolsMathStatistics.Mean(sample);
    double sample_deviation = ToolsMathStatistics.StandardDeviation(sample, sample_mean);

    //TODO it is actually illegal to use estimated parameters for the tested distribution
    DistributionNormalUnivariateFloat64 fitted_distribution = new DistributionNormalUnivariateFloat64(sample_mean, sample_deviation);
    return(TestGoodnessOfFitKolmogorovSmirnov.TestStatic(fitted_distribution, sample));
}
/// <summary>
/// Builds the forward and backward matrices from the root of <c>data * data^T</c> and stores
/// the means of the data.
/// </summary>
/// <param name="algebra">Linear algebra implementation used to create the matrices.</param>
/// <param name="data">Raw data table.</param>
public TransformWhiteningOld(IAlgebraLinear <MatrixType> algebra, float [,] data) {
    AMatrix <MatrixType> samples = algebra.Create(data);
    means = algebra.Create(ToolsMathStatistics.Means1(data));

    // NOTE(review): the product is formed from the raw data; the stored means are not
    // subtracted and the product is not scaled by the sample count - confirm this is intended.
    AMatrix <MatrixType> outer_product = samples * samples.Transpose();

    // backward = root of the product, forward = its inverse
    matrix_backward = outer_product.Algebra.ComputeRoot(outer_product);
    matrix_forward = matrix_backward.Invert();
}
/// <summary>
/// Checks that <c>TestROCHanleyMcNeil.ComputeStandardError</c> yields a standard error strictly
/// between 0.145 and 0.146 for a fixed data set, a seed of 0 and 1000 trials.
/// </summary>
public void TestROCHanleyMcNeilTestComputeStandardError0() {
    Random seeded_random = new Random(0);
    double[] scores = { 177, 177, 165, 172, 172, 179, 163, 175, 166, 182, 177, 168, 179, 177 };
    bool[] score_labels = { true, false, true, false, true, false, true, false, true, false, true, false, true, false };

    double auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(score_labels, scores);
    double standard_error = TestROCHanleyMcNeil.ComputeStandardError(auc, scores, score_labels, seeded_random, 1000);

    Assert.IsTrue(standard_error > 0.145);
    Assert.IsTrue(standard_error < 0.146);
}
/// <summary>
/// Weighted sum of the absolute differences between the desired AUCs and the AUCs obtained on
/// the selected subset of instances.
/// </summary>
/// <param name="selection">Indexes of the instances to evaluate.</param>
/// <returns>Sum over all value series of <c>|desired_auc - auc| * weight</c>.</returns>
public double Compute(ISet <int> selection) {
    bool[] selected_labels = labels.Select(selection);

    double total_error = 0;
    for (int series_index = 0; series_index < values.Length; series_index++) {
        var selected_values = values[series_index].Select(selection);
        double auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(selected_labels, selected_values);
        double deviation = Math.Abs(desired_aucs[series_index] - auc);
        total_error += deviation * weigths[series_index];
    }
    return(total_error);
}
/// <summary>
/// F-test of equality of variances for two samples.
/// </summary>
/// <param name="sample_0">Sample whose variance forms the numerator.</param>
/// <param name="sample_1">Sample whose variance forms the denominator.</param>
/// <returns>
/// The Fisher-Snedecor CDF evaluated at the variance ratio with (n0 - 1) and (n1 - 1) degrees of freedom.
/// </returns>
public static double TestStatic(IList <double> sample_0, IList <double> sample_1) {
    // as in https://en.wikipedia.org/wiki/F-test_of_equality_of_variances
    // and Larsen Marx 4th edition P569
    double numerator_variance = ToolsMathStatistics.Variance(sample_0);
    double denominator_variance = ToolsMathStatistics.Variance(sample_1);

    double numerator_dof = (sample_0.Count - 1);
    double denominator_dof = (sample_1.Count - 1);
    double variance_ratio = numerator_variance / denominator_variance;

    return(FisherSnedecor.CDF(numerator_dof, denominator_dof, variance_ratio));
}
/// <summary>
/// Two-sample Student t-test using the pooled variance of both samples.
/// </summary>
/// <param name="sample_0">First sample.</param>
/// <param name="sample_1">Second sample.</param>
/// <returns>
/// The Student-t CDF (location 0, scale 1, n0 + n1 - 2 degrees of freedom) evaluated at the
/// negated t statistic.
/// </returns>
public override double Test(IList <double> sample_0, IList <double> sample_1) {
    double mean_0 = ToolsMathStatistics.Mean(sample_0);
    double mean_1 = ToolsMathStatistics.Mean(sample_1);
    double variance = ToolsMathStatistics.VariancePooled(sample_0, sample_1);

    double standard_error = Math.Sqrt(variance) * Math.Sqrt((1.0 / sample_0.Count) + (1.0 / sample_1.Count));
    double t_statistic = (mean_0 - mean_1) / standard_error;
    int degrees_of_freedom = (sample_0.Count + sample_1.Count) - 2;

    // The reference distribution of the statistic is centred at 0; the location argument was
    // previously 0.1, which shifted every returned probability.
    StudentT distribution = new StudentT(0.0, 1.0, degrees_of_freedom);
    return(distribution.CumulativeDistribution(-t_statistic));
}
/// <summary>
/// Experiment: loads EUR/USD prices from a CSV file, derives for every time point the profit of
/// an optimally closed buy and sell order within the look-ahead window, and appends the buy
/// curve to an output CSV file.
/// </summary>
public void DoExperiment() {
    double commission = 7;
    double spread = 5;
    int lookAhead = 60;

    //Load data
    string[,] table = ToolsIOCSV.ReadCSVFile(@"D:\GoogleDrive\TestData\Trading\eurusddata.csv", Delimiter.Comma);
    int rowCount = table.GetLength(0);

    //Convert data to values
    double[] ask = new double[rowCount];
    double[] bid = new double[rowCount];
    for (int i = 0; i < rowCount; i++) {
        // The file uses '.' as decimal separator. Parse it culture-invariantly instead of
        // rewriting it to ',' and relying on the machine's current culture, which silently
        // produced wrong values on cultures where ',' is a group separator.
        bid[i] = 100000 * double.Parse(table[i, 2], System.Globalization.CultureInfo.InvariantCulture);
        ask[i] = bid[i] + spread;
    }

    // The means below are not used further in this method.
    double mean1 = ToolsMathStatistics.Mean(ask);
    double mean2 = ToolsMathStatistics.Mean(bid);

    double[] buyCurve = new double[rowCount - lookAhead];
    double[] sellCurve = new double[rowCount - lookAhead];
    string[] buyCurveString = new string[buyCurve.Length];
    for (int i = 0; i < buyCurve.Length; i++) {
        // Best closing prices over the following (lookAhead - 1) points
        double max = double.MinValue;
        double min = double.MaxValue;
        for (int j = 1; j < lookAhead; j++) {
            max = Math.Max(max, bid[i + j]);
            min = Math.Min(min, ask[i + j]);
        }
        buyCurve[i] = max - ask[i] - commission;
        buyCurveString[i] = bid[i] + ";" + buyCurve[i];
        sellCurve[i] = bid[i] - min - commission;
    }

    double mean3 = ToolsMathStatistics.Mean(buyCurve);
    double mean4 = ToolsMathStatistics.Mean(sellCurve);
    File.AppendAllLines(@"D:\GoogleDrive\TestData\Trading\buyCurveOut.csv", buyCurveString);

    /*
     * Load data
     * for each time point find optimal buy order
     * create line of optimal buy orders
     */
}
/// <summary>
/// Collects the values of one feature over all instances carrying the given label and stores
/// their mean and standard deviation.
/// </summary>
/// <param name="data_context">Not used by this constructor.</param>
/// <param name="feature_data">Feature values, indexed as [instance][feature].</param>
/// <param name="label_data">Label index per instance.</param>
/// <param name="feature_index">Feature whose values are collected.</param>
/// <param name="label_index">Label the instances must carry to be included.</param>
/// <param name="epsilon">Not used by this constructor.</param>
public FunctionMarginalIntervalNormal(IDataContext data_context, double[][] feature_data, int[] label_data, int feature_index, int label_index, double epsilon) {
    List <double> matching_values = new List <double>();
    for (int row = 0; row < feature_data.Length; row++) {
        if (label_data[row] != label_index) {
            continue;
        }
        matching_values.Add(feature_data[row][feature_index]);
    }

    this.mean = ToolsMathStatistics.Mean(matching_values);
    this.standard_deviation = ToolsMathStatistics.StandardDeviation(matching_values, mean);
}
/// <summary>
/// Creates a rescaling transform whose per-feature lower and upper bounds are the
/// <c>quantile</c> and <c>1 - quantile</c> quantiles of the given instances.
/// </summary>
/// <param name="instances">The instances, one feature vector each.</param>
/// <returns>A <c>TransformRescale</c> built from the per-feature bounds.</returns>
public IFunctionBijective <float [], float []> Generate(IList <float []> instances) {
    float[,] array = ToolsCollection.ConvertToTable(instances);
    float[,] array_transposed = ToolsCollection.Transpose(array);

    // Length of a two-dimensional array is its TOTAL element count; the bounds need one entry
    // per index along dimension 0, which is also what Select1DIndex0 is called with below.
    int feature_count = array_transposed.GetLength(0);
    float [] lower_bounds = new float [feature_count];
    float [] upper_bounds = new float [feature_count];

    for (int index = 0; index < feature_count; index++) {
        // NOTE(review): QuantileSorted presumably expects sorted input, and the selected values
        // are not sorted here - confirm against ToolsMathStatistics.
        var feature_values = array_transposed.Select1DIndex0(index);
        lower_bounds[index] = ToolsMathStatistics.QuantileSorted(feature_values, quantile);
        upper_bounds[index] = ToolsMathStatistics.QuantileSorted(feature_values, 1 - quantile);
    }
    return(new TransformRescale(lower_bounds, upper_bounds));
}
/// <summary>
/// Paired-sample Student t-test on the element-wise differences of two equally sized samples.
/// </summary>
/// <param name="sample_0">First sample.</param>
/// <param name="sample_1">Second sample; must be the same size as <paramref name="sample_0"/>.</param>
/// <returns>
/// The Student-t CDF (location 0, scale 1, n - 1 degrees of freedom) evaluated at the t statistic.
/// </returns>
/// <exception cref="Exception">Thrown when the samples differ in size.</exception>
public static double TestStatic(IList <double> sample_0, IList <double> sample_1) {
    if (sample_0.Count != sample_1.Count) {
        throw new Exception("Samples not of equal size");
    }

    double[] difference = ToolsMathCollection.SubtractElements(sample_0, sample_1);
    double mean_difference = ToolsMathStatistics.Mean(difference);
    double variance = ToolsMathStatistics.Variance(difference);

    double standard_error = Math.Sqrt(variance) / Math.Sqrt(sample_0.Count);
    double t_statistic = mean_difference / standard_error;
    double degrees_of_freedom = (sample_0.Count - 1);

    // The reference distribution of the statistic is centred at 0; the location argument was
    // previously 0.1, which shifted every returned probability.
    StudentT distribution = new StudentT(0.0, 1.0, degrees_of_freedom);
    return(distribution.CumulativeDistribution(t_statistic));
}
/// <summary>
/// Experiment: loads ask and bid prices from a CSV file and derives for every time point the
/// profit of an optimally closed buy and sell order within the look-ahead window.
/// </summary>
public void DoExperiment() {
    double commission = 7;
    int lookAhead = 3600; // window length, previously repeated as a literal

    //Load data
    string [,] table = ToolsIOCSV.ReadCSVFile(@"D:\GoogleDrive\TestData\Trading\spreads.csv", Delimiter.SemiColon);
    int rowCount = table.GetLength(0);

    //Convert data to values
    double[] ask = new double [rowCount];
    double[] bid = new double [rowCount];
    for (int i = 0; i < rowCount; i++) {
        // The file uses '.' as decimal separator. Parse it culture-invariantly instead of
        // rewriting it to ',' and relying on the machine's current culture, which silently
        // produced wrong values on cultures where ',' is a group separator.
        ask[i] = 10000 * double.Parse(table[i, 3], System.Globalization.CultureInfo.InvariantCulture);
        bid[i] = 10000 * double.Parse(table[i, 2], System.Globalization.CultureInfo.InvariantCulture);
    }

    // The means in this method are not used further.
    double mean1 = ToolsMathStatistics.Mean(ask);
    double mean2 = ToolsMathStatistics.Mean(bid);

    double[] buyCurve = new double[rowCount - lookAhead];
    double[] sellCurve = new double[rowCount - lookAhead];
    for (int i = 0; i < buyCurve.Length; i++) {
        // Best closing prices over the following (lookAhead - 1) points
        double max = double.MinValue;
        double min = double.MaxValue;
        for (int j = 1; j < lookAhead; j++) {
            max = Math.Max(max, bid[i + j]);
            min = Math.Min(min, ask[i + j]);
        }
        buyCurve[i] = max - ask[i] - commission;
        sellCurve[i] = bid[i] - min - commission;
    }

    double mean3 = ToolsMathStatistics.Mean(buyCurve);
    double mean4 = ToolsMathStatistics.Mean(sellCurve);

    /*
     * Load data
     * for each time point find optimal buy order
     * create line of optimal buy orders
     */
}
/// <summary>
/// Kruskal-Wallis rank test over any number of samples.
/// </summary>
/// <param name="samples">One list of measurements per group.</param>
/// <returns>
/// One minus the chi-squared CDF of the statistic with (groups - 1) degrees of freedom.
/// </returns>
public static double TestStatic(IList <IList <double> > samples) {
    double[][] ranked_samples = ToolsMathStatistics.ConvertToAccendingRanks(samples);

    double observation_count = 0.0;
    double weighted_rank_sum = 0.0; // sum over groups of (rank sum)^2 / group size
    for (int group_index = 0; group_index < ranked_samples.Length; group_index++) {
        double[] ranks = ranked_samples[group_index];
        observation_count += ranks.Length;
        double rank_sum = ToolsMathCollection.Sum(ranks);
        weighted_rank_sum += (rank_sum * rank_sum) / ((double)ranks.Length);
    }

    double statistic = (12.0 * weighted_rank_sum) / (observation_count * (observation_count + 1)) - 3 * (observation_count + 1);
    return(1 - ChiSquared.CDF(samples.Count - 1, statistic));
}
/// <summary>
/// Scores a selection of instances by how much the AUC of <paramref name="values"/> exceeds
/// the best AUC reached by any of the competing value series on the same selection.
/// </summary>
/// <param name="current">Indexes of the selected instances.</param>
/// <param name="labels">Labels of all instances.</param>
/// <param name="values">Scores of the series under evaluation.</param>
/// <param name="values_other">Scores of the competing series; must contain at least one series.</param>
/// <returns>AUC of <paramref name="values"/> minus the highest competing AUC.</returns>
private static double Evaluate(ISet <int> current, bool[] labels, double[] values, double[][] values_other) {
    // Here the actual testing happens: we compare the values set against its competitors
    bool[] selected_labels = labels.Select(current);
    double own_auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(selected_labels, values.Select(current));

    double highest_competitor_auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(selected_labels, values_other[0].Select(current));
    for (int competitor_index = 1; competitor_index < values_other.Length; competitor_index++) {
        //TODO Here we actually want a statistical test
        double competitor_auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(selected_labels, values_other[competitor_index].Select(current));
        if (competitor_auc > highest_competitor_auc) {
            highest_competitor_auc = competitor_auc;
        }
    }
    return(own_auc - highest_competitor_auc);
}
/// <summary>
/// Mann-Whitney style test derived from the ROC AUC: U = AUC * nP * nN, compared against its
/// normal approximation.
/// </summary>
/// <param name="labels">Class label per measurement (true = positive).</param>
/// <param name="sample_0">Score per measurement; must be the same size as <paramref name="labels"/>.</param>
/// <returns>One minus the standard normal CDF of the z-transformed U statistic.</returns>
/// <exception cref="Exception">Thrown when labels and scores differ in size.</exception>
public static double TestStatic(IList <bool> labels, IList <double> sample_0) {
    if (sample_0.Count != labels.Count) {
        throw new Exception("Labels not of equal size");
    }

    // Counts are held as double: the products below were previously evaluated in 32-bit integer
    // arithmetic and overflowed silently once nP * nN * (nP + nN + 1) exceeded int.MaxValue
    // (roughly 1300 measurements per class).
    double positive_count = ToolsCollection.CountOccurance(labels, true);
    double negative_count = sample_0.Count - positive_count;

    //U = AUC * nP * nN
    double auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(labels, sample_0);
    double u_statistic = positive_count * negative_count * auc;

    //Z transform
    double expected_value = (positive_count * negative_count) / 2.0;
    double variance = (positive_count * negative_count * (positive_count + negative_count + 1)) / 12.0;
    double z_value = (u_statistic - expected_value) / Math.Sqrt(variance);
    return(1 - Normal.CDF(0.0, 1.0, z_value));
}
/// <summary>
/// Writes a histogram of the values lying between two quantiles to a file.
/// </summary>
/// <param name="file_path">Destination passed to <c>WriteToFile</c>.</param>
/// <param name="values">Values to plot. NOTE: this array is sorted in place.</param>
/// <param name="bincount">Number of bins; must be at least 1.</param>
/// <param name="lower_quantile">Quantile giving the lower bound of the plotted range.</param>
/// <param name="upper_quantile">Quantile giving the upper bound of the plotted range.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="bincount"/> is smaller than 1.</exception>
public static void PlotHistogram(string file_path, double[] values, int bincount, double lower_quantile, double upper_quantile) {
    if (bincount < 1) {
        throw new ArgumentOutOfRangeException("bincount", "bincount must be at least 1");
    }

    Array.Sort(values);
    double lowerbound = ToolsMathStatistics.QuantileSorted(values, lower_quantile);
    double upperbound = ToolsMathStatistics.QuantileSorted(values, upper_quantile);
    double[] selected_values = ToolsMathCollection.Select(values, lowerbound, upperbound);

    // bincount bins are separated by bincount - 1 interior limits. For a single bin there are no
    // limits at all; the previous code wrote bin_limits[0] unconditionally and threw in that case.
    double stride = (upperbound - lowerbound) / bincount;
    double[] bin_limits = new double[bincount - 1];
    double limit = lowerbound;
    for (int bin_limit_index = 0; bin_limit_index < bin_limits.Length; bin_limit_index++) {
        limit += stride;
        bin_limits[bin_limit_index] = limit;
    }

    PlotModel model = new HistrogramPlot(selected_values, bin_limits).PlotModel;
    WriteToFile(file_path, model, 800, 800);
}
/// <summary>
/// Bartlett-style test for equality of variances across samples, based on the logarithms of
/// the pooled variance and of the individual sample variances.
/// </summary>
/// <param name="samples">One list of measurements per group.</param>
/// <returns>The chi-squared CDF of the statistic with (groups - 1) degrees of freedom.</returns>
public static double TestStatic(IList <IList <double> > samples) {
    double pooled_variance = ToolsMathStatistics.VariancePooled(samples);
    double total_count = ToolsCollection.CountElements(samples);

    double weighted_log_variance_sum = 0; // sum of (n_i - 1) * ln(s_i^2)
    double reciprocal_dof_sum = 0;        // sum of 1 / (n_i - 1)
    for (int group_index = 0; group_index < samples.Count; group_index++) {
        IList <double> group = samples[group_index];
        weighted_log_variance_sum += (group.Count - 1.0) * Math.Log(ToolsMathStatistics.Variance(group));
        reciprocal_dof_sum += (1.0 / (group.Count - 1));
    }

    double pooled_dof = total_count - samples.Count;
    double nominator = pooled_dof * Math.Log(pooled_variance) - weighted_log_variance_sum;
    double denominator = 1 + ((1.0 / (3 * (samples.Count - 1))) * (reciprocal_dof_sum - (1 / pooled_dof)));

    double degrees_of_freedom = samples.Count - 1;
    return(ChiSquared.CDF(degrees_of_freedom, nominator / denominator));
}
/// <summary>
/// Welch's two-sample t-test (unequal variances), with the degrees of freedom obtained from
/// the Welch-Satterthwaite equation.
/// </summary>
/// <param name="sample_0">First sample.</param>
/// <param name="sample_1">Second sample.</param>
/// <returns>
/// The Student-t CDF (location 0, scale 1, Welch-Satterthwaite degrees of freedom) evaluated
/// at the negated t statistic.
/// </returns>
public override double Test(IList <double> sample_0, IList <double> sample_1) {
    double mean_0 = ToolsMathStatistics.Mean(sample_0);
    double mean_1 = ToolsMathStatistics.Mean(sample_1);
    double variance_0 = ToolsMathStatistics.Variance(sample_0, mean_0);
    // Fixed: the second variance was previously computed from sample_0.
    double variance_1 = ToolsMathStatistics.Variance(sample_1, mean_1);

    double scaled_variance_0 = variance_0 / sample_0.Count; // s0^2 / n0
    double scaled_variance_1 = variance_1 / sample_1.Count; // s1^2 / n1
    double t_statistic = (mean_0 - mean_1) / Math.Sqrt(scaled_variance_0 + scaled_variance_1);

    //Welch-Satterthwaite equation:
    //  dof = (s0^2/n0 + s1^2/n1)^2 / ( (s0^2/n0)^2/(n0-1) + (s1^2/n1)^2/(n1-1) )
    // Fixed: the variances are already squared quantities, so each term squares them once;
    // the previous code raised them to the fourth power.
    double dof_nominator = ToolsMath.Sqr(scaled_variance_0 + scaled_variance_1);
    double dof_denominator = (ToolsMath.Sqr(scaled_variance_0) / (sample_0.Count - 1)) + (ToolsMath.Sqr(scaled_variance_1) / (sample_1.Count - 1));
    double degrees_of_freedom = dof_nominator / dof_denominator;

    // Fixed: the reference distribution is centred at 0; the location argument was 0.1.
    StudentT distribution = new StudentT(0.0, 1.0, degrees_of_freedom);
    return(distribution.CumulativeDistribution(-t_statistic));
}
/// <summary>
/// Compares the ROC AUCs of two score series measured on the same labelled instances, using
/// their standard errors and the correlation between the series.
/// </summary>
/// <param name="labels">Class label per instance.</param>
/// <param name="sample_0">Scores of the first series.</param>
/// <param name="sample_1">Scores of the second series.</param>
/// <param name="random">Random source forwarded to <c>ComputeStandardError</c>.</param>
/// <param name="trial_count">Trial count forwarded to <c>ComputeStandardError</c>.</param>
/// <returns>One minus the standard normal CDF of the z value.</returns>
/// <exception cref="Exception">Thrown when the samples or the labels differ in size.</exception>
public static double TestStatic(IList <bool> labels, IList <double> sample_0, IList <double> sample_1, Random random, int trial_count) {
    if (sample_0.Count != sample_1.Count) {
        throw new Exception("Samples not of equal size");
    }
    if (sample_0.Count != labels.Count) {
        throw new Exception("Labels not of equal size");
    }

    double first_auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(labels, sample_0);
    double second_auc = ToolsMathStatistics.ComputeROCAUCTrapeziod(labels, sample_1);

    // The standard errors consume the shared random source, first series first.
    double first_standard_error = ComputeStandardError(first_auc, sample_0, labels, random, trial_count);
    double second_standard_error = ComputeStandardError(second_auc, sample_1, labels, random, trial_count);

    double correlation = ComputeKendalTauA(sample_0, sample_1);
    double z_value = ZTransform(first_auc, second_auc, first_standard_error, second_standard_error, correlation);
    return(1 - Normal.CDF(0.0, 1.0, z_value));
}
/// <summary>
/// Updates the indicator from the current bid: predicts the bid from the two previous outputs
/// plus weighted momentum, corrects the prediction by a weighted share of the prediction
/// error, and writes the result together with its diagnostics into <paramref name="result"/>.
/// </summary>
/// <param name="market_model">Source of the current bid and of the history count.</param>
/// <param name="result">
/// Output, at least 8 entries: [0] corrected value, [1]/[2] value plus/minus two error standard
/// deviations, [3] error, [4] error standard deviation, [5] error z-score, [6] error weight,
/// [7] momentum.
/// </param>
/// <returns>True when the error buffer is shorter than <c>market_model.Second1.HistoryCount</c>.</returns>
public override bool ComputeRBA(IMarketModelIndicator market_model, double[] result) {
    // While previous_1 is still 0, seed both previous values with the current bid.
    if (previous_1 == 0) {
        previous_1 = market_model.CurrentBid;
        previous_2 = market_model.CurrentBid;
    }

    double momentum = previous_1 - previous_2;
    double prediction = previous_1 + (momentum_weight * momentum);
    double error_now = market_model.CurrentBid - prediction;

    // Store the error in the circular buffer, then advance the write position.
    indicator_error_now[index_indicator_error_now++ % indicator_error_now.Length] = error_now;

    double error_deviation = ToolsMathStatistics.StandardDeviation(indicator_error_now);
    if (error_deviation == 0) {
        error_deviation = double.Epsilon; // avoid dividing by zero below
    }
    double error_z = error_now / error_deviation;
    double error_weight = max_error_weight * (1 - Math.Exp(-ToolsMath.Sqr(error_z) / expected_error_z));

    result[0] = prediction + (error_now * error_weight);
    result[1] = result[0] + (error_deviation * 2);
    result[2] = result[0] - (error_deviation * 2);
    result[3] = error_now;
    result[4] = error_deviation;
    result[5] = error_z;
    result[6] = error_weight;
    result[7] = momentum;

    previous_2 = previous_1;
    previous_1 = result[0];
    return(indicator_error_now.Length < market_model.Second1.HistoryCount);
}
/// <summary>
/// Shapiro-Wilk / Shapiro-Francia test of composite normality ("X is normal with unspecified
/// mean and variance"). The Shapiro-Francia statistic is used when the sample kurtosis exceeds 3
/// (leptokurtic), the Shapiro-Wilk statistic otherwise (platykurtic).
/// </summary>
/// <param name="sample">
/// Deviates from an unknown distribution; between 3 and 5000 observations. The list is passed to
/// <c>ToolsCollection.Sort</c> before use.
/// </param>
/// <returns>
/// The p-value: the probability of observing the given result by chance given that the null
/// hypothesis is true. Small values cast doubt on the null hypothesis.
/// </returns>
/// <exception cref="Exception">Thrown when the sample has fewer than 3 or more than 5000 observations.</exception>
public static double TestStatic(IList <double> sample) {
    // Port of SWTEST, as in
    // http://nl.mathworks.com/matlabcentral/fileexchange/13964-shapiro-wilk-and-shapiro-francia-normality-tests/content/swtest.m
    //
    // This is an omnibus test, and is generally considered relatively powerful against a
    // variety of alternatives. The Shapiro-Wilk test is better than the Shapiro-Francia test
    // for platykurtic samples; conversely, the Shapiro-Francia test is better for leptokurtic
    // samples.
    //
    // Copyright(c) 17 March 2009 by Ahmed Ben Sada
    // Department of Finance, IHEC Sousse - Tunisia
    // Email: [email protected]
    // $ Revision 3.0 $ Date: 18 Juin 2014 $
    //
    // References:
    // - Royston P. "Remark AS R94", Applied Statistics (1995), Vol. 44, No. 4, pp. 547-551.
    //   AS R94 calculates the Shapiro-Wilk normality test and P-value for sample sizes
    //   3 <= n <= 5000. Handles censored or uncensored data. Corrects AS 181, which was found
    //   to be inaccurate for n > 50. Subroutine can be found at: http://lib.stat.cmu.edu/apstat/R94
    // - Royston P. "A pocket-calculator algorithm for the Shapiro-Francia test for
    //   non-normality: An application to medicine", Statistics in Medecine (1993a), Vol. 12,
    //   pp. 181-184.
    // - Royston P. "A Toolkit for Testing Non-Normality in Complete and Censored Samples",
    //   Journal of the Royal Statistical Society Series D (1993b), Vol. 42, No. 1, pp. 37-43.
    // - Royston P. "Approximating the Shapiro-Wilk W-test for non-normality", Statistics and
    //   Computing (1992), Vol. 2, pp. 117-119.
    // - Royston P. "An Extension of Shapiro and Wilk's W Test for Normality to Large Samples",
    //   Journal of the Royal Statistical Society Series C (1982a), Vol. 31, No. 2, pp. 115-124.
    if (sample.Count < 3) {
        throw new Exception("Sample vector must have at least 3 valid observations.");
    }
    if (5000 < sample.Count) {
        throw new Exception("Shapiro-Wilk test might be inaccurate due to large sample size ( > 5000).");
    }

    // First, calculate the a's for weights as a function of the m's.
    // See Royston (1992, p. 117) and Royston (1993b, p. 38) for details in the approximation.
    ToolsCollection.Sort(sample); // Sort the vector X in ascending order.
    int n = sample.Count;

    double[] mtilde = new double[n];
    for (int index = 0; index < n; index++) {
        mtilde[index] = Normal.InvCDF(0.0, 1.0, ((index + 1) - (3.0 / 8.0)) / (n + (1.0 / 4.0)));
    }

    double mtilde_in_product = 0.0;
    for (int index = 0; index < n; index++) {
        mtilde_in_product += mtilde[index] * mtilde[index];
    }

    // weights = 1 / sqrt(mtilde' * mtilde) * mtilde
    double[] weights = new double[n];
    for (int index = 0; index < n; index++) {
        weights[index] = 1.0 / Math.Sqrt(mtilde_in_product) * mtilde[index];
    }

    double sample_mean = ToolsMathStatistics.Mean(sample);
    double kurtosis = ToolsMathStatistics.KurtosisPlain(sample);

    if (kurtosis > 3) {
        // The Shapiro-Francia test is better for leptokurtic samples.
        // The Shapiro-Francia statistic W' is calculated to avoid excessive rounding errors for
        // W' close to 1 (a potential problem in very large samples).
        double sf_nom = Inproduct(sample, weights);
        double sf_denom = 0.0;
        for (int index = 0; index < n; index++) {
            sf_denom += (sample[index] - sample_mean) * (sample[index] - sample_mean);
        }
        double W = (sf_nom * sf_nom) / sf_denom;

        // Royston (1993a, p. 183):
        double nu = Math.Log(n);
        double u1 = Math.Log(nu) - nu;
        double u2 = Math.Log(nu) + 2 / nu;
        double mu = -1.2725 + (1.0521 * u1);
        double sigma = 1.0308 - (0.26758 * u2);
        double newSFstatistic = Math.Log(1 - W);

        // Compute the normalized Shapiro-Francia statistic and its p-value,
        // Royston (1993a, p. 183).
        double NormalSFstatistic = (newSFstatistic - mu) / sigma;
        double pValue = 1 - Normal.CDF(0, 1, NormalSFstatistic);
        return(pValue);
    } else {
        // The Shapiro-Wilk test is better for platykurtic samples.
        double u = 1 / Math.Sqrt(n);

        // Royston (1992, p. 117) and Royston (1993b, p. 38). The last coefficients are the
        // normalized m's of the two largest order statistics (MATLAB c(n) and c(n-1)); they are
        // read here, before the weights array is overwritten below.
        double[] PolyCoef_1 = new double [] { -2.706056, 4.434685, -2.071190, -0.147981, 0.221157, weights[n - 1] };
        double[] PolyCoef_2 = new double [] { -3.582633, 5.682633, -1.752461, -0.293762, 0.042981, weights[n - 2] };

        // Royston (1992, p. 118) and Royston (1993b, p. 40, Table 1)
        double[] PolyCoef_3 = new double [] { -0.0006714, 0.0250540, -0.39978, 0.54400 };
        double[] PolyCoef_4 = new double [] { -0.0020322, 0.0627670, -0.77857, 1.38220 };
        double[] PolyCoef_5 = new double [] { 0.00389150, -0.083751, -0.31082, -1.5861 };
        double[] PolyCoef_6 = new double [] { 0.00303020, -0.082676, -0.48030 };
        double[] PolyCoef_7 = new double[] { 0.459, -2.273 };

        // Index translation from the 1-based MATLAB source: weights(n) -> weights[n - 1],
        // weights(1) -> weights[0], weights(n-1) -> weights[n - 2], weights(2) -> weights[1].
        // The previous port wrote the mirrored weights to indexes 1 and 2, leaving weights[0]
        // at its Shapiro-Francia value.
        weights[n - 1] = Polyval(PolyCoef_1, u);
        weights[0] = -weights[n - 1];

        int count = 0; // 1-based MATLAB index of the first interior weight
        double phi = 0.0;
        if (n > 5) {
            weights[n - 2] = Polyval(PolyCoef_2, u);
            weights[1] = -weights[n - 2];
            count = 3;
            phi = (Inproduct(mtilde, mtilde) - 2 * Math.Pow(mtilde[n - 1], 2) - 2 * Math.Pow(mtilde[n - 2], 2)) / (1 - 2 * Math.Pow(weights[n - 1], 2) - 2 * Math.Pow(weights[n - 2], 2));
        } else {
            count = 2;
            phi = (Inproduct(mtilde, mtilde) - 2 * Math.Pow(mtilde[n - 1], 2)) / (1 - 2 * Math.Pow(weights[n - 1], 2));
        }

        // Special attention when n = 3 (this is a special case), Royston (1992, p. 117):
        // MATLAB weights(1) = 1/sqrt(2); weights(n) = -weights(1).
        if (n == 3) {
            weights[0] = 1 / Math.Sqrt(2);
            weights[n - 1] = -weights[0];
            phi = 1;
        }

        // The vector 'weights' obtained next corresponds to the same coefficients listed by
        // Shapiro-Wilk in their original test for small samples.
        // MATLAB range count : n-count+1 (1-based, inclusive) is count-1 .. n-count (0-based).
        for (int index = count - 1; index <= n - count; index++) {
            weights[index] = mtilde[index] / Math.Sqrt(phi);
        }

        // The Shapiro-Wilk statistic W is calculated to avoid excessive rounding errors for W
        // close to 1 (a potential problem in very large samples).
        double[] residual = ToolsMathCollectionDouble.Subtract(sample, sample_mean);
        double W = Math.Pow(Inproduct(weights, sample), 2) / Inproduct(residual, residual);

        // Calculate the normalized W and its significance level (exact for n = 3).
        // Royston (1992, p. 118) and Royston (1993b, p. 40, Table 1).
        double newn = Math.Log(n);
        double mu = 0.0;
        double sigma = 0.0;
        double gam = 0.0;
        double newSWstatistic = 0.0;
        if (n > 11) {
            mu = Polyval(PolyCoef_5, newn);
            sigma = Math.Exp(Polyval(PolyCoef_6, newn));
            newSWstatistic = Math.Log(1 - W);
        } else if ((n >= 4) && (n <= 11)) {
            mu = Polyval(PolyCoef_3, n);
            sigma = Math.Exp(Polyval(PolyCoef_4, n));
            gam = Polyval(PolyCoef_7, n);
            newSWstatistic = -Math.Log(gam - Math.Log(1 - W));
        } else if (n == 3) {
            mu = 0;
            sigma = 1;
            newSWstatistic = 0;
        }

        // The statistic is referred to the upper tail of N(mu, sigma), which is the same as
        // referring (newSWstatistic - mu) / sigma to N(0, 1). Royston (1992, p. 119).
        double pValue = 1 - Normal.CDF(mu, sigma, newSWstatistic);

        // Special attention when n = 3 (this is a special case), Royston (1982a, p. 121).
        if (n == 3) {
            pValue = 6 / Math.PI * (Math.Asin(Math.Sqrt(W)) - Math.Asin(Math.Sqrt(3.0 / 4.0)));
        }
        return(pValue);
    }
}