/// <summary>
///   Checks that a two-sample Kolmogorov-Smirnov test with the "unequal"
///   alternative reports a significant difference between a Normal sample
///   and a Uniform sample, and pins the resulting statistic and p-value.
/// </summary>
public void TwoSampleKolmogorovSmirnovTestConstructorTest()
{
    // Set up Accord's generator with a fixed argument; the exact expected values
    // asserted below presumably rely on this making the samples reproducible.
    Accord.Math.Tools.SetupGenerator(0);

    // Create a K-S test to verify if two samples have been
    // drawn from different populations. In this example, we
    // will first generate a number of samples from different
    // distributions, and then check if the K-S test can indeed
    // see the difference:

    // Generate 25 points from a Normal distribution with mean 5 and sigma 1
    double[] sample1 = new NormalDistribution(mean: 5, stdDev: 1).Generate(25);

    // Generate 25 points from a uniform distribution from 0 to 10
    double[] sample2 = new UniformContinuousDistribution(a: 0, b: 10).Generate(25);

    // Now we can create a K-S test and test the unequal hypothesis:
    var test = new TwoSampleKolmogorovSmirnovTest(sample1, sample2,
        TwoSampleKolmogorovSmirnovTestHypothesis.SamplesDistributionsAreUnequal);

    // The difference between the two samples is expected to be significant.
    Assert.IsTrue(test.Significant);

    Assert.AreEqual(0.44, test.Statistic, 1e-15);
    Assert.IsFalse(Double.IsNaN(test.Statistic));
    Assert.AreEqual(0.00826, test.PValue, 1e-5);
}
/// <summary>
///   Runs the two-sample Kolmogorov-Smirnov test, without specifying an
///   alternative hypothesis, on two small samples of different lengths and
///   checks the resulting statistic and p-value.
/// </summary>
public void TwoSampleKolmogorovSmirnovTestConstructorTest3()
{
    double[] x = { 1, 2, 3, 4, 5 };
    double[] y = { 2.5, 4.5 };

    var target = new TwoSampleKolmogorovSmirnovTest(x, y);

    // The statistic is a computed double, so compare it with a tolerance
    // rather than relying on exact floating-point equality.
    Assert.AreEqual(0.4, target.Statistic, 1e-10);
    Assert.AreEqual(0.952, target.PValue, 1e-3);
}
/// <summary>
///   Runs the two-sample Kolmogorov-Smirnov test on the redwell/whitney data
///   under all three alternative hypotheses and checks each statistic and p-value.
/// </summary>
public void TwoSampleKolmogorovSmirnovTestConstructorTest2()
{
    // The following example comes from the stats page of the College
    // of Saint Benedict and Saint John's University (Kirkman, 1996).
    // http://www.physics.csbsju.edu/stats/

    double[] redwell =
    {
        23.4, 30.9, 18.8, 23.0, 21.4, 1, 24.6, 23.8, 24.1, 18.7,
        16.3, 20.3, 14.9, 35.4, 21.6, 21.2, 21.0, 15.0, 15.6, 24.0,
        34.6, 40.9, 30.7, 24.5, 16.6, 1, 21.7, 1, 23.6, 1,
        25.7, 19.3, 46.9, 23.3, 21.8, 33.3, 24.9, 24.4, 1, 19.8,
        17.2, 21.5, 25.5, 23.3, 18.6, 22.0, 29.8, 33.3, 1, 21.3,
        18.6, 26.8, 19.4, 21.1, 21.2, 20.5, 19.8, 26.3, 39.3, 21.4,
        22.6, 1, 35.3, 7.0, 19.3, 21.3, 10.1, 20.2, 1, 36.2,
        16.7, 21.1, 39.1, 19.9, 32.1, 23.1, 21.8, 30.4, 19.62, 15.5
    };

    double[] whitney =
    {
        16.5, 1, 22.6, 25.3, 23.7, 1, 23.3, 23.9, 16.2, 23.0,
        21.6, 10.8, 12.2, 23.6, 10.1, 24.4, 16.4, 11.7, 17.7, 34.3,
        24.3, 18.7, 27.5, 25.8, 22.5, 14.2, 21.7, 1, 31.2, 13.8,
        29.7, 23.1, 26.1, 25.1, 23.4, 21.7, 24.4, 13.2, 22.1, 26.7,
        22.7, 1, 18.2, 28.7, 29.1, 27.4, 22.3, 13.2, 22.5, 25.0,
        1, 6.6, 23.7, 23.5, 17.3, 24.6, 27.8, 29.7, 25.3, 19.9,
        18.2, 26.2, 20.4, 23.3, 26.7, 26.0, 1, 25.1, 33.1, 35.0,
        25.3, 23.6, 23.2, 20.2, 24.7, 22.6, 39.1, 26.5, 22.7
    };

    // Create one non-parametric Kolmogorov-Smirnov test per alternative hypothesis.
    var unequal = new TwoSampleKolmogorovSmirnovTest(redwell, whitney,
        TwoSampleKolmogorovSmirnovTestHypothesis.SamplesDistributionsAreUnequal);
    var firstLarger = new TwoSampleKolmogorovSmirnovTest(redwell, whitney,
        TwoSampleKolmogorovSmirnovTestHypothesis.FirstSampleIsLargerThanSecond);
    var firstSmaller = new TwoSampleKolmogorovSmirnovTest(redwell, whitney,
        TwoSampleKolmogorovSmirnovTestHypothesis.FirstSampleIsSmallerThanSecond);

    const double tolerance = 1e-10;

    // Test statistics.
    Assert.AreEqual(0.2204113924050633, unequal.Statistic, tolerance);
    Assert.AreEqual(0.2204113924050633, firstLarger.Statistic, tolerance);
    Assert.AreEqual(0.1242088607594936, firstSmaller.Statistic, tolerance);

    // P-values.
    Assert.AreEqual(0.03463090913864153, unequal.PValue, tolerance);
    Assert.AreEqual(0.0177488245823226, firstLarger.PValue, tolerance);
    Assert.AreEqual(0.270697775095498, firstSmaller.PValue, tolerance);
}
/// <summary>
///   Builds a two-sample Kolmogorov-Smirnov test and passes it to WriteObject.
///   For the "Pipeline" parameter set the samples are read from <c>_data</c>
///   by name; otherwise <c>Sample1</c> and <c>Sample2</c> are used directly.
/// </summary>
protected override void EndProcessing()
{
    var hypothesis = TestingHelper.GetTwoSampleKolmogorovSmirnovTestHypothesis(Alternate);

    // Only the selected branch is evaluated, so _data is touched
    // exclusively when the "Pipeline" parameter set is active.
    TwoSampleKolmogorovSmirnovTest ksTest = ParameterSetName == "Pipeline"
        ? new TwoSampleKolmogorovSmirnovTest(
            _data[Sample1Name].ToDoubleArray(),
            _data[Sample2Name].ToDoubleArray(),
            hypothesis)
        : new TwoSampleKolmogorovSmirnovTest(Sample1, Sample2, hypothesis);

    ksTest.Size = Size;

    WriteObject(ksTest);
}
/// <summary>
///   Builds the model: assigns <c>pca</c> and <c>cluster</c>, then for every key
///   in <c>cntDic</c> runs a two-sample Kolmogorov-Smirnov test per variable and
///   stores the p-values in <c>pDic</c> and the statistics in <c>sDic</c>.
///   Returns without doing anything when checkTables() or getSampleRatios() fails.
/// </summary>
private void buildModel()
{
    // Short-circuit keeps the original call order: getSampleRatios() is only
    // consulted when checkTables() succeeded.
    if (!checkTables() || !getSampleRatios())
    {
        return;
    }

    pca = new dataPrepPrincipleComponents(Sample1, Variables);
    cluster = new dataPrepClusterKmean(Sample1, Variables, numberOfBins);

    foreach (string key in cntDic.Keys)
    {
        // Presumably refreshes sample1/sample2 for this key -- confirm in buildSamples.
        buildSamples(key);

        double[] pValues = new double[Variables.Length];
        double[] statistics = new double[Variables.Length];

        for (int v = 0; v < Variables.Length; v++)
        {
            TwoSampleKolmogorovSmirnovTest ksTest = new TwoSampleKolmogorovSmirnovTest(
                sample1[v],
                sample2[v],
                TwoSampleKolmogorovSmirnovTestHypothesis.SamplesDistributionsAreUnequal);

            // Record the binned proportions for this key/variable before reading the results.
            getCdfProp(key, v, ksTest);

            pValues[v] = ksTest.PValue;
            statistics[v] = ksTest.Statistic;
        }

        pDic.Add(key, pValues);
        sDic.Add(key, statistics);
    }
}
/*private void PerformCmv(Distribution variantResult, Distribution scaledVariantTarget, Distribution nsgaFv, Distribution featureTarget, Distribution interacTarget, Distribution interacVals, Solution solution)
 * {
 *     Task<double>[] tasks = {};
 *     if (Model.Setting.NoVariantCalculation)
 *     {
 *         tasks = new Task<double>[Model.Setting.NumberOfInteractions > 0 ? 2 : 1];
 *     }
 *     else
 *     {
 *         tasks = new Task<double>[Model.Setting.NumberOfInteractions > 0 ? 3 : 2];
 *     }
 *
 *
 *
 *     tasks[0] = Task.Factory.StartNew(() =>
 *     {
 *         var cmv2 = new CramerVonMises(nsgaFv.Values, featureTarget.Values);
 *         return Math.Abs(cmv2.Calculate());
 *     });
 *
 *     if (!Model.Setting.NoVariantCalculation)
 *     {
 *         tasks[2] = Task.Factory.StartNew(() =>
 *         {
 *             var cmv = new CramerVonMises(variantResult.Values, scaledVariantTarget.Values);
 *             return Math.Abs(cmv.Calculate());
 *         });
 *     }
 *     if (Model.Setting.NumberOfInteractions > 0)
 *     {
 *
 *         tasks[1] = Task.Factory.StartNew(() =>
 *         {
 *             var cmv3 = new CramerVonMises(interacTarget.Values, interacVals.Values);
 *             return Math.Abs(cmv3.Calculate());
 *         });
 *
 *     }
 *
 *     Task.WaitAll(tasks);
 *
 *     solution.Objective[0] = tasks[0].Result;
 *     solution.Objective[1] = tasks[1].Result;
 *     if (Model.Setting.NumberOfInteractions > 0) solution.Objective[2] = tasks[2].Result;
 * }
 */

/// <summary>
///   Writes two objectives into <paramref name="solution"/> from two-sample
///   Kolmogorov-Smirnov tests: objective 0 compares the variant values with the
///   scaled variant target, objective 1 compares the feature values with the
///   feature target. Each objective is the negated p-value, or
///   <c>double.MaxValue</c> when the p-value is exactly zero.
/// </summary>
private static void PerfomKs(Distribution variantResult, Distribution scaledVariantTarget, Distribution nsgaFv, Distribution featureTarget, Solution solution)
{
    var variantTest = new TwoSampleKolmogorovSmirnovTest(variantResult.Values, scaledVariantTarget.Values);
    double variantObjective = variantTest.PValue == 0d
        ? double.MaxValue
        : -variantTest.PValue;
    solution.Objective[0] = variantObjective;

    var featureTest = new TwoSampleKolmogorovSmirnovTest(nsgaFv.Values, featureTarget.Values);
    double featureObjective = featureTest.PValue == 0d
        ? double.MaxValue
        : -featureTest.PValue;
    solution.Objective[1] = featureObjective;
}
/// <summary>
///   Fills, for key <paramref name="s"/> and variable index <paramref name="i"/>,
///   the per-bin proportions of both empirical distributions of
///   <paramref name="test"/> over <c>numberOfBins</c> equal-width bins.
///   Each bin holds the increase of the empirical CDF across that bin; the
///   first bin holds the CDF value at its upper edge.
/// </summary>
/// <param name="s">Key used to look up the bin tables and the min/max tables.</param>
/// <param name="i">Index of the variable whose bins are filled.</param>
/// <param name="test">K-S test whose two empirical distributions are evaluated.</param>
private void getCdfProp(string s, int i, TwoSampleKolmogorovSmirnovTest test)
{
    // Fetch the bin tables for this key, creating and registering them on first use.
    double[][] variableBinProps;
    if (!binPropDic1.TryGetValue(s, out variableBinProps))
    {
        variableBinProps = new double[Variables.Length][];
        for (int k = 0; k < Variables.Length; k++)
        {
            variableBinProps[k] = new double[numberOfBins];
        }
        binPropDic1.Add(s, variableBinProps);
    }

    double[][] variableBinProps2;
    if (!binPropDic2.TryGetValue(s, out variableBinProps2))
    {
        variableBinProps2 = new double[Variables.Length][];
        for (int k = 0; k < Variables.Length; k++)
        {
            variableBinProps2[k] = new double[numberOfBins];
        }
        binPropDic2.Add(s, variableBinProps2);
    }

    double[] binProp2 = variableBinProps2[i];
    double[] binProp = variableBinProps[i];

    // Row 0 of each min/max table is read as the minimum, row 1 as the maximum.
    double[][] minMax = minMaxDic1[s];
    double min = minMax[0][i];
    double max = minMax[1][i];

    // Widen the range so that it covers the second table's min/max as well.
    double[][] minMax2 = minMaxDic2[s];
    double min2 = minMax2[0][i];
    double max2 = minMax2[1][i];
    if (min2 < min)
    {
        min = min2;
    }
    if (max2 > max)
    {
        max = max2;
    }

    double binD = max - min;
    double dvid = System.Convert.ToDouble(numberOfBins);

    // CDF values at the previous bin edge, used to turn cumulative values into per-bin ones.
    double pCdfp = 0;
    double pCdfp2 = 0;

    for (int j = 1; j <= numberOfBins; j++)
    {
        // Upper edge of bin j. The last edge is pinned to max itself because
        // min + (max - min) can round to just below max in floating point, which
        // would leave observations equal to max out of the final bin.
        double dX = (j == numberOfBins) ? max : min + binD * (j / dvid);

        double pCdf = test.EmpiricalDistribution1.DistributionFunction(dX);
        double pCdf2 = test.EmpiricalDistribution2.DistributionFunction(dX);

        binProp[j - 1] = pCdf - pCdfp;
        binProp2[j - 1] = pCdf2 - pCdfp2;

        pCdfp = pCdf;
        pCdfp2 = pCdf2;
    }
}
/// <summary>
///   Returns the statistic of a two-sample Kolmogorov-Smirnov test built from
///   the values of the two given distributions.
/// </summary>
/// <param name="first">Distribution whose values form the first sample.</param>
/// <param name="second">Distribution whose values form the second sample.</param>
/// <returns>The K-S test statistic.</returns>
public double Calculate(Distribution first, Distribution second)
{
    return new TwoSampleKolmogorovSmirnovTest(first.Values, second.Values).Statistic;
}
/// <summary>
///   Computes, for key <paramref name="s"/> and variable index <paramref name="i"/>,
///   how much of each empirical distribution of <paramref name="test"/> falls into
///   each of <c>numberOfBins</c> equal-width bins, and stores the proportions in the
///   bin tables registered under <paramref name="s"/>.
/// </summary>
/// <param name="s">Key used to look up the bin tables and the min/max tables.</param>
/// <param name="i">Index of the variable whose bins are filled.</param>
/// <param name="test">K-S test whose two empirical distributions are evaluated.</param>
private void getCdfProp(string s, int i, TwoSampleKolmogorovSmirnovTest test)
{
    // Look up the first bin table; create and register it the first time this key is seen.
    double[][] variableBinProps;
    if (!binPropDic1.TryGetValue(s, out variableBinProps))
    {
        variableBinProps = new double[Variables.Length][];
        for (int k = 0; k < Variables.Length; k++)
        {
            variableBinProps[k] = new double[numberOfBins];
        }
        binPropDic1.Add(s, variableBinProps);
    }

    // Same for the second bin table.
    double[][] variableBinProps2;
    if (!binPropDic2.TryGetValue(s, out variableBinProps2))
    {
        variableBinProps2 = new double[Variables.Length][];
        for (int k = 0; k < Variables.Length; k++)
        {
            variableBinProps2[k] = new double[numberOfBins];
        }
        binPropDic2.Add(s, variableBinProps2);
    }

    double[] binProp2 = variableBinProps2[i];
    double[] binProp = variableBinProps[i];

    // Start from the first table's range (row 0 = minimum, row 1 = maximum) ...
    double[][] minMax = minMaxDic1[s];
    double min = minMax[0][i];
    double max = minMax[1][i];

    // ... and extend it wherever the second table's range reaches further.
    double[][] minMax2 = minMaxDic2[s];
    double min2 = minMax2[0][i];
    double max2 = minMax2[1][i];
    if (min2 < min)
    {
        min = min2;
    }
    if (max2 > max)
    {
        max = max2;
    }

    double binD = max - min;
    double dvid = System.Convert.ToDouble(numberOfBins);

    // Cumulative values at the previous edge; differences give the per-bin share.
    double pCdfp = 0;
    double pCdfp2 = 0;

    for (int j = 1; j <= numberOfBins; j++)
    {
        // The final edge uses max directly: min + (max - min) may round to slightly
        // less than max, and the last bin would then miss values equal to max.
        double dX = (j == numberOfBins) ? max : min + binD * (j / dvid);

        double pCdf = test.EmpiricalDistribution1.DistributionFunction(dX);
        double pCdf2 = test.EmpiricalDistribution2.DistributionFunction(dX);

        binProp[j - 1] = pCdf - pCdfp;
        binProp2[j - 1] = pCdf2 - pCdfp2;

        pCdfp = pCdf;
        pCdfp2 = pCdf2;
    }
}
/// <summary>
///   Assigns <c>pca</c> and <c>cluster</c> and then, for each key of <c>cntDic</c>,
///   tests every variable with a two-sample Kolmogorov-Smirnov test (unequal
///   alternative), adding the p-values to <c>pDic</c> and the statistics to <c>sDic</c>.
///   Does nothing when checkTables() or getSampleRatios() returns false.
/// </summary>
private void buildModel()
{
    bool tablesOk = checkTables();
    if (!tablesOk)
    {
        return;
    }

    bool ratiosOk = getSampleRatios();
    if (!ratiosOk)
    {
        return;
    }

    pca = new dataPrepPrincipleComponents(Sample1, Variables);
    cluster = new dataPrepClusterKmean(Sample1, Variables, numberOfBins);

    foreach (string strataKey in cntDic.Keys)
    {
        // Presumably fills sample1/sample2 for this key -- verify against buildSamples.
        buildSamples(strataKey);

        double[] pValueArr = new double[Variables.Length];
        double[] sValueArr = new double[Variables.Length];

        int index = 0;
        while (index < Variables.Length)
        {
            var ksTest = new TwoSampleKolmogorovSmirnovTest(
                sample1[index],
                sample2[index],
                TwoSampleKolmogorovSmirnovTestHypothesis.SamplesDistributionsAreUnequal);

            // Bin proportions are recorded first, then the test results are read.
            getCdfProp(strataKey, index, ksTest);

            pValueArr[index] = ksTest.PValue;
            sValueArr[index] = ksTest.Statistic;

            index++;
        }

        pDic.Add(strataKey, pValueArr);
        sDic.Add(strataKey, sValueArr);
    }
}