/// <summary>
/// <para>Using VC-dimension, we can bound the probability of making an error when estimating empirical probability
/// distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
/// http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false .</para>
/// <para>Note that for intervals on the real line the VC-dimension is 2.</para>
/// </summary>
/// <param name="epsilon">The error we are willing to tolerate.</param>
/// <param name="delta">The error probability we are willing to tolerate.</param>
/// <param name="s">The samples to use for testing.</param>
/// <param name="dist">The distribution we are testing.</param>
public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
{
    // Materialize the sequence once: the original code enumerated `s` twice
    // (Count() and then the Histogram constructor), which is incorrect for
    // lazily generated, one-shot sample streams.
    var samples = s.ToList();

    // Theorem 2.41 requires at least ceil(32 * ln(16/delta) / eps^2) samples
    // for the VC bound to guarantee the error probability `delta`.
    double n = samples.Count;
    Assert.GreaterThan(n, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

    var histogram = new Histogram(samples, NumberOfBuckets);
    for (int i = 0; i < NumberOfBuckets; i++)
    {
        // Theoretical probability mass of bucket i under the tested distribution.
        double p = dist.CumulativeDistribution(histogram[i].UpperBound) - dist.CumulativeDistribution(histogram[i].LowerBound);

        // Empirical probability mass of bucket i from the samples.
        double pe = histogram[i].Count / n;
        Assert.LessThan(Math.Abs(p - pe), epsilon, dist.ToString());
    }
}
/// <summary>
/// Vapnik Chervonenkis test.
/// </summary>
/// <param name="epsilon">The error we are willing to tolerate.</param>
/// <param name="delta">The error probability we are willing to tolerate.</param>
/// <param name="s">The samples to use for testing.</param>
/// <param name="dist">The distribution we are testing.</param>
public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
{
    // Using VC-dimension, we can bound the probability of making an error when estimating empirical probability
    // distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
    // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false
    // For intervals on the real line the VC-dimension is 2.

    // The theorem requires at least ceil(32 * ln(16/delta) / epsilon^2) samples.
    double n = s.Count();
    Assert.Greater(n, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));
    var histogram = new Histogram(s, NumberOfBuckets);
    for (var i = 0; i < NumberOfBuckets; i++)
    {
        // p  = theoretical probability mass of bucket i under `dist`;
        // pe = empirical probability mass of bucket i from the samples.
        var p = dist.CumulativeDistribution(histogram[i].UpperBound) - dist.CumulativeDistribution(histogram[i].LowerBound);
        var pe = histogram[i].Count / n;
        Assert.Less(Math.Abs(p - pe), epsilon, dist.ToString());
    }
}
/// <summary>
/// Verifies that a Monte-Carlo estimate of the CDF at <paramref name="x"/>, produced by
/// integrating samples from <paramref name="sampler"/>, matches the reference
/// distribution's CDF to within <paramref name="error"/>.
/// </summary>
/// <param name="x">The point at which to evaluate the CDF.</param>
/// <param name="sampler">The sampler producing the draws to integrate.</param>
/// <param name="referenceDistribution">The distribution supplying the exact CDF.</param>
/// <param name="error">The tolerated absolute difference.</param>
public static void TestIntegrateDistribution(double x, ISampler<double> sampler, IDistribution referenceDistribution, double error)
{
    const int trials = 1000000;

    // Exact value from the reference distribution, then the sampled estimate.
    var referenceCdf = referenceDistribution.CumulativeDistribution(x);
    var estimatedCdf = IntegrateCdf(sampler, x, trials);

    Assert.AreEqual(referenceCdf, estimatedCdf, error);
}
/// <summary>
/// Vapnik Chervonenkis test.
/// </summary>
/// <param name="epsilon">The error we are willing to tolerate.</param>
/// <param name="delta">The error probability we are willing to tolerate.</param>
/// <param name="s">The samples to use for testing.</param>
/// <param name="dist">The distribution we are testing.</param>
public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
{
    // Using VC-dimension, we can bound the probability of making an error when estimating empirical probability
    // distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
    // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false
    // For intervals on the real line the VC-dimension is 2.
    double sampleCount = s.Count();

    // The theorem requires at least ceil(32 * ln(16/delta) / epsilon^2) samples.
    Assert.Greater(sampleCount, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

    var histogram = new Histogram(s, NumberOfBuckets);
    for (var bucket = 0; bucket < NumberOfBuckets; bucket++)
    {
        // Theoretical mass of the bucket under `dist` versus its empirical mass.
        var upper = dist.CumulativeDistribution(histogram[bucket].UpperBound);
        var lower = dist.CumulativeDistribution(histogram[bucket].LowerBound);
        var empirical = histogram[bucket].Count / sampleCount;
        Assert.Less(Math.Abs((upper - lower) - empirical), epsilon, dist.ToString());
    }
}
/// <summary>
/// <para>Using VC-dimension, we can bound the probability of making an error when estimating empirical probability
/// distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
/// http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false .</para>
/// <para>Note that for intervals on the real line the VC-dimension is 2.</para>
/// </summary>
/// <param name="epsilon">The error we are willing to tolerate.</param>
/// <param name="delta">The error probability we are willing to tolerate.</param>
/// <param name="s">The samples to use for testing.</param>
/// <param name="dist">The distribution we are testing.</param>
public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
{
    // Sample-size precondition from Theorem 2.41: at least
    // ceil(32 * ln(16/delta) / epsilon^2) observations are needed.
    double total = (double) s.Count();
    Assert.GreaterThan(total, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

    var histogram = new Histogram(s, NumberOfBuckets);
    var index = 0;
    while (index < NumberOfBuckets)
    {
        var bucket = histogram[index];

        // Compare the distribution's mass over the bucket with the observed fraction.
        double theoretical = dist.CumulativeDistribution(bucket.UpperBound) - dist.CumulativeDistribution(bucket.LowerBound);
        double observed = bucket.Count / total;
        Assert.LessThan(Math.Abs(theoretical - observed), epsilon, dist.ToString());
        index++;
    }
}