/// <summary>
        /// Vapnik-Chervonenkis test: asserts that the empirical probability of every histogram bucket
        /// built from the samples lies within <paramref name="epsilon"/> of the probability the
        /// distribution's CDF assigns to that bucket.
        /// </summary>
        /// <remarks>
        /// The test first asserts that enough samples were supplied: the sample count must be greater
        /// than ceil(32 * ln(16 / delta) / epsilon^2). The samples are enumerated exactly once.
        /// </remarks>
        /// <param name="epsilon">The error we are willing to tolerate.</param>
        /// <param name="delta">The error probability we are willing to tolerate.</param>
        /// <param name="s">The samples to use for testing.</param>
        /// <param name="dist">The distribution we are testing.</param>
        public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
        {
            // Using VC-dimension, we can bound the probability of making an error when estimating empirical
            // probability distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
            // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false
            // Note that for intervals on the real line the VC-dimension is 2.

            // Materialize once: 's' may be a lazily generated (e.g. random) sequence, and enumerating it
            // separately for the count and for the histogram would regenerate it each time.
            var samples = s.ToArray();
            double sampleCount = samples.Length;

            Assert.GreaterThan(sampleCount, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

            var histogram = new Histogram(samples, NumberOfBuckets);

            for (int i = 0; i < NumberOfBuckets; i++)
            {
                // Fetch the bucket once instead of going through the indexer for every property read.
                var bucket = histogram[i];

                // Probability mass the distribution assigns to the bucket's interval.
                double expected = dist.CumulativeDistribution(bucket.UpperBound) - dist.CumulativeDistribution(bucket.LowerBound);

                // Fraction of the samples that fell into the bucket.
                double observed = bucket.Count / sampleCount;

                Assert.LessThan(Math.Abs(expected - observed), epsilon, dist.ToString());
            }
        }
コード例 #2
0
        /// <summary>
        /// Vapnik-Chervonenkis test: asserts that the empirical probability of every histogram bucket
        /// built from the samples lies within <paramref name="epsilon"/> of the probability the
        /// distribution's CDF assigns to that bucket.
        /// </summary>
        /// <remarks>
        /// The test first asserts that enough samples were supplied: the sample count must be greater
        /// than ceil(32 * ln(16 / delta) / epsilon^2). The samples are enumerated exactly once.
        /// </remarks>
        /// <param name="epsilon">The error we are willing to tolerate.</param>
        /// <param name="delta">The error probability we are willing to tolerate.</param>
        /// <param name="s">The samples to use for testing.</param>
        /// <param name="dist">The distribution we are testing.</param>
        public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
        {
            // Using VC-dimension, we can bound the probability of making an error when estimating empirical
            // probability distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
            // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false
            // For intervals on the real line the VC-dimension is 2.

            // Materialize once: 's' may be a lazily generated (e.g. random) sequence, and enumerating it
            // separately for the count and for the histogram would regenerate it each time.
            var samples = s.ToArray();
            double sampleCount = samples.Length;

            Assert.Greater(sampleCount, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

            var histogram = new Histogram(samples, NumberOfBuckets);

            for (var i = 0; i < NumberOfBuckets; i++)
            {
                // Fetch the bucket once instead of going through the indexer for every property read.
                var bucket = histogram[i];

                // Probability mass the distribution assigns to the bucket's interval.
                var expected = dist.CumulativeDistribution(bucket.UpperBound) - dist.CumulativeDistribution(bucket.LowerBound);

                // Fraction of the samples that fell into the bucket.
                var observed = bucket.Count / sampleCount;

                Assert.Less(Math.Abs(expected - observed), epsilon, dist.ToString());
            }
        }
コード例 #3
0
        /// <summary>
        /// Asserts that the value returned by <c>IntegrateCdf</c> for the given sampler at
        /// <paramref name="x"/> matches the reference distribution's CDF at the same point,
        /// within the tolerance <paramref name="error"/>.
        /// </summary>
        /// <param name="x">The point at which both CDF values are compared.</param>
        /// <param name="sampler">The sampler passed to <c>IntegrateCdf</c>.</param>
        /// <param name="referenceDistribution">The distribution that supplies the expected CDF value.</param>
        /// <param name="error">The maximum allowed absolute difference between the two values.</param>
        public static void TestIntegrateDistribution(double x, ISampler<double> sampler, IDistribution referenceDistribution, double error)
        {
            // Fixed trial count handed to IntegrateCdf.
            // NOTE(review): IntegrateCdf is defined elsewhere; presumably it estimates the CDF at x
            // from that many draws of the sampler - confirm against its definition.
            const int trialCount = 1000000;

            var estimatedCdf = IntegrateCdf(sampler, x, trialCount);
            var referenceCdf = referenceDistribution.CumulativeDistribution(x);

            Assert.AreEqual(referenceCdf, estimatedCdf, error);
        }
コード例 #4
0
        /// <summary>
        /// Vapnik-Chervonenkis test: asserts that the empirical probability of every histogram bucket
        /// built from the samples lies within <paramref name="epsilon"/> of the probability the
        /// distribution's CDF assigns to that bucket.
        /// </summary>
        /// <remarks>
        /// The test first asserts that enough samples were supplied: the sample count must be greater
        /// than ceil(32 * ln(16 / delta) / epsilon^2). The samples are enumerated exactly once.
        /// </remarks>
        /// <param name="epsilon">The error we are willing to tolerate.</param>
        /// <param name="delta">The error probability we are willing to tolerate.</param>
        /// <param name="s">The samples to use for testing.</param>
        /// <param name="dist">The distribution we are testing.</param>
        public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
        {
            // Using VC-dimension, we can bound the probability of making an error when estimating empirical
            // probability distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
            // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false
            // For intervals on the real line the VC-dimension is 2.

            // Materialize once: 's' may be a lazily generated (e.g. random) sequence, and enumerating it
            // separately for the count and for the histogram would regenerate it each time.
            var samples = s.ToArray();
            double sampleCount = samples.Length;
            Assert.Greater(sampleCount, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

            var histogram = new Histogram(samples, NumberOfBuckets);
            for (var i = 0; i < NumberOfBuckets; i++)
            {
                // Fetch the bucket once instead of going through the indexer for every property read.
                var bucket = histogram[i];

                // Probability mass the distribution assigns to the bucket's interval.
                var expected = dist.CumulativeDistribution(bucket.UpperBound) - dist.CumulativeDistribution(bucket.LowerBound);

                // Fraction of the samples that fell into the bucket.
                var observed = bucket.Count / sampleCount;

                Assert.Less(Math.Abs(expected - observed), epsilon, dist.ToString());
            }
        }
コード例 #5
0
        /// <summary>
        /// Vapnik-Chervonenkis test: asserts that the empirical probability of every histogram bucket
        /// built from the samples lies within <paramref name="epsilon"/> of the probability the
        /// distribution's CDF assigns to that bucket.
        /// </summary>
        /// <remarks>
        /// The test first asserts that enough samples were supplied: the sample count must be greater
        /// than ceil(32 * ln(16 / delta) / epsilon^2). The samples are enumerated exactly once.
        /// </remarks>
        /// <param name="epsilon">The error we are willing to tolerate.</param>
        /// <param name="delta">The error probability we are willing to tolerate.</param>
        /// <param name="s">The samples to use for testing.</param>
        /// <param name="dist">The distribution we are testing.</param>
        public static void VapnikChervonenkisTest(double epsilon, double delta, IEnumerable<double> s, IDistribution dist)
        {
            // Using VC-dimension, we can bound the probability of making an error when estimating empirical
            // probability distributions. We are using Theorem 2.41 in "All Of Nonparametric Statistics".
            // http://books.google.com/books?id=MRFlzQfRg7UC&lpg=PP1&dq=all%20of%20nonparametric%20statistics&pg=PA22#v=onepage&q=%22shatter%20coe%EF%AC%83cients%20do%20not%22&f=false
            // Note that for intervals on the real line the VC-dimension is 2.

            // Materialize once: 's' may be a lazily generated (e.g. random) sequence, and enumerating it
            // separately for the count and for the histogram would regenerate it each time.
            var samples = s.ToArray();
            double sampleCount = samples.Length;
            Assert.GreaterThan(sampleCount, Math.Ceiling(32.0 * Math.Log(16.0 / delta) / epsilon / epsilon));

            var histogram = new Histogram(samples, NumberOfBuckets);

            for (int i = 0; i < NumberOfBuckets; i++)
            {
                // Fetch the bucket once instead of going through the indexer for every property read.
                var bucket = histogram[i];

                // Probability mass the distribution assigns to the bucket's interval.
                double expected = dist.CumulativeDistribution(bucket.UpperBound) - dist.CumulativeDistribution(bucket.LowerBound);

                // Fraction of the samples that fell into the bucket.
                double observed = bucket.Count / sampleCount;

                Assert.LessThan(Math.Abs(expected - observed), epsilon, dist.ToString());
            }
        }