Example #1
0
        /// <summary>
        /// One-sided Welch's t-test: checks whether the mean of <paramref name="x"/> exceeds the mean of
        /// <paramref name="y"/> by more than the threshold value.
        /// </summary>
        /// <param name="x">First sample; must contain at least 2 elements. It is also the sample the threshold value is resolved against.</param>
        /// <param name="y">Second sample; must contain at least 2 elements.</param>
        /// <param name="threshold">Threshold subtracted from the mean difference; <c>RelativeThreshold.Default</c> is used when null.</param>
        /// <returns>A result carrying the t statistic, the degrees of freedom, the upper-tail p-value and the threshold that was used.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="x"/> or <paramref name="y"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="x"/> or <paramref name="y"/> has fewer than 2 elements.</exception>
        public WelchResult IsGreater(double[] x, double[] y, Threshold threshold = null)
        {
            // Fail with a descriptive exception instead of a NullReferenceException on x.Length / y.Length.
            if (x == null)
            {
                throw new ArgumentNullException(nameof(x));
            }
            if (y == null)
            {
                throw new ArgumentNullException(nameof(y));
            }

            int n1 = x.Length, n2 = y.Length;

            if (n1 < 2)
            {
                throw new ArgumentException("x should contain at least 2 elements", nameof(x));
            }
            if (n2 < 2)
            {
                throw new ArgumentException("y should contain at least 2 elements", nameof(y));
            }

            Moments xm = Moments.Create(x), ym = Moments.Create(y);
            double m1 = xm.Mean, m2 = ym.Mean;

            // Per-sample squared standard errors (variance / n); reused by both se and df below.
            double xTerm = xm.Variance / n1;
            double yTerm = ym.Variance / n2;

            threshold = threshold ?? RelativeThreshold.Default;
            double thresholdValue = threshold.GetValue(x);

            // NOTE(review): when both variances are 0, se is 0 and t / df become non-finite;
            // what happens next depends on StudentDistribution — confirm the intended behavior.
            double se = Math.Sqrt(xTerm + yTerm);
            double t  = ((m1 - m2) - thresholdValue) / se;

            // Degrees of freedom: (a + b)^2 / (a^2 / (n1 - 1) + b^2 / (n2 - 1)) with a = v1 / n1, b = v2 / n2.
            double df = (xTerm + yTerm).Sqr() /
                        (xTerm.Sqr() / (n1 - 1) + yTerm.Sqr() / (n2 - 1));

            // Upper-tail probability of observing a statistic at least as large as t.
            double pValue = 1 - new StudentDistribution(df).Cdf(t);

            return new WelchResult(t, df, pValue, threshold);
        }
Example #2
0
        /// <summary>
        /// Builds a histogram for <paramref name="values"/> using a bin size derived from the number of
        /// values and their standard deviation.
        /// </summary>
        /// <param name="values">Values to put into the histogram.</param>
        /// <returns>The histogram produced by the two-argument <c>Build</c> overload.</returns>
        public Histogram Build(IReadOnlyList<double> values)
        {
            double standardDeviation = Moments.Create(values).StandardDeviation;
            double optimalBinSize    = GetOptimalBinSize(values.Count, standardDeviation);

            // A (near-)zero bin size is replaced by 1 before it is handed to the builder.
            double effectiveBinSize = Math.Abs(optimalBinSize) < 1e-9 ? 1 : optimalBinSize;

            return Build(values, effectiveBinSize);
        }
Example #3
0
        /// <summary>
        /// One-sample, one-sided t-test: determines whether the sample mean is greater than a known value.
        /// </summary>
        /// <remarks>Should be consistent with t.test(x, mu = mu, alternative = "greater") from R </remarks>
        /// <param name="sample">Observed values.</param>
        /// <param name="value">The known value the sample mean is compared against.</param>
        /// <param name="threshold">
        /// Forwarded to the result unchanged (<c>RelativeThreshold.Default</c> when null);
        /// it does not take part in the statistic computed here.
        /// </param>
        /// <returns>A result carrying the upper-tail p-value and the threshold.</returns>
        public OneSidedTestResult IsGreater(double[] sample, double value, Threshold threshold = null)
        {
            var    sampleMoments    = Moments.Create(sample);
            double sampleMean       = sampleMoments.Mean;
            double sampleStdDev     = sampleMoments.StandardDeviation;
            double size             = sample.Length;
            double degreesOfFreedom = size - 1;

            Threshold effectiveThreshold = threshold ?? RelativeThreshold.Default;

            // t = (mean - value) / (stdDev / sqrt(n))
            double standardError = sampleStdDev / Math.Sqrt(size);
            double statistic     = (sampleMean - value) / standardError;

            // Upper-tail probability under Student's distribution with n - 1 degrees of freedom.
            double upperTail = 1 - new StudentDistribution(degreesOfFreedom).Cdf(statistic);

            return new OneSidedTestResult(upperTail, effectiveThreshold);
        }
Example #4
0
        /// <summary>
        /// Computes the m-value of <paramref name="values"/> after removing Tukey outliers.
        /// For a series of histograms with doubling bin sizes, the sum of absolute differences between
        /// neighbouring bin counts (with a zero count assumed on both ends) is divided by the largest bin
        /// count; the maximum ratio over all histograms is returned.
        /// </summary>
        /// <param name="values">Input values.</param>
        /// <returns>The maximum ratio found, or 1 when any exception occurs during the calculation.</returns>
        public static double Calculate([NotNull] double[] values)
        {
            try
            {
                var cleaned   = TukeyOutlierDetector.Create(values).WithoutAllOutliers(values).ToList();
                var quartiles = Quartiles.Create(cleaned);
                var moments   = Moments.Create(cleaned);

                double binSize = AdaptiveHistogramBuilder.GetOptimalBinSize(cleaned.Count, moments.StandardDeviation);
                if (Abs(binSize) < 1e-9)
                {
                    binSize = 1;
                }

                double best = 0;

                // Double the bin size after every pass; the pass whose bin size exceeds the data range is the last one.
                for (; ; binSize *= 2.0)
                {
                    var histogram = HistogramBuilder.Adaptive.Build(cleaned, binSize);

                    int previous       = 0; // implicit zero count before the first bin
                    int peak           = 0; // largest bin count seen (never below the implicit zeros)
                    int totalVariation = 0;
                    foreach (var bin in histogram.Bins)
                    {
                        int count = bin.Count;
                        totalVariation += Abs(count - previous);
                        if (count > peak)
                        {
                            peak = count;
                        }
                        previous = count;
                    }
                    // Step down to the implicit zero count after the last bin.
                    totalVariation += Abs(0 - previous);

                    best = Max(best, totalVariation * 1.0 / peak);

                    if (binSize > quartiles.Max - quartiles.Min)
                    {
                        break;
                    }
                }

                return best;
            }
            catch (Exception)
            {
                return 1; // In case of any bugs, we return 1 because it's an invalid value (mValue is always >= 2)
            }
        }
Example #5
0
        /// <summary>
        /// Computes the difference between the means of <paramref name="y"/> and <paramref name="x"/>
        /// divided by their pooled standard deviation.
        /// </summary>
        /// <param name="x">First sample; must contain at least 2 elements.</param>
        /// <param name="y">Second sample; must contain at least 2 elements.</param>
        /// <returns><c>(mean(y) - mean(x)) / s</c>, where <c>s</c> is the pooled standard deviation.</returns>
        /// <exception cref="ArgumentOutOfRangeException">Either sample has fewer than 2 elements.</exception>
        public static double Calc([NotNull] Sample x, [NotNull] Sample y)
        {
            Assertion.NotNull(nameof(x), x);
            Assertion.NotNull(nameof(y), y);

            int nx = x.Count;
            if (nx < 2)
            {
                throw new ArgumentOutOfRangeException(nameof(x), $"{nameof(x)} should contain at least 2 elements");
            }

            int ny = y.Count;
            if (ny < 2)
            {
                throw new ArgumentOutOfRangeException(nameof(y), $"{nameof(y)} should contain at least 2 elements");
            }

            var xMoments = Moments.Create(x);
            var yMoments = Moments.Create(y);

            // Pooled standard deviation: each variance is weighted by (sample size - 1).
            double weightedVariance = (nx - 1) * xMoments.Variance + (ny - 1) * yMoments.Variance;
            double pooledStdDev     = Math.Sqrt(weightedVariance / (nx + ny - 2));

            double meanDifference = yMoments.Mean - xMoments.Mean;
            return meanDifference / pooledStdDev;
        }
Example #6
0
        /// <summary>
        /// Computes descriptive statistics for <paramref name="values"/>. NaN values are discarded first;
        /// the remaining values are kept both in their original order and sorted in ascending order.
        /// </summary>
        /// <param name="values">Input values.</param>
        /// <exception cref="InvalidOperationException">No values remain after NaN values are removed.</exception>
        public Statistics(IEnumerable<double> values)
        {
            OriginalValues = values.Where(v => !double.IsNaN(v)).ToArray();
            SortedValues   = OriginalValues.OrderBy(v => v).ToArray();
            N              = SortedValues.Count;
            if (N == 0)
            {
                throw new InvalidOperationException("Sequence of values contains no elements, Statistics can't be calculated");
            }

            // Build the helper objects first, then copy their results into the properties.
            var quartiles = Quartiles.FromSorted(SortedValues);
            var moments   = Moments.Create(SortedValues);
            var tukey     = TukeyOutlierDetector.FromQuartiles(quartiles);

            // Order statistics.
            Min                = quartiles.Min;
            Max                = quartiles.Max;
            Q1                 = quartiles.Q1;
            Median             = quartiles.Median;
            Q3                 = quartiles.Q3;
            InterquartileRange = quartiles.InterquartileRange;

            // Moments.
            Mean              = moments.Mean;
            Variance          = moments.Variance;
            StandardDeviation = moments.StandardDeviation;
            Skewness          = moments.Skewness;
            Kurtosis          = moments.Kurtosis;

            // Tukey fences and the values falling outside them.
            outlierDetector = tukey;
            LowerFence      = tukey.LowerFence;
            UpperFence      = tukey.UpperFence;
            LowerOutliers   = SortedValues.Where(tukey.IsLowerOutlier).ToArray();
            UpperOutliers   = SortedValues.Where(tukey.IsUpperOutlier).ToArray();
            AllOutliers     = SortedValues.Where(tukey.IsOutlier).ToArray();

            // Values derived from the properties assigned above.
            StandardError      = StandardDeviation / Math.Sqrt(N);
            ConfidenceInterval = new ConfidenceInterval(Mean, StandardError, N);
            Percentiles        = new PercentileValues(SortedValues);
        }