예제 #1
0
        /// <summary>
        /// Builds a Tukey outlier detector from the quartiles of <paramref name="sample"/>.
        /// </summary>
        /// <param name="sample">Sample to analyse; checked with <c>Assertion.NotNull</c> before use.</param>
        /// <param name="k">Multiplier passed to the detector constructor together with the quartiles.</param>
        /// <param name="quantileEstimator">
        /// Estimator handed to <c>Quartiles.Create</c>; when null,
        /// <c>HarrellDavisQuantileEstimator.Instance</c> is used instead.
        /// </param>
        /// <returns>A detector constructed from the computed quartiles and <paramref name="k"/>.</returns>
        public static TukeyOutlierDetector Create([NotNull] Sample sample, double k = DefaultK,
                                                  [CanBeNull] IQuantileEstimator quantileEstimator = null)
        {
            Assertion.NotNull(nameof(sample), sample);

            // Resolve the estimator once, then derive the quartiles the detector is built on.
            IQuantileEstimator estimator = quantileEstimator ?? HarrellDavisQuantileEstimator.Instance;
            var quartiles = Quartiles.Create(sample, estimator);

            return new TukeyOutlierDetector(quartiles, k);
        }
예제 #2
0
        /// <summary>
        /// Builds a Tukey outlier detector from the quartiles of a list of values.
        /// </summary>
        /// <param name="values">Values to analyse; must not be null.</param>
        /// <param name="k">Multiplier passed to the detector constructor together with the quartiles.</param>
        /// <param name="quantileEstimator">
        /// Estimator handed to <c>Quartiles.Create</c>; when null,
        /// <c>HarrellDavisQuantileEstimator.Instance</c> is used instead.
        /// </param>
        /// <returns>
        /// <c>EmptySampleDetector</c> when <paramref name="values"/> has no elements; otherwise a detector
        /// constructed from the computed quartiles and <paramref name="k"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
        public static TukeyOutlierDetector Create([NotNull] IReadOnlyList<double> values, double k = DefaultK,
                                                  [CanBeNull] IQuantileEstimator quantileEstimator = null)
        {
            if (values is null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            if (values.Count != 0)
            {
                IQuantileEstimator estimator = quantileEstimator ?? HarrellDavisQuantileEstimator.Instance;
                var quartiles = Quartiles.Create(values, estimator);
                return new TukeyOutlierDetector(quartiles, k);
            }

            // Nothing to estimate quartiles from: hand back the shared empty-sample detector.
            return EmptySampleDetector;
        }
예제 #3
0
        /// <summary>
        /// Calculates the m-value of <paramref name="values"/> after Tukey outliers have been removed.
        /// </summary>
        /// <remarks>
        /// For a sequence of doubling bin sizes, a histogram of the cleaned values is built and the sum of
        /// absolute differences between neighbouring bin counts (with an implicit zero before the first and
        /// after the last bin) is divided by the largest bin count. The result is the maximum of that ratio
        /// over all tried bin sizes. The search stops after the first bin size that exceeds the
        /// <c>Max - Min</c> range reported by the quartiles.
        /// </remarks>
        /// <param name="values">Raw measurements.</param>
        /// <returns>
        /// The largest ratio found, or <c>1</c> if any exception is thrown while computing it.
        /// </returns>
        public static double Calculate([NotNull] double[] values)
        {
            try
            {
                var withoutOutliers = TukeyOutlierDetector.Create(values).WithoutAllOutliers(values).ToList();
                int count           = withoutOutliers.Count;
                var quartiles       = Quartiles.Create(withoutOutliers);
                var moments         = Moments.Create(withoutOutliers);

                double binSize = AdaptiveHistogramBuilder.GetOptimalBinSize(count, moments.StandardDeviation);
                if (Abs(binSize) < 1e-9)
                {
                    // A (near-)zero bin size cannot be doubled into anything useful; start from 1 instead.
                    binSize = 1;
                }

                double best = 0;
                bool   rangeCovered;
                do
                {
                    var histogram = HistogramBuilder.Adaptive.Build(withoutOutliers, binSize);

                    // Walk the bin counts as if they were padded with a zero on both sides.
                    int previous  = 0;
                    int peak      = 0;
                    int variation = 0;
                    foreach (var bin in histogram.Bins)
                    {
                        int current = bin.Count;
                        variation += Abs(current - previous);
                        peak       = Max(peak, current);
                        previous   = current;
                    }
                    variation += Abs(0 - previous); // step down to the trailing zero

                    best = Max(best, variation * 1.0 / peak);

                    // This bin size was still evaluated above; larger ones are not tried once it
                    // exceeds the full value range.
                    rangeCovered = binSize > quartiles.Max - quartiles.Min;
                    binSize     *= 2.0;
                }
                while (!rangeCovered);

                return best;
            }
            catch (Exception)
            {
                // In case of any bugs, we return 1 because it's an invalid value (mValue is always >= 2)
                return 1;
            }
        }
예제 #4
0
        /// <summary>
        /// Asserts that the quartiles computed for <paramref name="values"/> match the expected ones.
        /// </summary>
        /// <param name="values">Input values passed to <c>Quartiles.Create</c>.</param>
        /// <param name="expectedQuartiles">Expected values, indexed 0..4 for Q0..Q4.</param>
        /// <param name="quantileEstimator">Estimator forwarded to <c>Quartiles.Create</c>; may be null.</param>
        private void Check(
            [JetBrains.Annotations.NotNull] IReadOnlyList<double> values,
            [JetBrains.Annotations.NotNull] IReadOnlyList<double> expectedQuartiles,
            [CanBeNull] IQuantileEstimator quantileEstimator)
        {
            var actual = Quartiles.Create(values, quantileEstimator);

            // Positional accessors.
            Assert.Equal(expectedQuartiles[0], actual.Q0);
            Assert.Equal(expectedQuartiles[1], actual.Q1);
            Assert.Equal(expectedQuartiles[2], actual.Q2);
            Assert.Equal(expectedQuartiles[3], actual.Q3);
            Assert.Equal(expectedQuartiles[4], actual.Q4);

            // Named accessors must agree with the positional ones.
            Assert.Equal(expectedQuartiles[0], actual.Min);
            Assert.Equal(expectedQuartiles[2], actual.Median);
            Assert.Equal(expectedQuartiles[4], actual.Max);

            // The interquartile range is expected to be Q3 - Q1.
            Assert.Equal(expectedQuartiles[3] - expectedQuartiles[1], actual.InterquartileRange);
        }
예제 #5
0
        /// <summary>
        /// Computes summary statistics for <paramref name="values"/>, ignoring NaN entries.
        /// </summary>
        /// <param name="values">Measurements; NaN values are filtered out before anything is computed.</param>
        /// <exception cref="InvalidOperationException">No non-NaN value remains after filtering.</exception>
        public Statistics(IEnumerable<double> values)
        {
            // Keep the filtered input in its original order and in ascending order.
            OriginalValues = values.Where(v => !double.IsNaN(v)).ToArray();
            SortedValues   = OriginalValues.OrderBy(v => v).ToArray();

            N = SortedValues.Count;
            if (N == 0)
            {
                throw new InvalidOperationException("Sequence of values contains no elements, Statistics can't be calculated");
            }

            // Order statistics.
            var quartiles = Quartiles.FromSorted(SortedValues);
            Min                = quartiles.Min;
            Q1                 = quartiles.Q1;
            Median             = quartiles.Median;
            Q3                 = quartiles.Q3;
            Max                = quartiles.Max;
            InterquartileRange = quartiles.InterquartileRange;

            // Moment-based statistics.
            var moments = Moments.Create(SortedValues);
            Mean              = moments.Mean;
            StandardDeviation = moments.StandardDeviation;
            Variance          = moments.Variance;
            Skewness          = moments.Skewness;
            Kurtosis          = moments.Kurtosis;

            // Tukey fences and the values classified as outliers by the detector.
            var tukey = TukeyOutlierDetector.FromQuartiles(quartiles);
            LowerFence      = tukey.LowerFence;
            UpperFence      = tukey.UpperFence;
            AllOutliers     = SortedValues.Where(tukey.IsOutlier).ToArray();
            LowerOutliers   = SortedValues.Where(tukey.IsLowerOutlier).ToArray();
            UpperOutliers   = SortedValues.Where(tukey.IsUpperOutlier).ToArray();
            outlierDetector = tukey;

            // Quantities derived from the values computed above.
            StandardError      = StandardDeviation / Math.Sqrt(N);
            ConfidenceInterval = new ConfidenceInterval(Mean, StandardError, N);
            Percentiles        = new PercentileValues(SortedValues);
        }
        /// <summary>
        /// Returns the five quartile values q0 (first element) through q4 (last element) of an
        /// already ordered span.
        ///
        /// see: https://en.wikipedia.org/wiki/Quantile#Examples
        ///
        /// This generic overload returns discrete values: the items found at integer-divided indices
        /// (length / 2 for q2, half of that for q1, and their sum for q3). No averaging is performed;
        /// the float based overload averages neighbouring items instead.
        /// </summary>
        /// <param name="afVal">Values to read; assumed to be sorted by the caller.</param>
        /// <param name="length">Number of leading items of <paramref name="afVal"/> to consider.</param>
        /// <returns>A <c>Quartiles</c> value with <c>count</c> and <c>q0</c>..<c>q4</c> filled in.</returns>
        public static Quartiles<T> ComputeQuartiles<T>(Span<T> afVal, int length)
        {
            // Zero-based midpoint, e.g. 3 for a length of 7, and half of it for the quarter step.
            int half    = length / 2;
            int quarter = half / 2;

            return new Quartiles<T>
            {
                count = length,
                q0    = afVal[0],
                q4    = afVal[length - 1],
                q2    = afVal[half],
                q1    = afVal[quarter],
                q3    = afVal[half + quarter],
            };
        }
예제 #7
0
        /// <summary>
        /// Computes quartiles, mean, Tukey fences and a fixed set of percentiles for
        /// <paramref name="values"/>, ignoring NaN entries.
        /// </summary>
        /// <remarks>
        /// When no non-NaN value is supplied the constructor returns early and none of the
        /// statistics are assigned.
        /// </remarks>
        /// <param name="values">Measurements; NaN values are filtered out first.</param>
        public Statistics(IEnumerable<double> values)
        {
            var sortedValues = new SegmentedList<double>(values.Where(v => !double.IsNaN(v)));
            sortedValues.Sort();

            if (sortedValues.Count < 1)
            {
                return;
            }

            var quartiles = Quartiles.FromSorted(sortedValues);
            Min    = quartiles.Min;
            Q1     = quartiles.Q1;
            Median = quartiles.Median;
            Q3     = quartiles.Q3;
            Max    = quartiles.Max;

            Mean = sortedValues.Average();

            var tukey = TukeyOutlierDetector.FromQuartiles(quartiles);
            LowerFence = tukey.LowerFence;
            UpperFence = tukey.UpperFence;

            // All percentiles come from the same estimator applied to the sorted data.
            var estimator = SimpleQuantileEstimator.Instance;
            P0   = estimator.GetQuantileFromSorted(sortedValues, 0.0);
            P25  = estimator.GetQuantileFromSorted(sortedValues, 0.25);
            P50  = estimator.GetQuantileFromSorted(sortedValues, 0.50);
            P67  = estimator.GetQuantileFromSorted(sortedValues, 0.67);
            P80  = estimator.GetQuantileFromSorted(sortedValues, 0.80);
            P85  = estimator.GetQuantileFromSorted(sortedValues, 0.85);
            P90  = estimator.GetQuantileFromSorted(sortedValues, 0.90);
            P95  = estimator.GetQuantileFromSorted(sortedValues, 0.95);
            P99  = estimator.GetQuantileFromSorted(sortedValues, 0.99);
            P100 = estimator.GetQuantileFromSorted(sortedValues, 1.00);
        }
예제 #8
0
 /// <summary>
 /// Creates a detector from unsorted values.
 /// </summary>
 /// <param name="values">Values passed to <c>Quartiles.FromUnsorted</c>.</param>
 /// <param name="k">Multiplier stored on the detector; defaults to 1.5.</param>
 public TukeyOutlierDetector(double[] values, double k = 1.5)
 {
     this.k         = k;
     this.quartiles = Quartiles.FromUnsorted(values);
 }
예제 #9
0
        // Initializes the experience buffer, both networks, the trainer and the statistics helpers.
        public QLearning()
        {
            // Experience buffer and its counters.
            exp  = new Experience[experienceSize];
            expi = 0;
            expn = 0;
            t    = 0;
            r0   = -99f;

            // Main network: input -> one fully connected ReLU layer -> regression output.
            // ConvNetSharp works on 3-dimensional volumes (width, height, depth); for non-image data
            // the first two dimensions (width, height) are kept at size 1.
            net = new Net();
            net.AddLayer(new InputLayer(1, 1, numStates));
            // Hidden layer of (hiddenNeurons - 10) units followed by ReLU.
            net.AddLayer(new FullyConnLayer(hiddenNeurons - 10, Activation.Relu));
            // One regression output per action.
            net.AddLayer(new RegressionLayer(numActions));

            Debug.Log("Network initialized");

            // Classification network: 2 inputs -> 4 ReLU units -> softmax over 2 classes.
            netClassify = new Net();
            netClassify.AddLayer(new InputLayer(1, 1, 2));
            netClassify.AddLayer(new FullyConnLayer(4, Activation.Relu));
            netClassify.AddLayer(new SoftmaxLayer(2));

            Debug.Log("Network Classify initialized");

            // The SGD trainer operates on the main network only.
            trainer = new SgdTrainer(net)
            {
                LearningRate = 0.01,
                L2Decay      = 0.001,
                Momentum     = 0.0,
                BatchSize    = 5
            };

            e = new Entropy();

            q = new Quartiles();

            // Logs umidmean and lmidmean of a fixed, ascending-sorted sample.
            double[] sample = { 5, 6, 7, 2, 1, 8, 4, 3 };
            double[] ascOrderedArray = sample.OrderBy(value => value).ToArray();

            Debug.Log(q.umidmean(ascOrderedArray));

            Debug.Log(q.lmidmean(ascOrderedArray));
        }
예제 #10
0
 // Eleven consecutive integers: the expected quartiles are 3, 6 and 9.
 public void intQuartilesTest()
 {
     int[]     input    = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     decimal[] expected = { 3, 6, 9 };

     var actual = Quartiles.findQuartiles(input);

     CollectionAssert.AreEqual(expected, actual);
 }
예제 #11
0
        //public ProcessedBuckets Process(string data)
        //{
        //    int[] checksum = null;
        //    int[] bucketArray = new int[ArrayBucketSize];

        //    SlideWindow slideWindow = new SlideWindow();
        //    int length = data.Length;

        //    for (int i = 0; i < length; i++)
        //    {
        //        int startWindow = slideWindow.GetPivot();
        //        slideWindow.Put(data[i]);

        //        checksum = slideWindow.GetChecksum(startWindow, checksum);

        //        foreach (var tripletHash in slideWindow.GetTripletHashes(startWindow))
        //        {
        //            bucketArray[tripletHash]++;
        //        }
        //    }

        //    return buildProcessedBuckets(length, bucketArray, checksum);
        //}

        /// <summary>
        /// Wraps the bucket data in a <c>ProcessedBuckets</c>, together with quartiles built from
        /// <paramref name="bucketArray"/>.
        /// </summary>
        /// <param name="dataLength">Length of the processed data.</param>
        /// <param name="bucketArray">Bucket counters; also the input for the quartiles.</param>
        /// <param name="checksum">Checksum passed through unchanged.</param>
        private ProcessedBuckets buildProcessedBuckets(int dataLength, int[] bucketArray, int[] checksum)
        {
            return new ProcessedBuckets(checksum, bucketArray, dataLength, new Quartiles(bucketArray));
        }
예제 #12
0
 /// <summary>
 /// Creates a detector directly from precomputed quartiles.
 /// </summary>
 /// <param name="quartiles">Quartiles passed to the detector constructor.</param>
 /// <param name="k">Multiplier passed to the detector constructor.</param>
 public static TukeyOutlierDetector Create(Quartiles quartiles, double k = DefaultK)
     => new TukeyOutlierDetector(quartiles, k);
예제 #13
0
 /// <summary>
 /// Sets the fences at <paramref name="k"/> interquartile ranges below Q1 and above Q3.
 /// </summary>
 /// <param name="quartiles">Source of Q1, Q3 and the interquartile range.</param>
 /// <param name="k">Multiplier applied to the interquartile range.</param>
 private TukeyOutlierDetector(Quartiles quartiles, double k)
 {
     // Distance of each fence from its quartile.
     double margin = k * quartiles.InterquartileRange;

     LowerFence = quartiles.Q1 - margin;
     UpperFence = quartiles.Q3 + margin;
 }
        /// <summary>
        /// Returns the quartile values of an ordered set of floats; the caller is assumed to have
        /// sorted the data already.
        ///
        /// There is no universal agreement on how to choose quartile values. For an odd number of
        /// points, some methods include the median when finding the 1st and 3rd quartile and some
        /// discard it, and the two approaches give different answers. This method produces the
        /// arithmetic mean of the two approaches and ensures the median is given its correct weight,
        /// so that the quartiles change as smoothly as possible as more data points are added.
        ///
        /// The following rules are applied (data values are counted from 1 in this description):
        ///
        /// === Even number of data points:
        ///     The median divides the ordered data into two halves.
        ///     The lower quartile is the median of the lower half.
        ///     The upper quartile is the median of the upper half.
        ///
        /// === (4n+1) data points:
        ///     The lower quartile is 25% of the nth value plus 75% of the (n+1)th value.
        ///     The upper quartile is 75% of the (3n+1)th value plus 25% of the (3n+2)th value.
        ///
        /// === (4n+3) data points:
        ///     The lower quartile is 75% of the (n+1)th value plus 25% of the (n+2)th value.
        ///     The upper quartile is 25% of the (3n+2)th value plus 75% of the (3n+3)th value.
        ///
        /// A single data point is a special case: q1, q2 and q3 are all that point.
        /// </summary>
        /// <param name="afVal">Values to read; assumed to be sorted in ascending order.</param>
        /// <param name="length">Number of leading items of <paramref name="afVal"/> to consider.</param>
        /// <returns>A <c>Quartiles</c> value with <c>count</c> and <c>q0</c>..<c>q4</c> filled in.</returns>
        public static Quartiles<float> ComputeQuartiles(Span<float> afVal, int length)
        {
            var result = new Quartiles<float>();
            result.count = length;

            // Extremes first. Reaching the code below implies length >= 1, because a smaller
            // length makes the index (length - 1) negative.
            result.q0 = afVal[0];
            result.q4 = afVal[length - 1];

            if (length == 1)
            {
                // Single point: every quartile is that point.
                float only = afVal[0];
                result.q1 = only;
                result.q2 = only;
                result.q3 = only;
                return result;
            }

            int half    = length / 2; // zero-based midpoint, e.g. 3 for a length of 7
            int quarter = half / 2;

            switch (length % 4)
            {
                case 0:
                {
                    // Even count, both halves even: every quartile averages two neighbours.
                    result.q2 = (afVal[half - 1] + afVal[half]) / 2;
                    result.q1 = (afVal[quarter - 1] + afVal[quarter]) / 2;
                    result.q3 = (afVal[half + quarter - 1] + afVal[half + quarter]) / 2;
                    break;
                }

                case 2:
                {
                    // Even count, both halves odd: q1 and q3 are the middle items of the halves.
                    result.q2 = (afVal[half - 1] + afVal[half]) / 2;
                    result.q1 = afVal[quarter];
                    result.q3 = afVal[quarter + half];
                    break;
                }

                case 1:
                {
                    // (4n+1) points: the median is the middle item.
                    result.q2 = afVal[half];

                    int n = (length - 1) / 4;
                    result.q1 = (afVal[n - 1] * .25f) + (afVal[n] * .75f);
                    result.q3 = (afVal[3 * n] * .75f) + (afVal[3 * n + 1] * .25f);
                    break;
                }

                case 3:
                {
                    // (4n+3) points: the median is the middle item.
                    result.q2 = afVal[half];

                    int n = (length - 3) / 4;
                    result.q1 = (afVal[n] * .75f) + (afVal[n + 1] * .25f);
                    result.q3 = (afVal[3 * n + 1] * .25f) + (afVal[3 * n + 2] * .75f);
                    break;
                }
            }

            return result;
        }
예제 #15
0
 // Quartiles.Create must reject a null Sample with ArgumentNullException.
 public void QuartileNullTest()
 {
     Sample nullSample = null;

     Assert.Throws<ArgumentNullException>(() => Quartiles.Create(nullSample));
 }
예제 #16
0
 /// <summary>
 /// Creates a detector from values that have not been sorted.
 /// </summary>
 /// <param name="values">Values passed to <c>Quartiles.FromUnsorted</c>.</param>
 /// <param name="k">Multiplier passed to the detector constructor.</param>
 public static TukeyOutlierDetector FromUnsorted(double[] values, double k = DefaultK)
 {
     var quartiles = Quartiles.FromUnsorted(values);
     return new TukeyOutlierDetector(quartiles, k);
 }