public static TukeyOutlierDetector Create([NotNull] Sample sample, double k = DefaultK,
    [CanBeNull] IQuantileEstimator quantileEstimator = null)
{
    Assertion.NotNull(nameof(sample), sample);
    quantileEstimator ??= HarrellDavisQuantileEstimator.Instance;
    return new TukeyOutlierDetector(Quartiles.Create(sample, quantileEstimator), k);
}
public static TukeyOutlierDetector Create([NotNull] IReadOnlyList<double> values, double k = DefaultK,
    [CanBeNull] IQuantileEstimator quantileEstimator = null)
{
    if (values == null)
        throw new ArgumentNullException(nameof(values));
    if (values.Count == 0)
        return EmptySampleDetector;
    quantileEstimator ??= HarrellDavisQuantileEstimator.Instance;
    return new TukeyOutlierDetector(Quartiles.Create(values, quantileEstimator), k);
}
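A minimal usage sketch for the factories above, assuming the same Perfolizer-style API shown in the surrounding snippets; the input values are illustrative only:

// Hedged usage sketch: assumes the Create(IReadOnlyList<double>) overload above;
// the fence properties and helper methods below appear in the other snippets.
double[] values = { 1, 2, 3, 4, 100 };
var detector = TukeyOutlierDetector.Create(values); // default k = 1.5, Harrell-Davis quantiles
// detector.LowerFence / detector.UpperFence bound the non-outlier range;
// detector.IsOutlier(x) and detector.WithoutAllOutliers(values) (seen below) use those fences.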
public static double Calculate([NotNull] double[] values)
{
    try
    {
        var clearedValues = TukeyOutlierDetector.Create(values).WithoutAllOutliers(values).ToList();
        int n = clearedValues.Count;
        var quartiles = Quartiles.Create(clearedValues);
        var moments = Moments.Create(clearedValues);
        double mValue = 0;

        double binSize = AdaptiveHistogramBuilder.GetOptimalBinSize(n, moments.StandardDeviation);
        if (Abs(binSize) < 1e-9)
            binSize = 1;

        while (true)
        {
            var histogram = HistogramBuilder.Adaptive.Build(clearedValues, binSize);
            var x = new List<int> { 0 };
            x.AddRange(histogram.Bins.Select(bin => bin.Count));
            x.Add(0);

            int sum = 0;
            for (int i = 1; i < x.Count; i++)
                sum += Abs(x[i] - x[i - 1]);
            mValue = Max(mValue, sum * 1.0 / x.Max());

            if (binSize > quartiles.Max - quartiles.Min)
                break;
            binSize *= 2.0;
        }
        return mValue;
    }
    catch (Exception)
    {
        return 1; // In case of any bugs, we return 1 because it's an invalid value (mValue is always >= 2)
    }
}
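A hand-worked sketch of what the loop above computes: the histogram bin counts are padded with a zero on each side, the absolute differences between adjacent counts are summed, and the sum is divided by the tallest bin. Illustrative numbers only:

// Padded bin counts for a bimodal histogram: x = [0, 5, 0, 5, 0]
// sum of |x[i] - x[i-1]| = 5 + 5 + 5 + 5 = 20
// mValue = 20 / max(x) = 20 / 5 = 4
// A clean unimodal histogram (e.g. [0, 5, 0]) gives 2, the minimum possible value,
// which is why the catch block above returns 1 as an "impossible" error sentinel.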
private void Check(
    [JetBrains.Annotations.NotNull] IReadOnlyList<double> values,
    [JetBrains.Annotations.NotNull] IReadOnlyList<double> expectedQuartiles,
    [CanBeNull] IQuantileEstimator quantileEstimator)
{
    var quartiles = Quartiles.Create(values, quantileEstimator);
    Assert.Equal(expectedQuartiles[0], quartiles.Q0);
    Assert.Equal(expectedQuartiles[1], quartiles.Q1);
    Assert.Equal(expectedQuartiles[2], quartiles.Q2);
    Assert.Equal(expectedQuartiles[3], quartiles.Q3);
    Assert.Equal(expectedQuartiles[4], quartiles.Q4);
    Assert.Equal(expectedQuartiles[0], quartiles.Min);
    Assert.Equal(expectedQuartiles[2], quartiles.Median);
    Assert.Equal(expectedQuartiles[4], quartiles.Max);
    Assert.Equal(expectedQuartiles[3] - expectedQuartiles[1], quartiles.InterquartileRange);
}
public Statistics(IEnumerable<double> values)
{
    OriginalValues = values.Where(d => !double.IsNaN(d)).ToArray();
    SortedValues = OriginalValues.OrderBy(value => value).ToArray();
    N = SortedValues.Count;
    if (N == 0)
        throw new InvalidOperationException("Sequence of values contains no elements, Statistics can't be calculated");

    var quartiles = Quartiles.FromSorted(SortedValues);
    Min = quartiles.Min;
    Q1 = quartiles.Q1;
    Median = quartiles.Median;
    Q3 = quartiles.Q3;
    Max = quartiles.Max;
    InterquartileRange = quartiles.InterquartileRange;

    var moments = Moments.Create(SortedValues);
    Mean = moments.Mean;
    StandardDeviation = moments.StandardDeviation;
    Variance = moments.Variance;
    Skewness = moments.Skewness;
    Kurtosis = moments.Kurtosis;

    var tukey = TukeyOutlierDetector.FromQuartiles(quartiles);
    LowerFence = tukey.LowerFence;
    UpperFence = tukey.UpperFence;
    AllOutliers = SortedValues.Where(tukey.IsOutlier).ToArray();
    LowerOutliers = SortedValues.Where(tukey.IsLowerOutlier).ToArray();
    UpperOutliers = SortedValues.Where(tukey.IsUpperOutlier).ToArray();
    outlierDetector = tukey;

    StandardError = StandardDeviation / Math.Sqrt(N);
    ConfidenceInterval = new ConfidenceInterval(Mean, StandardError, N);
    Percentiles = new PercentileValues(SortedValues);
}
/// <summary>
/// Returns quartiles (0th, 1st, 2nd, 3rd, and 4th).
///
/// See: https://en.wikipedia.org/wiki/Quantile#Examples
///
/// This generic overload returns discrete values: the items at the quartile indices, rounded down
/// (no averaging of values). The float-based overload averages results instead.
/// </summary>
public static Quartiles<T> ComputeQuartiles<T>(Span<T> afVal, int length)
{
    int iSize = length;
    int iMid = iSize / 2; // the midpoint of a zero-based index, e.g. mid of 7 = 3

    var toReturn = new Quartiles<T>();
    toReturn.count = length;

    // q0 and q4 are the extremes
    toReturn.q0 = afVal[0];
    toReturn.q4 = afVal[length - 1];

    toReturn.q2 = afVal[iMid];
    int iMidMid = iMid / 2;
    toReturn.q1 = afVal[iMidMid];
    toReturn.q3 = afVal[iMid + iMidMid];
    return toReturn;
}
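A small sketch of the discrete overload on an already-sorted span (both overloads index straight into the span, so unsorted input would give meaningless quartiles); the data is illustrative:

int[] sorted = { 1, 2, 3, 4, 5, 6, 7 };
var q = ComputeQuartiles<int>(sorted.AsSpan(), sorted.Length);
// length 7 => iMid = 3, iMidMid = 1:
// q.q0 = 1, q.q1 = 2, q.q2 = 4, q.q3 = 5, q.q4 = 7 (items picked directly, no averaging)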
public Statistics(IEnumerable<double> values)
{
    var sortedValues = new SegmentedList<double>(values.Where(d => !double.IsNaN(d)));
    sortedValues.Sort();
    if (sortedValues.Count < 1)
        return;

    var quartiles = Quartiles.FromSorted(sortedValues);
    Min = quartiles.Min;
    Q1 = quartiles.Q1;
    Median = quartiles.Median;
    Q3 = quartiles.Q3;
    Max = quartiles.Max;
    Mean = sortedValues.Average();

    var tukey = TukeyOutlierDetector.FromQuartiles(quartiles);
    LowerFence = tukey.LowerFence;
    UpperFence = tukey.UpperFence;

    P0 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.0);
    P25 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.25);
    P50 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.50);
    P67 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.67);
    P80 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.80);
    P85 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.85);
    P90 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.90);
    P95 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.95);
    P99 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 0.99);
    P100 = SimpleQuantileEstimator.Instance.GetQuantileFromSorted(sortedValues, 1.00);
}
public TukeyOutlierDetector(double[] values, double k = 1.5)
{
    quartiles = Quartiles.FromUnsorted(values);
    this.k = k;
}
// Use this for initialization
public QLearning()
{
    exp = new Experience[experienceSize];
    expi = 0;
    expn = 0;
    t = 0;
    r0 = -99f;

    // specifies a 2-layer neural network with one hidden layer
    net = new Net();
    // the input layer declares the size of the input (here: numStates values);
    // ConvNetSharp works on 3-dimensional volumes (width, height, depth), but if you're not dealing with images
    // then the first two dimensions (width, height) will always be kept at size 1
    net.AddLayer(new InputLayer(1, 1, numStates));
    // declare the hidden neurons, followed by ReLU (rectified linear unit non-linearity)
    net.AddLayer(new FullyConnLayer(hiddenNeurons - 10, Activation.Relu));
    //net.AddLayer(new FullyConnLayer(hiddenNeurons / 4, Activation.Relu));
    // declare the linear regressor on top of the previous hidden layer
    net.AddLayer(new RegressionLayer(numActions));
    Debug.Log("Network initialized");

    // specifies a 2-layer classification network with one hidden layer of 4 neurons
    netClassify = new Net();
    netClassify.AddLayer(new InputLayer(1, 1, 2));
    netClassify.AddLayer(new FullyConnLayer(4, Activation.Relu));
    // declare the softmax classifier on top of the previous hidden layer
    netClassify.AddLayer(new SoftmaxLayer(2));
    Debug.Log("Network Classify initialized");

    /*
     * List<double> list = new List<double>();
     * list = netToList(net);
     * outputList(list, "agent1");
     *
     * ListToNet(net, list);
     *
     * List<double> list2 = new List<double>();
     * list2 = netToList(net);
     * list2[1] = 0.5f;
     * outputList(list2, "agent2");
     */

    //double[] weights = { 0.3, -0.5, 0.1, 0.9, 0.6 };
    // forward a random data point through the network
    //var x = new Volume(weights);
    //var prob = net.Forward(x);
    // prob is a Volume. Volumes have a property Weights that stores the raw data, and WeightGradients that stores gradients
    //Debug.Log("probability that x is class 0: " + prob.Weights[0]); // prints e.g. 0.50101

    trainer = new SgdTrainer(net)
    {
        LearningRate = 0.01,
        L2Decay = 0.001,
        Momentum = 0.0,
        BatchSize = 5
    };

    //trainer.Train(x, 0); // train the network, specifying that x is class zero
    //Volume prob2 = net.Forward(x);
    //Debug.Log("probability that x is class 0: " + prob2.Weights[0]);
    // now prints 0.50374, slightly higher than the previous 0.50101: the network's
    // weights have been adjusted by the Trainer to give a higher probability to
    // the class we trained the network with (zero)

    e = new Entropy();
    q = new Quartiles();

    double[] arr = new double[8] { 5, 6, 7, 2, 1, 8, 4, 3 };
    double[] ascOrderedArray = (from i in arr orderby i ascending select i).ToArray();
    Debug.Log(q.umidmean(ascOrderedArray));
    Debug.Log(q.lmidmean(ascOrderedArray));
}
public void intQuartilesTest() // array larger than size 1; odd length, quartiles land exactly on elements
{
    int[] nums = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
    decimal[] ans = { 3, 6, 9 };
    CollectionAssert.AreEqual(ans, Quartiles.findQuartiles(nums));
}
//public ProcessedBuckets Process(string data)
//{
//    int[] checksum = null;
//    int[] bucketArray = new int[ArrayBucketSize];
//    SlideWindow slideWindow = new SlideWindow();
//    int length = data.Length;
//    for (int i = 0; i < length; i++)
//    {
//        int startWindow = slideWindow.GetPivot();
//        slideWindow.Put(data[i]);
//        checksum = slideWindow.GetChecksum(startWindow, checksum);
//        foreach (var tripletHash in slideWindow.GetTripletHashes(startWindow))
//        {
//            bucketArray[tripletHash]++;
//        }
//    }
//    return buildProcessedBuckets(length, bucketArray, checksum);
//}

private ProcessedBuckets buildProcessedBuckets(int dataLength, int[] bucketArray, int[] checksum)
{
    Quartiles quartiles = new Quartiles(bucketArray);
    return new ProcessedBuckets(checksum, bucketArray, dataLength, quartiles);
}
public static TukeyOutlierDetector Create(Quartiles quartiles, double k = DefaultK)
{
    return new TukeyOutlierDetector(quartiles, k);
}
private TukeyOutlierDetector(Quartiles quartiles, double k)
{
    LowerFence = quartiles.Q1 - k * quartiles.InterquartileRange;
    UpperFence = quartiles.Q3 + k * quartiles.InterquartileRange;
}
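The fence arithmetic in this constructor, worked by hand with illustrative quartile values:

// Q1 = 10, Q3 = 20  =>  IQR = Q3 - Q1 = 10
// LowerFence = 10 - 1.5 * 10 = -5
// UpperFence = 20 + 1.5 * 10 = 35
// With the default k = 1.5, anything outside [-5, 35] is flagged as an outlier.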
/// <summary>
/// Returns the quartile values of an ordered set of doubles,
/// assuming the sorting has already been done.
///
/// This actually turns out to be a bit of a PITA, because there is no universal agreement
/// on choosing the quartile values. In the case of odd counts, some count the median value
/// in finding the 1st and 3rd quartiles and some discard the median value;
/// the two different methods result in two different answers.
/// The method below produces the arithmetic mean of the two methods, and ensures the median
/// is given its correct weight so that the median changes as smoothly as possible as
/// more data points are added.
///
/// This method uses the following logic:
///
/// === If there are an even number of data points:
/// Use the median to divide the ordered data set into two halves.
/// The lower quartile value is the median of the lower half of the data.
/// The upper quartile value is the median of the upper half of the data.
///
/// === If there are (4n+1) data points:
/// The lower quartile is 25% of the nth data value plus 75% of the (n+1)th data value.
/// The upper quartile is 75% of the (3n+1)th data point plus 25% of the (3n+2)th data point.
///
/// === If there are (4n+3) data points:
/// The lower quartile is 75% of the (n+1)th data value plus 25% of the (n+2)th data value.
/// The upper quartile is 25% of the (3n+2)th data point plus 75% of the (3n+3)th data point.
/// </summary>
public static Quartiles<float> ComputeQuartiles(Span<float> afVal, int length)
{
    int iSize = length;
    int iMid = iSize / 2; // the midpoint of a zero-based index, e.g. mid of 7 = 3

    var toReturn = new Quartiles<float>();
    toReturn.count = length;

    // q0 and q4 are the extremes
    toReturn.q0 = afVal[0];
    toReturn.q4 = afVal[length - 1];

    if (iSize % 2 == 0)
    {
        // ================ EVEN NUMBER OF POINTS =====================
        // the median is the average of the two middle points
        toReturn.q2 = (afVal[iMid - 1] + afVal[iMid]) / 2;

        int iMidMid = iMid / 2;
        if (iMid % 2 == 0) // easy split
        {
            toReturn.q1 = (afVal[iMidMid - 1] + afVal[iMidMid]) / 2;
            toReturn.q3 = (afVal[iMid + iMidMid - 1] + afVal[iMid + iMidMid]) / 2;
        }
        else
        {
            toReturn.q1 = afVal[iMidMid];
            toReturn.q3 = afVal[iMidMid + iMid];
        }
    }
    else if (iSize == 1)
    {
        // ================ special case, sorry ================
        toReturn.q1 = afVal[0];
        toReturn.q2 = afVal[0];
        toReturn.q3 = afVal[0];
    }
    else
    {
        // odd number of points, so the median is just the midpoint of the array
        toReturn.q2 = afVal[iMid];

        if ((iSize - 1) % 4 == 0)
        {
            // ====================== (4n+1) POINTS =========================
            int n = (iSize - 1) / 4;
            toReturn.q1 = (afVal[n - 1] * .25f) + (afVal[n] * .75f);
            toReturn.q3 = (afVal[3 * n] * .75f) + (afVal[3 * n + 1] * .25f);
        }
        else if ((iSize - 3) % 4 == 0)
        {
            // ====================== (4n+3) POINTS =========================
            int n = (iSize - 3) / 4;
            toReturn.q1 = (afVal[n] * .75f) + (afVal[n + 1] * .25f);
            toReturn.q3 = (afVal[3 * n + 1] * .25f) + (afVal[3 * n + 2] * .75f);
        }
    }
    return toReturn;
}
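And a sketch of this averaging overload on an even-length sorted span, where the median and both outer quartiles fall between elements; the data is illustrative:

float[] sorted = { 1, 2, 3, 4, 5, 6, 7, 8 };
var q = ComputeQuartiles(sorted.AsSpan(), sorted.Length);
// iSize = 8 (even), iMid = 4, iMidMid = 2:
// q.q2 = (4 + 5) / 2 = 4.5
// q.q1 = (2 + 3) / 2 = 2.5, q.q3 = (6 + 7) / 2 = 6.5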
public void QuartileNullTest()
{
    Assert.Throws<ArgumentNullException>(() => Quartiles.Create((Sample)null));
}
public static TukeyOutlierDetector FromUnsorted(double[] values, double k = DefaultK)
{
    return new TukeyOutlierDetector(Quartiles.FromUnsorted(values), k);
}