// see introselect http://en.wikipedia.org/wiki/Selection_algorithm#Introselect and impl of the sort in arraysorthelper

// Winsorizes the sample: the given fraction of values at the low and at the
// high end is replaced with the most extreme remaining value on that side.
// percent: fraction of the sample replaced at EACH end (default 0.25).
// Returns a new OfflineUnivariateStat with the same number of points;
// this instance is not modified.
// NOTE(review): indexing by position only makes sense if `data` is sorted
// ascending at this point - confirm against the rest of the class.
public OfflineUnivariateStat Winsorize(double percent = 0.25)
{
    int total = data.Count;
    int cut = (int)(total * percent);
    int upperStart = total - cut;
    var winsorized = new OfflineUnivariateStat();

    // Low tail: `cut` copies of the first value that is kept.
    if (cut > 0)
    {
        var lowFill = data[cut];
        for (int remaining = cut; remaining > 0; remaining--)
        {
            winsorized.Add(lowFill);
        }
    }

    // Middle of the sample is copied unchanged.
    int index = cut;
    while (index < upperStart)
    {
        winsorized.Add(data[index]);
        index++;
    }

    // High tail: `cut` copies of the last value that is kept.
    if (cut > 0)
    {
        var highFill = data[upperStart - 1];
        for (int remaining = cut; remaining > 0; remaining--)
        {
            winsorized.Add(highFill);
        }
    }

    return winsorized;
}
// Estimates the mode of the sample.
// Quantizes so that Q1 ~ Q3 spans 10 buckets and finds the bucket holding the
// most values, then keeps only the points within 1.5 bucket widths of that
// bucket's center. If the densest bucket still held more than 1000 points the
// estimate is refined recursively on that subset; otherwise the subset's
// median is returned.
// If there are 2 possible answers, the lowest one is returned.
// Returns Median when the interquartile range gives no usable bucket width.
// NOTE(review): the bucket scan relies on `data` being sorted ascending -
// confirm against the rest of the class.
// NOTE(review): the scan advances one bucket at a time from Minimum, so a
// value very far above the bulk of the sample makes it walk a huge number of
// empty buckets - worth confirming this is acceptable for callers.
public double GetMode()
{
    double bucketSize = 0.1 * (GetQuantile(.75) - GetQuantile(.25));

    // Written as !(x > 0) so that NaN is rejected as well as zero/negative:
    // with a NaN width the comparison in the scan below is never true and
    // the loop would not terminate.
    if (!(bucketSize > 0))
    {
        return Median;
    }

    double result = 0;
    int maxNbPoints = 0;
    int i = 0;
    int oldI = 0;

    // d is the exclusive upper edge of the current bucket.
    for (double d = Minimum + bucketSize; i < data.Count; d += bucketSize)
    {
        while (i < data.Count && data[i] < d)
        {
            i++;
        }

        // Strict '>' keeps the first (lowest) bucket when counts tie.
        if (i - oldI > maxNbPoints)
        {
            maxNbPoints = i - oldI;
            result = d - bucketSize / 2; // center of the bucket
        }

        oldI = i;
    }

    // Densest bucket plus one bucket on each side. This must be 1.5, not
    // 3 / 2: the latter is integer division and evaluates to 1.
    double halfWindow = 1.5 * bucketSize;

    OfflineUnivariateStat newStat = new OfflineUnivariateStat();
    for (int j = 0; j < data.Count; j++)
    {
        if (data[j] > result + halfWindow)
        {
            break;
        }

        if (data[j] >= result - halfWindow)
        {
            newStat.Add(data[j]);
        }
    }

    return maxNbPoints > 1000 ? newStat.GetMode() : newStat.Median;
}
// Trims the sample: drops the given fraction of values at the low end and
// at the high end and keeps what lies between.
// percent: fraction of the sample removed at EACH end (default 0.25).
// Returns a new OfflineUnivariateStat holding only the retained points;
// this instance is not modified.
// NOTE(review): "low end" / "high end" are taken by position, so this
// presumably expects `data` to be sorted ascending - confirm.
public OfflineUnivariateStat Trim(double percent = 0.25)
{
    int cut = (int)(data.Count * percent);
    int end = data.Count - cut;
    var trimmed = new OfflineUnivariateStat();

    int index = cut;
    while (index < end)
    {
        trimmed.Add(data[index]);
        index++;
    }

    return trimmed;
}
// Hodges-Lehmann location estimate: half the median of all pairwise sums
// data[i] + data[j] with i <= j (each point is also paired with itself).
// breakdown point: 0.29
// Builds a temporary stat holding n * (n + 1) / 2 sums for n data points.
public double GetHodgesLehmann()
{
    int count = data.Count;
    var pairSums = new OfflineUnivariateStat();

    for (int first = 0; first < count; first++)
    {
        var left = data[first];
        for (int second = first; second < count; second++)
        {
            pairSums.Add(left + data[second]);
        }
    }

    return pairSums.Median * 0.5;
}