/// <summary>
/// Builds a histogram whose bins all have the same width <paramref name="binSize"/>.
/// </summary>
/// <param name="values">Values to distribute over the bins; must contain at least one element.</param>
/// <param name="binSize">Width of every bin; must be at least 1e-9.</param>
/// <returns>A histogram covering every bin index from the smallest value's bin to the largest value's bin.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="binSize"/> is below 1e-9, or <paramref name="values"/> is empty.
/// </exception>
public Histogram Build(IReadOnlyList<double> values, double binSize)
{
    if (binSize < 1e-9)
    {
        string formattedBinSize = binSize.ToString("0.##", DefaultCultureInfo.Instance);
        throw new ArgumentException($"binSize ({formattedBinSize}) should be a positive number", nameof(binSize));
    }

    var sorted = values.CopyToArray();
    if (sorted.Length == 0)
    {
        throw new ArgumentException("Values should be non-empty", nameof(values));
    }

    Array.Sort(sorted);

    // After sorting, the extreme values sit at both ends of the array.
    int firstBin = GetBinIndex(sorted[0], binSize);
    int lastBin = GetBinIndex(sorted[sorted.Length - 1], binSize);
    var bins = new HistogramBin[lastBin - firstBin + 1];

    // Single forward pass: 'next' is the index of the first value not yet placed in a bin.
    int next = 0;
    for (int offset = 0; offset < bins.Length; offset++)
    {
        bool isLastBin = offset == bins.Length - 1;
        double lower = (firstBin + offset) * binSize;
        double upper = (firstBin + offset + 1) * binSize;

        var members = new List<double>();
        // The last bin takes every remaining value regardless of its upper bound.
        while (next < sorted.Length && (isLastBin || sorted[next] < upper))
        {
            members.Add(sorted[next]);
            next++;
        }

        bins[offset] = new HistogramBin(lower, upper, members.ToArray());
    }

    return new Histogram(binSize, bins);
}
/// <summary>
/// Merges two bins into a single bin that spans both ranges and contains every value of both bins.
/// </summary>
/// <param name="bin1">First bin to merge.</param>
/// <param name="bin2">Second bin to merge.</param>
/// <returns>
/// A new bin whose lower bound is the smaller of the two lower bounds, whose upper bound is the larger
/// of the two upper bounds, and whose values are all values of both bins in ascending order.
/// </returns>
public static HistogramBin Union(HistogramBin bin1, HistogramBin bin2) => new HistogramBin(
    Math.Min(bin1.Lower, bin2.Lower),
    Math.Max(bin1.Upper, bin2.Upper),
    // Concat rather than Enumerable.Union: Union is a set operation and would drop repeated values,
    // which would lose samples from the merged bin.
    bin1.Values.Concat(bin2.Values).OrderBy(value => value).ToArray());
// TODO: Optimize
/// <summary>
/// Builds a histogram with adaptively placed bin boundaries.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. The bin size is raised to at least <c>Resolution</c> and then passed through <c>NiceCeiling</c>.
/// 2. If the whole value range is narrower than the bin size, a single bin centered on the range is returned.
/// 3. Otherwise boundary points are inserted repeatedly into intervals that are too wide, each time placing
///    a boundary next to the window (narrower than one bin size) that holds the most values in the interval.
/// 4. Values are distributed over the resulting intervals, empty bins are trimmed from both ends,
///    and bins whose <c>Gap</c> is below an adaptive threshold are merged into a neighbor.
/// NOTE(review): the exact rounding performed by <c>NiceFloor</c>/<c>NiceCeiling</c> and the meaning of
/// <c>Resolution</c> and <c>HistogramBin.Gap</c> are defined outside this block — confirm there.
/// </remarks>
/// <param name="values">Values to build the histogram from; must contain at least one element.</param>
/// <param name="binSize">Requested bin size; must be at least 1e-9.</param>
/// <returns>The histogram built with the adjusted bin size.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="binSize"/> is below 1e-9, or <paramref name="values"/> is empty.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The number of boundary points exceeded ten times the number of values (guard against endless splitting).
/// </exception>
public Histogram Build(IReadOnlyList<double> values, double binSize)
{
    const double eps = 1e-9;
    const double margin = 0.1;
    const double adaptiveFactor = 0.02;

    if (binSize < eps)
    {
        throw new ArgumentException(
            $"binSize ({binSize.ToString("0.##", DefaultCultureInfo.Instance)}) should be a positive number",
            nameof(binSize));
    }

    if (binSize < Resolution)
    {
        binSize = Resolution;
    }

    binSize = NiceCeiling(binSize);

    var list = values.ToList();
    if (list.Count == 0)
    {
        throw new ArgumentException("Values should be non-empty", nameof(values));
    }

    list.Sort();

    // The list is sorted, so the extremes are its first and last elements; no extra scans are needed.
    double min = list[0];
    double max = list[list.Count - 1];

    // Degenerate case: everything fits into one bin, centered on the value range.
    if (max - min < binSize)
    {
        double center = (min + max) / 2;
        double lower = center - binSize / 2;
        double upper = center + binSize / 2;
        return new Histogram(binSize, new[] { new HistogramBin(lower, upper, list.ToArray()) });
    }

    // Boundary points, kept in ascending order. New points are always inserted before the last one.
    var points = new List<double> { NiceFloor(min - binSize / 2), NiceCeiling(max + binSize / 2) };

    // Intervals starting before this index are considered final and are not examined again.
    int processedPointCount = 0;
    while (true)
    {
        // Safety net against endless splitting.
        if (points.Count > 10 * list.Count)
        {
            var errorMessage = new StringBuilder();
            errorMessage.AppendLine("Failed to run AdaptiveHistogramBuilder.BuildWithFixedBinSize");
            errorMessage.AppendLine("BinSize: " + binSize.ToString("N12", DefaultCultureInfo.Instance));
            errorMessage.AppendLine("Values: ");
            foreach (double value in list)
            {
                errorMessage.AppendLine(" " + value.ToString("N12", DefaultCultureInfo.Instance));
            }

            throw new InvalidOperationException(errorMessage.ToString());
        }

        // Find the first unprocessed interval that is wider than allowed.
        int pointIndex = -1;
        for (int i = processedPointCount; i < points.Count - 1; i++)
        {
            double adaptiveBinSize = (points[i] + points[i + 1]) / 2.0 * adaptiveFactor;
            double maxSize = Math.Max(binSize * (1.0 + 2 * margin), adaptiveBinSize);
            if (points[i + 1] - points[i] > maxSize)
            {
                pointIndex = i;
                break;
            }
        }

        if (pointIndex == -1)
        {
            break;
        }

        double lower = points[pointIndex];
        double upper = points[pointIndex + 1];

        int bestIndex1 = -1;
        int bestIndex2 = -1;
        int bestCount = -1;
        double bestDist = double.MaxValue;

        bool Inside(double x) => x > lower - eps && x < upper - eps;

        // Look for the window of values (narrower than binSize, inside the interval) that contains
        // the most values; ties are broken in favor of the tighter window.
        for (int i = 0; i < list.Count; i++)
        {
            if (Inside(list[i]))
            {
                int j = i;
                while (j < list.Count && Inside(list[j]) && list[j] - list[i] < binSize)
                {
                    j++;
                }

                int count = j - i;
                double dist = list[j - 1] - list[i];
                if (count > bestCount || count == bestCount && dist < bestDist)
                {
                    bestCount = count;
                    bestIndex1 = i;
                    bestIndex2 = j - 1;
                    bestDist = dist;
                }
            }
        }

        if (bestIndex1 != -1)
        {
            double center = (list[bestIndex1] + list[bestIndex2]) / 2.0;
            double adaptiveBinSize = Math.Max(binSize, center * adaptiveFactor);
            double left = NiceFloor(center - adaptiveBinSize / 2);
            double right = NiceFloor(Math.Min(center + adaptiveBinSize / 2, upper));

            if (left > lower + binSize * margin)
            {
                // Split off the part left of the window; the same index is examined again next round.
                points.Insert(pointIndex + 1, left);
            }
            else if (right < upper - binSize * margin && right > lower + binSize * margin)
            {
                points.Insert(pointIndex + 1, right);
                processedPointCount++;
            }
            else
            {
                processedPointCount++;
            }
        }
        else
        {
            // No values inside the interval: cut off one bin-sized chunk.
            points.Insert(pointIndex + 1, NiceFloor(lower + binSize));
            processedPointCount++;
        }
    }

    // Distribute the sorted values over the intervals in a single forward pass.
    var bins = new List<HistogramBin>(points.Count - 1);
    int counter = 0;
    for (int i = 0; i < points.Count - 1; i++)
    {
        var bin = new List<double>();
        double lower = points[i];
        double upper = points[i + 1];

        // The last interval has index points.Count - 2; it takes every remaining value.
        // (The previous check against points.Count - 1 could never be true inside this loop.)
        bool isLastBin = i == points.Count - 2;
        while (counter < list.Count && (list[counter] < upper || isLastBin))
        {
            bin.Add(list[counter++]);
        }

        bins.Add(new HistogramBin(lower, upper, bin.ToArray()));
    }

    // Trim
    while (bins.Any() && bins.First().IsEmpty)
    {
        bins.RemoveAt(0);
    }

    while (bins.Any() && bins.Last().IsEmpty)
    {
        bins.RemoveAt(bins.Count - 1);
    }

    // Join small bins to neighbors
    counter = 0;
    double lastValue = 0;
    while (counter < bins.Count)
    {
        if (bins[counter].HasAny)
        {
            lastValue = Math.Max(lastValue, bins[counter].Values.Last());
        }

        double adaptiveThreshold = Math.Max(binSize / 2, lastValue * adaptiveFactor);
        if (bins[counter].Gap < adaptiveThreshold)
        {
            double leftGap = counter > 0 ? bins[counter - 1].Gap : double.MaxValue;
            double rightGap = counter < bins.Count - 1 ? bins[counter + 1].Gap : double.MaxValue;
            if (leftGap < rightGap && counter > 0)
            {
                // Merge into the left neighbor; the bin that shifts into this index is checked next.
                bins[counter - 1] = HistogramBin.Union(bins[counter - 1], bins[counter]);
                bins.RemoveAt(counter);
            }
            else if (counter < bins.Count - 1)
            {
                // Merge the right neighbor into this bin and re-check the merged bin.
                bins[counter] = HistogramBin.Union(bins[counter], bins[counter + 1]);
                bins.RemoveAt(counter + 1);
            }
            else
            {
                counter++;
            }
        }
        else
        {
            counter++;
        }
    }

    return new Histogram(binSize, bins.ToArray());
}