/// <summary>Returns the histogram that minimizes the sum, over all buckets, of
/// <c>width * width * count</c>, where <c>width</c> is the difference between the
/// largest and the smallest value of a bucket and <c>count</c> is the number of
/// values it holds.</summary>
/// <param name="histSize">The number of buckets in the histogram.</param>
/// <param name="distribution"><c>double</c> elements expected.</param>
/// <returns>A histogram with <paramref name="histSize"/> buckets. Buckets are added
/// starting with the one holding the largest values.</returns>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is
/// <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="histSize"/> is
/// negative.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <c>Math.Max(histSize, 2)</c> values.</exception>
/// <remarks>The search uses three nested loops (buckets, values, candidate
/// boundaries), so the running time grows with <paramref name="histSize"/> times
/// the square of the number of values.</remarks>
public static Histogram OptimalSquaredFreedom(int histSize, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    // A negative size would otherwise only surface as an OverflowException
    // when the tables below are allocated.
    if (histSize < 0)
    {
        throw new ArgumentOutOfRangeException("histSize", "The number of buckets cannot be negative.");
    }

    if (distribution.Count < Math.Max(histSize, 2))
    {
        throw new ArgumentException("Not enough points in the distribution.");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' contains the optimal cost of a histogram
    // covering the 'i+1' first values with 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, histSize];

    // 'lastBucketIndex[i, k]' contains the index of the first value of
    // the last bucket of the optimal histogram covering the 'i+1' first
    // values with 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, histSize];

    // "One bucket" histograms initialization: the single bucket spans
    // values[0]..values[i]. Skipped when no bucket was requested, because
    // the tables then have no column to write to.
    if (histSize > 0)
    {
        for (int i = 0; i < values.Length; i++)
        {
            double width = values[i] - values[0];
            optimalCost[i, 0] = width * width * (i + 1);
        }
    }

    // "One value per bucket" histograms initialization. The matching
    // optimalCost[k, k] entries are zero: every bucket has a null width.
    for (int k = 0; k < histSize; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets
    // (note that there are 'k+1' buckets)
    for (int k = 1; k < histSize; k++)
    {
        // Loop on the number of considered values
        // (note that there are 'i+1' considered values)
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Loop for finding the optimal boundary of the last bucket
            // ('j+1' is the index of the first value in the last bucket,
            // which therefore holds 'i-j' values)
            for (int j = k - 1; j < i; j++)
            {
                double width = values[i] - values[j + 1];
                double currentCost = optimalCost[j, k - 1] + width * width * (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundaries backwards: 'index' is the last value of the
    // bucket being emitted, starting from the end of the sorted values.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;
    for (int k = histSize - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    return histogram;
}
/// <summary>Returns the optimal variance histogram: the one that minimizes the
/// sum, over all buckets, of the squared deviations of the bucket's values from
/// the bucket's mean.</summary>
/// <param name="bucketCount">The number of buckets in the histogram.</param>
/// <param name="distribution"><c>double</c> elements expected.</param>
/// <returns>A histogram with <paramref name="bucketCount"/> buckets. Buckets are
/// added starting with the one holding the largest values.</returns>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is
/// <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is
/// negative.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <paramref name="bucketCount"/> values.</exception>
/// <remarks>The search uses three nested loops (buckets, values, candidate
/// boundaries), so the running time grows with <paramref name="bucketCount"/>
/// times the square of <c>distribution.Count</c>.</remarks>
public static Histogram OptimalVariance(int bucketCount, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    // A negative count would otherwise only surface as an OverflowException
    // when the tables below are allocated.
    if (bucketCount < 0)
    {
        throw new ArgumentOutOfRangeException("bucketCount", "The number of buckets cannot be negative.");
    }

    if (distribution.Count < bucketCount)
    {
        throw new ArgumentException("Not enough points in the distribution.");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' contains the optimal cost of a histogram
    // covering the 'i+1' first values with 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, bucketCount];

    // 'lastBucketIndex[i, k]' contains the index of the first value of
    // the last bucket of the optimal histogram covering the 'i+1' first
    // values with 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, bucketCount];

    // 'prefixSum[i]' contains the sum of the 'i' first values.
    double[] prefixSum = new double[values.Length + 1];

    // 'sqPrefixSum[i]' contains the sum of the 'i' first squared values.
    double[] sqPrefixSum = new double[values.Length + 1];

    // Initialization of the prefix sums
    for (int i = 0; i < values.Length; i++)
    {
        prefixSum[i + 1] = prefixSum[i] + values[i];
        sqPrefixSum[i + 1] = sqPrefixSum[i] + values[i] * values[i];
    }

    // "One bucket" histograms initialization: sum of squares minus the
    // squared sum divided by the number of values. Skipped when no bucket
    // was requested, because the tables then have no column to write to.
    if (bucketCount > 0)
    {
        for (int i = 0; i < values.Length; i++)
        {
            optimalCost[i, 0] = sqPrefixSum[i + 1]
                - prefixSum[i + 1] * prefixSum[i + 1] / (i + 1);
        }
    }

    // "One value per bucket" histograms initialization. The matching
    // optimalCost[k, k] entries are zero: a single value does not deviate
    // from its own mean.
    for (int k = 0; k < bucketCount; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets
    // (note that there are 'k+1' buckets)
    for (int k = 1; k < bucketCount; k++)
    {
        // Loop on the number of considered values
        // (note that there are 'i+1' considered values)
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Loop for finding the optimal boundary of the last bucket
            // ('j+1' is the index of the first value in the last bucket,
            // which therefore holds 'i-j' values)
            for (int j = k - 1; j < i; j++)
            {
                // Sum of the values of the candidate last bucket.
                double sum = prefixSum[i + 1] - prefixSum[j + 1];
                double currentCost = optimalCost[j, k - 1]
                    + sqPrefixSum[i + 1] - sqPrefixSum[j + 1]
                    - sum * sum / (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundaries backwards: 'index' is the last value of the
    // bucket being emitted, starting from the end of the sorted values.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;
    for (int k = bucketCount - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    return histogram;
}