/// <summary>
/// Returns the optimal squared freedom histogram.
/// </summary>
/// <remarks>
/// The values are sorted and split into <paramref name="histSize"/> contiguous
/// buckets. The cost of a bucket is its squared width (last value minus first
/// value, squared) multiplied by the number of values it holds; the partition
/// returned minimizes the sum of the bucket costs. The search is a dynamic
/// program made of three nested loops (buckets, values, candidate boundaries).
/// </remarks>
/// <param name="histSize">The number of buckets in the histogram; must be at least 1.</param>
/// <param name="distribution">The values to bucket. They are copied into a
/// <c>double[]</c> with <c>CopyTo</c>, so <c>double</c> elements are expected.</param>
/// <returns>The histogram; its buckets are added starting with the one that holds
/// the largest values. Each bucket is built from the first value, the last value
/// and the number of values of its range.</returns>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <c>Math.Max(histSize, 2)</c> values.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="histSize"/> is less than 1.</exception>
public static Histogram OptimalSquaredFreedom(int histSize, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    if (distribution.Count < Math.Max(histSize, 2))
    {
        throw new ArgumentException(Resources.InvalidOperationHistogramNotEnoughPoints);
    }

    // Checked after the size test so that inputs which used to raise the
    // "not enough points" error still do. Without this guard a zero bucket
    // count fails on the first write to the cost table and a negative one
    // fails on the table allocation.
    if (histSize < 1)
    {
        throw new ArgumentOutOfRangeException("histSize");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' is the optimal cost of a histogram made of the
    // 'i+1' first values and 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, histSize];

    // 'lastBucketIndex[i, k]' is the index of the first value of the last
    // bucket in the optimal histogram made of the 'i+1' first values and
    // 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, histSize];

    // "One bucket" histograms: the single bucket spans values[0..i].
    for (int i = 0; i < values.Length; i++)
    {
        double width = values[i] - values[0];
        optimalCost[i, 0] = width * width * (i + 1);
    }

    // "One value per bucket" histograms: the last bucket holds only value k.
    // The matching cost is zero, which is what the DP pass below computes.
    for (int k = 0; k < histSize; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets (there are 'k+1' buckets).
    for (int k = 1; k < histSize; k++)
    {
        // Loop on the number of considered values (there are 'i+1' values).
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Try every boundary: the last bucket spans values[j+1..i] and
            // the 'j+1' first values are split optimally into 'k' buckets.
            for (int j = k - 1; j < i; j++)
            {
                double width = values[i] - values[j + 1];
                double currentCost = optimalCost[j, k - 1] + width * width * (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundary table backwards, from the last bucket to the first.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;

    for (int k = histSize - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    return histogram;
}
/// <summary>
/// Returns the optimal dispersion histogram.
/// </summary>
/// <remarks>
/// The values are sorted and split into <paramref name="bucketCount"/> contiguous
/// buckets. The cost of a bucket is the sum of the absolute deviations of its
/// values from the bucket mean; the partition returned minimizes the sum of the
/// bucket costs. The search is a dynamic program made of three nested loops
/// (buckets, values, candidate boundaries).
/// </remarks>
/// <param name="bucketCount">The number of buckets in the histogram; must be at least 1.</param>
/// <param name="distribution">The values to bucket. They are copied into a
/// <c>double[]</c> with <c>CopyTo</c>, so <c>double</c> elements are expected.</param>
/// <returns>The histogram; its buckets are added starting with the one that holds
/// the largest values. Each bucket is built from the first value, the last value
/// and the number of values of its range.</returns>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <c>Math.Max(bucketCount, 2)</c> values.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is less than 1.</exception>
public static Histogram OptimalDispersion(int bucketCount, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    if (distribution.Count < Math.Max(bucketCount, 2))
    {
        throw new ArgumentException(Resources.InvalidOperationHistogramNotEnoughPoints);
    }

    // Checked after the size test so that inputs which used to raise the
    // "not enough points" error still do. Without this guard a zero bucket
    // count fails on the first write to the cost table and a negative one
    // fails on the table allocation.
    if (bucketCount < 1)
    {
        throw new ArgumentOutOfRangeException("bucketCount");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' is the optimal cost of a histogram made of the
    // 'i+1' first values and 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, bucketCount];

    // 'lastBucketIndex[i, k]' is the index of the first value of the last
    // bucket in the optimal histogram made of the 'i+1' first values and
    // 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, bucketCount];

    // 'prefixSum[i]' is the sum of the 'i' first values (prefixSum[0] is 0).
    double[] prefixSum = new double[values.Length + 1];
    for (int i = 0; i < values.Length; i++)
    {
        prefixSum[i + 1] = prefixSum[i] + values[i];
    }

    // "One bucket" histograms: the single bucket spans values[0..i].
    // 'avg' tracks the last index whose value is below the bucket mean; it is
    // shared across iterations because the mean of a growing sorted prefix
    // never decreases.
    for (int i = 0, avg = 0; i < values.Length; i++)
    {
        double mean = prefixSum[i + 1] / (i + 1);

        while ((avg + 1) < values.Length && values[avg + 1] < mean)
        {
            avg++;
        }

        // (sum of values above avg) - (sum of values up to avg)
        // + (count up to avg - count above avg) * mean
        optimalCost[i, 0] = prefixSum[i + 1] - 2 * prefixSum[avg + 1]
            + (2 * avg - i + 1) * mean;
    }

    // "One value per bucket" histograms: the last bucket holds only value k.
    // The matching cost is zero, which is what the DP pass below computes.
    for (int k = 0; k < bucketCount; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets (there are 'k+1' buckets).
    for (int k = 1; k < bucketCount; k++)
    {
        // Loop on the number of considered values (there are 'i+1' values).
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Try every boundary: the last bucket spans values[j+1..i] and
            // the 'j+1' first values are split optimally into 'k' buckets.
            // 'avg' is restarted for each 'i' and only moves forward while
            // 'j' grows, since dropping the smallest value of a sorted
            // bucket cannot lower its mean.
            for (int j = k - 1, avg = k - 1; j < i; j++)
            {
                double bucketSum = prefixSum[i + 1] - prefixSum[j + 1];
                double mean = bucketSum / (i - j);

                while ((avg + 1) < values.Length && values[avg + 1] < mean)
                {
                    avg++;
                }

                // The last term keeps the original evaluation order
                // (multiply, then divide) so results are unchanged.
                double currentCost = optimalCost[j, k - 1]
                    + prefixSum[i + 1] + prefixSum[j + 1] - 2 * prefixSum[avg + 1]
                    + (2 * avg - i - j) * bucketSum / (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundary table backwards, from the last bucket to the first.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;

    for (int k = bucketCount - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    return histogram;
}
/// <summary>
/// Returns the optimal squared freedom histogram.
/// </summary>
/// <remarks>
/// The values are sorted and split into <paramref name="histSize"/> contiguous
/// buckets. The cost of a bucket is its squared width (last value minus first
/// value, squared) multiplied by the number of values it holds; the partition
/// returned minimizes the sum of the bucket costs. The search is a dynamic
/// program made of three nested loops (buckets, values, candidate boundaries).
/// </remarks>
/// <param name="histSize">The number of buckets in the histogram; must be at least 1.</param>
/// <param name="distribution">The values to bucket. They are copied into a
/// <c>double[]</c> with <c>CopyTo</c>, so <c>double</c> elements are expected.</param>
/// <returns>The histogram; its buckets are added starting with the one that holds
/// the largest values. Each bucket is built from the first value, the last value
/// and the number of values of its range.</returns>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <c>Math.Max(histSize, 2)</c> values.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="histSize"/> is less than 1.</exception>
public static Histogram OptimalSquaredFreedom(int histSize, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    if (distribution.Count < Math.Max(histSize, 2))
    {
        throw new ArgumentException("Not enough points in the distribution.");
    }

    // Checked after the size test so that inputs which used to raise the
    // "not enough points" error still do. Without this guard a zero bucket
    // count fails on the first write to the cost table and a negative one
    // fails on the table allocation.
    if (histSize < 1)
    {
        throw new ArgumentOutOfRangeException("histSize");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' is the optimal cost of a histogram made of the
    // 'i+1' first values and 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, histSize];

    // 'lastBucketIndex[i, k]' is the index of the first value of the last
    // bucket in the optimal histogram made of the 'i+1' first values and
    // 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, histSize];

    // "One bucket" histograms: the single bucket spans values[0..i].
    for (int i = 0; i < values.Length; i++)
    {
        double width = values[i] - values[0];
        optimalCost[i, 0] = width * width * (i + 1);
    }

    // "One value per bucket" histograms: the last bucket holds only value k.
    // The matching cost is zero, which is what the DP pass below computes.
    for (int k = 0; k < histSize; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets (there are 'k+1' buckets).
    for (int k = 1; k < histSize; k++)
    {
        // Loop on the number of considered values (there are 'i+1' values).
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Try every boundary: the last bucket spans values[j+1..i] and
            // the 'j+1' first values are split optimally into 'k' buckets.
            for (int j = k - 1; j < i; j++)
            {
                double width = values[i] - values[j + 1];
                double currentCost = optimalCost[j, k - 1] + width * width * (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundary table backwards, from the last bucket to the first.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;

    for (int k = histSize - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    // histogram.JoinBuckets();  (call was already disabled; left as found)
    return histogram;
}
/// <summary>Returns the optimal variance histogram.</summary>
/// <param name="bucketCount">The number of buckets in the histogram.</param>
/// <param name="distribution"><c>double</c> elements expected.</param>
/// <returns>The histogram; its buckets are added starting with the one that holds
/// the largest values. Each bucket is built from the first value, the last value
/// and the number of values of its range.</returns>
/// <remarks>
/// The values are sorted and split into <paramref name="bucketCount"/> contiguous
/// buckets. The cost of a bucket is the sum of the squared deviations of its
/// values from the bucket mean (sum of squares minus squared sum over count);
/// the partition returned minimizes the sum of the bucket costs.
/// Requires a computation time quadratic in <c>distribution.Count</c> for each
/// bucket (three nested loops: buckets, values, candidate boundaries).
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <paramref name="bucketCount"/> values.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is
/// negative, or is zero while <paramref name="distribution"/> is not empty.</exception>
public static Histogram OptimalVariance(int bucketCount, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    if (distribution.Count < bucketCount)
    {
        throw new ArgumentException("Not enough points in the distribution.");
    }

    // A negative bucket count used to fail on the table allocation and a zero
    // count with a non-empty distribution on the first write to the cost
    // table. Zero buckets over an empty distribution is deliberately still
    // accepted: it has always produced an empty histogram.
    if (bucketCount < 0 || (bucketCount == 0 && distribution.Count > 0))
    {
        throw new ArgumentOutOfRangeException("bucketCount");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' is the optimal cost of a histogram made of the
    // 'i+1' first values and 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, bucketCount];

    // 'lastBucketIndex[i, k]' is the index of the first value of the last
    // bucket in the optimal histogram made of the 'i+1' first values and
    // 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, bucketCount];

    // 'prefixSum[i]' is the sum of the 'i' first values and 'sqPrefixSum[i]'
    // the sum of the 'i' first squared values (index 0 holds 0 in both).
    double[] prefixSum = new double[values.Length + 1];
    double[] sqPrefixSum = new double[values.Length + 1];

    for (int i = 0; i < values.Length; i++)
    {
        prefixSum[i + 1] = prefixSum[i] + values[i];
        sqPrefixSum[i + 1] = sqPrefixSum[i] + values[i] * values[i];
    }

    // "One bucket" histograms: the single bucket spans values[0..i].
    for (int i = 0; i < values.Length; i++)
    {
        optimalCost[i, 0] = sqPrefixSum[i + 1]
            - prefixSum[i + 1] * prefixSum[i + 1] / (i + 1);
    }

    // "One value per bucket" histograms: the last bucket holds only value k.
    // The matching cost is zero, which is what the DP pass below computes.
    for (int k = 0; k < bucketCount; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets (there are 'k+1' buckets).
    for (int k = 1; k < bucketCount; k++)
    {
        // Loop on the number of considered values (there are 'i+1' values).
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Try every boundary: the last bucket spans values[j+1..i] and
            // the 'j+1' first values are split optimally into 'k' buckets.
            for (int j = k - 1; j < i; j++)
            {
                double bucketSum = prefixSum[i + 1] - prefixSum[j + 1];
                double currentCost = optimalCost[j, k - 1]
                    + sqPrefixSum[i + 1] - sqPrefixSum[j + 1]
                    - bucketSum * bucketSum / (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundary table backwards, from the last bucket to the first.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;

    for (int k = bucketCount - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    // histogram.JoinBuckets();  (call was already disabled; left as found)
    return histogram;
}
/// <summary>
/// Returns the optimal dispersion histogram.
/// </summary>
/// <remarks>
/// The values are sorted and split into <paramref name="bucketCount"/> contiguous
/// buckets. The cost of a bucket is the sum of the absolute deviations of its
/// values from the bucket mean; the partition returned minimizes the sum of the
/// bucket costs. The search is a dynamic program made of three nested loops
/// (buckets, values, candidate boundaries).
/// </remarks>
/// <param name="bucketCount">The number of buckets in the histogram; must be at least 1.</param>
/// <param name="distribution">The values to bucket. They are copied into a
/// <c>double[]</c> with <c>CopyTo</c>, so <c>double</c> elements are expected.</param>
/// <returns>The histogram; its buckets are added starting with the one that holds
/// the largest values. Each bucket is built from the first value, the last value
/// and the number of values of its range.</returns>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <c>Math.Max(bucketCount, 2)</c> values.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is less than 1.</exception>
public static Histogram OptimalDispersion(int bucketCount, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    if (distribution.Count < Math.Max(bucketCount, 2))
    {
        throw new ArgumentException(Resources.InvalidOperationHistogramNotEnoughPoints);
    }

    // Checked after the size test so that inputs which used to raise the
    // "not enough points" error still do. Without this guard a zero bucket
    // count fails on the first write to the cost table and a negative one
    // fails on the table allocation.
    if (bucketCount < 1)
    {
        throw new ArgumentOutOfRangeException("bucketCount");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' is the optimal cost of a histogram made of the
    // 'i+1' first values and 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, bucketCount];

    // 'lastBucketIndex[i, k]' is the index of the first value of the last
    // bucket in the optimal histogram made of the 'i+1' first values and
    // 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, bucketCount];

    // 'prefixSum[i]' is the sum of the 'i' first values (prefixSum[0] is 0).
    double[] prefixSum = new double[values.Length + 1];
    for (int i = 0; i < values.Length; i++)
    {
        prefixSum[i + 1] = prefixSum[i] + values[i];
    }

    // "One bucket" histograms: the single bucket spans values[0..i].
    // 'avg' tracks the last index whose value is below the bucket mean; it is
    // shared across iterations because the mean of a growing sorted prefix
    // never decreases.
    for (int i = 0, avg = 0; i < values.Length; i++)
    {
        double mean = prefixSum[i + 1] / (i + 1);

        while ((avg + 1) < values.Length && values[avg + 1] < mean)
        {
            avg++;
        }

        // (sum of values above avg) - (sum of values up to avg)
        // + (count up to avg - count above avg) * mean
        optimalCost[i, 0] = prefixSum[i + 1] - 2 * prefixSum[avg + 1]
            + (2 * avg - i + 1) * mean;
    }

    // "One value per bucket" histograms: the last bucket holds only value k.
    // The matching cost is zero, which is what the DP pass below computes.
    for (int k = 0; k < bucketCount; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets (there are 'k+1' buckets).
    for (int k = 1; k < bucketCount; k++)
    {
        // Loop on the number of considered values (there are 'i+1' values).
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Try every boundary: the last bucket spans values[j+1..i] and
            // the 'j+1' first values are split optimally into 'k' buckets.
            // 'avg' is restarted for each 'i' and only moves forward while
            // 'j' grows, since dropping the smallest value of a sorted
            // bucket cannot lower its mean.
            for (int j = k - 1, avg = k - 1; j < i; j++)
            {
                double bucketSum = prefixSum[i + 1] - prefixSum[j + 1];
                double mean = bucketSum / (i - j);

                while ((avg + 1) < values.Length && values[avg + 1] < mean)
                {
                    avg++;
                }

                // The last term keeps the original evaluation order
                // (multiply, then divide) so results are unchanged.
                double currentCost = optimalCost[j, k - 1]
                    + prefixSum[i + 1] + prefixSum[j + 1] - 2 * prefixSum[avg + 1]
                    + (2 * avg - i - j) * bucketSum / (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundary table backwards, from the last bucket to the first.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;

    for (int k = bucketCount - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    return histogram;
}
/// <summary>Returns the optimal variance histogram.</summary>
/// <param name="bucketCount">The number of buckets in the histogram.</param>
/// <param name="distribution"><c>double</c> elements expected.</param>
/// <returns>The histogram; its buckets are added starting with the one that holds
/// the largest values. Each bucket is built from the first value, the last value
/// and the number of values of its range.</returns>
/// <remarks>
/// The values are sorted and split into <paramref name="bucketCount"/> contiguous
/// buckets. The cost of a bucket is the sum of the squared deviations of its
/// values from the bucket mean (sum of squares minus squared sum over count);
/// the partition returned minimizes the sum of the bucket costs.
/// Requires a computation time quadratic in <c>distribution.Count</c> for each
/// bucket (three nested loops: buckets, values, candidate boundaries).
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="distribution"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="distribution"/> holds fewer
/// than <paramref name="bucketCount"/> values.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/> is
/// negative, or is zero while <paramref name="distribution"/> is not empty.</exception>
public static Histogram OptimalVariance(int bucketCount, ICollection distribution)
{
    if (distribution == null)
    {
        throw new ArgumentNullException("distribution");
    }

    if (distribution.Count < bucketCount)
    {
        throw new ArgumentException("Not enough points in the distribution.");
    }

    // A negative bucket count used to fail on the table allocation and a zero
    // count with a non-empty distribution on the first write to the cost
    // table. Zero buckets over an empty distribution is deliberately still
    // accepted: it has always produced an empty histogram.
    if (bucketCount < 0 || (bucketCount == 0 && distribution.Count > 0))
    {
        throw new ArgumentOutOfRangeException("bucketCount");
    }

    // "values" contains the sorted distribution.
    double[] values = new double[distribution.Count];
    distribution.CopyTo(values, 0);
    Array.Sort(values);

    // 'optimalCost[i, k]' is the optimal cost of a histogram made of the
    // 'i+1' first values and 'k+1' buckets.
    double[,] optimalCost = new double[values.Length, bucketCount];

    // 'lastBucketIndex[i, k]' is the index of the first value of the last
    // bucket in the optimal histogram made of the 'i+1' first values and
    // 'k+1' buckets.
    int[,] lastBucketIndex = new int[values.Length, bucketCount];

    // 'prefixSum[i]' is the sum of the 'i' first values and 'sqPrefixSum[i]'
    // the sum of the 'i' first squared values (index 0 holds 0 in both).
    double[] prefixSum = new double[values.Length + 1];
    double[] sqPrefixSum = new double[values.Length + 1];

    for (int i = 0; i < values.Length; i++)
    {
        prefixSum[i + 1] = prefixSum[i] + values[i];
        sqPrefixSum[i + 1] = sqPrefixSum[i] + values[i] * values[i];
    }

    // "One bucket" histograms: the single bucket spans values[0..i].
    for (int i = 0; i < values.Length; i++)
    {
        optimalCost[i, 0] = sqPrefixSum[i + 1]
            - prefixSum[i + 1] * prefixSum[i + 1] / (i + 1);
    }

    // "One value per bucket" histograms: the last bucket holds only value k.
    // The matching cost is zero, which is what the DP pass below computes.
    for (int k = 0; k < bucketCount; k++)
    {
        lastBucketIndex[k, k] = k;
    }

    // ----- Dynamic programming part -----

    // Loop on the number of buckets (there are 'k+1' buckets).
    for (int k = 1; k < bucketCount; k++)
    {
        // Loop on the number of considered values (there are 'i+1' values).
        for (int i = k; i < values.Length; i++)
        {
            optimalCost[i, k] = double.PositiveInfinity;

            // Try every boundary: the last bucket spans values[j+1..i] and
            // the 'j+1' first values are split optimally into 'k' buckets.
            for (int j = k - 1; j < i; j++)
            {
                double bucketSum = prefixSum[i + 1] - prefixSum[j + 1];
                double currentCost = optimalCost[j, k - 1]
                    + sqPrefixSum[i + 1] - sqPrefixSum[j + 1]
                    - bucketSum * bucketSum / (i - j);

                if (currentCost < optimalCost[i, k])
                {
                    optimalCost[i, k] = currentCost;
                    lastBucketIndex[i, k] = j + 1;
                }
            }
        }
    }

    // ----- Reconstitution of the histogram -----

    // Walk the boundary table backwards, from the last bucket to the first.
    Histogram histogram = new Histogram();
    int index = values.Length - 1;

    for (int k = bucketCount - 1; k >= 0; k--)
    {
        int first = lastBucketIndex[index, k];
        histogram.Add(new Bucket(values[first], values[index], index - first + 1));
        index = first - 1;
    }

    // histogram.JoinBuckets();  (call was already disabled; left as found)
    return histogram;
}