Example #1
0
        /// <summary>Returns the histogram minimizing the total "squared freedom",
        /// where the cost of a bucket is its squared width (last value minus
        /// first value) multiplied by the number of values it contains.</summary>
        /// <param name="histSize">The number of buckets in the histogram.</param>
        /// <param name="distribution"><c>double</c> elements expected.</param>
        /// <returns>A histogram of <paramref name="histSize"/> buckets; buckets are
        /// added from the highest values down to the lowest ones.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="distribution"/>
        /// is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="histSize"/>
        /// is lower than 1.</exception>
        /// <exception cref="ArgumentException">The distribution contains fewer than
        /// <c>Math.Max(histSize, 2)</c> points.</exception>
        /// <remarks>Dynamic programming over (bucket count, prefix length): the
        /// computation time grows as <c>histSize * distribution.Count^2</c>.</remarks>
        public static Histogram OptimalSquaredFreedom(int histSize, ICollection distribution)
        {
            if (distribution == null)
            {
                throw new ArgumentNullException("distribution");
            }
            // Without this guard a zero or negative size fails later with an
            // IndexOutOfRangeException or OverflowException on the cost tables.
            if (histSize < 1)
            {
                throw new ArgumentOutOfRangeException("histSize",
                    "The histogram must contain at least one bucket.");
            }
            if (distribution.Count < Math.Max(histSize, 2))
            {
                throw new ArgumentException("Not enough points in the distribution.");
            }
            // "values" contains the sorted distribution.
            double[] values = new double[distribution.Count];
            distribution.CopyTo(values, 0);
            Array.Sort(values);
            // 'optimalCost[i,k]' contains the optimal cost for an
            // histogram with the 'i+1' first values and 'k+1' buckets.
            double[,] optimalCost = new double[values.Length, histSize];
            // 'lastBucketIndex[i,k]' contains the index of the first
            // value of the last bucket for the optimal histogram comprising
            // the 'i+1' first values and 'k+1' buckets.
            int[,] lastBucketIndex = new int[values.Length, histSize];
            // "One bucket" histograms initialization: the single bucket spans
            // values[0]..values[i] and holds 'i+1' values.
            // (lastBucketIndex[i, 0] keeps its default of 0, the first value.)
            for (int i = 0; i < values.Length; i++)
            {
                double width = values[i] - values[0];
                optimalCost[i, 0] = width * width * (i + 1);
            }
            // "One value per bucket" histograms initialization
            // (optimalCost[k, k] keeps its default of 0).
            for (int k = 0; k < histSize; k++)
            {
                lastBucketIndex[k, k] = k;
            }
            // ----- Dynamic programming part -----
            // Loop on the number of buckets
            // (note that there are 'k+1' buckets)
            for (int k = 1; k < histSize; k++)
            {
                // Loop on the number of considered values
                // (note that there are 'i+1' considered values)
                for (int i = k; i < values.Length; i++)
                {
                    optimalCost[i, k] = double.PositiveInfinity;
                    // Loop for finding the optimal boundary of the last bucket
                    // ('j+1' is the index of the first value in the last bucket,
                    // which therefore holds 'i-j' values)
                    for (int j = k - 1; j < i; j++)
                    {
                        double width       = values[i] - values[j + 1];
                        double currentCost = optimalCost[j, k - 1] + width * width * (i - j);

                        // Strict comparison: on ties the smallest 'j' wins.
                        if (currentCost < optimalCost[i, k])
                        {
                            optimalCost[i, k]     = currentCost;
                            lastBucketIndex[i, k] = j + 1;
                        }
                    }
                }
            }
            // ----- Reconstitution of the histogram -----
            // Walk the boundaries backwards, from the last bucket to the first.
            Histogram histogram = new Histogram();
            int       index     = values.Length - 1;

            for (int k = histSize - 1; k >= 0; k--)
            {
                int first = lastBucketIndex[index, k];
                // Bucket built from its first value, last value and value count.
                histogram.Add(new Bucket(values[first], values[index], index - first + 1));
                index = first - 1;
            }
            //histogram.JoinBuckets();
            return histogram;
        }
Example #2
0
        /// <summary>Returns the optimal variance histogram, i.e. the histogram
        /// minimizing the sum over the buckets of the squared deviations of the
        /// values from their bucket mean.</summary>
        /// <param name="bucketCount">The number of buckets in the histogram.</param>
        /// <param name="distribution"><c>double</c> elements expected.</param>
        /// <returns>A histogram of <paramref name="bucketCount"/> buckets; buckets
        /// are added from the highest values down to the lowest ones.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="distribution"/>
        /// is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="bucketCount"/>
        /// is negative, or is zero while the distribution is not empty.</exception>
        /// <exception cref="ArgumentException">The distribution contains fewer
        /// points than <paramref name="bucketCount"/>.</exception>
        /// <remarks>Requires a computation time proportional to
        /// <c>bucketCount * distribution.Count^2</c> and a memory proportional to
        /// <c>bucketCount * distribution.Count</c>.</remarks>
        public static Histogram OptimalVariance(int bucketCount, ICollection distribution)
        {
            if (distribution == null)
            {
                throw new ArgumentNullException("distribution");
            }

            // A negative count used to fail with an OverflowException on the table
            // allocation, and a zero count with an IndexOutOfRangeException as soon
            // as there was one value. Zero buckets for an empty distribution is
            // still accepted and yields an empty histogram.
            if (bucketCount < 0 || (bucketCount == 0 && distribution.Count > 0))
            {
                throw new ArgumentOutOfRangeException("bucketCount",
                    "The histogram must contain at least one bucket.");
            }

            if (distribution.Count < bucketCount)
            {
                throw new ArgumentException("Not enough points in the distribution.");
            }

            // "values" contains the sorted distribution.
            double[] values = new double[distribution.Count];
            distribution.CopyTo(values, 0);
            Array.Sort(values);

            // 'optimalCost[i,k]' contains the optimal cost for an
            // histogram with the 'i+1' first values and 'k+1' buckets.
            double[,] optimalCost = new double[values.Length, bucketCount];

            // 'lastBucketIndex[i,k]' contains the index of the first
            // value of the last bucket for the optimal histogram comprising
            // the 'i+1' first values and 'k+1' buckets.
            int[,] lastBucketIndex = new int[values.Length, bucketCount];

            // 'prefixSum[i]' contains the sum of the 'i' first values.
            double[] prefixSum = new double[values.Length + 1];
            // 'sqPrefixSum[i]' contains the sum of the 'i' first squared values.
            double[] sqPrefixSum = new double[values.Length + 1];

            // Initialization of the prefix sums
            for (int i = 0; i < values.Length; i++)
            {
                prefixSum[i + 1]   = prefixSum[i] + values[i];
                sqPrefixSum[i + 1] = sqPrefixSum[i] + values[i] * values[i];
            }

            // "One bucket" histograms initialization:
            // sum of squares minus (sum)^2 / count over the 'i+1' first values.
            // (lastBucketIndex[i, 0] keeps its default of 0, the first value.)
            for (int i = 0; i < values.Length; i++)
            {
                optimalCost[i, 0] = sqPrefixSum[i + 1] -
                                    prefixSum[i + 1] * prefixSum[i + 1] / (i + 1);
            }

            // "One value per bucket" histograms initialization
            // (optimalCost[k, k] keeps its default of 0).
            for (int k = 0; k < bucketCount; k++)
            {
                lastBucketIndex[k, k] = k;
            }

            // ----- Dynamic programming part -----

            // Loop on the number of buckets
            // (note that there are 'k+1' buckets)
            for (int k = 1; k < bucketCount; k++)
            {
                // Loop on the number of considered values
                // (note that there are 'i+1' considered values)
                for (int i = k; i < values.Length; i++)
                {
                    optimalCost[i, k] = double.PositiveInfinity;
                    // Loop for finding the optimal boundary of the last bucket
                    // ('j+1' is the index of the first value in the last bucket,
                    // which therefore holds 'i-j' values)
                    for (int j = k - 1; j < i; j++)
                    {
                        // Sum of the values 'j+1'..'i' of the candidate last bucket.
                        double bucketSum   = prefixSum[i + 1] - prefixSum[j + 1];
                        double currentCost = optimalCost[j, k - 1] + sqPrefixSum[i + 1] - sqPrefixSum[j + 1]
                                             - bucketSum * bucketSum / (i - j);

                        // Strict comparison: on ties the smallest 'j' wins.
                        if (currentCost < optimalCost[i, k])
                        {
                            optimalCost[i, k]     = currentCost;
                            lastBucketIndex[i, k] = j + 1;
                        }
                    }
                }
            }

            // ----- Reconstitution of the histogram -----
            // Walk the boundaries backwards, from the last bucket to the first.
            Histogram histogram = new Histogram();
            int       index     = values.Length - 1;

            for (int k = bucketCount - 1; k >= 0; k--)
            {
                int first = lastBucketIndex[index, k];
                // Bucket built from its first value, last value and value count.
                histogram.Add(new Bucket(values[first], values[index], index - first + 1));

                index = first - 1;
            }

            //histogram.JoinBuckets();
            return histogram;
        }