Beispiel #1
0
 // Returns the result of FloatUtils.GetPowerOfTwoSingle for the given exponent.
 // The exponent is asserted to lie in the half-open range [0, ExpInf).
 private static float PowerOfTwo(int exp)
 {
     Contracts.Assert(exp >= 0 && exp < ExpInf);

     float power = FloatUtils.GetPowerOfTwoSingle(exp);
     return power;
 }
Beispiel #2
0
        /// <summary>
        /// Computes bin upper bounds for a Single-valued feature whose examples carry integer labels.
        /// </summary>
        /// <param name="maxBins">Upper limit on the number of bins</param>
        /// <param name="minBinSize">Smallest number of values allowed per bin (stopping condition for greedy bin splitting)</param>
        /// <param name="nLabels">Cardinality of the labels; each label is used as a column index into the cumulative count table</param>
        /// <param name="values">Feature values, one per example</param>
        /// <param name="labels">Label of each example, parallel to <paramref name="values"/></param>
        /// <returns>An array of split points, at most <paramref name="maxBins"/> of them (possibly fewer), whose last entry is PositiveInfinity</returns>
        public Single[] FindBins(int maxBins, int minBinSize, int nLabels, IList <Single> values, IList <int> labels)
        {
            // Record the inputs on the instance; both lists must describe the same examples.
            _valueCount       = values.Count;
            _maxBins          = maxBins;
            _minBinSize       = minBinSize;
            _labelCardinality = nLabels;
            Contracts.Assert(labels.Count == _valueCount);

            // Pair each value with its label and count how many different values occur.
            _distinctValueCount = 0;
            var uniqueValues  = new HashSet<Single>();
            var labeledValues = new ValuePair<Single>[_valueCount];

            for (int idx = 0; idx < _valueCount; idx++)
            {
                Single value = values[idx];
                labeledValues[idx] = new ValuePair<Single>(value, labels[idx]);

                // HashSet.Add reports true only the first time a value is inserted.
                if (uniqueValues.Add(value))
                {
                    _distinctValueCount++;
                }
            }
            Array.Sort(labeledValues);

            // Build one row of running totals per distinct value: a column for every label
            // plus a final column holding the total across all labels.
            int columnCount = _labelCardinality + 1;
            _cumulativeCounts = new int[_distinctValueCount, columnCount];
            var    sortedDistinct = new Single[_distinctValueCount];
            Single lastValue      = Single.NegativeInfinity;
            int    row            = -1;

            for (int p = 0; p < labeledValues.Length; p++)
            {
                var pair = labeledValues[p];
                Contracts.Assert(pair.Value >= lastValue);

                // The very first pair always opens row 0, even when its value is NegativeInfinity.
                bool opensNewRow = row < 0 || pair.Value > lastValue;
                if (opensNewRow)
                {
                    lastValue = pair.Value;
                    row++;
                    sortedDistinct[row] = lastValue;

                    // Every row after the first starts from the totals accumulated so far.
                    if (row > 0)
                    {
                        for (int col = 0; col < columnCount; col++)
                        {
                            _cumulativeCounts[row, col] = _cumulativeCounts[row - 1, col];
                        }
                    }
                }

                _cumulativeCounts[row, pair.Label] += 1;
                _cumulativeCounts[row, _labelCardinality] += 1;
            }

            Contracts.Assert(row == _distinctValueCount - 1);

            var boundaries = FindBinsCore();

            Contracts.Assert(Utils.Size(boundaries) > 0);
            Contracts.Assert(boundaries.Length == 1 && boundaries[0] == 0 || boundaries[0] > 0, "boundaries are exclusive, can't have 0");
            Contracts.Assert(boundaries[boundaries.Length - 1] == _distinctValueCount);

            // Convert the boundary indices into upper-bound values; the last bin is open-ended.
            int binCount = boundaries.Length;
            int lastBin  = binCount - 1;
            var upperBounds = new Single[binCount];

            for (int bin = 0; bin < lastBin; bin++)
            {
                var    split = boundaries[bin];
                Single below = sortedDistinct[split - 1];
                Single above = sortedDistinct[split];
                upperBounds[bin] = BinFinderBase.GetSplitValue(below, above);

                // The distinct values may include infinities, yet a split point must stay finite:
                // GetSplitValue(a,b) yields +-inf only when a==b==+-inf, and each infinity
                // appears at most once among the distinct values.
                Contracts.Assert(FloatUtils.IsFinite(upperBounds[bin]));
            }

            upperBounds[lastBin] = Single.PositiveInfinity;
            AssertStrictlyIncreasing(upperBounds);

            return upperBounds;
        }