/// <summary>
/// Thin wrapper over <c>FloatUtils.GetPowerOfTwoSingle</c> that first asserts the
/// exponent lies in the half-open range [0, ExpInf).
/// </summary>
/// <param name="exp">The exponent handed to <c>FloatUtils.GetPowerOfTwoSingle</c></param>
/// <returns>Whatever <c>FloatUtils.GetPowerOfTwoSingle</c> yields for <paramref name="exp"/> (presumably 2^exp as a float)</returns>
private static float PowerOfTwo(int exp)
{
    Contracts.Assert(exp >= 0 && exp < ExpInf);
    float power = FloatUtils.GetPowerOfTwoSingle(exp);
    return power;
}
/// <summary>
/// Computes bin upper bounds for Single feature values that are paired with integer labels.
/// </summary>
/// <param name="maxBins">Maximum number of bins</param>
/// <param name="minBinSize">Minimum number of values per bin (stopping condition for the greedy bin splitting)</param>
/// <param name="nLabels">Cardinality of the labels</param>
/// <param name="values">The feature values</param>
/// <param name="labels">The label of each feature value; must have the same count as <paramref name="values"/></param>
/// <returns>A strictly increasing array of bin upper bounds, at most <paramref name="maxBins"/> of them
/// (possibly fewer), whose last element is PositiveInfinity</returns>
public Single[] FindBins(int maxBins, int minBinSize, int nLabels, IList<Single> values, IList<int> labels)
{
    // Stash the problem description in the instance fields.
    _valueCount = values.Count;
    _labelCardinality = nLabels;
    _maxBins = maxBins;
    _minBinSize = minBinSize;
    Contracts.Assert(_valueCount == labels.Count);

    // Pair every value with its label, counting distinct values along the way.
    _distinctValueCount = 0;
    var uniques = new HashSet<Single>();
    var pairs = new ValuePair<Single>[_valueCount];
    for (int idx = 0; idx < _valueCount; idx++)
    {
        Single value = values[idx];
        pairs[idx] = new ValuePair<Single>(value, labels[idx]);
        if (uniques.Add(value))
        {
            _distinctValueCount++;
        }
    }

    Array.Sort(pairs);

    // Build one row of running counts per distinct value: a column for each label,
    // plus a final column holding the running total over all labels.
    int totalColumn = _labelCardinality;
    int columnCount = _labelCardinality + 1;
    _cumulativeCounts = new int[_distinctValueCount, columnCount];
    var uniqueSorted = new Single[_distinctValueCount];
    Single previous = Single.NegativeInfinity;
    int row = -1;
    for (int p = 0; p < pairs.Length; p++)
    {
        var current = pairs[p];
        Contracts.Assert(current.Value >= previous);

        // The "row < 0" test makes sure the very first pair opens a row even when
        // its value equals the NegativeInfinity sentinel.
        bool startsNewRow = row < 0 || current.Value > previous;
        if (startsNewRow)
        {
            row++;
            previous = current.Value;
            uniqueSorted[row] = previous;

            // Every row after the first starts out as a copy of its predecessor.
            for (int col = 0; row > 0 && col < columnCount; col++)
            {
                _cumulativeCounts[row, col] = _cumulativeCounts[row - 1, col];
            }
        }

        _cumulativeCounts[row, current.Label]++;
        _cumulativeCounts[row, totalColumn]++;
    }

    Contracts.Assert(row == _distinctValueCount - 1);

    var boundaries = FindBinsCore();
    Contracts.Assert(Utils.Size(boundaries) > 0);
    Contracts.Assert(boundaries.Length == 1 && boundaries[0] == 0 || boundaries[0] > 0, "boundaries are exclusive, can't have 0");
    Contracts.Assert(boundaries[boundaries.Length - 1] == _distinctValueCount);

    // Map the boundary indices back to upper bounds expressed as feature values.
    int lastBound = boundaries.Length - 1;
    var upperBounds = new Single[boundaries.Length];
    for (int b = 0; b < lastBound; b++)
    {
        var cut = boundaries[b];
        upperBounds[b] = BinFinderBase.GetSplitValue(uniqueSorted[cut - 1], uniqueSorted[cut]);

        // The distinct values may include infinities, yet a split point is never infinite:
        // GetSplitValue(a,b) only returns +-inf if a==b==+-inf, and the distinct values
        // hold at most one +inf and one -inf.
        Contracts.Assert(FloatUtils.IsFinite(upperBounds[b]));
    }

    // The final bin is unbounded above.
    upperBounds[lastBound] = Single.PositiveInfinity;
    AssertStrictlyIncreasing(upperBounds);
    return upperBounds;
}