/// <summary>
/// Removes the per-bin statistics of <paramref name="child"/> from this histogram, in place.
/// Target sums and counts are always subtracted; weight sums are subtracted only when this
/// histogram has a weight array to pin.
/// </summary>
/// <param name="child">Histogram whose bins are subtracted; must have the same number of feature values.</param>
public unsafe void Subtract(FeatureHistogram child)
{
    if (NumFeatureValues != child.NumFeatureValues)
    {
        throw Contracts.Except("cannot subtract FeatureHistograms of different lengths");
    }

    fixed (FloatType* pTargets = SumTargetsByBin)
    fixed (FloatType* pChildTargets = child.SumTargetsByBin)
    fixed (double* pWeights = SumWeightsByBin)
    fixed (double* pChildWeights = child.SumWeightsByBin)
    fixed (int* pCounts = CountByBin)
    fixed (int* pChildCounts = child.CountByBin)
    {
        if (pWeights != null)
        {
            // Weighted histogram: the child is expected to carry weights as well.
            Contracts.Assert(pChildWeights != null);
            for (int bin = 0; bin < NumFeatureValues; bin++)
            {
                pTargets[bin] -= pChildTargets[bin];
                pWeights[bin] -= pChildWeights[bin];
                pCounts[bin] -= pChildCounts[bin];
            }
            return;
        }

        // No weights on this histogram: only targets and counts are maintained.
        for (int bin = 0; bin < NumFeatureValues; bin++)
        {
            pTargets[bin] -= pChildTargets[bin];
            pCounts[bin] -= pChildCounts[bin];
        }
    }
}
/// <summary>
/// Pins the input and histogram arrays and forwards them, together with the raw value and
/// delta buffers, to the <c>C_SumupDeltaSparse_*</c> routine (the float or double variant is
/// chosen at compile time by <c>USE_SINGLE_PRECISION</c>).
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
/// <param name="pValues">Pointer to the packed feature values.</param>
/// <param name="pDeltas">Pointer to the delta bytes.</param>
/// <param name="numDeltas">Number of entries behind <paramref name="pDeltas"/>.</param>
/// <param name="bitsPerItem">Bit width describing the data behind <paramref name="pValues"/>; passed to the routine as an int.</param>
private unsafe void SumupCPlusPlusSparse(SumupInputData input, FeatureHistogram histogram, byte* pValues, byte* pDeltas, int numDeltas, IntArrayBits bitsPerItem)
{
    fixed (FloatType* pSumTargetsByBin = histogram.SumTargetsByBin)
    fixed (FloatType* pSampleOutputs = input.Outputs)
    fixed (double* pSumWeightsByBin = histogram.SumWeightsByBin)
    fixed (double* pSampleWeights = input.Weights)
    fixed (int* pIndices = input.DocIndices)
    fixed (int* pCountByBin = histogram.CountByBin)
    {
        // Use the bit width supplied by the caller for pValues rather than this instance's
        // BitsPerItem property, so the width always describes the buffer actually passed in.
        int rv =
#if USE_SINGLE_PRECISION
            C_SumupDeltaSparse_float
#else
            C_SumupDeltaSparse_double
#endif
            ((int)bitsPerItem, pValues, pDeltas, numDeltas, pIndices, pSampleOutputs, pSampleWeights,
             pSumTargetsByBin, pSumWeightsByBin, pCountByBin,
             input.TotalCount, input.SumTargets, input.SumWeights);

        // A negative return value is treated as an error code.
        if (rv < 0)
        {
            throw Contracts.Except("CSumup sumupdeltasparse {0}", rv);
        }
    }
}
/// <summary>
/// Accumulates every explicitly stored (non-zero) entry into its bin, then assigns whatever is
/// left of the input totals to bin 0. Requires both the histogram and the input to carry weights.
/// </summary>
/// <param name="input">Outputs, weights and totals for all documents.</param>
/// <param name="histogram">Histogram whose target sums, weight sums and counts are updated.</param>
private void SumupRootWeighted(SumupInputData input, FeatureHistogram histogram)
{
    // Sum up the non-zero values, then subtract from total to get the zero values
    Contracts.Assert(histogram.SumWeightsByBin != null);
    Contracts.Assert(input.Weights != null);

    double totalOutput = 0;
    double totalWeight = 0;
    int currentPos = 0;
    for (int i = 0; i < _values.Length; i++)
    {
        // Each delta advances the document position to the next stored entry.
        currentPos += _deltas[i];
        int featureBin = _values[i];

        FloatType output = (FloatType)input.Outputs[currentPos];
        // Weights stay in double end to end: the weight sums are doubles, so narrowing to
        // FloatType first would only discard precision in single-precision builds.
        double weight = input.Weights[currentPos];

        histogram.SumTargetsByBin[featureBin] = (FloatType)(histogram.SumTargetsByBin[featureBin] + output);
        histogram.SumWeightsByBin[featureBin] += weight;
        ++histogram.CountByBin[featureBin];

        totalOutput += output;
        totalWeight += weight;
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
    histogram.SumWeightsByBin[0] += input.SumWeights - totalWeight;
    histogram.CountByBin[0] += input.TotalCount - _values.Length;
}
/// <summary>
/// Pins the input and histogram arrays and hands them, with the packed feature data, to the
/// <c>C_Sumup_*</c> routine (float or double variant selected by <c>USE_SINGLE_PRECISION</c>).
/// The call is timed under <c>TimerEvent.SumupCppDense</c>.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
/// <param name="data">Pointer to the packed feature values.</param>
/// <param name="numBits">Bit width passed to the routine alongside <paramref name="data"/>.</param>
protected static unsafe void SumupCPlusPlusDense(SumupInputData input, FeatureHistogram histogram, byte* data, int numBits)
{
    using (Timer.Time(TimerEvent.SumupCppDense))
    {
        fixed (FloatType* pTargetSums = histogram.SumTargetsByBin)
        fixed (FloatType* pOutputs = input.Outputs)
        fixed (double* pWeightSums = histogram.SumWeightsByBin)
        fixed (double* pWeights = input.Weights)
        fixed (int* pDocs = input.DocIndices)
        fixed (int* pCounts = histogram.CountByBin)
        {
            int status =
#if USE_SINGLE_PRECISION
                C_Sumup_float
#else
                C_Sumup_double
#endif
                (numBits, data, pDocs, pOutputs, pWeights,
                 pTargetSums, pWeightSums, pCounts,
                 input.TotalCount, input.SumTargets, input.SumWeights);

            // Non-negative status means success; anything else is surfaced as an exception.
            if (status >= 0)
            {
                return;
            }

            throw Contracts.Except("CSumup returned error {0}", status);
        }
    }
}
/// <summary>
/// Accumulates this feature into <paramref name="histogram"/>, timed under
/// <c>TimerEvent.SumupSparse</c>. When built with <c>USE_FASTTREENATIVE</c> and the values are
/// a <c>DenseDataCallbackIntArray</c>, the work goes through <c>SumupCPlusPlusSparse</c>;
/// otherwise the root or leaf routine is chosen by whether document indices are present.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    using (Timer.Time(TimerEvent.SumupSparse))
    {
#if USE_FASTTREENATIVE
        var callbackValues = _values as DenseDataCallbackIntArray;
        if (callbackValues != null)
        {
            unsafe
            {
                fixed (byte* pPinnedDeltas = _deltas)
                {
                    // A fixed local cannot be captured by a lambda, so copy the pointer first.
                    byte* pDeltasForCallback = pPinnedDeltas;
                    callbackValues.Callback(pValues =>
                    {
                        SumupCPlusPlusSparse(input, histogram, (byte*)pValues, pDeltasForCallback, _deltas.Length, _values.BitsPerItem);
                    });
                }
            }
            return;
        }
#endif
        // With document indices we are summing a subset (leaf); without them, everything (root).
        if (input.DocIndices != null)
        {
            SumupLeaf(input, histogram);
        }
        else
        {
            SumupRoot(input, histogram);
        }
    }
}
/// <summary>
/// Creates the statistics holder for <paramref name="flock"/> and allocates its histogram from
/// the flock's bins and the number of bin upper bounds.
/// </summary>
/// <param name="flock">The singleton flock these statistics belong to.</param>
/// <param name="hasWeights">Forwarded to the <c>FeatureHistogram</c> constructor.</param>
public SufficientStats(SingletonFeatureFlock flock, bool hasWeights)
    : base(flock.Count)
{
    // NOTE(review): flock is already dereferenced by the base-constructor call above,
    // so this assertion runs only after that dereference succeeded.
    Contracts.AssertValue(flock);
    _flock = flock;

    int binCount = flock._binUpperBounds.Length;
    _hist = new FeatureHistogram(flock._bins, binCount, hasWeights);
}
#pragma warning restore TLC_GeneralName
/// <summary>
/// Pins the segment arrays (<c>_data</c>, <c>_segType</c>, <c>_segLength</c>) together with the
/// input and histogram arrays and passes them to the <c>C_SumupSegment_*</c> routine (float or
/// double variant selected by <c>USE_SINGLE_PRECISION</c>). Timed under <c>TimerEvent.SumupSegment</c>.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
public unsafe void SumupCPlusPlus(SumupInputData input, FeatureHistogram histogram)
{
    using (Timer.Time(TimerEvent.SumupSegment))
    {
        fixed (FloatType* pTargetSums = histogram.SumTargetsByBin)
        fixed (FloatType* pOutputs = input.Outputs)
        fixed (double* pWeightSums = histogram.SumWeightsByBin)
        fixed (double* pWeights = input.Weights)
        fixed (uint* pSegData = _data)
        fixed (byte* pSegTypes = _segType)
        fixed (int* pSegLengths = _segLength)
        fixed (int* pDocs = input.DocIndices)
        fixed (int* pCounts = histogram.CountByBin)
        {
            int status =
#if USE_SINGLE_PRECISION
                C_SumupSegment_float
#else
                C_SumupSegment_double
#endif
                (pSegData, pSegTypes, pSegLengths, pDocs, pOutputs, pWeights,
                 pTargetSums, pWeightSums, pCounts,
                 input.TotalCount, input.SumTargets);

            // Non-negative status means success; anything else is surfaced as an exception.
            if (status >= 0)
            {
                return;
            }

            throw Contracts.Except("CSumup returned error {0}", status);
        }
    }
}
/// <summary>
/// Writes the input totals straight into bin 0: the target sum, the document count and,
/// when the histogram tracks weights, the weight sum.
/// </summary>
/// <param name="input">Source of the totals.</param>
/// <param name="histogram">Histogram whose bin 0 is overwritten.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    histogram.SumTargetsByBin[0] = input.SumTargets;

    double[] weightSums = histogram.SumWeightsByBin;
    if (weightSums != null)
    {
        weightSums[0] = input.SumWeights;
    }

    histogram.CountByBin[0] = input.TotalCount;
}
/// <summary>
/// Pins <c>_data</c> and delegates to <c>SumupCPlusPlusDense</c> with a bit width of 4.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    const int bitsPerValue = 4;
    unsafe
    {
        fixed (byte* pPacked = _data)
        {
            SumupCPlusPlusDense(input, histogram, pPacked, bitsPerValue);
        }
    }
}
/// <summary>
/// Pins <c>_data</c> and delegates to <c>SumupCPlusPlusDense</c> with a bit width of 8.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
private void SumupNative(SumupInputData input, FeatureHistogram histogram)
{
    const int bitsPerValue = 8;
    unsafe
    {
        fixed (byte* pBytes = _data)
        {
            SumupCPlusPlusDense(input, histogram, pBytes, bitsPerValue);
        }
    }
}
/// <summary>
/// Accumulates the documents listed in <c>input.DocIndices</c> into <paramref name="histogram"/>.
/// The running sum of <c>_deltas</c> marks where the current value stops applying, so all
/// consecutive documents below that boundary are folded into the bin <c>_values[pos]</c> at once.
/// Weight sums are updated only when the input carries weights.
/// </summary>
/// <param name="input">Outputs, weights and document indices; the first <c>TotalCount</c> indices are read.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
private unsafe void SumupLeaf(SumupInputData input, FeatureHistogram histogram)
{
    if (_length == 0)
    {
        return;
    }

    // nextStep is the first document index that no longer belongs to the value at pos.
    int nextStep = _deltas[0];
    int pos = 0;

    // Only the input arrays are walked by pointer; the histogram is accessed through its
    // managed arrays, so it does not need to be pinned here.
    fixed (int* pDocIndicesFixed = input.DocIndices)
    fixed (FloatType* pOutputsFixed = input.Outputs)
    fixed (double* pWeightsFixed = input.Weights)
    {
        int* pdoc = pDocIndicesFixed;
        int* end = pDocIndicesFixed + input.TotalCount;
        FloatType* pOutputs = pOutputsFixed;
        double* pWeights = pWeightsFixed;

        while (pdoc < end)
        {
            // Advance to the value whose range contains the current document.
            while (nextStep <= *pdoc)
            {
                nextStep += _deltas[++pos];
            }

            int bin = _values[pos];
            int count = 0;
            FloatType subsum = histogram.SumTargetsByBin[bin];

            if (pWeightsFixed != null)
            {
                double subweightsum = histogram.SumWeightsByBin[bin];
                while (pdoc < end && nextStep > *pdoc)
                {
                    subsum += *(pOutputs++);
                    subweightsum += *(pWeights++);
                    count++;
                    pdoc++;
                }
                histogram.SumWeightsByBin[bin] = subweightsum;
            }
            else
            {
                while (pdoc < end && nextStep > *pdoc)
                {
                    subsum += *(pOutputs++);
                    count++;
                    pdoc++;
                }
            }

            histogram.SumTargetsByBin[bin] = subsum;
            histogram.CountByBin[bin] += count;
        }
    }
}
/// <summary>
/// Pins <c>_data</c>, reinterprets it as bytes and delegates to <c>SumupCPlusPlusDense</c>
/// with a bit width of 32.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    const int bitsPerValue = 32;
    unsafe
    {
        fixed (int* pWords = _data)
        {
            SumupCPlusPlusDense(input, histogram, (byte*)pWords, bitsPerValue);
        }
    }
}
/// <summary>
/// Pins <c>_data</c>, reinterprets it as bytes and delegates to <c>SumupCPlusPlusDense</c>
/// with a bit width of 16.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public void SumupNative(SumupInputData input, FeatureHistogram histogram)
{
    const int bitsPerValue = 16;
    unsafe
    {
        fixed (ushort* pShorts = _data)
        {
            SumupCPlusPlusDense(input, histogram, (byte*)pShorts, bitsPerValue);
        }
    }
}
/// <summary>
/// Accumulates this feature into <paramref name="histogram"/>, timed under
/// <c>TimerEvent.SumupDense10</c>. Without document indices the work is delegated to
/// <c>SumupRoot</c>; otherwise each listed document's value is extracted from <c>_data</c> at
/// bit position <c>docIndex * 10</c> and its output (and weight, when the histogram tracks
/// weights) is added to that bin.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
public override unsafe void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    using (Timer.Time(TimerEvent.SumupDense10))
    {
        if (input.DocIndices == null)
        {
            SumupRoot(histogram, input.Outputs, input.Weights);
            return;
        }

        fixed (uint* pData = _data)
        fixed (int* pCounts = histogram.CountByBin)
        fixed (int* pDocs = input.DocIndices)
        fixed (FloatType* pTargetSums = histogram.SumTargetsByBin)
        fixed (FloatType* pTargets = input.Outputs)
        {
            int docCount = input.TotalCount;

            if (histogram.SumWeightsByBin == null)
            {
                for (int k = 0; k < docCount; ++k)
                {
                    // Bit position of the value: docIndex * 10, split into a uint index and a bit shift.
                    long bitOffset = (long)pDocs[k] * 10;
                    int shift = (int)(bitOffset & 0x1f);
                    int word = (int)(bitOffset >> 5);
                    // NOTE(review): this reads 8 bytes starting at pData + word; presumably _data
                    // is sized so the read for the last value stays in bounds — confirm.
                    int bin = (int)(((*(ulong*)(pData + word)) >> shift) & _mask);

                    pTargetSums[bin] += pTargets[k];
                    ++pCounts[bin];
                }
            }
            else
            {
                fixed (double* pWeightSums = histogram.SumWeightsByBin)
                fixed (double* pWeights = input.Weights)
                {
                    for (int k = 0; k < docCount; ++k)
                    {
                        long bitOffset = (long)pDocs[k] * 10;
                        int shift = (int)(bitOffset & 0x1f);
                        int word = (int)(bitOffset >> 5);
                        int bin = (int)(((*(ulong*)(pData + word)) >> shift) & _mask);

                        pTargetSums[bin] += pTargets[k];
                        pWeightSums[bin] += pWeights[k];
                        ++pCounts[bin];
                    }
                }
            }
        }
    }
}
/// <summary>
/// Accumulates this feature into <paramref name="histogram"/>, timed under
/// <c>TimerEvent.SumupRepeat</c>: the leaf routine when document indices are supplied,
/// the root routine otherwise.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    using (Timer.Time(TimerEvent.SumupRepeat))
    {
        if (input.DocIndices != null)
        {
            SumupLeaf(input, histogram);
        }
        else
        {
            SumupRoot(input, histogram);
        }
    }
}
/// <summary>
/// Accumulates this feature into <paramref name="histogram"/>, timed under
/// <c>TimerEvent.SumupSegment</c>. Nothing is done when <c>_length</c> is zero. Otherwise the
/// work goes to <c>SumupCPlusPlus</c> when built with <c>USE_FASTTREENATIVE</c>, and to the
/// base implementation when not.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    using (Timer.Time(TimerEvent.SumupSegment))
    {
        if (_length != 0)
        {
#if USE_FASTTREENATIVE
            SumupCPlusPlus(input, histogram);
#else
            base.Sumup(input, histogram);
#endif
        }
    }
}
/// <summary>
/// Accumulates all <c>Length</c> items into <paramref name="histogram"/>. Item <c>i</c>'s bin is
/// read with <c>Get</c> at a bit offset that advances by <c>_bits</c> per item.
/// </summary>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
/// <param name="outputs">Per-item outputs, indexed by item position.</param>
/// <param name="weights">Per-item weights; read only when the histogram tracks weight sums.</param>
private void SumupRoot(FeatureHistogram histogram, FloatType[] outputs, double[] weights)
{
    long bitOffset = 0;
    for (int item = 0; item < Length; ++item, bitOffset += _bits)
    {
        int bin = Get(bitOffset, _mask);

        histogram.SumTargetsByBin[bin] += outputs[item];
        if (histogram.SumWeightsByBin != null)
        {
            histogram.SumWeightsByBin[bin] += weights[item];
        }
        ++histogram.CountByBin[bin];
    }
}
/// <summary>
/// Weighted sumup through the forward indexer: for each of the <c>TotalCount</c> inputs, looks
/// up the feature bin (by position when there are no document indices, by document index
/// otherwise), validates it and adds the output, weight and count to that bin.
/// </summary>
/// <param name="input">Outputs, weights, optional document indices and the item count.</param>
/// <param name="histogram">Histogram whose target sums, weight sums and counts are updated.</param>
private void SumupWeighted(SumupInputData input, FeatureHistogram histogram)
{
    Contracts.AssertValue(histogram.SumWeightsByBin);
    Contracts.AssertValue(input.Weights);

    IIntArrayForwardIndexer indexer = GetIndexer();
    for (int i = 0; i < input.TotalCount; i++)
    {
        int row = input.DocIndices == null ? i : input.DocIndices[i];
        int featureBin = indexer[row];

        // The bin must be a valid index for both the target array and the declared bin count.
        bool binIsValid = featureBin >= 0
            && featureBin < histogram.SumTargetsByBin.Length
            && featureBin < histogram.NumFeatureValues;
        if (!binIsValid)
        {
            throw Contracts.Except("Feature bin {0} is invalid", featureBin);
        }

        histogram.SumTargetsByBin[featureBin] += input.Outputs[i];
        histogram.SumWeightsByBin[featureBin] += input.Weights[i];
        ++histogram.CountByBin[featureBin];
    }
}
/// <summary>
/// Accumulates all documents into <paramref name="histogram"/> run by run: entry <c>i</c>
/// contributes the next <c>_deltas[i]</c> consecutive outputs (and weights, when the input has
/// them) to bin <c>_values[i]</c>, and adds <c>_deltas[i]</c> to that bin's count.
/// </summary>
/// <param name="input">Outputs and optional weights, consumed sequentially.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
private unsafe void SumupRoot(SumupInputData input, FeatureHistogram histogram)
{
    fixed (FloatType* pOutputsFixed = input.Outputs)
    fixed (FloatType* pTargetSums = histogram.SumTargetsByBin)
    fixed (double* pWeightsFixed = input.Weights)
    fixed (double* pWeightSums = histogram.SumWeightsByBin)
    {
        // Cursors pointing at the first output/weight of the current run.
        FloatType* pRunOutputs = pOutputsFixed;
        double* pRunWeights = pWeightsFixed;

        for (int run = 0; run < _values.Length; run++)
        {
            int bin = _values[run];
            int runLength = _deltas[run];

            FloatType targetSum = pTargetSums[bin];
            for (int j = 0; j < runLength; ++j)
            {
                targetSum += pRunOutputs[j];
            }
            pTargetSums[bin] = targetSum;

            if (pWeightsFixed != null)
            {
                double weightSum = pWeightSums[bin];
                for (int j = 0; j < runLength; ++j)
                {
                    weightSum += pRunWeights[j];
                }
                pWeightSums[bin] = weightSum;
                pRunWeights += runLength;
            }

            pRunOutputs += runLength;
            histogram.CountByBin[bin] += runLength;
        }
    }
}
/// <summary>
/// Forwards the sumup request unchanged to <c>SumupHandler</c>.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram to update.</param>
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    SumupHandler(input, histogram);
}
/// <summary>
/// Weighted leaf sumup: walks the stored entries (positions are the running sum of
/// <c>_deltas</c>) and <c>input.DocIndices</c> in step. Each match adds the document's output
/// and weight to bin <c>_values[iSparse]</c>. Whatever remains of the input totals afterwards
/// is added to bin 0.
/// </summary>
/// <param name="input">Outputs, weights, document indices and totals; must carry weights.</param>
/// <param name="histogram">Histogram to update; must carry weight sums.</param>
private unsafe void SumupLeafWeighted(SumupInputData input, FeatureHistogram histogram)
{
    Contracts.Assert(histogram.SumWeightsByBin != null);
    Contracts.Assert(input.Weights != null);

    int iDocIndices = 0;
    int iSparse = 0;
    int totalCount = 0;
    FloatType totalOutput = 0;
    double totalWeights = 0;
    // Position of the first stored entry, or _length when there are no entries at all.
    int currentPos = _deltas.Length > 0 ? _deltas[iSparse] : _length;

    fixed (int* pDocIndices = input.DocIndices)
    fixed (byte* pDeltas = _deltas)
    fixed (FloatType* pOutputs = input.Outputs)
    fixed (double* pWeights = input.Weights)
    {
        while (true)
        {
            if (currentPos < pDocIndices[iDocIndices])
            {
                // Stored entry is behind the current document: move to the next entry.
                if (++iSparse >= _deltas.Length)
                {
                    break;
                }
                currentPos += pDeltas[iSparse];
            }
            else if (currentPos > pDocIndices[iDocIndices])
            {
                // Current document is behind the stored entry: move to the next document.
                if (++iDocIndices >= input.TotalCount)
                {
                    break;
                }
            }
            else
            {
                // a nonzero entry matched one of the docs in the leaf, add it to the histogram
                int featureBin = _values[iSparse];

                FloatType output = pOutputs[iDocIndices];
                histogram.SumTargetsByBin[featureBin] += output;
                totalOutput += output;

                double weights = pWeights[iDocIndices];
                histogram.SumWeightsByBin[featureBin] += weights;
                totalWeights += weights;

                ++histogram.CountByBin[featureBin];
                totalCount++;

                if (++iSparse >= _deltas.Length)
                {
                    break;
                }
                // A zero delta keeps the position on the same document, so the document
                // index only advances when the position actually moves.
                if (pDeltas[iSparse] > 0)
                {
                    currentPos += pDeltas[iSparse];
                    if (++iDocIndices >= input.TotalCount)
                    {
                        break;
                    }
                }
            }
        }
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
    // Weight sums are doubles, so the remainder is added without narrowing it to FloatType.
    histogram.SumWeightsByBin[0] += input.SumWeights - totalWeights;
    histogram.CountByBin[0] += input.TotalCount - totalCount;
}
// Pinning the arrays and using unsafe accesses may give a slight speedup, but it is hard to tell.
// OPTIMIZE: Two alternative approaches would be binary search or a hashtable -- binary search
// when there are very few docs in the leaf.
/// <summary>
/// Leaf sumup. When the histogram tracks weights the work is handed to
/// <c>SumupLeafWeighted</c>. Otherwise the stored entries (positions are the running sum of
/// <c>_deltas</c>) and <c>input.DocIndices</c> are walked in step, each match adds the
/// document's output to bin <c>_values[entry]</c>, and the remainder of the input totals is
/// added to bin 0.
/// </summary>
/// <param name="input">Outputs, document indices and totals to accumulate.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated.</param>
private unsafe void SumupLeaf(SumupInputData input, FeatureHistogram histogram)
{
    if (histogram.SumWeightsByBin != null)
    {
        SumupLeafWeighted(input, histogram);
        return;
    }

    int docCursor = 0;
    int entry = 0;
    int matchedCount = 0;
    FloatType matchedOutput = 0;
    // Position of the first stored entry, or _length when there are no entries at all.
    int entryPos = _deltas.Length > 0 ? _deltas[entry] : _length;

    fixed (int* pDocs = input.DocIndices)
    fixed (byte* pDeltaBytes = _deltas)
    fixed (FloatType* pOutputValues = input.Outputs)
    {
        while (true)
        {
            int doc = pDocs[docCursor];

            if (entryPos < doc)
            {
                // Stored entry is behind the current document: move to the next entry.
                if (++entry >= _deltas.Length)
                {
                    break;
                }
                entryPos += pDeltaBytes[entry];
                continue;
            }

            if (entryPos > doc)
            {
                // Current document is behind the stored entry: move to the next document.
                if (++docCursor >= input.TotalCount)
                {
                    break;
                }
                continue;
            }

            // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
            int featureBin = _values[entry];
            FloatType output = pOutputValues[docCursor];
            histogram.SumTargetsByBin[featureBin] += output;
            matchedOutput += output;
            ++histogram.CountByBin[featureBin];
            matchedCount++;

            if (++entry >= _deltas.Length)
            {
                break;
            }

            // Note that if the delta is 0, we will "stay" on this document, thus
            // allowing the sumup to work to accumulate multiple bins per document.
            if (pDeltaBytes[entry] > 0)
            {
                entryPos += pDeltaBytes[entry];
                if (++docCursor >= input.TotalCount)
                {
                    break;
                }
            }
        }
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - matchedOutput);
    histogram.CountByBin[0] += input.TotalCount - matchedCount;
}
/// <summary>
/// Returns the memory estimate that <c>FeatureHistogram.EstimateMemoryUsedForFeatureHistogram</c>
/// gives for this object's histogram, based on its number of feature values and on whether it
/// has a weight-sum array.
/// </summary>
/// <returns>The estimate produced by the histogram helper.</returns>
public override long SizeInBytes()
{
    bool tracksWeights = _hist.SumWeightsByBin != null;
    return FeatureHistogram.EstimateMemoryUsedForFeatureHistogram(_hist.NumFeatureValues, tracksWeights);
}