/// <summary>
/// Adds the outputs of the documents described by <paramref name="input"/> to the per-bin
/// target sums and document counts of <paramref name="histogram"/>.
/// </summary>
/// <param name="input">Outputs, optional weights and optional document indices to sum up.</param>
/// <param name="histogram">Histogram whose <c>SumTargetsByBin</c> and <c>CountByBin</c> are updated in place.</param>
/// <remarks>
/// When the histogram tracks weight sums the call is forwarded to <c>SumupWeighted</c>.
/// The exception produced by <c>Contracts.Except</c> is thrown as soon as a document maps to a bin
/// that is negative or not smaller than both <c>SumTargetsByBin.Length</c> and <c>NumFeatureValues</c>.
/// </remarks>
public virtual void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    // Weights must be supplied exactly when the histogram has somewhere to store them.
    Contracts.Assert((input.Weights == null) == (histogram.SumWeightsByBin == null));

    if (histogram.SumWeightsByBin != null)
    {
        SumupWeighted(input, histogram);
        return;
    }

    IIntArrayForwardIndexer binLookup = GetIndexer();
    for (int pos = 0; pos < input.TotalCount; pos++)
    {
        // Without explicit document indices, the position itself is the document index.
        int bin;
        if (input.DocIndices == null)
        {
            bin = binLookup[pos];
        }
        else
        {
            bin = binLookup[input.DocIndices[pos]];
        }

        bool binIsValid = bin >= 0
            && bin < histogram.SumTargetsByBin.Length
            && bin < histogram.NumFeatureValues;
        if (!binIsValid)
        {
            throw Contracts.Except("Feature bin {0} is invalid", bin);
        }

        histogram.SumTargetsByBin[bin] += input.Outputs[pos];
        histogram.CountByBin[bin]++;
    }
}
/// <summary>
/// Sums up all of <paramref name="input"/>'s outputs (and weights, when present) into
/// <paramref name="histogram"/> by consuming them sequentially, one run at a time:
/// run <c>i</c> covers the next <c>_deltas[i]</c> entries and is credited to bin <c>_values[i]</c>.
/// </summary>
/// <param name="input">Source of <c>Outputs</c> and optional <c>Weights</c>; <c>DocIndices</c> is not consulted here.</param>
/// <param name="histogram">Histogram whose per-bin sums and counts are updated in place.</param>
private unsafe void SumupRoot(SumupInputData input, FeatureHistogram histogram)
{
    fixed (FloatType* outputsBase = input.Outputs)
    fixed (FloatType* targetSums = histogram.SumTargetsByBin)
    fixed (double* weightsBase = input.Weights)
    fixed (double* weightSums = histogram.SumWeightsByBin)
    {
        // Cursors that advance through the outputs/weights as each run is consumed.
        FloatType* outputCursor = outputsBase;
        double* weightCursor = weightsBase;
        bool hasWeights = weightsBase != null;

        for (int run = 0; run < _values.Length; run++)
        {
            int bin = _values[run];
            var runLength = _deltas[run];

            // Accumulate in a local so the histogram slot is written once per run.
            FloatType targetAcc = targetSums[bin];
            for (int k = 0; k < runLength; k++)
            {
                targetAcc += outputCursor[k];
            }
            targetSums[bin] = targetAcc;

            if (hasWeights)
            {
                double weightAcc = weightSums[bin];
                for (int k = 0; k < runLength; k++)
                {
                    weightAcc += weightCursor[k];
                }
                weightSums[bin] = weightAcc;
                weightCursor += runLength;
            }

            outputCursor += runLength;
            histogram.CountByBin[bin] += runLength;
        }
    }
}
/// <summary>
/// Weighted sumup over the documents listed in <c>input.DocIndices</c>. The stored sparse entries
/// (positions reconstructed by accumulating <c>_deltas</c>, bins taken from <c>_values</c>) are merged
/// against the document indices; every match adds the document's output and weight to the matching
/// bin. Whatever part of <c>input.SumTargets</c>, <c>input.SumWeights</c> and <c>input.TotalCount</c>
/// was not matched is then added to bin 0.
/// </summary>
/// <param name="input">Leaf documents with outputs and weights. NOTE(review): the merge only works if
/// <c>DocIndices</c> is in ascending order — confirm against callers.</param>
/// <param name="histogram">Histogram updated in place; must have <c>SumWeightsByBin</c> allocated.</param>
private unsafe void SumupLeafWeighted(SumupInputData input, FeatureHistogram histogram)
{
    Contracts.Assert(histogram.SumWeightsByBin != null);
    Contracts.Assert(input.Weights != null);

    int iDocIndices = 0;
    int iSparse = 0;
    int totalCount = 0;
    FloatType totalOutput = 0;
    double totalWeights = 0;

    // Document position of the current sparse entry; with no entries at all it starts at _length.
    int currentPos = _deltas.Length > 0 ? _deltas[iSparse] : _length;

    // The merge loop reads pDocIndices[iDocIndices] before checking any bound, so it must not be
    // entered for an empty leaf: the pinned pointer would be null (null/empty array) or point at
    // an index that does not belong to this leaf.
    if (input.TotalCount > 0)
    {
        fixed (int* pDocIndices = input.DocIndices)
        fixed (byte* pDeltas = _deltas)
        fixed (FloatType* pOutputs = input.Outputs)
        fixed (double* pWeights = input.Weights)
        {
            while (true)
            {
                if (currentPos < pDocIndices[iDocIndices])
                {
                    // Sparse entry lies before the current leaf document: advance the sparse side.
                    if (++iSparse >= _deltas.Length)
                    {
                        break;
                    }
                    currentPos += pDeltas[iSparse];
                }
                else if (currentPos > pDocIndices[iDocIndices])
                {
                    // Leaf document has no sparse entry at its position: advance the document side.
                    if (++iDocIndices >= input.TotalCount)
                    {
                        break;
                    }
                }
                else
                {
                    // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                    int featureBin = _values[iSparse];

                    FloatType output = pOutputs[iDocIndices];
                    histogram.SumTargetsByBin[featureBin] += output;
                    totalOutput += output;

                    double weight = pWeights[iDocIndices];
                    histogram.SumWeightsByBin[featureBin] += weight;
                    totalWeights += weight;

                    ++histogram.CountByBin[featureBin];
                    totalCount++;

                    if (++iSparse >= _deltas.Length)
                    {
                        break;
                    }

                    // A delta of 0 keeps the merge on the same document, so several bins can be
                    // accumulated for one document; only a positive delta moves both sides on.
                    if (pDeltas[iSparse] > 0)
                    {
                        currentPos += pDeltas[iSparse];
                        if (++iDocIndices >= input.TotalCount)
                        {
                            break;
                        }
                    }
                }
            }
        }
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
    // Weight sums are accumulated in double everywhere else, so the remainder is added without
    // narrowing it to FloatType first.
    histogram.SumWeightsByBin[0] += input.SumWeights - totalWeights;
    histogram.CountByBin[0] += input.TotalCount - totalCount;
}
// Fixing the arrays and using unsafe accesses may give a slight speedup, but it is hard to tell.
// OPTIMIZE: Another two methods would be doing binary search or using a hashtable -- binary search
// when there are very few docs in the leaf

/// <summary>
/// Sumup over the documents listed in <c>input.DocIndices</c>. The stored sparse entries (positions
/// reconstructed by accumulating <c>_deltas</c>, bins taken from <c>_values</c>) are merged against
/// the document indices; every match adds the document's output to the matching bin. Whatever part
/// of <c>input.SumTargets</c> and <c>input.TotalCount</c> was not matched is then added to bin 0.
/// When the histogram tracks weight sums the call is forwarded to <c>SumupLeafWeighted</c>.
/// </summary>
/// <param name="input">Leaf documents with outputs. NOTE(review): the merge only works if
/// <c>DocIndices</c> is in ascending order — confirm against callers.</param>
/// <param name="histogram">Histogram whose <c>SumTargetsByBin</c> and <c>CountByBin</c> are updated in place.</param>
private unsafe void SumupLeaf(SumupInputData input, FeatureHistogram histogram)
{
    if (histogram.SumWeightsByBin != null)
    {
        SumupLeafWeighted(input, histogram);
        return;
    }

    int iDocIndices = 0;
    int iSparse = 0;
    int totalCount = 0;
    FloatType totalOutput = 0;

    // Document position of the current sparse entry; with no entries at all it starts at _length.
    int currentPos = _deltas.Length > 0 ? _deltas[iSparse] : _length;

    // The merge loop reads pDocIndices[iDocIndices] before checking any bound, so it must not be
    // entered for an empty leaf: the pinned pointer would be null (null/empty array) or point at
    // an index that does not belong to this leaf.
    if (input.TotalCount > 0)
    {
        fixed (int* pDocIndices = input.DocIndices)
        fixed (byte* pDeltas = _deltas)
        fixed (FloatType* pOutputs = input.Outputs)
        {
            for (; ; )
            {
                if (currentPos < pDocIndices[iDocIndices])
                {
                    // Sparse entry lies before the current leaf document: advance the sparse side.
                    if (++iSparse >= _deltas.Length)
                    {
                        break;
                    }
                    currentPos += pDeltas[iSparse];
                }
                else if (currentPos > pDocIndices[iDocIndices])
                {
                    // Leaf document has no sparse entry at its position: advance the document side.
                    if (++iDocIndices >= input.TotalCount)
                    {
                        break;
                    }
                }
                else
                {
                    // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                    int featureBin = _values[iSparse];
                    FloatType output = pOutputs[iDocIndices];
                    histogram.SumTargetsByBin[featureBin] += output;
                    totalOutput += output;
                    ++histogram.CountByBin[featureBin];
                    totalCount++;

                    if (++iSparse >= _deltas.Length)
                    {
                        break;
                    }

                    // Note that if the delta is 0, we will "stay" on this document, thus
                    // allowing the sumup to work to accumulate multiple bins per document.
                    if (pDeltas[iSparse] > 0)
                    {
                        currentPos += pDeltas[iSparse];
                        if (++iDocIndices >= input.TotalCount)
                        {
                            break;
                        }
                    }
                }
            }
        }
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
    histogram.CountByBin[0] += input.TotalCount - totalCount;
}