public virtual void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    Contracts.Assert((input.Weights == null) == (histogram.SumWeightsByBin == null));

    // Route to the weighted variant when the histogram tracks per-bin weights.
    if (histogram.SumWeightsByBin != null)
    {
        SumupWeighted(input, histogram);
        return;
    }

    IIntArrayForwardIndexer indexer = GetIndexer();
    for (int i = 0; i < input.TotalCount; i++)
    {
        int featureBin = input.DocIndices == null ? indexer[i] : indexer[input.DocIndices[i]];
        if (featureBin < 0
            || featureBin >= histogram.SumTargetsByBin.Length
            || featureBin >= histogram.NumFeatureValues)
        {
            throw Contracts.Except("Feature bin {0} is invalid", featureBin);
        }

        // Accumulate this document's output into the bin it falls into for this feature.
        histogram.SumTargetsByBin[featureBin] += input.Outputs[i];
        ++histogram.CountByBin[featureBin];
    }
}
private unsafe void SumupRoot(SumupInputData input, FeatureHistogram histogram)
{
    fixed (FloatType* pOutputsFixed = input.Outputs)
    fixed (FloatType* pSumTargetsFixed = histogram.SumTargetsByBin)
    fixed (double* pWeightsFixed = input.Weights)
    fixed (double* pSumWeightsFixed = histogram.SumWeightsByBin)
    {
        FloatType* pOutputs = pOutputsFixed;
        double* pWeights = pWeightsFixed;

        // Walk the (value, delta) pairs: each entry charges the next _deltas[i] outputs
        // to feature bin _values[i], then advances the output (and weight) pointers.
        for (int i = 0; i < _values.Length; i++)
        {
            int featureBin = _values[i];
            //FloatType* pSumTargets = pSumTargetsFixed + featureBin;

            FloatType subsum = pSumTargetsFixed[featureBin];
            for (int j = 0; j < _deltas[i]; ++j)
                subsum += pOutputs[j];
            pSumTargetsFixed[featureBin] = subsum;

            if (pWeightsFixed != null)
            {
                double subweightsum = pSumWeightsFixed[featureBin];
                for (int j = 0; j < _deltas[i]; ++j)
                    subweightsum += pWeights[j];
                pSumWeightsFixed[featureBin] = subweightsum;
                pWeights += _deltas[i];
            }

            pOutputs += _deltas[i];
            histogram.CountByBin[featureBin] += _deltas[i];
        }
    }
}
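// Illustrative sketch, not part of the original class: a managed, safe-code walkthrough of the
// accumulation pattern SumupRoot performs above. The name RunLengthSumupSketch and all parameter
// names are hypothetical; the sketch assumes the same layout the loop above reads, namely that
// values[i] is a feature bin and deltas[i] is how many consecutive outputs are charged to that
// bin before the output cursor advances.
private static void RunLengthSumupSketch(
    int[] values, byte[] deltas, double[] outputs,
    double[] sumTargetsByBin, int[] countByBin)
{
    int docOffset = 0;
    for (int i = 0; i < values.Length; i++)
    {
        int featureBin = values[i];
        double subsum = sumTargetsByBin[featureBin];
        // Mirror the inner loop above: accumulate deltas[i] consecutive outputs into this bin.
        for (int j = 0; j < deltas[i]; ++j)
            subsum += outputs[docOffset + j];
        sumTargetsByBin[featureBin] = subsum;
        countByBin[featureBin] += deltas[i];
        docOffset += deltas[i];
    }
}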
public override void Sumup(SumupInputData input, FeatureHistogram histogram)
{
    using (Timer.Time(TimerEvent.SumupSparse))
    {
        // Use the native sparse sumup when FastTree native code is enabled and the values
        // expose a raw data callback; otherwise fall back to the managed implementations below.
        if (UseFastTreeNative)
        {
            var callbackIntArray = _values as DenseDataCallbackIntArray;
            if (callbackIntArray != null)
            {
                unsafe
                {
                    fixed (byte* pDeltas = _deltas)
                    {
                        byte* pDeltas2 = pDeltas;
                        callbackIntArray.Callback(pValues =>
                        {
                            SumupCPlusPlusSparse(input, histogram, (byte*)pValues, pDeltas2,
                                _deltas.Length, _values.BitsPerItem);
                        });
                    }
                }
                return;
            }
        }

        if (input.DocIndices == null)
        {
            SumupRoot(input, histogram);
        }
        else
        {
            SumupLeaf(input, histogram);
        }
    }
}
private unsafe void SumupLeafWeighted(SumupInputData input, FeatureHistogram histogram)
{
    Contracts.Assert(histogram.SumWeightsByBin != null);
    Contracts.Assert(input.Weights != null);

    int iDocIndices = 0;
    int iSparse = 0;
    int totalCount = 0;
    FloatType totalOutput = 0;
    double totalWeights = 0;
    int currentPos = _deltas.Length > 0 ? _deltas[iSparse] : _length;

    fixed (int* pDocIndices = input.DocIndices)
    fixed (byte* pDeltas = _deltas)
    fixed (FloatType* pOutputs = input.Outputs)
    fixed (double* pWeights = input.Weights)
    {
        while (true)
        {
            if (currentPos < pDocIndices[iDocIndices])
            {
                if (++iSparse >= _deltas.Length)
                    break;
                currentPos += pDeltas[iSparse];
            }
            else if (currentPos > pDocIndices[iDocIndices])
            {
                if (++iDocIndices >= input.TotalCount)
                    break;
            }
            else
            {
                // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                int featureBin = _values[iSparse];

                FloatType output = pOutputs[iDocIndices];
                histogram.SumTargetsByBin[featureBin] += output;
                totalOutput += output;

                double weights = pWeights[iDocIndices];
                histogram.SumWeightsByBin[featureBin] += weights;
                totalWeights += weights;

                ++histogram.CountByBin[featureBin];
                totalCount++;

                if (++iSparse >= _deltas.Length)
                    break;

                if (pDeltas[iSparse] > 0)
                {
                    currentPos += pDeltas[iSparse];
                    if (++iDocIndices >= input.TotalCount)
                        break;
                }
            }
        }
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
    histogram.SumWeightsByBin[0] += (FloatType)(input.SumWeights - totalWeights);
    histogram.CountByBin[0] += input.TotalCount - totalCount;
}
// Fixing the arrays and using unsafe accesses may give a slight speedup, but it is hard to tell.
// OPTIMIZE: Two other approaches would be a binary search or a hashtable; binary search would help
// when there are very few docs in the leaf.
private unsafe void SumupLeaf(SumupInputData input, FeatureHistogram histogram)
{
    if (histogram.SumWeightsByBin != null)
    {
        SumupLeafWeighted(input, histogram);
        return;
    }

    int iDocIndices = 0;
    int iSparse = 0;
    int totalCount = 0;
    FloatType totalOutput = 0;
    int currentPos = _deltas.Length > 0 ? _deltas[iSparse] : _length;

    fixed (int* pDocIndices = input.DocIndices)
    fixed (byte* pDeltas = _deltas)
    fixed (FloatType* pOutputs = input.Outputs)
    {
        for (;;)
        {
            if (currentPos < pDocIndices[iDocIndices])
            {
                if (++iSparse >= _deltas.Length)
                    break;
                currentPos += pDeltas[iSparse];
            }
            else if (currentPos > pDocIndices[iDocIndices])
            {
                if (++iDocIndices >= input.TotalCount)
                    break;
            }
            else
            {
                // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                int featureBin = _values[iSparse];

                FloatType output = pOutputs[iDocIndices];
                histogram.SumTargetsByBin[featureBin] += output;
                totalOutput += output;

                ++histogram.CountByBin[featureBin];
                totalCount++;

                if (++iSparse >= _deltas.Length)
                    break;

                // Note that if the delta is 0, we will "stay" on this document, thus
                // allowing the sumup to work to accumulate multiple bins per document.
                if (pDeltas[iSparse] > 0)
                {
                    currentPos += pDeltas[iSparse];
                    if (++iDocIndices >= input.TotalCount)
                        break;
                }
            }
        }
    }

    // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
    histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
    histogram.CountByBin[0] += input.TotalCount - totalCount;
}
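// Illustrative sketch, not part of the original class: a safe-code version of the merge-join that
// SumupLeaf/SumupLeafWeighted perform between the sorted leaf document indices and the
// delta-encoded positions of the nonzero entries, followed by the "fixup the zeros" step that
// credits the remaining mass to bin 0. The name SparseLeafSumupSketch and all parameter names are
// hypothetical; the sketch assumes deltas[i] is the gap from the previous nonzero document
// position to the i-th one, and that outputs[i] is the target for docIndices[i].
private static void SparseLeafSumupSketch(
    int[] docIndices, double[] outputs, double sumTargets,
    int[] values, byte[] deltas,
    double[] sumTargetsByBin, int[] countByBin)
{
    int iDoc = 0;
    int iSparse = 0;
    int matchedCount = 0;
    double matchedOutput = 0;

    if (deltas.Length > 0 && docIndices.Length > 0)
    {
        int currentPos = deltas[0];
        while (true)
        {
            if (currentPos < docIndices[iDoc])
            {
                // The nonzero entry precedes the next leaf document: advance on the sparse side.
                if (++iSparse >= deltas.Length)
                    break;
                currentPos += deltas[iSparse];
            }
            else if (currentPos > docIndices[iDoc])
            {
                // This leaf document has no nonzero entry here: advance on the document side.
                if (++iDoc >= docIndices.Length)
                    break;
            }
            else
            {
                // Match: this leaf document has a nonzero bin, so accumulate it.
                int featureBin = values[iSparse];
                sumTargetsByBin[featureBin] += outputs[iDoc];
                matchedOutput += outputs[iDoc];
                ++countByBin[featureBin];
                ++matchedCount;

                if (++iSparse >= deltas.Length)
                    break;

                // A zero gap keeps us on the same document so another bin can be accumulated for it.
                if (deltas[iSparse] > 0)
                {
                    currentPos += deltas[iSparse];
                    if (++iDoc >= docIndices.Length)
                        break;
                }
            }
        }
    }

    // Every leaf document not matched above implicitly fell into the zero bin.
    sumTargetsByBin[0] += sumTargets - matchedOutput;
    countByBin[0] += docIndices.Length - matchedCount;
}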
public void Run()
{
    using (var ch = _host.Start("Run"))
    {
        IntArray[] arrays = CreateRandomIntArrays(ch);
        FeatureHistogram[] histograms = arrays.Select(bins => new FeatureHistogram(bins, _bins, false)).ToArray(arrays.Length);
        long bytes = arrays.Sum(i => (long)i.SizeInBytes());
        ch.Info("Created {0} int arrays taking {1} bytes", arrays.Length, bytes);

        // Objects for the pool.
        ch.Info("Parallelism = {0}", _parallel);
        AutoResetEvent[] events = Utils.BuildArray(_parallel, i => new AutoResetEvent(false));
        AutoResetEvent[] mainEvents = Utils.BuildArray(_parallel, i => new AutoResetEvent(false));
        SumupInputData data = new SumupInputData(_len, 0, 0, new FloatType[_len], null, new int[_len]);
        Thread[] threadPool = new Thread[_parallel];
        Stopwatch sw = new Stopwatch();
        long ticksPerCycle = (long)(Stopwatch.Frequency * _seconds);
        double[] partitionProportion = { 1, 1, 0.5, 1e-1, 1e-2, 1e-3, 1e-4 };

        long completed = 0;

        for (int t = 0; t < threadPool.Length; ++t)
        {
            Thread thread = threadPool[t] = Utils.RunOnForegroundThread((object io) =>
            {
                int w = (int)io;
                AutoResetEvent ev = events[w];
                AutoResetEvent mev = mainEvents[w];

                for (int s = 0; s < partitionProportion.Length; s++)
                {
                    ev.WaitOne();
                    long localCompleted = 0;
                    for (int f = w; ; f = f + threadPool.Length < arrays.Length ? f + threadPool.Length : w)
                    {
                        // This should repeat till done.
                        arrays[f].Sumup(data, histograms[f]);
                        if (sw.ElapsedTicks > ticksPerCycle)
                            break;
                        Interlocked.Increment(ref completed);
                        ++localCompleted;
                    }
                    mev.Set();
                }
            });
            thread.Start(t);
        }

        foreach (double partition in partitionProportion)
        {
            InitSumupInputData(data, partition, _host.Rand);
            completed = 0;
            sw.Restart();
            foreach (var e in events)
            {
                e.Set();
            }
            foreach (var e in mainEvents)
            {
                e.WaitOne();
            }
            double ticksPerDoc = (double)ticksPerCycle / (completed * data.TotalCount);
            double nsPerDoc = ticksPerDoc * 1e9 / Stopwatch.Frequency;
            ch.Info("Partition {0} ({1} of {2}), completed {3} ({4:0.000} ns per doc)",
                partition, data.TotalCount, _len, completed, nsPerDoc);
        }
    }
}
public override void Sumup(SumupInputData input, FeatureHistogram histogram) => SumupHandler(input, histogram);