예제 #1
0
        /// <summary>
        /// Accumulates the per-bin target sums and document counts of this feature into
        /// <paramref name="histogram"/> for the documents described by <paramref name="input"/>.
        /// When the histogram carries per-bin weight sums the work is handed to <c>SumupWeighted</c>.
        /// </summary>
        /// <param name="input">
        /// Outputs (and optional document indices) to accumulate. When <c>DocIndices</c> is null,
        /// position <c>i</c> is used as the document index.
        /// </param>
        /// <param name="histogram">Histogram whose <c>SumTargetsByBin</c> and <c>CountByBin</c> are updated in place.</param>
        public virtual void Sumup(SumupInputData input, FeatureHistogram histogram)
        {
            // Weights must be present on both sides or on neither.
            Contracts.Assert((input.Weights == null) == (histogram.SumWeightsByBin == null));

            bool histogramTracksWeights = histogram.SumWeightsByBin != null;
            if (histogramTracksWeights)
            {
                SumupWeighted(input, histogram);
                return;
            }

            IIntArrayForwardIndexer binLookup = GetIndexer();

            for (int row = 0; row < input.TotalCount; ++row)
            {
                // Resolve which bin this row's document falls into.
                int bin;
                if (input.DocIndices == null)
                {
                    bin = binLookup[row];
                }
                else
                {
                    bin = binLookup[input.DocIndices[row]];
                }

                bool binIsValid = bin >= 0 &&
                                  bin < histogram.SumTargetsByBin.Length &&
                                  bin < histogram.NumFeatureValues;
                if (!binIsValid)
                {
                    throw Contracts.Except("Feature bin {0} is invalid", bin);
                }

                histogram.SumTargetsByBin[bin] += input.Outputs[row];
                histogram.CountByBin[bin]++;
            }
        }
예제 #2
0
        /// <summary>
        /// Accumulates targets (and weights, when <c>input.Weights</c> is non-null) into
        /// <paramref name="histogram"/> by walking the run-length pairs
        /// (<c>_values[i]</c>, <c>_deltas[i]</c>): run <c>i</c> consumes the next <c>_deltas[i]</c>
        /// consecutive entries of <c>input.Outputs</c> and adds them to bin <c>_values[i]</c>.
        /// </summary>
        /// <remarks>
        /// The weighted branch writes through the pinned <c>histogram.SumWeightsByBin</c> pointer
        /// without a null check, so it relies on that array being present whenever
        /// <c>input.Weights</c> is.
        /// </remarks>
        private unsafe void SumupRoot(SumupInputData input, FeatureHistogram histogram)
        {
            fixed (FloatType* outputsBase = input.Outputs)
            fixed (FloatType* targetSums = histogram.SumTargetsByBin)
            fixed (double* weightsBase = input.Weights)
            fixed (double* weightSums = histogram.SumWeightsByBin)
            {
                // Moving cursors over the input; the pinned base pointers themselves are read-only.
                FloatType* outputCursor = outputsBase;
                double* weightCursor = weightsBase;
                bool weighted = weightsBase != null;

                for (int run = 0; run < _values.Length; run++)
                {
                    int bin = _values[run];
                    var runLength = _deltas[run];

                    // Sum into a local first, then store once per run.
                    FloatType targetAccumulator = targetSums[bin];
                    for (int k = 0; k < runLength; ++k)
                    {
                        targetAccumulator += outputCursor[k];
                    }
                    targetSums[bin] = targetAccumulator;

                    if (weighted)
                    {
                        double weightAccumulator = weightSums[bin];
                        for (int k = 0; k < runLength; ++k)
                        {
                            weightAccumulator += weightCursor[k];
                        }
                        weightSums[bin] = weightAccumulator;
                        weightCursor += runLength;
                    }

                    outputCursor += runLength;
                    histogram.CountByBin[bin] += runLength;
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Accumulates this sparse feature into <paramref name="histogram"/>, timed under
        /// <c>TimerEvent.SumupSparse</c>. When <c>UseFastTreeNative</c> is set and <c>_values</c> is a
        /// <c>DenseDataCallbackIntArray</c>, the work is delegated to <c>SumupCPlusPlusSparse</c>;
        /// otherwise <c>SumupRoot</c> is used when <c>input.DocIndices</c> is null and
        /// <c>SumupLeaf</c> when it is not.
        /// </summary>
        /// <param name="input">Documents, outputs and optional weights to accumulate.</param>
        /// <param name="histogram">Histogram updated in place.</param>
        public override void Sumup(SumupInputData input, FeatureHistogram histogram)
        {
            using (Timer.Time(TimerEvent.SumupSparse))
            {
                // Only attempt the cast when the native path is enabled.
                DenseDataCallbackIntArray nativeValues =
                    UseFastTreeNative ? _values as DenseDataCallbackIntArray : null;

                if (nativeValues != null)
                {
                    unsafe
                    {
                        fixed (byte* pinnedDeltas = _deltas)
                        {
                            // A fixed local cannot be captured by a lambda, so hand it over through a plain copy.
                            byte* deltasForCallback = pinnedDeltas;

                            nativeValues.Callback(valuesPtr =>
                            {
                                SumupCPlusPlusSparse(input, histogram, (byte*)valuesPtr, deltasForCallback,
                                                     _deltas.Length, _values.BitsPerItem);
                            });
                        }
                    }
                    return;
                }

                if (input.DocIndices != null)
                {
                    SumupLeaf(input, histogram);
                }
                else
                {
                    SumupRoot(input, histogram);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Weighted leaf sumup for a delta-encoded sparse feature: walks the sparse entries and
        /// the leaf's <c>input.DocIndices</c> in lockstep, and for every sparse entry whose position
        /// matches a leaf document adds that document's output and weight to the entry's bin.
        /// Whatever part of <c>input.SumTargets</c>, <c>input.SumWeights</c> and
        /// <c>input.TotalCount</c> was not matched is then added to bin 0.
        /// </summary>
        /// <param name="input">Leaf documents with outputs and weights; <c>Weights</c> must be non-null.</param>
        /// <param name="histogram">Histogram updated in place; <c>SumWeightsByBin</c> must be non-null.</param>
        private unsafe void SumupLeafWeighted(SumupInputData input, FeatureHistogram histogram)
        {
            Contracts.Assert(histogram.SumWeightsByBin != null);
            Contracts.Assert(input.Weights != null);

            int       iDocIndices  = 0;
            int       iSparse      = 0;
            int       totalCount   = 0;
            FloatType totalOutput  = 0;
            double    totalWeights = 0;
            // Position of the current sparse entry; with no sparse entries it starts at _length.
            int       currentPos   = _deltas.Length > 0 ? _deltas[iSparse] : _length;

            fixed (int* pDocIndices = input.DocIndices)
            fixed (byte* pDeltas = _deltas)
            fixed (FloatType* pOutputs = input.Outputs)
            fixed (double* pWeights = input.Weights)
            {
                // NOTE(review): the first comparison reads pDocIndices[0] before any count check,
                // so this loop presumes input.TotalCount > 0 -- confirm callers never pass an empty leaf.
                while (true)
                {
                    if (currentPos < pDocIndices[iDocIndices])
                    {
                        // Sparse entry is behind the current leaf document: advance the sparse side.
                        if (++iSparse >= _deltas.Length)
                        {
                            break;
                        }
                        currentPos += pDeltas[iSparse];
                    }
                    else if (currentPos > pDocIndices[iDocIndices])
                    {
                        // Leaf document is behind the sparse entry: advance the leaf side.
                        if (++iDocIndices >= input.TotalCount)
                        {
                            break;
                        }
                    }
                    else
                    {
                        // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                        int       featureBin = _values[iSparse];
                        FloatType output     = pOutputs[iDocIndices];
                        histogram.SumTargetsByBin[featureBin] += output;
                        totalOutput += output;
                        double weights = pWeights[iDocIndices];
                        histogram.SumWeightsByBin[featureBin] += weights;
                        totalWeights += weights;
                        ++histogram.CountByBin[featureBin];

                        totalCount++;

                        if (++iSparse >= _deltas.Length)
                        {
                            break;
                        }

                        // A zero delta keeps us on the same document so that several bins can be
                        // accumulated for it; only a positive delta moves both cursors forward.
                        if (pDeltas[iSparse] > 0)
                        {
                            currentPos += pDeltas[iSparse];
                            if (++iDocIndices >= input.TotalCount)
                            {
                                break;
                            }
                        }
                    }
                }
            }

            // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
            histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
            // Weight sums are accumulated in double; do not narrow the remainder to FloatType,
            // which would drop precision whenever FloatType is single precision.
            histogram.SumWeightsByBin[0] += (double)(input.SumWeights - totalWeights);
            histogram.CountByBin[0]      += input.TotalCount - totalCount;
        }
예제 #5
0
        /// <summary>
        /// Leaf sumup for a delta-encoded sparse feature. Delegates to <c>SumupLeafWeighted</c> when
        /// the histogram carries weight sums; otherwise walks the sparse entries and the leaf's
        /// <c>input.DocIndices</c> in lockstep, adding each matching document's output to the
        /// entry's bin, and finally adds the unmatched remainder of <c>input.SumTargets</c> and
        /// <c>input.TotalCount</c> to bin 0.
        /// </summary>
        /// <remarks>
        /// Fixing the arrays and using unsafe accesses may give a slight speedup, but it is hard to tell.
        /// OPTIMIZE: two alternatives would be a binary search or a hashtable -- binary search
        /// when there are very few docs in the leaf.
        /// </remarks>
        private unsafe void SumupLeaf(SumupInputData input, FeatureHistogram histogram)
        {
            if (histogram.SumWeightsByBin != null)
            {
                SumupLeafWeighted(input, histogram);
                return;
            }

            int       leafCursor       = 0;
            int       sparseCursor     = 0;
            int       matchedDocs      = 0;
            FloatType matchedTargetSum = 0;

            // Position of the current sparse entry; with no sparse entries it starts at _length.
            int sparsePos;
            if (_deltas.Length > 0)
            {
                sparsePos = _deltas[sparseCursor];
            }
            else
            {
                sparsePos = _length;
            }

            fixed (int* leafDocs = input.DocIndices)
            fixed (byte* gaps = _deltas)
            fixed (FloatType* targets = input.Outputs)
            {
                while (true)
                {
                    int leafDoc = leafDocs[leafCursor];

                    if (sparsePos < leafDoc)
                    {
                        // Sparse side is behind: step to the next sparse entry.
                        if (++sparseCursor >= _deltas.Length)
                        {
                            break;
                        }
                        sparsePos += gaps[sparseCursor];
                        continue;
                    }

                    if (sparsePos > leafDoc)
                    {
                        // Leaf side is behind: step to the next leaf document.
                        if (++leafCursor >= input.TotalCount)
                        {
                            break;
                        }
                        continue;
                    }

                    // The sparse entry sits exactly on a leaf document: record it in its bin.
                    int       bin    = _values[sparseCursor];
                    FloatType target = targets[leafCursor];
                    histogram.SumTargetsByBin[bin] += target;
                    matchedTargetSum += target;
                    histogram.CountByBin[bin]++;
                    ++matchedDocs;

                    if (++sparseCursor >= _deltas.Length)
                    {
                        break;
                    }

                    // A zero gap means the next sparse entry belongs to this same document, so we
                    // stay put and let the sumup accumulate multiple bins per document.
                    byte gap = gaps[sparseCursor];
                    if (gap > 0)
                    {
                        sparsePos += gap;
                        if (++leafCursor >= input.TotalCount)
                        {
                            break;
                        }
                    }
                }
            }

            // Everything in the leaf that no sparse entry accounted for belongs to bin 0.
            histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - matchedTargetSum);
            histogram.CountByBin[0]      += input.TotalCount - matchedDocs;
        }
예제 #6
0
        /// <summary>
        /// Benchmarks <c>Sumup</c> over randomly generated int arrays. Starts <c>_parallel</c> worker
        /// threads; for each entry of the partition-proportion table the shared input is
        /// re-initialized, the workers are released, and each worker calls <c>Sumup</c> on its share
        /// of the arrays until the shared stopwatch exceeds the per-cycle tick budget. The number of
        /// completed sumups and a derived nanoseconds-per-document figure are logged per partition.
        /// </summary>
        public void Run()
        {
            using (var ch = _host.Start("Run"))
            {
                // One histogram per generated array.
                IntArray[]         arrays     = CreateRandomIntArrays(ch);
                FeatureHistogram[] histograms =
                    arrays.Select(bins => new FeatureHistogram(bins, _bins, false)).ToArray(arrays.Length);
                long bytes = arrays.Sum(i => (long)i.SizeInBytes());
                ch.Info("Created {0} int arrays taking {1} bytes", arrays.Length, bytes);

                // Objects for the pool.
                // events[w] releases worker w for a cycle; mainEvents[w] tells the main thread worker w is done.
                ch.Info("Parallelism = {0}", _parallel);
                AutoResetEvent[] events              = Utils.BuildArray(_parallel, i => new AutoResetEvent(false));
                AutoResetEvent[] mainEvents          = Utils.BuildArray(_parallel, i => new AutoResetEvent(false));
                // A single input instance is shared by all workers and re-initialized between cycles.
                SumupInputData   data                = new SumupInputData(_len, 0, 0, new FloatType[_len], null, new int[_len]);
                Thread[]         threadPool          = new Thread[_parallel];
                Stopwatch        sw                  = new Stopwatch();
                // Tick budget of one measurement cycle (_seconds expressed in stopwatch ticks).
                long             ticksPerCycle       = (long)(Stopwatch.Frequency * _seconds);
                // One measurement cycle is run per entry; each value is passed to InitSumupInputData.
                double[]         partitionProportion = { 1, 1, 0.5, 1e-1, 1e-2, 1e-3, 1e-4 };

                // Sumups finished in the current cycle, summed over all workers.
                long completed = 0;

                for (int t = 0; t < threadPool.Length; ++t)
                {
                    Thread thread = threadPool[t] = Utils.RunOnForegroundThread((object io) =>
                    {
                        // The worker's index arrives boxed through thread.Start(t).
                        int w              = (int)io;
                        AutoResetEvent ev  = events[w];
                        AutoResetEvent mev = mainEvents[w];
                        for (int s = 0; s < partitionProportion.Length; s++)
                        {
                            // Block until the main thread has prepared the data for this cycle.
                            ev.WaitOne();
                            // NOTE(review): localCompleted is incremented but never read in this method.
                            long localCompleted = 0;
                            // Worker w handles arrays w, w + poolSize, w + 2 * poolSize, ... and wraps back to w.
                            // NOTE(review): f starts at w without a bounds check, so this appears to require
                            // arrays.Length >= threadPool.Length -- confirm.
                            for (int f = w; ; f = f + threadPool.Length < arrays.Length ? f + threadPool.Length : w)
                            {
                                // This should repeat till done.
                                arrays[f].Sumup(data, histograms[f]);
                                // The sumup that runs past the budget is not counted.
                                if (sw.ElapsedTicks > ticksPerCycle)
                                {
                                    break;
                                }
                                Interlocked.Increment(ref completed);
                                ++localCompleted;
                            }
                            // Report the end of this cycle to the main thread.
                            mev.Set();
                        }
                    });
                    thread.Start(t);
                }

                foreach (double partition in partitionProportion)
                {
                    // Prepare the shared input, then start the clock and release every worker.
                    InitSumupInputData(data, partition, _host.Rand);
                    completed = 0;
                    sw.Restart();
                    foreach (var e in events)
                    {
                        e.Set();
                    }
                    // Wait for every worker to finish the cycle.
                    foreach (var e in mainEvents)
                    {
                        e.WaitOne();
                    }
                    // Uses the nominal cycle budget rather than the measured elapsed time.
                    double ticksPerDoc = (double)ticksPerCycle / (completed * data.TotalCount);
                    double nsPerDoc    = ticksPerDoc * 1e9 / Stopwatch.Frequency;
                    ch.Info("Partition {0} ({1} of {2}), completed {3} ({4:0.000} ns per doc)",
                            partition, data.TotalCount, _len, completed, nsPerDoc);
                }
            }
        }
예제 #7
0
 /// <summary>
 /// Accumulates this feature into <paramref name="histogram"/> by forwarding both arguments,
 /// unchanged, to <c>SumupHandler</c>.
 /// </summary>
 /// <param name="input">Documents and outputs to accumulate.</param>
 /// <param name="histogram">Histogram passed through to the handler.</param>
 public override void Sumup(SumupInputData input, FeatureHistogram histogram)
 {
     SumupHandler(input, histogram);
 }