Exemple #1
0
        /// <summary>
        /// Accumulates the per-document outputs of <paramref name="input"/> into the bins of
        /// <paramref name="histogram"/>, using this array's forward indexer to find each document's bin.
        /// When the histogram tracks weights, the work is handed to <c>SumupWeighted</c> instead.
        /// </summary>
        /// <param name="input">Outputs, optional document indices and optional weights to sum up.</param>
        /// <param name="histogram">Histogram whose per-bin target sums and counts are updated in place.</param>
        public virtual void Sumup(SumupInputData input, FeatureHistogram histogram)
        {
            // Weights must be present on both sides or on neither.
            Contracts.Assert((input.Weights == null) == (histogram.SumWeightsByBin == null));

            if (histogram.SumWeightsByBin != null)
            {
                SumupWeighted(input, histogram);
                return;
            }

            IIntArrayForwardIndexer binLookup = GetIndexer();

            for (int doc = 0; doc < input.TotalCount; doc++)
            {
                // Without explicit document indices the i-th output belongs to the i-th row.
                int row = input.DocIndices == null ? doc : input.DocIndices[doc];
                int bin = binLookup[row];

                bool binInRange = bin >= 0 &&
                                  bin < histogram.SumTargetsByBin.Length &&
                                  bin < histogram.NumFeatureValues;
                if (!binInRange)
                {
                    throw Contracts.Except("Feature bin {0} is invalid", bin);
                }

                histogram.SumTargetsByBin[bin] += input.Outputs[doc];
                ++histogram.CountByBin[bin];
            }
        }
        /// <summary>
        /// Sums outputs (and weights, when <c>input.Weights</c> is non-null) into the histogram by walking
        /// the run-length representation: run <c>i</c> assigns bin <c>_values[i]</c> to the next
        /// <c>_deltas[i]</c> consecutive entries of <c>input.Outputs</c>.
        /// </summary>
        /// <param name="input">Outputs and optional weights, consumed sequentially from index 0.</param>
        /// <param name="histogram">Histogram whose per-bin sums and counts are updated in place.</param>
        private unsafe void SumupRoot(SumupInputData input, FeatureHistogram histogram)
        {
            fixed (FloatType* outputsBase = input.Outputs)
            fixed (FloatType* sumTargets = histogram.SumTargetsByBin)
            fixed (double* weightsBase = input.Weights)
            fixed (double* sumWeights = histogram.SumWeightsByBin)
            {
                FloatType* outputCursor = outputsBase;
                double* weightCursor = weightsBase;

                for (int run = 0; run < _values.Length; run++)
                {
                    int bin = _values[run];
                    var runLength = _deltas[run];

                    // Accumulate the run in a local and write the bin back once.
                    FloatType targetSum = sumTargets[bin];
                    FloatType* outputEnd = outputCursor + runLength;
                    for (FloatType* p = outputCursor; p < outputEnd; ++p)
                    {
                        targetSum += *p;
                    }
                    sumTargets[bin] = targetSum;

                    // A null weights pointer means the input carries no weights.
                    if (weightsBase != null)
                    {
                        double weightSum = sumWeights[bin];
                        double* weightEnd = weightCursor + runLength;
                        for (double* w = weightCursor; w < weightEnd; ++w)
                        {
                            weightSum += *w;
                        }
                        sumWeights[bin] = weightSum;
                        weightCursor = weightEnd;
                    }

                    outputCursor = outputEnd;
                    histogram.CountByBin[bin] += runLength;
                }
            }
        }
        /// <summary>
        /// Weighted sum-up for a subset of documents. Merges the sorted document indices in
        /// <c>input.DocIndices</c> with the delta-encoded positions of the explicitly stored entries
        /// (<c>_deltas</c>/<c>_values</c>); every match adds the document's output and weight to the bin
        /// given by <c>_values</c>. Whatever is not matched is attributed to bin 0 at the end, using
        /// the totals carried by <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Document indices, outputs, weights and their precomputed totals.</param>
        /// <param name="histogram">Histogram whose target sums, weight sums and counts are updated in place.</param>
        private unsafe void SumupLeafWeighted(SumupInputData input, FeatureHistogram histogram)
        {
            Contracts.Assert(histogram.SumWeightsByBin != null);
            Contracts.Assert(input.Weights != null);

            int       docCount     = input.TotalCount;
            int       iDocIndices  = 0;
            int       iSparse      = 0;
            int       totalCount   = 0;
            FloatType totalOutput  = 0;
            double    totalWeights = 0;
            // Position of the first stored entry; with no stored entries, _length is used so that
            // the merge below only walks past the documents (presumably _length is beyond every
            // valid document index -- confirm against the class definition).
            int       currentPos   = _deltas.Length > 0 ? _deltas[iSparse] : _length;

            fixed (int* pDocIndices = input.DocIndices)
            fixed (byte* pDeltas = _deltas)
            fixed (FloatType* pOutputs = input.Outputs)
            fixed (double* pWeights = input.Weights)
            {
                // Conditioning the loop on the document cursor (rather than looping forever and
                // breaking after each increment) also covers an empty leaf: pDocIndices[0] is never
                // read when there are no documents.
                while (iDocIndices < docCount)
                {
                    int docIndex = pDocIndices[iDocIndices];

                    if (currentPos < docIndex)
                    {
                        // Stored entry is before the current document: advance the sparse cursor.
                        if (++iSparse >= _deltas.Length)
                        {
                            break;
                        }
                        currentPos += pDeltas[iSparse];
                    }
                    else if (currentPos > docIndex)
                    {
                        // Current document has no stored entry: advance the document cursor.
                        ++iDocIndices;
                    }
                    else
                    {
                        // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                        int       featureBin = _values[iSparse];
                        FloatType output     = pOutputs[iDocIndices];
                        histogram.SumTargetsByBin[featureBin] += output;
                        totalOutput += output;
                        double weight = pWeights[iDocIndices];
                        histogram.SumWeightsByBin[featureBin] += weight;
                        totalWeights += weight;
                        ++histogram.CountByBin[featureBin];

                        totalCount++;

                        if (++iSparse >= _deltas.Length)
                        {
                            break;
                        }

                        // A zero delta keeps us on the same document, so several stored entries
                        // can contribute for one document.
                        if (pDeltas[iSparse] > 0)
                        {
                            currentPos += pDeltas[iSparse];
                            ++iDocIndices;
                        }
                    }
                }
            }

            // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
            histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
            // Weight sums are accumulated in double; do not narrow the remainder to FloatType.
            histogram.SumWeightsByBin[0] += input.SumWeights - totalWeights;
            histogram.CountByBin[0]      += docCount - totalCount;
        }
        // Fixing the arrays and using unsafe accesses may give a slight speedup, but it is hard to tell.
        // OPTIMIZE: Another two methods would be doing binary search or using a hashtable -- binary search
        //  when there are very few docs in the leaf
        /// <summary>
        /// Sum-up for a subset of documents. Delegates to <c>SumupLeafWeighted</c> when the histogram
        /// tracks weights. Otherwise merges the sorted document indices in <c>input.DocIndices</c> with
        /// the delta-encoded positions of the explicitly stored entries (<c>_deltas</c>/<c>_values</c>);
        /// every match adds the document's output to the bin given by <c>_values</c>. Whatever is not
        /// matched is attributed to bin 0 at the end, using the totals carried by <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Document indices, outputs and their precomputed totals.</param>
        /// <param name="histogram">Histogram whose target sums and counts are updated in place.</param>
        private unsafe void SumupLeaf(SumupInputData input, FeatureHistogram histogram)
        {
            if (histogram.SumWeightsByBin != null)
            {
                SumupLeafWeighted(input, histogram);
                return;
            }

            int       docCount    = input.TotalCount;
            int       iDocIndices = 0;
            int       iSparse     = 0;
            int       totalCount  = 0;
            FloatType totalOutput = 0;
            // Position of the first stored entry; with no stored entries, _length is used so that
            // the merge below only walks past the documents (presumably _length is beyond every
            // valid document index -- confirm against the class definition).
            int       currentPos  = _deltas.Length > 0 ? _deltas[iSparse] : _length;

            fixed (int* pDocIndices = input.DocIndices)
            fixed (byte* pDeltas = _deltas)
            fixed (FloatType* pOutputs = input.Outputs)
            {
                // Conditioning the loop on the document cursor (rather than looping forever and
                // breaking after each increment) also covers an empty leaf: pDocIndices[0] is never
                // read when there are no documents.
                while (iDocIndices < docCount)
                {
                    int docIndex = pDocIndices[iDocIndices];

                    if (currentPos < docIndex)
                    {
                        // Stored entry is before the current document: advance the sparse cursor.
                        if (++iSparse >= _deltas.Length)
                        {
                            break;
                        }
                        currentPos += pDeltas[iSparse];
                    }
                    else if (currentPos > docIndex)
                    {
                        // Current document has no stored entry: advance the document cursor.
                        ++iDocIndices;
                    }
                    else
                    {
                        // A nonzero entry matched one of the docs in the leaf, add it to the histogram.
                        int       featureBin = _values[iSparse];
                        FloatType output     = pOutputs[iDocIndices];
                        histogram.SumTargetsByBin[featureBin] += output;
                        totalOutput += output;
                        ++histogram.CountByBin[featureBin];

                        totalCount++;

                        if (++iSparse >= _deltas.Length)
                        {
                            break;
                        }

                        // Note that if the delta is 0, we will "stay" on this document, thus
                        // allowing the sumup to work to accumulate multiple bins per document.
                        if (pDeltas[iSparse] > 0)
                        {
                            currentPos += pDeltas[iSparse];
                            ++iDocIndices;
                        }
                    }
                }
            }

            // Fixup the zeros. There were some zero items already placed in the zero-th entry, just add the remainder.
            histogram.SumTargetsByBin[0] += (FloatType)(input.SumTargets - totalOutput);
            histogram.CountByBin[0]      += docCount - totalCount;
        }