コード例 #1
0
 /// <summary>
 /// Creates an indexer that maps values from the "concatenated" bin space shared by
 /// all features back into the logical bin space of a single feature.
 /// </summary>
 /// <param name="indexer">Indexer over the "shared" <see cref="IntArray"/>. Its values
 /// that fall in [<paramref name="min"/>,<paramref name="lim"/>) are translated to
 /// 1, 2, ... in the logical space of this feature; every other value is translated
 /// to 0</param>
 /// <param name="min">Smallest value of the underlying indexer that translates to 1;
 /// asserted to be at least 1</param>
 /// <param name="lim">Exclusive upper bound of the translated range; asserted to be
 /// strictly greater than <paramref name="min"/></param>
 public Indexer(IIntArrayForwardIndexer indexer, int min, int lim)
 {
     Contracts.AssertValue(indexer);
     Contracts.Assert(min >= 1 && lim > min);

     // Store min - 1 rather than min, as the field name indicates.
     _minMinusOne = min - 1;
     _lim         = lim;
     _indexer     = indexer;
 }
コード例 #2
0
 /// <summary>
 /// Splits this array into one <see cref="DeltaRepeatIntArray"/> per entry of
 /// <paramref name="assignment"/>.
 /// </summary>
 /// <param name="assignment">For each output part, the item indices of this array
 /// whose values make up that part, in the order they should appear</param>
 /// <returns>An array with one part per entry of <paramref name="assignment"/>. Its
 /// runtime element type is <see cref="IntArray"/>, so callers may store any
 /// <see cref="IntArray"/> implementation into it</returns>
 public override IntArray[] Split(int[][] assignment)
 {
     return assignment.Select(a =>
     {
         // A new indexer is obtained for every part (presumably because forward
         // indexers are stateful and each part restarts from low indices - confirm).
         IIntArrayForwardIndexer indexer = GetIndexer();

         // Type the result as the base class on purpose: if the lambda returned
         // DeltaRepeatIntArray, ToArray() would create a DeltaRepeatIntArray[] that
         // is only covariantly an IntArray[], and storing any other IntArray subtype
         // into it would throw ArrayTypeMismatchException.
         IntArray part = new DeltaRepeatIntArray(a.Length, BitsPerItem, a.Select(i => indexer[i]));
         return part;
     }).ToArray();
 }
コード例 #3
0
        /// <summary>
        /// Splits this array into one sparse <see cref="IntArray"/> per entry of
        /// <paramref name="assignment"/>.
        /// </summary>
        /// <param name="assignment">For each output part, the item indices of this array
        /// whose values make up that part</param>
        /// <returns>One newly created array per entry of <paramref name="assignment"/></returns>
        public override IntArray[] Split(int[][] assignment)
        {
            int partCount = assignment.Length;
            IntArray[] result = new IntArray[partCount];

            for (int p = 0; p < partCount; p++)
            {
                // Each part gets its own indexer instance.
                IIntArrayForwardIndexer source = GetIndexer();
                int[] items = assignment[p];
                result[p] = IntArray.New(items.Length, IntArrayType.Sparse, BitsPerItem, items.Select(item => source[item]));
            }

            return result;
        }
コード例 #4
0
        /// <summary>
        /// Partitions the documents of a leaf between the leaf itself and a new child,
        /// according to a feature value and a threshold.
        /// </summary>
        /// <param name="leaf">the leaf being split; afterwards it keeps the documents whose
        /// feature value is less than or equal to the threshold</param>
        /// <param name="indexer">the indexer used to read the split feature value of each document</param>
        /// <param name="threshold">the threshold</param>
        /// <param name="gtChildIndex">Index of child node that receives the documents whose
        /// split feature value is greater than the split threshold</param>
        public unsafe void Split(int leaf, IIntArrayForwardIndexer indexer, UInt32 threshold, int gtChildIndex)
        {
            using (Timer.Time(TimerEvent.DocumentPartitioningSplit))
            {
                // The scratch buffer is allocated on first use, as large as _documents.
                if (_tempDocuments == null)
                {
                    _tempDocuments = new int[_documents.Length];
                }

                // The "less than or equal" child reuses the index of the leaf being split.
                int first     = _leafBegin[leaf];
                int limit     = first + _leafCount[leaf];
                int keepPos   = first; // next slot for a document that stays in the leaf
                int movedPos  = first; // next scratch slot for a document that moves out

                fixed (int* pDocuments = _documents)
                fixed (int* pTempDocuments = _tempDocuments)
                {
                    for (int pos = first; pos < limit; ++pos)
                    {
                        int doc = pDocuments[pos];
                        if (indexer[doc] <= threshold)
                        {
                            // Stays in the leaf: compact it in place.
                            pDocuments[keepPos++] = doc;
                        }
                        else
                        {
                            // Goes to the "greater than" child: park it in the scratch buffer.
                            pTempDocuments[movedPos++] = doc;
                        }
                    }
                }

                int keptCount  = keepPos - first;
                int movedCount = movedPos - first;

                // Put the parked documents right behind the ones that stayed.
                Array.Copy(_tempDocuments, first, _documents, keepPos, movedCount);

                _leafCount[leaf]         = keptCount;
                _leafBegin[gtChildIndex] = keepPos;
                _leafCount[gtChildIndex] = movedCount;
            }
        }
コード例 #5
0
        /// <summary>
        /// Adds the outputs, weights and counts of the input rows to the histogram bins
        /// selected by this array's values.
        /// </summary>
        /// <param name="input">Rows to accumulate. When its DocIndices is null, row i reads
        /// item i of this array; otherwise row i reads item DocIndices[i]</param>
        /// <param name="histogram">Histogram whose SumTargetsByBin, SumWeightsByBin and
        /// CountByBin are updated in place</param>
        private void SumupWeighted(SumupInputData input, FeatureHistogram histogram)
        {
            Contracts.AssertValue(histogram.SumWeightsByBin);
            Contracts.AssertValue(input.Weights);

            IIntArrayForwardIndexer bins = GetIndexer();
            for (int row = 0; row < input.TotalCount; row++)
            {
                int featureBin;
                if (input.DocIndices == null)
                {
                    featureBin = bins[row];
                }
                else
                {
                    featureBin = bins[input.DocIndices[row]];
                }

                // The bin must be a valid index and below the declared number of feature values.
                bool inRange = featureBin >= 0 &&
                               featureBin < histogram.SumTargetsByBin.Length &&
                               featureBin < histogram.NumFeatureValues;
                if (!inRange)
                {
                    throw Contracts.Except("Feature bin {0} is invalid", featureBin);
                }

                histogram.SumTargetsByBin[featureBin] += input.Outputs[row];
                histogram.SumWeightsByBin[featureBin] += input.Weights[row];
                histogram.CountByBin[featureBin]++;
            }
        }
コード例 #6
0
        /// <summary>
        /// Computes how the documents of a leaf would be partitioned by a split on a feature
        /// and a threshold value. The leaf itself is only read, not modified.
        /// </summary>
        /// <param name="leaf">the leaf being split</param>
        /// <param name="indexer">the indexer to access the feature value</param>
        /// <param name="threshold">the threshold</param>
        /// <param name="leftDocuments">[out] buffer, as long as the leaf's document count, whose
        /// leading entries are the documents with a feature value not greater than the threshold</param>
        /// <param name="leftDocumentSize">[out] the number of valid entries in the left buffer</param>
        /// <param name="rightDocuments">[out] buffer, as long as the leaf's document count, whose
        /// leading entries are the documents with a feature value greater than the threshold</param>
        /// <param name="rightDocumentSize">[out] the number of valid entries in the right buffer</param>
        public unsafe void GetLeafDocumentPartitions(
            int leaf,
            IIntArrayForwardIndexer indexer,
            UInt32 threshold,
            out int[] leftDocuments,
            out int leftDocumentSize,
            out int[] rightDocuments,
            out int rightDocumentSize)
        {
            using (Timer.Time(TimerEvent.DocumentPartitioningSplit))
            {
                // Either side may end up holding every document, so both buffers are full size.
                int leafSize = _leafCount[leaf];

                leftDocuments     = new int[leafSize];
                leftDocumentSize  = 0;
                rightDocuments    = new int[leafSize];
                rightDocumentSize = 0;

                int first = _leafBegin[leaf];
                int limit = first + leafSize;

                fixed (int* pDocuments = _documents)
                fixed (int* pLeft = leftDocuments)
                fixed (int* pRight = rightDocuments)
                {
                    for (int pos = first; pos < limit; ++pos)
                    {
                        int doc = pDocuments[pos];
                        if (indexer[doc] <= threshold)
                        {
                            pLeft[leftDocumentSize++] = doc;
                        }
                        else
                        {
                            pRight[rightDocumentSize++] = doc;
                        }
                    }
                }
            }
        }
コード例 #7
0
        /// <summary>
        /// Builds a new <see cref="DeltaRepeatIntArray"/> holding only the items of this
        /// array selected by <paramref name="itemIndices"/>.
        /// </summary>
        /// <param name="itemIndices">Indices of the items to copy; the result has one
        /// item per index, in the same order</param>
        /// <returns>The newly created array</returns>
        public override IntArray Clone(int[] itemIndices)
        {
            IIntArrayForwardIndexer source = GetIndexer();
            int count = itemIndices.Length;

            // Lazily evaluated: the values are read when the new array enumerates them.
            var selectedValues = itemIndices.Select(index => source[index]);

            return new DeltaRepeatIntArray(count, BitsPerItem, selectedValues);
        }
コード例 #8
0
        /// <summary>
        /// Builds a new sparse <see cref="IntArray"/> holding only the items of this
        /// array selected by <paramref name="itemIndices"/>.
        /// </summary>
        /// <param name="itemIndices">Indices of the items to copy; the result has one
        /// item per index, in the same order</param>
        /// <returns>The newly created array</returns>
        public override IntArray Clone(int[] itemIndices)
        {
            IIntArrayForwardIndexer source = GetIndexer();
            int count = itemIndices.Length;

            // Lazily evaluated: the values are read when the new array enumerates them.
            var selectedValues = itemIndices.Select(index => source[index]);

            return IntArray.New(count, IntArrayType.Sparse, BitsPerItem, selectedValues);
        }
コード例 #9
0
        /// <summary>
        /// Finds the most space efficient representation of the feature
        /// (with slight slack cut for dense features). The behavior of
        /// this method depends upon the static value <see cref="CompatibilityLevel"/>.
        /// </summary>
        /// <remarks>
        /// A single pass over the items gathers the statistics needed to estimate the
        /// size in bits of the dense, sparse, repeat (run-length) and, optionally,
        /// segmented representations. The cheapest one is chosen, except that dense is
        /// kept whenever the best estimate is not below 98% of the dense estimate.
        /// </remarks>
        /// <param name="workarray">Should be non-null if you want it to
        /// consider segment arrays. When non-null it is overwritten with the
        /// values of this array, so it must hold at least <c>Length</c> elements.</param>
        /// <returns>Returns a more space efficient version of the array,
        /// or the item itself if that is impossible, somehow.</returns>
        public IntArray Compress(uint[] workarray = null)
        {
            int maxval     = 0;  // Largest value seen so far.
            int zerocount  = 0;  // Number of items equal to zero.
            int runs       = 0;  // Number of items equal to their immediate predecessor.
            int last       = -1; // Value of the previous item (-1 before the first item).
            int overflows  = 0;  // Number of times the run counter exceeded byte.MaxValue.
            int zoverflows = 0;  // Same as overflows, counting only runs of zeros.
            // Counter of repeated values since the last overflow.
            // NOTE(review): this is not reset when the value changes, so it does not
            // track the length of a single run - confirm whether that is intended.
            int runnow     = 0;
            int len        = Length;
            IIntArrayForwardIndexer ind = GetIndexer();

            for (int i = 0; i < len; ++i)
            {
                int val = ind[i];
                if (workarray != null)
                {
                    // Keep a copy of the raw values for the segment cost computation below.
                    workarray[i] = (uint)val;
                }
                if (val == 0)
                {
                    zerocount++;
                }
                else if (val > maxval)
                {
                    maxval = val;
                }
                if (last == val)
                {
                    runs++;
                    if (++runnow > byte.MaxValue)
                    {
                        // The counter went past what fits in a byte (256 repeats counted).
                        overflows++;
                        if (val == 0)
                        {
                            zoverflows++;
                        }
                        runnow = 0;
                    }
                }
                last = val;
            }
            // Estimate the costs of the available options.
            // dense:  classicBits per item, for every item.
            // sparse: (max(classicBits, 8) + 8) bits per non-zero item, plus one entry per zero overflow.
            // repeat: (classicBits + 8) bits per item that starts a run, plus one entry per overflow.
            // segBits stays at long.MaxValue unless the segment estimate below is computed.
            IntArrayBits classicBits    = IntArray.NumBitsNeeded(maxval + 1);
            long         denseBits      = (long)classicBits * (long)Length;
            long         sparseBits     = (long)(Math.Max((int)classicBits, 8) + 8) * (long)(Length - zerocount + zoverflows);
            long         rleBits        = (long)(classicBits + 8) * (long)(Length - runs + overflows);
            long         segBits        = long.MaxValue;
            int          segTransitions = 0;

            if (workarray != null)
            {
                // Segment arrays are only considered when the largest value needs at most 21 bits.
                int bits = SegmentIntArray.BitsForValue((uint)maxval);
                if (bits <= 21)
                {
                    SegmentIntArray.SegmentFindOptimalPath(workarray, Length,
                                                           bits, out segBits, out segTransitions);
                }
            }
            if ((IntArray.CompatibilityLevel & 0x4) == 0)
            {
                // The repeat representation is ruled out unless bit 0x4 of the compatibility level is set.
                rleBits = long.MaxValue;
            }
            long         bestCost = Math.Min(Math.Min(Math.Min(denseBits, sparseBits), rleBits), segBits);
            IntArrayType bestType = IntArrayType.Dense;

            // On equal costs the order of these checks decides: dense, then sparse, then repeat, then segmented.
            if (bestCost >= denseBits * 98 / 100)
            {
                // Cut the dense bits a wee bit of slack.
            }
            else if (bestCost == sparseBits)
            {
                bestType = IntArrayType.Sparse;
            }
            else if (bestCost == rleBits)
            {
                bestType = IntArrayType.Repeat;
            }
            else
            {
                bestType = IntArrayType.Segmented;
            }
            if (bestType == Type && classicBits == BitsPerItem)
            {
                // Already stored in the chosen representation with the chosen bit width.
                return(this);
            }
            IntArray bins = null;

            if (bestType != IntArrayType.Segmented)
            {
                bins = IntArray.New(Length, bestType, classicBits, this);
            }
            else
            {
                // Only reachable when workarray was supplied, since segBits is long.MaxValue otherwise.
                bins = SegmentIntArray.FromWorkArray(workarray, Length, segBits, segTransitions);
            }
            return(bins);
        }
コード例 #10
0
 /// <summary>
 /// Creates an indexer for the given flock, backed by an indexer over the flock's bins.
 /// </summary>
 /// <param name="flock">The flock to index; asserted to be non-null</param>
 public FlockIndexer(OneHotFeatureFlock flock)
 {
     Contracts.AssertValue(flock);

     // Obtain the bin indexer directly from the argument, then keep the flock.
     _indexer = flock.Bins.GetIndexer();
     _flock   = flock;
 }
コード例 #11
0
 /// <summary>
 /// Creates an indexer for the given flock, backed by the flock's indexer for feature 0.
 /// </summary>
 /// <param name="flock">The flock to index; asserted to be non-null</param>
 public Indexer(SingletonFeatureFlock flock)
 {
     Contracts.AssertValue(flock);

     // Obtain the feature indexer directly from the argument, then keep the flock.
     _indexer = flock.GetIndexer(0);
     _flock   = flock;
 }