/// <summary>
/// Instantiates an indexer that translates from the "concatenated" bin space across all features,
/// into the original logical space for each individual feature.
/// </summary>
/// <param name="indexer">The indexer into the "shared" <see cref="IntArray"/> being translated
/// into this feature's logical space: values in [<paramref name="min"/>,<paramref name="lim"/>)
/// map to 1 onwards, and everything outside that range maps to 0</param>
/// <param name="min">The minimum value from the indexer that will map to 1</param>
/// <param name="lim">The exclusive upper bound on values from the indexer</param>
public Indexer(IIntArrayForwardIndexer indexer, int min, int lim)
{
    Contracts.AssertValue(indexer);
    Contracts.Assert(1 <= min && min < lim);
    // Store min - 1 so the hot-path translation is a single subtraction.
    _minMinusOne = min - 1;
    _lim = lim;
    _indexer = indexer;
}
/// <summary>
/// Splits this array into one <see cref="DeltaRepeatIntArray"/> per partition, where each
/// partition's values are the bins of the documents listed in the corresponding assignment.
/// </summary>
/// <param name="assignment">Per-partition arrays of document indices</param>
/// <returns>One new array per entry in <paramref name="assignment"/></returns>
public override IntArray[] Split(int[][] assignment)
{
    var parts = new IntArray[assignment.Length];
    for (int p = 0; p < assignment.Length; ++p)
    {
        int[] docs = assignment[p];
        // A fresh indexer per partition, since forward indexers may only be advanced once.
        IIntArrayForwardIndexer indexer = GetIndexer();
        parts[p] = new DeltaRepeatIntArray(docs.Length, BitsPerItem, docs.Select(d => indexer[d]));
    }
    return parts;
}
/// <summary>
/// Splits this array into one sparse <see cref="IntArray"/> per partition, where each
/// partition's values are the bins of the documents listed in the corresponding assignment.
/// </summary>
/// <param name="assignment">Per-partition arrays of document indices</param>
/// <returns>One new sparse array per entry in <paramref name="assignment"/></returns>
public override IntArray[] Split(int[][] assignment)
{
    return assignment.Select(docs =>
    {
        // A fresh indexer per partition, since forward indexers may only be advanced once.
        IIntArrayForwardIndexer indexer = GetIndexer();
        return IntArray.New(docs.Length, IntArrayType.Sparse, BitsPerItem, docs.Select(d => indexer[d]));
    }).ToArray();
}
/// <summary>
/// Splits the documents of a specified leaf to its two children based on a feature and a threshold value.
/// The "less than or equal" documents stay in <paramref name="leaf"/>'s slot; the "greater than"
/// documents are compacted into a contiguous region immediately after them, which becomes the new child.
/// </summary>
/// <param name="leaf">the leaf being split</param>
/// <param name="indexer">indexer used to read each document's feature bin value</param>
/// <param name="threshold">the threshold</param>
/// <param name="gtChildIndex">Index of child node that contains documents whose split
/// feature value is greater than the split threshold</param>
public unsafe void Split(int leaf, IIntArrayForwardIndexer indexer, UInt32 threshold, int gtChildIndex)
{
    using (Timer.Time(TimerEvent.DocumentPartitioningSplit))
    {
        // Lazily allocated scratch buffer, reused across splits; sized to hold all documents.
        if (_tempDocuments == null)
            _tempDocuments = new int[_documents.Length];
        // Note: lteChildIndex = leaf, i.e. the <= documents remain in the leaf's own region.
        int begin = _leafBegin[leaf];
        int end = begin + _leafCount[leaf];
        int newEnd = begin;   // write cursor for <= documents, in place within _documents
        int tempEnd = begin;  // write cursor for > documents, staged in _tempDocuments
        fixed (int* pDocuments = _documents)
        fixed (int* pTempDocuments = _tempDocuments)
        {
            // Stable partition: relative document order is preserved on both sides.
            for (int curr = begin; curr < end; ++curr)
            {
                int doc = pDocuments[curr];
                if (indexer[doc] > threshold)
                    pTempDocuments[tempEnd++] = doc;
                else
                    pDocuments[newEnd++] = doc;
            }
        }
        int newCount = newEnd - begin;
        int gtCount = tempEnd - begin;
        // Append the staged > documents directly after the compacted <= documents.
        Array.Copy(_tempDocuments, begin, _documents, newEnd, gtCount);
        _leafCount[leaf] = newCount;
        _leafBegin[gtChildIndex] = newEnd;
        _leafCount[gtChildIndex] = gtCount;
    }
}
/// <summary>
/// Accumulates weighted target sums, weight sums, and counts per feature bin into the
/// given histogram, over either all documents or only those in <c>input.DocIndices</c>.
/// </summary>
/// <param name="input">Sumup inputs; must carry per-document weights</param>
/// <param name="histogram">Histogram whose per-bin accumulators are updated in place</param>
private void SumupWeighted(SumupInputData input, FeatureHistogram histogram)
{
    Contracts.AssertValue(histogram.SumWeightsByBin);
    Contracts.AssertValue(input.Weights);
    IIntArrayForwardIndexer indexer = GetIndexer();
    for (int pos = 0; pos < input.TotalCount; pos++)
    {
        // DocIndices == null means the documents are 0..TotalCount-1 in order.
        int bin = input.DocIndices != null ? indexer[input.DocIndices[pos]] : indexer[pos];
        bool binValid = bin >= 0 && bin < histogram.SumTargetsByBin.Length && bin < histogram.NumFeatureValues;
        if (!binValid)
            throw Contracts.Except("Feature bin {0} is invalid", bin);
        histogram.SumTargetsByBin[bin] += input.Outputs[pos];
        histogram.SumWeightsByBin[bin] += input.Weights[pos];
        histogram.CountByBin[bin]++;
    }
}
/// <summary>
/// Get the document partitions of a specified leaf if it is split based on a feature and a threshold value.
/// Unlike <c>Split</c>, this does not modify the partitioning; it only reports what the two
/// sides of the split would contain.
/// </summary>
/// <param name="leaf">the leaf being split</param>
/// <param name="indexer">the indexer to access the feature value</param>
/// <param name="threshold">the threshold</param>
/// <param name="leftDocuments">[out] the left documents split from the leaf (feature value &lt;= threshold)</param>
/// <param name="leftDocumentSize">[out] the size of left documents</param>
/// <param name="rightDocuments">[out] the right documents split from the leaf (feature value &gt; threshold)</param>
/// <param name="rightDocumentSize">[out] the size of right documents</param>
public unsafe void GetLeafDocumentPartitions(
    int leaf,
    IIntArrayForwardIndexer indexer,
    UInt32 threshold,
    out int[] leftDocuments,
    out int leftDocumentSize,
    out int[] rightDocuments,
    out int rightDocumentSize)
{
    using (Timer.Time(TimerEvent.DocumentPartitioningSplit))
    {
        // Each side is sized for the worst case (all documents on one side);
        // only the first *DocumentSize entries of each output array are meaningful.
        leftDocuments = new int[_leafCount[leaf]];
        leftDocumentSize = 0;
        rightDocuments = new int[_leafCount[leaf]];
        rightDocumentSize = 0;
        int begin = _leafBegin[leaf];
        int end = begin + _leafCount[leaf];
        fixed (int* pDocuments = _documents)
        fixed (int* pTempLeftDocIndices = leftDocuments)
        fixed (int* pTempRightDocIndices = rightDocuments)
        {
            // Stable pass over the leaf's documents; order within each side is preserved.
            for (int curr = begin; curr < end; ++curr)
            {
                int doc = pDocuments[curr];
                if (indexer[doc] > threshold)
                    pTempRightDocIndices[rightDocumentSize++] = doc;
                else
                    pTempLeftDocIndices[leftDocumentSize++] = doc;
            }
        }
    }
}
/// <summary>
/// Clone an IntArray containing only the items indexed by <paramref name="itemIndices"/>
/// </summary>
/// <param name="itemIndices"> item indices will be contained in the cloned IntArray </param>
/// <returns> The cloned IntArray </returns>
public override IntArray Clone(int[] itemIndices)
{
    IIntArrayForwardIndexer indexer = GetIndexer();
    // Lazily projects each requested item through the indexer; the constructor enumerates it.
    var values = itemIndices.Select(index => indexer[index]);
    return new DeltaRepeatIntArray(itemIndices.Length, BitsPerItem, values);
}
/// <summary>
/// Clone an IntArray containing only the items indexed by <paramref name="itemIndices"/>
/// </summary>
/// <param name="itemIndices"> item indices will be contained in the cloned IntArray </param>
/// <returns> The cloned IntArray </returns>
public override IntArray Clone(int[] itemIndices)
{
    IIntArrayForwardIndexer indexer = GetIndexer();
    // Lazily projects each requested item through the indexer; IntArray.New enumerates it.
    var values = itemIndices.Select(index => indexer[index]);
    return IntArray.New(itemIndices.Length, IntArrayType.Sparse, BitsPerItem, values);
}
/// <summary>
/// Finds the most space efficient representation of the feature
/// (with slight slack cut for dense features). The behavior of
/// this method depends upon the static value <see cref="CompatibilityLevel"/>.
/// </summary>
/// <param name="workarray">Should be non-null if you want it to
/// consider segment arrays.</param>
/// <returns>Returns a more space efficient version of the array,
/// or the item itself if that is impossible, somehow.</returns>
public IntArray Compress(uint[] workarray = null)
{
    // Single pass statistics over the values, used to cost each candidate representation:
    int maxval = 0;      // largest value seen (determines bits per item)
    int zerocount = 0;   // number of zeros (sparse representation skips these)
    int runs = 0;        // number of items equal to their predecessor (RLE savings)
    int last = -1;       // previous value; -1 so the first item never counts as a run
    int overflows = 0;   // runs longer than 256 need an extra RLE entry (byte-length counter)
    int zoverflows = 0;  // of those overflows, how many were runs of zero (costs sparse too)
    int runnow = 0; // The longest run of having the same value.
    int len = Length;
    IIntArrayForwardIndexer ind = GetIndexer();
    for (int i = 0; i < len; ++i)
    {
        int val = ind[i];
        // Mirror the values into the caller's work array for the segment-array analysis below.
        if (workarray != null)
        {
            workarray[i] = (uint)val;
        }
        if (val == 0)
        {
            zerocount++;
        }
        else if (val > maxval)
        {
            maxval = val;
        }
        if (last == val)
        {
            runs++;
            if (++runnow > byte.MaxValue)
            {
                // We have 256 items in a row the same.
                overflows++;
                if (val == 0)
                {
                    zoverflows++;
                }
                runnow = 0;
            }
        }
        last = val;
    }
    // Estimate the costs of the available options.
    IntArrayBits classicBits = IntArray.NumBitsNeeded(maxval + 1);
    long denseBits = (long)classicBits * (long)Length;
    // Sparse stores (value, delta) pairs for non-zeros, at least 8 bits each, plus
    // filler entries for zero runs that overflowed the delta byte.
    long sparseBits = (long)(Math.Max((int)classicBits, 8) + 8) * (long)(Length - zerocount + zoverflows);
    // RLE stores (value, run-length byte) pairs; overflowed runs need extra entries.
    long rleBits = (long)(classicBits + 8) * (long)(Length - runs + overflows);
    long segBits = long.MaxValue;
    int segTransitions = 0;
    if (workarray != null)
    {
        // Segment arrays are only considered for values fitting in 21 bits.
        int bits = SegmentIntArray.BitsForValue((uint)maxval);
        if (bits <= 21)
        {
            SegmentIntArray.SegmentFindOptimalPath(workarray, Length, bits, out segBits, out segTransitions);
        }
    }
    // Compatibility flag 0x4 gates the repeat (RLE) representation entirely.
    if ((IntArray.CompatibilityLevel & 0x4) == 0)
    {
        rleBits = long.MaxValue;
    }
    long bestCost = Math.Min(Math.Min(Math.Min(denseBits, sparseBits), rleBits), segBits);
    IntArrayType bestType = IntArrayType.Dense;
    if (bestCost >= denseBits * 98 / 100)
    {
        // Cut the dense bits a wee bit of slack: within 2% of dense, prefer dense
        // (bestType already defaults to Dense, so nothing to do here).
    }
    else if (bestCost == sparseBits)
    {
        bestType = IntArrayType.Sparse;
    }
    else if (bestCost == rleBits)
    {
        bestType = IntArrayType.Repeat;
    }
    else
    {
        bestType = IntArrayType.Segmented;
    }
    // Already in the best representation at the right width: no conversion needed.
    if (bestType == Type && classicBits == BitsPerItem)
    {
        return (this);
    }
    IntArray bins = null;
    if (bestType != IntArrayType.Segmented)
    {
        bins = IntArray.New(Length, bestType, classicBits, this);
    }
    else
    {
        // Segment arrays are rebuilt from the work array populated in the first pass.
        bins = SegmentIntArray.FromWorkArray(workarray, Length, segBits, segTransitions);
    }
    return (bins);
}
/// <summary>
/// Constructs an indexer over the given one-hot flock, delegating to the
/// indexer of the flock's underlying bins array.
/// </summary>
/// <param name="flock">The flock to index; must not be null</param>
public FlockIndexer(OneHotFeatureFlock flock)
{
    Contracts.AssertValue(flock);
    _flock = flock;
    _indexer = flock.Bins.GetIndexer();
}
/// <summary>
/// Constructs an indexer over the given singleton flock, delegating to the
/// flock's indexer for its only feature (feature 0).
/// </summary>
/// <param name="flock">The flock to index; must not be null</param>
public Indexer(SingletonFeatureFlock flock)
{
    Contracts.AssertValue(flock);
    _flock = flock;
    _indexer = flock.GetIndexer(0);
}