/// <summary>
/// Builds a segment int array from its byte representation.
/// </summary>
/// <param name="buffer">The buffer holding the serialized fields.</param>
/// <param name="position">The read offset into <paramref name="buffer"/>; it is passed by
/// reference to every read helper (presumably advanced past each field - confirm in the helpers).</param>
public SegmentIntArray(byte[] buffer, ref int position)
{
    // The fields are read in a fixed order: bits-per-item, length, segment types,
    // segment lengths, and finally the packed data.
    int rawBitsPerItem = buffer.ToInt(ref position);
    int itemCount = buffer.ToInt(ref position);
    var segmentTypes = buffer.ToByteArray(ref position);
    var segmentLengths = buffer.ToIntArray(ref position);
    var packedData = buffer.ToUIntArray(ref position);

    _bpi = (IntArrayBits)rawBitsPerItem;
    _length = itemCount;
    _segType = segmentTypes;
    _segLength = segmentLengths;
    _data = packedData;
}
/// <summary>
/// Clones this array into a new array of the requested bit width and type.
/// </summary>
/// <param name="bitsPerItem">The number of bits per item, passed through to <c>New</c>.</param>
/// <param name="type">The requested type; <see cref="IntArrayType.Current"/> is mapped to
/// <see cref="IntArrayType.Dense"/>.</param>
/// <returns>The array returned by <c>New</c>, with this instance passed as the value source.</returns>
public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
{
    IntArrayType targetType = type == IntArrayType.Current ? IntArrayType.Dense : type;
    return New(Length, targetType, bitsPerItem, this);
}
/// <summary>
/// Wraps already-built segment buffers without copying them. The bits-per-item
/// is always recorded as <see cref="IntArrayBits.Bits32"/>.
/// </summary>
/// <param name="segType">The per-segment type bytes.</param>
/// <param name="segLen">The per-segment lengths.</param>
/// <param name="data">The packed data words.</param>
/// <param name="len">The logical number of items.</param>
private SegmentIntArray(byte[] segType, int[] segLen, uint[] data, int len)
{
    _bpi = IntArrayBits.Bits32;
    _length = len;

    // The arrays are stored by reference; the caller hands over ownership.
    _data = data;
    _segLength = segLen;
    _segType = segType;
}
/// <summary>
/// Construct a sparse int array from index, value pairs.
/// </summary>
/// <param name="length">The total length of the constructed array.</param>
/// <param name="bitsPerItem">The number of bits required to store the values.
/// Widths up to 8 bits are widened to 8 bits; 0 bits is rejected.</param>
/// <param name="nonZeroValues">An ordered enumerable of (index,value) pairs.
/// Each index should be strictly increasing as the iterable proceeds.</param>
public DeltaSparseIntArray(int length, IntArrayBits bitsPerItem, IEnumerable<KeyValuePair<int, int>> nonZeroValues)
{
    using (Timer.Time(TimerEvent.SparseConstruction))
    {
        var storedValues = new List<int>();
        var storedDeltas = new List<byte>();
        int cursor = 0;

        foreach (KeyValuePair<int, int> entry in nonZeroValues)
        {
            int index = entry.Key;
            int value = entry.Value;

            // An index equal to the cursor is only legal for the very first entry (index 0).
            bool outOfOrder = index <= cursor && (index < 0 || storedValues.Count > 0);
            if (outOfOrder)
            {
                throw Contracts.Except("index {0} occurred after {1}", index, cursor);
            }

            // A delta is a single byte, so bridge larger gaps with explicit zero entries.
            for (; index - cursor > byte.MaxValue; cursor += byte.MaxValue)
            {
                storedDeltas.Add(byte.MaxValue);
                storedValues.Add(0);
            }

            storedDeltas.Add((byte)(index - cursor));
            storedValues.Add(value);
            cursor = index;
        }

        // Add the final chunks of 0's if it ended early
        for (; length - cursor > byte.MaxValue; cursor += byte.MaxValue)
        {
            storedDeltas.Add(byte.MaxValue);
            storedValues.Add(0);
        }

        if (cursor >= length && cursor > 0)
        {
            throw Contracts.Except("Index {0} inconsistent with length {1}", cursor, length);
        }

        _length = length;

        // It is faster not to use a 4-bit dense array here. The memory difference is minor, since it's just
        // the sparse values that are saved on.
        // TODO: Implement a special iterator for 4-bit array, and change this code to use the iterator, which
        // may be faster
        if (bitsPerItem == IntArrayBits.Bits0)
        {
            throw Contracts.Except("Use dense arrays for 0 bits");
        }

        IntArrayBits valueBits = bitsPerItem <= IntArrayBits.Bits8 ? IntArrayBits.Bits8 : bitsPerItem;

        _values = IntArray.New(storedValues.Count, IntArrayType.Dense, valueBits, storedValues) as DenseIntArray;
        _deltas = storedDeltas.ToArray();
    }
}
/// <summary>
/// Construct a run-length encoded int array from a sequence of values. Each run of
/// equal consecutive values is stored as one value plus a byte-sized run length.
/// </summary>
/// <param name="length">The expected number of items in <paramref name="values"/>.</param>
/// <param name="bitsPerItem">The number of bits required to store the values.
/// Widths up to 8 bits are widened to 8 bits; 0 bits is rejected.</param>
/// <param name="values">The values to encode, in order.</param>
public DeltaRepeatIntArray(int length, IntArrayBits bitsPerItem, IEnumerable<int> values)
{
    using (Timer.Time(TimerEvent.SparseConstruction))
    {
        List<int> tempValueList = new List<int>();
        List<byte> tempDeltaList = new List<byte>();

        _length = 0;

        byte delta = 0;
        int lastVal = -1;

        foreach (int val in values)
        {
            // Start a new run on the very first item, whenever the value changes, or when
            // the current run length no longer fits in a byte. The explicit first-item test
            // matters because a leading value equal to the initial lastVal (-1) would
            // otherwise never be stored, leaving a run length with no matching value.
            if (_length == 0 || val != lastVal || delta == byte.MaxValue)
            {
                tempValueList.Add(val);
                lastVal = val;
                if (_length != 0)
                {
                    // Close the previous run before starting the next one.
                    tempDeltaList.Add(delta);
                }
                delta = 0;
            }
            ++delta;
            ++_length;
        }

        // Close the final run, if there was any input at all.
        if (delta > 0)
        {
            tempDeltaList.Add(delta);
        }

        if (_length != length)
        {
            throw Contracts.Except("Length provided to repeat vector is inconsistent with value enumeration");
        }

        // It is faster not to use a 4-bit dense array here. The memory difference is minor, since it's just
        // the sparse values that are saved on.
        // TODO: Implement a special iterator for 4-bit array, and change this code to use the iterator, which
        // may be faster
        if (bitsPerItem == IntArrayBits.Bits0)
        {
            throw Contracts.Except("Use dense arrays for 0 bits");
        }
        if (bitsPerItem <= IntArrayBits.Bits8)
        {
            bitsPerItem = IntArrayBits.Bits8;
        }

        _values = IntArray.New(tempValueList.Count, IntArrayType.Dense, bitsPerItem, tempValueList) as DenseIntArray;
        _deltas = tempDeltaList.ToArray();
    }
}
/// <summary>
/// Construct a sparse int array of the given length without supplying any values.
/// </summary>
/// <param name="length">The total length of the array.</param>
/// <param name="bitsPerItem">The requested bits per item; 0 bits is rejected.</param>
public DeltaSparseIntArray(int length, IntArrayBits bitsPerItem)
{
    if (bitsPerItem == IntArrayBits.Bits0)
    {
        throw Contracts.Except("Use dense arrays for 0 bits");
    }

    // Widths up to 8 bits are widened to 8 bits, as in the other constructors.
    // NOTE(review): the widened value is not read again in this constructor, and
    // _values / _deltas are not assigned here - confirm that callers fill them in.
    bitsPerItem = bitsPerItem <= IntArrayBits.Bits8 ? IntArrayBits.Bits8 : bitsPerItem;

    _length = length;
}
/// <summary>
/// Creates a new int array of the requested type and bit width, filled from
/// <paramref name="values"/>.
/// </summary>
/// <param name="length">The length of the array; must be non-negative.</param>
/// <param name="type">The representation to create; must be a defined value other than
/// <see cref="IntArrayType.Current"/>.</param>
/// <param name="bitsPerItem">The number of bits per item; must be a defined value.</param>
/// <param name="values">The values to store; must not be null.</param>
/// <returns>The new array, or null if <paramref name="type"/> matches none of the handled types.</returns>
public static IntArray New(int length, IntArrayType type, IntArrayBits bitsPerItem, IEnumerable<int> values)
{
    Contracts.CheckParam(length >= 0, nameof(length));
    Contracts.CheckParam(Enum.IsDefined(typeof(IntArrayType), type) && type != IntArrayType.Current, nameof(type));
    Contracts.CheckParam(Enum.IsDefined(typeof(IntArrayBits), bitsPerItem), nameof(bitsPerItem));
    Contracts.CheckValue(values, nameof(values));

    // Zero bits always produces the 0-bit dense array, whatever type was requested.
    if (bitsPerItem == IntArrayBits.Bits0)
    {
        Contracts.Assert(values.All(x => x == 0));
        return new Dense0BitIntArray(length);
    }

    if (type == IntArrayType.Dense)
    {
        // Walk the dense implementations by ascending bit-width threshold.
        //if (bitsPerItem == IntArrayBits.Bits1) return new Dense1BitIntArray(length);
        if (bitsPerItem <= IntArrayBits.Bits4)
        {
            return new Dense4BitIntArray(length, values);
        }
        if (bitsPerItem <= IntArrayBits.Bits8)
        {
            return new Dense8BitIntArray(length, values);
        }
        if (bitsPerItem <= IntArrayBits.Bits10)
        {
            return new Dense10BitIntArray(length, values);
        }
        if (bitsPerItem <= IntArrayBits.Bits16)
        {
            return new Dense16BitIntArray(length, values);
        }
        return new Dense32BitIntArray(length, values);
    }

    switch (type)
    {
        case IntArrayType.Sparse:
            return new DeltaSparseIntArray(length, bitsPerItem, values);
        case IntArrayType.Repeat:
            return new DeltaRepeatIntArray(length, bitsPerItem, values);
        case IntArrayType.Segmented:
            // Segmented should probably not be used in this way.
            return new SegmentIntArray(length, values);
        default:
            return null;
    }
}
/// <summary>
/// Creates a new int array given a byte representation
/// </summary>
/// <param name="buffer">the byte array representation of the int array. The buffer can be larger than needed since the caller might be re-using buffers from a pool</param>
/// <param name="position">the position in the byte array</param>
/// <returns>the int array object, or null if the stored type matches none of the handled types</returns>
public static IntArray New(byte[] buffer, ref int position)
{
    // The header is two ints: the array type followed by the bits per item.
    IntArrayType type = (IntArrayType)buffer.ToInt(ref position);
    IntArrayBits bitsPerItem = (IntArrayBits)buffer.ToInt(ref position);

    switch (type)
    {
        case IntArrayType.Dense:
            switch (bitsPerItem)
            {
                case IntArrayBits.Bits0:
                    return new Dense0BitIntArray(buffer, ref position);
                case IntArrayBits.Bits4:
                    return new Dense4BitIntArray(buffer, ref position);
                case IntArrayBits.Bits8:
                    return new Dense8BitIntArray(buffer, ref position);
                case IntArrayBits.Bits10:
                    return new Dense10BitIntArray(buffer, ref position);
                case IntArrayBits.Bits16:
                    return new Dense16BitIntArray(buffer, ref position);
                default:
                    // Every other width is read as a 32-bit dense array.
                    return new Dense32BitIntArray(buffer, ref position);
            }
        case IntArrayType.Sparse:
            return new DeltaSparseIntArray(buffer, ref position);
        case IntArrayType.Repeat:
            return new DeltaRepeatIntArray(buffer, ref position);
        case IntArrayType.Segmented:
            return new SegmentIntArray(buffer, ref position);
        default:
            return null;
    }
}
/// <summary>
/// Concatenates the bins of several features into one array, re-expressed against
/// the merged value map.
/// </summary>
/// <param name="parts">The features whose bins are concatenated, in order.</param>
/// <param name="concatValueMap">The merged value map the output bins refer to.</param>
/// <returns>A delta-sparse array when the bit width is non-zero and every part's bins are
/// delta-sparse; otherwise the dense concatenation.</returns>
private static IntArray ConcatBins(TsvFeature[] parts, uint[] concatValueMap)
{
    using (Timer.Time(TimerEvent.ConcatBins))
    {
        int totalLength = parts.Sum(x => x.Length);
        IntArrayBits bitsPerItem = IntArray.NumBitsNeeded(concatValueMap.Length);
        DenseIntArray merged = (DenseIntArray)IntArray.New(totalLength, IntArrayType.Dense, bitsPerItem);

        int writePos = 0;
        foreach (TsvFeature part in parts)
        {
            IntArray bins = part.Bins;
            if (concatValueMap.Length != part.ValueMap.Length)
            {
                // The part's map differs in size from the merged map, so translate each bin.
                int[] binMap = MakeBinMap(part._valueMap, concatValueMap);
                foreach (int bin in bins)
                {
                    merged[writePos++] = binMap[bin];
                }
            }
            else
            {
                // Same map size: bins are copied through unchanged
                // (presumably the maps are then identical - verify against MergeSortedUniqued).
                foreach (int bin in bins)
                {
                    merged[writePos++] = bin;
                }
            }
        }

        bool allSparse = bitsPerItem != IntArrayBits.Bits0 && parts.All(x => x.Bins is DeltaSparseIntArray);
        if (allSparse)
        {
            return new DeltaSparseIntArray(totalLength, bitsPerItem, merged);
        }

        return merged;
    }
}
/// <summary>
/// Creates a new, empty int array of the requested type and bit width.
/// </summary>
/// <param name="length">The length of the array; must be non-negative.</param>
/// <param name="type">The representation to create. Only <see cref="IntArrayType.Dense"/> and
/// <see cref="IntArrayType.Sparse"/> are constructed here (plus any type when
/// <paramref name="bitsPerItem"/> is <see cref="IntArrayBits.Bits0"/>).</param>
/// <param name="bitsPerItem">The number of bits per item.</param>
/// <returns>The new array, or null when <paramref name="type"/> is not one this overload
/// constructs and <paramref name="bitsPerItem"/> is non-zero.</returns>
public static IntArray New(int length, IntArrayType type, IntArrayBits bitsPerItem)
{
    Contracts.CheckParam(length >= 0, nameof(length));

    // Dense and Sparse must pass validation: they are exactly the types constructed below.
    // Current, Repeat and Segmented remain accepted so existing callers see no change.
    Contracts.CheckParam(
        type == IntArrayType.Dense || type == IntArrayType.Sparse ||
        type == IntArrayType.Current || type == IntArrayType.Repeat || type == IntArrayType.Segmented,
        nameof(type));

    if (type == IntArrayType.Dense || bitsPerItem == IntArrayBits.Bits0)
    {
        if (bitsPerItem == IntArrayBits.Bits0)
        {
            return new Dense0BitIntArray(length);
        }
        //else if (bitsPerItem <= IntArrayBits.Bits1) return new Dense1BitIntArray(length);
        else if (bitsPerItem <= IntArrayBits.Bits4)
        {
            return new Dense4BitIntArray(length);
        }
        else if (bitsPerItem <= IntArrayBits.Bits8)
        {
            return new Dense8BitIntArray(length);
        }
        else if (bitsPerItem <= IntArrayBits.Bits10)
        {
            return new Dense10BitIntArray(length);
        }
        else if (bitsPerItem <= IntArrayBits.Bits16)
        {
            return new Dense16BitIntArray(length);
        }
        else
        {
            return new Dense32BitIntArray(length);
        }
    }
    else if (type == IntArrayType.Sparse)
    {
        return new DeltaSparseIntArray(length, bitsPerItem);
    }

    // REVIEW: Current, Repeat and Segmented with a non-zero bit width have no empty
    // form here and still yield null, as before. Callers must handle that.
    return null;
}
/// <summary>
/// Concatenates an array of features into one long feature
/// </summary>
/// <param name="parts">An array of features</param>
/// <returns>A concatenated feature. For a single part this is the part itself when its bins
/// already use the needed bit width, otherwise a copy of it with re-encoded bins.</returns>
public static TsvFeature Concat(TsvFeature[] parts)
{
    IntArrayBits bitsPerItem;

    if (parts.Length != 1)
    {
        uint[] mergedValueMap = Algorithms.MergeSortedUniqued(parts.Select(x => x.ValueMap).ToArray());
        // The width is computed here but not passed on; ConcatBins derives it again from the map.
        bitsPerItem = IntArray.NumBitsNeeded(mergedValueMap.Length);
        IntArray mergedBins = ConcatBins(parts, mergedValueMap);
        return new TsvFeature(mergedBins, mergedValueMap, parts[0]._name);
    }

    // Single part: only re-encode when the bit width actually changes.
    bitsPerItem = IntArray.NumBitsNeeded(parts[0].ValueMap.Length);
    if (bitsPerItem == parts[0].Bins.BitsPerItem)
    {
        return parts[0];
    }

    IntArray sourceBins = parts[0].Bins;
    IntArray recoded = IntArray.New(sourceBins.Length, sourceBins.Type, bitsPerItem, sourceBins);
    return new TsvFeature(recoded, parts[0].ValueMap, parts[0]._name);
}
/// <summary>
/// Creates <c>_count</c> int arrays in parallel, each filled from a generator chosen by
/// <c>_type</c> and seeded from a shared salt plus the array's index.
/// </summary>
/// <param name="ch">The channel used for informational output and passed to the generators.</param>
/// <returns>The generated arrays.</returns>
private IntArray[] CreateRandomIntArrays(IChannel ch)
{
    IntArray[] arrays = new IntArray[_count];
    using (var pch = _host.StartProgressChannel("Create IntArrays"))
    {
        int created = 0;
        pch.SetHeader(new ProgressHeader("arrays"), e => e.SetProgress(0, created, arrays.Length));
        IntArrayBits bits = IntArray.NumBitsNeeded(_bins);
        ch.Info("Bits per item is {0}", bits);
        int salt = _host.Rand.Next();

        Func<IChannel, Random, IEnumerable<int>> createIntArray;
        switch (_type)
        {
            case IntArrayType.Dense:
                createIntArray = CreateDense;
                break;
            case IntArrayType.Sparse:
                createIntArray = CreateSparse;
                // With _param == 1 the dense generator is used even for the sparse type.
                if (_param == 1)
                {
                    createIntArray = CreateDense;
                }
                break;
            default:
                throw _host.ExceptNotImpl("Haven't yet wrote a random generator appropriate for {0}", _type);
        }

        ParallelEnumerable.Range(0, arrays.Length).ForAll(i =>
        {
            // Each array gets its own Random so results do not depend on scheduling.
            Random r = new Random(salt + i);
            arrays[i] = IntArray.New(_len, _type, bits, createIntArray(ch, r));
            // The counter is shared by all workers; a plain ++ would lose updates.
            System.Threading.Interlocked.Increment(ref created);
        });

        return arrays;
    }
}
/// <summary>
/// Clones the contents of this sparse array into a new array.
/// </summary>
/// <param name="bitsPerItem">The number of bits per item in the created array. For a sparse
/// result, widths up to 8 bits are widened to 8 bits.</param>
/// <param name="type">The requested type. <see cref="IntArrayType.Sparse"/> and
/// <see cref="IntArrayType.Current"/> produce a sparse array; every other type produces a
/// dense array.</param>
/// <returns>The cloned array.</returns>
public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
{
    if (type == IntArrayType.Sparse || type == IntArrayType.Current)
    {
        if (bitsPerItem <= IntArrayBits.Bits8)
        {
            bitsPerItem = IntArrayBits.Bits8;
        }
        DenseIntArray newValues = _values.Clone(bitsPerItem, IntArrayType.Dense) as DenseIntArray;
        // The delta array is shared with the clone rather than copied.
        return new DeltaSparseIntArray(newValues, _deltas, _length);
    }
    else
    {
        // Honor the requested width instead of this instance's own BitsPerItem.
        DenseIntArray dense = IntArray.New(Length, IntArrayType.Dense, bitsPerItem) as DenseIntArray;
        // Each delta is the distance from the previous stored entry, so a running
        // sum gives the absolute position of every stored value.
        int index = 0;
        for (int i = 0; i < _values.Length; ++i)
        {
            index += _deltas[i];
            dense[index] = _values[i];
        }
        return dense;
    }
}
/// <summary>
/// Construct a segment int array from a sequence of values.
/// </summary>
/// <param name="length">The expected number of items in <paramref name="values"/>.</param>
/// <param name="values">The values to store; each is reinterpreted as an unsigned int.</param>
public SegmentIntArray(int length, IEnumerable<int> values)
{
    using (Timer.Time(TimerEvent.SparseConstruction))
    {
        uint[] work = new uint[length];
        uint count = 0;
        uint largest = 0;

        foreach (int v in values)
        {
            if (count >= length)
            {
                throw Contracts.Except("Length provided to segment vector is inconsistent with value enumeration");
            }

            uint item = (uint)v;
            work[count] = item;
            count++;
            if (item > largest)
            {
                largest = item;
            }
        }

        if (count != length)
        {
            throw Contracts.Except("Length provided to segment vector is inconsistent with value enumeration");
        }

        // Find the segmentation for the copied values, then build the array from it
        // and take over the built instance's fields.
        int maxBits = BitsForValue(largest);
        long totalBits;
        int transitionCount;
        SegmentFindOptimalPath(work, work.Length, maxBits, out totalBits, out transitionCount);
        var built = FromWorkArray(work, work.Length, totalBits, transitionCount);

        _bpi = built._bpi;
        _length = built._length;
        _data = built._data;
        _segLength = built._segLength;
        _segType = built._segType;
    }
}
/// <summary>
/// Clones this array by handing its length, the requested type and bit width, and this
/// instance as the value source to <c>IntArray.New</c>.
/// </summary>
/// <param name="bitsPerItem">The number of bits per item in the created array.</param>
/// <param name="type">The type of the created array.</param>
/// <returns>The array returned by <c>IntArray.New</c>.</returns>
public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
{
    IntArray copy = IntArray.New(_length, type, bitsPerItem, this);
    return copy;
}
/// <summary>
/// Cloning is not implemented for this array type; the call always throws.
/// </summary>
/// <param name="bitsPerItem">Unused.</param>
/// <param name="type">Unused.</param>
/// <returns>Never returns.</returns>
public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
{
    var notImplemented = Contracts.ExceptNotImpl();
    throw notImplemented;
}
/// <summary>
/// Pins the histogram and input arrays and passes them, together with the caller's value
/// and delta pointers, to <c>C_SumupDeltaSparse_float</c> or <c>C_SumupDeltaSparse_double</c>
/// (selected at compile time by <c>USE_SINGLE_PRECISION</c>).
/// </summary>
/// <param name="input">Supplies the outputs, weights, document indices and totals.</param>
/// <param name="histogram">Supplies the per-bin target sums, weight sums and counts.</param>
/// <param name="pValues">Pointer to the stored values.</param>
/// <param name="pDeltas">Pointer to the stored deltas.</param>
/// <param name="numDeltas">The number of deltas.</param>
/// <param name="bitsPerItem">NOTE(review): not read in this method; the call passes the
/// instance's <c>BitsPerItem</c> instead - confirm the two always agree.</param>
private unsafe void SumupCPlusPlusSparse(SumupInputData input, FeatureHistogram histogram, byte *pValues, byte *pDeltas, int numDeltas, IntArrayBits bitsPerItem)
{
    // All managed arrays stay pinned for the duration of the call.
    fixed(FloatType *pSumTargetsByBin = histogram.SumTargetsByBin)
    fixed(FloatType * pSampleOutputs = input.Outputs)
    fixed(double *pSumWeightsByBin = histogram.SumWeightsByBin)
    fixed(double *pSampleWeights = input.Weights)
    fixed(int *pIndices = input.DocIndices)
    fixed(int *pCountByBin = histogram.CountByBin)
    {
        int rv =
#if USE_SINGLE_PRECISION
            C_SumupDeltaSparse_float
#else
            C_SumupDeltaSparse_double
#endif
            ((int)BitsPerItem, pValues, pDeltas, numDeltas, pIndices, pSampleOutputs, pSampleWeights, pSumTargetsByBin, pSumWeightsByBin, pCountByBin, input.TotalCount, input.SumTargets, input.SumWeights);
        // A negative return value is treated as a failure.
        if (rv < 0)
        {
            throw Contracts.Except("CSumup sumupdeltasparse {0}", rv);
        }
    }
}
/// <summary>
/// Clones the contents of this IntArray into a new IntArray
/// </summary>
/// <param name="bitsPerItem">The number of bits per item in the created IntArray</param>
/// <param name="type">The type of the new IntArray</param>
/// <returns>The cloned IntArray</returns>
public abstract IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type);
/// <summary>
/// Finds the most space efficient representation of the feature
/// (with slight slack cut for dense features). The behavior of
/// this method depends upon the static value <see cref="CompatibilityLevel"/>.
/// </summary>
/// <param name="workarray">Should be non-null if you want it to
/// consider segment arrays. When non-null, every item of this array is written
/// into it, so it needs at least <c>Length</c> elements.</param>
/// <returns>Returns a more space efficient version of the array,
/// or the item itself if that is impossible, somehow.</returns>
public IntArray Compress(uint[] workarray = null)
{
    int maxval = 0;
    int zerocount = 0;
    int runs = 0;
    int last = -1;
    int overflows = 0;
    int zoverflows = 0;
    // Number of repeats counted since the last overflow.
    // NOTE(review): this is only reset on overflow, not when the value changes, so it
    // accumulates across separate runs - confirm whether that is intended.
    int runnow = 0;
    int len = Length;
    IIntArrayForwardIndexer ind = GetIndexer();
    // Single pass: gather the statistics the cost estimates below are built from.
    for (int i = 0; i < len; ++i)
    {
        int val = ind[i];
        if (workarray != null)
        {
            workarray[i] = (uint)val;
        }
        if (val == 0)
        {
            zerocount++;
        }
        else if (val > maxval)
        {
            maxval = val;
        }
        // "runs" counts items equal to their predecessor.
        if (last == val)
        {
            runs++;
            if (++runnow > byte.MaxValue)
            {
                // The repeat counter has gone past what one byte can hold.
                overflows++;
                if (val == 0)
                {
                    zoverflows++;
                }
                runnow = 0;
            }
        }
        last = val;
    }
    // Estimate the costs of the available options.
    IntArrayBits classicBits = IntArray.NumBitsNeeded(maxval + 1);
    long denseBits = (long)classicBits * (long)Length;
    // Sparse: at least 8 value bits plus an 8-bit delta per stored entry.
    long sparseBits = (long)(Math.Max((int)classicBits, 8) + 8) * (long)(Length - zerocount + zoverflows);
    // Repeat: value bits plus an 8-bit run length per run.
    long rleBits = (long)(classicBits + 8) * (long)(Length - runs + overflows);
    long segBits = long.MaxValue;
    int segTransitions = 0;
    // Segment arrays are only considered with a work array and values of at most 21 bits.
    if (workarray != null)
    {
        int bits = SegmentIntArray.BitsForValue((uint)maxval);
        if (bits <= 21)
        {
            SegmentIntArray.SegmentFindOptimalPath(workarray, Length, bits, out segBits, out segTransitions);
        }
    }
    // The repeat representation is ruled out unless bit 0x4 of the compatibility level is set.
    if ((IntArray.CompatibilityLevel & 0x4) == 0)
    {
        rleBits = long.MaxValue;
    }
    long bestCost = Math.Min(Math.Min(Math.Min(denseBits, sparseBits), rleBits), segBits);
    IntArrayType bestType = IntArrayType.Dense;
    if (bestCost >= denseBits * 98 / 100)
    {
        // Cut the dense bits a wee bit of slack: stay dense unless another
        // option costs less than 98% of the dense cost.
    }
    else if (bestCost == sparseBits)
    {
        bestType = IntArrayType.Sparse;
    }
    else if (bestCost == rleBits)
    {
        bestType = IntArrayType.Repeat;
    }
    else
    {
        bestType = IntArrayType.Segmented;
    }
    // Nothing to do when this array already has the chosen type and width.
    if (bestType == Type && classicBits == BitsPerItem)
    {
        return(this);
    }
    IntArray bins = null;
    if (bestType != IntArrayType.Segmented)
    {
        bins = IntArray.New(Length, bestType, classicBits, this);
    }
    else
    {
        // Reuse the path already computed on the work array.
        bins = SegmentIntArray.FromWorkArray(workarray, Length, segBits, segTransitions);
    }
    return(bins);
}