Example #1
0
 /// <summary>
 /// Rebuilds a segment array from its serialized byte form.
 /// </summary>
 /// <param name="buffer">The bytes holding the serialized fields.</param>
 /// <param name="position">The read offset into <paramref name="buffer"/>; it is handed by reference to every read helper.</param>
 public SegmentIntArray(byte[] buffer, ref int position)
 {
     // Field order in the buffer: bits-per-item, length, segment types, segment lengths, data.
     // The reads below must stay in exactly this sequence because they share one offset.
     var bitsCode       = buffer.ToInt(ref position);
     var itemCount      = buffer.ToInt(ref position);
     var segmentTypes   = buffer.ToByteArray(ref position);
     var segmentLengths = buffer.ToIntArray(ref position);
     var payload        = buffer.ToUIntArray(ref position);

     _bpi       = (IntArrayBits)bitsCode;
     _length    = itemCount;
     _segType   = segmentTypes;
     _segLength = segmentLengths;
     _data      = payload;
 }
 /// <summary>
 /// Copies the contents of this array into a newly created array.
 /// </summary>
 /// <param name="bitsPerItem">The number of bits per item in the created array.</param>
 /// <param name="type">The type of the created array; <c>Current</c> is served as <c>Dense</c>.</param>
 /// <returns>The array produced by <c>New</c> from this array's values.</returns>
 public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
 {
     IntArrayType targetType = type == IntArrayType.Current ? IntArrayType.Dense : type;
     return New(Length, targetType, bitsPerItem, this);
 }
Example #3
0
 /// <summary>
 /// Wraps already-built segment storage. The arrays are stored as given (no copies are made),
 /// and the bits-per-item is always recorded as <c>Bits32</c>.
 /// </summary>
 /// <param name="segType">Stored into the segment-type field.</param>
 /// <param name="segLen">Stored into the segment-length field.</param>
 /// <param name="data">Stored into the data field.</param>
 /// <param name="len">Stored as the array length.</param>
 private SegmentIntArray(byte[] segType, int[] segLen, uint[] data, int len)
 {
     _bpi       = IntArrayBits.Bits32;
     _length    = len;
     _data      = data;
     _segLength = segLen;
     _segType   = segType;
 }
Example #4
0
        /// <summary>
        /// Builds a delta-encoded sparse int array from (index, value) pairs.
        /// </summary>
        /// <param name="length">The total length of the constructed array.</param>
        /// <param name="bitsPerItem">The number of bits required to store the values. Anything up to
        /// 8 bits is stored as 8 bits; 0 bits is rejected.</param>
        /// <param name="nonZeroValues">An ordered enumerable of (index, value) pairs whose indices
        /// strictly increase as the enumeration proceeds.</param>
        public DeltaSparseIntArray(int length, IntArrayBits bitsPerItem, IEnumerable <KeyValuePair <int, int> > nonZeroValues)
        {
            using (Timer.Time(TimerEvent.SparseConstruction))
            {
                var storedValues = new List <int>();
                var storedDeltas = new List <byte>();

                // Index of the most recently emitted entry (0 before anything is emitted).
                int cursor = 0;
                foreach (KeyValuePair <int, int> entry in nonZeroValues)
                {
                    int index = entry.Key;

                    // The very first entry may sit at index 0; every later one must move forward.
                    bool outOfOrder = index <= cursor && (index < 0 || storedValues.Count > 0);
                    if (outOfOrder)
                    {
                        throw Contracts.Except("index {0} occurred after {1}", index, cursor);
                    }

                    // A delta is a single byte, so bridge wide gaps with zero-valued filler entries.
                    for (; index - cursor > byte.MaxValue; cursor += byte.MaxValue)
                    {
                        storedDeltas.Add(byte.MaxValue);
                        storedValues.Add(0);
                    }

                    storedDeltas.Add((byte)(index - cursor));
                    storedValues.Add(entry.Value);
                    cursor = index;
                }

                // Pad with filler entries when the pairs stop well short of the requested length.
                for (; length - cursor > byte.MaxValue; cursor += byte.MaxValue)
                {
                    storedDeltas.Add(byte.MaxValue);
                    storedValues.Add(0);
                }

                if (cursor > 0 && cursor >= length)
                {
                    throw Contracts.Except("Index {0} inconsistent with length {1}", cursor, length);
                }
                _length = length;

                // A 4-bit dense array is deliberately not used for the values: it is slower, and the
                // memory saved is minor because only the sparse values are stored.
                // TODO: Implement a special iterator for 4-bit array, and change this code to use the iterator, which
                //          may be faster
                if (bitsPerItem == IntArrayBits.Bits0)
                {
                    throw Contracts.Except("Use dense arrays for 0 bits");
                }

                IntArrayBits storageBits = bitsPerItem <= IntArrayBits.Bits8 ? IntArrayBits.Bits8 : bitsPerItem;

                _values = IntArray.New(storedValues.Count, IntArrayType.Dense, storageBits, storedValues) as DenseIntArray;
                _deltas = storedDeltas.ToArray();
            }
        }
Example #5
0
        /// <summary>
        /// Builds a run-length style array: each stored value is paired with a one-byte count of how
        /// many consecutive items carry it. A run longer than <see cref="byte.MaxValue"/> is split
        /// into several entries.
        /// </summary>
        /// <param name="length">The expected number of items; must equal the number of items enumerated.</param>
        /// <param name="bitsPerItem">The number of bits required to store the values. Anything up to
        /// 8 bits is stored as 8 bits; 0 bits is rejected.</param>
        /// <param name="values">The items, in order.</param>
        public DeltaRepeatIntArray(int length, IntArrayBits bitsPerItem, IEnumerable <int> values)
        {
            using (Timer.Time(TimerEvent.SparseConstruction))
            {
                var runValues  = new List <int>();
                var runLengths = new List <byte>();

                int  seen       = 0;  // items consumed so far
                byte runLength  = 0;  // size of the run currently being accumulated
                int  runValue   = -1; // value of the run currently being accumulated

                foreach (int item in values)
                {
                    bool startsNewRun = item != runValue || runLength == byte.MaxValue;
                    if (startsNewRun)
                    {
                        // Close the previous run, unless this is the very first item.
                        if (seen != 0)
                        {
                            runLengths.Add(runLength);
                        }
                        runValues.Add(item);
                        runValue  = item;
                        runLength = 0;
                    }
                    ++runLength;
                    ++seen;
                }

                // Close the final run, if any item was seen at all.
                if (runLength > 0)
                {
                    runLengths.Add(runLength);
                }

                _length = seen;
                if (seen != length)
                {
                    throw Contracts.Except("Length provided to repeat vector is inconsistent with value enumeration");
                }

                // A 4-bit dense array is deliberately not used for the values: it is slower, and the
                // memory saved is minor because only the run values are stored.
                // TODO: Implement a special iterator for 4-bit array, and change this code to use the iterator, which
                //          may be faster
                if (bitsPerItem == IntArrayBits.Bits0)
                {
                    throw Contracts.Except("Use dense arrays for 0 bits");
                }

                IntArrayBits storageBits = bitsPerItem <= IntArrayBits.Bits8 ? IntArrayBits.Bits8 : bitsPerItem;

                _values = IntArray.New(runValues.Count, IntArrayType.Dense, storageBits, runValues) as DenseIntArray;
                _deltas = runLengths.ToArray();
            }
        }
Example #6
0
        /// <summary>
        /// Creates a sparse array of the given length without supplying any values.
        /// </summary>
        /// <param name="length">The length recorded for the array.</param>
        /// <param name="bitsPerItem">Only validated: 0 bits is rejected. The value is not stored by this constructor.</param>
        public DeltaSparseIntArray(int length, IntArrayBits bitsPerItem)
        {
            if (bitsPerItem == IntArrayBits.Bits0)
            {
                throw Contracts.Except("Use dense arrays for 0 bits");
            }

            // The former clamp of bitsPerItem up to Bits8 was a dead store (the parameter was never
            // read afterwards), so it has been dropped.
            // NOTE(review): this constructor assigns only _length; the value and delta storage are
            // not set up here. Confirm callers do not index the result before populating it.
            _length = length;
        }
Example #7
0
        /// <summary>
        /// Creates an int array of the requested representation and fills it from <paramref name="values"/>.
        /// </summary>
        /// <param name="length">The number of items; must be non-negative.</param>
        /// <param name="type">The representation to build; must be a defined value other than <c>Current</c>.</param>
        /// <param name="bitsPerItem">The number of bits per item; <c>Bits0</c> always yields a dense 0-bit array.</param>
        /// <param name="values">The items to store; must not be null.</param>
        /// <returns>The new array, or null when <paramref name="type"/> matches none of the handled representations.</returns>
        public static IntArray New(int length, IntArrayType type, IntArrayBits bitsPerItem, IEnumerable <int> values)
        {
            Contracts.CheckParam(length >= 0, nameof(length));
            Contracts.CheckParam(Enum.IsDefined(typeof(IntArrayType), type) && type != IntArrayType.Current, nameof(type));
            Contracts.CheckParam(Enum.IsDefined(typeof(IntArrayBits), bitsPerItem), nameof(bitsPerItem));
            Contracts.CheckValue(values, nameof(values));

            // Zero bits means every item is zero, whatever representation was asked for.
            if (bitsPerItem == IntArrayBits.Bits0)
            {
                Contracts.Assert(values.All(x => x == 0));
                return new Dense0BitIntArray(length);
            }

            if (type == IntArrayType.Dense)
            {
                // Pick the narrowest dense implementation able to hold the requested width.
                //if (bitsPerItem == IntArrayBits.Bits1) return new Dense1BitIntArray(length);
                if (bitsPerItem <= IntArrayBits.Bits4)
                {
                    return new Dense4BitIntArray(length, values);
                }
                if (bitsPerItem <= IntArrayBits.Bits8)
                {
                    return new Dense8BitIntArray(length, values);
                }
                if (bitsPerItem <= IntArrayBits.Bits10)
                {
                    return new Dense10BitIntArray(length, values);
                }
                if (bitsPerItem <= IntArrayBits.Bits16)
                {
                    return new Dense16BitIntArray(length, values);
                }
                return new Dense32BitIntArray(length, values);
            }

            switch (type)
            {
            case IntArrayType.Sparse:
                return new DeltaSparseIntArray(length, bitsPerItem, values);

            case IntArrayType.Repeat:
                return new DeltaRepeatIntArray(length, bitsPerItem, values);

            case IntArrayType.Segmented:
                // Segmented should probably not be used in this way.
                return new SegmentIntArray(length, values);

            default:
                return null;
            }
        }
Example #8
0
        /// <summary>
        /// Creates a new int array from its byte representation.
        /// </summary>
        /// <param name="buffer">The byte array representation of the array. The buffer can be larger than needed, since the caller might be re-using buffers from a pool.</param>
        /// <param name="position">The position in the byte array; handed by reference to each read.</param>
        /// <returns>The int array object, or null when the stored type matches no known representation.</returns>
        public static IntArray New(byte[] buffer, ref int position)
        {
            // Header: the array type comes first, then the bits-per-item.
            var kind = (IntArrayType)buffer.ToInt(ref position);
            var bits = (IntArrayBits)buffer.ToInt(ref position);

            if (kind != IntArrayType.Dense)
            {
                switch (kind)
                {
                case IntArrayType.Sparse:
                    return new DeltaSparseIntArray(buffer, ref position);

                case IntArrayType.Repeat:
                    return new DeltaRepeatIntArray(buffer, ref position);

                case IntArrayType.Segmented:
                    return new SegmentIntArray(buffer, ref position);

                default:
                    return null;
                }
            }

            // Dense: any width not listed explicitly is read as a 32-bit dense array.
            switch (bits)
            {
            case IntArrayBits.Bits0:
                return new Dense0BitIntArray(buffer, ref position);

            case IntArrayBits.Bits4:
                return new Dense4BitIntArray(buffer, ref position);

            case IntArrayBits.Bits8:
                return new Dense8BitIntArray(buffer, ref position);

            case IntArrayBits.Bits10:
                return new Dense10BitIntArray(buffer, ref position);

            case IntArrayBits.Bits16:
                return new Dense16BitIntArray(buffer, ref position);

            default:
                return new Dense32BitIntArray(buffer, ref position);
            }
        }
Example #9
0
        /// <summary>
        /// Concatenates the bins of several features into one array, re-expressing each bin against
        /// the merged value map.
        /// </summary>
        /// <param name="parts">The features whose bins are concatenated, in order.</param>
        /// <param name="concatValueMap">The merged value map the resulting bins refer to.</param>
        /// <returns>A sparse array when every part is sparse and at least one bit per item is needed;
        /// otherwise the dense concatenation.</returns>
        private static IntArray ConcatBins(TsvFeature[] parts, uint[] concatValueMap)
        {
            using (Timer.Time(TimerEvent.ConcatBins))
            {
                int           totalLength = parts.Sum(x => x.Length);
                IntArrayBits  neededBits  = IntArray.NumBitsNeeded(concatValueMap.Length);
                DenseIntArray merged      = (DenseIntArray)IntArray.New(totalLength, IntArrayType.Dense, neededBits);

                int writeAt = 0;
                foreach (TsvFeature part in parts)
                {
                    IntArray partBins = part.Bins;

                    // Value maps of equal length are treated as identical, so bins copy over unchanged.
                    if (part.ValueMap.Length == concatValueMap.Length)
                    {
                        foreach (int bin in partBins)
                        {
                            merged[writeAt++] = bin;
                        }
                        continue;
                    }

                    // Otherwise translate every bin into the merged value map.
                    int[] translation = MakeBinMap(part._valueMap, concatValueMap);
                    foreach (int bin in partBins)
                    {
                        merged[writeAt++] = translation[bin];
                    }
                }

                bool keepSparse = neededBits != IntArrayBits.Bits0 && parts.All(x => x.Bins is DeltaSparseIntArray);
                if (keepSparse)
                {
                    return new DeltaSparseIntArray(totalLength, neededBits, merged);
                }
                return merged;
            }
        }
Example #10
0
        /// <summary>
        /// Creates a new int array of the given length without supplying any values.
        /// </summary>
        /// <param name="length">The number of items; must be non-negative.</param>
        /// <param name="type">The representation to build. Only <c>Dense</c> and <c>Sparse</c> can be
        /// built without values, so any other type is rejected.</param>
        /// <param name="bitsPerItem">The number of bits per item; <c>Bits0</c> always yields a dense 0-bit array.</param>
        /// <returns>The new array.</returns>
        public static IntArray New(int length, IntArrayType type, IntArrayBits bitsPerItem)
        {
            Contracts.CheckParam(length >= 0, nameof(length));
            // The check used to be inverted: it accepted Current/Repeat/Segmented (none of which the
            // code below can build) and rejected Dense and Sparse (the only ones it can build).
            Contracts.CheckParam(type == IntArrayType.Dense || type == IntArrayType.Sparse, nameof(type));

            if (type == IntArrayType.Dense || bitsPerItem == IntArrayBits.Bits0)
            {
                if (bitsPerItem == IntArrayBits.Bits0)
                {
                    return(new Dense0BitIntArray(length));
                }
                //else if (bitsPerItem <= IntArrayBits.Bits1) return new Dense1BitIntArray(length);
                else if (bitsPerItem <= IntArrayBits.Bits4)
                {
                    return(new Dense4BitIntArray(length));
                }
                else if (bitsPerItem <= IntArrayBits.Bits8)
                {
                    return(new Dense8BitIntArray(length));
                }
                else if (bitsPerItem <= IntArrayBits.Bits10)
                {
                    return(new Dense10BitIntArray(length));
                }
                else if (bitsPerItem <= IntArrayBits.Bits16)
                {
                    return(new Dense16BitIntArray(length));
                }
                else
                {
                    return(new Dense32BitIntArray(length));
                }
            }
            else if (type == IntArrayType.Sparse)
            {
                return(new DeltaSparseIntArray(length, bitsPerItem));
            }
            // Not reachable once the type check above has passed; kept so every path returns.
            return(null);
        }
Example #11
0
        /// <summary>
        /// Concatenates an array of features into one long feature.
        /// </summary>
        /// <param name="parts">An array of features.</param>
        /// <returns>A concatenated feature. With a single part, that part itself is returned when its
        /// bins already use the needed bit width; otherwise its bins are rebuilt at that width.</returns>
        public static TsvFeature Concat(TsvFeature[] parts)
        {
            if (parts.Length == 1)
            {
                TsvFeature   only       = parts[0];
                IntArrayBits neededBits = IntArray.NumBitsNeeded(only.ValueMap.Length);
                if (neededBits == only.Bins.BitsPerItem)
                {
                    return only;
                }

                // Same values and representation, re-encoded at the width the value map requires.
                IntArray source   = only.Bins;
                IntArray rebinned = IntArray.New(source.Length, source.Type, neededBits, source);
                return new TsvFeature(rebinned, only.ValueMap, only._name);
            }

            uint[] mergedValueMap = Algorithms.MergeSortedUniqued(parts.Select(x => x.ValueMap).ToArray());

            // Computed as in the original code, although the result is not used afterwards here.
            IntArrayBits mergedBits = IntArray.NumBitsNeeded(mergedValueMap.Length);

            IntArray mergedBins = ConcatBins(parts, mergedValueMap);

            // The concatenated feature takes the name of the first part.
            return new TsvFeature(mergedBins, mergedValueMap, parts[0]._name);
        }
Example #12
0
        /// <summary>
        /// Creates <c>_count</c> int arrays in parallel, each filled from a random generator seeded
        /// with a shared salt plus the array's index, and reports progress while doing so.
        /// </summary>
        /// <param name="ch">The channel used for informational output and passed to the value generators.</param>
        /// <returns>The created arrays.</returns>
        private IntArray[] CreateRandomIntArrays(IChannel ch)
        {
            IntArray[] arrays = new IntArray[_count];
            using (var pch = _host.StartProgressChannel("Create IntArrays"))
            {
                int created = 0;
                pch.SetHeader(new ProgressHeader("arrays"), e => e.SetProgress(0, created, arrays.Length));
                IntArrayBits bits = IntArray.NumBitsNeeded(_bins);
                ch.Info("Bits per item is {0}", bits);
                int salt = _host.Rand.Next();
                Func <IChannel, Random, IEnumerable <int> > createIntArray;
                switch (_type)
                {
                case IntArrayType.Dense:
                    createIntArray = CreateDense;
                    break;

                case IntArrayType.Sparse:
                    createIntArray = CreateSparse;
                    // A parameter of 1 falls back to the dense generator.
                    if (_param == 1)
                    {
                        createIntArray = CreateDense;
                    }
                    break;

                default:
                    throw _host.ExceptNotImpl("Haven't yet wrote a random generator appropriate for {0}", _type);
                }

                ParallelEnumerable.Range(0, arrays.Length).ForAll(i =>
                {
                    // Each worker writes only its own slot and uses its own Random instance.
                    Random r  = new Random(salt + i);
                    arrays[i] = IntArray.New(_len, _type, bits, createIntArray(ch, r));
                    // The counter is shared by all workers; a plain ++ is not atomic and can lose
                    // updates, so increment it atomically.
                    System.Threading.Interlocked.Increment(ref created);
                });

                return(arrays);
            }
        }
Example #13
0
 /// <summary>
 /// Clones the contents of this sparse array into a new array.
 /// </summary>
 /// <param name="bitsPerItem">The number of bits per item in the created array. For a sparse
 /// result, anything up to 8 bits is stored as 8 bits.</param>
 /// <param name="type">The type of the new array. <c>Sparse</c> and <c>Current</c> give a sparse
 /// copy; every other type is served as a dense array.</param>
 /// <returns>The cloned array.</returns>
 public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
 {
     if (type == IntArrayType.Sparse || type == IntArrayType.Current)
     {
         if (bitsPerItem <= IntArrayBits.Bits8)
         {
             bitsPerItem = IntArrayBits.Bits8;
         }
         // Only the values are re-encoded; the delta array is passed on as-is.
         DenseIntArray newValues = _values.Clone(bitsPerItem, IntArrayType.Dense) as DenseIntArray;
         return(new DeltaSparseIntArray(newValues, _deltas, _length));
     }
     else
     {
         // Honor the requested width. Previously this array's own BitsPerItem was used here,
         // so the bitsPerItem argument was silently ignored for dense clones.
         DenseIntArray dense = IntArray.New(Length, IntArrayType.Dense, bitsPerItem) as DenseIntArray;
         int           index = 0;
         for (int i = 0; i < _values.Length; ++i)
         {
             // Deltas are offsets from the previous entry, so accumulate them into an absolute index.
             index       += _deltas[i];
             dense[index] = _values[i];
         }
         return(dense);
     }
 }
Example #14
0
        /// <summary>
        /// Builds a segment array from a sequence of values.
        /// </summary>
        /// <param name="length">The expected number of values; must equal the number of items enumerated.</param>
        /// <param name="values">The values to store, in order.</param>
        public SegmentIntArray(int length, IEnumerable <int> values)
        {
            using (Timer.Time(TimerEvent.SparseConstruction))
            {
                // Copy the values into a work array, tracking the largest one (as unsigned).
                var  work    = new uint[length];
                uint filled  = 0;
                uint largest = 0;
                foreach (int v in values)
                {
                    if (filled >= length)
                    {
                        throw Contracts.Except("Length provided to segment vector is inconsistent with value enumeration");
                    }
                    uint item = (uint)v;
                    work[filled++] = item;
                    if (item > largest)
                    {
                        largest = item;
                    }
                }
                if (filled != length)
                {
                    throw Contracts.Except("Length provided to segment vector is inconsistent with value enumeration");
                }

                // Find the segmentation for the work array, then build from it and adopt the result's fields.
                long totalBits;
                int  transitionCount;
                int  widestBits = BitsForValue(largest);
                SegmentFindOptimalPath(work, work.Length, widestBits, out totalBits, out transitionCount);
                var built = FromWorkArray(work, work.Length, totalBits, transitionCount);

                _bpi       = built._bpi;
                _length    = built._length;
                _data      = built._data;
                _segLength = built._segLength;
                _segType   = built._segType;
            }
        }
Example #15
0
 /// <summary>
 /// Clones this array by building a new array of the requested type and width from its values.
 /// </summary>
 /// <param name="bitsPerItem">The number of bits per item in the created array.</param>
 /// <param name="type">The type of the new array; forwarded unchanged to <c>IntArray.New</c>.</param>
 /// <returns>The array returned by <c>IntArray.New</c>.</returns>
 public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
     => IntArray.New(_length, type, bitsPerItem, this);
Example #16
0
 /// <summary>
 /// Cloning is not implemented for this array type; the call always throws.
 /// </summary>
 /// <param name="bitsPerItem">Unused.</param>
 /// <param name="type">Unused.</param>
 /// <returns>Never returns.</returns>
 public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
 {
     throw Contracts.ExceptNotImpl();
 }
Example #17
0
        /// <summary>
        /// Pins the histogram and input arrays and calls the delta-sparse sumup routine
        /// (the float or double variant, selected by <c>USE_SINGLE_PRECISION</c>).
        /// </summary>
        /// <param name="input">Supplies the outputs, weights, document indices and totals passed to the routine.</param>
        /// <param name="histogram">Supplies the per-bin target sums, weight sums and counts passed to the routine.</param>
        /// <param name="pValues">Pointer to the values, forwarded as-is.</param>
        /// <param name="pDeltas">Pointer to the deltas, forwarded as-is.</param>
        /// <param name="numDeltas">Number of deltas, forwarded as-is.</param>
        /// <param name="bitsPerItem">NOTE(review): this parameter is never read; the call below passes
        /// the <c>BitsPerItem</c> member instead. Confirm whether that is intended.</param>
        private unsafe void SumupCPlusPlusSparse(SumupInputData input, FeatureHistogram histogram, byte *pValues, byte *pDeltas, int numDeltas, IntArrayBits bitsPerItem)
        {
            // Pin every managed array for the duration of the call.
            fixed(FloatType *pSumTargetsByBin = histogram.SumTargetsByBin)
            fixed(FloatType * pSampleOutputs = input.Outputs)
            fixed(double *pSumWeightsByBin   = histogram.SumWeightsByBin)
            fixed(double *pSampleWeights     = input.Weights)
            fixed(int *pIndices    = input.DocIndices)
            fixed(int *pCountByBin = histogram.CountByBin)
            {
                int rv =
#if USE_SINGLE_PRECISION
                    C_SumupDeltaSparse_float
#else
                    C_SumupDeltaSparse_double
#endif
                        ((int)BitsPerItem, pValues, pDeltas, numDeltas, pIndices, pSampleOutputs, pSampleWeights,
                        pSumTargetsByBin, pSumWeightsByBin, pCountByBin,
                        input.TotalCount, input.SumTargets, input.SumWeights);

                // A negative return value is treated as failure.
                if (rv < 0)
                {
                    throw Contracts.Except("CSumup sumupdeltasparse {0}", rv);
                }
            }
        }
Example #18
0
 /// <summary>
 /// Clones the contents of this IntArray into a new IntArray.
 /// </summary>
 /// <param name="bitsPerItem">The number of bits per item in the created IntArray</param>
 /// <param name="type">The type of the new IntArray</param>
 /// <returns>The newly created IntArray.</returns>
 public abstract IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type);
Example #19
0
        /// <summary>
        /// Finds the most space efficient representation of the feature
        /// (with slight slack cut for dense features). The behavior of
        /// this method depends upon the static value <see cref="CompatibilityLevel"/>.
        /// </summary>
        /// <param name="workarray">Should be non-null if you want it to
        /// consider segment arrays. When supplied, it is overwritten with this
        /// array's values, so it is indexed up to this array's length.</param>
        /// <returns>Returns a more space efficient version of the array,
        /// or the item itself if that is impossible, somehow.</returns>
        public IntArray Compress(uint[] workarray = null)
        {
            int maxval     = 0;  // Largest value seen.
            int zerocount  = 0;  // Number of zero items.
            int runs       = 0;  // Number of items equal to their predecessor.
            int last       = -1; // Previous item.
            int overflows  = 0;  // Times the repeat counter exceeded a byte.
            int zoverflows = 0;  // Same, counted only when the repeated value is zero.
            int runnow     = 0; // The longest run of having the same value.
            int len        = Length;
            IIntArrayForwardIndexer ind = GetIndexer();

            // Single pass gathering the statistics the cost estimates below rely on.
            for (int i = 0; i < len; ++i)
            {
                int val = ind[i];
                if (workarray != null)
                {
                    workarray[i] = (uint)val;
                }
                if (val == 0)
                {
                    zerocount++;
                }
                else if (val > maxval)
                {
                    maxval = val;
                }
                if (last == val)
                {
                    runs++;
                    // NOTE(review): runnow is only reset on overflow, not when a run is broken by a
                    // different value, so it accumulates across separate runs. Confirm whether that
                    // is intended before changing it, since it affects which representation is chosen.
                    if (++runnow > byte.MaxValue)
                    {
                        // We have 256 items in a row the same.
                        overflows++;
                        if (val == 0)
                        {
                            zoverflows++;
                        }
                        runnow = 0;
                    }
                }
                last = val;
            }
            // Estimate the costs of the available options.
            IntArrayBits classicBits    = IntArray.NumBitsNeeded(maxval + 1);
            long         denseBits      = (long)classicBits * (long)Length;
            long         sparseBits     = (long)(Math.Max((int)classicBits, 8) + 8) * (long)(Length - zerocount + zoverflows);
            long         rleBits        = (long)(classicBits + 8) * (long)(Length - runs + overflows);
            long         segBits        = long.MaxValue;
            int          segTransitions = 0;

            // Segment cost is only estimated when a work array was supplied and values need at most 21 bits.
            if (workarray != null)
            {
                int bits = SegmentIntArray.BitsForValue((uint)maxval);
                if (bits <= 21)
                {
                    SegmentIntArray.SegmentFindOptimalPath(workarray, Length,
                                                           bits, out segBits, out segTransitions);
                }
            }
            // Repeat (run-length) arrays are only considered when bit 0x4 of the compatibility level is set.
            if ((IntArray.CompatibilityLevel & 0x4) == 0)
            {
                rleBits = long.MaxValue;
            }
            long         bestCost = Math.Min(Math.Min(Math.Min(denseBits, sparseBits), rleBits), segBits);
            IntArrayType bestType = IntArrayType.Dense;

            // Dense stays the choice unless another option is cheaper than 98% of the dense cost.
            if (bestCost >= denseBits * 98 / 100)
            {
                // Cut the dense bits a wee bit of slack.
            }
            else if (bestCost == sparseBits)
            {
                bestType = IntArrayType.Sparse;
            }
            else if (bestCost == rleBits)
            {
                bestType = IntArrayType.Repeat;
            }
            else
            {
                bestType = IntArrayType.Segmented;
            }
            // Already in the best representation at the needed width: nothing to do.
            if (bestType == Type && classicBits == BitsPerItem)
            {
                return(this);
            }
            IntArray bins = null;

            if (bestType != IntArrayType.Segmented)
            {
                bins = IntArray.New(Length, bestType, classicBits, this);
            }
            else
            {
                // Segmented can only win when segBits was computed, which requires a non-null work array.
                bins = SegmentIntArray.FromWorkArray(workarray, Length, segBits, segTransitions);
            }
            return(bins);
        }