コード例 #1
0
ファイル: Feature.cs プロジェクト: zyw400/machinelearning
        /// <summary>
        /// Restores a feature from a serialized byte buffer.
        /// </summary>
        /// <param name="buffer">Bytes holding the serialized bins.</param>
        /// <param name="position">Read offset into <paramref name="buffer"/>; handed by reference
        /// to <c>IntArray.New</c>, which presumably advances it past the bytes it consumes.</param>
        protected Feature(byte[] buffer, ref int position)
        {
            // The bins are read first; the cache lookup below only happens once that succeeded.
            Bins = IntArray.New(buffer, ref position);
#if !NO_STORE
            // Only compiled when the object store is enabled (NO_STORE not defined).
            BinsCache = FileObjectStore<IntArrayFormatter>.GetDefaultInstance();
#endif
        }
コード例 #2
0
 /// <summary>
 /// Restores a repeat-encoded int array from a serialized byte buffer.
 /// </summary>
 /// <param name="buffer">Bytes holding the serialized array.</param>
 /// <param name="position">Read offset into <paramref name="buffer"/>, passed by reference to each read.</param>
 public DeltaRepeatIntArray(byte[] buffer, ref int position)
 {
     _length = buffer.ToInt(ref position);

     // Peek at the integer stored at the current offset without moving
     // the caller's position: the read is done through a throw-away copy.
     // NOTE(review): presumably this is the length prefix that ToByteArray consumes next - confirm.
     int peekPosition = position;
     _deltasActualLength = buffer.ToInt(ref peekPosition);

     _deltas = buffer.ToByteArray(ref position);

     IntArray restoredValues = IntArray.New(buffer, ref position);
     _values = restoredValues as DenseIntArray;
 }
コード例 #3
0
        /// <summary>
        /// Builds a delta-encoded sparse int array from ordered (index, value) pairs.
        /// </summary>
        /// <param name="length">Total number of items in the array being built.</param>
        /// <param name="bitsPerItem">Bit width needed for the values. Widths up to
        /// <c>Bits8</c> are stored as <c>Bits8</c>; <c>Bits0</c> is rejected.</param>
        /// <param name="nonZeroValues">Pairs of (index, value), with indices strictly
        /// increasing over the enumeration.</param>
        public DeltaSparseIntArray(int length, IntArrayBits bitsPerItem, IEnumerable<KeyValuePair<int, int>> nonZeroValues)
        {
            using (Timer.Time(TimerEvent.SparseConstruction))
            {
                var values = new List<int>();
                var deltas = new List<byte>();

                // Index of the most recently emitted entry (0 before anything was emitted).
                int cursor = 0;
                foreach (KeyValuePair<int, int> entry in nonZeroValues)
                {
                    int index = entry.Key;

                    // Index 0 is accepted only as the very first entry; afterwards every
                    // index has to be larger than the one before it.
                    bool outOfOrder = index <= cursor && (index < 0 || values.Count > 0);
                    if (outOfOrder)
                    {
                        throw Contracts.Except("index {0} occurred after {1}", index, cursor);
                    }

                    // A delta is a single byte, so larger gaps are bridged with
                    // maximum-size steps that carry a zero value.
                    for (; index - cursor > byte.MaxValue; cursor += byte.MaxValue)
                    {
                        deltas.Add(byte.MaxValue);
                        values.Add(0);
                    }

                    deltas.Add((byte)(index - cursor));
                    values.Add(entry.Value);
                    cursor = index;
                }

                // Bridge the remaining distance to the end of the array the same way.
                for (; length - cursor > byte.MaxValue; cursor += byte.MaxValue)
                {
                    deltas.Add(byte.MaxValue);
                    values.Add(0);
                }

                if (cursor > 0 && cursor >= length)
                {
                    throw Contracts.Except("Index {0} inconsistent with length {1}", cursor, length);
                }
                _length = length;

                if (bitsPerItem == IntArrayBits.Bits0)
                {
                    throw Contracts.Except("Use dense arrays for 0 bits");
                }

                // It is faster not to use a 4-bit dense array here, and the memory cost is minor
                // because only the sparse values are stored.
                // TODO: Implement a special iterator for the 4-bit array and use it here; it may be faster.
                IntArrayBits valueBits = bitsPerItem <= IntArrayBits.Bits8 ? IntArrayBits.Bits8 : bitsPerItem;

                IntArray denseValues = IntArray.New(values.Count, IntArrayType.Dense, valueBits, values);
                _values = denseValues as DenseIntArray;
                _deltas = deltas.ToArray();
            }
        }
コード例 #4
0
 /// <summary>
 /// Restores a delta sparse int array from a serialized byte buffer.
 /// </summary>
 /// <param name="buffer">Bytes holding the serialized array.</param>
 /// <param name="position">Read offset into <paramref name="buffer"/>, passed by reference to each read.</param>
 public DeltaSparseIntArray(byte[] buffer, ref int position)
 {
     // Read order: total length, then the delta bytes, then the dense values.
     _length = buffer.ToInt(ref position);

     // REVIEW: The two disabled statements below are kept exactly as they originally appeared.
     // Their purpose is unknown; they were judged to leave no valid result in _deltasActualLength.
     //_deltasActualLength = position;
     //_deltasActualLength = buffer.ToInt(ref _deltasActualLength);
     _deltas = buffer.ToByteArray(ref position);

     IntArray restoredValues = IntArray.New(buffer, ref position);
     _values = restoredValues as DenseIntArray;
 }
コード例 #5
0
        /// <summary>
        /// Builds a repeat-encoded int array from a sequence of values: each run of equal
        /// consecutive values is stored as one value plus a one-byte repeat count.
        /// </summary>
        /// <param name="length">The number of items <paramref name="values"/> is expected to yield.</param>
        /// <param name="bitsPerItem">Bit width needed for the values. Widths up to
        /// <c>Bits8</c> are stored as <c>Bits8</c>; <c>Bits0</c> is rejected.</param>
        /// <param name="values">The items of the array, in order.</param>
        public DeltaRepeatIntArray(int length, IntArrayBits bitsPerItem, IEnumerable<int> values)
        {
            using (Timer.Time(TimerEvent.SparseConstruction))
            {
                List<int> tempValueList = new List<int>();
                List<byte> tempDeltaList = new List<byte>();

                _length = 0;

                byte delta = 0;
                int lastVal = 0;

                foreach (int val in values)
                {
                    // A new run starts on the very first item, whenever the value changes, or when
                    // the current run has reached the largest count a byte can hold.
                    // The explicit first-item test replaces a -1 sentinel in lastVal: with the
                    // sentinel, a leading value of -1 was never recorded in the value list while a
                    // count was still emitted for it, leaving the two lists out of step.
                    if (_length == 0 || val != lastVal || delta == byte.MaxValue)
                    {
                        tempValueList.Add(val);
                        lastVal = val;
                        if (_length != 0)
                        {
                            // Close the previous run.
                            tempDeltaList.Add(delta);
                        }
                        delta = 0;
                    }
                    ++delta;
                    ++_length;
                }
                if (delta > 0)
                {
                    // Close the final run.
                    tempDeltaList.Add(delta);
                }

                if (_length != length)
                {
                    throw Contracts.Except("Length provided to repeat vector is inconsistent with value enumeration");
                }

                // It is faster not to use a 4-bit dense array here. The memory difference is minor, since it's just
                //  the sparse values that are saved on.
                // TODO: Implement a special iterator for 4-bit array, and change this code to use the iterator, which
                //          may be faster
                if (bitsPerItem == IntArrayBits.Bits0)
                {
                    throw Contracts.Except("Use dense arrays for 0 bits");
                }
                if (bitsPerItem <= IntArrayBits.Bits8)
                {
                    bitsPerItem = IntArrayBits.Bits8;
                }

                _values = IntArray.New(tempValueList.Count, IntArrayType.Dense, bitsPerItem, tempValueList) as DenseIntArray;
                _deltas = tempDeltaList.ToArray();
            }
        }
コード例 #6
0
        /// <summary>
        /// Splits this array into several sparse arrays, one per entry of <paramref name="assignment"/>.
        /// </summary>
        /// <param name="assignment">For each part, the item indices of this array that go into that part.</param>
        /// <returns>One new sparse array per part, in the order of <paramref name="assignment"/>.</returns>
        public override IntArray[] Split(int[][] assignment)
        {
            int partCount = assignment.Length;
            IntArray[] result = new IntArray[partCount];

            for (int p = 0; p < partCount; ++p)
            {
                // Each part gets its own indexer.
                IIntArrayForwardIndexer forward = GetIndexer();
                int[] rows = assignment[p];
                result[p] = IntArray.New(rows.Length, IntArrayType.Sparse, BitsPerItem, rows.Select(row => forward[row]));
            }

            return result;
        }
コード例 #7
0
        /// <summary>
        /// Splits this array into several dense arrays, one per entry of <paramref name="assignment"/>.
        /// </summary>
        /// <param name="assignment">For each part, the item indices of this array that go into that part.</param>
        /// <returns>One new dense array per part, in the order of <paramref name="assignment"/>.</returns>
        public override IntArray[] Split(int[][] assignment)
        {
            IntArray[] result = new IntArray[assignment.Length];

            for (int part = 0; part < result.Length; ++part)
            {
                int[] rows = assignment[part];
                result[part] = IntArray.New(rows.Length, IntArrayType.Dense, BitsPerItem, rows.Select(row => this[row]));
            }

            return result;
        }
コード例 #8
0
        /// <summary>
        /// Creates a feature named "m:QueryId" from the query ids of a dataset skeleton.
        /// Each document's bin is the rank of its query's id among the distinct ids,
        /// after the ids have been cast to <c>uint</c>.
        /// </summary>
        /// <param name="skel">Skeleton supplying the query ids, the query boundaries and the document count.</param>
        /// <returns>The query-id feature; its value map is the sorted list of distinct ids.</returns>
        public static TsvFeature CreateFeatureFromQueryId(Dataset.DatasetSkeleton skel)
        {
            // Rank every distinct id by its position in ascending order.
            var rankById = new Dictionary<uint, int>();
            IEnumerable<uint> ascendingIds = skel.QueryIds.Select(qid => (uint)qid).Distinct().OrderBy(x => x);
            foreach (uint id in ascendingIds)
            {
                int rank = rankById.Count;
                rankById[id] = rank;
            }

            // Every query contributes its rank once per document it spans.
            IntArray bins = IntArray.New(
                skel.NumDocs,
                IntArrayType.Dense,
                IntArray.NumBitsNeeded(rankById.Count),
                skel.QueryIds.SelectMany((qid, i) =>
                                         Enumerable.Repeat(rankById[(uint)qid], skel.Boundaries[i + 1] - skel.Boundaries[i])));

            uint[] valueMap = rankById.Keys.OrderBy(x => x).ToArray(rankById.Count);

            return new TsvFeature(bins, valueMap, "m:QueryId");
        }
コード例 #9
0
        /// <summary>
        /// Creates a feature named "m:Rating" from labels. This is needed because freeform
        /// evaluations can use m:Rating as a feature, which requires a matching transformation.
        /// </summary>
        /// <remarks>
        /// The ratings themselves are used as bins (stored with 8 bits each), and bin <c>r</c>
        /// maps to the feature value <c>r + 5</c> for every <c>r</c> from 0 up to the largest rating.
        /// NOTE(review): the original author assumed labels only range from 0 through 4 - confirm with callers.
        /// </remarks>
        /// <param name="ratings">The label of each document.</param>
        /// <returns>The rating feature.</returns>
        public static TsvFeature CreateFeatureFromRatings(short[] ratings)
        {
            short maxLab = 0;
            if (ratings.Length > 0)
            {
                maxLab = ratings.Max();
            }

            IEnumerable<int> bins = ratings.Select(x => (int)x);
            IntArray ratingAsFeature = IntArray.New(ratings.Length, IntArrayType.Dense, IntArrayBits.Bits8, bins);

            int binCount = ((int)maxLab) + 1;
            uint[] valueMap = Enumerable.Range(0, binCount).Select(x => (uint)x + 5).ToArray();

            return new TsvFeature(ratingAsFeature, valueMap, "m:Rating");
        }
コード例 #10
0
ファイル: Feature.cs プロジェクト: zyw400/machinelearning
        /// <summary>
        /// Concatenates the bins of several features into one array whose bins refer to
        /// <paramref name="concatValueMap"/>.
        /// </summary>
        /// <param name="parts">The features whose bins are concatenated, in order.</param>
        /// <param name="concatValueMap">The value map of the concatenated feature.</param>
        /// <returns>A delta sparse array when the bit width is not <c>Bits0</c> and every part
        /// is delta sparse; otherwise a dense array.</returns>
        private static IntArray ConcatBins(TsvFeature[] parts, uint[] concatValueMap)
        {
            using (Timer.Time(TimerEvent.ConcatBins))
            {
                int totalLength = parts.Sum(x => x.Length);
                IntArrayBits bits = IntArray.NumBitsNeeded(concatValueMap.Length);
                DenseIntArray merged = (DenseIntArray)IntArray.New(totalLength, IntArrayType.Dense, bits);

                int writeAt = 0;
                foreach (TsvFeature part in parts)
                {
                    IntArray bins = part.Bins;

                    if (concatValueMap.Length != part.ValueMap.Length)
                    {
                        // The part uses a different value map, so its bins are translated.
                        int[] binMap = MakeBinMap(part._valueMap, concatValueMap);
                        foreach (int bin in bins)
                        {
                            merged[writeAt++] = binMap[bin];
                        }
                    }
                    else
                    {
                        // Value maps of equal size: the bins are copied unchanged.
                        foreach (int bin in bins)
                        {
                            merged[writeAt++] = bin;
                        }
                    }
                }

                bool keepSparse = bits != IntArrayBits.Bits0 && parts.All(x => x.Bins is DeltaSparseIntArray);
                if (!keepSparse)
                {
                    return merged;
                }

                return new DeltaSparseIntArray(totalLength, bits, merged);
            }
        }
コード例 #11
0
ファイル: Feature.cs プロジェクト: zyw400/machinelearning
        /// <summary>
        /// Concatenates an array of features into one long feature.
        /// </summary>
        /// <remarks>
        /// A single part is returned as is when its bins already use the bit width its value
        /// map requires; otherwise its bins are re-encoded with that width. Several parts are
        /// merged under the union of their value maps, and the result takes the name of the first part.
        /// </remarks>
        /// <param name="parts">An array of features; must contain at least one element.</param>
        /// <returns>A concatenated feature</returns>
        public static TsvFeature Concat(TsvFeature[] parts)
        {
            if (parts.Length == 1)
            {
                TsvFeature only = parts[0];
                IntArrayBits bitsPerItem = IntArray.NumBitsNeeded(only.ValueMap.Length);
                if (bitsPerItem == only.Bins.BitsPerItem)
                {
                    return only;
                }

                IntArray b = only.Bins;
                IntArray newBins = IntArray.New(b.Length, b.Type, bitsPerItem, b);
                return new TsvFeature(newBins, only.ValueMap, only._name);
            }

            uint[] concatValueMap = Algorithms.MergeSortedUniqued(parts.Select(x => x.ValueMap).ToArray());

            // The bit width for the merged value map is derived inside ConcatBins, so it is
            // not computed here (the former local assignment was never read).
            IntArray concatBins = ConcatBins(parts, concatValueMap);

            return new TsvFeature(concatBins, concatValueMap, parts[0]._name);
        }
コード例 #12
0
 /// <summary>
 /// Creates a copy of this array with the requested bit width and representation.
 /// </summary>
 /// <param name="bitsPerItem">Bit width of the copy. For a sparse copy, widths up to
 /// <c>Bits8</c> are raised to <c>Bits8</c>.</param>
 /// <param name="type"><c>Sparse</c> or <c>Current</c> produces a delta sparse copy;
 /// every other type produces a dense copy.</param>
 /// <returns>The copy.</returns>
 public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
 {
     if (type == IntArrayType.Sparse || type == IntArrayType.Current)
     {
         if (bitsPerItem <= IntArrayBits.Bits8)
         {
             bitsPerItem = IntArrayBits.Bits8;
         }
         DenseIntArray newValues = _values.Clone(bitsPerItem, IntArrayType.Dense) as DenseIntArray;
         return new DeltaSparseIntArray(newValues, _deltas, _length);
     }

     // Expand into a dense array of the requested width. This used to pass the
     // instance's own BitsPerItem, silently ignoring the bitsPerItem argument.
     DenseIntArray dense = IntArray.New(Length, IntArrayType.Dense, bitsPerItem) as DenseIntArray;

     // Walk the stored entries, accumulating deltas to recover each absolute position.
     int index = 0;
     for (int i = 0; i < _values.Length; ++i)
     {
         index += _deltas[i];
         dense[index] = _values[i];
     }
     return dense;
 }
コード例 #13
0
 /// <summary>
 /// Creates a copy of this array with the requested bit width and representation.
 /// </summary>
 /// <param name="bitsPerItem">Bit width of the copy.</param>
 /// <param name="type">Representation of the copy.</param>
 /// <returns>A new array built from the items of this one.</returns>
 public override IntArray Clone(IntArrayBits bitsPerItem, IntArrayType type)
 {
     IntArray copy = IntArray.New(_length, type, bitsPerItem, this);
     return copy;
 }
コード例 #14
0
ファイル: IntArray.cs プロジェクト: zyw400/machinelearning
        /// <summary>
        /// Finds the most space efficient representation of the feature
        /// (with slight slack cut for dense features). The behavior of
        /// this method depends upon the static value <see cref="CompatibilityLevel"/>.
        /// </summary>
        /// <remarks>
        /// One pass over the items gathers the largest value, the number of zeros and the
        /// number of items equal to their predecessor. From those the storage cost in bits is
        /// estimated for the dense, sparse, repeat and (optionally) segmented representations,
        /// and the cheapest one is chosen. Dense is kept unless the cheapest estimate is below
        /// 98% of the dense estimate.
        /// </remarks>
        /// <param name="workarray">Should be non-null if you want it to
        /// consider segment arrays. When supplied, every item is written into it
        /// (cast to <c>uint</c>) at the item's index, so it is indexed up to <c>Length - 1</c>.</param>
        /// <returns>Returns a more space efficient version of the array,
        /// or this instance when the chosen type and bit width match the current ones.</returns>
        public IntArray Compress(uint[] workarray = null)
        {
            int maxval     = 0;
            int zerocount  = 0;
            int runs       = 0;
            int last       = -1;
            int overflows  = 0;
            int zoverflows = 0;
            // Counts items equal to their predecessor since the last overflow reset.
            // NOTE(review): it is only reset on overflow, not when the value changes, so it
            // is not the length of the current run - confirm whether that is intended.
            int runnow     = 0;
            int len        = Length;
            IIntArrayForwardIndexer ind = GetIndexer();

            for (int i = 0; i < len; ++i)
            {
                int val = ind[i];
                if (workarray != null)
                {
                    workarray[i] = (uint)val;
                }
                if (val == 0)
                {
                    zerocount++;
                }
                else if (val > maxval)
                {
                    maxval = val;
                }
                if (last == val)
                {
                    // This item repeats the previous one.
                    runs++;
                    if (++runnow > byte.MaxValue)
                    {
                        // The repeat counter went past what a byte can hold.
                        overflows++;
                        if (val == 0)
                        {
                            zoverflows++;
                        }
                        runnow = 0;
                    }
                }
                last = val;
            }
            // Estimate the costs of the available options.
            // Sparse: per stored entry, the value (at least 8 bits) plus 8 more bits; entries are
            // the non-zero items plus the zero overflows.
            // Repeat: per stored entry, the value plus 8 more bits; entries are the items that
            // differ from their predecessor plus the overflows.
            IntArrayBits classicBits    = IntArray.NumBitsNeeded(maxval + 1);
            long         denseBits      = (long)classicBits * (long)Length;
            long         sparseBits     = (long)(Math.Max((int)classicBits, 8) + 8) * (long)(Length - zerocount + zoverflows);
            long         rleBits        = (long)(classicBits + 8) * (long)(Length - runs + overflows);
            long         segBits        = long.MaxValue;
            int          segTransitions = 0;

            // The segmented estimate is only attempted with a work array and when the
            // largest value needs at most 21 bits.
            if (workarray != null)
            {
                int bits = SegmentIntArray.BitsForValue((uint)maxval);
                if (bits <= 21)
                {
                    SegmentIntArray.SegmentFindOptimalPath(workarray, Length,
                                                           bits, out segBits, out segTransitions);
                }
            }
            // The repeat representation is only a candidate when bit 0x4 of the compatibility level is set.
            if ((IntArray.CompatibilityLevel & 0x4) == 0)
            {
                rleBits = long.MaxValue;
            }
            long         bestCost = Math.Min(Math.Min(Math.Min(denseBits, sparseBits), rleBits), segBits);
            IntArrayType bestType = IntArrayType.Dense;

            if (bestCost >= denseBits * 98 / 100)
            {
                // Cut the dense bits a wee bit of slack.
            }
            else if (bestCost == sparseBits)
            {
                bestType = IntArrayType.Sparse;
            }
            else if (bestCost == rleBits)
            {
                bestType = IntArrayType.Repeat;
            }
            else
            {
                bestType = IntArrayType.Segmented;
            }
            // Nothing to do when this array already has the chosen type and bit width.
            if (bestType == Type && classicBits == BitsPerItem)
            {
                return(this);
            }
            IntArray bins = null;

            if (bestType != IntArrayType.Segmented)
            {
                bins = IntArray.New(Length, bestType, classicBits, this);
            }
            else
            {
                // Segmented arrays are built from the values captured in the work array.
                bins = SegmentIntArray.FromWorkArray(workarray, Length, segBits, segTransitions);
            }
            return(bins);
        }
コード例 #15
0
        /// <summary>
        /// Builds a sparse IntArray holding only the items selected by <paramref name="itemIndices"/>.
        /// </summary>
        /// <param name="itemIndices">Indices of the items to copy, in the order they should appear in the copy.</param>
        /// <returns>The new IntArray, with the same bit width as this one.</returns>
        public override IntArray Clone(int[] itemIndices)
        {
            IIntArrayForwardIndexer forward = GetIndexer();
            int count = itemIndices.Length;

            IntArray subset = IntArray.New(count, IntArrayType.Sparse, BitsPerItem, itemIndices.Select(i => forward[i]));
            return subset;
        }
コード例 #16
0
        /// <summary>
        /// Rebuilds an IntArray from its byte array representation, reading from the start of the buffer.
        /// </summary>
        /// <param name="buffer">The serialized IntArray.</param>
        /// <returns>The deserialized IntArray.</returns>
        private IntArray Deserialize(byte[] buffer)
        {
            int offset = 0;
            IntArray restored = IntArray.New(buffer, ref offset, true);
            return restored;
        }
コード例 #17
0
 /// <summary>
 /// Builds a dense IntArray holding only the items selected by <paramref name="itemIndices"/>.
 /// </summary>
 /// <param name="itemIndices">Indices of the items to copy, in the order they should appear in the copy.</param>
 /// <returns>The new IntArray, with the same bit width as this one.</returns>
 public override IntArray Clone(int[] itemIndices)
 {
     int count = itemIndices.Length;

     IntArray subset = IntArray.New(count, IntArrayType.Dense, BitsPerItem, itemIndices.Select(i => this[i]));
     return subset;
 }