Esempio n. 1
0
 /// <summary>
 /// Starts a new batch: advances the batch counter and clears the per-batch accumulators.
 /// </summary>
 private void BeginBatch()
 {
     _batch += 1;

     // Reset the running totals gathered during the previous batch.
     _biasUpdate       = 0;
     _numBatchExamples = 0;

     // Keep the length of the weight update but request zero explicit values.
     VBufferUtils.Resize(ref _weightsUpdate, _weightsUpdate.Length, 0);
 }
Esempio n. 2
0
            /// <summary>
            /// Writes into <paramref name="dst"/> the contents of <paramref name="src"/> with every
            /// explicitly stored value for which <paramref name="isNA"/> returns true removed.
            /// Only the values returned by <c>src.GetValues()</c> are tested.
            /// </summary>
            /// <param name="src">Input vector. May be swapped with <paramref name="dst"/> when nothing is dropped.</param>
            /// <param name="dst">Receives the vector with the NA entries removed.</param>
            /// <param name="isNA">Predicate identifying the values to drop.</param>
            private void DropNAs <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst, InPredicate <TDst> isNA)
            {
                Host.AssertValue(isNA);

                var srcValues = src.GetValues();
                int newCount  = 0;

                // First pass: count the explicit values that survive.
                for (int i = 0; i < srcValues.Length; i++)
                {
                    if (!isNA(in srcValues[i]))
                    {
                        newCount++;
                    }
                }
                Host.Assert(newCount <= srcValues.Length);

                if (newCount == 0)
                {
                    // Every explicit value is dropped; only the implicit slots remain.
                    VBufferUtils.Resize(ref dst, src.Length - srcValues.Length, 0);
                    return;
                }

                if (newCount == srcValues.Length)
                {
                    // Nothing to drop: hand the source buffer over instead of copying.
                    Utils.Swap(ref src, ref dst);
                    return;
                }

                int iDst = 0;

                if (src.IsDense)
                {
                    var editor = VBufferEditor.Create(ref dst, newCount);
                    for (int i = 0; i < srcValues.Length; i++)
                    {
                        if (!isNA(in srcValues[i]))
                        {
                            editor.Values[iDst] = srcValues[i];
                            iDst++;
                        }
                    }
                    Host.Assert(iDst == newCount);
                    dst = editor.Commit();
                }
                else
                {
                    // The number of dropped slots is (srcValues.Length - newCount), so the new length
                    // is src.Length minus that amount. This matches the newCount == 0 case above.
                    var newLength = src.Length - srcValues.Length + newCount;
                    var editor    = VBufferEditor.Create(ref dst, newLength, newCount);

                    var srcIndices = src.GetIndices();
                    int offset     = 0;
                    for (int i = 0; i < srcValues.Length; i++)
                    {
                        if (!isNA(in srcValues[i]))
                        {
                            editor.Values[iDst]  = srcValues[i];
                            editor.Indices[iDst] = srcIndices[i] - offset;
                            iDst++;
                        }
Esempio n. 3
0
            /// <summary>
            /// Writes into <paramref name="dst"/> a vector holding only the explicitly stored values of
            /// <paramref name="src"/> for which <paramref name="isNA"/> returns false.
            /// </summary>
            /// <param name="src">Input vector. May be swapped with <paramref name="dst"/> when nothing is dropped.</param>
            /// <param name="dst">Receives the surviving values.</param>
            /// <param name="isNA">Predicate identifying the values to drop.</param>
            private void DropNAsAndDefaults <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst, InPredicate <TDst> isNA)
            {
                Host.AssertValue(isNA);

                var values = src.GetValues();

                // First pass: count how many explicit values survive.
                int kept = 0;
                for (int k = 0; k < values.Length; k++)
                {
                    if (isNA(in values[k]))
                    {
                        continue;
                    }
                    kept++;
                }
                Host.Assert(kept <= values.Length);

                if (kept == 0)
                {
                    VBufferUtils.Resize(ref dst, 0);
                    return;
                }

                if (kept == values.Length)
                {
                    // No explicit value is NA: reuse the source buffer rather than copying it.
                    Utils.Swap(ref src, ref dst);
                    if (dst.IsDense)
                    {
                        return;
                    }
                    Host.Assert(dst.GetValues().Length == kept);
                    VBufferUtils.Resize(ref dst, kept);
                    return;
                }

                // The default value equals NA here, so implicit slots of a sparse vector are
                // dropped as well and the result is built as a dense vector.
                var editor  = VBufferEditor.Create(ref dst, kept);
                int written = 0;

                for (int k = 0; k < values.Length; k++)
                {
                    if (isNA(in values[k]))
                    {
                        continue;
                    }
                    editor.Values[written] = values[k];
                    written++;
                }
                Host.Assert(written == kept);

                dst = editor.Commit();
            }
        /// <summary>
        /// Fetches the slot names of the single column mapped to <paramref name="role"/>.
        /// When the schema is null, the role does not map to exactly one column, or that column
        /// has no slot names for <paramref name="vectorSize"/>, <paramref name="slotNames"/> is
        /// instead resized to <paramref name="vectorSize"/> with no explicit values.
        /// </summary>
        internal static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer <ReadOnlyMemory <char> > slotNames)
        {
            Contracts.CheckValueOrNull(schema);
            Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

            IReadOnlyList <Schema.Column> columns = schema?.GetColumns(role);

            bool hasNames = columns != null
                            && columns.Count == 1
                            && schema.Schema[columns[0].Index].HasSlotNames(vectorSize);

            if (hasNames)
            {
                schema.Schema[columns[0].Index].Metadata.GetValue(Kinds.SlotNames, ref slotNames);
                return;
            }

            VBufferUtils.Resize(ref slotNames, vectorSize, 0);
        }
Esempio n. 5
0
        /// <summary>
        /// Fetches the slot names of the single column mapped to <paramref name="role"/>.
        /// When the schema is null, the role does not map to exactly one column, or that column
        /// has no slot names for <paramref name="vectorSize"/>, <paramref name="slotNames"/> is
        /// instead resized to <paramref name="vectorSize"/> with no explicit values.
        /// </summary>
        public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer <ReadOnlyMemory <char> > slotNames)
        {
            Contracts.CheckValueOrNull(schema);
            Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

            IReadOnlyList <ColumnInfo> columns = schema?.GetColumns(role);

            bool hasNames = columns != null
                            && columns.Count == 1
                            && schema.Schema.HasSlotNames(columns[0].Index, vectorSize);

            if (hasNames)
            {
                schema.Schema.GetMetadata(Kinds.SlotNames, columns[0].Index, ref slotNames);
                return;
            }

            VBufferUtils.Resize(ref slotNames, vectorSize, 0);
        }
Esempio n. 6
0
        /// <summary>
        /// Combines the score vectors in <paramref name="src"/> by voting: each vector casts one
        /// vote for its arg-max index, and <paramref name="dst"/> receives the per-index share of votes.
        /// </summary>
        /// <param name="dst">Receives the normalized vote counts.</param>
        /// <param name="src">Score vectors to combine.</param>
        /// <param name="weights">Only checked for matching size; not otherwise used here.</param>
        private void CombineCore(ref VBuffer <Single> dst, VBuffer <Single>[] src, Single[] weights = null)
        {
            Host.AssertNonEmpty(src);
            Host.Assert(weights == null || Utils.Size(weights) == Utils.Size(src));

            int modelCount = Utils.Size(src);

            if (modelCount == 0)
            {
                VBufferUtils.Resize(ref dst, 0);
                return;
            }

            int classCount = GetClassCount(src);
            var editor     = VBufferEditor.Create(ref dst, classCount);

            // A reused buffer may still hold old values; start the tally from zero.
            if (!editor.CreatedNewValues)
            {
                editor.Values.Clear();
            }

            int votes = 0;

            for (int m = 0; m < modelCount; m++)
            {
                int winner = VectorUtils.ArgMax(in src[m]);
                if (winner < 0)
                {
                    // This vector casts no vote.
                    continue;
                }
                editor.Values[winner]++;
                votes++;
            }

            // Turn the raw tallies into fractions of the total number of votes.
            for (int c = 0; c < classCount; c++)
            {
                editor.Values[c] /= votes;
            }

            dst = editor.Commit();
        }
Esempio n. 7
0
        /// <summary>
        /// Evaluates one chunk of the index range [0, _maxIndex): sums the function values into
        /// <c>_tempVals[chunkIndex]</c> and the gradients into <c>_tempGrads[chunkIndex]</c>,
        /// then sets <c>_threadFinished[chunkIndex]</c>.
        /// </summary>
        /// <param name="chunkIndexObj">The chunk index, boxed as an <see cref="int"/>.</param>
        private void Eval(object chunkIndexObj)
        {
            int chunkIndex = (int)chunkIndexObj;

            // The first (_maxIndex % _threads) chunks take one extra index each so that the
            // whole range is covered.
            int smallSize = _maxIndex / _threads;
            int bigSize   = smallSize + 1;
            int bigCount  = _maxIndex % _threads;

            bool isBigChunk = chunkIndex < bigCount;
            int  from       = isBigChunk
                ? bigSize * chunkIndex
                : bigSize * bigCount + smallSize * (chunkIndex - bigCount);
            int to = from + (isBigChunk ? bigSize : smallSize);

            _tempVals[chunkIndex] = 0;
            VectorUtils.ScaleBy(ref _tempGrads[chunkIndex], 0);

            VBuffer <Float> scratchGrad = default(VBuffer <Float>);

            for (int i = from; i < to; ++i)
            {
                VBufferUtils.Resize(ref scratchGrad, 0, 0);
                _tempVals[chunkIndex] += _func(i, in _input, ref scratchGrad);

                if (_tempGrads[chunkIndex].Length != 0)
                {
                    VectorUtils.Add(in scratchGrad, ref _tempGrads[chunkIndex]);
                }
                else
                {
                    // The accumulator has length zero: adopt the gradient as-is.
                    scratchGrad.CopyTo(ref _tempGrads[chunkIndex]);
                }
            }

            _threadFinished[chunkIndex].Set();
        }
        /// <summary>
        /// Encodes <paramref name="input"/> as a length-2 vector with at most one explicit entry:
        /// zero gives no explicit entries, NaN gives the value 1 at index 1, and any other
        /// value is stored at index 0.
        /// </summary>
        private static void FillValues(Float input, ref VBuffer <Float> result)
        {
            if (input == 0)
            {
                VBufferUtils.Resize(ref result, 2, 0);
                return;
            }

            var  editor    = VBufferEditor.Create(ref result, 2, 1);
            bool isMissing = Float.IsNaN(input);

            editor.Values[0]  = isMissing ? 1 : input;
            editor.Indices[0] = isMissing ? 1 : 0;

            result = editor.Commit();
        }
 /// <summary>
 /// Builds a getter that resizes its target to <paramref name="length"/> with no explicit values.
 /// </summary>
 private Delegate MakeGetterVec <T>(int length)
 {
     ValueGetter <VBuffer <T> > getter = (ref VBuffer <T> value) =>
     {
         VBufferUtils.Resize(ref value, length, 0);
     };

     return getter;
 }
Esempio n. 10
0
        /// <summary>
        /// Drops slots from src and populates the dst with the resulting vector. Slots are
        /// dropped based on min and max slots that were passed at the constructor.
        /// </summary>
        /// <param name="src">Input vector. When there is nothing to drop it is swapped with <paramref name="dst"/>.</param>
        /// <param name="dst">Receives the vector with the slots in the configured ranges removed.</param>
        public void DropSlots <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst)
        {
            if (src.Length <= SlotsMin[0])
            {
                // There is nothing to drop, just swap buffers.
                Utils.Swap(ref src, ref dst);
                return;
            }

            // A DstLength of 0 is treated as "not fixed": the length is computed from the source length.
            int newLength = DstLength == 0 ? ComputeLength(src.Length) : DstLength;

            if (newLength == 0)
            {
                // All slots dropped.
                // Note that dst is resized with (1, 0), not (0, 0).
                VBufferUtils.Resize(ref dst, 1, 0);
                return;
            }

            Contracts.Assert(newLength < src.Length);

            // End of the trivial cases
            // At this point, we need to drop some slots and keep some slots.
            VBufferEditor <TDst> editor;
            var srcValues = src.GetValues();

            if (src.IsDense)
            {
                editor = VBufferEditor.Create(ref dst, newLength);

                int iDst = 0;
                int iSrc = 0;
                // For each range: copy the slots in front of it, then jump past its last slot.
                for (int i = 0; i < SlotsMax.Length && iSrc < src.Length; i++)
                {
                    var lim = Math.Min(SlotsMin[i], src.Length);
                    while (iSrc < lim)
                    {
                        Contracts.Assert(iDst <= iSrc);
                        editor.Values[iDst++] = srcValues[iSrc++];
                    }
                    iSrc = SlotsMax[i] + 1;
                }
                // Copy whatever remains after the last range.
                while (iSrc < src.Length)
                {
                    Contracts.Assert(iDst <= iSrc);
                    editor.Values[iDst++] = srcValues[iSrc++];
                }
                Contracts.Assert(iDst == newLength);
                dst = editor.Commit();
                return;
            }

            // Sparse case.
            // Approximate new count is min(#indices, newLength).
            var newCount   = Math.Min(srcValues.Length, newLength);
            // NOTE(review): 'indices' is never read in this method; it looks like a leftover local.
            var indices    = dst.GetIndices();
            var srcIndices = src.GetIndices();

            Contracts.Assert(newCount <= src.Length);

            editor = VBufferEditor.Create(
                ref dst,
                newLength,
                newCount,
                requireIndicesOnDense: true);

            // iiSrc/iiDst walk the explicit entries of src/dst; iRange is the current drop range
            // [min, max]; iOffset is subtracted from each kept source index.
            int iiDst   = 0;
            int iiSrc   = 0;
            int iOffset = 0;
            int iRange  = 0;
            int min     = SlotsMin[iRange];
            // REVIEW: Consider using a BitArray with the slots to keep instead of SlotsMax. It would
            // only make sense when the number of ranges is greater than the number of slots divided by 32.
            int max = SlotsMax[iRange];

            while (iiSrc < srcValues.Length)
            {
                // Copy (with offset) the elements before the current range.
                var index = srcIndices[iiSrc];
                if (index < min)
                {
                    Contracts.Assert(iiDst <= iiSrc);
                    editor.Indices[iiDst]  = index - iOffset;
                    editor.Values[iiDst++] = srcValues[iiSrc++];
                    continue;
                }
                if (index <= max)
                {
                    // Skip elements in the current range.
                    iiSrc++;
                    continue;
                }

                // Find the next range.
                // Scan linearly; when at least threshold1 ranges remain and the range threshold2
                // ahead still ends before index, switch to FindIndexSorted (presumably a binary
                // search over the sorted SlotsMax -- confirm).
                const int threshold1 = 20;
                const int threshold2 = 10;
                while (++iRange < SlotsMax.Length && SlotsMax[iRange] < index)
                {
                    if (SlotsMax.Length - iRange >= threshold1 &&
                        SlotsMax[iRange + threshold2] < index)
                    {
                        iRange = SlotsMax.FindIndexSorted(iRange + threshold2, SlotsMax.Length, index);
                        Contracts.Assert(iRange == SlotsMax.Length ||
                                         iRange > 0 && SlotsMax[iRange - 1] < index && index <= SlotsMax[iRange]);
                        break;
                    }
                }
                if (iRange < SlotsMax.Length)
                {
                    min = SlotsMin[iRange];
                    max = SlotsMax[iRange];
                }
                else
                {
                    // No ranges left: move the bounds to src.Length so that every remaining index is kept.
                    min = max = src.Length;
                }
                if (iRange > 0)
                {
                    // Presumably the total number of slots dropped by ranges 0..iRange-1 -- confirm
                    // against where _lengthReduction is built.
                    iOffset = _lengthReduction[iRange - 1];
                }
                Contracts.Assert(index <= max);
            }

            // newCount was only an upper bound; keep just the entries actually written.
            dst = editor.CommitTruncated(iiDst);
        }
Esempio n. 11
0
        /// <summary>
        /// Computes initial weights by running SGD until the change between weight snapshots,
        /// checked every 1000 examples, has a norm below <c>SgdInitializationTolerance</c>.
        /// </summary>
        /// <param name="ch">Channel used for progress messages.</param>
        /// <param name="cursorFactory">Creates the cursors that supply training examples; a new cursor is created whenever the current one is exhausted.</param>
        /// <returns>The weights produced by the SGD optimizer.</returns>
        private protected virtual VBuffer <float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory)
        {
            if (!Quiet)
            {
                ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance);
            }

            int seen     = 0;
            var previous = VBufferUtils.CreateEmpty <float>(BiasCount + WeightCount);

            // Termination test: only every 1000th call compares the weights with the last snapshot.
            DTerminate terminateSgd =
                (in VBuffer <float> x) =>
            {
                seen++;
                if (seen % 1000 != 0)
                {
                    return false;
                }

                // previous <- previous - x, whose norm is the size of the change.
                VectorUtils.AddMult(in x, -1, ref previous);
                float delta = VectorUtils.Norm(previous);
                x.CopyTo(ref previous);

                // REVIEW: this was guarded by OLD_TRACING; how should it be ported?
                if (!Quiet)
                {
                    Console.Write(".");
                    if (seen % 50000 == 0)
                    {
                        Console.WriteLine("\t{0}\t{1}", seen, delta);
                    }
                }

                return delta < SgdInitializationTolerance;
            };

            VBuffer <float>  result = default(VBuffer <float>);
            FloatLabelCursor cursor = null;

            try
            {
                float[] scratch = null;

                SgdOptimizer.DStochasticGradient lossSgd =
                    (in VBuffer <float> x, ref VBuffer <float> grad) =>
                {
                    // Sparsify the gradient so that it starts from zero.
                    VBufferUtils.Resize(ref grad, grad.Length, 0);
                    EnsureBiases(ref grad);

                    // Advance to the next example, restarting from a fresh cursor when needed.
                    if (cursor == null || !cursor.MoveNext())
                    {
                        cursor?.Dispose();
                        cursor = cursorFactory.Create();
                        if (!cursor.MoveNext())
                        {
                            // Even a fresh cursor has no rows: leave the gradient as it is.
                            return;
                        }
                    }

                    AccumulateOneGradient(in cursor.Features, cursor.Label, cursor.Weight, in x, ref grad, ref scratch);
                };

                VBuffer <float> sgdWeights = DenseOptimizer
                    ? VBufferUtils.CreateDense <float>(BiasCount + WeightCount)
                    : VBufferUtils.CreateEmpty <float>(BiasCount + WeightCount);

                var optimizer = new SgdOptimizer(terminateSgd);
                optimizer.Minimize(lossSgd, ref sgdWeights, ref result);

                // REVIEW: this was guarded by OLD_TRACING; how should it be ported?
                if (!Quiet)
                {
                    Console.WriteLine();
                }

                ch.Info("SGD initialization done in {0} rounds", seen);
            }
            finally
            {
                cursor?.Dispose();
            }

            return result;
        }
 // Deliberately an instance method: delegates bound to instance methods are more
 // efficient than delegates bound to static methods.
 private void VecTrivialGetter <TDst>(ref VBuffer <TDst> value)
     => VBufferUtils.Resize(ref value, 1, 0);
Esempio n. 13
0
 /// <summary>
 /// Produces the "missing" value: a vector of size <c>Type.VectorSize</c> with no explicit values.
 /// </summary>
 protected override void GetMissing(ref VBuffer <TItem> dst)
     => VBufferUtils.Resize(ref dst, Type.VectorSize, 0);