/// <summary>
/// Opens a new batch: bumps the batch counter and resets the per-batch example count,
/// bias update and weights update.
/// </summary>
private void BeginBatch()
{
    _batch += 1;

    // Per-batch accumulators start from scratch.
    _numBatchExamples = 0;
    _biasUpdate = 0;

    // Keep the current length of the weights update, asking for 0 as the last argument
    // (presumably the number of explicitly stored values -- confirm against VBufferUtils.Resize).
    int currentLength = _weightsUpdate.Length;
    VBufferUtils.Resize(ref _weightsUpdate, currentLength, 0);
}
/// <summary>
/// Writes into <paramref name="dst"/> the contents of <paramref name="src"/> with every stored
/// value for which <paramref name="isNA"/> returns true left out.
/// </summary>
/// <remarks>
/// The stored values of src are scanned once to count the survivors (newCount):
/// - newCount == 0: dst is resized using src.Length - srcValues.Length and 0.
/// - newCount == srcValues.Length: nothing is dropped and src/dst are simply swapped.
/// - dense src: the surviving values are copied into an editor created with newCount.
/// - otherwise: surviving values and their indices (minus a running offset) are copied.
/// NOTE(review): the sparse branch computes newLength as src.Length - srcValues.Length - newCount,
/// while the all-NA case uses src.Length - srcValues.Length. Dropping (srcValues.Length - newCount)
/// entries would suggest "+ newCount" here -- TODO confirm the intended length.
/// </remarks>
private void DropNAs <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst, InPredicate <TDst> isNA) { Host.AssertValue(isNA); var srcValues = src.GetValues(); int newCount = 0; for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) { newCount++; } } Host.Assert(newCount <= srcValues.Length); if (newCount == 0) { VBufferUtils.Resize(ref dst, src.Length - srcValues.Length, 0); return; } if (newCount == srcValues.Length) { Utils.Swap(ref src, ref dst); return; } int iDst = 0; if (src.IsDense) { var editor = VBufferEditor.Create(ref dst, newCount); for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) { editor.Values[iDst] = srcValues[i]; iDst++; } } Host.Assert(iDst == newCount); dst = editor.Commit(); } else { var newLength = src.Length - srcValues.Length - newCount; var editor = VBufferEditor.Create(ref dst, newLength, newCount); var srcIndices = src.GetIndices(); int offset = 0; for (int i = 0; i < srcValues.Length; i++) { if (!isNA(in srcValues[i])) { editor.Values[iDst] = srcValues[i]; editor.Indices[iDst] = srcIndices[i] - offset; iDst++; }
/// <summary>
/// Writes into <paramref name="dst"/> only those stored values of <paramref name="src"/> for which
/// <paramref name="isNA"/> returns false. The result is always built without indices, because
/// (per the original author's note) the default value equals NA and is therefore dropped too.
/// </summary>
/// <param name="src">Input buffer; it may be swapped with <paramref name="dst"/> when nothing is dropped.</param>
/// <param name="dst">Output buffer.</param>
/// <param name="isNA">Predicate deciding whether a value is NA. Must not be null.</param>
private void DropNAsAndDefaults<TDst>(ref VBuffer<TDst> src, ref VBuffer<TDst> dst, InPredicate<TDst> isNA)
{
    Host.AssertValue(isNA);

    var stored = src.GetValues();

    // Pass 1: count the survivors.
    int kept = 0;
    for (int i = 0; i < stored.Length; i++)
    {
        if (isNA(in stored[i]))
        {
            continue;
        }

        kept++;
    }

    Host.Assert(kept <= stored.Length);

    // Everything was NA: the output is resized to 0.
    if (kept == 0)
    {
        VBufferUtils.Resize(ref dst, 0);
        return;
    }

    // Nothing was NA: reuse the source buffer, shrinking it to its stored values when it is sparse.
    if (kept == stored.Length)
    {
        Utils.Swap(ref src, ref dst);
        if (dst.IsDense)
        {
            return;
        }

        Host.Assert(dst.GetValues().Length == kept);
        VBufferUtils.Resize(ref dst, kept);
        return;
    }

    // Pass 2: densifying copy of the survivors.
    var editor = VBufferEditor.Create(ref dst, kept);
    int write = 0;
    for (int read = 0; read < stored.Length; read++)
    {
        if (isNA(in stored[read]))
        {
            continue;
        }

        editor.Values[write] = stored[read];
        write++;
    }

    Host.Assert(write == kept);
    dst = editor.Commit();
}
/// <summary>
/// Fetches the slot names of the single column playing <paramref name="role"/> in
/// <paramref name="schema"/>. When the schema is null, the role does not map to exactly one
/// column, or that column reports no slot names for <paramref name="vectorSize"/>,
/// <paramref name="slotNames"/> is instead resized with <paramref name="vectorSize"/> and 0.
/// </summary>
/// <param name="schema">Role-mapped schema; may be null.</param>
/// <param name="role">Role whose column is inspected.</param>
/// <param name="vectorSize">Expected vector size; must be non-negative.</param>
/// <param name="slotNames">Receives the slot names.</param>
internal static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer<ReadOnlyMemory<char>> slotNames)
{
    Contracts.CheckValueOrNull(schema);
    Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

    IReadOnlyList<Schema.Column> columns = schema?.GetColumns(role);

    // Slot names are only usable when exactly one column has the role and it carries names.
    bool usable = columns != null
        && columns.Count == 1
        && schema.Schema[columns[0].Index].HasSlotNames(vectorSize);

    if (usable)
    {
        schema.Schema[columns[0].Index].Metadata.GetValue(Kinds.SlotNames, ref slotNames);
        return;
    }

    VBufferUtils.Resize(ref slotNames, vectorSize, 0);
}
/// <summary>
/// Fetches the slot names of the single column playing <paramref name="role"/> in
/// <paramref name="schema"/>. When the schema is null, the role yields no column list or not
/// exactly one column, or the schema reports no slot names for that column and
/// <paramref name="vectorSize"/>, <paramref name="slotNames"/> is instead resized with
/// <paramref name="vectorSize"/> and 0.
/// </summary>
/// <param name="schema">Role-mapped schema; may be null.</param>
/// <param name="role">Role whose column is inspected.</param>
/// <param name="vectorSize">Expected vector size; must be non-negative.</param>
/// <param name="slotNames">Receives the slot names.</param>
public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer<ReadOnlyMemory<char>> slotNames)
{
    Contracts.CheckValueOrNull(schema);
    Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

    IReadOnlyList<ColumnInfo> columns = schema?.GetColumns(role);

    // Slot names are only usable when exactly one column has the role and it carries names.
    bool usable = columns != null
        && columns.Count == 1
        && schema.Schema.HasSlotNames(columns[0].Index, vectorSize);

    if (usable)
    {
        schema.Schema.GetMetadata(Kinds.SlotNames, columns[0].Index, ref slotNames);
        return;
    }

    VBufferUtils.Resize(ref slotNames, vectorSize, 0);
}
/// <summary>
/// Combines the vectors in <paramref name="src"/> by voting: each source vector votes for the
/// index returned by <c>VectorUtils.ArgMax</c>, and <paramref name="dst"/> receives, per index,
/// the fraction of cast votes it obtained.
/// </summary>
/// <param name="dst">Output buffer; its length is taken from <c>GetClassCount(src)</c>.</param>
/// <param name="src">Vectors to combine.</param>
/// <param name="weights">Only checked for size consistency with <paramref name="src"/>; not used in the vote.</param>
private void CombineCore(ref VBuffer<Single> dst, VBuffer<Single>[] src, Single[] weights = null)
{
    Host.AssertNonEmpty(src);
    Host.Assert(weights == null || Utils.Size(weights) == Utils.Size(src));

    int sourceCount = Utils.Size(src);
    if (sourceCount == 0)
    {
        VBufferUtils.Resize(ref dst, 0);
        return;
    }

    int classCount = GetClassCount(src);
    var editor = VBufferEditor.Create(ref dst, classCount);
    if (!editor.CreatedNewValues)
    {
        // A reused values buffer still holds old contents; wipe it before tallying.
        editor.Values.Clear();
    }

    // Tally one vote per source whose ArgMax is non-negative.
    int votes = 0;
    for (int s = 0; s < sourceCount; s++)
    {
        int winner = VectorUtils.ArgMax(in src[s]);
        if (winner < 0)
        {
            continue;
        }

        editor.Values[winner] += 1;
        votes++;
    }

    // Turn tallies into fractions of the votes cast.
    // Note: if no vote was cast this is a floating-point division by zero.
    for (int c = 0; c < classCount; c++)
    {
        editor.Values[c] = editor.Values[c] / votes;
    }

    dst = editor.Commit();
}
/// <summary>
/// Evaluates <c>_func</c> over one chunk of the index range [0, _maxIndex), accumulating the
/// returned values into <c>_tempVals[chunk]</c> and the gradients into <c>_tempGrads[chunk]</c>,
/// then signals <c>_threadFinished[chunk]</c>.
/// </summary>
/// <param name="chunkIndexObj">Boxed <see cref="int"/> chunk index.</param>
private void Eval(object chunkIndexObj)
{
    int chunkIndex = (int)chunkIndexObj;

    // The range is split into _threads chunks; the first `largeCount` chunks get one extra index.
    int smallSize = _maxIndex / _threads;
    int largeSize = smallSize + 1;
    int largeCount = _maxIndex % _threads;

    bool isLarge = chunkIndex < largeCount;
    int from = isLarge
        ? largeSize * chunkIndex
        : largeSize * largeCount + smallSize * (chunkIndex - largeCount);
    int to = from + (isLarge ? largeSize : smallSize);

    // Reset this chunk's accumulators.
    _tempVals[chunkIndex] = 0;
    VectorUtils.ScaleBy(ref _tempGrads[chunkIndex], 0);

    VBuffer<Float> scratchGrad = default(VBuffer<Float>);
    for (int i = from; i < to; ++i)
    {
        VBufferUtils.Resize(ref scratchGrad, 0, 0);
        _tempVals[chunkIndex] += _func(i, in _input, ref scratchGrad);

        if (_tempGrads[chunkIndex].Length != 0)
        {
            VectorUtils.Add(in scratchGrad, ref _tempGrads[chunkIndex]);
        }
        else
        {
            // Accumulator has length 0: adopt the first gradient as-is.
            scratchGrad.CopyTo(ref _tempGrads[chunkIndex]);
        }
    }

    _threadFinished[chunkIndex].Set();
}
/// <summary>
/// Fills <paramref name="result"/> (created with sizes 2 and 1) from a single input value:
/// a zero input produces a buffer resized with 2 and 0; a NaN input stores value 1 at index 1;
/// any other input stores the input itself at index 0.
/// </summary>
/// <param name="input">Value to encode.</param>
/// <param name="result">Buffer receiving the encoding.</param>
private static void FillValues(Float input, ref VBuffer<Float> result)
{
    if (input == 0)
    {
        VBufferUtils.Resize(ref result, 2, 0);
        return;
    }

    var editor = VBufferEditor.Create(ref result, 2, 1);

    bool isMissing = Float.IsNaN(input);
    editor.Values[0] = isMissing ? 1 : input;
    editor.Indices[0] = isMissing ? 1 : 0;

    result = editor.Commit();
}
/// <summary>
/// Builds a <c>ValueGetter&lt;VBuffer&lt;T&gt;&gt;</c> that, on every call, resizes the supplied
/// buffer with <paramref name="length"/> and 0.
/// </summary>
/// <param name="length">Length passed to <c>VBufferUtils.Resize</c> by the getter.</param>
/// <returns>The getter, typed as <see cref="Delegate"/>.</returns>
private Delegate MakeGetterVec<T>(int length)
{
    ValueGetter<VBuffer<T>> getter =
        (ref VBuffer<T> value) => VBufferUtils.Resize(ref value, length, 0);
    return getter;
}
/// <summary>
/// Drops slots from src and populates the dst with the resulting vector. Slots are
/// dropped based on min and max slots that were passed at the constructor.
/// </summary>
/// <param name="src">Source vector. It may be swapped with <paramref name="dst"/> when there is nothing to drop.</param>
/// <param name="dst">Receives the vector with the configured slot ranges removed.</param>
public void DropSlots<TDst>(ref VBuffer<TDst> src, ref VBuffer<TDst> dst)
{
    if (src.Length <= SlotsMin[0])
    {
        // There is nothing to drop, just swap buffers.
        Utils.Swap(ref src, ref dst);
        return;
    }

    int newLength = DstLength == 0 ? ComputeLength(src.Length) : DstLength;
    if (newLength == 0)
    {
        // All slots dropped.
        VBufferUtils.Resize(ref dst, 1, 0);
        return;
    }

    Contracts.Assert(newLength < src.Length);

    // End of the trivial cases.
    // At this point, we need to drop some slots and keep some slots.
    VBufferEditor<TDst> editor;
    var srcValues = src.GetValues();
    if (src.IsDense)
    {
        editor = VBufferEditor.Create(ref dst, newLength);

        int iDst = 0;
        int iSrc = 0;
        for (int i = 0; i < SlotsMax.Length && iSrc < src.Length; i++)
        {
            // Copy everything up to the start of range i, then jump past its end.
            var lim = Math.Min(SlotsMin[i], src.Length);
            while (iSrc < lim)
            {
                Contracts.Assert(iDst <= iSrc);
                editor.Values[iDst++] = srcValues[iSrc++];
            }
            iSrc = SlotsMax[i] + 1;
        }

        // Copy whatever follows the last range.
        while (iSrc < src.Length)
        {
            Contracts.Assert(iDst <= iSrc);
            editor.Values[iDst++] = srcValues[iSrc++];
        }

        Contracts.Assert(iDst == newLength);
        dst = editor.Commit();
        return;
    }

    // Sparse case.
    // Approximate new count is min(#indices, newLength).
    var newCount = Math.Min(srcValues.Length, newLength);
    var srcIndices = src.GetIndices();

    Contracts.Assert(newCount <= src.Length);

    editor = VBufferEditor.Create(
        ref dst,
        newLength,
        newCount,
        requireIndicesOnDense: true);

    int iiDst = 0;
    int iiSrc = 0;
    int iOffset = 0;
    int iRange = 0;
    int min = SlotsMin[iRange];
    // REVIEW: Consider using a BitArray with the slots to keep instead of SlotsMax. It would
    // only make sense when the number of ranges is greater than the number of slots divided by 32.
    int max = SlotsMax[iRange];
    while (iiSrc < srcValues.Length)
    {
        // Copy (with offset) the elements before the current range.
        var index = srcIndices[iiSrc];
        if (index < min)
        {
            Contracts.Assert(iiDst <= iiSrc);
            editor.Indices[iiDst] = index - iOffset;
            editor.Values[iiDst++] = srcValues[iiSrc++];
            continue;
        }

        if (index <= max)
        {
            // Skip elements in the current range.
            iiSrc++;
            continue;
        }

        // Find the next range: scan linearly, switching to a sorted search when many
        // ranges remain and the one threshold2 positions ahead still ends before index.
        const int threshold1 = 20;
        const int threshold2 = 10;
        while (++iRange < SlotsMax.Length && SlotsMax[iRange] < index)
        {
            if (SlotsMax.Length - iRange >= threshold1 &&
                SlotsMax[iRange + threshold2] < index)
            {
                iRange = SlotsMax.FindIndexSorted(iRange + threshold2, SlotsMax.Length, index);
                Contracts.Assert(iRange == SlotsMax.Length ||
                    iRange > 0 && SlotsMax[iRange - 1] < index && index <= SlotsMax[iRange]);
                break;
            }
        }

        if (iRange < SlotsMax.Length)
        {
            min = SlotsMin[iRange];
            max = SlotsMax[iRange];
        }
        else
        {
            // Past the last range: no further element can fall inside a range.
            min = max = src.Length;
        }

        if (iRange > 0)
        {
            iOffset = _lengthReduction[iRange - 1];
        }

        Contracts.Assert(index <= max);
    }

    // Fewer than newCount entries may have been written; commit only those.
    dst = editor.CommitTruncated(iiDst);
}
/// <summary>
/// Produces initial weights by running the SGD optimizer until the change in weights,
/// sampled every 1000 evaluations, drops below <c>SgdInitializationTolerance</c>.
/// </summary>
/// <param name="ch">Channel used for informational messages.</param>
/// <param name="cursorFactory">Factory creating the cursors that supply training examples.</param>
/// <returns>The weights written by <c>SgdOptimizer.Minimize</c>.</returns>
private protected virtual VBuffer<float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory)
{
    if (!Quiet)
    {
        ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance);
    }

    int numExamples = 0;
    var previous = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);

    // Termination test: only every 1000th call compares the current point with the last sampled one.
    DTerminate shouldStop = (in VBuffer<float> x) =>
    {
        numExamples++;
        if (numExamples % 1000 != 0)
        {
            return false;
        }

        // previous <- previous - x, measure its norm, then remember x for the next sample.
        VectorUtils.AddMult(in x, -1, ref previous);
        float normDiff = VectorUtils.Norm(previous);
        x.CopyTo(ref previous);

        // #if OLD_TRACING // REVIEW: How should this be ported?
        if (!Quiet)
        {
            Console.Write(".");
            if (numExamples % 50000 == 0)
            {
                Console.WriteLine("\t{0}\t{1}", numExamples, normDiff);
            }
        }
        // #endif

        return normDiff < SgdInitializationTolerance;
    };

    VBuffer<float> result = default(VBuffer<float>);
    FloatLabelCursor cursor = null;
    try
    {
        float[] scratch = null;

        // Stochastic gradient: one example per call, restarting the cursor when it runs out.
        SgdOptimizer.DStochasticGradient gradientStep = (in VBuffer<float> x, ref VBuffer<float> grad) =>
        {
            // Zero out the gradient by sparsifying.
            VBufferUtils.Resize(ref grad, grad.Length, 0);
            EnsureBiases(ref grad);

            if (cursor == null || !cursor.MoveNext())
            {
                cursor?.Dispose();
                cursor = cursorFactory.Create();
                if (!cursor.MoveNext())
                {
                    // No example available even from a fresh cursor.
                    return;
                }
            }

            AccumulateOneGradient(in cursor.Features, cursor.Label, cursor.Weight, in x, ref grad, ref scratch);
        };

        VBuffer<float> start = DenseOptimizer
            ? VBufferUtils.CreateDense<float>(BiasCount + WeightCount)
            : VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);

        var optimizer = new SgdOptimizer(shouldStop);
        optimizer.Minimize(gradientStep, ref start, ref result);

        // #if OLD_TRACING // REVIEW: How should this be ported?
        if (!Quiet)
        {
            Console.WriteLine();
        }
        // #endif

        ch.Info("SGD initialization done in {0} rounds", numExamples);
    }
    finally
    {
        cursor?.Dispose();
    }

    return result;
}
/// <summary>
/// Getter that resizes <paramref name="value"/> with 1 and 0 on every call.
/// </summary>
/// <remarks>
/// Kept as an instance method on purpose: delegates onto instance methods are more efficient
/// than delegates onto static methods.
/// </remarks>
private void VecTrivialGetter<TDst>(ref VBuffer<TDst> value)
    => VBufferUtils.Resize(ref value, 1, 0);
/// <summary>
/// Produces the "missing" value for this vector type by resizing <paramref name="dst"/>
/// with <c>Type.VectorSize</c> and 0.
/// </summary>
/// <param name="dst">Buffer receiving the missing value.</param>
protected override void GetMissing(ref VBuffer<TItem> dst)
    => VBufferUtils.Resize(ref dst, Type.VectorSize, 0);