public void Reset()
{
    // Nothing to do unless reshuffling is enabled, at least one batch was
    // consumed, and we are running on a GPU context.
    if (!doReset_ || Index == 0L || Context.Type != ContextType.Gpu)
    {
        return;
    }

    Index = 0L;

    // Draw a fresh permutation on the host and push it to the device.
    ShuffleIndices();
    Context.Copy(IndicesTensor, Indices.AsTensor());

    var gpuStream = Context.ToGpuContext().Stream;

    // Double-buffering: read from the currently active data/label buffers,
    // scatter the permuted rows into the inactive pair, then flip which
    // pair is active.
    var dataIsFirst = DataTensor == DataTensor1;
    var labelsAreFirst = LabelsTensor == LabelsTensor1;
    var dataSrc = dataIsFirst ? DataTensor1.Buffer.Ptr : DataTensor2.Buffer.Ptr;
    var dataDst = dataIsFirst ? DataTensor2.Buffer.Ptr : DataTensor1.Buffer.Ptr;
    var labelsSrc = labelsAreFirst ? LabelsTensor1.Buffer.Ptr : LabelsTensor2.Buffer.Ptr;
    var labelsDst = labelsAreFirst ? LabelsTensor2.Buffer.Ptr : LabelsTensor1.Buffer.Ptr;
    var permutation = IndicesTensor.Buffer.Ptr;

    // One device thread per row: copy row `row` of the source into row
    // `permutation[row]` of the destination, for both data and labels, so
    // the same permutation is applied to both tensors.
    DeviceFor.For(gpuStream, 0, Rows, row =>
    {
        var target = permutation[row];

        var dataIn = dataSrc + row * cols_;
        var dataOut = dataDst + target * cols_;
        for (var c = 0; c < cols_; ++c)
        {
            dataOut[c] = dataIn[c];
        }

        var labelsIn = labelsSrc + row * outputs_;
        var labelsOut = labelsDst + target * outputs_;
        for (var c = 0; c < outputs_; ++c)
        {
            labelsOut[c] = labelsIn[c];
        }
    });

    // Make the freshly shuffled buffers the active ones.
    DataTensor = dataIsFirst ? DataTensor2 : DataTensor1;
    LabelsTensor = labelsAreFirst ? LabelsTensor2 : LabelsTensor1;
}
public override void Backward(Executor executor)
{
    var logPred = executor.GetTensor(LogPred);
    var labels = executor.GetTensor(Label);
    Util.EnsureTrue(logPred.Shape.Rank == 2);

    var rows = (int)logPred.Shape[0];
    var classes = (int)logPred.Shape[1];

    // d(loss)/d(logits) for softmax cross-entropy is softmax(x) - onehot(label);
    // start from softmax = exp(log-softmax), then subtract 1 at the labelled
    // class of every row.
    executor.AssignGradient(Input, Exp(logPred));
    var grad = executor.GetGradient(Input);

    var ctx = executor.Context;
    if (ctx.Type == ContextType.Gpu)
    {
        var stream = ctx.ToGpuContext().Stream;
        var labelPtr = labels.Buffer.Ptr;

        // The kernel needs a concrete element type, hence the explicit
        // float/double branches; other element types are not supported.
        if (typeof(T) == typeof(float))
        {
            var gradPtr = grad.Buffer.Ptr.Reinterpret<float>();
            DeviceFor.For(stream, 0, rows, row =>
            {
                var label = labelPtr[row];
                gradPtr[row * classes + label] -= 1.0f;
            });
            return;
        }

        if (typeof(T) == typeof(double))
        {
            var gradPtr = grad.Buffer.Ptr.Reinterpret<double>();
            DeviceFor.For(stream, 0, rows, row =>
            {
                var label = labelPtr[row];
                gradPtr[row * classes + label] -= 1.0;
            });
            return;
        }

        throw new NotImplementedException();
    }

    // CPU path not implemented.
    throw new NotImplementedException();
}
public override void Forward(Executor executor)
{
    var input = executor.GetTensor(Input);
    var labels = executor.GetTensor(Label);
    Util.EnsureTrue(input.Shape.Rank == 2);
    Util.EnsureTrue(Dnn.IsAvailable, "TODO: make non-cuDnn implementation.");

    var rows = (int)input.Shape[0];
    var classes = (int)input.Shape[1];

    // Run cuDNN's log-softmax over each row of the (rows x classes) input,
    // writing the result into LogPred. Both tensors are described as fully
    // packed NCHW with H = W = 1.
    using (var xDesc = executor.TensorDescRepo.Acquire())
    using (var yDesc = executor.TensorDescRepo.Acquire())
    {
        var dnn = executor.Context.ToGpuContext().Dnn;
        xDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { rows, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
        yDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { rows, classes, 1, 1 }, new[] { classes, 1, 1, 1 });

        var inputPtr = executor.GetTensor(Input).Buffer.Ptr;
        var logPredPtr = executor.GetTensor(LogPred, Shape.Create(rows, classes)).Buffer.Ptr;
        var one = ScalarOps.Conv<T>(1.0);
        var zero = ScalarOps.Conv<T>(0.0);
        const SoftmaxAlgorithm algorithm = SoftmaxAlgorithm.LOG;
        const SoftmaxMode mode = SoftmaxMode.INSTANCE;
        dnn.SoftmaxForward(algorithm, mode, one, xDesc.Value, inputPtr, zero, yDesc.Value, logPredPtr);
    }

    // TODO: express the gather below as a tensor expression instead of a raw kernel.
    var logPred = executor.GetTensor(LogPred);
    var picked = executor.GetTensor(Temp, Shape.Create(rows));
    var ctx = executor.Context;
    if (ctx.Type == ContextType.Gpu && logPred.Layout.IsInnerChangeMostFullyPacked)
    {
        var stream = ctx.ToGpuContext().Stream;
        var pickedPtr = picked.Buffer.Ptr;
        var logPredPtr = logPred.Buffer.Ptr;
        var labelPtr = labels.Buffer.Ptr;

        // picked[row] = logPred[row, label[row]] — the log-probability the
        // model assigned to the correct class of each sample.
        DeviceFor.For(stream, 0, rows, row =>
        {
            var label = labelPtr[row];
            pickedPtr[row] = logPredPtr[row * classes + label];
        });

        // Negative log-likelihood summed over the batch.
        executor.AssignTensor(Loss, -ReduceSum(picked));
        return;
    }

    // Non-GPU or non-packed layouts are not handled.
    throw new NotImplementedException();
}