/// <summary>
/// Forward pass: runs the Dnn <c>SoftmaxForward</c> routine over <c>Input</c> and
/// writes the result into <c>Output</c>, which is requested with the input's shape.
/// </summary>
/// <param name="executor">
/// Executor that supplies the context, the tensors and the tensor descriptor pool.
/// </param>
/// <exception cref="NotImplementedException">
/// Thrown when the context is not a GPU context, or when the input layout is not
/// inner-change-most fully packed.
/// </exception>
public override void Forward(Executor executor)
{
    var context = executor.Context;
    var input = executor.GetTensor(Input);
    var output = executor.GetTensor(Output, input.Shape);

    // Only the GPU path over a fully packed input is implemented.
    if (context.Type != ContextType.Gpu || !input.Layout.IsInnerChangeMostFullyPacked)
    {
        throw new NotImplementedException();
    }

    var dnnApi = context.ToGpuContext().Dnn;
    var rowCount = (int)input.Shape[0];
    var classCount = (int)input.Shape[1];

    using (var inputDesc = executor.TensorDescRepo.Acquire())
    {
        using (var outputDesc = executor.TensorDescRepo.Acquire())
        {
            // Both tensors are described as 4-D {rows, classes, 1, 1} with
            // strides {classes, 1, 1, 1}; this is why the packed layout is required.
            var elementType = Dnn.DataTypeOf(typeof(T));
            inputDesc.Value.SetND(
                elementType,
                new[] { rowCount, classCount, 1, 1 },
                new[] { classCount, 1, 1, 1 });
            outputDesc.Value.SetND(
                elementType,
                new[] { rowCount, classCount, 1, 1 },
                new[] { classCount, 1, 1, 1 });

            var inputPtr = input.Buffer.Ptr;
            var outputPtr = output.Buffer.Ptr;

            // Scaling factors handed to the Dnn call: alpha = 1, beta = 0.
            var one = ScalarOps.Conv<T>(1.0);
            var zero = ScalarOps.Conv<T>(0.0);

            dnnApi.SoftmaxForward(
                SoftmaxAlgorithm.ACCURATE,
                SoftmaxMode.INSTANCE,
                one,
                inputDesc.Value,
                inputPtr,
                zero,
                outputDesc.Value,
                outputPtr);
        }
    }
}
/// <summary>
/// Forward pass: writes the log-softmax of <c>Input</c> into <c>LogPred</c>, gathers for
/// every row the log-prediction at the column index stored in <c>Label</c> into
/// <c>Temp</c>, and assigns <c>-ReduceSum(Temp)</c> to <c>Loss</c>.
/// </summary>
/// <param name="executor">
/// Executor that supplies the context, the tensors and the tensor descriptor pool.
/// </param>
/// <exception cref="NotImplementedException">
/// Thrown when the context is not a GPU context, or when the <c>LogPred</c> layout is not
/// inner-change-most fully packed. Both conditions are now checked before any GPU call
/// is issued instead of after the softmax has already been launched.
/// </exception>
public override void Forward(Executor executor)
{
    var z = executor.GetTensor(Input);
    var y = executor.GetTensor(Label);

    Util.EnsureTrue(z.Shape.Rank == 2);
    Util.EnsureTrue(Dnn.IsAvailable, "TODO: make non-cuDnn implementation.");

    // Fail fast: everything below goes through ToGpuContext(), so reject any other
    // context type up front rather than after the Dnn call.
    var ctx = executor.Context;
    if (ctx.Type != ContextType.Gpu)
    {
        throw new NotImplementedException();
    }

    var gpu = ctx.ToGpuContext();
    var n = (int)z.Shape[0];
    var classes = (int)z.Shape[1];

    // Fetch LogPred once and reuse it for the softmax output and the gather below.
    var logPred = executor.GetTensor(LogPred, Shape.Create(n, classes));

    // The descriptor strides and the flat index i * classes + idx both assume a packed
    // row-major LogPred, so verify that before writing into it.
    // NOTE(review): the layout of Input is not checked here although its descriptor
    // uses the same packed strides - confirm inputs are always packed.
    if (!logPred.Layout.IsInnerChangeMostFullyPacked)
    {
        throw new NotImplementedException();
    }

    using (var xDesc = executor.TensorDescRepo.Acquire())
    using (var yDesc = executor.TensorDescRepo.Acquire())
    {
        var dnn = gpu.Dnn;

        // Both tensors are described as 4-D {n, classes, 1, 1} with strides {classes, 1, 1, 1}.
        xDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
        yDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });

        var xPtr = z.Buffer.Ptr;
        var yPtr = logPred.Buffer.Ptr;

        // Scaling factors handed to the Dnn call: alpha = 1, beta = 0.
        var alpha = ScalarOps.Conv<T>(1.0);
        var beta = ScalarOps.Conv<T>(0.0);

        const SoftmaxAlgorithm algorithm = SoftmaxAlgorithm.LOG;
        const SoftmaxMode mode = SoftmaxMode.INSTANCE;

        dnn.SoftmaxForward(algorithm, mode, alpha, xDesc.Value, xPtr, beta, yDesc.Value, yPtr);
    }

    // TODO: make it expression
    var temp = executor.GetTensor(Temp, Shape.Create(n));

    var stream = gpu.Stream;
    var tempPtr = temp.Buffer.Ptr;
    var logPredPtr = logPred.Buffer.Ptr;
    var idxPtr = y.Buffer.Ptr;

    // For each row i, pick the log-prediction at the column given by the label.
    DeviceFor.For(stream, 0, n, i =>
    {
        var idx = idxPtr[i];
        tempPtr[i] = logPredPtr[i * classes + idx];
    });

    executor.AssignTensor(Loss, -ReduceSum(temp));
}