public override void Forward(Executor executor)
{
    var ctx = executor.Context;
    var x = executor.GetTensor(Input);
    var y = executor.GetTensor(Output, x.Shape);

    if (ctx.Type == ContextType.Gpu && x.Layout.IsInnerChangeMostFullyPacked)
    {
        var dnn = ctx.ToGpuContext().Dnn;
        var n = (int)x.Shape[0];
        var classes = (int)x.Shape[1];

        using (var xDesc = executor.TensorDescRepo.Acquire())
        using (var yDesc = executor.TensorDescRepo.Acquire())
        {
            // Describe the (n, classes) tensors to cuDNN as fully packed (n, classes, 1, 1).
            xDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
            yDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });

            var xPtr = x.Buffer.Ptr;
            var yPtr = y.Buffer.Ptr;
            var alpha = ScalarOps.Conv<T>(1.0);
            var beta = ScalarOps.Conv<T>(0.0);
            const SoftmaxAlgorithm algorithm = SoftmaxAlgorithm.ACCURATE;
            const SoftmaxMode mode = SoftmaxMode.INSTANCE;
            dnn.SoftmaxForward(algorithm, mode, alpha, xDesc.Value, xPtr, beta, yDesc.Value, yPtr);
        }

        return;
    }

    throw new NotImplementedException();
}
public override void Forward(Executor executor)
{
    var data = executor.GetTensor(Data);
    var output = executor.GetTensor(Output, Shape.Create(data.Shape[0], Output.Shape[1], Output.Shape[2], Output.Shape[3]));

    if (executor.Context.Type == ContextType.Gpu)
    {
        var dnn = executor.Context.ToGpuContext().Dnn;

        using (var dataDescRcpt = executor.TensorDescRepo.Acquire())
        using (var outputDescRcpt = executor.TensorDescRepo.Acquire())
        {
            var dataDesc = dataDescRcpt.Value;
            var outputDesc = outputDescRcpt.Value;
            var dataType = Dnn.DataTypeOf<T>();

            dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)data.Shape[0], (int)data.Shape[1], (int)data.Shape[2], (int)data.Shape[3]);
            outputDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)data.Shape[0], (int)Output.Shape[1], (int)Output.Shape[2], (int)Output.Shape[3]);

            dnn.PoolingForward(Descriptor, ScalarOps.Conv<T>(1.0), dataDesc, data.Buffer.Ptr, ScalarOps.Conv<T>(0.0), outputDesc, output.Buffer.Ptr);
            return;
        }
    }

    throw new NotImplementedException();
}
public override void Forward(Executor executor)
{
    var z = executor.GetTensor(Input);
    var y = executor.GetTensor(Label);

    Util.EnsureTrue(z.Shape.Rank == 2);
    Util.EnsureTrue(Dnn.IsAvailable, "TODO: add a non-cuDNN implementation.");

    var n = (int)z.Shape[0];
    var classes = (int)z.Shape[1];

    using (var xDesc = executor.TensorDescRepo.Acquire())
    using (var yDesc = executor.TensorDescRepo.Acquire())
    {
        var dnn = executor.Context.ToGpuContext().Dnn;
        xDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
        yDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { n, classes, 1, 1 }, new[] { classes, 1, 1, 1 });

        var xPtr = executor.GetTensor(Input).Buffer.Ptr;
        var yPtr = executor.GetTensor(LogPred, Shape.Create(n, classes)).Buffer.Ptr;
        var alpha = ScalarOps.Conv<T>(1.0);
        var beta = ScalarOps.Conv<T>(0.0);

        // Log-softmax over each row of the logits, written into LogPred.
        const SoftmaxAlgorithm algorithm = SoftmaxAlgorithm.LOG;
        const SoftmaxMode mode = SoftmaxMode.INSTANCE;
        dnn.SoftmaxForward(algorithm, mode, alpha, xDesc.Value, xPtr, beta, yDesc.Value, yPtr);
    }

    // TODO: make it an expression
    var logPred = executor.GetTensor(LogPred);
    var temp = executor.GetTensor(Temp, Shape.Create(n));
    var ctx = executor.Context;

    if (ctx.Type == ContextType.Gpu && logPred.Layout.IsInnerChangeMostFullyPacked)
    {
        var stream = ctx.ToGpuContext().Stream;
        var tempPtr = temp.Buffer.Ptr;
        var logPredPtr = logPred.Buffer.Ptr;
        var idxPtr = y.Buffer.Ptr;

        // Gather the log-probability of the target class for each sample.
        DeviceFor.For(stream, 0, n, i =>
        {
            var idx = idxPtr[i];
            tempPtr[i] = logPredPtr[i * classes + idx];
        });

        executor.AssignTensor(Loss, -ReduceSum(temp));
        return;
    }

    throw new NotImplementedException();
}
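// Note on the loss computed above (explanatory comment, not part of the original source):
// with logits z of shape (n, classes) and integer labels y, the cuDNN call with
// SoftmaxAlgorithm.LOG writes logPred[i, j] = z[i, j] - log(sum_k exp(z[i, k])), i.e. the
// per-row log-softmax. The device loop then gathers temp[i] = logPred[i, y[i]], so the
// assigned Loss is the summed negative log-likelihood: Loss = -sum_i logPred[i, y[i]].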
public override void Forward(Executor executor)
{
    var data = executor.GetTensor(Data);
    var weight = executor.GetTensor(Weight);
    var bias = executor.GetTensor(Bias);
    var output = executor.GetTensor(Output, Shape.Create(data.Shape[0], Output.Shape[1], Output.Shape[2], Output.Shape[3]));

    if (executor.Context.Type == ContextType.Gpu)
    {
        var convDesc = ConvolutionDesc;
        var dnn = executor.Context.ToGpuContext().Dnn;

        using (var dataDescRcpt = executor.TensorDescRepo.Acquire())
        using (var weightDescRcpt = executor.FilterDescRepo.Acquire())
        using (var biasDescRcpt = executor.TensorDescRepo.Acquire())
        using (var outputDescRcpt = executor.TensorDescRepo.Acquire())
        {
            var dataDesc = dataDescRcpt.Value;
            var weightDesc = weightDescRcpt.Value;
            var biasDesc = biasDescRcpt.Value;
            var outputDesc = outputDescRcpt.Value;
            var dataType = Dnn.DataTypeOf<T>();

            dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)data.Shape[0], (int)Data.Shape[1], (int)Data.Shape[2], (int)Data.Shape[3]);
            weightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)weight.Shape[0], (int)weight.Shape[1], (int)weight.Shape[2], (int)weight.Shape[3]);
            biasDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, 1, (int)output.Shape[1], 1, 1);
            outputDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)output.Shape[0], (int)output.Shape[1], (int)output.Shape[2], (int)output.Shape[3]);

            // Pick the fastest forward algorithm and allocate its workspace, if one is needed.
            ConvolutionFwdAlgo algo;
            IntPtr workspaceSize;
            dnn.GetConvolutionForwardAlgorithm(dataDesc, weightDesc, convDesc, outputDesc, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out algo);
            dnn.GetConvolutionForwardWorkspaceSize(dataDesc, weightDesc, convDesc, outputDesc, algo, out workspaceSize);
            var workspace = workspaceSize.ToInt64() > 0L ? executor.GetTensor(Workspace1, Shape.Create(workspaceSize.ToInt64())) : null;
            //Console.WriteLine($"==> {algo} {workspaceSize}");

            // step 1, convolve
            dnn.ConvolutionForward(ScalarOps.Conv<T>(1.0), dataDesc, data.Buffer.Ptr, weightDesc, weight.Buffer.Ptr, convDesc, algo, workspace?.Buffer.Ptr ?? new deviceptr<byte>(), workspaceSize, ScalarOps.Conv<T>(0.0), outputDesc, output.Buffer.Ptr);

            // step 2, add bias
            dnn.AddTensor(ScalarOps.Conv<T>(1.0), biasDesc, bias.Buffer.Ptr, ScalarOps.Conv<T>(1.0), outputDesc, output.Buffer.Ptr);
            return;
        }
    }

    throw new NotImplementedException();
}
public Convolution2D(Variable<T> data, int kernelH, int kernelW, int numFilter)
{
    Util.EnsureTrue(data.Shape.Rank == 4);
    Util.EnsureTrue(data.Shape[1] > 0);
    Util.EnsureTrue(data.Shape[2] > 0);
    Util.EnsureTrue(data.Shape[3] > 0);

    var numInputFilter = data.Shape[1];
    var numOutputFilter = numFilter;
    var height = data.Shape[2];
    var width = data.Shape[3];

    // fixed padding and stride for now
    ConvolutionDesc = new ConvolutionDescriptor();
    ConvolutionDesc.Set2D(0, 0, 1, 1, 1, 1, ConvolutionMode.CROSS_CORRELATION);

    using (var dataDesc = new TensorDescriptor())
    using (var weightDesc = new FilterDescriptor())
    {
        var dataType = Dnn.DataTypeOf<T>();
        var tempN = 100; // temporary mini-batch size, only used to query the output dimensions
        dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, tempN, (int)numInputFilter, (int)height, (int)width);
        weightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, numOutputFilter, (int)numInputFilter, kernelH, kernelW);

        // get output dimension
        int n, c, h, w;
        ConvolutionDesc.Get2DForwardOutputDim(dataDesc, weightDesc, out n, out c, out h, out w);
        //Console.WriteLine($"{c},{h},{w}");

        // create variables
        var scale = Sqrt(3.0.AsScalar<T>() / ((double)(numInputFilter * kernelH * kernelW)).AsScalar<T>());
        Data = data;
        Weight = Parameter(scale * (2.0.AsScalar<T>() * RandomUniform<T>(Shape.Create(numOutputFilter, numInputFilter, kernelH, kernelW), 0UL, 0UL) - 1.0.AsScalar<T>()));
        Bias = Parameter(Fill(Shape.Create(c), ScalarOps.Conv<T>(0.1)));
        Output = Variable<T>(PartialShape.Create(-1, c, h, w));
        Workspace1 = AuxVariable<byte>();
        Workspace2 = AuxVariable<byte>();

        AddInput(Data);
        AddInput(Weight);
        AddInput(Bias);
        AddOutput(Output);
        AddAuxVar(Workspace1);
        AddAuxVar(Workspace2);
    }
}
public Pooling2D(Variable<T> data, PoolingMode mode, int kernelH, int kernelW, int strideH, int strideW)
{
    Descriptor = new PoolingDescriptor();
    Descriptor.Set2D(mode, NanPropagation.NOT_PROPAGATE_NAN, kernelH, kernelW, 0, 0, strideH, strideW);

    var dataType = Dnn.DataTypeOf<T>();
    using (var dataDesc = new TensorDescriptor())
    {
        // temporary mini-batch size of 10, only used to query the output dimensions
        dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, 10, (int)data.Shape[1], (int)data.Shape[2], (int)data.Shape[3]);

        int n, c, h, w;
        Descriptor.Get2dForwardOutputDim(dataDesc, out n, out c, out h, out w);

        Data = data;
        Output = Variable<T>(PartialShape.Create(-1, c, h, w));
        AddInput(Data);
        AddOutput(Output);
    }
}
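// Usage sketch (not part of the library source): a hedged example of how the Convolution2D
// and Pooling2D operators above might be composed. It assumes the Variable<T>/PartialShape
// factories used by the operators (e.g. brought in via `using static` of the library's
// factory class) and the cuDNN wrapper's PoolingMode enum are in scope; the method and
// variable names below are hypothetical.
private static void BuildConvPoolSketch()
{
    // NCHW input with a dynamic batch dimension, e.g. 1-channel 28x28 images.
    var images = Variable<float>(PartialShape.Create(-1, 1, 28, 28));

    // 16 filters of size 5x5; Convolution2D uses zero padding and unit stride (see Set2D above),
    // so 28x28 -> 24x24.
    var conv = new Convolution2D<float>(images, 5, 5, 16);

    // 2x2 max pooling with stride 2: 24x24 -> 12x12, giving an Output shape of (-1, 16, 12, 12).
    var pool = new Pooling2D<float>(conv.Output, PoolingMode.MAX, 2, 2, 2, 2);
}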
public override void Backward(Executor executor)
{
    var data = executor.GetTensor(Data);
    var weight = executor.GetTensor(Weight);
    var dOutput = executor.GetGradient(Output);
    var dWeight = executor.GetGradient(Weight, Shape.Create(Weight.Shape.AsArray));
    var dBias = executor.GetGradient(Bias, Shape.Create(Bias.Shape.AsArray));
    var dData = executor.GetGradient(Data, Shape.Create(data.Shape.AsArray));

    if (executor.Context.Type == ContextType.Gpu)
    {
        var convDesc = ConvolutionDesc;
        var dnn = executor.Context.ToGpuContext().Dnn;

        using (var dataDescRcpt = executor.TensorDescRepo.Acquire())
        using (var weightDescRcpt = executor.FilterDescRepo.Acquire())
        using (var dDataDescRcpt = executor.TensorDescRepo.Acquire())
        using (var dOutputDescRcpt = executor.TensorDescRepo.Acquire())
        using (var dBiasDescRcpt = executor.TensorDescRepo.Acquire())
        using (var dWeightDescRcpt = executor.FilterDescRepo.Acquire())
        {
            var dataDesc = dataDescRcpt.Value;
            var weightDesc = weightDescRcpt.Value;
            var dDataDesc = dDataDescRcpt.Value;
            var dOutputDesc = dOutputDescRcpt.Value;
            var dBiasDesc = dBiasDescRcpt.Value;
            var dWeightDesc = dWeightDescRcpt.Value;
            var dataType = Dnn.DataTypeOf<T>();

            dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)data.Shape[0], (int)Data.Shape[1], (int)Data.Shape[2], (int)Data.Shape[3]);
            dDataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)data.Shape[0], (int)Data.Shape[1], (int)Data.Shape[2], (int)Data.Shape[3]);
            dOutputDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)dOutput.Shape[0], (int)dOutput.Shape[1], (int)dOutput.Shape[2], (int)dOutput.Shape[3]);
            dBiasDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, 1, (int)dOutput.Shape[1], 1, 1);
            dWeightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)weight.Shape[0], (int)weight.Shape[1], (int)weight.Shape[2], (int)weight.Shape[3]);
            weightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, (int)weight.Shape[0], (int)weight.Shape[1], (int)weight.Shape[2], (int)weight.Shape[3]);

            // filter gradient: pick the fastest algorithm and allocate its workspace, if any
            ConvolutionBwdFilterAlgo filterAlgo;
            IntPtr filterWorkspaceSize;
            dnn.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc, convDesc, dWeightDesc, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out filterAlgo);
            dnn.GetConvolutionBackwardFilterWorkspaceSize(dataDesc, dOutputDesc, convDesc, dWeightDesc, filterAlgo, out filterWorkspaceSize);
            var filterWorkspace = filterWorkspaceSize.ToInt64() > 0L ? executor.GetTensor(Workspace1, Shape.Create(filterWorkspaceSize.ToInt64())) : null;
            //Console.WriteLine($"==> {filterAlgo} {filterWorkspaceSize}");

            // data gradient: pick the fastest algorithm and allocate its workspace, if any
            ConvolutionBwdDataAlgo dataAlgo;
            IntPtr dataWorkspaceSize;
            dnn.GetConvolutionBackwardDataAlgorithm(weightDesc, dOutputDesc, convDesc, dDataDesc, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out dataAlgo);
            dnn.GetConvolutionBackwardDataWorkspaceSize(dWeightDesc, dOutputDesc, convDesc, dDataDesc, dataAlgo, out dataWorkspaceSize);
            var dataWorkspace = dataWorkspaceSize.ToInt64() > 0L ? executor.GetTensor(Workspace2, Shape.Create(dataWorkspaceSize.ToInt64())) : null;
            //Console.WriteLine($"==> {dataAlgo} {dataWorkspaceSize}");

            // filter
            dnn.ConvolutionBackwardFilter(ScalarOps.Conv<T>(1.0), dataDesc, data.Buffer.Ptr, dOutputDesc, dOutput.Buffer.Ptr, convDesc, filterAlgo, filterWorkspace?.Buffer.Ptr ?? new deviceptr<byte>(), filterWorkspaceSize, ScalarOps.Conv<T>(0.0), dWeightDesc, dWeight.Buffer.Ptr);

            // data
            dnn.ConvolutionBackwardData(ScalarOps.Conv<T>(1.0), weightDesc, weight.Buffer.Ptr, dOutputDesc, dOutput.Buffer.Ptr, convDesc, dataAlgo, dataWorkspace?.Buffer.Ptr ?? new deviceptr<byte>(), dataWorkspaceSize, ScalarOps.Conv<T>(0.0), dDataDesc, dData.Buffer.Ptr);

            // bias
            dnn.ConvolutionBackwardBias(ScalarOps.Conv<T>(1.0), dOutputDesc, dOutput.Buffer.Ptr, ScalarOps.Conv<T>(0.0), dBiasDesc, dBias.Buffer.Ptr);
            return;
        }
    }

    throw new NotImplementedException();
}
public Rnn(RnnType ty, Variable<T> x, int numLayers, int hiddenSize, bool isTraining = true, double dropout = 0.0, ulong dropoutSeed = 1337UL)
{
    Type = ty;
    IsTraining = isTraining;
    NumLayers = numLayers;
    HiddenSize = hiddenSize;
    Dropout = isTraining ? dropout : 0.0;
    DropoutSeed = dropoutSeed;

    // X shape (seqLength, batch, inputSize)
    X = x;
    Util.EnsureEqual(3, X.Shape.Rank, "Input layout: (seqLength, batch, inputSize)");
    Util.EnsureTrue(X.Shape[0] >= 0, "Input layout: (seqLength, batch, inputSize)");
    Util.EnsureTrue(X.Shape[1] >= 0, "Input layout: (seqLength, batch, inputSize)");
    Util.EnsureTrue(X.Shape[2] >= 0, "Input layout: (seqLength, batch, inputSize)");
    SeqLength = (int)X.Shape[0];
    MiniBatch = (int)X.Shape[1];
    InputSize = (int)X.Shape[2];

    // Y shape (seqLength, batch, hiddenSize)
    Y = Variable<T>(PartialShape.Create(SeqLength, MiniBatch, HiddenSize));

    // W shape will be determined during initialization
    W = Parameter<T>();

    // state variables
    var shape = PartialShape.Create(NumLayers, MiniBatch, HiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // inner-change-most layout
    HX = Variable<T>(shape);
    CX = Variable<T>(shape);
    HY = Variable<T>(shape);
    CY = Variable<T>(shape);
    StateDesc = new TensorDescriptor();
    StateDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

    // xDesc is an array with one descriptor per step
    shape = PartialShape.Create(MiniBatch, InputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, SeqLength).ToArray();

    // yDesc is an array with one descriptor per step
    shape = PartialShape.Create(MiniBatch, HiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, SeqLength).ToArray();

    // construct the graph
    AddInput(X);
    AddInput(W);
    AddOutput(Y);
    AddAuxVar(HX);
    AddAuxVar(CX);
    AddAuxVar(HY);
    AddAuxVar(CY);
    AddAuxVar(DropoutStates);
    AddAuxVar(Workspace);
    AddAuxVar(ReserveSpace);
}
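// Usage sketch (not part of the library source): constructing the Rnn operator above over a
// (seqLength, batch, inputSize) input. The Variable<T>/PartialShape factories and the concrete
// RnnType instance (written here as LstmRnnType, which may be named differently in the actual
// library) are assumptions; the method and variable names are hypothetical.
private static void BuildRnnSketch()
{
    // 20 time steps, batch of 64, 128 input features per step.
    var x = Variable<float>(PartialShape.Create(20, 64, 128));

    // Two-layer recurrent network with 256 hidden units; rnn.Y has shape (20, 64, 256), and
    // rnn.W starts as an empty Parameter<T>() whose size is only known after Initialize
    // queries cuDNN via GetRNNParamsSize.
    var rnn = new Rnn<float>(new LstmRnnType(), x, numLayers: 2, hiddenSize: 256);
}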
public override void Initialize(Executor executor)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // dropout
    var dropoutDesc = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));
    dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

    // rnn descriptor
    var rnnDesc = executor.RnnDescDict[RnnDesc];
    var mode = Type.Mode;
    rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf<T>());

    // weight
    var wDesc = executor.FilterDescDict[WDesc];
    IntPtr weightsSize;
    dnn.GetRNNParamsSize(rnnDesc, XDesc[0], out weightsSize, Dnn.DataTypeOf<T>());
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
    wDesc.SetND(Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW, new[] { (int)shapeW[0], 1, 1 });

    // workspace and reserve space
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(rnnDesc, SeqLength, XDesc, out workSize);
    executor.GetTensor(Workspace, Shape.Create(workSize.ToInt64()));

    if (IsTraining)
    {
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(rnnDesc, SeqLength, XDesc, out reserveSize);
        executor.GetTensor(ReserveSpace, Shape.Create(reserveSize.ToInt64()));
    }

    // since we are using cuDNN, we'd better make sure these variables are allocated
    executor.GetTensor(W, shapeW);
    if (IsTraining) { executor.GetGradient(W, shapeW); }

    executor.GetTensor(Y, Shape.Create(Y.Shape.AsArray));
    executor.GetTensor(HX, Shape.Create(HX.Shape.AsArray));
    executor.GetTensor(CX, Shape.Create(CX.Shape.AsArray));
    executor.GetTensor(HY, Shape.Create(HY.Shape.AsArray));
    executor.GetTensor(CY, Shape.Create(CY.Shape.AsArray));

    if (IsTraining)
    {
        executor.GetGradient(X, Shape.Create(X.Shape.AsArray));
        executor.GetGradient(Y, Shape.Create(Y.Shape.AsArray));
        executor.GetGradient(HX, Shape.Create(HX.Shape.AsArray));
        executor.GetGradient(CX, Shape.Create(CX.Shape.AsArray));
    }

    // init weights
    var numLinearLayers = Type.NumLinLayers;

    using (var filterDesc = new FilterDescriptor())
    {
        var w = executor.GetTensor(W);
        var filterDimA = new int[3];

        for (var layer = 0; layer < NumLayers; ++layer)
        {
            for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
            {
                int nbDims;
                DataType dataType;
                TensorFormat format;

                // matrix: fill with scaled random normal values
                deviceptr<T> linLayerMat;
                dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                var length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerMatBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                var linLayerMatTensor = new Tensor<T>(linLayerMatBuffer);
                context.Assign(linLayerMatTensor, RandomNormal<T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar<T>()));

                // bias: initialization is delegated to the rnn type
                deviceptr<T> linLayerBias;
                dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerBiasBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                var linLayerBiasTensor = new Tensor<T>(linLayerBiasBuffer);
                Type.InitBias(context, layer, linLayerId, linLayerBiasTensor);
            }
        }
    }

    base.Initialize(executor);

    // zero the initial hidden and cell states
    const double value = 0.0;
    executor.AssignTensor(HX, Fill(Shape.Create(HX.Shape.AsArray), ScalarOps.Conv<T>(value)));
    executor.AssignTensor(CX, Fill(Shape.Create(CX.Shape.AsArray), ScalarOps.Conv<T>(value)));
}
public override void Initialize(Executor executor)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // dropout
    var dropoutDesc = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));
    dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

    // rnn descriptor
    var rnnDesc = executor.RnnDescDict[RnnDesc];
    var mode = RnnType.Mode;
    rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf<T>());

    // initialize weight, once only, using mini-batch size 1;
    // the first dimension does not affect the weight shape and size
    // (TODO: test all rnn types, currently tested only for LSTM)
    var shape = PartialShape.Create(1, InputSize, 1);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

    var wDesc = executor.FilterDescDict[WDesc];
    IntPtr weightsSize;
    dnn.GetRNNParamsSize(rnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf<T>());
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
    wDesc.SetND(Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW, new[] { (int)shapeW[0], 1, 1 });

    // since we are using cuDNN, we'd better make sure these variables are allocated
    executor.GetTensor(W, shapeW);
    if (IsTraining) { executor.GetGradient(W, shapeW); }

    // init weights
    var numLinearLayers = RnnType.NumLinLayers;

    using (var filterDesc = new FilterDescriptor())
    {
        var w = executor.GetTensor(W);
        var filterDimA = new int[3];

        for (var layer = 0; layer < NumLayers; ++layer)
        {
            for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
            {
                int nbDims;
                DataType dataType;
                TensorFormat format;

                // matrix: fill with scaled random normal values
                deviceptr<T> linLayerMat;
                dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                var length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerMatBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                var linLayerMatTensor = new Tensor<T>(linLayerMatBuffer);
                context.Assign(linLayerMatTensor, RandomNormal<T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar<T>()));

                // bias: initialization is delegated to the rnn type
                deviceptr<T> linLayerBias;
                dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerBiasBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                var linLayerBiasTensor = new Tensor<T>(linLayerBiasBuffer);
                RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
            }
        }
    }

    base.Initialize(executor);
}
public RnnDescr(Executor executor, RnnDynamic<T> rnn)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    Rnn = rnn;
    var x = executor.GetTensor(Rnn.X);
    SeqLength = (int)x.Shape[0];
    MiniBatch = (int)x.Shape[1];

    var shape = Shape.Create(SeqLength, MiniBatch, Rnn.HiddenSize);
    executor.GetTensor(Rnn.Y, shape);

    // state variables
    shape = Shape.Create(Rnn.NumLayers, MiniBatch, Rnn.HiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // inner-change-most layout
    executor.GetTensor(Rnn.HX, shape);
    executor.GetTensor(Rnn.CX, shape);
    executor.GetTensor(Rnn.HY, shape);
    executor.GetTensor(Rnn.CY, shape);
    StateDesc = new TensorDescriptor();
    StateDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

    // xDesc is an array with one descriptor per step
    shape = Shape.Create(MiniBatch, rnn.InputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, SeqLength).ToArray();

    // yDesc is an array with one descriptor per step
    shape = Shape.Create(MiniBatch, rnn.HiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, SeqLength).ToArray();

    // workspace and reserve space
    var rnnDesc = executor.RnnDescDict[rnn.RnnDesc];
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(rnnDesc, SeqLength, XDesc, out workSize);
    executor.GetTensor(Rnn.Workspace, Shape.Create(workSize.ToInt64()));

    if (Rnn.IsTraining)
    {
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(rnnDesc, SeqLength, XDesc, out reserveSize);
        executor.GetTensor(Rnn.ReserveSpace, Shape.Create(reserveSize.ToInt64()));

        executor.GetGradient(Rnn.X, x.Shape);
        executor.GetGradient(Rnn.Y, Shape.Create(SeqLength, MiniBatch, Rnn.HiddenSize));
        executor.GetGradient(Rnn.HX, Shape.Create(Rnn.NumLayers, MiniBatch, Rnn.HiddenSize));
        executor.GetGradient(Rnn.CX, Shape.Create(Rnn.NumLayers, MiniBatch, Rnn.HiddenSize));
    }
}
public RnnCell(Executor executor, RnnType rnnType, Variable<T> w, int inputSize, int batch, int hiddenSize, int numLayers, bool isTraining, double dropoutProbability, ulong dropoutSeed = 1337UL)
{
    IsTraining = isTraining;
    BatchSize = batch;
    InputSize = inputSize;
    HiddenSize = hiddenSize;
    NumLayers = numLayers;
    RnnType = rnnType;
    W = w;

    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // state variables
    var shape = Shape.Create(numLayers, batch, hiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // inner-change-most layout
    StateDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

    // xDesc is an array of one element because we do only one step
    shape = Shape.Create(batch, inputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, 1).ToArray();

    // yDesc is an array of one element because we do only one step
    shape = Shape.Create(batch, hiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, 1).ToArray();

    // dropout
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    DropoutStates = executor.Context.Device.Allocate<byte>(Shape.Create(dropoutStatesSize.ToInt64()));
    DropoutDesc.Set(dnn, (float)dropoutProbability, DropoutStates.Buffer.Ptr, dropoutStatesSize, dropoutSeed);

    // rnn descriptor
    var mode = rnnType.Mode;
    RnnDesc.Set(hiddenSize, numLayers, DropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf<T>());

    // workspace and reserve space (single step)
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(RnnDesc, 1, XDesc, out workSize);
    Workspace = executor.Context.Device.Allocate<byte>(Shape.Create(workSize.ToInt64()));

    if (isTraining)
    {
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(RnnDesc, 1, XDesc, out reserveSize);
        ReserveSize = reserveSize.ToInt64();
        //ReserveSpace = executor.AttentionState.Device.Allocate<byte>(Shape.Create(reserveSize.ToInt64()));
    }

    // weight
    IntPtr weightsSize;
    dnn.GetRNNParamsSize(RnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf<T>());
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
    WDesc.SetND(Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW, new[] { (int)shapeW[0], 1, 1 });

    executor.GetTensor(W, shapeW);
    if (isTraining) { executor.GetGradient(W, shapeW); }
}