/// <summary>
/// Creates the variables of the operator and registers them with the graph.
/// The input must have rank 3 with layout (seqLength, batch, inputSize); the batch and
/// input sizes must be known (non-negative), while the sequence length is left open (-1)
/// in the output and state shapes.
/// </summary>
/// <param name="rnnRnnType">Value stored in <c>RnnType</c>.</param>
/// <param name="input">Input variable, layout (seqLength, batch, inputSize).</param>
/// <param name="numLayers">Value stored in <c>NumLayers</c>; first dimension of the HX/CX/HY/CY shapes.</param>
/// <param name="hiddenSize">Value stored in <c>HiddenSize</c>; last dimension of the output and state shapes.</param>
/// <param name="isTraining">Value stored in <c>IsTraining</c>; when false the stored dropout probability is 0.0.</param>
/// <param name="dropoutProbability">Dropout probability, kept only when <paramref name="isTraining"/> is true.</param>
/// <param name="dropoutSeed">Value stored in <c>DropoutSeed</c>.</param>
public IteratedRnnCell(RnnType rnnRnnType, Variable<T> input, int numLayers, int hiddenSize, bool isTraining, double dropoutProbability, ulong dropoutSeed = 1337UL)
{
    const string layoutMessage = "Input layout: (seqLength, batch, inputSize)";

    RnnType = rnnRnnType;
    IsTraining = isTraining;
    NumLayers = numLayers;
    HiddenSize = hiddenSize;

    // Dropout is forced to zero outside of training.
    if (isTraining)
    {
        DropoutProbability = dropoutProbability;
    }
    else
    {
        DropoutProbability = 0.0;
    }

    DropoutSeed = dropoutSeed;

    // Rank must be 3 and both the batch and the input size must already be known.
    var inputShape = input.Shape;
    Util.EnsureEqual(3, inputShape.Rank, layoutMessage);
    Util.EnsureTrue(inputShape[1] >= 0, layoutMessage);
    Util.EnsureTrue(inputShape[2] >= 0, layoutMessage);

    Input = input;
    BatchSize = (int)inputShape[1];
    InputSize = (int)inputShape[2];

    // Output shape is (seqLength, batchSize, hiddenSize); the sequence length is left as -1.
    Output = Variable<T>(PartialShape.Create(-1, BatchSize, HiddenSize));

    // The shape of W is determined during initialization.
    W = Parameter<T>();

    // Hidden and cell state variables, each of shape (numLayers, batchSize, hiddenSize).
    HX = Variable<T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));
    CX = Variable<T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));
    HY = Variable<T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));
    CY = Variable<T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));

    // State variables H and C = (n - 1, layer, b, d); n is unknown, hence the leading -1.
    var stepStateShape = PartialShape.Create(-1, NumLayers, BatchSize, HiddenSize);
    H = Library.Variable<T>(stepStateShape);
    C = Library.Variable<T>(stepStateShape);
    ReserveSpace = Library.Variable<byte>();

    // Construct the graph: inputs, output, then auxiliary variables.
    AddInput(Input);
    AddInput(W);
    AddOutput(Output);
    AddAuxVar(HX);
    AddAuxVar(CX);
    AddAuxVar(HY);
    AddAuxVar(CY);
    AddAuxVar(H);
    AddAuxVar(C);
    AddAuxVar(ReserveSpace);
}
/// <summary>
/// Creates the variables of the operator and registers them with the graph.
/// The input must have rank 3 with layout (seqLength, batch, inputSize) and a known
/// (non-negative) input size; the sequence length and batch size are left open (-1) in
/// the output and state shapes.
/// </summary>
/// <param name="rnnRnnType">Value stored in <c>RnnType</c>.</param>
/// <param name="x">Input variable, layout (seqLength, batch, inputSize).</param>
/// <param name="numLayers">Value stored in <c>NumLayers</c>; first dimension of the state shape.</param>
/// <param name="hiddenSize">Value stored in <c>HiddenSize</c>; last dimension of the output and state shapes.</param>
/// <param name="isTraining">Value stored in <c>IsTraining</c>; when false the stored dropout is 0.0.</param>
/// <param name="dropout">Dropout probability, kept only when <paramref name="isTraining"/> is true.</param>
/// <param name="dropoutSeed">Value stored in <c>DropoutSeed</c>.</param>
public RnnDynamic(RnnType rnnRnnType, Variable<T> x, int numLayers, int hiddenSize, bool isTraining = true, double dropout = 0.0, ulong dropoutSeed = 1337UL)
{
    const string layoutMessage = "Input layout: (seqLength, batch, inputSize)";

    RnnType = rnnRnnType;
    IsTraining = isTraining;
    NumLayers = numLayers;
    HiddenSize = hiddenSize;

    // Dropout is forced to zero outside of training.
    if (isTraining)
    {
        Dropout = dropout;
    }
    else
    {
        Dropout = 0.0;
    }

    DropoutSeed = dropoutSeed;

    // X has shape (seqLength, batch, inputSize); only the input size has to be known here.
    X = x;
    var inputShape = X.Shape;
    Util.EnsureEqual(3, inputShape.Rank, layoutMessage);
    Util.EnsureTrue(inputShape[2] >= 0, layoutMessage);
    InputSize = (int)inputShape[2];

    // Y has shape (maxSeqLength, batch, hiddenSize); the first two dimensions are not yet known.
    Y = Variable<T>(PartialShape.Create(-1, -1, HiddenSize));

    // The shape of W is determined during initialization.
    W = Parameter<T>();

    // State variables share one partial shape (numLayers, unknown batch, hiddenSize).
    var stateShape = PartialShape.Create(NumLayers, -1, HiddenSize);
    HX = Variable<T>(stateShape);
    CX = Variable<T>(stateShape);
    HY = Variable<T>(stateShape);
    CY = Variable<T>(stateShape);

    // Construct the graph: inputs, output, then auxiliary variables.
    AddInput(X);
    AddInput(W);
    AddOutput(Y);
    AddAuxVar(HX);
    AddAuxVar(CX);
    AddAuxVar(HY);
    AddAuxVar(CY);
    AddAuxVar(DropoutStates);
    AddAuxVar(Workspace);
    AddAuxVar(ReserveSpace);
}
/// <summary>
/// Creates the variables and tensor descriptors of the operator and registers the variables
/// with the graph. The input must have rank 3 with layout (seqLength, batch, inputSize) and
/// all three dimensions must be known (non-negative).
/// </summary>
/// <param name="ty">Value stored in <c>Type</c>.</param>
/// <param name="x">Input variable, layout (seqLength, batch, inputSize).</param>
/// <param name="numLayers">Value stored in <c>NumLayers</c>; first dimension of the state shape.</param>
/// <param name="hiddenSize">Value stored in <c>HiddenSize</c>; last dimension of the output and state shapes.</param>
/// <param name="isTraining">Value stored in <c>IsTraining</c>; when false the stored dropout is 0.0.</param>
/// <param name="dropout">Dropout probability, kept only when <paramref name="isTraining"/> is true.</param>
/// <param name="dropoutSeed">Value stored in <c>DropoutSeed</c>.</param>
public Rnn(RnnType ty, Variable<T> x, int numLayers, int hiddenSize, bool isTraining = true, double dropout = 0.0, ulong dropoutSeed = 1337UL)
{
    const string layoutMessage = "Input layout: (seqLength, batch, inputSize)";

    Type = ty;
    IsTraining = isTraining;
    NumLayers = numLayers;
    HiddenSize = hiddenSize;

    // Dropout is forced to zero outside of training.
    if (isTraining)
    {
        Dropout = dropout;
    }
    else
    {
        Dropout = 0.0;
    }

    DropoutSeed = dropoutSeed;

    // X has shape (seqLength, batch, inputSize) and every dimension must be known.
    X = x;
    var inputShape = X.Shape;
    Util.EnsureEqual(3, inputShape.Rank, layoutMessage);
    Util.EnsureTrue(inputShape[0] >= 0, layoutMessage);
    Util.EnsureTrue(inputShape[1] >= 0, layoutMessage);
    Util.EnsureTrue(inputShape[2] >= 0, layoutMessage);
    SeqLength = (int)inputShape[0];
    MiniBatch = (int)inputShape[1];
    InputSize = (int)inputShape[2];

    // Y has shape (seqLength, batch, hiddenSize).
    Y = Variable<T>(PartialShape.Create(SeqLength, MiniBatch, HiddenSize));

    // The shape of W is determined during initialization.
    W = Parameter<T>();

    // State variables and their descriptor; the strides give the last dimension stride 1,
    // so the innermost index changes fastest.
    var stateShape = PartialShape.Create(NumLayers, MiniBatch, HiddenSize);
    var stateStrides = Strides.Create(stateShape[1] * stateShape[2], stateShape[2], 1);
    HX = Variable<T>(stateShape);
    CX = Variable<T>(stateShape);
    HY = Variable<T>(stateShape);
    CY = Variable<T>(stateShape);
    StateDesc = new TensorDescriptor();
    StateDesc.SetND(Dnn.DataTypeOf<T>(), stateShape.AsInt32Array, stateStrides.AsInt32Array);

    // XDesc holds one entry per time step; every entry is the same descriptor instance.
    var stepInputShape = PartialShape.Create(MiniBatch, InputSize, 1);
    var stepInputStrides = Strides.Create(stepInputShape[1] * stepInputShape[2], stepInputShape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf<T>(), stepInputShape.AsInt32Array, stepInputStrides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, SeqLength).ToArray();

    // YDesc holds one entry per time step; every entry is the same descriptor instance.
    var stepOutputShape = PartialShape.Create(MiniBatch, HiddenSize, 1);
    var stepOutputStrides = Strides.Create(stepOutputShape[1] * stepOutputShape[2], stepOutputShape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf<T>(), stepOutputShape.AsInt32Array, stepOutputStrides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, SeqLength).ToArray();

    // Construct the graph: inputs, output, then auxiliary variables.
    AddInput(X);
    AddInput(W);
    AddOutput(Y);
    AddAuxVar(HX);
    AddAuxVar(CX);
    AddAuxVar(HY);
    AddAuxVar(CY);
    AddAuxVar(DropoutStates);
    AddAuxVar(Workspace);
    AddAuxVar(ReserveSpace);
}
/// <summary>
/// Configures the dropout and RNN descriptors held by the executor, queries the size of the
/// packed weight buffer, allocates <c>W</c> (and its gradient when training) and initializes
/// every linear-layer matrix and bias inside that buffer, then calls the base initializer.
/// </summary>
/// <remarks>
/// Each linear-layer matrix is filled with <c>RandomNormal</c> values divided by
/// <c>sqrt(HiddenSize + InputSize)</c>; each bias is delegated to <c>RnnType.InitBias</c>.
/// </remarks>
/// <param name="executor">Executor supplying the GPU context, descriptor dictionaries and tensor storage.</param>
public override void Initialize(Executor executor)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // dropout
    var dropoutDesc = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));
    dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

    // rnn descriptor
    var rnnDesc = executor.RnnDescDict[RnnDesc];
    var mode = RnnType.Mode;
    rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf<T>());

    // initialize weight, once only, using minibatch size 1
    // first dimension does not affect the weight shape and size
    // TODO test all, tested only for LSTM
    var shape = PartialShape.Create(1, InputSize, 1);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);

    // The input descriptor is only needed for the queries inside this method, so it is
    // scoped with `using` and released deterministically instead of being leaked.
    using (var xDesc = new TensorDescriptor())
    {
        xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

        var wDesc = executor.FilterDescDict[WDesc];
        IntPtr weightsSize;
        dnn.GetRNNParamsSize(rnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf<T>());

        // The reported byte size must be a whole number of elements of T.
        Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
        var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
        wDesc.SetND(Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW, new[] { (int)shapeW[0], 1, 1 });

        // since we are using cuDNN, we'd better make sure these variables are allocated
        executor.GetTensor(W, shapeW);
        if (IsTraining)
        {
            executor.GetGradient(W, shapeW);
        }

        // init weights
        var numLinearLayers = RnnType.NumLinLayers;

        using (var filterDesc = new FilterDescriptor())
        {
            var w = executor.GetTensor(W);
            var filterDimA = new int[3];

            for (var layer = 0; layer < NumLayers; ++layer)
            {
                for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
                {
                    int nbDims;
                    DataType dataType;
                    TensorFormat format;

                    // Locate the matrix of this linear layer inside the packed weight buffer
                    // and wrap that region in a tensor so it can be assigned to.
                    deviceptr<T> linLayerMat;
                    dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);
                    filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                    var length = filterDimA.Aggregate(ScalarOps.Mul);
                    var linLayerMatBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                    var linLayerMatTensor = new Tensor<T>(linLayerMatBuffer);
                    context.Assign(linLayerMatTensor, RandomNormal<T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar<T>()));

                    // Same for the bias of this linear layer; its values come from the RNN type.
                    deviceptr<T> linLayerBias;
                    dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);
                    filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                    length = filterDimA.Aggregate(ScalarOps.Mul);
                    var linLayerBiasBuffer = new Buffer<T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                    var linLayerBiasTensor = new Tensor<T>(linLayerBiasBuffer);
                    RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                }
            }
        }
    }

    base.Initialize(executor);
}
/// <summary>
/// Sets up a single-step RNN cell: configures the state, input and output tensor
/// descriptors, the dropout and RNN descriptors, allocates the dropout state and workspace
/// buffers on the executor's device, records the training reserve size when training, and
/// makes sure the weight tensor (and its gradient when training) is allocated.
/// </summary>
/// <param name="executor">Executor supplying the GPU context, device and tensor storage.</param>
/// <param name="rnnType">Value stored in <c>RnnType</c>; its <c>Mode</c> configures the RNN descriptor.</param>
/// <param name="w">Weight variable stored in <c>W</c>.</param>
/// <param name="inputSize">Value stored in <c>InputSize</c>; size of one input vector.</param>
/// <param name="batch">Value stored in <c>BatchSize</c>.</param>
/// <param name="hiddenSize">Value stored in <c>HiddenSize</c>.</param>
/// <param name="numLayers">Value stored in <c>NumLayers</c>.</param>
/// <param name="isTraining">Value stored in <c>IsTraining</c>; enables the reserve-size query and gradient allocation.</param>
/// <param name="dropoutProbability">
/// Dropout probability; used only when <paramref name="isTraining"/> is true, otherwise 0.0
/// is passed to the dropout descriptor, matching the other RNN operator constructors.
/// </param>
/// <param name="dropoutSeed">Seed passed to the dropout descriptor.</param>
public RnnCell(Executor executor, RnnType rnnType, Variable<T> w, int inputSize, int batch, int hiddenSize, int numLayers, bool isTraining, double dropoutProbability, ulong dropoutSeed = 1337UL)
{
    IsTraining = isTraining;
    BatchSize = batch;
    InputSize = inputSize;
    HiddenSize = hiddenSize;
    NumLayers = numLayers;
    RnnType = rnnType;
    W = w;

    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // State descriptor; the strides give the last dimension stride 1, so the innermost
    // index changes fastest.
    var shape = Shape.Create(numLayers, batch, hiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    StateDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

    // XDesc is an array of one element because only one step is executed.
    shape = Shape.Create(batch, inputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = new[] { xDesc };

    // YDesc is an array of one element because only one step is executed.
    shape = Shape.Create(batch, hiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = new[] { yDesc };

    // Dropout is forced to zero outside of training.
    var effectiveDropout = isTraining ? dropoutProbability : 0.0;

    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    DropoutStates = executor.Context.Device.Allocate<byte>(Shape.Create(dropoutStatesSize.ToInt64()));
    DropoutDesc.Set(dnn, (float)effectiveDropout, DropoutStates.Buffer.Ptr, dropoutStatesSize, dropoutSeed);

    var mode = rnnType.Mode;
    RnnDesc.Set(hiddenSize, numLayers, DropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf<T>());

    // Workspace for a sequence length of 1.
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(RnnDesc, 1, XDesc, out workSize);
    Workspace = executor.Context.Device.Allocate<byte>(Shape.Create(workSize.ToInt64()));

    if (isTraining)
    {
        // Only the size is recorded here; the reserve space itself is not allocated in this constructor.
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(RnnDesc, 1, XDesc, out reserveSize);
        ReserveSize = reserveSize.ToInt64();
    }

    IntPtr weightsSize;
    dnn.GetRNNParamsSize(RnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf<T>());

    // The reported byte size must be a whole number of elements of T.
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
    WDesc.SetND(Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW, new[] { (int)shapeW[0], 1, 1 });

    // Make sure the weight tensor (and its gradient when training) is allocated.
    executor.GetTensor(W, shapeW);
    if (isTraining)
    {
        executor.GetGradient(W, shapeW);
    }
}