/// <summary>
/// Gives every linear layer stored inside the weight tensor <c>W</c> its starting values.
/// </summary>
/// <remarks>
/// For each (layer, linear-layer id) pair the matrix region is assigned random-normal samples divided by
/// <c>sqrt(HiddenSize + InputSize)</c>, and the bias region is handed to <c>RnnType.InitBias</c>.
/// </remarks>
/// <param name="executor">Supplies the GPU context and the tensor bound to <c>W</c>.</param>
public void Initialize(Executor executor)
{
    var gpu = executor.Context.ToGpuContext();
    var cudnn = gpu.Dnn;
    var rnnDescriptor = RnnDesc;
    var weightDescriptor = WDesc;

    // One descriptor is reused for every query; its dimensions are read back through GetND.
    using (var regionDesc = new FilterDescriptor())
    {
        var weights = executor.GetTensor(W);
        var regionDims = new int[3];

        for (var layerIdx = 0; layerIdx < NumLayers; ++layerIdx)
        {
            for (var linId = 0; linId < RnnType.NumLinLayers; ++linId)
            {
                // Only regionDims is consumed below; the other outputs are required by GetND's signature.
                int dimCount;
                DataType elementType;
                TensorFormat tensorFormat;

                // Matrix region: random-normal samples scaled down by sqrt(HiddenSize + InputSize).
                deviceptr<T> matrixPtr;
                cudnn.GetRNNLinLayerMatrixParams(
                    rnnDescriptor, layerIdx, XDesc[0], weightDescriptor,
                    weights.Buffer.Ptr, linId, regionDesc, out matrixPtr);
                regionDesc.GetND(out elementType, out tensorFormat, out dimCount, regionDims);

                var elementCount = regionDims.Aggregate(ScalarOps.Mul);
                var matrixLayout = new Layout(Shape.Create(elementCount));
                var matrixTensor = new Tensor<T>(
                    new Buffer<T>(gpu.Device, weights.Memory, matrixLayout, matrixPtr));

                var samples = AleaTK.Library.RandomNormal<T>(Shape.Create(elementCount));
                var divisor = Math.Sqrt(HiddenSize + InputSize).AsScalar<T>();
                gpu.Assign(matrixTensor, samples / divisor);

                // Bias region: the RNN type decides the initial values.
                deviceptr<T> biasPtr;
                cudnn.GetRNNLinLayerBiasParams(
                    rnnDescriptor, layerIdx, XDesc[0], weightDescriptor,
                    weights.Buffer.Ptr, linId, regionDesc, out biasPtr);
                regionDesc.GetND(out elementType, out tensorFormat, out dimCount, regionDims);

                elementCount = regionDims.Aggregate(ScalarOps.Mul);
                var biasLayout = new Layout(Shape.Create(elementCount));
                var biasTensor = new Tensor<T>(
                    new Buffer<T>(gpu.Device, weights.Memory, biasLayout, biasPtr));

                RnnType.InitBias(gpu, layerIdx, linId, biasTensor);
            }
        }
    }
}
/// <summary>
/// Configures the dropout, RNN and weight descriptors, requests every tensor this layer needs from the
/// executor, initializes the weights, and finally zero-fills the initial states <c>HX</c> and <c>CX</c>.
/// </summary>
/// <remarks>
/// Weight matrices receive random-normal samples divided by <c>sqrt(HiddenSize + InputSize)</c>;
/// biases are delegated to <c>Type.InitBias</c>. Gradient and reserve-space tensors are requested only
/// when <c>IsTraining</c> is true.
/// </remarks>
/// <param name="executor">Executor that owns the descriptors, tensors and GPU context used here.</param>
public override void Initialize(Executor executor)
{
    var gpu = executor.Context.ToGpuContext();
    var cudnn = gpu.Dnn;

    // Dropout: size the state tensor as reported by cuDNN, then bind it to the descriptor.
    var dropout = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStateSize;
    cudnn.DropoutGetStatesSize(out dropoutStateSize);
    var dropoutStateTensor = executor.GetTensor(DropoutStates, Shape.Create(dropoutStateSize.ToInt64()));
    dropout.Set(cudnn, (float)Dropout, dropoutStateTensor.Buffer.Ptr, dropoutStateSize, DropoutSeed);

    // RNN descriptor: linear input, unidirectional.
    var rnn = executor.RnnDescDict[RnnDesc];
    rnn.Set(
        HiddenSize, NumLayers, dropout,
        RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL,
        Type.Mode, Dnn.DataTypeOf<T>());

    // Weights: the size reported by cuDNN must be a whole number of T elements.
    var weightFilter = executor.FilterDescDict[WDesc];
    IntPtr weightSize;
    cudnn.GetRNNParamsSize(rnn, XDesc[0], out weightSize, Dnn.DataTypeOf<T>());
    Util.EnsureTrue(weightSize.ToInt64() % Gpu.SizeOf<T>() == 0);
    var weightShape = Shape.Create(weightSize.ToInt64() / Alea.Gpu.SizeOf<T>());
    weightFilter.SetND(
        Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW,
        new[] { (int)weightShape[0], 1, 1 });

    // Workspace, plus reserve space when training.
    IntPtr workspaceSize;
    cudnn.GetRNNWorkspaceSize(rnn, SeqLength, XDesc, out workspaceSize);
    executor.GetTensor(Workspace, Shape.Create(workspaceSize.ToInt64()));

    if (IsTraining)
    {
        IntPtr reserveSpaceSize;
        cudnn.GetRNNTrainingReserveSize(rnn, SeqLength, XDesc, out reserveSpaceSize);
        executor.GetTensor(ReserveSpace, Shape.Create(reserveSpaceSize.ToInt64()));
    }

    // cuDNN works on these directly, so make sure each variable is allocated up front.
    executor.GetTensor(W, weightShape);
    if (IsTraining)
    {
        executor.GetGradient(W, weightShape);
    }

    executor.GetTensor(Y, Shape.Create(Y.Shape.AsArray));
    executor.GetTensor(HX, Shape.Create(HX.Shape.AsArray));
    executor.GetTensor(CX, Shape.Create(CX.Shape.AsArray));
    executor.GetTensor(HY, Shape.Create(HY.Shape.AsArray));
    executor.GetTensor(CY, Shape.Create(CY.Shape.AsArray));

    if (IsTraining)
    {
        executor.GetGradient(X, Shape.Create(X.Shape.AsArray));
        executor.GetGradient(Y, Shape.Create(Y.Shape.AsArray));
        executor.GetGradient(HX, Shape.Create(HX.Shape.AsArray));
        executor.GetGradient(CX, Shape.Create(CX.Shape.AsArray));
    }

    // Weight initialization, one (layer, linear-layer id) pair at a time.
    var linLayerCount = Type.NumLinLayers;
    var matrixScale = Math.Sqrt(HiddenSize + InputSize);

    using (var regionDesc = new FilterDescriptor())
    {
        var weights = executor.GetTensor(W);
        var regionDims = new int[3];

        for (var layerIdx = 0; layerIdx < NumLayers; ++layerIdx)
        {
            for (var linId = 0; linId < linLayerCount; ++linId)
            {
                // Only regionDims is consumed below; the other outputs are required by GetND's signature.
                int dimCount;
                DataType elementType;
                TensorFormat tensorFormat;

                // Matrix region: scaled random-normal samples.
                deviceptr<T> matrixPtr;
                cudnn.GetRNNLinLayerMatrixParams(
                    rnn, layerIdx, XDesc[0], weightFilter,
                    weights.Buffer.Ptr, linId, regionDesc, out matrixPtr);
                regionDesc.GetND(out elementType, out tensorFormat, out dimCount, regionDims);

                var elementCount = regionDims.Aggregate(ScalarOps.Mul);
                var matrixLayout = new Layout(Shape.Create(elementCount));
                var matrixTensor = new Tensor<T>(
                    new Buffer<T>(gpu.Device, weights.Memory, matrixLayout, matrixPtr));

                var samples = RandomNormal<T>(Shape.Create(elementCount));
                gpu.Assign(matrixTensor, samples / (matrixScale.AsScalar<T>()));

                // Bias region: initial values are chosen by the RNN type.
                deviceptr<T> biasPtr;
                cudnn.GetRNNLinLayerBiasParams(
                    rnn, layerIdx, XDesc[0], weightFilter,
                    weights.Buffer.Ptr, linId, regionDesc, out biasPtr);
                regionDesc.GetND(out elementType, out tensorFormat, out dimCount, regionDims);

                elementCount = regionDims.Aggregate(ScalarOps.Mul);
                var biasLayout = new Layout(Shape.Create(elementCount));
                var biasTensor = new Tensor<T>(
                    new Buffer<T>(gpu.Device, weights.Memory, biasLayout, biasPtr));

                Type.InitBias(gpu, layerIdx, linId, biasTensor);
            }
        }
    }

    base.Initialize(executor);

    // Start from all-zero HX and CX states.
    const double initialState = 0.0;
    executor.AssignTensor(HX, Fill(Shape.Create(HX.Shape.AsArray), ScalarOps.Conv<T>(initialState)));
    executor.AssignTensor(CX, Fill(Shape.Create(CX.Shape.AsArray), ScalarOps.Conv<T>(initialState)));
}
/// <summary>
/// Configures the dropout, RNN and weight descriptors, allocates the weight tensor <c>W</c> (and its
/// gradient when <c>IsTraining</c> is true), initializes the weights, then calls the base initializer.
/// </summary>
/// <remarks>
/// The weights are sized and initialized against a temporary input descriptor built for minibatch size 1.
/// Weight matrices receive random-normal samples divided by <c>sqrt(HiddenSize + InputSize)</c>;
/// biases are delegated to <c>RnnType.InitBias</c>.
/// </remarks>
/// <param name="executor">Executor that owns the descriptors, tensors and GPU context used here.</param>
public override void Initialize(Executor executor)
{
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;

    // dropout
    var dropoutDesc = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));
    dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

    // rnn descriptor
    var rnnDesc = executor.RnnDescDict[RnnDesc];
    var mode = RnnType.Mode;
    rnnDesc.Set(
        HiddenSize, NumLayers, dropoutDesc,
        RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL,
        mode, Dnn.DataTypeOf<T>());

    // initialize weight, once only, using minibatch size 1
    // first dimension does not affect the weight shape and size
    // TODO test all, tested only for LSTM
    var shape = PartialShape.Create(1, InputSize, 1);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);

    // xDesc is only used by the queries inside this scope, so it is disposed here instead of being
    // left undisposed as before.
    using (var xDesc = new TensorDescriptor())
    {
        xDesc.SetND(Dnn.DataTypeOf<T>(), shape.AsInt32Array, strides.AsInt32Array);

        var wDesc = executor.FilterDescDict[WDesc];
        IntPtr weightsSize;
        dnn.GetRNNParamsSize(rnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf<T>());

        // The reported size must be a whole number of T elements.
        Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf<T>() == 0);
        var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf<T>());
        wDesc.SetND(
            Dnn.DataTypeOf<T>(), TensorFormat.CUDNN_TENSOR_NCHW,
            new[] { (int)shapeW[0], 1, 1 });

        // since we are using cuDNN, we'd better make sure these variables are allocated
        executor.GetTensor(W, shapeW);
        if (IsTraining)
        {
            executor.GetGradient(W, shapeW);
        }

        // init weights
        var numLinearLayers = RnnType.NumLinLayers;
        using (var filterDesc = new FilterDescriptor())
        {
            var w = executor.GetTensor(W);
            var filterDimA = new int[3];

            for (var layer = 0; layer < NumLayers; ++layer)
            {
                for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
                {
                    // Only filterDimA is consumed below; the other outputs are required by GetND's signature.
                    int nbDims;
                    DataType dataType;
                    TensorFormat format;

                    // Matrix region: random-normal samples divided by sqrt(HiddenSize + InputSize).
                    deviceptr<T> linLayerMat;
                    dnn.GetRNNLinLayerMatrixParams(
                        rnnDesc, layer, xDesc, wDesc,
                        w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);
                    filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                    var length = filterDimA.Aggregate(ScalarOps.Mul);
                    var linLayerMatBuffer = new Buffer<T>(
                        context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                    var linLayerMatTensor = new Tensor<T>(linLayerMatBuffer);
                    context.Assign(
                        linLayerMatTensor,
                        RandomNormal<T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar<T>()));

                    // Bias region: initial values are chosen by the RNN type.
                    deviceptr<T> linLayerBias;
                    dnn.GetRNNLinLayerBiasParams(
                        rnnDesc, layer, xDesc, wDesc,
                        w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);
                    filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                    length = filterDimA.Aggregate(ScalarOps.Mul);
                    var linLayerBiasBuffer = new Buffer<T>(
                        context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                    var linLayerBiasTensor = new Tensor<T>(linLayerBiasBuffer);
                    RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                }
            }
        }
    }

    base.Initialize(executor);
}