Пример #1
0
        public void Initialize(Executor executor)
        {
            var context = executor.Context.ToGpuContext();
            var dnn     = context.Dnn;

            var rnnDesc = RnnDesc;
            var wDesc   = WDesc;

            // init weights
            using (var filterDesc = new FilterDescriptor())
            {
                var w          = executor.GetTensor(W);
                var filterDimA = new int[3];

                for (var layer = 0; layer < NumLayers; ++layer)
                {
                    for (var linLayerId = 0; linLayerId < RnnType.NumLinLayers; ++linLayerId)
                    {
                        int          nbDims;
                        DataType     dataType;
                        TensorFormat format;

                        deviceptr <T> linLayerMat;
                        dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        var length = filterDimA.Aggregate(ScalarOps.Mul);

                        var linLayerMatBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                        var linLayerMatTensor = new Tensor <T>(linLayerMatBuffer);
                        context.Assign(linLayerMatTensor, AleaTK.Library.RandomNormal <T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar <T>()));

                        deviceptr <T> linLayerBias;
                        dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        length = filterDimA.Aggregate(ScalarOps.Mul);

                        var linLayerBiasBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                        var linLayerBiasTensor = new Tensor <T>(linLayerBiasBuffer);
                        RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                    }
                }
            }
        }
Пример #2
0
        public override void Initialize(Executor executor)
        {
            var context = executor.Context.ToGpuContext();
            var dnn     = context.Dnn;

            // dropout
            var    dropoutDesc = executor.DropoutDescDict[DropoutDesc];
            IntPtr dropoutStatesSize;

            dnn.DropoutGetStatesSize(out dropoutStatesSize);
            var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));

            dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

            // rnn descriptor
            var rnnDesc = executor.RnnDescDict[RnnDesc];
            var mode    = Type.Mode;

            rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf <T>());

            // weight
            var    wDesc = executor.FilterDescDict[WDesc];
            IntPtr weightsSize;

            dnn.GetRNNParamsSize(rnnDesc, XDesc[0], out weightsSize, Dnn.DataTypeOf <T>());
            Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf <T>() == 0);
            var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf <T>());

            wDesc.SetND(Dnn.DataTypeOf <T>(), TensorFormat.CUDNN_TENSOR_NCHW, new [] { (int)shapeW[0], 1, 1 });

            // workspace and reserved space
            IntPtr workSize;

            dnn.GetRNNWorkspaceSize(rnnDesc, SeqLength, XDesc, out workSize);
            executor.GetTensor(Workspace, Shape.Create(workSize.ToInt64()));

            if (IsTraining)
            {
                IntPtr reserveSize;
                dnn.GetRNNTrainingReserveSize(rnnDesc, SeqLength, XDesc, out reserveSize);
                executor.GetTensor(ReserveSpace, Shape.Create(reserveSize.ToInt64()));
            }

            // since we are using cuDNN, we'd better make sure these varaibles are allocated
            executor.GetTensor(W, shapeW);
            if (IsTraining)
            {
                executor.GetGradient(W, shapeW);
            }

            executor.GetTensor(Y, Shape.Create(Y.Shape.AsArray));
            executor.GetTensor(HX, Shape.Create(HX.Shape.AsArray));
            executor.GetTensor(CX, Shape.Create(CX.Shape.AsArray));
            executor.GetTensor(HY, Shape.Create(HY.Shape.AsArray));
            executor.GetTensor(CY, Shape.Create(CY.Shape.AsArray));

            if (IsTraining)
            {
                executor.GetGradient(X, Shape.Create(X.Shape.AsArray));
                executor.GetGradient(Y, Shape.Create(Y.Shape.AsArray));
                executor.GetGradient(HX, Shape.Create(HX.Shape.AsArray));
                executor.GetGradient(CX, Shape.Create(CX.Shape.AsArray));
            }

            // init weights
            var numLinearLayers = Type.NumLinLayers;

            using (var filterDesc = new FilterDescriptor())
            {
                var w          = executor.GetTensor(W);
                var filterDimA = new int[3];

                for (var layer = 0; layer < NumLayers; ++layer)
                {
                    for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
                    {
                        int          nbDims;
                        DataType     dataType;
                        TensorFormat format;

                        deviceptr <T> linLayerMat;
                        dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId,
                                                       filterDesc, out linLayerMat);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        var length = filterDimA.Aggregate(ScalarOps.Mul);

                        var linLayerMatBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                        var linLayerMatTensor = new Tensor <T>(linLayerMatBuffer);
                        context.Assign(linLayerMatTensor, RandomNormal <T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar <T>()));

                        deviceptr <T> linLayerBias;
                        dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        length = filterDimA.Aggregate(ScalarOps.Mul);

                        var linLayerBiasBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                        var linLayerBiasTensor = new Tensor <T>(linLayerBiasBuffer);
                        Type.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                    }
                }
            }

            base.Initialize(executor);

            const double value = 0.0;

            executor.AssignTensor(HX, Fill(Shape.Create(HX.Shape.AsArray), ScalarOps.Conv <T>(value)));
            executor.AssignTensor(CX, Fill(Shape.Create(CX.Shape.AsArray), ScalarOps.Conv <T>(value)));
        }
Пример #3
0
        public override void Initialize(Executor executor)
        {
            var context = executor.Context.ToGpuContext();
            var dnn     = context.Dnn;

            // dropout
            var    dropoutDesc = executor.DropoutDescDict[DropoutDesc];
            IntPtr dropoutStatesSize;

            dnn.DropoutGetStatesSize(out dropoutStatesSize);
            var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));

            dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

            // rnn descriptor
            var rnnDesc = executor.RnnDescDict[RnnDesc];
            var mode    = RnnType.Mode;

            rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf <T>());

            // initialize weight, once only, using minibatch size 1
            var shape   = PartialShape.Create(1, InputSize, 1); // first dimension does not affect the weight shape and size TODO test all, tested only for LSTM
            var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
            var xDesc   = new TensorDescriptor();

            xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
            var    wDesc = executor.FilterDescDict[WDesc];
            IntPtr weightsSize;

            dnn.GetRNNParamsSize(rnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf <T>());
            Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf <T>() == 0);
            var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf <T>());

            wDesc.SetND(Dnn.DataTypeOf <T>(), TensorFormat.CUDNN_TENSOR_NCHW, new [] { (int)shapeW[0], 1, 1 });

            // since we are using cuDNN, we'd better make sure these varaibles are allocated
            executor.GetTensor(W, shapeW);
            if (IsTraining)
            {
                executor.GetGradient(W, shapeW);
            }

            // init weights
            var numLinearLayers = RnnType.NumLinLayers;

            using (var filterDesc = new FilterDescriptor())
            {
                var w          = executor.GetTensor(W);
                var filterDimA = new int[3];

                for (var layer = 0; layer < NumLayers; ++layer)
                {
                    for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
                    {
                        int          nbDims;
                        DataType     dataType;
                        TensorFormat format;

                        deviceptr <T> linLayerMat;
                        dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        var length = filterDimA.Aggregate(ScalarOps.Mul);

                        var linLayerMatBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                        var linLayerMatTensor = new Tensor <T>(linLayerMatBuffer);
                        context.Assign(linLayerMatTensor, RandomNormal <T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar <T>()));

                        deviceptr <T> linLayerBias;
                        dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        length = filterDimA.Aggregate(ScalarOps.Mul);

                        var linLayerBiasBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                        var linLayerBiasTensor = new Tensor <T>(linLayerBiasBuffer);
                        RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                    }
                }
            }

            base.Initialize(executor);
        }