예제 #1
0
        /// <summary>
        /// Runs the forward softmax pass: reads <c>Input</c>, allocates <c>Output</c> with the same
        /// shape, and lets cuDNN compute the softmax per instance.
        /// </summary>
        /// <param name="executor">Executor that supplies the context, tensors and tensor descriptors.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when the context is not a GPU context, or the input layout is not
        /// inner-change-most fully packed.
        /// </exception>
        public override void Forward(Executor executor)
        {
            var context = executor.Context;
            var input   = executor.GetTensor(Input);
            var output  = executor.GetTensor(Output, input.Shape);

            // Only the packed GPU case has an implementation.
            var supported = context.Type == ContextType.Gpu && input.Layout.IsInnerChangeMostFullyPacked;
            if (!supported)
            {
                throw new NotImplementedException();
            }

            var dnn     = context.ToGpuContext().Dnn;
            var batch   = (int)input.Shape[0];
            var classes = (int)input.Shape[1];

            using (var inputDesc = executor.TensorDescRepo.Acquire())
            using (var outputDesc = executor.TensorDescRepo.Acquire())
            {
                // Both descriptors describe the same (batch, classes, 1, 1) view with row-major strides.
                inputDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { batch, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
                outputDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { batch, classes, 1, 1 }, new[] { classes, 1, 1, 1 });

                var one  = ScalarOps.Conv<T>(1.0);
                var zero = ScalarOps.Conv<T>(0.0);

                // output = 1 * softmax(input) + 0 * output
                dnn.SoftmaxForward(
                    SoftmaxAlgorithm.ACCURATE,
                    SoftmaxMode.INSTANCE,
                    one,
                    inputDesc.Value,
                    input.Buffer.Ptr,
                    zero,
                    outputDesc.Value,
                    output.Buffer.Ptr);
            }
        }
예제 #2
0
        /// <summary>
        /// Runs the forward pooling pass through cuDNN. The output tensor takes its batch size from
        /// the data tensor and its remaining three dimensions from the <c>Output</c> variable.
        /// </summary>
        /// <param name="executor">Executor that supplies the context, tensors and tensor descriptors.</param>
        /// <exception cref="NotImplementedException">Thrown when the context is not a GPU context.</exception>
        public override void Forward(Executor executor)
        {
            var input  = executor.GetTensor(Data);
            var result = executor.GetTensor(Output, Shape.Create(input.Shape[0], Output.Shape[1], Output.Shape[2], Output.Shape[3]));

            if (executor.Context.Type != ContextType.Gpu)
            {
                throw new NotImplementedException();
            }

            var dnn = executor.Context.ToGpuContext().Dnn;

            using (var inputHandle = executor.TensorDescRepo.Acquire())
            using (var resultHandle = executor.TensorDescRepo.Acquire())
            {
                var inputDesc   = inputHandle.Value;
                var resultDesc  = resultHandle.Value;
                var elementType = Dnn.DataTypeOf<T>();
                var batch       = (int)input.Shape[0];

                inputDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, batch, (int)input.Shape[1], (int)input.Shape[2], (int)input.Shape[3]);
                resultDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, batch, (int)Output.Shape[1], (int)Output.Shape[2], (int)Output.Shape[3]);

                // result = 1 * pool(input) + 0 * result
                var one  = ScalarOps.Conv<T>(1.0);
                var zero = ScalarOps.Conv<T>(0.0);
                dnn.PoolingForward(Descriptor, one, inputDesc, input.Buffer.Ptr, zero, resultDesc, result.Buffer.Ptr);
            }
        }
예제 #3
0
        /// <summary>
        /// Runs the cuDNN RNN backward pass for a sequence length of 1: first the data gradients
        /// (<c>DInput</c>, <c>DHX</c>, <c>DCX</c>), then the weight gradient of <c>W</c>.
        /// </summary>
        /// <param name="executor">Executor that supplies the GPU context and the weight tensor/gradient.</param>
        /// <remarks>
        /// Requires <c>IsTraining</c>. When <c>IncreaseGradientAggregationCounter(W)</c> returns 0, the
        /// gradient of <c>W</c> is replaced with zero before the weights pass runs.
        /// </remarks>
        public void Backward(Executor executor)
        {
            var gpu     = executor.Context.ToGpuContext();
            var dnn     = gpu.Dnn;
            var rnnDesc = RnnDesc;
            var wDesc   = WDesc;

            Util.EnsureTrue(IsTraining);

            // Scratch buffers are shared by both cuDNN calls below.
            var workspacePtr  = Workspace.Buffer.Ptr;
            var workspaceSize = (IntPtr)Workspace.Shape.Length;
            var reservePtr    = ReserveSpace.Buffer.Ptr;
            var reserveSize   = (IntPtr)ReserveSpace.Shape.Length;

            // Gradients with respect to the input and the initial states.
            dnn.RNNBackwardData(
                rnnDesc,
                1,
                YDesc, Output.Buffer.Ptr,
                YDesc, DOutput.Buffer.Ptr,
                StateDesc, DHY.Buffer.Ptr,
                StateDesc, DCY.Buffer.Ptr,
                wDesc, executor.GetTensor(W).Buffer.Ptr,
                StateDesc, HX.Buffer.Ptr,
                StateDesc, CX.Buffer.Ptr,
                XDesc, DInput.Buffer.Ptr,
                StateDesc, DHX.Buffer.Ptr,
                StateDesc, DCX.Buffer.Ptr,
                workspacePtr, workspaceSize,
                reservePtr, reserveSize);

            // First contribution to W's gradient: start from zero.
            var isFirstContribution = executor.IncreaseGradientAggregationCounter(W) == 0;
            if (isFirstContribution)
            {
                executor.AssignGradient(W, ScalarOps.Conv<T>(0.0).AsScalar(), replace: true);
            }

            // Gradient with respect to the weights.
            dnn.RNNBackwardWeights(
                rnnDesc,
                1,
                XDesc, Input.Buffer.Ptr,
                StateDesc, HX.Buffer.Ptr,
                YDesc, Output.Buffer.Ptr,
                workspacePtr, workspaceSize,
                wDesc, executor.GetGradient(W).Buffer.Ptr,
                reservePtr, reserveSize);
        }
예제 #4
0
 /// <summary>
 /// Initializes one bias tensor of the LSTM: the tensor with <paramref name="linLayerId"/> 1 is
 /// filled with <c>ForgetBiasInit</c>, every other bias tensor is filled with zero.
 /// </summary>
 /// <param name="ctx">Context used to perform the assignment.</param>
 /// <param name="layerId">Index of the RNN layer (not used by this implementation).</param>
 /// <param name="linLayerId">Index of the linear layer whose bias is being initialized.</param>
 /// <param name="tensor">Bias tensor to fill.</param>
 public override void InitBias <T>(Context ctx, int layerId, int linLayerId, Tensor <T> tensor)
 {
     // cuDNN LSTM layout is IFAO and the bias has 2x4 entries; the second set of biases is
     // ignored, so only the first forget bias (linLayerId == 1) receives the configured value.
     var isFirstForgetBias = linLayerId == 1;
     var fillValue = isFirstForgetBias
         ? ScalarOps.Conv<T>(ForgetBiasInit)
         : ScalarOps.Conv<T>(0.0);

     ctx.Assign(tensor, fillValue);
 }
예제 #5
0
        /// <summary>
        /// Computes the softmax cross-entropy loss: cuDNN writes the log-softmax of <c>Input</c> into
        /// <c>LogPred</c>, a device loop gathers the log-probability at each label index into
        /// <c>Temp</c>, and <c>Loss</c> is assigned the negated sum of those values.
        /// </summary>
        /// <param name="executor">Executor that supplies the context, tensors and tensor descriptors.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown after the log-softmax step when the context is not a GPU context or <c>LogPred</c> is
        /// not inner-change-most fully packed.
        /// </exception>
        public override void Forward(Executor executor)
        {
            var scores = executor.GetTensor(Input);
            var labels = executor.GetTensor(Label);

            Util.EnsureTrue(scores.Shape.Rank == 2);
            Util.EnsureTrue(Dnn.IsAvailable, "TODO: make non-cuDnn implementation.");

            var batch   = (int)scores.Shape[0];
            var classes = (int)scores.Shape[1];

            // Step 1: log-softmax via cuDNN, written into LogPred.
            using (var inDesc = executor.TensorDescRepo.Acquire())
            using (var outDesc = executor.TensorDescRepo.Acquire())
            {
                var dnn = executor.Context.ToGpuContext().Dnn;
                inDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { batch, classes, 1, 1 }, new[] { classes, 1, 1, 1 });
                outDesc.Value.SetND(Dnn.DataTypeOf(typeof(T)), new[] { batch, classes, 1, 1 }, new[] { classes, 1, 1, 1 });

                var srcPtr = executor.GetTensor(Input).Buffer.Ptr;
                var dstPtr = executor.GetTensor(LogPred, Shape.Create(batch, classes)).Buffer.Ptr;
                var one    = ScalarOps.Conv<T>(1.0);
                var zero   = ScalarOps.Conv<T>(0.0);

                dnn.SoftmaxForward(SoftmaxAlgorithm.LOG, SoftmaxMode.INSTANCE, one, inDesc.Value, srcPtr, zero, outDesc.Value, dstPtr);
            }

            // Step 2: gather the log-probability of the labelled class for each row.
            // TODO: make it expression
            var logPred = executor.GetTensor(LogPred);
            var picked  = executor.GetTensor(Temp, Shape.Create(batch));
            var ctx     = executor.Context;

            var canGather = ctx.Type == ContextType.Gpu && logPred.Layout.IsInnerChangeMostFullyPacked;
            if (!canGather)
            {
                throw new NotImplementedException();
            }

            var stream     = ctx.ToGpuContext().Stream;
            var pickedPtr  = picked.Buffer.Ptr;
            var logPredPtr = logPred.Buffer.Ptr;
            var labelPtr   = labels.Buffer.Ptr;

            DeviceFor.For(stream, 0, batch, row =>
            {
                var label      = labelPtr[row];
                pickedPtr[row] = logPredPtr[row * classes + label];
            });

            // Step 3: loss = -sum(picked log-probabilities).
            executor.AssignTensor(Loss, -ReduceSum(picked));
        }
예제 #6
0
        /// <summary>
        /// Runs the forward convolution through cuDNN and then adds the bias to the result.
        /// </summary>
        /// <param name="executor">Executor that supplies the context, tensors and descriptors.</param>
        /// <remarks>
        /// The forward algorithm is chosen with <c>PREFER_FASTEST</c>; if it reports a non-zero workspace
        /// size, <c>Workspace1</c> is allocated with that many elements and handed to cuDNN.
        /// </remarks>
        /// <exception cref="NotImplementedException">Thrown when the context is not a GPU context.</exception>
        public override void Forward(Executor executor)
        {
            var input  = executor.GetTensor(Data);
            var filter = executor.GetTensor(Weight);
            var bias   = executor.GetTensor(Bias);
            var result = executor.GetTensor(Output, Shape.Create(input.Shape[0], Output.Shape[1], Output.Shape[2], Output.Shape[3]));

            if (executor.Context.Type != ContextType.Gpu)
            {
                throw new NotImplementedException();
            }

            var convDesc = ConvolutionDesc;
            var dnn      = executor.Context.ToGpuContext().Dnn;

            using (var inputHandle = executor.TensorDescRepo.Acquire())
            using (var filterHandle = executor.FilterDescRepo.Acquire())
            using (var biasHandle = executor.TensorDescRepo.Acquire())
            using (var resultHandle = executor.TensorDescRepo.Acquire())
            {
                var inputDesc   = inputHandle.Value;
                var filterDesc  = filterHandle.Value;
                var biasDesc    = biasHandle.Value;
                var resultDesc  = resultHandle.Value;
                var elementType = Dnn.DataTypeOf<T>();

                // Batch size comes from the tensor, the other input dimensions from the Data variable.
                inputDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, (int)input.Shape[0], (int)Data.Shape[1], (int)Data.Shape[2], (int)Data.Shape[3]);
                filterDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, (int)filter.Shape[0], (int)filter.Shape[1], (int)filter.Shape[2], (int)filter.Shape[3]);
                biasDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, 1, (int)result.Shape[1], 1, 1);
                resultDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, (int)result.Shape[0], (int)result.Shape[1], (int)result.Shape[2], (int)result.Shape[3]);

                // Pick the algorithm and find out how much scratch memory it needs.
                ConvolutionFwdAlgo algo;
                IntPtr             scratchSize;
                dnn.GetConvolutionForwardAlgorithm(inputDesc, filterDesc, convDesc, resultDesc,
                                                   ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out algo);
                dnn.GetConvolutionForwardWorkspaceSize(inputDesc, filterDesc, convDesc, resultDesc, algo, out scratchSize);

                // A default pointer is passed when no workspace is required.
                var scratchPtr = new deviceptr<byte>();
                if (scratchSize.ToInt64() > 0L)
                {
                    scratchPtr = executor.GetTensor(Workspace1, Shape.Create(scratchSize.ToInt64())).Buffer.Ptr;
                }

                var one  = ScalarOps.Conv<T>(1.0);
                var zero = ScalarOps.Conv<T>(0.0);

                // Step 1: result = conv(input, filter)
                dnn.ConvolutionForward(one, inputDesc, input.Buffer.Ptr, filterDesc, filter.Buffer.Ptr,
                                       convDesc, algo, scratchPtr, scratchSize, zero, resultDesc, result.Buffer.Ptr);

                // Step 2: result += bias
                dnn.AddTensor(one, biasDesc, bias.Buffer.Ptr, one, resultDesc, result.Buffer.Ptr);
            }
        }
예제 #7
0
        /// <summary>
        /// Fetches the gradient tensor of a variable for its first (and only expected) contribution,
        /// bumping the variable's gradient aggregation counter and optionally zero-filling the tensor.
        /// </summary>
        /// <param name="executor">Executor that owns the variable's data and gradient.</param>
        /// <param name="var">Variable whose gradient is requested.</param>
        /// <param name="shape">Shape passed to <c>executor.GetGradient</c> and used for the zero fill.</param>
        /// <param name="zero">When true, the gradient is assigned a zero fill before it is returned.</param>
        /// <returns>The gradient tensor of <paramref name="var"/>.</returns>
        /// <remarks>Asserts via <c>Util.EnsureTrue</c> that the aggregation counter is still 0 on entry.</remarks>
        public static Tensor <T> GetGradient(Executor executor, Variable <T> var, Shape shape, bool zero = false)
        {
            var context = executor.Context;
            var state   = executor.GetData(var);

            // This helper is only valid for the first gradient contribution.
            Util.EnsureTrue(state.GradientAggregationCounter == 0);
            state.GradientAggregationCounter++;

            var result = executor.GetGradient(var, shape);
            if (!zero)
            {
                return result;
            }

            context.Assign(result, Fill(shape, ScalarOps.Conv<T>(0.0)));
            return result;
        }
예제 #8
0
        /// <summary>
        /// Builds a 2D convolution layer over a rank-4 input with fixed zero padding and unit stride.
        /// The output dimensions are obtained from cuDNN using a placeholder batch size.
        /// </summary>
        /// <param name="data">Input variable; must have rank 4 with positive dimensions 1..3.</param>
        /// <param name="kernelH">Kernel height.</param>
        /// <param name="kernelW">Kernel width.</param>
        /// <param name="numFilter">Number of output filters.</param>
        public Convolution2D(Variable <T> data, int kernelH, int kernelW, int numFilter)
        {
            Util.EnsureTrue(data.Shape.Rank == 4);
            Util.EnsureTrue(data.Shape[1] > 0);
            Util.EnsureTrue(data.Shape[2] > 0);
            Util.EnsureTrue(data.Shape[3] > 0);

            var inChannels  = data.Shape[1];
            var outChannels = numFilter;
            var inHeight    = data.Shape[2];
            var inWidth     = data.Shape[3];

            // Padding (0, 0), stride (1, 1) and the trailing (1, 1) pair are fixed for now.
            ConvolutionDesc = new ConvolutionDescriptor();
            ConvolutionDesc.Set2D(0, 0, 1, 1, 1, 1, ConvolutionMode.CROSS_CORRELATION);

            using (var probeDataDesc = new TensorDescriptor())
            using (var probeFilterDesc = new FilterDescriptor())
            {
                var elementType = Dnn.DataTypeOf<T>();

                // The batch size is unknown here; any value works for querying the output dimensions.
                var placeholderBatch = 100;
                probeDataDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, placeholderBatch, (int)inChannels, (int)inHeight, (int)inWidth);
                probeFilterDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, outChannels, (int)inChannels, kernelH, kernelW);

                // Ask cuDNN for the output dimensions; only c, h and w are used below.
                int n, c, h, w;
                ConvolutionDesc.Get2DForwardOutputDim(probeDataDesc, probeFilterDesc, out n, out c, out h, out w);

                // Weights are drawn from a uniform expression mapped to [-scale, scale].
                var fanIn   = (double)(inChannels * kernelH * kernelW);
                var scale   = Sqrt(3.0.AsScalar<T>() / fanIn.AsScalar<T>());
                var uniform = RandomUniform<T>(Shape.Create(outChannels, inChannels, kernelH, kernelW), 0UL, 0UL);

                Data       = data;
                Weight     = Parameter(scale * (2.0.AsScalar<T>() * uniform - 1.0.AsScalar<T>()));
                Bias       = Parameter(Fill(Shape.Create(c), ScalarOps.Conv<T>(0.1)));
                Output     = Variable<T>(PartialShape.Create(-1, c, h, w));
                Workspace1 = AuxVariable<byte>();
                Workspace2 = AuxVariable<byte>();

                // Register the variables with the operator.
                AddInput(Data);
                AddInput(Weight);
                AddInput(Bias);
                AddOutput(Output);
                AddAuxVar(Workspace1);
                AddAuxVar(Workspace2);
            }
        }
예제 #9
0
        /// <summary>
        /// Builds a fully connected layer over a matrix input, creating the weight and bias parameters
        /// and the output variable.
        /// </summary>
        /// <param name="data">Input variable; must have a shape of rank 2 with a positive second dimension.</param>
        /// <param name="numHidden">Number of output units.</param>
        public FullyConnected(Variable <T> data, long numHidden)
        {
            Util.EnsureTrue(data.HasShape);
            Util.EnsureEqual(2, data.Shape.Rank, "Input must be matrix.");
            Util.EnsureTrue(data.Shape[1] > 0L);

            Data = data;

            var numInput = data.Shape[1];

            // scale = sqrt(12 / (numInput + numHidden)), applied to a uniform expression shifted by -0.5.
            var fanSum   = (double)(numInput + numHidden);
            var scale    = Sqrt(12.0.AsScalar<T>() / fanSum.AsScalar<T>());
            var centered = RandomUniform<T>(Shape.Create(numInput, numHidden), 0UL, 0UL) - 0.5.AsScalar<T>();

            Weights = Parameter(scale * centered);
            Bias    = Parameter(Fill(Shape.Create(numHidden), ScalarOps.Conv<T>(0.0)));
            Output  = Variable<T>(PartialShape.Create(data.Shape[0], numHidden));

            // Register the variables with the operator.
            AddInput(Data);
            AddInput(Weights);
            AddInput(Bias);
            AddOutput(Output);
        }
예제 #10
0
        /// <summary>
        /// Computes the gradient of the embedding weights. On a GPU context with packed index and
        /// output-gradient layouts a hand-written single-block kernel is used; otherwise the gradient
        /// is assigned from a <c>TakeGrad</c> expression.
        /// </summary>
        /// <param name="executor">Executor that supplies the context, tensors and gradients.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown on the kernel path when <c>T</c> is not <see cref="float"/>; the weight gradient has
        /// already been zero-filled at that point.
        /// </exception>
        public override void Backward(Executor executor)
        {
            var ctx     = executor.Context;
            var indices = executor.GetTensor(Indices);
            var dOutput = executor.GetGradient(Output);

            // The custom kernel exists for performance and needs packed layouts on the GPU.
            var useKernel = ctx.Type == ContextType.Gpu
                            && dOutput.Layout.IsInnerChangeMostFullyPacked
                            && indices.Layout.IsInnerChangeMostFullyPacked;

            if (!useKernel)
            {
                executor.AssignGradient(Weights, TakeGrad(indices, dOutput, EmbedSize));
                return;
            }

            var embedDim   = EmbedDim;
            var batchSize  = (int)indices.Shape.Length;
            var threadSize = 256;

            // Start from an all-zero weight gradient, then accumulate rows into it.
            executor.AssignGradient(Weights, Fill(executor.GetTensor(Weights).Shape, ScalarOps.Conv<T>(0.0)));
            var dWeights = executor.GetGradient(Weights);

            var stream   = ctx.ToGpuContext().Stream;
            var indexPtr = indices.Buffer.Ptr;

            if (typeof(T) != typeof(float))
            {
                throw new NotImplementedException();
            }

            var gradOutPtr = dOutput.Buffer.Ptr.Reinterpret<float>();
            var gradWPtr   = dWeights.Buffer.Ptr.Reinterpret<float>();

            // One block only: the batch is usually small while the embedding dimension is large, so
            // each thread walks the batch in order and owns a strided set of columns. With a single
            // block no cross-block synchronization is needed. This could be optimized further.
            var launchParam = new LaunchParam(1, threadSize);
            stream.Launch(() =>
            {
                for (var sample = 0; sample < batchSize; ++sample)
                {
                    var row = indexPtr[sample];

                    for (var col = threadIdx.x; col < embedDim; col += blockDim.x)
                    {
                        gradWPtr[row * embedDim + col] += gradOutPtr[sample * embedDim + col];
                    }
                }
            }, launchParam);
        }
예제 #11
0
        /// <summary>
        /// Runs the backward convolution through cuDNN, writing the gradients of the weight, the
        /// data and the bias. All three results overwrite their destination (beta is 0).
        /// </summary>
        /// <param name="executor">Executor that supplies the context, tensors, gradients and descriptors.</param>
        /// <remarks>
        /// Algorithms are chosen with <c>PREFER_FASTEST</c>. <c>Workspace1</c> backs the filter pass and
        /// <c>Workspace2</c> backs the data pass when cuDNN reports a non-zero workspace size.
        /// </remarks>
        /// <exception cref="NotImplementedException">Thrown when the context is not a GPU context.</exception>
        public override void Backward(Executor executor)
        {
            var input   = executor.GetTensor(Data);
            var filter  = executor.GetTensor(Weight);
            var dOut    = executor.GetGradient(Output);
            var dFilter = executor.GetGradient(Weight, Shape.Create(Weight.Shape.AsArray));
            var dBias   = executor.GetGradient(Bias, Shape.Create(Bias.Shape.AsArray));
            var dInput  = executor.GetGradient(Data, Shape.Create(input.Shape.AsArray));

            if (executor.Context.Type != ContextType.Gpu)
            {
                throw new NotImplementedException();
            }

            var convDesc = ConvolutionDesc;
            var dnn      = executor.Context.ToGpuContext().Dnn;

            using (var inputHandle = executor.TensorDescRepo.Acquire())
            using (var filterHandle = executor.FilterDescRepo.Acquire())
            using (var dInputHandle = executor.TensorDescRepo.Acquire())
            using (var dOutHandle = executor.TensorDescRepo.Acquire())
            using (var dBiasHandle = executor.TensorDescRepo.Acquire())
            using (var dFilterHandle = executor.FilterDescRepo.Acquire())
            {
                var inputDesc   = inputHandle.Value;
                var filterDesc  = filterHandle.Value;
                var dInputDesc  = dInputHandle.Value;
                var dOutDesc    = dOutHandle.Value;
                var dBiasDesc   = dBiasHandle.Value;
                var dFilterDesc = dFilterHandle.Value;
                var elementType = Dnn.DataTypeOf<T>();

                // Batch size comes from the tensor, the other input dimensions from the Data variable.
                var inN = (int)input.Shape[0];
                var inC = (int)Data.Shape[1];
                var inH = (int)Data.Shape[2];
                var inW = (int)Data.Shape[3];

                var fK = (int)filter.Shape[0];
                var fC = (int)filter.Shape[1];
                var fH = (int)filter.Shape[2];
                var fW = (int)filter.Shape[3];

                inputDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, inN, inC, inH, inW);
                dInputDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, inN, inC, inH, inW);
                dOutDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, (int)dOut.Shape[0], (int)dOut.Shape[1], (int)dOut.Shape[2], (int)dOut.Shape[3]);
                dBiasDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, 1, (int)dOut.Shape[1], 1, 1);
                dFilterDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, fK, fC, fH, fW);
                filterDesc.Set4D(elementType, TensorFormat.CUDNN_TENSOR_NCHW, fK, fC, fH, fW);

                // Filter pass: algorithm + scratch memory (Workspace1).
                ConvolutionBwdFilterAlgo filterAlgo;
                IntPtr filterScratchSize;
                dnn.GetConvolutionBackwardFilterAlgorithm(inputDesc, dOutDesc, convDesc, dFilterDesc,
                                                          ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out filterAlgo);
                dnn.GetConvolutionBackwardFilterWorkspaceSize(inputDesc, dOutDesc, convDesc, dFilterDesc, filterAlgo, out filterScratchSize);

                var filterScratchPtr = new deviceptr<byte>();
                if (filterScratchSize.ToInt64() > 0L)
                {
                    filterScratchPtr = executor.GetTensor(Workspace1, Shape.Create(filterScratchSize.ToInt64())).Buffer.Ptr;
                }

                // Data pass: algorithm + scratch memory (Workspace2). The size query receives the
                // weight-gradient descriptor, which was set with the same dimensions as filterDesc.
                ConvolutionBwdDataAlgo dataAlgo;
                IntPtr dataScratchSize;
                dnn.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutDesc, convDesc, dInputDesc,
                                                        ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out dataAlgo);
                dnn.GetConvolutionBackwardDataWorkspaceSize(dFilterDesc, dOutDesc, convDesc, dInputDesc, dataAlgo, out dataScratchSize);

                var dataScratchPtr = new deviceptr<byte>();
                if (dataScratchSize.ToInt64() > 0L)
                {
                    dataScratchPtr = executor.GetTensor(Workspace2, Shape.Create(dataScratchSize.ToInt64())).Buffer.Ptr;
                }

                var one  = ScalarOps.Conv<T>(1.0);
                var zero = ScalarOps.Conv<T>(0.0);

                // Gradient with respect to the filter.
                dnn.ConvolutionBackwardFilter(one, inputDesc, input.Buffer.Ptr, dOutDesc, dOut.Buffer.Ptr,
                                              convDesc, filterAlgo, filterScratchPtr, filterScratchSize,
                                              zero, dFilterDesc, dFilter.Buffer.Ptr);

                // Gradient with respect to the data.
                dnn.ConvolutionBackwardData(one, filterDesc, filter.Buffer.Ptr, dOutDesc, dOut.Buffer.Ptr,
                                            convDesc, dataAlgo, dataScratchPtr, dataScratchSize,
                                            zero, dInputDesc, dInput.Buffer.Ptr);

                // Gradient with respect to the bias.
                dnn.ConvolutionBackwardBias(one, dOutDesc, dOut.Buffer.Ptr, zero, dBiasDesc, dBias.Buffer.Ptr);
            }
        }
예제 #12
0
        /// <summary>
        /// Runs the cuDNN RNN backward pass over the whole sequence: first the gradients of
        /// <c>X</c>, <c>HX</c> and <c>CX</c>, then the gradient of the weights <c>W</c>.
        /// </summary>
        /// <param name="executor">Executor that supplies the GPU context, descriptors, tensors and gradients.</param>
        /// <remarks>
        /// Requires <c>IsTraining</c>. No gradient is passed in for the terminal states <c>HY</c> and
        /// <c>CY</c>; default device pointers are supplied instead. When
        /// <c>IncreaseGradientAggregationCounter(W)</c> returns 0, the gradient of <c>W</c> is replaced
        /// with zero before the weights pass runs.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the aggregation counter of <c>X</c>, <c>HX</c> or <c>CX</c> was not 0, i.e. their
        /// gradients cannot be aggregated by this operator.
        /// </exception>
        public override void Backward(Executor executor)
        {
            Util.EnsureTrue(IsTraining);

            var gpu = executor.Context.ToGpuContext();
            var dnn = gpu.Dnn;

            // X, HX and CX must each be receiving their first gradient. The checks short-circuit in
            // this order, so a failure stops before later counters are touched.
            if (executor.IncreaseGradientAggregationCounter(X) != 0
                || executor.IncreaseGradientAggregationCounter(HX) != 0
                || executor.IncreaseGradientAggregationCounter(CX) != 0)
            {
                throw new InvalidOperationException();
            }

            var rnnDesc       = executor.RnnDescDict[RnnDesc];
            var wDesc         = executor.FilterDescDict[WDesc];
            var workspace     = executor.GetTensor(Workspace);
            var reserve       = executor.GetTensor(ReserveSpace);
            var workspaceSize = (IntPtr)workspace.Shape.Length;
            var reserveSize   = (IntPtr)reserve.Shape.Length;

            // Gradients with respect to the input and the initial states.
            dnn.RNNBackwardData(
                rnnDesc,
                SeqLength,
                YDesc, executor.GetTensor(Y).Buffer.Ptr,
                YDesc, executor.GetGradient(Y).Buffer.Ptr,
                StateDesc, new deviceptr<T>(), // no gradient supplied for HY
                StateDesc, new deviceptr<T>(), // no gradient supplied for CY
                wDesc, executor.GetTensor(W).Buffer.Ptr,
                StateDesc, executor.GetTensor(HX).Buffer.Ptr,
                StateDesc, executor.GetTensor(CX).Buffer.Ptr,
                XDesc, executor.GetGradient(X).Buffer.Ptr,
                StateDesc, executor.GetGradient(HX).Buffer.Ptr,
                StateDesc, executor.GetGradient(CX).Buffer.Ptr,
                workspace.Buffer.Ptr, workspaceSize,
                reserve.Buffer.Ptr, reserveSize);

            // First contribution to W's gradient: start from zero.
            var isFirstContribution = executor.IncreaseGradientAggregationCounter(W) == 0;
            if (isFirstContribution)
            {
                executor.AssignGradient(W, ScalarOps.Conv<T>(0.0).AsScalar(), replace: true);
            }

            // Gradient with respect to the weights.
            dnn.RNNBackwardWeights(
                rnnDesc,
                SeqLength,
                XDesc, executor.GetTensor(X).Buffer.Ptr,
                StateDesc, executor.GetTensor(HX).Buffer.Ptr,
                YDesc, executor.GetTensor(Y).Buffer.Ptr,
                workspace.Buffer.Ptr, workspaceSize,
                wDesc, executor.GetGradient(W).Buffer.Ptr,
                reserve.Buffer.Ptr, reserveSize);
        }
예제 #13
0
        /// <summary>
        /// Prepares the cuDNN RNN for execution: configures the dropout, RNN and weight descriptors,
        /// allocates the workspace, reserve space, weight, state and gradient tensors, initializes
        /// every linear-layer matrix and bias inside the weight buffer, and zero-fills the initial
        /// states <c>HX</c> and <c>CX</c>.
        /// </summary>
        /// <param name="executor">Executor that owns the descriptors, tensors and the GPU context.</param>
        public override void Initialize(Executor executor)
        {
            var context = executor.Context.ToGpuContext();
            var dnn     = context.Dnn;

            // Dropout: ask cuDNN how large the dropout state buffer must be, allocate it, and
            // configure the dropout descriptor with the dropout rate and seed.
            var    dropoutDesc = executor.DropoutDescDict[DropoutDesc];
            IntPtr dropoutStatesSize;

            dnn.DropoutGetStatesSize(out dropoutStatesSize);
            var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));

            dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

            // RNN descriptor: linear input, unidirectional, mode taken from the RNN type.
            var rnnDesc = executor.RnnDescDict[RnnDesc];
            var mode    = Type.Mode;

            rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf <T>());

            // Weights: query the parameter size, require it to be a whole multiple of the element
            // size, and convert it to an element count that becomes the 1-D weight shape.
            var    wDesc = executor.FilterDescDict[WDesc];
            IntPtr weightsSize;

            dnn.GetRNNParamsSize(rnnDesc, XDesc[0], out weightsSize, Dnn.DataTypeOf <T>());
            Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf <T>() == 0);
            var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf <T>());

            // The filter descriptor describes the weights as (elementCount, 1, 1).
            wDesc.SetND(Dnn.DataTypeOf <T>(), TensorFormat.CUDNN_TENSOR_NCHW, new [] { (int)shapeW[0], 1, 1 });

            // Workspace and reserve space, sized by cuDNN for the configured sequence length.
            IntPtr workSize;

            dnn.GetRNNWorkspaceSize(rnnDesc, SeqLength, XDesc, out workSize);
            executor.GetTensor(Workspace, Shape.Create(workSize.ToInt64()));

            // The reserve space is only allocated when training.
            if (IsTraining)
            {
                IntPtr reserveSize;
                dnn.GetRNNTrainingReserveSize(rnnDesc, SeqLength, XDesc, out reserveSize);
                executor.GetTensor(ReserveSpace, Shape.Create(reserveSize.ToInt64()));
            }

            // Since we are using cuDNN, make sure these variables are allocated up front.
            executor.GetTensor(W, shapeW);
            if (IsTraining)
            {
                executor.GetGradient(W, shapeW);
            }

            // Output and state tensors, each requested with its variable's declared shape.
            executor.GetTensor(Y, Shape.Create(Y.Shape.AsArray));
            executor.GetTensor(HX, Shape.Create(HX.Shape.AsArray));
            executor.GetTensor(CX, Shape.Create(CX.Shape.AsArray));
            executor.GetTensor(HY, Shape.Create(HY.Shape.AsArray));
            executor.GetTensor(CY, Shape.Create(CY.Shape.AsArray));

            // Gradients of the input, output and initial states are only needed when training.
            if (IsTraining)
            {
                executor.GetGradient(X, Shape.Create(X.Shape.AsArray));
                executor.GetGradient(Y, Shape.Create(Y.Shape.AsArray));
                executor.GetGradient(HX, Shape.Create(HX.Shape.AsArray));
                executor.GetGradient(CX, Shape.Create(CX.Shape.AsArray));
            }

            // Initialize the weights: walk every (layer, linear layer) pair and fill its matrix and
            // bias in place inside the weight buffer.
            var numLinearLayers = Type.NumLinLayers;

            // A scratch filter descriptor that cuDNN fills with the dimensions of each parameter block.
            using (var filterDesc = new FilterDescriptor())
            {
                var w          = executor.GetTensor(W);
                var filterDimA = new int[3];

                for (var layer = 0; layer < NumLayers; ++layer)
                {
                    for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
                    {
                        int          nbDims;
                        DataType     dataType;
                        TensorFormat format;

                        // Locate this linear layer's matrix; cuDNN returns a pointer and fills filterDesc.
                        deviceptr <T> linLayerMat;
                        dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId,
                                                       filterDesc, out linLayerMat);

                        // Element count of the matrix = product of the reported dimensions.
                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        var length = filterDimA.Aggregate(ScalarOps.Mul);

                        // Wrap the returned pointer in a flat tensor built on w's memory, then assign
                        // normal random values divided by sqrt(HiddenSize + InputSize).
                        var linLayerMatBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                        var linLayerMatTensor = new Tensor <T>(linLayerMatBuffer);
                        context.Assign(linLayerMatTensor, RandomNormal <T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar <T>()));

                        // Locate this linear layer's bias the same way.
                        deviceptr <T> linLayerBias;
                        dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, XDesc[0], wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);

                        filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                        length = filterDimA.Aggregate(ScalarOps.Mul);

                        // The bias values are chosen by the RNN type (see Type.InitBias).
                        var linLayerBiasBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                        var linLayerBiasTensor = new Tensor <T>(linLayerBiasBuffer);
                        Type.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                    }
                }
            }

            // Base initialization runs only after the weights have been filled above.
            base.Initialize(executor);

            // Finally zero-fill the initial hidden and cell states.
            const double value = 0.0;

            executor.AssignTensor(HX, Fill(Shape.Create(HX.Shape.AsArray), ScalarOps.Conv <T>(value)));
            executor.AssignTensor(CX, Fill(Shape.Create(CX.Shape.AsArray), ScalarOps.Conv <T>(value)));
        }
예제 #14
0
 /// <summary>
 /// Fills the RNN's initial hidden state (<c>Rnn.HX</c>) and initial cell state (<c>Rnn.CX</c>)
 /// with zeros, using each variable's declared shape.
 /// </summary>
 /// <param name="executor">Executor whose tensors are assigned.</param>
 public void ZeroInitialStates(Executor executor)
 {
     var hiddenShape = Shape.Create(Rnn.HX.Shape.AsArray);
     executor.AssignTensor(Rnn.HX, Fill(hiddenShape, ScalarOps.Conv<T>(0.0)));

     var cellShape = Shape.Create(Rnn.CX.Shape.AsArray);
     executor.AssignTensor(Rnn.CX, Fill(cellShape, ScalarOps.Conv<T>(0.0)));
 }
예제 #15
0
        /// <summary>
        /// Reference LSTM forward pass built from tensor expressions. For every time step it stacks
        /// [bias, input, previous hidden state], multiplies by <c>W</c>, applies the gate
        /// non-linearities, and updates the cell and hidden states.
        /// </summary>
        /// <param name="executor">Executor that supplies the context and all tensors.</param>
        /// <remarks>
        /// <c>CX</c> and <c>HX</c> must have shape (batch, hidden). <c>CY</c> and <c>HY</c> are first
        /// assigned the initial states and then overwritten at every step, so on return they hold the
        /// states of the last step.
        /// </remarks>
        public void ForwardBasic(Executor executor)
        {
            var ctx         = executor.Context;
            var weights     = executor.GetTensor(W);
            var stackedSize = weights.Shape[0];
            var input       = executor.GetTensor(X);
            var batch       = input.Shape[1];
            var steps       = input.Shape[0];
            var hidden      = HiddenSize;
            var output      = executor.GetTensor(Y, Shape.Create(steps, batch, hidden));
            var inputSize   = InputSize;
            var one         = 1.0.AsScalar<T>();

            // Initial states must match (batch, hidden).
            var cellInit   = executor.GetTensor(CX);
            var hiddenInit = executor.GetTensor(HX);

            Util.EnsureTrue(cellInit.Shape.SequenceEqual(Shape.Create(batch, hidden)));
            Util.EnsureTrue(hiddenInit.Shape.SequenceEqual(Shape.Create(batch, hidden)));

            // The output states start as copies of the initial states and act as the running
            // "previous" state; they are updated at the end of every step.
            var cellOut   = executor.GetTensor(CY, Shape.Create(batch, hidden));
            var hiddenOut = executor.GetTensor(HY, Shape.Create(batch, hidden));

            ctx.Assign(cellOut, cellInit);
            ctx.Assign(hiddenOut, hiddenInit);
            var prevCell   = cellOut.Reshape(1, batch, hidden);
            var prevHidden = hiddenOut.Reshape(1, batch, hidden);

            var stacked = executor.GetTensor(Hin, Shape.Create(steps, batch, stackedSize));
            var preAct  = executor.GetTensor(IFOA1, Shape.Create(steps, batch, hidden * 4));
            var gates   = executor.GetTensor(IFOA2, Shape.Create(steps, batch, hidden * 4));
            var cells   = executor.GetTensor(C, Shape.Create(steps, batch, hidden));

            for (var t = 0; t < steps; ++t)
            {
                // Stack the step input: column 0 is the constant bias input, followed by x_t and h_{t-1}.
                ctx.Assign(stacked.Slice(t, -1, 0), Fill(Shape.Create(1, batch, 1), ScalarOps.Conv<T>(1.0)));
                ctx.Assign(stacked.Slice(t, -1, Range(1, inputSize + 1)), input.Slice(t));
                ctx.Assign(stacked.Slice(t, -1, Range(inputSize + 1, -1)), prevHidden);

                // Pre-activations of all four gates in one matrix product.
                ctx.Assign(preAct.Slice(t), Dot(stacked.Slice(t).Reshape(batch, stackedSize), weights));

                // Views for the element-wise part; they are of shape (1, batch, hidden).
                var cellT   = cells.Slice(t);
                var hiddenT = output.Slice(t);
                var inGate  = gates.Slice(t, -1, Range(0, hidden));
                var fgGate  = gates.Slice(t, -1, Range(hidden, 2 * hidden));
                var outGate = gates.Slice(t, -1, Range(2 * hidden, 3 * hidden));
                var actGate = gates.Slice(t, -1, Range(3 * hidden, 4 * hidden));

                // The first three blocks (I, F, O) go through a sigmoid.
                var ifoGates  = gates.Slice(t, -1, Range(0, 3 * hidden));
                var ifoPreAct = preAct.Slice(t, -1, Range(0, 3 * hidden));
                ctx.Assign(ifoGates, one / (one + Exp(-ifoPreAct)));

                // The last block (A, the activation gate) goes through tanh.
                var actPreAct = preAct.Slice(t, -1, Range(3 * hidden, 4 * hidden));
                ctx.Assign(actGate, Tanh(actPreAct));

                // c_t = i_t * a_t + f_t * c_{t-1}
                ctx.Assign(cellT, inGate * actGate + fgGate * prevCell);

                // h_t = o_t * tanh(c_t)
                ctx.Assign(hiddenT, outGate * Tanh(cellT));

                // Carry the new states over to the next step.
                ctx.Assign(prevHidden, output.Slice(t));
                ctx.Assign(prevCell, cells.Slice(t));
            }
        }
예제 #16
0
        /// <summary>
        /// Initializes the LSTM weights: after the base initialization, the first slice of <c>W</c>
        /// (the bias entries) is set to zero, and the forget-gate part of it is set to
        /// <c>ForgetBiasInit</c> when that value is non-zero.
        /// </summary>
        /// <param name="executor">Executor that supplies the context and the weight tensor.</param>
        public override void Initialize(Executor executor)
        {
            base.Initialize(executor);

            var ctx     = executor.Context;
            var weights = executor.GetTensor(W);

            // Slice 0 holds the four gate biases; clear all of them first.
            ctx.Assign(weights.Slice(0), 0.0.AsScalar<T>());

            // A zero forget bias is already in place.
            if (ForgetBiasInit == 0.0)
            {
                return;
            }

            // Gate layout is IFOA, so the forget gate occupies [HiddenSize, 2 * HiddenSize).
            var forgetBias = weights.Slice(0, Range(HiddenSize, 2 * HiddenSize));
            ctx.Assign(forgetBias, Fill(Shape.Create(1, HiddenSize), ScalarOps.Conv<T>(ForgetBiasInit)));
        }
예제 #17
0
 /// <summary>
 /// Replaces the gradients of the terminal hidden state (<c>HY</c>) and terminal cell state
 /// (<c>CY</c>) with zeros, using each variable's declared shape.
 /// </summary>
 /// <param name="executor">Executor whose gradients are assigned.</param>
 public void ZeroTerminalGradient(Executor executor)
 {
     var hiddenShape = Shape.Create(HY.Shape.AsArray);
     executor.AssignGradient(HY, Fill(hiddenShape, ScalarOps.Conv<T>(0.0)), replace: true);

     var cellShape = Shape.Create(CY.Shape.AsArray);
     executor.AssignGradient(CY, Fill(cellShape, ScalarOps.Conv<T>(0.0)), replace: true);
 }