Пример #1
0
        /// <summary>
        /// Builds an additive (MLP-scoring) attention graph over a sequence of encoder
        /// hidden states, conditioned on a single decoder hidden state.
        /// </summary>
        /// <param name="encoderHiddenStates">Encoder states, layout (seqLength, batch, encoderHiddenSize); seqLength may be -1 (dynamic).</param>
        /// <param name="decoderHiddenState">Decoder state, layout (batch, decoderHiddenSize).</param>
        /// <param name="attentionDim">Size of the internal attention projection.</param>
        public Attention(Variable <T> encoderHiddenStates, Variable <T> decoderHiddenState, int attentionDim)
        {
            AttentionDim        = attentionDim;
            EncoderHiddenStates = encoderHiddenStates;
            DecoderHiddenState  = decoderHiddenState;

            Util.EnsureEqual(3, EncoderHiddenStates.Shape.Rank, "Input layout: (seqLength, batch, encoderHiddenSize)");
            // BUG FIX: the original also required Shape[0] >= 0, but TestAttention builds the
            // encoder variable with PartialShape.Create(-1, batch, encoderHiddenSize) — the
            // sequence length is only fixed at runtime, so it must be allowed to be -1.
            Util.EnsureTrue(EncoderHiddenStates.Shape[1] >= 0, "Input layout: (seqLength, batch, encoderHiddenSize)");
            Util.EnsureTrue(EncoderHiddenStates.Shape[2] >= 0, "Input layout: (seqLength, batch, encoderHiddenSize)");
            SeqLength         = (int)EncoderHiddenStates.Shape[0]; // may be -1 when dynamic
            Batch             = (int)EncoderHiddenStates.Shape[1];
            EncoderHiddenSize = (int)EncoderHiddenStates.Shape[2];

            Util.EnsureEqual(2, DecoderHiddenState.Shape.Rank, "Input layout: (batch, decoderHiddenSize)");
            // BUG FIX: the messages below used to describe the encoder layout, and the batch
            // comparison compared decoder dim 0 (batch) against encoder dim 0 (seqLength).
            // The decoder batch must match encoder dim 1 (batch).
            Util.EnsureTrue(DecoderHiddenState.Shape[0] >= 0, "Input layout: (batch, decoderHiddenSize)");
            Util.EnsureTrue(DecoderHiddenState.Shape[1] >= 0, "Input layout: (batch, decoderHiddenSize)");
            Util.EnsureTrue(DecoderHiddenState.Shape[0] == EncoderHiddenStates.Shape[1]);
            DecoderHiddenSize = (int)DecoderHiddenState.Shape[1];

            // Uniform init in [-0.5, 0.5) scaled by sqrt(12 / (fanIn + fanOut)).
            var scale = Sqrt(12.0.AsScalar <T>() / ((double)(AttentionDim + EncoderHiddenSize)).AsScalar <T>());

            Wh = Parameter(scale * (RandomUniform <T>(Shape.Create(EncoderHiddenSize, AttentionDim), 0UL, 0UL) - 0.5.AsScalar <T>()));

            scale = Sqrt(12.0.AsScalar <T>() / ((double)(AttentionDim + DecoderHiddenSize)).AsScalar <T>());
            Wd    = Parameter(scale * (RandomUniform <T>(Shape.Create(DecoderHiddenSize, AttentionDim), 0UL, 0UL) - 0.5.AsScalar <T>()));

            scale = Sqrt(12.0.AsScalar <T>() / ((double)(AttentionDim)).AsScalar <T>());
            V     = Parameter(scale * (RandomUniform <T>(Shape.Create(AttentionDim), 0UL, 0UL) - 0.5.AsScalar <T>()));

            // Softmax shape depends on the runtime sequence length, so it stays unshaped;
            // AttentionState is the reduced context vector per batch item.
            Softmax        = Variable <T>();
            AttentionState = Variable <T>(PartialShape.Create(Batch, EncoderHiddenSize));
        }
Пример #2
0
        /// <summary>
        /// Builds a small LeNet-style convolutional network for 28x28 grayscale images
        /// with a 10-class softmax cross-entropy loss.
        /// </summary>
        public static Model ConvolutionalNeuralNetworkModel()
        {
            var images = Variable <float>();
            var labels = Variable <float>();

            // View the flat input as (batch, channel=1, 28, 28).
            ILayer <float> layer = new Reshape <float>(images, PartialShape.Create(-1, 1, 28, 28));

            // First conv block: 16 filters of 5x5 -> ReLU -> 2x2 max pooling.
            layer = new Convolution2D <float>(layer.Output, 5, 5, 16);
            layer = new ActivationReLU <float>(layer.Output);
            layer = new Pooling2D <float>(layer.Output, PoolingMode.MAX, 2, 2, 2, 2);

            // Second conv block: 32 filters of 5x5 -> tanh -> 2x2 max pooling.
            layer = new Convolution2D <float>(layer.Output, 5, 5, 32);
            layer = new ActivationTanh <float>(layer.Output);
            layer = new Pooling2D <float>(layer.Output, PoolingMode.MAX, 2, 2, 2, 2);

            // Flatten all non-batch dimensions, then classify via two dense layers.
            layer = new Reshape <float>(layer.Output, PartialShape.Create(-1, layer.Output.Shape.Skip(1).Aggregate(ScalarOps.Mul)));
            layer = new FullyConnected <float>(layer.Output, 50);
            layer = new ActivationTanh <float>(layer.Output);
            layer = new FullyConnected <float>(layer.Output, 10);

            var model = new Model
            {
                Loss   = new SoftmaxCrossEntropy <float>(layer.Output, labels),
                Images = images,
                Labels = labels
            };

            return model;
        }
Пример #3
0
 /// <summary>
 /// Base constructor: creates a variable with an explicitly given (possibly partial)
 /// shape, no initializer expression, and no owning operator.
 /// </summary>
 /// <param name="dataType">CLR element type of the tensor data.</param>
 /// <param name="type">Role of the variable (e.g. Common or Parameter).</param>
 /// <param name="shape">Shape of the variable; individual dimensions may be unknown.</param>
 protected Variable(Type dataType, VariableType type, PartialShape shape)
 {
     DataType     = dataType;
     Type         = type;
     _shape       = shape;
     _initializer = null;
     _owner       = null;
 }
Пример #4
0
 /// <summary>
 /// Base constructor: creates a variable whose shape is derived from the given
 /// initializer expression; a null initializer leaves the shape unknown.
 /// </summary>
 /// <param name="dataType">CLR element type of the tensor data.</param>
 /// <param name="type">Role of the variable (e.g. Common or Parameter).</param>
 /// <param name="initializer">Expression whose shape, if present, defines this variable's shape.</param>
 protected Variable(Type dataType, VariableType type, Expr initializer)
 {
     DataType     = dataType;
     Type         = type;
     // A fresh PartialShape is built from the initializer's shape array (not shared).
     _shape       = initializer != null ? new PartialShape(initializer.Shape.AsArray) : null;
     _initializer = initializer;
     _owner       = null;
 }
Пример #5
0
 /// <summary>
 /// Reshape operator: presents <paramref name="input"/> with a new (possibly partial)
 /// shape. Parameters cannot be reshaped.
 /// </summary>
 public Reshape(Variable <T> input, PartialShape shape)
 {
     // Learned parameters keep their declared shape; reshaping them is not supported.
     Util.EnsureTrue(input.Type != VariableType.Parameter);

     // When the input already has a shape, resolve any inferred (-1) dimensions
     // against it; otherwise take the requested shape as-is.
     if (input.HasShape)
     {
         Shape = PartialShape.Reshape(input.Shape, shape);
     }
     else
     {
         Shape = shape;
     }

     Input  = input;
     Output = Variable <T>(Shape);
     AddInput(Input);
     AddOutput(Output);
 }
Пример #6
0
        /// <summary>
        /// Embedding lookup: maps integer indices to learned dense vectors of size
        /// <paramref name="embedDim"/> from a table of <paramref name="embedSize"/> rows.
        /// Weights are initialized uniformly, centered on zero with half-width initScale.
        /// </summary>
        public Embedding(Variable <int> indices, int embedSize, int embedDim, double initScale = 0.5)
        {
            Indices   = indices;
            EmbedSize = embedSize;
            EmbedDim  = embedDim;

            // Scale the uniform sample by 2*initScale and shift it down by initScale,
            // centering the initial weights around zero.
            var range  = (initScale * 2.0).AsScalar <T>();
            var offset = initScale.AsScalar <T>();
            Weights = Library.Parameter(range * RandomUniform <T>(Shape.Create(embedSize, embedDim)) - offset);

            // Output shape = the indices' shape with an extra trailing embedDim axis.
            Output = Library.Variable <T>(PartialShape.Create(Indices.Shape.Concat(new long[] { embedDim }).ToArray()));

            AddInput(Indices);
            AddInput(Weights);
            AddOutput(Output);
        }
Пример #7
0
        /// <summary>
        /// Gradient check for AttentionReduce: backprop gradients for the softmax
        /// weights and the states are compared against finite-difference estimates
        /// on a small random problem.
        /// </summary>
        public static void TestAttentionReduce()
        {
            const int seqLen    = 3;
            const int batchSize = 4;
            const int stateDim  = 5;

            var statesData = new double[seqLen, batchSize, stateDim];
            UniformRandomArray(statesData);

            var softmaxData = new double[seqLen, batchSize];
            UniformRandomArray(softmaxData);

            // Sequence length is left dynamic (-1) in the graph.
            var softmax = Variable <double>(PartialShape.Create(-1, batchSize));
            var states  = Variable <double>(PartialShape.Create(-1, batchSize, stateDim));
            var reduce  = new AttentionReduce <double>(softmax, states);

            var ctx = Context.GpuContext(0);
            var exe = new Executor(ctx, reduce.Output)
            {
                AssignAllGradient = true
            };
            exe.Initalize();

            var dOutputData = new double[batchSize, stateDim];
            UniformRandomArray(dOutputData);

            // Forward pass, then backprop an arbitrary output gradient.
            exe.AssignTensor(softmax, softmaxData.AsTensor());
            exe.AssignTensor(states, statesData.AsTensor());
            exe.Forward();
            exe.AssignGradient(reduce.Output, dOutputData.AsTensor(), replace: true);
            exe.Backward();

            var dSoftmax = exe.GetGradient(reduce.Softmax);
            var dStates  = exe.GetGradient(reduce.States);

            const double bump = 1e-6;

            // Backprop vs. finite differences for both inputs.
            var dSoftmaxFd = GradientChecker.FiniteDifferenceGradient(exe, softmax, bump: bump);
            AreClose(dSoftmaxFd.ToArray2D(), dSoftmax.ToArray2D(), 1e-7);

            var dStatesFd = GradientChecker.FiniteDifferenceGradient(exe, states, bump: bump);
            AreClose(dStatesFd.ToArray3D(), dStates.ToArray3D(), 1e-7);
        }
Пример #8
0
        /// <summary>
        /// Directly-implemented LSTM layer (non-cuDNN). Input weights, recurrent weights
        /// and biases are packed into a single parameter W with gate layout IFOA.
        /// </summary>
        /// <param name="x">Input sequence, layout (seqLength, batch, inputSize); seqLength and inputSize must be known.</param>
        /// <param name="hiddenSize">Number of hidden units.</param>
        /// <param name="cx">Optional initial cell state variable (batch, hiddenSize); created when null.</param>
        /// <param name="hx">Optional initial hidden state variable (batch, hiddenSize); created when null.</param>
        /// <param name="forgetBiasInit">Initial forget-gate bias value.</param>
        public Lstm(Variable <T> x, int hiddenSize, Variable <T> cx = null, Variable <T> hx = null, double forgetBiasInit = 0.0)
        {
            // X shape (seqLength, batch, inputSize)
            Util.EnsureEqual(3, x.Shape.Rank, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(x.Shape[0] > 0, "SeqLength must be determined.");
            Util.EnsureTrue(x.Shape[2] > 0, "InputSize must be determined.");
            X              = x;
            SeqLength      = (int)X.Shape[0];
            InputSize      = (int)X.Shape[2];
            HiddenSize     = hiddenSize;
            ForgetBiasInit = forgetBiasInit;

            // Y Shape (seqLength, batch, hiddenSize)
            Y = Variable <T>(PartialShape.Create(SeqLength, -1, HiddenSize));

            // W (1 + inputSize + hiddenSize, 4 * hiddenSize) : B -> W -> U
            // layout: IFOA
            // Scaled normal init: N(0, 1) / sqrt(inputSize + hiddenSize).
            W = Parameter(RandomNormal <T>(Shape.Create(InputSize + HiddenSize + 1, 4 * HiddenSize)) / Math.Sqrt(InputSize + hiddenSize).AsScalar <T>());

            // input and output states
            CX = cx ?? Variable <T>(PartialShape.Create(-1, HiddenSize));
            HX = hx ?? Variable <T>(PartialShape.Create(-1, HiddenSize));
            CY = Variable <T>(PartialShape.Create(-1, HiddenSize));
            HY = Variable <T>(PartialShape.Create(-1, HiddenSize));

            // build the graph
            AddInput(X);
            AddOutput(Y);
            AddInput(W);
            AddInput(CX);
            AddInput(HX);
            AddOutput(CY);
            AddOutput(HY);

            // Aux variables: scratch buffers for the forward/backward implementation
            // (their exact roles are not visible in this constructor).
            Hin   = AuxVariable <T>();
            Hout  = AuxVariable <T>();
            IFOA1 = AuxVariable <T>();
            IFOA2 = AuxVariable <T>();
            C     = AuxVariable <T>();
            Temp1 = AuxVariable <T>();
            Temp2 = AuxVariable <T>();

            AddAuxVar(Hin);
            AddAuxVar(Hout);
            AddAuxVar(IFOA1);
            AddAuxVar(IFOA2);
            AddAuxVar(C);
            AddAuxVar(Temp1);
            AddAuxVar(Temp2);
        }
Пример #9
0
        /// <summary>
        /// 2D convolution layer backed by cuDNN (cross-correlation, zero padding, stride 1).
        /// Output spatial dimensions are obtained by probing cuDNN with a temporary tensor
        /// descriptor; the output batch dimension stays dynamic (-1).
        /// </summary>
        /// <param name="data">Input, layout (batch, channels, height, width); all but batch must be known.</param>
        /// <param name="kernelH">Kernel height.</param>
        /// <param name="kernelW">Kernel width.</param>
        /// <param name="numFilter">Number of output filters (output channels).</param>
        public Convolution2D(Variable <T> data, int kernelH, int kernelW, int numFilter)
        {
            Util.EnsureTrue(data.Shape.Rank == 4);
            Util.EnsureTrue(data.Shape[1] > 0);
            Util.EnsureTrue(data.Shape[2] > 0);
            Util.EnsureTrue(data.Shape[3] > 0);

            var numInputFilter  = data.Shape[1];
            var numOutputFilter = numFilter;
            var height          = data.Shape[2];
            var width           = data.Shape[3];

            // fixed padding and stride now
            ConvolutionDesc = new ConvolutionDescriptor();
            ConvolutionDesc.Set2D(0, 0, 1, 1, 1, 1, ConvolutionMode.CROSS_CORRELATION);

            using (var dataDesc = new TensorDescriptor())
                using (var weightDesc = new FilterDescriptor())
                {
                    var dataType = Dnn.DataTypeOf <T>();
                    var tempN    = 100; // for temp mini batch size
                    dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, tempN, (int)numInputFilter, (int)height, (int)width);
                    weightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, numOutputFilter, (int)numInputFilter, kernelH, kernelW);

                    // get output dimension
                    int n, c, h, w;
                    ConvolutionDesc.Get2DForwardOutputDim(dataDesc, weightDesc, out n, out c, out h, out w);

                    //Console.WriteLine($"{c},{h},{w}");

                    // Create variables
                    // Uniform init in [-1, 1) scaled by sqrt(3 / fanIn); bias starts at 0.1.
                    var scale = Sqrt(3.0.AsScalar <T>() / ((double)(numInputFilter * kernelH * kernelW)).AsScalar <T>());

                    Data       = data;
                    Weight     = Parameter(scale * (2.0.AsScalar <T>() * RandomUniform <T>(Shape.Create(numOutputFilter, numInputFilter, kernelH, kernelW), 0UL, 0UL) - 1.0.AsScalar <T>()));
                    Bias       = Parameter(Fill(Shape.Create(c), ScalarOps.Conv <T>(0.1)));
                    Output     = Variable <T>(PartialShape.Create(-1, c, h, w));
                    // Byte workspaces for cuDNN algorithms (their sizing is not visible here).
                    Workspace1 = AuxVariable <byte>();
                    Workspace2 = AuxVariable <byte>();

                    AddInput(Data);
                    AddInput(Weight);
                    AddInput(Bias);
                    AddOutput(Output);
                    AddAuxVar(Workspace1);
                    AddAuxVar(Workspace2);
                }
        }
Пример #10
0
        /// <summary>
        /// RNN cell iterated over a sequence. Batch size and input size must be known;
        /// the sequence length stays dynamic (-1). Per-step hidden/cell states and a
        /// reserve-space buffer are exposed as auxiliary variables.
        /// </summary>
        /// <param name="rnnRnnType">RNN flavor (e.g. LSTM) supplying mode-specific behavior.</param>
        /// <param name="input">Input, layout (seqLength, batch, inputSize).</param>
        /// <param name="numLayers">Number of stacked layers.</param>
        /// <param name="hiddenSize">Hidden units per layer.</param>
        /// <param name="isTraining">Training mode; dropout is disabled when false.</param>
        /// <param name="dropoutProbability">Dropout probability (forced to 0.0 outside training).</param>
        /// <param name="dropoutSeed">Seed for the dropout RNG.</param>
        public IteratedRnnCell(RnnType rnnRnnType, Variable <T> input, int numLayers, int hiddenSize, bool isTraining, double dropoutProbability, ulong dropoutSeed = 1337UL)
        {
            RnnType            = rnnRnnType;
            IsTraining         = isTraining;
            NumLayers          = numLayers;
            HiddenSize         = hiddenSize;
            DropoutProbability = isTraining ? dropoutProbability : 0.0;
            DropoutSeed        = dropoutSeed;

            Util.EnsureEqual(3, input.Shape.Rank, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(input.Shape[1] >= 0, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(input.Shape[2] >= 0, "Input layout: (seqLength, batch, inputSize)");
            Input     = input;
            BatchSize = (int)input.Shape[1];
            InputSize = (int)input.Shape[2];

            // output Shape (seqLength, batchSize, hiddenSize)
            Output = Variable <T>(PartialShape.Create(-1, BatchSize, HiddenSize));

            // W shape will be determined during initialization
            W = Parameter <T>();

            // create variables for input hidden and cell state
            HX = Variable <T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));
            CX = Variable <T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));
            HY = Variable <T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));
            CY = Variable <T>(PartialShape.Create(NumLayers, BatchSize, HiddenSize));

            // state variable H and Y = (n - 1, layer, b, d), n is unknown
            var shape = PartialShape.Create(-1, NumLayers, BatchSize, HiddenSize);

            H = Library.Variable <T>(shape);
            C = Library.Variable <T>(shape);

            // Opaque byte buffer; presumably cuDNN training reserve space — not visible here.
            ReserveSpace = Library.Variable <byte>();

            // construct the graph
            AddInput(Input);
            AddInput(W);
            AddOutput(Output);
            AddAuxVar(HX);
            AddAuxVar(CX);
            AddAuxVar(HY);
            AddAuxVar(CY);
            AddAuxVar(H);
            AddAuxVar(C);
            AddAuxVar(ReserveSpace);
        }
Пример #11
0
        /// <summary>
        /// Encoder part of a sequence model: embedding -> (optional dropout) ->
        /// multi-layer LSTM -> (optional dropout).
        /// NOTE(review): the attention stage is not implemented yet (see trailing comment),
        /// and <paramref name="ctx"/> is unused in this constructor — confirm whether a
        /// later construction step needs it.
        /// </summary>
        public Model(Context ctx, int numInputSteps, Config cfg, bool isTraining = true)
        {
            // Dropout only applies in training mode and when a positive probability is configured.
            var addDropout = isTraining && cfg.DropoutProbability > 0.0;

            // Token ids, layout (numInputSteps, batch).
            EncoderInputs = Library.Variable <int>(PartialShape.Create(numInputSteps, cfg.BatchSize));
            Embedding     = new Embedding <float>(EncoderInputs, cfg.VocabularySize, cfg.HiddenSize, initScale: cfg.InitScale);

            EmbeddingOutput = addDropout ? new Dropout <float>(Embedding.Output, cfg.DropoutProbability).Output : Embedding.Output;

            var rnnType = new LstmRnnType();

            EncoderRnn       = new Rnn <float>(rnnType, EmbeddingOutput, cfg.NumLayers, cfg.HiddenSize, isTraining: isTraining, dropout: addDropout ? cfg.DropoutProbability : 0.0);
            EncoderRnnOutput = addDropout ? new Dropout <float>(EncoderRnn.Y, cfg.DropoutProbability).Output : EncoderRnn.Y;

            // attention model
        }
Пример #12
0
        /// <summary>
        /// Builds a 784-128-64-10 multi-layer perceptron with ReLU activations and a
        /// softmax cross-entropy loss.
        /// </summary>
        public static Model MultiLayerPerceptronModel()
        {
            // Inputs: flattened 28x28 images and one-hot labels over 10 classes.
            var images = Variable <float>(PartialShape.Create(-1, 28 * 28));
            var labels = Variable <float>(PartialShape.Create(-1, 10));

            ILayer <float> layer = new FullyConnected <float>(images, 128);
            layer = new ActivationReLU <float>(layer.Output);
            layer = new FullyConnected <float>(layer.Output, 64);
            layer = new ActivationReLU <float>(layer.Output);
            layer = new FullyConnected <float>(layer.Output, 10);

            var model = new Model
            {
                Loss   = new SoftmaxCrossEntropy <float>(layer.Output, labels),
                Images = images,
                Labels = labels
            };

            return model;
        }
Пример #13
0
        /// <summary>
        /// RNN with dynamic sequence length AND dynamic batch size (both -1 in the graph);
        /// only the input size must be known up front. The packed weight W is shaped at
        /// initialization time.
        /// </summary>
        /// <param name="rnnRnnType">RNN flavor (e.g. LSTM) supplying mode-specific behavior.</param>
        /// <param name="x">Input, layout (seqLength, batch, inputSize); inputSize must be known.</param>
        /// <param name="numLayers">Number of stacked layers.</param>
        /// <param name="hiddenSize">Hidden units per layer.</param>
        /// <param name="isTraining">Training mode; dropout is disabled when false.</param>
        /// <param name="dropout">Dropout probability (forced to 0.0 outside training).</param>
        /// <param name="dropoutSeed">Seed for the dropout RNG.</param>
        public RnnDynamic(RnnType rnnRnnType, Variable <T> x, int numLayers, int hiddenSize, bool isTraining = true, double dropout = 0.0, ulong dropoutSeed = 1337UL)
        {
            RnnType     = rnnRnnType;
            IsTraining  = isTraining;
            NumLayers   = numLayers;
            HiddenSize  = hiddenSize;
            Dropout     = isTraining ? dropout : 0.0;
            DropoutSeed = dropoutSeed;

            // X shape (seqLength, batch, inputSize)
            X = x;
            Util.EnsureEqual(3, X.Shape.Rank, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(X.Shape[2] >= 0, "Input layout: (seqLength, batch, inputSize)");
            InputSize = (int)X.Shape[2];

            // Y Shape (maxSeqLength, not yet known, hiddenSize)
            Y = Variable <T>(PartialShape.Create(-1, -1, HiddenSize));

            // W shape will be determined during initialization
            W = Parameter <T>();

            // state variables
            var shape = PartialShape.Create(NumLayers, -1, HiddenSize);

            HX = Variable <T>(shape);
            CX = Variable <T>(shape);
            HY = Variable <T>(shape);
            CY = Variable <T>(shape);

            // construct the graph
            // NOTE(review): DropoutStates, Workspace and ReserveSpace are registered here but
            // not created in this constructor — presumably initialized elsewhere; verify.
            AddInput(X);
            AddInput(W);
            AddOutput(Y);
            AddAuxVar(HX);
            AddAuxVar(CX);
            AddAuxVar(HY);
            AddAuxVar(CY);
            AddAuxVar(DropoutStates);
            AddAuxVar(Workspace);
            AddAuxVar(ReserveSpace);
        }
Пример #14
0
        /// <summary>
        /// Dense layer mapping a (batch, inputSize) matrix to (batch, numHidden).
        /// Weights use a scaled uniform initialization; biases start at zero.
        /// </summary>
        public FullyConnected(Variable <T> data, long numHidden)
        {
            Util.EnsureTrue(data.HasShape);
            Util.EnsureEqual(2, data.Shape.Rank, "Input must be matrix.");
            Util.EnsureTrue(data.Shape[1] > 0L);

            Data = data;

            var inputSize = data.Shape[1];

            // Uniform init in [-0.5, 0.5) scaled by sqrt(12 / (fanIn + fanOut)).
            var initScale = Sqrt(12.0.AsScalar <T>() / ((double)(inputSize + numHidden)).AsScalar <T>());
            Weights = Parameter(initScale * (RandomUniform <T>(Shape.Create(inputSize, numHidden), 0UL, 0UL) - 0.5.AsScalar <T>()));
            Bias    = Parameter(Fill(Shape.Create(numHidden), ScalarOps.Conv <T>(0.0)));

            // The batch dimension is inherited from the input (may be dynamic).
            Output = Variable <T>(PartialShape.Create(data.Shape[0], numHidden));

            AddInput(Data);
            AddInput(Weights);
            AddInput(Bias);
            AddOutput(Output);
        }
Пример #15
0
            /// <summary>
            /// Reduction stage of attention: combines States (n, b, d) using the weights
            /// Softmax (n, b) into an Output of shape (b, d). The reduction itself
            /// (presumably a softmax-weighted sum over n) is implemented outside this
            /// constructor — confirm in the layer's Forward method.
            /// </summary>
            /// <param name="softmax">Attention weights, layout (n, b); b must be known.</param>
            /// <param name="states">Encoder states, layout (n, b, d); b and d must be known.</param>
            public AttentionReduce(Variable <T> softmax, Variable <T> states)
            {
                Softmax = softmax;
                States  = states;

                // n (the sequence axis) may stay dynamic; only b and d must be fixed,
                // and the batch dimensions of the two inputs must agree.
                Util.EnsureTrue(softmax.Shape.Rank == 2, "Softmax: (n,b)");
                Util.EnsureTrue(states.Shape.Rank == 3, "States: (n,b,d)");
                Util.EnsureTrue(softmax.Shape[1] > 0, "Softmax: b needed.");
                Util.EnsureTrue(states.Shape[1] > 0, "States: b needed.");
                Util.EnsureTrue(states.Shape[2] > 0, "States: d needed.");
                Util.EnsureTrue(softmax.Shape[1] == states.Shape[1], "b should match.");

                BatchSize  = softmax.Shape[1];
                StatesSize = states.Shape[2];

                Output = Variable <T>(PartialShape.Create(BatchSize, StatesSize));

                AddInput(Softmax);
                AddInput(States);
                AddOutput(Output);
            }
Пример #16
0
        /// <summary>
        /// 2D pooling layer backed by cuDNN (no padding). Output spatial dimensions are
        /// obtained by probing cuDNN with a temporary tensor descriptor; the output batch
        /// dimension stays dynamic (-1).
        /// </summary>
        /// <param name="data">Input, layout (batch, channels, height, width); channel/height/width must be known.</param>
        /// <param name="mode">Pooling mode (e.g. MAX).</param>
        /// <param name="kernelH">Pooling window height.</param>
        /// <param name="kernelW">Pooling window width.</param>
        /// <param name="strideH">Vertical stride.</param>
        /// <param name="strideW">Horizontal stride.</param>
        public Pooling2D(Variable <T> data, PoolingMode mode, int kernelH, int kernelW, int strideH, int strideW)
        {
            Descriptor = new PoolingDescriptor();
            Descriptor.Set2D(mode, NanPropagation.NOT_PROPAGATE_NAN, kernelH, kernelW, 0, 0, strideH, strideW);

            var dataType = Dnn.DataTypeOf <T>();

            // Probe cuDNN for the output dimensions using an arbitrary temporary mini-batch
            // size (10); n is discarded and the output batch dimension stays dynamic.
            // BUG FIX: dataDesc was disposed manually with no try/finally, leaking the
            // native descriptor if Get2dForwardOutputDim threw — use a using block instead.
            int n, c, h, w;
            using (var dataDesc = new TensorDescriptor())
            {
                dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, 10, (int)data.Shape[1], (int)data.Shape[2], (int)data.Shape[3]);
                Descriptor.Get2dForwardOutputDim(dataDesc, out n, out c, out h, out w);
            }

            Data   = data;
            Output = Variable <T>(PartialShape.Create(-1, c, h, w));

            AddInput(Data);
            AddOutput(Output);
        }
Пример #17
0
 /// <summary>Creates a variable of type VariableType.Common with the given partial shape.</summary>
 public static Variable <T> Variable <T>(PartialShape shape)
 {
     var variable = new Variable <T>(VariableType.Common, shape);

     return variable;
 }
Пример #18
0
        /// <summary>
        /// Allocates and initializes the cuDNN state for this RNN: the dropout RNG state
        /// buffer, the RNN descriptor, and the packed weight tensor W (matrix weights get
        /// a scaled random-normal init; biases are delegated to RnnType.InitBias).
        /// </summary>
        public override void Initialize(Executor executor)
        {
            var context = executor.Context.ToGpuContext();
            var dnn     = context.Dnn;

            // dropout: allocate the RNG state buffer cuDNN requires.
            var    dropoutDesc = executor.DropoutDescDict[DropoutDesc];
            IntPtr dropoutStatesSize;

            dnn.DropoutGetStatesSize(out dropoutStatesSize);
            var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));

            dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);

            // rnn descriptor
            var rnnDesc = executor.RnnDescDict[RnnDesc];
            var mode    = RnnType.Mode;

            rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf <T>());

            // initialize weight, once only, using minibatch size 1
            var shape   = PartialShape.Create(1, InputSize, 1); // first dimension does not affect the weight shape and size TODO test all, tested only for LSTM
            var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);

            // BUG FIX: xDesc was created but never disposed; a using block guarantees the
            // native tensor descriptor is released even if a cuDNN call throws.
            using (var xDesc = new TensorDescriptor())
            {
                xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
                var    wDesc = executor.FilterDescDict[WDesc];
                IntPtr weightsSize;

                dnn.GetRNNParamsSize(rnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf <T>());
                Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf <T>() == 0);
                // (was Alea.Gpu.SizeOf — same API; normalized to the unqualified form used above)
                var shapeW = Shape.Create(weightsSize.ToInt64() / Gpu.SizeOf <T>());

                wDesc.SetND(Dnn.DataTypeOf <T>(), TensorFormat.CUDNN_TENSOR_NCHW, new [] { (int)shapeW[0], 1, 1 });

                // since we are using cuDNN, we'd better make sure these variables are allocated
                executor.GetTensor(W, shapeW);
                if (IsTraining)
                {
                    executor.GetGradient(W, shapeW);
                }

                // init weights: cuDNN exposes each linear layer's matrix and bias as views into W.
                var numLinearLayers = RnnType.NumLinLayers;

                using (var filterDesc = new FilterDescriptor())
                {
                    var w          = executor.GetTensor(W);
                    var filterDimA = new int[3];

                    for (var layer = 0; layer < NumLayers; ++layer)
                    {
                        for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId)
                        {
                            int          nbDims;
                            DataType     dataType;
                            TensorFormat format;

                            // matrix weights: init with N(0, 1) / sqrt(hiddenSize + inputSize)
                            deviceptr <T> linLayerMat;
                            dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);

                            filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                            var length = filterDimA.Aggregate(ScalarOps.Mul);

                            var linLayerMatBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                            var linLayerMatTensor = new Tensor <T>(linLayerMatBuffer);
                            context.Assign(linLayerMatTensor, RandomNormal <T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar <T>()));

                            // biases: delegate initialization to the RnnType (e.g. forget-gate bias)
                            deviceptr <T> linLayerBias;
                            dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);

                            filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                            length = filterDimA.Aggregate(ScalarOps.Mul);

                            var linLayerBiasBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                            var linLayerBiasTensor = new Tensor <T>(linLayerBiasBuffer);
                            RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
                        }
                    }
                }
            }

            base.Initialize(executor);
        }
Пример #19
0
        /// <summary>
        /// End-to-end gradient check for the Attention layer: backprop gradients for the
        /// projection weights (Wh, Wd) and both inputs are compared against
        /// finite-difference estimates.
        /// </summary>
        public static void TestAttention()
        {
            const int batchSize         = 10;
            const int encoderHiddenSize = 20;
            const int decoderHiddenSize = 25;
            const int attentionDim      = 30;

            // Encoder states: (encoderSeqLength, batch, encoderHiddenSize), seqLength dynamic.
            var encoderHiddenStates = Variable <double>(PartialShape.Create(-1, batchSize, encoderHiddenSize));
            var decoderHiddenStates = Variable <double>(PartialShape.Create(batchSize, decoderHiddenSize));
            var attention           = new Attention <double>(encoderHiddenStates, decoderHiddenStates, attentionDim);

            var ctx = Context.GpuContext(0);
            var exe = new Executor(ctx, attention.Output)
            {
                AssignAllGradient = true
            };

            exe.Initalize();

            // The sequence length is fixed only here, at tensor-assignment time.
            const int encoderSeqLength = 3;
            var dataEncoderHiddenStates = new double[encoderSeqLength, batchSize, encoderHiddenSize];
            UniformRandomArray(dataEncoderHiddenStates);

            var dataDecoderHiddenStates = new double[batchSize, decoderHiddenSize];
            UniformRandomArray(dataDecoderHiddenStates);

            exe.AssignTensor(encoderHiddenStates, dataEncoderHiddenStates.AsTensor());
            exe.AssignTensor(decoderHiddenStates, dataDecoderHiddenStates.AsTensor());
            exe.Forward();

            var tensorOutput = exe.GetTensor(attention.Output);

            // Backprop an arbitrary output gradient.
            var dataDOutput = new double[batchSize, encoderHiddenSize];
            UniformRandomArray(dataDOutput);
            exe.AssignGradient(attention.Output, dataDOutput.AsTensor(), replace: true);
            exe.Backward();

            var tensorDWh = exe.GetGradient(attention.Wh);
            var tensorDWd = exe.GetGradient(attention.Wd);
            var tensorDH  = exe.GetGradient(attention.EncoderHiddenStates);
            var tensorDD  = exe.GetGradient(attention.DecoderHiddenStates);

            const double bump = 1e-7;

            // Backprop vs. finite differences for both weight matrices and both inputs.
            var tensorDWh_fd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wh, bump: bump);
            AreClose(tensorDWh.ToArray2D(), tensorDWh_fd.ToArray2D(), 1e-7);

            var tensorDWd_fd = GradientChecker.FiniteDifferenceGradient(exe, attention.Wd, bump: bump);
            AreClose(tensorDWd.ToArray2D(), tensorDWd_fd.ToArray2D(), 1e-7);

            var tensorDH_fd = GradientChecker.FiniteDifferenceGradient(exe, attention.EncoderHiddenStates, bump: bump);
            AreClose(tensorDH.ToArray3D(), tensorDH_fd.ToArray3D(), 1e-7);

            var tensorDD_fd = GradientChecker.FiniteDifferenceGradient(exe, attention.DecoderHiddenStates, bump: bump);
            AreClose(tensorDD.ToArray2D(), tensorDD_fd.ToArray2D(), 1e-7);
        }
Пример #20
0
            /// <summary>
            /// Language model: embedding -> (dropout) -> LSTM stack (either the cuDNN-backed
            /// Rnn or a stack of direct Lstm layers) -> fully-connected layer over the
            /// vocabulary with sparse softmax cross-entropy loss, optimized by SGD with
            /// global-norm gradient clipping. The constructor also runs a warmup
            /// forward/backward pass to trigger JIT compilation, then resets all state.
            /// </summary>
            /// <param name="ctx">Execution context for the optimizer.</param>
            /// <param name="cfg">Hyperparameters (sizes, layers, dropout keep-probability, learning rate, ...).</param>
            /// <param name="isTraining">Enables dropout and the backward pass.</param>
            /// <param name="usingCuDnn">Selects the cuDNN RNN path instead of the direct LSTM stack.</param>
            public Model(Context ctx, Config cfg, bool isTraining = true, bool usingCuDnn = true)
            {
                Config     = cfg;
                IsTraining = isTraining;
                UsingCuDnn = usingCuDnn;

                // Token ids, layout (numSteps, batch), for both inputs and shifted targets.
                Inputs  = Variable <int>(PartialShape.Create(cfg.NumSteps, cfg.BatchSize));
                Targets = Variable <int>(PartialShape.Create(cfg.NumSteps, cfg.BatchSize));

                // embedding
                Embedding = new Embedding <float>(Inputs, cfg.VocabSize, cfg.HiddenSize, initScale: cfg.InitScale);

                // add dropout
                EmbeddedOutput = Embedding.Output;
                if (isTraining && cfg.KeepProb < 1.0)
                {
                    var dropout = new Dropout <float>(EmbeddedOutput, dropoutProb: 1.0 - cfg.KeepProb);
                    EmbeddedOutput = dropout.Output;
                }

                // rnn layer, dropout for intermediate lstm layers and for output
                if (usingCuDnn)
                {
                    RnnAccelerated = new Rnn <float>(new LstmRnnType(forgetBiasInit: 0.0), EmbeddedOutput, cfg.NumLayers, cfg.HiddenSize, isTraining: isTraining, dropout: isTraining && cfg.KeepProb < 1.0 ? 1.0 - Config.KeepProb : 0.0);
                    RnnOutput      = RnnAccelerated.Y;
                    if (isTraining && cfg.KeepProb < 1.0)
                    {
                        var dropout = new Dropout <float>(RnnOutput, dropoutProb: 1.0 - cfg.KeepProb);
                        RnnOutput = dropout.Output;
                    }
                }
                else
                {
                    // Direct path: stack NumLayers LSTMs, with dropout after each layer.
                    RnnDirect = new Lstm <float> [cfg.NumLayers];
                    for (var i = 0; i < cfg.NumLayers; ++i)
                    {
                        var lstm = new Lstm <float>(i == 0 ? EmbeddedOutput : RnnOutput, cfg.HiddenSize, forgetBiasInit: 0.0);
                        RnnDirect[i] = lstm;
                        RnnOutput    = lstm.Y;
                        if (isTraining && cfg.KeepProb < 1.0)
                        {
                            var dropout = new Dropout <float>(RnnOutput, dropoutProb: 1.0 - cfg.KeepProb);
                            RnnOutput = dropout.Output;
                        }
                    }
                }

                // Collapse (numSteps, batch) into one axis so FC sees a (numSteps*batch, hidden) matrix.
                FC = new FullyConnected <float>(RnnOutput.Reshape(RnnOutput.Shape[0] * RnnOutput.Shape[1], RnnOutput.Shape[2]), cfg.VocabSize);

                Loss = new SoftmaxCrossEntropySparse <float>(FC.Output, Targets.Reshape(Targets.Shape[0] * Targets.Shape[1]));

                Optimizer = new GradientDescentOptimizer(ctx, Loss.Loss, cfg.LearningRate, new GlobalNormGradientClipper(cfg.MaxGradNorm));

                // warmup to force JIT compilation to get timings without JIT overhead
                Optimizer.Initalize();
                ResetStates();
                Optimizer.AssignTensor(Inputs, Fill(Shape.Create(Inputs.Shape.AsArray), 0));
                Optimizer.AssignTensor(Targets, Fill(Shape.Create(Targets.Shape.AsArray), 0));
                Optimizer.Forward();
                if (isTraining)
                {
                    Optimizer.Backward();
                }

                // now reset states
                Optimizer.Initalize();
                ResetStates();
            }
Пример #21
0
 /// <summary>
 /// Fluent helper: wraps <paramref name="input"/> in a Reshape layer with the given
 /// dimensions and returns the reshaped output variable.
 /// </summary>
 public static Variable <T> Reshape <T>(this Variable <T> input, params long[] shape)
 {
     var layer = new Reshape <T>(input, PartialShape.Create(shape));

     return layer.Output;
 }
Пример #22
0
        /// <summary>
        /// Checks the direct Lstm&lt;float&gt; implementation against precomputed
        /// reference results loaded from "lstm_small.mat": forward outputs
        /// (H, hn, cn) and backward gradients (dX, dW, dc0, dh0).
        /// Reference data is stored as doubles and narrowed to float here.
        /// </summary>
        public static void TestLstmAgainstReferenceResults()
        {
            // NOTE(review): file is resolved relative to the working directory — confirm test runner copies it.
            var mfr = new MatFileReader(@"lstm_small.mat");

            // problem dimensions come from the fixture so data and graph always agree
            var inputSize  = mfr.GetInt("InputSize");
            var seqLength  = mfr.GetInt("SeqLength");
            var hiddenSize = mfr.GetInt("HiddenSize");
            var batchSize  = mfr.GetInt("BatchSize");

            // input layout: (seqLength, batch, inputSize)
            var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
            var lstm = new Lstm <float>(x, hiddenSize);

            var ctx = Context.GpuContext(0);
            var exe = new Executor(ctx, lstm.Y);

            exe.Initalize();

            // initial hidden and cell states, layout (batch, hiddenSize)
            var h0 = mfr.GetDoubleArray("h0").Select(n => (float)n).ToArray();
            var c0 = mfr.GetDoubleArray("c0").Select(n => (float)n).ToArray();

            exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(batchSize, hiddenSize)));
            exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(batchSize, hiddenSize)));

            var input = mfr.GetDoubleArray("X").Select(n => (float)n).ToArray();

            exe.AssignTensor(x, input.AsTensor(Shape.Create(seqLength, batchSize, inputSize)));

            // weight matrix layout: (1 bias row + inputSize + hiddenSize, 4 * hiddenSize)
            var w = mfr.GetDoubleArray("W").Select(n => (float)n).ToArray();

            // debug print of the reference weights before assignment
            w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();
            exe.AssignTensor(lstm.W, w.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)));

            exe.Forward();

            // compare full output sequence against reference H
            var H = mfr.GetDoubleArray("H").Select(n => (float)n).ToArray();

            H.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

            var myH = exe.GetTensor(lstm.Y).ToArray();

            myH.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

            AreClose(H, myH, 1e-6);

            // compare final cell state against reference cn
            var CN = mfr.GetDoubleArray("cn").Select(n => (float)n).ToArray();

            CN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var myCN = exe.GetTensor(lstm.CY).ToArray();

            myCN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            AreClose(CN, myCN, 1e-6);

            // compare final hidden state against reference hn
            var HN = mfr.GetDoubleArray("hn").Select(n => (float)n).ToArray();

            HN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var myHN = exe.GetTensor(lstm.HY).ToArray();

            myHN.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            AreClose(HN, myHN, 1e-6);

            // seed the backward pass with the reference output gradient dH
            var dH = mfr.GetDoubleArray("dH").Select(n => (float)n).ToArray();

            exe.AssignGradient(lstm.Y, dH.AsTensor(Shape.Create(seqLength, batchSize, hiddenSize)), replace: true);

            exe.Backward();

            // compare input gradient
            var dX = mfr.GetDoubleArray("dX").Select(n => (float)n).ToArray();

            dX.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();

            var dXmy = exe.GetGradient(lstm.X).ToArray();

            dXmy.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();
            AreClose(dX, dXmy, 1e-6);

            // compare weight gradient
            var dW = mfr.GetDoubleArray("dW").Select(n => (float)n).ToArray();

            dW.AsTensor(Shape.Create(inputSize + hiddenSize + 1, 4 * hiddenSize)).Print();

            var dWmy = exe.GetGradient(lstm.W).ToArray();

            dWmy.AsTensor(Shape.Create(lstm.W.Shape.AsArray)).Print();
            AreClose(dW, dWmy, 1e-6);

            // compare initial-cell-state gradient
            var dc0 = mfr.GetDoubleArray("dc0").Select(n => (float)n).ToArray();

            dc0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var dc0my = exe.GetGradient(lstm.CX).ToArray();

            dc0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
            AreClose(dc0, dc0my, 1e-6);

            // compare initial-hidden-state gradient
            var dh0 = mfr.GetDoubleArray("dh0").Select(n => (float)n).ToArray();

            dh0.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();

            var dh0my = exe.GetGradient(lstm.HX).ToArray();

            dh0my.AsTensor(Shape.Create(batchSize, hiddenSize)).Print();
            AreClose(dh0, dh0my, 1e-6);

            // make sure all queued GPU work has finished before the test returns
            ctx.ToGpuContext().Stream.Synchronize();
        }
// Example #23
        /// <summary>
        /// Cross-checks the direct Lstm&lt;float&gt; implementation against the
        /// cuDNN-backed Rnn&lt;float&gt; on identical inputs, states and weights:
        /// forward outputs (y, cy, hy) and all backward gradients must agree
        /// within <c>error</c>. The two implementations store weights in
        /// different layouts, so weights are set gate-by-gate and the cuDNN
        /// weight gradient is remapped into the direct layout before comparing.
        /// </summary>
        public static void TestLstmAgainstCuDnnVersion()
        {
            var ctx        = Context.GpuContext(0);
            var inputSize  = 5;
            var seqLength  = 3;
            var batchSize  = 2;
            var hiddenSize = 4;
            var error      = 1e-5;

            // input in [-1, 1), layout (seqLength, batch, inputSize)
            var data = Context.CpuContext.Eval((2.0f.AsScalar() *
                                                RandomUniform <float>(Shape.Create(seqLength, batchSize, inputSize)) -
                                                1.0f.AsScalar())).ToArray3D();
            //data.AsTensor(Shape.Create(seqLength*batchSize, inputSize)).Print();

            // random initial states, and an output gradient in [-1, 1) to seed backward
            var h0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var c0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var dy = Context.CpuContext.Eval((2.0f.AsScalar() *
                                              RandomUniform <float>(Shape.Create(seqLength, batchSize, hiddenSize)) -
                                              1.0f.AsScalar())).ToArray3D();
            //dy.AsTensor(Shape.Create(seqLength * batchSize, hiddenSize)).Print();

            // constant per-gate weight values: W* input weights, U* recurrent weights, B* biases
            var wi = 0.5f;
            var wf = 0.4f;
            var wo = 0.3f;
            var wa = 0.2f;
            var ui = 0.5f;
            var uf = 0.4f;
            var uo = 0.3f;
            var ua = 0.1f;
            var bi = 0.5f;
            var bf = 0.4f;
            var bo = 0.3f;
            var ba = 0.2f;

            // results from the two implementations (suffix 1 = cuDNN, 2 = direct)
            float[,,] y1, y2, dx1, dx2;
            float[,] cy1, cy2, hy1, hy2;
            float[,] dcx1, dcx2, dhx1, dhx2;
            float[,] dw1, dw2;

            {
                // calc with cuDNN
                var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
                var lstm = new Rnn <float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states; cuDNN states carry a leading numLayers (=1) dimension
                exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
                exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

                // set weights
                // cuDNN matrices order: IFAO
                var w      = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
                var offset = 0;
                // Wi
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
                offset += inputSize;
                // Wf
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
                offset += inputSize;
                // Wa
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
                offset += inputSize;
                // Wo
                ctx.Assign(w.Slice(Range(offset, offset + inputSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
                offset += inputSize;
                // Ui
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
                offset += hiddenSize;
                // Uf
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
                offset += hiddenSize;
                // Ua
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
                offset += hiddenSize;
                // Uo
                ctx.Assign(w.Slice(Range(offset, offset + hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
                offset += hiddenSize;
                // Bi
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bi));
                offset++;
                // Bf
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bf));
                offset++;
                // Ba
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), ba));
                offset++;
                // Bo
                ctx.Assign(w.Slice(offset), Fill(Shape.Create(1, hiddenSize), bo));

                exe.Forward();

                y1  = exe.GetTensor(lstm.Y).ToArray3D();
                cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
                hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx1  = exe.GetGradient(lstm.X).ToArray3D();
                dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();

                // we make dw follow the shape as (1 + inputSize + hiddenSize, 4*hiddenSize), need to transpose because cuDNN uses Fortran storage order
                var dwCUDNN = exe.GetGradient(lstm.W).ToArray().AsTensor();
                dw1 = new float[1 + inputSize + hiddenSize, 4 * hiddenSize];
                var dw1Tensor = Reference <float>(dw1);
                var cpu       = Context.CpuContext;
                offset = 0;

                // cuDNN order: IFAO, need to transpose because cuDNN uses Fortran storage order
                // target layout per gate columns: I [0,h), F [h,2h), O [2h,3h), A [3h,4h)

                // Wi
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Wf
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Wa
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Wo
                cpu.Assign(dw1Tensor.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + inputSize * hiddenSize)).Reshape(hiddenSize, inputSize).T);
                offset += inputSize * hiddenSize;
                // Ui
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Uf
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Ua
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Uo
                cpu.Assign(dw1Tensor.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize * hiddenSize)).Reshape(hiddenSize, hiddenSize).T);
                offset += hiddenSize * hiddenSize;
                // Bi
                cpu.Assign(dw1Tensor.Slice(0, Range(0, hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
                offset += hiddenSize;
                // Bf
                cpu.Assign(dw1Tensor.Slice(0, Range(hiddenSize, 2 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
                offset += hiddenSize;
                // Ba
                cpu.Assign(dw1Tensor.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
                offset += hiddenSize;
                // Bo
                cpu.Assign(dw1Tensor.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), dwCUDNN.Slice(Range(offset, offset + hiddenSize)).Reshape(hiddenSize, 1).T);
            }

            {
                // calc with direct LSTM implementation
                // forgetBiasInit: 0.0 so initial weights match the cuDNN run exactly
                var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
                var lstm = new Lstm <float>(x, hiddenSize, forgetBiasInit: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states
                exe.AssignTensor(lstm.CX, c0.AsTensor());
                exe.AssignTensor(lstm.HX, h0.AsTensor());

                // set weights; direct layout: row 0 = biases, rows [1, inputSize] = W, rest = U; gate columns IFOA
                var w = exe.GetTensor(lstm.W);
                // Wi
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(0, hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wi));
                // Wf
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wf));
                // Wo
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wo));
                // Wa
                ctx.Assign(w.Slice(Range(1, inputSize + 1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(inputSize, hiddenSize), wa));
                // Ui
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(0, hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ui));
                // Uf
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uf));
                // Uo
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), uo));
                // Ua
                ctx.Assign(w.Slice(Range(inputSize + 1, -1), Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(hiddenSize, hiddenSize), ua));
                // Bi
                ctx.Assign(w.Slice(0, Range(0, hiddenSize)), Fill(Shape.Create(1, hiddenSize), bi));
                // Bf
                ctx.Assign(w.Slice(0, Range(hiddenSize, 2 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bf));
                // Bo
                ctx.Assign(w.Slice(0, Range(2 * hiddenSize, 3 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), bo));
                // Ba
                ctx.Assign(w.Slice(0, Range(3 * hiddenSize, 4 * hiddenSize)), Fill(Shape.Create(1, hiddenSize), ba));

                exe.Forward();

                y2  = exe.GetTensor(lstm.Y).ToArray3D();
                cy2 = exe.GetTensor(lstm.CY).ToArray2D();
                hy2 = exe.GetTensor(lstm.HY).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx2  = exe.GetGradient(lstm.X).ToArray3D();
                dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
                dw2  = exe.GetGradient(lstm.W).ToArray2D();
            }

            // both implementations must agree on outputs and every gradient
            AreClose(y1, y2, error);
            AreClose(cy1, cy2, error);
            AreClose(hy1, hy2, error);
            AreClose(dx1, dx2, error);
            AreClose(dcx1, dcx2, error);
            AreClose(dhx1, dhx2, error);
            AreClose(dw1, dw2, error);
        }
// Example #24
        /// <summary>
        /// Cross-checks the static-shape Rnn&lt;float&gt; against RnnDynamic&lt;float&gt;
        /// (which accepts -1 seqLength/batch dims) on identical inputs, states and
        /// weights: forward outputs and all gradients must agree within <c>error</c>.
        /// Weight initialization is shared via the SetWeights helper defined elsewhere
        /// in this file.
        /// </summary>
        public static void RnnAgainstRnnDynamic()
        {
            var ctx        = Context.GpuContext(0);
            var inputSize  = 5;
            var seqLength  = 3;
            var batchSize  = 2;
            var hiddenSize = 4;
            var error      = 1e-5;

            // random input in [-1, 1), layout (seqLength, batch, inputSize)
            var data = Context.CpuContext.Eval(RandomUniform <float>(-1, 1, Shape.Create(seqLength, batchSize, inputSize))).ToArray3D();

            data.AsTensor(Shape.Create(seqLength * batchSize, inputSize)).Print();

            // random initial states and output gradient shared by both runs
            var h0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var c0 = Context.CpuContext.Eval(RandomNormal <float>(Shape.Create(batchSize, hiddenSize))).ToArray2D();
            var dy = Context.CpuContext.Eval(RandomUniform <float>(-1, 1, Shape.Create(seqLength, batchSize, hiddenSize))).ToArray3D();

            // results from the two implementations (suffix 1 = static Rnn, 2 = RnnDynamic)
            float[,,] y1, y2, dx1, dx2;
            float[,] cy1, cy2, hy1, hy2;
            float[,] dcx1, dcx2, dhx1, dhx2;
            float[] dw1, dw2;

            {
                // static-shape Rnn: seqLength and batch fixed at graph-build time
                var x    = Variable <float>(PartialShape.Create(seqLength, batchSize, inputSize));
                var lstm = new Rnn <float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states; cuDNN states carry a leading numLayers (=1) dimension
                exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
                exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

                // set weights, cuDNN matrices order: IFAO
                var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
                SetWeights(ctx, w, inputSize, hiddenSize);

                exe.Forward();

                y1  = exe.GetTensor(lstm.Y).ToArray3D();
                cy1 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
                hy1 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx1  = exe.GetGradient(lstm.X).ToArray3D();
                dcx1 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx1 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
                dw1  = exe.GetGradient(lstm.W).ToArray(); // cuDNN weight is 1D linear blob
            }

            {
                // dynamic-shape Rnn: seqLength and batch left open (-1), resolved per tensor
                var x    = Variable <float>(PartialShape.Create(-1, -1, inputSize));
                var lstm = new RnnDynamic <float>(new LstmRnnType(), x, 1, hiddenSize, dropout: 0.0);
                var exe  = new Executor(ctx, lstm.Y);
                exe.Initalize();

                // set input
                exe.AssignTensor(lstm.X, data.AsTensor());

                // set states
                exe.AssignTensor(lstm.CX, c0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));
                exe.AssignTensor(lstm.HX, h0.AsTensor(Shape.Create(1, batchSize, hiddenSize)));

                // set weights, cuDNN matrices order: IFAO
                var w = exe.GetTensor(lstm.W).Reshape(inputSize * 4 + hiddenSize * 4 + 2 * 4, hiddenSize);
                SetWeights(ctx, w, inputSize, hiddenSize);

                exe.Forward();

                y2  = exe.GetTensor(lstm.Y).ToArray3D();
                cy2 = exe.GetTensor(lstm.CY).Reshape(batchSize, hiddenSize).ToArray2D();
                hy2 = exe.GetTensor(lstm.HY).Reshape(batchSize, hiddenSize).ToArray2D();

                exe.AssignGradient(lstm.Y, dy.AsTensor(), replace: true);

                exe.Backward();

                dx2  = exe.GetGradient(lstm.X).ToArray3D();
                dcx2 = exe.GetGradient(lstm.CX).Reshape(batchSize, hiddenSize).ToArray2D();
                dhx2 = exe.GetGradient(lstm.HX).Reshape(batchSize, hiddenSize).ToArray2D();
                dw2  = exe.GetGradient(lstm.W).ToArray();
            }

            // both implementations must agree on outputs and every gradient
            AreClose(y1, y2, error);
            AreClose(cy1, cy2, error);
            AreClose(hy1, hy2, error);
            AreClose(dx1, dx2, error);
            AreClose(dcx1, dcx2, error);
            AreClose(dhx1, dhx2, error);
            AreClose(dw1, dw2, error);
        }
// Example #25
 /// <summary>
 /// Creates a decoder whose encoder-output variable has open sequence-length and
 /// batch dimensions (-1) and a fixed feature size of <paramref name="encoderOutputSize"/>.
 /// </summary>
 /// <param name="encoderOutputSize">Size of the last (feature) dimension of the encoder output.</param>
 public SequenceDecoderWithAttention(int encoderOutputSize)
 {
     // Y Shape (maxSeqLength, not yet known, hiddenSize)
     EncoderOutput = Variable <T>(PartialShape.Create(-1, -1, encoderOutputSize));
 }
// Example #26
        /// <summary>
        /// Builds a cuDNN-backed RNN node of the given type (e.g. LSTM) over a
        /// fully-specified input of layout (seqLength, batch, inputSize), creating
        /// the output/state variables and the per-step cuDNN tensor descriptors.
        /// </summary>
        /// <param name="ty">RNN cell type (e.g. LstmRnnType).</param>
        /// <param name="x">Input variable; all three dimensions must be known (>= 0).</param>
        /// <param name="numLayers">Number of stacked RNN layers.</param>
        /// <param name="hiddenSize">Hidden state size per layer.</param>
        /// <param name="isTraining">When false, dropout is forced to 0.</param>
        /// <param name="dropout">Dropout probability between layers (training only).</param>
        /// <param name="dropoutSeed">Seed for cuDNN's dropout RNG state.</param>
        public Rnn(RnnType ty, Variable <T> x, int numLayers, int hiddenSize, bool isTraining = true, double dropout = 0.0, ulong dropoutSeed = 1337UL)
        {
            Type        = ty;
            IsTraining  = isTraining;
            NumLayers   = numLayers;
            HiddenSize  = hiddenSize;
            // dropout only applies in training mode
            Dropout     = isTraining ? dropout : 0.0;
            DropoutSeed = dropoutSeed;

            // X shape (seqLength, batch, inputSize)
            X = x;
            Util.EnsureEqual(3, X.Shape.Rank, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(X.Shape[0] >= 0, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(X.Shape[1] >= 0, "Input layout: (seqLength, batch, inputSize)");
            Util.EnsureTrue(X.Shape[2] >= 0, "Input layout: (seqLength, batch, inputSize)");
            SeqLength = (int)X.Shape[0];
            MiniBatch = (int)X.Shape[1];
            InputSize = (int)X.Shape[2];

            // Y Shape (seqLength, batch, hiddenSize)
            Y = Variable <T>(PartialShape.Create(SeqLength, MiniBatch, HiddenSize));

            // W shape will be determined during initialization
            W = Parameter <T>();

            // state variables share one shape/descriptor: (numLayers, batch, hiddenSize)
            var shape   = PartialShape.Create(NumLayers, MiniBatch, HiddenSize);
            var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // innermost dimension changes fastest (row-major)

            HX        = Variable <T>(shape);
            CX        = Variable <T>(shape);
            HY        = Variable <T>(shape);
            CY        = Variable <T>(shape);
            StateDesc = new TensorDescriptor();
            StateDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);

            // xDesc is an array, for each step; each step is (batch, inputSize, 1)
            shape   = PartialShape.Create(MiniBatch, InputSize, 1);
            strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
            var xDesc = new TensorDescriptor();

            xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
            // same descriptor instance reused for all steps (shapes are identical)
            XDesc = Enumerable.Repeat(xDesc, SeqLength).ToArray();

            // yDesc is an array, for each step; each step is (batch, hiddenSize, 1)
            shape   = PartialShape.Create(MiniBatch, HiddenSize, 1);
            strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
            var yDesc = new TensorDescriptor();

            yDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
            YDesc = Enumerable.Repeat(yDesc, SeqLength).ToArray();

            // construct the graph: inputs, outputs and cuDNN scratch/state aux vars
            AddInput(X);
            AddInput(W);
            AddOutput(Y);
            AddAuxVar(HX);
            AddAuxVar(CX);
            AddAuxVar(HY);
            AddAuxVar(CY);
            AddAuxVar(DropoutStates);
            AddAuxVar(Workspace);
            AddAuxVar(ReserveSpace);
        }