Exemplo n.º 1
0
        /// <summary>
        /// Backward pass of the LSTM layer implemented with hand-written GPU kernels.
        /// Walks the sequence from the last time step to the first and fills the gradients of
        /// <c>X</c>, <c>W</c>, <c>HX</c> and <c>CX</c> from the gradient of <c>Y</c>.
        /// </summary>
        /// <remarks>
        /// Layouts assumed by the index arithmetic below:
        /// <list type="bullet">
        /// <item>c, dc, dh (gradient of Hout) and dy are (n, b, d).</item>
        /// <item>ifoa2 and difoa1 are (n, b, 4*d); the four gates are stored as consecutive
        /// d-wide chunks in the order i, f, o, a.</item>
        /// <item>hin and dhin are (n, b, xphpb); each row is [bias, x (inputSize values), h (d values)].</item>
        /// <item>w and dw are (xphpb, 4*d).</item>
        /// </list>
        /// Only a GPU context with <c>T == float</c> is handled; this is asserted through
        /// <c>Util.EnsureTrue</c>. Gradients flowing in through the final states are not
        /// consumed yet (see the TODO in the initialisation kernel).
        /// </remarks>
        /// <param name="executor">Executor that supplies the context, the forward tensors and the gradient buffers.</param>
        public void BackwardOptimized(Executor executor)
        {
            var ctx = executor.Context;

            var dy        = executor.GetGradient(Y); // incoming gradient
            var w         = executor.GetTensor(W);
            var c         = executor.GetTensor(C);
            var hin       = executor.GetTensor(Hin);
            var ifoa2     = executor.GetTensor(IFOA2);
            var n         = (int)dy.Shape[0]; // sequence length
            var b         = (int)dy.Shape[1]; // batch size
            var d         = (int)dy.Shape[2]; // hidden size
            var xphpb     = (int)w.Shape[0];  // row width of hin
            var inputSize = InputSize;

            var cx = executor.GetTensor(CX);
            var hx = executor.GetTensor(HX);

            // the initial states must be (b, d) for the t == 0 branches below
            Util.EnsureTrue(cx.Shape.SequenceEqual(Shape.Create(b, d)));
            Util.EnsureTrue(hx.Shape.SequenceEqual(Shape.Create(b, d)));

            var dc     = GetGradient(executor, C);
            var dx     = executor.GetGradient(X, Shape.Create(n, b, inputSize));
            var dw     = GetGradient(executor, W);
            var difoa1 = GetGradient(executor, IFOA1, ifoa2.Shape);
            var dhin   = GetGradient(executor, Hin);
            var dhout  = GetGradient(executor, Hout, dy.Shape);
            var dhx    = GetGradient(executor, HX);
            var dcx    = GetGradient(executor, CX);

            Util.EnsureTrue(ctx.Type == ContextType.Gpu && typeof(T) == typeof(float), "Currently only support gpu and single precision.");

            if (ctx.Type == ContextType.Gpu && typeof(T) == typeof(float))
            {
                var stream    = ctx.ToGpuContext().Stream;
                var cxPtr     = cx.Buffer.Ptr.Reinterpret<float>();
                var dcxPtr    = dcx.Buffer.Ptr.Reinterpret<float>();
                var dhxPtr    = dhx.Buffer.Ptr.Reinterpret<float>();
                var cPtr      = c.Buffer.Ptr.Reinterpret<float>();
                var dcPtr     = dc.Buffer.Ptr.Reinterpret<float>();
                var dhPtr     = dhout.Buffer.Ptr.Reinterpret<float>();
                var ifoaPtr   = ifoa2.Buffer.Ptr.Reinterpret<float>();
                var _difoaPtr = difoa1.Buffer.Ptr.Reinterpret<float>();
                var dxPtr     = dx.Buffer.Ptr.Reinterpret<float>();
                var dhinPtr   = dhin.Buffer.Ptr.Reinterpret<float>();
                var dyPtr     = dy.Buffer.Ptr.Reinterpret<float>();
                var dwPtr     = dw.Buffer.Ptr.Reinterpret<float>();

                // Element counts of the buffers initialised by the kernel below.
                var seqLength   = n * b * d;     // dh, dc : (n, b, d)
                var stateLength = b * d;         // dhx, dcx : (b, d)
                // dw is (xphpb, 4*d): every entry has to be cleared, because each time step
                // accumulates into the whole matrix through `dw + tmp2`.
                var dwLength    = xphpb * 4 * d;
                // The launch must cover the largest of the three buffers; stateLength is
                // included so dhx/dcx are still cleared when the sequence is empty.
                var initLength  = Math.Max(Math.Max(seqLength, dwLength), stateLength);

                // use one kernel to initialize the data
                stream.For(0, initLength, _i =>
                {
                    var i = (int)_i;

                    if (i < seqLength)
                    {
                        // TODO: dcn and dhn
                        dhPtr[i] = dyPtr[i];
                        dcPtr[i] = 0.0f;
                    }

                    if (i < dwLength)
                    {
                        dwPtr[i] = 0.0f;
                    }

                    if (i < stateLength)
                    {
                        dhxPtr[i] = 0.0f;
                        dcxPtr[i] = 0.0f;
                    }
                });

                for (var t = n - 1; t >= 0; --t)
                {
                    // Element-wise part of the step: one work item per (batch, hidden) pair.
                    // c: n, b, d
                    // h: n, b, d
                    // ifoa: n, b, 4*d
                    stream.For(0, b * d, i =>
                    {
                        var bi = (int)i / d;
                        var di = (int)i % d;

                        var offset1 = t * b * d + bi * d;         // for (n, b, d)
                        var offset2 = t * b * 4 * d + bi * 4 * d; // for (n, b, 4*d)
                        var offsetI = offset2;
                        var offsetF = offset2 + d;
                        var offsetO = offset2 + 2 * d;
                        var offsetA = offset2 + 3 * d;

                        var ct = cPtr[offset1 + di];
                        var it = ifoaPtr[offsetI + di];
                        var ft = ifoaPtr[offsetF + di];
                        var ot = ifoaPtr[offsetO + di];
                        var at = ifoaPtr[offsetA + di];

                        // dc_t already holds what step t+1 propagated back; dh_t holds dy_t
                        // plus the recurrent contribution added by the last kernel of step t+1.
                        var dct = dcPtr[offset1 + di];
                        var dht = dhPtr[offset1 + di];

                        var tanhCt = DeviceFunction.Tanh(ct);

                        // do_t = dh_t * tanh(c_t)
                        var dot = dht * tanhCt;

                        // dc_t += dh_t * o_t * (1 - tanh**2(c_t))
                        dct += dht * ot * (1.0f - tanhCt * tanhCt);

                        // df_t = dc_t * c_t-1
                        // dc_t-1 = dc_t * f_t
                        float dft;
                        if (t > 0)
                        {
                            var ctPrev = cPtr[offset1 - b * d + di];
                            dft        = dct * ctPrev;
                            dcPtr[offset1 - b * d + di] += ft * dct;
                        }
                        else
                        {
                            // at the first step the previous cell state is the initial state cx
                            var ctPrev          = cxPtr[bi * d + di];
                            dft                 = dct * ctPrev;
                            dcxPtr[bi * d + di] = ft * dct;
                        }
                        // di_t = dc_t * a_t
                        var dit = dct * at;
                        // da_t = dc_t * i_t
                        var dat = dct * it;

                        // backprop activation functions
                        // d^a_t = (1 - a_t * a_t) * da_t (for gradient of tanh)
                        var _dat = (1.0f - at * at) * dat;
                        // others are dv = v*(1-v)*dv (for gradient of sigmoid)
                        var _dit = it * (1.0f - it) * dit;
                        var _dft = ft * (1.0f - ft) * dft;
                        var _dot = ot * (1.0f - ot) * dot;

                        _difoaPtr[offsetI + di] = _dit;
                        _difoaPtr[offsetF + di] = _dft;
                        _difoaPtr[offsetO + di] = _dot;
                        _difoaPtr[offsetA + di] = _dat;
                        dcPtr[offset1 + di]     = dct;
                    });

                    // backprop matrix multiply
                    // dw   += hin_t^T . difoa_t   : (xphpb, b) . (b, 4*d)
                    // dhin_t = difoa_t . w^T      : (b, 4*d) . (4*d, xphpb)
                    var _difoat = difoa1.Slice(t).Reshape(b, 4 * d);
                    var tmp1    = executor.GetTensor(Temp1, Shape.Create(b, xphpb));
                    ctx.Assign(tmp1, hin.Slice(t).Reshape(b, xphpb));
                    var tmp2 = executor.GetTensor(Temp2, dw.Shape);
                    ctx.Assign(tmp2, Dot(tmp1.T, _difoat));

                    ctx.Assign(dw, dw + tmp2);
                    ctx.Assign(dhin.Slice(t), Dot(_difoat, w.T));

                    // backprop the identity transforms into hin
                    stream.For(0, b * inputSize, i =>
                    {
                        var bi = (int)i / inputSize;
                        var ii = (int)i % inputSize;

                        // write dx of input size
                        // hin: n, b, 1+input+hidden (the +1 skips the bias column)
                        var value = dhinPtr[t * b * xphpb + bi * xphpb + ii + 1];
                        // x: n, b, inputSize
                        dxPtr[t * b * inputSize + bi * inputSize + ii] = value;
                    });

                    // update dh: the hidden-state columns of dhin_t are the gradient of h_t-1
                    stream.For(0, b * d, i =>
                    {
                        var bi = (int)i / d;
                        var di = (int)i % d;

                        var t0 = t - 1;
                        // dh : n, b, d
                        // dhx : b, d
                        if (t > 0)
                        {
                            dhPtr[t0 * b * d + bi * d + di] += dhinPtr[t * b * xphpb + bi * xphpb + 1 + inputSize + di];
                        }
                        else
                        {
                            // the state before the first step is the initial hidden state hx
                            dhxPtr[bi * d + di] += dhinPtr[t * b * xphpb + bi * xphpb + 1 + inputSize + di];
                        }
                    });
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Forward pass of the LSTM layer implemented with hand-written GPU kernels.
        /// For every time step it builds the stacked input row [bias, x_t, h_t-1], multiplies it
        /// with <c>W</c>, applies the gate non-linearities and stores the new cell and hidden states.
        /// </summary>
        /// <remarks>
        /// The gate buffers are (n, b, 4*d) with the gates stored as consecutive d-wide chunks in
        /// the order i, f, o, a. <c>CY</c>/<c>HY</c> start as copies of <c>CX</c>/<c>HX</c> and
        /// are overwritten at every step, so they hold the last step's states when the method
        /// returns. Only a GPU context with <c>T == float</c> is handled.
        /// </remarks>
        /// <param name="executor">Executor that supplies the context and all tensors.</param>
        public void ForwardOptimized(Executor executor)
        {
            var context  = executor.Context;
            var weights  = executor.GetTensor(W);
            var rowWidth = (int)weights.Shape[0]; // width of one stacked input row
            var input    = executor.GetTensor(X);
            var batch    = (int)input.Shape[1];
            var steps    = (int)input.Shape[0];
            var hidden   = HiddenSize;
            var output   = executor.GetTensor(Y, Shape.Create(steps, batch, hidden));
            var features = InputSize;
            // not referenced below; kept so this method evaluates the same statements as before
            var one = 1.0.AsScalar<T>();

            // initial states, both expected to be (batch, hidden)
            var initialCell   = executor.GetTensor(CX);
            var initialHidden = executor.GetTensor(HX);
            var stateShape    = Shape.Create(batch, hidden);

            Util.EnsureTrue(initialCell.Shape.SequenceEqual(stateShape));
            Util.EnsureTrue(initialHidden.Shape.SequenceEqual(Shape.Create(batch, hidden)));

            // the output states double as the running "previous" states of the recurrence
            var finalCell   = executor.GetTensor(CY, Shape.Create(batch, hidden));
            var finalHidden = executor.GetTensor(HY, Shape.Create(batch, hidden));

            context.Assign(finalCell, initialCell);
            context.Assign(finalHidden, initialHidden);
            var runningCell   = finalCell.Reshape(1, batch, hidden);
            var runningHidden = finalHidden.Reshape(1, batch, hidden);

            var stacked  = executor.GetTensor(Hin, Shape.Create(steps, batch, rowWidth));
            var rawGates = executor.GetTensor(IFOA1, Shape.Create(steps, batch, hidden * 4));
            var gates    = executor.GetTensor(IFOA2, Shape.Create(steps, batch, hidden * 4));
            var cells    = executor.GetTensor(C, Shape.Create(steps, batch, hidden));

            var isGpuFloat = context.Type == ContextType.Gpu && typeof(T) == typeof(float);
            Util.EnsureTrue(isGpuFloat, "Currently only support gpu and single precision.");

            if (!isGpuFloat)
            {
                return;
            }

            var stream      = context.ToGpuContext().Stream;
            var stackedPtr  = stacked.Buffer.Ptr.Reinterpret<float>();
            var inputPtr    = input.Buffer.Ptr.Reinterpret<float>();
            var runHidPtr   = runningHidden.Buffer.Ptr.Reinterpret<float>();
            var runCellPtr  = runningCell.Buffer.Ptr.Reinterpret<float>();
            var rawGatePtr  = rawGates.Buffer.Ptr.Reinterpret<float>();
            var gatePtr     = gates.Buffer.Ptr.Reinterpret<float>();
            var cellPtr     = cells.Buffer.Ptr.Reinterpret<float>();
            var outputPtr   = output.Buffer.Ptr.Reinterpret<float>();

            for (var step = 0; step < steps; ++step)
            {
                // Build row `step` of the stacked input: column 0 is the bias, the next
                // `features` columns are x_t, the remaining columns are h_t-1.
                stream.For(0, batch * rowWidth, idx =>
                {
                    var row = (int)idx / rowWidth;
                    var col = (int)idx % rowWidth;

                    float value;
                    if (col > features)
                    {
                        // hidden part
                        value = runHidPtr[row * hidden + (col - 1 - features)];
                    }
                    else if (col > 0)
                    {
                        // input part
                        value = inputPtr[step * batch * features + row * features + (col - 1)];
                    }
                    else
                    {
                        // bias
                        value = 1.0f;
                    }

                    stackedPtr[step * batch * rowWidth + row * rowWidth + col] = value;
                });

                // pre-activations of all four gates: (batch, rowWidth) . W
                var stackedStep = stacked.Slice(step).Reshape(batch, rowWidth);
                context.Assign(rawGates.Slice(step), Dot(stackedStep, weights));

                // gate non-linearities and state update, one work item per (batch, hidden) pair
                stream.For(0, batch * hidden, idx =>
                {
                    var row = (int)idx / hidden;
                    var col = (int)idx % hidden;

                    var stateIdx = row * hidden + col;                                    // into (b, d)
                    var seqIdx   = step * batch * hidden + stateIdx;                      // into (n, b, d)
                    var idxI     = step * batch * 4 * hidden + row * 4 * hidden + col;    // into (n, b, 4*d)
                    var idxF     = idxI + hidden;
                    var idxO     = idxI + 2 * hidden;
                    var idxA     = idxI + 3 * hidden;

                    var cellBefore = runCellPtr[stateIdx];
                    var rawI       = rawGatePtr[idxI];
                    var rawF       = rawGatePtr[idxF];
                    var rawO       = rawGatePtr[idxO];
                    var rawA       = rawGatePtr[idxA];

                    // i, f and o go through a sigmoid, a goes through tanh
                    var inGate     = 1.0f / (1.0f + DeviceFunction.Exp(-rawI));
                    var forgetGate = 1.0f / (1.0f + DeviceFunction.Exp(-rawF));
                    var outGate    = 1.0f / (1.0f + DeviceFunction.Exp(-rawO));
                    var candidate  = DeviceFunction.Tanh(rawA);

                    // c_t = i_t * a_t + f_t * c_t-1
                    var cellNow = inGate * candidate + forgetGate * cellBefore;

                    // h_t = o_t * tanh(c_t)
                    var hiddenNow = outGate * DeviceFunction.Tanh(cellNow);

                    gatePtr[idxI] = inGate;
                    gatePtr[idxF] = forgetGate;
                    gatePtr[idxO] = outGate;
                    gatePtr[idxA] = candidate;

                    cellPtr[seqIdx]   = cellNow;
                    outputPtr[seqIdx] = hiddenNow;

                    // carry the new states into the next step (and into CY / HY)
                    runHidPtr[stateIdx]  = hiddenNow;
                    runCellPtr[stateIdx] = cellNow;
                });
            }
        }