Example #1
        public void TestLoop1()
        {
            // Computing tanh(x(t).dot(W) + b) elementwise
            //http://deeplearning.net/software/theano/tutorial/loop.html

            // defining the tensor variables
            var X     = T.Matrix <float>("x");
            var W     = T.Matrix <float>("W");
            var b_sym = T.Vector <float>("b_sym");

            var results             = T.Scan(v => T.Tanh(T.Dot(v, W) + b_sym), sequence: X);
            var compute_elementwise = T.Function(inputs: new[] { X, W, b_sym }, output: results);

            // test values
            var x = NN.Eye <float>(2);
            var w = NN.Ones <float>(2, 2);
            var b = NN.Ones <float>(2);

            b.Item[1] = 2;

            var result   = compute_elementwise(new[] { x, w, b });
            var expected = NN.Tanh(x.Dot(w) + b);

            AssertArray.AreAlmostEqual(expected[0], result[0]);
        }
Example #2
        public void SumProductWithSharedHasCorrectGrad()
        {
            // sequence of input
            var xs = T.Matrix <float>("xs");
            // accumulator
            var z = T.Vector <float>("z");
            var b = T.Vector <float>("b");

            // sum xs in the accumulator
            Func <Tensor <float>, Tensor <float>, IList <Tensor <float> > > rec = (x, a) =>
                                                                                  new List <Tensor <float> >()
            {
                x + a, x * a + b
            };
            var loop = T.Scan(rec, xs, new[] { z, null });

            // get the last value
            var prod = loop[1][-1];
            var cost = T.Sum(prod);
            //var dz = T.Grad(cost, z);
            var db = T.Grad(cost, b);

            var reshape = db as Reshaping <float>;
            var sum     = reshape.x as Sum <float>;

            Assert.AreEqual(0, sum.Axis);

            var dfor     = sum.x as Tensor <float> .For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count);
            Assert.AreEqual(4, backLoop.Fors.Count);
            Assert.AreEqual(2, dfor.Index);

            // TODO: check why a recursive was expected
            //var db_ = dfor.RecursiveVariable;
            //Assert.AreEqual("db_", db_.Name);

            var variables = backLoop.Variables.Cast <Tensor <float> >().ToList();
            var x_        = variables[0];

            Assert.AreEqual("x_", x_.Name);
            var a_ = variables[1];

            Assert.AreEqual("a_", a_.Name);
            var d_f1_ = variables[2];

            Assert.AreEqual("delta_f1_", d_f1_.Name);
            var da_ = variables[4];

            Assert.AreEqual("da_", da_.Name);
            var dx = (Tensor <float>)backLoop.Fors[0].Expression;
            var da = (Tensor <float>)backLoop.Fors[1].Expression;

            Assert.IsTrue((d_f1_ * a_ + da_).StructuralEquality(dx));
            Assert.IsTrue((d_f1_ * x_ + da_).StructuralEquality(da));
            Assert.IsTrue((d_f1_).StructuralEquality(dfor.Expression));
        }
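
The dx and da expressions asserted above follow from differentiating the two scan outputs. With a_t = a_{t-1} + x_t and f1_t = x_t * a_{t-1} + b, reverse-mode differentiation gives (a sketch of the standard derivation, not taken from the source): dx_t = delta1_t * a_{t-1} + da_t and da_{t-1} = delta1_t * x_t + da_t, where delta1_t is the incoming gradient of f1_t (the loop variable delta_f1_). These are exactly the expressions checked by the StructuralEquality asserts, and delta1_t alone is the per-step gradient of b, matching the last assert.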
Example #3
        public void TestRange()
        {
            var i = T.Scalar <int>("i");
            var x = T.Range(i);

            var f = T.Function(i, x);

            AssertArray.AreEqual(NN.Range(10), f(10));
        }
Example #4
        public void TestAdd2()
        {
            var x = T.Scalar <int>("x");
            var e = x + x;

            var f = T.Function(x, e);

            Assert.AreEqual(8, f(4));
        }
Example #5
        public void MeanAcceptIntTensor()
        {
            var i = T.Scalar <int>("i");
            var x = T.Range(i);
            var y = T.Mean(x);

            var f = T.Function(i, y);

            AssertArray.AreAlmostEqual(f(10), 4.5f);
        }
Example #6
        public void TestTwoVarExpr()
        {
            var x = T.Scalar <float>("x");
            var y = T.Scalar <float>("y");
            var e = 2 * x + 3 * y;

            var f = T.Function(input: (x, y), output: e);

            Assert.AreEqual(22, f(5, 4));
        }
Example #7
        public void TestAdd()
        {
            var x = T.Scalar <int>("x");
            var y = T.Scalar <int>("y");
            var e = x + y;

            var f = T.Function(input: (x, y), output: e);

            Assert.AreEqual(8, f(5, 3));
        }
Example #8
        public void SumProductHasCorrectGrad()
        {
            // sequence of input
            var xs = T.Matrix <float>("xs");
            // accumulator
            var z = T.Vector <float>("z");

            // sum xs in the accumulator
            Func <Tensor <float>, Tensor <float>, IList <Tensor <float> > > rec = (x, a) =>
                                                                                  new List <Tensor <float> >()
            {
                x + a, x * a
            };
            var loop = T.Scan(rec, xs, new[] { z, null });

            // get the last value
            var prod = loop[1][-1];
            var cost = T.Sum(prod);
            var dz   = T.Grad(cost, z);

            var slicing = dz as Slicing <float>;

            Assert.AreEqual(1, slicing.Slices.Count);
            Assert.IsTrue(slicing.Slices[0].IsSingleton);
            Assert.AreEqual(-1, ((Scalar <int> .Const)slicing.Slices[0].Start).Value);

            var dfor     = slicing.x as Tensor <float> .For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count);
            Assert.AreEqual(3, backLoop.Fors.Count);
            Assert.AreEqual(1, dfor.Index);

            var variables = backLoop.Variables.Cast <Tensor <float> >().ToList();
            var x_        = variables[0];

            Assert.AreEqual("x_", x_.Name);
            var a_ = variables[1];

            Assert.AreEqual("a_", a_.Name);
            var d_f1_ = variables[2];

            Assert.AreEqual("delta_f1_", d_f1_.Name);
            var da_ = variables[4];

            Assert.AreEqual("da_", da_.Name);
            var dx = (Tensor <float>)backLoop.Fors[0].Expression;
            var da = (Tensor <float>)backLoop.Fors[1].Expression;

            Assert.IsTrue((d_f1_ * a_ + da_).StructuralEquality(dx));
            Assert.IsTrue((d_f1_ * x_ + da_).StructuralEquality(da));
        }
Example #9
        public void SumHasCorrectGrad()
        {
            // sequence of input
            var xs = T.Matrix <float>("xs");
            // accumulator
            var z = T.Vector <float>("z");

            // sum xs in the accumulator
            var partialSums = T.Scan((x, a) => x + a, xs, z);
            // get the last value
            var sum  = partialSums[-1];
            var cost = T.Sum(sum * sum);
            var dz   = T.Grad(cost, z);

            var slicing = dz as Slicing <float>;

            Assert.AreEqual(1, slicing.Slices.Count);
            Assert.IsTrue(slicing.Slices[0].IsSingleton);
            Assert.AreEqual(-1, ((Scalar <int> .Const)slicing.Slices[0].Start).Value);

            var dfor     = slicing.x as Tensor <float> .For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count);
            Assert.AreEqual(3, backLoop.Fors.Count);
            Assert.AreEqual(1, dfor.Index);

            var variables = backLoop.Variables.Cast <Tensor <float> >().ToList();
            var x_        = variables[0];

            Assert.AreEqual("x_", x_.Name);
            var a_ = variables[1];

            Assert.AreEqual("a_", a_.Name);
            var delta_a_ = variables[2];

            Assert.AreEqual("delta_a_", delta_a_.Name);
            var dx_ = variables[3];

            Assert.AreEqual("dx_", dx_.Name);
            var da_ = variables[4];

            Assert.AreEqual("da_", da_.Name);
            var dx = (Tensor <float>)backLoop.Fors[0].Expression;
            var da = (Tensor <float>)backLoop.Fors[1].Expression;

            Assert.IsTrue((delta_a_ + da_).StructuralEquality(dx));
            Assert.IsTrue((delta_a_ + da_).StructuralEquality(da));
        }
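
Here the recurrence is purely additive (a_t = a_{t-1} + x_t), so both partial derivatives are 1 and the backward step simply forwards the accumulated gradient: dx_t = da_{t-1} = delta_t + da_t (again a sketch of the usual derivation, not from the source). That is why the same expression delta_a_ + da_ is asserted for both dx and da.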
Example #10
        public void TestForwardCrf()
        {
            var rng = NN.Random.Seed(20130601);
            var o   = T.Matrix <float>("o");
            var c   = T.Tensor3 <float>("c");
            var f   = T.Function(input: (o, c), output: Crf.Forward(o, c));
            var g   = T.Function(input: (o, c), output: Crf.Forward(o, c, viterbi: true));

            for (int i = 0; i < 20; i++)
            {
                var num_labels    = rng.Next(2, 10);
                var num_timesteps = rng.Next(2, 10);
                var obs           = NN.Random.Uniform(-1, 1, num_timesteps, num_labels);
                var chain         = NN.Random.Uniform(-1, 1, num_labels, num_labels, num_labels);

                // smoke-test both compiled functions on the random inputs
                // (assumed intent: the loop otherwise never uses obs and chain)
                f(obs, chain);
                g(obs, chain);
            }
        }
Example #11
        public void SumProductWithSharedCanTrain()
        {
            var n = 2;
            // sequence of input
            var xs = T.Matrix <float>("xs");
            // accumulator
            var z = T.Vector <float>("z");
            var b = T.Shared(NN.Ones(n), "b");

            // sum xs in the accumulator
            Func <Tensor <float>, Tensor <float>, IList <Tensor <float> > > rec = (x, a) =>
                                                                                  new List <Tensor <float> >()
            {
                x + a, x * a + b
            };
            var loop = T.Scan(rec, xs, new[] { z, null });

            // get the last value
            var prod = loop[1][-1];

            // compute the cost and the gradient for the shared b.
            var cost = T.Sum(prod);
            var db   = T.Grad(cost, b);

            var costFunction = T.Function(input: (xs, z), output: cost);
            var xs_          = NN.Array(new float[,] {
                { 1, -1 },
                { 0, -2 }
            });

            var z_ = NN.Zeros(n);

            var cost_xs_z = costFunction(xs_, z_);

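            // With xs = [[1,-1],[0,-2]], z = [0,0], b = [1,1]:
            //   step 1: a1 = z + x1  = [1,-1],  f1 = x1 * z  + b = [1,1]
            //   step 2: a2 = a1 + x2 = [1,-3],  f2 = x2 * a1 + b = [1,3]
            // cost = sum(f2) = 1 + 3 = 4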
            Assert.AreEqual(4, cost_xs_z);

            var updates = new OrderedDictionary {
                { b, b - 0.05f * db }
            };
            var train      = T.Function(input: (xs, z), output: cost, updates: updates);
            var cost_xs_z2 = train(xs_, z_);

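            // b contributes only to the last step's output f2, so db = [1,1]
            // and one step of b - 0.05 * db gives [0.95, 0.95]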
            AssertArray.AreAlmostEqual(NN.Array(new[] { 0.95f, 0.95f }), b.Value);
        }
Example #12
        public void TestRecursive()
        {
            // http://deeplearning.net/software/theano/tutorial/loop.html
            // define tensor variables
            var X       = T.Vector <float>("X");
            var W       = T.Matrix <float>("W");
            var b_sym   = T.Matrix <float>("b_sym");
            var U       = T.Matrix <float>("U");
            var Y       = T.Matrix <float>("Y");
            var V       = T.Matrix <float>("V");
            var P       = T.Matrix <float>("P");
            var results = T.Scan((yy, pp, xx_tm1) => T.Tanh(T.Dot(xx_tm1, W) + T.Dot(yy, U) + T.Dot(pp, V)),
                                 sequences: new[] { Y, P[XSlicer.Step(-1)] },
                                 outputsInfo: X);
            var compute_seq = T.Function(inputs: new[] { X, W, Y, U, P, V }, output: results);
            // test values
            var x = NN.Zeros <float>(2);

            x.Item[1] = 1;
            var w = NN.Ones <float>(2, 2);
            var y = NN.Ones <float>(5, 2);

            y.Item[0] = -3;
            var u = NN.Ones <float>(2, 2);
            var p = NN.Ones <float>(5, 2);

            p.Item[0] = 3;
            var v      = NN.Ones <float>(2, 2);
            var result = compute_seq(new[] { x, w, y, u, p, v }); // Array<float>[5] => theano returns Array<float>[5][1]
            // comparison with numpy
            var x_res = NN.Zeros <float>(5, 2);

            x_res[0] = NN.Tanh(x.Dot(w) + y[0].Dot(u) + p[4].Dot(v));
            for (int i = 1; i < 5; i++)
            {
                x_res[i] = NN.Tanh(x_res[i - 1].Dot(w) + y[i].Dot(u) + p[4 - i].Dot(v));
            }

            AssertArray.AreAlmostEqual(x_res, result);
        }
Example #13
        public void RnnXorHasCorrectGradient()
        {
            NN.Random.Seed(12345);
            int nh = 10; // hidden layer

            var Wbit   = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As <float>(), "Wbit");
            var Wstate = T.Shared(NN.Eye <float>(nh), "Wstate");
            var Wout   = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, 1, nh).As <float>(), "Wout");
            var b      = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, 1).As <float>(), "b");

            var state0 = T.Shared(NN.Zeros <float>(nh, 1), "state0");

            var bits     = T.Tensor3 <float>("bits");          // n x 1
            var expected = T.Matrix <float>("expected");       // 1 x 1

            Func <Tensor <float>, Tensor <float>, Tensor <float> > recurrence = (bit, oldState) =>
            {
                return(T.Tanh(T.Dot(Wbit, bit) + T.Dot(Wstate, oldState) + b));
            };

            var states = T.Scan(fn: recurrence, sequence: bits, outputsInfo: state0);

            var output    = T.Tanh(T.Dot(Wout, states[(Slice)(-1)]));
            var error     = 0.5f * T.Norm2(output - expected);
            var classify  = T.Function(bits, output);
            var gradients = T.Grad(error);

            var gradWstate = gradients[Wstate];

            Assert.IsNotNull(gradWstate);
            var gradWstateIsReshape = gradWstate as Reshaping <float>;

            Assert.IsNotNull(gradWstateIsReshape);
            var gradWstateIsSum = gradWstateIsReshape.x as Sum <float>;

            Assert.IsNotNull(gradWstateIsSum);
            var dfor     = gradWstateIsSum.x as Tensor <float> .For;
            var backLoop = dfor.Loop;

            Assert.AreEqual(3, backLoop.Sequences.Count); // bit, states, delta
            Assert.AreEqual(6, backLoop.Fors.Count);      // dbit, dstate, dWstate, db, dWbit, dstate_p1
            Assert.AreEqual(3, dfor.Index);

            // TODO: check why a recursive was expected
            //var dWstate_ = dfor.RecursiveVariable;
            //Assert.AreEqual("dWstate_", dWstate_.Name);

            var variables = backLoop.Variables.Cast <Tensor <float> >().ToList();
            var bit_      = variables[0];

            Assert.AreEqual("bit_", bit_.Name);
            var oldState_ = variables[1];

            Assert.AreEqual("oldState_", oldState_.Name);
            var delta_oldState_ = variables[2];

            Assert.AreEqual("delta_oldState_", delta_oldState_.Name);
            var dbit_ = variables[3];

            Assert.AreEqual("dbit_", dbit_.Name);
            var doldState_ = variables[4];

            Assert.AreEqual("doldState_", doldState_.Name);
            var oldState_tp1_ = variables[5];

            Assert.AreEqual("oldState_tp1", oldState_tp1_.Name);

            var d = T.Sum((delta_oldState_ + doldState_) * (1f - T.Square(oldState_tp1_)), axis: 1, keepDims: true);

            var doldState = (Tensor <float>)backLoop.Fors[1].Expression;

            (T.Dot(Wstate, d, transposeX: true)).AssertEqual(doldState);

            var dWstate    = (Tensor <float>)backLoop.Fors[3].Expression;
            var dWstateExp = T.Dot(d, oldState_, transposeY: true);

            dWstateExp.AssertEqual(dWstate);

            var dbit = (Tensor <float>)backLoop.Fors[0].Expression;

            Assert.IsTrue((T.Dot(Wbit, d, transposeX: true)).StructuralEquality(dbit));

            var oldState_tp1 = (Tensor <float>)backLoop.Fors[5].Expression;

            oldState_tp1.AssertEqual(oldState_);
        }
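
The expected expressions follow from backpropagating through h_t = tanh(Wbit.x_t + Wstate.h_{t-1} + b). Writing delta_t = (incoming dh_t) * (1 - h_t^2) for the gradient at the pre-activation (the d computed above), the chain rule gives dbit_t = Wbit^T.delta_t, dh_{t-1} = Wstate^T.delta_t and dWstate_t = delta_t.h_{t-1}^T. This is a sketch of the standard tanh backprop, not taken from the source, and it is what the three Fors expressions are checked against.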
Example #14
        /// <summary></summary>
        /// <param name="inputDim">dimension of the input vectors</param>
        /// <param name="hiddenDim">dimension of the hidden layer</param>
        /// <param name="outputDim">dimension of the output vector</param>
        /// <param name="scale">scaling factor to initialize weights</param>
        public GRU(int inputDim, int hiddenDim, int outputDim, float scale = 0.2f)
        {
            // initial hidden state
            h0 = T.Shared(NN.Zeros <float>(hiddenDim), "h0");

            // reset gate layers
            Wr = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wr");
            Ur = T.Shared(NN.Eye <float>(hiddenDim), "Ur");
            br = T.Shared(NN.Zeros <float>(/*1,*/ hiddenDim), "br");

            // update gate layers
            Wz = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wz");
            Uz = T.Shared(NN.Eye <float>(hiddenDim), "Uz");
            bz = T.Shared(NN.Zeros <float>(/*1,*/ hiddenDim), "bz");

            // layers
            W = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "W");
            U = T.Shared(NN.Eye <float>(hiddenDim), "U");
            b = T.Shared(NN.Zeros <float>(/*1,*/ hiddenDim), "b");

            // prediction layer
            S  = T.Shared(NN.Random.Uniform(-scale, scale, hiddenDim, outputDim), "S");
            Sb = T.Shared(NN.Zeros <float>(/*1,*/ outputDim), "Sb");

            // bundle
            this.@params = new[] { h0, Wr, Ur, br, Wz, Uz, bz, W, U, b, S, Sb };

            // Adagrad shared variables
            this.grads = new Dictionary <string, Tensor <float> .Shared>();
            foreach (var param in @params)
            {
                var name = param.Name + "Grad";
                this.grads[name] = T.Shared(NN.Zeros <float>(param.Value.Shape), name);
            }

            this.hists = new Dictionary <string, Tensor <float> .Shared>();
            foreach (var param in @params)
            {
                var name = param.Name + "Hist";
                this.hists[name] = T.Shared(NN.Zeros <float>(param.Value.Shape), name);
            }

            // Adadelta shared variables
            var hists2 = new Dictionary <string, Tensor <float> .Shared>();

            foreach (var param in @params)
            {
                var name = param.Name + "Hist2";
                hists2[name] = T.Shared(NN.Zeros <float>(param.Value.Shape), name);
            }

            var x        = T.Matrix <float>("x"); // [sentence, inputDim]
            var expected = T.Vector <float>("expected");

            Func <Tensor <float>, Tensor <float>, Tensor <float>[]> recurrence = (x_t, h_tm1) =>
            {
                // reset gate
                var r_t = T.Sigmoid(T.Dot(x_t, Wr) + T.Dot(h_tm1, Ur) + br);
                // update gate
                var z_t = T.Sigmoid(T.Dot(x_t, Wz) + T.Dot(h_tm1, Uz) + bz);
                // proposed hidden state
                var _h_t = T.Tanh(T.Dot(x_t, W) + T.Dot(r_t * h_tm1, U) + b);
                // actual hidden state
                var h_t = z_t * h_tm1 + (1 - z_t) * _h_t;
                // return all the intermediate variables because they may be reused by T.Grad to optimize gradient computation
                return(new[] { h_t, r_t, z_t, _h_t });
            };

            var h = T.Scan(recurrence, x, new[] { h0, null, null, null })[0][-1];

            // cost and gradients
            var output    = T.Dot(h, S) + Sb;
            var error     = 0.5f * T.Norm2(output - expected);
            var gradients = T.Grad(error);

            var updatesTrain = new OrderedDictionary();

            foreach (var param in @params)
            {
                var grad = gradients[param];
                //var grad = T.Clip(update.Item2, -10, 10);

                // Adagrad
                //const float eps = 1e-5f;
                var g = grads[param.Name + "Grad"];
                updatesTrain[g] = g + grad;
                //updates[param] = param - lr * grad / T.Sqrt(hist + eps);

                // Adadelta
                //const float rho = 0.95f;
                //const float eps = 1e-5f;
                //var hist = hists[param.Name + "Hist"];
                //var hist2 = hists2[param.Name + "Hist2"];
                //var newHist = rho * hist + (1 - rho) * (grad * grad);
                //updates[hist] = newHist;
                //var newGrad = grad * T.Sqrt((hist2 + eps) / (newHist + eps));
                //updates[param] = param - newGrad;
                //updates[hist2] = rho * hist2 + (1 - rho) * (newGrad * newGrad);

                // Regular
                //updates[param] = param - lr * grad;
            }

            var         batchSize = T.Scalar <float>("batchSize");
            var         lr        = T.Scalar <float>("lr");
            const float eps       = 1e-5f;

            var updates = new OrderedDictionary();

            foreach (var param in this.@params)
            {
                var grad     = this.grads[param.Name + "Grad"];
                var meanGrad = grad / batchSize;

                var hist = this.hists[param.Name + "Hist"];
                updates[hist]  = hist + meanGrad * meanGrad;
                updates[param] = param - lr * meanGrad / T.Sqrt(hist + eps);
                updates[grad]  = T.ZerosLike(grad);
            }

            // theano functions
            this.Classify = T.Function(input: x, output: output);

            this.Train = T.Function(input: (x, expected),
                                    output: error,
                                    updates: updatesTrain);

            this.Update = T.Function(input: (lr, batchSize), updates: updates);
        }
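
A minimal usage sketch for this class, assuming arrays of training pairs are at hand (the names gru, sentences and targets are hypothetical, not from the source): Train accumulates gradients into the *Grad shared variables and returns the error, while Update applies one Adagrad step scaled by the batch size and zeroes the accumulators, so several examples can be batched between updates.

        // Hypothetical training driver; sentences[i] is a [length, inputDim]
        // matrix and targets[i] an outputDim vector
        var gru = new GRU(inputDim: 50, hiddenDim: 100, outputDim: 5);

        for (int epoch = 0; epoch < 10; epoch++)
        {
            for (int i = 0; i < sentences.Length; i++)
            {
                var error = gru.Train(sentences[i], targets[i]); // accumulates gradients, returns error
                if ((i + 1) % 16 == 0)
                    gru.Update(0.05f, 16f);                      // one Adagrad step: lr, batchSize
            }
        }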
Example #15
 public static Tensor <Type> operator +(Scalar <Type> x, Tensor <Type> y) => Op.Apply(y, _y => x + _y);
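
The symmetric overload is presumably defined the same way, lifting the scalar over each element from the right; a sketch under that assumption:

 public static Tensor <Type> operator +(Tensor <Type> x, Scalar <Type> y) => Op.Apply(x, _x => _x + y);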