Code example #1
File: Tensor.cs  Project: stuarthillary/TheaNet
        public static Tensor<Type> operator -(Tensor<Type> x, Tensor<Type> y)
        {
            return(Cache.GetValue($"Sub|{x.Id}|{y.Id}", () =>
            {
                if (x == y)
                {
                    return Op.ZerosLike(x);
                }

                if (x is Elementwise unaryx && unaryx.Abstraction is Scalars.Neg<Type>)
                {
                    return -(unaryx.Inputs[0] + y);        // (-x) - y = -(x + y)
                }
                if (y is Elementwise unaryy && unaryy.Abstraction is Scalars.Neg<Type>)
                {
                    return x + unaryy.Inputs[0];          // x - (-y) = x + y
                }
                return Op.Apply(x, y, (_x, _y) => _x - _y);

                //return Option.TakeFirst(
                //    () => x.Match((Neg<Type> neg) => -(neg.x + y)),
                //    () => y.Match((Neg<Type> neg) => x + neg.x),
                //    () => new BinaryElementwise(x, y, (_x, _y) => new Operators.Scalars.Sub<Type>(_x, _y))
                //);
            }));
        }
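The subtraction operator is cached per operand pair and applies three algebraic rewrites before building a generic elementwise node: x - x collapses to a zeros tensor, (-x) - y becomes -(x + y), and x - (-y) becomes x + y. A minimal usage sketch of that behaviour (the T.Vector call follows the API used in code example #3; the comments state what this snippet's rewrites imply, not a verified TheaNet run):

        // Sketch only: assumes the TheaNet API shown in these examples.
        var a = T.Vector<float>("a");
        var b = T.Vector<float>("b");

        var d1 = a - a;       // collapses to Op.ZerosLike(a); no Sub node is built
        var d2 = a - (-b);    // rewritten as a + b
        var d3 = (-a) - b;    // rewritten as -(a + b)
        var d4 = a - b;       // falls through to the generic elementwise Sub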
Code example #2
 // TODO: fix derivative
 public static Scalar<float> Max(Tensor<float> x) => new Aggregate<float>("Max", x, dx: (_x, _f) => Op.ZerosLike(x));
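As the TODO notes, the derivative hook above returns zeros, so no gradient ever flows through Max. A subgradient of f = max(x) is an indicator tensor that is 1 at the entries attaining the maximum and 0 elsewhere. A hedged sketch of what the hook could compute, assuming a hypothetical elementwise comparison Op.Eq that broadcasts the scalar maximum against the input (not an API confirmed by these snippets):

 // Sketch only: Op.Eq is a hypothetical elementwise equality, used here to
 // route the gradient to the argmax entries and zero it everywhere else.
 public static Scalar<float> Max(Tensor<float> x) => new Aggregate<float>("Max", x, dx: (_x, _f) => Op.Eq(_x, _f));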
Code example #3
        /// <summary>Gated Recurrent Unit (GRU) layer trained with accumulated-gradient Adagrad updates.</summary>
        /// <param name="inputDim">dimension of the input vectors</param>
        /// <param name="hiddenDim">dimension of the hidden layer</param>
        /// <param name="outputDim">dimension of the output vector</param>
        /// <param name="scale">scaling factor to initialize weights</param>
        public GRU(int inputDim, int hiddenDim, int outputDim, float scale = 0.2f)
        {
            // initial hidden state
            h0 = T.Shared(NN.Zeros<float>(hiddenDim), "h0");

            // reset gate layers
            Wr = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wr");
            Ur = T.Shared(NN.Eye<float>(hiddenDim), "Ur");
            br = T.Shared(NN.Zeros<float>(/*1,*/ hiddenDim), "br");

            // update gate layers
            Wz = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "Wz");
            Uz = T.Shared(NN.Eye<float>(hiddenDim), "Uz");
            bz = T.Shared(NN.Zeros<float>(/*1,*/ hiddenDim), "bz");

            // layers
            W = T.Shared(NN.Random.Uniform(-scale, scale, inputDim, hiddenDim), "W");
            U = T.Shared(NN.Eye<float>(hiddenDim), "U");
            b = T.Shared(NN.Zeros<float>(/*1,*/ hiddenDim), "b");

            // prediction layer
            S  = T.Shared(NN.Random.Uniform(-scale, scale, hiddenDim, outputDim), "S");
            Sb = T.Shared(NN.Zeros<float>(/*1,*/ outputDim), "Sb");

            // bundle
            this.@params = new[] { h0, Wr, Ur, br, Wz, Uz, bz, W, U, b, S, Sb };

            // Adagrad shared variables
            this.grads = new Dictionary<string, Tensor<float>.Shared>();
            foreach (var param in @params)
            {
                var name = param.Name + "Grad";
                this.grads[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
            }

            this.hists = new Dictionary<string, Tensor<float>.Shared>();
            foreach (var param in @params)
            {
                var name = param.Name + "Hist";
                this.hists[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
            }

            // Adadelta shared variables
            var hists2 = new Dictionary<string, Tensor<float>.Shared>();

            foreach (var param in @params)
            {
                var name = param.Name + "Hist2";
                hists2[name] = T.Shared(NN.Zeros<float>(param.Value.Shape), name);
            }

            var x        = T.Matrix<float>("x"); // [sentence, inputDim]
            var expected = T.Vector<float>("expected");

            Func<Tensor<float>, Tensor<float>, Tensor<float>[]> recurrence = (x_t, h_tm1) =>
            {
                // reset gate
                var r_t = T.Sigmoid(T.Dot(x_t, Wr) + T.Dot(h_tm1, Ur) + br);
                // update gate
                var z_t = T.Sigmoid(T.Dot(x_t, Wz) + T.Dot(h_tm1, Uz) + bz);
                // proposed hidden state
                var _h_t = T.Tanh(T.Dot(x_t, W) + T.Dot(r_t * h_tm1, U) + b);
                // actual hidden state
                var h_t = z_t * h_tm1 + (1 - z_t) * _h_t;
                // return all the intermediate variables because they may be reused by T.Grad to optimize gradient computation
                return(new[] { h_t, r_t, z_t, _h_t });
            };

            var h = T.Scan(recurrence, x, new[] { h0, null, null, null })[0][-1];

            // cost and gradients
            var output    = T.Dot(h, S) + Sb;
            var error     = 0.5f * T.Norm2(output - expected);
            var gradients = T.Grad(error);

            var updatesTrain = new OrderedDictionary();

            foreach (var param in @params)
            {
                var grad = gradients[param];
                //var grad = T.Clip(update.Item2, -10, 10);

                // Adagrad
                //const float eps = 1e-5f;
                var g = grads[param.Name + "Grad"];
                updatesTrain[g] = g + grad;
                //updates[param] = param - lr * grad / T.Sqrt(hist + eps);

                // Adadelta
                //const float rho = 0.95f;
                //const float eps = 1e-5f;
                //var hist = hists[param.Name + "Hist"];
                //var hist2 = hists2[param.Name + "Hist2"];
                //var newHist = rho * hist + (1 - rho) * (grad * grad);
                //updates[hist] = newHist;
                //var newGrad = grad * T.Sqrt((hist2 + eps) / (newHist + eps));
                //updates[param] = param - newGrad;
                //updates[hist2] = rho * hist2 + (1 - rho) * (newGrad * newGrad);

                // Regular
                //updates[param] = param - lr * grad;
            }

            var         batchSize = T.Scalar<float>("batchSize");
            var         lr        = T.Scalar<float>("lr");
            const float eps       = 1e-5f;

            var updates = new OrderedDictionary();

            foreach (var param in this.@params)
            {
                var grad     = this.grads[param.Name + "Grad"];
                var meanGrad = grad / batchSize;

                var hist = this.hists[param.Name + "Hist"];
                updates[hist]  = hist + meanGrad * meanGrad;
                updates[param] = param - lr * meanGrad / T.Sqrt(hist + eps);
                updates[grad]  = T.ZerosLike(grad);
            }

            // theano functions
            this.Classify = T.Function(input: x, output: output);

            this.Train = T.Function(input: (x, expected),
                                    output: error,
                                    updates: updatesTrain);

            this.Update = T.Function(input: (lr, batchSize), updates: updates);
        }
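For reference, the recurrence above implements the GRU equations exactly as written in the code (note that here the update gate $z_t$ weights the previous state):

    r_t = \sigma(x_t W_r + h_{t-1} U_r + b_r)
    z_t = \sigma(x_t W_z + h_{t-1} U_z + b_z)
    \tilde{h}_t = \tanh(x_t W + (r_t \odot h_{t-1}) U + b)
    h_t = z_t \odot h_{t-1} + (1 - z_t) \odot \tilde{h}_t

Training is split into two functions: Train only accumulates raw gradients into the *Grad shared variables, and Update applies an Adagrad step with the minibatch-averaged gradient $\bar{g} = g / \text{batchSize}$, then resets the accumulators:

    G \leftarrow G + \bar{g}^2, \qquad \theta \leftarrow \theta - \frac{lr \cdot \bar{g}}{\sqrt{G + \epsilon}}, \qquad \epsilon = 10^{-5}

where $G$ is the per-parameter *Hist history. (The Adadelta variables are allocated but only used by the commented-out branch.)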
Code example #4
File: Tensor.For.cs  Project: stuarthillary/TheaNet
            public override void Backward(Tensor<Type> deltas, Backpropagation bp)
            {
                deltas.AssertOfShape(Shape);

                var deltaFromRecursive = OutputInfo != null;

                // var in the forward -> for in the backward
                var forsDic = new Dictionary<ISymbol, IFor>();   // ITensorSymbol

                var backLoop = new Loop("d" + Loop.Name);

                backLoop.Length = Loop.Length;
                var substitution = new Patch(preserveShape: true);

                // add the sequences used by the forward
                int fwdSeqCount = Loop.Sequences.Count;

                for (int i = 0; i < fwdSeqCount; i++)
                {
                    var seq      = Loop.Sequences[i];
                    var variable = Loop.Variable(seq);
                    var alias    = Loop.Sequences[i].Match(
                        (Tensor<float> s) =>
                        backLoop.AddSeq(s[Step_m1], variable.Name + "_", Loop.SequenceAxes[i]),
                        (Tensor<int> s) =>
                        backLoop.AddSeq(s[Step_m1], variable.Name + "_", Loop.SequenceAxes[i]),
                        (Func<ITensor>)null
                        );
                    substitution.Add_(variable, alias);
                }

                // add the sequences computed by the forward
                foreach (var @for in Loop.Fors)
                {
                    if (@for.IsRecursive)
                    {
                        var variable = @for.RecursiveVariable;
                        var alias    = @for.Match(
                            (Tensor<float>.For f) =>
                            backLoop.AddSeq(new Insert<float>(f, 0, f.OutputInfo, 0)[From_m2_Step_m1], variable.Name + "_", axis: 0),
                            (Tensor<int>.For f) =>
                            backLoop.AddSeq(new Insert<int>(f, 0, f.OutputInfo, 0)[From_m2_Step_m1], variable.Name + "_", axis: 0),
                            (Func<ITensor>)null
                            );
                        substitution.Add_(variable, alias);
                    }
                    else
                    {
                        var alias = @for.Match(
                            (Tensor<float>.For f) =>
                            backLoop.AddSeq(f[Step_m1], @for.Name + "_"),
                            (Tensor<int>.For f) =>
                            backLoop.AddSeq(f[Step_m1], @for.Name + "_"),
                            (Func<ITensor>)null
                            );
                        substitution.Add_(@for.Expression, alias);
                    }
                }

                // add the retropropagated delta
                var deltaOut = backLoop.AddSeq(deltas[Step_m1], $"delta_{RecursiveVariable?.ToString() ?? "f" + Index}_", axis: 0);

                // d_ avoids creating duplicate variables with the same name.
                var d_ = new Dictionary<IVar, IVar>();

                // add deltas of sequences (inputs of and computed by the forward), initialized to zero
                var recVariables = Loop.RecursiveFors.Select(f => Loop.Variable(f));

                foreach (var varFwd in Loop.Variables)
                {
                    var zeros = varFwd.Match(
                        (Tensor<float>.Var x) => Op.ZerosLike(x),
                        (Tensor<int>.Var x) => Op.ZerosLike(x),
                        (Func<ITensor>)null
                        );
                    var @for = backLoop.AddRecursive_(zeros, zeros, $"d{varFwd.Name}_");
                    @for.Comment = $"dL/d{varFwd}";

                    d_[varFwd]      = @for.RecursiveVariable;
                    forsDic[varFwd] = @for;
                }

                // `others` collects gradients pushed to expressions of the loop that aren't sequences or variables.
                var others = new Dictionary<IExpr, IFor>();

                AddDeltaFromBackpropagate(backLoop, others, forsDic, Backpropagation.Backward(Expression, deltaFromRecursive ? deltaOut + (Var)d_[RecursiveVariable] : deltaOut));

                foreach (var @for in Loop.RecursiveFors)
                {
                    var variable = @for.RecursiveVariable;

                    if (!deltaFromRecursive || @for != this)
                    {
                        var gradExpr = @for.Match(
                            (Tensor<float>.For f) => Backpropagation.Backward(f.Expression, (Tensor<float>)d_[f.RecursiveVariable]),
                            (Tensor<int>.For f) => Backpropagation.Backward(f.Expression, (Tensor<int>)d_[f.RecursiveVariable]),
                            null
                            );

                        AddDeltaFromBackpropagate(backLoop, others, forsDic, gradExpr);
                    }
                    // else: we already added the delta prior to the loop

                    // reuse results computed during the forward inside the backward
                    var alias_tp1 = backLoop.AddRecursive_(variable, @for[-1], variable.Name + "_tp1").RecursiveVariable;
                    substitution.Add_(@for.Expression, alias_tp1);
                }

                // Substitute variable in fors
                foreach (var @for in backLoop.Fors)
                {
                    var comment = @for.Expression.Comment;
                    @for.Expression         = (ITensor)@for.Expression.Patch(substitution);
                    @for.Expression.Comment = comment;
                }

                // deltas of sequences
                for (int i = 0; i < Loop.Sequences.Count; ++i)
                {
                    if (Loop.Sequences[i] is Tensor<float>)
                    {
                        bp.PushGradientTo((Tensor<float>)Loop.Sequences[i], ((Tensor<float>)backLoop.Fors[i])[Step_m1]);
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }
                }

                // deltas of seed
                foreach (var @for in Loop.RecursiveFors)
                {
                    if (@for is Tensor<float>)
                    {
                        bp.PushGradientTo((Tensor<float>)@for.OutputInfo, ((Tensor<float>)forsDic[@for.RecursiveVariable])[-1]);
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }
                }

                // other deltas
                foreach (var W_dW in others)
                {
                    var W = W_dW.Key; var dW = W_dW.Value;
                    if (W is Tensor<float>)
                    {
                        bp.PushGradientTo((Tensor<float>)W, Op.Sum((Tensor<float>)dW, axis: 0));
                    }
                    else
                    {
                        throw new NotImplementedException();
                    }
                }
            }
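In outline, Backward builds a second loop that runs backpropagation through time for the forward Scan: every forward sequence and computed output is re-fed in reverse order (Step_m1), each loop variable gets a recursive delta accumulator initialized to zero, and the delta flowing into a recursive output combines the externally pushed deltaOut with the delta coming back from the next time step. Gradients reaching loop constants (the `others` dictionary) are produced once per step, so they are summed over the time axis before being pushed, hence Op.Sum(dW, axis: 0). Schematically, for a recurrence $h_t = f(x_t, h_{t-1}; W)$ this is the standard BPTT relation (a general statement, not code from the project):

    \frac{\partial L}{\partial h_{t-1}} = \delta_{t-1} + \frac{\partial L}{\partial h_t}\,\frac{\partial h_t}{\partial h_{t-1}}, \qquad \frac{\partial L}{\partial W} = \sum_t \frac{\partial L}{\partial h_t}\,\frac{\partial h_t}{\partial W}

where $\delta_{t-1}$ is the delta pushed onto the loop output at step t-1 (the deltaOut sequence above).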