/// <summary>
/// Given (symbolic) log-domain potentials, construct the graph for forward inference in a chain CRF.
/// </summary>
/// <param name="obs_potentials">(n_steps, n_classes) Axes correspond to time and the value of the discrete label variable.
/// This is the energy assigned to a configuration (so higher energy = lower probability).</param>
/// <param name="chain_potentials">(n_classes, n_classes, n_classes) Axes correspond to the left label state, the right label state, and the global label.
/// Corresponds to the energy of a given pair of labels adjacent to one another (higher energy = lower probability).</param>
/// <param name="viterbi">Perform MAP inference with the Viterbi algorithm rather than marginalizing over the step-specific
/// label variables; use the single most likely configuration instead.</param>
/// <returns>(1-dimensional) The energy assigned to each global label.
/// This can be turned into a log-probability by subtracting logsumexp(energy).</returns>
public static Tensor<float> Forward(Tensor<float> obs_potentials, Tensor<float> chain_potentials, bool viterbi = false)
{
    Func<Tensor<float>, Tensor<float>, Tensor<float>> inner_function = (obs, prior_result /*, chain_potentials*/) =>
    {
        prior_result = prior_result.DimShuffle(0, 'x', 1);
        obs = obs.DimShuffle('x', 0, 'x');
        if (viterbi)
            return T.Max(-prior_result - obs - chain_potentials, axis: 0);
        else
            return LogSumExp(-prior_result - obs - chain_potentials, axis: 0);
    };

    Debug.Assert(obs_potentials.NDim == 2);
    Debug.Assert(chain_potentials.NDim == 3);

    var initial = obs_potentials[0].DimShuffle(0, 'x') * T.OnesLike(chain_potentials[0]);
    var scanned = T.Scan(
        fn: inner_function,
        outputsInfo: initial,
        sequences: new[] { obs_potentials[XSlicer.From(1)] }
        //non_sequences: chain_potentials
    );

    if (viterbi)
        return -T.Max(scanned[-1], axis: 0);
    else
        return -LogSumExp(scanned[-1], axis: 0);
}
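// Usage sketch (not part of the library): converting the energies returned by Forward into
// normalized log-probabilities over the global label, following the docstring above. The
// method name and the symbolic inputs `obs` and `chain` are hypothetical; the broadcast of
// the 0-dimensional logsumexp against the 1-dimensional energy vector is assumed.
public static Tensor<float> GlobalLabelLogProbabilities(Tensor<float> obs, Tensor<float> chain)
{
    var energy = Forward(obs, chain);           // (n_classes,) energy per global label
    return energy - LogSumExp(energy, axis: 0); // subtract logsumexp(energy), per the docstring
}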
/// <summary>
/// Checks the gradient of an expression without inputs.
/// </summary>
public static void PassesGradientCheck(Scalar<float> expr, Scalar<float> W,
    float epsilon = 0.001f, float relativeErr = 1e-3f, float absErr = 1e-4f, int repeat = 6)
{
    var checkGrad = T.RandomGradientCheck(EmptyArray<IVar>.Value, expr, W);
    var fault = 0;
    var errors = "";
    for (int _ = 0; _ < repeat; ++_)
    {
        var eps = (_ % 2 == 0) ? epsilon : -epsilon;
        var checkRes = checkGrad(eps);
        var finite = checkRes.Item1;
        var backpropagated = checkRes.Item2;
        if (!AssertArray.CheckAreAlmostEqual(finite, backpropagated, relativeErr, absErr))
        {
            var abs = Math.Abs(finite - backpropagated);
            var relative = 2 * abs / (Math.Abs(finite) + Math.Abs(backpropagated));
            errors += $"For epsilon {eps} expected: {finite}, actual {backpropagated}, diff {abs}, relative {relative}.\n";
            ++fault;
        }
        if (_ % 2 == 1)
            epsilon *= 10;
    }
    if (fault > 0)
        throw new Exception($"The computed gradient of {W} doesn't match finite difference (failed {fault} times over {repeat}).\n{errors}");
}
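// Usage sketch (hypothetical test): the zero-input overload is called with the loss
// expression and the shared scalar to check, mirroring the Min/Max tests later in this file.
public void SharedScalarPassesGradientCheck()
{
    var w = T.Shared(0.5f, "w");
    var loss = w * w + 3f * w; // d(loss)/dw = 2w + 3
    AssertTensor.PassesGradientCheck(loss, w);
}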
public void TestExp()
{
    var x = T.Scalar<float>("x");
    var e = T.Exp(x / 5);
    var de = T.Grad(e, x);

    var f = T.Function(x, e);
    Assert.AreEqual((float)Math.Exp(4f / 5f), f(4));
    Assert.AreEqual((float)Math.Exp(5f / 5f), f(5));

    var df = T.Function(x, de);
    Assert.AreEqual((float)Math.Exp(4f / 5f) / 5f, df(4));
    Assert.AreEqual((float)Math.Exp(5f / 5f) / 5f, df(5));

    var fdf = T.Function(x, new[] { e, de });
    var res = fdf(4);
    Assert.AreEqual(f(4), res[0]);
    Assert.AreEqual(df(4), res[1]);
    res = fdf(5);
    Assert.AreEqual(f(5), res[0]);
    Assert.AreEqual(df(5), res[1]);
}
/// <summary>
/// Compute log(sum(exp(x), axis)) in a numerically stable fashion.
/// </summary>
/// <param name="x">A tensor (any number of dimensions will do).</param>
/// <param name="axis">Axis over which to perform the summation.</param>
/// <returns>The result of the log(sum(exp(...))) operation.</returns>
public static Tensor<float> LogSumExp(Tensor<float> x, int axis)
{
    var xmax = T.Max(x, axis: axis, keepDims: true); // kept for broadcasting against x
    var xmax_ = T.Max(x, axis: axis);
    return xmax_ + T.Log(T.Sum(T.Exp(x - xmax), axis: axis));
}
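// Illustration sketch (hypothetical test): for large inputs the naive log(sum(exp(x)))
// overflows, because exp(1000) is infinite in float32, while LogSumExp first subtracts the
// max and stays finite: logsumexp([1000, 1000, 1000]) = 1000 + log(3) ≈ 1001.0986.
public void LogSumExpIsStableSketch()
{
    var x = T.Vector<float>(3, "x");
    var naive = T.Log(T.Sum(T.Exp(x), axis: 0));
    var stable = LogSumExp(x, axis: 0);
    var f = T.Function(x, naive);
    var g = T.Function(x, stable);
    var big = NN.Array<float>(1000f, 1000f, 1000f);
    Console.WriteLine(f(big)); // Infinity: exp(1000) overflows
    Console.WriteLine(g(big)); // ≈ 1001.0986
}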
public void TestFloat()
{
    var state = T.Shared(0f, "state");
    var inc = T.Scalar<float>("inc");
    var updates = new OrderedDictionary { { state, state + inc } };
    var accumulator = T.Function(inc, state, updates);

    Assert.AreEqual(0, state.Value);
    Assert.AreEqual(0, accumulator(1));
    Assert.AreEqual(1, state.Value);
    Assert.AreEqual(1, accumulator(300));
    Assert.AreEqual(301, state.Value);

    state.Value = -1;
    Assert.AreEqual(-1, accumulator(3));
    Assert.AreEqual(2, state.Value);

    var updates2 = new OrderedDictionary { { state, state - inc } };
    var decrementor = T.Function(inc, state, updates2);
    Assert.AreEqual(2, decrementor(2));
    Assert.AreEqual(0, state.Value);
}
/// <summary>
/// Checks the gradient of an expression with one input.
/// If a dimension of the input's shape is unknown, it is replaced by 10.
/// </summary>
public static void PassesGradientCheck<X>(Tensor<X>.Var input, Scalar<float> expr, Tensor<float> W,
    float epsilon = 0.001f, float relativeErr = 1e-3f, float absErr = 1e-4f, int repeat = 50,
    Func<Array<X>> init = null)
{
    var xShape = input.Shape.Select(s => (s as Scalar<int>.Const)?.Value ?? 10).ToArray();
    var checkGrad = T.RandomGradientCheck(new[] { input }, expr, W);
    if (init == null)
        init = () => NN.Random.Uniform(-1f, 1f, xShape).As<X>();

    var fault = 0;
    var last = "";
    for (int _ = 0; _ < repeat; ++_)
    {
        var x = init();
        var checkRes = checkGrad(x, epsilon);
        var finite = checkRes.Item1;
        var backpropagated = checkRes.Item2;
        if (!AssertArray.CheckAreAlmostEqual(finite, backpropagated, relativeErr, absErr))
        {
            var abs = Math.Abs(finite - backpropagated);
            var relative = 2 * abs / (Math.Abs(finite) + Math.Abs(backpropagated));
            last += $"Expected: {finite}, actual {backpropagated}, diff {abs}, relative {relative}.\n";
            ++fault;
        }
    }
    if (fault > 0)
        throw new Exception($"The computed gradient of {W.Name} doesn't match finite difference (failed {fault} times over {repeat}).\n{last}");
}
public void ScanPassesGradientCheckOnSeq2Seq()
{
    int embeddingSize = 10, vocabSize = 100;
    var L = T.Shared(NN.Random.Uniform(-0.01f, 0.01f, vocabSize, embeddingSize), "L");
    var W = T.Shared(NN.Random.Uniform(-0.01f, 0.01f, embeddingSize, embeddingSize), "W");
    var ids = T.Vector<int>(-1, "ids");
    var xs = L[ids];

    var scan = T.Scan((x, acc) => T.Tanh(T.Dot(acc + x, W)), sequence: xs, outputsInfo: T.Zeros<float>(embeddingSize));
    var norm2 = T.Norm2(scan);

    var grad = T.Grad(norm2);
    var updates = new OrderedDictionary
    {
        [W] = W - 0.001f * grad[W],
        [L] = L - 0.001f * grad[L]
    };
    var f = T.Function(input: ids, output: norm2, updates: updates);

    Func<Array<int>> init = () => NN.Random.Uniform(0, vocabSize - 1, 10).As<int>();
    f(init());

    AssertTensor.PassesGradientCheck(ids, norm2, W, init: init);
    AssertTensor.PassesGradientCheck(ids, norm2, L, init: init);
}
public void TestScanOnTanhSumDot()
{
    var W = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, 4, 5).As<float>(), "W");
    Func<Tensor<float>, Tensor<float>, Tensor<float>> recurrence = (x, acc) => T.Tanh(acc + T.Dot(W, x));

    var X = T.Matrix<float>(-1, 5, "X");
    var acc0 = T.Shared(NN.Zeros<float>(4), "acc0");
    var result = T.Scan(fn: recurrence, sequences: new[] { X }, outputsInfo: acc0);
    var norm2 = T.Norm2(result[-1]);

    var f = T.Function(X, norm2);
    var grad = T.Grad(norm2, W);
    var df = T.Function(input: X, output: (norm2, grad));
    df(NN.Array(new[,] { { 0f, 0f, 0f, 0f, 0f } }));

    AssertTensor.PassesGradientCheck(X, norm2, acc0);
    AssertTensor.PassesGradientCheck(X, norm2, W);
}
public void Convolve2DPassesGradientCheck()
{
    //int[] poolingShape = new int[] { 1, 1 };
    int[] kernelShape = new int[] { 7, 7 };
    int[] inputShape = new int[] { 100, 100 };
    var iS = NN.Array(inputShape).As<float>();
    var kS = NN.Array(kernelShape).As<float>();

    // layers
    var W = T.Shared(NN.Random.Uniform(-0.01f, 0.01f, kernelShape).As<float>(), "W");
    //var flatShape = ((inputShape[0] + kernelShape[0] - 1) / poolingShape[0]) * ((inputShape[1] + kernelShape[1] - 1) / poolingShape[1]);
    var flatShape = (inputShape[0] + kernelShape[0] - 1) * (inputShape[1] + kernelShape[1] - 1);
    var scaling = (iS[0] + kS[0] - 1f) + (iS[1] + kS[1] - 1f);
    var S = T.Shared(NN.Random.Uniform(-10f, 10f, 2, flatShape).As<float>() / scaling, "S");
    var Sb = T.Shared(NN.Zeros<float>(2, 1), "Sb");

    var x = T.Matrix<float>(inputShape[0], inputShape[1], "x"); // [inputLength]
    var h = T.Sigmoid(T.Convolve2d(x, W, mode: ConvMode.Full));
    //h = T.MaxPooling2d(h, poolingShape[0], poolingShape[1], true);
    h = h.Reshape(flatShape, 1);

    var debug = (T.Dot(S, h) + Sb).Reshape(2);
    var pred = T.Softmax(debug);
    var nll = -T.Mean(T.Log(pred)[1]);

    AssertTensor.PassesGradientCheck(x, nll, W, relativeErr: 1e-3f, absErr: 1e-3f);
}
public void TestOnehotDotM()
{
    var M = T.Matrix<float>("M");
    var X = T.Matrix<float>("X");
    var a = T.Vector<float>("a");
    var oneHot = T.OneHot(X.Shape, 1, a);
    var B = T.Dot(oneHot, M);

    var M_ = NN.Array(new float[,] { { 0, 3, 7 }, { 5, 2, 0 } });
    var X_ = NN.Zeros(4, 2);
    var a_ = NN.Array<float>(1, -1);

    var B_ = Op.Function(input: (M, X, a), output: B);
    var B_pred = B_(M_, X_, a_);

    var Y_ = X_.Copy();
    Y_[1] = a_;
    var B_exp = Y_.Dot(M_);
    AssertArray.AreEqual(B_exp, B_pred);
}
/// <summary> /// /// </summary> /// <param name="nh">dimension of the hidden layer</param> /// <param name="nc">number of classes</param> /// <param name="ne">number of word embeddings in the vocabulary</param> /// <param name="de">dimension of the word embeddings</param> /// <param name="cs">word window context size</param> public Elman(int nh, int nc, int ne, int de, int cs) { // parameters of the model this.emb = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, ne + 1, de), "emb"); // add one for PADDING at the end this.Wx = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, de * cs, nh), "Wx"); this.Wh = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, nh), "Wh"); this.W = T.Shared(0.2f * NN.Random.Uniform(-1.0f, 1.0f, nh, nc), "W"); this.bh = T.Shared(NN.Zeros <float>(nh), "bh"); this.b = T.Shared(NN.Zeros <float>(nc), "b"); this.h0 = T.Shared(NN.Zeros <float>(nh), "h0"); // bundle this.@params = new[] { this.emb, this.Wx, this.Wh, this.W, this.bh, this.b, this.h0 }; this.names = new[] { "embeddings", "Wx", "Wh", "W", "bh", "b", "h0" }; var idxs = T.Matrix <int>("idxs"); // as many columns as context window size/lines as words in the sentence var x = this.emb[idxs].Reshape(idxs.Shape[0], de * cs); // joc: idxs.shape = [sentence, cs], emb.shape = [ne, de], emb[idx].shape = [sentence, cs, de], reshape = [sentence, de * cs] var y = T.Scalar <int>("y"); // label Func <Tensor <float>, Tensor <float>, Tensor <float>[]> recurrence = (x_t, h_tm1) => { var h_t = T.Sigmoid(T.Dot(x_t, this.Wx) + T.Dot(h_tm1, this.Wh) + this.bh); var s_t = T.Softmax(T.Dot(h_t, this.W) + this.b); return(new[] { h_t, s_t }); }; var result = T.Scan(fn: recurrence, sequences: x, outputsInfo: new[] { this.h0, null } /*, n_steps: x.Shape[0]*/); var h = result[0]; var s = result[1]; var p_y_given_x_lastword = s[-1, /*0,*/ XSlicer._]; // 0 because of Theano's Softmax ? var p_y_given_x_sentence = s[XSlicer._, /*0,*/ XSlicer._]; var y_pred = T.Argmax(p_y_given_x_sentence, axis: 1); // cost and gradients and learning rate var lr = T.Scalar <float>("lr"); Loss = -T.Mean(T.Log(p_y_given_x_lastword)[y]); var gradients = T.Grad(Loss); var updates = new OrderedDictionary(); foreach (var W in @params) { updates[W] = W - lr * gradients[W]; } // theano functions this.classify = T.Function(input: idxs, output: y_pred); this.train = T.Function(input: (idxs, y, lr), output: Loss, updates: updates); this.normalize = T.Function(updates: new OrderedDictionary { { emb, emb / T.Sqrt(T.Sum(T.Pow(emb, 2), axis: 1)).DimShuffle(0, 'x') } }); }
public void CustomOpSupportsStatic()
{
    var x = T.Scalar<float>("x");
    Scalar<float> y = CustomOp.Create("myCustomCosinus", Cos, x);
    var f = T.Function(x, y);
    AssertAreCoherents(Cos, f);
}
public void CustomOpSupportsLambda()
{
    var x = T.Scalar<float>("x");
    Scalar<float> y = CustomOp.Create("myCustomSinus", a => (float)Math.Sin(a), x);
    var f = T.Function(x, y);
    AssertAreCoherents(a => (float)Math.Sin(a), f);
}
public static void Test2()
{
    var a = T.Vector<float>("a");                   // declare variable (a vector, since a 1-D array is passed below)
    var @out = a + T.Pow(a, 10);                    // build symbolic expression
    var f = T.Function(a, @out);                    // compile function
    Console.WriteLine(f(NN.Array<float>(0, 1, 2))); // prints `array([0, 2, 1026])`
}
public void ItemPassesGradientCheck()
{
    var y = T.Vector<float>(10, "y");
    var b = T.Shared(NN.Random.Uniform(-1f, 1f, 10).As<float>(), "b");
    AssertTensor.PassesGradientCheck(y, (y + b).Item[5], b);
    AssertTensor.PassesGradientCheck(y, (y + b).Item[-3], b);
}
/// <summary> /// /// </summary> /// <param name="nh">dimension of the hidden layer</param> /// <param name="nc">number of classes</param> /// <param name="de">dimension of the word embeddings</param> /// <param name="cs">word window context size</param> public Elman3(int nh, int nc, int de, int cs) { // parameters of the model var scale = 0.2f; this.Wx = T.Shared(scale * NN.Random.Uniform(-1.0f, 1.0f, de * cs, nh), "Wx"); //this.Wh = T.Shared(scale * NN.Random.Uniform(-1.0f, 1.0f, nh, nh), "Wh"); this.Wh = T.Shared(NN.Eye <float>(nh), "Wh"); this.W = T.Shared(scale * NN.Random.Uniform(-1.0f, 1.0f, nh, nc), "W"); this.bh = T.Shared(NN.Zeros <float>(nh), "bh"); this.b = T.Shared(NN.Zeros <float>(nc), "b"); this.h0 = T.Shared(NN.Zeros <float>(nh), "h0"); // bundle this.@params = new[] { this.Wx, this.Wh, this.W, this.bh, this.b, this.h0 }; var x = T.Matrix <float>("x"); // [sentence, de * cs] var y = T.Scalar <int>("y"); // label Func <Tensor <float>, Tensor <float>, Tensor <float>[]> recurrence = (x_t, h_tm1) => { var h_t = T.Sigmoid(T.Dot(x_t, this.Wx) + T.Dot(h_tm1, this.Wh) + this.bh); var s_t = T.Softmax(T.Dot(h_t, this.W) + this.b); return(new[] { h_t, s_t }); }; var result = T.Scan( fn: recurrence, sequences: x, outputsInfo: new[] { this.h0, null } /*, n_steps: x.Shape[0]*/); var h = result[0]; var s = result[1]; var p_y_given_x_lastword = s[-1, /*0,*/ XSlicer._]; // 0 because of Theano's Softmax ? var p_y_given_x_sentence = s[XSlicer._, /*0,*/ XSlicer._]; var y_pred = T.Argmax(p_y_given_x_sentence, axis: 1); // cost and gradients and learning rate var lr = T.Scalar <float>("lr"); nll = -T.Mean(T.Log(p_y_given_x_lastword)[y]); var gradients = T.Grad(nll); var updates = new OrderedDictionary(); foreach (var W in @params) { updates[W] = W - lr * gradients[W]; } // theano functions this.classify = T.Function(input: x, output: y_pred); this.train = T.Function(input: (x, y, lr), output: nll, updates: updates); }
public void SlicingCompiles()
{
    var x = T.Matrix<float>(10, 5, "x");
    var loss = T.Norm2(x[From(2)]);
    var f = T.Function(input: x, output: loss);
    f(NN.Range<float>(50).Reshape(10, 5));
}
public void FailMissingVariable()
{
    var x = T.Matrix<float>("x");
    var y = T.Matrix<float>("y");
    var z = x + y;
    var f = T.Function(x, z); // "y" is missing
    f(NN.Array(1f, 2f, 3f));  // should throw an exception
}
public void TanhPerceptronPassesGradientCheck()
{
    var x = T.Vector<float>("x");
    int n = 20, m = 5;
    var W = T.Shared(NN.Random.Uniform(-1f, 1f, m, n).As<float>(), "W");
    var W_op = T.Shared(NN.Random.Uniform(-1f, 1f, m, n).As<float>(), "W_op");
    var loss = T.Norm2(T.Tanh(T.Dot(W, x)) - T.Dot(W_op, x));
    AssertTensor.PassesGradientCheck(x, loss, W);
}
/// <summary>
/// Return the mean of the negative log-likelihood of the predictions of this model under a given target distribution.
/// </summary>
/// <param name="y">a vector that gives, for each example, the correct label</param>
/// <returns>the mean negative log-likelihood across the minibatch</returns>
public Scalar<float> NegativeLogLikelihood(Tensor<int> y)
{
    // y.Shape[0] is (symbolically) the number of rows in y, i.e. the number of examples (call it n) in the minibatch.
    // T.Range(y.Shape[0]) is a symbolic vector which will contain [0, 1, 2, ..., n-1].
    // T.Log(this.p_y_given_x) is a matrix of log-probabilities (call it LP) with one row per example and one column per class.
    // LP[T.Range(y.Shape[0]), y] is a vector v containing [LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]]],
    // and T.Mean(LP[T.Range(y.Shape[0]), y]) is the mean (across minibatch examples) of the elements in v,
    // i.e. the mean log-likelihood across the minibatch.
    return -T.Mean(T.Log(this.p_y_given_x)[T.Range(y.Shape[0]), y]);
}
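// Illustration sketch (hypothetical test): the fancy-indexing pattern LP[T.Range(n), y]
// used above, on concrete values, to show that it picks one entry per row.
public void TestPickLogProbabilities()
{
    var LP = T.Matrix<float>("LP");
    var y = T.Vector<int>("y");
    var picked = LP[T.Range(LP.Shape[0]), y];
    var f = T.Function(input: (LP, y), output: picked);

    var LP_ = NN.Array(new float[,] { { 0.1f, 0.9f }, { 0.8f, 0.2f } });
    var y_ = NN.Array<int>(1, 0);
    AssertArray.AreEqual(NN.Array<float>(0.9f, 0.8f), f(LP_, y_)); // [LP[0, 1], LP[1, 0]]
}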
public Scalar<float> Errors(Tensor<int> y)
{
    // check that y has the same dimension as y_pred
    if (y.NDim != this.y_pred.NDim)
        throw new RankException("y should have the same shape as self.y_pred");
    // T.Neq returns a vector of 0s and 1s, where 1 represents a mistake in prediction
    return T.Mean(T.Neq(this.y_pred, y));
}
public void DimShufflePassesGradientCheck()
{
    var X = T.Matrix<float>(5, 3, "X");
    var b = T.Shared(NN.Random.Uniform(-1f, 1f, 3), "b");
    var b2 = b.DimShuffle('x', 0);
    var Xb = X * b2;
    var loss = T.Norm2(Xb);
    AssertTensor.PassesGradientCheck(X, loss, b);
}
public void FailTwoVarExprNotShared()
{
    var x = T.Scalar<float>("x");
    var y = T.Scalar<float>("y");
    var e = T.Tanh(x / 5) * T.Exp(0.5f * y);
    // When not all the variables used by an expression are given as inputs,
    // 'Function' throws an exception.
    var g = T.Function(x, e);
    var d = g(3f);
}
public void MinPassesGradientCheck()
{
    var x = T.Shared(0f, "x");
    var min = T.Min(x, 0f);
    x.Value = 1;
    AssertTensor.PassesGradientCheck(min, x);
    x.Value = -1;
    AssertTensor.PassesGradientCheck(min, x);
}
// usage: var train = T.Function(...vars..., output: loss, updates: UpdateRules.Sgd(loss, eta, @params));
public static OrderedDictionary Sgd(Scalar<float> loss, Scalar<float> lr, params Tensor<float>.Shared[] @params)
{
    var dloss = T.Grad(loss);
    var result = new OrderedDictionary();
    foreach (var param in @params)
        result[param] = param - lr * dloss[param];
    return result;
}
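// Usage sketch (hypothetical test): wiring Sgd into a compiled training function and taking
// a few steps on a toy least-squares loss; only APIs already used in this file are assumed.
public void SgdDecreasesLoss()
{
    var x = T.Vector<float>(5, "x");
    var w = T.Shared(NN.Random.Uniform(-1f, 1f, 5).As<float>(), "w");
    var loss = T.Norm2(x - w);
    var eta = T.Scalar<float>("eta");
    var train = T.Function(input: (x, eta), output: loss, updates: UpdateRules.Sgd(loss, eta, w));

    var target = NN.Random.Uniform(-1f, 1f, 5).As<float>();
    var before = train(target, 0.1f);
    for (int i = 0; i < 100; i++)
        train(target, 0.1f);
    var after = train(target, 0.1f);
    Assert.IsTrue(after < before); // each step moves w towards the target
}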
public void FailGivenCast()
{
    var x = T.Scalar<int>("x");
    var y = T.Shared(3, "y");
    var output = x + y;
    // should fail: the given 4.5f cannot be cast to the int shared "y"
    var f = T.Function(input: x, output: output, givens: new OrderedDictionary { { y, 4.5f } });
    AssertArray.AreEqual(f(2), 6);
}
public void ConcatPassesGradientCheck()
{
    var x = T.Shared(NN.Random.Uniform(-1f, 1f, 4, 10), "x");
    var y = T.Shared(NN.Random.Uniform(-1f, 1f, 6, 10), "y");
    var z = T.Concat(0, x, y);
    var loss = T.Norm2(z[Range(2, 8)]);
    AssertTensor.PassesGradientCheck(loss, x);
    AssertTensor.PassesGradientCheck(loss, y);
}
public void MaxPassesGradientCheck()
{
    var x = T.Shared(0f, "x");
    var max = T.Max(x, 0f);
    x.Value = 1;
    AssertTensor.PassesGradientCheck(max, x);
    x.Value = -1;
    AssertTensor.PassesGradientCheck(max, x);
}
public void TensorDot3Dx1DPassesGradientCheck()
{
    var x = T.Vector<float>("x");
    int n = 6, m = 4, l = 2;
    var W = T.Shared(NN.Random.Uniform(-1f, 1f, l, m, n).As<float>(), "W");
    var W_op = T.Shared(NN.Random.Uniform(-1f, 1f, l, m, n).As<float>(), "W_op");
    var loss = T.Norm2(T.Dot(W, x) - T.Dot(W_op, x));
    AssertTensor.PassesGradientCheck(x, loss, W, relativeErr: 1e-3f, absErr: 1e-4f);
}
public void TensorDot3Dx2DAsEinsteinPassesGradientCheck()
{
    int n = 6, m = 4, l = 2;
    var x = T.Matrix<float>("x");
    var W = T.Shared(NN.Random.Uniform(-1f, 1f, l, m, n).As<float>(), "W");
    var W_op = T.Shared(NN.Random.Uniform(-1f, 1f, l, m, n).As<float>(), "W_op");
    var loss = T.Norm2(T.EinsteinSum(W, x, "lmn,nx->lmx") - T.Dot(W_op, x));
    AssertTensor.PassesGradientCheck(x, loss, W);
}