public override void backward()
{
    // pooling layers have no parameters, so simply compute
    // gradient wrt data here
    var V = this.in_act;
    V.dw = Convnet_util.zeros(V.w.Length); // zero out gradient wrt data

    var n = 0; // counter iterates over the switches recorded in forward()
    for (var d = 0; d < this.out_depth; d++)
    {
        var x = -this.pad;
        var y = -this.pad;
        for (var ax = 0; ax < this.out_sx; x += this.stride, ax++)
        {
            y = -this.pad;
            for (var ay = 0; ay < this.out_sy; y += this.stride, ay++)
            {
                var chain_grad = this.out_act.get_grad(ax, ay, d);
                V.add_grad(this.switchx[n], this.switchy[n], d, chain_grad);
                n++;
            }
        }
    }
}
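// Note on the max-pooling backward pass above: during forward() the layer records in
// switchx/switchy the (x, y) coordinates of the input element that won each max, so
// backward() only has to route each output gradient to that single winning input
// position via add_grad; all other inputs in the pooling window receive no gradient.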
public override double backward(int y)
{
    // compute and accumulate gradient wrt weights and bias of this layer
    var x = this.in_act;
    x.dw = Convnet_util.zeros(x.w.Length); // zero out the gradient of input Vol

    // we're using structured loss here, which means that the score
    // of the ground truth should be higher than the score of any other
    // class, by a margin
    var yscore = x.w[y]; // score of ground truth
    var margin = 1.0;
    var loss = 0.0;
    for (var i = 0; i < this.out_depth; i++)
    {
        if (y == i)
        {
            continue;
        }
        var ydiff = -yscore + x.w[i] + margin;
        if (ydiff > 0)
        {
            // violating dimension, apply loss
            x.dw[i] += 1;
            x.dw[y] -= 1;
            loss += ydiff;
        }
    }
    return loss;
}
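// A minimal, self-contained sketch (not part of the original layer code) of the structured
// loss used above, on a plain score array: every class i != y that violates the margin
// contributes +1 to its own gradient, -1 to the ground-truth gradient, and its violation
// to the loss. The helper name and array-based signature are illustrative assumptions.
private static double StructuredLossSketch(double[] scores, int y, double[] grad)
{
    var margin = 1.0;
    var loss = 0.0;
    for (var i = 0; i < scores.Length; i++)
    {
        if (i == y) { continue; }
        var ydiff = scores[i] - scores[y] + margin;
        if (ydiff > 0)
        {
            grad[i] += 1; // push the violating score down
            grad[y] -= 1; // push the ground-truth score up
            loss += ydiff;
        }
    }
    return loss;
}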
public SoftmaxLayer(Layer_def opt)
{
    // computed
    this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
    this.out_depth = this.num_inputs;
    this.out_sx = 1;
    this.out_sy = 1;
    this.layer_type = "softmax";
    this.es = Convnet_util.zeros(this.out_depth);
}
public override void backward()
{
    var V = this.in_act; // we need to set dw of this
    var V2 = this.out_act;
    var N = V.w.Length;
    V.dw = Convnet_util.zeros(N); // zero out gradient wrt data
    for (var i = 0; i < N; i++)
    {
        // d/dx tanh(x) = 1 - tanh(x)^2, and V2.w[i] already holds tanh(x) from forward()
        var v2wi = V2.w[i];
        V.dw[i] = (1.0 - v2wi * v2wi) * V2.dw[i];
    }
    in_act = V; // check this: redundant, V is the same reference as in_act
}
public override void backward()
{
    var V = this.in_act; // we need to set dw of this
    var chain_grad = this.out_act;
    var N = V.w.Length;
    V.dw = Convnet_util.zeros(N); // zero out gradient wrt data
    for (var i = 0; i < N; i++)
    {
        if (!this.dropped[i])
        {
            V.dw[i] = chain_grad.dw[i]; // copy over the gradient; dropped units get no gradient
        }
    }
    in_act = V; // check this: redundant, V is the same reference as in_act
}
public override void backward()
{
    var V = this.in_act;
    V.dw = Convnet_util.zeros(V.w.Length); // zero out gradient wrt bottom data, we're about to fill it

    var V_sx = V.sx;
    var V_sy = V.sy;
    var xy_stride = this.stride;
    for (var d = 0; d < this.out_depth; d++)
    {
        var f = this.filters[d];
        var x = -this.pad;
        var y = -this.pad;
        for (var ay = 0; ay < this.out_sy; y += xy_stride, ay++)
        {
            x = -this.pad;
            for (var ax = 0; ax < this.out_sx; x += xy_stride, ax++)
            {
                // convolve centered at this particular location
                var chain_grad = this.out_act.get_grad(ax, ay, d); // gradient from above, from chain rule
                for (var fy = 0; fy < f.sy; fy++)
                {
                    var oy = y + fy; // coordinates in the original input array
                    for (var fx = 0; fx < f.sx; fx++)
                    {
                        var ox = x + fx;
                        if (oy >= 0 && oy < V_sy && ox >= 0 && ox < V_sx)
                        {
                            for (var fd = 0; fd < f.depth; fd++)
                            {
                                // avoid function call overhead (x2) for efficiency, compromise modularity :(
                                var ix1 = ((V_sx * oy) + ox) * V.depth + fd;
                                var ix2 = ((f.sx * fy) + fx) * f.depth + fd;
                                f.dw[ix2] += V.w[ix1] * chain_grad; // grad wrt filter weights
                                V.dw[ix1] += f.w[ix2] * chain_grad; // grad wrt input data
                            }
                        }
                    }
                }
                this.biases.dw[d] += chain_grad;
            }
        }
    }
}
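// The inlined index arithmetic above assumes a Vol stores its values in a single flat
// array laid out as (y, x, depth); a minimal sketch of that mapping (the helper name is
// illustrative, not part of the original code):
private static int FlatIndexSketch(int sx, int depth, int x, int y, int d)
{
    // same expression as ((V_sx * oy) + ox) * V.depth + fd in the convolution backward pass
    return ((sx * y) + x) * depth + d;
}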
public override void backward()
{
    var V = this.in_act;
    V.dw = Convnet_util.zeros(V.w.Length); // zero out the gradient in input Vol

    // compute gradient wrt weights and data
    for (var i = 0; i < this.out_depth; i++)
    {
        var tfi = this.filters[i];
        var chain_grad = this.out_act.dw[i];
        for (var d = 0; d < this.num_inputs; d++)
        {
            V.dw[d] += tfi.w[d] * chain_grad;  // grad wrt input data
            tfi.dw[d] += V.w[d] * chain_grad;  // grad wrt params
        }
        this.biases.dw[i] += chain_grad;
    }
}
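// Sketch of the fully connected gradients computed above, with plain arrays standing in
// for Vol (names and signature are illustrative assumptions): dInput = W^T * chainGrad,
// dW[i][d] = input[d] * chainGrad[i], dB[i] = chainGrad[i].
private static void FullyConnBackwardSketch(double[] input, double[][] w, double[] chainGrad,
                                            double[] dInput, double[][] dW, double[] dB)
{
    for (var i = 0; i < chainGrad.Length; i++)
    {
        for (var d = 0; d < input.Length; d++)
        {
            dInput[d] += w[i][d] * chainGrad[i]; // grad wrt input data
            dW[i][d] += input[d] * chainGrad[i]; // grad wrt weights
        }
        dB[i] += chainGrad[i]; // grad wrt bias
    }
}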
public override void backward()
{
    var V = this.in_act; // we need to set dw of this
    var V2 = this.out_act;
    var N = V.w.Length;
    V.dw = Convnet_util.zeros(N); // zero out gradient wrt data
    for (var i = 0; i < N; i++)
    {
        if (V2.w[i] <= 0)
        {
            V.dw[i] = 0; // threshold: no gradient flows where the unit was inactive
        }
        else
        {
            V.dw[i] = V2.dw[i];
        }
    }
    in_act = V; // check this: redundant, V is the same reference as in_act
}
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var A = new Vol(1, 1, this.out_depth, 0.0);

    // compute max activation
    var _as = V.w;
    var amax = V.w[0];
    for (var i = 1; i < this.out_depth; i++)
    {
        if (_as[i] > amax)
        {
            amax = _as[i];
        }
    }

    // compute exponentials (carefully to not blow up)
    var es = Convnet_util.zeros(this.out_depth);
    var esum = 0.0;
    for (var i = 0; i < this.out_depth; i++)
    {
        var e = Math.Exp(_as[i] - amax);
        esum += e;
        es[i] = e;
    }

    // normalize and output to sum to one
    for (var i = 0; i < this.out_depth; i++)
    {
        es[i] /= esum;
        A.w[i] = es[i];
    }

    this.es = es; // save these for backprop
    this.out_act = A;
    return this.out_act;
}
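// Minimal sketch of the numerically stable softmax used in forward() above: subtracting the
// maximum score before exponentiating leaves the normalized result unchanged (the common
// factor cancels) but keeps Math.Exp from overflowing. Helper name is illustrative.
private static double[] SoftmaxSketch(double[] scores)
{
    var amax = scores[0];
    for (var i = 1; i < scores.Length; i++)
    {
        if (scores[i] > amax) { amax = scores[i]; }
    }
    var es = new double[scores.Length];
    var esum = 0.0;
    for (var i = 0; i < scores.Length; i++)
    {
        es[i] = Math.Exp(scores[i] - amax);
        esum += es[i];
    }
    for (var i = 0; i < scores.Length; i++)
    {
        es[i] /= esum; // probabilities now sum to one
    }
    return es;
}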
public override double backward(int y)
{
    // compute and accumulate gradient wrt weights and bias of this layer
    var x = this.in_act;
    x.dw = Convnet_util.zeros(x.w.Length); // zero out the gradient of input Vol

    for (var i = 0; i < this.out_depth; i++)
    {
        var indicator = i == y ? 1.0 : 0.0;
        var mul = -(indicator - this.es[i]);
        x.dw[i] = mul;
    }

    if (es.Length == 0)
    {
        return 0;
    }

    double _es = es[y];
    // loss is the class negative log likelihood
    return -Math.Log(_es);
}
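// The gradient above follows from the cross-entropy loss L = -log(p_y) with p = softmax(s):
// dL/ds_i = p_i - 1{i == y}, which is exactly -(indicator - this.es[i]); the returned value
// is the negative log likelihood of the ground-truth class y.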
// in the original JavaScript implementation y could be a list of size num_inputs,
// a single number if only one value is regressed, or a struct {dim: i, val: x} asking
// to regress only dimension i towards value x; in this port y is a single value
public override double backward(int y)
{
    // compute and accumulate gradient wrt weights and bias of this layer
    var x = this.in_act;
    x.dw = Convnet_util.zeros(x.w.Length); // zero out the gradient of input Vol
    var loss = 0.0;

    // single-value regression (the "number" case of the original code):
    // L2 loss 0.5 * (x.w[0] - y)^2 on the first output dimension, gradient x.w[0] - y
    var dy = x.w[0] - y;
    x.dw[0] = dy;
    loss += 0.5 * dy * dy;

    return loss;
}
public override void backward()
{
    var V = this.in_act; // we need to set dw of this
    var V2 = this.out_act;
    var N = this.out_depth;
    V.dw = Convnet_util.zeros(V.w.Length); // zero out gradient wrt data

    // pass the gradient through the appropriate switch
    if (this.out_sx == 1 && this.out_sy == 1)
    {
        for (var i = 0; i < N; i++)
        {
            var chain_grad = V2.dw[i];
            V.dw[this.switches[i]] = chain_grad;
        }
    }
    else
    {
        // bleh okay, lets do this the hard way
        var n = 0; // counter for switches
        for (var x = 0; x < V2.sx; x++)
        {
            for (var y = 0; y < V2.sy; y++)
            {
                for (var i = 0; i < N; i++)
                {
                    var chain_grad = V2.get_grad(x, y, i);
                    V.set_grad(x, y, this.switches[n], chain_grad);
                    n++;
                }
            }
        }
    }
    in_act = V; // check this: redundant, V is the same reference as in_act
}