/// <summary>
/// Converts a bitmap into a volume whose values are normalized to [-0.5, 0.5].
/// </summary>
/// <param name="img">Image to convert; every pixel is read once.</param>
/// <param name="convert_grayscale">
/// When true, only channel 0 (red) is kept and the returned volume has depth 1.
/// </param>
/// <returns>
/// A Width x Height x 4 volume holding R, G, B, A at depths 0..3, or a
/// Width x Height x 1 volume when <paramref name="convert_grayscale"/> is set.
/// </returns>
public static Vol img_to_vol(Bitmap img, bool convert_grayscale)
{
    var W = img.Width;
    var H = img.Height;

    // Pixels are read with GetPixel rather than ImageConverter: the byte[] that
    // ImageConverter produces is the encoded image stream, not per-pixel RGBA
    // samples, so its length and contents do not line up with a W x H x 4 volume.
    // Writing through Vol.set also keeps the volume's own memory layout intact.
    var x = new Vol(W, H, 4, 0.0); // input volume (image)
    for (var j = 0; j < H; j++)
    {
        for (var i = 0; i < W; i++)
        {
            var pixel = img.GetPixel(i, j);

            // normalize image pixels to [-0.5, 0.5]
            x.set(i, j, 0, pixel.R / 255.0 - 0.5);
            x.set(i, j, 1, pixel.G / 255.0 - 0.5);
            x.set(i, j, 2, pixel.B / 255.0 - 0.5);
            x.set(i, j, 3, pixel.A / 255.0 - 0.5);
        }
    }

    if (convert_grayscale)
    {
        // flatten into a depth=1 volume by keeping only the first channel
        var x1 = new Vol(W, H, 1, 0.0);
        for (var i = 0; i < W; i++)
        {
            for (var j = 0; j < H; j++)
            {
                x1.set(i, j, 0, x.get(i, j, 0));
            }
        }
        x = x1;
    }

    return x;
}
/// <summary>
/// Dropout forward pass.
/// </summary>
/// <param name="V">Input volume; it is stored in <c>in_act</c> and not modified.</param>
/// <param name="is_training">
/// True to randomly zero activations; false to scale them for prediction.
/// </param>
/// <returns>A modified copy of <paramref name="V"/>, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var V2 = V.clone();
    var N = V.w.Length;

    if (is_training)
    {
        // Zero each activation with probability drop_prob and remember which
        // ones were dropped in this.dropped.
        for (var i = 0; i < N; i++)
        {
            if (rnd.NextDouble() < this.drop_prob)
            {
                V2.w[i] = 0;
                this.dropped[i] = true; // drop!
            }
            else
            {
                this.dropped[i] = false;
            }
        }
    }
    else
    {
        // During training each activation survives with probability
        // (1 - drop_prob), so its expected value is (1 - drop_prob) * w.
        // Prediction must scale by that keep probability, not by drop_prob;
        // the two only coincide when drop_prob == 0.5.
        var keep_prob = 1.0 - this.drop_prob;
        for (var i = 0; i < N; i++)
        {
            V2.w[i] *= keep_prob;
        }
    }

    this.out_act = V2;
    return this.out_act;
}
/// <summary>
/// Adds the weights of <paramref name="V"/> element-wise into this volume's weights.
/// </summary>
/// <param name="V">Volume whose <c>w</c> entries are added; it is only read.</param>
public void addFrom(Vol V)
{
    var target = this.w;
    for (var idx = 0; idx < target.Length; idx++)
    {
        target[idx] += V.w[idx];
    }
}
/// <summary>
/// Adds <paramref name="a"/> times the weights of <paramref name="V"/>
/// element-wise into this volume's weights.
/// </summary>
/// <param name="V">Volume whose <c>w</c> entries are read.</param>
/// <param name="a">Scale factor applied to each entry of <paramref name="V"/>.</param>
public void addFromScaled(Vol V, double a)
{
    var target = this.w;
    for (var idx = 0; idx < target.Length; idx++)
    {
        target[idx] += a * V.w[idx];
    }
}
/// <summary>
/// Maxout forward pass: each output channel is the maximum over a group of
/// <c>group_size</c> consecutive input channels.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The output volume, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var groups = this.out_depth;
    var result = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);

    if (this.out_sx == 1 && this.out_sy == 1)
    {
        // Fast path for 1x1 outputs: index the flat weight array directly
        // instead of tracking x, y, d coordinates.
        for (var g = 0; g < groups; g++)
        {
            var offset = g * this.group_size; // first input channel of this group
            var best = V.w[offset];
            var bestIndex = 0;
            for (var k = 1; k < this.group_size; k++)
            {
                var candidate = V.w[offset + k];
                if (candidate > best)
                {
                    best = candidate;
                    bestIndex = k;
                }
            }
            result.w[g] = best;
            this.switches[g] = offset + bestIndex; // winning input channel
        }
    }
    else
    {
        // General path: one switch entry per (x, y, group), filled in visit order.
        var cursor = 0;
        for (var px = 0; px < V.sx; px++)
        {
            for (var py = 0; py < V.sy; py++)
            {
                for (var g = 0; g < groups; g++)
                {
                    var offset = g * this.group_size;
                    var best = V.get(px, py, offset);
                    var bestIndex = 0;
                    for (var k = 1; k < this.group_size; k++)
                    {
                        var candidate = V.get(px, py, offset + k);
                        if (candidate > best)
                        {
                            best = candidate;
                            bestIndex = k;
                        }
                    }
                    result.set(px, py, g, best);
                    this.switches[cursor] = offset + bestIndex;
                    cursor++;
                }
            }
        }
    }

    this.out_act = result;
    return this.out_act;
}
/// <summary>
/// Runs a forward pass with <c>is_training = false</c> and returns the value
/// produced by calling <c>backward(y)</c> on the last layer.
/// </summary>
/// <param name="V">Input volume fed through the network.</param>
/// <param name="y">Target passed to the last layer's <c>backward</c>.</param>
/// <returns>The loss reported by the last layer.</returns>
public double getCostLoss(Vol V, int y)
{
    this.forward(V, false);
    var lastLayer = this.layers[this.layers.Count - 1];
    return lastLayer.backward(y);
}
/// <summary>
/// Forward-propagates <paramref name="V"/> through every layer in order.
/// </summary>
/// <param name="V">Input volume handed to the first layer.</param>
/// <param name="is_training">
/// Flag forwarded unchanged to every layer's <c>forward</c> call.
/// </param>
/// <returns>The activation returned by the final layer.</returns>
public Vol forward(Vol V, bool is_training)
{
    // The first layer consumes the raw input; each later layer consumes the
    // previous layer's output.
    var current = this.layers[0].forward(V, is_training);
    var index = 1;
    while (index < this.layers.Count)
    {
        current = this.layers[index].forward(current, is_training);
        index++;
    }
    return current;
}
/// <summary>
/// Creates a new volume with the same dimensions and a copy of the weights.
/// </summary>
/// <returns>A new <c>Vol</c> whose <c>w</c> entries equal this volume's.</returns>
public Vol clone()
{
    var copy = new Vol(this.sx, this.sy, this.depth, 0.0);
    var source = this.w;
    for (var idx = 0; idx < source.Length; idx++)
    {
        copy.w[idx] = source[idx];
    }
    return copy;
}
/// <summary>
/// Demo entry point: builds a small network, trains it on the four XOR
/// samples for 4000 iterations, and prints one gradient value per iteration.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    /*
     * Alternative convolutional definition kept for reference:
     * layer_defs.Add("{ type: 'input', out_sx: 24, out_sy: 24, out_depth: 1}");
     * layer_defs.Add("{ type: 'conv', sx: 5, filters: 8, stride: 1, pad: 2, activation: 'relu'}");
     * layer_defs.Add("{ type: 'pool', sx: 2, stride: 2}");
     * layer_defs.Add("{ type: 'conv', sx: 5, filters: 16, stride: 1, pad: 2, activation: 'relu'}");
     * layer_defs.Add("{ type: 'pool', sx: 3, stride: 3}");
     * layer_defs.Add("{ type: 'softmax', num_classes: 10}");
     */
    List<String> layer_defs = new List<String>
    {
        "{type:'input', out_sx:1, out_sy:1, out_depth:2}",
        "{type:'fc', num_neurons:2, activation: 'sigmoid',bias_pref:1}",
        "{type:'softmax', num_classes:2}"
    };

    Net net = new Net();
    net.makeLayers(layer_defs);

    String json_trainer = "{learning_rate:0.1, momentum:0.9, batch_size:10, l2_decay:0.1,method:'adadelta'}";
    Trainer trainer = new Trainer(net, json_trainer);

    // XOR truth table: inputs and their class labels.
    double[][] data =
    {
        new double[] { 0, 0 },
        new double[] { 0, 1 },
        new double[] { 1, 0 },
        new double[] { 1, 1 }
    };
    int[] label = { 0, 1, 1, 0 };

    var x = new Vol(1, 1, 2);
    for (int step = 0; step < 4000; step++)
    {
        int sample = step % 4;
        x.w = data[sample]; // the volume shares the sample's array
        String tr = trainer.train(x, label[sample]);
        //Console.WriteLine(tr);
        var probe = net.layers[3].out_act.dw[0];
        Console.WriteLine(probe.ToString());
    }

    // Spin forever so the process does not exit after training.
    for (;;)
    {
    }
}
/// <summary>
/// Tanh forward pass: applies <c>Math.Tanh</c> to every input value.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>A new volume of activations, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = V.cloneAndZero();
    var count = V.w.Length;
    for (var idx = 0; idx < count; idx++)
    {
        output.w[idx] = Math.Tanh(V.w[idx]);
    }
    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// Max-pooling forward pass. For every output cell the largest in-bounds input
/// value inside the pooling window is written to the output, and the input
/// coordinates it came from are recorded in <c>switchx</c>/<c>switchy</c>.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The pooled volume, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var pooled = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);

    var slot = 0; // running index into switchx / switchy
    for (var d = 0; d < this.out_depth; d++)
    {
        var left = -this.pad;
        for (var ax = 0; ax < this.out_sx; ax++, left += this.stride)
        {
            var top = -this.pad;
            for (var ay = 0; ay < this.out_sy; ay++, top += this.stride)
            {
                // Scan the window anchored at (left, top). The sentinel start
                // value and the -1 coordinates are what remain when no
                // in-bounds input exceeds the sentinel.
                double best = -99999;
                int bestX = -1, bestY = -1;
                for (var fx = 0; fx < this.sx; fx++)
                {
                    var ox = left + fx;
                    if (ox < 0 || ox >= V.sx)
                    {
                        continue; // column lies in the padding
                    }
                    for (var fy = 0; fy < this.sy; fy++)
                    {
                        var oy = top + fy;
                        if (oy < 0 || oy >= V.sy)
                        {
                            continue; // row lies in the padding
                        }
                        var v = V.get(ox, oy, d);
                        if (v > best)
                        {
                            best = v;
                            bestX = ox;
                            bestY = oy;
                        }
                    }
                }

                // Remember where the maximum came from.
                this.switchx[slot] = bestX;
                this.switchy[slot] = bestY;
                slot++;
                pooled.set(ax, ay, d, best);
            }
        }
    }

    this.out_act = pooled;
    return this.out_act;
}
/// <summary>
/// Sigmoid forward pass: maps every input value v to 1 / (1 + exp(-v)).
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>A new volume of activations, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = V.cloneAndZero();
    var count = V.w.Length;
    var outValues = output.w;
    var inValues = V.w;
    for (var idx = 0; idx < count; idx++)
    {
        var decay = Math.Exp(-inValues[idx]);
        outValues[idx] = 1.0 / (1.0 + decay);
    }
    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// Convolution forward pass. Each output value is the sum of products of one
/// filter with the input window it covers (positions outside the input are
/// skipped), plus that filter's bias.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The convolved volume, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);

    var inWidth = V.sx;
    var inHeight = V.sy;
    var step = this.stride;

    for (var d = 0; d < this.out_depth; d++)
    {
        var filter = this.filters[d];
        var top = -this.pad;
        for (var ay = 0; ay < this.out_sy; ay++, top += step)
        {
            var left = -this.pad;
            for (var ax = 0; ax < this.out_sx; ax++, left += step)
            {
                // Accumulate the filter response for the window at (left, top).
                var sum = 0.0;
                for (var fy = 0; fy < filter.sy; fy++)
                {
                    var oy = top + fy; // row in input coordinates
                    if (oy < 0 || oy >= inHeight)
                    {
                        continue;
                    }
                    for (var fx = 0; fx < filter.sx; fx++)
                    {
                        var ox = left + fx; // column in input coordinates
                        if (ox < 0 || ox >= inWidth)
                        {
                            continue;
                        }

                        // Index the flat weight arrays directly instead of
                        // calling get() for every element.
                        var filterBase = ((filter.sx * fy) + fx) * filter.depth;
                        var inputBase = ((inWidth * oy) + ox) * V.depth;
                        for (var fd = 0; fd < filter.depth; fd++)
                        {
                            sum += filter.w[filterBase + fd] * V.w[inputBase + fd];
                        }
                    }
                }
                sum += this.biases.w[d];
                output.set(ax, ay, d, sum);
            }
        }
    }

    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// ReLU forward pass: copies the input and replaces every negative value with 0.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c> and not modified.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The thresholded copy, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = V.clone();
    var count = V.w.Length;
    var values = output.w;
    for (var idx = 0; idx < count; idx++)
    {
        // threshold at 0
        if (values[idx] < 0)
        {
            values[idx] = 0;
        }
    }
    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// Fully-connected forward pass: output i is the dot product of the first
/// <c>num_inputs</c> input values with filter i's weights, plus bias i.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>A 1 x 1 x out_depth volume, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = new Vol(1, 1, this.out_depth, 0.0);
    var inputs = V.w;
    for (var neuron = 0; neuron < this.out_depth; neuron++)
    {
        var sum = 0.0;
        var weights = this.filters[neuron].w;

        // Work on the raw weight arrays directly for efficiency.
        for (var k = 0; k < this.num_inputs; k++)
        {
            sum += inputs[k] * weights[k];
        }
        output.w[neuron] = sum + this.biases.w[neuron];
    }
    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// Softmax forward pass: exponentiates the inputs (shifted by their maximum)
/// and normalizes them so they sum to one.
/// </summary>
/// <param name="V">Input volume of scores; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>
/// A 1 x 1 x out_depth volume of normalized values, also stored in
/// <c>out_act</c>. The same values are kept in <c>es</c>.
/// </returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = new Vol(1, 1, this.out_depth, 0.0);

    // Find the largest score.
    var scores = V.w;
    var peak = V.w[0];
    for (var idx = 1; idx < this.out_depth; idx++)
    {
        if (scores[idx] > peak)
        {
            peak = scores[idx];
        }
    }

    // Exponentiate relative to the peak so the exponentials cannot blow up.
    var exps = Convnet_util.zeros(this.out_depth);
    var total = 0.0;
    for (var idx = 0; idx < this.out_depth; idx++)
    {
        var e = Math.Exp(scores[idx] - peak);
        total += e;
        exps[idx] = e;
    }

    // Normalize so the outputs sum to one.
    for (var idx = 0; idx < this.out_depth; idx++)
    {
        exps[idx] /= total;
        output.w[idx] = exps[idx];
    }

    this.es = exps; // saved for backprop
    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// Local response normalization across depth. Each value is divided by
/// (k + alpha / n * sum of squares over a depth window of size n) ^ beta.
/// </summary>
/// <param name="V">Input volume; stored in <c>in_act</c>.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The normalized volume, also stored in <c>out_act</c>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    this.in_act = V;
    var output = V.cloneAndZero();
    this.S_cache_ = V.cloneAndZero();
    var halfWindow = Math.Floor((double)this.n / 2);

    for (var x = 0; x < V.sx; x++)
    {
        for (var y = 0; y < V.sy; y++)
        {
            for (var i = 0; i < V.depth; i++)
            {
                var value = V.get(x, y, i);

                // Sum of squares over the depth window centred on i, clipped
                // to the valid depth range.
                var first = Math.Max(0, i - halfWindow);
                var last = Math.Min(i + halfWindow, V.depth - 1);
                var denominator = 0.0;
                for (var j = first; j <= last; j++)
                {
                    var neighbour = V.get(x, y, (int)j);
                    denominator += neighbour * neighbour;
                }

                denominator *= this.alpha / this.n;
                denominator += this.k;
                this.S_cache_.set(x, y, i, denominator); // kept for backprop
                denominator = Math.Pow(denominator, this.beta);
                output.set(x, y, i, value / denominator);
            }
        }
    }

    this.out_act = output;
    return this.out_act;
}
/// <summary>
/// Runs one forward/backward pass on a single example and, once every
/// <c>batch_size</c> calls, applies a parameter update using the configured
/// <c>method</c> (adam, adagrad, windowgrad, adadelta, nesterov, or SGD with
/// optional momentum).
/// </summary>
/// <param name="x">Input volume passed to <c>net.forward</c> with is_training = true.</param>
/// <param name="y">Target passed to <c>net.backward</c>.</param>
/// <returns>
/// A JSON-like string with the fields fwd_time, bwd_time, l2_decay_loss,
/// l1_decay_loss, cost_loss, softmax_loss and loss. The decay losses are
/// non-zero only on calls that perform an update.
/// </returns>
public String train(Vol x, int y)
{
    var start = DateTime.Now;
    this.net.forward(x, true); // also set the flag that lets the net know we're just training
    var end = DateTime.Now;
    var fwd_time = end - start;

    start = DateTime.Now;
    var cost_loss = this.net.backward(y);
    var l2_decay_loss = 0.0;
    var l1_decay_loss = 0.0;
    end = DateTime.Now;
    var bwd_time = end - start;

    this.k++;
    if (this.k % this.batch_size == 0)
    {
        var pglist = net.getParamsAndGrads();

        // Initialize the accumulator lists; only done once, on the first update.
        // Vanilla sgd needs neither list; momentum and adagrad need gsum;
        // adam and adadelta need gsum and xsum.
        if (this.gsum.Count == 0 && (this.method != "sgd" || this.momentum > 0.0))
        {
            for (var i = 0; i < pglist.Count; i++)
            {
                this.gsum.Add(new _array(pglist[i].param.Length));
                if (this.method == "adam" || this.method == "adadelta")
                {
                    this.xsum.Add(new _array(pglist[i].param.Length));
                }
                else
                {
                    this.xsum.Add(new _array(1)); // conserve memory
                }
            }
        }

        // For vanilla sgd the lists above stay empty, so they must not be indexed.
        var has_accumulators = this.gsum.Count > 0;

        // Adam bias correction uses the number of parameter updates performed
        // so far (this one included), not the number of examples seen.
        var update_count = this.k / this.batch_size;
        var adam_corr1 = 1 - Math.Pow(this.beta1, update_count);
        var adam_corr2 = 1 - Math.Pow(this.beta2, update_count);

        // perform an update for all sets of weights
        for (var i = 0; i < pglist.Count; i++)
        {
            var pg = pglist[i]; // param, gradient, other options in future (custom learning rate etc)
            var p = pg.param;
            var g = pg.grads;

            // NOTE(review): a multiplier of exactly 0 is treated as "unset" and
            // replaced by 1.0, so decay cannot be switched off for a parameter
            // set this way - confirm against the type returned by getParamsAndGrads.
            var l2_decay_mul = pg.l2_decay_mul != 0 ? pg.l2_decay_mul : 1.0;
            var l1_decay_mul = pg.l1_decay_mul != 0 ? pg.l1_decay_mul : 1.0;
            var l2_decay = this.l2_decay * l2_decay_mul;
            var l1_decay = this.l1_decay * l1_decay_mul;

            var gsumi = has_accumulators ? this.gsum[i].data : null;
            var xsumi = has_accumulators ? this.xsum[i].data : null;

            var plen = p.Length;
            for (var j = 0; j < plen; j++)
            {
                l2_decay_loss += l2_decay * p[j] * p[j] / 2; // accumulate weight decay loss
                l1_decay_loss += l1_decay * Math.Abs(p[j]);
                var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
                var l2grad = l2_decay * (p[j]);

                var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient

                if (this.method == "adam")
                {
                    gsumi[j] = gsumi[j] * this.beta1 + (1 - this.beta1) * gij; // biased first moment estimate
                    xsumi[j] = xsumi[j] * this.beta2 + (1 - this.beta2) * gij * gij; // biased second moment estimate
                    var biasCorr1 = gsumi[j] / adam_corr1; // bias-corrected first moment
                    var biasCorr2 = xsumi[j] / adam_corr2; // bias-corrected second moment
                    var dx = -this.learning_rate * biasCorr1 / (Math.Sqrt(biasCorr2) + this.eps);
                    p[j] += dx;
                }
                else if (this.method == "adagrad")
                {
                    gsumi[j] = gsumi[j] + gij * gij;
                    var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij;
                    p[j] += dx;
                }
                else if (this.method == "windowgrad")
                {
                    // adagrad with a moving-window weighted average, so the
                    // gradient is not accumulated over the entire history of
                    // the run (Idea #1 in Zeiler's Adadelta paper).
                    gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
                    var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning
                    p[j] += dx;
                }
                else if (this.method == "adadelta")
                {
                    gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij;
                    var dx = -Math.Sqrt((xsumi[j] + this.eps) / (gsumi[j] + this.eps)) * gij;
                    xsumi[j] = this.ro * xsumi[j] + (1 - this.ro) * dx * dx; // yes, xsum lags behind gsum by 1.
                    p[j] += dx;
                }
                else if (this.method == "nesterov")
                {
                    var dx = gsumi[j];
                    gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij;
                    dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j];
                    p[j] += dx;
                }
                else
                {
                    // assume SGD
                    if (this.momentum > 0.0)
                    {
                        var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step
                        gsumi[j] = dx; // back this up for next iteration of momentum
                        p[j] += dx; // apply corrected gradient
                    }
                    else
                    {
                        // vanilla sgd
                        p[j] += -this.learning_rate * gij;
                    }
                }

                g[j] = 0.0; // zero out gradient so that we can begin accumulating anew
            }
        }
    }

    // softmax_loss is kept for backwards compatibility; cost_loss is the value
    // to use. TODO: redo the way loss is reported around the network so that
    // arbitrary loss functions on any layer are computed automatically.
    // The total is parenthesized so the three terms are summed numerically
    // instead of being appended to the string one after another.
    var total_loss = cost_loss + l1_decay_loss + l2_decay_loss;
    String json = "{fwd_time:" + fwd_time
        + ",bwd_time: " + bwd_time
        + ",l2_decay_loss: " + l2_decay_loss
        + ",l1_decay_loss: " + l1_decay_loss
        + ",cost_loss: " + cost_loss
        + ",softmax_loss: " + cost_loss
        + ",loss: " + total_loss + "}";
    return json;
}
/// <summary>
/// Identity forward pass: the input is recorded as both <c>in_act</c> and
/// <c>out_act</c> and returned unchanged.
/// </summary>
/// <param name="V">Input volume.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The same <paramref name="V"/> instance.</returns>
public override Vol forward(Vol V, bool is_training)
{
    // in_act is assigned first, then out_act.
    this.out_act = this.in_act = V;
    return V;
}
/// <summary>
/// Forward pass that outputs the raw input scores: the input is recorded as
/// both <c>in_act</c> and <c>out_act</c> and returned unchanged.
/// </summary>
/// <param name="V">Input volume of scores.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The same <paramref name="V"/> instance.</returns>
public override Vol forward(Vol V, bool is_training)
{
    // Nothing to compute; in_act is assigned first, then out_act.
    this.out_act = this.in_act = V;
    return V;
}
/// <summary>
/// Forward pass of a layer. The overrides visible in this file store
/// <paramref name="V"/> in <c>in_act</c>, store their result in <c>out_act</c>,
/// and return that result.
/// </summary>
/// <param name="V">Input volume.</param>
/// <param name="is_training">
/// True when called while training; of the overrides visible in this file
/// only the dropout layer reads it.
/// </param>
/// <returns>The layer's output volume.</returns>
public abstract Vol forward(Vol V, bool is_training);
/// <summary>
/// Identity forward pass: the input is recorded as both <c>in_act</c> and
/// <c>out_act</c>, and <c>out_act</c> is returned.
/// </summary>
/// <param name="V">Input volume.</param>
/// <param name="is_training">Not used by this layer.</param>
/// <returns>The value of <c>out_act</c>, which was just set to <paramref name="V"/>.</returns>
public override Vol forward(Vol V, bool is_training)
{
    // in_act is assigned first, then out_act.
    this.out_act = this.in_act = V;
    return this.out_act;
}