public Matrix Sigmoid(Matrix m) {
    // Element-wise sigmoid nonlinearity: output[i] = Tembo.Sig(m[i]).
    // Registers a backward closure on BackProp when gradients are needed.
    var output = new Matrix(m.Rows, m.Columns);
    var n = m.W.Length;
    for (var i = 0; i < n; i++) {
        output.W[i] = Tembo.Sig(m.W[i]);
    }
    if (NeedsBackPropagation) {
        var bvc = new Action(() => {
            for (var i = 0; i < n; i++) {
                // grad for z = sigmoid(x) is z * (1 - z); chain with upstream output.DW
                var mwi = output.W[i];
                m.DW[i] += mwi * (1.0 - mwi) * output.DW[i];
            }
        });
        BackProp.Add(bvc);
    }
    return(output);
}
public Matrix Dot(Matrix m1, Matrix m2) {
    // Inner (dot) product of two equally-sized matrices, returned as a 1x1 matrix.
    Tembo.Assert(m1.W.Length == m2.W.Length);
    var output = new Matrix(1, 1);
    var sum = 0.0;
    for (var idx = 0; idx < m1.W.Length; idx++) {
        sum += m1.W[idx] * m2.W[idx];
    }
    output.W[0] = sum;
    if (NeedsBackPropagation) {
        // d(dot)/d(m1[i]) = m2[i] and d(dot)/d(m2[i]) = m1[i], scaled by the upstream gradient.
        BackProp.Add(new Action(() => {
            for (var idx = 0; idx < m1.W.Length; idx++) {
                var upstream = output.DW[0];
                m1.DW[idx] += m2.W[idx] * upstream;
                m2.DW[idx] += m1.W[idx] * upstream;
            }
        }));
    }
    return output;
}
public static List<Model> InitLSTM(int inputSize, int[] hiddenSizes, int outputSize) {
    // Builds the parameter list for a stacked LSTM: per depth, input/forget/output
    // gates and the cell-write candidate, plus a final linear decoder.
    //
    // FIX: the recurrent weights (W?h) multiply the previous *hidden state* of the
    // same layer (length hiddenSize), so they must be hiddenSize x hiddenSize.
    // They were previously sized hiddenSize x prev_size, which misaligns the
    // hidden-state matmul for every layer where prev_size != hiddenSize.
    var models = new List<Model>();
    for (var d = 0; d < hiddenSizes.Length; d++) { // loop over depths
        // Layer 0 reads the external input; deeper layers read the layer below.
        var prevSize = d == 0 ? inputSize : hiddenSizes[d - 1];
        var hiddenSize = hiddenSizes[d];
        // input gate
        models.Add(new Model("Wix" + d, Tembo.RandomMatrix(hiddenSize, prevSize, 0, 0.08)));
        models.Add(new Model("Wih" + d, Tembo.RandomMatrix(hiddenSize, hiddenSize, 0, 0.08)));
        models.Add(new Model("bi" + d, new Matrix(hiddenSize, 1)));
        // forget gate
        models.Add(new Model("Wfx" + d, Tembo.RandomMatrix(hiddenSize, prevSize, 0, 0.08)));
        models.Add(new Model("Wfh" + d, Tembo.RandomMatrix(hiddenSize, hiddenSize, 0, 0.08)));
        models.Add(new Model("bf" + d, new Matrix(hiddenSize, 1)));
        // output gate
        models.Add(new Model("Wox" + d, Tembo.RandomMatrix(hiddenSize, prevSize, 0, 0.08)));
        models.Add(new Model("Woh" + d, Tembo.RandomMatrix(hiddenSize, hiddenSize, 0, 0.08)));
        models.Add(new Model("bo" + d, new Matrix(hiddenSize, 1)));
        // cell write candidate
        models.Add(new Model("Wcx" + d, Tembo.RandomMatrix(hiddenSize, prevSize, 0, 0.08)));
        models.Add(new Model("Wch" + d, Tembo.RandomMatrix(hiddenSize, hiddenSize, 0, 0.08)));
        models.Add(new Model("bc" + d, new Matrix(hiddenSize, 1)));
    }
    // decoder params: map the top hidden layer to the output distribution
    models.Add(new Model("Whd", Tembo.RandomMatrix(outputSize, hiddenSizes.LastOrDefault(), 0, 0.08)));
    models.Add(new Model("bd", new Matrix(outputSize, 1)));
    return models;
}
public dynamic Step(List<Model> model, double stepSize, double regc, double clipval) {
    // One RMSProp update over every parameter matrix in `model`:
    // per-element adaptive cache, gradient clipping, L2 regularization,
    // then gradient reset. Returns stats (fraction of clipped gradients).
    //
    // FIX: stepSize was `int`, truncating any real learning rate (e.g. 0.01 -> 0)
    // so no update ever happened. Widened to double; int callers still compile.
    dynamic solverStats = new ExpandoObject();
    var num_clipped = 0;
    var num_tot = 0;
    foreach (var k in model) {
        if (k.Value != null) {
            var m = k.Value;
            // lazily create the per-parameter RMSProp cache entry
            if (!Tembo.Contains(StepCache, k)) {
                StepCache.Add(new Model(k.Key, new Matrix(k.Value.Rows, k.Value.Columns)));
            }
            var s = StepCache.FirstOrDefault(d => d.Key == k.Key);
            for (var i = 0; i < m.W.Length; i++) {
                // rmsprop adaptive learning rate: decayed moving average of squared grads
                var mdwi = m.DW[i];
                s.Value.W[i] = s.Value.W[i] * DecayRate + (1.0 - DecayRate) * mdwi * mdwi;
                // gradient clip (cache is updated with the raw gradient, as in the reference solver)
                if (mdwi > clipval) {
                    mdwi = clipval;
                    num_clipped++;
                }
                if (mdwi < -clipval) {
                    mdwi = -clipval;
                    num_clipped++;
                }
                num_tot++;
                // update (and regularize); SmoothEps avoids division by zero
                m.W[i] += -stepSize * mdwi / Math.Sqrt(s.Value.W[i] + SmoothEps) - regc * m.W[i];
                m.DW[i] = 0; // reset gradients for next iteration
            }
        }
    }
    // guard against 0/0 (NaN) when the model has no parameters
    solverStats.ratio_clipped = num_tot == 0 ? 0.0 : num_clipped * 1.0 / num_tot;
    return solverStats;
}
public static int SampleI(double[] w) {
    // Samples an index from the discrete distribution w, assuming the
    // entries are probabilities that sum to one.
    //
    // FIX: the original `while (true)` incremented i without bound, so
    // floating-point round-off (cumulative sum ending just below r) threw
    // IndexOutOfRangeException; the fallback return after it was unreachable.
    var r = Tembo.RandomNumber(0, 1);
    var x = 0.0;
    for (var i = 0; i < w.Length; i++) {
        x += w[i];
        if (x > r) {
            return i;
        }
    }
    // round-off can leave the cumulative sum slightly below r;
    // fall back to the last index instead of running off the end of w
    return w.Length - 1;
}
public Matrix Multiply(Matrix m1, Matrix m2) {
    // Matrix product: output = m1 * m2 (row-major flat storage).
    //
    // FIX: the backward closure was a copy-paste of the forward pass — it
    // recomputed output.W and never touched m1.DW / m2.DW, so gradients never
    // flowed through matrix multiplies. It now accumulates the proper
    // gradients: dL/d(m1[i,k]) += m2[k,j] * dL/d(out[i,j]) and
    // dL/d(m2[k,j]) += m1[i,k] * dL/d(out[i,j]).
    Tembo.Assert(m1.Columns == m2.Rows, "matrix multiplier dimensions misaligned");
    var n = m1.Rows;
    var d = m2.Columns;
    var output = new Matrix(n, d);
    for (var i = 0; i < n; i++) { // loop over rows of m1
        for (var j = 0; j < d; j++) { // loop over cols of m2
            var dot = 0.0;
            for (var k = 0; k < m1.Columns; k++) { // dot product loop
                dot += m1.W[m1.Columns * i + k] * m2.W[m2.Columns * k + j];
            }
            output.W[d * i + j] = dot;
        }
    }
    if (NeedsBackPropagation) {
        var bvc = new Action(() => {
            for (var i = 0; i < n; i++) {
                for (var j = 0; j < d; j++) {
                    var b = output.DW[d * i + j]; // upstream gradient for out[i,j]
                    for (var k = 0; k < m1.Columns; k++) {
                        m1.DW[m1.Columns * i + k] += m2.W[m2.Columns * k + j] * b;
                        m2.DW[m2.Columns * k + j] += m1.W[m1.Columns * i + k] * b;
                    }
                }
            }
        });
        BackProp.Add(bvc);
    }
    return output;
}
public Matrix RowPluck(Matrix m, int ix) {
    // Extracts row `ix` of m as a column vector (m.Columns x 1).
    Tembo.Assert(ix >= 0 && ix < m.Rows);
    var cols = m.Columns;
    var output = new Matrix(cols, 1);
    // copy over the data from the selected row
    for (var c = 0; c < cols; c++) {
        output.W[c] = m.W[cols * ix + c];
    }
    if (NeedsBackPropagation) {
        // gradients flow straight back into the plucked row of m
        BackProp.Add(new Action(() => {
            for (var c = 0; c < cols; c++) {
                m.DW[cols * ix + c] += output.DW[c];
            }
        }));
    }
    return output;
}
public Matrix Add(Matrix m1, Matrix m2) {
    // Element-wise addition: output[i] = m1[i] + m2[i].
    //
    // FIX: the backward closure was a copy-paste of the forward loop — it
    // recomputed output.W instead of propagating gradients. Addition passes
    // the upstream gradient through unchanged to both operands:
    // dL/d(m1[i]) += dL/d(out[i]) and dL/d(m2[i]) += dL/d(out[i]).
    Tembo.Assert(m1.W.Length == m2.W.Length, "matrix addition dimensions misaligned");
    var output = new Matrix(m1.Rows, m1.Columns);
    for (var i = 0; i < m1.W.Length; i++) {
        output.W[i] = m1.W[i] + m2.W[i];
    }
    if (NeedsBackPropagation) {
        var bvc = new Action(() => {
            for (var i = 0; i < m1.W.Length; i++) {
                m1.DW[i] += output.DW[i];
                m2.DW[i] += output.DW[i];
            }
        });
        BackProp.Add(bvc);
    }
    return output;
}
public static int SampleWeighted(double[] p) {
    // Samples an index from the weight vector p (treated as a probability
    // distribution): returns the first index whose cumulative mass reaches r.
    var r = Tembo.Random();
    var cumulative = 0.0;
    for (var i = 0; i < p.Length; i++) {
        cumulative += p[i];
        if (cumulative >= r) {
            return i;
        }
        // fallback: floating-point round-off can keep the cumulative sum
        // below r at the end; return the last index in that case
        if (i == p.Length - 1) {
            return i;
        }
    }
    // only reachable when p is empty
    Tembo.Assert(false, "'wtf'");
    return 0;
}
public Matrix Eltmul(Matrix m1, Matrix m2) {
    // Element-wise (Hadamard) product: output[i] = m1[i] * m2[i].
    //
    // FIX: the backward pass used m2.DW[i] (the other operand's *gradient*)
    // where the product rule requires m2.W[i] (its *value*):
    // dL/d(m1[i]) += m2[i] * dL/d(out[i]), symmetric for m2.
    Tembo.Assert(m1.W.Length == m2.W.Length);
    var output = new Matrix(m1.Rows, m1.Columns);
    for (var i = 0; i < m1.W.Length; i++) {
        output.W[i] = m1.W[i] * m2.W[i];
    }
    if (NeedsBackPropagation) {
        var bvc = new Action(() => {
            for (var i = 0; i < m1.W.Length; i++) {
                m1.DW[i] += m2.W[i] * output.DW[i];
                m2.DW[i] += m1.W[i] * output.DW[i];
            }
        });
        BackProp.Add(bvc);
    }
    return output;
}