// Trains on a single Matrix sample with an integer class label.
// y == -1 means "no target": the one-hot tensor stays all-zero.
public void TrainWithTeach(Matrix x, int y, double norm = 1e-3, double moment = 0.0, double lambda = 0.0, Optimizer optimizer = Optimizer.SGD)
{
    Tensor4 X = new Tensor4(x.width, x.height, 1, 1);
    X.elements = x.elements; // share the underlying buffer, no copy

    Tensor4 Y = new Tensor4(output.width, output.height, output.deep, 1);
    if (y != -1)
    {
        Y[0, 0, 0, y] = 1.0; // one-hot target along the output width
    }

    BackwardBase(X, Y, norm, moment, lambda, optimizer);
}
// Trains on a Vector sample; additionally writes `val` into the last output
// element of the first Hand layer (a hand-fed feature) before backprop runs.
public void TrainWithTeach(Vector x, int y, double val, double norm = 1e-3, double moment = 0.0, double lambda = 0.0, Optimizer optimizer = Optimizer.SGD)
{
    Tensor4 X = new Tensor4(x.Length, 1, 1, 1);
    X.elements = x.elements;

    Tensor4 Y = new Tensor4(output.width, output.height, output.deep, 1);
    if (y != -1)
    {
        Y[0, 0, 0, y] = 1.0;
    }

    // Inject the hand-fed scalar into the first Hand layer found.
    for (int i = 0; i < layers.Count; i++)
    {
        if (layers[i] is Hand)
        {
            layers[i].output.elements[layers[i].output.elements.Length - 1] = val;
            break;
        }
    }

    BackwardBase(X, Y, norm, moment, lambda, optimizer);
}
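// Usage illustration for the Vector overload above (a hedged sketch; `net`,
// `features` and `classIndex` are hypothetical caller-side names, not part
// of this class):
//
//   net.TrainWithTeach(features, classIndex, val: 0.5);
//
// writes 0.5 into the Hand layer's last output element, then backpropagates.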
// Trains on an explicit (input, target) tensor pair.
public void TrainWithTeach(Tensor4 x, Tensor4 y, double norm = 1e-3, double moment = 0.0, double lambda = 0.0, Optimizer optimizer = Optimizer.SGD)
{
    BackwardBase(x, y, norm, moment, lambda, optimizer);
}
// Trains on a batched tensor with one class index per batch element.
// Supports two output layouts: classes along width (height * deep == 1)
// or classes along depth (height * width == 1). Note the asymmetry kept
// from the original code: a label of -1 is silently skipped in the width
// layout but rejected in the depth layout.
public void TrainWithTeach(Tensor4 x, int[] y, double norm = 1e-3, double moment = 0.0, double lambda = 0.0, Optimizer optimizer = Optimizer.SGD)
{
    Tensor4 Y = new Tensor4(output.width, output.height, output.deep, output.bs);
    if (output.width != 1 && output.height * output.deep == 1)
    {
        for (int i = 0; i < y.Length; i++)
        {
            if (y[i] != -1)
            {
                Y[i, 0, 0, y[i]] = 1.0;
            }
        }
    }
    else if (output.deep != 1 && output.height * output.width == 1)
    {
        for (int i = 0; i < y.Length; i++)
        {
            if (y[i] != -1)
            {
                Y[i, y[i], 0, 0] = 1.0;
            }
            else
            {
                throw new Exception("Label -1 is not supported for the depth output layout.");
            }
        }
    }

    BackwardBase(x, Y, norm, moment, lambda, optimizer);
}
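// A minimal convenience sketch (not part of the original API): loops the
// int[] overload above over a list of mini-batches. The method itself and
// the names `samples` and `labels` are illustrative assumptions.
public void TrainBatchesWithTeach(Tensor4[] samples, int[][] labels, double norm = 1e-3, Optimizer optimizer = Optimizer.SGD)
{
    for (int i = 0; i < samples.Length; i++)
    {
        // Entries of -1 leave the corresponding target row all-zero (width layout).
        TrainWithTeach(samples[i], labels[i], norm, optimizer: optimizer);
    }
}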
void BackwardBase(Tensor4 x, Tensor4 y, double norm, double moment, double lambda, Optimizer optimizer)
{
    Tensor4[] grads, weights;
    Vector E;
    Matrix A;
    Vector b;

    switch (optimizer)
    {
        #region SGD
        case Optimizer.SGD:
            CalcDelts(x, y);
            CalcGrads(lambda);
            Train(norm, moment);
            break;
        #endregion

        #region ConjGrads
        case Optimizer.ConjGrads:
            CalcDelts(x, y);
            grads = CalcGrads(lambda);
            weights = GetWeights();

            // Lazily allocate the previous-gradient and search-direction buffers.
            if (lastGrads == null)
            {
                lastGrads = new Tensor4[layers.Count];
                lastB = new Tensor4[layers.Count];
                for (int i = 0; i < lastGrads.Length; i++)
                {
                    if (grads[i] != null)
                    {
                        lastGrads[i] = new Tensor4(grads[i].width, grads[i].height, grads[i].deep, grads[i].bs);
                        lastB[i] = new Tensor4(grads[i].width, grads[i].height, grads[i].deep, grads[i].bs);
                    }
                }
            }

            for (int i = 0; i < layers.Count; i++)
            {
                if (grads[i] != null)
                {
                    // Fletcher-Reeves-style coefficient; note the classic formula
                    // uses squared norms, this code uses plain Euclidean norms.
                    double lG_l2 = lastGrads[i].EuclidNorm();
                    double w = (lG_l2 != 0.0) ? grads[i].EuclidNorm() / lG_l2 : 0.0;
                    lastB[i] = grads[i] + w * lastB[i]; // new search direction
                    weights[i] -= norm * lastB[i];
                }
            }

            lastGrads = grads;
            SetWeights(weights);
            break;
        #endregion

        #region Adam
        case Optimizer.Adam:
            CalcDelts(x, y);
            grads = CalcGrads(lambda);
            weights = GetWeights();

            // Lazily allocate the first- and second-moment estimates.
            if (m == null || v == null)
            {
                m = new Tensor4[grads.Length];
                v = new Tensor4[grads.Length];
                for (int i = 0; i < grads.Length; i++)
                {
                    if (grads[i] != null)
                    {
                        m[i] = new Tensor4(grads[i].width, grads[i].height, grads[i].deep, grads[i].bs);
                        v[i] = new Tensor4(grads[i].width, grads[i].height, grads[i].deep, grads[i].bs);
                    }
                }
            }

            Parallel.For(0, grads.Length, i =>
            {
                if (grads[i] != null)
                {
                    m[i] = b1 * m[i] + (1.0 - b1) * grads[i];
                    v[i] = b2 * v[i] + (1.0 - b2) * (grads[i] * grads[i]);
                    // NOTE: this applies the step-1 bias correction on every
                    // update; full Adam divides by (1 - b1^t) and (1 - b2^t)
                    // using a running step counter t.
                    var M = m[i] / (1.0 - b1);
                    var V = v[i] / (1.0 - b2);
                    weights[i] -= norm * M / (V.ElementsPow(0.5) + e);
                }
            });

            SetWeights(weights);
            break;
        #endregion

        #region Marat
        case Optimizer.Marat:
            CalcDelts(x, y);
            grads = CalcGrads(lambda);
            weights = GetWeights();

            int d = x.bs;
            E = CalcErrRootMSEBase2(x, y);

            for (int i = 0; i < weights.Length; i++)
            {
                if (grads[i] != null)
                {
                    if (d < weights[i].dhw)
                    {
                        throw new Exception("Not enough training samples.");
                    }

                    A = new Matrix(weights[i].dhw, weights[i].dhw);
                    b = new Vector(weights[i].dhw);
                    var w = layers[i].WeightsNeurons();
                    for (int n = 0; n < w.Length; n++)
                    {
                        // TODO: unfinished - the per-neuron system is never assembled.
                    }
                }
            }

            /*
            k = 0;
            for (int i = 0; i < weights.Length; i++)
            {
                if (grads[i] != null)
                {
                    for (int j = 0; j < weights[i].dhw; j++)
                    {
                        weights[i].elements[j] = a[k++];
                    }
                }
            }
            */

            SetWeights(weights);
            break;
        #endregion

        #region Taylor
        case Optimizer.Taylor:
            CalcDelts(x, y);
            grads = CalcGrads(lambda);
            weights = GetWeights();

            E = CalcErrRootMSEBase2(x, y);
            if (E.Length != 1)
            {
                throw new Exception("CalcErrRootMSEBase2 must return a scalar error here.");
            }

            for (int i = 1; i < grads.Length; i++)
            {
                if (grads[i] != null)
                {
                    for (int j = 0; j < grads[i].elements.Length; j++)
                    {
                        var grad = E[0] / grads[i].elements[j];
                        if (!Double.IsNaN(grad) && !Double.IsInfinity(grad))
                        {
                            // Per-element update; the original subtracted this
                            // scalar from the whole tensor, which looks like a bug.
                            weights[i].elements[j] -= norm * grad;
                        }
                    }
                }
            }

            SetWeights(weights);
            break;
        #endregion

        #region NonIteratorLinear
        case Optimizer.NonIteratorLinear:
            // Closed-form least squares for the last (linear) layer: for each
            // output neuron i, solve the normal equations (X^T X) w = X^T y.
            int wN = layers[layers.Count - 1].weights.height;
            var input = x;
            for (int i = 0; i < layers[layers.Count - 1].weights.width; i++)
            {
                A = new Matrix(wN, wN);
                b = new Vector(wN);

                // b = X^T y for output neuron i.
                for (int j = 0; j < b.Length; j++)
                {
                    for (int k = 0; k < input.bs; k++)
                    {
                        b[j] += y[k, 0, 0, i] * input[k, 0, 0, j];
                    }
                }

                // A = X^T X (Gram matrix over the batch; symmetric, so no transpose needed).
                for (int q = 0; q < wN; q++)
                {
                    for (int n = 0; n < wN; n++)
                    {
                        for (int k = 0; k < input.bs; k++)
                        {
                            A[q, n] += input[k, 0, 0, n] * input[k, 0, 0, q];
                        }
                    }
                }

                var result = matlib.SolvingSystems.SolvingSLAY(A, b);
                for (int j = 0; j < wN; j++)
                {
                    layers[layers.Count - 1].weights[0, 0, j, i] = result[j];
                }
            }
            break;
        #endregion
    }

    //if (lambda != 0.0)
    //    Regularization(lambda);
}
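// Epoch-loop sketch (hypothetical helper, not part of the original class):
// repeatedly feeds the same (x, y) pair through TrainWithTeach. The method
// name and the default optimizer choice are illustrative assumptions.
public void TrainEpochs(Tensor4 x, Tensor4 y, int epochs, double norm = 1e-3, Optimizer optimizer = Optimizer.Adam)
{
    for (int ep = 0; ep < epochs; ep++)
    {
        TrainWithTeach(x, y, norm, optimizer: optimizer);
    }
}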