/// <summary>
/// Trains a two-layer neural network by minimizing <c>NNCostFunction</c> from
/// randomly initialized weights.
/// </summary>
/// <param name="X">The feature set Matrix.</param>
/// <param name="y">The result set Matrix.</param>
/// <param name="input_layer_size">The size of the input layer.</param>
/// <param name="hidden_layer_size">The size of the hidden layer.</param>
/// <param name="labels">A list of classification labels; its length is used as the output layer size.</param>
/// <param name="lambda">The regularization parameter which helps reduce overfitting.</param>
/// <param name="maxIterations">The maximum number of iterations to run the minimization function.</param>
/// <returns>
/// A two-element array: element 0 is the minimized parameter vector reshaped with
/// <paramref name="hidden_layer_size"/> and <c>input_layer_size + 1</c> as dimensions,
/// element 1 is reshaped with the label count and <c>hidden_layer_size + 1</c>.
/// </returns>
public static Matrix[] Train(Matrix X, Matrix y, int input_layer_size, int hidden_layer_size, double[] labels, double lambda, int maxIterations = 50)
{
    int outputLayerSize = labels.Length;

    // Random starting weights for both layers, unrolled and stacked into a single vector.
    Matrix startTheta1 = RandInitializeWeights(input_layer_size, hidden_layer_size);
    Matrix startTheta2 = RandInitializeWeights(hidden_layer_size, outputLayerSize);
    Matrix startParameters = Matrix.Join(startTheta1.Unrolled, startTheta2.Unrolled, MatrixDimensions.Rows);

    MinimizeOptions settings = new MinimizeOptions
    {
        InputLayerSize = input_layer_size,
        HiddenLayerSize = hidden_layer_size,
        Labels = labels,
        RegularizationParameter = lambda,
        MaxIterations = maxIterations
    };

    int iterationsUsed = 0;
    Matrix trained = LogisticRegression.Minimize(NNCostFunction, X, y, startParameters, settings, out iterationsUsed);

    // The second layer's weights start right after the first layer's block in the vector.
    int theta2Offset = hidden_layer_size * (input_layer_size + 1);
    return new Matrix[]
    {
        Matrix.Reshape(trained, 0, hidden_layer_size, input_layer_size + 1),
        Matrix.Reshape(trained, theta2Offset, outputLayerSize, hidden_layer_size + 1)
    };
}
/// <summary>
/// Trains one regularized logistic regression classifier per label and collects
/// the learned parameters.
/// </summary>
/// <param name="X">The features Matrix, one row per example and one column per feature.
/// A leading column of ones is joined on before training.</param>
/// <param name="y">The results Matrix; it is compared against each label in turn.</param>
/// <param name="labels">The labels to classify against.</param>
/// <param name="lambda">The regularization parameter.</param>
/// <param name="maxIterations">The maximum number of minimizer iterations per label.</param>
/// <returns>A Matrix with one row per label and <c>X.Columns + 1</c> columns, where each
/// row is the learned set of parameters for that particular class.</returns>
public static Matrix OneVsAll(Matrix X, Matrix y, double[] labels, double lambda, int maxIterations = 50)
{
    int exampleCount = X.Rows;
    int featureCount = X.Columns;

    // Join a column of ones in front of the features.
    Matrix design = Matrix.Join(Matrix.Ones(exampleCount, 1), X, MatrixDimensions.Columns);

    int classCount = labels.Length;
    Matrix learned = new Matrix(classCount, featureCount + 1);

    MinimizeOptions settings = new MinimizeOptions
    {
        RegularizationParameter = lambda,
        MaxIterations = maxIterations
    };

    for (int labelIndex = 0; labelIndex < classCount; labelIndex++)
    {
        Matrix startTheta = new Matrix(featureCount + 1, 1);
        int iterationsUsed = 0;

        // Result of comparing y with the current label.
        Matrix isCurrentLabel = y == labels[labelIndex];
        Matrix fitted = Minimize(CostFunction, design, isCurrentLabel, startTheta, settings, out iterationsUsed);

        learned.SetRow(labelIndex, fitted.Transpose);
    }

    return learned;
}
/// <summary>
/// Console demo for the NeuralNetwork helpers: runs <c>Predict</c>,
/// <c>SigmoidGradient</c> and <c>NNCostFunction</c> on small hard-coded inputs and
/// prints the results (next to the expected values where the demo states them).
/// </summary>
static void NeuralNetworkDemo()
{
    WriteH1("Neural Network Regression");

    #region PredictNN
    WriteH2("Predict Neural Network");
    Matrix Theta1 = new Matrix(new double[,]
    {
        { 0.00000, 0.90930, -0.75680 },
        { 0.47943, 0.59847, -0.97753 },
        { 0.84147, 0.14112, -0.95892 },
        { 0.99749, -0.35078, -0.70554 }
    });
    Matrix Theta2 = new Matrix(new double[,]
    {
        { 0.00000, 0.93204, 0.67546, -0.44252, -0.99616 },
        { 0.29552, 0.99749, 0.42738, -0.68777, -0.92581 },
        { 0.56464, 0.97385, 0.14112, -0.87158, -0.77276 },
        { 0.78333, 0.86321, -0.15775, -0.97753, -0.55069 }
    });
    Matrix X = new Matrix(new double[,]
    {
        { 0.84147, 0.41212 },
        { 0.90930, -0.54402 },
        { 0.14112, -0.99999 },
        { -0.75680, -0.53657 },
        { -0.95892, 0.42017 },
        { -0.27942, 0.99061 },
        { 0.65699, 0.65029 },
        { 0.98936, -0.28790 }
    });
    Matrix prediction = NeuralNetwork.Predict(Theta1, Theta2, X);
    Console.WriteLine("Target: 3.00 ; 0.00 ; 0.00 ; 3.00 ; 3.00 ; 3.00 ; 3.00 ; 1.00 ;\nActual: {0}", prediction.ToString().Replace("\n", "; "));
    #endregion

    #region Sigmoid Gradient
    Matrix a = new Matrix(new double[,] { { -1, -2, -3 } });
    X = Matrix.Join(a, Matrix.Magic(3), MatrixDimensions.Rows);
    Matrix sg = NeuralNetwork.SigmoidGradient(X);
    WriteH2("Sigmoid Gradient");
    Console.WriteLine(sg);
    #endregion

    #region Neural Network Cost Function
    Matrix nn = new Matrix(new double[,]
    {
        { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8 }
    });
    int il = 2;
    int hl = 2;
    double[] labels = new double[] { 1, 2, 3, 4 };
    X = new Matrix(new double[,]
    {
        { 0.5403, -0.41615 },
        { -0.98999, -0.65364 },
        { 0.28366, 0.96017 }
    });
    Matrix y = new Matrix(new double[,] { { 4 }, { 2 }, { 3 } });

    MinimizeOptions options = new MinimizeOptions();
    options.InputLayerSize = il;
    options.HiddenLayerSize = hl;
    options.Labels = labels;
    options.RegularizationParameter = 4;

    Tuple<double, Matrix> result = NeuralNetwork.NNCostFunction(X, y, nn, options);
    WriteH2("Neural Network Cost Function");
    // The message must stay on one source line: a regular interpolated string
    // cannot contain a raw line break.
    Console.WriteLine($"J: {result.Item1} (Expected Result: 19.474)");
    Console.WriteLine(result.Item2);
    #endregion
}
/// <summary>
/// Console demo for the LogisticRegression helpers: sigmoid, prediction, plain and
/// regularized cost, one-vs-all training and one-vs-all prediction, each run on
/// small hard-coded inputs with the result printed to the console.
/// </summary>
static void LogisticRegressionDemo()
{
    // Renders a prediction Matrix on a single line by replacing line feeds with "; ".
    Func<Matrix, string> onOneLine = matrix => matrix.ToString().Replace("\n", "; ");

    WriteH1("Logistic Regression");

    #region Sigmoid Function
    WriteH2("Sigmoid Function");

    Matrix sigmoidInput = new Matrix(new double[,] { { 1200000 } });
    Matrix sigmoidOutput = LogisticRegression.Sigmoid(sigmoidInput);
    Console.Write("Target: 1.0 Actual: {0}", sigmoidOutput);

    sigmoidInput[0, 0] = -25000;
    sigmoidOutput = LogisticRegression.Sigmoid(sigmoidInput);
    Console.Write("Target: 0.0 Actual: {0}", sigmoidOutput);

    sigmoidInput[0, 0] = 0;
    sigmoidOutput = LogisticRegression.Sigmoid(sigmoidInput);
    Console.Write("Target: 0.5 Actual: {0}", sigmoidOutput);

    Matrix sigmoidRow = new Matrix(new double[,] { { 4, 5, 6 } });
    sigmoidOutput = LogisticRegression.Sigmoid(sigmoidRow);
    Console.Write("Target: 0.98 0.99 0.997 Actual: {0}", sigmoidOutput);
    #endregion

    #region Predict
    WriteH2("Prediction");

    Matrix predictFeatures = new Matrix(new double[,]
    {
        { 1, 1 },
        { 1, 2.5 },
        { 1, 3 },
        { 1, 4 }
    });
    Matrix predictTheta = new Matrix(new double[,] { { -3.5 }, { 1.3 } });
    Matrix predicted = LogisticRegression.Predict(predictFeatures, predictTheta);
    Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ; 1.0 ; Actual: {0}", onOneLine(predicted));

    Matrix magicFeatures = Matrix.Magic(3);
    Matrix magicTheta = new Matrix(new double[,] { { 4 }, { 3 }, { -8 } });
    predicted = LogisticRegression.Predict(magicFeatures, magicTheta);
    Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ; Actual: {0}", onOneLine(predicted));
    #endregion

    #region Cost Function
    WriteH2("Cost Function");

    Matrix costFeatures = Matrix.AddIdentityColumn(Matrix.Magic(3));
    Matrix costResults = new Matrix(new double[,] { { 1 }, { 0 }, { 1 } });
    Matrix costTheta = new Matrix(new double[,] { { -2 }, { -1 }, { 1 }, { 2 } });
    Tuple<double, Matrix> costAndGradient = LogisticRegression.CostFunction(costFeatures, costResults, costTheta);
    Console.WriteLine("Target: 4.6832 ; Actual: {0}", costAndGradient.Item1);
    #endregion

    #region Regularized Cost Function
    WriteH2("Regularized Cost Function");

    MinimizeOptions regularized = new MinimizeOptions { RegularizationParameter = 3 };

    // Same inputs as the unregularized case above, now with the regularization term.
    costAndGradient = LogisticRegression.CostFunction(costFeatures, costResults, costTheta, regularized);
    Console.WriteLine("Target: 7.6832 ; Actual: {0}", costAndGradient.Item1);

    Matrix secondFeatures = new Matrix(new double[,]
    {
        { 1.0, 0.1, 0.6, 1.1 },
        { 1.0, 0.2, 0.7, 1.2 },
        { 1.0, 0.3, 0.8, 1.3 },
        { 1.0, 0.4, 0.9, 1.4 },
        { 1.0, 0.5, 1.0, 1.5 }
    });
    Matrix secondResults = new Matrix(new double[,] { { 1.0 }, { 0.0 }, { 1.0 }, { 0.0 }, { 1.0 } });
    Matrix secondTheta = new Matrix(new double[,] { { -2 }, { -1 }, { 1 }, { 2 } });
    costAndGradient = LogisticRegression.CostFunction(secondFeatures, secondResults, secondTheta, regularized);
    Console.WriteLine("Target: 2.5348 ; Actual: {0}", costAndGradient.Item1);
    #endregion

    #region OneVsAll
    WriteH2("One vs All");

    Matrix trainingFeatures = new Matrix(new double[,]
    {
        { 8.0, 1.0, 6.0 },
        { 3.0, 5.0, 7.0 },
        { 4.0, 9.0, 2.0 },
        { 0.84147, 0.90930, 0.14112 },
        { 0.54030, -0.41615, -0.98999 }
    });
    Matrix trainingResults = new Matrix(new double[,] { { 1.0 }, { 2.0 }, { 2.0 }, { 1.0 }, { 3.0 } });

    // Disabled debugging aid, kept from the original:
    //Matrix testTheta = new Matrix(4, 1);
    //Matrix X0 = Matrix.Join(Matrix.Ones(5, 1), X, MatrixDimensions.Columns);
    //cost = LogisticRegression.CostFunction(X0, y==1, testTheta, 0.1);
    //Console.WriteLine(cost.Item1);
    //Console.WriteLine(cost.Item2);

    double[] classLabels = new double[] { 1.0, 2.0, 3.0 };
    Matrix trainedThetas = LogisticRegression.OneVsAll(trainingFeatures, trainingResults, classLabels, 0.1);
    Console.WriteLine(trainedThetas);
    #endregion

    #region PredictOneVsAll
    WriteH2("Predict One vs All");

    Matrix unseenFeatures = new Matrix(new double[,]
    {
        { 1.0, 7.0 },
        { 4.0, 5.0 },
        { 7.0, 8.0 },
        { 1.0, 4.0 }
    });
    Matrix fixedThetas = new Matrix(new double[,]
    {
        { 1.0, -6.0, 3.0 },
        { -2.0, 4.0, -3.0 }
    });
    predicted = LogisticRegression.PredictOneVsAll(fixedThetas, unseenFeatures);
    Console.WriteLine("Target: 0; 1; 1; 0; Actual: {0}", onOneLine(predicted));
    #endregion
}
/// <summary>
/// Calculate the regularized cost function for logistic regression.
/// </summary>
/// <param name="X">The original features Matrix.</param>
/// <param name="y">The results Matrix.</param>
/// <param name="theta">The theta values to apply to each feature. The first element
/// is excluded from the regularization term.</param>
/// <param name="options">Supplies <c>RegularizationParameter</c> (lambda), which helps
/// reduce overfitting. Note that using values that are too high will lead to
/// underfitting.</param>
/// <returns>The cost of using the given value of theta, and the gradient of
/// the cost (useful for iterative minimization functions).</returns>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
public static Tuple<double, Matrix> CostFunction(Matrix X, Matrix y, Matrix theta, MinimizeOptions options)
{
    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    double lambda = options.RegularizationParameter;
    double m = (double)X.Rows;

    // Work on a separate Matrix built from theta because its first element is zeroed
    // below (assumes the Matrix(Matrix) constructor copies its argument).
    Matrix t = new Matrix(theta);
    Matrix h = Sigmoid(X * t);                                      // Hypothesis

    // Unregularized cost: (1/m) * (-y' * log(h) - (1 - y)' * log(1 - h))
    double part1 = (-y.Transpose * Matrix.ElementLog(h)).SumAllElements;
    double part2 = ((1 - y).Transpose * Matrix.ElementLog(1 - h)).SumAllElements;
    double J = (1.0 / m) * (part1 - part2);

    // Zero the first parameter so it contributes neither to the penalty nor to the
    // regularization part of the gradient.
    t[0, 0] = 0;
    double theta_sq = (t.Transpose * t).SumAllElements;
    J += ((lambda / (2.0 * m)) * theta_sq);

    // Error vector, computed once and used only for the gradient.
    Matrix error = h - y;
    Matrix grad = ((1 / m) * (X.Transpose * error)) + ((lambda / m) * t);

    return Tuple.Create(J, grad);
}
/// <summary>
/// Minimize the cost function for an initial set of values, using a conjugate
/// gradient search with line searches (adapted from fmincg.m).
/// </summary>
/// <param name="f">A function that calculates the cost, and a vector
/// of partial derivatives.</param>
/// <param name="Features">The features being assessed for each label; passed through to <paramref name="f"/>.</param>
/// <param name="y">A truth table containing '1' if it matches the label
/// being investigated, or '0' if it's not a match; passed through to <paramref name="f"/>.</param>
/// <param name="theta">Initial values for theta.</param>
/// <param name="options">Minimization settings. <c>MaxIterations</c> caps the number of
/// iterations (100 is used when it is zero or negative); the whole object is also
/// handed to <paramref name="f"/> on every evaluation.</param>
/// <param name="i">The final number of iterations used to find a result.</param>
/// <returns>The solution for theta for a given set of labels.</returns>
/// <exception cref="ArgumentNullException"><paramref name="f"/> or <paramref name="options"/> is null.</exception>
public static Matrix Minimize(MinimizeFunction f, Matrix Features, Matrix y, Matrix theta, MinimizeOptions options, out int i)
{
    if (f == null)
    {
        throw new ArgumentNullException(nameof(f));
    }
    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    int maxIterations = options.MaxIterations;

    // length is always positive here, so the "length < 0" (count epochs) branches
    // below never fire; they are kept to mirror the structure of fmincg.m.
    int length = maxIterations > 0 ? maxIterations : 100;

    // Most of the below is adapted from fmincg.m by Carl Edward Rasmussen.
    // Original Copyright notice:
    // -------------------------------------------------------------------------
    // Copyright(C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002 - 02 - 13
    //
    // (C)Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
    //
    // Permission is granted for anyone to copy, use, or modify these
    // programs and accompanying documents for purposes of research or
    // education, provided this copyright notice is retained, and note is
    // made of any changes that have been made.
    //
    // These programs and documents are distributed without any warranty,
    // express or implied. As the programs were written for research
    // purposes only, they have not been tested to the degree that would be
    // advisable in any important application. All use of these programs is
    // entirely at the user's own risk.
    // -------------------------------------------------------------------------
    // NOTE: Original code was written in Octave, while here it's obviously been
    // re-written in C#. There are likely a few differences and errors with this
    // implementation, which will hopefully be ironed out in time. These are
    // entirely my own fault, and not the original author's.

    const double RHO = 0.01;   // a bunch of constants for line searches
    const double SIG = 0.5;    // RHO and SIG are the constants in the Wolfe-Powell conditions
    const double INT = 0.1;    // don't reevaluate within 0.1 of the limit of the current bracket
    const double EXT = 3.0;    // extrapolate maximum 3 times the current bracket
    const double MAX = 20;     // max 20 function evaluations per line search
    const double RATIO = 100;  // maximum allowed slope ratio
    double red = 1;

    i = 0;                     // zero the run length counter
    bool ls_failed = false;    // no previous line search has failed

    // [f1 df1] = eval(argstr);  -- get function value and gradient
    Tuple<double, Matrix> cost1 = f(Features, y, theta, options);
    double f1 = cost1.Item1;
    Matrix df1 = cost1.Item2;
    i = i + (length < 0 ? 1 : 0);                                  // count epochs

    Matrix s = -df1;                                               // search direction is steepest
    double d1 = (-Matrix.MultiplyTransposeBy(s))[0, 0];            // this is the slope (Octave: -s'*s)
    double z1 = red / (1.0 - d1);                                  // initial step is red/(|s|+1)
    double z2, A, B;

    while (i < Math.Abs(length))                                   // while not finished
    {
        i = i + (length > 0 ? 1 : 0);                              // count iterations

        Matrix theta0 = new Matrix(theta);
        double f0 = f1;
        Matrix df0 = new Matrix(df1);                              // make a copy of current values

        theta = theta + z1 * s;                                    // begin line search
        Tuple<double, Matrix> cost2 = f(Features, y, theta, options);
        double f2 = cost2.Item1;
        Matrix df2 = new Matrix(cost2.Item2);
        i = i + (length < 0 ? 1 : 0);                              // count epochs

        // Octave original: d2 = df2'*s (helper is defined elsewhere in this file).
        double d2 = MultiplyFirstRowTransposeBy(df2, s);
        double f3 = f1;
        double d3 = d1;
        double z3 = -z1;                                           // initialize point 3 equal to point 1
        double M = (length > 0) ? MAX : Math.Min(MAX, -length - i);
        bool success = false;
        double limit = -1;                                         // initialize quantities

        while (true)
        {
            while (((f2 > f1 + z1 * RHO * d1) || (d2 > -SIG * d1)) && (M > 0))
            {
                limit = z1;                                        // tighten the bracket
                z2 = 0;
                if (f2 > f1)
                {
                    z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3);   // quadratic fit
                }
                else
                {
                    A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3);                 // cubic fit
                    B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2);
                    z2 = (Math.Sqrt(B * B - A * d2 * z3 * z3) - B) / A;     // numerical error possible - ok!
                }

                if ((double.IsNaN(z2)) || (double.IsInfinity(z2)))
                {
                    z2 = z3 / 2;                                   // if we had a numerical problem then bisect
                }

                z2 = Math.Max(Math.Min(z2, INT * z3), (1 - INT) * z3);      // don't accept too close to limits
                z1 = z1 + z2;                                      // update the step
                theta = theta + z2 * s;

                // [f2 df2] = eval(argstr);
                cost2 = f(Features, y, theta, options);
                f2 = cost2.Item1;
                df2 = new Matrix(cost2.Item2);
                M = M - 1;
                i = i + (length < 0 ? 1 : 0);                      // count epochs
                d2 = MultiplyFirstRowTransposeBy(df2, s);
                z3 = z3 - z2;                                      // z3 is now relative to the location of z2
            }

            if ((f2 > (f1 + z1 * RHO * d1)) || (d2 > (-SIG * d1)))
            {
                break;                                             // this is a failure
            }
            else if (d2 > (SIG * d1))
            {
                success = true;
                break;                                             // success
            }
            else if (M == 0)
            {
                break;                                             // failure
            }

            A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3);                // make cubic extrapolation
            B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2);
            z2 = -d2 * z3 * z3 / (B + Math.Sqrt(B * B - A * d2 * z3 * z3)); // num. error possible - ok!

            // Octave: if ~isreal(z2) || isnan(z2) || isinf(z2) || z2 < 0
            // Math.Sqrt of a negative number yields NaN, so the NaN test also covers
            // the "not real" case.
            if (double.IsNaN(z2) || double.IsInfinity(z2) || z2 < 0)        // num prob or wrong sign?
            {
                if (limit < -0.5)                                  // if we have no upper limit
                {
                    z2 = z1 * (EXT - 1);                           // then extrapolate the maximum amount
                }
                else
                {
                    z2 = (limit - z1) / 2;                         // otherwise bisect
                }
            }
            else if ((limit > -0.5) && (z2 + z1 > limit))          // extrapolation beyond max?
            {
                z2 = (limit - z1) / 2;                             // bisect
            }
            else if ((limit < -0.5) && (z2 + z1 > z1 * EXT))       // extrapolation beyond limit
            {
                z2 = z1 * (EXT - 1.0);                             // set to extrapolation limit
            }
            else if (z2 < -z3 * INT)
            {
                z2 = -z3 * INT;
            }
            else if ((limit > -0.5) && (z2 < (limit - z1) * (1.0 - INT)))   // too close to limit?
            {
                z2 = (limit - z1) * (1.0 - INT);
            }

            f3 = f2;
            d3 = d2;
            z3 = -z2;                                              // set point 3 equal to point 2
            z1 = z1 + z2;
            theta = theta + z2 * s;                                // update current estimates

            // [f2 df2] = eval(argstr);
            cost2 = f(Features, y, theta, options);
            f2 = cost2.Item1;
            df2 = new Matrix(cost2.Item2);
            M = M - 1;
            i = i + (length < 0 ? 1 : 0);                          // count epochs
            d2 = MultiplyFirstRowTransposeBy(df2, s);
        }                                                          // end of line search

        if (success)                                               // if line search succeeded
        {
            f1 = f2;

            // Polak-Ribiere direction
            s = (df2.Transpose * df2 - df1.Transpose * df2)[0, 0] / (df1.Transpose * df1)[0, 0] * s - df2;

            Matrix tmp = new Matrix(df1);
            df1 = df2;
            df2 = tmp;                                             // swap derivatives

            // Octave: d2 = df1'*s
            d2 = (Matrix.MultiplyTransposeBy(df1, s))[0, 0];
            if (d2 > 0)                                            // new slope must be negative
            {
                s = -df1;                                          // otherwise use steepest direction
                // Octave: d2 = -s'*s
                d2 = MultiplyFirstRowTransposeBy(s, true);
            }

            // Slope ratio but max RATIO. The tiny subtraction keeps the divisor from
            // being exactly zero (fmincg.m subtracts realmin at this point).
            z1 = z1 * Math.Min(RATIO, d1 / (d2 - double.Epsilon));
            d1 = d2;
            ls_failed = false;                                     // this line search did not fail
        }
        else
        {
            theta = new Matrix(theta0);
            f1 = f0;
            df1 = df0;                                             // restore point from before failed line search

            if (ls_failed || i > Math.Abs(length))                 // line search failed twice in a row
            {
                break;                                             // or we ran out of time, so we give up
            }

            Matrix tmp = new Matrix(df1);
            df1 = df2;
            df2 = tmp;                                             // swap derivatives
            s = -df1;                                              // try steepest
            d1 = MultiplyFirstRowTransposeBy(s, true);
            z1 = 1 / (1 - d1);
            ls_failed = true;                                      // this line search failed
        }
    }

    return theta;
}
/// <summary>
/// The Neural Network cost function for a two layer classification Neural Network.
/// </summary>
/// <param name="X">The feature set Matrix.</param>
/// <param name="y">The result set Matrix.</param>
/// <param name="nn_parameters">The unrolled parameter vector that contains all the weights:
/// the first layer's weights are read from the start, the second layer's from position
/// <c>HiddenLayerSize * (InputLayerSize + 1)</c>.</param>
/// <param name="options">Supplies <c>InputLayerSize</c> (number of nodes in the input layer),
/// <c>HiddenLayerSize</c> (number of nodes in the hidden layer), <c>Labels</c> (the list of
/// classification labels) and <c>RegularizationParameter</c> (lambda, which helps reduce
/// overfitting; values that are too high will lead to underfitting).</param>
/// <returns>The cost of using the given value of theta, and the gradient of
/// the cost (useful for iterative minimization functions). The gradient is the
/// unrolled first-layer gradient joined with the unrolled second-layer gradient.</returns>
/// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
public static Tuple<double, Matrix> NNCostFunction(Matrix X, Matrix y, Matrix nn_parameters, MinimizeOptions options)
{
    if (options == null)
    {
        throw new ArgumentNullException(nameof(options));
    }

    double lambda = options.RegularizationParameter;
    int input_layer_size = options.InputLayerSize;
    int hidden_layer_size = options.HiddenLayerSize;
    double[] labels = options.Labels;
    int num_labels = labels.Length;

    Matrix Theta1 = Matrix.Reshape(nn_parameters, 0, hidden_layer_size, input_layer_size + 1);
    Matrix Theta2 = Matrix.Reshape(nn_parameters, (hidden_layer_size * (input_layer_size + 1)), num_labels, hidden_layer_size + 1);

    // y_matrix has the following attributes:
    // Rows: same as the number of rows in Y -- one for each example result.
    // Columns: one for each label.
    // Values: Each row consists of zeros, except for one, which matches the
    // value of y in that row to the index of the label. For example, if there
    // are three labels (3, 6, 8), and y contains 2 rows (8, 3), then y_matrix
    // would be:
    //   0 0 1
    //   1 0 0
    Matrix y_matrix = AssignLabels(y, labels);

    // Forward pass. Add ones to the X Matrix.
    Matrix a1 = Matrix.AddIdentityColumn(X);
    Matrix z2 = a1 * Theta1.Transpose;
    Matrix a2 = LogisticRegression.Sigmoid(z2);
    a2 = Matrix.AddIdentityColumn(a2);
    Matrix z3 = a2 * Theta2.Transpose;
    Matrix a3 = LogisticRegression.Sigmoid(z3);

    Matrix log1 = Matrix.ElementLog(a3);
    Matrix log2 = Matrix.ElementLog(1 - a3);
    Matrix part1 = Matrix.ElementMultiply(-y_matrix, log1);
    Matrix part2 = Matrix.ElementMultiply((1 - y_matrix), log2);

    // Weights with their first column removed; only these are regularized.
    Matrix t0 = Theta1.RemoveColumn(0);
    Matrix t1 = Theta2.RemoveColumn(0);

    // Calculate regularization component
    double multiplier = lambda / (2 * X.Rows);
    double reg1 = Matrix.ElementPower(t0, 2).SumAllElements;
    double reg2 = Matrix.ElementPower(t1, 2).SumAllElements;
    double r = multiplier * (reg1 + reg2);

    // Calculate cost
    double costFunction = (1.0 / X.Rows) * (part1 - part2).SumAllElements + r;

    // Backpropagation
    Matrix d3 = a3 - y_matrix;
    Matrix d2 = Matrix.ElementMultiply(
        (t1.Transpose * d3.Transpose).Transpose,
        SigmoidGradient(z2));
    Matrix Delta1 = d2.Transpose * a1;
    Matrix Delta2 = d3.Transpose * a2;

    // Rebuild the weight matrices with a new Matrix(rows, 1) column in place of the
    // removed first column (assumed to be zero-filled -- TODO confirm), so that column
    // adds nothing to the regularization part of the gradient.
    Theta1 = Matrix.Join(new Matrix(t0.Rows, 1), t0, MatrixDimensions.Columns);
    Theta2 = Matrix.Join(new Matrix(t1.Rows, 1), t1, MatrixDimensions.Columns);

    double scale_value = lambda / X.Rows;
    Matrix Theta1_scaled = Theta1 * scale_value;
    Matrix Theta2_scaled = Theta2 * scale_value;
    Matrix Theta1_grad = ((Delta1 / X.Rows) + Theta1_scaled).Unrolled;
    Matrix Theta2_grad = ((Delta2 / X.Rows) + Theta2_scaled).Unrolled;

    return new Tuple<double, Matrix>(costFunction, Matrix.Join(Theta1_grad, Theta2_grad, MatrixDimensions.Rows));
}