Exemple #1
0
        /// <summary>
        /// Runs a forward pass through a trained neural network with a single hidden
        /// layer and selects, for every example row of X, the index of the classifier
        /// with the strongest output.
        /// </summary>
        /// <param name="theta_1">Trained weights connecting the input layer to the
        /// hidden layer.</param>
        /// <param name="theta_2">Trained weights connecting the hidden layer to the
        /// output layer.</param>
        /// <param name="X">A Matrix holding one example per row.</param>
        /// <returns>A Matrix (column vector, m * 1) holding the zero-based index of the
        /// most probable classification for each input row of X.</returns>
        /// <remarks>Exactly one hidden layer is supported, which is enough for many
        /// problems. Supporting an arbitrary number of layers would require
        /// generalizing this method.</remarks>
        public static Matrix Predict(Matrix theta_1, Matrix theta_2, Matrix X)
        {
            // Input layer: the examples with the identity column added.
            Matrix inputLayer = Matrix.AddIdentityColumn(X);

            // Hidden layer: weighted inputs squashed through the sigmoid.
            Matrix hiddenActivation = LogisticRegression.Sigmoid(
                Matrix.MultiplyByTranspose(theta_1, inputLayer));

            // The hidden activations are transposed before the identity column is
            // added, so the same multiply-by-transpose step can be used again.
            Matrix hiddenLayer = Matrix.AddIdentityColumn(hiddenActivation.Transpose);

            // Output layer.
            Matrix outputActivation = LogisticRegression.Sigmoid(
                Matrix.MultiplyByTranspose(theta_2, hiddenLayer));

            // Pick the winning index per example and hand it back transposed.
            Matrix winners = Matrix.MaxIndex(outputActivation);
            return winners.Transpose;
        }
Exemple #2
0
        /// <summary>
        /// End-to-end demo on the Titanic data set: loads the training and test CSV files
        /// into DataFrames, configures the column types, trains both a one-vs-all
        /// logistic regression and a neural network on the training data, predicts on
        /// the test data, and saves the results as CSV files.
        /// </summary>
        /// <remarks>All input and output paths are hard-coded under c:\temp.</remarks>
        static void TitanicDemo()
        {
            WriteH1("Testing");

            #region Dummy data importer
            // Disabled alternative that loaded a DataFrame through DataImporterDummy and
            // printed its column count and headers.
            //WriteH2("Dummy Data");
            //DataImporterDummy di = new DataImporterDummy();
            //DataFrame df = new DataFrame(di);
            //df.Load(String.Empty, true, true);

            //Console.WriteLine(df.TotalColumns);
            //foreach(string h in df.Headers)
            //{
            //    Console.WriteLine(h);
            //}
            #endregion

            #region CSV data importer
            WriteH2("CSV Data (Titanic)");
            // The same importer/exporter pair is shared by both data frames.
            DataImporterCSV di_csv = new DataImporterCSV();
            DataExporterCSV de_csv = new DataExporterCSV();

            DataFrame df_train = new DataFrame(di_csv, de_csv);
            DataFrame df_test  = new DataFrame(di_csv, de_csv);

            // The training load passes an extra "Survived" argument that the test load
            // omits -- presumably the name of the results column; confirm against
            // DataFrame.Load.
            df_train.Load(@"c:\temp\titanic.csv", true, "Survived");
            df_test.Load(@"c:\temp\titanic_test.csv", true);

            Console.WriteLine($"Total Columns (training data): {df_train.TotalColumns}");
            Console.WriteLine($"Total Columns (testing data):  {df_test.TotalColumns}");

            // Change the type of some of the training columns
            df_train.SetColumnType("pclass", DataFrameColumnType.Factors);
            df_train.SetColumnType("sex", DataFrameColumnType.Factors);
            df_train.SetColumnType("age", DataFrameColumnType.Bins);
            // Earlier, coarser binning kept for reference:
            //df_train["age"].SetBins(new double[] { 0.0, 18.0, 100.0 });
            df_train["age"].SetBins(new double[] { 0.0, 15.0, 25.0, 30.0, 40.0, 50.0, 55.0, 65.0, 75.0, 100.0 });
            df_train["age"].EmptyValue = 30.27; // Average value of known ages
            df_train.SetColumnType("fare", DataFrameColumnType.Double);
            df_train.SetColumnType("sibsp", DataFrameColumnType.Double);
            df_train.SetColumnType("parch", DataFrameColumnType.Double);

            // Add a "CabinLetter" column produced by the GetCabinLetter callback.
            df_train.CreateDataColumn("CabinLetter", GetCabinLetter);
            df_test.CreateDataColumn("CabinLetter", GetCabinLetter);    // For now, we have to explicitly set both training set
                                                                        // and test set separately.
            df_train.SetColumnType("CabinLetter", DataFrameColumnType.Factors);

            // NOTE(review): the column is named "Survived" in the Load call above but
            // "survived" here -- presumably column lookup is case-insensitive; verify.
            df_train.SetColumnType("survived", DataFrameColumnType.Double);

            // Try and match the types in the testing set
            df_test.MatchColumns(df_train);

            Console.WriteLine($"df_train hasResults? {df_train.HasResults}. df_test hasResults? {df_test.HasResults}");

            // Start calculations
            Matrix Xtrain = df_train.ExportFeatures();
            Matrix ytrain = df_train.ExportResults();

            Matrix Xtest = df_test.ExportFeatures();

            // Try Logistic Regression
            // The two labels are the possible values of the result column (0 and 1).
            // The trailing 0.1 and 1000 arguments are presumably the regularization
            // parameter and the iteration limit -- confirm against OneVsAll.
            double[] labels        = new double[] { 0.0, 1.0 };
            Matrix   lr_theta      = LogisticRegression.OneVsAll(Xtrain, ytrain, labels, 0.1, 1000);
            Matrix   lr_prediction = LogisticRegression.PredictOneVsAll(lr_theta, Xtest);

            // Try a Neural Network. The hidden layer size is the integer average of the
            // input and output layer sizes.
            int      input_layer_size  = Xtrain.Columns;
            int      output_layer_size = labels.Length;
            int      hidden_layer_size = (input_layer_size + output_layer_size) / 2;
            Matrix[] nn_theta          = NeuralNetwork.Train(Xtrain, ytrain, input_layer_size, hidden_layer_size, labels, 0.1, 1000);
            Matrix   nn_prediction     = NeuralNetwork.Predict(nn_theta[0], nn_theta[1], Xtest);

            // Exporting
            // NOTE(review): both export variables are assigned df_test itself. If
            // DataFrame is a reference type they refer to the same object, so both
            // prediction columns target one frame and the two Save calls below would
            // write identical content -- confirm whether separate copies were intended.
            DataFrame df_lr_export = df_test;
            DataFrame df_nn_export = df_test;

            // Wrap each prediction Matrix in a column tied to its export frame; the 0
            // is presumably the index of the Matrix column to use -- confirm against
            // the DataFrameColumn constructor.
            DataFrameColumn col_lr_results = new DataFrameColumn(df_lr_export, lr_prediction, 0);
            DataFrameColumn col_nn_results = new DataFrameColumn(df_nn_export, nn_prediction, 0);

            // Chained assignment: both result columns get the header "Survived".
            col_lr_results.Header = col_nn_results.Header = "Survived";

            df_lr_export.Save(@"c:\temp\lr_results.csv");
            df_nn_export.Save(@"c:\temp\nn_results.csv");
            #endregion
        }
Exemple #3
0
        /// <summary>
        /// Console walkthrough of the LogisticRegression helpers. Each region runs one
        /// helper (Sigmoid, Predict, CostFunction with and without regularization,
        /// OneVsAll, PredictOneVsAll) on a small hand-written input and prints the
        /// result; most lines also print the expected target value for comparison.
        /// </summary>
        static void LogisticRegressionDemo()
        {
            WriteH1("Logistic Regression");

            #region Sigmoid Function
            WriteH2("Sigmoid Function");

            // A 1x1 matrix whose only cell is overwritten between the first three runs.
            Matrix sigmoidInput = new Matrix(new double[, ] { { 1200000 } });
            Console.Write("Target: 1.0   Actual: {0}", LogisticRegression.Sigmoid(sigmoidInput));

            sigmoidInput[0, 0] = -25000;
            Console.Write("Target: 0.0   Actual: {0}", LogisticRegression.Sigmoid(sigmoidInput));

            sigmoidInput[0, 0] = 0;
            Console.Write("Target: 0.5   Actual: {0}", LogisticRegression.Sigmoid(sigmoidInput));

            // Same function applied to a row of several values.
            sigmoidInput = new Matrix(new double[, ] { { 4, 5, 6 } });
            Console.Write("Target: 0.98 0.99 0.997   Actual: {0}", LogisticRegression.Sigmoid(sigmoidInput));
            #endregion

            #region Predict
            WriteH2("Prediction");

            Matrix predictInput = new Matrix(new double[, ] {
                { 1, 1 },
                { 1, 2.5 },
                { 1, 3 },
                { 1, 4 }
            });
            Matrix predictTheta = new Matrix(new double[, ] { { -3.5 }, { 1.3 } });
            Matrix predicted    = LogisticRegression.Predict(predictInput, predictTheta);
            // The matrix text is flattened onto one line by swapping newlines for "; ".
            Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ; 1.0 ;  Actual: {0}", predicted.ToString().Replace("\n", "; "));

            predictInput = Matrix.Magic(3);
            predictTheta = new Matrix(new double[, ] { { 4 }, { 3 }, { -8 } });
            predicted    = LogisticRegression.Predict(predictInput, predictTheta);
            Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ;        Actual: {0}", predicted.ToString().Replace("\n", "; "));
            #endregion

            #region Cost Function
            WriteH2("Cost Function");

            Matrix costFeatures = Matrix.AddIdentityColumn(Matrix.Magic(3));
            Matrix costResults  = new Matrix(new double[, ] { { 1 }, { 0 }, { 1 } });
            Matrix costTheta    = new Matrix(new double[, ] { { -2 }, { -1 }, { 1 }, { 2 } });

            Tuple<double, Matrix> costAndGradient = LogisticRegression.CostFunction(costFeatures, costResults, costTheta);
            Console.WriteLine("Target: 4.6832 ;  Actual: {0}", costAndGradient.Item1);
            #endregion

            #region Regularized Cost Function
            WriteH2("Regularized Cost Function");

            // First run: the same inputs as the unregularized case, now with options.
            MinimizeOptions regularized = new MinimizeOptions { RegularizationParameter = 3 };
            costAndGradient = LogisticRegression.CostFunction(costFeatures, costResults, costTheta, regularized);
            Console.WriteLine("Target: 7.6832 ;  Actual: {0}", costAndGradient.Item1);

            // Second run: a fresh five-example data set with the same theta values.
            costFeatures = new Matrix(new double[, ] {
                { 1.0, 0.1, 0.6, 1.1 },
                { 1.0, 0.2, 0.7, 1.2 },
                { 1.0, 0.3, 0.8, 1.3 },
                { 1.0, 0.4, 0.9, 1.4 },
                { 1.0, 0.5, 1.0, 1.5 }
            });
            costResults = new Matrix(new double[, ] { { 1.0 }, { 0.0 }, { 1.0 }, { 0.0 }, { 1.0 } });
            costTheta   = new Matrix(new double[, ] { { -2 }, { -1 }, { 1 }, { 2 } });

            costAndGradient = LogisticRegression.CostFunction(costFeatures, costResults, costTheta, regularized);
            Console.WriteLine("Target: 2.5348 ;  Actual: {0}", costAndGradient.Item1);
            #endregion

            #region OneVsAll
            WriteH2("One vs All");

            Matrix trainingFeatures = new Matrix(new double[, ] {
                { 8.0, 1.0, 6.0 },
                { 3.0, 5.0, 7.0 },
                { 4.0, 9.0, 2.0 },
                { 0.84147, 0.90930, 0.14112 },
                { 0.54030, -0.41615, -0.98999 }
            });
            Matrix trainingResults = new Matrix(new double[, ] { { 1.0 }, { 2.0 }, { 2.0 }, { 1.0 }, { 3.0 } });

            double[] classLabels  = { 1.0, 2.0, 3.0 };
            Matrix   trainedTheta = LogisticRegression.OneVsAll(trainingFeatures, trainingResults, classLabels, 0.1);

            // No target is printed for this case; the trained weights are shown as-is.
            Console.WriteLine(trainedTheta);
            #endregion

            #region PredictOneVsAll
            WriteH2("Predict One vs All");

            Matrix unseenFeatures = new Matrix(new double[, ] {
                { 1.0, 7.0 },
                { 4.0, 5.0 },
                { 7.0, 8.0 },
                { 1.0, 4.0 }
            });
            Matrix fixedTheta = new Matrix(new double[, ] {
                { 1.0, -6.0, 3.0 },
                { -2.0, 4.0, -3.0 }
            });
            Matrix classIndices = LogisticRegression.PredictOneVsAll(fixedTheta, unseenFeatures);
            Console.WriteLine("Target: 0; 1; 1; 0;    Actual: {0}", classIndices.ToString().Replace("\n", "; "));
            #endregion
        }
Exemple #4
0
        /// <summary>
        /// The cost function for a classification Neural Network with a single hidden
        /// layer (two weight matrices). Runs a forward pass, computes the regularized
        /// logistic cost, and derives the gradient by back propagation.
        /// </summary>
        /// <param name="X">The feature set Matrix, one example per row.</param>
        /// <param name="y">The result set Matrix, one result per example.</param>
        /// <param name="nn_parameters">The unrolled parameter vector that contains all the
        /// weights: the elements of the first weight matrix followed by the elements of
        /// the second.</param>
        /// <param name="options">Supplies the remaining inputs:
        /// RegularizationParameter (lambda, which helps reduce overfitting; values that
        /// are too high will lead to underfitting), InputLayerSize and HiddenLayerSize
        /// (the number of nodes in the input and hidden layers), and Labels (the list
        /// of classification labels).</param>
        /// <returns>The cost of using the given weights, and the gradient of the cost as
        /// a single Matrix built by joining the unrolled gradients of both weight
        /// matrices (useful for iterative minimization functions).</returns>
        public static Tuple <double, Matrix> NNCostFunction(Matrix X, Matrix y, Matrix nn_parameters, MinimizeOptions options)
        {
            double   lambda            = options.RegularizationParameter;
            int      input_layer_size  = options.InputLayerSize;
            int      hidden_layer_size = options.HiddenLayerSize;
            double[] labels            = options.Labels;
            int      num_labels        = labels.Length;

            // Number of examples; used to average both the cost and the gradient.
            var m = X.Rows;

            // Rebuild the two weight matrices from the unrolled vector. Theta2 starts
            // right after the hidden_layer_size * (input_layer_size + 1) elements of
            // Theta1.
            int    theta2_offset = hidden_layer_size * (input_layer_size + 1);
            Matrix Theta1        = Matrix.Reshape(nn_parameters, 0, hidden_layer_size, input_layer_size + 1);
            Matrix Theta2        = Matrix.Reshape(nn_parameters, theta2_offset, num_labels, hidden_layer_size + 1);

            // y_matrix has the following attributes:
            // Rows: same as the number of rows in Y -- one for each example result.
            // Columns: one for each label.
            // Values: Each row consists of zeros, except for one, which matches the
            // value of y in that row to the index of the label. For example, if there
            // are three labels (3, 6, 8), and y contains 2 rows (8, 3), then y_matrix
            // would be:
            // 0 0 1
            // 1 0 0
            Matrix y_matrix = AssignLabels(y, labels);

            // Forward pass. Add ones to the X Matrix, then to the hidden activations.
            Matrix a1 = Matrix.AddIdentityColumn(X);

            Matrix z2 = a1 * Theta1.Transpose;
            Matrix a2 = Matrix.AddIdentityColumn(LogisticRegression.Sigmoid(z2));

            Matrix z3 = a2 * Theta2.Transpose;
            Matrix a3 = LogisticRegression.Sigmoid(z3);

            // Per-element cost terms: -y * log(a3) and (1 - y) * log(1 - a3).
            Matrix part1 = Matrix.ElementMultiply(-y_matrix, Matrix.ElementLog(a3));
            Matrix part2 = Matrix.ElementMultiply((1 - y_matrix), Matrix.ElementLog(1 - a3));

            // The weights with their first column removed; that column lines up with
            // the column of ones added above and is left out of the regularization.
            Matrix Theta1_no_bias = Theta1.RemoveColumn(0);
            Matrix Theta2_no_bias = Theta2.RemoveColumn(0);

            // Calculate regularization component
            double multiplier     = lambda / (2 * m);
            double reg1           = Matrix.ElementPower(Theta1_no_bias, 2).SumAllElements;
            double reg2           = Matrix.ElementPower(Theta2_no_bias, 2).SumAllElements;
            double regularization = multiplier * (reg1 + reg2);

            // Calculate cost
            double cost = (1.0 / m) * (part1 - part2).SumAllElements + regularization;

            // Back Propagation
            Matrix d3 = a3 - y_matrix;
            Matrix d2 = Matrix.ElementMultiply(
                (Theta2_no_bias.Transpose * d3.Transpose).Transpose,
                SigmoidGradient(z2)
                );

            Matrix Delta1 = d2.Transpose * a1;
            Matrix Delta2 = d3.Transpose * a2;

            // Regularization term of the gradient: the weights with the first column
            // replaced by a new Matrix(rows, 1) column (presumably zero-filled, so
            // that column contributes nothing -- confirm against the Matrix
            // constructor), scaled by lambda / m.
            Matrix Theta1_reg = Matrix.Join(new Matrix(Theta1_no_bias.Rows, 1), Theta1_no_bias, MatrixDimensions.Columns);
            Matrix Theta2_reg = Matrix.Join(new Matrix(Theta2_no_bias.Rows, 1), Theta2_no_bias, MatrixDimensions.Columns);

            double scale_value   = lambda / m;
            Matrix Theta1_scaled = Theta1_reg * scale_value;
            Matrix Theta2_scaled = Theta2_reg * scale_value;

            // Unroll both gradients and join them into one Matrix, in the same order
            // (Theta1 then Theta2) that nn_parameters was read in.
            Matrix Theta1_grad = ((Delta1 / m) + Theta1_scaled).Unrolled;
            Matrix Theta2_grad = ((Delta2 / m) + Theta2_scaled).Unrolled;
            Matrix gradient    = Matrix.Join(Theta1_grad, Theta2_grad, MatrixDimensions.Rows);

            return new Tuple <double, Matrix>(cost, gradient);
        }