Ejemplo n.º 1
0
        /// <summary>
        /// Titanic walkthrough: loads the training and test CSV files into data
        /// frames, configures the training column types, calls the logistic
        /// regression and neural network routines on the exported matrices, and
        /// saves the test frame to the two result files.
        /// </summary>
        static void TitanicDemo()
        {
            WriteH1("Testing");

            #region CSV data importer
            WriteH2("CSV Data (Titanic)");

            var csvImporter = new DataImporterCSV();
            var csvExporter = new DataExporterCSV();

            // Both frames are built on the same importer/exporter instances.
            var df_train = new DataFrame(csvImporter, csvExporter);
            var df_test  = new DataFrame(csvImporter, csvExporter);

            // Only the training load passes a column name ("Survived");
            // presumably this marks the results column - TODO confirm against DataFrame.Load.
            df_train.Load(@"c:\temp\titanic.csv", true, "Survived");
            df_test.Load(@"c:\temp\titanic_test.csv", true);

            Console.WriteLine($"Total Columns (training data): {df_train.TotalColumns}");
            Console.WriteLine($"Total Columns (testing data):  {df_test.TotalColumns}");

            // Re-type the training columns, in the same order as before:
            // factor columns first, then the binned age column, then the numeric ones.
            foreach (string factorColumn in new[] { "pclass", "sex" })
            {
                df_train.SetColumnType(factorColumn, DataFrameColumnType.Factors);
            }

            df_train.SetColumnType("age", DataFrameColumnType.Bins);
            // A coarser split { 0.0, 18.0, 100.0 } was tried previously and is no longer used.
            double[] ageBinEdges = { 0.0, 15.0, 25.0, 30.0, 40.0, 50.0, 55.0, 65.0, 75.0, 100.0 };
            df_train["age"].SetBins(ageBinEdges);
            df_train["age"].EmptyValue = 30.27; // Average value of known ages

            foreach (string numericColumn in new[] { "fare", "sibsp", "parch" })
            {
                df_train.SetColumnType(numericColumn, DataFrameColumnType.Double);
            }

            // The derived column has to be created on each frame explicitly;
            // for now the training and test sets are not kept in sync automatically.
            df_train.CreateDataColumn("CabinLetter", GetCabinLetter);
            df_test.CreateDataColumn("CabinLetter", GetCabinLetter);
            df_train.SetColumnType("CabinLetter", DataFrameColumnType.Factors);

            df_train.SetColumnType("survived", DataFrameColumnType.Double);

            // Try and match the types in the testing set
            df_test.MatchColumns(df_train);

            Console.WriteLine($"df_train hasResults? {df_train.HasResults}. df_test hasResults? {df_test.HasResults}");

            // Start calculations
            Matrix trainFeatures = df_train.ExportFeatures();
            Matrix trainResults  = df_train.ExportResults();
            Matrix testFeatures  = df_test.ExportFeatures();

            // Logistic regression over the two class labels.
            double[] classLabels = { 0.0, 1.0 };
            Matrix logisticTheta = LogisticRegression.OneVsAll(trainFeatures, trainResults, classLabels, 0.1, 1000);
            Matrix logisticPrediction = LogisticRegression.PredictOneVsAll(logisticTheta, testFeatures);

            // Neural network: the hidden layer size is the integer midpoint between
            // the input and output layer sizes. The sizes are read only after the
            // logistic regression calls, exactly as in the original ordering.
            int inputUnits  = trainFeatures.Columns;
            int outputUnits = classLabels.Length;
            int hiddenUnits = (inputUnits + outputUnits) / 2;
            Matrix[] networkTheta = NeuralNetwork.Train(trainFeatures, trainResults, inputUnits, hiddenUnits, classLabels, 0.1, 1000);
            Matrix networkPrediction = NeuralNetwork.Predict(networkTheta[0], networkTheta[1], testFeatures);

            // Exporting
            // NOTE(review): both export variables reference the same df_test instance,
            // so both Save calls below are made on one and the same frame. Confirm
            // whether independent copies were intended and whether the DataFrameColumn
            // constructor attaches the new column to the frame it is given.
            DataFrame logisticExport = df_test;
            DataFrame networkExport  = df_test;

            var logisticColumn = new DataFrameColumn(logisticExport, logisticPrediction, 0);
            var networkColumn  = new DataFrameColumn(networkExport, networkPrediction, 0);

            // Same setter order as a chained assignment: right-hand column first.
            networkColumn.Header  = "Survived";
            logisticColumn.Header = "Survived";

            logisticExport.Save(@"c:\temp\lr_results.csv");
            networkExport.Save(@"c:\temp\nn_results.csv");
            #endregion
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Console walkthrough of the LogisticRegression routines. Each region
        /// calls one routine on a small hand-written matrix and prints the
        /// expected ("Target") text next to the value actually returned.
        /// </summary>
        static void LogisticRegressionDemo()
        {
            WriteH1("Logistic Regression");

            #region Sigmoid Function
            WriteH2("Sigmoid Function");

            // A single-cell matrix is overwritten in place for the three scalar checks.
            var scalarInput = new Matrix(new double[,] { { 1200000 } });
            Matrix sigmoidOutput = LogisticRegression.Sigmoid(scalarInput);
            Console.Write("Target: 1.0   Actual: {0}", sigmoidOutput);

            scalarInput[0, 0] = -25000;
            sigmoidOutput = LogisticRegression.Sigmoid(scalarInput);
            Console.Write("Target: 0.0   Actual: {0}", sigmoidOutput);

            scalarInput[0, 0] = 0;
            sigmoidOutput = LogisticRegression.Sigmoid(scalarInput);
            Console.Write("Target: 0.5   Actual: {0}", sigmoidOutput);

            // Row-vector input.
            var rowInput = new Matrix(new double[,] { { 4, 5, 6 } });
            sigmoidOutput = LogisticRegression.Sigmoid(rowInput);
            Console.Write("Target: 0.98 0.99 0.997   Actual: {0}", sigmoidOutput);
            #endregion

            #region Predict
            WriteH2("Prediction");

            var designMatrix = new Matrix(new double[,]
            {
                { 1, 1 },
                { 1, 2.5 },
                { 1, 3 },
                { 1, 4 }
            });
            var weights = new Matrix(new double[,] { { -3.5 }, { 1.3 } });
            Matrix predicted = LogisticRegression.Predict(designMatrix, weights);
            // Newlines in the matrix text are replaced so the result prints on one line.
            string predictedText = predicted.ToString().Replace("\n", "; ");
            Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ; 1.0 ;  Actual: {0}", predictedText);

            Matrix magicInput = Matrix.Magic(3);
            weights = new Matrix(new double[,] { { 4 }, { 3 }, { -8 } });
            predicted = LogisticRegression.Predict(magicInput, weights);
            predictedText = predicted.ToString().Replace("\n", "; ");
            Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ;        Actual: {0}", predictedText);
            #endregion

            #region Cost Function
            WriteH2("Cost Function");

            Matrix features = Matrix.AddIdentityColumn(Matrix.Magic(3));
            var targets = new Matrix(new double[,] { { 1 }, { 0 }, { 1 } });
            weights = new Matrix(new double[,] { { -2 }, { -1 }, { 1 }, { 2 } });

            Tuple<double, Matrix> costResult = LogisticRegression.CostFunction(features, targets, weights);
            Console.WriteLine("Target: 4.6832 ;  Actual: {0}", costResult.Item1);
            #endregion

            #region Regularized Cost Function
            WriteH2("Regularized Cost Function");

            var regularizedOptions = new MinimizeOptions { RegularizationParameter = 3 };

            // First pass reuses the matrices from the unregularized cost check.
            costResult = LogisticRegression.CostFunction(features, targets, weights, regularizedOptions);
            Console.WriteLine("Target: 7.6832 ;  Actual: {0}", costResult.Item1);

            // Second pass: a five-row data set whose first column is all ones.
            features = new Matrix(new double[,]
            {
                { 1.0, 0.1, 0.6, 1.1 },
                { 1.0, 0.2, 0.7, 1.2 },
                { 1.0, 0.3, 0.8, 1.3 },
                { 1.0, 0.4, 0.9, 1.4 },
                { 1.0, 0.5, 1.0, 1.5 }
            });
            targets = new Matrix(new double[,] { { 1.0 }, { 0.0 }, { 1.0 }, { 0.0 }, { 1.0 } });
            weights = new Matrix(new double[,] { { -2 }, { -1 }, { 1 }, { 2 } });

            costResult = LogisticRegression.CostFunction(features, targets, weights, regularizedOptions);
            Console.WriteLine("Target: 2.5348 ;  Actual: {0}", costResult.Item1);
            #endregion

            #region OneVsAll
            WriteH2("One vs All");

            features = new Matrix(new double[,]
            {
                { 8.0, 1.0, 6.0 },
                { 3.0, 5.0, 7.0 },
                { 4.0, 9.0, 2.0 },
                { 0.84147, 0.90930, 0.14112 },
                { 0.54030, -0.41615, -0.98999 }
            });
            targets = new Matrix(new double[,] { { 1.0 }, { 2.0 }, { 2.0 }, { 1.0 }, { 3.0 } });

            double[] classLabels = { 1.0, 2.0, 3.0 };
            Matrix perClassTheta = LogisticRegression.OneVsAll(features, targets, classLabels, 0.1);

            // No target text here: the resulting matrix is simply printed.
            Console.WriteLine(perClassTheta);
            #endregion

            #region PredictOneVsAll
            WriteH2("Predict One vs All");

            features = new Matrix(new double[,]
            {
                { 1.0, 7.0 },
                { 4.0, 5.0 },
                { 7.0, 8.0 },
                { 1.0, 4.0 }
            });
            perClassTheta = new Matrix(new double[,]
            {
                { 1.0, -6.0, 3.0 },
                { -2.0, 4.0, -3.0 }
            });

            predicted = LogisticRegression.PredictOneVsAll(perClassTheta, features);
            predictedText = predicted.ToString().Replace("\n", "; ");
            Console.WriteLine("Target: 0; 1; 1; 0;    Actual: {0}", predictedText);
            #endregion
        }