static void TitanicDemo()
{
    WriteH1("Testing");

    #region Dummy data importer
    //WriteH2("Dummy Data");
    //DataImporterDummy di = new DataImporterDummy();
    //DataFrame df = new DataFrame(di);
    //df.Load(String.Empty, true, true);
    //Console.WriteLine(df.TotalColumns);
    //foreach (string h in df.Headers)
    //{
    //    Console.WriteLine(h);
    //}
    #endregion

    #region CSV data importer
    WriteH2("CSV Data (Titanic)");
    DataImporterCSV di_csv = new DataImporterCSV();
    DataExporterCSV de_csv = new DataExporterCSV();
    DataFrame df_train = new DataFrame(di_csv, de_csv);
    DataFrame df_test = new DataFrame(di_csv, de_csv);

    df_train.Load(@"c:\temp\titanic.csv", true, "Survived");
    df_test.Load(@"c:\temp\titanic_test.csv", true);

    Console.WriteLine($"Total Columns (training data): {df_train.TotalColumns}");
    Console.WriteLine($"Total Columns (testing data): {df_test.TotalColumns}");

    // Change the type of some of the training columns
    df_train.SetColumnType("pclass", DataFrameColumnType.Factors);
    df_train.SetColumnType("sex", DataFrameColumnType.Factors);
    df_train.SetColumnType("age", DataFrameColumnType.Bins);
    //df_train["age"].SetBins(new double[] { 0.0, 18.0, 100.0 });
    df_train["age"].SetBins(new double[] { 0.0, 15.0, 25.0, 30.0, 40.0, 50.0, 55.0, 65.0, 75.0, 100.0 });
    df_train["age"].EmptyValue = 30.27; // Average value of known ages
    df_train.SetColumnType("fare", DataFrameColumnType.Double);
    df_train.SetColumnType("sibsp", DataFrameColumnType.Double);
    df_train.SetColumnType("parch", DataFrameColumnType.Double);

    df_train.CreateDataColumn("CabinLetter", GetCabinLetter);
    df_test.CreateDataColumn("CabinLetter", GetCabinLetter);

    // For now, we have to explicitly set both training set
    // and test set separately.
    df_train.SetColumnType("CabinLetter", DataFrameColumnType.Factors);
    df_train.SetColumnType("survived", DataFrameColumnType.Double);

    // Try and match the types in the testing set
    df_test.MatchColumns(df_train);

    Console.WriteLine($"df_train hasResults? {df_train.HasResults}. df_test hasResults? {df_test.HasResults}");

    // Start calculations
    Matrix Xtrain = df_train.ExportFeatures();
    Matrix ytrain = df_train.ExportResults();
    Matrix Xtest = df_test.ExportFeatures();

    // Try Logistic Regression
    double[] labels = new double[] { 0.0, 1.0 };
    Matrix lr_theta = LogisticRegression.OneVsAll(Xtrain, ytrain, labels, 0.1, 1000);
    Matrix lr_prediction = LogisticRegression.PredictOneVsAll(lr_theta, Xtest);

    int input_layer_size = Xtrain.Columns;
    int output_layer_size = labels.Length;
    int hidden_layer_size = (input_layer_size + output_layer_size) / 2;
    Matrix[] nn_theta = NeuralNetwork.Train(Xtrain, ytrain, input_layer_size, hidden_layer_size, labels, 0.1, 1000);
    Matrix nn_prediction = NeuralNetwork.Predict(nn_theta[0], nn_theta[1], Xtest);

    // Exporting
    DataFrame df_lr_export = df_test;
    DataFrame df_nn_export = df_test;
    DataFrameColumn col_lr_results = new DataFrameColumn(df_lr_export, lr_prediction, 0);
    DataFrameColumn col_nn_results = new DataFrameColumn(df_nn_export, nn_prediction, 0);
    col_lr_results.Header = col_nn_results.Header = "Survived";

    df_lr_export.Save(@"c:\temp\lr_results.csv");
    df_nn_export.Save(@"c:\temp\nn_results.csv");
    #endregion
}
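// GetCabinLetter is referenced by CreateDataColumn above but not shown in this section.
// The sketch below is a hypothetical illustration only: it assumes CreateDataColumn
// accepts a string-to-string mapping over the raw "cabin" field, which is an assumption
// about the DataFrame API rather than something confirmed here. In the Titanic data a
// cabin value such as "C85" starts with its deck letter, which is what we extract.
static string GetCabinLetter(string cabin)
{
    // Many passengers have no recorded cabin; map those to a placeholder
    // factor so the column can still be treated as categorical.
    if (string.IsNullOrWhiteSpace(cabin))
        return "U";

    // The deck is the leading letter of the cabin code (e.g. "C85" -> "C").
    return cabin.Trim().Substring(0, 1).ToUpperInvariant();
}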
static void LogisticRegressionDemo()
{
    WriteH1("Logistic Regression");

    #region Sigmoid Function
    WriteH2("Sigmoid Function");
    Matrix m1 = new Matrix(new double[,] { { 1200000 } });
    Matrix sigmoid1 = LogisticRegression.Sigmoid(m1);
    Console.Write("Target: 1.0 Actual: {0}", sigmoid1);

    m1[0, 0] = -25000;
    sigmoid1 = LogisticRegression.Sigmoid(m1);
    Console.Write("Target: 0.0 Actual: {0}", sigmoid1);

    m1[0, 0] = 0;
    sigmoid1 = LogisticRegression.Sigmoid(m1);
    Console.Write("Target: 0.5 Actual: {0}", sigmoid1);

    m1 = new Matrix(new double[,] { { 4, 5, 6 } });
    sigmoid1 = LogisticRegression.Sigmoid(m1);
    Console.Write("Target: 0.98 0.99 0.997 Actual: {0}", sigmoid1);
    #endregion

    #region Predict
    WriteH2("Prediction");
    m1 = new Matrix(new double[,] { { 1, 1 }, { 1, 2.5 }, { 1, 3 }, { 1, 4 } });
    Matrix theta = new Matrix(new double[,] { { -3.5 }, { 1.3 } });
    Matrix prediction = LogisticRegression.Predict(m1, theta);
    Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ; 1.0 ; Actual: {0}", prediction.ToString().Replace("\n", "; "));

    m1 = Matrix.Magic(3);
    theta = new Matrix(new double[,] { { 4 }, { 3 }, { -8 } });
    prediction = LogisticRegression.Predict(m1, theta);
    Console.WriteLine("Target: 0.0 ; 0.0 ; 1.0 ; Actual: {0}", prediction.ToString().Replace("\n", "; "));
    #endregion

    #region Cost Function
    WriteH2("Cost Function");
    Matrix X = Matrix.AddIdentityColumn(Matrix.Magic(3));
    Matrix y = new Matrix(new double[,] { { 1 }, { 0 }, { 1 } });
    theta = new Matrix(new double[,] { { -2 }, { -1 }, { 1 }, { 2 } });
    Tuple<double, Matrix> cost = LogisticRegression.CostFunction(X, y, theta);
    Console.WriteLine("Target: 4.6832 ; Actual: {0}", cost.Item1);
    #endregion

    #region Regularized Cost Function
    WriteH2("Regularized Cost Function");
    MinimizeOptions options = new MinimizeOptions();
    options.RegularizationParameter = 3;
    cost = LogisticRegression.CostFunction(X, y, theta, options);
    Console.WriteLine("Target: 7.6832 ; Actual: {0}", cost.Item1);

    X = new Matrix(new double[,] {
        { 1.0, 0.1, 0.6, 1.1 },
        { 1.0, 0.2, 0.7, 1.2 },
        { 1.0, 0.3, 0.8, 1.3 },
        { 1.0, 0.4, 0.9, 1.4 },
        { 1.0, 0.5, 1.0, 1.5 }
    });
    y = new Matrix(new double[,] { { 1.0 }, { 0.0 }, { 1.0 }, { 0.0 }, { 1.0 } });
    theta = new Matrix(new double[,] { { -2 }, { -1 }, { 1 }, { 2 } });
    cost = LogisticRegression.CostFunction(X, y, theta, options);
    Console.WriteLine("Target: 2.5348 ; Actual: {0}", cost.Item1);
    #endregion

    #region OneVsAll
    WriteH2("One vs All");
    X = new Matrix(new double[,] {
        { 8.0, 1.0, 6.0 },
        { 3.0, 5.0, 7.0 },
        { 4.0, 9.0, 2.0 },
        { 0.84147, 0.90930, 0.14112 },
        { 0.54030, -0.41615, -0.98999 }
    });
    y = new Matrix(new double[,] { { 1.0 }, { 2.0 }, { 2.0 }, { 1.0 }, { 3.0 } });

    //Matrix testTheta = new Matrix(4, 1);
    //Matrix X0 = Matrix.Join(Matrix.Ones(5, 1), X, MatrixDimensions.Columns);
    //cost = LogisticRegression.CostFunction(X0, y==1, testTheta, 0.1);
    //Console.WriteLine(cost.Item1);
    //Console.WriteLine(cost.Item2);

    double[] labels = new double[] { 1.0, 2.0, 3.0 };
    Matrix all_theta = LogisticRegression.OneVsAll(X, y, labels, 0.1);
    Console.WriteLine(all_theta);
    #endregion

    #region PredictOneVsAll
    WriteH2("Predict One vs All");
    X = new Matrix(new double[,] { { 1.0, 7.0 }, { 4.0, 5.0 }, { 7.0, 8.0 }, { 1.0, 4.0 } });
    all_theta = new Matrix(new double[,] { { 1.0, -6.0, 3.0 }, { -2.0, 4.0, -3.0 } });
    prediction = LogisticRegression.PredictOneVsAll(all_theta, X);
    Console.WriteLine("Target: 0; 1; 1; 0; Actual: {0}", prediction.ToString().Replace("\n", "; "));
    #endregion
}
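// The "Target" values printed by the sigmoid tests above follow from the logistic
// function g(z) = 1 / (1 + e^(-z)): g(0) = 0.5, g(z) approaches 1 for large positive z
// (e.g. 1200000) and 0 for large negative z (e.g. -25000), and g(4), g(5), g(6) are
// roughly 0.982, 0.993 and 0.9975. The sketch below is a standalone sanity check using
// System.Math only; SigmoidSanityCheck is an illustrative name, not part of the
// library, and it does not depend on the Matrix or LogisticRegression classes.
static void SigmoidSanityCheck()
{
    // Scalar sigmoid, matching the element-wise operation that
    // LogisticRegression.Sigmoid is expected to apply to a Matrix.
    Func<double, double> g = z => 1.0 / (1.0 + Math.Exp(-z));

    // Reproduces the expected values used in the demo targets.
    Console.WriteLine($"g(0)  = {g(0)}");                     // 0.5
    Console.WriteLine($"g(4)  = {g(4):F4}");                  // ~0.9820
    Console.WriteLine($"g(5)  = {g(5):F4}");                  // ~0.9933
    Console.WriteLine($"g(6)  = {g(6):F4}");                  // ~0.9975
    Console.WriteLine($"g(-25000) = {g(-25000)}");            // ~0.0
    Console.WriteLine($"g(1200000) = {g(1200000)}");          // ~1.0
}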