/// <summary>
/// Given a DataFrame, df, ensure any column with the same name has matching
/// attributes: for every column of this DataFrame that <paramref name="df"/>
/// also contains, the ColumnType is assigned from the matching column and its
/// factors and bins are copied via CopyFactors and CopyBins.
/// Columns with no match in <paramref name="df"/> are left untouched, and the
/// call is a no-op when this DataFrame has no columns.
/// </summary>
/// <param name="df">The DataFrame whose columns dictate the attributes of
/// this DataFrame.</param>
/// <param name="caseSensitive">Forwarded unchanged to <c>df.FindColumn</c> for
/// each header lookup; presumably selects case-sensitive header matching
/// (confirm against FindColumn).</param>
/// <exception cref="System.ArgumentNullException"><paramref name="df"/> is null
/// while this DataFrame has at least one column.</exception>
public void MatchColumns(DataFrame df, bool caseSensitive = false)
{
    // Nothing to match when this frame has no columns.
    if (Columns == null || Columns.Count == 0)
    {
        return;
    }

    // Fail with a descriptive exception rather than a NullReferenceException
    // raised from df.FindColumn inside the loop. Checked after the empty-columns
    // guard so the previous no-op behaviour for that case is preserved.
    if (df == null)
    {
        throw new System.ArgumentNullException(nameof(df));
    }

    foreach (DataFrameColumn column in Columns)
    {
        DataFrameColumn other = df.FindColumn(column.Header, caseSensitive);
        if (other == null)
        {
            // No column with this header in df; keep the current attributes.
            continue;
        }

        column.ColumnType = other.ColumnType;
        column.CopyFactors(other);
        column.CopyBins(other);
    }
}
/// <summary>
/// Runs the Titanic demo: loads the training and test CSV files, configures
/// column types on the training frame, mirrors them onto the test frame with
/// MatchColumns, trains a logistic regression and a neural network on the
/// exported matrices, and saves the test frame to two CSV files.
/// </summary>
static void TitanicDemo()
{
    // Input and output locations used by the demo.
    const string trainPath = @"c:\temp\titanic.csv";
    const string testPath = @"c:\temp\titanic_test.csv";
    const string lrOutputPath = @"c:\temp\lr_results.csv";
    const string nnOutputPath = @"c:\temp\nn_results.csv";

    WriteH1("Testing");

    #region Dummy data importer
    // Disabled sample kept for reference:
    //WriteH2("Dummy Data");
    //DataImporterDummy di = new DataImporterDummy();
    //DataFrame df = new DataFrame(di);
    //df.Load(String.Empty, true, true);
    //Console.WriteLine(df.TotalColumns);
    //foreach(string h in df.Headers)
    //{
    //    Console.WriteLine(h);
    //}
    #endregion

    #region CSV data importer
    WriteH2("CSV Data (Titanic)");

    // Both frames share the same importer/exporter instances.
    var csvImporter = new DataImporterCSV();
    var csvExporter = new DataExporterCSV();
    var trainFrame = new DataFrame(csvImporter, csvExporter);
    var testFrame = new DataFrame(csvImporter, csvExporter);

    // Only the training load names "Survived"; presumably this marks the
    // result column (confirm against DataFrame.Load).
    trainFrame.Load(trainPath, true, "Survived");
    testFrame.Load(testPath, true);

    Console.WriteLine($"Total Columns (training data): {trainFrame.TotalColumns}");
    Console.WriteLine($"Total Columns (testing data): {testFrame.TotalColumns}");

    // Change the type of some of the training columns.
    trainFrame.SetColumnType("pclass", DataFrameColumnType.Factors);
    trainFrame.SetColumnType("sex", DataFrameColumnType.Factors);
    trainFrame.SetColumnType("age", DataFrameColumnType.Bins);

    // A coarser alternative that was tried: { 0.0, 18.0, 100.0 }.
    double[] ageBinEdges = { 0.0, 15.0, 25.0, 30.0, 40.0, 50.0, 55.0, 65.0, 75.0, 100.0 };
    trainFrame["age"].SetBins(ageBinEdges);
    trainFrame["age"].EmptyValue = 30.27; // Average value of known ages

    trainFrame.SetColumnType("fare", DataFrameColumnType.Double);
    trainFrame.SetColumnType("sibsp", DataFrameColumnType.Double);
    trainFrame.SetColumnType("parch", DataFrameColumnType.Double);

    // The derived column has to be created explicitly on the training set
    // and the test set separately.
    trainFrame.CreateDataColumn("CabinLetter", GetCabinLetter);
    testFrame.CreateDataColumn("CabinLetter", GetCabinLetter);

    trainFrame.SetColumnType("CabinLetter", DataFrameColumnType.Factors);
    trainFrame.SetColumnType("survived", DataFrameColumnType.Double);

    // Try and match the types in the testing set.
    testFrame.MatchColumns(trainFrame);

    Console.WriteLine($"df_train hasResults? {trainFrame.HasResults}. df_test hasResults? {testFrame.HasResults}");

    // Start calculations.
    Matrix trainFeatures = trainFrame.ExportFeatures();
    Matrix trainResults = trainFrame.ExportResults();
    Matrix testFeatures = testFrame.ExportFeatures();

    double[] labels = { 0.0, 1.0 };

    // Logistic regression (one-vs-all).
    Matrix lrTheta = LogisticRegression.OneVsAll(trainFeatures, trainResults, labels, 0.1, 1000);
    Matrix lrPrediction = LogisticRegression.PredictOneVsAll(lrTheta, testFeatures);

    // Neural network; the hidden layer size is the integer mean of the
    // input and output layer sizes.
    int inputLayerSize = trainFeatures.Columns;
    int outputLayerSize = labels.Length;
    int hiddenLayerSize = (inputLayerSize + outputLayerSize) / 2;

    Matrix[] nnTheta = NeuralNetwork.Train(
        trainFeatures,
        trainResults,
        inputLayerSize,
        hiddenLayerSize,
        labels,
        0.1,
        1000);
    Matrix nnPrediction = NeuralNetwork.Predict(nnTheta[0], nnTheta[1], testFeatures);

    // Exporting.
    // NOTE(review): both export variables reference the same DataFrame instance
    // as the test frame. If the DataFrameColumn constructor attaches the new
    // column to the frame it is given, both saved files will contain both
    // prediction columns - confirm whether separate copies were intended.
    DataFrame lrExportFrame = testFrame;
    DataFrame nnExportFrame = testFrame;

    var lrResultColumn = new DataFrameColumn(lrExportFrame, lrPrediction, 0);
    var nnResultColumn = new DataFrameColumn(nnExportFrame, nnPrediction, 0);

    // Same assignment order as the original chained assignment (right to left).
    nnResultColumn.Header = "Survived";
    lrResultColumn.Header = "Survived";

    lrExportFrame.Save(lrOutputPath);
    nnExportFrame.Save(nnOutputPath);
    #endregion
}