Beispiel #1
0
        /// <summary>
        /// Given a DataFrame, df, ensure any column with the same name has matching
        /// attributes (i.e. ColumnType)
        /// </summary>
        /// <param name="df">The DataFrame whose columns dictate the attributes of
        /// this DataFrame.</param>
        public void MatchColumns(DataFrame df, bool caseSensitive = false)
        {
            if (Columns == null || Columns.Count == 0)
            {
                return;
            }

            foreach (DataFrameColumn column in Columns)
            {
                DataFrameColumn other = df.FindColumn(column.Header, caseSensitive);
                if (other != null)
                {
                    column.ColumnType = other.ColumnType;
                    column.CopyFactors(other);
                    column.CopyBins(other);
                }
            }
        }
Beispiel #2
0
        static void TitanicDemo()
        {
            WriteH1("Testing");

            #region Dummy data importer
            //WriteH2("Dummy Data");
            //DataImporterDummy di = new DataImporterDummy();
            //DataFrame df = new DataFrame(di);
            //df.Load(String.Empty, true, true);

            //Console.WriteLine(df.TotalColumns);
            //foreach(string h in df.Headers)
            //{
            //    Console.WriteLine(h);
            //}
            #endregion

            #region CSV data importer
            WriteH2("CSV Data (Titanic)");
            DataImporterCSV di_csv = new DataImporterCSV();
            DataExporterCSV de_csv = new DataExporterCSV();

            DataFrame df_train = new DataFrame(di_csv, de_csv);
            DataFrame df_test  = new DataFrame(di_csv, de_csv);

            df_train.Load(@"c:\temp\titanic.csv", true, "Survived");
            df_test.Load(@"c:\temp\titanic_test.csv", true);

            Console.WriteLine($"Total Columns (training data): {df_train.TotalColumns}");
            Console.WriteLine($"Total Columns (testing data):  {df_test.TotalColumns}");

            // Change the type of some of the training columns
            df_train.SetColumnType("pclass", DataFrameColumnType.Factors);
            df_train.SetColumnType("sex", DataFrameColumnType.Factors);
            df_train.SetColumnType("age", DataFrameColumnType.Bins);
            //df_train["age"].SetBins(new double[] { 0.0, 18.0, 100.0 });
            df_train["age"].SetBins(new double[] { 0.0, 15.0, 25.0, 30.0, 40.0, 50.0, 55.0, 65.0, 75.0, 100.0 });
            df_train["age"].EmptyValue = 30.27; // Average value of known ages
            df_train.SetColumnType("fare", DataFrameColumnType.Double);
            df_train.SetColumnType("sibsp", DataFrameColumnType.Double);
            df_train.SetColumnType("parch", DataFrameColumnType.Double);

            df_train.CreateDataColumn("CabinLetter", GetCabinLetter);
            df_test.CreateDataColumn("CabinLetter", GetCabinLetter);    // For now, we have to explicitly set both training set
                                                                        // and test set separately.
            df_train.SetColumnType("CabinLetter", DataFrameColumnType.Factors);

            df_train.SetColumnType("survived", DataFrameColumnType.Double);

            // Try and match the types in the testing set
            df_test.MatchColumns(df_train);

            Console.WriteLine($"df_train hasResults? {df_train.HasResults}. df_test hasResults? {df_test.HasResults}");

            // Start calculations
            Matrix Xtrain = df_train.ExportFeatures();
            Matrix ytrain = df_train.ExportResults();

            Matrix Xtest = df_test.ExportFeatures();

            // Try Logistic Regression
            double[] labels        = new double[] { 0.0, 1.0 };
            Matrix   lr_theta      = LogisticRegression.OneVsAll(Xtrain, ytrain, labels, 0.1, 1000);
            Matrix   lr_prediction = LogisticRegression.PredictOneVsAll(lr_theta, Xtest);

            int      input_layer_size  = Xtrain.Columns;
            int      output_layer_size = labels.Length;
            int      hidden_layer_size = (input_layer_size + output_layer_size) / 2;
            Matrix[] nn_theta          = NeuralNetwork.Train(Xtrain, ytrain, input_layer_size, hidden_layer_size, labels, 0.1, 1000);
            Matrix   nn_prediction     = NeuralNetwork.Predict(nn_theta[0], nn_theta[1], Xtest);

            // Exporting
            DataFrame df_lr_export = df_test;
            DataFrame df_nn_export = df_test;

            DataFrameColumn col_lr_results = new DataFrameColumn(df_lr_export, lr_prediction, 0);
            DataFrameColumn col_nn_results = new DataFrameColumn(df_nn_export, nn_prediction, 0);

            col_lr_results.Header = col_nn_results.Header = "Survived";

            df_lr_export.Save(@"c:\temp\lr_results.csv");
            df_nn_export.Save(@"c:\temp\nn_results.csv");
            #endregion
        }