/// <summary>
/// Runs the ML4 example: loads the hybrid data set from a serialized cache file when
/// one exists and can be read, otherwise rebuilds it from the Excel source and writes
/// the cache; then converts it to a nominal data set, promotes feature 0 to the label,
/// splits it, and hands both parts to <c>ProccessDataSetNominalJoinTable</c>.
/// </summary>
static void ExampleML4()
{
    string file_path_ser = @"D:\Dropbox\TestData\ML4\brains_dataset.ser";
    IDataSetHybrid data_set_hybrid_unlabeled = null;

    if (File.Exists(file_path_ser))
    {
        try
        {
            // Dispose both the stream and the reader so the file handle is released
            // before the cache file is potentially rewritten below.
            using (FileStream stream = File.Open(file_path_ser, FileMode.Open))
            using (BinaryReader reader = new BinaryReader(stream))
            {
                data_set_hybrid_unlabeled = DataSetHybrid.Read(reader);
            }
        }
        catch (Exception)
        {
            // Best effort: an unreadable cache is treated as a missing cache and
            // the data set is rebuilt from the Excel source.
            data_set_hybrid_unlabeled = null;
        }
    }

    if (data_set_hybrid_unlabeled == null)
    {
        IDataSet<int> data_set0 = ReadExcellNominal(@"D:\Dropbox\TestData\ML4\brains_ml.xlsx");
        data_set_hybrid_unlabeled = CleanUpML4(data_set0);
        ToolsIOSerialization.SerializeToFile(file_path_ser, data_set_hybrid_unlabeled);
    }

    // NOTE(review): the meaning of the argument 5 (presumably a bin/level count for
    // the interval features) is not visible here - confirm against ConvertToNominal.
    IDataSet<int> data_set_nominal_unlabeled = data_set_hybrid_unlabeled.ConvertToNominal(5);
    IDataSet<int, int> data_set_nominal_labeled = data_set_nominal_unlabeled.PromoteFeatureToLabel(0);

    // The first part of the split is used as the training set, the second as the test set.
    Tuple<IDataSet<int, int>, IDataSet<int, int>> split = data_set_nominal_labeled.Split();
    IDataSet<int, int> training_set = split.Item1;
    IDataSet<int, int> test_set = split.Item2;

    ProccessDataSetNominalJoinTable(training_set, test_set);
}
/// <summary>
/// Cleans up the raw ML4 nominal data set: removes instances missing feature 0 or
/// feature 1, removes a fixed list of features, and promotes features 1-3 of the
/// reduced data set to interval level, logging each step to the console.
/// </summary>
/// <param name="data_set0">The raw nominal data set to clean up.</param>
/// <returns>The hybrid data set produced by the final promotion step.</returns>
public static IDataSetHybrid CleanUpML4(IDataSet<int> data_set0)
{
    // Manipulate it
    // NOTE(review): this message announces treating "9" as missing for feature 0,
    // but nothing in this method performs that step - confirm whether it was dropped.
    Console.WriteLine("For feature index: " + 0 + " Adding value type as missing: " + "9");

    Console.WriteLine("Removing all instances missing: " + data_set0.DataContext.GetFeatureName(0));
    IDataSet<int> data_set1 = data_set0.RemoveInstancesMissing(0);

    Console.WriteLine("Removing all instances missing: " + data_set0.DataContext.GetFeatureName(1));
    IDataSet<int> data_set2 = data_set1.RemoveInstancesMissing(1);

    int[] remove_feature_indexes = new int[] { 1, 2, 3, 5, 7, 8, 9, 11, 12 };
    foreach (int remove_feature_index in remove_feature_indexes)
    {
        Console.WriteLine("Removing: " + data_set2.DataContext.GetFeatureName(remove_feature_index));
    }
    IDataSet<int> data_set3 = data_set2.RemoveFeatures(remove_feature_indexes);

    int[] interval_feature_indexes = new int[] { 1, 2, 3 };
    foreach (int interval_feature_index in interval_feature_indexes)
    {
        // These indexes address data_set3 (after feature removal), so the names must
        // be looked up there; the original data set would report features that were
        // just removed.
        Console.WriteLine("Promoting to hybrid: " + data_set3.DataContext.GetFeatureName(interval_feature_index));
    }
    IDataSetHybrid data_set4 = data_set3.PromoteFeatureLevelToInterval(interval_feature_indexes);

    return data_set4;
}