/// <summary>
/// Runs <c>Discretize</c> over the first eight columns (indices 0..7) of every
/// record in <paramref name="dataset"/>, using the ranges and values that
/// <paramref name="table"/> reports for each column.
/// </summary>
/// <param name="dataset">Records to process; passed straight through to <c>Discretize</c>.</param>
/// <param name="table">Reference table supplying per-column ranges, values and the unknowns flag.</param>
/// <returns>The same <paramref name="dataset"/> instance that was passed in.</returns>
static List<Record> DiscretizeDataset(List<Record> dataset, ReferenceTable table)
{
    // Number of leading columns that are discretized (the original code
    // spelled this out as eight consecutive, identical calls).
    const int ColumnCount = 8;

    // A C# `for` loop variable is a single variable shared by all iterations,
    // so the lambdas below read the live value of `column` -- the same
    // semantics as the one-element index array they replace.
    for (int column = 0; column < ColumnCount; column++)
    {
        // NOTE(review): HasUnknowns is always queried for column 0, not for
        // `column`. This is preserved from the original; it looks like a
        // copy-paste slip -- confirm whether HasUnknowns(column) was intended.
        Discretize(
            table.HasUnknowns(0),
            rec => double.Parse(rec[column]),
            (rec, val) => rec[column] = val,
            table.GetRanges(column),
            table.GetValues(column),
            dataset);
    }

    return dataset;
}
/// <summary>
/// Loads and discretizes a training and a test file, then grows an ensemble of
/// decision trees one tree at a time, printing confusion-matrix results for both
/// data sets at a fixed set of checkpoints.
/// </summary>
/// <param name="trainingFile">Path of the file whose lines are parsed into training records.</param>
/// <param name="testFile">Path of the file whose lines are parsed into test records.</param>
static void RunDiabetesTest(string trainingFile, string testFile)
{
    Console.WriteLine($"[{GetTimeStamp()}] Diabetes test:");
    ReferenceTable table = new ReferenceTable();

    Console.WriteLine($"[{GetTimeStamp()}] Loading training file: {trainingFile}.");
    List<Record> trainingRecords = DiscretizeDataset(ParseRecords(File.ReadAllLines(trainingFile)), table);

    // Fixed: this message previously printed the training file's path.
    Console.WriteLine($"[{GetTimeStamp()}] Loading test file: {testFile}.");
    List<Record> testRecords = DiscretizeDataset(ParseRecords(File.ReadAllLines(testFile)), table);

    var attributes = table.GetIndex()
        .Select(idx => new DiscreteAttribute(idx, table.GetName(idx), table.GetValues(idx)))
        .ToList();

    //DecisionTree.AssignProbabilitiesByClass(attributes, trainingRecords, false);
    attributes.ForEach(attribute => DecisionTree.AssignProbabilities(attribute, trainingRecords));

    // Build the combined record list once instead of once per attribute.
    // NOTE(review): this second pass feeds test records into AssignProbabilities
    // right after the training-only pass -- confirm that this is intended.
    List<Record> combinedRecords = testRecords.Union(trainingRecords).ToList();
    attributes.ForEach(attribute => DecisionTree.AssignProbabilities(attribute, combinedRecords));

    Console.WriteLine($"[{GetTimeStamp()}] Building Ensemble of ID3 decision trees...");
    Random sampler = new Random();
    var ensemble = new Ensemble();

    // Loop indices at which results are printed. The original condition also
    // listed 2000, which the loop (0..1000 inclusive) can never reach.
    int[] reportCheckpoints = { 0, 20, 100, 500, 1000 };

    for (int i = 0; i <= 1000; i++)
    {
        DecisionTree tree = new DecisionTree(attributes, SampledData(trainingRecords, sampler)).Build();
        ensemble.AddVoter(tree.Test);

        if (!reportCheckpoints.Contains(i))
        {
            continue;
        }

        ConfusionMatrix trainingMatrix = RunPredictions(trainingRecords, rec => rec.IsPositive, ensemble.Test);
        ConfusionMatrix testMatrix = RunPredictions(testRecords, rec => rec.IsPositive, ensemble.Test);

        Console.WriteLine("----------------------------------------------------------------");
        Console.WriteLine($"[{GetTimeStamp()}][Ensemble: {i}] Printing sanity results: ");
        PrintResults(trainingMatrix);
        Console.WriteLine($"[{GetTimeStamp()}][Ensemble: {i}] Printing prediction results: ");
        PrintResults(testMatrix);
    }
}