/// <summary>
/// Trains an ID3 decision tree on <paramref name="trainingSet"/> and wraps the
/// result in an <see cref="ID3Learner"/>.
/// </summary>
/// <param name="trainingSet">Records encoded via <c>BuildCodebook</c> and used to fit the tree.</param>
/// <param name="table">
/// Supplies the column names and, through <c>GetValues</c>, the number of values per column.
/// Every column except the last becomes a decision variable; the last column's name is
/// handed to the returned learner.
/// </param>
/// <param name="depth">Assigned to the learning algorithm's <c>MaxHeight</c>.</param>
/// <returns>A learner built from this instance, the trained tree, the codebook and the last column name.</returns>
ID3Learner LoadDecisionTree(List<Record> trainingSet, ReferenceTable table, int depth)
{
    // The tree is always created with two output classes.
    const int outputClasses = 2;

    int[][] encodedInputs;
    int[] encodedOutputs;
    Codification codebook = BuildCodebook(trainingSet, table, out encodedInputs, out encodedOutputs);

    var columnNames = table.Columns;

    // One decision variable per column, leaving out the final column.
    int featureCount = columnNames.Length - 1;
    DecisionVariable[] features = new DecisionVariable[featureCount];
    for (int column = 0; column < featureCount; column++)
    {
        int symbolCount = table.GetValues(column).Length;
        features[column] = new DecisionVariable(columnNames[column], symbolCount);
    }

    var tree = new DecisionTree(features, outputClasses);

    var teacher = new ID3LearningEx(tree);
    teacher.MaxHeight = depth;
    teacher.Run(encodedInputs, encodedOutputs);

    return new ID3Learner(this, tree, codebook, columnNames.Last());
}
/// <summary>
/// Runs <c>Discretize</c> over columns 0 through 7 of <paramref name="dataset"/>, one column
/// at a time and in ascending order.
/// </summary>
/// <param name="dataset">
/// Records handed to <c>Discretize</c>. The setter delegate supplied for each column assigns
/// the produced value back into the record at that column index.
/// </param>
/// <param name="table">
/// Supplies, per column, the unknown-value flag (<c>HasUnknowns</c>), the ranges
/// (<c>GetRanges</c>) and the values (<c>GetValues</c>) forwarded to <c>Discretize</c>.
/// </param>
/// <returns>The same <paramref name="dataset"/> instance that was passed in.</returns>
public List<Record> DiscretizeDataset(List<Record> dataset, ReferenceTable table)
{
    // Number of leading columns that are discretized (previously eight hand-copied calls).
    const int discretizedColumnCount = 8;

    for (int i = 0; i < discretizedColumnCount; i++)
    {
        // Copy the index into a per-iteration local so each pair of delegates is bound to
        // its own column rather than to the shared loop counter.
        int column = i;

        Discretize(
            table.HasUnknowns(column),
            rec => double.Parse(rec[column]),
            (rec, val) => rec[column] = val,
            table.GetRanges(column),
            table.GetValues(column),
            dataset);
    }

    return dataset;
}
/// <summary>
/// Copies <paramref name="trainingSet"/> into a <see cref="DataTable"/>, builds a
/// <see cref="Codification"/> over it, and extracts the integer-encoded inputs and outputs.
/// </summary>
/// <param name="trainingSet">Records whose <c>Values</c> are added to the table as rows.</param>
/// <param name="table">Supplies the column names used for the table's columns.</param>
/// <param name="inputs">
/// Receives the encoded values of the columns selected by <c>ExcludeLast(table.Columns)</c>.
/// </param>
/// <param name="outputs">Receives the encoded values of the last column.</param>
/// <returns>The codebook created from the populated table.</returns>
Codification BuildCodebook(List<Record> trainingSet, ReferenceTable table, out int[][] inputs, out int[] outputs)
{
    var data = new DataTable("Diabetes dataset");

    // One DataColumn per column name, in the table's order.
    DataColumn[] columns = table.Columns.Select(name => new DataColumn(name)).ToArray();
    data.Columns.AddRange(columns);

    foreach (Record record in trainingSet)
    {
        data.Rows.Add(record.Values);
    }

    var codebook = new Codification(data);
    DataTable symbols = codebook.Apply(data);

    var inputColumns = ExcludeLast(table.Columns);
    var outputColumn = table.Columns.Last();

    inputs = symbols.ToArray<int>(inputColumns);
    outputs = symbols.ToArray<int>(outputColumn);

    return codebook;
}
/// <summary>
/// Reads the training and test files, parses each into records, and runs
/// <c>DiscretizeDataset</c> on both sets against a newly created <see cref="ReferenceTable"/>.
/// </summary>
/// <param name="trainingSet">Receives the records parsed from the training file.</param>
/// <param name="testSet">Receives the records parsed from the test file.</param>
/// <returns>The reference table that was passed to both discretization calls.</returns>
ReferenceTable BuildDataSets(out List<Record> trainingSet, out List<Record> testSet)
{
    // Both locations are relative paths.
    const string trainingPath = @"..\..\Resources\3.1\diabetes_train.txt";
    const string testPath = @"..\..\Resources\3.1\diabetes_test.txt";

    var table = new ReferenceTable();
    var parser = new RecordParser();

    string[] trainingLines = File.ReadAllLines(trainingPath);
    trainingSet = parser.ParseRecords(trainingLines);

    string[] testLines = File.ReadAllLines(testPath);
    testSet = parser.ParseRecords(testLines);

    // Training set first, then test set; the return values are not used.
    foreach (List<Record> records in new[] { trainingSet, testSet })
    {
        parser.DiscretizeDataset(records, table);
    }

    return table;
}
/// <summary>
/// Builds an ensemble of <paramref name="ensembleCount"/> decision-tree learners, evaluates
/// it on <paramref name="testSet"/>, and prints the resulting confusion matrix.
/// </summary>
/// <param name="trainingSet">Source records; each learner is trained on <c>SampledData</c> of this set.</param>
/// <param name="table">Reference table forwarded to <c>LoadDecisionTree</c>.</param>
/// <param name="testSet">Records passed to <c>RunTest</c> together with the ensemble's <c>Test</c> method.</param>
/// <param name="ensembleCount">Number of learners added to the ensemble.</param>
/// <param name="depth">Depth value forwarded to <c>LoadDecisionTree</c> for every learner.</param>
void RunSingleTest(List<Record> trainingSet, ReferenceTable table, List<Record> testSet, int ensembleCount, int depth)
{
    // NOTE(review): "Runing" is a typo in the emitted text; it is kept verbatim so the
    // console output stays unchanged.
    string banner = $"\n[{GetTimeStamp()}] Runing test with Ensemble = {ensembleCount} and MaxDepth = {depth}\n";
    Console.WriteLine(banner);

    var committee = new Ensemble();

    // A single Random instance is shared by every sampling call in this run.
    var rng = new Random();

    int remaining = ensembleCount;
    while (remaining > 0)
    {
        var sample = SampledData(trainingSet, rng);
        ID3Learner member = LoadDecisionTree(sample, table, depth);
        committee.AddVoter(member.Predict);
        remaining--;
    }

    PrintResults(RunTest(committee.Test, testSet));
}