/// <summary>
/// Cross-validates the decision tree: the loaded records are cut into consecutive,
/// equally sized test slices; for each slice a tree is trained on all remaining records
/// and evaluated on the slice. Prints the accuracy of every slice and the average.
/// </summary>
static void Main()
{
    // Materialize once: the records are sliced and walked many times below, and a lazy
    // source would otherwise be re-evaluated on every enumeration.
    var records = LoadData().ToList();

    // Integer division: 277 / 5 == 55 cross-validation sets.
    // NOTE(review): 277 looks like the expected record count and 5 the intended
    // test-set size - confirm, and consider deriving this from records.Count instead.
    int crossValidationSetsCount = 277 / 5;
    int totalCount = records.Count;
    int testingSetCount = totalCount / crossValidationSetsCount;

    if (testingSetCount == 0)
    {
        // With fewer records than sets every test slice would be empty and all
        // accuracies below would be 0 / 0 (NaN), so stop with an explicit message.
        Console.WriteLine($"Not enough records ({totalCount}) to build {crossValidationSetsCount} cross-validation sets.");
        return;
    }

    double totalAccuracy = 0;
    for (int i = 0; i < crossValidationSetsCount; i++)
    {
        int testStart = i * testingSetCount;

        var testSet = records.Skip(testStart).Take(testingSetCount).ToList();

        // Everything outside the test slice is training data. The tail is taken in
        // full so that leftover records (when the record count is not a multiple of
        // the slice size) are never dropped from training.
        var trainingSet = records.Take(testStart)
                                 .Concat(records.Skip(testStart + testingSetCount))
                                 .ToList();

        // The dominating outcome passed here only serves as the root's "parent" outcome.
        TreeNode root = CreateNode(trainingSet, Attributes, GetdominatingClass(trainingSet));

        int successfulPredictions = testSet.Count(
            testRecord => PredictOutcome(root, testRecord) == testRecord.OutcomeClass);

        double accuracy = (double)successfulPredictions / testingSetCount;
        Console.WriteLine($"For test set {i} success is {accuracy * 100}%");
        totalAccuracy += accuracy;
    }

    Console.WriteLine($"The total accuracy is {totalAccuracy / crossValidationSetsCount * 100}%");
}
/// <summary>
/// Recursively builds a decision (sub)tree for <paramref name="records"/>: picks the
/// attribute with the lowest <c>getEntropy</c> value, groups the records by that
/// attribute's value and creates one child node per group.
/// </summary>
/// <param name="records">Examples that reached this node.</param>
/// <param name="attributes">Names of the attributes still available for splitting.</param>
/// <param name="dominatingParentOutcome">
/// Most common class of the parent's examples; stored on the node and used as the
/// node's outcome when <paramref name="records"/> is empty.
/// </param>
/// <returns>The node heading the constructed subtree.</returns>
/// <exception cref="InvalidOperationException">
/// No attribute could be selected, or the selected attribute does not name a public
/// property of <c>Record</c>.
/// </exception>
private static TreeNode CreateNode(IEnumerable<Record> records, IEnumerable<string> attributes, OutcomeClass dominatingParentOutcome)
{
    // Snapshot both sequences: they are walked several times below, and callers may
    // pass lazy LINQ queries that would be re-evaluated on every enumeration.
    var recordList = records as ICollection<Record> ?? records.ToList();
    var attributeList = attributes as ICollection<string> ?? attributes.ToList();

    var treeNode = new TreeNode();
    treeNode.DominatingParrentOutcome = dominatingParentOutcome;

    // No examples reached this node: label the leaf with the most common class of the
    // parent's examples.
    if (recordList.Count == 0)
    {
        treeNode.IsLeaf = true;
        treeNode.Outcome = dominatingParentOutcome;
        return treeNode;
    }

    // No attributes left to split on: label the leaf with the most common class of
    // the examples that did reach this node.
    if (attributeList.Count == 0)
    {
        treeNode.IsLeaf = true;
        treeNode.Outcome = GetdominatingClass(recordList);
        return treeNode;
    }

    // Choose the attribute with the lowest entropy; the strict comparison keeps the
    // first attribute on ties.
    string chosenAttribute = "";
    double minEntropy = double.PositiveInfinity;
    foreach (var attribute in attributeList)
    {
        double entropy = getEntropy(recordList, attribute);
        if (entropy < minEntropy)
        {
            minEntropy = entropy;
            chosenAttribute = attribute;
        }
    }

    treeNode.NextAttributeName = chosenAttribute;

    // Attribute names are resolved to public Record properties through reflection.
    var property = typeof(Record).GetProperties().FirstOrDefault(prop => prop.Name == chosenAttribute);
    if (property == null)
    {
        throw new InvalidOperationException(
            $"Cannot split on attribute '{chosenAttribute}': Record has no public property with that name.");
    }

    // Both values are identical for every child of this node, so compute them once.
    var remainingAttributes = attributeList.Where(attr => attr != chosenAttribute).ToList();
    var dominatingOutcome = GetdominatingClass(recordList);

    foreach (var group in recordList.GroupBy(record => property.GetValue(record)))
    {
        TreeNode childNode;
        if (getEntropy(group, chosenAttribute) == 0)
        {
            // Zero entropy presumably means every record in the group has the same
            // class, so the first record's class labels the leaf.
            // NOTE(review): unlike recursive children, this leaf never gets
            // DominatingParrentOutcome assigned - confirm that is intended.
            childNode = new TreeNode
            {
                IsLeaf = true,
                Outcome = group.First().OutcomeClass
            };
        }
        else
        {
            childNode = CreateNode(group, remainingAttributes, dominatingOutcome);
        }

        // "as string" yields null when the property value is not a string.
        childNode.AttributeValue = group.Key as string;
        treeNode.Children.Add(childNode);
    }

    return treeNode;
}