/// <summary>
/// Creates a classifier by building a tree from the supplied instances.
/// </summary>
/// <param name="instances">Training instances; stored in <c>Instances</c> and passed to <c>Id3Node.BuildTree</c>.</param>
/// <param name="classIndex">Index forwarded to <c>Id3Node.BuildTree</c> (presumably the column holding the class label; confirm against the builder).</param>
/// <param name="confidence">Confidence value; stored in <c>Confidence</c> and forwarded to <c>Id3Node.BuildTree</c>.</param>
public Id3Classifier(List<int[]> instances, int classIndex, double confidence)
{
    // Record the inputs first, then build the tree from them.
    Instances = instances;
    Confidence = confidence;

    var root = Id3Node.BuildTree(instances, classIndex, confidence);
    Tree = root;
}
/// <summary>
/// Creates a classifier by building a tree from the supplied instances, with a depth limit
/// forwarded to the tree builder.
/// </summary>
/// <param name="instances">Training instances; stored in <c>Instances</c> and passed to <c>Id3Node.BuildTree</c>.</param>
/// <param name="classIndex">Index forwarded to <c>Id3Node.BuildTree</c> (presumably the column holding the class label; confirm against the builder).</param>
/// <param name="confidence">Confidence value; stored in <c>Confidence</c> and forwarded to <c>Id3Node.BuildTree</c>.</param>
/// <param name="maxDepth">Depth limit forwarded to <c>Id3Node.BuildTree</c>.</param>
public Id3Classifier(List<int[]> instances, int classIndex, double confidence, int maxDepth)
{
    // Keep the training instances, as the three-argument overload does; previously this
    // overload left Instances unassigned.
    Instances = instances;
    Confidence = confidence;
    Tree = Id3Node.BuildTree(instances, classIndex, confidence, maxDepth);
}
/// <summary>
/// Builds trees at several confidence levels and reports their size and accuracy.
/// </summary>
/// <remarks>
/// Loads the training set from <c>_arffFile</c> and the test set from <c>_testArffFile</c>,
/// writes the training data to a CSV file, then, for each confidence level in parallel,
/// builds a tree, prints its node count and its accuracy on both sets, and (for confidence
/// above 0.5) writes the tree's rules to a text file under <c>_outputFolder</c>.
/// </remarks>
/// <param name="args">Command-line arguments (not used).</param>
/// <exception cref="InvalidOperationException">The training file contains no instances.</exception>
static void Main(string[] args)
{
    ArffHeader header;

    // Training
    List<int[]> trainingData = LoadInstancesAsInts(_arffFile, out header);

    // Test
    // NOTE(review): this overwrites the training header, so the test file's header is the one
    // used for the CSV and rule output below (unchanged from the previous behavior) - confirm
    // both files share the same schema.
    List<int[]> testData = LoadInstancesAsInts(_testArffFile, out header);

    if (trainingData.Count == 0)
    {
        // The class column is derived from the first training row, so fail early and clearly
        // instead of with an index error inside the parallel loop.
        throw new InvalidOperationException("Training data is empty; cannot determine the class column.");
    }

    // The class value is taken from the last column of each row.
    int trainClassIndex = trainingData[0].Length - 1;

    // Only read when testData is non-empty; the fallback merely avoids indexing an empty list.
    int testClassIndex = testData.Count > 0 ? testData[0].Length - 1 : trainClassIndex;

    double[] confidences = new double[] { 0.0, 0.1, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99, 0.9999 };

    // NOTE(review): hard-coded, machine-specific output path.
    PrintAsCsv(header, trainingData, @"c:\users\andresz\desktop\data.csv");

    // Create the output folder once rather than from every parallel iteration.
    Directory.CreateDirectory(_outputFolder);

    Parallel.ForEach(confidences, confidence =>
    {
        Id3Node tree = Id3Node.BuildTree(trainingData, trainClassIndex, confidence);

        // Lines from different iterations may interleave on the console; each line carries
        // its confidence value so it can still be attributed.
        Console.WriteLine($"Confidence {confidence}: Num of nodes {GetCount(tree)}");

        // Test accuracy on training
        double trainAccuracy =
            trainingData.Count(row => GetClass(row, tree) == row[trainClassIndex]) / (double)trainingData.Count;
        Console.WriteLine($"Confidence {confidence}: Accuracy on train = {trainAccuracy}");

        // Test accuracy on test
        double testAccuracy =
            testData.Count(row => GetClass(row, tree) == row[testClassIndex]) / (double)testData.Count;
        Console.WriteLine($"Confidence {confidence}: Accuracy on test = {testAccuracy}");

        // Only print small trees.
        if (confidence > 0.5)
        {
            StringBuilder sb = new StringBuilder();
            StringBuilder sbMaxPositive = new StringBuilder();
            StringBuilder sbMaxNegative = new StringBuilder();
            int maxPositive = int.MinValue;
            int maxNegative = int.MinValue;

            PrintTreeAsRules(sb, ref sbMaxPositive, ref sbMaxNegative, ref maxPositive, ref maxNegative, tree, header);

            sb.AppendLine("The most max positive rule is:");
            sb.AppendLine(sbMaxPositive.ToString());
            sb.AppendLine();
            sb.AppendLine("The most max negative rule is:");
            sb.AppendLine(sbMaxNegative.ToString());

            File.WriteAllText(Path.Combine(_outputFolder, $"Tree{confidence}.txt"), sb.ToString());
        }
    });
}

/// <summary>
/// Reads the header and every instance from an ARFF file, converting each value to an int
/// (null values become -1).
/// </summary>
/// <param name="arffPath">Path handed to the <c>ArffReader</c> constructor.</param>
/// <param name="header">Receives the header read from the file.</param>
/// <returns>One int array per instance, in file order.</returns>
private static List<int[]> LoadInstancesAsInts(string arffPath, out ArffHeader header)
{
    List<int[]> rows = new List<int[]>();

    using (ArffReader arffReader = new ArffReader(arffPath))
    {
        header = arffReader.ReadHeader();

        object[] instance;
        while ((instance = arffReader.ReadInstance()) != null)
        {
            rows.Add(instance.Select(o => o == null ? -1 : (int)o).ToArray());
        }
    }

    return rows;
}