예제 #1
0
        /// <summary>
        /// Creates a classifier by building an ID3 tree from the supplied instances.
        /// </summary>
        /// <param name="instances">Rows handed to <c>Id3Node.BuildTree</c>; the same list reference is stored in <c>Instances</c>.</param>
        /// <param name="classIndex">Forwarded unchanged to <c>Id3Node.BuildTree</c> (presumably the column holding the class label).</param>
        /// <param name="confidence">Stored in <c>Confidence</c> and forwarded unchanged to <c>Id3Node.BuildTree</c>.</param>
        public Id3Classifier(List<int[]> instances, int classIndex, double confidence)
        {
            // Build first; if this throws, the instance is discarded anyway.
            var root = Id3Node.BuildTree(instances, classIndex, confidence);

            Confidence = confidence;
            Instances = instances;
            Tree = root;
        }
예제 #2
0
        /// <summary>
        /// Creates a classifier by building an ID3 tree from the supplied instances,
        /// passing a depth argument through to the tree builder.
        /// </summary>
        /// <param name="instances">Rows handed to <c>Id3Node.BuildTree</c>.</param>
        /// <param name="classIndex">Forwarded unchanged to <c>Id3Node.BuildTree</c> (presumably the column holding the class label).</param>
        /// <param name="confidence">Stored in <c>Confidence</c> and forwarded unchanged to <c>Id3Node.BuildTree</c>.</param>
        /// <param name="maxDepth">Forwarded unchanged to <c>Id3Node.BuildTree</c> (presumably a limit on tree depth).</param>
        public Id3Classifier(List<int[]> instances, int classIndex, double confidence, int maxDepth)
        {
            // Build first; if this throws, the instance is discarded anyway.
            var root = Id3Node.BuildTree(instances, classIndex, confidence, maxDepth);

            Confidence = confidence;
            Tree = root;
        }
예제 #3
0
        /// <summary>
        /// Loads the training and test ARFF files, then, for each confidence value, builds an
        /// ID3 tree from the training data, prints its node count and its accuracy on both
        /// data sets, and (for confidences above 0.5) writes the tree's rules to a text file
        /// in the output folder.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="InvalidOperationException">The training file contained no instances.</exception>
        static void Main(string[] args)
        {
            // Training. This header is the one that describes trainingData and the trees built
            // from it, so it is the one used for the CSV dump and for rule printing below.
            ArffHeader header;
            List<int[]> trainingData = ReadArffAsIntInstances(_arffFile, out header);

            // Test. The test header is still read by the helper, but it is deliberately not
            // allowed to replace the training header.
            ArffHeader testHeader;
            List<int[]> testData = ReadArffAsIntInstances(_testArffFile, out testHeader);

            double[] confidences = new double[]
            {
                0.0,
                0.1,
                0.2,
                0.4,
                0.6,
                0.8,
                0.9,
                0.95,
                0.99,
                0.9999
            };

            // NOTE(review): hard-coded, machine-specific output path; kept as-is because callers
            // of this program may rely on it.
            PrintAsCsv(header, trainingData, @"c:\users\andresz\desktop\data.csv");

            // Fail with a clear message instead of an index error surfacing from inside the
            // parallel loop as an AggregateException.
            if (trainingData.Count == 0)
            {
                throw new InvalidOperationException($"No training instances were read from '{_arffFile}'.");
            }

            // The class label is taken from the last column of the first row of each data set.
            int trainClassIndex = trainingData[0].Length - 1;
            // With an empty test set the predicate below never runs, so -1 is never used
            // (the reported accuracy is then 0 / 0.0 = NaN, as before).
            int testClassIndex = testData.Count > 0 ? testData[0].Length - 1 : -1;

            Parallel.ForEach(confidences, confidence =>
            {
                Id3Node tree = Id3Node.BuildTree(trainingData, trainClassIndex, confidence);

                Console.WriteLine($"Confidence {confidence}: Num of nodes {GetCount(tree)}");

                // Test accuracy on training
                double trainAccuracy = trainingData.Count(instance => GetClass(instance, tree) == instance[trainClassIndex]) / (double)trainingData.Count;
                Console.WriteLine($"Confidence {confidence}: Accuracy on train = {trainAccuracy}");

                // Test accuracy on test
                double testAccuracy = testData.Count(instance => GetClass(instance, tree) == instance[testClassIndex]) / (double)testData.Count;
                Console.WriteLine($"Confidence {confidence}: Accuracy on test = {testAccuracy}");

                // Only print small trees.
                if (confidence > 0.5)
                {
                    StringBuilder sb            = new StringBuilder();
                    StringBuilder sbMaxPositive = new StringBuilder();
                    StringBuilder sbMaxNegative = new StringBuilder();
                    int maxPositive             = int.MinValue;
                    int maxNegative             = int.MinValue;

                    PrintTreeAsRules(sb, ref sbMaxPositive, ref sbMaxNegative, ref maxPositive, ref maxNegative, tree, header);
                    sb.AppendLine("The most max positive rule is:");
                    sb.AppendLine(sbMaxPositive.ToString());
                    sb.AppendLine();
                    sb.AppendLine("The most max negative rule is:");
                    sb.AppendLine(sbMaxNegative.ToString());

                    // Each confidence writes its own file, so parallel iterations do not
                    // contend for the same path.
                    Directory.CreateDirectory(_outputFolder);
                    File.WriteAllText(Path.Combine(_outputFolder, $"Tree{confidence}.txt"), sb.ToString());
                }
            });
        }

        /// <summary>
        /// Reads the header and every instance from an ARFF file and converts each instance
        /// to an <c>int[]</c>, mapping null values to -1.
        /// </summary>
        /// <param name="path">Path of the ARFF file to read.</param>
        /// <param name="header">Receives the header read from the file.</param>
        /// <returns>One <c>int[]</c> per instance, in file order.</returns>
        private static List<int[]> ReadArffAsIntInstances(string path, out ArffHeader header)
        {
            List<int[]> rows = new List<int[]>();

            using (ArffReader arffReader = new ArffReader(path))
            {
                header = arffReader.ReadHeader();

                object[] instance;
                while ((instance = arffReader.ReadInstance()) != null)
                {
                    // NOTE(review): the (int) unboxing cast assumes every non-null value is a
                    // boxed int; any other boxed type throws InvalidCastException - confirm
                    // against the data files.
                    rows.Add(instance.Select(o => o == null ? -1 : (int)o).ToArray());
                }
            }

            return rows;
        }