コード例 #1
0
        /// <summary>
        /// Program entry point: builds a tree from the training file, prints it, then prints
        /// how the tree's classifications of the testing file compare.
        /// </summary>
        /// <param name="args">args[0] is the training file name, args[1] is the testing file name.</param>
        static void Main(string[] args)
        {
            // Both file names are required; otherwise show the help text and stop.
            if (args.Length < 2)
            {
                PrintHelp();
                return;
            }

            var fileParser  = new Parser();
            var treeBuilder = new DecisionTree();
            string trainingPath = args[0];
            string testingPath  = args[1];

            // Part One: parse the training file, run ID3 over it and print the resulting tree.
            ID3Data trainingSet = fileParser.ParseID3InformationFile(trainingPath);
            Node    root        = treeBuilder.ID3(trainingSet.TestData, trainingSet.GetKeyAttributes(), trainingSet);

            Console.WriteLine("Learned Tree:\n-----------");
            Console.WriteLine(root.ToString());

            // Part Two: classify every example of the testing file with the learned tree.
            ID3Data   testingSet = fileParser.ParseID3InformationFile(testingPath);
            ArrayList predicted  = root.GetTreeClassifications(testingSet);

            Console.WriteLine("Classifications:\n-------------");
            Console.WriteLine(testingSet.CompareClassifications(predicted));

            // Keep the console window open until the user presses a key.
            Console.WriteLine("\n\nPress any key to exit...");
            Console.ReadKey();
        }
コード例 #2
0
        /// <summary>
        /// Translate the file data to ID3 data.
        /// </summary>
        /// <remarks>
        /// Layout of the lines returned by RemoveCommentsAndEmptyLines, in order:
        /// <list type="number">
        /// <item>two class-label lines (the format carries no label count, so two are assumed)</item>
        /// <item>the number of features (nF)</item>
        /// <item>nF feature lines: the feature name followed by one or more of its values</item>
        /// <item>the number of examples (nE)</item>
        /// <item>nE example lines: name, category, then one value per declared feature</item>
        /// </list>
        /// Tokens on feature and example lines are separated by any run of white space.
        /// </remarks>
        /// <param name="filename">Filename to read from</param>
        /// <returns>ID3Data object</returns>
        /// <exception cref="FormatException">
        /// A count line is not an integer, a feature line has no value after its name, or an
        /// example line has fewer than two plus nF tokens.
        /// </exception>
        public ID3Data ParseID3InformationFile(string filename)
        {
            string    rawFileContents   = GetFileAsString(filename);
            ArrayList cleanFileContents = RemoveCommentsAndEmptyLines(rawFileContents);
            char[]    whitespace        = null; // a null separator makes Split break on any white space
            int       i       = 0;
            ID3Data   id3Data = new ID3Data();

            // NOTE: This gets line 0 and 1 (the class labels).
            id3Data.Categories = new ArrayList()
            {
                cleanFileContents[i++],
                cleanFileContents[i++]
            };
            int attributeCount = Int32.Parse(cleanFileContents[i++].ToString());

            for (int f = 0; f < attributeCount; f++, i++)
            {
                string   text   = cleanFileContents[i].ToString();
                string[] tokens = text.Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
                if (tokens.Length < 2)
                {
                    throw new FormatException("Feature line must hold a name and at least one value: '" + text + "'");
                }

                // Keep every value listed after the name instead of assuming exactly two.
                ArrayList values = new ArrayList();
                for (int v = 1; v < tokens.Length; v++)
                {
                    values.Add(tokens[v]);
                }
                id3Data.Attributes.Add(tokens[0], values);
            }
            int dataItemCount = Int32.Parse(cleanFileContents[i++].ToString());

            for (int e = 0; e < dataItemCount; e++, i++)
            {
                string   text   = cleanFileContents[i].ToString();
                string[] tokens = text.Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
                if (tokens.Length < 2 + attributeCount)
                {
                    throw new FormatException("Example line must hold a name, a category and " + attributeCount + " value(s): '" + text + "'");
                }

                // Read one value per declared feature rather than a fixed four.
                ArrayList values = new ArrayList();
                for (int v = 0; v < attributeCount; v++)
                {
                    values.Add(tokens[2 + v]);
                }
                id3Data.TestData.Add(new Data(tokens[0], tokens[1], values));
            }
            return(id3Data);
        }
コード例 #3
0
        /// <summary>
        /// Classifies every example in the given data object by traversing this tree.
        /// </summary>
        /// <param name="testingData">Data object whose TestData entries are classified</param>
        /// <returns>
        /// ArrayList of Tuple[string, string], one per example in iteration order:
        /// the example's Name paired with the result of Traverse for that example.
        /// </returns>
        public ArrayList GetTreeClassifications(ID3Data testingData)
        {
            var classified = new ArrayList();

            foreach (Data item in testingData.TestData)
            {
                // Traverse first, then read the name, so each example yields one (name, label) pair.
                string label = Traverse(item);
                classified.Add(new Tuple<string, string>(item.Name, label));
            }

            return classified;
        }
コード例 #4
0
        /// <summary>
        /// Base-2 entropy of the value counts that SummarizeExamplesAttribute reports
        /// for <paramref name="targetAttribute"/> over <paramref name="examples"/>.
        /// </summary>
        /// <remarks>
        /// Each count is divided by examples.Count to obtain a proportion p, and the result is
        /// the sum of -p * log2(p). An empty summary yields 0.
        /// NOTE(review): textbook ID3 measures entropy over the class labels; this measures the
        /// spread of the attribute's own values - confirm that is intended.
        /// </remarks>
        /// <param name="examples">Examples to measure</param>
        /// <param name="targetAttribute">Attribute whose values are counted</param>
        /// <param name="data">Data object holding the attribute definitions</param>
        /// <returns>The entropy in bits</returns>
        double Entropy(ArrayList examples, string targetAttribute, ID3Data data)
        {
            double result = 0;
            Dictionary <string, int> dictionary = SummarizeExamplesAttribute(examples, targetAttribute, data);
            double total = examples.Count; // widened once so every division below is done in double precision

            foreach (KeyValuePair <string, int> kvp in dictionary)
            {
                // Only values that occurred are in the dictionary, so proportion is never 0 here.
                double proportion = kvp.Value / total;
                result -= proportion * Math.Log(proportion, 2);
            }
            return(result);
        }
コード例 #5
0
        /// <summary>
        /// Picks the attribute with the highest information gain over the examples.
        /// </summary>
        /// <param name="examples">Examples list (as data objects)</param>
        /// <param name="attributes">Candidate attribute names</param>
        /// <param name="data">Data object</param>
        /// <returns>
        /// The first attribute whose gain is strictly greater than every earlier one;
        /// an empty string when no candidate qualifies (for instance when the list is empty).
        /// </returns>
        string GetBestAttribute(ArrayList examples, ArrayList attributes, ID3Data data)
        {
            string winner      = "";
            double highestGain = double.MinValue;

            foreach (string candidate in attributes)
            {
                double setEntropy = Entropy(examples, candidate, data);
                double gain       = InformationGain(examples, candidate, setEntropy, data);

                // Written as a negated '>' so a NaN gain is skipped, exactly like a plain '>' test.
                if (!(gain > highestGain))
                {
                    continue;
                }

                winner      = candidate;
                highestGain = gain;
            }

            return winner;
        }
コード例 #6
0
        /// <summary>
        /// ID3 Algorithm: recursively builds a decision tree for the examples.
        /// </summary>
        /// <remarks>
        /// Outline:
        ///   A = best attribute for the examples; A labels this node.
        ///   For each value of A a child is built from the examples in that value's subset,
        ///   using the attributes without A.
        ///   When no attributes remain, a leaf with the most common category is returned.
        /// The <paramref name="attributes"/> list is never modified; each level works on its own copy.
        /// </remarks>
        /// <param name="examples">Example set to be used</param>
        /// <param name="attributes">Attributes still available for splitting</param>
        /// <param name="data">ID3 Data object (from the parser)</param>
        /// <returns>Root node of the tree</returns>
        public Node ID3(ArrayList examples, ArrayList attributes, ID3Data data)
        {
            // Nothing left to split on: answer with the most common category in the examples.
            if (attributes.Count == 0)
            {
                string mostCommon = GetMostCommonCategory(examples);
                return(new Node(Label: mostCommon, Decision: null));
            }
            string bestAttribute = GetBestAttribute(examples, attributes, data);   // Gets the best attribute for the current examples
            Node   tree          = new Node(Label: bestAttribute, Decision: null); // This node's decision category

            // Copy before removing: the caller and every sibling branch must keep their own
            // attribute list, otherwise the first branch explored consumes the attributes of all others.
            ArrayList remainingAttributes = new ArrayList(attributes);
            remainingAttributes.Remove(bestAttribute);

            foreach (string value in data.Attributes[bestAttribute])
            {
                ArrayList subset = SubSet(examples, value); // Generates a subset of examples
                Dictionary <string, int> dictionary = SummarizeExamplesValue(examples, value, data);
                foreach (KeyValuePair <string, int> kvp in dictionary)
                {
                    // A count covering every example ends the whole call with a leaf for that key.
                    // NOTE(review): this returns from the method, not just from the current branch - confirm intended.
                    if (kvp.Value == examples.Count)
                    {
                        return(new Node(Label: kvp.Key, Decision: null));
                    }
                }

                Node subtree;
                if (subset.Count == 0)
                {
                    // No example takes this value: use the most common category of the current
                    // examples instead of recursing on an empty set (whose gain would be 0/0).
                    subtree = new Node(Label: GetMostCommonCategory(examples), Decision: null);
                }
                else
                {
                    subtree = ID3(subset, remainingAttributes, data); // Gets the subtree built from the subset
                }
                subtree.Decision = value;
                tree.AddBranch(subtree);                             // Adds a child to the tree (or a branch)
            }
            return(tree);
        }
コード例 #7
0
        /// <summary>
        /// Counts, for each value of the target attribute, how many examples contain that value.
        /// </summary>
        /// <param name="examples">Examples to check</param>
        /// <param name="targetAttribute">Attribute whose values (from data.Attributes) are counted</param>
        /// <param name="data">Data object</param>
        /// <returns>
        /// Dictionary mapping each value found in at least one example to its count;
        /// values no example contains are absent.
        /// </returns>
        Dictionary <string, int> SummarizeExamplesAttribute(ArrayList examples, string targetAttribute, ID3Data data)
        {
            var tally = new Dictionary<string, int>();

            foreach (string candidate in data.Attributes[targetAttribute])
            {
                foreach (Data item in examples)
                {
                    if (!item.Attributes.Contains(candidate))
                    {
                        continue;
                    }

                    // TryGetValue leaves 'seen' at 0 for a value not counted yet.
                    int seen;
                    tally.TryGetValue(candidate, out seen);
                    tally[candidate] = seen + 1;
                }
            }

            return tally;
        }
コード例 #8
0
        /// <summary>
        /// Information gain: the entropy of the set minus the weighted entropy of the subsets
        /// produced by each value of <paramref name="attribute"/>.
        /// </summary>
        /// <param name="examples">Examples being split</param>
        /// <param name="attribute">Attribute whose values define the subsets</param>
        /// <param name="entropyOfSet">Entropy already computed for <paramref name="examples"/></param>
        /// <param name="data">Data object holding the attribute definitions</param>
        /// <returns>
        /// The gain; <paramref name="entropyOfSet"/> unchanged when <paramref name="examples"/> is empty.
        /// </returns>
        double InformationGain(ArrayList examples, string attribute, double entropyOfSet, ID3Data data)
        {
            // With no examples every weight would be 0/0 (NaN); there is nothing to subtract.
            if (examples.Count == 0)
            {
                return(entropyOfSet);
            }

            double total = examples.Count; // widened once so the weights are computed in double precision
            double gain  = entropyOfSet;   // The current gain

            foreach (string value in data.Attributes[attribute]) // For each value the attribute can be
            {
                ArrayList subset = SubSet(examples, value);
                gain -= subset.Count / total * Entropy(subset, attribute, data);
            }
            return(gain);
        }