/**
         *  Calculates the expected (size-weighted) entropy of the two child nodes that
         *  would result from splitting this node on the given attribute.
         *
         *  Individuals whose value for the attribute equals the first value listed for
         *  it in COMP8901_Asg05._attributeValues form one child; every other individual
         *  forms the other child.
         *
         *  splitCondition: the name of the attribute to split on.
         *
         *  Returns the sum of each child's entropy multiplied by that child's share of
         *  this node's individuals, or 0 when this node holds no individuals.
         *
         *  Throws System.ArgumentException if splitCondition is not contained in
         *  COMP8901_Asg05._attributes.
         */
        public double ExpectedEntropyFromSplit(string splitCondition)
        {
            if (!COMP8901_Asg05._attributes.Contains(splitCondition))
            {
                throw new System.ArgumentException(
                          System.String.Format("{0} is not a valid attribute.", splitCondition));
            }

            /* Split this node's individuals on the given trait. */
            SysGeneric.List <Individual> positives = new SysGeneric.List <Individual>();
            SysGeneric.List <Individual> negatives = new SysGeneric.List <Individual>();
            string positiveSplitValue = COMP8901_Asg05._attributeValues[splitCondition][0];

            foreach (Individual eachIndividual in _individuals)
            {
                if (eachIndividual._attributes[splitCondition].Equals(positiveSplitValue))
                {
                    positives.Add(eachIndividual);
                }
                else
                {
                    negatives.Add(eachIndividual);
                }
            }

            /* An empty node has nothing to weigh. Without this guard the weighting
             *  below would evaluate 0 / 0 in floating point and return NaN. */
            if (_individuals.Count == 0)
            {
                return(0);
            }

            /* Calculate the weighted entropy for the result sets. */
            double total   = _individuals.Count;
            double entropy = CalculateEntropy(positives) * positives.Count / total;

            entropy += CalculateEntropy(negatives) * negatives.Count / total;
            return(entropy);
        }
        /**
         *  Returns the entropy of the given list of individuals.
         *
         *  Individuals whose classification equals COMP8901_Asg05._classifications[0]
         *  are counted as positive and all others as negative; the two counts are then
         *  handed to Entropy().
         *
         *  A null collection, or one with fewer than two members, yields zero.
         */
        private static double CalculateEntropy(SysGeneric.List <Individual> collection)
        {
            /* The null test has to come first: reading collection.Count on a null
             *  reference throws before a later null test could ever run. */
            if (collection == null ||
                collection.Count < 2)
            {
                return(0);
            }

            /* Determine how many individuals of each classification are present in the
             *  collection. */
            int nPositive = 0;
            int nNegative = 0;

            foreach (Individual eachIndividual in collection)
            {
                if (eachIndividual._classification.Equals(COMP8901_Asg05._classifications[0]))
                {
                    nPositive++;
                }
                else
                {
                    nNegative++;
                }
            }

            return(Entropy(nPositive, nNegative));
        }
 /*--------------------------------------------------------------------------------
 *   Class Methods
 *  --------------------------------------------------------------------------------*/
 /**
  *  Assigns a new, empty collection to each of the class-level collection members
  *  listed below.
  */
 private static void Init()
 {
     /* Lists of individuals. */
     _errorList = new SysGeneric.List <Individual>();
     _testData = new SysGeneric.List <Individual>();

     /* String-keyed lookup of string lists. */
     _attributeValues = new SysGeneric.Dictionary <string, SysGeneric.List <string> >();

     /* Plain string lists. */
     _attributes = new SysGeneric.List <string>();
     _classifications = new SysGeneric.List <string>();
 }
 /*------------------------------------------------------------------------------------
 *   Constructors & Destructors
 *  ------------------------------------------------------------------------------------*/
 /**
  *  Creates a node that holds the given individuals and has no tree, no parent, no
  *  children, no recorded past split conditions and an empty children split condition.
  *
  *  newIndividuals: the list to store on the node. When it is null (the default), a
  *  new empty list is stored instead.
  */
 public DecisionTreeNode(SysGeneric.List <Individual> newIndividuals = null)
 {
     /* Fall back to an empty list so that _individuals is never left null. */
     _individuals = newIndividuals ?? new SysGeneric.List <Individual>();

     /* The node starts detached from any tree or parent. */
     _tree = null;
     _parent = null;

     /* Nothing has been split yet, above or below this node. */
     _children = new SysGeneric.List <DecisionTreeNode>();
     _pastSplitConditions = new SysGeneric.Dictionary <string, string>();
     _childrenSplitCondition = "";
 }
        /**
         *  Builds a decision tree whose root node holds the given individuals, stores it
         *  in _learnedTree, asks it to generate its optimal tree, and reports progress
         *  and the finished tree on the console.
         *
         *  root: the individuals placed in the root node.
         */
        private static void BuildDecisionTree(SysGeneric.List <Individual> root)
        {
            string startBanner = System.String.Format("{0}\n\tBuilding Decision Tree\n{0}\n", FileReader.HORIZONTAL_RULE);

            SysConsole.Write(startBanner);

            /* Wrap the individuals in a root node and start the learned tree from it. */
            _learnedTree = new DecisionTree(new DecisionTreeNode(root));

            /* Report the state of the tree before any splitting takes place. */
            string classificationLine = System.String.Format("The most common classification is {0}.\n\n", _commonClassification);

            SysConsole.Write(classificationLine);

            string entropyLine = System.String.Format("The entropy of the root node is {0}.\n\n", _learnedTree._root._entropy);

            SysConsole.Write(entropyLine);

            /* Let the tree grow itself from the training data. */
            _learnedTree.GenerateOptimalTree();

            /* Show the result. */
            string resultBanner = System.String.Format("{0}\n\tLearned Decision Tree\n{0}\n", FileReader.HORIZONTAL_RULE);

            SysConsole.Write(resultBanner);
            _learnedTree.Print();
            SysConsole.Write("\n");
        }
        /**
         *  Returns the classification ratio for a given list of Individuals.
         *
         *  The classification ratio is the fraction of individuals in the given list
         *  whose classification equals COMP8901_Asg05._classifications[0].
         *
         *  collection: the individuals to examine.
         *
         *  Returns a value between 0 and 1, or -1 if the given list is null or empty.
         */
        public static double CalculateClassificationRatio(SysGeneric.List <Individual> collection)
        {
            /* The null test has to come first: reading collection.Count on a null
             *  reference throws before a later null test could ever run. */
            if (collection == null ||
                collection.Count < 1)
            {
                return(-1);
            }

            int nPositive = 0;

            foreach (Individual eachIndividual in collection)
            {
                if (eachIndividual._classification.Equals(COMP8901_Asg05._classifications[0]))
                {
                    nPositive++;
                }
            }

            /* Cast so the division is carried out in floating point. */
            return((double)nPositive / collection.Count);
        }
        /**
         *  Returns the attribute that this node should be split on next.
         *
         *  Every attribute in COMP8901_Asg05._attributes that is not already a key of
         *  _pastSplitConditions is scored with ExpectedUtilityFromSplit(), and the
         *  attribute with the highest score is returned. When scores tie, the attribute
         *  that appears first in COMP8901_Asg05._attributes wins.
         *
         *  Throws System.InvalidOperationException if no candidate scores above -1,
         *  which includes the case where there are no candidates left at all.
         */
        public string DetermineBestSplitCondition()
        {
            /* Only consider attributes that have not already been split on. The
             *  candidates are collected in full before any of them is scored. */
            SysGeneric.List <string> attributesToTest = new SysGeneric.List <string>();

            foreach (string eachAttribute in COMP8901_Asg05._attributes)
            {
                if (!_pastSplitConditions.ContainsKey(eachAttribute))
                {
                    attributesToTest.Add(eachAttribute);
                }
            }

            /* Score each candidate and keep the best one. The strict comparison means
             *  an earlier candidate is kept when a later one merely ties with it. */
            string bestAttribute        = null;
            double bestAttributeUtility = -1;

            foreach (string eachAttribute in attributesToTest)
            {
                double eachUtility = ExpectedUtilityFromSplit(eachAttribute);

                if (eachUtility > bestAttributeUtility)
                {
                    bestAttribute        = eachAttribute;
                    bestAttributeUtility = eachUtility;
                }
            }

            if (bestAttribute == null)
            {
                /* This reflects the state of the node rather than a bad argument, so it
                 *  is reported with a specific exception type instead of the bare
                 *  System.Exception base class. */
                throw new System.InvalidOperationException("Could not determine a best split attribute.");
            }

            return(bestAttribute);
        }
Esempio n. 8
0
    /*------------------------------------------------------------------------------------
    *           Constructors & Destructors
    *   ------------------------------------------------------------------------------------*/
    /**
     *  Creates a board of BOARD_WIDTH columns, each holding BOARD_HEIGHT cells set to
     *  EMPTY, with a move count of zero and an empty move string.
     */
    public Board()
    {
        /* Assemble the grid column by column, then publish it. */
        SysGeneric.List <SysGeneric.List <char> > columns = new SysGeneric.List <SysGeneric.List <char> >();

        while (columns.Count < BOARD_WIDTH)
        {
            SysGeneric.List <char> cells = new SysGeneric.List <char>();

            while (cells.Count < BOARD_HEIGHT)
            {
                cells.Add(EMPTY);
            }

            columns.Add(cells);
        }

        _board = columns;

        ///* Initialize the row separator string. */
        //ROW_SEPARATOR = new System.String(ROW_SEPARATOR_CHAR, BOARD_WIDTH * 2 + 1);

        /* Move bookkeeping starts out empty. */
        _moveCount = 0;
        _moves = "";

        /* NOTE(review): the int argument only reserves capacity; the list itself starts
         * with zero elements. Confirm that entries are added before it is indexed. */
        _piecesInColumn = new SysGeneric.List <int>(BOARD_WIDTH);
    }
        /*--------------------------------------------------------------------------------
        *   Instance Properties
        *  --------------------------------------------------------------------------------*/


        /*--------------------------------------------------------------------------------
        *   Constructors & Destructors
        *  --------------------------------------------------------------------------------*/


        /*--------------------------------------------------------------------------------
        *   Class Methods
        *  --------------------------------------------------------------------------------*/
        /**
         *  Reads a data file and returns the individuals it describes.
         *
         *  Carriage returns are removed and runs of tabs, form feeds, vertical tabs and
         *  spaces are collapsed to a single space. Blank lines and lines starting with
         *  "//" are discarded. The remaining lines are read as three consecutive
         *  sections:
         *
         *    1. Classifications, one per line, ended by the first line that parses as
         *       an integer (reported on the console as the attribute count).
         *    2. Attributes, one per line, split on ATTRIBUTE_SEPARATOR into a name
         *       followed by its values, ended by the first line that parses as an
         *       integer (the number of individuals to read).
         *    3. Individuals, one per line, split on DATA_SEPARATOR. The first two
         *       fields are passed to the Individual constructor; the following fields
         *       are the attribute values, in the order the attributes are held in
         *       COMP8901_Asg05._attributes.
         *
         *  Reading stops once the announced number of individuals has been read.
         *
         *  Side effects: classifications and attributes not seen before are added to
         *  COMP8901_Asg05._classifications, COMP8901_Asg05._attributes and
         *  COMP8901_Asg05._attributeValues, and progress is written to the console.
         *
         *  filePath: the path of the file to read.
         *
         *  Returns the individuals read, in file order.
         */
        public static SysGeneric.List <Individual> ReadDataFile(string filePath)
        {
            string fileContents = System.IO.File.ReadAllText(filePath);

            /* Remove carriage returns and replace all whitespace sequences with single spaces. */
            fileContents = SysRegex.Replace(fileContents, "\r", "");
            fileContents = SysRegex.Replace(fileContents, @"[\t\f\v ]+", " ");

            /* Split into lines. */
            string[] lines = fileContents.Split(LINE_BREAK);

            /* Get rid of blank and comment lines. */
            SysGeneric.List <string> dataLines = new SysGeneric.List <string>();
            foreach (string eachLine in lines)
            {
                /* A line that held only whitespace has been collapsed to a single space
                 *  above, so trim before testing for blankness. */
                if (eachLine.Trim().Length < 1)
                {
                    continue;
                }

                /* StartsWith is safe on a one-character line such as "/", which indexing
                 *  the second character directly is not. */
                if (eachLine.StartsWith("//", System.StringComparison.Ordinal))
                {
                    continue;
                }

                /* If the line is neither blank nor commented, then it is data. */
                dataLines.Add(eachLine);
            }

            /* Extract data from the data lines. */
            bool areClassificationsRead = false;
            bool areAttributesRead      = false;
            int  attributeCount         = 0;
            int  dataCount = 0;

            SysGeneric.List <Individual> individuals = new SysGeneric.List <Individual>();

            SysConsole.Write(System.String.Format("{0}\n\tReading Classifications\n{0}\n", HORIZONTAL_RULE));
            foreach (string eachLine in dataLines)
            {
                /* Read the classifications. */
                if (!areClassificationsRead)
                {
                    /* An integer line ends the classification section. */
                    if (int.TryParse(eachLine, out attributeCount))
                    {
                        areClassificationsRead = true;
                        SysConsole.Write(System.String.Format("{0} classifications read.\n\n", COMP8901_Asg05._classifications.Count));
                        SysConsole.Write(System.String.Format("{0}\n\tReading Attributes\n{0}\n", HORIZONTAL_RULE));
                        SysConsole.Write(System.String.Format("Attribute count: {0}\n", attributeCount));
                        continue;
                    }

                    /* If we have not recorded this classification yet, do so. */
                    if (!COMP8901_Asg05._classifications.Contains(eachLine))
                    {
                        COMP8901_Asg05._classifications.Add(eachLine);
                        SysConsole.Write(eachLine + "\n");
                    }
                    continue;
                }

                /* Read the attributes. */
                if (!areAttributesRead)
                {
                    /* An integer line ends the attribute section and announces how many
                     *  individuals follow. */
                    if (int.TryParse(eachLine, out dataCount))
                    {
                        areAttributesRead = true;
                        SysConsole.Write(System.String.Format("{0} attributes read.\n\n", COMP8901_Asg05._attributes.Count));
                        SysConsole.Write(System.String.Format("{0}\n\tReading Individuals\n{0}\n", HORIZONTAL_RULE));
                        SysConsole.Write(System.String.Format("Individual count: {0}\n\n", dataCount));
                        continue;
                    }

                    /* Parse each attribute: the name comes first, its values follow. */
                    string[] attributeParts = eachLine.Split(ATTRIBUTE_SEPARATOR);

                    /* If we have not recorded this attribute yet, do so. */
                    if (!COMP8901_Asg05._attributes.Contains(attributeParts[0]))
                    {
                        SysGeneric.List <string> attributeValues = new SysGeneric.List <string>();

                        for (int i = 1; i < attributeParts.Length; i++)
                        {
                            attributeValues.Add(attributeParts[i]);
                        }

                        /* Record each attribute. */
                        COMP8901_Asg05._attributes.Add(attributeParts[0]);
                        COMP8901_Asg05._attributeValues[attributeParts[0]] = attributeValues;
                        SysConsole.Write(eachLine + "\n");
                    }
                    continue;
                }

                /* Read the data while individuals are still expected. */
                if (dataCount > 0)
                {
                    /* Parse each individual. */
                    string[] dataParts = eachLine.Split(DATA_SEPARATOR);

                    Individual eachIndividual = new Individual(dataParts[0], dataParts[1]);
                    SysGeneric.Dictionary <string, string> eachIndividualAttributes = new SysGeneric.Dictionary <string, string>();

                    /* Attribute values start at the third field and follow the order in
                     *  which the attributes were recorded. */
                    int index = 2;
                    foreach (string eachAttribute in COMP8901_Asg05._attributes)
                    {
                        eachIndividualAttributes.Add(eachAttribute, dataParts[index]);
                        index++;
                    }

                    /* Add the individual to the return list. */
                    eachIndividual._attributes = eachIndividualAttributes;
                    individuals.Add(eachIndividual);
                    dataCount--;
                    continue;
                }

                /* If we have read all of the lines of data we wanted, stop reading the file. */
                break;
            }

            SysConsole.Write("\nFinished reading data from file!\n\n");
            return(individuals);
        }