/**
 * Calculate the size-weighted average entropy of the two child nodes that would
 * result from splitting this node on the given attribute.
 *
 * Individuals whose value for the attribute equals the first value recorded for
 * that attribute in COMP8901_Asg05._attributeValues go to the "positive" child;
 * every other individual goes to the "negative" child.
 *
 * Returns 0 when this node holds no individuals. Without that guard both weights
 * would be computed as 0.0 / 0 and the method would return NaN.
 *
 * Throws System.ArgumentException when splitCondition is not listed in
 * COMP8901_Asg05._attributes.
 */
public double ExpectedEntropyFromSplit(string splitCondition)
{
    if (!COMP8901_Asg05._attributes.Contains(splitCondition))
    {
        throw new System.ArgumentException(
                  System.String.Format("{0} is not a valid attribute.", splitCondition));
    }

    /* Split this node's individuals on the given trait. */
    SysGeneric.List <Individual> positives = new SysGeneric.List <Individual>();
    SysGeneric.List <Individual> negatives = new SysGeneric.List <Individual>();
    string positiveSplitValue = COMP8901_Asg05._attributeValues[splitCondition][0];

    foreach (Individual eachIndividual in _individuals)
    {
        if (eachIndividual._attributes[splitCondition].Equals(positiveSplitValue))
        {
            positives.Add(eachIndividual);
        }
        else
        {
            negatives.Add(eachIndividual);
        }
    }

    /* Nothing to weigh: avoid the 0 / 0 division below, which would yield NaN. */
    if (_individuals.Count == 0)
    {
        return(0);
    }

    /* Calculate the weighted entropy for the result sets.  The left operand is a
     * double, so the divisions are floating-point, not integer, divisions. */
    double entropy = CalculateEntropy(positives) * positives.Count / _individuals.Count;
    entropy += CalculateEntropy(negatives) * negatives.Count / _individuals.Count;

    return(entropy);
}
/**
 * Returns the entropy of the given list of individuals.
 *
 * Individuals are counted as "positive" when their classification equals the
 * first entry of COMP8901_Asg05._classifications and as "negative" otherwise;
 * the two counts are then passed to Entropy().
 *
 * A null collection, or one with fewer than two members, has an entropy of zero.
 */
private static double CalculateEntropy(SysGeneric.List <Individual> collection)
{
    /* The null test must come first: || short-circuits, so Count is only read
     * once the reference is known to be non-null. */
    if (collection == null || collection.Count < 2)
    {
        return(0);
    }

    /* Determine how many individuals of each classification are present in the
     * collection.  The reference classification does not change per iteration,
     * so look it up once. */
    string positiveClassification = COMP8901_Asg05._classifications[0];
    int nPositive = 0;
    int nNegative = 0;

    foreach (Individual eachIndividual in collection)
    {
        if (eachIndividual._classification.Equals(positiveClassification))
        {
            nPositive++;
        }
        else
        {
            nNegative++;
        }
    }

    return(Entropy(nPositive, nNegative));
}
/*--------------------------------------------------------------------------------
 *  Class Methods
 *  --------------------------------------------------------------------------------*/

/**
 * Resets the class-level collections to fresh, empty instances: the
 * classification and attribute lists, the attribute-name to attribute-values
 * map, the test data list, and the error list.
 */
private static void Init()
{
    /* Individual collections. */
    _errorList = new SysGeneric.List <Individual>();
    _testData  = new SysGeneric.List <Individual>();

    /* Attribute metadata. */
    _attributeValues =
        new SysGeneric.Dictionary <string, SysGeneric.List <string> >();
    _attributes = new SysGeneric.List <string>();

    /* Classification labels. */
    _classifications = new SysGeneric.List <string>();
}
/*------------------------------------------------------------------------------------
 *  Constructors & Destructors
 *  ------------------------------------------------------------------------------------*/

/**
 * Creates a detached node: no owning tree, no parent, no children, no recorded
 * past split conditions and an empty children-split condition.
 *
 * newIndividuals becomes the node's list of individuals (the same list instance
 * is stored, not a copy).  When it is omitted or null the node starts with a
 * new, empty list.
 */
public DecisionTreeNode(SysGeneric.List <Individual> newIndividuals = null)
{
    if (newIndividuals != null)
    {
        _individuals = newIndividuals;
    }
    else
    {
        _individuals = new SysGeneric.List <Individual>();
    }

    /* Links into the surrounding tree. */
    _parent   = null;
    _tree     = null;
    _children = new SysGeneric.List <DecisionTreeNode>();

    /* Split bookkeeping. */
    _childrenSplitCondition = "";
    _pastSplitConditions    = new SysGeneric.Dictionary <string, string>();
}
/**
 * Builds the learned decision tree from the given collection of individuals and
 * reports progress on the console.
 *
 * The individuals are wrapped in a root DecisionTreeNode, which seeds a new
 * DecisionTree stored in _learnedTree.  The tree itself is grown by
 * DecisionTree.GenerateOptimalTree() and then printed.
 *
 * NOTE(review): _commonClassification is only read here; presumably it is set
 * elsewhere (possibly while the tree is constructed) -- confirm.
 */
private static void BuildDecisionTree(SysGeneric.List <Individual> root)
{
    string buildingBanner = System.String.Format(
        "{0}\n\tBuilding Decision Tree\n{0}\n", FileReader.HORIZONTAL_RULE);
    SysConsole.Write(buildingBanner);

    /* Seed the learned tree with a root node holding the given individuals. */
    _learnedTree = new DecisionTree(new DecisionTreeNode(root));

    string commonClassificationLine = System.String.Format(
        "The most common classification is {0}.\n\n", _commonClassification);
    SysConsole.Write(commonClassificationLine);

    string rootEntropyLine = System.String.Format(
        "The entropy of the root node is {0}.\n\n", _learnedTree._root._entropy);
    SysConsole.Write(rootEntropyLine);

    /* Grow the tree from the training data. */
    _learnedTree.GenerateOptimalTree();

    string learnedBanner = System.String.Format(
        "{0}\n\tLearned Decision Tree\n{0}\n", FileReader.HORIZONTAL_RULE);
    SysConsole.Write(learnedBanner);

    _learnedTree.Print();
    SysConsole.Write("\n");
}
/**
 * Returns the classification ratio for a given list of Individuals.
 *
 * The classification ratio is the fraction of individuals in the list whose
 * classification equals the first entry of COMP8901_Asg05._classifications.
 *
 * If the given list is null or empty, this method returns -1.
 */
public static double CalculateClassificationRatio(SysGeneric.List <Individual> collection)
{
    /* The null test must come first: || short-circuits, so Count is only read
     * once the reference is known to be non-null. */
    if (collection == null || collection.Count < 1)
    {
        return(-1);
    }

    /* The reference classification does not change per iteration; look it up once. */
    string positiveClassification = COMP8901_Asg05._classifications[0];

    /* Kept as a double so the final division is a floating-point division. */
    double nPositive = 0;

    foreach (Individual eachIndividual in collection)
    {
        if (eachIndividual._classification.Equals(positiveClassification))
        {
            nPositive++;
        }
    }

    return(nPositive / collection.Count);
}
/**
 * Picks the attribute this node should be split on next.
 *
 * Candidates are the attributes in COMP8901_Asg05._attributes that are not yet
 * keys of this node's past split conditions.  Each candidate is scored with
 * ExpectedUtilityFromSplit(); the highest score wins and, on a tie, the
 * candidate encountered first is kept.
 *
 * Throws System.Exception when there is no candidate whose score is greater
 * than -1 (including when there are no candidates at all).
 */
public string DetermineBestSplitCondition()
{
    /* Phase 1: collect the attributes that have not been split on already. */
    SysGeneric.List <string> candidates = new SysGeneric.List <string>();

    foreach (string attribute in COMP8901_Asg05._attributes)
    {
        if (_pastSplitConditions.ContainsKey(attribute))
        {
            continue;
        }

        candidates.Add(attribute);
    }

    /* Phase 2: score every candidate and remember the strictly best one. */
    string winner         = null;
    double winningUtility = -1;

    foreach (string candidate in candidates)
    {
        double utility = ExpectedUtilityFromSplit(candidate);

        if (utility > winningUtility)
        {
            winningUtility = utility;
            winner         = candidate;
        }
    }

    if (winner == null)
    {
        throw new System.Exception("Could not determine a best split attribute.");
    }

    return(winner);
}
/*------------------------------------------------------------------------------------
 *  Constructors & Destructors
 *  ------------------------------------------------------------------------------------*/

/**
 * Creates an empty board: BOARD_WIDTH columns, each holding BOARD_HEIGHT cells
 * set to EMPTY, with no moves recorded.
 */
public Board()
{
    /* Move bookkeeping starts out blank. */
    _moveCount = 0;
    _moves     = "";

    /* NOTE(review): List<int>(int) only reserves capacity; the list itself starts
     * with Count == 0.  Confirm that later code Adds entries rather than indexing
     * into it. */
    _piecesInColumn = new SysGeneric.List <int>(BOARD_WIDTH);

    /* Fill the board column by column with empty cells. */
    _board = new SysGeneric.List <SysGeneric.List <char> >();

    while (_board.Count < BOARD_WIDTH)
    {
        SysGeneric.List <char> cells = new SysGeneric.List <char>();

        while (cells.Count < BOARD_HEIGHT)
        {
            cells.Add(EMPTY);
        }

        _board.Add(cells);
    }

    ///* Initialize the row separator string. */
    //ROW_SEPARATOR = new System.String(ROW_SEPARATOR_CHAR, BOARD_WIDTH * 2 + 1);
}
/*--------------------------------------------------------------------------------
 *  Instance Properties
 *  --------------------------------------------------------------------------------*/

/*--------------------------------------------------------------------------------
 *  Constructors & Destructors
 *  --------------------------------------------------------------------------------*/

/*--------------------------------------------------------------------------------
 *  Class Methods
 *  --------------------------------------------------------------------------------*/

/**
 * Reads the data file at filePath and returns the individuals it describes.
 *
 * Empty lines and lines starting with "//" are ignored.  The remaining lines are
 * consumed in three consecutive sections:
 *
 *   1. Classifications -- one per line, until a line that parses as an integer
 *      (reported as the attribute count).  New classifications are appended to
 *      COMP8901_Asg05._classifications.
 *   2. Attributes -- one per line, split on ATTRIBUTE_SEPARATOR into a name
 *      followed by its values, until a line that parses as an integer (the
 *      number of individuals to read).  New attributes are recorded in
 *      COMP8901_Asg05._attributes and COMP8901_Asg05._attributeValues.
 *   3. Individuals -- one per line, split on DATA_SEPARATOR.  The first two
 *      fields are passed to the Individual constructor; the following fields are
 *      taken, in order, as the values of the recorded attributes.
 *
 * Reading stops once the announced number of individuals has been read.
 * Progress is written to the console throughout.
 *
 * NOTE(review): the parsed attribute count is only printed, never enforced, and
 * a data line with too few fields raises an index-out-of-range error -- confirm
 * whether input validation is wanted here.
 */
public static SysGeneric.List <Individual> ReadDataFile(string filePath)
{
    string fileContents = System.IO.File.ReadAllText(filePath);

    /* Remove carriage returns and collapse runs of tabs, form feeds, vertical
     * tabs and spaces into single spaces (line feeds are left untouched). */
    fileContents = SysRegex.Replace(fileContents, "\r", "");
    fileContents = SysRegex.Replace(fileContents, @"[\t\f\v ]+", " ");

    /* Split into lines. */
    string[] lines = fileContents.Split(LINE_BREAK);

    /* Get rid of blank and comment lines. */
    SysGeneric.List <string> dataLines = new SysGeneric.List <string>();

    foreach (string eachLine in lines)
    {
        if (eachLine.Length < 1)
        {
            continue;
        }

        /* StartsWith is safe on one-character lines such as "/", where indexing
         * the second character directly would throw. */
        if (eachLine.StartsWith("//", System.StringComparison.Ordinal))
        {
            continue;
        }

        /* If the line is neither blank nor commented, then it is data. */
        dataLines.Add(eachLine);
    }

    /* Extract data from the data lines. */
    bool areClassificationsRead = false;
    bool areAttributesRead      = false;
    int  attributeCount         = 0;
    int  dataCount = 0;
    SysGeneric.List <Individual> individuals = new SysGeneric.List <Individual>();

    SysConsole.Write(System.String.Format("{0}\n\tReading Classifications\n{0}\n",
                                          HORIZONTAL_RULE));

    foreach (string eachLine in dataLines)
    {
        /* Read the classifications. */
        if (!areClassificationsRead)
        {
            /* Are we done reading the classifications?  An integer line ends the
             * section. */
            if (int.TryParse(eachLine, out attributeCount))
            {
                areClassificationsRead = true;
                SysConsole.Write(System.String.Format("{0} classifications read.\n\n",
                                                      COMP8901_Asg05._classifications.Count));
                SysConsole.Write(System.String.Format("{0}\n\tReading Attributes\n{0}\n",
                                                      HORIZONTAL_RULE));
                SysConsole.Write(System.String.Format("Attribute count: {0}\n",
                                                      attributeCount));
                continue;
            }

            /* If we have not recorded this classification yet, do so. */
            if (!COMP8901_Asg05._classifications.Contains(eachLine))
            {
                COMP8901_Asg05._classifications.Add(eachLine);
                SysConsole.Write(eachLine + "\n");
            }
            continue;
        }

        /* Read the attributes. */
        if (!areAttributesRead)
        {
            /* Are we done reading the attributes?  An integer line ends the
             * section and gives the number of individuals to read. */
            if (int.TryParse(eachLine, out dataCount))
            {
                areAttributesRead = true;
                SysConsole.Write(System.String.Format("{0} attributes read.\n\n",
                                                      COMP8901_Asg05._attributes.Count));
                SysConsole.Write(System.String.Format("{0}\n\tReading Individuals\n{0}\n",
                                                      HORIZONTAL_RULE));
                SysConsole.Write(System.String.Format("Individual count: {0}\n\n",
                                                      dataCount));
                continue;
            }

            /* Parse each attribute: name first, then its possible values. */
            string[] attributeParts = eachLine.Split(ATTRIBUTE_SEPARATOR);

            /* If we have not recorded this attribute yet, do so. */
            if (!COMP8901_Asg05._attributes.Contains(attributeParts[0]))
            {
                SysGeneric.List <string> attributeValues = new SysGeneric.List <string>();
                for (int i = 1; i < attributeParts.Length; i++)
                {
                    attributeValues.Add(attributeParts[i]);
                }

                /* Record each attribute. */
                COMP8901_Asg05._attributes.Add(attributeParts[0]);
                COMP8901_Asg05._attributeValues[attributeParts[0]] = attributeValues;
                SysConsole.Write(eachLine + "\n");
            }
            continue;
        }

        /* Read the data while individuals remain to be read. */
        if (dataCount > 0)
        {
            /* Parse each individual. */
            string[]   dataParts      = eachLine.Split(DATA_SEPARATOR);
            Individual eachIndividual = new Individual(dataParts[0], dataParts[1]);
            SysGeneric.Dictionary <string, string> eachIndividualAttributes =
                new SysGeneric.Dictionary <string, string>();

            /* Get the attribute values for this individual.  Fields 0 and 1 went
             * to the constructor, so attribute values start at field 2. */
            int index = 2;
            foreach (string eachAttribute in COMP8901_Asg05._attributes)
            {
                eachIndividualAttributes.Add(eachAttribute, dataParts[index]);
                index++;
            }

            /* Add the individual to the return list. */
            eachIndividual._attributes = eachIndividualAttributes;
            individuals.Add(eachIndividual);

            dataCount--;
            continue;
        }

        /* If we have read all of the lines of data we wanted, stop reading the file. */
        break;
    }

    SysConsole.Write("\nFinished reading data from file!\n\n");

    return(individuals);
}