/**
 * Calculate the average entropy of the child nodes that would result from splitting
 * this node on the given attribute.
 *
 * The node's individuals are partitioned into two groups: those whose value for
 * splitCondition equals the first value listed for that attribute in
 * COMP8901_Asg05._attributeValues ("positives"), and everyone else ("negatives").
 * The result is the entropy of each group, as reported by CalculateEntropy,
 * weighted by the fraction of this node's individuals that fall into the group.
 *
 * Parameters:
 *   splitCondition - name of the attribute to split on; must be contained in
 *                    COMP8901_Asg05._attributes.
 *
 * Returns: the weighted entropy of the two groups. A group with no members
 *          contributes nothing, so a node without individuals yields 0.
 *
 * Throws: System.ArgumentException if splitCondition is not a known attribute.
 */
public double ExpectedEntropyFromSplit(string splitCondition)
{
    if (!COMP8901_Asg05._attributes.Contains(splitCondition))
    {
        throw new System.ArgumentException(
                  System.String.Format("{0} is not a valid attribute.", splitCondition));
    }

    /* Split this node's individuals on the given trait. */
    SysGeneric.List <Individual> positives = new SysGeneric.List <Individual>();
    SysGeneric.List <Individual> negatives = new SysGeneric.List <Individual>();
    string positiveSplitValue = COMP8901_Asg05._attributeValues[splitCondition][0];

    foreach (Individual eachIndividual in _individuals)
    {
        if (eachIndividual._attributes[splitCondition].Equals(positiveSplitValue))
        {
            positives.Add(eachIndividual);
        }
        else
        {
            negatives.Add(eachIndividual);
        }
    }

    /* Calculate the weighted entropy for the result sets.
     *
     * An empty group has weight zero, so it is skipped outright. This keeps the
     * result finite when the node itself is empty (the weight would otherwise be
     * 0 / 0, which is NaN in floating-point arithmetic) and when CalculateEntropy
     * does not produce a finite value for an empty list. */
    double entropy = 0.0;

    if (positives.Count > 0)
    {
        entropy += CalculateEntropy(positives) * positives.Count / _individuals.Count;
    }

    if (negatives.Count > 0)
    {
        entropy += CalculateEntropy(negatives) * negatives.Count / _individuals.Count;
    }

    return(entropy);
}
/**
 * Returns the optimal next attribute to split this node on.
 *
 * Every attribute in COMP8901_Asg05._attributes that is not already a key of
 * _pastSplitConditions is scored with ExpectedUtilityFromSplit, and the
 * highest-scoring attribute is returned. When two attributes score the same, the
 * one that appears first wins, because a later candidate must score strictly
 * higher to replace the current best.
 *
 * Returns: the name of the attribute with the greatest utility.
 *
 * Throws: System.InvalidOperationException when no attribute can be chosen, i.e.
 *         there are no untested attributes left or none of them scores above -1.
 */
public string DetermineBestSplitCondition()
{
    /* Only consider attributes that have not already been split on. */
    SysGeneric.List <string> attributesToTest = new SysGeneric.List <string>();

    foreach (string eachAttribute in COMP8901_Asg05._attributes)
    {
        if (!_pastSplitConditions.ContainsKey(eachAttribute))
        {
            attributesToTest.Add(eachAttribute);
        }
    }

    /* Score each remaining attribute and keep the best one. The starting value
     * of -1 acts as the "nothing chosen yet" sentinel: a candidate is accepted
     * only if its utility is strictly greater than the best seen so far. */
    string bestAttribute        = null;
    double bestAttributeUtility = -1;

    foreach (string eachAttribute in attributesToTest)
    {
        double eachUtility = ExpectedUtilityFromSplit(eachAttribute);

        if (eachUtility > bestAttributeUtility)
        {
            bestAttribute        = eachAttribute;
            bestAttributeUtility = eachUtility;
        }
    }

    if (bestAttribute == null)
    {
        /* The node is in a state where no split can be selected. A specific
         * exception type is thrown instead of the bare System.Exception; it still
         * derives from System.Exception, so existing handlers keep working. */
        throw new System.InvalidOperationException(
                  "Could not determine a best split attribute.");
    }

    return(bestAttribute);
}
/*------------------------------------------------------------------------------------
 *      Constructors & Destructors
 *------------------------------------------------------------------------------------*/

/**
 * Creates a board in its starting state: BOARD_WIDTH columns, each holding
 * BOARD_HEIGHT cells set to EMPTY, with a zero move count and an empty move
 * history string.
 */
public Board()
{
    /* No moves have been played yet. */
    _moveCount = 0;
    _moves     = "";

    /* NOTE(review): the integer passed to the List constructor is an initial
     * capacity, not a size, so this list starts with Count == 0. Confirm that the
     * code using _piecesInColumn adds its entries before indexing into it. */
    _piecesInColumn = new SysGeneric.List <int>(BOARD_WIDTH);

    /* Build the grid column by column, filling every cell with EMPTY. */
    _board = new SysGeneric.List <SysGeneric.List <char> >();

    while (_board.Count < BOARD_WIDTH)
    {
        SysGeneric.List <char> cells = new SysGeneric.List <char>();

        while (cells.Count < BOARD_HEIGHT)
        {
            cells.Add(EMPTY);
        }

        _board.Add(cells);
    }

    /* Disabled in the original source: */
    ///* Initialize the row separator string. */
    //ROW_SEPARATOR = new System.String(ROW_SEPARATOR_CHAR, BOARD_WIDTH * 2 + 1);
}
/*--------------------------------------------------------------------------------
 *      Instance Properties
 *--------------------------------------------------------------------------------*/

/*--------------------------------------------------------------------------------
 *      Constructors & Destructors
 *--------------------------------------------------------------------------------*/

/*--------------------------------------------------------------------------------
 *      Class Methods
 *--------------------------------------------------------------------------------*/

/**
 * Reads a data file and returns the individuals it describes.
 *
 * The file is processed line by line after carriage returns are removed and runs
 * of tabs, form feeds, vertical tabs and spaces are collapsed to a single space.
 * Blank lines and lines starting with "//" are ignored. The remaining lines are
 * read in three consecutive sections:
 *
 *   1. Classifications - one per line, until the first line that parses as an
 *      integer. That integer is reported as the attribute count; it is only
 *      printed, not enforced.
 *   2. Attributes - one per line, split on ATTRIBUTE_SEPARATOR into a name
 *      followed by its values, until the next line that parses as an integer.
 *      That integer is the number of individuals to read.
 *   3. Individuals - one per line, split on DATA_SEPARATOR. The first two fields
 *      are passed to the Individual constructor; the following fields are taken,
 *      in order, as the values of the attributes in COMP8901_Asg05._attributes.
 *      Reading stops once the announced number of individuals has been read.
 *
 * Side effects: previously unseen classifications and attributes are added to
 * COMP8901_Asg05._classifications, COMP8901_Asg05._attributes and
 * COMP8901_Asg05._attributeValues, and progress is written to the console.
 *
 * Parameters:
 *   filePath - path of the file to read.
 *
 * Returns: the individuals parsed from the file, in file order.
 *
 * NOTE(review): a data line with fewer fields than 2 + the number of known
 * attributes fails with an index-out-of-range exception when its fields are read.
 */
public static SysGeneric.List <Individual> ReadDataFile(string filePath)
{
    string fileContents = System.IO.File.ReadAllText(filePath);

    /* Remove carriage returns and replace all whitespace sequences with single spaces. */
    fileContents = SysRegex.Replace(fileContents, "\r", "");
    fileContents = SysRegex.Replace(fileContents, @"[\t\f\v ]+", " ");

    /* Split into lines. */
    string[] lines = fileContents.Split(LINE_BREAK);

    /* Get rid of blank and comment lines. */
    SysGeneric.List <string> dataLines = new SysGeneric.List <string>();

    foreach (string eachLine in lines)
    {
        /* A line made only of whitespace has been collapsed to a single space by
         * now, so checking Length alone would let it through as data. */
        if (eachLine.Trim().Length < 1)
        {
            continue;
        }

        /* StartsWith is safe on one-character lines, where indexing the second
         * character would be out of range. */
        if (eachLine.StartsWith("//", System.StringComparison.Ordinal))
        {
            continue;
        }

        /* If the line is neither blank nor commented, then it is data. */
        dataLines.Add(eachLine);
    }

    /* Extract data from the data lines. */
    bool areClassificationsRead = false;
    bool areAttributesRead      = false;
    int  attributeCount         = 0;
    int  dataCount = 0;
    SysGeneric.List <Individual> individuals = new SysGeneric.List <Individual>();

    SysConsole.Write(System.String.Format("{0}\n\tReading Classifications\n{0}\n",
                                          HORIZONTAL_RULE));

    foreach (string eachLine in dataLines)
    {
        /* Read the classifications. */
        if (!areClassificationsRead)
        {
            /* Are we done reading the classifications? The first integer line
             * closes this section and carries the attribute count. */
            if (int.TryParse(eachLine, out attributeCount))
            {
                areClassificationsRead = true;
                SysConsole.Write(System.String.Format("{0} classifications read.\n\n",
                                                      COMP8901_Asg05._classifications.Count));
                SysConsole.Write(System.String.Format("{0}\n\tReading Attributes\n{0}\n",
                                                      HORIZONTAL_RULE));
                SysConsole.Write(System.String.Format("Attribute count: {0}\n",
                                                      attributeCount));
                continue;
            }

            /* If we have not recorded this classification yet, do so. */
            if (!COMP8901_Asg05._classifications.Contains(eachLine))
            {
                COMP8901_Asg05._classifications.Add(eachLine);
                SysConsole.Write(eachLine + "\n");
            }
            continue;
        }

        /* Read the attributes. */
        if (!areAttributesRead)
        {
            /* Are we done reading the attributes? The next integer line closes
             * this section and carries the number of individuals to read. */
            if (int.TryParse(eachLine, out dataCount))
            {
                areAttributesRead = true;
                SysConsole.Write(System.String.Format("{0} attributes read.\n\n",
                                                      COMP8901_Asg05._attributes.Count));
                SysConsole.Write(System.String.Format("{0}\n\tReading Individuals\n{0}\n",
                                                      HORIZONTAL_RULE));
                SysConsole.Write(System.String.Format("Individual count: {0}\n\n",
                                                      dataCount));
                continue;
            }

            /* Parse each attribute: the first part is its name, the rest are its values. */
            string[] attributeParts = eachLine.Split(ATTRIBUTE_SEPARATOR);

            /* If we have not recorded this attribute yet, do so. */
            if (!COMP8901_Asg05._attributes.Contains(attributeParts[0]))
            {
                SysGeneric.List <string> attributeValues = new SysGeneric.List <string>();

                for (int i = 1; i < attributeParts.Length; i++)
                {
                    attributeValues.Add(attributeParts[i]);
                }

                /* Record each attribute. */
                COMP8901_Asg05._attributes.Add(attributeParts[0]);
                COMP8901_Asg05._attributeValues[attributeParts[0]] = attributeValues;
                SysConsole.Write(eachLine + "\n");
            }
            continue;
        }

        /* Read the data while individuals are still expected. */
        if (dataCount > 0)
        {
            /* Parse each individual. */
            string[]   dataParts      = eachLine.Split(DATA_SEPARATOR);
            Individual eachIndividual = new Individual(dataParts[0], dataParts[1]);
            SysGeneric.Dictionary <string, string> eachIndividualAttributes =
                new SysGeneric.Dictionary <string, string>();

            /* Get the attribute values for this individual. They start at the
             * third field and follow the order of the known attributes. */
            int index = 2;

            foreach (string eachAttribute in COMP8901_Asg05._attributes)
            {
                eachIndividualAttributes.Add(eachAttribute, dataParts[index]);
                index++;
            }

            /* Add the individual to the return list. */
            eachIndividual._attributes = eachIndividualAttributes;
            individuals.Add(eachIndividual);
            //SysConsole.Write(eachIndividual + "\n");

            dataCount--;
            continue;
        }

        /* If we have read all of the lines of data we wanted, stop reading the file. */
        break;
    }

    SysConsole.Write("\nFinished reading data from file!\n\n");

    return(individuals);
}