/// <summary>
/// Feeds every row of <paramref name="test"/> to testRow and tallies the outcome.
/// </summary>
/// <param name="test">Data set whose rows are run through testRow.</param>
/// <returns>
/// A DTestResults whose <c>correct</c> is the number of rows for which testRow
/// returned true and whose <c>total</c> is the row count of <paramref name="test"/>.
/// </returns>
public DTestResults testTree(DataSet test)
{
    int hits = 0;
    uint rowIndex = 0;
    while (rowIndex < test.numRows())
    {
        if (testRow(test.getRow(rowIndex)))
        {
            hits++;
        }
        rowIndex++;
    }

    DTestResults summary = new DTestResults();
    summary.correct = (uint) hits;
    summary.total = test.numRows();
    return summary;
}
/// <summary>
/// Program entry point: loads two data sets, builds a decision tree from the
/// first, prints it, and reports how the tree performs on the second.
/// </summary>
/// <param name="args">Exactly two file paths: the training data and the test data.</param>
/// <returns>
/// 0 on success, 1 if a data set could not be read, 2 if the argument count is wrong.
/// </returns>
static int Main(string[] args)
{
    if (args.Length != 2)
    {
        usage();
        return 2;
    }

    try
    {
        classify = new DataSet(args[0]);
        test = new DataSet(args[1]);
    }
    catch (System.IO.IOException e)
    {
        // Without both data sets there is nothing to build or test; carrying on
        // would operate on data sets that were never loaded.
        Console.WriteLine(e.Message);
        return 1;
    }

    // Build the tree from the training data and show it.
    DTree tree = new DTree(classify);
    tree.printTree();

    DTestResults results = tree.testTree(test);
    Console.WriteLine("Correct/Total: " + results.correct + " / " + results.total);

    if (results.total == 0)
    {
        // Avoid printing NaN from a 0/0 division when the test set is empty.
        Console.WriteLine("Error Rate: n/a (no test rows)");
    }
    else
    {
        // Error rate is the fraction of rows that were NOT counted as correct.
        double wrong = (double)results.total - (double)results.correct;
        Console.WriteLine("Error Rate: " + (wrong / (double)results.total));
    }

    return 0;
}
/// <summary>
/// Builds a new DataSet containing only the rows of this set for which
/// <c>hasAttribute(aidx, v)</c> is true.
/// </summary>
/// <param name="aidx">Attribute index passed to each row's hasAttribute check.</param>
/// <param name="v">Attribute value passed to each row's hasAttribute check.</param>
/// <returns>
/// A DataSet created with this set's attribute count, filled from the original
/// strings of the matching rows.
/// </returns>
public DataSet partitionData(uint aidx, string v)
{
    DataSet subset = new DataSet(attribs.getLength());

    for (uint i = 0; i < size; ++i)
    {
        if (!rows[i].hasAttribute(aidx, v))
        {
            continue;
        }

        // Rows are re-added from their original text rather than shared.
        subset.addRow(rows[i].originalString());
    }

    return subset;
}
/// <summary>
/// Builds this node for <paramref name="classify"/> and, recursively, the subtree below it.
/// The node becomes a leaf when the data has zero entropy or every attribute is
/// already marked; otherwise it splits on the attribute returned by bestGain.
/// </summary>
/// <param name="classify">Data set that reaches this node.</param>
/// <param name="marked">
/// Per-attribute flags for attributes already used on the way to this node.
/// The array is read but not modified; children receive a private copy.
/// </param>
/// <param name="path">
/// Label stored on the node; child nodes receive the attribute value that
/// selected their partition.
/// </param>
public DNode(DataSet classify, ref bool[] marked, string path)
{
    classification = null;   // only gets set on a leaf node
    attr_assigned = false;   // no split attribute chosen yet
    this.path = path;

    // Leaf: the data is perfectly classified (entropy == 0) or we are out of
    // attributes, so fall back to the most frequent classification.
    if (classify.entropy == 0 || allMarked(ref marked))
    {
        classification = classify.getMostOccuringClassification();
        return;
    }

    attribute = classify.bestGain(marked);
    attr_assigned = true;

    // Give the subtree its own copy of the flags. Sharing one array across the
    // whole recursion would leave attributes used deep inside one branch marked
    // for its sibling branches, which would then stop splitting too early.
    bool[] childMarked = (bool[]) marked.Clone();
    childMarked[attribute] = true;

    // One child per value of the chosen attribute.
    string[] values = classify.getAttributes(attribute);
    children = new DNode[values.Length];
    for (uint x = 0; x < children.Length; x++)
    {
        string value = values[x];
        children[x] = new DNode(classify.partitionData(attribute, value), ref childMarked, value);
    }
}
/// <summary>
/// Builds the tree for <paramref name="classify"/> by constructing the root node.
/// </summary>
/// <param name="classify">Data set handed to the root node.</param>
public DTree(DataSet classify)
{
    // One flag per attribute; a freshly allocated bool[] starts out all false.
    int attributeCount = classify.numAttributes();
    bool[] usedAttributes = new bool[attributeCount];

    // The root node is created with an empty path label.
    root = new DNode(classify, ref usedAttributes, "");
}