/// <summary>
/// Tests the case c using classification (majority vote) against the ensemble learner.
/// Each tree in the ensemble votes for a variant of the target attribute, weighted by its
/// entry in VoteWeights; the variant with the highest total vote weight wins.
/// Requires the target attribute so it knows how many variants there are. Unlike an ID3
/// node, this is a full learner, so it contains its own testing functions.
/// </summary>
/// <param name="c">The case to classify. Assumed to have the target label as its last attribute.</param>
/// <param name="target">The target attribute; supplies the number of possible variants.</param>
/// <returns>The variant number (index) of the winning label, or -1 if the target has no variants.</returns>
public int TestEnsembleClassificaiton(Case c, DAttribute target)
{
    double[] voting = new double[target.numVariants()];

    // Let every tree vote: add the tree's voting power to the bucket for its answer.
    for (int i = 0; i < VoteWeights.Length; i++)
    {
        int currentResult = ID3Tools.TestWithTree(c, Trees[i]);
        voting[currentResult] += VoteWeights[i];
    }

    // Find the majority vote in the voting pool.
    // Seed with NegativeInfinity rather than -1: boosting-style vote weights can be
    // negative (e.g. AdaBoost alphas for worse-than-chance learners), and with a -1
    // sentinel every bucket could fail the comparison, leaving max at the invalid -1.
    int max = -1;
    double highest = double.NegativeInfinity;
    for (int i = 0; i < voting.Length; i++)
    {
        if (voting[i] > highest)
        {
            max = i;
            highest = voting[i];
        }
    }

    // max now contains the winning variant number for the attribute.
    return max;
}
/// <summary>
/// Calculates the final label (output, found in data as #attributeNum) distribution for each
/// variant of a dataset and returns it. Each case contributes its weight to the bucket of the
/// attribute variant it holds; buckets are then normalized by the total weight so the result
/// is a relative proportion per variant.
/// </summary>
/// <param name="Data">The cases to tally. Cases whose value for the attribute is undefined (&lt; 0) are skipped.</param>
/// <param name="attribute">The attribute whose variant distribution is computed; supplies the variant count and column ID.</param>
/// <returns>An array of length attribute.numVariants() with the proportion of total weight per variant
/// (all zeros if the data is empty or every value is undefined).</returns>
public static double[] GetLabelDistribution(List<Case> Data, DAttribute attribute)
{
    int numVars = attribute.numVariants();
    double[] output = new double[numVars];
    double sumWeight = 0;

    foreach (Case c in Data)
    {
        // The variant ID of the attribute value held by c.
        int AVal = (int)c.AttributeVals[attribute.ID];
        if (AVal <= -1)
        {
            continue; // Value is undefined; proceed to the next case.
        }

        // Increment the corresponding attribute variant by the case's weight
        // (summing the weighted number of hits for each variant).
        output[AVal] += c.getWeight();
        sumWeight += c.getWeight();
    }

    // Divide each bucket by the total weight to get the relative proportion of the label
    // as opposed to the raw count. Guard against sumWeight == 0 (empty data, or every
    // value undefined), which would otherwise fill the result with NaN via 0/0.
    if (sumWeight > 0)
    {
        for (int i = 0; i < numVars; i++)
        {
            output[i] = output[i] / sumWeight;
        }
    }

    return output;
}