/// <summary>
/// Trains a decision tree on the supplied samples, treating every variable as categorical,
/// then re-predicts each training sample and prints the misclassification counts to the console.
/// </summary>
/// <param name="data">Training samples, one sample per row (passed to training as <c>DTreeDataLayout.RowSample</c>).</param>
/// <param name="missing">Missing-value mask handed to training; row <c>i</c> is also used as the mask when predicting sample <c>i</c>.</param>
/// <param name="responses">Per-sample responses, read as single-precision values; a value equal to the character code of <c>'p'</c> is counted as poisonous.</param>
/// <param name="pWeight">Second entry of the priors array (<c>{ 1, pWeight }</c>); the larger it is, the more attention training pays to the poisonous mushrooms.</param>
/// <returns>The trained decision tree. The caller owns the returned instance.</returns>
private CvDTree MushroomCreateDTree(CvMat data, CvMat missing, CvMat responses, float pWeight)
{
    float[] priors = { 1, pWeight };

    // The using statement guarantees the variable-type matrix is released even when
    // training or prediction throws; the original only disposed it on the success path.
    // NOTE(review): the matrix has data.Cols + 1 entries - presumably one per input
    // variable plus one for the response; confirm against the OpenCV ML documentation.
    using (CvMat varType = new CvMat(data.Cols + 1, 1, MatrixType.U8C1))
    {
        // all the variables are categorical
        Cv.Set(varType, CvScalar.ScalarAll(CvStatModel.CV_VAR_CATEGORICAL));

        CvDTree dtree = new CvDTree();
        CvDTreeParams p = new CvDTreeParams(
            8,      // max depth
            10,     // min sample count
            0,      // regression accuracy: N/A here
            true,   // compute surrogate split, as we have missing data
            15,     // max number of categories (use sub-optimal algorithm for larger numbers)
            10,     // the number of cross-validation folds
            true,   // use 1SE rule => smaller tree
            true,   // throw away the pruned tree branches
            priors  // the array of priors, the bigger p_weight, the more attention
                    // to the poisonous mushrooms
                    // (a mushroom will be judged to be poisonous with bigger chance)
        );

        dtree.Train(data, DTreeDataLayout.RowSample, responses, null, null, varType, missing, p);

        // compute hit-rate on the training database, demonstrates predict usage.
        int missedPoisonous = 0;   // mis-predicted samples whose prediction was not 'p'
        int falseAlarms = 0;       // mis-predicted samples whose prediction was 'p'
        int pTotal = 0;            // samples whose response equals 'p'
        for (int i = 0; i < data.Rows; i++)
        {
            CvMat sample, mask;
            Cv.GetRow(data, out sample, i);
            Cv.GetRow(missing, out mask, i);

            double predicted = dtree.Predict(sample, mask).Value;
            float actual = responses.DataArraySingle[i];

            // Class labels are compared with a tolerance rather than with ==.
            bool mispredicted = Math.Abs(predicted - actual) >= float.Epsilon;
            if (mispredicted)
            {
                if (predicted != 'p')
                {
                    missedPoisonous++;
                }
                else
                {
                    falseAlarms++;
                }
            }

            if (actual == (float)'p')
            {
                pTotal++;
            }
        }

        Console.WriteLine("Results on the training database");
        Console.WriteLine("\tPoisonous mushrooms mis-predicted: {0} ({1}%)", missedPoisonous, (double)missedPoisonous * 100 / pTotal);
        Console.WriteLine("\tFalse-alarms: {0} ({1}%)", falseAlarms, (double)falseAlarms * 100 / (data.Rows - pTotal));

        return dtree;
    }
}