Ejemplo n.º 1
0
        /// <summary>
        /// 
        /// </summary>
        /// <param name="data"></param>
        /// <param name="missing"></param>
        /// <param name="responses"></param>
        /// <param name="pWeight"></param>
        /// <returns></returns>
        private CvDTree MushroomCreateDTree(CvMat data, CvMat missing, CvMat responses, float pWeight)
        {
            float[] priors = { 1, pWeight };

            CvMat varType = new CvMat(data.Cols + 1, 1, MatrixType.U8C1);
            Cv.Set(varType, CvScalar.ScalarAll(CvStatModel.CV_VAR_CATEGORICAL)); // all the variables are categorical

            CvDTree dtree = new CvDTree();

            CvDTreeParams p = new CvDTreeParams(8, // max depth
                                            10, // min sample count
                                            0, // regression accuracy: N/A here
                                            true, // compute surrogate split, as we have missing data
                                            15, // max number of categories (use sub-optimal algorithm for larger numbers)
                                            10, // the number of cross-validation folds
                                            true, // use 1SE rule => smaller tree
                                            true, // throw away the pruned tree branches
                                            priors // the array of priors, the bigger p_weight, the more attention
                // to the poisonous mushrooms
                // (a mushroom will be judjed to be poisonous with bigger chance)
            );

            dtree.Train(data, DTreeDataLayout.RowSample, responses, null, null, varType, missing, p);

            // compute hit-rate on the training database, demonstrates predict usage.
            int hr1 = 0, hr2 = 0, pTotal = 0;
            for (int i = 0; i < data.Rows; i++)
            {
                CvMat sample, mask;
                Cv.GetRow(data, out sample, i);
                Cv.GetRow(missing, out mask, i);
                double r = dtree.Predict(sample, mask).Value;
                bool d = Math.Abs(r - responses.DataArraySingle[i]) >= float.Epsilon;
                if (d)
                {
                    if (r != 'p')
                        hr1++;
                    else
                        hr2++;
                }
                //Console.WriteLine(responses.DataArraySingle[i]);
                pTotal += (responses.DataArraySingle[i] == (float)'p') ? 1 : 0;
            }

            Console.WriteLine("Results on the training database");
            Console.WriteLine("\tPoisonous mushrooms mis-predicted: {0} ({1}%)", hr1, (double)hr1 * 100 / pTotal);
            Console.WriteLine("\tFalse-alarms: {0} ({1}%)", hr2, (double)hr2 * 100 / (data.Rows - pTotal));

            varType.Dispose();

            return dtree;
        }