Exemplo n.º 1
0
        /// <summary>
        ///
        /// </summary>
        /// <param name="dataFilename"></param>
        /// <param name="filenameToSave"></param>
        /// <param name="filenameToLoad"></param>
        private void BuildBoostClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
        {
            const int ClassCount = 26;

            CvMat data          = null;
            CvMat responses     = null;
            CvMat varType       = null;
            CvMat tempSample    = null;
            CvMat weakResponses = null;

            int     nsamplesAall = 0, ntrainSamples = 0;
            int     varCount;
            double  trainHr = 0, testHr = 0;
            CvBoost boost = new CvBoost();

            try
            {
                ReadNumClassData(dataFilename, 16, out data, out responses);
            }
            catch
            {
                Console.WriteLine("Could not read the database {0}", dataFilename);
                return;
            }
            Console.WriteLine("The database {0} is loaded.", dataFilename);

            nsamplesAall  = data.Rows;
            ntrainSamples = (int)(nsamplesAall * 0.5);
            varCount      = data.Cols;

            // Create or load Boosted Tree classifier
            if (filenameToLoad != null)
            {
                // load classifier from the specified file
                boost.Load(filenameToLoad);
                ntrainSamples = 0;
                if (boost.GetWeakPredictors() == null)
                {
                    Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
                    return;
                }
                Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
            }
            else
            {
                // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                //
                // As currently boosted tree classifier in MLL can only be trained
                // for 2-class problems, we transform the training database by
                // "unrolling" each training sample as many times as the number of
                // classes (26) that we have.
                //
                // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                using (CvMat newData = new CvMat(ntrainSamples * ClassCount, varCount + 1, MatrixType.F32C1))
                    using (CvMat newResponses = new CvMat(ntrainSamples * ClassCount, 1, MatrixType.S32C1))
                    {
                        // 1. unroll the database type mask
                        Console.WriteLine("Unrolling the database...");
                        for (int i = 0; i < ntrainSamples; i++)
                        {
                            unsafe
                            {
                                float *dataRow = (float *)(data.DataByte + data.Step * i);
                                for (int j = 0; j < ClassCount; j++)
                                {
                                    float *newDataRow = (float *)(newData.DataByte + newData.Step * (i * ClassCount + j));
                                    for (int k = 0; k < varCount; k++)
                                    {
                                        newDataRow[k] = dataRow[k];
                                    }
                                    newDataRow[varCount] = (float)j;
                                    newResponses.DataInt32[i * ClassCount + j] = (responses.DataSingle[i] == j + 'A') ? 1 : 0;
                                }
                            }
                        }

                        // 2. create type mask
                        varType = new CvMat(varCount + 2, 1, MatrixType.U8C1);
                        varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
                        // the last indicator variable, as well
                        // as the new (binary) response are categorical
                        varType.SetReal1D(varCount, CvStatModel.CV_VAR_CATEGORICAL);
                        varType.SetReal1D(varCount + 1, CvStatModel.CV_VAR_CATEGORICAL);

                        // 3. train classifier
                        Console.Write("Training the classifier (may take a few minutes)...");
                        boost.Train(
                            newData, DTreeDataLayout.RowSample, newResponses, null, null, varType, null,
                            new CvBoostParams(CvBoost.REAL, 100, 0.95, 5, false, null)
                            );
                    }
                Console.WriteLine();
            }

            tempSample    = new CvMat(1, varCount + 1, MatrixType.F32C1);
            weakResponses = new CvMat(1, boost.GetWeakPredictors().Total, MatrixType.F32C1);

            // compute prediction error on train and test data
            for (int i = 0; i < nsamplesAall; i++)
            {
                int    bestClass = 0;
                double maxSum    = double.MinValue;
                double r;
                CvMat  sample;

                Cv.GetRow(data, out sample, i);
                for (int k = 0; k < varCount; k++)
                {
                    tempSample.DataArraySingle[k] = sample.DataArraySingle[k];
                }

                for (int j = 0; j < ClassCount; j++)
                {
                    tempSample.DataArraySingle[varCount] = (float)j;
                    boost.Predict(tempSample, null, weakResponses);
                    double sum = weakResponses.Sum().Val0;
                    if (maxSum < sum)
                    {
                        maxSum    = sum;
                        bestClass = j + 'A';
                    }
                }

                r = (Math.Abs(bestClass - responses.DataArraySingle[i]) < float.Epsilon) ? 1 : 0;

                if (i < ntrainSamples)
                {
                    trainHr += r;
                }
                else
                {
                    testHr += r;
                }
            }

            testHr  /= (double)(nsamplesAall - ntrainSamples);
            trainHr /= (double)ntrainSamples;
            Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);
            Console.WriteLine("Number of trees: {0}", boost.GetWeakPredictors().Total);

            // Save classifier to file if needed
            if (filenameToSave != null)
            {
                boost.Save(filenameToSave);
            }


            Console.Read();


            tempSample.Dispose();
            weakResponses.Dispose();
            if (varType != null)
            {
                varType.Dispose();
            }
            data.Dispose();
            responses.Dispose();
            boost.Dispose();
        }
Exemplo n.º 2
0
        /// <summary>
        /// 
        /// </summary>
        /// <param name="dataFilename"></param>
        /// <param name="filenameToSave"></param>
        /// <param name="filenameToLoad"></param>
        private void BuildBoostClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
        {
            const int ClassCount = 26;

            CvMat data = null;
            CvMat responses = null;
            CvMat varType = null;
            CvMat tempSample = null;
            CvMat weakResponses = null;

            int nsamplesAall = 0, ntrainSamples = 0;
            int varCount;
            double trainHr = 0, testHr = 0;
            CvBoost boost = new CvBoost();

            try
            {
                ReadNumClassData(dataFilename, 16, out data, out responses);
            }
            catch
            {
                Console.WriteLine("Could not read the database {0}", dataFilename);
                return;
            }
            Console.WriteLine("The database {0} is loaded.", dataFilename);

            nsamplesAall = data.Rows;
            ntrainSamples = (int)(nsamplesAall * 0.5);
            varCount = data.Cols;

            // Create or load Boosted Tree classifier
            if (filenameToLoad != null)
            {
                // load classifier from the specified file
                boost.Load(filenameToLoad);
                ntrainSamples = 0;
                if (boost.GetWeakPredictors() == null)
                {
                    Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
                    return;
                }
                Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
            }
            else
            {
                // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                //
                // As currently boosted tree classifier in MLL can only be trained
                // for 2-class problems, we transform the training database by
                // "unrolling" each training sample as many times as the number of
                // classes (26) that we have.
                //
                // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                using (CvMat newData = new CvMat(ntrainSamples * ClassCount, varCount + 1, MatrixType.F32C1))
                using (CvMat newResponses = new CvMat(ntrainSamples * ClassCount, 1, MatrixType.S32C1))
                {

                    // 1. unroll the database type mask
                    Console.WriteLine("Unrolling the database...");
                    for (int i = 0; i < ntrainSamples; i++)
                    {
                        unsafe
                        {
                            float* dataRow = (float*)(data.DataByte + data.Step * i);
                            for (int j = 0; j < ClassCount; j++)
                            {
                                float* newDataRow = (float*)(newData.DataByte + newData.Step * (i * ClassCount + j));
                                for (int k = 0; k < varCount; k++)
                                {
                                    newDataRow[k] = dataRow[k];
                                }
                                newDataRow[varCount] = (float)j;
                                newResponses.DataInt32[i * ClassCount + j] = (responses.DataSingle[i] == j + 'A') ? 1 : 0;
                            }
                        }
                    }

                    // 2. create type mask
                    varType = new CvMat(varCount + 2, 1, MatrixType.U8C1);
                    varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
                    // the last indicator variable, as well
                    // as the new (binary) response are categorical
                    varType.SetReal1D(varCount, CvStatModel.CV_VAR_CATEGORICAL);
                    varType.SetReal1D(varCount + 1, CvStatModel.CV_VAR_CATEGORICAL);

                    // 3. train classifier
                    Console.Write("Training the classifier (may take a few minutes)...");
                    boost.Train(
                        newData, DTreeDataLayout.RowSample, newResponses, null, null, varType, null,
                        new CvBoostParams(CvBoost.REAL, 100, 0.95, 5, false, null)
                    );
                }
                Console.WriteLine();
            }

            tempSample = new CvMat(1, varCount + 1, MatrixType.F32C1);
            weakResponses = new CvMat(1, boost.GetWeakPredictors().Total, MatrixType.F32C1);

            // compute prediction error on train and test data
            for (int i = 0; i < nsamplesAall; i++)
            {
                int bestClass = 0;
                double maxSum = double.MinValue;
                double r;
                CvMat sample;

                Cv.GetRow(data, out sample, i);
                for (int k = 0; k < varCount; k++)
                {
                    tempSample.DataArraySingle[k] = sample.DataArraySingle[k];
                }

                for (int j = 0; j < ClassCount; j++)
                {
                    tempSample.DataArraySingle[varCount] = (float)j;
                    boost.Predict(tempSample, null, weakResponses);
                    double sum = weakResponses.Sum().Val0;
                    if (maxSum < sum)
                    {
                        maxSum = sum;
                        bestClass = j + 'A';
                    }
                }

                r = (Math.Abs(bestClass - responses.DataArraySingle[i]) < float.Epsilon) ? 1 : 0;

                if (i < ntrainSamples)
                    trainHr += r;
                else
                    testHr += r;
            }

            testHr /= (double)(nsamplesAall - ntrainSamples);
            trainHr /= (double)ntrainSamples;
            Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);
            Console.WriteLine("Number of trees: {0}", boost.GetWeakPredictors().Total);

            // Save classifier to file if needed
            if (filenameToSave != null)
            {
                boost.Save(filenameToSave);
            }


            Console.Read();


            tempSample.Dispose();
            weakResponses.Dispose();
            if (varType != null) varType.Dispose();
            data.Dispose();
            responses.Dispose();
            boost.Dispose();
        }
Exemplo n.º 3
0
        /// <summary>
        /// RTrees
        /// </summary>
        /// <param name="dataFilename"></param>
        /// <param name="filenameToSave"></param>
        /// <param name="filenameToLoad"></param>
        private void BuildRtreesClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
        {
            CvMat data      = null;
            CvMat responses = null;
            CvMat varType   = null;
            CvMat sampleIdx = null;


            int      nsamplesAll = 0, ntrainSamples = 0;
            double   trainHr = 0, testHr = 0;
            CvRTrees forest = new CvRTrees();

            try
            {
                ReadNumClassData(dataFilename, 16, out data, out responses);
            }
            catch
            {
                Console.WriteLine("Could not read the database {0}", dataFilename);
                return;
            }
            Console.WriteLine("The database {0} is loaded.", dataFilename);

            nsamplesAll   = data.Rows;
            ntrainSamples = (int)(nsamplesAll * 0.8);

            // Create or load Random Trees classifier
            if (filenameToLoad != null)
            {
                // load classifier from the specified file
                forest.Load(filenameToLoad);
                ntrainSamples = 0;
                if (forest.GetTreeCount() == 0)
                {
                    Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
                    return;
                }
                Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
            }
            else
            {
                // create classifier by using <data> and <responses>
                Console.Write("Training the classifier ...");

                // 1. create type mask
                varType = new CvMat(data.Cols + 1, 1, MatrixType.U8C1);
                varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
                varType.SetReal1D(data.Cols, CvStatModel.CV_VAR_CATEGORICAL);

                // 2. create sample_idx
                sampleIdx = new CvMat(1, nsamplesAll, MatrixType.U8C1);
                {
                    CvMat mat;
                    Cv.GetCols(sampleIdx, out mat, 0, ntrainSamples);
                    mat.Set(CvScalar.RealScalar(1));

                    Cv.GetCols(sampleIdx, out mat, ntrainSamples, nsamplesAll);
                    mat.SetZero();
                }

                // 3. train classifier
                forest.Train(
                    data, DTreeDataLayout.RowSample, responses, null, sampleIdx, varType, null,
                    new CvRTParams(10, 10, 0, false, 15, null, true, 4, new CvTermCriteria(100, 0.01f))
                    );
                Console.WriteLine();
            }

            // compute prediction error on train and test data
            for (int i = 0; i < nsamplesAll; i++)
            {
                double r;
                CvMat  sample;
                Cv.GetRow(data, out sample, i);

                r = forest.Predict(sample);
                r = Math.Abs((double)r - responses.DataArraySingle[i]) <= float.Epsilon ? 1 : 0;

                if (i < ntrainSamples)
                {
                    trainHr += r;
                }
                else
                {
                    testHr += r;
                }
            }

            testHr  /= (double)(nsamplesAll - ntrainSamples);
            trainHr /= (double)ntrainSamples;
            Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);

            Console.WriteLine("Number of trees: {0}", forest.GetTreeCount());

            // Print variable importance
            Mat   varImportance0 = forest.GetVarImportance();
            CvMat varImportance  = varImportance0.ToCvMat();

            if (varImportance != null)
            {
                double rtImpSum = Cv.Sum(varImportance).Val0;
                Console.WriteLine("var#\timportance (in %):");
                for (int i = 0; i < varImportance.Cols; i++)
                {
                    Console.WriteLine("{0}\t{1:F1}", i, 100.0f * varImportance.DataArraySingle[i] / rtImpSum);
                }
            }

            // Print some proximitites
            Console.WriteLine("Proximities between some samples corresponding to the letter 'T':");
            {
                CvMat sample1, sample2;
                int[,] pairs = new int[, ] {
                    { 0, 103 }, { 0, 106 }, { 106, 103 }, { -1, -1 }
                };

                for (int i = 0; pairs[i, 0] >= 0; i++)
                {
                    Cv.GetRow(data, out sample1, pairs[i, 0]);
                    Cv.GetRow(data, out sample2, pairs[i, 1]);
                    Console.WriteLine("proximity({0},{1}) = {2:F1}%", pairs[i, 0], pairs[i, 1], forest.GetProximity(sample1, sample2) * 100.0);
                }
            }

            // Save Random Trees classifier to file if needed
            if (filenameToSave != null)
            {
                forest.Save(filenameToSave);
            }


            Console.Read();


            if (sampleIdx != null)
            {
                sampleIdx.Dispose();
            }
            if (varType != null)
            {
                varType.Dispose();
            }
            data.Dispose();
            responses.Dispose();
            forest.Dispose();
        }
Exemplo n.º 4
0
        /// <summary>
        /// RTrees
        /// </summary>
        /// <param name="dataFilename"></param>
        /// <param name="filenameToSave"></param>
        /// <param name="filenameToLoad"></param>
        private void BuildRtreesClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
        {
            CvMat data = null;
            CvMat responses = null;
            CvMat varType = null;
            CvMat sampleIdx = null;


            int nsamplesAll = 0, ntrainSamples = 0;
            double trainHr = 0, testHr = 0;
            CvRTrees forest = new CvRTrees();

            try
            {
                ReadNumClassData(dataFilename, 16, out data, out responses);
            }
            catch
            {
                Console.WriteLine("Could not read the database {0}", dataFilename);
                return;
            }
            Console.WriteLine("The database {0} is loaded.", dataFilename);

            nsamplesAll = data.Rows;
            ntrainSamples = (int)(nsamplesAll * 0.8);

            // Create or load Random Trees classifier
            if (filenameToLoad != null)
            {
                // load classifier from the specified file
                forest.Load(filenameToLoad);
                ntrainSamples = 0;
                if (forest.GetTreeCount() == 0)
                {
                    Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
                    return;
                }
                Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
            }
            else
            {
                // create classifier by using <data> and <responses>
                Console.Write("Training the classifier ...");

                // 1. create type mask
                varType = new CvMat(data.Cols + 1, 1, MatrixType.U8C1);
                varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
                varType.SetReal1D(data.Cols, CvStatModel.CV_VAR_CATEGORICAL);

                // 2. create sample_idx
                sampleIdx = new CvMat(1, nsamplesAll, MatrixType.U8C1);
                {
                    CvMat mat;
                    Cv.GetCols(sampleIdx, out mat, 0, ntrainSamples);
                    mat.Set(CvScalar.RealScalar(1));

                    Cv.GetCols(sampleIdx, out mat, ntrainSamples, nsamplesAll);
                    mat.SetZero();
                }

                // 3. train classifier
                forest.Train(
                    data, DTreeDataLayout.RowSample, responses, null, sampleIdx, varType, null,
                    new CvRTParams(10, 10, 0, false, 15, null, true, 4, new CvTermCriteria(100, 0.01f))
                );
                Console.WriteLine();
            }

            // compute prediction error on train and test data
            for (int i = 0; i < nsamplesAll; i++)
            {
                double r;
                CvMat sample;
                Cv.GetRow(data, out sample, i);

                r = forest.Predict(sample);
                r = Math.Abs((double)r - responses.DataArraySingle[i]) <= float.Epsilon ? 1 : 0;

                if (i < ntrainSamples)
                    trainHr += r;
                else
                    testHr += r;
            }

            testHr /= (double)(nsamplesAll - ntrainSamples);
            trainHr /= (double)ntrainSamples;
            Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);

            Console.WriteLine("Number of trees: {0}", forest.GetTreeCount());

            // Print variable importance
            Mat varImportance0 = forest.GetVarImportance();
            CvMat varImportance = varImportance0.ToCvMat();
            if (varImportance != null)
            {
                double rtImpSum = Cv.Sum(varImportance).Val0;
                Console.WriteLine("var#\timportance (in %):");
                for (int i = 0; i < varImportance.Cols; i++)
                {
                    Console.WriteLine("{0}\t{1:F1}", i, 100.0f * varImportance.DataArraySingle[i] / rtImpSum);
                }
            }

            // Print some proximitites
            Console.WriteLine("Proximities between some samples corresponding to the letter 'T':");
            {
                CvMat sample1, sample2;
                int[,] pairs = new int[,] { { 0, 103 }, { 0, 106 }, { 106, 103 }, { -1, -1 } };

                for (int i = 0; pairs[i, 0] >= 0; i++)
                {
                    Cv.GetRow(data, out sample1, pairs[i, 0]);
                    Cv.GetRow(data, out sample2, pairs[i, 1]);
                    Console.WriteLine("proximity({0},{1}) = {2:F1}%", pairs[i, 0], pairs[i, 1], forest.GetProximity(sample1, sample2) * 100.0);
                }
            }

            // Save Random Trees classifier to file if needed
            if (filenameToSave != null)
            {
                forest.Save(filenameToSave);
            }


            Console.Read();


            if (sampleIdx != null) sampleIdx.Dispose();
            if (varType != null) varType.Dispose();
            data.Dispose();
            responses.Dispose();
            forest.Dispose();
        }