/// <summary>
/// Trains (or loads) a boosted-tree classifier for the 26-letter recognition
/// database and prints the recognition rate on the train and test portions.
/// Because the MLL boosted-tree classifier only handles 2-class problems, the
/// 26-class problem is reduced by "unrolling" each training sample once per
/// candidate class (see comments below).
/// </summary>
/// <param name="dataFilename">Path to the letter database (class letter followed by 16 numeric features per row).</param>
/// <param name="filenameToSave">Optional path to save the trained classifier to; null to skip saving.</param>
/// <param name="filenameToLoad">Optional path to load a pre-trained classifier from; null to train from the data.</param>
private void BuildBoostClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
{
    const int ClassCount = 26;
    CvMat data = null;
    CvMat responses = null;
    CvMat varType = null;
    CvMat tempSample = null;
    CvMat weakResponses = null;
    int nsamplesAall = 0, ntrainSamples = 0;
    int varCount;
    double trainHr = 0, testHr = 0;
    CvBoost boost = new CvBoost();

    try
    {
        try
        {
            ReadNumClassData(dataFilename, 16, out data, out responses);
        }
        catch
        {
            Console.WriteLine("Could not read the database {0}", dataFilename);
            return;
        }
        Console.WriteLine("The database {0} is loaded.", dataFilename);
        nsamplesAall = data.Rows;
        ntrainSamples = (int)(nsamplesAall * 0.5);
        varCount = data.Cols;

        // Create or load Boosted Tree classifier
        if (filenameToLoad != null)
        {
            // load classifier from the specified file
            boost.Load(filenameToLoad);
            ntrainSamples = 0;
            if (boost.GetWeakPredictors() == null)
            {
                // the finally block below still disposes data/responses
                // (previously this early return leaked them)
                Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
                return;
            }
            Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
        }
        else
        {
            // As the boosted tree classifier in MLL can only be trained for
            // 2-class problems, transform the training database by "unrolling"
            // each training sample ClassCount (26) times: one copy per class,
            // with the candidate class index appended as an extra categorical
            // variable and a binary response ("is this the true class?").
            using (CvMat newData = new CvMat(ntrainSamples * ClassCount, varCount + 1, MatrixType.F32C1))
            using (CvMat newResponses = new CvMat(ntrainSamples * ClassCount, 1, MatrixType.S32C1))
            {
                // 1. unroll the database
                Console.WriteLine("Unrolling the database...");
                for (int i = 0; i < ntrainSamples; i++)
                {
                    unsafe
                    {
                        // raw row access: rows are Step bytes apart
                        float* dataRow = (float*)(data.DataByte + data.Step * i);
                        for (int j = 0; j < ClassCount; j++)
                        {
                            float* newDataRow = (float*)(newData.DataByte + newData.Step * (i * ClassCount + j));
                            for (int k = 0; k < varCount; k++)
                            {
                                newDataRow[k] = dataRow[k];
                            }
                            newDataRow[varCount] = (float)j;
                            // responses hold the letter code ('A' + class index)
                            newResponses.DataInt32[i * ClassCount + j] = (responses.DataSingle[i] == j + 'A') ? 1 : 0;
                        }
                    }
                }

                // 2. create type mask: features are ordered; the appended
                // class-index variable and the binary response are categorical
                varType = new CvMat(varCount + 2, 1, MatrixType.U8C1);
                varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
                varType.SetReal1D(varCount, CvStatModel.CV_VAR_CATEGORICAL);
                varType.SetReal1D(varCount + 1, CvStatModel.CV_VAR_CATEGORICAL);

                // 3. train classifier
                Console.Write("Training the classifier (may take a few minutes)...");
                boost.Train(
                    newData, DTreeDataLayout.RowSample, newResponses, null, null, varType, null,
                    new CvBoostParams(CvBoost.REAL, 100, 0.95, 5, false, null)
                );
            }
            Console.WriteLine();
        }

        tempSample = new CvMat(1, varCount + 1, MatrixType.F32C1);
        weakResponses = new CvMat(1, boost.GetWeakPredictors().Total, MatrixType.F32C1);

        // compute prediction error on train and test data: for each sample,
        // pick the class whose unrolled 2-class weak-response sum is maximal
        for (int i = 0; i < nsamplesAall; i++)
        {
            int bestClass = 0;
            double maxSum = double.MinValue;
            CvMat sample;
            Cv.GetRow(data, out sample, i);
            for (int k = 0; k < varCount; k++)
            {
                tempSample.DataArraySingle[k] = sample.DataArraySingle[k];
            }
            for (int j = 0; j < ClassCount; j++)
            {
                tempSample.DataArraySingle[varCount] = (float)j;
                boost.Predict(tempSample, null, weakResponses);
                double sum = weakResponses.Sum().Val0;
                if (maxSum < sum)
                {
                    maxSum = sum;
                    bestClass = j + 'A';
                }
            }
            double r = (Math.Abs(bestClass - responses.DataArraySingle[i]) < float.Epsilon) ? 1 : 0;
            if (i < ntrainSamples)
            {
                trainHr += r;
            }
            else
            {
                testHr += r;
            }
        }

        testHr /= (double)(nsamplesAall - ntrainSamples);
        // guard: when a classifier was loaded, ntrainSamples is 0 and the old
        // code divided by zero, printing NaN as the train rate
        trainHr = ntrainSamples > 0 ? trainHr / (double)ntrainSamples : 1.0;
        Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);
        Console.WriteLine("Number of trees: {0}", boost.GetWeakPredictors().Total);

        // Save classifier to file if needed
        if (filenameToSave != null)
        {
            boost.Save(filenameToSave);
        }
        Console.Read();
    }
    finally
    {
        // dispose on every exit path (early returns and exceptions) instead of
        // only at the end of the happy path, as the original code did
        if (tempSample != null)
        {
            tempSample.Dispose();
        }
        if (weakResponses != null)
        {
            weakResponses.Dispose();
        }
        if (varType != null)
        {
            varType.Dispose();
        }
        if (data != null)
        {
            data.Dispose();
        }
        if (responses != null)
        {
            responses.Dispose();
        }
        boost.Dispose();
    }
}
/// <summary>
/// Trains (or loads) a boosted-tree classifier for the 26-letter recognition
/// database and prints the recognition rate on the train and test portions.
/// The 26-class problem is reduced to a 2-class one by "unrolling" each
/// training sample once per candidate class (see comments below).
/// </summary>
/// <param name="dataFilename">Path to the letter database (class letter followed by 16 numeric features per row).</param>
/// <param name="filenameToSave">Optional path to save the trained classifier to; null to skip saving.</param>
/// <param name="filenameToLoad">Optional path to load a pre-trained classifier from; null to train from the data.</param>
private void BuildBoostClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
{
    const int ClassCount = 26;
    CvMat data = null;
    CvMat responses = null;
    CvMat varType = null;
    CvMat tempSample = null;
    CvMat weakResponses = null;
    int nsamplesAall = 0, ntrainSamples = 0;
    int varCount;
    double trainHr = 0, testHr = 0;
    CvBoost boost = new CvBoost();
    try
    {
        ReadNumClassData(dataFilename, 16, out data, out responses);
    }
    catch
    {
        // NOTE(review): this early return leaks boost (and any partially
        // assigned data/responses); consider try/finally disposal
        Console.WriteLine("Could not read the database {0}", dataFilename);
        return;
    }
    Console.WriteLine("The database {0} is loaded.", dataFilename);
    nsamplesAall = data.Rows;
    // first half of the rows is used for training, the rest for testing
    ntrainSamples = (int)(nsamplesAall * 0.5);
    varCount = data.Cols;
    // Create or load Boosted Tree classifier
    if (filenameToLoad != null)
    {
        // load classifier from the specified file
        boost.Load(filenameToLoad);
        ntrainSamples = 0;
        if (boost.GetWeakPredictors() == null)
        {
            // NOTE(review): this early return leaks data/responses/boost
            Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
            return;
        }
        Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
    }
    else
    {
        // As currently the boosted tree classifier in MLL can only be trained
        // for 2-class problems, we transform the training database by
        // "unrolling" each training sample as many times as the number of
        // classes (26) that we have: one copy per class, with the candidate
        // class index appended as an extra variable and a binary response.
        using (CvMat newData = new CvMat(ntrainSamples * ClassCount, varCount + 1, MatrixType.F32C1))
        using (CvMat newResponses = new CvMat(ntrainSamples * ClassCount, 1, MatrixType.S32C1))
        {
            // 1. unroll the database
            Console.WriteLine("Unrolling the database...");
            for (int i = 0; i < ntrainSamples; i++)
            {
                unsafe
                {
                    // raw row access: rows are Step bytes apart
                    float* dataRow = (float*)(data.DataByte + data.Step * i);
                    for (int j = 0; j < ClassCount; j++)
                    {
                        float* newDataRow = (float*)(newData.DataByte + newData.Step * (i * ClassCount + j));
                        for (int k = 0; k < varCount; k++)
                        {
                            newDataRow[k] = dataRow[k];
                        }
                        newDataRow[varCount] = (float)j;
                        // responses hold the letter code ('A' + class index)
                        newResponses.DataInt32[i * ClassCount + j] = (responses.DataSingle[i] == j + 'A') ? 1 : 0;
                    }
                }
            }
            // 2. create type mask: features are ordered
            varType = new CvMat(varCount + 2, 1, MatrixType.U8C1);
            varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
            // the last indicator variable, as well
            // as the new (binary) response are categorical
            varType.SetReal1D(varCount, CvStatModel.CV_VAR_CATEGORICAL);
            varType.SetReal1D(varCount + 1, CvStatModel.CV_VAR_CATEGORICAL);
            // 3. train classifier
            Console.Write("Training the classifier (may take a few minutes)...");
            boost.Train(
                newData, DTreeDataLayout.RowSample, newResponses, null, null, varType, null,
                new CvBoostParams(CvBoost.REAL, 100, 0.95, 5, false, null)
            );
        }
        Console.WriteLine();
    }
    tempSample = new CvMat(1, varCount + 1, MatrixType.F32C1);
    weakResponses = new CvMat(1, boost.GetWeakPredictors().Total, MatrixType.F32C1);
    // compute prediction error on train and test data: for each sample, pick
    // the class whose unrolled 2-class weak-response sum is maximal
    for (int i = 0; i < nsamplesAall; i++)
    {
        int bestClass = 0;
        double maxSum = double.MinValue;
        double r;
        CvMat sample;
        Cv.GetRow(data, out sample, i);
        for (int k = 0; k < varCount; k++)
        {
            tempSample.DataArraySingle[k] = sample.DataArraySingle[k];
        }
        for (int j = 0; j < ClassCount; j++)
        {
            // append candidate class index and sum weak-classifier votes
            tempSample.DataArraySingle[varCount] = (float)j;
            boost.Predict(tempSample, null, weakResponses);
            double sum = weakResponses.Sum().Val0;
            if (maxSum < sum)
            {
                maxSum = sum;
                bestClass = j + 'A';
            }
        }
        // r is 1 when the predicted letter matches the true response
        r = (Math.Abs(bestClass - responses.DataArraySingle[i]) < float.Epsilon) ? 1 : 0;
        if (i < ntrainSamples) trainHr += r;
        else testHr += r;
    }
    testHr /= (double)(nsamplesAall - ntrainSamples);
    // NOTE(review): when a classifier was loaded, ntrainSamples is 0 and this
    // divides by zero, printing NaN as the train rate
    trainHr /= (double)ntrainSamples;
    Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);
    Console.WriteLine("Number of trees: {0}", boost.GetWeakPredictors().Total);
    // Save classifier to file if needed
    if (filenameToSave != null)
    {
        boost.Save(filenameToSave);
    }
    Console.Read();
    // NOTE(review): disposal happens only on the happy path; early returns and
    // exceptions above leak these resources
    tempSample.Dispose();
    weakResponses.Dispose();
    if (varType != null) varType.Dispose();
    data.Dispose();
    responses.Dispose();
    boost.Dispose();
}
/// <summary>
/// Trains (or loads) a Random Trees classifier on the letter-recognition
/// database, prints train/test recognition rates, variable importance, and
/// proximities between a few sample pairs.
/// </summary>
/// <param name="dataFilename">Path to the letter database (class letter followed by 16 numeric features per row).</param>
/// <param name="filenameToSave">Optional path to save the trained classifier to; null to skip saving.</param>
/// <param name="filenameToLoad">Optional path to load a pre-trained classifier from; null to train from the data.</param>
private void BuildRtreesClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
{
    CvMat data = null;
    CvMat responses = null;
    CvMat varType = null;
    CvMat sampleIdx = null;
    int nsamplesAll = 0, ntrainSamples = 0;
    double trainHr = 0, testHr = 0;
    CvRTrees forest = new CvRTrees();

    try
    {
        try
        {
            ReadNumClassData(dataFilename, 16, out data, out responses);
        }
        catch
        {
            Console.WriteLine("Could not read the database {0}", dataFilename);
            return;
        }
        Console.WriteLine("The database {0} is loaded.", dataFilename);
        nsamplesAll = data.Rows;
        // 80/20 train/test split
        ntrainSamples = (int)(nsamplesAll * 0.8);

        // Create or load Random Trees classifier
        if (filenameToLoad != null)
        {
            // load classifier from the specified file
            forest.Load(filenameToLoad);
            ntrainSamples = 0;
            if (forest.GetTreeCount() == 0)
            {
                // the finally block below still disposes data/responses
                // (previously this early return leaked them)
                Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
                return;
            }
            Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
        }
        else
        {
            // create classifier by using <data> and <responses>
            Console.Write("Training the classifier ...");

            // 1. create type mask: all features ordered, response categorical
            varType = new CvMat(data.Cols + 1, 1, MatrixType.U8C1);
            varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
            varType.SetReal1D(data.Cols, CvStatModel.CV_VAR_CATEGORICAL);

            // 2. create sample_idx: 1 marks a training row, 0 a test row
            sampleIdx = new CvMat(1, nsamplesAll, MatrixType.U8C1);
            {
                CvMat mat;
                Cv.GetCols(sampleIdx, out mat, 0, ntrainSamples);
                mat.Set(CvScalar.RealScalar(1));
                Cv.GetCols(sampleIdx, out mat, ntrainSamples, nsamplesAll);
                mat.SetZero();
            }

            // 3. train classifier
            forest.Train(
                data, DTreeDataLayout.RowSample, responses, null, sampleIdx, varType, null,
                new CvRTParams(10, 10, 0, false, 15, null, true, 4, new CvTermCriteria(100, 0.01f))
            );
            Console.WriteLine();
        }

        // compute prediction error on train and test data
        for (int i = 0; i < nsamplesAll; i++)
        {
            CvMat sample;
            Cv.GetRow(data, out sample, i);
            double r = forest.Predict(sample);
            r = Math.Abs((double)r - responses.DataArraySingle[i]) <= float.Epsilon ? 1 : 0;
            if (i < ntrainSamples)
            {
                trainHr += r;
            }
            else
            {
                testHr += r;
            }
        }

        testHr /= (double)(nsamplesAll - ntrainSamples);
        // guard: when a classifier was loaded, ntrainSamples is 0 and the old
        // code divided by zero, printing NaN as the train rate
        trainHr = ntrainSamples > 0 ? trainHr / (double)ntrainSamples : 1.0;
        Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);
        Console.WriteLine("Number of trees: {0}", forest.GetTreeCount());

        // Print variable importance
        Mat varImportance0 = forest.GetVarImportance();
        CvMat varImportance = varImportance0.ToCvMat();
        if (varImportance != null)
        {
            double rtImpSum = Cv.Sum(varImportance).Val0;
            Console.WriteLine("var#\timportance (in %):");
            for (int i = 0; i < varImportance.Cols; i++)
            {
                Console.WriteLine("{0}\t{1:F1}", i, 100.0f * varImportance.DataArraySingle[i] / rtImpSum);
            }
        }

        // Print some proximitites
        Console.WriteLine("Proximities between some samples corresponding to the letter 'T':");
        {
            CvMat sample1, sample2;
            // (-1,-1) is the sentinel terminating the pair list
            int[,] pairs = new int[,] { { 0, 103 }, { 0, 106 }, { 106, 103 }, { -1, -1 } };
            for (int i = 0; pairs[i, 0] >= 0; i++)
            {
                Cv.GetRow(data, out sample1, pairs[i, 0]);
                Cv.GetRow(data, out sample2, pairs[i, 1]);
                Console.WriteLine("proximity({0},{1}) = {2:F1}%",
                    pairs[i, 0], pairs[i, 1], forest.GetProximity(sample1, sample2) * 100.0);
            }
        }

        // Save Random Trees classifier to file if needed
        if (filenameToSave != null)
        {
            forest.Save(filenameToSave);
        }
        Console.Read();
    }
    finally
    {
        // dispose on every exit path (early returns and exceptions) instead of
        // only at the end of the happy path, as the original code did
        if (sampleIdx != null)
        {
            sampleIdx.Dispose();
        }
        if (varType != null)
        {
            varType.Dispose();
        }
        if (data != null)
        {
            data.Dispose();
        }
        if (responses != null)
        {
            responses.Dispose();
        }
        forest.Dispose();
    }
}
/// <summary>
/// Trains (or loads) a Random Trees classifier on the letter-recognition
/// database, prints train/test recognition rates, variable importance, and
/// proximities between a few sample pairs.
/// </summary>
/// <param name="dataFilename">Path to the letter database (class letter followed by 16 numeric features per row).</param>
/// <param name="filenameToSave">Optional path to save the trained classifier to; null to skip saving.</param>
/// <param name="filenameToLoad">Optional path to load a pre-trained classifier from; null to train from the data.</param>
private void BuildRtreesClassifier(string dataFilename, string filenameToSave, string filenameToLoad)
{
    CvMat data = null;
    CvMat responses = null;
    CvMat varType = null;
    CvMat sampleIdx = null;
    int nsamplesAll = 0, ntrainSamples = 0;
    double trainHr = 0, testHr = 0;
    CvRTrees forest = new CvRTrees();
    try
    {
        ReadNumClassData(dataFilename, 16, out data, out responses);
    }
    catch
    {
        // NOTE(review): this early return leaks forest (and any partially
        // assigned data/responses); consider try/finally disposal
        Console.WriteLine("Could not read the database {0}", dataFilename);
        return;
    }
    Console.WriteLine("The database {0} is loaded.", dataFilename);
    nsamplesAll = data.Rows;
    // 80/20 train/test split
    ntrainSamples = (int)(nsamplesAll * 0.8);
    // Create or load Random Trees classifier
    if (filenameToLoad != null)
    {
        // load classifier from the specified file
        forest.Load(filenameToLoad);
        ntrainSamples = 0;
        if (forest.GetTreeCount() == 0)
        {
            // NOTE(review): this early return leaks data/responses/forest
            Console.WriteLine("Could not read the classifier {0}", filenameToLoad);
            return;
        }
        Console.WriteLine("The classifier {0} is loaded.", filenameToLoad);
    }
    else
    {
        // create classifier by using <data> and <responses>
        Console.Write("Training the classifier ...");
        // 1. create type mask: all features ordered, response categorical
        varType = new CvMat(data.Cols + 1, 1, MatrixType.U8C1);
        varType.Set(CvScalar.ScalarAll(CvStatModel.CV_VAR_ORDERED));
        varType.SetReal1D(data.Cols, CvStatModel.CV_VAR_CATEGORICAL);
        // 2. create sample_idx: 1 marks a training row, 0 a test row
        sampleIdx = new CvMat(1, nsamplesAll, MatrixType.U8C1);
        {
            CvMat mat;
            Cv.GetCols(sampleIdx, out mat, 0, ntrainSamples);
            mat.Set(CvScalar.RealScalar(1));
            Cv.GetCols(sampleIdx, out mat, ntrainSamples, nsamplesAll);
            mat.SetZero();
        }
        // 3. train classifier
        forest.Train(
            data, DTreeDataLayout.RowSample, responses, null, sampleIdx, varType, null,
            new CvRTParams(10, 10, 0, false, 15, null, true, 4, new CvTermCriteria(100, 0.01f))
        );
        Console.WriteLine();
    }
    // compute prediction error on train and test data
    for (int i = 0; i < nsamplesAll; i++)
    {
        double r;
        CvMat sample;
        Cv.GetRow(data, out sample, i);
        r = forest.Predict(sample);
        // r becomes 1 when the prediction matches the true response
        r = Math.Abs((double)r - responses.DataArraySingle[i]) <= float.Epsilon ? 1 : 0;
        if (i < ntrainSamples) trainHr += r;
        else testHr += r;
    }
    testHr /= (double)(nsamplesAll - ntrainSamples);
    // NOTE(review): when a classifier was loaded, ntrainSamples is 0 and this
    // divides by zero, printing NaN as the train rate
    trainHr /= (double)ntrainSamples;
    Console.WriteLine("Recognition rate: train = {0:F1}%, test = {1:F1}%", trainHr * 100.0, testHr * 100.0);
    Console.WriteLine("Number of trees: {0}", forest.GetTreeCount());
    // Print variable importance
    Mat varImportance0 = forest.GetVarImportance();
    CvMat varImportance = varImportance0.ToCvMat();
    if (varImportance != null)
    {
        double rtImpSum = Cv.Sum(varImportance).Val0;
        Console.WriteLine("var#\timportance (in %):");
        for (int i = 0; i < varImportance.Cols; i++)
        {
            Console.WriteLine("{0}\t{1:F1}", i, 100.0f * varImportance.DataArraySingle[i] / rtImpSum);
        }
    }
    // Print some proximitites
    Console.WriteLine("Proximities between some samples corresponding to the letter 'T':");
    {
        CvMat sample1, sample2;
        // (-1,-1) is the sentinel terminating the pair list
        int[,] pairs = new int[,] { { 0, 103 }, { 0, 106 }, { 106, 103 }, { -1, -1 } };
        for (int i = 0; pairs[i, 0] >= 0; i++)
        {
            Cv.GetRow(data, out sample1, pairs[i, 0]);
            Cv.GetRow(data, out sample2, pairs[i, 1]);
            Console.WriteLine("proximity({0},{1}) = {2:F1}%",
                pairs[i, 0], pairs[i, 1], forest.GetProximity(sample1, sample2) * 100.0);
        }
    }
    // Save Random Trees classifier to file if needed
    if (filenameToSave != null)
    {
        forest.Save(filenameToSave);
    }
    Console.Read();
    // NOTE(review): disposal happens only on the happy path; early returns and
    // exceptions above leak these resources
    if (sampleIdx != null) sampleIdx.Dispose();
    if (varType != null) varType.Dispose();
    data.Dispose();
    responses.Dispose();
    forest.Dispose();
}