/// <summary>
/// Builds the boosted LogitBoost classifier on the given training data.
/// Optionally (when m_NumFolds > 1) selects the number of boosting iterations
/// by cross-validation before fitting the final model on all of the data.
/// </summary>
/// <param name="data">the training instances; the class must be nominal and
/// no string attributes may be present</param>
/// <exception cref="Exception">if the class is numeric, no base classifier has
/// been set, or string attributes are present</exception>
public virtual void buildClassifier(Instances data)
{
    m_RandomInstance = new Random(m_Seed);
    Instances boostData; // NOTE(review): declared but never used in this method
    int classIndex = data.classIndex();

    // --- Sanity checks on data and configuration ---
    if (data.classAttribute().Numeric)
    {
        throw new Exception("LogitBoost can't handle a numeric class!");
    }
    if (m_Classifier == null)
    {
        throw new System.Exception("A base classifier has not been specified!");
    }
    // Base learners that cannot use instance weights must instead be trained
    // on weight-proportional resamples.
    if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
    {
        m_UseResampling = true;
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating copy of the training data");
    }

    m_NumClasses = data.numClasses();
    m_ClassAttribute = data.classAttribute();

    // Create a copy of the data so the caller's dataset is not modified below.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Create one column of base classifiers per class value.
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating base classifiers");
    }
    m_Classifiers = new Classifier[m_NumClasses][];
    for (int j = 0; j < m_NumClasses; j++)
    {
        m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
    }

    // Do we want to select the appropriate number of iterations
    // using cross-validation?
    int bestNumIterations = this.NumIterations;
    if (m_NumFolds > 1)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Processing first fold.");
        }

        // results[j] accumulates the number of correct classifications observed
        // after boosting iteration j, summed over all runs and all folds.
        double[] results = new double[this.NumIterations];

        // Iterate through the cv-runs
        for (int r = 0; r < m_NumRuns; r++)
        {
            // Stratify the data
            data.randomize(m_RandomInstance);
            data.stratify(m_NumFolds);

            // Perform the cross-validation
            for (int i = 0; i < m_NumFolds; i++)
            {
                // Get train and test folds
                Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
                Instances test = data.testCV(m_NumFolds, i);

                // Replace the nominal class with a numeric "pseudo class" so
                // the base regressors can be fitted to working responses.
                Instances trainN = new Instances(train);
                trainN.ClassIndex = - 1;
                trainN.deleteAttributeAt(classIndex);
                trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
                trainN.ClassIndex = classIndex;
                m_NumericClassData = new Instances(trainN, 0);

                // Allocate per-instance F (additive model output) and Y
                // (target response) matrices, one column per class.
                int numInstances = train.numInstances();
                double[][] tmpArray = new double[numInstances][];
                for (int i2 = 0; i2 < numInstances; i2++)
                {
                    tmpArray[i2] = new double[m_NumClasses];
                }
                double[][] trainFs = tmpArray;
                double[][] tmpArray2 = new double[numInstances][];
                for (int i3 = 0; i3 < numInstances; i3++)
                {
                    tmpArray2[i3] = new double[m_NumClasses];
                }
                double[][] trainYs = tmpArray2;

                // Encode class membership as (optionally offset-smoothed) targets.
                for (int j = 0; j < m_NumClasses; j++)
                {
                    for (int k = 0; k < numInstances; k++)
                    {
                        trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
                    }
                }

                // Perform iterations, scoring the boosted model on the held-out
                // fold after every iteration.
                double[][] probs = initialProbs(numInstances);
                m_NumGenerated = 0;
                double sumOfWeights = train.sumOfWeights();
                for (int j = 0; j < this.NumIterations; j++)
                {
                    performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
                    Evaluation eval = new Evaluation(train);
                    eval.evaluateModel(this, test);
                    results[j] += eval.correct();
                }
            }
        }

        // Find the number of iterations with the lowest error, i.e. the
        // highest accumulated count of correct classifications.
        // (Java original used -Double.MAX_VALUE as the sentinel.)
        double bestResult = - System.Double.MaxValue;
        for (int j = 0; j < this.NumIterations; j++)
        {
            if (results[j] > bestResult)
            {
                bestResult = results[j];
                // NOTE(review): j is the 0-based iteration INDEX but is later
                // used as an iteration COUNT, so the final model may run one
                // iteration fewer than the best-scoring setting — confirm
                // against the upstream Weka LogitBoost source.
                bestNumIterations = j;
            }
        }
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
        }
    }

    // --- Build classifier on all the data ---
    int numInstances2 = data.numInstances();
    double[][] trainFs2 = new double[numInstances2][];
    for (int i4 = 0; i4 < numInstances2; i4++)
    {
        trainFs2[i4] = new double[m_NumClasses];
    }
    double[][] trainYs2 = new double[numInstances2][];
    for (int i5 = 0; i5 < numInstances2; i5++)
    {
        trainYs2[i5] = new double[m_NumClasses];
    }
    for (int j = 0; j < m_NumClasses; j++)
    {
        for (int i = 0, k = 0; i < numInstances2; i++, k++)
        {
            trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
        }
    }

    // Make class numeric (same pseudo-class trick as in the CV loop above).
    data.ClassIndex = - 1;
    data.deleteAttributeAt(classIndex);
    data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
    data.ClassIndex = classIndex;
    m_NumericClassData = new Instances(data, 0);

    // Perform iterations until the iteration budget is spent or the average
    // log-likelihood improves by less than m_Precision (early convergence).
    double[][] probs2 = initialProbs(numInstances2);
    double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
    m_NumGenerated = 0;
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
    }
    double sumOfWeights2 = data.sumOfWeights();
    for (int j = 0; j < bestNumIterations; j++)
    {
        double previousLoglikelihood = logLikelihood;
        performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
        logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
        }
        // Stop early once the log-likelihood has converged.
        if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
        {
            return ;
        }
    }
}
/// <summary> Evaluates a classifier with the options given in an array of
/// strings. <p/>
///
/// Valid options are: <p/>
///
/// -t name of training file <br/>
/// Name of the file with the training data. (required) <p/>
///
/// -T name of test file <br/>
/// Name of the file with the test data. If missing a cross-validation
/// is performed. <p/>
///
/// -c class index <br/>
/// Index of the class attribute (1, 2, ...; default: last). <p/>
///
/// -x number of folds <br/>
/// The number of folds for the cross-validation (default: 10). <p/>
///
/// -s random number seed <br/>
/// Random number seed for the cross-validation (default: 1). <p/>
///
/// -m file with cost matrix <br/>
/// The name of a file containing a cost matrix. <p/>
///
/// -l name of model input file <br/>
/// Loads classifier from the given file. <p/>
///
/// -d name of model output file <br/>
/// Saves classifier built from the training data into the given file. <p/>
///
/// -v <br/>
/// Outputs no statistics for the training data. <p/>
///
/// -o <br/>
/// Outputs statistics only, not the classifier. <p/>
///
/// -i <br/>
/// Outputs detailed information-retrieval statistics per class. <p/>
///
/// -k <br/>
/// Outputs information-theoretic statistics. <p/>
///
/// -p <br/>
/// Outputs predictions for test instances (and nothing else). <p/>
///
/// -r <br/>
/// Outputs cumulative margin distribution (and nothing else). <p/>
///
/// -g <br/>
/// Only for classifiers that implement "Graphable." Outputs
/// the graph representation of the classifier (and nothing
/// else). <p/>
///
/// </summary>
/// <param name="classifier">machine learning classifier</param>
/// <param name="options">the array of string containing the options</param>
/// <throws> Exception if model could not be evaluated successfully </throws>
/// <returns> a string describing the results</returns>
public static System.String evaluateModel(Classifier classifier, System.String[] options)
{
    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = - 1;
    System.String trainFileName, testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName, objectOutputFileName, attributeRangeString;
    bool noOutput = false, printClassifications = false, trainStatistics = true, printMargins = false, printComplexityStatistics = false, printGraph = false, classStatistics = false, printSource = false;
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    System.IO.StreamReader trainReader = null, testReader = null;
    // Converted from java.io.ObjectInputStream; behavior may differ from the Java original.
    System.IO.BinaryReader objectInputStream = null;
    System.IO.Stream objectStream = null;
    CostMatrix costMatrix = null;
    System.Text.StringBuilder schemeOptionsText = null; // never populated: scheme-specific options were not ported
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;
    Classifier classifierBackup;
    try
    {
        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.Length != 0)
        {
            if (classIndexString.Equals("first"))
                classIndex = 1;
            else if (classIndexString.Equals("last"))
                classIndex = - 1;
            else
                classIndex = System.Int32.Parse(classIndexString);
        }
        trainFileName = Utils.getOption('t', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        // Either a training file or a model file must be supplied; a model file
        // additionally requires a test file (or an incremental classifier + train).
        if (trainFileName.Length == 0)
        {
            if (objectInputFileName.Length == 0)
            {
                throw new System.Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.Length == 0)
            {
                throw new System.Exception("No training file and no test " + "file given.");
            }
        }
        else if ((objectInputFileName.Length != 0) && ((!(classifier is UpdateableClassifier)) || (testFileName.Length == 0)))
        {
            throw new System.Exception("Classifier not incremental, or no " + "test file provided: can't " + "use both train and model file.");
        }
        try
        {
            if (trainFileName.Length != 0)
            {
                // NOTE(review): conversion artifact — the file is opened twice here and
                // the two inner StreamReaders are never disposed; TODO collapse to a
                // single new StreamReader(trainFileName, Encoding.Default).
                trainReader = new System.IO.StreamReader(new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).CurrentEncoding);
            }
            if (testFileName.Length != 0)
            {
                // NOTE(review): same double-open conversion artifact as above.
                testReader = new System.IO.StreamReader(new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).CurrentEncoding);
            }
            if (objectInputFileName.Length != 0)
            {
                objectStream = new System.IO.FileStream(objectInputFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
                if (objectInputFileName.EndsWith(".gz"))
                {
                    // Transparently decompress gzipped model files.
                    objectStream = new ICSharpCode.SharpZipLib.GZip.GZipInputStream(objectStream);
                }
                objectInputStream = new System.IO.BinaryReader(objectStream);
            }
        }
        catch (System.Exception e)
        {
            throw new System.Exception("Can't open file " + e.Message + '.');
        }
        if (testFileName.Length != 0)
        {
            // Read only the header (capacity 1) of the test file for now; the
            // instances themselves are streamed one at a time during evaluation.
            template = test = new Instances(testReader, 1);
            if (classIndex != - 1)
            {
                test.ClassIndex = classIndex - 1;
            }
            else
            {
                test.ClassIndex = test.numAttributes() - 1;
            }
            if (classIndex > test.numAttributes())
            {
                throw new System.Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.Length != 0)
        {
            if ((classifier is UpdateableClassifier) && (testFileName.Length != 0))
            {
                // Incremental classifier: header only, training data streamed later.
                train = new Instances(trainReader, 1);
            }
            else
            {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != - 1)
            {
                train.ClassIndex = classIndex - 1;
            }
            else
            {
                train.ClassIndex = train.numAttributes() - 1;
            }
            if ((testFileName.Length != 0) && !test.equalHeaders(train))
            {
                throw new System.ArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes())
            {
                throw new System.Exception("Index of class attribute too large.");
            }
        }
        if (template == null)
        {
            throw new System.Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.Length != 0)
        {
            seed = System.Int32.Parse(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.Length != 0)
        {
            folds = System.Int32.Parse(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());
        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.Length != 0);
        // Check -p option
        try
        {
            attributeRangeString = Utils.getOption('p', options);
        }
        catch (System.Exception e)
        {
            throw new System.Exception(e.Message + "\nNOTE: the -p option has changed. " + "It now expects a parameter specifying a range of attributes " + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.Length != 0)
        {
            // if no test file given, we cannot print predictions
            if (testFileName.Length == 0)
                throw new System.Exception("Cannot print predictions ('-p') without test file ('-T')!");
            printClassifications = true;
            if (!attributeRangeString.Equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }
        // if no training file given, we don't have any priors
        if ((trainFileName.Length == 0) && (printComplexityStatistics))
            throw new System.Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");
        // If a model file is given, we can't process
        // scheme-specific options
        if (objectInputFileName.Length != 0)
        {
            Utils.checkForRemainingOptions(options);
        }
        else
        {
            // Scheme-specific option handling from the Java original was left
            // commented out by the converter, so nothing happens here.
        }
        // NOTE(review): when a model file is given this is the second call to
        // checkForRemainingOptions — redundant but harmless.
        Utils.checkForRemainingOptions(options);
    }
    catch (System.Exception e)
    {
        throw new System.Exception("\nWeka exception: " + e.Message + makeOptionString(classifier));
    }
    // Setup up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    if (objectInputFileName.Length != 0)
    {
        // No training data when loading a saved model, so no priors.
        testingEvaluation.useNoPriors();
        // Load classifier from file
        try
        {
            // SECURITY NOTE(review): BinaryFormatter deserialization of untrusted
            // model files is unsafe and has been deprecated in .NET.
            BinaryFormatter formatter = new BinaryFormatter();
            classifier = (Classifier)formatter.Deserialize(objectStream);
        }
        catch (Exception e)
        {
            Console.WriteLine("Failed to deserialize. 
Reason: " + e.Message);
            throw;
        }
        finally
        {
            objectStream.Close();
        }
        // NOTE(review): the underlying stream was already closed in the finally
        // block above; closing the reader here is a no-op at best.
        objectInputStream.Close();
    }
    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);
    // Build the classifier if no object file provided
    if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null) && (trainFileName.Length != 0))
    {
        // Build classifier incrementally, streaming one instance at a time.
        trainingEvaluation.Priors = train;
        testingEvaluation.Priors = train;
        // (Ticks - 621355968000000000) / 10000 == milliseconds since 1970-01-01,
        // i.e. the Java System.currentTimeMillis() used by the original code.
        trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
        if (objectInputFileName.Length == 0)
        {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader))
        {
            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
        trainReader.Close();
    }
    else if (objectInputFileName.Length == 0)
    {
        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.Priors = tempTrain;
        testingEvaluation.Priors = tempTrain;
        trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
    }
    // Save the classifier if an object output file is provided
    if (objectOutputFileName.Length != 0)
    {
        System.IO.Stream os = new System.IO.FileStream(objectOutputFileName, System.IO.FileMode.Create);
        if (objectOutputFileName.EndsWith(".gz"))
        {
            os = new ICSharpCode.SharpZipLib.GZip.GZipOutputStream(os);
        }
        System.IO.BinaryWriter objectOutputStream = new System.IO.BinaryWriter(os);
        // The model itself is written by BinaryFormatter directly to the stream;
        // the BinaryWriter only mirrors the Java ObjectOutputStream structure.
        BinaryFormatter bformatter = new BinaryFormatter();
        bformatter.Serialize(os, classifier);
        objectOutputStream.Flush();
        objectOutputStream.Close();
    }
    // If classifier is drawable output string describing graph
    if ((classifier is Drawable) && (printGraph))
    {
        return ((Drawable) classifier).graph();
    }
    // Output the classifier as equivalent source
    if ((classifier is Sourcable) && (printSource))
    {
        return wekaStaticWrapper((Sourcable) classifier, sourceClass);
    }
    // Output test instance predictions only
    if (printClassifications)
    {
        return toPrintClassifications(classifier, new Instances(template, 0), testFileName, classIndex, attributesToOutput);
    }
    // Output model
    if (!(noOutput || printMargins))
    {
        text.Append("\n" + classifier.ToString() + "\n");
    }
    if (!printMargins && (costMatrix != null))
    {
        text.Append("\n=== Evaluation Cost Matrix ===\n\n").Append(costMatrix.ToString());
    }
    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.Length != 0))
    {
        if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null))
        {
            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            // NOTE(review): same double-open StreamReader conversion artifact as above.
            trainReader = new System.IO.StreamReader(new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).CurrentEncoding);
            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != - 1)
            {
                train.ClassIndex = classIndex - 1;
            }
            else
            {
                train.ClassIndex = train.numAttributes() - 1;
            }
            testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            while (train.readInstance(trainReader))
            {
                trainingEvaluation.evaluateModelOnce((Classifier) classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
            trainReader.Close();
        }
        else
        {
            testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
        }
        // Print the results of the training evaluation
        if (printMargins)
        {
            return trainingEvaluation.toCumulativeMarginDistributionString();
        }
        else
        {
            text.Append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
            text.Append("\nTime taken to test model on training data: " + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
            text.Append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n", printComplexityStatistics));
            if (template.classAttribute().Nominal)
            {
                if (classStatistics)
                {
                    text.Append("\n\n" + trainingEvaluation.toClassDetailsString());
                }
                text.Append("\n\n" + trainingEvaluation.toMatrixString());
            }
        }
    }
    // Compute proper error estimates
    if (testFileName.Length != 0)
    {
        // Testing is on the supplied test data, streamed instance by instance.
        while (test.readInstance(testReader))
        {
            testingEvaluation.evaluateModelOnce((Classifier) classifier, test.instance(0));
            test.delete(0);
        }
        testReader.Close();
        text.Append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    }
    else if (trainFileName.Length != 0)
    {
        // Testing is via cross-validation on training data
        System.Random random = new System.Random((System.Int32) seed);
        // use untrained (!) classifier for cross-validation
        classifier = Classifier.makeCopy(classifierBackup);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().Numeric)
        {
            text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n", printComplexityStatistics));
        }
        else
        {
            text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().Nominal)
    {
        if (classStatistics)
        {
            text.Append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.Append("\n\n" + testingEvaluation.toMatrixString());
    }
    return text.ToString();
}
/// <summary>
/// Runs a learning-curve experiment: for each cross-validation fold, trains a
/// J48 tree on progressively larger prefixes of the training fold (in steps of
/// TRAINING_INCRMENETS instances), records the per-class true-positive rate in
/// trainingSizeMatrix, and writes the results to "evaluation.txt" (fold-averaged,
/// comma-separated) plus one tab-separated "evaluation-&lt;activity&gt;.txt" file
/// per class value.
/// </summary>
public void EvaluateIncrementalExamples()
{
    // Number of training-size increments that fit into the
    // (DEFAULT_FOLDS - 1) / DEFAULT_FOLDS share of the data that is available
    // for training in each fold.
    int numberIncrements = (int)Math.Ceiling((double)(this.numExamples * (DEFAULT_FOLDS - 1) / DEFAULT_FOLDS) / (double)TRAINING_INCRMENETS);

    // C# numeric array elements are zero-initialized, so the explicit
    // triple-loop clearing of the matrix is unnecessary.
    this.trainingSizeMatrix = new double[this.classCount, DEFAULT_FOLDS, numberIncrements];

    // Shuffle the data once up front.
    Randomize randomizeFilter = new Randomize();
    randomizeFilter.setInputFormat(this.data);
    Instances randomData = Filter.useFilter(this.data, randomizeFilter);

    // Run incremental training for each fold and store the results per class.
    for (int fold = 1; fold <= DEFAULT_FOLDS; fold++)
    {
        // Training split: everything except the current fold.
        RemoveFolds trainingFoldsFilter = new RemoveFolds();
        trainingFoldsFilter.set_NumFolds(DEFAULT_FOLDS);
        trainingFoldsFilter.inputFormat(randomData);
        trainingFoldsFilter.set_InvertSelection(true);
        trainingFoldsFilter.set_Fold(fold);
        Instances alltraining = Filter.useFilter(randomData, trainingFoldsFilter);

        // Test split: the current fold only.
        RemoveFolds testFoldsFilter = new RemoveFolds();
        testFoldsFilter.set_NumFolds(DEFAULT_FOLDS);
        testFoldsFilter.inputFormat(randomData);
        testFoldsFilter.set_InvertSelection(false);
        testFoldsFilter.set_Fold(fold);
        Instances test = Filter.useFilter(randomData, testFoldsFilter);

        for (int increment = 1; increment <= numberIncrements; increment++)
        {
            // Keep only the first (increment * TRAINING_INCRMENETS) training
            // instances, clamped to the size of the training split.
            RemoveRange rangeFilter = new RemoveRange();
            rangeFilter.setInputFormat(alltraining);
            int first = 1;
            int last = increment * TRAINING_INCRMENETS;
            if (last > alltraining.m_Instances.size())
                last = alltraining.m_Instances.size();
            string range = first.ToString() + "-" + last.ToString();
            rangeFilter.set_InstancesIndices(range);
            rangeFilter.set_InvertSelection(true);
            Instances training = Filter.useFilter(alltraining, rangeFilter);

            // Train a J48 decision tree on the truncated training set.
            J48 tree = new J48();
            tree.set_MinNumObj(10);
            tree.set_ConfidenceFactor((float)0.25);
            tree.buildClassifier(training);

            // Evaluate it on the held-out fold.
            Evaluation eval = new Evaluation(training);
            eval.evaluateModel(tree, test);

            // Record the true-positive rate for each class. (The original
            // "= +eval..." was a unary plus, i.e. a plain assignment; each
            // cell is written exactly once, so behavior is unchanged.)
            for (int k = 0; k < this.classCount; k++)
            {
                trainingSizeMatrix[k, fold - 1, increment - 1] = eval.truePositiveRate(k);
            }
        }
    }

    // Write the fold-averaged learning curve, one CSV row per class,
    // expressed as percentages.
    using (TextWriter tw = new StreamWriter("evaluation.txt"))
    {
        for (int i = 0; i < this.classCount; i++)
        {
            string line = randomData.attribute(this.data.numAttributes() - 1).value_Renamed(i);
            for (int k = 0; k < numberIncrements; k++)
            {
                double percentage = 0.0;
                for (int j = 0; j < DEFAULT_FOLDS; j++)
                    percentage += this.trainingSizeMatrix[i, j, k];
                percentage /= DEFAULT_FOLDS;
                percentage *= 100;
                line += "," + percentage.ToString("0.00");
            }
            tw.WriteLine(line);
        }
    }

    // Write one tab-separated file per class with the per-fold curves.
    for (int i = 0; i < this.classCount; i++)
    {
        string activity = randomData.attribute(this.data.numAttributes() - 1).value_Renamed(i);
        using (TextWriter tw = new StreamWriter("evaluation-" + activity + ".txt"))
        {
            for (int j = 0; j < DEFAULT_FOLDS; j++)
            {
                string line = j.ToString();
                for (int k = 0; k < numberIncrements; k++)
                {
                    // NOTE(review): dividing a SINGLE fold's rate by DEFAULT_FOLDS
                    // looks unintended (the averaged file above divides a fold SUM);
                    // kept as-is to preserve the existing output format — confirm.
                    double percentage = this.trainingSizeMatrix[i, j, k];
                    percentage /= DEFAULT_FOLDS;
                    percentage *= 100;
                    line += "\t" + percentage.ToString("0.00");
                }
                tw.WriteLine(line);
            }
        }
    }
}