/// <summary> Performs a (stratified if class is nominal) cross-validation /// for a classifier on a set of instances. Now performs /// a deep copy of the classifier before each call to /// buildClassifier() (just in case the classifier is not /// initialized properly). /// /// </summary> /// <param name="classifier">the classifier with any options set. /// </param> /// <param name="data">the data on which the cross-validation is to be /// performed /// </param> /// <param name="numFolds">the number of folds for the cross-validation /// </param> /// <param name="random">random number generator for randomization /// </param> /// <throws> Exception if a classifier could not be generated </throws> /// <summary> successfully or the class is not defined /// </summary> public virtual void crossValidateModel(Classifier classifier, Instances data, int numFolds, System.Random random) { // Make a copy of the data we can reorder data = new Instances(data); data.randomize(random); if (data.classAttribute().Nominal) { data.stratify(numFolds); } // Do the folds for (int i = 0; i < numFolds; i++) { Instances train = data.trainCV(numFolds, i, random); Priors = train; Classifier copiedClassifier = Classifier.makeCopy(classifier); copiedClassifier.buildClassifier(train); Instances test = data.testCV(numFolds, i); evaluateModel(copiedClassifier, test); } m_NumFolds = numFolds; }
public static void cvdTest() { weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff")); data.setClassIndex(data.numAttributes() - 1); weka.classifiers.Classifier cls = new weka.classifiers.bayes.NaiveBayes(); //Save BayesNet results in .txt file using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt")) { int runs = 1; int folds = 10; // perform cross-validation for (int i = 0; i < runs; i++) { // randomize data int seed = i + 1; java.util.Random rand = new java.util.Random(seed); weka.core.Instances randData = new weka.core.Instances(data); randData.randomize(rand); if (randData.classAttribute().isNominal()) { randData.stratify(folds); } weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData); for (int n = 0; n < folds; n++) { weka.core.Instances train = randData.trainCV(folds, n); weka.core.Instances test = randData.testCV(folds, n); // build and evaluate classifier //weka.classifiers.Classifier clsCopy = weka.classifiers.Classifier.makeCopy(cls); cls.buildClassifier(train); //eval.evaluateModel(cls, test); //Print classifier analytics for all the dataset file.WriteLine("EVALUATION OF TEST DATASET."); // Test the model weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(test); eTest.evaluateModel(cls, test); // Print the results as in Weka explorer: //Print statistics String strSummaryTest = eTest.toSummaryString(); file.WriteLine(strSummaryTest); file.WriteLine(); //Print detailed class statistics file.WriteLine(eTest.toClassDetailsString()); file.WriteLine(); //Print confusion matrix file.WriteLine(eTest.toMatrixString()); file.WriteLine(); // Get the confusion matrix double[][] cmMatrixTest = eTest.confusionMatrix(); System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully."); } //Print classifier analytics for all the dataset file.WriteLine("EVALUATION OF ALL DATASET."); cls.buildClassifier(data); // Train the model weka.classifiers.Evaluation eAlldata = new weka.classifiers.Evaluation(data); eAlldata.evaluateModel(cls, data); // Print the results as in Weka explorer: //Print statistics String strSummaryAlldata = eAlldata.toSummaryString(); file.WriteLine(strSummaryAlldata); file.WriteLine(); //Print detailed class statistics file.WriteLine(eAlldata.toClassDetailsString()); file.WriteLine(); //Print confusion matrix file.WriteLine(eAlldata.toMatrixString()); file.WriteLine("----------------"); //print model file.WriteLine(cls); file.WriteLine(); } } }
/// <summary> Builds the boosted classifier</summary> public virtual void buildClassifier(Instances data) { m_RandomInstance = new Random(m_Seed); Instances boostData; int classIndex = data.classIndex(); if (data.classAttribute().Numeric) { throw new Exception("LogitBoost can't handle a numeric class!"); } if (m_Classifier == null) { throw new System.Exception("A base classifier has not been specified!"); } if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling) { m_UseResampling = true; } if (data.checkForStringAttributes()) { throw new Exception("Cannot handle string attributes!"); } if (m_Debug) { System.Console.Error.WriteLine("Creating copy of the training data"); } m_NumClasses = data.numClasses(); m_ClassAttribute = data.classAttribute(); // Create a copy of the data data = new Instances(data); data.deleteWithMissingClass(); // Create the base classifiers if (m_Debug) { System.Console.Error.WriteLine("Creating base classifiers"); } m_Classifiers = new Classifier[m_NumClasses][]; for (int j = 0; j < m_NumClasses; j++) { m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations); } // Do we want to select the appropriate number of iterations // using cross-validation? int bestNumIterations = this.NumIterations; if (m_NumFolds > 1) { if (m_Debug) { System.Console.Error.WriteLine("Processing first fold."); } // Array for storing the results double[] results = new double[this.NumIterations]; // Iterate throught the cv-runs for (int r = 0; r < m_NumRuns; r++) { // Stratify the data data.randomize(m_RandomInstance); data.stratify(m_NumFolds); // Perform the cross-validation for (int i = 0; i < m_NumFolds; i++) { // Get train and test folds Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance); Instances test = data.testCV(m_NumFolds, i); // Make class numeric Instances trainN = new Instances(train); trainN.ClassIndex = - 1; trainN.deleteAttributeAt(classIndex); trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex); trainN.ClassIndex = classIndex; m_NumericClassData = new Instances(trainN, 0); // Get class values int numInstances = train.numInstances(); double[][] tmpArray = new double[numInstances][]; for (int i2 = 0; i2 < numInstances; i2++) { tmpArray[i2] = new double[m_NumClasses]; } double[][] trainFs = tmpArray; double[][] tmpArray2 = new double[numInstances][]; for (int i3 = 0; i3 < numInstances; i3++) { tmpArray2[i3] = new double[m_NumClasses]; } double[][] trainYs = tmpArray2; for (int j = 0; j < m_NumClasses; j++) { for (int k = 0; k < numInstances; k++) { trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses); } } // Perform iterations double[][] probs = initialProbs(numInstances); m_NumGenerated = 0; double sumOfWeights = train.sumOfWeights(); for (int j = 0; j < this.NumIterations; j++) { performIteration(trainYs, trainFs, probs, trainN, sumOfWeights); Evaluation eval = new Evaluation(train); eval.evaluateModel(this, test); results[j] += eval.correct(); } } } // Find the number of iterations with the lowest error //UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'" double bestResult = - System.Double.MaxValue; for (int j = 0; j < this.NumIterations; j++) { if (results[j] > bestResult) { bestResult = results[j]; bestNumIterations = j; } } if (m_Debug) { System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult); } } // Build classifier on all the data int numInstances2 = data.numInstances(); double[][] trainFs2 = new double[numInstances2][]; for (int i4 = 0; i4 < numInstances2; i4++) { trainFs2[i4] = new double[m_NumClasses]; } double[][] trainYs2 = new double[numInstances2][]; for (int i5 = 0; i5 < numInstances2; i5++) { trainYs2[i5] = new double[m_NumClasses]; } for (int j = 0; j < m_NumClasses; j++) { for (int i = 0, k = 0; i < numInstances2; i++, k++) { trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses); } } // Make class numeric data.ClassIndex = - 1; data.deleteAttributeAt(classIndex); data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex); data.ClassIndex = classIndex; m_NumericClassData = new Instances(data, 0); // Perform iterations double[][] probs2 = initialProbs(numInstances2); double logLikelihood = CalculateLogLikelihood(trainYs2, probs2); m_NumGenerated = 0; if (m_Debug) { System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood); } double sumOfWeights2 = data.sumOfWeights(); for (int j = 0; j < bestNumIterations; j++) { double previousLoglikelihood = logLikelihood; performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2); logLikelihood = CalculateLogLikelihood(trainYs2, probs2); if (m_Debug) { System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood); } if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision) { return ; } } }