/// <summary>
/// Trains a BayesNet classifier on the Communication ARFF dataset using a
/// percentage-split evaluation and writes the evaluation report
/// (summary, per-class statistics, confusion matrix) to a text file.
/// Uses the class-level <c>percentSplit</c> value for the train/test ratio.
/// </summary>
public static void Test()
{
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
    // By convention the class attribute is the last attribute in the ARFF file.
    data.setClassIndex(data.numAttributes() - 1);
    weka.classifiers.Classifier cls = new weka.classifiers.bayes.BayesNet();

    // Save BayesNet results in .txt file
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
    {
        file.WriteLine("Performing " + percentSplit + "% split evaluation.");

        int runs = 1;

        // Repeat the split evaluation 'runs' times, each run with a different seed.
        for (int i = 0; i < runs; i++)
        {
            // Shuffle a copy of the data so the original ordering is preserved.
            int seed = i + 1;
            java.util.Random rand = new java.util.Random(seed);
            weka.core.Instances randData = new weka.core.Instances(data);
            randData.randomize(rand);

            // The first 'percentSplit' percent of the shuffled instances become
            // the training set; the remainder become the test set.
            int trainSize = (int)Math.Round((double)data.numInstances() * percentSplit / 100);

            // Empty datasets that share the header (attributes) of the source data.
            weka.core.Instances train = new weka.core.Instances(data, 0, 0);
            weka.core.Instances test = new weka.core.Instances(data, 0, 0);
            train.setClassIndex(train.numAttributes() - 1);
            test.setClassIndex(test.numAttributes() - 1);

            //Print classifier analytics for all the dataset
            file.WriteLine("EVALUATION OF TEST DATASET.");

            for (int j = 0; j < data.numInstances(); j++)
            {
                weka.core.Instance currentInst = randData.instance(j);
                if (j < trainSize)
                {
                    train.add(currentInst);
                }
                else
                {
                    test.add(currentInst);
                }
            }

            // Build the classifier on the training split and evaluate it on the
            // held-out test split.
            cls.buildClassifier(train);
            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData);
            eval.evaluateModel(cls, test);

            // Print the results as in Weka explorer:
            //Print statistics
            String strSummaryTest = eval.toSummaryString();
            file.WriteLine(strSummaryTest);
            file.WriteLine();
            //Print detailed class statistics
            file.WriteLine(eval.toClassDetailsString());
            file.WriteLine();
            //Print confusion matrix
            file.WriteLine(eval.toMatrixString());
            file.WriteLine();

            System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
        }
    }
}
/// <summary>
/// Evaluates a NaiveBayes classifier on the Communication ARFF dataset with
/// stratified k-fold cross-validation, then with an optimistic
/// train-on-all/test-on-all pass, writing every evaluation report to a text file.
/// </summary>
public static void cvdTest()
{
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
    // By convention the class attribute is the last attribute in the ARFF file.
    data.setClassIndex(data.numAttributes() - 1);
    weka.classifiers.Classifier cls = new weka.classifiers.bayes.NaiveBayes();

    //Save BayesNet results in .txt file
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
    {
        int runs = 1;
        int folds = 10;

        // perform cross-validation, repeated 'runs' times with different seeds
        for (int i = 0; i < runs; i++)
        {
            // Shuffle a copy of the data so the original ordering is preserved.
            int seed = i + 1;
            java.util.Random rand = new java.util.Random(seed);
            weka.core.Instances randData = new weka.core.Instances(data);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal())
            {
                // Stratify so every fold has roughly the same class distribution.
                randData.stratify(folds);
            }

            for (int n = 0; n < folds; n++)
            {
                weka.core.Instances train = randData.trainCV(folds, n);
                weka.core.Instances test = randData.testCV(folds, n);

                // Build the classifier on this fold's training data.
                cls.buildClassifier(train);

                //Print classifier analytics for all the dataset
                file.WriteLine("EVALUATION OF TEST DATASET.");

                // Test the model on the held-out fold. The Evaluation is seeded
                // with the training fold so class priors are estimated from the
                // data the model was trained on, not from the test fold.
                weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(train);
                eTest.evaluateModel(cls, test);

                // Print the results as in Weka explorer:
                //Print statistics
                String strSummaryTest = eTest.toSummaryString();
                file.WriteLine(strSummaryTest);
                file.WriteLine();
                //Print detailed class statistics
                file.WriteLine(eTest.toClassDetailsString());
                file.WriteLine();
                //Print confusion matrix
                file.WriteLine(eTest.toMatrixString());
                file.WriteLine();

                System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
            }

            // Evaluate a classifier trained on the complete dataset against the
            // complete dataset (an optimistic resubstitution estimate).
            //Print classifier analytics for all the dataset
            file.WriteLine("EVALUATION OF ALL DATASET.");
            cls.buildClassifier(data);

            // Train the model
            weka.classifiers.Evaluation eAlldata = new weka.classifiers.Evaluation(data);
            eAlldata.evaluateModel(cls, data);

            // Print the results as in Weka explorer:
            //Print statistics
            String strSummaryAlldata = eAlldata.toSummaryString();
            file.WriteLine(strSummaryAlldata);
            file.WriteLine();
            //Print detailed class statistics
            file.WriteLine(eAlldata.toClassDetailsString());
            file.WriteLine();
            //Print confusion matrix
            file.WriteLine(eAlldata.toMatrixString());
            file.WriteLine("----------------");
            //print model
            file.WriteLine(cls);
            file.WriteLine();
        }
    }
}
/// <summary>
/// Builds the classifier: validates and filters the training data, then runs
/// the perceptron training loop, recording one stored perceptron per
/// misclassified instance until either all iterations finish or m_MaxK
/// perceptrons have been stored.
/// </summary>
/// <param name="insts">the training instances</param>
/// <exception cref="Exception">if the data contains string attributes, has more
/// than two classes, or has a numeric class</exception>
public override void buildClassifier(Instances insts)
{
    // This implementation only supports two-class datasets with a nominal
    // class and no string attributes.
    if (insts.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (insts.numClasses() > 2)
    {
        throw new System.Exception("Can only handle two-class datasets!");
    }
    if (insts.classAttribute().Numeric)
    {
        throw new Exception("Can't handle a numeric class!");
    }

    // Filter data: drop instances with a missing class, impute remaining
    // missing values, then binarize nominal attributes.
    m_Train = new Instances(insts);
    m_Train.deleteWithMissingClass();
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_NominalToBinary);

    /** Randomize training data */
    // NOTE(review): converted from java.util.Random — System.Random may
    // produce a different shuffle than the original Java code for the same
    // seed, so results can differ from the Java implementation.
    m_Train.randomize(new System.Random((System.Int32) m_Seed));

    /** Make space to store perceptrons */
    m_Additions = new int[m_MaxK + 1];
    m_IsAddition = new bool[m_MaxK + 1];
    m_Weights = new int[m_MaxK + 1];

    /** Compute perceptrons */
    m_K = 0;
    for (int it = 0; it < m_NumIterations; it++)
    {
        for (int i = 0; i < m_Train.numInstances(); i++)
        {
            Instance inst = m_Train.instance(i);
            if (!inst.classIsMissing())
            {
                int prediction = makePrediction(m_K, inst);
                // Narrowing is safe here: the class is nominal with at most
                // two values (checked above), so classValue() is 0 or 1.
                int classValue = (int) inst.classValue();
                if (prediction == classValue)
                {
                    // Correct prediction: the current perceptron gains weight.
                    m_Weights[m_K]++;
                }
                else
                {
                    // Mistake: record instance i as an addition (class 1) or
                    // subtraction (class 0) and start the next perceptron.
                    m_IsAddition[m_K] = (classValue == 1);
                    m_Additions[m_K] = i;
                    m_K++;
                    m_Weights[m_K]++;
                }
                if (m_K == m_MaxK)
                {
                    // Perceptron storage exhausted — stop training. The labeled
                    // break from the original Java source became a goto.
                    goto out_brk;
                }
            }
        }
    }

out_brk: ;
}
/// <summary> Performs a (stratified if class is nominal) cross-validation
/// for a classifier on a set of instances. Now performs
/// a deep copy of the classifier before each call to
/// buildClassifier() (just in case the classifier is not
/// initialized properly).
/// </summary>
/// <param name="classifier">the classifier with any options set.
/// </param>
/// <param name="data">the data on which the cross-validation is to be
/// performed
/// </param>
/// <param name="numFolds">the number of folds for the cross-validation
/// </param>
/// <param name="random">random number generator for randomization
/// </param>
/// <throws> Exception if a classifier could not be generated
/// successfully or the class is not defined </throws>
public virtual void crossValidateModel(Classifier classifier, Instances data, int numFolds, System.Random random)
{
    // Make a copy of the data we can reorder without affecting the caller.
    data = new Instances(data);
    data.randomize(random);
    if (data.classAttribute().Nominal)
    {
        // Keep the class distribution roughly equal across folds.
        data.stratify(numFolds);
    }

    // Do the folds
    for (int i = 0; i < numFolds; i++)
    {
        Instances train = data.trainCV(numFolds, i, random);
        // Class priors for this fold's evaluation come from its training data;
        // this must be set before evaluateModel() is called below.
        Priors = train;
        Classifier copiedClassifier = Classifier.makeCopy(classifier);
        copiedClassifier.buildClassifier(train);
        Instances test = data.testCV(numFolds, i);
        evaluateModel(copiedClassifier, test);
    }
    m_NumFolds = numFolds;
}
/// <summary> Builds the boosted classifier. Optionally selects the number of
/// boosting iterations by internal cross-validation (when m_NumFolds &gt; 1),
/// then trains on the full dataset, stopping early when the average
/// log-likelihood improvement falls below m_Precision.</summary>
/// <param name="data">the training instances; the class must be nominal</param>
public virtual void buildClassifier(Instances data)
{
    m_RandomInstance = new Random(m_Seed);
    Instances boostData; // NOTE(review): declared but never used in this method
    int classIndex = data.classIndex();

    // Validate inputs and configuration.
    if (data.classAttribute().Numeric)
    {
        throw new Exception("LogitBoost can't handle a numeric class!");
    }
    if (m_Classifier == null)
    {
        throw new System.Exception("A base classifier has not been specified!");
    }
    // Base learners that cannot handle instance weights are driven by
    // resampling instead.
    if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
    {
        m_UseResampling = true;
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating copy of the training data");
    }

    m_NumClasses = data.numClasses();
    m_ClassAttribute = data.classAttribute();

    // Create a copy of the data so the caller's dataset is not modified
    // (this copy is itself mutated below when the class is made numeric).
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Create the base classifiers: one committee of NumIterations copies per class.
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating base classifiers");
    }
    m_Classifiers = new Classifier[m_NumClasses][];
    for (int j = 0; j < m_NumClasses; j++)
    {
        m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
    }

    // Do we want to select the appropriate number of iterations
    // using cross-validation?
    int bestNumIterations = this.NumIterations;
    if (m_NumFolds > 1)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Processing first fold.");
        }

        // results[j] accumulates the number of correct classifications after
        // iteration j, summed over all runs and folds.
        double[] results = new double[this.NumIterations];

        // Iterate throught the cv-runs
        for (int r = 0; r < m_NumRuns; r++)
        {
            // Stratify the data
            data.randomize(m_RandomInstance);
            data.stratify(m_NumFolds);

            // Perform the cross-validation
            for (int i = 0; i < m_NumFolds; i++)
            {
                // Get train and test folds
                Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
                Instances test = data.testCV(m_NumFolds, i);

                // Make class numeric: replace the class attribute with a
                // numeric 'pseudo class' the regressors can fit.
                Instances trainN = new Instances(train);
                trainN.ClassIndex = - 1;
                trainN.deleteAttributeAt(classIndex);
                trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
                trainN.ClassIndex = classIndex;
                m_NumericClassData = new Instances(trainN, 0);

                // Get class values: trainFs holds the committee outputs,
                // trainYs the smoothed 0/1 class membership targets
                // (offset by m_Offset).
                int numInstances = train.numInstances();
                double[][] tmpArray = new double[numInstances][];
                for (int i2 = 0; i2 < numInstances; i2++)
                {
                    tmpArray[i2] = new double[m_NumClasses];
                }
                double[][] trainFs = tmpArray;
                double[][] tmpArray2 = new double[numInstances][];
                for (int i3 = 0; i3 < numInstances; i3++)
                {
                    tmpArray2[i3] = new double[m_NumClasses];
                }
                double[][] trainYs = tmpArray2;
                for (int j = 0; j < m_NumClasses; j++)
                {
                    for (int k = 0; k < numInstances; k++)
                    {
                        trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
                    }
                }

                // Perform iterations, scoring the partial committee on the
                // test fold after each boosting step.
                double[][] probs = initialProbs(numInstances);
                m_NumGenerated = 0;
                double sumOfWeights = train.sumOfWeights();
                for (int j = 0; j < this.NumIterations; j++)
                {
                    performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
                    Evaluation eval = new Evaluation(train);
                    eval.evaluateModel(this, test);
                    results[j] += eval.correct();
                }
            }
        }

        // Find the number of iterations with the lowest error
        double bestResult = - System.Double.MaxValue;
        for (int j = 0; j < this.NumIterations; j++)
        {
            if (results[j] > bestResult)
            {
                bestResult = results[j];
                // NOTE(review): results[j] is the score AFTER iteration j has
                // been performed, i.e. with j+1 iterations; the original Weka
                // LogitBoost stores j+1 here — confirm whether 'j' is an
                // off-by-one before relying on this value.
                bestNumIterations = j;
            }
        }
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
        }
    }

    // Build classifier on all the data
    int numInstances2 = data.numInstances();
    double[][] trainFs2 = new double[numInstances2][];
    for (int i4 = 0; i4 < numInstances2; i4++)
    {
        trainFs2[i4] = new double[m_NumClasses];
    }
    double[][] trainYs2 = new double[numInstances2][];
    for (int i5 = 0; i5 < numInstances2; i5++)
    {
        trainYs2[i5] = new double[m_NumClasses];
    }
    for (int j = 0; j < m_NumClasses; j++)
    {
        // NOTE(review): i and k always hold the same value in this loop.
        for (int i = 0, k = 0; i < numInstances2; i++, k++)
        {
            trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
        }
    }

    // Make class numeric (same pseudo-class transformation as above, applied
    // to the full working copy of the data).
    data.ClassIndex = - 1;
    data.deleteAttributeAt(classIndex);
    data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
    data.ClassIndex = classIndex;
    m_NumericClassData = new Instances(data, 0);

    // Perform iterations until bestNumIterations is reached or the average
    // log-likelihood stops improving by at least m_Precision.
    double[][] probs2 = initialProbs(numInstances2);
    double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
    m_NumGenerated = 0;
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
    }
    double sumOfWeights2 = data.sumOfWeights();
    for (int j = 0; j < bestNumIterations; j++)
    {
        double previousLoglikelihood = logLikelihood;
        performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
        logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
        }
        // Early stopping on log-likelihood convergence.
        if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
        {
            return ;
        }
    }
}