/// <summary>
/// Tests the classification result of each map that a user played, with the
/// data available as if they were playing through it: for every prefix of the
/// instance list (starting at 3 instances) a classifier is trained and
/// 3-fold cross-validated, and the accuracy series is written to a text file
/// for a later Matlab read.
/// </summary>
/// <param name="dataString">ARFF-formatted dataset as a string.</param>
/// <param name="playerID">Player identifier used for logging and the output file name.</param>
public static void classifyTest(String dataString, String playerID)
{
    // StringBuilder avoids quadratic string concatenation in the loop below.
    System.Text.StringBuilder results = new System.Text.StringBuilder();
    try
    {
        java.io.StringReader stringReader = new java.io.StringReader(dataString);
        java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);
        /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1 */
        weka.core.Instances data = new weka.core.Instances(buffReader);

        // Set the class attribute if the data format does not provide this
        // information (e.g. ARFF; the XRFF format saves it).
        if (data.classIndex() == -1)
            data.setClassIndex(data.numAttributes() - 1);

        weka.classifiers.Classifier cl;
        for (int i = 3; i < data.numInstances(); i++)
        {
            cl = new weka.classifiers.bayes.NaiveBayes();
            // Alternative classifiers experimented with:
            //cl = new weka.classifiers.trees.J48();
            //cl = new weka.classifiers.lazy.IB1();
            //cl = new weka.classifiers.functions.MultilayerPerceptron();
            //((weka.classifiers.functions.MultilayerPerceptron)cl).setHiddenLayers("12");
            // BUGFIX: the setHiddenLayers line above was previously executed
            // unconditionally while cl was a NaiveBayes instance, which throws
            // InvalidCastException at runtime; it only applies when the
            // MultilayerPerceptron line is the active one.

            weka.core.Instances subset = new weka.core.Instances(data, 0, i);
            cl.buildClassifier(subset);
            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset);
            eval.crossValidateModel(cl, subset, 3, new java.util.Random(1));
            results.Append(eval.pctCorrect()); // For accuracy measurement
            /* For Mathews Correlation Coefficient */
            //double TP = eval.numTruePositives(1);
            //double FP = eval.numFalsePositives(1);
            //double TN = eval.numTrueNegatives(1);
            //double FN = eval.numFalseNegatives(1);
            //double correlationCoeff = ((TP*TN)-(FP*FN))/Math.Sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));
            //results = results + correlationCoeff;
            if (i != data.numInstances() - 1)
                results.Append(", ");
            if (i == data.numInstances() - 1)
                Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1));
        }
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }

    // Write values to file for a matlab read.
    // NOTE(review): the file name still says "NeuralNet" although the active
    // classifier is NaiveBayes — confirm which name downstream tooling expects.
    //StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldCorrCoeff.txt"); // For mathews cc
    using (StreamWriter writer = new StreamWriter("DataForMatlab/" + playerID + "_CrossFoldValidations_NeuralNet.txt"))
    {
        writer.WriteLine(results.ToString());
    }
    Debug.Log(playerID + " has been written to file");
}
/// <summary>
/// Trains a J48 decision tree on the dataset in <paramref name="classifierFileName"/>,
/// records a prediction for every instance of the same file, then prints each
/// prediction's class distribution and predicted class followed by the overall
/// evaluation summary.
/// </summary>
/// <param name="classifierFileName">Path of the ARFF file holding the instances.</param>
/// <param name="predictionModel">Currently unused; kept for interface compatibility.</param>
public static void classifierTwo(string classifierFileName, string predictionModel)
{
    FileReader javaFileReader = new FileReader(classifierFileName);
    weka.core.Instances wekaInsts = new weka.core.Instances(javaFileReader);
    javaFileReader.close();
    wekaInsts.setClassIndex(wekaInsts.numAttributes() - 1);

    // Re-read the same file as the "test" set.
    // NOTE(review): train and test data come from the same file, so the loop
    // below measures resubstitution error, not generalisation.
    Instances testDataSet = new Instances(new BufferedReader(new FileReader(classifierFileName)));
    testDataSet.setClassIndex(wekaInsts.numAttributes() - 1);

    Evaluation evaluation = new Evaluation(testDataSet);
    J48 model = new J48();
    // BUGFIX: previously the model was never trained and the per-instance
    // evaluation calls were commented out, so evaluation.predictions() was
    // always empty and the printing loop produced no output.
    model.buildClassifier(wekaInsts);
    for (int i = 0; i < testDataSet.numInstances(); i++)
    {
        Instance instance = testDataSet.instance(i);
        // Record the prediction so it appears in evaluation.predictions().
        evaluation.evaluateModelOnceAndRecordPrediction(model, instance);
    }
    foreach (object o in evaluation.predictions().toArray())
    {
        NominalPrediction prediction = o as NominalPrediction;
        if (prediction != null)
        {
            double[] distribution = prediction.distribution();
            double predicted = prediction.predicted();
            for (int i = 0; i < distribution.Length; i++)
            {
                System.Console.WriteLine(distribution[i]);
            }
            System.Console.WriteLine(predicted);
        }
    }
    System.Console.WriteLine(evaluation);
    System.Console.ReadKey();
}
/// <summary>
/// Runs a 10-fold cross-validation of an SMO classifier over the dataset in
/// <paramref name="classifierFileName"/> and writes one line per recorded
/// nominal prediction (actual, predicted, revision, weight, margin,
/// distribution) to "&lt;predictionModel&gt;NbCl.txt".
/// </summary>
/// <param name="classifierFileName">Path of the ARFF file holding the instances.</param>
/// <param name="predictionModel">Prefix of the output file name.</param>
public static void classifierOne(string classifierFileName, string predictionModel)
{
    FileReader javaFileReader = new FileReader(classifierFileName);
    weka.core.Instances wekaInsts = new weka.core.Instances(javaFileReader);
    javaFileReader.close();
    wekaInsts.setClassIndex(wekaInsts.numAttributes() - 1);

    Classifier cl = new SMO();
    //Classifier cl = new NaiveBayes();
    java.util.Random random = new java.util.Random(1);
    Evaluation evaluation = new Evaluation(wekaInsts);
    evaluation.crossValidateModel(cl, wekaInsts, 10, random);
    // BUGFIX: removed a dead, empty foreach over
    // evaluation.getMetricsToDisplay().toArray() that had no effect.

    int count = 0;
    StringBuilder sb = new StringBuilder();
    foreach (object o in evaluation.predictions().toArray())
    {
        NominalPrediction prediction = o as NominalPrediction;
        if (prediction != null)
        {
            double[] distribution = prediction.distribution();
            double predicted = prediction.predicted();
            double actual = prediction.actual();
            string revision = prediction.getRevision();
            double weight = prediction.weight();
            double margin = prediction.margin();
            //bool equals = prediction.@equals();
            // NOTE(review): the distribution values are concatenated without a
            // separator, which is ambiguous to parse back; kept as-is to
            // preserve the existing output file format.
            string distributions = String.Empty;
            for (int i = 0; i < distribution.Length; i++)
            {
                distributions += distribution[i];
            }
            var predictionLine = String.Format("{0} - {1} - {2} - {3} - {4} - {5}\n", actual, predicted, revision, weight, margin, distributions);
            sb.Append(predictionLine);
        }
        count++;
    }
    File_Helper.WriteToFile(sb, predictionModel + "NbCl.txt");
    System.Console.WriteLine(count);
    System.Console.ReadKey();
}
/// <summary>
/// Runs a 10-fold cross-validation of the current classifier over the player
/// data, logs the summary, and returns a report string — or null when the
/// evaluation throws.
/// </summary>
/// <returns>Cross-validation summary text, or null on failure.</returns>
public String PrintClassifierTestReport()
{
    try
    {
        Debug.Log("Classifier: Number of instances: " + playerData.numInstances());
        weka.classifiers.Evaluation evaluation = new weka.classifiers.Evaluation(playerData);
        evaluation.crossValidateModel(classifier, playerData, 10, new java.util.Random(1));
        // Log the detailed cross-validation summary first, then build the
        // shorter report that is handed back to the caller.
        string crossValidationLog = evaluation.toSummaryString("\nClassifier: Cross Validate Results: \n======\n", false);
        Debug.Log(crossValidationLog);
        string report = evaluation.toSummaryString("\nResults\n======\n", false);
        return report;
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
        return null;
    }
}
/// <summary> Evaluates a classifier with the options given in an array of
/// strings. <p/>
///
/// Valid options are: <p/>
///
/// -t name of training file <br/>
/// Name of the file with the training data. (required) <p/>
///
/// -T name of test file <br/>
/// Name of the file with the test data. If missing a cross-validation
/// is performed. <p/>
///
/// -c class index <br/>
/// Index of the class attribute (1, 2, ...; default: last). <p/>
///
/// -x number of folds <br/>
/// The number of folds for the cross-validation (default: 10). <p/>
///
/// -s random number seed <br/>
/// Random number seed for the cross-validation (default: 1). <p/>
///
/// -m file with cost matrix <br/>
/// The name of a file containing a cost matrix. <p/>
///
/// -l name of model input file <br/>
/// Loads classifier from the given file. <p/>
///
/// -d name of model output file <br/>
/// Saves classifier built from the training data into the given file. <p/>
///
/// -v <br/>
/// Outputs no statistics for the training data. <p/>
///
/// -o <br/>
/// Outputs statistics only, not the classifier. <p/>
///
/// -i <br/>
/// Outputs detailed information-retrieval statistics per class. <p/>
///
/// -k <br/>
/// Outputs information-theoretic statistics. <p/>
///
/// -p <br/>
/// Outputs predictions for test instances (and nothing else). <p/>
///
/// -r <br/>
/// Outputs cumulative margin distribution (and nothing else). <p/>
///
/// -g <br/>
/// Only for classifiers that implement "Graphable." Outputs
/// the graph representation of the classifier (and nothing
/// else). <p/>
///
/// </summary>
/// <param name="classifier">machine learning classifier
/// </param>
/// <param name="options">the array of string containing the options
/// </param>
/// <throws> Exception if model could not be evaluated successfully </throws>
/// <returns> a string describing the results
/// </returns>
// NOTE(review): this is a machine-assisted port of Weka's Java
// Evaluation.evaluateModel; the UPGRADE_* comments below were produced by the
// conversion tool and flag Java/.NET behavioural differences that are still
// unresolved (ObjectInputStream -> BinaryReader, FileReader -> StreamReader).
public static System.String evaluateModel(Classifier classifier, System.String[] options)
{
    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    System.String trainFileName, testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName, objectOutputFileName, attributeRangeString;
    bool noOutput = false, printClassifications = false, trainStatistics = true, printMargins = false, printComplexityStatistics = false, printGraph = false, classStatistics = false, printSource = false;
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    System.IO.StreamReader trainReader = null, testReader = null;
    //UPGRADE_TODO: Class 'java.io.ObjectInputStream' was converted to 'System.IO.BinaryReader' which has a different behavior.
    System.IO.BinaryReader objectInputStream = null;
    System.IO.Stream objectStream = null;
    CostMatrix costMatrix = null;
    System.Text.StringBuilder schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;
    Classifier classifierBackup;
    try
    {
        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.Length != 0)
        {
            // -c accepts "first", "last" or a 1-based index; -1 is the
            // internal sentinel for "last attribute".
            if (classIndexString.Equals("first"))
                classIndex = 1;
            else if (classIndexString.Equals("last"))
                classIndex = -1;
            else
                classIndex = System.Int32.Parse(classIndexString);
        }
        trainFileName = Utils.getOption('t', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        // Validate the -t / -l / -T combinations before opening anything.
        if (trainFileName.Length == 0)
        {
            if (objectInputFileName.Length == 0)
            {
                throw new System.Exception("No training file and no object " + "input file given.");
            }
            if (testFileName.Length == 0)
            {
                throw new System.Exception("No training file and no test " + "file given.");
            }
        }
        else if ((objectInputFileName.Length != 0) && ((!(classifier is UpdateableClassifier)) || (testFileName.Length == 0)))
        {
            throw new System.Exception("Classifier not incremental, or no " + "test file provided: can't " + "use both train and model file.");
        }
        try
        {
            if (trainFileName.Length != 0)
            {
                //UPGRADE_TODO: The differences in the expected value of parameters for constructor 'java.io.BufferedReader.BufferedReader' may cause compilation errors.
                //UPGRADE_WARNING: At least one expression was used more than once in the target code.
                //UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior.
                // NOTE(review): this opens the file three times and only the
                // outer reader is ever closed — conversion-tool artifact that
                // leaks two file handles per call; TODO confirm and simplify.
                trainReader = new System.IO.StreamReader(new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).CurrentEncoding);
            }
            if (testFileName.Length != 0)
            {
                //UPGRADE_TODO: The differences in the expected value of parameters for constructor 'java.io.BufferedReader.BufferedReader' may cause compilation errors.
                //UPGRADE_WARNING: At least one expression was used more than once in the target code.
                //UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior.
                testReader = new System.IO.StreamReader(new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).CurrentEncoding);
            }
            if (objectInputFileName.Length != 0)
            {
                //UPGRADE_TODO: Constructor 'java.io.FileInputStream.FileInputStream' was converted to 'System.IO.FileStream.FileStream' which has a different behavior.
                objectStream = new System.IO.FileStream(objectInputFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
                if (objectInputFileName.EndsWith(".gz"))
                {
                    //UPGRADE_ISSUE: Constructor 'java.util.zip.GZIPInputStream.GZIPInputStream' was not converted.
                    objectStream = new ICSharpCode.SharpZipLib.GZip.GZipInputStream(objectStream);
                }
                //UPGRADE_TODO: Class 'java.io.ObjectInputStream' was converted to 'System.IO.BinaryReader' which has a different behavior.
                objectInputStream = new System.IO.BinaryReader(objectStream);
            }
        }
        catch (System.Exception e)
        {
            //UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value.
            throw new System.Exception("Can't open file " + e.Message + '.');
        }
        if (testFileName.Length != 0)
        {
            // Read only the test header (1 instance buffer); instances are
            // streamed later via readInstance.
            template = test = new Instances(testReader, 1);
            if (classIndex != -1)
            {
                test.ClassIndex = classIndex - 1;
            }
            else
            {
                test.ClassIndex = test.numAttributes() - 1;
            }
            if (classIndex > test.numAttributes())
            {
                throw new System.Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.Length != 0)
        {
            // Incremental classifiers stream the training data; batch
            // classifiers load it all at once.
            if ((classifier is UpdateableClassifier) && (testFileName.Length != 0))
            {
                train = new Instances(trainReader, 1);
            }
            else
            {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != -1)
            {
                train.ClassIndex = classIndex - 1;
            }
            else
            {
                train.ClassIndex = train.numAttributes() - 1;
            }
            if ((testFileName.Length != 0) && !test.equalHeaders(train))
            {
                throw new System.ArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes())
            {
                throw new System.Exception("Index of class attribute too large.");
            }
        }
        if (template == null)
        {
            throw new System.Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.Length != 0)
        {
            seed = System.Int32.Parse(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.Length != 0)
        {
            folds = System.Int32.Parse(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());
        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.Length != 0);
        // Check -p option
        try
        {
            attributeRangeString = Utils.getOption('p', options);
        }
        catch (System.Exception e)
        {
            //UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value.
            throw new System.Exception(e.Message + "\nNOTE: the -p option has changed. " + "It now expects a parameter specifying a range of attributes " + "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.Length != 0)
        {
            // if no test file given, we cannot print predictions
            if (testFileName.Length == 0)
                throw new System.Exception("Cannot print predictions ('-p') without test file ('-T')!");
            printClassifications = true;
            if (!attributeRangeString.Equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }
        // if no training file given, we don't have any priors
        if ((trainFileName.Length == 0) && (printComplexityStatistics))
            throw new System.Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");
        // If a model file is given, we can't process
        // scheme-specific options
        if (objectInputFileName.Length != 0)
        {
            Utils.checkForRemainingOptions(options);
        }
        else
        {
            // Set options for classifier
            // (scheme-specific option forwarding was disabled in the port)
            // if (classifier instanceof OptionHandler)
            // {
            //     for (int i = 0; i < options.length; i++)
            //     {
            //         if (options[i].length() != 0)
            //         {
            //             if (schemeOptionsText == null)
            //             {
            //                 schemeOptionsText = new StringBuffer();
            //             }
            //             if (options[i].indexOf(' ') != -1)
            //             {
            //                 schemeOptionsText.append('"' + options[i] + "\" ");
            //             }
            //             else
            //             {
            //                 schemeOptionsText.append(options[i] + " ");
            //             }
            //         }
            //     }
            //     ((OptionHandler)classifier).setOptions(options);
            // }
        }
        Utils.checkForRemainingOptions(options);
    }
    catch (System.Exception e)
    {
        //UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value.
        throw new System.Exception("\nWeka exception: " + e.Message + makeOptionString(classifier));
    }
    // Setup up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    if (objectInputFileName.Length != 0)
    {
        testingEvaluation.useNoPriors();
        // Load classifier from file
        //UPGRADE_WARNING: Method 'java.io.ObjectInputStream.readObject' was converted to 'SupportClass.Deserialize' which may throw an exception.
        //classifier = (Classifier) SupportClass.Deserialize(objectInputStream);
        //FileStream fs = new FileStream("DataFile.dat", FileMode.Open);
        // NOTE(review): models written by Java serialization cannot be read by
        // BinaryFormatter — this path only works for models saved by this
        // port's own -d option. TODO confirm.
        try
        {
            BinaryFormatter formatter = new BinaryFormatter();
            // Deserialize the hashtable from the file and
            // assign the reference to the local variable.
            // addresses = (Hashtable)formatter.Deserialize(fs);
            classifier = (Classifier)formatter.Deserialize(objectStream);
        }
        catch (Exception e)
        {
            Console.WriteLine("Failed to deserialize. Reason: " + e.Message);
            throw;
        }
        finally
        {
            objectStream.Close();
            //fs.Close();
        }
        objectInputStream.Close();
    }
    // backup of fully setup classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);
    // Build the classifier if no object file provided
    if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null) && (trainFileName.Length != 0))
    {
        // Build classifier incrementally: stream one training instance at a
        // time, updating priors and the model, then discarding the instance.
        trainingEvaluation.Priors = train;
        testingEvaluation.Priors = train;
        trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
        if (objectInputFileName.Length == 0)
        {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader))
        {
            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier)classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
        trainReader.Close();
    }
    else if (objectInputFileName.Length == 0)
    {
        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.Priors = tempTrain;
        testingEvaluation.Priors = tempTrain;
        trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
    }
    // Save the classifier if an object output file is provided
    if (objectOutputFileName.Length != 0)
    {
        //UPGRADE_TODO: Constructor 'java.io.FileOutputStream.FileOutputStream' was converted to 'System.IO.FileStream.FileStream' which has a different behavior.
        System.IO.Stream os = new System.IO.FileStream(objectOutputFileName, System.IO.FileMode.Create);
        if (objectOutputFileName.EndsWith(".gz"))
        {
            //UPGRADE_ISSUE: Constructor 'java.util.zip.GZIPOutputStream.GZIPOutputStream' was not converted.
            os = new ICSharpCode.SharpZipLib.GZip.GZipOutputStream(os);
        }
        //UPGRADE_TODO: Class 'java.io.ObjectOutputStream' was converted to 'System.IO.BinaryWriter' which has a different behavior.
        System.IO.BinaryWriter objectOutputStream = new System.IO.BinaryWriter(os);
        //UPGRADE_TODO: Method 'java.io.ObjectOutputStream.writeObject' was converted to 'SupportClass.Serialize' which has a different behavior.
        //SupportClass.Serialize(objectOutputStream, classifier);
        BinaryFormatter bformatter = new BinaryFormatter();
        bformatter.Serialize(os, classifier);
        objectOutputStream.Flush();
        objectOutputStream.Close();
    }
    // If classifier is drawable output string describing graph
    if ((classifier is Drawable) && (printGraph))
    {
        return ((Drawable)classifier).graph();
    }
    // Output the classifier as equivalent source
    if ((classifier is Sourcable) && (printSource))
    {
        return wekaStaticWrapper((Sourcable)classifier, sourceClass);
    }
    // Output test instance predictions only
    if (printClassifications)
    {
        return toPrintClassifications(classifier, new Instances(template, 0), testFileName, classIndex, attributesToOutput);
    }
    // Output model
    if (!(noOutput || printMargins))
    {
        // if (classifier instanceof OptionHandler)
        // {
        //     if (schemeOptionsText != null)
        //     {
        //         text.append("\nOptions: "+schemeOptionsText);
        //         text.append("\n");
        //     }
        // }
        //UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Object.toString' may return a different value.
        text.Append("\n" + classifier.ToString() + "\n");
    }
    if (!printMargins && (costMatrix != null))
    {
        text.Append("\n=== Evaluation Cost Matrix ===\n\n").Append(costMatrix.ToString());
    }
    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.Length != 0))
    {
        if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null))
        {
            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            //UPGRADE_TODO: The differences in the expected value of parameters for constructor 'java.io.BufferedReader.BufferedReader' may cause compilation errors.
            //UPGRADE_WARNING: At least one expression was used more than once in the target code.
            //UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior.
            trainReader = new System.IO.StreamReader(new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).CurrentEncoding);
            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != -1)
            {
                train.ClassIndex = classIndex - 1;
            }
            else
            {
                train.ClassIndex = train.numAttributes() - 1;
            }
            testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            while (train.readInstance(trainReader))
            {
                trainingEvaluation.evaluateModelOnce((Classifier)classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
            trainReader.Close();
        }
        else
        {
            testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
        }
        // Print the results of the training evaluation
        if (printMargins)
        {
            return trainingEvaluation.toCumulativeMarginDistributionString();
        }
        else
        {
            text.Append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
            text.Append("\nTime taken to test model on training data: " + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
            text.Append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n", printComplexityStatistics));
            if (template.classAttribute().Nominal)
            {
                if (classStatistics)
                {
                    text.Append("\n\n" + trainingEvaluation.toClassDetailsString());
                }
                text.Append("\n\n" + trainingEvaluation.toMatrixString());
            }
        }
    }
    // Compute proper error estimates
    if (testFileName.Length != 0)
    {
        // Testing is on the supplied test data
        while (test.readInstance(testReader))
        {
            testingEvaluation.evaluateModelOnce((Classifier)classifier, test.instance(0));
            test.delete(0);
        }
        testReader.Close();
        text.Append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    }
    else if (trainFileName.Length != 0)
    {
        // Testing is via cross-validation on training data
        //UPGRADE_TODO: The differences in the expected value of parameters for constructor 'java.util.Random.Random' may cause compilation errors.
        System.Random random = new System.Random((System.Int32)seed);
        // use untrained (!) classifier for cross-validation
        classifier = Classifier.makeCopy(classifierBackup);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().Numeric)
        {
            text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n", printComplexityStatistics));
        }
        else
        {
            text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().Nominal)
    {
        if (classStatistics)
        {
            text.Append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.Append("\n\n" + testingEvaluation.toMatrixString());
    }
    return text.ToString();
}
/// <summary>
/// Plate by plate classification: for every active plate, trains the selected
/// classifier on the plate's classed wells, 2-fold cross-validates it, stores
/// the quality/confusion summaries on the plate, and re-labels every active
/// well with the predicted class.
/// </summary>
/// <param name="NeutralClass">Neutral class (excluded from training).</param>
/// <param name="IdxClassifier">Classifier Index (0:J48), (1:SVM), (2:NN), (3:KNN), (4:Random Forest)</param>
private void ClassificationPlateByPlate(int NeutralClass, int IdxClassifier)
{
    int NumberOfPlates = cGlobalInfo.CurrentScreening.ListPlatesActive.Count;
    for (int PlateIdx = 0; PlateIdx < NumberOfPlates; PlateIdx++)
    {
        // NOTE(review): round-trips through GetPlate(name) although the plate
        // is already at hand via the index — presumably equivalent; confirm.
        cPlate CurrentPlateToProcess = cGlobalInfo.CurrentScreening.ListPlatesActive.GetPlate(cGlobalInfo.CurrentScreening.ListPlatesActive[PlateIdx].GetName());
        cInfoClass InfoClass = CurrentPlateToProcess.GetNumberOfClassesBut(NeutralClass);

        // A classifier needs at least two classes to be trained.
        if (InfoClass.NumberOfClass <= 1)
        {
            richTextBoxInfoClassif.AppendText(CurrentPlateToProcess.GetName() + " not processed.\n");
            continue;
        }
        weka.core.Instances insts = CurrentPlateToProcess.CreateInstancesWithClasses(InfoClass, NeutralClass);

        Classifier ClassificationModel = null;
        string Text = "";
        switch (IdxClassifier)
        {
            case 0: // J48
                ClassificationModel = new weka.classifiers.trees.J48();
                weka.classifiers.trees.J48 J48Model = (weka.classifiers.trees.J48)ClassificationModel;
                J48Model.setMinNumObj((int)cGlobalInfo.OptionsWindow.numericUpDownJ48MinNumObjects.Value);
                Text = "J48 - ";
                break;
            case 1: // SVM
                ClassificationModel = new weka.classifiers.functions.SMO();
                Text = "SVM - ";
                break;
            case 2: // NN
                ClassificationModel = new weka.classifiers.functions.MultilayerPerceptron();
                Text = "Neural Network - ";
                break;
            case 3: // KNN
                ClassificationModel = new weka.classifiers.lazy.IBk((int)cGlobalInfo.OptionsWindow.numericUpDownKofKNN.Value);
                Text = "K-Nearest Neighbor(s) - ";
                break;
            case 4: // Random Forest
                ClassificationModel = new weka.classifiers.trees.RandomForest();
                Text = "Random Forest - ";
                break;
            default:
                break;
        }
        // BUGFIX: an unknown classifier index previously left
        // ClassificationModel null and buildClassifier below threw a
        // NullReferenceException.
        if (ClassificationModel == null)
        {
            richTextBoxInfoClassif.AppendText("Unknown classifier index " + IdxClassifier + " - " + CurrentPlateToProcess.GetName() + " not processed.\n");
            continue;
        }
        richTextBoxInfoClassif.AppendText(Text + InfoClass.NumberOfClass + " classes - Plate: ");
        richTextBoxInfoClassif.AppendText(CurrentPlateToProcess.GetName() + " OK \n");

        weka.core.Instances train = new weka.core.Instances(insts, 0, insts.numInstances());
        ClassificationModel.buildClassifier(train);
        cGlobalInfo.ConsoleWriteLine(ClassificationModel.ToString());

        weka.classifiers.Evaluation evaluation = new weka.classifiers.Evaluation(insts);
        evaluation.crossValidateModel(ClassificationModel, insts, 2, new java.util.Random(1));
        cGlobalInfo.ConsoleWriteLine(evaluation.toSummaryString());
        cGlobalInfo.ConsoleWriteLine(evaluation.toMatrixString());

        // update classification information of the current plate
        switch (IdxClassifier)
        {
            case 0: // J48: keep the dot-graph body (everything after the "{")
                weka.classifiers.trees.J48 CurrentClassifier = (weka.classifiers.trees.J48)(ClassificationModel);
                CurrentPlateToProcess.GetInfoClassif().StringForTree = CurrentClassifier.graph().Remove(0, CurrentClassifier.graph().IndexOf("{") + 2);
                break;
            /*case 1: // SVM
                break;
            case 2: // NN
                break;
            case 3: // KNN
                break;*/
            default:
                break;
        }
        CurrentPlateToProcess.GetInfoClassif().StringForQuality = evaluation.toSummaryString();
        CurrentPlateToProcess.GetInfoClassif().ConfusionMatrix = evaluation.toMatrixString();

        // Re-label every active well with the class predicted by the model.
        foreach (cWell TmpWell in CurrentPlateToProcess.ListActiveWells)
        {
            weka.core.Instance currentInst = TmpWell.CreateInstanceForNClasses(InfoClass).instance(0);
            double predictedClass = ClassificationModel.classifyInstance(currentInst);
            TmpWell.SetClass(InfoClass.ListBackAssociation[(int)predictedClass]);
        }
    }
    return;
}
/// <summary>
/// Global classification: trains a single classifier on the whole screening, reports a
/// 2-fold cross-validation, then assigns a predicted class (with confidence) to every
/// active well of every active plate.
/// </summary>
/// <param name="NeutralClass">Index of the neutral class, excluded from the class count</param>
/// <param name="IdxClassifier">Classifier Index (0:J48), (1:SVM), (2:NN), (3:KNN), (4:Random Forest)</param>
private void ClassificationGlobal(int NeutralClass, int IdxClassifier)
{
    cInfoClass InfoClass = cGlobalInfo.CurrentScreening.GetNumberOfClassesBut(NeutralClass);

    // A classifier needs at least two classes.
    if (InfoClass.NumberOfClass <= 1)
    {
        richTextBoxInfoClassif.AppendText("Screening not processed.\n");
        return;
    }

    // FIX: removed the unused local 'TrainingTable' — GetAverageDescriptorValues() was
    // computed here but its result was never read.

    weka.core.Instances insts = cGlobalInfo.CurrentScreening.CreateInstancesWithClasses(InfoClass, NeutralClass);

    Classifier ClassificationModel = null;

    // Instantiate the requested classifier; J48 and KNN pull parameters from the options window.
    switch (IdxClassifier)
    {
        case 0: // J48
            ClassificationModel = new weka.classifiers.trees.J48();
            weka.classifiers.trees.J48 J48Model = (weka.classifiers.trees.J48)ClassificationModel;
            J48Model.setMinNumObj((int)cGlobalInfo.OptionsWindow.numericUpDownJ48MinNumObjects.Value);
            richTextBoxInfoClassif.AppendText("\nC4.5 : " + InfoClass.NumberOfClass + " classes");
            break;
        case 1: // SVM
            ClassificationModel = new weka.classifiers.functions.SMO();
            break;
        case 2: // NN
            ClassificationModel = new weka.classifiers.functions.MultilayerPerceptron();
            break;
        case 3: // KNN
            ClassificationModel = new weka.classifiers.lazy.IBk((int)cGlobalInfo.OptionsWindow.numericUpDownKofKNN.Value);
            break;
        case 4: // Random Forest
            ClassificationModel = new weka.classifiers.trees.RandomForest();
            break;
        default:
            break;
    }

    // Train on a full copy of the screening's instances.
    weka.core.Instances train = new weka.core.Instances(insts, 0, insts.numInstances());
    ClassificationModel.buildClassifier(train);
    cGlobalInfo.ConsoleWriteLine(ClassificationModel.ToString());

    // 2-fold cross-validation with a fixed seed so results are reproducible.
    weka.classifiers.Evaluation evaluation = new weka.classifiers.Evaluation(insts);
    evaluation.crossValidateModel(ClassificationModel, insts, 2, new java.util.Random(1));
    cGlobalInfo.ConsoleWriteLine(evaluation.toSummaryString());
    cGlobalInfo.ConsoleWriteLine(evaluation.toMatrixString());

    // Feedback label for the classifier used.
    string Text = "";
    switch (IdxClassifier)
    {
        case 0: // J48
            Text = "J48 - ";
            break;
        case 1: // SVM
            Text = "SVM - ";
            break;
        case 2: // NN
            Text = "Neural Network - ";
            break;
        case 3: // KNN
            Text = "K-Nearest Neighbor(s) - ";
            break;
        case 4: // Random Forest
            // FIX: this label was missing, leaving the feedback text empty for Random
            // Forest; ClassificationPlateByPlate reports the same label for case 4.
            Text = "Random Forest - ";
            break;
        default:
            break;
    }
    richTextBoxInfoClassif.AppendText(Text + InfoClass.NumberOfClass + " classes.");

    // Classify every active well of every active plate with the trained global model;
    // store both the back-mapped class and the model's confidence for that class.
    foreach (cPlate CurrentPlateToProcess in cGlobalInfo.CurrentScreening.ListPlatesActive)
    {
        foreach (cWell TmpWell in CurrentPlateToProcess.ListActiveWells)
        {
            weka.core.Instance currentInst = TmpWell.CreateInstanceForNClasses(InfoClass).instance(0);
            double predictedClass = ClassificationModel.classifyInstance(currentInst);
            double[] ClassConfidence = ClassificationModel.distributionForInstance(currentInst);
            double ConfidenceValue = ClassConfidence[(int)predictedClass];
            TmpWell.SetClass(InfoClass.ListBackAssociation[(int)predictedClass], ConfidenceValue);
        }
    }
    return;
}
/// <summary>
/// Builds the learning model for classification from the parameters selected in the
/// classification window, trains it on <paramref name="InstancesList"/>, optionally
/// displays the model and a cross-validation report, and returns the trained classifier.
/// </summary>
/// <param name="WindowForClassificationParam">Dialog holding the selected algorithm and its parameters</param>
/// <param name="InstancesList">List of instances (class attribute is appended here)</param>
/// <param name="TextBoxForFeedback">Text box for the results (can be NULL)</param>
/// <param name="PanelForVisualFeedback">Panel to display visual results if available (can be NULL)</param>
/// <param name="ModelEvaluation">Out: cross-validation result, or null if validation is disabled</param>
/// <param name="IsCellular">True: classes are cellular phenotypes; false: well classes</param>
/// <returns>The trained classifier, or null if no algorithm/parameters were selected</returns>
public Classifier PerformTraining(FormForClassificationInfo WindowForClassificationParam, Instances InstancesList, /*int NumberofClusters,*/ RichTextBox TextBoxForFeedback,
                                  Panel PanelForVisualFeedback, out weka.classifiers.Evaluation ModelEvaluation, bool IsCellular)
{
    ModelEvaluation = null;

    cParamAlgo ClassifAlgoParams = WindowForClassificationParam.GetSelectedAlgoAndParameters();
    if (ClassifAlgoParams == null) return null;

    cListValuesParam Parameters = ClassifAlgoParams.GetListValuesParam();

    // -------------------------- Classification -------------------------------
    // Build the list of class names (phenotypes or well classes) and append it to the
    // instances as the class attribute.
    this.attValsWithoutClasses = new FastVector();

    if (IsCellular)
        for (int i = 0; i < cGlobalInfo.ListCellularPhenotypes.Count; i++)
            this.attValsWithoutClasses.addElement(cGlobalInfo.ListCellularPhenotypes[i].Name);
    else
        for (int i = 0; i < cGlobalInfo.ListWellClasses.Count; i++)
            this.attValsWithoutClasses.addElement(cGlobalInfo.ListWellClasses[i].Name);

    InstancesList.insertAttributeAt(new weka.core.Attribute("Class", this.attValsWithoutClasses), InstancesList.numAttributes());

    // Assign each instance its class value, then mark the last attribute as the class.
    for (int i = 0; i < Classes.Count; i++)
        InstancesList.get(i).setValue(InstancesList.numAttributes() - 1, Classes[i]);

    InstancesList.setClassIndex(InstancesList.numAttributes() - 1);

    weka.core.Instances train = new weka.core.Instances(InstancesList, 0, InstancesList.numInstances());

    if (PanelForVisualFeedback != null)
        PanelForVisualFeedback.Controls.Clear();

    #region List classifiers
    #region J48
    if (ClassifAlgoParams.Name == "J48")
    {
        this.CurrentClassifier = new weka.classifiers.trees.J48();
        ((J48)this.CurrentClassifier).setMinNumObj((int)Parameters.ListDoubleValues.Get("numericUpDownMinInstLeaf").Value);
        ((J48)this.CurrentClassifier).setConfidenceFactor((float)Parameters.ListDoubleValues.Get("numericUpDownConfFactor").Value);
        ((J48)this.CurrentClassifier).setNumFolds((int)Parameters.ListDoubleValues.Get("numericUpDownNumFolds").Value);
        ((J48)this.CurrentClassifier).setUnpruned((bool)Parameters.ListCheckValues.Get("checkBoxUnPruned").Value);
        ((J48)this.CurrentClassifier).setUseLaplace((bool)Parameters.ListCheckValues.Get("checkBoxLaplacianSmoothing").Value);
        ((J48)this.CurrentClassifier).setSeed((int)Parameters.ListDoubleValues.Get("numericUpDownSeedNumber").Value);
        ((J48)this.CurrentClassifier).setSubtreeRaising((bool)Parameters.ListCheckValues.Get("checkBoxSubTreeRaising").Value);

        this.CurrentClassifier.buildClassifier(train);

        // Display the decision tree in the feedback panel, if one was provided.
        if (PanelForVisualFeedback != null)
        {
            GViewer GraphView = DisplayTree(GlobalInfo, ((J48)this.CurrentClassifier), IsCellular).gViewerForTreeClassif;
            GraphView.Size = new System.Drawing.Size(PanelForVisualFeedback.Width, PanelForVisualFeedback.Height);
            GraphView.Anchor = (AnchorStyles.Bottom | AnchorStyles.Top | AnchorStyles.Left | AnchorStyles.Right);
            PanelForVisualFeedback.Controls.Clear();
            PanelForVisualFeedback.Controls.Add(GraphView);
        }
    }
    #endregion
    #region Random Tree
    else if (ClassifAlgoParams.Name == "RandomTree")
    {
        this.CurrentClassifier = new weka.classifiers.trees.RandomTree();

        // Max depth of 0 means unlimited in weka.
        if ((bool)Parameters.ListCheckValues.Get("checkBoxMaxDepthUnlimited").Value)
            ((RandomTree)this.CurrentClassifier).setMaxDepth(0);
        else
            ((RandomTree)this.CurrentClassifier).setMaxDepth((int)Parameters.ListDoubleValues.Get("numericUpDownMaxDepth").Value);

        ((RandomTree)this.CurrentClassifier).setSeed((int)Parameters.ListDoubleValues.Get("numericUpDownSeed").Value);
        ((RandomTree)this.CurrentClassifier).setMinNum((double)Parameters.ListDoubleValues.Get("numericUpDownMinWeight").Value);

        // NumFolds > 0 enables backfitting; 0 disables it.
        if ((bool)Parameters.ListCheckValues.Get("checkBoxIsBackfitting").Value)
        {
            ((RandomTree)this.CurrentClassifier).setNumFolds((int)Parameters.ListDoubleValues.Get("numericUpDownBackFittingFolds").Value);
        }
        else
        {
            ((RandomTree)this.CurrentClassifier).setNumFolds(0);
        }

        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region Random Forest
    else if (ClassifAlgoParams.Name == "RandomForest")
    {
        this.CurrentClassifier = new weka.classifiers.trees.RandomForest();

        // Max depth of 0 means unlimited in weka.
        if ((bool)Parameters.ListCheckValues.Get("checkBoxMaxDepthUnlimited").Value)
            ((RandomForest)this.CurrentClassifier).setMaxDepth(0);
        else
            ((RandomForest)this.CurrentClassifier).setMaxDepth((int)Parameters.ListDoubleValues.Get("numericUpDownMaxDepth").Value);

        ((RandomForest)this.CurrentClassifier).setNumTrees((int)Parameters.ListDoubleValues.Get("numericUpDownNumTrees").Value);
        ((RandomForest)this.CurrentClassifier).setSeed((int)Parameters.ListDoubleValues.Get("numericUpDownSeed").Value);

        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region KStar
    else if (ClassifAlgoParams.Name == "KStar")
    {
        this.CurrentClassifier = new weka.classifiers.lazy.KStar();
        ((KStar)this.CurrentClassifier).setGlobalBlend((int)Parameters.ListDoubleValues.Get("numericUpDownGlobalBlend").Value);
        ((KStar)this.CurrentClassifier).setEntropicAutoBlend((bool)Parameters.ListCheckValues.Get("checkBoxBlendAuto").Value);
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region SVM
    else if (ClassifAlgoParams.Name == "SVM")
    {
        this.CurrentClassifier = new weka.classifiers.functions.SMO();
        ((SMO)this.CurrentClassifier).setC((double)Parameters.ListDoubleValues.Get("numericUpDownC").Value);
        ((SMO)this.CurrentClassifier).setKernel(WindowForClassificationParam.GeneratedKernel);
        ((SMO)this.CurrentClassifier).setRandomSeed((int)Parameters.ListDoubleValues.Get("numericUpDownSeed").Value);
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region KNN
    else if (ClassifAlgoParams.Name == "KNN")
    {
        this.CurrentClassifier = new weka.classifiers.lazy.IBk();

        // IBk is configured through a weka option string:
        // -K <k> : number of neighbours, -W 0 : no window,
        // -I / -F : inverse-distance / 1-distance weighting,
        // -A ... : nearest-neighbour search with the chosen distance function.
        string OptionDistance = " -K " + (int)Parameters.ListDoubleValues.Get("numericUpDownKNN").Value + " -W 0 ";

        string WeightType = (string)Parameters.ListTextValues.Get("comboBoxDistanceWeight").Value;
        switch (WeightType)
        {
            case "No Weighting":
                OptionDistance += "";
                break;
            case "1/Distance":
                OptionDistance += "-I";
                break;
            case "1-Distance":
                OptionDistance += "-F";
                break;
            default:
                break;
        }

        OptionDistance += " -A \"weka.core.neighboursearch.LinearNNSearch -A \\\"weka.core.";

        string DistanceType = (string)Parameters.ListTextValues.Get("comboBoxDistance").Value;
        switch (DistanceType)
        {
            case "Euclidean":
                OptionDistance += "EuclideanDistance";
                break;
            case "Manhattan":
                OptionDistance += "ManhattanDistance";
                break;
            case "Chebyshev":
                OptionDistance += "ChebyshevDistance";
                break;
            default:
                break;
        }

        // -D turns OFF attribute normalization inside the distance function.
        if (!(bool)Parameters.ListCheckValues.Get("checkBoxNormalize").Value)
            OptionDistance += " -D";

        OptionDistance += " -R ";
        OptionDistance += "first-last\\\"\"";

        ((IBk)this.CurrentClassifier).setOptions(weka.core.Utils.splitOptions(OptionDistance));

        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region Multilayer Perceptron
    else if (ClassifAlgoParams.Name == "Perceptron")
    {
        this.CurrentClassifier = new weka.classifiers.functions.MultilayerPerceptron();
        ((MultilayerPerceptron)this.CurrentClassifier).setMomentum((double)Parameters.ListDoubleValues.Get("numericUpDownMomentum").Value);
        ((MultilayerPerceptron)this.CurrentClassifier).setLearningRate((double)Parameters.ListDoubleValues.Get("numericUpDownLearningRate").Value);
        ((MultilayerPerceptron)this.CurrentClassifier).setSeed((int)Parameters.ListDoubleValues.Get("numericUpDownSeed").Value);
        ((MultilayerPerceptron)this.CurrentClassifier).setTrainingTime((int)Parameters.ListDoubleValues.Get("numericUpDownTrainingTime").Value);
        ((MultilayerPerceptron)this.CurrentClassifier).setNormalizeAttributes((bool)Parameters.ListCheckValues.Get("checkBoxNormAttribute").Value);
        ((MultilayerPerceptron)this.CurrentClassifier).setNormalizeNumericClass((bool)Parameters.ListCheckValues.Get("checkBoxNormNumericClasses").Value);
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region ZeroR
    else if (ClassifAlgoParams.Name == "ZeroR")
    {
        // FIX: this branch instantiated weka.classifiers.rules.OneR, so selecting
        // "ZeroR" silently trained a OneR model instead of the majority-class baseline.
        this.CurrentClassifier = new weka.classifiers.rules.ZeroR();
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region OneR
    else if (ClassifAlgoParams.Name == "OneR")
    {
        this.CurrentClassifier = new weka.classifiers.rules.OneR();
        ((OneR)this.CurrentClassifier).setMinBucketSize((int)Parameters.ListDoubleValues.Get("numericUpDownMinBucketSize").Value);
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region Naive Bayes
    else if (ClassifAlgoParams.Name == "NaiveBayes")
    {
        this.CurrentClassifier = new weka.classifiers.bayes.NaiveBayes();
        ((NaiveBayes)this.CurrentClassifier).setUseKernelEstimator((bool)Parameters.ListCheckValues.Get("checkBoxKernelEstimator").Value);
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #region Logistic
    else if (ClassifAlgoParams.Name == "Logistic")
    {
        this.CurrentClassifier = new weka.classifiers.functions.Logistic();
        ((Logistic)this.CurrentClassifier).setUseConjugateGradientDescent((bool)Parameters.ListCheckValues.Get("checkBoxUseConjugateGradientDescent").Value);
        ((Logistic)this.CurrentClassifier).setRidge((double)Parameters.ListDoubleValues.Get("numericUpDownRidge").Value);
        this.CurrentClassifier.buildClassifier(train);
    }
    #endregion
    #endregion

    // FIX: the attribute listing below used to dereference TextBoxForFeedback outside
    // this null check, throwing a NullReferenceException when the caller passed null.
    if (TextBoxForFeedback != null)
    {
        TextBoxForFeedback.Clear();
        TextBoxForFeedback.AppendText(this.CurrentClassifier.ToString());

        TextBoxForFeedback.AppendText("\n" + (InstancesList.numAttributes() - 1) + " attributes:\n\n");
        for (int IdxAttributes = 0; IdxAttributes < InstancesList.numAttributes() - 1; IdxAttributes++)
        {
            TextBoxForFeedback.AppendText(IdxAttributes + "\t: " + InstancesList.attribute(IdxAttributes).name() + "\n");
        }
    }

    #region evaluation of the model and results display
    if ((WindowForClassificationParam.numericUpDownFoldNumber.Enabled) && (TextBoxForFeedback != null))
    {
        TextBoxForFeedback.AppendText("\n-----------------------------\nModel validation\n-----------------------------\n");

        ModelEvaluation = new weka.classifiers.Evaluation(InstancesList);
        ModelEvaluation.crossValidateModel(this.CurrentClassifier, InstancesList,
            (int)WindowForClassificationParam.numericUpDownFoldNumber.Value, new java.util.Random(1));
        TextBoxForFeedback.AppendText(ModelEvaluation.toSummaryString());

        TextBoxForFeedback.AppendText("\n-----------------------------\nConfusion Matrix:\n-----------------------------\n");
        double[][] ConfusionMatrix = ModelEvaluation.confusionMatrix();

        // Header row: one column label per class.
        string NewLine = "";
        for (int i = 0; i < ConfusionMatrix[0].Length; i++)
        {
            NewLine += "c" + i + "\t";
        }
        TextBoxForFeedback.AppendText(NewLine + "\n\n");

        for (int j = 0; j < ConfusionMatrix.Length; j++)
        {
            NewLine = "";
            for (int i = 0; i < ConfusionMatrix[0].Length; i++)
            {
                NewLine += ConfusionMatrix[j][i] + "\t";
            }
            // FIX: row labels always came from ListCellularPhenotypes, even for well
            // classification; use the class list matching IsCellular (as the attribute
            // construction above does).
            string ClassDisplayName = IsCellular
                ? cGlobalInfo.ListCellularPhenotypes[j].Name
                : cGlobalInfo.ListWellClasses[j].Name;
            TextBoxForFeedback.AppendText(NewLine + "| c" + j + " <=> " + ClassDisplayName + "\n");
        }
    }
    #endregion

    return this.CurrentClassifier;
}
/// <summary>
/// Bundles a trained classifier with its evaluation and the fold count used to produce it.
/// </summary>
/// <param name="Model">The trained classifier</param>
/// <param name="Evaluation">The evaluation associated with the model</param>
/// <param name="NumFolds">Number of folds used for the evaluation</param>
public cClusteringObject(Classifier Model, Evaluation Evaluation, int NumFolds)
{
    this.Model = Model;
    this.Evaluation = Evaluation;
    this.FoldNumber = NumFolds;
}
/// <summary>
/// Builds the boosted classifier (a C# port of Weka's LogitBoost).
/// Optionally selects the number of boosting iterations by cross-validation
/// (when m_NumFolds > 1), then performs the final boosting run on all the data,
/// stopping early when the log-likelihood stops improving.
/// </summary>
public virtual void buildClassifier(Instances data)
{
    m_RandomInstance = new Random(m_Seed);
    Instances boostData;
    int classIndex = data.classIndex();

    // --- Preconditions: nominal class, a base classifier, no string attributes. ---
    if (data.classAttribute().Numeric)
    {
        throw new Exception("LogitBoost can't handle a numeric class!");
    }
    if (m_Classifier == null)
    {
        throw new System.Exception("A base classifier has not been specified!");
    }

    // Base learners that cannot handle instance weights are driven via resampling instead.
    if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
    {
        m_UseResampling = true;
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating copy of the training data");
    }

    m_NumClasses = data.numClasses();
    m_ClassAttribute = data.classAttribute();

    // Create a copy of the data so the caller's set is untouched; drop unlabeled rows.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Create the base classifiers: one column of NumIterations copies per class.
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating base classifiers");
    }
    m_Classifiers = new Classifier[m_NumClasses][];
    for (int j = 0; j < m_NumClasses; j++)
    {
        m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
    }

    // Do we want to select the appropriate number of iterations using cross-validation?
    int bestNumIterations = this.NumIterations;
    if (m_NumFolds > 1)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Processing first fold.");
        }

        // results[j] accumulates, over all runs and folds, the number of correctly
        // classified test instances after iteration j.
        double[] results = new double[this.NumIterations];

        // Iterate throught the cv-runs
        for (int r = 0; r < m_NumRuns; r++)
        {
            // Stratify the data
            data.randomize(m_RandomInstance);
            data.stratify(m_NumFolds);

            // Perform the cross-validation
            for (int i = 0; i < m_NumFolds; i++)
            {
                // Get train and test folds
                Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
                Instances test = data.testCV(m_NumFolds, i);

                // Make class numeric: LogitBoost fits regressors to a numeric
                // "pseudo class" in place of the nominal class attribute.
                Instances trainN = new Instances(train);
                trainN.ClassIndex = - 1;
                trainN.deleteAttributeAt(classIndex);
                trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
                trainN.ClassIndex = classIndex;
                m_NumericClassData = new Instances(trainN, 0);

                // Get class values
                int numInstances = train.numInstances();
                double[][] tmpArray = new double[numInstances][];
                for (int i2 = 0; i2 < numInstances; i2++)
                {
                    tmpArray[i2] = new double[m_NumClasses];
                }
                double[][] trainFs = tmpArray;
                double[][] tmpArray2 = new double[numInstances][];
                for (int i3 = 0; i3 < numInstances; i3++)
                {
                    tmpArray2[i3] = new double[m_NumClasses];
                }
                double[][] trainYs = tmpArray2;

                // Soft 0/1 encoding of the class memberships, smoothed by m_Offset.
                for (int j = 0; j < m_NumClasses; j++)
                {
                    for (int k = 0; k < numInstances; k++)
                    {
                        trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
                    }
                }

                // Perform iterations, scoring the partial model on the test fold
                // after each boosting step.
                double[][] probs = initialProbs(numInstances);
                m_NumGenerated = 0;
                double sumOfWeights = train.sumOfWeights();
                for (int j = 0; j < this.NumIterations; j++)
                {
                    performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
                    Evaluation eval = new Evaluation(train);
                    eval.evaluateModel(this, test);
                    results[j] += eval.correct();
                }
            }
        }

        // Find the number of iterations with the highest number of correct
        // classifications accumulated across runs/folds.
        double bestResult = - System.Double.MaxValue;
        for (int j = 0; j < this.NumIterations; j++)
        {
            if (results[j] > bestResult)
            {
                bestResult = results[j];
                bestNumIterations = j;
            }
        }
        // NOTE(review): results[j] is recorded after performIteration() ran j+1 times,
        // yet the final build below performs only bestNumIterations (= j) iterations —
        // looks like a possible off-by-one; confirm against Weka's LogitBoost source.
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
        }
    }

    // Build classifier on all the data
    int numInstances2 = data.numInstances();
    double[][] trainFs2 = new double[numInstances2][];
    for (int i4 = 0; i4 < numInstances2; i4++)
    {
        trainFs2[i4] = new double[m_NumClasses];
    }
    double[][] trainYs2 = new double[numInstances2][];
    for (int i5 = 0; i5 < numInstances2; i5++)
    {
        trainYs2[i5] = new double[m_NumClasses];
    }
    // Soft 0/1 class encoding, as in the CV phase.
    for (int j = 0; j < m_NumClasses; j++)
    {
        for (int i = 0, k = 0; i < numInstances2; i++, k++)
        {
            trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
        }
    }

    // Make class numeric (replace the nominal class with the numeric pseudo class).
    data.ClassIndex = - 1;
    data.deleteAttributeAt(classIndex);
    data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
    data.ClassIndex = classIndex;
    m_NumericClassData = new Instances(data, 0);

    // Perform iterations; stop early when the average log-likelihood change
    // falls below m_Precision.
    double[][] probs2 = initialProbs(numInstances2);
    double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
    m_NumGenerated = 0;
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
    }
    double sumOfWeights2 = data.sumOfWeights();
    for (int j = 0; j < bestNumIterations; j++)
    {
        double previousLoglikelihood = logLikelihood;
        performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
        logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
        }
        if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
        {
            return ;
        }
    }
}
/// <summary>
/// Learning-curve evaluation: for each cross-validation fold, trains a J48 tree on
/// incrementally larger prefixes of the training data and records the per-class
/// true-positive rate, then writes the averaged curves to "evaluation.txt" and the
/// per-fold curves to "evaluation-&lt;activity&gt;.txt".
/// </summary>
public void EvaluateIncrementalExamples()
{
    // Number of increments per fold: each fold trains on (folds-1)/folds of the data,
    // consumed in chunks of TRAINING_INCRMENETS instances.
    int numberIncrements = (int)Math.Ceiling((double)(this.numExamples * (DEFAULT_FOLDS - 1) / DEFAULT_FOLDS) / (double)TRAINING_INCRMENETS);

    // [class, fold, increment] -> true-positive rate for that configuration.
    this.trainingSizeMatrix = new double[this.classCount, DEFAULT_FOLDS, numberIncrements];
    for (int i = 0; i < this.classCount; i++)
        for (int j = 0; j < DEFAULT_FOLDS; j++)
            for (int k = 0; k < numberIncrements; k++)
                this.trainingSizeMatrix[i, j, k] = 0.0;

    // Shuffle once so the folds are randomized.
    Randomize randomizeFilter = new Randomize();
    randomizeFilter.setInputFormat(this.data);
    Instances randomData = Filter.useFilter(this.data, randomizeFilter);

    // Run incremental training for each fold and store the results per activity.
    for (int i = 1; i <= DEFAULT_FOLDS; i++)
    {
        // Training set = all folds except fold i (inverted selection).
        RemoveFolds trainingFoldsFilter = new RemoveFolds();
        trainingFoldsFilter.set_NumFolds(DEFAULT_FOLDS);
        trainingFoldsFilter.inputFormat(randomData);
        trainingFoldsFilter.set_InvertSelection(true);
        trainingFoldsFilter.set_Fold(i);
        Instances alltraining = Filter.useFilter(randomData, trainingFoldsFilter);

        // Test set = fold i.
        RemoveFolds testFoldsFilter = new RemoveFolds();
        testFoldsFilter.set_NumFolds(DEFAULT_FOLDS);
        testFoldsFilter.inputFormat(randomData);
        testFoldsFilter.set_InvertSelection(false);
        testFoldsFilter.set_Fold(i);
        Instances test = Filter.useFilter(randomData, testFoldsFilter);

        for (int j = 1; j <= numberIncrements; j++)
        {
            // Keep only the first j increments of the training data
            // (RemoveRange with inverted selection keeps the range).
            RemoveRange rangeFilter = new RemoveRange();
            rangeFilter.setInputFormat(alltraining);
            int first = 1;
            int last = j * TRAINING_INCRMENETS;
            if (last > (alltraining.m_Instances.size()))
                last = alltraining.m_Instances.size();
            string range = first.ToString() + "-" + last.ToString();
            rangeFilter.set_InstancesIndices(range);
            rangeFilter.set_InvertSelection(true);
            Instances training = Filter.useFilter(alltraining, rangeFilter);

            // Train a J48 tree on the prefix and evaluate it on the held-out fold.
            J48 tree = new J48();
            tree.set_MinNumObj(10);
            tree.set_ConfidenceFactor((float)0.25);
            tree.buildClassifier(training);

            Evaluation eval = new Evaluation(training);
            eval.evaluateModel(tree, test);

            for (int k = 0; k < this.classCount; k++)
            {
                // FIX: was 'tpRate' computed then discarded, followed by
                // '= +eval.truePositiveRate(k)' (unary plus, duplicate call);
                // each cell is written exactly once, so a plain assignment is intended.
                trainingSizeMatrix[k, i - 1, j - 1] = eval.truePositiveRate(k);
            }
        }
    }

    // Averaged learning curves: one CSV line per class, TP-rate averaged over folds,
    // expressed as a percentage. 'using' guarantees the writer is closed on exceptions.
    using (TextWriter tw = new StreamWriter("evaluation.txt"))
    {
        for (int i = 0; i < this.classCount; i++)
        {
            string line = randomData.attribute(this.data.numAttributes() - 1).value_Renamed(i);
            for (int k = 0; k < numberIncrements; k++)
            {
                double percentage = 0.0;
                for (int j = 0; j < DEFAULT_FOLDS; j++)
                    percentage += this.trainingSizeMatrix[i, j, k];
                percentage /= DEFAULT_FOLDS;
                percentage *= 100;
                line += "," + percentage.ToString("0.00");
            }
            tw.WriteLine(line);
        }
    }

    // Per-fold curves: one file per activity, one tab-separated line per fold.
    for (int i = 0; i < this.classCount; i++)
    {
        string activity = randomData.attribute(this.data.numAttributes() - 1).value_Renamed(i);
        using (TextWriter tw = new StreamWriter("evaluation-" + activity + ".txt"))
        {
            for (int j = 0; j < DEFAULT_FOLDS; j++)
            {
                string line = j.ToString();
                for (int k = 0; k < numberIncrements; k++)
                {
                    // FIX: the original also divided this single-fold value by
                    // DEFAULT_FOLDS (copy-paste from the averaging loop above);
                    // a per-fold rate only needs scaling to a percentage.
                    double percentage = this.trainingSizeMatrix[i, j, k] * 100;
                    line += "\t" + percentage.ToString("0.00");
                }
                tw.WriteLine(line);
            }
        }
    }
}
/// <summary>
/// Loads an ARFF data set from <paramref name="classifierFileName"/>, trains a J48 tree
/// on a percentSplit% prefix, runs a 10-fold cross-validation on the full set, and
/// prints the evaluation reports to the console (waits for a key press at the end).
/// </summary>
/// <param name="classifierFileName">Path to the ARFF file to load</param>
public static void Test_predictClass(string classifierFileName)
{
    // FIX: close the reader even when Instances parsing throws (it previously leaked).
    FileReader javaFileReader = new FileReader(classifierFileName);
    weka.core.Instances insts;
    try
    {
        insts = new weka.core.Instances(javaFileReader);
    }
    finally
    {
        javaFileReader.close();
    }
    insts.setClassIndex(insts.numAttributes() - 1);

    weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
    System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

    // FIX: removed the dead "manual cross fold" loop, which computed trainCV/testCV
    // folds and discarded them, and the unused 'testSize' local.

    // Train on the first percentSplit% of the instances.
    int trainSize = insts.numInstances() * percentSplit / 100;
    weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
    cl.buildClassifier(train);

    // 10-fold cross-validation on the full set; crossValidateModel trains its own
    // copies of 'cl', so the split-training above does not affect these figures.
    Evaluation eval = new Evaluation(insts);
    java.util.Random rand = new java.util.Random(1); // fixed seed for reproducibility
    int fold = 10;
    eval.crossValidateModel(cl, insts, fold, rand);
    System.Console.WriteLine("toClassDetailsString" + eval.toClassDetailsString());
    System.Console.WriteLine("toMatrixString\n" + eval.toMatrixString());
    System.Console.WriteLine("toCumulativeMarginDistributionString\n" + eval.toCumulativeMarginDistributionString());
    //System.Console.WriteLine("predictions\n" + eval.predictions());
    System.Console.ReadKey();
}