/// <summary>
/// Generates the classifier from the training data.
/// For a numeric class the weighted mean of the observed class values is stored in
/// <c>m_ClassValue</c>; for a nominal class <c>m_Counts</c> receives the weighted class
/// counts (each starting at one), <c>m_ClassValue</c> receives <c>Utils.maxIndex(m_Counts)</c>
/// and the counts are then passed to <c>Utils.normalize</c>.
/// </summary>
/// <param name="instances">set of instances serving as training data</param>
/// <exception cref="Exception">if the class attribute is neither nominal nor numeric</exception>
public override void buildClassifier(Instances instances)
{
    m_Class = instances.classAttribute();
    m_ClassValue = 0;

    double totalWeight = 0;
    switch (instances.classAttribute().type())
    {
        case weka.core.Attribute.NUMERIC:
            m_Counts = null;
            break;

        case weka.core.Attribute.NOMINAL:
            // Every class count starts at one, and the running total starts at the
            // number of classes to match.
            m_Counts = new double[instances.numClasses()];
            for (int c = 0; c < m_Counts.Length; c++)
            {
                m_Counts[c] = 1;
            }
            totalWeight = instances.numClasses();
            break;

        default:
            throw new System.Exception("ZeroR can only handle nominal and numeric class" + " attributes.");
    }

    // Accumulate weighted statistics over all instances whose class is known.
    System.Collections.IEnumerator cursor = instances.enumerateInstances();
    while (cursor.MoveNext())
    {
        Instance current = (Instance) cursor.Current;
        if (current.classIsMissing())
        {
            continue;
        }

        if (instances.classAttribute().Nominal)
        {
            // Narrowing conversion: the class value is used as an index into the counts.
            m_Counts[(int) current.classValue()] += current.weight();
        }
        else
        {
            m_ClassValue += current.weight() * current.classValue();
        }
        totalWeight += current.weight();
    }

    if (!instances.classAttribute().Numeric)
    {
        m_ClassValue = Utils.maxIndex(m_Counts);
        Utils.normalize(m_Counts, totalWeight);
    }
    else if (Utils.gr(totalWeight, 0))
    {
        // Only divide when some weight was seen; otherwise the value stays at 0.
        m_ClassValue /= totalWeight;
    }
}
/// <summary>
/// Loads an ARFF file, trains the classifier on the leading <c>percentSplit</c> percent of
/// its instances and measures the accuracy on the remaining instances.
/// The last attribute of the data set is used as the class.
/// </summary>
/// <param name="classifierFileName">path of the ARFF file to load</param>
/// <param name="_classifier">classifier to train and evaluate</param>
/// <returns>
/// percentage of correctly classified test instances; 0.0 when the test split is empty
/// or when a Java exception was caught (its stack trace is printed)
/// </returns>
public static double classifyTrain_Test(string classifierFileName, Classifier _classifier)
{
    double performance = 0.0;
    try
    {
        weka.core.Instances insts;
        FileReader javaFileReader = new FileReader(classifierFileName);
        try
        {
            insts = new weka.core.Instances(javaFileReader);
        }
        finally
        {
            // Release the file handle even when the ARFF content cannot be parsed.
            javaFileReader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        int totalInstances = insts.numInstances();
        int trainSize = totalInstances * percentSplit / 100;
        int testSize = totalInstances - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

        _classifier.buildClassifier(train);

        // Everything after the training prefix forms the test split.
        int numCorrect = 0;
        for (int i = trainSize; i < totalInstances; i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictClass = _classifier.classifyInstance(currentInst);

            string actualClass = insts.classAttribute().value((int)currentInst.classValue());
            string predictedClass = insts.classAttribute().value((int)predictClass);
            if (predictedClass == actualClass)
            {
                numCorrect++;
            }
        }

        // Guard against an empty test split, which would otherwise yield NaN (0 / 0).
        if (testSize > 0)
        {
            performance = (double)numCorrect / (double)testSize * 100;
        }
        System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + performance.toString() + "%)");
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
    return performance;
}
/// <summary>
/// Evaluates a NaiveBayes classifier on ./data/Classification/Communication.arff and writes
/// the report to ./data/Classification/Communication_Report.txt.
/// For each of the 10 cross-validation folds the classifier is rebuilt on the training fold and
/// its summary, per-class details and confusion matrix on the test fold are written; afterwards
/// the classifier is rebuilt and evaluated on the complete data set and the model is printed.
/// The last attribute of the data set is used as the class.
/// </summary>
public static void cvdTest()
{
    weka.core.Instances data;
    java.io.FileReader arffReader = new java.io.FileReader("./data/Classification/Communication.arff");
    try
    {
        data = new weka.core.Instances(arffReader);
    }
    finally
    {
        // The reader is only needed while parsing; close it so the file handle is released.
        arffReader.close();
    }
    data.setClassIndex(data.numAttributes() - 1);

    weka.classifiers.Classifier cls = new weka.classifiers.bayes.NaiveBayes();

    // Save the evaluation results in a .txt file
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
    {
        int runs = 1;
        int folds = 10;

        // perform cross-validation
        for (int i = 0; i < runs; i++)
        {
            // randomize data (the seed depends on the run so every run is reproducible)
            int seed = i + 1;
            java.util.Random rand = new java.util.Random(seed);
            weka.core.Instances randData = new weka.core.Instances(data);
            randData.randomize(rand);
            if (randData.classAttribute().isNominal())
            {
                randData.stratify(folds);
            }

            for (int n = 0; n < folds; n++)
            {
                weka.core.Instances train = randData.trainCV(folds, n);
                weka.core.Instances test = randData.testCV(folds, n);

                // build the classifier on the training fold (the same instance is rebuilt per fold)
                cls.buildClassifier(train);

                // Print classifier analytics for the test fold
                file.WriteLine("EVALUATION OF TEST DATASET.");

                // NOTE(review): the evaluation is initialised with the test fold, not the
                // training fold - confirm this is the intended source of the class priors.
                weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(test);
                eTest.evaluateModel(cls, test);

                // Print statistics
                file.WriteLine(eTest.toSummaryString());
                file.WriteLine();

                // Print detailed class statistics
                file.WriteLine(eTest.toClassDetailsString());
                file.WriteLine();

                // Print confusion matrix
                file.WriteLine(eTest.toMatrixString());
                file.WriteLine();

                System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
            }

            // Print classifier analytics for the complete data set
            file.WriteLine("EVALUATION OF ALL DATASET.");

            cls.buildClassifier(data);

            weka.classifiers.Evaluation eAlldata = new weka.classifiers.Evaluation(data);
            eAlldata.evaluateModel(cls, data);

            // Print statistics
            file.WriteLine(eAlldata.toSummaryString());
            file.WriteLine();

            // Print detailed class statistics
            file.WriteLine(eAlldata.toClassDetailsString());
            file.WriteLine();

            // Print confusion matrix
            file.WriteLine(eAlldata.toMatrixString());
            file.WriteLine("----------------");

            // print model
            file.WriteLine(cls);
            file.WriteLine();
        }
    }
}
/// <summary>
/// Builds a pruneable classifier tree: the tree is grown on a private copy of the data
/// (instances with a missing class removed), collapsed, and then optionally pruned and
/// cleaned up depending on <c>m_pruneTheTree</c> and <c>m_cleanup</c>.
/// </summary>
/// <param name="data">the training data; the caller's object is not modified</param>
/// <exception cref="Exception">if the class is numeric or the data has string attributes</exception>
public override void buildClassifier(Instances data)
{
    if (data.classAttribute().Numeric)
    {
        throw new Exception("Class is numeric!");
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }

    // Work on a copy so that deleting instances does not touch the caller's data.
    Instances working = new Instances(data);
    working.deleteWithMissingClass();

    buildTree(working, m_subtreeRaising);
    collapse();

    if (m_pruneTheTree)
    {
        prune();
    }

    if (m_cleanup)
    {
        // Only an empty copy of the working set is handed to cleanup.
        cleanup(new Instances(working, 0));
    }
}
/// <summary>
/// Performs a cross-validation (stratified if the class is nominal) for a classifier on a
/// set of instances. A copy of the classifier is made before each call to
/// buildClassifier(), so the classifier passed in is never trained itself.
/// </summary>
/// <param name="classifier">the classifier with any options set</param>
/// <param name="data">the data on which the cross-validation is to be performed</param>
/// <param name="numFolds">the number of folds for the cross-validation</param>
/// <param name="random">random number generator for randomization</param>
/// <exception cref="Exception">
/// if a classifier could not be generated successfully or the class is not defined
/// </exception>
public virtual void crossValidateModel(Classifier classifier, Instances data, int numFolds, System.Random random)
{
    // Reorder a private copy so the caller's data set keeps its order.
    Instances shuffled = new Instances(data);
    shuffled.randomize(random);
    if (shuffled.classAttribute().Nominal)
    {
        shuffled.stratify(numFolds);
    }

    int fold = 0;
    while (fold < numFolds)
    {
        Instances trainingFold = shuffled.trainCV(numFolds, fold, random);

        // The priors are reset from the current training fold before it is evaluated.
        Priors = trainingFold;

        Classifier freshCopy = Classifier.makeCopy(classifier);
        freshCopy.buildClassifier(trainingFold);

        Instances testFold = shuffled.testCV(numFolds, fold);
        evaluateModel(freshCopy, testFold);

        fold++;
    }

    m_NumFolds = numFolds;
}
/// <summary>
/// Initializes all the counters for the evaluation and also takes a cost matrix as parameter.
/// Use <code>useNoPriors()</code> if the dataset is the test set and you
/// can't initialize with the priors from the training set via
/// <code>setPriors(Instances)</code>.
/// </summary>
/// <param name="data">
/// set of training instances, to get some header information and prior class
/// distribution information
/// </param>
/// <param name="costMatrix">the cost matrix---if null, default costs will be used</param>
/// <exception cref="Exception">
/// if a cost matrix is given and the class is not nominal, or its size differs from the
/// number of classes
/// </exception>
/// <seealso cref="useNoPriors()"/>
/// <seealso cref="setPriors(Instances)"/>
public Evaluation(Instances data, CostMatrix costMatrix)
{
    m_NumClasses = data.numClasses();
    m_NumFolds = 1;
    m_ClassIsNominal = data.classAttribute().Nominal;

    if (m_ClassIsNominal)
    {
        // Square confusion matrix plus the label of every class, filled in one pass.
        double[][] confusion = new double[m_NumClasses][];
        System.String[] labels = new System.String[m_NumClasses];
        for (int c = 0; c < m_NumClasses; c++)
        {
            confusion[c] = new double[m_NumClasses];
            labels[c] = data.classAttribute().value_Renamed(c);
        }
        m_ConfusionMatrix = confusion;
        m_ClassNames = labels;
    }

    m_CostMatrix = costMatrix;
    if (m_CostMatrix != null)
    {
        if (!m_ClassIsNominal)
        {
            throw new System.Exception("Class has to be nominal if cost matrix " + "given!");
        }
        if (m_CostMatrix.size() != m_NumClasses)
        {
            throw new System.Exception("Cost matrix not compatible with data!");
        }
    }

    m_ClassPriors = new double[m_NumClasses];
    Priors = data;
    m_MarginCounts = new double[k_MarginResolution + 1];
}
/// <summary>
/// Prints the predictions for the given test file into a String variable.
/// The test file is read one instance at a time; for each instance one line is produced
/// holding the running index, the prediction, the actual class and the selected attribute
/// values. For a nominal class the predicted label is followed by the entry of the class
/// distribution at the predicted index.
/// </summary>
/// <param name="classifier">the classifier to use</param>
/// <param name="train">the training data (not read by this method)</param>
/// <param name="testFileName">
/// the name of the test file; an empty name yields an empty result
/// </param>
/// <param name="classIndex">
/// the 1-based class index, or -1 to use the last attribute as the class
/// </param>
/// <param name="attributesToOutput">the indices of the attributes to output</param>
/// <returns>the generated predictions for the attribute range</returns>
/// <exception cref="Exception">if the test file cannot be opened</exception>
protected internal static System.String toPrintClassifications(Classifier classifier, Instances train, System.String testFileName, int classIndex, Range attributesToOutput)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    if (testFileName.Length == 0)
    {
        return text.ToString();
    }

    System.IO.StreamReader testReader;
    try
    {
        // A single reader is enough; the converted code opened the file three times and
        // never closed two of those readers.
        testReader = new System.IO.StreamReader(testFileName, System.Text.Encoding.Default);
    }
    catch (System.Exception e)
    {
        throw new System.Exception("Can't open file " + e.Message + '.');
    }

    try
    {
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1)
        {
            test.ClassIndex = classIndex - 1;
        }
        else
        {
            test.ClassIndex = test.numAttributes() - 1;
        }

        int i = 0;
        while (test.readInstance(testReader))
        {
            // Only one instance is held at a time; it is deleted again at the end of the pass.
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.Dataset = test;

            double predValue = classifier.classifyInstance(withMissing);
            bool predictionMissing = Instance.isMissingValue(predValue);

            if (test.classAttribute().Numeric)
            {
                if (predictionMissing)
                {
                    text.Append(i + " missing ");
                }
                else
                {
                    text.Append(i + " " + predValue + " ");
                }

                if (instance.classIsMissing())
                {
                    text.Append("missing");
                }
                else
                {
                    text.Append(instance.classValue());
                }
                text.Append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            else
            {
                if (predictionMissing)
                {
                    text.Append(i + " missing ");
                    text.Append("missing ");
                }
                else
                {
                    // Narrowing conversion: the predicted value is used as a class index.
                    text.Append(i + " " + test.classAttribute().value_Renamed((int) predValue) + " ");
                    text.Append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.Append(instance.toString(instance.classIndex()) + " " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }

            test.delete(0);
            i++;
        }
    }
    finally
    {
        // Close the file even when reading or classifying throws.
        testReader.Close();
    }

    return text.ToString();
}
/// <summary>
/// Builds the classifier from a two-class nominal data set.
/// The data is copied, stripped of instances with a missing class, passed through the
/// ReplaceMissingValues and NominalToBinary filters and shuffled with <c>m_Seed</c>.
/// Training then runs <c>m_NumIterations</c> passes over the data: a correct prediction
/// increments the weight of the current perceptron, a wrong one records the instance and
/// starts the next perceptron. Training stops early once <c>m_MaxK</c> perceptrons exist.
/// </summary>
/// <param name="insts">the training data</param>
/// <exception cref="Exception">
/// if the data has string attributes, more than two classes or a numeric class
/// </exception>
public override void buildClassifier(Instances insts)
{
    if (insts.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (insts.numClasses() > 2)
    {
        throw new System.Exception("Can only handle two-class datasets!");
    }
    if (insts.classAttribute().Numeric)
    {
        throw new Exception("Can't handle a numeric class!");
    }

    // Filter data
    m_Train = new Instances(insts);
    m_Train.deleteWithMissingClass();

    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);

    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_NominalToBinary);

    // Randomize training data
    m_Train.randomize(new System.Random((System.Int32) m_Seed));

    // Make space to store perceptrons (one extra slot for the perceptron being built)
    int capacity = m_MaxK + 1;
    m_Additions = new int[capacity];
    m_IsAddition = new bool[capacity];
    m_Weights = new int[capacity];

    // Compute perceptrons
    m_K = 0;
    for (int pass = 0; pass < m_NumIterations; pass++)
    {
        for (int row = 0; row < m_Train.numInstances(); row++)
        {
            Instance example = m_Train.instance(row);
            if (example.classIsMissing())
            {
                continue;
            }

            int predicted = makePrediction(m_K, example);
            // Narrowing conversion: the class value is compared as an integer label.
            int actual = (int) example.classValue();

            if (predicted != actual)
            {
                // Mistake: remember the instance and move on to the next perceptron.
                m_IsAddition[m_K] = (actual == 1);
                m_Additions[m_K] = row;
                m_K++;
            }
            m_Weights[m_K]++;

            if (m_K == m_MaxK)
            {
                // Nothing follows the training loops, so leaving the method here is
                // the same as jumping to a label placed after them.
                return;
            }
        }
    }
}
/// <summary>
/// Method for testing this class. Builds a small data set from scratch, optionally replaces
/// it with the contents of an ARFF file, and prints the results of a series of operations
/// (class information, random weights, attribute insertion and deletion, header comparison,
/// internal values, renaming, subsets, 3-fold cross-validation folds, randomizing, sorting)
/// to standard output. Any exception is reported on standard error.
/// </summary>
/// <param name="argv">may contain one element: the name of an ARFF file</param>
//@ requires argv != null;
//@ requires argv.length == 1;
//@ requires argv[0] != null;
public static void test(System.String[] argv)
{
    Instances instances, secondInstances, train, test, empty;
    System.Random random = new System.Random(2);
    int start, num;
    FastVector testAtts, testVals;
    int i, j;

    try
    {
        if (argv.Length > 1)
        {
            throw (new System.Exception("Usage: Instances [<filename>]"));
        }

        // Creating set of instances from scratch
        testVals = new FastVector(2);
        testVals.addElement("first_value");
        testVals.addElement("second_value");
        testAtts = new FastVector(2);
        testAtts.addElement(new Attribute("nominal_attribute", testVals));
        testAtts.addElement(new Attribute("numeric_attribute"));
        instances = new Instances("test_set", testAtts, 10);
        instances.add(new Instance(instances.numAttributes()));
        instances.add(new Instance(instances.numAttributes()));
        instances.add(new Instance(instances.numAttributes()));
        instances.ClassIndex = 0;
        System.Console.Out.WriteLine("\nSet of instances created from scratch:\n");
        System.Console.Out.WriteLine(instances);

        if (argv.Length == 1)
        {
            System.String filename = argv[0];

            // Read first five instances and print them. The reader is disposed as soon
            // as this part is done instead of being left open.
            using (System.IO.StreamReader headReader = new System.IO.StreamReader(filename, System.Text.Encoding.Default))
            {
                System.Console.Out.WriteLine("\nFirst five instances from file:\n");
                instances = new Instances(headReader, 1);
                instances.ClassIndex = instances.numAttributes() - 1;
                i = 0;
                while ((i < 5) && (instances.readInstance(headReader)))
                {
                    i++;
                }
                System.Console.Out.WriteLine(instances);
            }

            // Read all the instances in the file
            using (System.IO.StreamReader fullReader = new System.IO.StreamReader(filename, System.Text.Encoding.Default))
            {
                instances = new Instances(fullReader);
            }

            // Make the last attribute be the class
            instances.ClassIndex = instances.numAttributes() - 1;

            // Print header and instances.
            System.Console.Out.WriteLine("\nDataset:\n");
            System.Console.Out.WriteLine(instances);
            System.Console.Out.WriteLine("\nClass index: " + instances.classIndex());
        }

        // Test basic methods based on class index.
        System.Console.Out.WriteLine("\nClass name: " + instances.classAttribute().name());
        System.Console.Out.WriteLine("\nClass index: " + instances.classIndex());
        System.Console.Out.WriteLine("\nClass is nominal: " + instances.classAttribute().Nominal);
        System.Console.Out.WriteLine("\nClass is numeric: " + instances.classAttribute().Numeric);
        System.Console.Out.WriteLine("\nClasses:\n");
        for (i = 0; i < instances.numClasses(); i++)
        {
            System.Console.Out.WriteLine(instances.classAttribute().value_Renamed(i));
        }
        System.Console.Out.WriteLine("\nClass values and labels of instances:\n");
        for (i = 0; i < instances.numInstances(); i++)
        {
            Instance inst = instances.instance(i);
            System.Console.Out.Write(inst.classValue() + "\t");
            System.Console.Out.Write(inst.toString(inst.classIndex()));
            if (instances.instance(i).classIsMissing())
            {
                System.Console.Out.WriteLine("\tis missing");
            }
            else
            {
                System.Console.Out.WriteLine();
            }
        }

        // Create random weights.
        System.Console.Out.WriteLine("\nCreating random weights for instances.");
        for (i = 0; i < instances.numInstances(); i++)
        {
            instances.instance(i).Weight = random.NextDouble();
        }

        // Print all instances and their weights (and the sum of weights).
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(instances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(instances.sumOfWeights());

        // Insert an attribute
        secondInstances = new Instances(instances);
        Attribute testAtt = new Attribute("Inserted");
        secondInstances.insertAttributeAt(testAtt, 0);
        System.Console.Out.WriteLine("\nSet with inserted attribute:\n");
        System.Console.Out.WriteLine(secondInstances);
        System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());

        // Delete the attribute
        secondInstances.deleteAttributeAt(0);
        System.Console.Out.WriteLine("\nSet with attribute deleted:\n");
        System.Console.Out.WriteLine(secondInstances);
        System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());

        // Test if headers are equal
        System.Console.Out.WriteLine("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n");

        // Print data in internal format.
        System.Console.Out.WriteLine("\nData (internal values):\n");
        for (i = 0; i < instances.numInstances(); i++)
        {
            for (j = 0; j < instances.numAttributes(); j++)
            {
                if (instances.instance(i).isMissing(j))
                {
                    System.Console.Out.Write("? ");
                }
                else
                {
                    System.Console.Out.Write(instances.instance(i).value_Renamed(j) + " ");
                }
            }
            System.Console.Out.WriteLine();
        }

        // Just print header
        System.Console.Out.WriteLine("\nEmpty dataset:\n");
        empty = new Instances(instances, 0);
        System.Console.Out.WriteLine(empty);
        System.Console.Out.WriteLine("\nClass name: " + empty.classAttribute().name());

        // Create copy and rename an attribute and a value (if possible)
        if (empty.classAttribute().Nominal)
        {
            Instances copy = new Instances(empty, 0);
            copy.renameAttribute(copy.classAttribute(), "new_name");
            copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value_Renamed(0), "new_val_name");
            System.Console.Out.WriteLine("\nDataset with names changed:\n" + copy);
            System.Console.Out.WriteLine("\nOriginal dataset:\n" + empty);
        }

        // Create and prints subset of instances.
        start = instances.numInstances() / 4;
        num = instances.numInstances() / 2;
        System.Console.Out.Write("\nSubset of dataset: ");
        System.Console.Out.WriteLine(num + " instances from " + (start + 1) + ". instance");
        secondInstances = new Instances(instances, start, num);
        System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());

        // Print all instances and their weights (and the sum of weights).
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(secondInstances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(secondInstances.sumOfWeights());

        // Create and print training and test sets for 3-fold
        // cross-validation.
        System.Console.Out.WriteLine("\nTrain and test folds for 3-fold CV:");
        if (instances.classAttribute().Nominal)
        {
            instances.stratify(3);
        }
        for (j = 0; j < 3; j++)
        {
            // A fresh generator with the same seed is used for every fold.
            train = instances.trainCV(3, j, new System.Random(1));
            test = instances.testCV(3, j);

            // Print all instances and their weights (and the sum of weights).
            System.Console.Out.WriteLine("\nTrain: ");
            System.Console.Out.WriteLine("\nInstances and their weights:\n");
            System.Console.Out.WriteLine(train.instancesAndWeights());
            System.Console.Out.Write("\nSum of weights: ");
            System.Console.Out.WriteLine(train.sumOfWeights());
            System.Console.Out.WriteLine("\nClass name: " + train.classAttribute().name());
            System.Console.Out.WriteLine("\nTest: ");
            System.Console.Out.WriteLine("\nInstances and their weights:\n");
            System.Console.Out.WriteLine(test.instancesAndWeights());
            System.Console.Out.Write("\nSum of weights: ");
            System.Console.Out.WriteLine(test.sumOfWeights());
            System.Console.Out.WriteLine("\nClass name: " + test.classAttribute().name());
        }

        // Randomize instances and print them.
        System.Console.Out.WriteLine("\nRandomized dataset:");
        instances.randomize(random);

        // Print all instances and their weights (and the sum of weights).
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(instances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(instances.sumOfWeights());

        // Sort instances according to first attribute and
        // print them.
        System.Console.Out.Write("\nInstances sorted according to first attribute:\n ");
        instances.sort(0);

        // Print all instances and their weights (and the sum of weights).
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(instances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(instances.sumOfWeights());
    }
    catch (System.Exception e)
    {
        // Report the failure (message and stack trace) instead of silently dropping it.
        System.Console.Error.WriteLine(e);
    }
}
/// <summary>
/// Evaluates one binary data set per base class and combines the outcomes row by row.
/// For every class number from 1 to <paramref name="baseClasses"/> the file
/// "<paramref name="classifierFileName"/>_N.arff" is loaded, the classifier is trained on
/// the leading <c>percentSplit</c> percent of it and each remaining row is marked "1"
/// (predicted correctly) or "0". A test row counts as correctly classified only if it was
/// predicted correctly in every per-class data set.
/// </summary>
/// <param name="classifierFileName">common prefix of the per-class ARFF files</param>
/// <param name="baseClasses">number of per-class ARFF files to process</param>
/// <param name="_classifier">classifier that is rebuilt for every per-class file</param>
/// <returns>
/// percentage of test rows that were correct in all per-class evaluations; 0.0 when there
/// are no test rows or when a Java exception was caught (its stack trace is printed)
/// </returns>
public static double classifyTrain_Test(string classifierFileName, int baseClasses, Classifier _classifier)
{
    double performance = 0.0;
    try
    {
        List<BrResult> results = new List<BrResult>();
        for (int singleClass = 1; singleClass <= baseClasses; singleClass++)
        {
            string eachFileName = String.Format("{0}_{1}.arff", classifierFileName, singleClass);
            BrResult result = new BrResult();
            result.classNumber = singleClass;

            weka.core.Instances insts;
            FileReader javaFileReader = new FileReader(eachFileName);
            try
            {
                insts = new weka.core.Instances(javaFileReader);
            }
            finally
            {
                // Release the file handle even when the ARFF content cannot be parsed.
                javaFileReader.close();
            }
            insts.setClassIndex(insts.numAttributes() - 1);

            System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

            int totalInstances = insts.numInstances();
            int trainSize = totalInstances * percentSplit / 100;
            int testSize = totalInstances - trainSize;
            weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

            _classifier.buildClassifier(train);

            int numCorrect = 0;
            List<Result> eachResults = new List<Result>();

            // dataIndex numbers the test rows from 1; it is the key used to line up
            // the same row across the per-class files.
            int dataIndex = 0;
            for (int i = trainSize; i < totalInstances; i++)
            {
                dataIndex++;
                weka.core.Instance currentInst = insts.instance(i);
                double predictClass = _classifier.classifyInstance(currentInst);

                string actualClass = insts.classAttribute().value((int)currentInst.classValue());
                string predictedClass = insts.classAttribute().value((int)predictClass);

                Result eachRow = new Result();
                if (predictedClass == actualClass)
                {
                    eachRow.correct = "1";
                    numCorrect++;
                }
                else
                {
                    eachRow.correct = "0";
                }
                eachRow.lineIndex = dataIndex;
                eachRow.classActual = actualClass;
                eachRow.classPredicted = predictedClass;
                eachResults.Add(eachRow);
            }
            result.classResult = eachResults;
            results.Add(result);
            System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)((double)numCorrect / (double)testSize * 100.0) + "%)");
        }

        #region Evaludation Matrix
        // Maps a test row number to the comma-separated "1"/"0" outcomes of that row,
        // one entry per per-class data set.
        var evaluationMatrix = new Dictionary<int, string>();
        foreach (var res in results)
        {
            foreach (var classRes in res.classResult)
            {
                string outcomesSoFar;
                if (evaluationMatrix.TryGetValue(classRes.lineIndex, out outcomesSoFar))
                {
                    evaluationMatrix[classRes.lineIndex] = outcomesSoFar + "," + classRes.correct.toString();
                }
                else
                {
                    evaluationMatrix[classRes.lineIndex] = classRes.correct.toString();
                }
            }
        }
        #endregion

        #region
        int correnctlyClassified = 0;
        int totalData = evaluationMatrix.Count;
        foreach (string outcomes in evaluationMatrix.Values)
        {
            // A single "0" among the per-class outcomes makes the whole row incorrect.
            string[] flags = outcomes.Split(',');
            if (System.Array.IndexOf(flags, "0") < 0)
            {
                correnctlyClassified++;
            }
        }

        // Guard against an empty evaluation, which would otherwise yield NaN (0 / 0).
        if (totalData > 0)
        {
            performance = (double)correnctlyClassified / (double)totalData * 100;
        }
        System.Console.WriteLine(performance);
        #endregion
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
    return performance;
}
/// <summary>
/// Builds the boosted classifier.
/// When more than one fold is configured (m_NumFolds), the number of boosting
/// rounds is first chosen by repeated cross-validation; the committee is then
/// trained on all instances that have a class value, stopping early once the
/// average log-likelihood changes by less than m_Precision.
/// </summary>
/// <param name="data">training data; a copy is made before anything is modified
/// </param>
/// <exception cref="Exception">if the class is numeric, no base classifier has
/// been specified, or the data contains string attributes
/// </exception>
public virtual void buildClassifier(Instances data)
{
    m_RandomInstance = new Random(m_Seed);
    int classIndex = data.classIndex();

    if (data.classAttribute().Numeric)
    {
        throw new Exception("LogitBoost can't handle a numeric class!");
    }
    if (m_Classifier == null)
    {
        throw new System.Exception("A base classifier has not been specified!");
    }
    // A base classifier that is not a WeightedInstancesHandler forces the
    // resampling flag on.
    if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
    {
        m_UseResampling = true;
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating copy of the training data");
    }

    m_NumClasses = data.numClasses();
    m_ClassAttribute = data.classAttribute();

    // Target value for the class an instance belongs to / for every other class.
    double onTarget = 1.0 - m_Offset;
    double offTarget = 0.0 + (m_Offset / (double) m_NumClasses);

    // Work on a copy of the data so the caller's instances stay untouched.
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Create the base classifiers: one row of copies per class value.
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating base classifiers");
    }
    m_Classifiers = new Classifier[m_NumClasses][];
    for (int j = 0; j < m_NumClasses; j++)
    {
        m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
    }

    // Optionally select the number of iterations using cross-validation.
    int bestNumIterations = this.NumIterations;
    if (m_NumFolds > 1)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Processing first fold.");
        }

        // results[j] accumulates the value of eval.correct() obtained after
        // (j + 1) boosting rounds, summed over all runs and folds.
        double[] results = new double[this.NumIterations];

        // Iterate through the cv-runs
        for (int r = 0; r < m_NumRuns; r++)
        {
            // Stratify the data
            data.randomize(m_RandomInstance);
            data.stratify(m_NumFolds);

            // Perform the cross-validation
            for (int i = 0; i < m_NumFolds; i++)
            {
                // Get train and test folds
                Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
                Instances test = data.testCV(m_NumFolds, i);

                // Make class numeric: swap the class attribute for a numeric
                // 'pseudo class' attribute at the same index.
                Instances trainN = new Instances(train);
                trainN.ClassIndex = -1;
                trainN.deleteAttributeAt(classIndex);
                trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
                trainN.ClassIndex = classIndex;
                m_NumericClassData = new Instances(trainN, 0);

                // Get class values (read from the fold that still has its
                // original class attribute).
                int foldSize = train.numInstances();
                double[][] foldFs = new double[foldSize][];
                double[][] foldYs = new double[foldSize][];
                for (int k = 0; k < foldSize; k++)
                {
                    foldFs[k] = new double[m_NumClasses];
                    foldYs[k] = new double[m_NumClasses];
                    double actualClass = train.instance(k).classValue();
                    for (int j = 0; j < m_NumClasses; j++)
                    {
                        foldYs[k][j] = (actualClass == j) ? onTarget : offTarget;
                    }
                }

                // Perform iterations, scoring the committee after every round.
                double[][] foldProbs = initialProbs(foldSize);
                m_NumGenerated = 0;
                double foldWeightSum = train.sumOfWeights();
                for (int j = 0; j < this.NumIterations; j++)
                {
                    performIteration(foldYs, foldFs, foldProbs, trainN, foldWeightSum);
                    Evaluation eval = new Evaluation(train);
                    eval.evaluateModel(this, test);
                    results[j] += eval.correct();
                }
            }
        }

        // Find the number of iterations with the best accumulated result.
        // -Double.MaxValue is the lowest finite double, so any real result wins.
        double bestResult = -System.Double.MaxValue;
        for (int j = 0; j < this.NumIterations; j++)
        {
            if (results[j] > bestResult)
            {
                bestResult = results[j];
                // results[j] was measured after (j + 1) calls to performIteration,
                // so the matching iteration count is j + 1. Storing j made the
                // final model run one round fewer than the best configuration
                // (and zero rounds when index 0 won).
                bestNumIterations = j + 1;
            }
        }
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
        }
    }

    // Build classifier on all the data. The targets must be read before the
    // class attribute is replaced below.
    int numInstances = data.numInstances();
    double[][] trainFs = new double[numInstances][];
    double[][] trainYs = new double[numInstances][];
    for (int k = 0; k < numInstances; k++)
    {
        trainFs[k] = new double[m_NumClasses];
        trainYs[k] = new double[m_NumClasses];
        double actualClass = data.instance(k).classValue();
        for (int j = 0; j < m_NumClasses; j++)
        {
            trainYs[k][j] = (actualClass == j) ? onTarget : offTarget;
        }
    }

    // Make class numeric
    data.ClassIndex = -1;
    data.deleteAttributeAt(classIndex);
    data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
    data.ClassIndex = classIndex;
    m_NumericClassData = new Instances(data, 0);

    // Perform iterations
    double[][] probs = initialProbs(numInstances);
    double logLikelihood = CalculateLogLikelihood(trainYs, probs);
    m_NumGenerated = 0;
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
    }
    double sumOfWeights = data.sumOfWeights();
    for (int j = 0; j < bestNumIterations; j++)
    {
        double previousLoglikelihood = logLikelihood;
        performIteration(trainYs, trainFs, probs, data, sumOfWeights);
        logLikelihood = CalculateLogLikelihood(trainYs, probs);
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
        }
        // Stop as soon as the log-likelihood has effectively converged.
        if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
        {
            return;
        }
    }
}
/// <summary>
/// Generates the classifier: every non-class attribute is tried as a split
/// candidate and the one with the lowest criterion value is kept, together with
/// its split point and class distribution.
/// </summary>
/// <param name="instances">set of instances serving as training data
/// </param>
/// <exception cref="Exception">if the classifier has not been generated successfully
/// </exception>
public override void buildClassifier(Instances instances)
{
    // Sentinels: any candidate evaluated in the loop below replaces them.
    double lowestCriterion = System.Double.MaxValue;
    double chosenSplitPoint = -System.Double.MaxValue;
    int chosenAttribute = -1;

    if (instances.checkForStringAttributes())
    {
        throw new Exception("Can't handle string attributes!");
    }

    // Holds the three distribution rows of the best split found so far.
    int distWidth = instances.numClasses();
    double[][] chosenDist = new double[3][];
    for (int row = 0; row < chosenDist.Length; row++)
    {
        chosenDist[row] = new double[distWidth];
    }

    m_Instances = new Instances(instances);
    m_Instances.deleteWithMissingClass();

    if (m_Instances.numInstances() == 0)
    {
        throw new System.ArgumentException("No instances without missing " + "class values in training file!");
    }
    if (instances.numAttributes() == 1)
    {
        throw new System.ArgumentException("Attribute missing. Need at least one " + "attribute other than class attribute!");
    }

    // A nominal class needs one slot per class value; otherwise a single slot.
    int classSlots = m_Instances.classAttribute().Nominal ? m_Instances.numClasses() : 1;

    // Evaluate every attribute except the class itself.
    for (int att = 0; att < m_Instances.numAttributes(); att++)
    {
        if (att == m_Instances.classIndex())
        {
            continue;
        }

        // Fresh scratch distribution for the split finder to fill in.
        double[][] scratch = new double[3][];
        for (int row = 0; row < scratch.Length; row++)
        {
            scratch[row] = new double[classSlots];
        }
        m_Distribution = scratch;

        // Value of the criterion for the best split on this attribute.
        double criterion = m_Instances.attribute(att).Nominal
            ? findSplitNominal(att)
            : findSplitNumeric(att);

        // The first candidate is always accepted (no attribute chosen yet);
        // later ones only when they score strictly lower.
        if (chosenAttribute < 0 || criterion < lowestCriterion)
        {
            lowestCriterion = criterion;
            chosenAttribute = att;
            chosenSplitPoint = m_SplitPoint;
            for (int row = 0; row < 3; row++)
            {
                Array.Copy(m_Distribution[row], chosenDist[row], classSlots);
            }
        }
    }

    // Publish the winning attribute, split point and distribution.
    m_AttIndex = chosenAttribute;
    m_SplitPoint = chosenSplitPoint;
    m_Distribution = chosenDist;

    if (m_Instances.classAttribute().Nominal)
    {
        foreach (double[] branch in m_Distribution)
        {
            double total = Utils.sum(branch);
            if (total != 0)
            {
                Utils.normalize(branch, total);
                continue;
            }

            // An all-zero row means there were only missing attribute values:
            // fall back to a normalized copy of row 2.
            double[] fallback = m_Distribution[2];
            Array.Copy(fallback, branch, fallback.Length);
            Utils.normalize(branch);
        }
    }

    // Save memory: keep only the header of the training data.
    m_Instances = new Instances(m_Instances, 0);
}