/// <summary>
/// Collects all category values of every attribute --> categories
/// Counts how often each category of each attribute occurs --> categoryTypeNumber
/// Counts how often each category of each attribute co-occurs with each target --> categoryTypeTargetNumber
/// [i][j][k]: i is the attribute, j the category, k the target
/// </summary>
/// <param name="insts"></param>
private void DataPreparation(weka.core.Instances insts)
{
    for (int i = 0; i < insts.numAttributes(); i++)
    {
        string[] categoryType = new string[insts.attribute(i).numValues()];
        for (int j = 0; j < insts.attribute(i).numValues(); j++)
        {
            categoryType[j] = insts.attribute(i).value(j).ToString();
        }
        categories.Add(categoryType);
    }

    List<List<string>> lst = new List<List<string>>();
    for (int i = 0; i < insts.numInstances(); i++)
    {
        lst.Add(new List<string>());
        for (int j = 0; j < insts.instance(i).numValues(); j++)
        {
            lst[lst.Count - 1].Add(insts.instance(i).toString(j));
        }
    }

    List<int[]> categoryTypeNumber = new List<int[]>();
    List<int[,]> categoryTypeTargetNumber = new List<int[,]>();
    for (int i = 0; i < categories.Count; i++)
    {
        categoryTypeNumber.Add(new int[categories[i].Length]);
        categoryTypeTargetNumber.Add(new int[categories[i].Length, categories[categories.Count - 1].Length]);
    }

    for (int i = 0; i < lst.Count; i++)                     // rows
    {
        for (int j = 0; j < lst[i].Count; j++)              // columns
        {
            for (int k = 0; k < categories[j].Length; k++)  // number of categories
            {
                string targetValue = lst[i][lst[i].Count - 1];
                if (lst[i][j].Contains(categories[j][k]))
                {
                    categoryTypeNumber[j][k] += 1;
                    for (int trgt = 0; trgt < categories[categories.Count - 1].Length; trgt++)
                    {
                        if (targetValue == categories[categories.Count - 1][trgt])
                        {
                            categoryTypeTargetNumber[j][k, trgt] += 1;
                        }
                    }
                }
            }
        }
    }

    Twoing(insts, categoryTypeNumber, categoryTypeTargetNumber);
    Gini(insts, categoryTypeNumber, categoryTypeTargetNumber);
    LogInfo("Dataset is saved.\r\n\r\n");
    LogInfo("TWOING : " + twoingPath + "\r\n\r\n");
    LogInfo("GINI : " + giniPath + "\r\n");
}
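The Twoing and Gini helpers called above are not shown in this section. As a reference point, a hedged sketch of the standard CART split criteria they presumably implement, written in terms of the per-branch target proportions these counts support (here p(k|j) is the fraction of instances in branch j with target k, and P_L, P_R are the proportions of instances sent left and right):

\text{Gini}_{\text{split}} = \sum_{j \in \{L,R\}} P_j \Big(1 - \sum_k p(k \mid j)^2\Big),
\qquad
\text{Twoing} = \frac{P_L P_R}{4} \Big(\sum_k \big|p(k \mid L) - p(k \mid R)\big|\Big)^2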
public static string Classify(bool useRubine, float duration, bool righthandedness, List<float> SpeakerAngles,
                              PointCollection pointHist, StylusPointCollection S, List<List<int>> hist, List<List<int>> ihist)
{
    // Convert all parameters to the format used in GestureTests
    List<Vector2> InterpretedPoints = new List<Vector2>();
    List<Vector2> StylusPoints = new List<Vector2>();
    List<Vector2> VelocityHistory = new List<Vector2>();
    List<Vector2> InverseVelocityHistory = new List<Vector2>();
    foreach (Point P in pointHist)
        InterpretedPoints.Add(new Vector2((float)P.X, (float)P.Y));
    foreach (StylusPoint P in S)
        StylusPoints.Add(new Vector2((float)P.X, (float)P.Y));
    for (int i = 0; i < hist[0].Count; i++)
    {
        VelocityHistory.Add(new Vector2(hist[0][i], hist[1][i]));
        InverseVelocityHistory.Add(new Vector2(ihist[0][i], ihist[1][i]));
    }

    // Create a new Sample, compute the features, and classify
    GS = new GestureSample(GestureTests.Types.GestureType.unknown, righthandedness, duration, SpeakerAngles,
                           InterpretedPoints, StylusPoints, VelocityHistory, InverseVelocityHistory);
    GS.ComputeFeatures(GestureFeatures.PointsStroke);
    if (useRubine)
        return EC.Recognizer.Classify(GS).ToString();
    WriteARFF();

    Instances test = new Instances(new java.io.FileReader("outfile.arff"));
    test.setClassIndex(0);

    double clsLabel = cls.classifyInstance(test.instance(0));
    test.instance(0).setClassValue(clsLabel);

    // Return the appropriate label
    return ((GestureType2D)((int)clsLabel + 1)).ToString();
}
public static void Main(string[] args)
{
    try
    {
        int runs = 1;
        string algo = "";
        string data = "";
        if (args.Length > 0) runs = Convert.ToInt32(args[0]);
        if (args.Length > 1) algo = args[1];
        if (args.Length > 2) data = args[2];

        Stopwatch read = new Stopwatch(), build = new Stopwatch(), classify = new Stopwatch();
        for (int cnt = 0; cnt < runs; cnt++)
        {
            read.Start();
            Instances train = new Instances(new java.io.FileReader(data + "train.arff"));
            train.setClassIndex(train.numAttributes() - 1);
            Instances test = new Instances(new java.io.FileReader(data + "test.arff"));
            test.setClassIndex(test.numAttributes() - 1);
            read.Stop();

            Classifier[] clList =
            {
                new weka.classifiers.bayes.NaiveBayes(),
                new weka.classifiers.trees.RandomForest(),
                new weka.classifiers.trees.J48(),
                new weka.classifiers.functions.MultilayerPerceptron(),
                new weka.classifiers.rules.ConjunctiveRule(),
                new weka.classifiers.functions.SMO()
            };

            build.Start();
            foreach (Classifier classifier in clList)
            {
                if (algo.Equals("") || algo.Equals("All") || classifier.getClass().getSimpleName().Equals(algo))
                    classifier.buildClassifier(train);
            }
            build.Stop();

            classify.Start();
            foreach (Classifier classifier in clList)
            {
                if (algo.Equals("") || algo.Equals("All") || classifier.getClass().getSimpleName().Equals(algo))
                {
                    int numCorrect = 0;
                    for (int i = 0; i < test.numInstances(); i++)
                    {
                        if (classifier.classifyInstance(test.instance(i)) == test.instance(i).classValue())
                            numCorrect++;
                    }
                    //Console.Write(classifier.getClass().getSimpleName() + "\t" + numCorrect + " out of " + test.numInstances() + " correct (" + (100.0 * numCorrect / test.numInstances()) + "%)");
                }
            }
            classify.Stop();
        }
        Console.WriteLine("{\"" + algo + "\"," + read.ElapsedMilliseconds + "," + build.ElapsedMilliseconds + "," + classify.ElapsedMilliseconds + "," + (read.ElapsedMilliseconds + build.ElapsedMilliseconds + classify.ElapsedMilliseconds) + "};");
        if (args.Length > 3)
            Console.ReadLine();
    }
    catch (java.lang.Exception e)
    {
        e.printStackTrace();
    }
}
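The hand-rolled accuracy loop above can also be delegated to Weka's own evaluation machinery. A minimal sketch, assuming the converted library exposes the stock weka.classifiers.Evaluation API (evaluateModel and pctCorrect, as in Java Weka):

// Hedged sketch: percentage-correct via weka.classifiers.Evaluation instead of a manual loop.
weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(train);
eval.evaluateModel(classifier, test);
Console.WriteLine(classifier.getClass().getSimpleName() + ": " + eval.pctCorrect() + "% correct");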
public static double classifyTrain_Test(string classifierFileName, Classifier _classifier)
{
    double performance = 0.0;
    try
    {
        FileReader javaFileReader = new FileReader(classifierFileName);
        weka.core.Instances insts = new weka.core.Instances(javaFileReader);
        javaFileReader.close();
        insts.setClassIndex(insts.numAttributes() - 1);
        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        _classifier.buildClassifier(train);

        int numCorrect = 0;
        var numberOfInst = insts.numInstances();
        int dataIndex = 0;
        for (int i = trainSize; i < numberOfInst; i++)
        {
            dataIndex++;
            weka.core.Instance currentInst = insts.instance(i);
            double predictClass = _classifier.classifyInstance(currentInst);
            double[] dist = _classifier.distributionForInstance(currentInst);
            string actualClass = insts.classAttribute().value((int)insts.instance(i).classValue());
            string predictedClass = insts.classAttribute().value((int)predictClass);
            if (predictedClass == actualClass)
            {
                numCorrect++;
            }
        }
        performance = (double)numCorrect / (double)testSize * 100;
        System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + performance + "%)");
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
    return performance;
}
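A minimal call-site sketch for the method above; the ARFF file name and the choice of J48 are illustrative assumptions, not part of the original sample:

double accuracy = classifyTrain_Test("dataset.arff", new weka.classifiers.trees.J48());
System.Console.WriteLine("J48 split accuracy: " + accuracy + "%");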
public static double SupportVectorMachineTest(weka.core.Instances insts)
{
    try
    {
        //weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader("iris.arff"));
        insts.setClassIndex(insts.numAttributes() - 1);
        SupportVectorMachine = new weka.classifiers.functions.SMO();

        // Convert nominal attributes to binary indicator attributes
        weka.filters.Filter myDummy = new weka.filters.unsupervised.attribute.NominalToBinary();
        myDummy.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDummy);

        // Normalize the instances
        weka.filters.Filter myNormalize = new weka.filters.unsupervised.instance.Normalize();
        myNormalize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myNormalize);

        // Randomize the order of the instances
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        SupportVectorMachine.buildClassifier(train);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = SupportVectorMachine.classifyInstance(currentInst);
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        return (double)numCorrect / (double)testSize * 100.0;
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
        return 0;
    }
}
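A minimal call-site sketch, assuming an "iris.arff" file is available as in the commented-out line above:

weka.core.Instances irisData = new weka.core.Instances(new java.io.FileReader("iris.arff"));
System.Console.WriteLine("SMO accuracy: " + SupportVectorMachineTest(irisData) + "%");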
/// <summary>
/// Adds the theta values produced by the Gini/Twoing computation to a list,
/// replaces the nominal attributes of the ARFF file with numeric ones,
/// and writes the new attribute values back to the ARFF file.
/// </summary>
/// <param name="insts"></param>
/// <param name="result"></param>
/// <param name="path"></param>
private void CreateNewDataset(weka.core.Instances insts, List<double[]> result, string path)
{
    // Add the theta values to the list
    List<List<string>> lst = new List<List<string>>();
    for (int i = 0; i < insts.numInstances(); i++)
    {
        lst.Add(new List<string>());
        for (int j = 0; j < insts.instance(i).numValues() - 1; j++)
        {
            string value = insts.instance(i).toString(j);
            for (int k = 0; k < categories[j].Length; k++)
            {
                if (value == categories[j][k])
                {
                    lst[lst.Count - 1].Add(String.Format("{0:0.00}", result[j][k]));
                    break;
                }
            }
        }
    }

    // Replace the attributes
    for (int i = 0; i < insts.numAttributes() - 1; i++)
    {
        string name = insts.attribute(i).name().ToString();
        insts.deleteAttributeAt(i);
        weka.core.Attribute att = new weka.core.Attribute(name);
        insts.insertAttributeAt(att, i);
    }

    // Write the attribute values
    for (int i = 0; i < insts.numInstances(); i++)
    {
        for (int j = 0; j < insts.instance(i).numValues() - 1; j++)
        {
            insts.instance(i).setValue(j, Convert.ToDouble(lst[i][j]));
        }
    }

    if (File.Exists(path))
    {
        File.Delete(path);
    }
    var saver = new ArffSaver();
    saver.setInstances(insts);
    saver.setFile(new java.io.File(path));
    saver.writeBatch();
}
public static double NaiveBayesTest(weka.core.Instances insts)
{
    try
    {
        //weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader("iris.arff"));
        insts.setClassIndex(insts.numAttributes() - 1);
        NaiveBayescl = new weka.classifiers.bayes.NaiveBayes();

        // Discretize numeric attributes
        weka.filters.Filter myDiscretize = new weka.filters.unsupervised.attribute.Discretize();
        myDiscretize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDiscretize);

        // Randomize the order of the instances
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        NaiveBayescl.buildClassifier(train);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = NaiveBayescl.classifyInstance(currentInst);
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        return (double)numCorrect / (double)testSize * 100.0;
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
        return 0;
    }
}
/// <summary> Builds the boosted classifier</summary>
public virtual void buildClassifier(Instances data)
{
    m_RandomInstance = new Random(m_Seed);
    int classIndex = data.classIndex();

    if (data.classAttribute().Numeric)
    {
        throw new Exception("LogitBoost can't handle a numeric class!");
    }
    if (m_Classifier == null)
    {
        throw new System.Exception("A base classifier has not been specified!");
    }
    if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
    {
        m_UseResampling = true;
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating copy of the training data");
    }

    m_NumClasses = data.numClasses();
    m_ClassAttribute = data.classAttribute();

    // Create a copy of the data
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Create the base classifiers
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating base classifiers");
    }
    m_Classifiers = new Classifier[m_NumClasses][];
    for (int j = 0; j < m_NumClasses; j++)
    {
        m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
    }

    // Do we want to select the appropriate number of iterations
    // using cross-validation?
    int bestNumIterations = this.NumIterations;
    if (m_NumFolds > 1)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Processing first fold.");
        }

        // Array for storing the results
        double[] results = new double[this.NumIterations];

        // Iterate through the cv-runs
        for (int r = 0; r < m_NumRuns; r++)
        {
            // Stratify the data
            data.randomize(m_RandomInstance);
            data.stratify(m_NumFolds);

            // Perform the cross-validation
            for (int i = 0; i < m_NumFolds; i++)
            {
                // Get train and test folds
                Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
                Instances test = data.testCV(m_NumFolds, i);

                // Make class numeric
                Instances trainN = new Instances(train);
                trainN.ClassIndex = -1;
                trainN.deleteAttributeAt(classIndex);
                trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
                trainN.ClassIndex = classIndex;
                m_NumericClassData = new Instances(trainN, 0);

                // Get class values
                int numInstances = train.numInstances();
                double[][] trainFs = new double[numInstances][];
                for (int i2 = 0; i2 < numInstances; i2++)
                {
                    trainFs[i2] = new double[m_NumClasses];
                }
                double[][] trainYs = new double[numInstances][];
                for (int i3 = 0; i3 < numInstances; i3++)
                {
                    trainYs[i3] = new double[m_NumClasses];
                }
                for (int j = 0; j < m_NumClasses; j++)
                {
                    for (int k = 0; k < numInstances; k++)
                    {
                        trainYs[k][j] = (train.instance(k).classValue() == j)
                            ? 1.0 - m_Offset
                            : 0.0 + (m_Offset / (double)m_NumClasses);
                    }
                }

                // Perform iterations
                double[][] probs = initialProbs(numInstances);
                m_NumGenerated = 0;
                double sumOfWeights = train.sumOfWeights();
                for (int j = 0; j < this.NumIterations; j++)
                {
                    performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
                    Evaluation eval = new Evaluation(train);
                    eval.evaluateModel(this, test);
                    results[j] += eval.correct();
                }
            }
        }

        // Find the number of iterations with the lowest error
        double bestResult = -System.Double.MaxValue;
        for (int j = 0; j < this.NumIterations; j++)
        {
            if (results[j] > bestResult)
            {
                bestResult = results[j];
                bestNumIterations = j;
            }
        }
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
        }
    }

    // Build classifier on all the data
    int numInstances2 = data.numInstances();
    double[][] trainFs2 = new double[numInstances2][];
    for (int i4 = 0; i4 < numInstances2; i4++)
    {
        trainFs2[i4] = new double[m_NumClasses];
    }
    double[][] trainYs2 = new double[numInstances2][];
    for (int i5 = 0; i5 < numInstances2; i5++)
    {
        trainYs2[i5] = new double[m_NumClasses];
    }
    for (int j = 0; j < m_NumClasses; j++)
    {
        for (int i = 0, k = 0; i < numInstances2; i++, k++)
        {
            trainYs2[i][j] = (data.instance(k).classValue() == j)
                ? 1.0 - m_Offset
                : 0.0 + (m_Offset / (double)m_NumClasses);
        }
    }

    // Make class numeric
    data.ClassIndex = -1;
    data.deleteAttributeAt(classIndex);
    data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
    data.ClassIndex = classIndex;
    m_NumericClassData = new Instances(data, 0);

    // Perform iterations
    double[][] probs2 = initialProbs(numInstances2);
    double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
    m_NumGenerated = 0;
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
    }
    double sumOfWeights2 = data.sumOfWeights();
    for (int j = 0; j < bestNumIterations; j++)
    {
        double previousLoglikelihood = logLikelihood;
        performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
        logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
        }
        if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
        {
            return;
        }
    }
}
public static void BayesTest()
{
    try
    {
        weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader("iris.arff"));
        insts.setClassIndex(insts.numAttributes() - 1);
        weka.classifiers.Classifier cl = new weka.classifiers.bayes.BayesNet();
        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        weka.core.Instances test = new weka.core.Instances(insts, 0, 0);

        cl.buildClassifier(train);
        // Print the model
        System.Console.WriteLine(cl);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            test.add(currentInst);

            double[] prediction = cl.distributionForInstance(currentInst);
            for (int x = 0; x < prediction.Length; x++)
            {
                System.Console.WriteLine("Probability of class [{0}] for [{1}] is: {2}",
                    currentInst.classAttribute().value(x), currentInst, Math.Round(prediction[x], 4));
            }
            System.Console.WriteLine();

            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)numCorrect / (double)testSize * 100.0 + "%)");

        // Evaluate on the training data
        weka.classifiers.Evaluation eTrain = new weka.classifiers.Evaluation(train);
        eTrain.evaluateModel(cl, train);

        // Print the results as in the Weka explorer:
        // summary statistics, detailed class statistics, and the confusion matrix
        String strSummaryTrain = eTrain.toSummaryString();
        System.Console.WriteLine(strSummaryTrain);
        System.Console.WriteLine(eTrain.toClassDetailsString());
        System.Console.WriteLine(eTrain.toMatrixString());

        // Get the confusion matrix
        double[][] cmMatrixTrain = eTrain.confusionMatrix();

        // Evaluate on the held-out test data
        weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(test);
        eTest.evaluateModel(cl, test);

        // Print the results as in the Weka explorer, as above
        String strSummaryTest = eTest.toSummaryString();
        System.Console.WriteLine(strSummaryTest);
        System.Console.WriteLine(eTest.toClassDetailsString());
        System.Console.WriteLine(eTest.toMatrixString());

        // Get the confusion matrix
        double[][] cmMatrixTest = eTest.confusionMatrix();
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
}
/// <summary> Evaluates a classifier with the options given in an array of
/// strings. <p/>
///
/// Valid options are: <p/>
///
/// -t name of training file <br/>
/// Name of the file with the training data. (required) <p/>
///
/// -T name of test file <br/>
/// Name of the file with the test data. If missing, a cross-validation
/// is performed. <p/>
///
/// -c class index <br/>
/// Index of the class attribute (1, 2, ...; default: last). <p/>
///
/// -x number of folds <br/>
/// The number of folds for the cross-validation (default: 10). <p/>
///
/// -s random number seed <br/>
/// Random number seed for the cross-validation (default: 1). <p/>
///
/// -m file with cost matrix <br/>
/// The name of a file containing a cost matrix. <p/>
///
/// -l name of model input file <br/>
/// Loads classifier from the given file. <p/>
///
/// -d name of model output file <br/>
/// Saves classifier built from the training data into the given file. <p/>
///
/// -v <br/>
/// Outputs no statistics for the training data. <p/>
///
/// -o <br/>
/// Outputs statistics only, not the classifier. <p/>
///
/// -i <br/>
/// Outputs detailed information-retrieval statistics per class. <p/>
///
/// -k <br/>
/// Outputs information-theoretic statistics. <p/>
///
/// -p <br/>
/// Outputs predictions for test instances (and nothing else). <p/>
///
/// -r <br/>
/// Outputs cumulative margin distribution (and nothing else). <p/>
///
/// -g <br/>
/// Only for classifiers that implement "Graphable." Outputs
/// the graph representation of the classifier (and nothing else). <p/>
/// </summary>
/// <param name="classifier">machine learning classifier</param>
/// <param name="options">the array of string containing the options</param>
/// <throws> Exception if model could not be evaluated successfully </throws>
/// <returns> a string describing the results </returns>
public static System.String evaluateModel(Classifier classifier, System.String[] options)
{
    Instances train = null, tempTrain, test = null, template = null;
    int seed = 1, folds = 10, classIndex = -1;
    System.String trainFileName, testFileName, sourceClass, classIndexString, seedString,
        foldsString, objectInputFileName, objectOutputFileName, attributeRangeString;
    bool noOutput = false, printClassifications = false, trainStatistics = true,
        printMargins = false, printComplexityStatistics = false, printGraph = false,
        classStatistics = false, printSource = false;
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    System.IO.StreamReader trainReader = null, testReader = null;
    // Note: java.io.ObjectInputStream was converted to System.IO.BinaryReader, which behaves differently.
    System.IO.BinaryReader objectInputStream = null;
    System.IO.Stream objectStream = null;
    CostMatrix costMatrix = null;
    System.Text.StringBuilder schemeOptionsText = null;
    Range attributesToOutput = null;
    long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;
    Classifier classifierBackup;

    try
    {
        // Get basic options (options the same for all schemes)
        classIndexString = Utils.getOption('c', options);
        if (classIndexString.Length != 0)
        {
            if (classIndexString.Equals("first"))
                classIndex = 1;
            else if (classIndexString.Equals("last"))
                classIndex = -1;
            else
                classIndex = System.Int32.Parse(classIndexString);
        }
        trainFileName = Utils.getOption('t', options);
        objectInputFileName = Utils.getOption('l', options);
        objectOutputFileName = Utils.getOption('d', options);
        testFileName = Utils.getOption('T', options);
        if (trainFileName.Length == 0)
        {
            if (objectInputFileName.Length == 0)
            {
                throw new System.Exception("No training file and no object input file given.");
            }
            if (testFileName.Length == 0)
            {
                throw new System.Exception("No training file and no test file given.");
            }
        }
        else if ((objectInputFileName.Length != 0) && ((!(classifier is UpdateableClassifier)) || (testFileName.Length == 0)))
        {
            throw new System.Exception("Classifier not incremental, or no test file provided: can't use both train and model file.");
        }
        try
        {
            if (trainFileName.Length != 0)
            {
                // java.io.FileReader converted to System.IO.StreamReader (different behavior).
                trainReader = new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default);
            }
            if (testFileName.Length != 0)
            {
                testReader = new System.IO.StreamReader(testFileName, System.Text.Encoding.Default);
            }
            if (objectInputFileName.Length != 0)
            {
                objectStream = new System.IO.FileStream(objectInputFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
                if (objectInputFileName.EndsWith(".gz"))
                {
                    objectStream = new ICSharpCode.SharpZipLib.GZip.GZipInputStream(objectStream);
                }
                objectInputStream = new System.IO.BinaryReader(objectStream);
            }
        }
        catch (System.Exception e)
        {
            throw new System.Exception("Can't open file " + e.Message + '.');
        }
        if (testFileName.Length != 0)
        {
            template = test = new Instances(testReader, 1);
            if (classIndex != -1)
            {
                test.ClassIndex = classIndex - 1;
            }
            else
            {
                test.ClassIndex = test.numAttributes() - 1;
            }
            if (classIndex > test.numAttributes())
            {
                throw new System.Exception("Index of class attribute too large.");
            }
        }
        if (trainFileName.Length != 0)
        {
            if ((classifier is UpdateableClassifier) && (testFileName.Length != 0))
            {
                train = new Instances(trainReader, 1);
            }
            else
            {
                train = new Instances(trainReader);
            }
            template = train;
            if (classIndex != -1)
            {
                train.ClassIndex = classIndex - 1;
            }
            else
            {
                train.ClassIndex = train.numAttributes() - 1;
            }
            if ((testFileName.Length != 0) && !test.equalHeaders(train))
            {
                throw new System.ArgumentException("Train and test file not compatible!");
            }
            if (classIndex > train.numAttributes())
            {
                throw new System.Exception("Index of class attribute too large.");
            }
        }
        if (template == null)
        {
            throw new System.Exception("No actual dataset provided to use as template");
        }
        seedString = Utils.getOption('s', options);
        if (seedString.Length != 0)
        {
            seed = System.Int32.Parse(seedString);
        }
        foldsString = Utils.getOption('x', options);
        if (foldsString.Length != 0)
        {
            folds = System.Int32.Parse(foldsString);
        }
        costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());

        classStatistics = Utils.getFlag('i', options);
        noOutput = Utils.getFlag('o', options);
        trainStatistics = !Utils.getFlag('v', options);
        printComplexityStatistics = Utils.getFlag('k', options);
        printMargins = Utils.getFlag('r', options);
        printGraph = Utils.getFlag('g', options);
        sourceClass = Utils.getOption('z', options);
        printSource = (sourceClass.Length != 0);

        // Check -p option
        try
        {
            attributeRangeString = Utils.getOption('p', options);
        }
        catch (System.Exception e)
        {
            throw new System.Exception(e.Message + "\nNOTE: the -p option has changed. " +
                "It now expects a parameter specifying a range of attributes " +
                "to list with the predictions. Use '-p 0' for none.");
        }
        if (attributeRangeString.Length != 0)
        {
            // If no test file is given, we cannot print predictions
            if (testFileName.Length == 0)
                throw new System.Exception("Cannot print predictions ('-p') without test file ('-T')!");
            printClassifications = true;
            if (!attributeRangeString.Equals("0"))
                attributesToOutput = new Range(attributeRangeString);
        }

        // If no training file is given, we don't have any priors
        if ((trainFileName.Length == 0) && (printComplexityStatistics))
            throw new System.Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");

        // If a model file is given, we can't process scheme-specific options
        if (objectInputFileName.Length != 0)
        {
            Utils.checkForRemainingOptions(options);
        }
        else
        {
            // Set options for classifier
            // (scheme-specific option handling was left commented out in the conversion)
        }
        Utils.checkForRemainingOptions(options);
    }
    catch (System.Exception e)
    {
        throw new System.Exception("\nWeka exception: " + e.Message + makeOptionString(classifier));
    }

    // Set up evaluation objects
    Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
    Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);

    if (objectInputFileName.Length != 0)
    {
        testingEvaluation.useNoPriors();

        // Load classifier from file
        try
        {
            BinaryFormatter formatter = new BinaryFormatter();
            classifier = (Classifier)formatter.Deserialize(objectStream);
        }
        catch (Exception e)
        {
            Console.WriteLine("Failed to deserialize. Reason: " + e.Message);
            throw;
        }
        finally
        {
            objectStream.Close();
        }
        objectInputStream.Close();
    }

    // Backup of fully set-up classifier for cross-validation
    classifierBackup = Classifier.makeCopy(classifier);

    // Build the classifier if no object file provided
    if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null) && (trainFileName.Length != 0))
    {
        // Build classifier incrementally
        trainingEvaluation.Priors = train;
        testingEvaluation.Priors = train;
        trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
        if (objectInputFileName.Length == 0)
        {
            classifier.buildClassifier(train);
        }
        while (train.readInstance(trainReader))
        {
            trainingEvaluation.updatePriors(train.instance(0));
            testingEvaluation.updatePriors(train.instance(0));
            ((UpdateableClassifier)classifier).updateClassifier(train.instance(0));
            train.delete(0);
        }
        trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
        trainReader.Close();
    }
    else if (objectInputFileName.Length == 0)
    {
        // Build classifier in one go
        tempTrain = new Instances(train);
        trainingEvaluation.Priors = tempTrain;
        testingEvaluation.Priors = tempTrain;
        trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
        classifier.buildClassifier(tempTrain);
        trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
    }

    // Save the classifier if an object output file is provided
    if (objectOutputFileName.Length != 0)
    {
        System.IO.Stream os = new System.IO.FileStream(objectOutputFileName, System.IO.FileMode.Create);
        if (objectOutputFileName.EndsWith(".gz"))
        {
            os = new ICSharpCode.SharpZipLib.GZip.GZipOutputStream(os);
        }
        // java.io.ObjectOutputStream converted to System.IO.BinaryWriter (different behavior).
        System.IO.BinaryWriter objectOutputStream = new System.IO.BinaryWriter(os);
        BinaryFormatter bformatter = new BinaryFormatter();
        bformatter.Serialize(os, classifier);
        objectOutputStream.Flush();
        objectOutputStream.Close();
    }

    // If classifier is drawable, output string describing graph
    if ((classifier is Drawable) && (printGraph))
    {
        return ((Drawable)classifier).graph();
    }

    // Output the classifier as equivalent source
    if ((classifier is Sourcable) && (printSource))
    {
        return wekaStaticWrapper((Sourcable)classifier, sourceClass);
    }

    // Output test instance predictions only
    if (printClassifications)
    {
        return toPrintClassifications(classifier, new Instances(template, 0), testFileName, classIndex, attributesToOutput);
    }

    // Output model
    if (!(noOutput || printMargins))
    {
        text.Append("\n" + classifier.ToString() + "\n");
    }
    if (!printMargins && (costMatrix != null))
    {
        text.Append("\n=== Evaluation Cost Matrix ===\n\n").Append(costMatrix.ToString());
    }

    // Compute error estimate from training data
    if ((trainStatistics) && (trainFileName.Length != 0))
    {
        if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null))
        {
            // Classifier was trained incrementally, so we have to
            // reopen the training data in order to test on it.
            trainReader = new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default);

            // Incremental testing
            train = new Instances(trainReader, 1);
            if (classIndex != -1)
            {
                train.ClassIndex = classIndex - 1;
            }
            else
            {
                train.ClassIndex = train.numAttributes() - 1;
            }
            testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            while (train.readInstance(trainReader))
            {
                trainingEvaluation.evaluateModelOnce((Classifier)classifier, train.instance(0));
                train.delete(0);
            }
            testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
            trainReader.Close();
        }
        else
        {
            testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
            trainingEvaluation.evaluateModel(classifier, train);
            testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
        }

        // Print the results of the training evaluation
        if (printMargins)
        {
            return trainingEvaluation.toCumulativeMarginDistributionString();
        }
        else
        {
            text.Append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
            text.Append("\nTime taken to test model on training data: " + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
            text.Append(trainingEvaluation.toSummaryString("\n\n=== Error on training data ===\n", printComplexityStatistics));
            if (template.classAttribute().Nominal)
            {
                if (classStatistics)
                {
                    text.Append("\n\n" + trainingEvaluation.toClassDetailsString());
                }
                text.Append("\n\n" + trainingEvaluation.toMatrixString());
            }
        }
    }

    // Compute proper error estimates
    if (testFileName.Length != 0)
    {
        // Testing is on the supplied test data
        while (test.readInstance(testReader))
        {
            testingEvaluation.evaluateModelOnce((Classifier)classifier, test.instance(0));
            test.delete(0);
        }
        testReader.Close();
        text.Append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
    }
    else if (trainFileName.Length != 0)
    {
        // Testing is via cross-validation on training data
        System.Random random = new System.Random((System.Int32)seed);
        // Use untrained (!) classifier for cross-validation
        classifier = Classifier.makeCopy(classifierBackup);
        testingEvaluation.crossValidateModel(classifier, train, folds, random);
        if (template.classAttribute().Numeric)
        {
            text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n", printComplexityStatistics));
        }
        else
        {
            text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Stratified cross-validation ===\n", printComplexityStatistics));
        }
    }
    if (template.classAttribute().Nominal)
    {
        if (classStatistics)
        {
            text.Append("\n\n" + testingEvaluation.toClassDetailsString());
        }
        text.Append("\n\n" + testingEvaluation.toMatrixString());
    }
    return text.ToString();
}
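A minimal invocation sketch for the option-driven entry point above, assuming this static method lives on the Evaluation class as in stock Weka; the ARFF file names and the J48 classifier are illustrative assumptions:

string[] options = new string[] { "-t", "train.arff", "-T", "test.arff", "-c", "last", "-i" };
string report = Evaluation.evaluateModel(new weka.classifiers.trees.J48(), options);
System.Console.WriteLine(report);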
/// <summary> Evaluates the classifier on a given set of instances. Note that
/// the data must have exactly the same format (e.g. order of
/// attributes) as the data used to train the classifier! Otherwise
/// the results will generally be meaningless.
/// </summary>
/// <param name="classifier">machine learning classifier</param>
/// <param name="data">set of test instances for evaluation</param>
/// <returns> the predictions </returns>
/// <throws> Exception if model could not be evaluated successfully </throws>
public virtual double[] evaluateModel(Classifier classifier, Instances data)
{
    double[] predictions = new double[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++)
    {
        predictions[i] = evaluateModelOnce((Classifier)classifier, data.instance(i));
    }
    return predictions;
}
/// <summary> Creates split on numeric attribute.
/// </summary>
/// <exception cref="Exception">if something goes wrong</exception>
private void handleNumericAttribute(Instances trainInstances)
{
    int firstMiss;
    int next = 1;
    int last = 0;
    int splitIndex = -1;
    double currentInfoGain;
    double defaultEnt;
    double minSplit;
    Instance instance;
    int i;

    // Current attribute is a numeric attribute.
    m_distribution = new Distribution(2, trainInstances.numClasses());

    // Only Instances with known values are relevant.
    System.Collections.IEnumerator enu = trainInstances.enumerateInstances();
    i = 0;
    while (enu.MoveNext())
    {
        instance = (Instance)enu.Current;
        if (instance.isMissing(m_attIndex))
            break;
        m_distribution.add(1, instance);
        i++;
    }
    firstMiss = i;

    // Compute minimum number of Instances required in each subset.
    minSplit = 0.1 * (m_distribution.total()) / ((double)trainInstances.numClasses());
    if (Utils.smOrEq(minSplit, m_minNoObj))
        minSplit = m_minNoObj;
    else if (Utils.gr(minSplit, 25))
        minSplit = 25;

    // Enough Instances with known values?
    if (Utils.sm((double)firstMiss, 2 * minSplit))
        return;

    // Compute values of criteria for all possible split indices.
    defaultEnt = infoGainCrit.oldEnt(m_distribution);
    while (next < firstMiss)
    {
        if (trainInstances.instance(next - 1).value_Renamed(m_attIndex) + 1e-5 < trainInstances.instance(next).value_Renamed(m_attIndex))
        {
            // Move class values for all Instances up to next possible split point.
            m_distribution.shiftRange(1, 0, trainInstances, last, next);

            // Check if enough Instances in each subset and compute values for criteria.
            if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit))
            {
                currentInfoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt);
                if (Utils.gr(currentInfoGain, m_infoGain))
                {
                    m_infoGain = currentInfoGain;
                    splitIndex = next - 1;
                }
                m_index++;
            }
            last = next;
        }
        next++;
    }

    // Was there any useful split?
    if (m_index == 0)
        return;

    // Compute modified information gain for best split.
    m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights);
    if (Utils.smOrEq(m_infoGain, 0))
        return;

    // Set instance variables' values to values for best split.
    m_numSubsets = 2;
    m_splitPoint = (trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex) + trainInstances.instance(splitIndex).value_Renamed(m_attIndex)) / 2;

    // In case we have a numerical precision problem we need to choose the smaller value.
    if (m_splitPoint == trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex))
    {
        m_splitPoint = trainInstances.instance(splitIndex).value_Renamed(m_attIndex);
    }

    // Restore distribution for best split.
    m_distribution = new Distribution(2, trainInstances.numClasses());
    m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
    m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);

    // Compute modified gain ratio for best split.
    m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
}
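The tail of this method applies the usual C4.5 corrections: the information gain of the winning split is penalized by an MDL-style term for the number of candidate split points actually evaluated, and the threshold is the midpoint of the two bracketing attribute values. With m = m_index, W = m_sumOfWeights, and v_s, v_{s+1} the attribute values around splitIndex:

\text{Gain}' = \text{Gain} - \frac{\log_2 m}{W},
\qquad
\theta = \frac{v_s + v_{s+1}}{2}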
/// <summary> Creates a new dataset of the same size using random sampling
/// with replacement according to the given weight vector. The
/// weights of the instances in the new dataset are set to one.
/// The length of the weight vector has to be the same as the
/// number of instances in the dataset, and all weights have to
/// be positive.
/// </summary>
/// <param name="random">a random number generator</param>
/// <param name="weights">the weight vector</param>
/// <returns> the new dataset </returns>
/// <exception cref="IllegalArgumentException">if the weights array is of the wrong
/// length or contains negative weights.</exception>
public virtual Instances resampleWithWeights(System.Random random, double[] weights)
{
    if (weights.Length != numInstances())
    {
        throw new System.ArgumentException("weights.length != numInstances.");
    }
    Instances newData = new Instances(this, numInstances());
    if (numInstances() == 0)
    {
        return newData;
    }
    double[] probabilities = new double[numInstances()];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
    for (int i = 0; i < numInstances(); i++)
    {
        sumProbs += random.NextDouble();
        probabilities[i] = sumProbs;
    }
    Utils.normalize(probabilities, sumProbs / sumOfWeights);

    // Make sure that rounding errors don't mess things up
    probabilities[numInstances() - 1] = sumOfWeights;
    int k = 0;
    int l = 0;
    sumProbs = 0;
    while ((k < numInstances()) && (l < numInstances()))
    {
        if (weights[l] < 0)
        {
            throw new System.ArgumentException("Weights have to be positive.");
        }
        sumProbs += weights[l];
        while ((k < numInstances()) && (probabilities[k] <= sumProbs))
        {
            newData.add(instance(l));
            newData.instance(k).Weight = 1;
            k++;
        }
        l++;
    }
    return newData;
}
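A minimal usage sketch (the data variable and seed are illustrative assumptions): with a uniform weight vector this reduces to an ordinary bootstrap sample, as used for bagging.

double[] w = new double[data.numInstances()];
for (int i = 0; i < w.Length; i++)
    w[i] = 1.0;                       // uniform weights -> plain bootstrap sample
Instances bootstrap = data.resampleWithWeights(new System.Random(42), w);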
public static double classifyTrain_Test(string classifierFileName, int baseClasses, Classifier _classifier)
{
    double performance = 0.0;
    try
    {
        List<BrResult> results = new List<BrResult>();
        for (int singleClass = 1; singleClass <= baseClasses; singleClass++)
        {
            string eachFileName = String.Format("{0}_{1}.arff", classifierFileName, singleClass);
            BrResult result = new BrResult();
            result.classNumber = singleClass;

            FileReader javaFileReader = new FileReader(eachFileName);
            weka.core.Instances insts = new weka.core.Instances(javaFileReader);
            javaFileReader.close();
            insts.setClassIndex(insts.numAttributes() - 1);
            System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

            int trainSize = insts.numInstances() * percentSplit / 100;
            int testSize = insts.numInstances() - trainSize;
            weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
            _classifier.buildClassifier(train);

            int numCorrect = 0;
            var numberOfInst = insts.numInstances();
            List<Result> eachResults = new List<Result>();
            int dataIndex = 0;
            for (int i = trainSize; i < numberOfInst; i++)
            {
                dataIndex++;
                Result eachRow = new Result();
                eachRow.lineIndex = 0;
                weka.core.Instance currentInst = insts.instance(i);
                double predictClass = _classifier.classifyInstance(currentInst);
                double[] dist = _classifier.distributionForInstance(currentInst);
                string actualClass = insts.classAttribute().value((int)insts.instance(i).classValue());
                string predictedClass = insts.classAttribute().value((int)predictClass);
                if (predictedClass == actualClass)
                {
                    eachRow.correct = "1";
                    numCorrect++;
                }
                else
                {
                    eachRow.correct = "0";
                }
                eachRow.lineIndex = dataIndex;
                eachRow.classActual = actualClass;
                eachRow.classPredicted = predictedClass;
                eachResults.Add(eachRow);
            }
            result.classResult = eachResults;
            results.Add(result);
            System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)numCorrect / (double)testSize * 100.0 + "%)");
        }

        #region Evaluation Matrix
        // One row per test instance; each row collects the per-label "correct" flags ("1"/"0").
        var evaluationMatrix = new Dictionary<int, string>();
        foreach (var res in results)
        {
            foreach (var classRes in res.classResult)
            {
                if (!evaluationMatrix.Keys.Contains(classRes.lineIndex))
                {
                    evaluationMatrix[classRes.lineIndex] = classRes.correct;
                }
                else
                {
                    evaluationMatrix[classRes.lineIndex] = evaluationMatrix[classRes.lineIndex] + "," + classRes.correct;
                }
            }
        }
        #endregion

        #region Subset accuracy: an instance counts as correct only if every label was predicted correctly
        int correctlyClassified = 0;
        int incorrectlyClassified = 0;
        int totalData = evaluationMatrix.Count;
        foreach (var key in evaluationMatrix.Keys)
        {
            string multiLevelClass = evaluationMatrix[key];
            string[] a = multiLevelClass.Split(',');
            int wrongLabels = 0;
            for (int i = 0; i < a.Length; i++)
            {
                if (a[i] == "0")
                {
                    wrongLabels++;
                }
            }
            if (wrongLabels == 0)
            {
                correctlyClassified++;
            }
            else
            {
                incorrectlyClassified++;
            }
        }
        performance = (double)correctlyClassified / (double)totalData * 100;
        System.Console.WriteLine(performance);
        #endregion
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
    return performance;
}
// Test the classification result of each map that a user played,
// with the data available as if they were playing through it
public static void classifyTest(String dataString, String playerID)
{
    try
    {
        java.io.StringReader stringReader = new java.io.StringReader(dataString);
        java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);

        /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1 */
        //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
        weka.core.Instances thisData = new weka.core.Instances(buffReader); //source.getDataSet();
        if (thisData.classIndex() == -1)
            thisData.setClassIndex(thisData.numAttributes() - 1);

        weka.core.Instances thisUniqueData = new weka.core.Instances(thisData);
        if (thisUniqueData.classIndex() == -1)
            thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1);
        thisUniqueData.delete();

        if (allUniqueData == null)
        {
            allUniqueData = new weka.core.Instances(thisData);
            if (allUniqueData.classIndex() == -1)
                allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1);
            allUniqueData.delete();
        }

        weka.core.InstanceComparator com = new weka.core.InstanceComparator(false);

        // Collect instances not yet seen across all players
        for (int i = 0; i < thisData.numInstances(); i++)
        {
            bool dup = false;
            for (int j = 0; j < allUniqueData.numInstances(); j++)
            {
                if (com.compare(thisData.instance(i), allUniqueData.instance(j)) == 0)
                {
                    Debug.Log("Duplicate found!");
                    dup = true;
                    break;
                }
            }
            if (!dup)
                allUniqueData.add(thisData.instance(i));
            else
                dupInstances++;
        }

        // Collect instances not yet seen for this player
        for (int i = 0; i < thisData.numInstances(); i++)
        {
            bool dup = false;
            for (int j = 0; j < thisUniqueData.numInstances(); j++)
            {
                if (com.compare(thisData.instance(i), thisUniqueData.instance(j)) == 0)
                {
                    Debug.Log("Duplicate found!");
                    dup = true;
                    break;
                }
            }
            if (!dup)
                thisUniqueData.add(thisData.instance(i));
            else
                dupInstancesSamePlayer++;
        }

        //Debug.Log("All Data Instance Count = " + thisData.numInstances());
        //Debug.Log("Unique Data Instance Count = " + thisUniqueData.numInstances());
        //Debug.Log("Done!");
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }
}
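The duplicate screen above compares every incoming instance against every stored one, which is O(n^2). A hedged alternative sketch, under the assumption that an instance's attribute values rendered as strings are an acceptable equality key (mirroring InstanceComparator(false), which ignores the class attribute); the variable names reuse those of the method above for illustration:

var seen = new System.Collections.Generic.HashSet<string>();
for (int i = 0; i < thisData.numInstances(); i++)
{
    var inst = thisData.instance(i);
    var sb = new System.Text.StringBuilder();
    for (int a = 0; a < thisData.numAttributes(); a++)
        if (a != thisData.classIndex())
            sb.Append(inst.toString(a)).Append(',');   // attribute values, class excluded, as the key
    if (seen.Add(sb.ToString()))
        allUniqueData.add(inst);
    else
        dupInstances++;                                // duplicate under this key
}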
/// <summary> Performs one boosting iteration.</summary>
private void performIteration(double[][] trainYs, double[][] trainFs, double[][] probs, Instances data, double origSumOfWeights)
{
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Training classifier " + (m_NumGenerated + 1));
    }

    // Build the new models
    for (int j = 0; j < m_NumClasses; j++)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("\t...for class " + (j + 1) + " (" + m_ClassAttribute.name() + "=" + m_ClassAttribute.value_Renamed(j) + ")");
        }

        // Make copy because we want to save the weights
        Instances boostData = new Instances(data);

        // Set instance pseudoclass and weights
        for (int i = 0; i < probs.Length; i++)
        {
            // Compute response and weight
            double p = probs[i][j];
            double z, actual = trainYs[i][j];
            if (actual == 1 - m_Offset)
            {
                z = 1.0 / p;
                if (z > Z_MAX)
                {
                    // threshold
                    z = Z_MAX;
                }
            }
            else
            {
                z = (-1.0) / (1.0 - p);
                if (z < -Z_MAX)
                {
                    // threshold
                    z = -Z_MAX;
                }
            }
            double w = (actual - p) / z;

            // Set values for instance
            Instance current = boostData.instance(i);
            current.setValue(boostData.classIndex(), z);
            current.Weight = current.weight() * w;
        }

        // Scale the weights (helps with some base learners)
        double sumOfWeights = boostData.sumOfWeights();
        double scalingFactor = (double)origSumOfWeights / sumOfWeights;
        for (int i = 0; i < probs.Length; i++)
        {
            Instance current = boostData.instance(i);
            current.Weight = current.weight() * scalingFactor;
        }

        // Select instances to train the classifier on
        Instances trainData = boostData;
        if (m_WeightThreshold < 100)
        {
            trainData = selectWeightQuantile(boostData, (double)m_WeightThreshold / 100);
        }
        else
        {
            if (m_UseResampling)
            {
                double[] weights = new double[boostData.numInstances()];
                for (int kk = 0; kk < weights.Length; kk++)
                {
                    weights[kk] = boostData.instance(kk).weight();
                }
                trainData = boostData.resampleWithWeights(m_RandomInstance, weights);
            }
        }

        // Build the classifier
        m_Classifiers[j][m_NumGenerated].buildClassifier(trainData);
    }

    // Evaluate / increment trainFs from the classifier
    for (int i = 0; i < trainFs.Length; i++)
    {
        double[] pred = new double[m_NumClasses];
        double predSum = 0;
        for (int j = 0; j < m_NumClasses; j++)
        {
            pred[j] = m_Shrinkage * m_Classifiers[j][m_NumGenerated].classifyInstance(data.instance(i));
            predSum += pred[j];
        }
        predSum /= m_NumClasses;
        for (int j = 0; j < m_NumClasses; j++)
        {
            trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses - 1) / m_NumClasses;
        }
    }
    m_NumGenerated++;

    // Compute the current probability estimates
    for (int i = 0; i < trainYs.Length; i++)
    {
        probs[i] = Calculateprobs(trainFs[i]);
    }
}
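Ignoring the m_Offset smoothing and the Z_MAX clamp, the response and weight computed per instance above are the standard LogitBoost update of Friedman, Hastie and Tibshirani:

z_i = \frac{y_i^* - p_i}{p_i (1 - p_i)},
\qquad
w_i = p_i (1 - p_i),
\qquad y_i^* \in \{0, 1\}

so that w_i z_i = y_i^* - p_i, which is exactly what the code recovers via w = (actual - p) / z.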
public override void buildClassifier(Instances insts)
{
    if (insts.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (insts.numClasses() > 2)
    {
        throw new System.Exception("Can only handle two-class datasets!");
    }
    if (insts.classAttribute().Numeric)
    {
        throw new Exception("Can't handle a numeric class!");
    }

    // Filter data
    m_Train = new Instances(insts);
    m_Train.deleteWithMissingClass();
    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_NominalToBinary);

    // Randomize training data
    m_Train.randomize(new System.Random((System.Int32)m_Seed));

    // Make space to store perceptrons
    m_Additions = new int[m_MaxK + 1];
    m_IsAddition = new bool[m_MaxK + 1];
    m_Weights = new int[m_MaxK + 1];

    // Compute perceptrons
    m_K = 0;
    for (int it = 0; it < m_NumIterations; it++)
    {
        for (int i = 0; i < m_Train.numInstances(); i++)
        {
            Instance inst = m_Train.instance(i);
            if (!inst.classIsMissing())
            {
                int prediction = makePrediction(m_K, inst);
                int classValue = (int)inst.classValue();
                if (prediction == classValue)
                {
                    m_Weights[m_K]++;
                }
                else
                {
                    m_IsAddition[m_K] = (classValue == 1);
                    m_Additions[m_K] = i;
                    m_K++;
                    m_Weights[m_K]++;
                }
                if (m_K == m_MaxK)
                {
                    // Labeled break from the Java original, converted to a goto.
                    goto out_brk;
                }
            }
        }
    }
out_brk: ;
}
/// <summary> Merges two sets of Instances together. The resulting set will have
/// all the attributes of the first set plus all the attributes of the
/// second set. The number of instances in both sets must be the same.
/// </summary>
/// <param name="first">the first set of Instances</param>
/// <param name="second">the second set of Instances</param>
/// <returns> the merged set of Instances </returns>
/// <exception cref="IllegalArgumentException">if the datasets are not the same size</exception>
public static Instances mergeInstances(Instances first, Instances second)
{
    if (first.numInstances() != second.numInstances())
    {
        throw new System.ArgumentException("Instance sets must be of the same size");
    }

    // Create the vector of merged attributes
    FastVector newAttributes = new FastVector();
    for (int i = 0; i < first.numAttributes(); i++)
    {
        newAttributes.addElement(first.attribute(i));
    }
    for (int i = 0; i < second.numAttributes(); i++)
    {
        newAttributes.addElement(second.attribute(i));
    }

    // Create the set of Instances
    Instances merged = new Instances(first.relationName() + '_' + second.relationName(), newAttributes, first.numInstances());

    // Merge each instance
    for (int i = 0; i < first.numInstances(); i++)
    {
        merged.add(first.instance(i).mergeInstance(second.instance(i)));
    }
    return merged;
}
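A minimal usage sketch (the two dataset variables are illustrative assumptions): merging a feature-only dataset with a dataset holding the labels yields one combined view, attribute by attribute.

// Both sets must contain the same number of instances, in the same order.
Instances combined = Instances.mergeInstances(featureData, labelData);
combined.setClassIndex(combined.numAttributes() - 1);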
/// <summary>Sets the distribution associated with the model.</summary>
public override void resetDistribution(Instances data)
{
    // Keep only the instances this split assigns to a known subset.
    Instances insts = new Instances(data, data.numInstances());
    for (int i = 0; i < data.numInstances(); i++)
    {
        if (whichSubset(data.instance(i)) > -1)
        {
            insts.add(data.instance(i));
        }
    }
    Distribution newD = new Distribution(insts, this);
    newD.addInstWithUnknown(data, m_attIndex);
    m_distribution = newD;
}
public Task<string> classifyTest(weka.classifiers.Classifier cl)
{
    string a = "";
    double rate = 0;
    try
    {
        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        // Normalize attribute values.
        weka.filters.Filter normalized = new weka.filters.unsupervised.attribute.Normalize();
        normalized.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, normalized);

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        // Replace missing values with the attribute mean/mode.
        weka.filters.Filter replaceMissingValues = new weka.filters.unsupervised.attribute.ReplaceMissingValues();
        replaceMissingValues.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, replaceMissingValues);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

        cl.buildClassifier(train);

        // Label the single query instance 'ins' and keep its class string.
        double label = cl.classifyInstance(ins);
        ins.setClassValue(label);
        a = ins.toString(ins.numAttributes() - 1);

        weka.core.SerializationHelper.write("mymodel.model", cl);

        // Measure accuracy on the held-out portion of the split.
        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        rate = (double) numCorrect / testSize * 100.0;
    }
    catch (java.lang.Exception)
    {
        rate = -1; // signal failure to the caller
    }
    // Nothing here is awaited, so the async modifier was dropped in favor of
    // returning a completed task.
    return Task.FromResult(rate.ToString() + ";" + a);
}
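// Alternative sketch (not from the original source): Weka's Evaluation class
// computes the same percent-split accuracy plus summary statistics in a few
// lines. The file name "data.arff" and the 66% split are assumptions.
public static void PercentSplitEvaluationSketch()
{
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("data.arff"));
    data.setClassIndex(data.numAttributes() - 1);
    data.randomize(new java.util.Random(1));

    int trainSize = data.numInstances() * 66 / 100;
    weka.core.Instances train = new weka.core.Instances(data, 0, trainSize);
    weka.core.Instances test = new weka.core.Instances(data, trainSize, data.numInstances() - trainSize);

    weka.classifiers.Classifier cl = new weka.classifiers.bayes.NaiveBayes();
    cl.buildClassifier(train);

    weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(train);
    eval.evaluateModel(cl, test);
    System.Console.WriteLine(eval.toSummaryString());
}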
/// <summary>
/// Prints the predictions for the given dataset into a String variable.
/// </summary>
/// <param name="classifier">the classifier to use</param>
/// <param name="train">the training data</param>
/// <param name="testFileName">the name of the test file</param>
/// <param name="classIndex">the class index</param>
/// <param name="attributesToOutput">the indices of the attributes to output</param>
/// <returns>the generated predictions for the attribute range</returns>
/// <throws>Exception if the test file cannot be opened</throws>
protected internal static System.String toPrintClassifications(Classifier classifier, Instances train, System.String testFileName, int classIndex, Range attributesToOutput)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    if (testFileName.Length != 0)
    {
        System.IO.StreamReader testReader = null;
        try
        {
            // The converted original opened the file twice just to recover the
            // stream and encoding; a single reader is equivalent.
            testReader = new System.IO.StreamReader(testFileName, System.Text.Encoding.Default);
        }
        catch (System.Exception e)
        {
            throw new System.Exception("Can't open file " + e.Message + '.');
        }

        // Read the header only, then stream instances one at a time.
        Instances test = new Instances(testReader, 1);
        if (classIndex != -1)
        {
            test.ClassIndex = classIndex - 1;
        }
        else
        {
            test.ClassIndex = test.numAttributes() - 1;
        }

        int i = 0;
        while (test.readInstance(testReader))
        {
            Instance instance = test.instance(0);
            Instance withMissing = (Instance) instance.copy();
            withMissing.Dataset = test;
            double predValue = ((Classifier) classifier).classifyInstance(withMissing);
            if (test.classAttribute().Numeric)
            {
                // Numeric class: print the raw predicted value.
                if (Instance.isMissingValue(predValue))
                {
                    text.Append(i + " missing ");
                }
                else
                {
                    text.Append(i + " " + predValue + " ");
                }
                if (instance.classIsMissing())
                {
                    text.Append("missing");
                }
                else
                {
                    text.Append(instance.classValue());
                }
                text.Append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            else
            {
                // Nominal class: print the predicted label and its probability.
                if (Instance.isMissingValue(predValue))
                {
                    text.Append(i + " missing ");
                }
                else
                {
                    text.Append(i + " " + test.classAttribute().value_Renamed((int) predValue) + " ");
                }
                if (Instance.isMissingValue(predValue))
                {
                    text.Append("missing ");
                }
                else
                {
                    text.Append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
                }
                text.Append(instance.toString(instance.classIndex()) + " " + attributeValuesString(withMissing, attributesToOutput) + "\n");
            }
            test.delete(0);
            i++;
        }
        testReader.Close();
    }
    return text.ToString();
}
public void trainSMOUsingWeka(string wekaFile, string modelName)
{
    try
    {
        // Load the training data from CSV.
        weka.core.converters.CSVLoader csvLoader = new weka.core.converters.CSVLoader();
        csvLoader.setSource(new java.io.File(wekaFile));
        weka.core.Instances insts = csvLoader.getDataSet();
        insts.setClassIndex(insts.numAttributes() - 1);

        // Configure the SMO classifier.
        cl = new weka.classifiers.functions.SMO();
        cl.setBatchSize("100");
        cl.setCalibrator(new weka.classifiers.functions.Logistic());
        cl.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
        cl.setEpsilon(1.02E-12);
        cl.setC(1.0);
        cl.setDebug(false);
        cl.setChecksTurnedOff(false);
        cl.setFilterType(new SelectedTag(weka.classifiers.functions.SMO.FILTER_NORMALIZE, weka.classifiers.functions.SMO.TAGS_FILTER));

        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        int trainSize = insts.numInstances() * percentSplit / 100;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

        java.io.File path = new java.io.File("/models/");
        cl.buildClassifier(train);
        saveModel(cl, modelName, path);

        #region test whole set
        // Evaluate on the whole dataset, training instances included, so this
        // overestimates generalization accuracy. The original divided by
        // testSize, which could report more than 100% correct.
        int numCorrect = 0;
        for (int i = 0; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            if (i == 12) // keep one instance's feature vector for inspection
            {
                array = new List<float>();
                foreach (float value in currentInst.toDoubleArray())
                {
                    array.Add(value);
                }
            }
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        System.Console.WriteLine(numCorrect + " out of " + insts.numInstances() + " correct (" + ((double) numCorrect / insts.numInstances() * 100.0) + "%)");
        #endregion
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
}
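// Usage sketch (not from the original source): reloads a model persisted with
// weka.core.SerializationHelper, as used elsewhere in this code, and
// classifies one instance. The paths are hypothetical, and saveModel above is
// assumed to write in the same serialized format.
public static void LoadModelSketch()
{
    weka.classifiers.Classifier loaded = (weka.classifiers.Classifier) weka.core.SerializationHelper.read("mymodel.model");
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("test.arff"));
    data.setClassIndex(data.numAttributes() - 1);
    System.Console.WriteLine("Predicted: " + loaded.classifyInstance(data.instance(0)));
}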
/// <summary>
/// Deletes all instances in the given range from the given bag.
/// </summary>
/// <exception cref="Exception">if something goes wrong</exception>
public void delRange(int bagIndex, Instances source, int startIndex, int lastPlusOne)
{
    double sumOfWeights = 0;
    int classIndex;
    Instance instance;

    for (int i = startIndex; i < lastPlusOne; i++)
    {
        instance = (Instance) source.instance(i);
        classIndex = (int) instance.classValue();
        sumOfWeights += instance.weight();
        m_perClassPerBag[bagIndex][classIndex] -= instance.weight();
        m_perClass[classIndex] -= instance.weight();
    }
    m_perBag[bagIndex] -= sumOfWeights;
    totaL -= sumOfWeights;
}
/// <summary>
/// Shifts all instances in the given range from one bag to another one.
/// </summary>
/// <exception cref="Exception">if something goes wrong</exception>
public void shiftRange(int from, int to, Instances source, int startIndex, int lastPlusOne)
{
    int classIndex;
    double weight;
    Instance instance;

    for (int i = startIndex; i < lastPlusOne; i++)
    {
        instance = (Instance) source.instance(i);
        classIndex = (int) instance.classValue();
        weight = instance.weight();
        m_perClassPerBag[from][classIndex] -= weight;
        m_perClassPerBag[to][classIndex] += weight;
        m_perBag[from] -= weight;
        m_perBag[to] += weight;
    }
}
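// Illustration (not from the original source): delRange and shiftRange both
// maintain the invariant that m_perClassPerBag sums to m_perBag across
// classes and to m_perClass across bags. A self-contained toy version of the
// shift, with hypothetical weights and plain arrays instead of the Weka
// Distribution class:
public static void BagShiftIllustration()
{
    double[][] perClassPerBag = { new double[] { 3.0, 1.0 }, new double[] { 2.0, 4.0 } };
    double[] perBag = { 4.0, 6.0 };
    double[] perClass = { 5.0, 5.0 };

    // Move weight 1.0 of class 0 from bag 0 to bag 1, as shiftRange does per instance.
    double w = 1.0;
    int cls = 0;
    perClassPerBag[0][cls] -= w;
    perClassPerBag[1][cls] += w;
    perBag[0] -= w;
    perBag[1] += w;

    // Class totals are untouched: shifting between bags preserves perClass.
    System.Console.WriteLine("perClass[0] = " + perClass[0] + ", perBag = [" + perBag[0] + ", " + perBag[1] + "]");
}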
public static void Test()
{
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
    data.setClassIndex(data.numAttributes() - 1);
    weka.classifiers.Classifier cls = new weka.classifiers.bayes.BayesNet();

    // Save BayesNet results in a .txt file.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
    {
        file.WriteLine("Performing " + percentSplit + "% split evaluation.");
        int runs = 1;

        // Perform repeated randomized percentage-split evaluation (a holdout
        // split per run, not k-fold cross-validation).
        for (int i = 0; i < runs; i++)
        {
            // Randomize the data with a per-run seed.
            int seed = i + 1;
            java.util.Random rand = new java.util.Random(seed);
            weka.core.Instances randData = new weka.core.Instances(data);
            randData.randomize(rand);

            int trainSize = (int) Math.Round((double) data.numInstances() * percentSplit / 100);
            weka.core.Instances train = new weka.core.Instances(data, 0, 0);
            weka.core.Instances test = new weka.core.Instances(data, 0, 0);
            train.setClassIndex(train.numAttributes() - 1);
            test.setClassIndex(test.numAttributes() - 1);

            file.WriteLine("EVALUATION OF TEST DATASET.");

            // Split the shuffled data into train and test sets.
            for (int j = 0; j < data.numInstances(); j++)
            {
                weka.core.Instance currentInst = randData.instance(j);
                if (j < trainSize)
                {
                    train.add(currentInst);
                }
                else
                {
                    test.add(currentInst);
                }
            }

            // Build and evaluate the classifier.
            cls.buildClassifier(train);
            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData);
            eval.evaluateModel(cls, test);

            // Print the results as in the Weka explorer: summary statistics,
            // detailed per-class statistics, and the confusion matrix.
            file.WriteLine(eval.toSummaryString());
            file.WriteLine();
            file.WriteLine(eval.toClassDetailsString());
            file.WriteLine();
            file.WriteLine(eval.toMatrixString());
            file.WriteLine();

            // Get the confusion matrix for further processing.
            double[][] cmMatrixTest = eval.confusionMatrix();
            System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
        }
    }
}
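// Sketch (not from the original source): the loop above is a randomized
// percentage split; for true k-fold cross-validation Weka provides
// Evaluation.crossValidateModel, which builds and tests k models internally.
// A minimal version against the same dataset:
public static void CrossValidationSketch()
{
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
    data.setClassIndex(data.numAttributes() - 1);
    weka.classifiers.Classifier cls = new weka.classifiers.bayes.BayesNet();

    weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(data);
    eval.crossValidateModel(cls, data, 10, new java.util.Random(1)); // 10-fold CV, fixed seed
    System.Console.WriteLine(eval.toSummaryString());
}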
/// <summary>
/// Selects only instances with weights that contribute to the specified
/// quantile of the weight distribution.
/// </summary>
/// <param name="data">the input instances</param>
/// <param name="quantile">the specified quantile, e.g. 0.9 to select 90% of the weight mass</param>
/// <returns>the selected instances</returns>
protected internal virtual Instances selectWeightQuantile(Instances data, double quantile)
{
    int numInstances = data.numInstances();
    Instances trainData = new Instances(data, numInstances);
    double[] weights = new double[numInstances];

    double sumOfWeights = 0;
    for (int i = 0; i < numInstances; i++)
    {
        weights[i] = data.instance(i).weight();
        sumOfWeights += weights[i];
    }
    double weightMassToSelect = sumOfWeights * quantile;
    int[] sortedIndices = Utils.sort(weights);

    // Select the heaviest instances until the requested mass is covered,
    // continuing through ties so equal-weight instances are kept together.
    sumOfWeights = 0;
    for (int i = numInstances - 1; i >= 0; i--)
    {
        Instance instance = (Instance) data.instance(sortedIndices[i]).copy();
        trainData.add(instance);
        sumOfWeights += weights[sortedIndices[i]];
        if ((sumOfWeights > weightMassToSelect) && (i > 0) && (weights[sortedIndices[i]] != weights[sortedIndices[i - 1]]))
        {
            break;
        }
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Selected " + trainData.numInstances() + " out of " + numInstances);
    }
    return trainData;
}
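// Usage sketch (not from the original source): the method is protected, so
// this sketch assumes it is called from the same class or a subclass, with
// instance weights already assigned by a booster. The file name and the
// weighting scheme below are purely hypothetical.
protected internal virtual void WeightQuantileSketch()
{
    Instances data = new Instances(new java.io.FileReader("train.arff"));
    data.setClassIndex(data.numAttributes() - 1);
    for (int i = 0; i < data.numInstances(); i++)
    {
        data.instance(i).setWeight(1.0 + i); // heavier weights on later instances
    }
    // Keep the instances carrying 90% of the total weight mass.
    Instances trimmed = selectWeightQuantile(data, 0.9);
    System.Console.WriteLine(trimmed.numInstances() + " of " + data.numInstances() + " instances kept.");
}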