// Test the classification result of each map that a user played, // with the data available as if they were playing through it public static void classifyTest(String dataString, String playerID) { String results = ""; try { java.io.StringReader stringReader = new java.io.StringReader(dataString); java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader); /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/ //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff"); weka.core.Instances data = new weka.core.Instances(buffReader); //source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1); weka.classifiers.Classifier cl; for (int i = 3; i < data.numInstances(); i++) { cl = new weka.classifiers.bayes.NaiveBayes(); //cl = new weka.classifiers.trees.J48(); //cl = new weka.classifiers.lazy.IB1(); //cl = new weka.classifiers.functions.MultilayerPerceptron(); ((weka.classifiers.functions.MultilayerPerceptron)cl).setHiddenLayers("12"); weka.core.Instances subset = new weka.core.Instances(data,0,i); cl.buildClassifier(subset); weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset); eval.crossValidateModel(cl, subset, 3, new java.util.Random(1)); results = results + eval.pctCorrect(); // For accuracy measurement /* For Mathews Correlation Coefficient */ //double TP = eval.numTruePositives(1); //double FP = eval.numFalsePositives(1); //double TN = eval.numTrueNegatives(1); //double FN = eval.numFalseNegatives(1); //double correlationCoeff = ((TP*TN)-(FP*FN))/Math.Sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)); //results = results + correlationCoeff; if (i != data.numInstances()-1) results = results + ", "; if(i == data.numInstances()-1) Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1)); } } catch (java.lang.Exception ex) { Debug.LogError(ex.getMessage()); } // Write values to file for a matlab read // For accuracy StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldValidations_NeuralNet.txt"); //StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldCorrCoeff.txt"); // For mathews cc writer.WriteLine(results); writer.Close(); Debug.Log(playerID + " has been written to file"); }
public List <DrugBrandInfo> AssociateDrugs(int?drugId, string dataPath) { List <DrugBrandInfo> drugList = new List <DrugBrandInfo>(); weka.core.Instances data = new weka.core.Instances(new java.io.FileReader(dataPath)); data.setClassIndex(data.numAttributes() - 1); Apriori apriori = new Apriori(); apriori.setClassIndex(data.classIndex()); apriori.buildAssociations(data); FastVector[] vector = apriori.getAllTheRules(); for (int i = 0; i < vector[0].size(); i++) { string value1 = ((AprioriItemSet)vector[0].elementAt(i)).toString(data); string value2 = ((AprioriItemSet)vector[1].elementAt(i)).toString(data); string[] set1 = value1.Split(' ', '='); string[] set2 = value2.Split(' ', '='); if (set1[0].Equals(drugId.ToString())) { if (set1[1] == "1" && set2[1] == "1") { int brandId = Convert.ToInt32(set2[0]); var drug = db.DrugBrandInfos.SingleOrDefault(c => c.Id == brandId); drugList.Add(drug); } break; } } return(drugList); }
/* Use when the player logs in to initially create the classifier with data from server */ public void InitializeClassifier(String dataString) { try { java.io.StringReader stringReader = new java.io.StringReader(dataString); java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader); playerData = new weka.core.Instances(buffReader); /* State where in each Instance the class attribute is, if its not already specified by the file */ if (playerData.classIndex() == -1) playerData.setClassIndex(playerData.numAttributes() - 1); /* NAIVE BAYES */ //classifier = new weka.classifiers.bayes.NaiveBayes(); /* NEURAL NET */ //classifier = new weka.classifiers.functions.MultilayerPerceptron(); //((weka.classifiers.functions.MultilayerPerceptron)classifier).setHiddenLayers("12"); /* J48 TREE */ //classifier = new weka.classifiers.trees.J48(); /* IB1 NEAREST NEIGHBOUR */ //classifier = new weka.classifiers.lazy.IB1(); /* RANDOM FOREST */ classifier = new weka.classifiers.trees.RandomForest(); classifier.buildClassifier(playerData); Debug.Log("Initialized Classifier"); } catch (java.lang.Exception ex) { Debug.LogError(ex.getMessage()); } }
/// <summary> Method for testing this class. /// /// </summary> /// <param name="argv">should contain one element: the name of an ARFF file /// </param> //@ requires argv != null; //@ requires argv.length == 1; //@ requires argv[0] != null; public static void test(System.String[] argv) { Instances instances, secondInstances, train, test, empty; //Instance instance; //UPGRADE_TODO: The differences in the expected value of parameters for constructor 'java.util.Random.Random' may cause compilation errors. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'" System.Random random = new System.Random((System.Int32) 2); //UPGRADE_ISSUE: Class hierarchy differences between 'java.io.Reader' and 'System.IO.StreamReader' may cause compilation errors. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1186'" System.IO.StreamReader reader; int start, num; //double newWeight; FastVector testAtts, testVals; int i, j; try { if (argv.Length > 1) { throw (new System.Exception("Usage: Instances [<filename>]")); } // Creating set of instances from scratch testVals = new FastVector(2); testVals.addElement("first_value"); testVals.addElement("second_value"); testAtts = new FastVector(2); testAtts.addElement(new Attribute("nominal_attribute", testVals)); testAtts.addElement(new Attribute("numeric_attribute")); instances = new Instances("test_set", testAtts, 10); instances.add(new Instance(instances.numAttributes())); instances.add(new Instance(instances.numAttributes())); instances.add(new Instance(instances.numAttributes())); instances.ClassIndex = 0; System.Console.Out.WriteLine("\nSet of instances created from scratch:\n"); //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioPrintStreamprintln_javalangObject'" System.Console.Out.WriteLine(instances); if (argv.Length == 1) { System.String filename = argv[0]; //UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'" reader = new System.IO.StreamReader(filename, System.Text.Encoding.Default); // Read first five instances and print them System.Console.Out.WriteLine("\nFirst five instances from file:\n"); instances = new Instances(reader, 1); instances.ClassIndex = instances.numAttributes() - 1; i = 0; while ((i < 5) && (instances.readInstance(reader))) { i++; } //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioPrintStreamprintln_javalangObject'" System.Console.Out.WriteLine(instances); // Read all the instances in the file //UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'" reader = new System.IO.StreamReader(filename, System.Text.Encoding.Default); instances = new Instances(reader); // Make the last attribute be the class instances.ClassIndex = instances.numAttributes() - 1; // Print header and instances. System.Console.Out.WriteLine("\nDataset:\n"); //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioPrintStreamprintln_javalangObject'" System.Console.Out.WriteLine(instances); System.Console.Out.WriteLine("\nClass index: " + instances.classIndex()); } // Test basic methods based on class index. System.Console.Out.WriteLine("\nClass name: " + instances.classAttribute().name()); System.Console.Out.WriteLine("\nClass index: " + instances.classIndex()); System.Console.Out.WriteLine("\nClass is nominal: " + instances.classAttribute().Nominal); System.Console.Out.WriteLine("\nClass is numeric: " + instances.classAttribute().Numeric); System.Console.Out.WriteLine("\nClasses:\n"); for (i = 0; i < instances.numClasses(); i++) { System.Console.Out.WriteLine(instances.classAttribute().value_Renamed(i)); } System.Console.Out.WriteLine("\nClass values and labels of instances:\n"); for (i = 0; i < instances.numInstances(); i++) { Instance inst = instances.instance(i); System.Console.Out.Write(inst.classValue() + "\t"); System.Console.Out.Write(inst.toString(inst.classIndex())); if (instances.instance(i).classIsMissing()) { System.Console.Out.WriteLine("\tis missing"); } else { System.Console.Out.WriteLine(); } } // Create random weights. System.Console.Out.WriteLine("\nCreating random weights for instances."); for (i = 0; i < instances.numInstances(); i++) { instances.instance(i).Weight = random.NextDouble(); } // Print all instances and their weights (and the sum of weights). System.Console.Out.WriteLine("\nInstances and their weights:\n"); System.Console.Out.WriteLine(instances.instancesAndWeights()); System.Console.Out.Write("\nSum of weights: "); System.Console.Out.WriteLine(instances.sumOfWeights()); // Insert an attribute secondInstances = new Instances(instances); Attribute testAtt = new Attribute("Inserted"); secondInstances.insertAttributeAt(testAtt, 0); System.Console.Out.WriteLine("\nSet with inserted attribute:\n"); //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioPrintStreamprintln_javalangObject'" System.Console.Out.WriteLine(secondInstances); System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name()); // Delete the attribute secondInstances.deleteAttributeAt(0); System.Console.Out.WriteLine("\nSet with attribute deleted:\n"); //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioPrintStreamprintln_javalangObject'" System.Console.Out.WriteLine(secondInstances); System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name()); // Test if headers are equal System.Console.Out.WriteLine("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n"); // Print data in internal format. System.Console.Out.WriteLine("\nData (internal values):\n"); for (i = 0; i < instances.numInstances(); i++) { for (j = 0; j < instances.numAttributes(); j++) { if (instances.instance(i).isMissing(j)) { System.Console.Out.Write("? "); } else { System.Console.Out.Write(instances.instance(i).value_Renamed(j) + " "); } } System.Console.Out.WriteLine(); } // Just print header System.Console.Out.WriteLine("\nEmpty dataset:\n"); empty = new Instances(instances, 0); //UPGRADE_TODO: Method 'java.io.PrintStream.println' was converted to 'System.Console.Out.WriteLine' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioPrintStreamprintln_javalangObject'" System.Console.Out.WriteLine(empty); System.Console.Out.WriteLine("\nClass name: " + empty.classAttribute().name()); // Create copy and rename an attribute and a value (if possible) if (empty.classAttribute().Nominal) { Instances copy = new Instances(empty, 0); copy.renameAttribute(copy.classAttribute(), "new_name"); copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value_Renamed(0), "new_val_name"); System.Console.Out.WriteLine("\nDataset with names changed:\n" + copy); System.Console.Out.WriteLine("\nOriginal dataset:\n" + empty); } // Create and prints subset of instances. start = instances.numInstances() / 4; num = instances.numInstances() / 2; System.Console.Out.Write("\nSubset of dataset: "); System.Console.Out.WriteLine(num + " instances from " + (start + 1) + ". instance"); secondInstances = new Instances(instances, start, num); System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name()); // Print all instances and their weights (and the sum of weights). System.Console.Out.WriteLine("\nInstances and their weights:\n"); System.Console.Out.WriteLine(secondInstances.instancesAndWeights()); System.Console.Out.Write("\nSum of weights: "); System.Console.Out.WriteLine(secondInstances.sumOfWeights()); // Create and print training and test sets for 3-fold // cross-validation. System.Console.Out.WriteLine("\nTrain and test folds for 3-fold CV:"); if (instances.classAttribute().Nominal) { instances.stratify(3); } for (j = 0; j < 3; j++) { //UPGRADE_TODO: The differences in the expected value of parameters for constructor 'java.util.Random.Random' may cause compilation errors. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'" train = instances.trainCV(3, j, new System.Random((System.Int32) 1)); test = instances.testCV(3, j); // Print all instances and their weights (and the sum of weights). System.Console.Out.WriteLine("\nTrain: "); System.Console.Out.WriteLine("\nInstances and their weights:\n"); System.Console.Out.WriteLine(train.instancesAndWeights()); System.Console.Out.Write("\nSum of weights: "); System.Console.Out.WriteLine(train.sumOfWeights()); System.Console.Out.WriteLine("\nClass name: " + train.classAttribute().name()); System.Console.Out.WriteLine("\nTest: "); System.Console.Out.WriteLine("\nInstances and their weights:\n"); System.Console.Out.WriteLine(test.instancesAndWeights()); System.Console.Out.Write("\nSum of weights: "); System.Console.Out.WriteLine(test.sumOfWeights()); System.Console.Out.WriteLine("\nClass name: " + test.classAttribute().name()); } // Randomize instances and print them. System.Console.Out.WriteLine("\nRandomized dataset:"); instances.randomize(random); // Print all instances and their weights (and the sum of weights). System.Console.Out.WriteLine("\nInstances and their weights:\n"); System.Console.Out.WriteLine(instances.instancesAndWeights()); System.Console.Out.Write("\nSum of weights: "); System.Console.Out.WriteLine(instances.sumOfWeights()); // Sort instances according to first attribute and // print them. System.Console.Out.Write("\nInstances sorted according to first attribute:\n "); instances.sort(0); // Print all instances and their weights (and the sum of weights). System.Console.Out.WriteLine("\nInstances and their weights:\n"); System.Console.Out.WriteLine(instances.instancesAndWeights()); System.Console.Out.Write("\nSum of weights: "); System.Console.Out.WriteLine(instances.sumOfWeights()); } catch (System.Exception) { //.WriteStackTrace(e, Console.Error); } }
// Test the classification result of each map that a user played, // with the data available as if they were playing through it public static void classifyTest(String dataString, String playerID) { try { java.io.StringReader stringReader = new java.io.StringReader(dataString); java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader); /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/ //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff"); weka.core.Instances thisData = new weka.core.Instances(buffReader); //source.getDataSet(); if (thisData.classIndex() == -1) thisData.setClassIndex(thisData.numAttributes() - 1); weka.core.Instances thisUniqueData = new weka.core.Instances(thisData); if (thisUniqueData.classIndex() == -1) thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1); thisUniqueData.delete(); if (allUniqueData == null) { allUniqueData = new weka.core.Instances(thisData); if (allUniqueData.classIndex() == -1) allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1); allUniqueData.delete(); } weka.core.InstanceComparator com = new weka.core.InstanceComparator(false); for (int i = 0; i < thisData.numInstances(); i++) { bool dup = false; for (int j = 0; j < allUniqueData.numInstances(); j++) { if (com.compare(thisData.instance(i),allUniqueData.instance(j)) == 0) { Debug.Log("Duplicate found!"); dup = true; break; } } if (!dup) allUniqueData.add(thisData.instance(i)); else dupInstances++; } for (int i = 0; i < thisData.numInstances(); i++) { bool dup = false; for (int j = 0; j < thisUniqueData.numInstances(); j++) { if (com.compare(thisData.instance(i),thisUniqueData.instance(j)) == 0) { Debug.Log("Duplicate found!"); dup = true; break; } } if (!dup) thisUniqueData.add(thisData.instance(i)); else dupInstancesSamePlayer++; } //Debug.Log("All Data Instance Count = " + thisData.numInstances()); //Debug.Log("Unique Data Instance Count = " + thisUniqueData.numInstances()); //Debug.Log("Done!"); } catch (java.lang.Exception ex) { Debug.LogError(ex.getMessage()); } }
/// <summary> Performs one boosting iteration.</summary> private void performIteration(double[][] trainYs, double[][] trainFs, double[][] probs, Instances data, double origSumOfWeights) { if (m_Debug) { System.Console.Error.WriteLine("Training classifier " + (m_NumGenerated + 1)); } // Build the new models for (int j = 0; j < m_NumClasses; j++) { if (m_Debug) { System.Console.Error.WriteLine("\t...for class " + (j + 1) + " (" + m_ClassAttribute.name() + "=" + m_ClassAttribute.value_Renamed(j) + ")"); } // Make copy because we want to save the weights Instances boostData = new Instances(data); // Set instance pseudoclass and weights for (int i = 0; i < probs.Length; i++) { // Compute response and weight double p = probs[i][j]; double z, actual = trainYs[i][j]; if (actual == 1 - m_Offset) { z = 1.0 / p; if (z > Z_MAX) { // threshold z = Z_MAX; } } else { z = (- 1.0) / (1.0 - p); if (z < - Z_MAX) { // threshold z = - Z_MAX; } } double w = (actual - p) / z; // Set values for instance Instance current = boostData.instance(i); current.setValue(boostData.classIndex(), z); current.Weight = current.weight() * w; } // Scale the weights (helps with some base learners) double sumOfWeights = boostData.sumOfWeights(); double scalingFactor = (double) origSumOfWeights / sumOfWeights; for (int i = 0; i < probs.Length; i++) { Instance current = boostData.instance(i); current.Weight = current.weight() * scalingFactor; } // Select instances to train the classifier on Instances trainData = boostData; if (m_WeightThreshold < 100) { trainData = selectWeightQuantile(boostData, (double) m_WeightThreshold / 100); } else { if (m_UseResampling) { double[] weights = new double[boostData.numInstances()]; for (int kk = 0; kk < weights.Length; kk++) { weights[kk] = boostData.instance(kk).weight(); } trainData = boostData.resampleWithWeights(m_RandomInstance, weights); } } // Build the classifier m_Classifiers[j][m_NumGenerated].buildClassifier(trainData); } // Evaluate / increment trainFs from the classifier for (int i = 0; i < trainFs.Length; i++) { double[] pred = new double[m_NumClasses]; double predSum = 0; for (int j = 0; j < m_NumClasses; j++) { pred[j] = m_Shrinkage * m_Classifiers[j][m_NumGenerated].classifyInstance(data.instance(i)); predSum += pred[j]; } predSum /= m_NumClasses; for (int j = 0; j < m_NumClasses; j++) { trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses - 1) / m_NumClasses; } } m_NumGenerated++; // Compute the current probability estimates for (int i = 0; i < trainYs.Length; i++) { probs[i] = Calculateprobs(trainFs[i]); } }
/// <summary> Builds the boosted classifier</summary> public virtual void buildClassifier(Instances data) { m_RandomInstance = new Random(m_Seed); Instances boostData; int classIndex = data.classIndex(); if (data.classAttribute().Numeric) { throw new Exception("LogitBoost can't handle a numeric class!"); } if (m_Classifier == null) { throw new System.Exception("A base classifier has not been specified!"); } if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling) { m_UseResampling = true; } if (data.checkForStringAttributes()) { throw new Exception("Cannot handle string attributes!"); } if (m_Debug) { System.Console.Error.WriteLine("Creating copy of the training data"); } m_NumClasses = data.numClasses(); m_ClassAttribute = data.classAttribute(); // Create a copy of the data data = new Instances(data); data.deleteWithMissingClass(); // Create the base classifiers if (m_Debug) { System.Console.Error.WriteLine("Creating base classifiers"); } m_Classifiers = new Classifier[m_NumClasses][]; for (int j = 0; j < m_NumClasses; j++) { m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations); } // Do we want to select the appropriate number of iterations // using cross-validation? int bestNumIterations = this.NumIterations; if (m_NumFolds > 1) { if (m_Debug) { System.Console.Error.WriteLine("Processing first fold."); } // Array for storing the results double[] results = new double[this.NumIterations]; // Iterate throught the cv-runs for (int r = 0; r < m_NumRuns; r++) { // Stratify the data data.randomize(m_RandomInstance); data.stratify(m_NumFolds); // Perform the cross-validation for (int i = 0; i < m_NumFolds; i++) { // Get train and test folds Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance); Instances test = data.testCV(m_NumFolds, i); // Make class numeric Instances trainN = new Instances(train); trainN.ClassIndex = - 1; trainN.deleteAttributeAt(classIndex); trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex); trainN.ClassIndex = classIndex; m_NumericClassData = new Instances(trainN, 0); // Get class values int numInstances = train.numInstances(); double[][] tmpArray = new double[numInstances][]; for (int i2 = 0; i2 < numInstances; i2++) { tmpArray[i2] = new double[m_NumClasses]; } double[][] trainFs = tmpArray; double[][] tmpArray2 = new double[numInstances][]; for (int i3 = 0; i3 < numInstances; i3++) { tmpArray2[i3] = new double[m_NumClasses]; } double[][] trainYs = tmpArray2; for (int j = 0; j < m_NumClasses; j++) { for (int k = 0; k < numInstances; k++) { trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses); } } // Perform iterations double[][] probs = initialProbs(numInstances); m_NumGenerated = 0; double sumOfWeights = train.sumOfWeights(); for (int j = 0; j < this.NumIterations; j++) { performIteration(trainYs, trainFs, probs, trainN, sumOfWeights); Evaluation eval = new Evaluation(train); eval.evaluateModel(this, test); results[j] += eval.correct(); } } } // Find the number of iterations with the lowest error //UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'" double bestResult = - System.Double.MaxValue; for (int j = 0; j < this.NumIterations; j++) { if (results[j] > bestResult) { bestResult = results[j]; bestNumIterations = j; } } if (m_Debug) { System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult); } } // Build classifier on all the data int numInstances2 = data.numInstances(); double[][] trainFs2 = new double[numInstances2][]; for (int i4 = 0; i4 < numInstances2; i4++) { trainFs2[i4] = new double[m_NumClasses]; } double[][] trainYs2 = new double[numInstances2][]; for (int i5 = 0; i5 < numInstances2; i5++) { trainYs2[i5] = new double[m_NumClasses]; } for (int j = 0; j < m_NumClasses; j++) { for (int i = 0, k = 0; i < numInstances2; i++, k++) { trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses); } } // Make class numeric data.ClassIndex = - 1; data.deleteAttributeAt(classIndex); data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex); data.ClassIndex = classIndex; m_NumericClassData = new Instances(data, 0); // Perform iterations double[][] probs2 = initialProbs(numInstances2); double logLikelihood = CalculateLogLikelihood(trainYs2, probs2); m_NumGenerated = 0; if (m_Debug) { System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood); } double sumOfWeights2 = data.sumOfWeights(); for (int j = 0; j < bestNumIterations; j++) { double previousLoglikelihood = logLikelihood; performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2); logLikelihood = CalculateLogLikelihood(trainYs2, probs2); if (m_Debug) { System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood); } if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision) { return ; } } }
/// <summary> Generates the classifier. /// /// </summary> /// <param name="instances">set of instances serving as training data /// </param> /// <exception cref="Exception">if the classifier has not been generated successfully /// </exception> public override void buildClassifier(Instances instances) { //UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'" double bestVal = System.Double.MaxValue, currVal; //UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'" double bestPoint = - System.Double.MaxValue, sum; int bestAtt = - 1, numClasses; if (instances.checkForStringAttributes()) { throw new Exception("Can't handle string attributes!"); } double[][] bestDist = new double[3][]; for (int i = 0; i < 3; i++) { bestDist[i] = new double[instances.numClasses()]; } m_Instances = new Instances(instances); m_Instances.deleteWithMissingClass(); if (m_Instances.numInstances() == 0) { throw new System.ArgumentException("No instances without missing " + "class values in training file!"); } if (instances.numAttributes() == 1) { throw new System.ArgumentException("Attribute missing. Need at least one " + "attribute other than class attribute!"); } if (m_Instances.classAttribute().Nominal) { numClasses = m_Instances.numClasses(); } else { numClasses = 1; } // For each attribute bool first = true; for (int i = 0; i < m_Instances.numAttributes(); i++) { if (i != m_Instances.classIndex()) { // Reserve space for distribution. double[][] tmpArray = new double[3][]; for (int i2 = 0; i2 < 3; i2++) { tmpArray[i2] = new double[numClasses]; } m_Distribution = tmpArray; // Compute value of criterion for best split on attribute if (m_Instances.attribute(i).Nominal) { currVal = findSplitNominal(i); } else { currVal = findSplitNumeric(i); } if ((first) || (currVal < bestVal)) { bestVal = currVal; bestAtt = i; bestPoint = m_SplitPoint; for (int j = 0; j < 3; j++) { Array.Copy(m_Distribution[j], 0, bestDist[j], 0, numClasses); } } // First attribute has been investigated first = false; } } // Set attribute, split point and distribution. m_AttIndex = bestAtt; m_SplitPoint = bestPoint; m_Distribution = bestDist; if (m_Instances.classAttribute().Nominal) { for (int i = 0; i < m_Distribution.Length; i++) { double sumCounts = Utils.sum(m_Distribution[i]); if (sumCounts == 0) { // This means there were only missing attribute values Array.Copy(m_Distribution[2], 0, m_Distribution[i], 0, m_Distribution[2].Length); Utils.normalize(m_Distribution[i]); } else { Utils.normalize(m_Distribution[i], sumCounts); } } } // Save memory m_Instances = new Instances(m_Instances, 0); }