/* Use when the player logs in to initially create the classifier with data from server */ public void InitializeClassifier(String dataString) { try { java.io.StringReader stringReader = new java.io.StringReader(dataString); java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader); playerData = new weka.core.Instances(buffReader); /* State where in each Instance the class attribute is, if its not already specified by the file */ if (playerData.classIndex() == -1) { playerData.setClassIndex(playerData.numAttributes() - 1); } /* NAIVE BAYES */ //classifier = new weka.classifiers.bayes.NaiveBayes(); /* NEURAL NET */ //classifier = new weka.classifiers.functions.MultilayerPerceptron(); //((weka.classifiers.functions.MultilayerPerceptron)classifier).setHiddenLayers("12"); /* J48 TREE */ //classifier = new weka.classifiers.trees.J48(); /* IB1 NEAREST NEIGHBOUR */ //classifier = new weka.classifiers.lazy.IB1(); /* RANDOM FOREST */ classifier = new weka.classifiers.trees.RandomForest(); classifier.buildClassifier(playerData); Debug.Log("Initialized Classifier"); } catch (java.lang.Exception ex) { Debug.LogError(ex.getMessage()); } }
/// <summary> Applies the cost matrix to a set of instances. If a random number generator is /// supplied the instances will be resampled, otherwise they will be rewighted. /// Adapted from code once sitting in Instances.java /// /// </summary> /// <param name="data">the instances to reweight. /// </param> /// <param name="random">a random number generator for resampling, if null then instances are /// rewighted. /// </param> /// <returns> a new dataset reflecting the cost of misclassification. /// </returns> /// <exception cref="Exception">if the data has no class or the matrix in inappropriate. /// </exception> public virtual Instances applyCostMatrix(Instances data, System.Random random) { double sumOfWeightFactors = 0, sumOfMissClassWeights, sumOfWeights; double[] weightOfInstancesInClass, weightFactor, weightOfInstances; Instances newData; if (data.classIndex() < 0) { throw new System.Exception("Class index is not set!"); } if (size() != data.numClasses()) { throw new System.Exception("Misclassification cost matrix has " + "wrong format!"); } weightFactor = new double[data.numClasses()]; weightOfInstancesInClass = new double[data.numClasses()]; for (int j = 0; j < data.numInstances(); j++) { //UPGRADE_WARNING: Data types in Visual C# might be different. Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'" weightOfInstancesInClass[(int) data.instance(j).classValue()] += data.instance(j).weight(); } sumOfWeights = Utils.sum(weightOfInstancesInClass); // normalize the matrix if not already for (int i = 0; i < size(); i++) if (!Utils.eq(getXmlElement(i, i), 0)) { CostMatrix normMatrix = new CostMatrix(this); normMatrix.normalize(); return normMatrix.applyCostMatrix(data, random); } for (int i = 0; i < data.numClasses(); i++) { // Using Kai Ming Ting's formula for deriving weights for // the classes and Breiman's heuristic for multiclass // problems. sumOfMissClassWeights = 0; for (int j = 0; j < data.numClasses(); j++) { if (Utils.sm(getXmlElement(i, j), 0)) { throw new System.Exception("Neg. weights in misclassification " + "cost matrix!"); } sumOfMissClassWeights += getXmlElement(i, j); } weightFactor[i] = sumOfMissClassWeights * sumOfWeights; sumOfWeightFactors += sumOfMissClassWeights * weightOfInstancesInClass[i]; } for (int i = 0; i < data.numClasses(); i++) { weightFactor[i] /= sumOfWeightFactors; } // Store new weights weightOfInstances = new double[data.numInstances()]; for (int i = 0; i < data.numInstances(); i++) { //UPGRADE_WARNING: Data types in Visual C# might be different. Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'" weightOfInstances[i] = data.instance(i).weight() * weightFactor[(int) data.instance(i).classValue()]; } // Change instances weight or do resampling if (random != null) { return data.resampleWithWeights(random, weightOfInstances); } else { Instances instances = new Instances(data); for (int i = 0; i < data.numInstances(); i++) { instances.instance(i).Weight = weightOfInstances[i]; } return instances; } }
// Test the classification result of each map that a user played, // with the data available as if they were playing through it public static void classifyTest(String dataString, String playerID) { String results = ""; try { java.io.StringReader stringReader = new java.io.StringReader(dataString); java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader); /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/ //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff"); weka.core.Instances data = new weka.core.Instances(buffReader); //source.getDataSet(); // setting class attribute if the data format does not provide this information // For example, the XRFF format saves the class attribute information as well if (data.classIndex() == -1) { data.setClassIndex(data.numAttributes() - 1); } weka.classifiers.Classifier cl; for (int i = 3; i < data.numInstances(); i++) { cl = new weka.classifiers.bayes.NaiveBayes(); //cl = new weka.classifiers.trees.J48(); //cl = new weka.classifiers.lazy.IB1(); //cl = new weka.classifiers.functions.MultilayerPerceptron(); ((weka.classifiers.functions.MultilayerPerceptron)cl).setHiddenLayers("12"); weka.core.Instances subset = new weka.core.Instances(data, 0, i); cl.buildClassifier(subset); weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset); eval.crossValidateModel(cl, subset, 3, new java.util.Random(1)); results = results + eval.pctCorrect(); // For accuracy measurement /* For Mathews Correlation Coefficient */ //double TP = eval.numTruePositives(1); //double FP = eval.numFalsePositives(1); //double TN = eval.numTrueNegatives(1); //double FN = eval.numFalseNegatives(1); //double correlationCoeff = ((TP*TN)-(FP*FN))/Math.Sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)); //results = results + correlationCoeff; if (i != data.numInstances() - 1) { results = results + ", "; } if (i == data.numInstances() - 1) { Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1)); } } } catch (java.lang.Exception ex) { Debug.LogError(ex.getMessage()); } // Write values to file for a matlab read // For accuracy StreamWriter writer = new StreamWriter("DataForMatlab/" + playerID + "_CrossFoldValidations_NeuralNet.txt"); //StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldCorrCoeff.txt"); // For mathews cc writer.WriteLine(results); writer.Close(); Debug.Log(playerID + " has been written to file"); }
/// <summary> Sets the format of the input instances. /// /// </summary> /// <param name="instanceInfo">an Instances object containing the input /// instance structure (any instances contained in the object are /// ignored - only the structure is required). /// </param> /// <returns> true if the outputFormat may be collected immediately /// </returns> /// <exception cref="Exception">if the input format can't be set /// successfully /// </exception> public override bool setInputFormat(Instances instanceInfo) { if (instanceInfo.classIndex() < 0 || !instanceInfo.classAttribute().Nominal) { throw new System.ArgumentException("Supervised resample requires nominal class"); } base.setInputFormat(instanceInfo); setOutputFormat(instanceInfo); return true; }
/// <summary> Sets the format of the input instances. /// /// </summary> /// <param name="instanceInfo">an Instances object containing the input instance /// structure (any instances contained in the object are ignored - only the /// structure is required). /// </param> /// <returns> true if the outputFormat may be collected immediately /// </returns> /// <exception cref="Exception">if the format couldn't be set successfully /// </exception> public override bool setInputFormat(Instances instanceInfo) { base.setInputFormat(instanceInfo); m_SelectCols.Upper = instanceInfo.numAttributes() - 1; // Create the output buffer FastVector attributes = new FastVector(); int outputClass = - 1; m_SelectedAttributes = m_SelectCols.Selection; int inStrKeepLen = 0; int[] inStrKeep = new int[m_SelectedAttributes.Length]; for (int i = 0; i < m_SelectedAttributes.Length; i++) { int current = m_SelectedAttributes[i]; if (instanceInfo.classIndex() == current) { outputClass = attributes.size(); } Attribute keep = (Attribute) instanceInfo.attribute(current).copy(); if (keep.type() == Attribute.STRING) { inStrKeep[inStrKeepLen++] = current; } attributes.addElement(keep); } m_InputStringIndex = new int[inStrKeepLen]; Array.Copy(inStrKeep, 0, m_InputStringIndex, 0, inStrKeepLen); Instances outputFormat = new Instances(instanceInfo.relationName(), attributes, 0); outputFormat.ClassIndex = outputClass; setOutputFormat(outputFormat); return true; }
// Test the classification result of each map that a user played, // with the data available as if they were playing through it public static void classifyTest(String dataString, String playerID) { try { java.io.StringReader stringReader = new java.io.StringReader(dataString); java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader); /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/ //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff"); weka.core.Instances thisData = new weka.core.Instances(buffReader); //source.getDataSet(); if (thisData.classIndex() == -1) { thisData.setClassIndex(thisData.numAttributes() - 1); } weka.core.Instances thisUniqueData = new weka.core.Instances(thisData); if (thisUniqueData.classIndex() == -1) { thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1); } thisUniqueData.delete(); if (allUniqueData == null) { allUniqueData = new weka.core.Instances(thisData); if (allUniqueData.classIndex() == -1) { allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1); } allUniqueData.delete(); } weka.core.InstanceComparator com = new weka.core.InstanceComparator(false); for (int i = 0; i < thisData.numInstances(); i++) { bool dup = false; for (int j = 0; j < allUniqueData.numInstances(); j++) { if (com.compare(thisData.instance(i), allUniqueData.instance(j)) == 0) { Debug.Log("Duplicate found!"); dup = true; break; } } if (!dup) { allUniqueData.add(thisData.instance(i)); } else { dupInstances++; } } for (int i = 0; i < thisData.numInstances(); i++) { bool dup = false; for (int j = 0; j < thisUniqueData.numInstances(); j++) { if (com.compare(thisData.instance(i), thisUniqueData.instance(j)) == 0) { Debug.Log("Duplicate found!"); dup = true; break; } } if (!dup) { thisUniqueData.add(thisData.instance(i)); } else { dupInstancesSamePlayer++; } } //Debug.Log("All Data Instance Count = " + thisData.numInstances()); //Debug.Log("Unique Data Instance Count = " + thisUniqueData.numInstances()); //Debug.Log("Done!"); } catch (java.lang.Exception ex) { Debug.LogError(ex.getMessage()); } }