/// <summary>
/// Applies the cost matrix to a set of instances. If a random number generator is
/// supplied the instances will be resampled, otherwise they will be reweighted.
/// Adapted from code once sitting in Instances.java
/// </summary>
/// <param name="data">the instances to reweight.</param>
/// <param name="random">a random number generator for resampling; if null the
/// instances are reweighted instead.</param>
/// <returns>a new dataset reflecting the cost of misclassification.</returns>
/// <exception cref="System.Exception">if the data has no class index set, the matrix
/// size differs from the number of classes, or the matrix contains a negative cost.
/// </exception>
public virtual Instances applyCostMatrix(Instances data, System.Random random)
{
    if (data.classIndex() < 0)
    {
        throw new System.Exception("Class index is not set!");
    }

    int numClasses = data.numClasses();
    if (size() != numClasses)
    {
        throw new System.Exception("Misclassification cost matrix has " + "wrong format!");
    }

    int numInstances = data.numInstances();

    // Total instance weight per class.
    double[] weightFactor = new double[numClasses];
    double[] weightOfInstancesInClass = new double[numClasses];
    for (int j = 0; j < numInstances; j++)
    {
        Instance current = data.instance(j);
        // The class value is narrowed to int so it can be used as an array index.
        weightOfInstancesInClass[(int) current.classValue()] += current.weight();
    }
    double sumOfWeights = Utils.sum(weightOfInstancesInClass);

    // Normalize the matrix if not already: as soon as a non-zero diagonal entry
    // is found, delegate the whole computation to a normalized copy.
    for (int i = 0; i < size(); i++)
    {
        if (!Utils.eq(getXmlElement(i, i), 0))
        {
            CostMatrix normMatrix = new CostMatrix(this);
            normMatrix.normalize();
            return normMatrix.applyCostMatrix(data, random);
        }
    }

    // Using Kai Ming Ting's formula for deriving weights for
    // the classes and Breiman's heuristic for multiclass
    // problems.
    double sumOfWeightFactors = 0;
    for (int i = 0; i < numClasses; i++)
    {
        double sumOfMissClassWeights = 0;
        for (int j = 0; j < numClasses; j++)
        {
            if (Utils.sm(getXmlElement(i, j), 0))
            {
                throw new System.Exception("Neg. weights in misclassification " + "cost matrix!");
            }
            sumOfMissClassWeights += getXmlElement(i, j);
        }
        weightFactor[i] = sumOfMissClassWeights * sumOfWeights;
        sumOfWeightFactors += sumOfMissClassWeights * weightOfInstancesInClass[i];
    }

    // NOTE(review): if every row of the matrix sums to zero, sumOfWeightFactors is 0
    // and the division below yields NaN factors. Behavior is left as-is; confirm
    // whether callers can ever pass an all-zero matrix.
    for (int i = 0; i < numClasses; i++)
    {
        weightFactor[i] /= sumOfWeightFactors;
    }

    // Store new weights: original weight scaled by the factor of the instance's class.
    double[] weightOfInstances = new double[numInstances];
    for (int i = 0; i < numInstances; i++)
    {
        Instance current = data.instance(i);
        // The class value is narrowed to int so it can be used as an array index.
        weightOfInstances[i] = current.weight() * weightFactor[(int) current.classValue()];
    }

    // Change instances weight or do resampling
    if (random != null)
    {
        return data.resampleWithWeights(random, weightOfInstances);
    }

    Instances instances = new Instances(data);
    for (int i = 0; i < numInstances; i++)
    {
        instances.instance(i).Weight = weightOfInstances[i];
    }
    return instances;
}