/// <summary>
/// Recomputes the per-class tallies in <c>m_counts</c> from the supplied instances.
/// Every slot is reset first; then each instance whose class value is 2 adds
/// <c>m_tp / m_sl</c> to slot 2, class value 0 adds one to slot 0, and any other
/// class value adds one to slot 1.
/// </summary>
/// <param name="instances">the instances to tally; asserted (debug) to have exactly three classes</param>
public void Build(weka.core.Instances instances)
{
    WekaUtils.DebugAssert(instances.numClasses() == 3, "instance's numClasses should be 3.");

    // The array is reused between calls, so wipe the previous tallies.
    for (int slot = 0; slot < m_counts.Length; slot++)
    {
        m_counts[slot] = 0;
    }

    // NOTE(review): if m_tp and m_sl are both integral this is an integer division - confirm.
    double classTwoIncrement = m_tp / m_sl;

    foreach (weka.core.Instance current in instances)
    {
        switch ((int)current.classValue())
        {
            case 2:
                m_counts[2] += classTwoIncrement;
                break;
            case 0:
                m_counts[0]++;
                break;
            default:
                // Everything that is neither 0 nor 2 is counted in slot 1.
                m_counts[1]++;
                break;
        }
    }
}
/// <summary>
/// Generates the classifier: accumulates the (scaled) instance weight per class into
/// <c>m_counts</c> and stores the running, normalized totals in <c>m_normalCounts</c>.
/// </summary>
/// <param name="instances">set of instances serving as training data</param>
/// <exception cref="Exception">if the classifier has not been generated successfully</exception>
public override void buildClassifier(Instances instances)
{
    // Can the classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Work on a copy with the missing-class instances removed.
    Instances training = new Instances(instances);
    training.deleteWithMissingClass();

    WekaUtils.DebugAssert(training.numClasses() == 3, "instance's numClasses should be 3.");

    // Freshly allocated double arrays are already zero-filled.
    m_counts = new double[training.numClasses()];
    m_normalCounts = new double[training.numClasses()];

    // NOTE(review): if m_tp and m_sl are both integral this is an integer division - confirm.
    double classTwoScale = m_tp / m_sl;

    double totalWeight = 0;
    foreach (Instance current in training)
    {
        int classIndex = (int)current.classValue();

        // Class 2 contributes its weight scaled by m_tp / m_sl; other classes contribute the plain weight.
        double contribution;
        if (classIndex == 2)
        {
            contribution = current.weight() * classTwoScale;
        }
        else
        {
            contribution = current.weight();
        }

        m_counts[classIndex] += contribution;
        totalWeight += contribution;
    }

    // m_normalCounts[k] = sum over j <= k of m_counts[j] / totalWeight.
    double running = 0;
    for (int k = 0; k < m_counts.Length; ++k)
    {
        running = m_counts[k] / totalWeight + running;
        m_normalCounts[k] = running;
    }
}
/// <summary>
/// Generates the classifier by counting, per class value, the training instances
/// whose class is not missing. Every instance contributes exactly 1 to its class
/// slot in <c>m_counts</c>, regardless of its weight.
/// </summary>
/// <param name="instances">set of instances serving as training data</param>
/// <exception cref="Exception">if the classifier has not been generated successfully</exception>
public override void buildClassifier(Instances instances)
{
    // Can the classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Work on a copy with the missing-class instances removed. The counting loop
    // below must run over this copy: iterating the unfiltered input would feed a
    // missing class value into the (int) cast and the array index.
    var trainInstances = new Instances(instances);
    trainInstances.deleteWithMissingClass();

    WekaUtils.DebugAssert(trainInstances.numClasses() == 3, "instance's numClasses should be 3.");

    // A freshly allocated double array is already zero-filled.
    m_counts = new double[trainInstances.numClasses()];

    foreach (Instance instance in trainInstances)
    {
        int v = (int)instance.classValue();
        m_counts[v] += 1;

        // NOTE(review): sumOfWeights is not declared in this method - presumably a
        // field of the enclosing class; it is not reset here. Confirm that is intended.
        sumOfWeights += 1;
    }
}
/// <summary>
/// Applies the cost matrix to a set of instances. If a random number generator is
/// supplied the instances are resampled using the derived weights; otherwise a copy
/// of the data is returned with every instance reweighted.
/// Adapted from code once sitting in Instances.java.
/// </summary>
/// <param name="data">the instances to reweight.</param>
/// <param name="random">a random number generator for resampling; if null the instances are reweighted instead.</param>
/// <returns>a new dataset reflecting the cost of misclassification.</returns>
/// <exception cref="Exception">if the data has no class index set, the matrix size differs
/// from the number of classes, or the matrix contains a negative entry.</exception>
public virtual Instances applyCostMatrix(Instances data, System.Random random)
{
    if (data.classIndex() < 0)
    {
        throw new System.Exception("Class index is not set!");
    }

    if (size() != data.numClasses())
    {
        throw new System.Exception("Misclassification cost matrix has " + "wrong format!");
    }

    int classCount = data.numClasses();
    int instanceCount = data.numInstances();

    // Total instance weight per class value (the class value is narrowed to an int index).
    double[] classWeightTotals = new double[classCount];
    for (int n = 0; n < instanceCount; n++)
    {
        classWeightTotals[(int) data.instance(n).classValue()] += data.instance(n).weight();
    }
    double totalWeight = Utils.sum(classWeightTotals);

    // Normalize the matrix if not already: any non-zero diagonal entry hands the
    // whole job over to a normalized copy of this matrix.
    for (int d = 0; d < size(); d++)
    {
        if (!Utils.eq(getXmlElement(d, d), 0))
        {
            CostMatrix normalized = new CostMatrix(this);
            normalized.normalize();
            return normalized.applyCostMatrix(data, random);
        }
    }

    // Using Kai Ming Ting's formula for deriving weights for the classes and
    // Breiman's heuristic for multiclass problems.
    double[] classFactors = new double[classCount];
    double factorNormalizer = 0;
    for (int row = 0; row < classCount; row++)
    {
        double rowCost = 0;
        for (int col = 0; col < classCount; col++)
        {
            if (Utils.sm(getXmlElement(row, col), 0))
            {
                throw new System.Exception("Neg. weights in misclassification " + "cost matrix!");
            }
            rowCost += getXmlElement(row, col);
        }

        classFactors[row] = rowCost * totalWeight;
        factorNormalizer += rowCost * classWeightTotals[row];
    }

    for (int row = 0; row < classCount; row++)
    {
        classFactors[row] /= factorNormalizer;
    }

    // New weight of each instance: its current weight times the factor of its class.
    double[] scaledWeights = new double[instanceCount];
    for (int n = 0; n < instanceCount; n++)
    {
        scaledWeights[n] = data.instance(n).weight() * classFactors[(int) data.instance(n).classValue()];
    }

    // Without a random source, reweight a copy; with one, resample.
    if (random == null)
    {
        Instances reweighted = new Instances(data);
        for (int n = 0; n < instanceCount; n++)
        {
            reweighted.instance(n).Weight = scaledWeights[n];
        }
        return reweighted;
    }

    return data.resampleWithWeights(random, scaledWeights);
}