Exemple #1
0
		/// <summary> Applies the cost matrix to a set of instances. If a random number generator is 
		/// supplied the instances will be resampled, otherwise they will be rewighted. 
		/// Adapted from code once sitting in Instances.java
		/// 
		/// </summary>
		/// <param name="data">the instances to reweight.
		/// </param>
		/// <param name="random">a random number generator for resampling, if null then instances are
		/// rewighted.
		/// </param>
		/// <returns> a new dataset reflecting the cost of misclassification.
		/// </returns>
		/// <exception cref="Exception">if the data has no class or the matrix in inappropriate.
		/// </exception>
		public virtual Instances applyCostMatrix(Instances data, System.Random random)
		{
			
			double sumOfWeightFactors = 0, sumOfMissClassWeights, sumOfWeights;
			double[] weightOfInstancesInClass, weightFactor, weightOfInstances;
			Instances newData;
			
			if (data.classIndex() < 0)
			{
				throw new System.Exception("Class index is not set!");
			}
			
			if (size() != data.numClasses())
			{
				throw new System.Exception("Misclassification cost matrix has " + "wrong format!");
			}
			
			weightFactor = new double[data.numClasses()];
			weightOfInstancesInClass = new double[data.numClasses()];
			for (int j = 0; j < data.numInstances(); j++)
			{
				//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
				weightOfInstancesInClass[(int) data.instance(j).classValue()] += data.instance(j).weight();
			}
			sumOfWeights = Utils.sum(weightOfInstancesInClass);
			
			// normalize the matrix if not already
			for (int i = 0; i < size(); i++)
				if (!Utils.eq(getXmlElement(i, i), 0))
				{
					CostMatrix normMatrix = new CostMatrix(this);
					normMatrix.normalize();
					return normMatrix.applyCostMatrix(data, random);
				}
			
			for (int i = 0; i < data.numClasses(); i++)
			{
				
				// Using Kai Ming Ting's formula for deriving weights for 
				// the classes and Breiman's heuristic for multiclass 
				// problems.
				sumOfMissClassWeights = 0;
				for (int j = 0; j < data.numClasses(); j++)
				{
					if (Utils.sm(getXmlElement(i, j), 0))
					{
						throw new System.Exception("Neg. weights in misclassification " + "cost matrix!");
					}
					sumOfMissClassWeights += getXmlElement(i, j);
				}
				weightFactor[i] = sumOfMissClassWeights * sumOfWeights;
				sumOfWeightFactors += sumOfMissClassWeights * weightOfInstancesInClass[i];
			}
			for (int i = 0; i < data.numClasses(); i++)
			{
				weightFactor[i] /= sumOfWeightFactors;
			}
			
			// Store new weights
			weightOfInstances = new double[data.numInstances()];
			for (int i = 0; i < data.numInstances(); i++)
			{
				//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
				weightOfInstances[i] = data.instance(i).weight() * weightFactor[(int) data.instance(i).classValue()];
			}
			
			// Change instances weight or do resampling
			if (random != null)
			{
				return data.resampleWithWeights(random, weightOfInstances);
			}
			else
			{
				Instances instances = new Instances(data);
				for (int i = 0; i < data.numInstances(); i++)
				{
					instances.instance(i).Weight = weightOfInstances[i];
				}
				return instances;
			}
		}