Ejemplo n.º 1
0
		/// <summary> Method for building a classifier tree.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		public virtual void  buildClassifier(Instances data)
		{
			
			if (data.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			data = new Instances(data);
			data.deleteWithMissingClass();
			buildTree(data, false);
		}
		/// <summary> Method for building a pruneable classifier tree.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		public override void  buildClassifier(Instances data)
		{
			
			if (data.classAttribute().Numeric)
				throw new Exception("Class is numeric!");
			if (data.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			data = new Instances(data);
			data.deleteWithMissingClass();
			buildTree(data, m_subtreeRaising);
			collapse();
			if (m_pruneTheTree)
			{
				prune();
			}
			if (m_cleanup)
			{
				cleanup(new Instances(data, 0));
			}
		}
Ejemplo n.º 3
0
		/// <summary> Builds the boosted classifier</summary>
		public virtual void  buildClassifier(Instances data)
		{
			m_RandomInstance = new Random(m_Seed);
			Instances boostData;
			int classIndex = data.classIndex();
			
			if (data.classAttribute().Numeric)
			{
				throw new Exception("LogitBoost can't handle a numeric class!");
			}
			if (m_Classifier == null)
			{
				throw new System.Exception("A base classifier has not been specified!");
			}
			
			if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
			{
				m_UseResampling = true;
			}
			if (data.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Creating copy of the training data");
			}
			
			m_NumClasses = data.numClasses();
			m_ClassAttribute = data.classAttribute();
			
			// Create a copy of the data 
			data = new Instances(data);
			data.deleteWithMissingClass();
			
			// Create the base classifiers
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Creating base classifiers");
			}
			m_Classifiers = new Classifier[m_NumClasses][];
			for (int j = 0; j < m_NumClasses; j++)
			{
				m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
			}
			
			// Do we want to select the appropriate number of iterations
			// using cross-validation?
			int bestNumIterations = this.NumIterations;
			if (m_NumFolds > 1)
			{
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Processing first fold.");
				}
				
				// Array for storing the results
				double[] results = new double[this.NumIterations];
				
				// Iterate throught the cv-runs
				for (int r = 0; r < m_NumRuns; r++)
				{
					
					// Stratify the data
					data.randomize(m_RandomInstance);
					data.stratify(m_NumFolds);
					
					// Perform the cross-validation
					for (int i = 0; i < m_NumFolds; i++)
					{
						
						// Get train and test folds
						Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
						Instances test = data.testCV(m_NumFolds, i);
						
						// Make class numeric
						Instances trainN = new Instances(train);
						trainN.ClassIndex = - 1;
						trainN.deleteAttributeAt(classIndex);
						trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
						trainN.ClassIndex = classIndex;
						m_NumericClassData = new Instances(trainN, 0);
						
						// Get class values
						int numInstances = train.numInstances();
						double[][] tmpArray = new double[numInstances][];
						for (int i2 = 0; i2 < numInstances; i2++)
						{
							tmpArray[i2] = new double[m_NumClasses];
						}
						double[][] trainFs = tmpArray;
						double[][] tmpArray2 = new double[numInstances][];
						for (int i3 = 0; i3 < numInstances; i3++)
						{
							tmpArray2[i3] = new double[m_NumClasses];
						}
						double[][] trainYs = tmpArray2;
						for (int j = 0; j < m_NumClasses; j++)
						{
							for (int k = 0; k < numInstances; k++)
							{
								trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
							}
						}
						
						// Perform iterations
						double[][] probs = initialProbs(numInstances);
						m_NumGenerated = 0;
						double sumOfWeights = train.sumOfWeights();
						for (int j = 0; j < this.NumIterations; j++)
						{
							performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
							Evaluation eval = new Evaluation(train);
							eval.evaluateModel(this, test);
							results[j] += eval.correct();
						}
					}
				}
				
				// Find the number of iterations with the lowest error
				//UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
				double bestResult = - System.Double.MaxValue;
				for (int j = 0; j < this.NumIterations; j++)
				{
					if (results[j] > bestResult)
					{
						bestResult = results[j];
						bestNumIterations = j;
					}
				}
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
				}
			}
			
			// Build classifier on all the data
			int numInstances2 = data.numInstances();
			double[][] trainFs2 = new double[numInstances2][];
			for (int i4 = 0; i4 < numInstances2; i4++)
			{
				trainFs2[i4] = new double[m_NumClasses];
			}
			double[][] trainYs2 = new double[numInstances2][];
			for (int i5 = 0; i5 < numInstances2; i5++)
			{
				trainYs2[i5] = new double[m_NumClasses];
			}
			for (int j = 0; j < m_NumClasses; j++)
			{
				for (int i = 0, k = 0; i < numInstances2; i++, k++)
				{
					trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
				}
			}
			
			// Make class numeric
			data.ClassIndex = - 1;
			data.deleteAttributeAt(classIndex);
			data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
			data.ClassIndex = classIndex;
			m_NumericClassData = new Instances(data, 0);
			
			// Perform iterations
			double[][] probs2 = initialProbs(numInstances2);
            double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
			m_NumGenerated = 0;
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
			}
			double sumOfWeights2 = data.sumOfWeights();
			for (int j = 0; j < bestNumIterations; j++)
			{
				double previousLoglikelihood = logLikelihood;
				performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
                logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
				}
				if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
				{
					return ;
				}
			}
		}
Ejemplo n.º 4
0
		public override void  buildClassifier(Instances insts)
		{
			
			if (insts.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			if (insts.numClasses() > 2)
			{
				throw new System.Exception("Can only handle two-class datasets!");
			}
			if (insts.classAttribute().Numeric)
			{
				throw new Exception("Can't handle a numeric class!");
			}
			
			// Filter data
			m_Train = new Instances(insts);
			m_Train.deleteWithMissingClass();
			m_ReplaceMissingValues = new ReplaceMissingValues();
			m_ReplaceMissingValues.setInputFormat(m_Train);
			m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
			
			m_NominalToBinary = new NominalToBinary();
			m_NominalToBinary.setInputFormat(m_Train);
			m_Train = Filter.useFilter(m_Train, m_NominalToBinary);
			
			/** Randomize training data */
			//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.util.Random.Random'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
			m_Train.randomize(new System.Random((System.Int32) m_Seed));
			
			/** Make space to store perceptrons */
			m_Additions = new int[m_MaxK + 1];
			m_IsAddition = new bool[m_MaxK + 1];
			m_Weights = new int[m_MaxK + 1];
			
			/** Compute perceptrons */
			m_K = 0;
			for (int it = 0; it < m_NumIterations; it++)
			{
				for (int i = 0; i < m_Train.numInstances(); i++)
				{
					Instance inst = m_Train.instance(i);
					if (!inst.classIsMissing())
					{
						int prediction = makePrediction(m_K, inst);
						//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
						int classValue = (int) inst.classValue();
						if (prediction == classValue)
						{
							m_Weights[m_K]++;
						}
						else
						{
							m_IsAddition[m_K] = (classValue == 1);
							m_Additions[m_K] = i;
							m_K++;
							m_Weights[m_K]++;
						}
						if (m_K == m_MaxK)
						{
							//UPGRADE_NOTE: Labeled break statement was changed to a goto statement. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1012'"
							goto out_brk;
						}
					}
				}
			}
			//UPGRADE_NOTE: Label 'out_brk' was added. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1011'"

out_brk: ;
			
		}
Ejemplo n.º 5
0
		/// <summary> Generates the classifier.
		/// 
		/// </summary>
		/// <param name="instances">set of instances serving as training data 
		/// </param>
		/// <exception cref="Exception">if the classifier has not been generated successfully
		/// </exception>
		public override void  buildClassifier(Instances instances)
		{
			//UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
			double bestVal = System.Double.MaxValue, currVal;
			//UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
			double bestPoint = - System.Double.MaxValue, sum;
			int bestAtt = - 1, numClasses;
			
			if (instances.checkForStringAttributes())
			{
				throw new Exception("Can't handle string attributes!");
			}
			
			double[][] bestDist = new double[3][];
			for (int i = 0; i < 3; i++)
			{
				bestDist[i] = new double[instances.numClasses()];
			}
			
			m_Instances = new Instances(instances);
			m_Instances.deleteWithMissingClass();
			
			if (m_Instances.numInstances() == 0)
			{
				throw new System.ArgumentException("No instances without missing " + "class values in training file!");
			}
			
			if (instances.numAttributes() == 1)
			{
				throw new System.ArgumentException("Attribute missing. Need at least one " + "attribute other than class attribute!");
			}
			
			if (m_Instances.classAttribute().Nominal)
			{
				numClasses = m_Instances.numClasses();
			}
			else
			{
				numClasses = 1;
			}
			
			// For each attribute
			bool first = true;
			for (int i = 0; i < m_Instances.numAttributes(); i++)
			{
				if (i != m_Instances.classIndex())
				{
					
					// Reserve space for distribution.
					double[][] tmpArray = new double[3][];
					for (int i2 = 0; i2 < 3; i2++)
					{
						tmpArray[i2] = new double[numClasses];
					}
					m_Distribution = tmpArray;
					
					// Compute value of criterion for best split on attribute
					if (m_Instances.attribute(i).Nominal)
					{
						currVal = findSplitNominal(i);
					}
					else
					{
						currVal = findSplitNumeric(i);
					}
					if ((first) || (currVal < bestVal))
					{
						bestVal = currVal;
						bestAtt = i;
						bestPoint = m_SplitPoint;
						for (int j = 0; j < 3; j++)
						{
							Array.Copy(m_Distribution[j], 0, bestDist[j], 0, numClasses);
						}
					}
					
					// First attribute has been investigated
					first = false;
				}
			}
			
			// Set attribute, split point and distribution.
			m_AttIndex = bestAtt;
			m_SplitPoint = bestPoint;
			m_Distribution = bestDist;
			if (m_Instances.classAttribute().Nominal)
			{
				for (int i = 0; i < m_Distribution.Length; i++)
				{
					double sumCounts = Utils.sum(m_Distribution[i]);
					if (sumCounts == 0)
					{
						// This means there were only missing attribute values
						Array.Copy(m_Distribution[2], 0, m_Distribution[i], 0, m_Distribution[2].Length);
						Utils.normalize(m_Distribution[i]);
					}
					else
					{
						Utils.normalize(m_Distribution[i], sumCounts);
					}
				}
			}
			
			// Save memory
			m_Instances = new Instances(m_Instances, 0);
		}