예제 #1
		/// <summary>
		/// Performs a cross-validation (stratified if the class is nominal, according
		/// to the original description) for a classifier on a set of instances. A copy
		/// of the classifier is made for every fold, just in case the classifier is not
		/// initialized properly.
		/// </summary>
		/// <remarks>
		/// NOTE(review): this listing contains no braces and appears to have lost
		/// statements. No stratification call follows the nominal-class check, and no
		/// buildClassifier() call on the copied classifier is visible before it is
		/// evaluated. Confirm against the original source before relying on it.
		/// </remarks>
		/// <param name="classifier">the classifier with any options set; only a copy of it is evaluated.</param>
		/// <param name="data">the data on which the cross-validation is to be performed; a copy is made before any fold is extracted.</param>
		/// <param name="numFolds">the number of folds for the cross-validation; also stored in m_NumFolds.</param>
		/// <param name="random">random number generator for randomization; handed to trainCV for each fold.</param>
		/// <exception cref="System.Exception">if a classifier could not be generated successfully or the class is not defined.</exception>
		public virtual void  crossValidateModel(Classifier classifier, Instances data, int numFolds, System.Random random)
			// Make a copy of the data we can reorder
			data = new Instances(data);
			// NOTE(review): with no braces and no statement of its own, this check
			// syntactically governs the fold loop below; presumably a stratify call
			// belonged here -- TODO confirm.
			if (data.classAttribute().Nominal)
			// Do the folds
			for (int i = 0; i < numFolds; i++)
				// Training portion of fold i
				Instances train = data.trainCV(numFolds, i, random);
				// Assign the training fold to Priors
				Priors = train;
				// Evaluate a copy rather than the classifier instance that was passed in
				Classifier copiedClassifier = Classifier.makeCopy(classifier);
				// Held-out portion of fold i, evaluated with the copied classifier
				Instances test = data.testCV(numFolds, i);
				evaluateModel(copiedClassifier, test);
			// Remember how many folds were used
			m_NumFolds = numFolds;
예제 #2
        /// <summary>
        /// Loads "./data/Classification/Communication.arff", evaluates a NaiveBayes
        /// classifier on every test fold of a 10-fold split and on the complete data
        /// set, and writes section headings to
        /// "./data/Classification/Communication_Report.txt".
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing contains no braces and appears to have lost
        /// statements. In the visible code the classifier is never trained, the
        /// summary strings and the confusion matrix are computed but never written,
        /// and the FileReader is never closed. Confirm against the original source.
        /// </remarks>
        public static void cvdTest()
            // Read the ARFF file and use the last attribute as the class
            weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
            data.setClassIndex(data.numAttributes() - 1);

            // NOTE(review): the comments and console message below mention BayesNet /
            // "Bayesian Network", but the classifier created here is NaiveBayes.
            weka.classifiers.Classifier cls = new weka.classifiers.bayes.NaiveBayes();

            //Save BayesNet results in .txt file
            using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
                int runs  = 1;
                int folds = 10;

                // perform cross-validation
                for (int i = 0; i < runs; i++)
                    // randomize data
                    // The seed is derived from the run number (1 for the first run)
                    int seed = i + 1;
                    java.util.Random    rand     = new java.util.Random(seed);
                    weka.core.Instances randData = new weka.core.Instances(data);
                    // NOTE(review): this check has no statement of its own; presumably
                    // a randomize/stratify call that used rand was dropped -- TODO
                    // confirm. rand is not used anywhere in the visible code.
                    if (randData.classAttribute().isNominal())

                    // NOTE(review): eval is created but only referenced from
                    // commented-out code below.
                    weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData);
                    for (int n = 0; n < folds; n++)
                        // train is extracted but not used in the visible code
                        weka.core.Instances train = randData.trainCV(folds, n);
                        weka.core.Instances test  = randData.testCV(folds, n);
                        // build and evaluate classifier
                        //weka.classifiers.Classifier clsCopy = weka.classifiers.Classifier.makeCopy(cls);
                        //eval.evaluateModel(cls, test);

                        //Print classifier analytics for all the dataset
                        file.WriteLine("EVALUATION OF TEST DATASET.");
                        // Test the model
                        weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(test);
                        eTest.evaluateModel(cls, test);

                        // Print the results as in Weka explorer:
                        //Print statistics
                        // NOTE(review): the summary is built but not written anywhere here
                        String strSummaryTest = eTest.toSummaryString();


                        //Print detailed class statistics

                        //Print confusion matrix

                        // Get the confusion matrix
                        double[][] cmMatrixTest = eTest.confusionMatrix();

                        // NOTE(review): by indentation this message sits inside the fold
                        // loop and would be printed once per fold -- TODO confirm.
                        System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");

                    //Print classifier analytics for all the dataset
                    file.WriteLine("EVALUATION OF ALL DATASET.");


                    // Train the model
                    // NOTE(review): despite the comment above, the visible code only
                    // evaluates cls on the full data set; no training call is shown.
                    weka.classifiers.Evaluation eAlldata = new weka.classifiers.Evaluation(data);
                    eAlldata.evaluateModel(cls, data);

                    // Print the results as in Weka explorer:
                    //Print statistics
                    String strSummaryAlldata = eAlldata.toSummaryString();

                    //Print detailed class statistics

                    //Print confusion matrix

                    //print model
예제 #3
		/// <summary>
		/// Builds the boosted classifier. Optionally selects the number of boosting
		/// iterations by cross-validation (when m_NumFolds is greater than 1), then
		/// runs the boosting iterations on a copy of the full training data.
		/// </summary>
		/// <remarks>
		/// NOTE(review): this listing contains no braces, so nesting is inferred from
		/// indentation only. Confirm against the original source.
		/// </remarks>
		/// <param name="data">the training data; it is copied before being modified.</param>
		/// <exception cref="System.Exception">if the class attribute is numeric, no base
		/// classifier has been specified, or the data contains string attributes.</exception>
		public virtual void  buildClassifier(Instances data)
			// Random source seeded from m_Seed; handed to trainCV in the cross-validation below
			m_RandomInstance = new Random(m_Seed);
			// NOTE(review): boostData is declared but not used in the visible code
			Instances boostData;
			// Remember the class position; the numeric pseudo class is inserted there later
			int classIndex = data.classIndex();
			if (data.classAttribute().Numeric)
				throw new Exception("LogitBoost can't handle a numeric class!");
			if (m_Classifier == null)
				throw new System.Exception("A base classifier has not been specified!");
			// Turn resampling on when the base classifier is not a WeightedInstancesHandler
			if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
				m_UseResampling = true;
			if (data.checkForStringAttributes())
				throw new Exception("Cannot handle string attributes!");
			if (m_Debug)
				System.Console.Error.WriteLine("Creating copy of the training data");
			m_NumClasses = data.numClasses();
			m_ClassAttribute = data.classAttribute();
			// Create a copy of the data 
			data = new Instances(data);
			// Create the base classifiers
			if (m_Debug)
				System.Console.Error.WriteLine("Creating base classifiers");
			// One row per class, each holding NumIterations copies of the base classifier
			m_Classifiers = new Classifier[m_NumClasses][];
			for (int j = 0; j < m_NumClasses; j++)
				m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
			// Do we want to select the appropriate number of iterations
			// using cross-validation?
			int bestNumIterations = this.NumIterations;
			if (m_NumFolds > 1)
				if (m_Debug)
					System.Console.Error.WriteLine("Processing first fold.");
				// Array for storing the results
				// results[j] accumulates eval.correct() over all runs and folds for iteration index j
				double[] results = new double[this.NumIterations];
				// Iterate through the cv-runs
				for (int r = 0; r < m_NumRuns; r++)
					// Stratify the data
					// NOTE(review): no statement follows the comment above; a
					// randomize/stratify call was presumably dropped -- TODO confirm.
					// Perform the cross-validation
					for (int i = 0; i < m_NumFolds; i++)
						// Get train and test folds
						Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
						Instances test = data.testCV(m_NumFolds, i);
						// Make class numeric
						// Unset the class, insert a 'pseudo class' attribute at the old
						// class position, then make that position the class again
						Instances trainN = new Instances(train);
						trainN.ClassIndex = - 1;
						trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
						trainN.ClassIndex = classIndex;
						m_NumericClassData = new Instances(trainN, 0);
						// Get class values
						int numInstances = train.numInstances();
						// trainFs: numInstances x m_NumClasses array, left at its default zeros
						double[][] tmpArray = new double[numInstances][];
						for (int i2 = 0; i2 < numInstances; i2++)
							tmpArray[i2] = new double[m_NumClasses];
						double[][] trainFs = tmpArray;
						// trainYs: numInstances x m_NumClasses array of per-class target values
						double[][] tmpArray2 = new double[numInstances][];
						for (int i3 = 0; i3 < numInstances; i3++)
							tmpArray2[i3] = new double[m_NumClasses];
						double[][] trainYs = tmpArray2;
						for (int j = 0; j < m_NumClasses; j++)
							for (int k = 0; k < numInstances; k++)
								// 1.0 - m_Offset for the instance's own class, m_Offset / m_NumClasses otherwise
								trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
						// Perform iterations
						double[][] probs = initialProbs(numInstances);
						m_NumGenerated = 0;
						double sumOfWeights = train.sumOfWeights();
						for (int j = 0; j < this.NumIterations; j++)
							// Run one boosting step, then evaluate this model on the test
							// fold and add the correct count to results[j]
							performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
							Evaluation eval = new Evaluation(train);
							eval.evaluateModel(this, test);
							results[j] += eval.correct();
				// Find the iteration index with the highest accumulated correct count
				//UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
				double bestResult = - System.Double.MaxValue;
				for (int j = 0; j < this.NumIterations; j++)
					if (results[j] > bestResult)
						bestResult = results[j];
						// NOTE(review): results[j] was measured after j + 1 calls to
						// performIteration, yet the final loop below runs only while
						// j < bestNumIterations -- possible off-by-one, TODO confirm.
						bestNumIterations = j;
				if (m_Debug)
					System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
			// Build classifier on all the data
			int numInstances2 = data.numInstances();
			// Same array layout as in the cross-validation: numInstances2 x m_NumClasses
			double[][] trainFs2 = new double[numInstances2][];
			for (int i4 = 0; i4 < numInstances2; i4++)
				trainFs2[i4] = new double[m_NumClasses];
			double[][] trainYs2 = new double[numInstances2][];
			for (int i5 = 0; i5 < numInstances2; i5++)
				trainYs2[i5] = new double[m_NumClasses];
			for (int j = 0; j < m_NumClasses; j++)
				// i and k start at 0 and advance together, so they are always equal here
				for (int i = 0, k = 0; i < numInstances2; i++, k++)
					trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
			// Make class numeric
			data.ClassIndex = - 1;
			data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
			data.ClassIndex = classIndex;
			m_NumericClassData = new Instances(data, 0);
			// Perform iterations
			double[][] probs2 = initialProbs(numInstances2);
            double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
			m_NumGenerated = 0;
			if (m_Debug)
				System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
			double sumOfWeights2 = data.sumOfWeights();
			for (int j = 0; j < bestNumIterations; j++)
				double previousLoglikelihood = logLikelihood;
				performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
                logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
				if (m_Debug)
					System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
				// Stop early once the log-likelihood changes by less than m_Precision
				if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
					return ;