Ejemplo n.º 1
0
		/// <summary>
		/// Generates the classifier from the training data.
		/// For a nominal class, one weight counter per class label is kept (each
		/// counter starts at 1) and the stored class value is the result of
		/// <c>Utils.maxIndex</c> over those counters, which are then passed to
		/// <c>Utils.normalize</c>. For a numeric class, the stored class value is
		/// the weight-averaged class value of all instances whose class is present.
		/// </summary>
		/// <param name="instances">set of instances serving as training data</param>
		/// <exception cref="Exception">if the class attribute is neither nominal nor numeric</exception>
		public override void  buildClassifier(Instances instances)
		{
			double totalWeight = 0;

			m_Class = instances.classAttribute();
			m_ClassValue = 0;
			switch (instances.classAttribute().type())
			{
				case weka.core.Attribute.NOMINAL:
					// Every label starts with a count of one; the running total
					// therefore starts at the number of labels.
					m_Counts = new double[instances.numClasses()];
					for (int label = 0; label < m_Counts.Length; label++)
					{
						m_Counts[label] = 1;
					}
					totalWeight = m_Counts.Length;
					break;

				case weka.core.Attribute.NUMERIC:
					m_Counts = null;
					break;

				default:
					throw new System.Exception("ZeroR can only handle nominal and numeric class" + " attributes.");
			}

			System.Collections.IEnumerator cursor = instances.enumerateInstances();
			while (cursor.MoveNext())
			{
				Instance current = (Instance) cursor.Current;
				if (current.classIsMissing())
				{
					// Instances without a class value contribute nothing.
					continue;
				}

				double weight = current.weight();
				if (instances.classAttribute().Nominal)
				{
					m_Counts[(int) current.classValue()] += weight;
				}
				else
				{
					m_ClassValue += weight * current.classValue();
				}
				totalWeight += weight;
			}

			if (!instances.classAttribute().Numeric)
			{
				m_ClassValue = Utils.maxIndex(m_Counts);
				Utils.normalize(m_Counts, totalWeight);
			}
			else if (Utils.gr(totalWeight, 0))
			{
				// Only divide when some weight was actually observed.
				m_ClassValue /= totalWeight;
			}
		}
Ejemplo n.º 2
0
        /// <summary>
        /// Loads a dataset from <paramref name="classifierFileName"/>, trains
        /// <paramref name="_classifier"/> on the first <c>percentSplit</c> percent of the
        /// instances and reports the accuracy obtained on the remaining instances.
        /// The last attribute of the dataset is used as the class.
        /// </summary>
        /// <param name="classifierFileName">path of the dataset file to read</param>
        /// <param name="_classifier">classifier to train and evaluate</param>
        /// <returns>
        /// percentage of correctly classified test instances; 0.0 when the test
        /// partition is empty or when a <c>java.lang.Exception</c> was caught
        /// </returns>
        public static double classifyTrain_Test(string classifierFileName, Classifier _classifier)
        {
            double performance = 0.0;

            try
            {
                weka.core.Instances insts;
                FileReader javaFileReader = new FileReader(classifierFileName);
                try
                {
                    insts = new weka.core.Instances(javaFileReader);
                }
                finally
                {
                    // Release the file handle even when parsing the dataset fails.
                    javaFileReader.close();
                }

                insts.setClassIndex(insts.numAttributes() - 1);

                System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

                _classifier.buildClassifier(train);

                int numCorrect   = 0;
                int numberOfInst = insts.numInstances();
                for (int i = trainSize; i < numberOfInst; i++)
                {
                    weka.core.Instance currentInst = insts.instance(i);

                    double predictClass = _classifier.classifyInstance(currentInst);

                    // Compare by label so the check matches what is shown to the user.
                    string actualClass    = insts.classAttribute().value((int)currentInst.classValue());
                    string predictedClass = insts.classAttribute().value((int)predictClass);

                    if (predictedClass == actualClass)
                    {
                        numCorrect++;
                    }
                }

                // An empty test partition would otherwise yield 0/0 = NaN.
                if (testSize > 0)
                {
                    performance = (double)numCorrect / (double)testSize * 100;
                }

                System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + performance.toString() + "%)");
            }
            catch (java.lang.Exception ex)
            {
                // Best effort: report the failure and fall through with the current score.
                ex.printStackTrace();
            }

            return(performance);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs a 10-fold cross-validation of a Naive Bayes classifier on
        /// <c>./data/Classification/Communication.arff</c> and writes, for every fold and
        /// for the complete dataset, the summary, per-class details and confusion matrix
        /// to <c>./data/Classification/Communication_Report.txt</c>.
        /// The last attribute of the dataset is used as the class.
        /// </summary>
        public static void cvdTest()
        {
            weka.core.Instances data;
            java.io.FileReader  reader = new java.io.FileReader("./data/Classification/Communication.arff");
            try
            {
                data = new weka.core.Instances(reader);
            }
            finally
            {
                // The reader is only needed while the dataset is parsed.
                reader.close();
            }
            data.setClassIndex(data.numAttributes() - 1);

            weka.classifiers.Classifier cls = new weka.classifiers.bayes.NaiveBayes();

            // Save the classifier results in a .txt file
            using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
            {
                int runs  = 1;
                int folds = 10;

                // perform cross-validation
                for (int i = 0; i < runs; i++)
                {
                    // randomize data
                    int seed = i + 1;
                    java.util.Random    rand     = new java.util.Random(seed);
                    weka.core.Instances randData = new weka.core.Instances(data);
                    randData.randomize(rand);
                    if (randData.classAttribute().isNominal())
                    {
                        randData.stratify(folds);
                    }

                    for (int n = 0; n < folds; n++)
                    {
                        weka.core.Instances train = randData.trainCV(folds, n);
                        weka.core.Instances test  = randData.testCV(folds, n);

                        // build and evaluate classifier
                        cls.buildClassifier(train);

                        // Print classifier analytics for this fold's test partition
                        file.WriteLine("EVALUATION OF TEST DATASET.");

                        // The Evaluation constructor derives the class priors from the
                        // data it is given, so it receives the training fold; only the
                        // evaluation itself runs on the held-out test fold.
                        weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(train);
                        eTest.evaluateModel(cls, test);

                        // Print the results as in Weka explorer:
                        // statistics
                        file.WriteLine(eTest.toSummaryString());
                        file.WriteLine();

                        // detailed class statistics
                        file.WriteLine(eTest.toClassDetailsString());
                        file.WriteLine();

                        // confusion matrix
                        file.WriteLine(eTest.toMatrixString());
                        file.WriteLine();
                    }

                    // Print classifier analytics for all the dataset
                    file.WriteLine("EVALUATION OF ALL DATASET.");

                    cls.buildClassifier(data);

                    // Evaluate the model on the same data it was trained on
                    weka.classifiers.Evaluation eAlldata = new weka.classifiers.Evaluation(data);
                    eAlldata.evaluateModel(cls, data);

                    // Print the results as in Weka explorer:
                    // statistics
                    file.WriteLine(eAlldata.toSummaryString());
                    file.WriteLine();

                    // detailed class statistics
                    file.WriteLine(eAlldata.toClassDetailsString());
                    file.WriteLine();

                    // confusion matrix
                    file.WriteLine(eAlldata.toMatrixString());
                    file.WriteLine("----------------");

                    // print model
                    file.WriteLine(cls);
                    file.WriteLine();
                }
            }

            // Announce success once, after the report has been flushed and closed.
            System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
        }
		/// <summary>
		/// Builds a pruneable classifier tree. The tree is grown on a copy of the
		/// supplied data from which instances with a missing class have been
		/// deleted, then collapsed and, depending on the configuration flags,
		/// pruned and cleaned up.
		/// </summary>
		/// <param name="data">the training instances</param>
		/// <exception cref="Exception">if the class is numeric or the data contains string attributes
		/// </exception>
		public override void  buildClassifier(Instances data)
		{
			if (data.classAttribute().Numeric)
			{
				throw new Exception("Class is numeric!");
			}
			if (data.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}

			// Work on a copy so deleting instances does not touch the argument.
			Instances working = new Instances(data);
			working.deleteWithMissingClass();

			buildTree(working, m_subtreeRaising);
			collapse();

			if (m_pruneTheTree)
			{
				prune();
			}

			if (m_cleanup)
			{
				// Only an empty, header-only copy of the data is handed to cleanup.
				Instances headerOnly = new Instances(working, 0);
				cleanup(headerOnly);
			}
		}
Ejemplo n.º 5
0
		/// <summary>
		/// Performs a cross-validation (stratified if the class is nominal) for a
		/// classifier on a set of instances. A copy of the classifier is made
		/// before each call to buildClassifier(), in case the classifier does not
		/// re-initialize itself properly.
		/// </summary>
		/// <param name="classifier">the classifier with any options set</param>
		/// <param name="data">the data on which the cross-validation is to be performed</param>
		/// <param name="numFolds">the number of folds for the cross-validation</param>
		/// <param name="random">random number generator for randomization</param>
		/// <exception cref="Exception">if a classifier could not be generated
		/// successfully or the class is not defined</exception>
		public virtual void  crossValidateModel(Classifier classifier, Instances data, int numFolds, System.Random random)
		{
			// Reorder a copy rather than the instances handed in by the caller.
			Instances shuffled = new Instances(data);
			shuffled.randomize(random);
			if (shuffled.classAttribute().Nominal)
			{
				shuffled.stratify(numFolds);
			}

			int fold = 0;
			while (fold < numFolds)
			{
				// Priors are taken from the training part of the current fold.
				Instances trainingFold = shuffled.trainCV(numFolds, fold, random);
				Priors = trainingFold;

				Classifier freshModel = Classifier.makeCopy(classifier);
				freshModel.buildClassifier(trainingFold);

				Instances testFold = shuffled.testCV(numFolds, fold);
				evaluateModel(freshModel, testFold);

				fold++;
			}

			m_NumFolds = numFolds;
		}
Ejemplo n.º 6
0
		/// <summary>
		/// Initializes all the counters for the evaluation and also takes a
		/// cost matrix as parameter.
		/// Use <code>useNoPriors()</code> if the dataset is the test set and you
		/// can't initialize with the priors from the training set via
		/// <code>setPriors(Instances)</code>.
		/// </summary>
		/// <param name="data">set of training instances, to get some header
		/// information and prior class distribution information</param>
		/// <param name="costMatrix">the cost matrix; if null, default costs will be used</param>
		/// <exception cref="System.Exception">if a cost matrix is given and the class
		/// is not nominal, or the cost matrix size differs from the number of classes</exception>
		/// <seealso cref="useNoPriors()">
		/// </seealso>
		/// <seealso cref="setPriors(Instances)">
		/// </seealso>
		public Evaluation(Instances data, CostMatrix costMatrix)
		{
			m_NumClasses = data.numClasses();
			m_NumFolds = 1;
			m_ClassIsNominal = data.classAttribute().Nominal;

			if (m_ClassIsNominal)
			{
				// Square confusion matrix plus the label of every class, filled in one pass.
				double[][] matrix = new double[m_NumClasses][];
				System.String[] labels = new System.String[m_NumClasses];
				for (int c = 0; c < m_NumClasses; c++)
				{
					matrix[c] = new double[m_NumClasses];
					labels[c] = data.classAttribute().value_Renamed(c);
				}
				m_ConfusionMatrix = matrix;
				m_ClassNames = labels;
			}

			m_CostMatrix = costMatrix;
			if (m_CostMatrix != null && !m_ClassIsNominal)
			{
				throw new System.Exception("Class has to be nominal if cost matrix " + "given!");
			}
			if (m_CostMatrix != null && m_CostMatrix.size() != m_NumClasses)
			{
				throw new System.Exception("Cost matrix not compatible with data!");
			}

			m_ClassPriors = new double[m_NumClasses];
			Priors = data;
			m_MarginCounts = new double[k_MarginResolution + 1];
		}
Ejemplo n.º 7
0
		/// <summary>
		/// Prints the predictions for the given test file into a String.
		/// Instances are read from the file one at a time, classified, appended
		/// to the output and then removed from the in-memory dataset again.
		/// </summary>
		/// <param name="classifier">the classifier to use</param>
		/// <param name="train">the training data (not used by this method)</param>
		/// <param name="testFileName">the name of the test file; an empty name yields an empty result</param>
		/// <param name="classIndex">the 1-based class index, or -1 to use the last attribute</param>
		/// <param name="attributesToOutput">the indices of the attributes to output</param>
		/// <returns>the generated predictions for the attribute range</returns>
		/// <exception cref="System.Exception">if the test file cannot be opened</exception>
		protected internal static System.String toPrintClassifications(Classifier classifier, Instances train, System.String testFileName, int classIndex, Range attributesToOutput)
		{
			System.Text.StringBuilder text = new System.Text.StringBuilder();
			if (testFileName.Length == 0)
			{
				return text.ToString();
			}

			System.IO.StreamReader testReader = null;
			try
			{
				// Open the file exactly once; the previous double construction
				// left an extra file handle open that was never closed.
				testReader = new System.IO.StreamReader(testFileName, System.Text.Encoding.Default);
			}
			catch (System.Exception e)
			{
				// Keep the original failure attached as the inner exception.
				throw new System.Exception("Can't open file " + e.Message + '.', e);
			}

			try
			{
				Instances test = new Instances(testReader, 1);
				if (classIndex != - 1)
				{
					test.ClassIndex = classIndex - 1;
				}
				else
				{
					test.ClassIndex = test.numAttributes() - 1;
				}

				int i = 0;
				while (test.readInstance(testReader))
				{
					Instance instance = test.instance(0);
					Instance withMissing = (Instance) instance.copy();
					withMissing.Dataset = test;
					double predValue = classifier.classifyInstance(withMissing);
					bool predictionMissing = Instance.isMissingValue(predValue);

					if (test.classAttribute().Numeric)
					{
						// Numeric class: index, predicted value, actual value.
						if (predictionMissing)
						{
							text.Append(i + " missing ");
						}
						else
						{
							text.Append(i + " " + predValue + " ");
						}
						if (instance.classIsMissing())
						{
							text.Append("missing");
						}
						else
						{
							text.Append(instance.classValue());
						}
						text.Append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
					}
					else
					{
						// Nominal class: index, predicted label, probability of
						// the predicted label, actual label.
						if (predictionMissing)
						{
							text.Append(i + " missing ");
							text.Append("missing ");
						}
						else
						{
							text.Append(i + " " + test.classAttribute().value_Renamed((int) predValue) + " ");
							text.Append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
						}
						text.Append(instance.toString(instance.classIndex()) + " " + attributeValuesString(withMissing, attributesToOutput) + "\n");
					}

					// Drop the instance again so only one is held at a time.
					test.delete(0);
					i++;
				}
			}
			finally
			{
				// Close the file even when reading or classification fails.
				testReader.Close();
			}
			return text.ToString();
		}
Ejemplo n.º 8
0
		/// <summary>
		/// Builds the classifier: filters and shuffles a copy of the training
		/// data, then makes up to <c>m_NumIterations</c> passes over it, recording
		/// a new perceptron whenever the current prediction is wrong. Training
		/// stops early once <c>m_MaxK</c> perceptrons have been recorded.
		/// </summary>
		/// <param name="insts">the training instances</param>
		/// <exception cref="Exception">if the data has string attributes, more than
		/// two classes, or a numeric class</exception>
		public override void  buildClassifier(Instances insts)
		{
			if (insts.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			if (insts.numClasses() > 2)
			{
				throw new System.Exception("Can only handle two-class datasets!");
			}
			if (insts.classAttribute().Numeric)
			{
				throw new Exception("Can't handle a numeric class!");
			}

			// Work on a filtered copy: drop instances without a class, replace
			// missing values, then convert nominal attributes to binary ones.
			m_Train = new Instances(insts);
			m_Train.deleteWithMissingClass();

			m_ReplaceMissingValues = new ReplaceMissingValues();
			m_ReplaceMissingValues.setInputFormat(m_Train);
			m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);

			m_NominalToBinary = new NominalToBinary();
			m_NominalToBinary.setInputFormat(m_Train);
			m_Train = Filter.useFilter(m_Train, m_NominalToBinary);

			// Randomize training data
			m_Train.randomize(new System.Random((System.Int32) m_Seed));

			// Make space to store perceptrons
			m_Additions = new int[m_MaxK + 1];
			m_IsAddition = new bool[m_MaxK + 1];
			m_Weights = new int[m_MaxK + 1];

			// Compute perceptrons
			m_K = 0;
			for (int pass = 0; pass < m_NumIterations; pass++)
			{
				for (int idx = 0; idx < m_Train.numInstances(); idx++)
				{
					Instance example = m_Train.instance(idx);
					if (example.classIsMissing())
					{
						continue;
					}

					int predicted = makePrediction(m_K, example);
					int actual = (int) example.classValue();
					if (predicted != actual)
					{
						// Mistake: record this instance as a new perceptron update.
						m_IsAddition[m_K] = (actual == 1);
						m_Additions[m_K] = idx;
						m_K++;
					}
					// Either way the (possibly new) current perceptron gains a vote.
					m_Weights[m_K]++;

					if (m_K == m_MaxK)
					{
						// Storage is exhausted; nothing follows the loops, so stop here.
						return;
					}
				}
			}
		}
Ejemplo n.º 9
0
		/// <summary>
		/// Method for testing this class. Creates a small dataset from scratch,
		/// optionally replaces it with the contents of a file, and prints the
		/// results of a series of dataset operations to standard output.
		/// Any exception raised along the way (including the usage error for too
		/// many arguments) is reported on standard error.
		/// </summary>
		/// <param name="argv">may contain one element: the name of an ARFF file;
		/// more than one element is a usage error</param>
		//@ requires argv != null;
		public static void  test(System.String[] argv)
		{
			Instances instances, secondInstances, train, test, empty;
			System.Random random = new System.Random(2);
			int start, num;
			FastVector testAtts, testVals;
			int i, j;

			try
			{
				if (argv.Length > 1)
				{
					throw (new System.Exception("Usage: Instances [<filename>]"));
				}

				// Creating set of instances from scratch
				testVals = new FastVector(2);
				testVals.addElement("first_value");
				testVals.addElement("second_value");
				testAtts = new FastVector(2);
				testAtts.addElement(new Attribute("nominal_attribute", testVals));
				testAtts.addElement(new Attribute("numeric_attribute"));
				instances = new Instances("test_set", testAtts, 10);
				instances.add(new Instance(instances.numAttributes()));
				instances.add(new Instance(instances.numAttributes()));
				instances.add(new Instance(instances.numAttributes()));
				instances.ClassIndex = 0;
				System.Console.Out.WriteLine("\nSet of instances created from scratch:\n");
				System.Console.Out.WriteLine(instances);

				if (argv.Length == 1)
				{
					System.String filename = argv[0];

					// Read first five instances and print them. The reader is
					// disposed as soon as the incremental read is finished.
					using (System.IO.StreamReader headReader = new System.IO.StreamReader(filename, System.Text.Encoding.Default))
					{
						System.Console.Out.WriteLine("\nFirst five instances from file:\n");
						instances = new Instances(headReader, 1);
						instances.ClassIndex = instances.numAttributes() - 1;
						i = 0;
						while ((i < 5) && (instances.readInstance(headReader)))
						{
							i++;
						}
					}
					System.Console.Out.WriteLine(instances);

					// Read all the instances in the file
					using (System.IO.StreamReader fullReader = new System.IO.StreamReader(filename, System.Text.Encoding.Default))
					{
						instances = new Instances(fullReader);
					}

					// Make the last attribute be the class
					instances.ClassIndex = instances.numAttributes() - 1;

					// Print header and instances.
					System.Console.Out.WriteLine("\nDataset:\n");
					System.Console.Out.WriteLine(instances);
					System.Console.Out.WriteLine("\nClass index: " + instances.classIndex());
				}

				// Test basic methods based on class index.
				System.Console.Out.WriteLine("\nClass name: " + instances.classAttribute().name());
				System.Console.Out.WriteLine("\nClass index: " + instances.classIndex());
				System.Console.Out.WriteLine("\nClass is nominal: " + instances.classAttribute().Nominal);
				System.Console.Out.WriteLine("\nClass is numeric: " + instances.classAttribute().Numeric);
				System.Console.Out.WriteLine("\nClasses:\n");
				for (i = 0; i < instances.numClasses(); i++)
				{
					System.Console.Out.WriteLine(instances.classAttribute().value_Renamed(i));
				}
				System.Console.Out.WriteLine("\nClass values and labels of instances:\n");
				for (i = 0; i < instances.numInstances(); i++)
				{
					Instance inst = instances.instance(i);
					System.Console.Out.Write(inst.classValue() + "\t");
					System.Console.Out.Write(inst.toString(inst.classIndex()));
					if (instances.instance(i).classIsMissing())
					{
						System.Console.Out.WriteLine("\tis missing");
					}
					else
					{
						System.Console.Out.WriteLine();
					}
				}

				// Create random weights.
				System.Console.Out.WriteLine("\nCreating random weights for instances.");
				for (i = 0; i < instances.numInstances(); i++)
				{
					instances.instance(i).Weight = random.NextDouble();
				}

				// Print all instances and their weights (and the sum of weights).
				System.Console.Out.WriteLine("\nInstances and their weights:\n");
				System.Console.Out.WriteLine(instances.instancesAndWeights());
				System.Console.Out.Write("\nSum of weights: ");
				System.Console.Out.WriteLine(instances.sumOfWeights());

				// Insert an attribute
				secondInstances = new Instances(instances);
				Attribute testAtt = new Attribute("Inserted");
				secondInstances.insertAttributeAt(testAtt, 0);
				System.Console.Out.WriteLine("\nSet with inserted attribute:\n");
				System.Console.Out.WriteLine(secondInstances);
				System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());

				// Delete the attribute
				secondInstances.deleteAttributeAt(0);
				System.Console.Out.WriteLine("\nSet with attribute deleted:\n");
				System.Console.Out.WriteLine(secondInstances);
				System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());

				// Test if headers are equal
				System.Console.Out.WriteLine("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n");

				// Print data in internal format.
				System.Console.Out.WriteLine("\nData (internal values):\n");
				for (i = 0; i < instances.numInstances(); i++)
				{
					for (j = 0; j < instances.numAttributes(); j++)
					{
						if (instances.instance(i).isMissing(j))
						{
							System.Console.Out.Write("? ");
						}
						else
						{
							System.Console.Out.Write(instances.instance(i).value_Renamed(j) + " ");
						}
					}
					System.Console.Out.WriteLine();
				}

				// Just print header
				System.Console.Out.WriteLine("\nEmpty dataset:\n");
				empty = new Instances(instances, 0);
				System.Console.Out.WriteLine(empty);
				System.Console.Out.WriteLine("\nClass name: " + empty.classAttribute().name());

				// Create copy and rename an attribute and a value (if possible)
				if (empty.classAttribute().Nominal)
				{
					Instances copy = new Instances(empty, 0);
					copy.renameAttribute(copy.classAttribute(), "new_name");
					copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value_Renamed(0), "new_val_name");
					System.Console.Out.WriteLine("\nDataset with names changed:\n" + copy);
					System.Console.Out.WriteLine("\nOriginal dataset:\n" + empty);
				}

				// Create and prints subset of instances.
				start = instances.numInstances() / 4;
				num = instances.numInstances() / 2;
				System.Console.Out.Write("\nSubset of dataset: ");
				System.Console.Out.WriteLine(num + " instances from " + (start + 1) + ". instance");
				secondInstances = new Instances(instances, start, num);
				System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());

				// Print all instances and their weights (and the sum of weights).
				System.Console.Out.WriteLine("\nInstances and their weights:\n");
				System.Console.Out.WriteLine(secondInstances.instancesAndWeights());
				System.Console.Out.Write("\nSum of weights: ");
				System.Console.Out.WriteLine(secondInstances.sumOfWeights());

				// Create and print training and test sets for 3-fold
				// cross-validation.
				System.Console.Out.WriteLine("\nTrain and test folds for 3-fold CV:");
				if (instances.classAttribute().Nominal)
				{
					instances.stratify(3);
				}
				for (j = 0; j < 3; j++)
				{
					train = instances.trainCV(3, j, new System.Random(1));
					test = instances.testCV(3, j);

					// Print all instances and their weights (and the sum of weights).
					System.Console.Out.WriteLine("\nTrain: ");
					System.Console.Out.WriteLine("\nInstances and their weights:\n");
					System.Console.Out.WriteLine(train.instancesAndWeights());
					System.Console.Out.Write("\nSum of weights: ");
					System.Console.Out.WriteLine(train.sumOfWeights());
					System.Console.Out.WriteLine("\nClass name: " + train.classAttribute().name());
					System.Console.Out.WriteLine("\nTest: ");
					System.Console.Out.WriteLine("\nInstances and their weights:\n");
					System.Console.Out.WriteLine(test.instancesAndWeights());
					System.Console.Out.Write("\nSum of weights: ");
					System.Console.Out.WriteLine(test.sumOfWeights());
					System.Console.Out.WriteLine("\nClass name: " + test.classAttribute().name());
				}

				// Randomize instances and print them.
				System.Console.Out.WriteLine("\nRandomized dataset:");
				instances.randomize(random);

				// Print all instances and their weights (and the sum of weights).
				System.Console.Out.WriteLine("\nInstances and their weights:\n");
				System.Console.Out.WriteLine(instances.instancesAndWeights());
				System.Console.Out.Write("\nSum of weights: ");
				System.Console.Out.WriteLine(instances.sumOfWeights());

				// Sort instances according to first attribute and
				// print them.
				System.Console.Out.Write("\nInstances sorted according to first attribute:\n ");
				instances.sort(0);

				// Print all instances and their weights (and the sum of weights).
				System.Console.Out.WriteLine("\nInstances and their weights:\n");
				System.Console.Out.WriteLine(instances.instancesAndWeights());
				System.Console.Out.Write("\nSum of weights: ");
				System.Console.Out.WriteLine(instances.sumOfWeights());
			}
			catch (System.Exception e)
			{
				// Report the failure (message and stack trace) instead of
				// silently discarding it.
				System.Console.Error.WriteLine(e);
			}
		}
Ejemplo n.º 10
0
        /// <summary>
        /// Runs a percentage-split evaluation of <paramref name="_classifier"/> on each of the
        /// per-class ARFF files named "{classifierFileName}_{n}.arff" (n = 1..baseClasses) and
        /// combines the outcomes row by row: a test row counts as correctly classified only
        /// when every per-class run predicted it correctly.
        /// </summary>
        /// <param name="classifierFileName">Common file-name prefix of the per-class ARFF files.</param>
        /// <param name="baseClasses">Number of per-class files to evaluate (numbered from 1).</param>
        /// <param name="_classifier">Classifier that is rebuilt on the training part of every file.</param>
        /// <returns>
        /// Percentage (0-100) of test rows that were predicted correctly in all per-class runs;
        /// 0.0 when there are no test rows or when a Java exception aborted the evaluation.
        /// </returns>
        public static double classifyTrain_Test(string classifierFileName, int baseClasses, Classifier _classifier)
        {
            double performance = 0.0;

            try
            {
                List <BrResult> results = new List <BrResult>();
                for (int singleClass = 1; singleClass <= baseClasses; singleClass++)
                {
                    string eachFileName = String.Format("{0}_{1}.arff", classifierFileName, singleClass);

                    BrResult result = new BrResult();
                    result.classNumber = singleClass;

                    // Close the reader even when parsing the ARFF file throws, so the
                    // file handle is not leaked on malformed input.
                    weka.core.Instances insts;
                    FileReader          javaFileReader = new FileReader(eachFileName);
                    try
                    {
                        insts = new weka.core.Instances(javaFileReader);
                    }
                    finally
                    {
                        javaFileReader.close();
                    }

                    // The class attribute is the last attribute of the file.
                    insts.setClassIndex(insts.numAttributes() - 1);

                    System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                    // The first trainSize instances are used for training, the rest for testing.
                    int numberOfInst          = insts.numInstances();
                    int trainSize             = numberOfInst * percentSplit / 100;
                    int testSize              = numberOfInst - trainSize;
                    weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

                    _classifier.buildClassifier(train);

                    int           numCorrect  = 0;
                    List <Result> eachResults = new List <Result>();
                    int           dataIndex   = 0;
                    for (int i = trainSize; i < numberOfInst; i++)
                    {
                        // 1-based position of the row inside the test part; it is the key used
                        // below to line up the same row across the per-class runs.
                        dataIndex++;
                        weka.core.Instance currentInst = insts.instance(i);

                        double predictClass = _classifier.classifyInstance(currentInst);

                        string actualClass    = insts.classAttribute().value((int)currentInst.classValue());
                        string predictedClass = insts.classAttribute().value((int)predictClass);

                        Result eachRow = new Result();
                        if (predictedClass == actualClass)
                        {
                            eachRow.correct = "1";
                            numCorrect++;
                        }
                        else
                        {
                            eachRow.correct = "0";
                        }
                        eachRow.lineIndex      = dataIndex;
                        eachRow.classActual    = actualClass;
                        eachRow.classPredicted = predictedClass;

                        eachResults.Add(eachRow);
                    }
                    result.classResult = eachResults;
                    results.Add(result);

                    System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)((double)numCorrect / (double)testSize * 100.0) + "%)");
                }

                #region Evaluation Matrix
                // lineIndex -> true while every per-class run seen so far predicted that row correctly.
                var rowAllCorrect = new Dictionary <int, bool>();

                foreach (var res in results)
                {
                    foreach (var classRes in res.classResult)
                    {
                        bool thisRunCorrect = classRes.correct.ToString() != "0";
                        bool earlierRunsCorrect;
                        if (!rowAllCorrect.TryGetValue(classRes.lineIndex, out earlierRunsCorrect))
                        {
                            // First time this row is seen: nothing has failed yet.
                            earlierRunsCorrect = true;
                        }
                        rowAllCorrect[classRes.lineIndex] = earlierRunsCorrect && thisRunCorrect;
                    }
                }
                #endregion

                #region Overall Performance
                int correctlyClassified = 0;
                foreach (bool allCorrect in rowAllCorrect.Values)
                {
                    if (allCorrect)
                    {
                        correctlyClassified++;
                    }
                }

                // Without any test rows the ratio would be 0/0 (NaN); report 0.0 instead.
                int totalData = rowAllCorrect.Count;
                if (totalData > 0)
                {
                    performance = (double)correctlyClassified / (double)totalData * 100;
                }
                System.Console.WriteLine(performance);
                #endregion
            }
            catch (java.lang.Exception ex)
            {
                // Only Java-side failures (file access, Weka errors) are handled here;
                // they are reported and the performance computed so far is returned.
                ex.printStackTrace();
            }
            return(performance);
        }
Ejemplo n.º 11
0
		/// <summary> Builds the boosted classifier.
		/// 
		/// After validating the input, the method optionally selects the number of
		/// boosting iterations by cross-validation (when m_NumFolds is greater than 1)
		/// and then runs the boosting iterations on a copy of the full training data
		/// from which instances with a missing class have been removed. The final
		/// loop stops early once the average log-likelihood changes by less than
		/// m_Precision between two iterations.
		/// </summary>
		/// <param name="data">the training data; the caller's object is not modified,
		/// all changes are applied to a copy
		/// </param>
		/// <exception cref="Exception">if the class is numeric, no base classifier has
		/// been specified, or the data contains string attributes
		/// </exception>
		public virtual void  buildClassifier(Instances data)
		{
			m_RandomInstance = new Random(m_Seed);
			Instances boostData;
			int classIndex = data.classIndex();
			
			if (data.classAttribute().Numeric)
			{
				throw new Exception("LogitBoost can't handle a numeric class!");
			}
			if (m_Classifier == null)
			{
				throw new System.Exception("A base classifier has not been specified!");
			}
			
			// A base learner that is not a WeightedInstancesHandler forces resampling.
			if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
			{
				m_UseResampling = true;
			}
			if (data.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Creating copy of the training data");
			}
			
			m_NumClasses = data.numClasses();
			m_ClassAttribute = data.classAttribute();
			
			// Create a copy of the data 
			data = new Instances(data);
			data.deleteWithMissingClass();
			
			// Create the base classifiers: one array of NumIterations copies per class
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Creating base classifiers");
			}
			m_Classifiers = new Classifier[m_NumClasses][];
			for (int j = 0; j < m_NumClasses; j++)
			{
				m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
			}
			
			// Do we want to select the appropriate number of iterations
			// using cross-validation?
			int bestNumIterations = this.NumIterations;
			if (m_NumFolds > 1)
			{
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Processing first fold.");
				}
				
				// results[j] accumulates, over all runs and folds, the number of
				// correctly classified test instances after j + 1 boosting iterations.
				double[] results = new double[this.NumIterations];
				
				// Iterate through the cv-runs
				for (int r = 0; r < m_NumRuns; r++)
				{
					
					// Stratify the data
					data.randomize(m_RandomInstance);
					data.stratify(m_NumFolds);
					
					// Perform the cross-validation
					for (int i = 0; i < m_NumFolds; i++)
					{
						
						// Get train and test folds
						Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
						Instances test = data.testCV(m_NumFolds, i);
						
						// Make class numeric: replace the class attribute of a copy of
						// the training fold by a numeric 'pseudo class' attribute.
						Instances trainN = new Instances(train);
						trainN.ClassIndex = - 1;
						trainN.deleteAttributeAt(classIndex);
						trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
						trainN.ClassIndex = classIndex;
						m_NumericClassData = new Instances(trainN, 0);
						
						// Get class values: trainFs starts at zero, trainYs holds the
						// per-class target of every instance (smoothed by m_Offset).
						int numInstances = train.numInstances();
						double[][] trainFs = new double[numInstances][];
						double[][] trainYs = new double[numInstances][];
						for (int k = 0; k < numInstances; k++)
						{
							trainFs[k] = new double[m_NumClasses];
							trainYs[k] = new double[m_NumClasses];
						}
						for (int j = 0; j < m_NumClasses; j++)
						{
							for (int k = 0; k < numInstances; k++)
							{
								trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
							}
						}
						
						// Perform iterations, evaluating the partial model after each one
						double[][] probs = initialProbs(numInstances);
						m_NumGenerated = 0;
						double sumOfWeights = train.sumOfWeights();
						for (int j = 0; j < this.NumIterations; j++)
						{
							performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
							Evaluation eval = new Evaluation(train);
							eval.evaluateModel(this, test);
							results[j] += eval.correct();
						}
					}
				}
				
				// Find the number of iterations with the lowest error
				// (i.e. the highest accumulated number of correct predictions).
				double bestResult = - System.Double.MaxValue;
				for (int j = 0; j < this.NumIterations; j++)
				{
					if (results[j] > bestResult)
					{
						bestResult = results[j];
						// results[j] was recorded after j + 1 calls to performIteration,
						// so the matching iteration COUNT is j + 1. Storing the index j
						// would build the final model with one iteration too few (and
						// with no iteration at all when the first entry wins).
						bestNumIterations = j + 1;
					}
				}
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
				}
			}
			
			// Build classifier on all the data
			int numInstances2 = data.numInstances();
			double[][] trainFs2 = new double[numInstances2][];
			double[][] trainYs2 = new double[numInstances2][];
			for (int i = 0; i < numInstances2; i++)
			{
				trainFs2[i] = new double[m_NumClasses];
				trainYs2[i] = new double[m_NumClasses];
			}
			// The targets must be computed before the class attribute is replaced below.
			for (int j = 0; j < m_NumClasses; j++)
			{
				for (int i = 0; i < numInstances2; i++)
				{
					trainYs2[i][j] = (data.instance(i).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
				}
			}
			
			// Make class numeric
			data.ClassIndex = - 1;
			data.deleteAttributeAt(classIndex);
			data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
			data.ClassIndex = classIndex;
			m_NumericClassData = new Instances(data, 0);
			
			// Perform iterations
			double[][] probs2 = initialProbs(numInstances2);
			double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
			m_NumGenerated = 0;
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
			}
			double sumOfWeights2 = data.sumOfWeights();
			for (int j = 0; j < bestNumIterations; j++)
			{
				double previousLoglikelihood = logLikelihood;
				performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
				logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
				}
				// Stop early once the log-likelihood no longer changes noticeably.
				if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
				{
					return ;
				}
			}
		}
Ejemplo n.º 12
0
		/// <summary> Trains the stump on the given data.
		/// 
		/// Every attribute except the class attribute is evaluated with
		/// findSplitNominal / findSplitNumeric; the attribute with the smallest
		/// criterion value is kept together with its split point and its
		/// three-row distribution. For a nominal class each row of the
		/// distribution is normalized afterwards.
		/// </summary>
		/// <param name="instances">set of instances serving as training data 
		/// </param>
		/// <exception cref="Exception">if the data contains string attributes, has no
		/// instance with a known class value, or consists of the class attribute only
		/// </exception>
		public override void  buildClassifier(Instances instances)
		{
			if (instances.checkForStringAttributes())
			{
				throw new Exception("Can't handle string attributes!");
			}
			
			// Buffer that receives the distribution of the best split found so far.
			int inputClassCount = instances.numClasses();
			double[][] winningDist = new double[3][];
			for (int row = 0; row < winningDist.Length; row++)
			{
				winningDist[row] = new double[inputClassCount];
			}
			
			// Work on a private copy without instances whose class is missing.
			m_Instances = new Instances(instances);
			m_Instances.deleteWithMissingClass();
			
			if (m_Instances.numInstances() == 0)
			{
				throw new System.ArgumentException("No instances without missing " + "class values in training file!");
			}
			
			if (instances.numAttributes() == 1)
			{
				throw new System.ArgumentException("Attribute missing. Need at least one " + "attribute other than class attribute!");
			}
			
			// A non-nominal class is handled with a single distribution column.
			int numClasses = m_Instances.classAttribute().Nominal ? m_Instances.numClasses() : 1;
			
			double lowestCriterion = System.Double.MaxValue;
			double winningSplitPoint = - System.Double.MaxValue;
			int winningAttribute = - 1;
			bool nothingChosenYet = true;
			
			for (int att = 0; att < m_Instances.numAttributes(); att++)
			{
				// The class attribute itself is never a split candidate.
				if (att == m_Instances.classIndex())
				{
					continue;
				}
				
				// Fresh distribution for this candidate; the findSplit* methods are
				// expected to fill m_Distribution and m_SplitPoint as side effects.
				double[][] scratch = new double[3][];
				for (int row = 0; row < scratch.Length; row++)
				{
					scratch[row] = new double[numClasses];
				}
				m_Distribution = scratch;
				
				double criterion = m_Instances.attribute(att).Nominal ? findSplitNominal(att) : findSplitNumeric(att);
				
				// The first candidate is always accepted, later ones only if strictly better.
				if (nothingChosenYet || criterion < lowestCriterion)
				{
					lowestCriterion = criterion;
					winningAttribute = att;
					winningSplitPoint = m_SplitPoint;
					for (int row = 0; row < 3; row++)
					{
						Array.Copy(m_Distribution[row], 0, winningDist[row], 0, numClasses);
					}
				}
				nothingChosenYet = false;
			}
			
			// Publish the winning attribute, split point and distribution.
			m_AttIndex = winningAttribute;
			m_SplitPoint = winningSplitPoint;
			m_Distribution = winningDist;
			
			if (m_Instances.classAttribute().Nominal)
			{
				for (int row = 0; row < m_Distribution.Length; row++)
				{
					double[] counts = m_Distribution[row];
					double total = Utils.sum(counts);
					if (total != 0)
					{
						Utils.normalize(counts, total);
					}
					else
					{
						// An all-zero row means there were only missing attribute values:
						// fall back to the third row before normalizing.
						Array.Copy(m_Distribution[2], 0, counts, 0, m_Distribution[2].Length);
						Utils.normalize(counts);
					}
				}
			}
			
			// Save memory: keep only a zero-capacity copy built from the training data.
			m_Instances = new Instances(m_Instances, 0);
		}