Example #1
        /// <summary>
        /// Collects all category values --> categories
        /// Counts the occurrences of each category of each attribute --> categoryTypeNumber
        /// Counts, per target value, the occurrences of each category of each attribute --> categoryTypeTargetNumber
        /// [i][j][k]: i indexes attributes, j categories, k targets
        /// </summary>
        /// <param name="insts"></param>
        private void DataPreparation(weka.core.Instances insts)
        {
            for (int i = 0; i < insts.numAttributes(); i++)
            {
                string[] categoryType = new string[insts.attribute(i).numValues()];
                for (int j = 0; j < insts.attribute(i).numValues(); j++)
                {
                    categoryType[j] = insts.attribute(i).value(j).ToString();
                }
                categories.Add(categoryType);
            }

            List <List <string> > lst = new List <List <string> >();

            for (int i = 0; i < insts.numInstances(); i++)
            {
                lst.Add(new List <string>());

                for (int j = 0; j < insts.instance(i).numValues(); j++)
                {
                    lst[lst.Count - 1].Add(insts.instance(i).toString(j));
                }
            }

            List <int[]>   categoryTypeNumber       = new List <int[]>();
            List <int[, ]> categoryTypeTargetNumber = new List <int[, ]>();

            for (int i = 0; i < categories.Count; i++)
            {
                categoryTypeNumber.Add(new int[categories[i].Length]);
                categoryTypeTargetNumber.Add(new int[categories[i].Length, categories[categories.Count - 1].Length]);
            }

            for (int i = 0; i < lst.Count; i++)                    // Row
            {
                string targetValue = lst[i][lst[i].Count - 1];     // target (last column) of this row
                for (int j = 0; j < lst[i].Count; j++)             // Column
                {
                    for (int k = 0; k < categories[j].Length; k++) // Category count
                    {
                        if (lst[i][j].Contains(categories[j][k]))
                        {
                            categoryTypeNumber[j][k] += 1;
                            for (int trgt = 0; trgt < categories[categories.Count - 1].Length; trgt++)
                            {
                                if (targetValue == categories[categories.Count - 1][trgt])
                                {
                                    categoryTypeTargetNumber[j][k, trgt] += 1;
                                }
                            }
                        }
                    }
                }
            }
            Twoing(insts, categoryTypeNumber, categoryTypeTargetNumber);
            Gini(insts, categoryTypeNumber, categoryTypeTargetNumber);
            LogInfo("Dataset is saved.\r\n\r\n");
            LogInfo("TWOING : " + twoingPath + "\r\n\r\n");
            LogInfo("GINI : " + giniPath + "\r\n");
        }
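The per-category and per-target counts assembled here are exactly the inputs a Gini (or Twoing) split criterion needs. A minimal sketch of a weighted Gini impurity computed from those counts, assuming the standard definition (the actual Gini/Twoing methods are not part of this listing):

        // Hypothetical helper: weighted Gini impurity of one attribute,
        // computed from the counts built in DataPreparation above.
        private static double GiniForAttribute(int[] categoryCounts, int[,] categoryTargetCounts)
        {
            double total = 0.0, weighted = 0.0;
            foreach (int c in categoryCounts) total += c;
            for (int k = 0; k < categoryCounts.Length; k++)
            {
                if (categoryCounts[k] == 0) continue;
                double impurity = 1.0;                              // 1 - sum(p^2)
                for (int t = 0; t < categoryTargetCounts.GetLength(1); t++)
                {
                    double p = (double)categoryTargetCounts[k, t] / categoryCounts[k];
                    impurity -= p * p;
                }
                weighted += (categoryCounts[k] / total) * impurity;
            }
            return weighted;
        }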
Example #2
        public static string Classify(bool useRubine, float duration, bool righthandedness, List<float> SpeakerAngles, PointCollection pointHist, StylusPointCollection S, List<List<int>> hist, List<List<int>> ihist)
        {
            // Convert all parameters to format used in GestureTests
            List<Vector2> InterpretedPoints = new List<Vector2>();
            List<Vector2> StylusPoints = new List<Vector2>();
            List<Vector2> VelocityHistory = new List<Vector2>();
            List<Vector2> InverseVelocityHistory = new List<Vector2>();
            foreach(Point P in pointHist)
                InterpretedPoints.Add(new Vector2((float)P.X,(float)P.Y));
            foreach(StylusPoint P in S)
                StylusPoints.Add(new Vector2((float)P.X,(float)P.Y));
            for (int i = 0; i < hist[0].Count; i++)
            {
                VelocityHistory.Add(new Vector2(hist[0][i], hist[1][i]));
                InverseVelocityHistory.Add(new Vector2(ihist[0][i], ihist[1][i]));
            }

            // Create a new Sample, compute the features, and classify
            GS = new GestureSample(GestureTests.Types.GestureType.unknown, righthandedness,duration,SpeakerAngles,InterpretedPoints,StylusPoints,VelocityHistory,InverseVelocityHistory);
            GS.ComputeFeatures(GestureFeatures.PointsStroke);

            if (useRubine)
                return EC.Recognizer.Classify(GS).ToString();
            WriteARFF();

            Instances test = new Instances(new java.io.FileReader("outfile.arff"));
            test.setClassIndex(0);

            double clsLabel = cls.classifyInstance(test.instance(0));
            test.instance(0).setClassValue(clsLabel);

            // Return the appropriate label
            return ((GestureType2D)((int)clsLabel+1)).ToString();
        }
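Classify relies on a static, already-trained cls field (and an EC recognizer for the Rubine path) that this snippet does not show. A plausible initialization, sketched here with a hypothetical training ARFF and a J48 tree:

        // Hypothetical setup for the static classifier used in Classify;
        // "training.arff" is a placeholder file name.
        static weka.classifiers.Classifier cls;
        public static void TrainClassifier()
        {
            Instances train = new Instances(new java.io.FileReader("training.arff"));
            train.setClassIndex(0);   // Classify() also uses class index 0 on the test set
            cls = new weka.classifiers.trees.J48();
            cls.buildClassifier(train);
        }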
Example #3
  public static void Main(string[] args) {
    try {
      int runs = 1;
      string algo = "";
      string data = "";
      if(args.Length>0) runs = Convert.ToInt32(args[0]);
      if(args.Length>1) algo = args[1];
      if(args.Length>2) data = args[2];

      Stopwatch read = new Stopwatch(), 
        build = new Stopwatch(), 
        classify = new Stopwatch();
      for (int cnt=0; cnt<runs; cnt++) {
        read.Start();
        Instances train = new Instances(new java.io.FileReader(data+"train.arff"));
        train.setClassIndex(train.numAttributes() - 1);
        Instances test = new Instances(new java.io.FileReader(data+"test.arff"));
        test.setClassIndex(test.numAttributes() - 1);
        read.Stop();

        Classifier[] clList = {
          new weka.classifiers.bayes.NaiveBayes(),
          new weka.classifiers.trees.RandomForest(),
          new weka.classifiers.trees.J48(),
          new weka.classifiers.functions.MultilayerPerceptron(),
          new weka.classifiers.rules.ConjunctiveRule(),
          new weka.classifiers.functions.SMO()
        };

        build.Start();
        foreach (Classifier classifier in clList) {
          if(algo.Equals("") || algo.Equals("All") || classifier.getClass().getSimpleName().Equals(algo))
              classifier.buildClassifier(train);
        }
        build.Stop();

        classify.Start();
        foreach (Classifier classifier in clList) {
          if(algo.Equals("") || algo.Equals("All") || classifier.getClass().getSimpleName().Equals(algo)) {
              int numCorrect = 0;
              for (int i = 0; i < test.numInstances(); i++)
              {
                  if (classifier.classifyInstance(test.instance(i)) == test.instance(i).classValue())
                      numCorrect++;
              }
              //Console.Write(classifier.getClass().getSimpleName() + "\t" + numCorrect + " out of " + test.numInstances() + " correct (" +(100.0 * numCorrect / test.numInstances()) + "%)");
          }
        }
        classify.Stop();
      }
      Console.WriteLine("{\""+ algo + "\"," + read.ElapsedMilliseconds + "," + build.ElapsedMilliseconds + "," + classify.ElapsedMilliseconds + "," + (read.ElapsedMilliseconds+build.ElapsedMilliseconds+classify.ElapsedMilliseconds)+"};");
      if(args.Length>3) Console.ReadLine();
    } catch (java.lang.Exception e){
      e.printStackTrace();
    }
  }
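The hand-rolled accuracy loop in the classify phase can equivalently be driven by weka.classifiers.Evaluation, which also yields precision, recall, and a confusion matrix for free. A sketch of the replacement:

  // Sketch: Evaluation-based accuracy, equivalent to the manual counting loop above.
  static void ReportAccuracy(Classifier classifier, Instances train, Instances test) {
    weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(train);
    eval.evaluateModel(classifier, test);
    Console.WriteLine(classifier.getClass().getSimpleName() + "\t" + eval.pctCorrect() + "% correct");
  }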
Example #4
        public static double classifyTrain_Test(string classifierFileName, Classifier _classifier)
        {
            double performance = 0.0;

            try
            {
                FileReader          javaFileReader = new FileReader(classifierFileName);
                weka.core.Instances insts          = new weka.core.Instances(javaFileReader);
                javaFileReader.close();

                insts.setClassIndex(insts.numAttributes() - 1);

                System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

                _classifier.buildClassifier(train);

                int numCorrect   = 0;
                var numberOfInst = insts.numInstances();
                int dataIndex    = 0;
                for (int i = trainSize; i < numberOfInst; i++)
                {
                    dataIndex++;
                    weka.core.Instance currentInst = insts.instance(i);

                    double   predictClass = _classifier.classifyInstance(currentInst);
                    double[] dist         = _classifier.distributionForInstance(currentInst);


                    string actualClass    = insts.classAttribute().value((int)insts.instance(i).classValue());
                    string predictedClass = insts.classAttribute().value((int)predictClass);


                    var abcd = _classifier.getClass();

                    if (predictedClass == actualClass)
                    {
                        numCorrect++;
                    }
                }
                performance = (double)((double)numCorrect / (double)testSize) * 100;

                System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + performance.ToString() + "%)");
            }
            catch (java.lang.Exception ex)
            {
                ex.printStackTrace();
            }

            return(performance);
        }
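        // A possible call site for classifyTrain_Test above, assuming the
        // class-level percentSplit field (e.g. 66) used throughout these
        // examples; "iris.arff" and the J48 choice are placeholders.
        public static void RunSplitEvaluation()
        {
            double accuracy = classifyTrain_Test("iris.arff", new weka.classifiers.trees.J48());
            System.Console.WriteLine("Accuracy: " + accuracy + "%");
        }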
        public static double SupportVectorMachineTest(weka.core.Instances insts)
        {
            try
            {
                //weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader("iris.arff"));

                insts.setClassIndex(insts.numAttributes() - 1);


                SupportVectorMachine = new weka.classifiers.functions.SMO();

                weka.filters.Filter myDummy = new weka.filters.unsupervised.attribute.NominalToBinary();

                myDummy.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myDummy);


                weka.filters.Filter myNormalize = new weka.filters.unsupervised.instance.Normalize();
                myNormalize.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myNormalize);

                weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
                myRandom.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myRandom);

                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);


                SupportVectorMachine.buildClassifier(train);


                int numCorrect = 0;
                for (int i = trainSize; i < insts.numInstances(); i++)
                {
                    weka.core.Instance currentInst    = insts.instance(i);
                    double             predictedClass = SupportVectorMachine.classifyInstance(currentInst);
                    if (predictedClass == insts.instance(i).classValue())
                    {
                        numCorrect++;
                    }
                }
                return((double)numCorrect / (double)testSize * 100.0);
            }
            catch (java.lang.Exception ex)
            {
                ex.printStackTrace();
                return(0);
            }
        }
Example #6
        /// <summary>
        /// Adds the theta values from the Gini results to a list,
        /// replaces the nominal attributes of the ARFF file with numeric ones,
        /// and writes the new attribute values back to the ARFF file.
        /// </summary>
        /// <param name="insts"></param>
        /// <param name="result"></param>
        /// <param name="path"></param>
        private void CreateNewDataset(weka.core.Instances insts, List <double[]> result, string path)
        {
            // Add the theta values to the list
            List <List <string> > lst = new List <List <string> >();

            for (int i = 0; i < insts.numInstances(); i++)
            {
                lst.Add(new List <string>());
                for (int j = 0; j < insts.instance(i).numValues() - 1; j++)
                {
                    string value = insts.instance(i).toString(j);
                    for (int k = 0; k < categories[j].Length; k++)
                    {
                        if (value == categories[j][k])
                        {
                            lst[lst.Count - 1].Add(String.Format("{0:0.00}", result[j][k]));
                            break;
                        }
                    }
                }
            }
            // Replace the attributes (nominal -> numeric)
            for (int i = 0; i < insts.numAttributes() - 1; i++)
            {
                string name = insts.attribute(i).name().ToString();
                insts.deleteAttributeAt(i);
                weka.core.Attribute att = new weka.core.Attribute(name);
                insts.insertAttributeAt(att, i);
            }

            // Write the attribute values
            for (int i = 0; i < insts.numInstances(); i++)
            {
                for (int j = 0; j < insts.instance(i).numValues() - 1; j++)
                {
                    insts.instance(i).setValue(j, Convert.ToDouble(lst[i][j]));
                }
            }

            if (File.Exists(path))
            {
                File.Delete(path);
            }
            var saver = new ArffSaver();

            saver.setInstances(insts);
            saver.setFile(new java.io.File(path));
            saver.writeBatch();
        }
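        // Sketch: the ARFF written above can be reloaded the same way the
        // other examples load their data, as a quick round-trip check of
        // ArffSaver's output.
        private static void VerifySavedDataset(string path)
        {
            weka.core.Instances reloaded = new weka.core.Instances(new java.io.FileReader(path));
            System.Console.WriteLine(reloaded.numInstances() + " instances, "
                                     + reloaded.numAttributes() + " attributes reloaded.");
        }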
        public static double NaiveBayesTest(weka.core.Instances insts)
        {
            try
            {
                //weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader("iris.arff"));

                insts.setClassIndex(insts.numAttributes() - 1);


                NaiveBayescl = new weka.classifiers.bayes.NaiveBayes();


                //discretize
                weka.filters.Filter myDiscretize = new weka.filters.unsupervised.attribute.Discretize();
                myDiscretize.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myDiscretize);

                weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
                myRandom.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myRandom);

                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

                NaiveBayescl.buildClassifier(train);


                int numCorrect = 0;
                for (int i = trainSize; i < insts.numInstances(); i++)
                {
                    weka.core.Instance currentInst    = insts.instance(i);
                    double             predictedClass = NaiveBayescl.classifyInstance(currentInst);
                    if (predictedClass == insts.instance(i).classValue())
                    {
                        numCorrect++;
                    }
                }
                return((double)numCorrect / (double)testSize * 100.0);
            }
            catch (java.lang.Exception ex)
            {
                ex.printStackTrace();
                return(0);
            }
        }
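Percent-split accuracy depends on the particular shuffle produced by Randomize; 10-fold cross-validation gives a steadier estimate. A sketch using Weka's built-in cross-validation support:

        // Sketch: stratified 10-fold cross-validation as an alternative
        // to the single percent split used above.
        public static string NaiveBayesCrossValidation(weka.core.Instances insts)
        {
            insts.setClassIndex(insts.numAttributes() - 1);
            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(insts);
            eval.crossValidateModel(new weka.classifiers.bayes.NaiveBayes(), insts, 10, new java.util.Random(1));
            return eval.toSummaryString();
        }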
Example #8
		/// <summary> Builds the boosted classifier</summary>
		public virtual void  buildClassifier(Instances data)
		{
			m_RandomInstance = new Random(m_Seed);
			Instances boostData;
			int classIndex = data.classIndex();
			
			if (data.classAttribute().Numeric)
			{
				throw new Exception("LogitBoost can't handle a numeric class!");
			}
			if (m_Classifier == null)
			{
				throw new System.Exception("A base classifier has not been specified!");
			}
			
			if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
			{
				m_UseResampling = true;
			}
			if (data.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Creating copy of the training data");
			}
			
			m_NumClasses = data.numClasses();
			m_ClassAttribute = data.classAttribute();
			
			// Create a copy of the data 
			data = new Instances(data);
			data.deleteWithMissingClass();
			
			// Create the base classifiers
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Creating base classifiers");
			}
			m_Classifiers = new Classifier[m_NumClasses][];
			for (int j = 0; j < m_NumClasses; j++)
			{
				m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
			}
			
			// Do we want to select the appropriate number of iterations
			// using cross-validation?
			int bestNumIterations = this.NumIterations;
			if (m_NumFolds > 1)
			{
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Processing first fold.");
				}
				
				// Array for storing the results
				double[] results = new double[this.NumIterations];
				
				// Iterate through the cv-runs
				for (int r = 0; r < m_NumRuns; r++)
				{
					
					// Stratify the data
					data.randomize(m_RandomInstance);
					data.stratify(m_NumFolds);
					
					// Perform the cross-validation
					for (int i = 0; i < m_NumFolds; i++)
					{
						
						// Get train and test folds
						Instances train = data.trainCV(m_NumFolds, i, m_RandomInstance);
						Instances test = data.testCV(m_NumFolds, i);
						
						// Make class numeric
						Instances trainN = new Instances(train);
						trainN.ClassIndex = - 1;
						trainN.deleteAttributeAt(classIndex);
						trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
						trainN.ClassIndex = classIndex;
						m_NumericClassData = new Instances(trainN, 0);
						
						// Get class values
						int numInstances = train.numInstances();
						double[][] tmpArray = new double[numInstances][];
						for (int i2 = 0; i2 < numInstances; i2++)
						{
							tmpArray[i2] = new double[m_NumClasses];
						}
						double[][] trainFs = tmpArray;
						double[][] tmpArray2 = new double[numInstances][];
						for (int i3 = 0; i3 < numInstances; i3++)
						{
							tmpArray2[i3] = new double[m_NumClasses];
						}
						double[][] trainYs = tmpArray2;
						for (int j = 0; j < m_NumClasses; j++)
						{
							for (int k = 0; k < numInstances; k++)
							{
								trainYs[k][j] = (train.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
							}
						}
						
						// Perform iterations
						double[][] probs = initialProbs(numInstances);
						m_NumGenerated = 0;
						double sumOfWeights = train.sumOfWeights();
						for (int j = 0; j < this.NumIterations; j++)
						{
							performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
							Evaluation eval = new Evaluation(train);
							eval.evaluateModel(this, test);
							results[j] += eval.correct();
						}
					}
				}
				
				// Find the number of iterations with the lowest error
				//UPGRADE_TODO: The equivalent in .NET for field 'java.lang.Double.MAX_VALUE' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
				double bestResult = - System.Double.MaxValue;
				for (int j = 0; j < this.NumIterations; j++)
				{
					if (results[j] > bestResult)
					{
						bestResult = results[j];
						bestNumIterations = j;
					}
				}
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
				}
			}
			
			// Build classifier on all the data
			int numInstances2 = data.numInstances();
			double[][] trainFs2 = new double[numInstances2][];
			for (int i4 = 0; i4 < numInstances2; i4++)
			{
				trainFs2[i4] = new double[m_NumClasses];
			}
			double[][] trainYs2 = new double[numInstances2][];
			for (int i5 = 0; i5 < numInstances2; i5++)
			{
				trainYs2[i5] = new double[m_NumClasses];
			}
			for (int j = 0; j < m_NumClasses; j++)
			{
				for (int i = 0, k = 0; i < numInstances2; i++, k++)
				{
					trainYs2[i][j] = (data.instance(k).classValue() == j)?1.0 - m_Offset:0.0 + (m_Offset / (double) m_NumClasses);
				}
			}
			
			// Make class numeric
			data.ClassIndex = - 1;
			data.deleteAttributeAt(classIndex);
			data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
			data.ClassIndex = classIndex;
			m_NumericClassData = new Instances(data, 0);
			
			// Perform iterations
			double[][] probs2 = initialProbs(numInstances2);
            double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
			m_NumGenerated = 0;
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
			}
			double sumOfWeights2 = data.sumOfWeights();
			for (int j = 0; j < bestNumIterations; j++)
			{
				double previousLoglikelihood = logLikelihood;
				performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
                logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
				if (m_Debug)
				{
					System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
				}
				if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
				{
					return ;
				}
			}
		}
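initialProbs is not included in this listing; LogitBoost conventionally starts every instance at the uniform class distribution, so a sketch under that assumption would be:

		// Assumed helper (not shown above): uniform starting class
		// probabilities, the conventional LogitBoost initialization.
		private double[][] initialProbs(int numInstances)
		{
			double[][] probs = new double[numInstances][];
			for (int i = 0; i < numInstances; i++)
			{
				probs[i] = new double[m_NumClasses];
				for (int j = 0; j < m_NumClasses; j++)
					probs[i][j] = 1.0 / m_NumClasses;
			}
			return probs;
		}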
Example #9
        public static void BayesTest()
        {
            try
            {
                weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader("iris.arff"));
                insts.setClassIndex(insts.numAttributes() - 1);

                weka.classifiers.Classifier cl = new weka.classifiers.bayes.BayesNet();
                System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                //randomize the order of the instances in the dataset.
                weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
                myRandom.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myRandom);

                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
                weka.core.Instances test  = new weka.core.Instances(insts, 0, 0);


                cl.buildClassifier(train);
                //print model
                System.Console.WriteLine(cl);

                int numCorrect = 0;
                for (int i = trainSize; i < insts.numInstances(); i++)
                {
                    weka.core.Instance currentInst    = insts.instance(i);
                    double             predictedClass = cl.classifyInstance(currentInst);
                    test.add(currentInst);

                    double[] prediction = cl.distributionForInstance(currentInst);

                    for (int x = 0; x < prediction.Length; x++)
                    {
                        System.Console.WriteLine("Probability of class [{0}] for [{1}] is: {2}", currentInst.classAttribute().value(x), currentInst, Math.Round(prediction[x], 4));
                    }
                    System.Console.WriteLine();

                    if (predictedClass == insts.instance(i).classValue())
                    {
                        numCorrect++;
                    }
                }
                System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" +
                                         (double)((double)numCorrect / (double)testSize * 100.0) + "%)");

                // Evaluate the model on the training data
                weka.classifiers.Evaluation eTrain = new weka.classifiers.Evaluation(train);
                eTrain.evaluateModel(cl, train);

                // Print the results as in Weka explorer:
                //Print statistics
                String strSummaryTrain = eTrain.toSummaryString();
                System.Console.WriteLine(strSummaryTrain);

                //Print detailed class statistics
                System.Console.WriteLine(eTrain.toClassDetailsString());

                //Print confusion matrix
                System.Console.WriteLine(eTrain.toMatrixString());

                // Get the confusion matrix
                double[][] cmMatrixTrain = eTrain.confusionMatrix();


                // Test the model
                weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(test);
                eTest.evaluateModel(cl, test);

                // Print the results as in Weka explorer:
                //Print statistics
                String strSummaryTest = eTest.toSummaryString();
                System.Console.WriteLine(strSummaryTest);

                //Print detailed class statistics
                System.Console.WriteLine(eTest.toClassDetailsString());

                //Print confusion matrix
                System.Console.WriteLine(eTest.toMatrixString());

                // Get the confusion matrix
                double[][] cmMatrixTest = eTest.confusionMatrix();
            }

            catch (java.lang.Exception ex)
            {
                ex.printStackTrace();
            }
        }
Example #10
        /// <summary> Evaluates a classifier with the options given in an array of
		/// strings. <p/>
		/// 
		/// Valid options are: <p/>
		/// 
		/// -t name of training file <br/>
		/// Name of the file with the training data. (required) <p/>
		/// 
		/// -T name of test file <br/>
		/// Name of the file with the test data. If missing a cross-validation 
		/// is performed. <p/>
		/// 
		/// -c class index <br/>
		/// Index of the class attribute (1, 2, ...; default: last). <p/>
		/// 
		/// -x number of folds <br/>
		/// The number of folds for the cross-validation (default: 10). <p/>
		/// 
		/// -s random number seed <br/>
		/// Random number seed for the cross-validation (default: 1). <p/>
		/// 
		/// -m file with cost matrix <br/>
		/// The name of a file containing a cost matrix. <p/>
		/// 
		/// -l name of model input file <br/>
		/// Loads classifier from the given file. <p/>
		/// 
		/// -d name of model output file <br/>
		/// Saves classifier built from the training data into the given file. <p/>
		/// 
		/// -v <br/>
		/// Outputs no statistics for the training data. <p/>
		/// 
		/// -o <br/>
		/// Outputs statistics only, not the classifier. <p/>
		/// 
		/// -i <br/>
		/// Outputs detailed information-retrieval statistics per class. <p/>
		/// 
		/// -k <br/>
		/// Outputs information-theoretic statistics. <p/>
		/// 
		/// -p <br/>
		/// Outputs predictions for test instances (and nothing else). <p/>
		/// 
		/// -r <br/>
		/// Outputs cumulative margin distribution (and nothing else). <p/>
		/// 
		/// -g <br/> 
		/// Only for classifiers that implement "Graphable." Outputs
		/// the graph representation of the classifier (and nothing
		/// else). <p/>
		/// 
		/// </summary>
		/// <param name="classifier">machine learning classifier
		/// </param>
		/// <param name="options">the array of string containing the options
		/// </param>
		/// <throws>  Exception if model could not be evaluated successfully </throws>
		/// <returns> a string describing the results 
		/// </returns>
		public static System.String evaluateModel(Classifier classifier, System.String[] options)
		{
			
			Instances train = null, tempTrain, test = null, template = null;
			int seed = 1, folds = 10, classIndex = - 1;
			System.String trainFileName, testFileName, sourceClass, classIndexString, seedString, foldsString, objectInputFileName, objectOutputFileName, attributeRangeString;
			bool noOutput = false, printClassifications = false, trainStatistics = true, printMargins = false, printComplexityStatistics = false, printGraph = false, classStatistics = false, printSource = false;
			System.Text.StringBuilder text = new System.Text.StringBuilder();
			System.IO.StreamReader trainReader = null, testReader = null;
			//UPGRADE_TODO: Class 'java.io.ObjectInputStream' was converted to 'System.IO.BinaryReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioObjectInputStream'"
			System.IO.BinaryReader objectInputStream = null;
            System.IO.Stream objectStream=null;
			CostMatrix costMatrix = null;
			System.Text.StringBuilder schemeOptionsText = null;
			Range attributesToOutput = null;
			long trainTimeStart = 0, trainTimeElapsed = 0, testTimeStart = 0, testTimeElapsed = 0;
			Classifier classifierBackup;
			
			try
			{
				
				// Get basic options (options the same for all schemes)
				classIndexString = Utils.getOption('c', options);
				if (classIndexString.Length != 0)
				{
					if (classIndexString.Equals("first"))
						classIndex = 1;
					else if (classIndexString.Equals("last"))
						classIndex = - 1;
					else
						classIndex = System.Int32.Parse(classIndexString);
				}
				trainFileName = Utils.getOption('t', options);
				objectInputFileName = Utils.getOption('l', options);
				objectOutputFileName = Utils.getOption('d', options);
				testFileName = Utils.getOption('T', options);
				if (trainFileName.Length == 0)
				{
					if (objectInputFileName.Length == 0)
					{
						throw new System.Exception("No training file and no object " + "input file given.");
					}
					if (testFileName.Length == 0)
					{
						throw new System.Exception("No training file and no test " + "file given.");
					}
				}
				else if ((objectInputFileName.Length != 0) && ((!(classifier is UpdateableClassifier)) || (testFileName.Length == 0)))
				{
					throw new System.Exception("Classifier not incremental, or no " + "test file provided: can't " + "use both train and model file.");
				}
				try
				{
					if (trainFileName.Length != 0)
					{
						//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.io.BufferedReader.BufferedReader'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
						//UPGRADE_WARNING: At least one expression was used more than once in the target code. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1181'"
						//UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'"
						trainReader = new System.IO.StreamReader(new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).CurrentEncoding);
					}
					if (testFileName.Length != 0)
					{
						//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.io.BufferedReader.BufferedReader'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
						//UPGRADE_WARNING: At least one expression was used more than once in the target code. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1181'"
						//UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'"
						testReader = new System.IO.StreamReader(new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).CurrentEncoding);
					}
					if (objectInputFileName.Length != 0)
					{
						//UPGRADE_TODO: Constructor 'java.io.FileInputStream.FileInputStream' was converted to 'System.IO.FileStream.FileStream' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioFileInputStreamFileInputStream_javalangString'"
						objectStream= new System.IO.FileStream(objectInputFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
						if (objectInputFileName.EndsWith(".gz"))
						{
							//UPGRADE_ISSUE: Constructor 'java.util.zip.GZIPInputStream.GZIPInputStream' was not converted. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1000_javautilzipGZIPInputStream'"
							objectStream= new ICSharpCode.SharpZipLib.GZip.GZipInputStream(objectStream);
						}
						//UPGRADE_TODO: Class 'java.io.ObjectInputStream' was converted to 'System.IO.BinaryReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioObjectInputStream'"
						objectInputStream = new System.IO.BinaryReader(objectStream);
					}
				}
				catch (System.Exception e)
				{
					//UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
					throw new System.Exception("Can't open file " + e.Message + '.');
				}
				if (testFileName.Length != 0)
				{
					template = test = new Instances(testReader, 1);
					if (classIndex != - 1)
					{
						test.ClassIndex = classIndex - 1;
					}
					else
					{
						test.ClassIndex = test.numAttributes() - 1;
					}
					if (classIndex > test.numAttributes())
					{
						throw new System.Exception("Index of class attribute too large.");
					}
				}
				if (trainFileName.Length != 0)
				{
					if ((classifier is UpdateableClassifier) && (testFileName.Length != 0))
					{
						train = new Instances(trainReader, 1);
					}
					else
					{
						train = new Instances(trainReader);
					}
					template = train;
					if (classIndex != - 1)
					{
						train.ClassIndex = classIndex - 1;
					}
					else
					{
						train.ClassIndex = train.numAttributes() - 1;
					}
					if ((testFileName.Length != 0) && !test.equalHeaders(train))
					{
						throw new System.ArgumentException("Train and test file not compatible!");
					}
					if (classIndex > train.numAttributes())
					{
						throw new System.Exception("Index of class attribute too large.");
					}
				}
				if (template == null)
				{
					throw new System.Exception("No actual dataset provided to use as template");
				}
				seedString = Utils.getOption('s', options);
				if (seedString.Length != 0)
				{
					seed = System.Int32.Parse(seedString);
				}
				foldsString = Utils.getOption('x', options);
				if (foldsString.Length != 0)
				{
					folds = System.Int32.Parse(foldsString);
				}
				costMatrix = handleCostOption(Utils.getOption('m', options), template.numClasses());
				
				classStatistics = Utils.getFlag('i', options);
				noOutput = Utils.getFlag('o', options);
				trainStatistics = !Utils.getFlag('v', options);
				printComplexityStatistics = Utils.getFlag('k', options);
				printMargins = Utils.getFlag('r', options);
				printGraph = Utils.getFlag('g', options);
				sourceClass = Utils.getOption('z', options);
				printSource = (sourceClass.Length != 0);
				
				// Check -p option
				try
				{
					attributeRangeString = Utils.getOption('p', options);
				}
				catch (System.Exception e)
				{
					//UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
					throw new System.Exception(e.Message + "\nNOTE: the -p option has changed. " + "It now expects a parameter specifying a range of attributes " + "to list with the predictions. Use '-p 0' for none.");
				}
				if (attributeRangeString.Length != 0)
				{
					// if no test file given, we cannot print predictions
					if (testFileName.Length == 0)
						throw new System.Exception("Cannot print predictions ('-p') without test file ('-T')!");
					
					printClassifications = true;
					if (!attributeRangeString.Equals("0"))
						attributesToOutput = new Range(attributeRangeString);
				}
				
				// if no training file given, we don't have any priors
				if ((trainFileName.Length == 0) && (printComplexityStatistics))
					throw new System.Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");
				
				// If a model file is given, we can't process 
				// scheme-specific options
				if (objectInputFileName.Length != 0)
				{
					Utils.checkForRemainingOptions(options);
				}
				else
				{
					
					// Set options for classifier
					//				if (classifier instanceof OptionHandler) 
					//				{
					//					for (int i = 0; i < options.length; i++) 
					//					{
					//						if (options[i].length() != 0) 
					//						{
					//							if (schemeOptionsText == null) 
					//							{
					//								schemeOptionsText = new StringBuffer();
					//							}
					//							if (options[i].indexOf(' ') != -1) 
					//							{
					//								schemeOptionsText.append('"' + options[i] + "\" ");
					//							} 
					//							else 
					//							{
					//								schemeOptionsText.append(options[i] + " ");
					//							}
					//						}
					//					}
					//					((OptionHandler)classifier).setOptions(options);
					//				}
				}
				Utils.checkForRemainingOptions(options);
			}
			catch (System.Exception e)
			{
				//UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
				throw new System.Exception("\nWeka exception: " + e.Message + makeOptionString(classifier));
			}
			
			
			// Setup up evaluation objects
			Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
			Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
			
			if (objectInputFileName.Length != 0)
			{
				testingEvaluation.useNoPriors();
				
				// Load classifier from file
				//UPGRADE_WARNING: Method 'java.io.ObjectInputStream.readObject' was converted to 'SupportClass.Deserialize' which may throw an exception. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1101'"
				//classifier = (Classifier) SupportClass.Deserialize(objectInputStream);


                //FileStream fs = new FileStream("DataFile.dat", FileMode.Open);
                try
                {
                    BinaryFormatter formatter = new BinaryFormatter();

                    // Deserialize the hashtable from the file and 
                    // assign the reference to the local variable.
                   // addresses = (Hashtable)formatter.Deserialize(fs);
                    classifier = (Classifier)formatter.Deserialize(objectStream);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Failed to deserialize. Reason: " + e.Message);
                    throw;
                }
                finally
                {
                    objectStream.Close();
                    //fs.Close();
                }


				objectInputStream.Close();
			}
			
			// backup of fully setup classifier for cross-validation
			classifierBackup = Classifier.makeCopy(classifier);
			
			// Build the classifier if no object file provided
			if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null) && (trainFileName.Length != 0))
			{
				
				// Build classifier incrementally
				trainingEvaluation.Priors = train;
				testingEvaluation.Priors = train;
				trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
				if (objectInputFileName.Length == 0)
				{
					classifier.buildClassifier(train);
				}
				while (train.readInstance(trainReader))
				{
					
					trainingEvaluation.updatePriors(train.instance(0));
					testingEvaluation.updatePriors(train.instance(0));
					((UpdateableClassifier) classifier).updateClassifier(train.instance(0));
					train.delete(0);
				}
				trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
				trainReader.Close();
			}
			else if (objectInputFileName.Length == 0)
			{
				
				// Build classifier in one go
				tempTrain = new Instances(train);
				trainingEvaluation.Priors = tempTrain;
				testingEvaluation.Priors = tempTrain;
				trainTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
				classifier.buildClassifier(tempTrain);
				trainTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - trainTimeStart;
			}
			
			// Save the classifier if an object output file is provided
			if (objectOutputFileName.Length != 0)
			{
				//UPGRADE_TODO: Constructor 'java.io.FileOutputStream.FileOutputStream' was converted to 'System.IO.FileStream.FileStream' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioFileOutputStreamFileOutputStream_javalangString'"
				System.IO.Stream os = new System.IO.FileStream(objectOutputFileName, System.IO.FileMode.Create);
				if (objectOutputFileName.EndsWith(".gz"))
				{
					//UPGRADE_ISSUE: Constructor 'java.util.zip.GZIPOutputStream.GZIPOutputStream' was not converted. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1000_javautilzipGZIPOutputStream'"
					os = new ICSharpCode.SharpZipLib.GZip.GZipOutputStream(os);
				}
				//UPGRADE_TODO: Class 'java.io.ObjectOutputStream' was converted to 'System.IO.BinaryWriter' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioObjectOutputStream'"
				System.IO.BinaryWriter objectOutputStream = new System.IO.BinaryWriter(os);
				//UPGRADE_TODO: Method 'java.io.ObjectOutputStream.writeObject' was converted to 'SupportClass.Serialize' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javaioObjectOutputStreamwriteObject_javalangObject'"
				//SupportClass.Serialize(objectOutputStream, classifier);

                BinaryFormatter bformatter = new BinaryFormatter();
                bformatter.Serialize(os, classifier);                               

				objectOutputStream.Flush();
				objectOutputStream.Close();
			}
			
			// If classifier is drawable output string describing graph
			if ((classifier is Drawable) && (printGraph))
			{
				return ((Drawable) classifier).graph();
			}
			
			// Output the classifier as equivalent source
			if ((classifier is Sourcable) && (printSource))
			{
				return wekaStaticWrapper((Sourcable) classifier, sourceClass);
			}
			
			// Output test instance predictions only
			if (printClassifications)
			{
				return toPrintClassifications(classifier, new Instances(template, 0), testFileName, classIndex, attributesToOutput);
			}
			
			// Output model
			if (!(noOutput || printMargins))
			{
				//			if (classifier instanceof OptionHandler) 
				//			{
				//				if (schemeOptionsText != null) 
				//				{
				//					text.append("\nOptions: "+schemeOptionsText);
				//					text.append("\n");
				//				}
				//			}
				//UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Object.toString' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
				text.Append("\n" + classifier.ToString() + "\n");
			}
			
			if (!printMargins && (costMatrix != null))
			{
				text.Append("\n=== Evaluation Cost Matrix ===\n\n").Append(costMatrix.ToString());
			}
			
			// Compute error estimate from training data
			if ((trainStatistics) && (trainFileName.Length != 0))
			{
				
				if ((classifier is UpdateableClassifier) && (testFileName.Length != 0) && (costMatrix == null))
				{
					
					// Classifier was trained incrementally, so we have to 
					// reopen the training data in order to test on it.
					//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.io.BufferedReader.BufferedReader'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
					//UPGRADE_WARNING: At least one expression was used more than once in the target code. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1181'"
					//UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'"
					trainReader = new System.IO.StreamReader(new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(trainFileName, System.Text.Encoding.Default).CurrentEncoding);
					
					// Incremental testing
					train = new Instances(trainReader, 1);
					if (classIndex != - 1)
					{
						train.ClassIndex = classIndex - 1;
					}
					else
					{
						train.ClassIndex = train.numAttributes() - 1;
					}
					testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
					while (train.readInstance(trainReader))
					{
						
						trainingEvaluation.evaluateModelOnce((Classifier) classifier, train.instance(0));
						train.delete(0);
					}
					testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
					trainReader.Close();
				}
				else
				{
					testTimeStart = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
					trainingEvaluation.evaluateModel(classifier, train);
					testTimeElapsed = (System.DateTime.Now.Ticks - 621355968000000000) / 10000 - testTimeStart;
				}
				
				// Print the results of the training evaluation
				if (printMargins)
				{
					return trainingEvaluation.toCumulativeMarginDistributionString();
				}
				else
				{
					text.Append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");
					text.Append("\nTime taken to test model on training data: " + Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");
					text.Append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n", printComplexityStatistics));
					if (template.classAttribute().Nominal)
					{
						if (classStatistics)
						{
							text.Append("\n\n" + trainingEvaluation.toClassDetailsString());
						}
						text.Append("\n\n" + trainingEvaluation.toMatrixString());
					}
				}
			}
			
			// Compute proper error estimates
			if (testFileName.Length != 0)
			{
				
				// Testing is on the supplied test data
				while (test.readInstance(testReader))
				{
					
					testingEvaluation.evaluateModelOnce((Classifier) classifier, test.instance(0));
					test.delete(0);
				}
				testReader.Close();
				
				text.Append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n", printComplexityStatistics));
			}
			else if (trainFileName.Length != 0)
			{
				
				// Testing is via cross-validation on training data
				//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.util.Random.Random'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
				System.Random random = new System.Random((System.Int32) seed);
				// use untrained (!) classifier for cross-validation
				classifier = Classifier.makeCopy(classifierBackup);
				testingEvaluation.crossValidateModel(classifier, train, folds, random);
				if (template.classAttribute().Numeric)
				{
					text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n", printComplexityStatistics));
				}
				else
				{
					text.Append("\n\n\n" + testingEvaluation.toSummaryString("=== Stratified " + "cross-validation ===\n", printComplexityStatistics));
				}
			}
			if (template.classAttribute().Nominal)
			{
				if (classStatistics)
				{
					text.Append("\n\n" + testingEvaluation.toClassDetailsString());
				}
				text.Append("\n\n" + testingEvaluation.toMatrixString());
			}
			return text.ToString();
		}
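A typical invocation of this option-driven entry point, assuming the converted Evaluation class exposes it as Weka's own command line does; "train.arff" is a placeholder path, and only options parsed above (-t, -x, -s) are used:

		// Sketch: 10-fold cross-validation of a J48 tree via the option parser.
		string report = Evaluation.evaluateModel(
			new weka.classifiers.trees.J48(),
			new string[] { "-t", "train.arff", "-x", "10", "-s", "1" });
		System.Console.WriteLine(report);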
Example #11
		/// <summary> Evaluates the classifier on a given set of instances. Note that
		/// the data must have exactly the same format (e.g. order of
		/// attributes) as the data used to train the classifier! Otherwise
		/// the results will generally be meaningless.
		/// 
		/// </summary>
		/// <param name="classifier">machine learning classifier
		/// </param>
		/// <param name="data">set of test instances for evaluation
		/// </param>
		/// <returns> the predictions
		/// </returns>
		/// <throws>  Exception if model could not be evaluated successfully </throws>
		public virtual double[] evaluateModel(Classifier classifier, Instances data)
		{
			
			double[] predictions = new double[data.numInstances()];
			
			for (int i = 0; i < data.numInstances(); i++)
			{
				predictions[i] = evaluateModelOnce((Classifier) classifier, data.instance(i));
			}
			return predictions;
		}
Example #12
		/// <summary> Creates split on numeric attribute.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		private void  handleNumericAttribute(Instances trainInstances)
		{
			
			int firstMiss;
			int next = 1;
			int last = 0;
			int splitIndex = - 1;
			double currentInfoGain;
			double defaultEnt;
			double minSplit;
			Instance instance;
			int i;
			
			// Current attribute is a numeric attribute.
			m_distribution = new Distribution(2, trainInstances.numClasses());
			
			// Only Instances with known values are relevant.
			System.Collections.IEnumerator enu = trainInstances.enumerateInstances();
			i = 0;
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				instance = (Instance) enu.Current;
				if (instance.isMissing(m_attIndex))
					break;
				m_distribution.add(1, instance);
				i++;
			}
			firstMiss = i;
			
			// Compute minimum number of Instances required in each
			// subset.
			minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses());
			if (Utils.smOrEq(minSplit, m_minNoObj))
				minSplit = m_minNoObj;
			else if (Utils.gr(minSplit, 25))
				minSplit = 25;
			
			// Enough Instances with known values?
			if (Utils.sm((double) firstMiss, 2 * minSplit))
				return ;
			
			// Compute values of criteria for all possible split
			// indices.
			defaultEnt = infoGainCrit.oldEnt(m_distribution);
			while (next < firstMiss)
			{
				
				if (trainInstances.instance(next - 1).value_Renamed(m_attIndex) + 1e-5 < trainInstances.instance(next).value_Renamed(m_attIndex))
				{
					
					// Move class values for all Instances up to next 
					// possible split point.
					m_distribution.shiftRange(1, 0, trainInstances, last, next);
					
					// Check if enough Instances in each subset and compute
					// values for criteria.
					if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit))
					{
						currentInfoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt);
						if (Utils.gr(currentInfoGain, m_infoGain))
						{
							m_infoGain = currentInfoGain;
							splitIndex = next - 1;
						}
						m_index++;
					}
					last = next;
				}
				next++;
			}
			
			// Was there any useful split?
			if (m_index == 0)
				return ;
			
			// Compute modified information gain for best split.
			m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights);
			if (Utils.smOrEq(m_infoGain, 0))
				return ;
			
			// Set instance variables' values to values for
			// best split.
			m_numSubsets = 2;
			m_splitPoint = (trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex) + trainInstances.instance(splitIndex).value_Renamed(m_attIndex)) / 2;
			
			// In case we have a numerical precision problem we need to choose the
			// smaller value
			if (m_splitPoint == trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex))
			{
				m_splitPoint = trainInstances.instance(splitIndex).value_Renamed(m_attIndex);
			}
			
			// Restore distribution for best split.
			m_distribution = new Distribution(2, trainInstances.numClasses());
			m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
			m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);
			
			// Compute modified gain ratio for best split.
			m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
		}
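The split point chosen above is the midpoint of the two attribute values that straddle the best split index, with a fall-back to the lower value when floating-point rounding lands the midpoint back on the upper one. A worked instance of that rule:

			// Worked example of the midpoint rule:
			//   sorted values ... 1.4 | 1.7 ...  with splitIndex at the 1.4 instance
			//   m_splitPoint = (1.7 + 1.4) / 2 = 1.55
			// If the arithmetic instead yielded exactly 1.7, the code above
			// would fall back to the smaller value, 1.4.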
Example #13
		/// <summary> Creates a new dataset of the same size using random sampling
		/// with replacement according to the given weight vector. The
		/// weights of the instances in the new dataset are set to one.
		/// The length of the weight vector has to be the same as the
		/// number of instances in the dataset, and all weights have to
		/// be positive.
		/// 
		/// </summary>
		/// <param name="random">a random number generator
		/// </param>
		/// <param name="weights">the weight vector
		/// </param>
		/// <returns> the new dataset
		/// </returns>
		/// <exception cref="IllegalArgumentException">if the weights array is of the wrong
		/// length or contains negative weights.
		/// </exception>
		public virtual Instances resampleWithWeights(System.Random random, double[] weights)
		{
			
			if (weights.Length != numInstances())
			{
				throw new System.ArgumentException("weights.length != numInstances.");
			}
			Instances newData = new Instances(this, numInstances());
			if (numInstances() == 0)
			{
				return newData;
			}
			double[] probabilities = new double[numInstances()];
			double sumProbs = 0, sumOfWeights = Utils.sum(weights);
			for (int i = 0; i < numInstances(); i++)
			{
				sumProbs += random.NextDouble();
				probabilities[i] = sumProbs;
			}
			Utils.normalize(probabilities, sumProbs / sumOfWeights);
			
			// Make sure that rounding errors don't mess things up
			probabilities[numInstances() - 1] = sumOfWeights;
			int k = 0; int l = 0;
			sumProbs = 0;
			while ((k < numInstances() && (l < numInstances())))
			{
				if (weights[l] < 0)
				{
					throw new System.ArgumentException("Weights have to be positive.");
				}
				sumProbs += weights[l];
				while ((k < numInstances()) && (probabilities[k] <= sumProbs))
				{
					newData.add(instance(l));
					newData.instance(k).Weight = 1;
					k++;
				}
				l++;
			}
			return newData;
		}
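A sketch of how a boosting round might use this method when its base learner cannot handle instance weights directly: draw a weight-proportional bootstrap sample (whose copies all carry weight one), then fit on that. Here data, the seed, and the J48 base learner are placeholders:

		// Hypothetical usage inside a boosting round; "data" stands for the
		// current training set with boosting weights on its instances.
		Classifier baseLearner = new weka.classifiers.trees.J48();   // placeholder base learner
		double[] weights = new double[data.numInstances()];
		for (int i = 0; i < data.numInstances(); i++)
			weights[i] = data.instance(i).Weight;                    // current boosting weights
		Instances sample = data.resampleWithWeights(new System.Random(1), weights);
		baseLearner.buildClassifier(sample);                         // sees unweighted copies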
Example #14
        public static double classifyTrain_Test(string classifierFileName, int baseClasses, Classifier _classifier)
        {
            double performance = 0.0;

            try
            {
                List <BrResult> results = new List <BrResult>();
                for (int singleClass = 1; singleClass <= baseClasses; singleClass++)
                {
                    string eachFileName = String.Format("{0}_{1}.arff", classifierFileName, singleClass);

                    BrResult result = new BrResult();
                    result.classNumber = singleClass;

                    FileReader          javaFileReader = new FileReader(eachFileName);
                    weka.core.Instances insts          = new weka.core.Instances(javaFileReader);
                    javaFileReader.close();

                    insts.setClassIndex(insts.numAttributes() - 1);

                    System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                    int trainSize             = insts.numInstances() * percentSplit / 100;
                    int testSize              = insts.numInstances() - trainSize;
                    weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

                    _classifier.buildClassifier(train);

                    int           numCorrect   = 0;
                    var           numberOfInst = insts.numInstances();
                    List <Result> eachResults  = new List <Result>();
                    int           dataIndex    = 0;
                    for (int i = trainSize; i < numberOfInst; i++)
                    {
                        dataIndex++;
                        Result eachRow = new Result();
                        weka.core.Instance currentInst = insts.instance(i);

                        double   predictClass = _classifier.classifyInstance(currentInst);
                        double[] dist         = _classifier.distributionForInstance(currentInst);

                        string actualClass    = insts.classAttribute().value((int)insts.instance(i).classValue());
                        string predictedClass = insts.classAttribute().value((int)predictClass);


                        if (predictedClass == actualClass)
                        {
                            eachRow.correct = "1";
                            numCorrect++;
                        }
                        else
                        {
                            eachRow.correct = "0";
                        }
                        eachRow.lineIndex      = dataIndex;
                        eachRow.classActual    = actualClass;
                        eachRow.classPredicted = predictedClass;

                        eachResults.Add(eachRow);
                    }
                    result.classResult = eachResults;
                    results.Add(result);

                    System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)((double)numCorrect / (double)testSize * 100.0) + "%)");
                }

                #region Evaluation Matrix
                var evaluationMatrix = new Dictionary <int, string>();

                foreach (var res in results)
                {
                    foreach (var classRes in res.classResult)
                    {
                        if (!evaluationMatrix.ContainsKey(classRes.lineIndex))
                        {
                            evaluationMatrix[classRes.lineIndex] = classRes.correct;
                        }
                        else
                        {
                            evaluationMatrix[classRes.lineIndex] = evaluationMatrix[classRes.lineIndex] + "," + classRes.correct;
                        }
                    }
                }
                #endregion

                #region Performance Calculation
                int correctlyClassified   = 0;
                int incorrectlyClassified = 0;
                int totalData = evaluationMatrix.Count;
                foreach (var key in evaluationMatrix.Keys)
                {
                    string   multiLevelClass = evaluationMatrix[key];
                    string[] a = multiLevelClass.Split(',');

                    // Count the per-class flags that are "0"; an instance is
                    // correct only if every binary classifier got it right.
                    int misclassifiedLabels = 0;
                    for (int i = 0; i < a.Length; i++)
                    {
                        if (a[i] == "0")
                        {
                            misclassifiedLabels++;
                        }
                    }
                    if (misclassifiedLabels == 0)
                    {
                        correctlyClassified++;
                    }
                    else
                    {
                        incorrectlyClassified++;
                    }
                }

                performance = (double)correctlyClassified / (double)totalData * 100;
                System.Console.WriteLine(performance);
                #endregion
            }
            catch (java.lang.Exception ex)
            {
                ex.printStackTrace();
            }
            return performance;
        }
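The matrix built above is scored with exact-match (subset) accuracy: a test row counts as correct only when every per-class flag in its comma-separated string is "1". The same rule as a stand-alone sketch (hypothetical helper, not in the original source):

        // rows: one comma-separated flag string per instance, e.g. "1,1,0".
        static double SubsetAccuracy(System.Collections.Generic.IEnumerable<string> rows)
        {
            int total = 0, correct = 0;
            foreach (string row in rows)
            {
                total++;
                // A single "0" anywhere means some binary classifier missed this instance.
                if (System.Array.IndexOf(row.Split(','), "0") < 0)
                {
                    correct++;
                }
            }
            return total == 0 ? 0.0 : 100.0 * correct / total;
        }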
Ejemplo n.º 15
0
    // Test the classification result of each map that a user played,
    // with the data available as if they were playing through it
    public static void classifyTest(String dataString, String playerID)
    {
        try {
            java.io.StringReader stringReader = new java.io.StringReader(dataString);
            java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);

            /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
            //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
            weka.core.Instances thisData = new weka.core.Instances(buffReader); //source.getDataSet();
            if (thisData.classIndex() == -1)
                thisData.setClassIndex(thisData.numAttributes() - 1);

            weka.core.Instances thisUniqueData = new weka.core.Instances(thisData);
            if (thisUniqueData.classIndex() == -1)
                thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1);
            thisUniqueData.delete();

            if (allUniqueData == null) {
                allUniqueData = new weka.core.Instances(thisData);
                if (allUniqueData.classIndex() == -1)
                    allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1);
                allUniqueData.delete();
            }

            weka.core.InstanceComparator com = new weka.core.InstanceComparator(false);

            for (int i = 0; i < thisData.numInstances(); i++)
            {
                bool dup = false;
                for (int j = 0; j < allUniqueData.numInstances(); j++)
                {
                    if (com.compare(thisData.instance(i),allUniqueData.instance(j)) == 0)
                    {
                        Debug.Log("Duplicate found!");
                        dup = true;
                        break;
                    }
                }
                if (!dup)
                    allUniqueData.add(thisData.instance(i));
                else
                    dupInstances++;
            }

            for (int i = 0; i < thisData.numInstances(); i++)
            {
                bool dup = false;
                for (int j = 0; j < thisUniqueData.numInstances(); j++)
                {
                    if (com.compare(thisData.instance(i),thisUniqueData.instance(j)) == 0)
                    {
                        Debug.Log("Duplicate found!");
                        dup = true;
                        break;
                    }
                }
                if (!dup)
                    thisUniqueData.add(thisData.instance(i));
                else
                    dupInstancesSamePlayer++;
            }

            //Debug.Log("All Data Instance Count = " + thisData.numInstances());
            //Debug.Log("Unique Data Instance Count = " + thisUniqueData.numInstances());
            //Debug.Log("Done!");

        } catch (java.lang.Exception ex)
        {
            Debug.LogError(ex.getMessage());
        }
    }
Ejemplo n.º 16
0
		/// <summary> Performs one boosting iteration.</summary>
		private void  performIteration(double[][] trainYs, double[][] trainFs, double[][] probs, Instances data, double origSumOfWeights)
		{
			
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Training classifier " + (m_NumGenerated + 1));
			}
			
			// Build the new models
			for (int j = 0; j < m_NumClasses; j++)
			{
				if (m_Debug)
				{
					System.Console.Error.WriteLine("\t...for class " + (j + 1) + " (" + m_ClassAttribute.name() + "=" + m_ClassAttribute.value_Renamed(j) + ")");
				}
				
				// Make copy because we want to save the weights
				Instances boostData = new Instances(data);
				
				// Set instance pseudoclass and weights
				for (int i = 0; i < probs.Length; i++)
				{
					
					// Compute response and weight
					double p = probs[i][j];
					double z, actual = trainYs[i][j];
					if (actual == 1 - m_Offset)
					{
						z = 1.0 / p;
						if (z > Z_MAX)
						{
							// threshold
							z = Z_MAX;
						}
					}
					else
					{
						z = (- 1.0) / (1.0 - p);
						if (z < - Z_MAX)
						{
							// threshold
							z = - Z_MAX;
						}
					}
					double w = (actual - p) / z;
					
					// Set values for instance
					Instance current = boostData.instance(i);
					current.setValue(boostData.classIndex(), z);
					current.Weight = current.weight() * w;
				}
				
				// Scale the weights (helps with some base learners)
				double sumOfWeights = boostData.sumOfWeights();
				double scalingFactor = (double) origSumOfWeights / sumOfWeights;
				for (int i = 0; i < probs.Length; i++)
				{
					Instance current = boostData.instance(i);
					current.Weight = current.weight() * scalingFactor;
				}
				
				// Select instances to train the classifier on
				Instances trainData = boostData;
				if (m_WeightThreshold < 100)
				{
					trainData = selectWeightQuantile(boostData, (double) m_WeightThreshold / 100);
				}
				else
				{
					if (m_UseResampling)
					{
						double[] weights = new double[boostData.numInstances()];
						for (int kk = 0; kk < weights.Length; kk++)
						{
							weights[kk] = boostData.instance(kk).weight();
						}
						trainData = boostData.resampleWithWeights(m_RandomInstance, weights);
					}
				}
				
				// Build the classifier
				m_Classifiers[j][m_NumGenerated].buildClassifier(trainData);
			}
			
			// Evaluate / increment trainFs from the classifier
			for (int i = 0; i < trainFs.Length; i++)
			{
				double[] pred = new double[m_NumClasses];
				double predSum = 0;
				for (int j = 0; j < m_NumClasses; j++)
				{
					pred[j] = m_Shrinkage * m_Classifiers[j][m_NumGenerated].classifyInstance(data.instance(i));
					predSum += pred[j];
				}
				predSum /= m_NumClasses;
				for (int j = 0; j < m_NumClasses; j++)
				{
					trainFs[i][j] += (pred[j] - predSum) * (m_NumClasses - 1) / m_NumClasses;
				}
			}
			m_NumGenerated++;
			
			// Compute the current probability estimates
			for (int i = 0; i < trainYs.Length; i++)
			{
                probs[i] = Calculateprobs(trainFs[i]);
			}
		}
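For reference, the response and weight computed above follow the standard LogitBoost update: with y in {0, 1} and p the current probability estimate, the working response is z = (y - p) / (p(1 - p)), clipped to ±Z_MAX, and the instance weight is w = p(1 - p). The code folds the clipping into z and then recovers w as (y - p) / z, which agrees with w = p(1 - p) whenever z is not clipped.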
Ejemplo n.º 17
0
		public override void  buildClassifier(Instances insts)
		{
			
			if (insts.checkForStringAttributes())
			{
				throw new Exception("Cannot handle string attributes!");
			}
			if (insts.numClasses() > 2)
			{
				throw new System.Exception("Can only handle two-class datasets!");
			}
			if (insts.classAttribute().Numeric)
			{
				throw new Exception("Can't handle a numeric class!");
			}
			
			// Filter data
			m_Train = new Instances(insts);
			m_Train.deleteWithMissingClass();
			m_ReplaceMissingValues = new ReplaceMissingValues();
			m_ReplaceMissingValues.setInputFormat(m_Train);
			m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);
			
			m_NominalToBinary = new NominalToBinary();
			m_NominalToBinary.setInputFormat(m_Train);
			m_Train = Filter.useFilter(m_Train, m_NominalToBinary);
			
			/** Randomize training data */
			//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.util.Random.Random'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
			m_Train.randomize(new System.Random((System.Int32) m_Seed));
			
			/** Make space to store perceptrons */
			m_Additions = new int[m_MaxK + 1];
			m_IsAddition = new bool[m_MaxK + 1];
			m_Weights = new int[m_MaxK + 1];
			
			/** Compute perceptrons */
			m_K = 0;
			for (int it = 0; it < m_NumIterations; it++)
			{
				for (int i = 0; i < m_Train.numInstances(); i++)
				{
					Instance inst = m_Train.instance(i);
					if (!inst.classIsMissing())
					{
						int prediction = makePrediction(m_K, inst);
						//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
						int classValue = (int) inst.classValue();
						if (prediction == classValue)
						{
							m_Weights[m_K]++;
						}
						else
						{
							m_IsAddition[m_K] = (classValue == 1);
							m_Additions[m_K] = i;
							m_K++;
							m_Weights[m_K]++;
						}
						if (m_K == m_MaxK)
						{
							//UPGRADE_NOTE: Labeled break statement was changed to a goto statement. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1012'"
							goto out_brk;
						}
					}
				}
			}
			//UPGRADE_NOTE: Label 'out_brk' was added. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1011'"

out_brk: ;
			
		}
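The `goto out_brk` above is just the mechanical translation of Java's labeled break. A behavior-preserving sketch of the same control flow without the goto (per-instance body elided):

			bool reachedMaxK = false;
			for (int it = 0; it < m_NumIterations && !reachedMaxK; it++)
			{
				for (int i = 0; i < m_Train.numInstances() && !reachedMaxK; i++)
				{
					// ... same per-instance perceptron update as above ...
					if (m_K == m_MaxK)
					{
						reachedMaxK = true;   // replaces the labeled break
					}
				}
			}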
Ejemplo n.º 18
0
		/// <summary> Merges two sets of Instances together. The resulting set will have
		/// all the attributes of the first set plus all the attributes of the 
		/// second set. The number of instances in both sets must be the same.
		/// 
		/// </summary>
		/// <param name="first">the first set of Instances
		/// </param>
		/// <param name="second">the second set of Instances
		/// </param>
		/// <returns> the merged set of Instances
		/// </returns>
		/// <exception cref="IllegalArgumentException">if the datasets are not the same size
		/// </exception>
		public static Instances mergeInstances(Instances first, Instances second)
		{
			
			if (first.numInstances() != second.numInstances())
			{
				throw new System.ArgumentException("Instance sets must be of the same size");
			}
			
			// Create the vector of merged attributes
			FastVector newAttributes = new FastVector();
			for (int i = 0; i < first.numAttributes(); i++)
			{
				newAttributes.addElement(first.attribute(i));
			}
			for (int i = 0; i < second.numAttributes(); i++)
			{
				newAttributes.addElement(second.attribute(i));
			}
			
			// Create the set of Instances
			Instances merged = new Instances(first.relationName() + '_' + second.relationName(), newAttributes, first.numInstances());
			// Merge each instance
			for (int i = 0; i < first.numInstances(); i++)
			{
				merged.add(first.instance(i).mergeInstance(second.instance(i)));
			}
			return merged;
		}
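A usage sketch (hypothetical file names): join two datasets of the same length column-wise.

			Instances features = new Instances(new java.io.FileReader("features.arff"));
			Instances labels = new Instances(new java.io.FileReader("labels.arff"));

			// Rows are paired by position, so both files must list the
			// same instances in the same order.
			Instances combined = Instances.mergeInstances(features, labels);
			combined.ClassIndex = combined.numAttributes() - 1;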
Ejemplo n.º 19
0
		/// <summary> Sets distribution associated with model.</summary>
		public override void  resetDistribution(Instances data)
		{
			
			Instances insts = new Instances(data, data.numInstances());
			for (int i = 0; i < data.numInstances(); i++)
			{
				if (whichSubset(data.instance(i)) > - 1)
				{
					insts.add(data.instance(i));
				}
			}
			Distribution newD = new Distribution(insts, this);
			newD.addInstWithUnknown(data, m_attIndex);
			m_distribution = newD;
		}
Ejemplo n.º 20
0
        public async Task <string> classifyTest(weka.classifiers.Classifier cl)
        {
            string a    = "";
            double rate = 0;

            try
            {
                //instsTest = Instances.mergeInstances(ins,null);

                /*if (ins.classIndex() == -1)
                 *  ins.setClassIndex(insts.numAttributes() - 1);*/

                System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                weka.filters.Filter normalized = new weka.filters.unsupervised.attribute.Normalize();
                normalized.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, normalized);

                //randomize the order of the instances in the dataset.
                weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
                myRandom.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, myRandom);

                //replace missing values
                weka.filters.Filter replaceMissingValues = new weka.filters.unsupervised.attribute.ReplaceMissingValues();
                replaceMissingValues.setInputFormat(insts);
                insts = weka.filters.Filter.useFilter(insts, replaceMissingValues);


                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

                cl.buildClassifier(train);

                //double label = cl.classifyInstance(instsTest.instance(0));
                double label = cl.classifyInstance(ins);
                ins.setClassValue(label);
                //instsTest.instance(0).setClassValue(label);
                a = ins.toString(ins.numAttributes() - 1);

                weka.core.SerializationHelper.write("mymodel.model", cl);
                int numCorrect = 0;
                for (int i = trainSize; i < insts.numInstances(); i++)
                {
                    weka.core.Instance currentInst    = insts.instance(i);
                    double             predictedClass = cl.classifyInstance(currentInst);
                    if (predictedClass == insts.instance(i).classValue())
                    {
                        numCorrect++;
                    }
                }

                rate = (double)((double)numCorrect / (double)testSize * 100.0);
            }
            catch (java.lang.Exception ex)
            {
                //ex.printStackTrace();
                rate = -1;
            }
            return rate.ToString() + ";" + (a ?? "");
        }
Ejemplo n.º 21
0
		/// <summary> Prints the predictions for the given dataset into a String variable.
		/// 
		/// </summary>
		/// <param name="classifier		the">classifier to use
		/// </param>
		/// <param name="train		the">training data
		/// </param>
		/// <param name="testFileName	the">name of the test file
		/// </param>
		/// <param name="classIndex		the">class index
		/// </param>
		/// <param name="attributesToOutput	the">indices of the attributes to output
		/// </param>
		/// <returns>			the generated predictions for the attribute range
		/// </returns>
		/// <throws>  Exception 		if test file cannot be opened </throws>
		protected internal static System.String toPrintClassifications(Classifier classifier, Instances train, System.String testFileName, int classIndex, Range attributesToOutput)
		{
			
			System.Text.StringBuilder text = new System.Text.StringBuilder();
			if (testFileName.Length != 0)
			{
				System.IO.StreamReader testReader = null;
				try
				{
					//UPGRADE_TODO: The differences in the expected value  of parameters for constructor 'java.io.BufferedReader.BufferedReader'  may cause compilation errors.  "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1092'"
					//UPGRADE_WARNING: At least one expression was used more than once in the target code. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1181'"
					//UPGRADE_TODO: Constructor 'java.io.FileReader.FileReader' was converted to 'System.IO.StreamReader' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073'"
					testReader = new System.IO.StreamReader(new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(testFileName, System.Text.Encoding.Default).CurrentEncoding);
				}
				catch (System.Exception e)
				{
					//UPGRADE_TODO: The equivalent in .NET for method 'java.lang.Throwable.getMessage' may return a different value. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1043'"
					throw new System.Exception("Can't open file " + e.Message + '.');
				}
				Instances test = new Instances(testReader, 1);
				if (classIndex != - 1)
				{
					test.ClassIndex = classIndex - 1;
				}
				else
				{
					test.ClassIndex = test.numAttributes() - 1;
				}
				int i = 0;
				while (test.readInstance(testReader))
				{
					Instance instance = test.instance(0);
					Instance withMissing = (Instance) instance.copy();
					withMissing.Dataset = test;
					double predValue = ((Classifier) classifier).classifyInstance(withMissing);
					if (test.classAttribute().Numeric)
					{
						if (Instance.isMissingValue(predValue))
						{
							text.Append(i + " missing ");
						}
						else
						{
							text.Append(i + " " + predValue + " ");
						}
						if (instance.classIsMissing())
						{
							text.Append("missing");
						}
						else
						{
							text.Append(instance.classValue());
						}
						text.Append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");
					}
					else
					{
						if (Instance.isMissingValue(predValue))
						{
							text.Append(i + " missing ");
						}
						else
						{
							//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
							text.Append(i + " " + test.classAttribute().value_Renamed((int) predValue) + " ");
						}
						if (Instance.isMissingValue(predValue))
						{
							text.Append("missing ");
						}
						else
						{
							//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
							text.Append(classifier.distributionForInstance(withMissing)[(int) predValue] + " ");
						}
						text.Append(instance.toString(instance.classIndex()) + " " + attributeValuesString(withMissing, attributesToOutput) + "\n");
					}
					test.delete(0);
					i++;
				}
				testReader.Close();
			}
			return text.ToString();
		}
Ejemplo n.º 22
0
        public void trainSMOUsingWeka(string wekaFile, string modelName)
        {
            try
            {
                weka.core.converters.CSVLoader csvLoader = new weka.core.converters.CSVLoader();
                csvLoader.setSource(new java.io.File(wekaFile));
                weka.core.Instances insts = csvLoader.getDataSet();
                //weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader(wekaFile));
                insts.setClassIndex(insts.numAttributes() - 1);

                cl = new weka.classifiers.functions.SMO();
                cl.setBatchSize("100");

                cl.setCalibrator(new weka.classifiers.functions.Logistic());
                cl.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
                cl.setEpsilon(1.02E-12);
                cl.setC(1.0);
                cl.setDebug(false);
                cl.setChecksTurnedOff(false);
                cl.setFilterType(new SelectedTag(weka.classifiers.functions.SMO.FILTER_NORMALIZE, weka.classifiers.functions.SMO.TAGS_FILTER));

                System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

                //randomize the order of the instances in the dataset.
                // weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
                //myRandom.setInputFormat(insts);
                // insts = weka.filters.Filter.useFilter(insts, myRandom);

                int trainSize             = insts.numInstances() * percentSplit / 100;
                int testSize              = insts.numInstances() - trainSize;
                weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
                java.io.File        path  = new java.io.File("/models/");
                cl.buildClassifier(train);
                saveModel(cl, modelName, path);
                #region test whole set
                int numCorrect = 0;
                for (int i = 0; i < insts.numInstances(); i++)
                {
                    weka.core.Instance currentInst = insts.instance(i);
                    if (i == 12)   // capture one sample row's feature vector (row 12) for later inspection
                    {
                        array = new List <float>();
                        foreach (float value in currentInst.toDoubleArray())
                        {
                            array.Add(value);
                        }
                    }

                    double predictedClass = cl.classifyInstance(currentInst);
                    if (predictedClass == insts.instance(i).classValue())
                    {
                        numCorrect++;
                    }
                }

                System.Console.WriteLine(numCorrect + " out of " + insts.numInstances() + " correct (" +
                                         (double)((double)numCorrect / (double)insts.numInstances() * 100.0) + "%)");
                #endregion
            }
            catch (java.lang.Exception ex)
            {
                ex.printStackTrace();
            }
        }
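A companion sketch for consuming a model saved this way (assuming it was written with weka.core.SerializationHelper.write as in the earlier classifyTest example; "mymodel.model" and "unseen.arff" are placeholders):

            weka.classifiers.Classifier loaded =
                (weka.classifiers.Classifier)weka.core.SerializationHelper.read("mymodel.model");

            weka.core.Instances unseen = new weka.core.Instances(new java.io.FileReader("unseen.arff"));
            unseen.setClassIndex(unseen.numAttributes() - 1);

            // Predict and print the class label of the first unseen row.
            double predicted = loaded.classifyInstance(unseen.instance(0));
            System.Console.WriteLine(unseen.classAttribute().value((int)predicted));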
Ejemplo n.º 23
0
		/// <summary> Deletes all instances in given range from given bag.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		public void  delRange(int bagIndex, Instances source, int startIndex, int lastPlusOne)
		{
			
			double sumOfWeights = 0;
			int classIndex;
			Instance instance;
			int i;
			
			for (i = startIndex; i < lastPlusOne; i++)
			{
				instance = (Instance) source.instance(i);
				//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
				classIndex = (int) instance.classValue();
				sumOfWeights = sumOfWeights + instance.weight();
				m_perClassPerBag[bagIndex][classIndex] -= instance.weight();
				m_perClass[classIndex] -= instance.weight();
			}
			m_perBag[bagIndex] -= sumOfWeights;
			totaL -= sumOfWeights;
		}
Ejemplo n.º 24
0
		/// <summary> Shifts all instances in given range from one bag to another one.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		public void  shiftRange(int from, int to, Instances source, int startIndex, int lastPlusOne)
		{
			
			int classIndex;
			double weight;
			Instance instance;
			int i;
			
			for (i = startIndex; i < lastPlusOne; i++)
			{
				instance = (Instance) source.instance(i);
				//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
				classIndex = (int) instance.classValue();
				weight = instance.weight();
				m_perClassPerBag[from][classIndex] -= weight;
				m_perClassPerBag[to][classIndex] += weight;
				m_perBag[from] -= weight;
				m_perBag[to] += weight;
			}
		}
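Note that shiftRange only moves weight between bags: m_perClassPerBag and m_perBag change, while the per-class totals and the grand total are left untouched, so the overall distribution stays consistent.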
Ejemplo n.º 25
0
        public static void Test()
        {
            weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
            data.setClassIndex(data.numAttributes() - 1);

            weka.classifiers.Classifier cls = new weka.classifiers.bayes.BayesNet();


            //Save BayesNet results in .txt file
            using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
            {
                file.WriteLine("Performing " + percentSplit + "% split evaluation.");

                int runs = 1;

                // repeat the percentage-split evaluation; each run reshuffles the data with a new seed
                for (int i = 0; i < runs; i++)
                {
                    // randomize data
                    int seed = i + 1;
                    java.util.Random    rand     = new java.util.Random(seed);
                    weka.core.Instances randData = new weka.core.Instances(data);
                    randData.randomize(rand);

                    //weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData);

                    int trainSize             = (int)Math.Round((double)data.numInstances() * percentSplit / 100);
                    int testSize              = data.numInstances() - trainSize;
                    weka.core.Instances train = new weka.core.Instances(data, 0, 0);
                    weka.core.Instances test  = new weka.core.Instances(data, 0, 0);
                    train.setClassIndex(train.numAttributes() - 1);
                    test.setClassIndex(test.numAttributes() - 1);

                    //Print classifier analytics for all the dataset
                    file.WriteLine("EVALUATION OF TEST DATASET.");

                    //int numCorrect = 0;
                    for (int j = 0; j < data.numInstances(); j++)
                    {
                        weka.core.Instance currentInst = randData.instance(j);

                        if (j < trainSize)
                        {
                            train.add(currentInst);
                        }

                        else
                        {
                            test.add(currentInst);

                            /*
                             * double predictedClass = cls.classifyInstance(currentInst);
                             *
                             * double[] prediction = cls.distributionForInstance(currentInst);
                             *
                             * for (int p = 0; p < prediction.Length; p++)
                             * {
                             *  file.WriteLine("Probability of class [{0}] for [{1}] is: {2}", currentInst.classAttribute().value(p), currentInst, Math.Round(prediction[p], 4));
                             * }
                             * file.WriteLine();
                             *
                             * file.WriteLine();
                             * if (predictedClass == data.instance(j).classValue())
                             *  numCorrect++;*/
                        }
                    }

                    // build and evaluate classifier
                    cls.buildClassifier(train);

                    // Test the model
                    weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData);
                    eval.evaluateModel(cls, test);

                    // Print the results as in Weka explorer:
                    //Print statistics
                    String strSummaryTest = eval.toSummaryString();

                    file.WriteLine(strSummaryTest);
                    file.WriteLine();

                    //Print detailed class statistics
                    file.WriteLine(eval.toClassDetailsString());
                    file.WriteLine();

                    //Print confusion matrix
                    file.WriteLine(eval.toMatrixString());
                    file.WriteLine();

                    // Get the confusion matrix
                    double[][] cmMatrixTest = eval.confusionMatrix();

                    System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
                }
            }
        }
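Since the loop above performs a repeated percentage split rather than true folding, a genuine 10-fold cross-validation can be run directly on Evaluation. A sketch reusing cls, randData and rand from above, assuming this Weka build exposes the four-argument crossValidateModel overload:

                    weka.classifiers.Evaluation crossVal = new weka.classifiers.Evaluation(randData);
                    crossVal.crossValidateModel(cls, randData, 10, rand);
                    file.WriteLine(crossVal.toSummaryString());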
Ejemplo n.º 26
0
		/// <summary> Select only instances with weights that contribute to 
		/// the specified quantile of the weight distribution
		/// 
		/// </summary>
		/// <param name="data">the input instances
		/// </param>
		/// <param name="quantile">the specified quantile eg 0.9 to select 
		/// 90% of the weight mass
		/// </param>
		/// <returns> the selected instances
		/// </returns>
		protected internal virtual Instances selectWeightQuantile(Instances data, double quantile)
		{
			
			int numInstances = data.numInstances();
			Instances trainData = new Instances(data, numInstances);
			double[] weights = new double[numInstances];
			
			double sumOfWeights = 0;
			for (int i = 0; i < numInstances; i++)
			{
				weights[i] = data.instance(i).weight();
				sumOfWeights += weights[i];
			}
			double weightMassToSelect = sumOfWeights * quantile;
			int[] sortedIndices = Utils.sort(weights);
			
			// Select the instances
			sumOfWeights = 0;
			for (int i = numInstances - 1; i >= 0; i--)
			{
				Instance instance = (Instance) data.instance(sortedIndices[i]).copy();
				trainData.add(instance);
				sumOfWeights += weights[sortedIndices[i]];
				if ((sumOfWeights > weightMassToSelect) && (i > 0) && (weights[sortedIndices[i]] != weights[sortedIndices[i - 1]]))
				{
					break;
				}
			}
			if (m_Debug)
			{
				System.Console.Error.WriteLine("Selected " + trainData.numInstances() + " out of " + numInstances);
			}
			return trainData;
		}
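As a worked example: with instance weights {4, 3, 2, 1} and quantile 0.7, the weight mass to select is 7.0. Walking the instances in descending weight order accumulates 4, then 7 (not yet strictly above 7.0), then 9, so the three heaviest instances are selected; the adjacent-weight comparison in the break condition additionally keeps ties at the cut-off together.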