/// <summary>
/// Creates the objective function from a labeled dataset, a list of unlabeled
/// datums and the list of GE features.
/// </summary>
/// <remarks>
/// Prints the sizes of the three inputs to standard output, stores the inputs,
/// creates a <c>LinearClassifier</c> with a <c>null</c> first argument that shares
/// the labeled dataset's feature and label indices, and finally calls
/// <c>ComputeEmpiricalStatistics</c> on the GE features.
/// </remarks>
/// <param name="labeledDataset">Labeled data; also supplies the feature/label indices and the feature/class counts.</param>
/// <param name="unlabeledDataList">Unlabeled datums.</param>
/// <param name="geFeatures">Features passed on to <c>ComputeEmpiricalStatistics</c>.</param>
public GeneralizedExpectationObjectiveFunction(GeneralDataset<L, F> labeledDataset, IList<IDatum<L, F>> unlabeledDataList, IList<F> geFeatures)
{
    // Report the input sizes before doing any work.
    string exampleReport = "Number of labeled examples:" + labeledDataset.size + "\nNumber of unlabeled examples:" + unlabeledDataList.Count;
    System.Console.Out.WriteLine(exampleReport);
    string featureReport = "Number of GE features:" + geFeatures.Count;
    System.Console.Out.WriteLine(featureReport);

    // Keep references to the inputs.
    this.labeledDataset = labeledDataset;
    this.unlabeledDataList = unlabeledDataList;
    this.geFeatures = geFeatures;

    // Cache the problem dimensions reported by the labeled dataset.
    this.numFeatures = labeledDataset.NumFeatures();
    this.numClasses = labeledDataset.NumClasses();

    // The classifier is created with a null first argument and the dataset's own indices.
    this.classifier = new LinearClassifier<L, F>(null, labeledDataset.featureIndex, labeledDataset.labelIndex);

    // Must run last: every field above is assigned before this call.
    ComputeEmpiricalStatistics(geFeatures);
}
/// <summary>
/// Returns the classifier's per-label scores from <c>ProbabilityOf</c> for
/// <paramref name="datum"/> as an array positioned by the labeled dataset's label index.
/// </summary>
/// <param name="datum">The datum to score with the current classifier.</param>
/// <returns>
/// An array of length <c>labeledDataset.NumClasses()</c>; slots whose label does
/// not appear in the classifier's counter keep the default value 0.
/// </returns>
private double[] GetModelProbs(IDatum<L, F> datum)
{
    double[] distribution = new double[labeledDataset.NumClasses()];
    ICounter<L> labelProbs = classifier.ProbabilityOf(datum);
    foreach (L candidate in labelProbs.KeySet())
    {
        // Place each label's count at that label's id in the dataset's label index.
        distribution[labeledDataset.labelIndex.IndexOf(candidate)] = labelProbs.GetCount(candidate);
    }
    return distribution;
}
/// <summary>
/// Trains a multinomial logistic classifier on <paramref name="dataset"/>.
/// </summary>
/// <remarks>
/// Copies the dataset's dimensions, data array and labels array into this
/// factory's fields, augments the feature matrix, and wraps the result of
/// <c>TrainWeights()</c> together with the dataset's feature and label indices.
/// </remarks>
/// <param name="dataset">The training data.</param>
/// <returns>A classifier built from the trained weights and the dataset's indices.</returns>
public virtual MultinomialLogisticClassifier<L, F> TrainClassifier(GeneralDataset<L, F> dataset)
{
    numClasses = dataset.NumClasses();
    numFeatures = dataset.NumFeatures();
    data = dataset.GetDataArray();

    // Generic classes are invariant in C#, so the translated test against
    // RVFDataset<object, object> could only succeed when L and F are both object.
    // Testing against RVFDataset<L, F> matches every real-valued dataset that can
    // actually be passed here.
    if (dataset is RVFDataset<L, F>)
    {
        dataValues = dataset.GetValuesArray();
    }
    else
    {
        // Dataset type carries no explicit values: let LogisticUtils derive them from the data array.
        dataValues = LogisticUtils.InitializeDataValues(data);
    }

    AugmentFeatureMatrix(data, dataValues);
    labels = dataset.GetLabelsArray();
    return new MultinomialLogisticClassifier<L, F>(TrainWeights(), dataset.featureIndex, dataset.labelIndex);
}
// Legacy usage examples, kept verbatim from the Java original (commented out).
//
// public static void main(String[] args) {
//   List examples = new ArrayList();
//   String leftLight = "leftLight";
//   String rightLight = "rightLight";
//   String broken = "BROKEN";
//   String ok = "OK";
//   Counter c1 = new ClassicCounter<>();
//   c1.incrementCount(leftLight, 0);
//   c1.incrementCount(rightLight, 0);
//   RVFDatum d1 = new RVFDatum(c1, broken);
//   examples.add(d1);
//   Counter c2 = new ClassicCounter<>();
//   c2.incrementCount(leftLight, 1);
//   c2.incrementCount(rightLight, 1);
//   RVFDatum d2 = new RVFDatum(c2, ok);
//   examples.add(d2);
//   Counter c3 = new ClassicCounter<>();
//   c3.incrementCount(leftLight, 0);
//   c3.incrementCount(rightLight, 1);
//   RVFDatum d3 = new RVFDatum(c3, ok);
//   examples.add(d3);
//   Counter c4 = new ClassicCounter<>();
//   c4.incrementCount(leftLight, 1);
//   c4.incrementCount(rightLight, 0);
//   RVFDatum d4 = new RVFDatum(c4, ok);
//   examples.add(d4);
//   Dataset data = new Dataset(examples.size());
//   data.addAll(examples);
//   NaiveBayesClassifier classifier = (NaiveBayesClassifier)
//     new NaiveBayesClassifierFactory(200, 200, 1.0,
//       LogPrior.LogPriorType.QUADRATIC.ordinal(),
//       NaiveBayesClassifierFactory.CL)
//     .trainClassifier(data);
//   classifier.print();
//   //now classify
//   for (int i = 0; i < examples.size(); i++) {
//     RVFDatum d = (RVFDatum) examples.get(i);
//     Counter scores = classifier.scoresOf(d);
//     System.out.println("for datum " + d + " scores are " + scores.toString());
//     System.out.println(" class is " + Counters.topKeys(scores, 1));
//     System.out.println(" class should be " + d.label());
//   }
// }
//
//   String trainFile = args[0];
//   String testFile = args[1];
//   NominalDataReader nR = new NominalDataReader();
//   Map<Integer, Index<String>> indices = Generics.newHashMap();
//   List<RVFDatum<String, Integer>> train = nR.readData(trainFile, indices);
//   List<RVFDatum<String, Integer>> test = nR.readData(testFile, indices);
//   System.out.println("Constrained conditional likelihood no prior :");
//   for (int j = 0; j < 100; j++) {
//     NaiveBayesClassifier<String, Integer> classifier = new NaiveBayesClassifierFactory<String, Integer>(0.1, 0.01, 0.6, LogPrior.LogPriorType.NULL.ordinal(), NaiveBayesClassifierFactory.CL).trainClassifier(train);
//     classifier.print();
//     //now classify
//     //
//     float accTrain = classifier.accuracy(train.iterator());
//     log.info("training accuracy " + accTrain);
//     float accTest = classifier.accuracy(test.iterator());
//     log.info("test accuracy " + accTest);
//     //
//   }
//   System.out.println("Unconstrained conditional likelihood no prior :");
//   for (int j = 0; j < 100; j++) {
//     NaiveBayesClassifier<String, Integer> classifier = new NaiveBayesClassifierFactory<String, Integer>(0.1, 0.01, 0.6, LogPrior.LogPriorType.NULL.ordinal(), NaiveBayesClassifierFactory.UCL).trainClassifier(train);
//     classifier.print();
//     //now classify
//     //
//     float accTrain = classifier.accuracy(train.iterator());
//     log.info("training accuracy " + accTrain);
//     float accTest = classifier.accuracy(test.iterator());
//     log.info("test accuracy " + accTest);
//   }
// }

/// <summary>
/// Trains a naive Bayes classifier on <paramref name="dataset"/> by forwarding its
/// data array, labels, feature/class counts and label/feature indices to the
/// array-based <c>TrainClassifier</c> overload.
/// </summary>
/// <param name="dataset">The training data; must not be an RVF (real-valued feature) dataset.</param>
/// <returns>The classifier produced by the array-based overload.</returns>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="dataset"/> is an <c>RVFDataset</c>, which this method deliberately rejects.
/// </exception>
public virtual NaiveBayesClassifier<L, F> TrainClassifier(GeneralDataset<L, F> dataset)
{
    // A generic type must be named with its type arguments in C#. Because generic
    // classes are invariant, RVFDataset<L, F> is the only instantiation that a
    // GeneralDataset<L, F> reference can hold.
    if (dataset is RVFDataset<L, F>)
    {
        // NotSupportedException derives from Exception, so existing catch (Exception) handlers still apply.
        throw new System.NotSupportedException("Not sure if RVFDataset runs correctly in this method. Please update this code if it does.");
    }

    return TrainClassifier(dataset.GetDataArray(), dataset.labels, dataset.NumFeatures(), dataset.NumClasses(), dataset.labelIndex, dataset.featureIndex);
}
/// <summary>
/// Convenience constructor that unpacks <paramref name="dataset"/> into its feature
/// count, class count, data array and labels array and forwards them, together with
/// the confusion matrix and prior, to the array-based constructor.
/// </summary>
/// <param name="dataset">Source of the dimensions, data array and labels array.</param>
/// <param name="confusionMatrix">Passed through unchanged to the array-based constructor.</param>
/// <param name="prior">Passed through unchanged to the array-based constructor.</param>
public BiasedLogConditionalObjectiveFunction(
    GeneralDataset<object, object> dataset,
    double[][] confusionMatrix,
    LogPrior prior)
    : this(
        dataset.NumFeatures(),
        dataset.NumClasses(),
        dataset.GetDataArray(),
        dataset.GetLabelsArray(),
        confusionMatrix,
        prior)
{
    // All initialization happens in the delegated constructor.
}
/// <summary>
/// Trains an SVM classifier by running an external learner on a temporary copy of
/// <paramref name="dataset"/> and reading the resulting model file back in.
/// </summary>
/// <remarks>
/// The learner command is <c>svmStructLearn</c> when the dataset has more than two
/// classes, otherwise <c>svmPerfLearn</c> or <c>svmLightLearn</c> depending on
/// <c>useSVMPerf</c>. When <c>useAlphaFile</c> is set, the <c>alphaFile</c> field is
/// replaced by a newly created temp file. When <c>doEval</c> is set, the matching
/// classify command is run on the training data and the classifier's own scores are
/// written to a second temp file. When <c>useSigmoid</c> is set, a sigmoid is fitted
/// and attached via <c>SetPlatt</c>.
/// </remarks>
/// <param name="dataset">The training data.</param>
/// <returns>The classifier built from the weights and threshold read from the model file.</returns>
/// <exception cref="System.Exception">
/// Wraps any exception raised during training; the original is available as <c>InnerException</c>.
/// </exception>
public virtual SVMLightClassifier<L, F> TrainClassifierBasic(GeneralDataset<L, F> dataset)
{
    IIndex<L> labelIndex = dataset.LabelIndex();
    IIndex<F> featureIndex = dataset.featureIndex;
    bool multiclass = (dataset.NumClasses() > 2);
    try
    {
        // this is the file that the model will be saved to
        File modelFile = File.CreateTempFile("svm-", ".model");
        if (deleteTempFilesOnExit)
        {
            modelFile.DeleteOnExit();
        }

        // this is the file that the svm light formatted dataset will be printed to
        File dataFile = File.CreateTempFile("svm-", ".data");
        if (deleteTempFilesOnExit)
        {
            dataFile.DeleteOnExit();
        }

        // print the dataset; close the writer even if printing fails
        PrintWriter pw = new PrintWriter(new FileWriter(dataFile));
        try
        {
            dataset.PrintSVMLightFormat(pw);
        }
        finally
        {
            pw.Close();
        }

        // -v 0 makes it not verbose
        // -m 400 gives it a larger cache, for faster training
        string cmd = (multiclass ? svmStructLearn : (useSVMPerf ? svmPerfLearn : svmLightLearn)) + " -v " + svmLightVerbosity + " -m 400 ";

        // set the value of C if we have one specified
        if (C > 0.0)
        {
            // Format with the invariant culture: the current culture may use a decimal
            // comma, which would corrupt the command-line argument.
            cmd = cmd + " -c " + C.ToString(System.Globalization.CultureInfo.InvariantCulture) + " ";
        }
        else if (useSVMPerf)
        {
            // It's required to specify this parameter for SVM perf.
            // Written as a literal so the decimal point does not depend on the culture.
            cmd = cmd + " -c 0.01 ";
        }

        // Alpha File
        if (useAlphaFile)
        {
            File newAlphaFile = File.CreateTempFile("svm-", ".alphas");
            if (deleteTempFilesOnExit)
            {
                newAlphaFile.DeleteOnExit();
            }
            cmd = cmd + " -a " + newAlphaFile.GetAbsolutePath();
            if (alphaFile != null)
            {
                // Pass the alpha file remembered from the previous run.
                cmd = cmd + " -y " + alphaFile.GetAbsolutePath();
            }
            alphaFile = newAlphaFile;
        }

        // File and Model Data
        cmd = cmd + " " + dataFile.GetAbsolutePath() + " " + modelFile.GetAbsolutePath();
        if (verbose)
        {
            logger.Info("<< " + cmd + " >>");
        }

        /*Process p = Runtime.getRuntime().exec(cmd);
         *
         * p.waitFor();
         *
         * if (p.exitValue() != 0) throw new RuntimeException("Error Training SVM Light exit value: " + p.exitValue());
         * p.destroy();
         */
        SystemUtils.Run(new ProcessBuilder(whitespacePattern.Split(cmd)), new PrintWriter(System.Console.Error), new PrintWriter(System.Console.Error));

        if (doEval)
        {
            File predictFile = File.CreateTempFile("svm-", ".pred");
            if (deleteTempFilesOnExit)
            {
                predictFile.DeleteOnExit();
            }
            string evalCmd = (multiclass ? svmStructClassify : (useSVMPerf ? svmPerfClassify : svmLightClassify)) + " " + dataFile.GetAbsolutePath() + " " + modelFile.GetAbsolutePath() + " " + predictFile.GetAbsolutePath();
            if (verbose)
            {
                logger.Info("<< " + evalCmd + " >>");
            }
            SystemUtils.Run(new ProcessBuilder(whitespacePattern.Split(evalCmd)), new PrintWriter(System.Console.Error), new PrintWriter(System.Console.Error));
        }

        // read in the model file
        Pair<double, ClassicCounter<int>> weightsAndThresh = ReadModel(modelFile, multiclass);
        double threshold = weightsAndThresh.First();
        ClassicCounter<Pair<F, L>> weights = ConvertWeights(weightsAndThresh.Second(), featureIndex, labelIndex, multiclass);
        ClassicCounter<L> thresholds = new ClassicCounter<L>();
        if (!multiclass)
        {
            // Non-multiclass case: the two labels get the threshold with opposite signs.
            thresholds.SetCount(labelIndex.Get(0), -threshold);
            thresholds.SetCount(labelIndex.Get(1), threshold);
        }
        SVMLightClassifier<L, F> classifier = new SVMLightClassifier<L, F>(weights, thresholds);

        if (doEval)
        {
            File predictFile = File.CreateTempFile("svm-", ".pred2");
            if (deleteTempFilesOnExit)
            {
                predictFile.DeleteOnExit();
            }
            PrintWriter pw2 = new PrintWriter(predictFile);
            try
            {
                NumberFormat nf = NumberFormat.GetNumberInstance();
                nf.SetMaximumFractionDigits(5);
                foreach (IDatum<L, F> datum in dataset)
                {
                    ICounter<L> scores = classifier.ScoresOf(datum);
                    pw2.Println(Counters.ToString(scores, nf));
                }
            }
            finally
            {
                // Close the writer even if scoring or formatting fails.
                pw2.Close();
            }
        }

        if (useSigmoid)
        {
            if (verbose)
            {
                System.Console.Out.Write("fitting sigmoid...");
            }
            classifier.SetPlatt(FitSigmoid(classifier, dataset));
            if (verbose)
            {
                System.Console.Out.WriteLine("done");
            }
        }

        return classifier;
    }
    catch (Exception e)
    {
        // System.Exception has no constructor taking only an Exception; pass the
        // message and keep the original as InnerException so the cause is preserved.
        throw new Exception(e.Message, e);
    }
}