/// <summary>
/// Loads the gesture data and trains the recognizer; no experiment is run here.
/// </summary>
/// <remarks>
/// Reads the data set from <c>Config.DataPath</c>, creates one sample list per gesture in
/// <c>Config.GesturesToUse</c>, sorts every user's training samples into those lists, and
/// builds a <c>LinearClassifier</c> from the resulting <c>ToTrain</c> collection.
/// </remarks>
public void Initialize()
{
    // Load the gesture data.
    dataset = DataLoader.LoadGestureDataFrom(Config.DataPath);

    // One (initially empty) bucket per gesture that takes part in training.
    ToTrain = new Dictionary<GestureType, List<GestureSample>>();
    foreach (GestureType gesture in Config.GesturesToUse)
    {
        ToTrain.Add(gesture, new List<GestureSample>());
    }

    // Sort the training samples of every user into their gesture class,
    // ignoring samples of gestures that are not configured for use.
    foreach (UserDataSet uData in dataset)
    {
        foreach (GestureSample sample in uData.TrainingSamples)
        {
            if (Config.GesturesToUse.Contains(sample.Gesture))
            {
                ToTrain[sample.Gesture].Add(sample);
            }
        }
    }

    Recognizer = new LinearClassifier(ToTrain);
}
/// <summary>
/// Entry point for setting up and running the experiments.
/// </summary>
/// <remarks>
/// Loads the gesture data from <c>Config.DataPath</c>, creates an empty sample list per
/// configured gesture in both <c>ToTrain</c> and <c>ToRecognize_Training</c>, calls
/// <c>PopulateDataSets</c> (defined elsewhere; presumably fills those lists), builds the
/// classifier from <c>ToTrain</c>, calls <c>Run</c>, and finally writes an empty line to the console.
/// </remarks>
public void RunExperiments()
{
    // Load the gesture data.
    dataset = DataLoader.LoadGestureDataFrom(Config.DataPath);

    // Prepare one empty bucket per configured gesture in both collections.
    ToTrain = new Dictionary<GestureType, List<GestureSample>>();
    ToRecognize_Training = new Dictionary<GestureType, List<GestureSample>>();
    foreach (GestureType gesture in Config.GesturesToUse)
    {
        ToTrain.Add(gesture, new List<GestureSample>());
        ToRecognize_Training.Add(gesture, new List<GestureSample>());
    }

    PopulateDataSets();

    Recognizer = new LinearClassifier(ToTrain);
    Run();
    Console.WriteLine();
}
/// <summary>
/// Demo: trains a linear classifier on a tiny stop-light data set, dumps the learned
/// weights and prints the predicted class for two test instances.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    // Training set: three (Green, Red), three (Red, Green) and one (Red, Red) example.
    IList<IDatum<string, string>> trainingData = new List<IDatum<string, string>>
    {
        MakeStopLights(Green, Red),
        MakeStopLights(Green, Red),
        MakeStopLights(Green, Red),
        MakeStopLights(Red, Green),
        MakeStopLights(Red, Green),
        MakeStopLights(Red, Green),
        MakeStopLights(Red, Red)
    };

    // Test set: one instance per configuration we want a verdict on.
    IDatum<string, string> workingLights = MakeStopLights(Green, Red);
    IDatum<string, string> brokenLights = MakeStopLights(Red, Red);

    // Configure the factory: conjugate gradient ascent, per-iteration
    // convergence output, and a small amount of smoothing.
    LinearClassifierFactory<string, string> factory = new LinearClassifierFactory<string, string>();
    factory.UseConjugateGradientAscent();
    factory.SetVerbose(true);
    factory.SetSigma(10.0);

    // Train and inspect the learned weights.
    LinearClassifier<string, string> classifier = factory.TrainClassifier(trainingData);
    classifier.Dump();

    // Classify both test instances and ask the classifier to justify each decision.
    string workingVerdict = classifier.ClassOf(workingLights);
    System.Console.Out.WriteLine("Working instance got: " + workingVerdict);
    classifier.JustificationOf(workingLights);

    string brokenVerdict = classifier.ClassOf(brokenLights);
    System.Console.Out.WriteLine("Broken instance got: " + brokenVerdict);
    classifier.JustificationOf(brokenLights);
}
/// <summary>
/// Trains the multiclass relation classifier on <paramref name="trainSet"/> and stores it
/// in the <c>classifier</c> field.
/// </summary>
/// <remarks>
/// The classifier kind is chosen by <c>relationExtractorClassifierType</c> (compared
/// case-insensitively): "linear" trains through a <c>LinearClassifierFactory</c>, "svm"
/// through an <c>SVMLightClassifierFactory</c>. When the logger has level <c>Fine</c>
/// enabled, the learned weights are reported afterwards.
/// </remarks>
/// <param name="trainSet">The data set to train on.</param>
/// <exception cref="System.Exception">The configured classifier type is neither "linear" nor "svm".</exception>
public virtual void TrainMulticlass(GeneralDataset<string, string> trainSet)
{
    bool wantsLinear = Sharpen.Runtime.EqualsIgnoreCase(relationExtractorClassifierType, "linear");
    if (wantsLinear)
    {
        LinearClassifierFactory<string, string> linearFactory =
            new LinearClassifierFactory<string, string>(1e-4, false, sigma);
        linearFactory.SetVerbose(false);
        // Alternative minimizers that were tried and left disabled:
        //   in-place SGD instead of QN (faster but much worse):
        //     lcFactory.useInPlaceStochasticGradientDescent(-1, -1, 1.0);
        //   hybrid minimizer (start with in-place SGD, continue with QN):
        //     lcFactory.useHybridMinimizerWithInPlaceSGD(50, -1, sigma);
        classifier = linearFactory.TrainClassifier(trainSet);
    }
    else if (Sharpen.Runtime.EqualsIgnoreCase(relationExtractorClassifierType, "svm"))
    {
        SVMLightClassifierFactory<string, string> svmLightFactory =
            new SVMLightClassifierFactory<string, string>();
        svmLightFactory.SetC(sigma);
        classifier = svmLightFactory.TrainClassifier(trainSet);
    }
    else
    {
        throw new Exception("Invalid classifier type: " + relationExtractorClassifierType);
    }

    if (logger.IsLoggable(Level.Fine))
    {
        ReportWeights(classifier, null);
    }
}
/// <summary>
/// Trains a classifier, round-trips it through in-memory serialization and prints the
/// predictions of the original and the deserialized classifier side by side.
/// </summary>
/// <remarks>
/// Each test line is converted to a datum twice, once per <c>ColumnDataClassifier</c>
/// instance. The original classifier is evaluated on the first datum and the deserialized
/// classifier on the second.
/// </remarks>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
private static void DemonstrateSerialization()
{
    System.Console.Out.WriteLine();
    System.Console.Out.WriteLine("Demonstrating working with a serialized classifier");
    ColumnDataClassifier cdc = new ColumnDataClassifier(where + "examples/cheese2007.prop");
    IClassifier<string, string> cl = cdc.MakeClassifier(cdc.ReadTrainingExamples(where + "examples/cheeseDisease.train"));

    // Exhibit serialization and deserialization working. Serialized to bytes in memory for simplicity.
    System.Console.Out.WriteLine();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(baos);
    oos.WriteObject(cl);
    oos.Close();
    byte[] @object = baos.ToByteArray();
    ByteArrayInputStream bais = new ByteArrayInputStream(@object);
    ObjectInputStream ois = new ObjectInputStream(bais);
    LinearClassifier<string, string> lc = ErasureUtils.UncheckedCast(ois.ReadObject());
    ois.Close();
    ColumnDataClassifier cdc2 = new ColumnDataClassifier(where + "examples/cheese2007.prop");

    // Compare the output of the deserialized classifier lc with the original one cl.
    // For both we use a ColumnDataClassifier to convert text lines to examples.
    System.Console.Out.WriteLine();
    System.Console.Out.WriteLine("Making predictions with both classifiers");
    foreach (string line in ObjectBank.GetLineIterator(where + "examples/cheeseDisease.test", "utf-8"))
    {
        IDatum<string, string> d = cdc.MakeDatumFromLine(line);
        IDatum<string, string> d2 = cdc2.MakeDatumFromLine(line);

        string originalLabel = cl.ClassOf(d);
        System.Console.Out.Printf("%s =origi=> %s (%.4f)%n", line, originalLabel, cl.ScoresOf(d).GetCount(originalLabel));

        // The deserialized classifier is evaluated on d2 throughout, so the printed
        // score belongs to the same datum as the printed label.
        string deserializedLabel = lc.ClassOf(d2);
        System.Console.Out.Printf("%s =deser=> %s (%.4f)%n", line, deserializedLabel, lc.ScoresOf(d2).GetCount(deserializedLabel));
    }
}
/// <summary>Train a multinomial classifier off of the provided dataset.</summary>
/// <remarks>
/// The dataset is modified in place: the feature count threshold is applied to it and it is
/// randomized before training. After training, the classifier is run over the same dataset
/// and the resulting training accuracy is logged.
/// </remarks>
/// <param name="dataset">The dataset to train the classifier off of.</param>
/// <param name="featureThreshold">Value passed to <c>ApplyFeatureCountThreshold</c> on the dataset before training.</param>
/// <param name="sigma">Value passed to <c>InitFactory</c> when creating the classifier factory.</param>
/// <returns>A classifier.</returns>
public static IClassifier<string, string> TrainMultinomialClassifier(GeneralDataset<string, string> dataset, int featureThreshold, double sigma)
{
    // Set up the dataset and factory
    log.Info("Applying feature threshold (" + featureThreshold + ")...");
    dataset.ApplyFeatureCountThreshold(featureThreshold);
    log.Info("Randomizing dataset...");
    // Upper-case 'L' suffix: the lower-case form is easily misread as the digit 1.
    dataset.Randomize(42L);
    log.Info("Creating factory...");
    LinearClassifierFactory<string, string> factory = InitFactory(sigma);

    // Train the final classifier
    log.Info("BEGIN training");
    LinearClassifier<string, string> classifier = factory.TrainClassifier(dataset);
    log.Info("END training");

    // Debug: measure accuracy on the training data itself
    KBPRelationExtractor.Accuracy trainAccuracy = new KBPRelationExtractor.Accuracy();
    foreach (IDatum<string, string> datum in dataset)
    {
        string guess = classifier.ClassOf(datum);
        trainAccuracy.Predict(Java.Util.Collections.Singleton(guess), Java.Util.Collections.Singleton(datum.Label()));
    }
    log.Info("Training accuracy:");
    log.Info(trainAccuracy.ToString());
    log.Info(string.Empty);

    // Return the classifier
    return classifier;
}
/// <summary>
/// Checks the bookkeeping of <c>Dataset</c> before and after applying a feature count
/// threshold, then trains a linear classifier on it and verifies the predicted class and
/// class probabilities for a small test datum.
/// </summary>
public static void TestDataset()
{
    Dataset<string, string> data = new Dataset<string, string>();
    data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "fever", "cough", "congestion" }), "cold"));
    data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "fever", "cough", "nausea" }), "flu"));
    data.Add(new BasicDatum<string, string>(Arrays.AsList(new string[] { "cough", "congestion" }), "cold"));

    // Statistics of the untouched data set.
    NUnit.Framework.Assert.AreEqual(4, data.NumFeatures());
    NUnit.Framework.Assert.AreEqual(4, data.NumFeatureTypes());
    NUnit.Framework.Assert.AreEqual(2, data.NumClasses());
    NUnit.Framework.Assert.AreEqual(8, data.NumFeatureTokens());
    NUnit.Framework.Assert.AreEqual(3, data.Size());

    // Statistics after applying a feature count threshold of 2.
    data.ApplyFeatureCountThreshold(2);
    NUnit.Framework.Assert.AreEqual(3, data.NumFeatures());
    NUnit.Framework.Assert.AreEqual(3, data.NumFeatureTypes());
    NUnit.Framework.Assert.AreEqual(2, data.NumClasses());
    NUnit.Framework.Assert.AreEqual(7, data.NumFeatureTokens());
    NUnit.Framework.Assert.AreEqual(3, data.Size());

    LinearClassifierFactory<string, string> factory = new LinearClassifierFactory<string, string>();
    LinearClassifier<string, string> classifier = factory.TrainClassifier(data);
    IDatum<string, string> d = new BasicDatum<string, string>(Arrays.AsList(new string[] { "cough", "fever" }));

    // NUnit takes (expected, actual[, delta], message). With the message first, the
    // message string would be treated as the expected value.
    NUnit.Framework.Assert.AreEqual("flu", classifier.ClassOf(d), "Classification incorrect");
    ICounter<string> probs = classifier.ProbabilityOf(d);
    NUnit.Framework.Assert.AreEqual(0.4553, probs.GetCount("cold"), 0.0001, "Returned probability incorrect");
    NUnit.Framework.Assert.AreEqual(0.5447, probs.GetCount("flu"), 0.0001, "Returned probability incorrect");
    System.Console.Out.WriteLine();
}
/// <summary>
/// Demo: builds a three-example real-valued-feature data set, prints its summary
/// statistics, trains a linear classifier with the quasi-Newton minimizer and asks the
/// classifier to justify its decision on a fourth, unlabeled datum.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    Edu.Stanford.Nlp.Classify.RVFDataset<string, string> data =
        new Edu.Stanford.Nlp.Classify.RVFDataset<string, string>();

    // Feature counters of the three training examples.
    ClassicCounter<string> firstCold = new ClassicCounter<string>();
    firstCold.IncrementCount("fever", 3.5);
    firstCold.IncrementCount("cough", 1.1);
    firstCold.IncrementCount("congestion", 4.2);

    ClassicCounter<string> flu = new ClassicCounter<string>();
    flu.IncrementCount("fever", 1.5);
    flu.IncrementCount("cough", 2.1);
    flu.IncrementCount("nausea", 3.2);

    ClassicCounter<string> secondCold = new ClassicCounter<string>();
    secondCold.IncrementCount("cough", 2.5);
    secondCold.IncrementCount("congestion", 3.2);

    // Label the examples and add them in the same order they were built.
    data.Add(new RVFDatum<string, string>(firstCold, "cold"));
    data.Add(new RVFDatum<string, string>(flu, "flu"));
    data.Add(new RVFDatum<string, string>(secondCold, "cold"));
    data.SummaryStatistics();

    // Train with the quasi-Newton minimizer.
    LinearClassifierFactory<string, string> factory = new LinearClassifierFactory<string, string>();
    factory.UseQuasiNewton();
    LinearClassifier<string, string> trained = factory.TrainClassifier(data);

    // Unlabeled query datum.
    ClassicCounter<string> queryFeatures = new ClassicCounter<string>();
    queryFeatures.IncrementCount("cough", 2.3);
    queryFeatures.IncrementCount("fever", 1.3);
    RVFDatum<string, string> query = new RVFDatum<string, string>(queryFeatures);

    trained.JustificationOf((IDatum<string, string>)query);
}
/// <summary>
/// Appends <paramref name="art"/> to <c>bezeichnung</c> and the value 1 to
/// <c>parameter</c>, then passes each extended sequence to
/// <c>LinearClassifier.SwitchPosInArr</c> with the indices of its last two positions.
/// </summary>
/// <remarks>
/// NOTE(review): <c>SwitchPosInArr</c> is defined elsewhere; presumably it swaps the two
/// given positions, so the new entry ends up second to last. For <c>parameter</c> the
/// return value is discarded, so the call is assumed to modify the array in place. Confirm.
/// </remarks>
/// <param name="art">The character to add to <c>bezeichnung</c>.</param>
public void AddParameterHinten(char art)
{
    // Extend the label and exchange its last two positions.
    bezeichnung = bezeichnung + art;
    int lastLabelPos = bezeichnung.Length - 1;
    bezeichnung = LinearClassifier.SwitchPosInArr(bezeichnung.ToArray(), lastLabelPos, lastLabelPos - 1);

    // Extend the parameter array by a 1 and exchange its last two positions as well.
    parameter = parameter.Concat(new double[] { 1 }).ToArray();
    int lastParamPos = parameter.Length - 1;
    LinearClassifier.SwitchPosInArr(parameter, lastParamPos, lastParamPos - 1);
}
/// <summary>
/// Creates the interpolator for the given function and a <c>LinearClassifier</c> sized
/// by that function's <c>n</c>.
/// </summary>
/// <param name="func">
/// The function to use; when <c>null</c> (the default), a <c>Polynomial(3)</c> is used instead.
/// </param>
public LinearInterpol(IFunction func = null)
{
    // Fall back to the default function when none was supplied.
    func = func ?? new Polynomial(3);

    this.func = func;
    classifier = new LinearClassifier(func.n);
}
/// <summary>
/// Creates the objective function from labeled data, unlabeled data and the list of
/// GE features.
/// </summary>
/// <remarks>
/// Prints the sizes of the three inputs, stores them together with the feature and class
/// counts of the labeled dataset, creates a <c>LinearClassifier</c> with <c>null</c>
/// weights over the dataset's feature and label indices, and finally calls
/// <c>ComputeEmpiricalStatistics</c> on the GE features.
/// </remarks>
/// <param name="labeledDataset">The labeled dataset; supplies feature/class counts and the feature/label indices.</param>
/// <param name="unlabeledDataList">The unlabeled data.</param>
/// <param name="geFeatures">The GE features.</param>
public GeneralizedExpectationObjectiveFunction(GeneralDataset<L, F> labeledDataset, IList<IDatum<L, F>> unlabeledDataList, IList<F> geFeatures)
{
    // Report the input sizes.
    string exampleCounts = "Number of labeled examples:" + labeledDataset.size
        + "\nNumber of unlabeled examples:" + unlabeledDataList.Count;
    System.Console.Out.WriteLine(exampleCounts);
    System.Console.Out.WriteLine("Number of GE features:" + geFeatures.Count);

    // Dimensions taken from the labeled data.
    this.numFeatures = labeledDataset.NumFeatures();
    this.numClasses = labeledDataset.NumClasses();

    // Keep references to the inputs.
    this.labeledDataset = labeledDataset;
    this.unlabeledDataList = unlabeledDataList;
    this.geFeatures = geFeatures;

    // Classifier without weights, sharing the dataset's indices.
    this.classifier = new LinearClassifier<L, F>(null, labeledDataset.featureIndex, labeledDataset.labelIndex);

    ComputeEmpiricalStatistics(geFeatures);
}
/// <summary>
/// Turns the collected tagged-word counts into a weighted training set, derives the
/// smoothed tag distribution and trains the scorer.
/// </summary>
/// <remarks>
/// A word is skipped when <c>trainOnLowCount</c> is set and its count exceeds
/// <c>trainCountThreshold</c>, or when <c>functionWordTags</c> contains its word form.
/// Every remaining word increments its tag's count and is added to the data set, weighted
/// by its count, or by 1 when <c>trainByType</c> is set. <c>datumCounter</c> is set to
/// <c>null</c> once the data set is built.
/// </remarks>
public virtual void FinishTraining()
{
    IntCounter<string> tagCounts = new IntCounter<string>();
    WeightedDataset trainingData = new WeightedDataset(datumCounter.Size());

    foreach (TaggedWord taggedWord in datumCounter.KeySet())
    {
        int occurrences = datumCounter.GetIntCount(taggedWord);

        bool aboveCountLimit = trainOnLowCount && occurrences > trainCountThreshold;
        if (aboveCountLimit || functionWordTags.Contains(taggedWord.Word()))
        {
            continue;
        }

        tagCounts.IncrementCount(taggedWord.Tag());

        // When training by type every word contributes a weight of 1.
        int weight = trainByType ? 1 : occurrences;
        trainingData.Add(new BasicDatum(featExtractor.MakeFeatures(taggedWord.Word()), taggedWord.Tag()), weight);
    }

    // The raw counts are no longer needed once the data set exists.
    datumCounter = null;
    tagDist = Distribution.LaplaceSmoothedDistribution(tagCounts, tagCounts.Size(), 0.5);

    ApplyThresholds(trainingData);

    Verbose("Making classifier...");
    QNMinimizer minimizer = new QNMinimizer();
    LinearClassifierFactory classifierFactory = new LinearClassifierFactory(minimizer);
    classifierFactory.SetTol(tol);
    classifierFactory.SetSigma(sigma);
    scorer = classifierFactory.TrainClassifier(trainingData);
    Verbose("Done training.");
}
/// <summary>
/// Logs, at level Fine, the feature weights of <paramref name="classifier"/>: one log
/// entry per label, with the labels in sorted order.
/// </summary>
/// <param name="classifier">The classifier whose weights are reported.</param>
/// <param name="classLabel">
/// Optional label named in a heading line logged before the weights; no heading is
/// logged when this is <c>null</c>.
/// </param>
protected internal static void ReportWeights(LinearClassifier<string, string> classifier, string classLabel)
{
    if (classLabel != null)
    {
        logger.Fine("CLASSIFIER WEIGHTS FOR LABEL " + classLabel);
    }

    IDictionary<string, ICounter<string>> weightsByLabel = classifier.WeightsAsMapOfCounters();

    // Report the labels in sorted order.
    IList<string> orderedLabels = new List<string>(weightsByLabel.Keys);
    orderedLabels.Sort();

    foreach (string label in orderedLabels)
    {
        StringBuilder report = new StringBuilder();
        report.Append("WEIGHTS FOR LABEL ");
        report.Append(label);
        report.Append(':');

        // One " feature:weight" entry per feature, each terminated by a newline.
        IList<Pair<string, double>> rankedFeatures = Counters.ToSortedListWithCounts(weightsByLabel[label]);
        foreach (Pair<string, double> entry in rankedFeatures)
        {
            report.Append(' ');
            report.Append(entry.First());
            report.Append(':');
            report.Append(entry.Second() + "\n");
        }

        logger.Fine(report.ToString());
    }
}
/// <summary>
/// Replaces the classifier stored in the <c>platt</c> field.
/// </summary>
/// <param name="platt">The classifier to store; it is stored as given, without validation or copying.</param>
public virtual void SetPlatt(LinearClassifier<L, L> platt)
{
    this.platt = platt;
}
/// <summary>
/// Creates the classifier from its weights and thresholds, both forwarded to the base
/// constructor, and stores the given <paramref name="platt"/> classifier.
/// </summary>
/// <param name="weightCounter">Weights keyed by (feature, label) pair; passed to the base constructor.</param>
/// <param name="thresholds">Per-label thresholds; passed to the base constructor.</param>
/// <param name="platt">The classifier kept in the <c>platt</c> field.</param>
public SVMLightClassifier(
    ClassicCounter<Pair<F, L>> weightCounter,
    ClassicCounter<L> thresholds,
    LinearClassifier<L, L> platt)
    : base(weightCounter, thresholds)
{
    this.platt = platt;
}
/// <summary>Train a sentiment model from a set of data.</summary>
/// <remarks>
/// After training the final classifier, a 4-fold cross-validation is run and the average
/// accuracy plus per-label precision, recall and F1 are logged.
/// </remarks>
/// <param name="data">The data to train the model from.</param>
/// <param name="modelLocation">
/// An optional location to save the model.
/// Note that this stream will be closed in this method,
/// and should not be written to thereafter.
/// </param>
/// <returns>A sentiment classifier, ready to use.</returns>
public static SimpleSentiment Train(IStream<SimpleSentiment.SentimentDatum> data, Optional<OutputStream> modelLocation)
{
    // Some useful variables configuring how we train.
    // NOTE(review): useL1 is not read anywhere in this method; presumably it was meant for
    // the minimizer creator, which is currently passed as null below. Confirm.
    bool useL1 = true;
    double sigma = 1.0;
    int featureCountThreshold = 5;

    // Featurize the data
    Redwood.Util.ForceTrack("Featurizing");
    RVFDataset<SentimentClass, string> dataset = new RVFDataset<SentimentClass, string>();
    AtomicInteger datasize = new AtomicInteger(0);
    ICounter<SentimentClass> distribution = new ClassicCounter<SentimentClass>();
    // NOTE(review): the map/for-each callbacks are null here, so nothing visible in this
    // method fills 'dataset' or 'distribution'. Confirm against the intended callbacks.
    data.Unordered().Parallel().Map(null).ForEach(null);
    Redwood.Util.EndTrack("Featurizing");

    // Print label distribution
    Redwood.Util.StartTrack("Distribution");
    foreach (SentimentClass label in SentimentClass.Values())
    {
        // "{0,7}" right-aligns the count in a 7-character column; the printf-style "%7d"
        // is not a .NET format item and would be printed literally.
        Redwood.Util.Log(string.Format("{0,7}", (int)distribution.GetCount(label)) + " " + label);
    }
    Redwood.Util.EndTrack("Distribution");

    // Train the classifier
    Redwood.Util.ForceTrack("Training");
    if (featureCountThreshold > 1)
    {
        dataset.ApplyFeatureCountThreshold(featureCountThreshold);
    }
    dataset.Randomize(42L);
    LinearClassifierFactory<SentimentClass, string> factory = new LinearClassifierFactory<SentimentClass, string>();
    factory.SetVerbose(true);
    try
    {
        factory.SetMinimizerCreator(null);
    }
    catch (Exception e)
    {
        // Best effort: training continues with whatever minimizer the factory already has,
        // but the failure is no longer swallowed silently.
        log.Info("Could not set the minimizer creator: " + e);
    }
    factory.SetSigma(sigma);
    LinearClassifier<SentimentClass, string> classifier = factory.TrainClassifier(dataset);

    // Optionally save the model
    modelLocation.IfPresent(null);
    Redwood.Util.EndTrack("Training");

    // Evaluate the model
    Redwood.Util.ForceTrack("Evaluating");
    factory.SetVerbose(false);
    double sumAccuracy = 0.0;
    ICounter<SentimentClass> sumP = new ClassicCounter<SentimentClass>();
    ICounter<SentimentClass> sumR = new ClassicCounter<SentimentClass>();
    int numFolds = 4;
    for (int fold = 0; fold < numFolds; ++fold)
    {
        Pair<GeneralDataset<SentimentClass, string>, GeneralDataset<SentimentClass, string>> trainTest = dataset.SplitOutFold(fold, numFolds);
        // Convex objective, so starting from the full classifier's weights should be OK.
        LinearClassifier<SentimentClass, string> foldClassifier = factory.TrainClassifierWithInitialWeights(trainTest.first, classifier);
        sumAccuracy += foldClassifier.EvaluateAccuracy(trainTest.second);
        foreach (SentimentClass label_1 in SentimentClass.Values())
        {
            Pair<double, double> pr = foldClassifier.EvaluatePrecisionAndRecall(trainTest.second, label_1);
            // Precision and recall are accumulated separately; adding both to sumP would
            // leave the reported recall at zero and inflate precision.
            sumP.IncrementCount(label_1, pr.first);
            sumR.IncrementCount(label_1, pr.second);
        }
    }

    DecimalFormat df = new DecimalFormat("0.000%");
    log.Info("----------");
    double aveAccuracy = sumAccuracy / ((double)numFolds);
    log.Info(string.Empty + numFolds + "-fold accuracy: " + df.Format(aveAccuracy));
    log.Info(string.Empty);
    foreach (SentimentClass label_2 in SentimentClass.Values())
    {
        double p = sumP.GetCount(label_2) / numFolds;
        double r = sumR.GetCount(label_2) / numFolds;
        // F1 is reported as 0 when precision and recall are both 0 (avoids 0/0 = NaN).
        double f1 = (p + r) > 0.0 ? 2 * p * r / (p + r) : 0.0;
        log.Info(label_2 + " (P) = " + df.Format(p));
        log.Info(label_2 + " (R) = " + df.Format(r));
        log.Info(label_2 + " (F1) = " + df.Format(f1));
        log.Info(string.Empty);
    }
    log.Info("----------");
    Redwood.Util.EndTrack("Evaluating");

    // Return
    return new SimpleSentiment(classifier);
}
/// <summary>
/// Interactive check of <c>LinearClassifier</c>: builds a classifier from four
/// two-dimensional samples, then repeatedly reads two numbers from the console (one per
/// line) and prints the classification of that point.
/// </summary>
/// <remarks>
/// The loop ends when the console input is exhausted. Any error while reading, converting
/// or classifying is reported as an input error and the prompt is shown again.
/// </remarks>
public static void TestLC()
{
    const int dimensions = 2;

    double[][] input = new double[][]
    {
        new double[] { 0, 0 },
        new double[] { 0, 1 },
        new double[] { 1, 0 },
        new double[] { 1.1, 1.1 }
    };
    bool[] result = new bool[] { true, false, false, true };

    LinearClassifier LC = new LinearClassifier(input, result);

    while (true)
    {
        try
        {
            Console.WriteLine("Eingabe: ");
            double[] loop = new double[dimensions];
            for (int i = 0; i < dimensions; ++i)
            {
                string line = Console.ReadLine();
                if (line == null)
                {
                    // End of input. Without this check Convert.ToDouble would turn the
                    // null into 0 and the loop would spin forever on a closed stream.
                    return;
                }
                loop[i] = Convert.ToDouble(line);
            }
            Console.WriteLine("Ergebniss: " + LC.Classify(loop).ToString());
        }
        catch
        {
            Console.WriteLine("Fehler bei der Eingabe");
        }
    }
}