예제 #1
0
        /// <summary>Train a multinomial classifier off of the provided dataset.</summary>
        /// <remarks>
        /// Before training, the dataset is passed through <c>ApplyFeatureCountThreshold</c> and
        /// <c>Randomize</c>; callers should presumably expect the dataset to be modified in place
        /// (confirm against GeneralDataset). After training, every datum in the dataset is classified
        /// again and the resulting training accuracy is written to the log.
        /// </remarks>
        /// <param name="dataset">The dataset to train the classifier off of.</param>
        /// <param name="featureThreshold">The threshold handed to <c>dataset.ApplyFeatureCountThreshold</c> before training.</param>
        /// <param name="sigma">The value handed to <c>InitFactory</c> when creating the classifier factory.</param>
        /// <returns>The trained classifier.</returns>
        public static IClassifier <string, string> TrainMultinomialClassifier(GeneralDataset <string, string> dataset, int featureThreshold, double sigma)
        {
            // Set up the dataset and factory
            log.Info("Applying feature threshold (" + featureThreshold + ")...");
            dataset.ApplyFeatureCountThreshold(featureThreshold);
            log.Info("Randomizing dataset...");
            // Fixed seed so the shuffle is the same on every run.
            // Upper-case 'L' suffix: the lower-case form is easily misread as the digit 1 (CS0078).
            dataset.Randomize(42L);
            log.Info("Creating factory...");
            LinearClassifierFactory <string, string> factory = InitFactory(sigma);

            // Train the final classifier
            log.Info("BEGIN training");
            LinearClassifier <string, string> classifier = factory.TrainClassifier(dataset);
            log.Info("END training");

            // Debug: score the classifier on the data it was just trained on
            KBPRelationExtractor.Accuracy trainAccuracy = new KBPRelationExtractor.Accuracy();
            foreach (IDatum <string, string> datum in dataset)
            {
                string guess = classifier.ClassOf(datum);
                trainAccuracy.Predict(Java.Util.Collections.Singleton(guess), Java.Util.Collections.Singleton(datum.Label()));
            }
            log.Info("Training accuracy:");
            log.Info(trainAccuracy.ToString());
            log.Info(string.Empty);

            // Return the classifier
            return classifier;
        }
 /// <summary>
 /// Checks the precision / recall / F1 figures reported by <c>KBPRelationExtractor.Accuracy</c>
 /// when some of the predicted or gold label sets contain only "no_relation".
 /// </summary>
 public virtual void TestAccuracyNoRelation()
 {
     // Tolerance for the floating-point comparisons below.
     const double tolerance = 1e-10;

     KBPRelationExtractor.Accuracy accuracy = new KBPRelationExtractor.Accuracy();
     // Each call is Predict(predicted labels, gold labels).
     accuracy.Predict(new HashSet <string>(Arrays.AsList("a")), new HashSet <string>(Arrays.AsList("a")));
     accuracy.Predict(new HashSet <string>(Arrays.AsList("a")), new HashSet <string>(Arrays.AsList("no_relation")));
     accuracy.Predict(new HashSet <string>(Arrays.AsList("no_relation")), new HashSet <string>(Arrays.AsList("b")));
     accuracy.Predict(new HashSet <string>(Arrays.AsList("b")), new HashSet <string>(Arrays.AsList("no_relation")));
     accuracy.Predict(new HashSet <string>(Arrays.AsList("b")), new HashSet <string>(Arrays.AsList("b")));
     accuracy.Predict(new HashSet <string>(Arrays.AsList("b")), new HashSet <string>(Arrays.AsList("b")));

     // NUnit's signature is AreEqual(expected, actual, delta). The expected values match these tallies:
     //   "a": predicted 2, correct 1, gold 1;  "b": predicted 3, correct 2, gold 3
     // i.e. "no_relation" counts as neither a prediction nor a gold label
     // (assumed from the expected values; Accuracy is defined elsewhere).
     NUnit.Framework.Assert.AreEqual(0.5, accuracy.Precision("a"), tolerance);
     NUnit.Framework.Assert.AreEqual(1.0, accuracy.Recall("a"), tolerance);
     NUnit.Framework.Assert.AreEqual(2.0 * 1.0 * 0.5 / (1.0 + 0.5), accuracy.F1("a"), tolerance);
     NUnit.Framework.Assert.AreEqual(2.0 / 3.0, accuracy.Precision("b"), tolerance);
     NUnit.Framework.Assert.AreEqual(2.0 / 3.0, accuracy.Recall("b"), tolerance);
     NUnit.Framework.Assert.AreEqual(3.0 / 5.0, accuracy.PrecisionMicro(), tolerance);
     NUnit.Framework.Assert.AreEqual(7.0 / 12.0, accuracy.PrecisionMacro(), tolerance);
     NUnit.Framework.Assert.AreEqual(3.0 / 4.0, accuracy.RecallMicro(), tolerance);
     NUnit.Framework.Assert.AreEqual(5.0 / 6.0, accuracy.RecallMacro(), tolerance);
 }