/// <summary>
/// Command-line entry point. Loads the object named by <c>StatisticalModel</c>, wraps or casts it into a
/// statistical extractor, combines it with a tokensregex and a semgrex extractor into an ensemble, and
/// calls <c>ComputeAccuracy</c> on that ensemble with the dataset read from <c>TestFile</c>.
/// </summary>
/// <param name="args">Command-line arguments; passed to <c>ArgumentParser.FillOptions</c>.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.InvalidCastException">
/// The loaded object is neither a <c>LinearClassifier</c> nor a <c>KBPStatisticalExtractor</c>.
/// </exception>
public static void Main(string[] args)
{
    // Disable SLF4J crap.
    RedwoodConfiguration.Standard().Apply();
    ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPEnsembleExtractor), args);

    // The serialized object may be a bare classifier or an already-built extractor.
    object loadedModel = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(StatisticalModel);
    IKBPRelationExtractor statistical;
    if (loadedModel is LinearClassifier)
    {
        //noinspection unchecked
        statistical = new KBPStatisticalExtractor((IClassifier<string, string>)loadedModel);
    }
    else if (loadedModel is KBPStatisticalExtractor)
    {
        statistical = (KBPStatisticalExtractor)loadedModel;
    }
    else
    {
        throw new InvalidCastException(loadedModel.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
    }
    logger.Info("Read statistical model from " + StatisticalModel);

    // Build the rule-based members in the same order the ensemble constructor receives them.
    KBPTokensregexExtractor tokensregex = new KBPTokensregexExtractor(TokensregexDir);
    KBPSemgrexExtractor semgrex = new KBPSemgrexExtractor(SemgrexDir);
    IKBPRelationExtractor ensemble = new Edu.Stanford.Nlp.IE.KBPEnsembleExtractor(tokensregex, semgrex, statistical);

    IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
    // NOTE(review): the null passed to Map looks like a lambda dropped by automated conversion - confirm.
    ensemble.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
}
/// <summary>
/// Command-line entry point. Builds a <c>KBPSemgrexExtractor</c> over <c>Dir</c> and calls
/// <c>ComputeAccuracy</c> on it with the dataset read from <c>TestFile</c>.
/// </summary>
/// <param name="args">Command-line arguments; passed to <c>ArgumentParser.FillOptions</c>.</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    // Disable SLF4J crap.
    RedwoodConfiguration.Standard().Apply();
    ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPSemgrexExtractor), args);

    Edu.Stanford.Nlp.IE.KBPSemgrexExtractor semgrex = new Edu.Stanford.Nlp.IE.KBPSemgrexExtractor(Dir);

    IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
    // NOTE(review): the null passed to Map looks like a lambda dropped by automated conversion - confirm.
    semgrex.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
}
/// <summary>
/// Command-line entry point. Reads the test dataset from <c>TestFile</c>; if nothing exists at
/// <c>ModelFile</c>, reads <c>TrainFile</c>, trains a classifier and writes it to <c>ModelFile</c>;
/// then loads the model from <c>ModelFile</c> and calls <c>ComputeAccuracy</c> with the test dataset.
/// </summary>
/// <param name="args">Command-line arguments; passed to <c>ArgumentParser.FillOptions</c>.</param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
public static void Main(string[] args)
{
    // Disable SLF4J crap.
    RedwoodConfiguration.Standard().Apply();
    // Fill command-line options
    ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPStatisticalExtractor), args);

    // Load the test (or dev) data
    Redwood.Util.ForceTrack("Test data");
    IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
    log.Info("Read " + evaluationSet.Count + " examples");
    Redwood.Util.EndTrack("Test data");

    // If we can't find an existing model, train one
    bool modelAlreadyExists = IOUtils.ExistsInClasspathOrFileSystem(ModelFile);
    if (!modelAlreadyExists)
    {
        Redwood.Util.ForceTrack("Training data");
        IList<Pair<KBPRelationExtractor.KBPInput, string>> trainingSet = IKBPRelationExtractor.ReadDataset(TrainFile);
        log.Info("Read " + trainingSet.Count + " examples");
        // NOTE(review): the null arguments to Map/Filter look like lambdas dropped by automated conversion - confirm.
        log.Info(string.Empty + trainingSet.Stream().Map(null).Filter(null).Count() + " are " + KBPRelationExtractorConstants.NoRelation);
        Redwood.Util.EndTrack("Training data");

        // Featurize + create the dataset
        Redwood.Util.ForceTrack("Creating dataset");
        RVFDataset<string, string> features = new RVFDataset<string, string>();
        // i and beginTime are not read anywhere below.
        // NOTE(review): presumably they were consumed by the ForEach callback that is now null - confirm.
        AtomicInteger i = new AtomicInteger(0);
        long beginTime = Runtime.CurrentTimeMillis();
        // This takes a while per example
        trainingSet.Stream().Parallel().ForEach(null);
        // Free up some memory
        trainingSet.Clear();
        Redwood.Util.EndTrack("Creating dataset");

        // Train the classifier
        log.Info("Training classifier:");
        IClassifier<string, string> trained = TrainMultinomialClassifier(features, FeatureThreshold, Sigma);
        // Free up some memory
        features.Clear();

        // Save the classifier
        IOUtils.WriteObjectToFile(new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor(trained), ModelFile);
    }

    // Read either a newly-trained or pre-trained model
    object model = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(ModelFile);
    // A bare classifier gets wrapped; anything else is cast directly to the extractor type.
    //noinspection unchecked
    Edu.Stanford.Nlp.IE.KBPStatisticalExtractor extractor = model is IClassifier
        ? new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor((IClassifier<string, string>)model)
        : (Edu.Stanford.Nlp.IE.KBPStatisticalExtractor)model;

    // Evaluate the model
    extractor.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
}
/// <summary>
/// Entry point. Concatenates the IMDB, Stanford, Twitter and unlabelled inputs into one stream,
/// trains a <c>SimpleSentiment</c> classifier on it while passing an output stream opened on
/// <c>/users/gabor/tmp/model.ser.gz</c>, and logs the classification of a sample sentence.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    RedwoodConfiguration.Standard().Apply();
    Redwood.Util.StartTrack("main");

    // Read the data
    // Sources are created and concatenated in the same left-to-right order as the original nested expression.
    IStream<SimpleSentiment.SentimentDatum> imdbTrain = IStream.Concat(
        Imdb("/users/gabor/tmp/aclImdb/train/pos", SentimentClass.Positive),
        Imdb("/users/gabor/tmp/aclImdb/train/neg", SentimentClass.Negative));
    IStream<SimpleSentiment.SentimentDatum> imdbTest = IStream.Concat(
        Imdb("/users/gabor/tmp/aclImdb/test/pos", SentimentClass.Positive),
        Imdb("/users/gabor/tmp/aclImdb/test/neg", SentimentClass.Negative));
    IStream<SimpleSentiment.SentimentDatum> imdb = IStream.Concat(imdbTrain, imdbTest);
    IStream<SimpleSentiment.SentimentDatum> stanford = IStream.Concat(
        Stanford("/users/gabor/tmp/train.tsv"),
        Stanford("/users/gabor/tmp/test.tsv"));
    IStream<SimpleSentiment.SentimentDatum> other = IStream.Concat(
        Twitter("/users/gabor/tmp/twitter.csv"),
        Unlabelled("/users/gabor/tmp/wikipedia"));
    IStream<SimpleSentiment.SentimentDatum> data = IStream.Concat(imdb, IStream.Concat(stanford, other));

    // Train the model
    OutputStream stream = IOUtils.GetFileOutputStream("/users/gabor/tmp/model.ser.gz");
    SimpleSentiment classifier;
    try
    {
        classifier = SimpleSentiment.Train(data, Optional.Of(stream));
    }
    finally
    {
        // Close the file even when training throws, so the handle is not leaked.
        stream.Close();
    }

    log.Info(classifier.Classify("I think life is great"));
    Redwood.Util.EndTrack("main");
}