/// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <summary>
        /// Command-line entry point: loads the statistical model named by <c>StatisticalModel</c>,
        /// combines it with a tokensregex and a semgrex extractor into an ensemble, and evaluates
        /// that ensemble on the dataset read from <c>TestFile</c>.
        /// </summary>
        /// <param name="args">Command-line arguments, forwarded to <c>ArgumentParser.FillOptions</c>.</param>
        /// <exception cref="System.InvalidCastException">
        /// The loaded object is neither a <c>LinearClassifier</c> nor a <c>KBPStatisticalExtractor</c>.
        /// </exception>
        public static void Main(string[] args)
        {
            // Apply the standard Redwood configuration (the original note says this silences SLF4J output).
            RedwoodConfiguration.Standard().Apply();
            // Populate this class's options from the command line.
            ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPEnsembleExtractor), args);

            // The serialized model may be either a bare classifier or an already-wrapped extractor.
            object loaded = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(StatisticalModel);
            IKBPRelationExtractor statisticalExtractor;
            if (loaded is LinearClassifier)
            {
                // Bare classifier: wrap it so it exposes the relation-extractor interface.
                statisticalExtractor = new KBPStatisticalExtractor((IClassifier <string, string>)loaded);
            }
            else if (loaded is KBPStatisticalExtractor)
            {
                statisticalExtractor = (KBPStatisticalExtractor)loaded;
            }
            else
            {
                throw new InvalidCastException(loaded.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
            }
            logger.Info("Read statistical model from " + StatisticalModel);

            // Build the rule-based extractors in the same order the ensemble receives them.
            KBPTokensregexExtractor tokensregexExtractor = new KBPTokensregexExtractor(TokensregexDir);
            KBPSemgrexExtractor     semgrexExtractor     = new KBPSemgrexExtractor(SemgrexDir);
            IKBPRelationExtractor   extractor            = new Edu.Stanford.Nlp.IE.KBPEnsembleExtractor(tokensregexExtractor, semgrexExtractor, statisticalExtractor);

            IList <Pair <KBPRelationExtractor.KBPInput, string> > evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);

            // NOTE(review): the null passed to Predictions.Map looks like a callback dropped by
            // automated translation -- confirm against the original implementation.
            extractor.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
        }
Пример #2
0
        /// <summary>
        /// Command-line entry point: builds a semgrex extractor from <c>Dir</c> and evaluates it
        /// on the dataset read from <c>TestFile</c>.
        /// </summary>
        /// <param name="args">Command-line arguments, forwarded to <c>ArgumentParser.FillOptions</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            // Apply the standard Redwood configuration (the original note says this silences SLF4J output).
            RedwoodConfiguration.Standard().Apply();
            // Populate this class's options from the command line.
            ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPSemgrexExtractor), args);

            var semgrexExtractor = new Edu.Stanford.Nlp.IE.KBPSemgrexExtractor(Dir);
            IList <Pair <KBPRelationExtractor.KBPInput, string> > evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);

            // NOTE(review): the null passed to Predictions.Map looks like a callback dropped by
            // automated translation -- confirm against the original implementation.
            semgrexExtractor.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
        }
Пример #3
0
        /// <summary>
        /// Command-line entry point: reads the test data from <c>TestFile</c>, trains and saves a
        /// model to <c>ModelFile</c> when none exists there yet, then loads the model from
        /// <c>ModelFile</c> and evaluates it on the test data.
        /// </summary>
        /// <param name="args">Command-line arguments, forwarded to <c>ArgumentParser.FillOptions</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            // Apply the standard Redwood configuration (the original note says this silences SLF4J output).
            RedwoodConfiguration.Standard().Apply();
            // Populate this class's options from the command line.
            ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPStatisticalExtractor), args);

            // --- Load the test (or dev) data ---
            Redwood.Util.ForceTrack("Test data");
            IList <Pair <KBPRelationExtractor.KBPInput, string> > evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
            log.Info("Read " + evaluationSet.Count + " examples");
            Redwood.Util.EndTrack("Test data");

            // --- Train a model only when no existing one can be found ---
            bool modelExists = IOUtils.ExistsInClasspathOrFileSystem(ModelFile);
            if (!modelExists)
            {
                Redwood.Util.ForceTrack("Training data");
                IList <Pair <KBPRelationExtractor.KBPInput, string> > trainingSet = IKBPRelationExtractor.ReadDataset(TrainFile);
                log.Info("Read " + trainingSet.Count + " examples");
                // NOTE(review): the null arguments to Map/Filter (and ForEach below) look like
                // callbacks dropped by automated translation -- confirm against the original.
                var noRelationCount = trainingSet.Stream().Map(null).Filter(null).Count();
                log.Info(string.Empty + noRelationCount + " are " + KBPRelationExtractorConstants.NoRelation);
                Redwood.Util.EndTrack("Training data");

                // Featurize + create the dataset
                Redwood.Util.ForceTrack("Creating dataset");
                RVFDataset <string, string> featurized = new RVFDataset <string, string>();
                // Neither local below is referenced in the visible code; presumably they were
                // used by the ForEach callback -- TODO confirm.
                AtomicInteger processed   = new AtomicInteger(0);
                long          startMillis = Runtime.CurrentTimeMillis();
                // Per the original note, this step takes a while per example.
                trainingSet.Stream().Parallel().ForEach(null);
                // Release the raw examples to free up some memory.
                trainingSet.Clear();
                Redwood.Util.EndTrack("Creating dataset");

                // Train the classifier
                log.Info("Training classifier:");
                IClassifier <string, string> trained = TrainMultinomialClassifier(featurized, FeatureThreshold, Sigma);
                // Release the featurized dataset to free up some memory.
                featurized.Clear();

                // Save the classifier, wrapped as an extractor.
                IOUtils.WriteObjectToFile(new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor(trained), ModelFile);
            }

            // --- Read either the newly-trained or the pre-trained model ---
            object model = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(ModelFile);

            // A bare classifier gets wrapped; anything else is cast directly to the extractor type.
            Edu.Stanford.Nlp.IE.KBPStatisticalExtractor evaluator = (model is IClassifier)
                ? new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor((IClassifier <string, string>)model)
                : (Edu.Stanford.Nlp.IE.KBPStatisticalExtractor)model;

            // --- Evaluate the model ---
            evaluator.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
        }
Пример #4
0
        /// <summary>
        /// Entry point: concatenates the IMDB, Stanford, Twitter and unlabelled data sources into
        /// one stream, trains a <c>SimpleSentiment</c> model on it while passing an output stream
        /// for <c>/users/gabor/tmp/model.ser.gz</c>, and logs the classification of a sample sentence.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            RedwoodConfiguration.Standard().Apply();
            Redwood.Util.StartTrack("main");

            // Read the data. The sources are combined pairwise in the same order and nesting
            // as a single nested Concat expression would evaluate them.
            var imdbTrain = IStream.Concat(Imdb("/users/gabor/tmp/aclImdb/train/pos", SentimentClass.Positive), Imdb("/users/gabor/tmp/aclImdb/train/neg", SentimentClass.Negative));
            var imdbTest  = IStream.Concat(Imdb("/users/gabor/tmp/aclImdb/test/pos", SentimentClass.Positive), Imdb("/users/gabor/tmp/aclImdb/test/neg", SentimentClass.Negative));
            var imdb      = IStream.Concat(imdbTrain, imdbTest);
            var stanford  = IStream.Concat(Stanford("/users/gabor/tmp/train.tsv"), Stanford("/users/gabor/tmp/test.tsv"));
            var other     = IStream.Concat(Twitter("/users/gabor/tmp/twitter.csv"), Unlabelled("/users/gabor/tmp/wikipedia"));
            IStream <SimpleSentiment.SentimentDatum> data = IStream.Concat(imdb, IStream.Concat(stanford, other));

            // Train the model
            OutputStream    stream = IOUtils.GetFileOutputStream("/users/gabor/tmp/model.ser.gz");
            SimpleSentiment classifier;
            try
            {
                classifier = SimpleSentiment.Train(data, Optional.Of(stream));
            }
            finally
            {
                // Close the stream even when training throws, so the file handle is not leaked.
                stream.Close();
            }

            log.Info(classifier.Classify("I think life is great"));
            Redwood.Util.EndTrack("main");
        }