コード例 #1
0
        /// <summary>
        /// Command-line entry point: combines the tokensregex, semgrex and statistical
        /// extractors into one ensemble and runs <c>ComputeAccuracy</c> over the dataset
        /// read from <c>TestFile</c>.
        /// </summary>
        /// <param name="args">Command-line arguments, handed to <c>ArgumentParser.FillOptions</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.InvalidCastException">
        /// The object loaded from <c>StatisticalModel</c> is neither a <c>LinearClassifier</c>
        /// nor a <c>KBPStatisticalExtractor</c>.
        /// </exception>
        public static void Main(string[] args)
        {
            // Apply the standard Redwood configuration (original note: this silences SLF4J output).
            RedwoodConfiguration.Standard().Apply();
            ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPEnsembleExtractor), args);

            // The loaded object may be a bare classifier (which gets wrapped) or an
            // already-built statistical extractor (which is used directly).
            object loadedModel = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(StatisticalModel);
            IKBPRelationExtractor statistical;
            if (loadedModel is LinearClassifier)
            {
                //noinspection unchecked
                statistical = new KBPStatisticalExtractor((IClassifier<string, string>)loadedModel);
            }
            else if (loadedModel is KBPStatisticalExtractor)
            {
                statistical = (KBPStatisticalExtractor)loadedModel;
            }
            else
            {
                throw new InvalidCastException(loadedModel.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
            }
            logger.Info("Read statistical model from " + StatisticalModel);

            // Build the ensemble members in the same order they are passed to the ensemble.
            KBPTokensregexExtractor tokensregex = new KBPTokensregexExtractor(TokensregexDir);
            KBPSemgrexExtractor semgrex = new KBPSemgrexExtractor(SemgrexDir);
            IKBPRelationExtractor ensemble = new Edu.Stanford.Nlp.IE.KBPEnsembleExtractor(tokensregex, semgrex, statistical);

            IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);

            // NOTE(review): Predictions.Map is given a null mapper here — presumably the
            // original callback was lost in code conversion; TODO confirm.
            ensemble.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
        }
コード例 #2
0
        /// <summary>
        /// Command-line entry point: creates a semgrex extractor over <c>Dir</c> and runs
        /// <c>ComputeAccuracy</c> over the dataset read from <c>TestFile</c>.
        /// </summary>
        /// <param name="args">Command-line arguments, handed to <c>ArgumentParser.FillOptions</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            // Apply the standard Redwood configuration (original note: this silences SLF4J output).
            RedwoodConfiguration.Standard().Apply();
            ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPSemgrexExtractor), args);

            var semgrexExtractor = new Edu.Stanford.Nlp.IE.KBPSemgrexExtractor(Dir);
            IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);

            // NOTE(review): Predictions.Map is given a null mapper here — presumably the
            // original callback was lost in code conversion; TODO confirm.
            semgrexExtractor.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
        }
コード例 #3
0
        /// <summary>
        /// Command-line entry point: reads the test dataset, trains and saves a model to
        /// <c>ModelFile</c> when none exists there yet, then loads the model from
        /// <c>ModelFile</c> and runs <c>ComputeAccuracy</c> over the test dataset.
        /// </summary>
        /// <param name="args">Command-line arguments, handed to <c>ArgumentParser.FillOptions</c>.</param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            // Apply the standard Redwood configuration (original note: this silences SLF4J output).
            RedwoodConfiguration.Standard().Apply();
            // Fill command-line options.
            ArgumentParser.FillOptions(typeof(Edu.Stanford.Nlp.IE.KBPStatisticalExtractor), args);

            // Load the test (or dev) data.
            Redwood.Util.ForceTrack("Test data");
            IList<Pair<KBPRelationExtractor.KBPInput, string>> evaluationSet = IKBPRelationExtractor.ReadDataset(TestFile);
            log.Info("Read " + evaluationSet.Count + " examples");
            Redwood.Util.EndTrack("Test data");

            // Train a model only when no existing one can be found.
            if (!IOUtils.ExistsInClasspathOrFileSystem(ModelFile))
            {
                Redwood.Util.ForceTrack("Training data");
                IList<Pair<KBPRelationExtractor.KBPInput, string>> trainingSet = IKBPRelationExtractor.ReadDataset(TrainFile);
                log.Info("Read " + trainingSet.Count + " examples");
                // NOTE(review): the Map/Filter callbacks are null here — presumably lost in
                // code conversion; TODO confirm.
                string noRelationSummary = string.Empty + trainingSet.Stream().Map(null).Filter(null).Count() + " are " + KBPRelationExtractorConstants.NoRelation;
                log.Info(noRelationSummary);
                Redwood.Util.EndTrack("Training data");

                // Featurize + create the dataset.
                Redwood.Util.ForceTrack("Creating dataset");
                RVFDataset<string, string> featurized = new RVFDataset<string, string>();
                AtomicInteger processed = new AtomicInteger(0);
                long startMillis = Runtime.CurrentTimeMillis();
                // This takes a while per example.
                // NOTE(review): the ForEach callback is null, so nothing visible here fills
                // the dataset or reads the counter / start time — presumably the callback
                // was lost in code conversion; TODO confirm.
                trainingSet.Stream().Parallel().ForEach(null);
                // Free up some memory.
                trainingSet.Clear();
                Redwood.Util.EndTrack("Creating dataset");

                // Train the classifier.
                log.Info("Training classifier:");
                IClassifier<string, string> trained = TrainMultinomialClassifier(featurized, FeatureThreshold, Sigma);
                // Free up some memory.
                featurized.Clear();

                // Save the classifier, wrapped in an extractor.
                IOUtils.WriteObjectToFile(new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor(trained), ModelFile);
            }

            // Read either a newly-trained or pre-trained model.
            object loadedModel = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(ModelFile);

            // A bare classifier gets wrapped; anything else is cast to the extractor type.
            //noinspection unchecked
            Edu.Stanford.Nlp.IE.KBPStatisticalExtractor evaluated = (loadedModel is IClassifier)
                ? new Edu.Stanford.Nlp.IE.KBPStatisticalExtractor((IClassifier<string, string>)loadedModel)
                : (Edu.Stanford.Nlp.IE.KBPStatisticalExtractor)loadedModel;

            // Evaluate the model.
            // NOTE(review): Predictions.Map is given a null mapper here — presumably the
            // original callback was lost in code conversion; TODO confirm.
            evaluated.ComputeAccuracy(evaluationSet.Stream(), Predictions.Map(null));
        }
コード例 #4
0
        /// <summary>Create a new KBP annotator from the given properties.</summary>
        /// <param name="name">The annotator name, passed with <paramref name="props"/> to <c>ArgumentParser.FillOptions</c>.</param>
        /// <param name="props">The properties to use when creating this extractor.</param>
        /// <remarks>
        /// Reads <c>kbp.maxlen</c> (default <c>-1</c>) and <c>kbp.language</c> (default <c>en</c>)
        /// from <paramref name="props"/>. Any exception raised while building the extractors
        /// or parsing <c>kbp.maxlen</c> is rethrown wrapped in a <c>RuntimeIOException</c>.
        /// </remarks>
        public KBPAnnotator(string name, Properties props)
        {
            // Parse standard properties
            ArgumentParser.FillOptions(this, name, props);
            kbpProperties = props;
            try
            {
                List<IKBPRelationExtractor> extractors = new List<IKBPRelationExtractor>();
                // add tokensregex rules
                if (!tokensregexdir.Equals(NotProvided))
                {
                    extractors.Add(new KBPTokensregexExtractor(tokensregexdir, Verbose));
                }
                // add semgrex rules
                if (!semgrexdir.Equals(NotProvided))
                {
                    extractors.Add(new KBPSemgrexExtractor(semgrexdir, Verbose));
                }
                // attempt to add statistical model
                if (!model.Equals(NotProvided))
                {
                    log.Info("Loading KBP classifier from: " + model);
                    object @object = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(model);
                    IKBPRelationExtractor statisticalExtractor;
                    // A bare classifier gets wrapped; an already-built extractor is used directly.
                    if (@object is LinearClassifier)
                    {
                        //noinspection unchecked
                        statisticalExtractor = new KBPStatisticalExtractor((IClassifier<string, string>)@object);
                    }
                    else if (@object is KBPStatisticalExtractor)
                    {
                        statisticalExtractor = (KBPStatisticalExtractor)@object;
                    }
                    else
                    {
                        throw new InvalidCastException(@object.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
                    }
                    extractors.Add(statisticalExtractor);
                }
                // build extractor
                this.extractor = new KBPEnsembleExtractor(Sharpen.Collections.ToArray(extractors, new IKBPRelationExtractor[extractors.Count]));
                // set maximum length of sentence to operate on
                // The property is a machine-readable setting, so parse it with the invariant
                // culture instead of whatever culture the current thread happens to use.
                maxLength = System.Convert.ToInt32(props.GetProperty("kbp.maxlen", "-1"), System.Globalization.CultureInfo.InvariantCulture);
            }
            catch (Exception e)
            {
                throw new RuntimeIOException(e);
            }
            // set up map for converting between older and new KBP relation names
            relationNameConversionMap = new Dictionary<string, string>
            {
                { "org:dissolved", "org:date_dissolved" },
                { "org:founded", "org:date_founded" },
                { "org:number_of_employees/members", "org:number_of_employees_members" },
                { "org:political/religious_affiliation", "org:political_religious_affiliation" },
                { "org:top_members/employees", "org:top_members_employees" },
                { "per:member_of", "per:employee_or_member_of" },
                { "per:employee_of", "per:employee_or_member_of" },
                { "per:stateorprovinces_of_residence", "per:statesorprovinces_of_residence" }
            };
            // set up KBP language
            kbpLanguage = LanguageInfo.GetLanguageFromString(props.GetProperty("kbp.language", "en"));
            // build the Spanish coref system if necessary
            if (LanguageInfo.HumanLanguage.Spanish.Equals(kbpLanguage))
            {
                spanishCorefSystem = new KBPBasicSpanishCorefSystem();
            }
        }