/// <summary>
/// Creates an entity mentions annotator configured from <paramref name="props"/>,
/// reading the keys that are prefixed with <paramref name="name"/>.
/// </summary>
/// <param name="name">Prefix prepended (followed by a dot) to every annotator-specific property key.</param>
/// <param name="props">Properties holding the optional overrides read by this constructor.</param>
public EntityMentionsAnnotator(string name, Properties props)
 {
     // note: used in annotate.properties
     // if the user has supplied custom CoreAnnotations for the ner tags and entity mentions override the default keys
     string nerKey           = name + ".nerCoreAnnotation";
     string nerNormalizedKey = name + ".nerNormalizedCoreAnnotation";
     string mentionsKey      = name + ".mentionsCoreAnnotation";
     try
     {
         if (props.Contains(nerKey))
         {
             nerCoreAnnotationClass = (Type)Sharpen.Runtime.GetType(props.GetProperty(nerKey));
         }
         if (props.Contains(nerNormalizedKey))
         {
             nerNormalizedCoreAnnotationClass = (Type)Sharpen.Runtime.GetType(props.GetProperty(nerNormalizedKey));
         }
         if (props.Contains(mentionsKey))
         {
             mentionsCoreAnnotationClass = (Type)Sharpen.Runtime.GetType(props.GetProperty(mentionsKey));
         }
     }
     catch (TypeLoadException e)
     {
         // A type that cannot be loaded is only logged; any override that comes after the
         // failing one inside the try block is skipped and keeps its current value.
         log.Error(e.Message);
     }
     chunkIdentifier = new LabeledChunkIdentifier();
     // The annotator-specific "<name>.acronyms" wins over the global "acronyms" key; the default is "false".
     // Only the text "true" (compared case-insensitively) enables acronyms; null or any other text disables them.
     string acronymsSetting = props.GetProperty(name + ".acronyms", props.GetProperty("acronyms", "false"));
     doAcronyms = string.Equals(acronymsSetting, "true", System.StringComparison.OrdinalIgnoreCase);
     // set up language info, this is needed for handling creating pronominal mentions
     entityMentionsLanguage = LanguageInfo.GetLanguageFromString(props.GetProperty(name + ".language", "en"));
 }
示例#2
0
 /// <summary>
 /// Tells whether <paramref name="language"/> is one of the segmenter languages.
 /// </summary>
 /// <param name="language">The language to test.</param>
 /// <returns><c>true</c> when the language is Arabic or Chinese; otherwise <c>false</c>.</returns>
 public static bool IsSegmenterLanguage(LanguageInfo.HumanLanguage language)
 {
     if (language == LanguageInfo.HumanLanguage.Arabic)
     {
         return true;
     }
     return language == LanguageInfo.HumanLanguage.Chinese;
 }
        /// <summary>
        /// Builds an NER combiner annotator from the supplied properties: resolves the list of
        /// model paths, reads the annotator options, constructs the underlying
        /// <c>NERClassifierCombiner</c>, and then runs the fine-grained, additional-rules and
        /// entity-mention setup steps.
        /// </summary>
        /// <param name="properties">Configuration to read; "ner.model" is split on commas into model paths.</param>
        /// <exception cref="System.IO.IOException"/>
        public NERCombinerAnnotator(Properties properties)
        {
            // The default model list is used only when "ner.model" is absent.
            string modelSpec = properties.GetProperty("ner.model");
            if (modelSpec == null)
            {
                modelSpec = DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel;
            }

            IList<string> modelPaths = new List<string>();
            if (!modelSpec.IsEmpty())
            {
                string[] pieces = modelSpec.Split(",");
                Sharpen.Collections.AddAll(modelPaths, Arrays.AsList(pieces));
            }
            if (modelPaths.IsEmpty())
            {
                // Running without any real NER model is allowed - numeric classifiers or SUTime can still be used.
                // ner.model has to be explicitly unset to get here, so it is unlikely to happen by accident.
                log.Info("WARNING: no NER models specified");
            }

            bool wantNumericClassifiers = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyNumericClassifiersProperty, NERClassifierCombiner.ApplyNumericClassifiersDefault);
            bool wantRegexner = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
            bool wantSUTime = PropertiesUtils.GetBool(properties, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

            // option for setting doc date to be the present during each annotation
            usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);

            // option for setting doc date from a provided string; a value that does not match
            // the four-two-two digit pattern below is discarded
            providedDocDate = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
            bool docDateIsWellFormed = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}").Matcher(providedDocDate).Matches();
            if (!docDateIsWellFormed)
            {
                providedDocDate = string.Empty;
            }

            string nerLanguageName = PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null);
            NERClassifierCombiner.Language combinerLanguage = NERClassifierCombiner.Language.FromString(nerLanguageName, NERClassifierCombiner.NerLanguageDefault);
            bool beVerbose = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

            string[] modelPathArray = Sharpen.Collections.ToArray(modelPaths, new string[modelPaths.Count]);
            Properties passDownProperties = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);
            if (wantSUTime)
            {
                // Make sure the SUTime parameters are handed to the combiner as well
                Properties sutimeOnly = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true);
                PropertiesUtils.OverWriteProperties(passDownProperties, sutimeOnly);
            }
            NERClassifierCombiner combiner = new NERClassifierCombiner(wantNumericClassifiers, combinerLanguage, wantSUTime, wantRegexner, passDownProperties, modelPathArray);

            // "ner.nthreads" takes precedence over the global "nthreads", which itself defaults to 1
            int fallbackThreads = PropertiesUtils.GetInt(properties, "nthreads", 1);
            this.nThreads = PropertiesUtils.GetInt(properties, "ner.nthreads", fallbackThreads);
            this.maxTime = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
            this.maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);
            string humanLanguageName = PropertiesUtils.GetString(properties, "ner.language", "en");
            this.language = LanguageInfo.GetLanguageFromString(humanLanguageName);

            // in case of Spanish, use the Spanish number regexner annotator
            if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                Properties numberRules = new Properties();
                numberRules["spanish.number.regexner.mapping"] = spanishNumberRegexRules;
                numberRules["spanish.number.regexner.validpospattern"] = "^(NUM).*";
                numberRules["spanish.number.regexner.ignorecase"] = "true";
                spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", numberRules);
            }

            // fine grained ner, then additional rules ner, then entity mentions
            SetUpFineGrainedNER(properties);
            SetUpAdditionalRulesNER(properties);
            SetUpEntityMentionBuilding(properties);

            Verbose = beVerbose;
            this.ner = combiner;
        }
        /// <summary>Create a new KBP annotator from the given properties.</summary>
        /// <param name="name">The annotator name handed to <c>ArgumentParser.FillOptions</c> together with <paramref name="props"/>.</param>
        /// <param name="props">The properties to use when creating this extractor.</param>
        /// <exception cref="RuntimeIOException">
        /// Wraps any exception raised while building the relation extractors or parsing "kbp.maxlen".
        /// </exception>
        public KBPAnnotator(string name, Properties props)
        {
            // Parse standard properties
            ArgumentParser.FillOptions(this, name, props);
            kbpProperties = props;
            try
            {
                List <IKBPRelationExtractor> extractors = new List <IKBPRelationExtractor>();
                // add tokensregex rules
                if (!tokensregexdir.Equals(NotProvided))
                {
                    extractors.Add(new KBPTokensregexExtractor(tokensregexdir, Verbose));
                }
                // add semgrex rules
                if (!semgrexdir.Equals(NotProvided))
                {
                    extractors.Add(new KBPSemgrexExtractor(semgrexdir, Verbose));
                }
                // attempt to add statistical model
                if (!model.Equals(NotProvided))
                {
                    log.Info("Loading KBP classifier from: " + model);
                    object @object = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(model);
                    IKBPRelationExtractor statisticalExtractor;
                    if (@object is LinearClassifier)
                    {
                        // a bare classifier is wrapped in a statistical extractor
                        statisticalExtractor = new KBPStatisticalExtractor((IClassifier <string, string>)@object);
                    }
                    else if (@object is KBPStatisticalExtractor)
                    {
                        statisticalExtractor = (KBPStatisticalExtractor)@object;
                    }
                    else
                    {
                        throw new InvalidCastException(@object.GetType() + " cannot be cast into a " + typeof(KBPStatisticalExtractor));
                    }
                    extractors.Add(statisticalExtractor);
                }
                // build extractor
                this.extractor = new KBPEnsembleExtractor(Sharpen.Collections.ToArray(extractors, new IKBPRelationExtractor[extractors.Count]));
                // set maximum length of sentence to operate on; the value is machine-readable
                // configuration, so it is parsed with the invariant culture to keep the default "-1"
                // (and any user value) independent of the host locale
                maxLength = System.Convert.ToInt32(props.GetProperty("kbp.maxlen", "-1"), System.Globalization.CultureInfo.InvariantCulture);
            }
            catch (Exception e)
            {
                // every failure above, including the InvalidCastException, surfaces as a RuntimeIOException
                throw new RuntimeIOException(e);
            }
            // set up map for converting between older and new KBP relation names
            relationNameConversionMap = new Dictionary <string, string>();
            relationNameConversionMap["org:dissolved"] = "org:date_dissolved";
            relationNameConversionMap["org:founded"]   = "org:date_founded";
            relationNameConversionMap["org:number_of_employees/members"]     = "org:number_of_employees_members";
            relationNameConversionMap["org:political/religious_affiliation"] = "org:political_religious_affiliation";
            relationNameConversionMap["org:top_members/employees"]           = "org:top_members_employees";
            relationNameConversionMap["per:member_of"]   = "per:employee_or_member_of";
            relationNameConversionMap["per:employee_of"] = "per:employee_or_member_of";
            relationNameConversionMap["per:stateorprovinces_of_residence"] = "per:statesorprovinces_of_residence";
            // set up KBP language (read from the fixed "kbp.language" key, defaulting to "en")
            kbpLanguage = LanguageInfo.GetLanguageFromString(props.GetProperty("kbp.language", "en"));
            // build the Spanish coref system if necessary
            if (LanguageInfo.HumanLanguage.Spanish.Equals(kbpLanguage))
            {
                spanishCorefSystem = new KBPBasicSpanishCorefSystem();
            }
        }