/// <summary>
        /// Creates the annotator that adds dependency relations to sentences.
        /// </summary>
        /// <param name="properties">
        /// Configuration source; only the entries selected by the
        /// <c>AnnotatorConstants.StanfordDependencies + '.'</c> prefix are handed to the annotator.
        /// </param>
        /// <returns>A new <see cref="DependencyParseAnnotator"/> built from the extracted properties.</returns>
        public virtual IAnnotator Dependencies(Properties properties)
        {
            // Prefix that scopes the settings belonging to this annotator.
            var prefix = AnnotatorConstants.StanfordDependencies + '.';
            Properties scoped = PropertiesUtils.ExtractPrefixedProperties(properties, prefix);

            return new DependencyParseAnnotator(scoped);
        }
        /// <summary>
        /// Creates the annotator that marks operators (e.g., quantifiers) and the polarity of tokens in a sentence.
        /// </summary>
        /// <param name="properties">
        /// Configuration source; only the entries selected by the
        /// <c>AnnotatorConstants.StanfordNatlog + '.'</c> prefix are handed to the annotator.
        /// </param>
        /// <returns>A new <see cref="NaturalLogicAnnotator"/> built from the extracted properties.</returns>
        public virtual IAnnotator Natlog(Properties properties)
        {
            // Prefix that scopes the settings belonging to this annotator.
            var prefix = AnnotatorConstants.StanfordNatlog + '.';
            Properties scoped = PropertiesUtils.ExtractPrefixedProperties(properties, prefix);

            return new NaturalLogicAnnotator(scoped);
        }
// Example no. 3
// 0
        /// <summary>
        /// Builds the NER annotator from <paramref name="properties"/>: resolves the list of model
        /// paths, reads the classifier and document-date options, creates the underlying
        /// <see cref="NERClassifierCombiner"/>, and then runs the fine-grained, additional-rules
        /// and entity-mention setup steps.
        /// </summary>
        /// <param name="properties">
        /// Configuration source. Keys read literally here are <c>ner.model</c>,
        /// <c>ner.usePresentDateForDocDate</c>, <c>ner.providedDocDate</c>, <c>ner.verbose</c>,
        /// <c>ner.nthreads</c> (falling back to <c>nthreads</c>), <c>ner.maxtime</c>,
        /// <c>ner.maxlen</c> and <c>ner.language</c>; further keys come from constants defined elsewhere.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public NERCombinerAnnotator(Properties properties)
        {
            // An absent "ner.model" selects the three default models; an explicitly empty value selects none.
            string modelSpec = properties.GetProperty("ner.model")
                               ?? (DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel);

            IList <string> modelList = new List <string>();
            if (!modelSpec.IsEmpty())
            {
                Sharpen.Collections.AddAll(modelList, Arrays.AsList(modelSpec.Split(",")));
            }
            if (modelList.IsEmpty())
            {
                // Running without a real NER model is permitted - numeric classifiers or SUTime can still be used.
                // ner.model has to be unset deliberately, so reaching this branch by accident is unlikely.
                log.Info("WARNING: no NER models specified");
            }

            // Switches forwarded to the classifier combiner.
            bool numericClassifiersEnabled = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyNumericClassifiersProperty, NERClassifierCombiner.ApplyNumericClassifiersDefault);
            bool gazetteEnabled            = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
            bool sutimeEnabled             = PropertiesUtils.GetBool(properties, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

            // Option: use the present date as the document date during each annotation.
            usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);

            // Option: take the document date from a supplied string. The value is kept only when the
            // matcher accepts it against the digits-and-dashes pattern below; otherwise it is reset to empty.
            string docDateCandidate   = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
            bool   matchesDatePattern = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}").Matcher(docDateCandidate).Matches();
            providedDocDate = matchesDatePattern ? docDateCandidate : string.Empty;

            string nerLanguageName = PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null);
            NERClassifierCombiner.Language combinerLanguage = NERClassifierCombiner.Language.FromString(nerLanguageName, NERClassifierCombiner.NerLanguageDefault);
            bool verboseFlag = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

            string[]   modelPaths    = Sharpen.Collections.ToArray(modelList, new string[modelList.Count]);
            Properties passDownProps = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);
            if (sutimeEnabled)
            {
                // Make sure the SUTime parameters reach the combiner as well.
                PropertiesUtils.OverWriteProperties(
                    passDownProps,
                    PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true));
            }

            NERClassifierCombiner combiner = new NERClassifierCombiner(numericClassifiersEnabled, combinerLanguage, sutimeEnabled, gazetteEnabled, passDownProps, modelPaths);

            // The NER-specific thread count falls back to the global "nthreads" value, which itself defaults to 1.
            int fallbackThreads = PropertiesUtils.GetInt(properties, "nthreads", 1);
            nThreads          = PropertiesUtils.GetInt(properties, "ner.nthreads", fallbackThreads);
            maxTime           = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
            maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);
            language          = LanguageInfo.GetLanguageFromString(PropertiesUtils.GetString(properties, "ner.language", "en"));

            // Spanish gets an extra number annotator driven by the Spanish number regexner rules.
            if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                Properties spanishProps = new Properties();
                spanishProps["spanish.number.regexner.mapping"]         = spanishNumberRegexRules;
                spanishProps["spanish.number.regexner.validpospattern"] = "^(NUM).*";
                spanishProps["spanish.number.regexner.ignorecase"]      = "true";
                spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", spanishProps);
            }

            // Remaining setup steps, in order: fine-grained NER, additional rules NER, entity mentions.
            SetUpFineGrainedNER(properties);
            SetUpAdditionalRulesNER(properties);
            SetUpEntityMentionBuilding(properties);

            // Verbosity and the combiner are published last, after the setup steps have run.
            Verbose = verboseFlag;
            ner     = combiner;
        }