/// <summary>
/// Creates a parser annotator whose settings are read from the properties
/// prefixed with <paramref name="annotatorName"/>.
/// </summary>
/// <remarks>
/// Keys consulted (each prefixed with <c>annotatorName + "."</c>): <c>model</c>,
/// <c>debug</c>, <c>flags</c>, <c>maxlen</c>, <c>treemap</c>, <c>maxtime</c>,
/// <c>kbest</c>, <c>keepPunct</c>, <c>buildgraphs</c>, <c>originalDependencies</c>,
/// <c>nthreads</c> (falling back to the unprefixed <c>nthreads</c> key),
/// <c>binaryTrees</c>, <c>nosquash</c> and <c>extradependencies</c>.
/// </remarks>
/// <param name="annotatorName">Prefix used to build every property key read here.</param>
/// <param name="props">Property set holding the annotator configuration.</param>
/// <exception cref="System.ArgumentException">
/// Thrown when the model location resolves to <c>null</c>.
/// </exception>
public ParserAnnotator(string annotatorName, Properties props)
{
    // Resolve the model location; the default parser location is used when the key is absent.
    string modelLocation = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);
    if (modelLocation == null)
    {
        throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
    }

    this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);

    string rawFlags = props.GetProperty(annotatorName + ".flags");
    string[] parserFlags = ConvertFlagsToArray(rawFlags);
    this.parser = LoadModel(modelLocation, Verbose, parserFlags);

    this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);

    // Optional tree mapper, instantiated by class name when one is configured.
    string treeMapClassName = props.GetProperty(annotatorName + ".treemap");
    if (treeMapClassName != null)
    {
        this.treeMap = ReflectionLoading.LoadByReflection(treeMapClassName, props);
    }
    else
    {
        this.treeMap = null;
    }

    this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
    this.kBest = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
    this.keepPunct = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);

    // Dependency graphs are only built when the loaded parser's language parameters support them.
    string buildGraphsKey = annotatorName + ".buildgraphs";
    bool dependenciesSupported = this.parser.GetTLPParams().SupportsBasicDependencies();
    if (dependenciesSupported)
    {
        this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsKey, true);
    }
    else
    {
        // Tell the user that an explicit request for graphs is being overridden.
        if (PropertiesUtils.GetBool(props, buildGraphsKey))
        {
            log.Info("WARNING: " + buildGraphsKey + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
        }
        this.BuildGraphs = false;
    }

    if (!this.BuildGraphs)
    {
        this.gsf = null;
    }
    else
    {
        bool useOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
        parser.GetTLPParams().SetGenerateOriginalDependencies(useOriginalDependencies);

        ITreebankLanguagePack languagePack = parser.GetTLPParams().TreebankLanguagePack();

        // Accept every word when punctuation is kept; otherwise use the language pack's punctuation reject filter.
        IPredicate<string> wordFilter;
        if (this.keepPunct)
        {
            wordFilter = Filters.AcceptFilter();
        }
        else
        {
            wordFilter = languagePack.PunctuationWordRejectFilter();
        }

        this.gsf = languagePack.GrammaticalStructureFactory(wordFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
    }

    // The annotator-specific thread count wins over the unprefixed "nthreads" key, which defaults to 1.
    string threadsKey = annotatorName + ".nthreads";
    int fallbackThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(props, threadsKey, fallbackThreads);

    // The default for keeping binary trees comes from StanfordCoreNLP.UsesBinaryTrees.
    bool binaryTreesDefault = StanfordCoreNLP.UsesBinaryTrees(props);
    this.saveBinaryTrees = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", binaryTreesDefault);

    this.noSquash = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);

    string extrasName = props.GetProperty(annotatorName + ".extradependencies", "NONE");
    this.extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
}
/// <summary>
/// Creates a dependency parse annotator configured from <paramref name="properties"/>.
/// </summary>
/// <remarks>
/// Keys consulted: <c>model</c>, <c>testThreads</c>, <c>sentenceTimeout</c> and
/// <c>extradependencies</c> (default <c>"NONE"</c>).
/// </remarks>
/// <param name="properties">
/// Property set holding the annotator configuration; it is also passed to the model loader.
/// </param>
public DependencyParseAnnotator(Properties properties)
{
    // Load the parser from the configured model file, or the default model when none is given.
    string modelFile = PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel);
    parser = DependencyParser.LoadFromModelFile(modelFile, properties);

    nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
    maxTime = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

    string extrasName = properties.GetProperty("extradependencies", "NONE");
    extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
}
/// <summary>
/// Creates an NER combiner annotator configured from <paramref name="properties"/>.
/// </summary>
/// <remarks>
/// Reads the comma-separated <c>ner.model</c> list (three default models are used when the
/// key is absent), builds an <c>NERClassifierCombiner</c> from it, and then reads the
/// threading, timeout, sentence-length and language settings before running the
/// fine-grained, additional-rules and entity-mention setup steps.
/// </remarks>
/// <param name="properties">Property set holding the annotator configuration.</param>
/// <exception cref="System.IO.IOException"/>
public NERCombinerAnnotator(Properties properties)
{
    // Fall back to the three default models only when ner.model is not set at all.
    string modelNames = properties.GetProperty("ner.model")
        ?? (DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel);

    IList<string> models = new List<string>();
    if (!modelNames.IsEmpty())
    {
        Sharpen.Collections.AddAll(models, Arrays.AsList(modelNames.Split(",")));
    }
    if (models.IsEmpty())
    {
        // Allow for no real NER model - can just use numeric classifiers or SUTime.
        // Have to unset ner.model, so unlikely that people got here by accident.
        log.Info("WARNING: no NER models specified");
    }

    bool applyNumericClassifiers = PropertiesUtils.GetBool(
        properties,
        NERClassifierCombiner.ApplyNumericClassifiersProperty,
        NERClassifierCombiner.ApplyNumericClassifiersDefault);
    bool applyRegexner = PropertiesUtils.GetBool(
        properties,
        NERClassifierCombiner.ApplyGazetteProperty,
        NERClassifierCombiner.ApplyGazetteDefault);
    bool useSUTime = PropertiesUtils.GetBool(
        properties,
        NumberSequenceClassifier.UseSutimeProperty,
        NumberSequenceClassifier.UseSutimeDefault);

    // Option for setting the doc date to be the present during each annotation.
    usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);

    // Option for setting the doc date from a provided string; it is cleared unless
    // the whole value matches the four-two-two digit pattern below.
    providedDocDate = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
    bool docDateIsWellFormed = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}").Matcher(providedDocDate).Matches();
    if (!docDateIsWellFormed)
    {
        providedDocDate = string.Empty;
    }

    string nerLanguageName = PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null);
    NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(nerLanguageName, NERClassifierCombiner.NerLanguageDefault);

    bool verbose = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

    string[] loadPaths = Sharpen.Collections.ToArray(models, new string[models.Count]);

    Properties combinerProperties = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);
    if (useSUTime)
    {
        // Make sure SUTime parameters are included.
        Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true);
        PropertiesUtils.OverWriteProperties(combinerProperties, sutimeProps);
    }

    NERClassifierCombiner nerCombiner = new NERClassifierCombiner(
        applyNumericClassifiers,
        nerLanguage,
        useSUTime,
        applyRegexner,
        combinerProperties,
        loadPaths);

    // ner.nthreads wins over the unprefixed "nthreads" key, which defaults to 1.
    int fallbackThreads = PropertiesUtils.GetInt(properties, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(properties, "ner.nthreads", fallbackThreads);
    this.maxTime = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
    this.maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);

    string languageName = PropertiesUtils.GetString(properties, "ner.language", "en");
    this.language = LanguageInfo.GetLanguageFromString(languageName);

    // In case of Spanish, use the Spanish number regexner annotator.
    if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
    {
        Properties spanishProps = new Properties();
        spanishProps["spanish.number.regexner.mapping"] = spanishNumberRegexRules;
        spanishProps["spanish.number.regexner.validpospattern"] = "^(NUM).*";
        spanishProps["spanish.number.regexner.ignorecase"] = "true";
        spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", spanishProps);
    }

    // Set up fine grained ner.
    SetUpFineGrainedNER(properties);

    // Set up additional rules ner.
    SetUpAdditionalRulesNER(properties);

    // Set up entity mentions.
    SetUpEntityMentionBuilding(properties);

    Verbose = verbose;
    this.ner = nerCombiner;
}