/// <summary>
/// Builds the annotator from a property set: loads the dependency parser model and
/// reads the thread count, sentence timeout and extra-dependencies setting.
/// </summary>
/// <param name="properties">
/// Configuration source. Keys read: "model", "testThreads", "sentenceTimeout" and
/// "extradependencies".
/// </param>
public DependencyParseAnnotator(Properties properties)
{
    // Model location: the "model" key, falling back to DependencyParser.DefaultModel.
    string parserModel = PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel);
    parser = DependencyParser.LoadFromModelFile(parserModel, properties);

    nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
    maxTime = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

    // "NONE" is the value used when no "extradependencies" key is supplied.
    string extrasSetting = properties.GetProperty("extradependencies", "NONE");
    extraDependencies = MetaClass.Cast(extrasSetting, typeof(GrammaticalStructure.Extras));
}
// ---------- Mention Detection ----------

/// <summary>
/// Resolves the mention detection type from the "coref.md.type" property.
/// </summary>
/// <remarks>
/// When the property is not supplied, the default passed to the lookup is "RULE" if
/// <c>UseConstituencyParse(props)</c> is true and "dep" otherwise. The shorthand
/// "dep" (any casing) is expanded to "DEPENDENCY" before the enum lookup.
/// </remarks>
/// <param name="props">Configuration source.</param>
/// <returns>The enum constant whose name matches the upper-cased property value.</returns>
public static CorefProperties.MentionDetectionType MdType(Properties props)
{
    string fallback = UseConstituencyParse(props) ? "RULE" : "dep";
    string type = PropertiesUtils.GetString(props, "coref.md.type", fallback);
    if (Sharpen.Runtime.EqualsIgnoreCase(type, "dep"))
    {
        type = "DEPENDENCY";
    }

    // Upper-case with the invariant culture: the culture-sensitive ToUpper() maps 'i'
    // to a dotted capital under Turkish cultures, so a value containing 'i' would no
    // longer match an ASCII constant name.
    return CorefProperties.MentionDetectionType.ValueOf(type.ToUpperInvariant());
}
/// <summary>
/// Refreshes the configuration fields from <paramref name="props"/>. Every lookup
/// passes the field's current value as the default argument.
/// </summary>
/// <param name="props">Configuration source.</param>
private void SetProperties(Properties props)
{
    trainingThreads = PropertiesUtils.GetInt(props, "trainingThreads", trainingThreads);
    wordCutOff = PropertiesUtils.GetInt(props, "wordCutOff", wordCutOff);
    initRange = PropertiesUtils.GetDouble(props, "initRange", initRange);
    maxIter = PropertiesUtils.GetInt(props, "maxIter", maxIter);
    batchSize = PropertiesUtils.GetInt(props, "batchSize", batchSize);
    adaEps = PropertiesUtils.GetDouble(props, "adaEps", adaEps);
    adaAlpha = PropertiesUtils.GetDouble(props, "adaAlpha", adaAlpha);
    regParameter = PropertiesUtils.GetDouble(props, "regParameter", regParameter);
    dropProb = PropertiesUtils.GetDouble(props, "dropProb", dropProb);
    hiddenSize = PropertiesUtils.GetInt(props, "hiddenSize", hiddenSize);
    embeddingSize = PropertiesUtils.GetInt(props, "embeddingSize", embeddingSize);
    numPreComputed = PropertiesUtils.GetInt(props, "numPreComputed", numPreComputed);
    evalPerIter = PropertiesUtils.GetInt(props, "evalPerIter", evalPerIter);
    clearGradientsPerIter = PropertiesUtils.GetInt(props, "clearGradientsPerIter", clearGradientsPerIter);
    saveIntermediate = PropertiesUtils.GetBool(props, "saveIntermediate", saveIntermediate);
    unlabeled = PropertiesUtils.GetBool(props, "unlabeled", unlabeled);
    cPOS = PropertiesUtils.GetBool(props, "cPOS", cPOS);
    noPunc = PropertiesUtils.GetBool(props, "noPunc", noPunc);
    doWordEmbeddingGradUpdate = PropertiesUtils.GetBool(props, "doWordEmbeddingGradUpdate", doWordEmbeddingGradUpdate);

    // Runtime parsing options
    sentenceDelimiter = PropertiesUtils.GetString(props, "sentenceDelimiter", sentenceDelimiter);
    tagger = PropertiesUtils.GetString(props, "tagger.model", tagger);
    string escaperClass = props.GetProperty("escaper");
    // The escaper is only instantiated when an "escaper" class name is configured.
    escaper = escaperClass != null ? ReflectionLoading.LoadByReflection(escaperClass) : null;

    // Language options
    language = props.Contains("language") ? GetLanguage(props.GetProperty("language")) : language;

    // NOTE(review): this expression was garbled in the source text and has been
    // reconstructed as the language's default treebank language pack; confirm
    // against the upstream file.
    tlp = language.@params.TreebankLanguagePack();

    // If a tlp was specified, go with that.
    string tlpCanonicalName = props.GetProperty("tlp");
    if (tlpCanonicalName != null)
    {
        try
        {
            tlp = ReflectionLoading.LoadByReflection(tlpCanonicalName);
            System.Console.Error.WriteLine("Loaded TreebankLanguagePack: " + tlpCanonicalName);
        }
        catch (Exception)
        {
            // Best effort: report the failure and keep the tlp assigned above.
            System.Console.Error.WriteLine("Error: Failed to load TreebankLanguagePack: " + tlpCanonicalName);
        }
    }
}
/// <summary>
/// Builds a combiner configured entirely from <paramref name="props"/>: the
/// numeric-classifier switch, NER language, SUTime switch and optional gazette mapping.
/// </summary>
/// <param name="props">Configuration source; also forwarded to the base constructor.</param>
/// <exception cref="System.IO.IOException"/>
public NERClassifierCombiner(Properties props)
    : base(props)
{
    // todo [cdm 2015]: Could avoid constructing this if applyNumericClassifiers is false
    applyNumericClassifiers = PropertiesUtils.GetBool(props, ApplyNumericClassifiersProperty, ApplyNumericClassifiersDefault);

    string languageName = PropertiesUtils.GetString(props, NerLanguageProperty, null);
    nerLanguage = NERClassifierCombiner.Language.FromString(languageName, NerLanguageDefault);

    useSUTime = PropertiesUtils.GetBool(props, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);
    nsc = new NumberSequenceClassifier(new Properties(), useSUTime, props);

    bool gazetteEnabled = PropertiesUtils.GetBool(props, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
    if (!gazetteEnabled)
    {
        // Gazette disabled: use an empty mapping instead of reading the gazette file.
        this.gazetteMapping = Java.Util.Collections.EmptyMap();
    }
    else
    {
        this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
    }
}
/// <summary>
/// Determines which classifier type a sieve uses.
/// </summary>
/// <remarks>
/// Resolution order: names listed in <c>dcorefSieveNames</c> are rule-based; names
/// ending in "-rf" or "-oracle" (case-insensitive) map to the corresponding type;
/// anything else is looked up under the property key formed by substituting the
/// sieve name for "SIEVENAME" in <c>ClassifierTypeProp</c>.
/// NOTE(review): the property lookup passes null as its default, so an unconfigured
/// sieve presumably hands null to <c>ValueOf</c>; confirm how that is handled.
/// </remarks>
/// <param name="props">Configuration source.</param>
/// <param name="sievename">Name of the sieve.</param>
/// <returns>The classifier type for the sieve.</returns>
public static Sieve.ClassifierType GetClassifierType(Properties props, string sievename)
{
    if (dcorefSieveNames.Contains(sievename))
    {
        return Sieve.ClassifierType.Rule;
    }

    // Ordinal, case-insensitive suffix tests: the previous ToLower().EndsWith(...) used
    // the current culture for both the case mapping and the comparison.
    if (sievename.EndsWith("-rf", System.StringComparison.OrdinalIgnoreCase))
    {
        return Sieve.ClassifierType.Rf;
    }
    if (sievename.EndsWith("-oracle", System.StringComparison.OrdinalIgnoreCase))
    {
        return Sieve.ClassifierType.Oracle;
    }

    string propertyKey = ClassifierTypeProp.Replace("SIEVENAME", sievename);
    string classifierType = PropertiesUtils.GetString(props, propertyKey, null);
    return Sieve.ClassifierType.ValueOf(classifierType);
}
/// <summary>
/// Maps the "coref.language" property (default "en") to a locale.
/// </summary>
/// <param name="props">Configuration source.</param>
/// <returns>
/// <c>Locale.English</c> for "en"/"english" and <c>Locale.Chinese</c> for
/// "zh"/"chinese"; matching ignores case.
/// </returns>
/// <exception cref="ArgumentException">The configured language is none of the above.</exception>
public static Locale GetLanguage(Properties props)
{
    string lang = PropertiesUtils.GetString(props, "coref.language", "en");

    bool isEnglish = Sharpen.Runtime.EqualsIgnoreCase(lang, "en")
        || Sharpen.Runtime.EqualsIgnoreCase(lang, "english");
    if (isEnglish)
    {
        return Locale.English;
    }

    bool isChinese = Sharpen.Runtime.EqualsIgnoreCase(lang, "zh")
        || Sharpen.Runtime.EqualsIgnoreCase(lang, "chinese");
    if (isChinese)
    {
        return Locale.Chinese;
    }

    throw new ArgumentException("unsupported language");
}
/// <summary>
/// Looks up the gender/number data path under <c>GenderNumberProp</c>, using the
/// dcoref gender data path as the default.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetGenderNumber(Properties props)
{
    const string fallbackPath = "edu/stanford/nlp/models/dcoref/gender.data.gz";
    return PropertiesUtils.GetString(props, GenderNumberProp, fallbackPath);
}
/// <summary>
/// Looks up the word2vec path under <c>Word2vecProp</c>; null is passed as the default.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetPathWord2Vec(Properties props)
{
    string word2VecPath = PropertiesUtils.GetString(props, Word2vecProp, null);
    return word2VecPath;
}
/// <summary>
/// Looks up the value stored under <c>CurrentSieveForTrainProp</c>; null is passed
/// as the default.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetCurrentSieveForTrain(Properties props)
{
    string sieveName = PropertiesUtils.GetString(props, CurrentSieveForTrainProp, null);
    return sieveName;
}
/// <summary>
/// Looks up the serialized word-vector path under <c>Word2vecSerializedProp</c>,
/// with a fixed absolute path as the default.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetPathSerializedWordVectors(Properties props)
{
    const string fallbackPath = "/u/scr/nlp/data/coref/wordvectors/en/vector.ser.gz";
    return PropertiesUtils.GetString(props, Word2vecSerializedProp, fallbackPath);
}
/// <summary>
/// Looks up the value stored under <c>AnalysisSkipAtypeProp</c>; null is passed as
/// the default.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetSkipAntecedentType(Properties props)
{
    string skippedType = PropertiesUtils.GetString(props, AnalysisSkipAtypeProp, null);
    return skippedType;
}
/// <summary>
/// Looks up "coref.statistical.classificationModel", defaulting to
/// <c>GetDefaultModelPath(props, "classification_model")</c>.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string ClassificationModelPath(Properties props)
{
    string fallbackPath = GetDefaultModelPath(props, "classification_model");
    return PropertiesUtils.GetString(props, "coref.statistical.classificationModel", fallbackPath);
}
/// <summary>
/// Looks up "coref.statistical.anaphoricityModel", defaulting to
/// <c>GetDefaultModelPath(props, "anaphoricity_model")</c>.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string AnaphoricityModelPath(Properties props)
{
    string fallbackPath = GetDefaultModelPath(props, "anaphoricity_model");
    return PropertiesUtils.GetString(props, "coref.statistical.anaphoricityModel", fallbackPath);
}
/// <summary>
/// Looks up "coref.neural.modelPath". The default is assembled from the coref
/// language ("chinese" or "english") and whether <c>CorefProperties.Conll</c> is set
/// ("-model-conll" or "-model-default").
/// </summary>
/// <param name="props">Configuration source.</param>
public static string ModelPath(Properties props)
{
    string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
    string variantSuffix = CorefProperties.Conll(props) ? "-model-conll" : "-model-default";
    string defaultPath = "edu/stanford/nlp/models/coref/neural/" + languageName + variantSuffix + ".ser.gz";
    return PropertiesUtils.GetString(props, "coref.neural.modelPath", defaultPath);
}
/// <summary>
/// Looks up "coref.path.singletonPredictor", defaulting to the dcoref singleton
/// predictor path.
/// </summary>
/// <param name="props">Configuration source.</param>
private static string GetPathSingletonPredictor(Properties props)
{
    const string fallbackPath = "edu/stanford/nlp/models/dcoref/singleton.predictor.ser";
    return PropertiesUtils.GetString(props, "coref.path.singletonPredictor", fallbackPath);
}
/// <summary>
/// Looks up "coref.statistical.clusteringModel", defaulting to
/// <c>GetDefaultModelPath(props, "clustering_model")</c>.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string ClusteringModelPath(Properties props)
{
    string fallbackPath = GetDefaultModelPath(props, "clustering_model");
    return PropertiesUtils.GetString(props, "coref.statistical.clusteringModel", fallbackPath);
}
/// <summary>
/// Builds the NER annotator from a property set: resolves the model list, reads the
/// "ner.*" options, constructs the underlying <c>NERClassifierCombiner</c>, and then
/// runs the fine-grained, additional-rules and entity-mention setup steps.
/// </summary>
/// <param name="properties">Configuration source.</param>
/// <exception cref="System.IO.IOException"/>
public NERCombinerAnnotator(Properties properties)
{
    // Model list: "ner.model" if present, otherwise the three default models.
    string modelNames = properties.GetProperty("ner.model")
        ?? (DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel);
    IList<string> modelList = new List<string>();
    if (!modelNames.IsEmpty())
    {
        Sharpen.Collections.AddAll(modelList, Arrays.AsList(modelNames.Split(",")));
    }
    if (modelList.IsEmpty())
    {
        // Allow for no real NER model - can just use numeric classifiers or SUTime.
        // Have to unset ner.model, so unlikely that people got here by accident.
        log.Info("WARNING: no NER models specified");
    }

    bool numericClassifiersOn = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyNumericClassifiersProperty, NERClassifierCombiner.ApplyNumericClassifiersDefault);
    bool regexnerOn = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
    bool sutimeOn = PropertiesUtils.GetBool(properties, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

    // Option for setting doc date to be the present during each annotation.
    usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);

    // Option for setting doc date from a provided string; values that do not match
    // the four-two-two digit pattern are replaced with the empty string.
    providedDocDate = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
    Pattern datePattern = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}");
    bool dateIsWellFormed = datePattern.Matcher(providedDocDate).Matches();
    if (!dateIsWellFormed)
    {
        providedDocDate = string.Empty;
    }

    string nerLanguageName = PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null);
    NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(nerLanguageName, NERClassifierCombiner.NerLanguageDefault);
    bool verbose = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

    string[] loadPaths = Sharpen.Collections.ToArray(modelList, new string[modelList.Count]);
    Properties combinerProperties = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);
    if (sutimeOn)
    {
        // Make sure SUTime parameters are included.
        Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true);
        PropertiesUtils.OverWriteProperties(combinerProperties, sutimeProps);
    }
    NERClassifierCombiner combiner = new NERClassifierCombiner(numericClassifiersOn, nerLanguage, sutimeOn, regexnerOn, combinerProperties, loadPaths);

    // "ner.nthreads" falls back to the general "nthreads" setting, which falls back to 1.
    int generalThreads = PropertiesUtils.GetInt(properties, "nthreads", 1);
    this.nThreads = PropertiesUtils.GetInt(properties, "ner.nthreads", generalThreads);
    this.maxTime = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
    this.maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);
    string languageCode = PropertiesUtils.GetString(properties, "ner.language", "en");
    this.language = LanguageInfo.GetLanguageFromString(languageCode);

    // In case of Spanish, use the Spanish number regexner annotator.
    if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
    {
        Properties spanishProps = new Properties();
        spanishProps["spanish.number.regexner.mapping"] = spanishNumberRegexRules;
        spanishProps["spanish.number.regexner.validpospattern"] = "^(NUM).*";
        spanishProps["spanish.number.regexner.ignorecase"] = "true";
        spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", spanishProps);
    }

    // Set up fine grained ner.
    SetUpFineGrainedNER(properties);
    // Set up additional rules ner.
    SetUpAdditionalRulesNER(properties);
    // Set up entity mentions.
    SetUpEntityMentionBuilding(properties);

    Verbose = verbose;
    this.ner = combiner;
}
/// <summary>
/// Looks up "coref.statistical.wordCounts", defaulting to the statistical coref
/// word-counts path.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string WordCountsPath(Properties props)
{
    const string fallbackPath = "edu/stanford/nlp/models/coref/statistical/word_counts.ser.gz";
    return PropertiesUtils.GetString(props, "coref.statistical.wordCounts", fallbackPath);
}
/// <summary>
/// Looks up "coref.neural.embeddingsPath". The default is the neural coref
/// embeddings path for the coref language ("chinese" or "english").
/// </summary>
/// <param name="props">Configuration source.</param>
public static string PretrainedEmbeddingsPath(Properties props)
{
    string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
    string defaultPath = "edu/stanford/nlp/models/coref/neural/" + languageName + "-embeddings.ser.gz";
    return PropertiesUtils.GetString(props, "coref.neural.embeddingsPath", defaultPath);
}
/// <summary>
/// Looks up the comma-separated sieve list under <c>SievesProp</c>, with a built-in
/// default list.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetSieves(Properties props)
{
    const string defaultSieves = "SpeakerMatch,PreciseConstructs,pp-rf,cc-rf,pc-rf,ll-rf,pr-rf";
    return PropertiesUtils.GetString(props, SievesProp, defaultSieves);
}
// static methods
//---------- Coreference Algorithms ----------

/// <summary>
/// Resolves the coreference algorithm from the "coref.algorithm" property.
/// </summary>
/// <remarks>
/// The default passed to the lookup is "statistical" when
/// <c>GetLanguage(props)</c> is <c>Locale.English</c> and "neural" otherwise.
/// </remarks>
/// <param name="props">Configuration source.</param>
/// <returns>The enum constant whose name matches the upper-cased property value.</returns>
public static CorefProperties.CorefAlgorithmType Algorithm(Properties props)
{
    string fallback = GetLanguage(props) == Locale.English ? "statistical" : "neural";
    string type = PropertiesUtils.GetString(props, "coref.algorithm", fallback);

    // Upper-case with the invariant culture: the culture-sensitive ToUpper() maps 'i'
    // to a dotted capital under Turkish cultures, so a value containing 'i' (such as
    // the default "statistical") would no longer match an ASCII constant name.
    return CorefProperties.CorefAlgorithmType.ValueOf(type.ToUpperInvariant());
}
/// <summary>
/// Reports whether the feature-selection setting for a sieve is "pmi" (ignoring
/// case). The property key is <c>FeatureSelectionProp</c> with "SIEVENAME" replaced
/// by the sieve name, and "pmi" is the default passed to the lookup.
/// </summary>
/// <param name="props">Configuration source.</param>
/// <param name="sievename">Name of the sieve.</param>
public static bool DoPMIFeatureSelection(Properties props, string sievename)
{
    string propertyKey = FeatureSelectionProp.Replace("SIEVENAME", sievename);
    string selectionMethod = PropertiesUtils.GetString(props, propertyKey, "pmi");
    return Sharpen.Runtime.EqualsIgnoreCase(selectionMethod, "pmi");
}
/// <summary>
/// Looks up "coref.md.model". The default depends on
/// <c>UseConstituencyParse(props)</c>: "md-model.ser" when true, the dependency
/// variant "md-model-dep.ser.gz" otherwise.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetMentionDetectionModel(Properties props)
{
    string fallbackPath;
    if (UseConstituencyParse(props))
    {
        fallbackPath = "edu/stanford/nlp/models/coref/md-model.ser";
    }
    else
    {
        fallbackPath = "edu/stanford/nlp/models/coref/md-model-dep.ser.gz";
    }
    return PropertiesUtils.GetString(props, "coref.md.model", fallbackPath);
}
/// <summary>
/// Looks up the value stored under <c>AnalysisSkipMtypeProp</c>; null is passed as
/// the default.
/// </summary>
/// <param name="props">Configuration source.</param>
public static string GetSkipMentionType(Properties props)
{
    string skippedType = PropertiesUtils.GetString(props, AnalysisSkipMtypeProp, null);
    return skippedType;
}