Пример #1
0
        /// <summary>
        /// Builds the annotator from <paramref name="properties"/>: loads the dependency parser
        /// model and reads the thread count, sentence timeout and extra-dependencies setting.
        /// </summary>
        /// <param name="properties">
        /// Configuration source. The keys read here are "model", "testThreads",
        /// "sentenceTimeout" and "extradependencies".
        /// </param>
        public DependencyParseAnnotator(Properties properties)
        {
            // The same properties object is also handed to the model loader.
            parser = DependencyParser.LoadFromModelFile(
                PropertiesUtils.GetString(properties, "model", DependencyParser.DefaultModel),
                properties);

            nThreads = PropertiesUtils.GetInt(properties, "testThreads", DefaultNthreads);
            maxTime  = PropertiesUtils.GetLong(properties, "sentenceTimeout", DefaultMaxtime);

            // "NONE" is used when the "extradependencies" key is not supplied.
            string extrasName = properties.GetProperty("extradependencies", "NONE");
            extraDependencies = MetaClass.Cast(extrasName, typeof(GrammaticalStructure.Extras));
        }
        // ---------- Mention Detection ----------
        /// <summary>
        /// Resolves the mention-detection type from the "coref.md.type" property.
        /// </summary>
        /// <remarks>
        /// When the property is absent the fallback passed to the lookup is "RULE" if
        /// <c>UseConstituencyParse(props)</c> is true and "dep" otherwise. The value "dep"
        /// (any casing) is expanded to "DEPENDENCY" before the enum lookup.
        /// </remarks>
        /// <param name="props">Configuration source.</param>
        /// <returns>The enum value whose name equals the upper-cased setting.</returns>
        public static CorefProperties.MentionDetectionType MdType(Properties props)
        {
            string type = PropertiesUtils.GetString(props, "coref.md.type", UseConstituencyParse(props) ? "RULE" : "dep");

            if (Sharpen.Runtime.EqualsIgnoreCase(type, "dep"))
            {
                type = "DEPENDENCY";
            }
            // Upper-case with the invariant culture: the culture-sensitive ToUpper() maps 'i' to
            // 'İ' under Turkish/Azeri cultures, so a value such as "hybrid" would no longer match
            // an ASCII enum name.
            return(CorefProperties.MentionDetectionType.ValueOf(type.ToUpperInvariant()));
        }
Пример #3
0
        /// <summary>
        /// Overrides the configuration fields of this instance with values found in
        /// <paramref name="props"/>. Each lookup passes the field's current value as the
        /// fallback argument.
        /// </summary>
        /// <param name="props">Configuration source.</param>
        private void SetProperties(Properties props)
        {
            // Training / model options
            trainingThreads       = PropertiesUtils.GetInt(props, "trainingThreads", trainingThreads);
            wordCutOff            = PropertiesUtils.GetInt(props, "wordCutOff", wordCutOff);
            initRange             = PropertiesUtils.GetDouble(props, "initRange", initRange);
            maxIter               = PropertiesUtils.GetInt(props, "maxIter", maxIter);
            batchSize             = PropertiesUtils.GetInt(props, "batchSize", batchSize);
            adaEps                = PropertiesUtils.GetDouble(props, "adaEps", adaEps);
            adaAlpha              = PropertiesUtils.GetDouble(props, "adaAlpha", adaAlpha);
            regParameter          = PropertiesUtils.GetDouble(props, "regParameter", regParameter);
            dropProb              = PropertiesUtils.GetDouble(props, "dropProb", dropProb);
            hiddenSize            = PropertiesUtils.GetInt(props, "hiddenSize", hiddenSize);
            embeddingSize         = PropertiesUtils.GetInt(props, "embeddingSize", embeddingSize);
            numPreComputed        = PropertiesUtils.GetInt(props, "numPreComputed", numPreComputed);
            evalPerIter           = PropertiesUtils.GetInt(props, "evalPerIter", evalPerIter);
            clearGradientsPerIter = PropertiesUtils.GetInt(props, "clearGradientsPerIter", clearGradientsPerIter);
            saveIntermediate      = PropertiesUtils.GetBool(props, "saveIntermediate", saveIntermediate);
            unlabeled             = PropertiesUtils.GetBool(props, "unlabeled", unlabeled);
            cPOS   = PropertiesUtils.GetBool(props, "cPOS", cPOS);
            noPunc = PropertiesUtils.GetBool(props, "noPunc", noPunc);
            doWordEmbeddingGradUpdate = PropertiesUtils.GetBool(props, "doWordEmbeddingGradUpdate", doWordEmbeddingGradUpdate);
            // Runtime parsing options
            sentenceDelimiter = PropertiesUtils.GetString(props, "sentenceDelimiter", sentenceDelimiter);
            tagger            = PropertiesUtils.GetString(props, "tagger.model", tagger);
            string escaperClass = props.GetProperty("escaper");

            // The escaper is instantiated by class name; no "escaper" key means no escaper.
            escaper = escaperClass != null?ReflectionLoading.LoadByReflection(escaperClass) : null;

            // Language options
            language = props.Contains("language") ? GetLanguage(props.GetProperty("language")) : language;
            // Default language pack comes from the selected language's parameters.
            // (This line had been mangled into "[email protected]()" by e-mail obfuscation.)
            tlp      = language.@params.TreebankLanguagePack();
            // if a tlp was specified go with that
            string tlpCanonicalName = props.GetProperty("tlp");

            if (tlpCanonicalName != null)
            {
                try
                {
                    tlp = ReflectionLoading.LoadByReflection(tlpCanonicalName);
                    System.Console.Error.WriteLine("Loaded TreebankLanguagePack: " + tlpCanonicalName);
                }
                catch (Exception)
                {
                    // Best effort: on failure the language-derived pack assigned above is kept.
                    System.Console.Error.WriteLine("Error: Failed to load TreebankLanguagePack: " + tlpCanonicalName);
                }
            }
        }
Пример #4
0
 /// <summary>
 /// Configures the combiner from <paramref name="props"/>: numeric-classifier flag, NER
 /// language, SUTime flag, the number sequence classifier and the gazette mapping.
 /// </summary>
 /// <param name="props">Configuration source; also forwarded to the base constructor.</param>
 /// <exception cref="System.IO.IOException"/>
 public NERClassifierCombiner(Properties props)
     : base(props)
 {
     // todo [cdm 2015]: Could avoid constructing this if applyNumericClassifiers is false
     applyNumericClassifiers = PropertiesUtils.GetBool(props, ApplyNumericClassifiersProperty, ApplyNumericClassifiersDefault);

     string languageName = PropertiesUtils.GetString(props, NerLanguageProperty, null);
     nerLanguage = NERClassifierCombiner.Language.FromString(languageName, NerLanguageDefault);

     useSUTime = PropertiesUtils.GetBool(props, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);
     nsc = new NumberSequenceClassifier(new Properties(), useSUTime, props);

     // The gazette file is only read when the gazette option is switched on.
     bool applyGazette = PropertiesUtils.GetBool(props, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
     if (!applyGazette)
     {
         this.gazetteMapping = Java.Util.Collections.EmptyMap();
     }
     else
     {
         this.gazetteMapping = ReadRegexnerGazette(DefaultPaths.DefaultNerGazetteMapping);
     }
 }
Пример #5
0
        /// <summary>
        /// Determines the classifier type for the sieve called <paramref name="sievename"/>.
        /// </summary>
        /// <remarks>
        /// Checked in order: membership in <c>dcorefSieveNames</c> gives <c>Rule</c>; a name
        /// ending in "-rf" gives <c>Rf</c>; a name ending in "-oracle" gives <c>Oracle</c>
        /// (both suffixes case-insensitive). Otherwise the type is read from the property whose
        /// key is <c>ClassifierTypeProp</c> with "SIEVENAME" replaced by the sieve name.
        /// </remarks>
        /// <param name="props">Configuration source for the per-sieve property.</param>
        /// <param name="sievename">Name of the sieve.</param>
        /// <returns>The resolved classifier type.</returns>
        public static Sieve.ClassifierType GetClassifierType(Properties props, string sievename)
        {
            if (dcorefSieveNames.Contains(sievename))
            {
                return(Sieve.ClassifierType.Rule);
            }
            // Ordinal, case-insensitive suffix tests: unlike ToLower().EndsWith(...), these do
            // not depend on the current culture and do not allocate a lowered copy.
            if (sievename.EndsWith("-rf", System.StringComparison.OrdinalIgnoreCase))
            {
                return(Sieve.ClassifierType.Rf);
            }
            if (sievename.EndsWith("-oracle", System.StringComparison.OrdinalIgnoreCase))
            {
                return(Sieve.ClassifierType.Oracle);
            }
            // NOTE(review): null is passed as the fallback, so a missing property reaches
            // ValueOf as null — confirm how ValueOf handles that.
            string classifierType = PropertiesUtils.GetString(props, ClassifierTypeProp.Replace("SIEVENAME", sievename), null);

            return(Sieve.ClassifierType.ValueOf(classifierType));
        }
        /// <summary>
        /// Maps the "coref.language" property (fallback "en") to a <c>Locale</c>.
        /// </summary>
        /// <param name="props">Configuration source.</param>
        /// <returns>
        /// <c>Locale.English</c> for "en"/"english", <c>Locale.Chinese</c> for "zh"/"chinese"
        /// (case-insensitive).
        /// </returns>
        /// <exception cref="ArgumentException">The value names any other language.</exception>
        public static Locale GetLanguage(Properties props)
        {
            string code = PropertiesUtils.GetString(props, "coref.language", "en");

            if (Sharpen.Runtime.EqualsIgnoreCase(code, "en") || Sharpen.Runtime.EqualsIgnoreCase(code, "english"))
            {
                return Locale.English;
            }

            if (Sharpen.Runtime.EqualsIgnoreCase(code, "zh") || Sharpen.Runtime.EqualsIgnoreCase(code, "chinese"))
            {
                return Locale.Chinese;
            }

            throw new ArgumentException("unsupported language");
        }
Пример #7
0
 /// <summary>
 /// Reads the property keyed by <c>GenderNumberProp</c>, passing the bundled
 /// gender data path as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetGenderNumber(Properties props)
 {
     string fallbackPath = "edu/stanford/nlp/models/dcoref/gender.data.gz";

     return PropertiesUtils.GetString(props, GenderNumberProp, fallbackPath);
 }
Пример #8
0
 /// <summary>
 /// Reads the property keyed by <c>Word2vecProp</c>; null is passed as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetPathWord2Vec(Properties props)
 {
     string word2VecPath = PropertiesUtils.GetString(props, Word2vecProp, null);

     return word2VecPath;
 }
Пример #9
0
 /// <summary>
 /// Reads the property keyed by <c>CurrentSieveForTrainProp</c>; null is passed as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetCurrentSieveForTrain(Properties props)
 {
     string sieveName = PropertiesUtils.GetString(props, CurrentSieveForTrainProp, null);

     return sieveName;
 }
Пример #10
0
 /// <summary>
 /// Reads the property keyed by <c>Word2vecSerializedProp</c>, passing a fixed
 /// absolute path as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetPathSerializedWordVectors(Properties props)
 {
     string fallbackPath = "/u/scr/nlp/data/coref/wordvectors/en/vector.ser.gz";

     return PropertiesUtils.GetString(props, Word2vecSerializedProp, fallbackPath);
 }
Пример #11
0
 /// <summary>
 /// Reads the property keyed by <c>AnalysisSkipAtypeProp</c>; null is passed as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetSkipAntecedentType(Properties props)
 {
     string skippedType = PropertiesUtils.GetString(props, AnalysisSkipAtypeProp, null);

     return skippedType;
 }
Пример #12
0
 /// <summary>
 /// Reads "coref.statistical.classificationModel", with
 /// <c>GetDefaultModelPath(props, "classification_model")</c> as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string ClassificationModelPath(Properties props)
 {
     // The fallback is computed up front, exactly as when it is passed inline as an argument.
     string fallbackPath = GetDefaultModelPath(props, "classification_model");

     return PropertiesUtils.GetString(props, "coref.statistical.classificationModel", fallbackPath);
 }
Пример #13
0
 /// <summary>
 /// Reads "coref.statistical.anaphoricityModel", with
 /// <c>GetDefaultModelPath(props, "anaphoricity_model")</c> as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string AnaphoricityModelPath(Properties props)
 {
     // The fallback is computed up front, exactly as when it is passed inline as an argument.
     string fallbackPath = GetDefaultModelPath(props, "anaphoricity_model");

     return PropertiesUtils.GetString(props, "coref.statistical.anaphoricityModel", fallbackPath);
 }
        /// <summary>
        /// Reads "coref.neural.modelPath". The fallback path is assembled from the coref
        /// language ("chinese" or "english") and whether <c>CorefProperties.Conll(props)</c>
        /// is true ("-model-conll" or "-model-default").
        /// </summary>
        /// <param name="props">Configuration source.</param>
        public static string ModelPath(Properties props)
        {
            string languagePart = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
            string variantPart  = CorefProperties.Conll(props) ? "-model-conll" : "-model-default";
            string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languagePart + variantPart + ".ser.gz";

            return PropertiesUtils.GetString(props, "coref.neural.modelPath", fallbackPath);
        }
 /// <summary>
 /// Reads "coref.path.singletonPredictor", passing the bundled singleton
 /// predictor path as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 private static string GetPathSingletonPredictor(Properties props)
 {
     string fallbackPath = "edu/stanford/nlp/models/dcoref/singleton.predictor.ser";

     return PropertiesUtils.GetString(props, "coref.path.singletonPredictor", fallbackPath);
 }
Пример #16
0
 /// <summary>
 /// Reads "coref.statistical.clusteringModel", with
 /// <c>GetDefaultModelPath(props, "clustering_model")</c> as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string ClusteringModelPath(Properties props)
 {
     // The fallback is computed up front, exactly as when it is passed inline as an argument.
     string fallbackPath = GetDefaultModelPath(props, "clustering_model");

     return PropertiesUtils.GetString(props, "coref.statistical.clusteringModel", fallbackPath);
 }
        /// <summary>
        /// Configures the annotator from <paramref name="properties"/>: resolves the list of NER
        /// model paths, builds a <c>NERClassifierCombiner</c>, reads threading and limit
        /// options, optionally creates the Spanish number annotator, and runs the fine-grained,
        /// additional-rules and entity-mention setup helpers.
        /// </summary>
        /// <param name="properties">Configuration source.</param>
        /// <exception cref="System.IO.IOException"/>
        public NERCombinerAnnotator(Properties properties)
        {
            IList <string> models     = new List <string>();
            string         modelNames = properties.GetProperty("ner.model");

            // No "ner.model" key at all: fall back to the three default models, comma-joined.
            if (modelNames == null)
            {
                modelNames = DefaultPaths.DefaultNerThreeclassModel + ',' + DefaultPaths.DefaultNerMucModel + ',' + DefaultPaths.DefaultNerConllModel;
            }
            // An explicitly empty "ner.model" leaves the model list empty.
            if (!modelNames.IsEmpty())
            {
                Sharpen.Collections.AddAll(models, Arrays.AsList(modelNames.Split(",")));
            }
            if (models.IsEmpty())
            {
                // Allow for no real NER model - can just use numeric classifiers or SUTime.
                // Have to unset ner.model, so unlikely that people got here by accident.
                log.Info("WARNING: no NER models specified");
            }
            bool applyNumericClassifiers = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyNumericClassifiersProperty, NERClassifierCombiner.ApplyNumericClassifiersDefault);
            bool applyRegexner           = PropertiesUtils.GetBool(properties, NERClassifierCombiner.ApplyGazetteProperty, NERClassifierCombiner.ApplyGazetteDefault);
            bool useSUTime = PropertiesUtils.GetBool(properties, NumberSequenceClassifier.UseSutimeProperty, NumberSequenceClassifier.UseSutimeDefault);

            // option for setting doc date to be the present during each annotation
            usePresentDateForDocDate = PropertiesUtils.GetBool(properties, "ner." + "usePresentDateForDocDate", false);
            // option for setting doc date from a provided string
            providedDocDate = PropertiesUtils.GetString(properties, "ner." + "providedDocDate", string.Empty);
            // The provided date is kept only if Matches() accepts it against the
            // 4-digit/2-digit/2-digit pattern; otherwise it is reset to the empty string.
            // NOTE(review): presumably Matches() is a whole-string match as in java.util.regex - confirm.
            Pattern p = Pattern.Compile("[0-9]{4}\\-[0-9]{2}\\-[0-9]{2}");
            Matcher m = p.Matcher(providedDocDate);

            if (!m.Matches())
            {
                providedDocDate = string.Empty;
            }
            NERClassifierCombiner.Language nerLanguage = NERClassifierCombiner.Language.FromString(PropertiesUtils.GetString(properties, NERClassifierCombiner.NerLanguageProperty, null), NERClassifierCombiner.NerLanguageDefault);
            bool verbose = PropertiesUtils.GetBool(properties, "ner." + "verbose", false);

            string[]   loadPaths          = Sharpen.Collections.ToArray(models, new string[models.Count]);
            // Only the properties named in DefaultPassDownProperties are handed to the combiner.
            Properties combinerProperties = PropertiesUtils.ExtractSelectedProperties(properties, NERClassifierCombiner.DefaultPassDownProperties);

            if (useSUTime)
            {
                // Make sure SUTime parameters are included
                Properties sutimeProps = PropertiesUtils.ExtractPrefixedProperties(properties, NumberSequenceClassifier.SutimeProperty + '.', true);
                PropertiesUtils.OverWriteProperties(combinerProperties, sutimeProps);
            }
            NERClassifierCombiner nerCombiner = new NERClassifierCombiner(applyNumericClassifiers, nerLanguage, useSUTime, applyRegexner, combinerProperties, loadPaths);

            // "ner.nthreads" falls back to the general "nthreads" setting, which falls back to 1.
            this.nThreads          = PropertiesUtils.GetInt(properties, "ner.nthreads", PropertiesUtils.GetInt(properties, "nthreads", 1));
            this.maxTime           = PropertiesUtils.GetLong(properties, "ner.maxtime", 0);
            this.maxSentenceLength = PropertiesUtils.GetInt(properties, "ner.maxlen", int.MaxValue);
            this.language          = LanguageInfo.GetLanguageFromString(PropertiesUtils.GetString(properties, "ner.language", "en"));
            // in case of Spanish, use the Spanish number regexner annotator
            if (language.Equals(LanguageInfo.HumanLanguage.Spanish))
            {
                Properties spanishNumberRegexNerProperties = new Properties();
                spanishNumberRegexNerProperties["spanish.number.regexner.mapping"]         = spanishNumberRegexRules;
                spanishNumberRegexNerProperties["spanish.number.regexner.validpospattern"] = "^(NUM).*";
                spanishNumberRegexNerProperties["spanish.number.regexner.ignorecase"]      = "true";
                spanishNumberAnnotator = new TokensRegexNERAnnotator("spanish.number.regexner", spanishNumberRegexNerProperties);
            }
            // set up fine grained ner
            SetUpFineGrainedNER(properties);
            // set up additional rules ner
            SetUpAdditionalRulesNER(properties);
            // set up entity mentions
            SetUpEntityMentionBuilding(properties);
            Verbose  = verbose;
            this.ner = nerCombiner;
        }
Пример #18
0
 /// <summary>
 /// Reads "coref.statistical.wordCounts", passing the bundled word-counts
 /// path as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string WordCountsPath(Properties props)
 {
     string fallbackPath = "edu/stanford/nlp/models/coref/statistical/word_counts.ser.gz";

     return PropertiesUtils.GetString(props, "coref.statistical.wordCounts", fallbackPath);
 }
        /// <summary>
        /// Reads "coref.neural.embeddingsPath". The fallback path is built from the coref
        /// language: "chinese" when it is <c>Locale.Chinese</c>, "english" otherwise.
        /// </summary>
        /// <param name="props">Configuration source.</param>
        public static string PretrainedEmbeddingsPath(Properties props)
        {
            string languagePart = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
            string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languagePart + "-embeddings.ser.gz";

            return PropertiesUtils.GetString(props, "coref.neural.embeddingsPath", fallbackPath);
        }
Пример #20
0
 /// <summary>
 /// Reads the property keyed by <c>SievesProp</c>, passing a fixed
 /// comma-separated sieve list as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetSieves(Properties props)
 {
     string fallbackSieves = "SpeakerMatch,PreciseConstructs,pp-rf,cc-rf,pc-rf,ll-rf,pr-rf";

     return PropertiesUtils.GetString(props, SievesProp, fallbackSieves);
 }
        // static methods
        //---------- Coreference Algorithms ----------
        /// <summary>
        /// Resolves the coreference algorithm from the "coref.algorithm" property.
        /// </summary>
        /// <remarks>
        /// The fallback passed to the lookup is "statistical" when <c>GetLanguage(props)</c> is
        /// <c>Locale.English</c> and "neural" otherwise.
        /// </remarks>
        /// <param name="props">Configuration source.</param>
        /// <returns>The enum value whose name equals the upper-cased setting.</returns>
        public static CorefProperties.CorefAlgorithmType Algorithm(Properties props)
        {
            string type = PropertiesUtils.GetString(props, "coref.algorithm", GetLanguage(props) == Locale.English ? "statistical" : "neural");

            // Upper-case with the invariant culture: under Turkish/Azeri cultures the
            // culture-sensitive ToUpper() turns "statistical" into "STATİSTİCAL", which would
            // not match an ASCII enum name.
            return(CorefProperties.CorefAlgorithmType.ValueOf(type.ToUpperInvariant()));
        }
Пример #22
0
 /// <summary>
 /// Tells whether the feature-selection setting for <paramref name="sievename"/> equals
 /// "pmi" (case-insensitive). The key is <c>FeatureSelectionProp</c> with "SIEVENAME"
 /// replaced by the sieve name, and "pmi" is passed as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 /// <param name="sievename">Name of the sieve.</param>
 public static bool DoPMIFeatureSelection(Properties props, string sievename)
 {
     string propertyKey = FeatureSelectionProp.Replace("SIEVENAME", sievename);
     string selection   = PropertiesUtils.GetString(props, propertyKey, "pmi");

     return Sharpen.Runtime.EqualsIgnoreCase(selection, "pmi");
 }
 /// <summary>
 /// Reads "coref.md.model". The fallback is the constituency model path when
 /// <c>UseConstituencyParse(props)</c> is true and the dependency model path otherwise.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetMentionDetectionModel(Properties props)
 {
     string fallbackModel;
     if (UseConstituencyParse(props))
     {
         fallbackModel = "edu/stanford/nlp/models/coref/md-model.ser";
     }
     else
     {
         fallbackModel = "edu/stanford/nlp/models/coref/md-model-dep.ser.gz";
     }

     return PropertiesUtils.GetString(props, "coref.md.model", fallbackModel);
 }
Пример #24
0
 /// <summary>
 /// Reads the property keyed by <c>AnalysisSkipMtypeProp</c>; null is passed as the fallback.
 /// </summary>
 /// <param name="props">Configuration source.</param>
 public static string GetSkipMentionType(Properties props)
 {
     string skippedType = PropertiesUtils.GetString(props, AnalysisSkipMtypeProp, null);

     return skippedType;
 }