/// <summary>
/// Returns the pipeline cached in <c>coreNLP</c>, creating and caching it on the first call.
/// </summary>
/// <param name="props">Coref properties; copied into the pipeline properties as defaults.</param>
/// <returns>The cached or newly built pipeline.</returns>
private StanfordCoreNLP GetStanfordCoreNLP(Properties props)
{
    if (coreNLP == null)
    {
        Properties pipelineProps = new Properties(props);

        // Pick the annotator chain for the input type.
        string annotatorList;
        if (CorefProperties.Conll(props))
        {
            annotatorList = CorefProperties.GetLanguage(props) == Locale.Chinese ? "lemma, ner" : "lemma";
        }
        else
        {
            string parser = CorefProperties.UseConstituencyParse(props) ? "parse" : "depparse";
            annotatorList = "pos, lemma, ner, " + parser;
        }

        // Mention detection is only appended when gold mentions are not used.
        if (!CorefProperties.UseGoldMentions(props))
        {
            annotatorList += ", coref.mention";
        }

        pipelineProps.SetProperty("annotators", annotatorList);
        pipelineProps.SetProperty("ner.applyFineGrained", "false");
        coreNLP = new StanfordCoreNLP(pipelineProps, false);
    }
    return coreNLP;
}
/// <summary>
/// Builds a CoNLL document reader for the input path configured in <paramref name="props"/>.
/// </summary>
/// <param name="props">Coref properties supplying the input path, file filter and language.</param>
/// <returns>A configured <c>CoNLLDocumentReader</c>, or <c>null</c> when no input path is configured.</returns>
private static IDocReader GetDocumentReader(Properties props)
{
    string inputPath = CorefProperties.GetInputPath(props);
    if (inputPath == null)
    {
        return null;
    }

    CoNLLDocumentReader.Options readerOptions = new CoNLLDocumentReader.Options();

    // Only touch the flag when the property explicitly disables the loading message.
    bool showLoadingMessage = PropertiesUtils.GetBool(props, "coref.printConLLLoadingMessage", true);
    if (!showLoadingMessage)
    {
        readerOptions.printConLLLoadingMessage = false;
    }

    readerOptions.annotateTokenCoref = false;
    readerOptions.SetFilter(props.GetProperty("coref.conllFileFilter", ".*_auto_conll$"));
    readerOptions.lang = CorefProperties.GetLanguage(props);

    return new CoNLLDocumentReader(inputPath, readerOptions);
}
/// <summary>
/// Creates the extractor and, for CoNLL input, the genre-code to id table.
/// </summary>
/// <param name="props">Coref properties; decides CoNLL mode and language.</param>
/// <param name="dictionaries">Dictionaries stored on the instance.</param>
public CategoricalFeatureExtractor(Properties props, Dictionaries dictionaries)
{
    this.dictionaries = dictionaries;
    conll = CorefProperties.Conll(props);

    if (!conll)
    {
        genres = null;
        return;
    }

    // The first four genre codes have the same ids for every language.
    genres = new Dictionary<string, int>
    {
        { "bc", 0 },
        { "bn", 1 },
        { "mz", 2 },
        { "nw", 3 }
    };

    // Only English has the "pt" genre; when present it shifts the ids that follow by one.
    int nextGenreId = 4;
    if (CorefProperties.GetLanguage(props) == Locale.English)
    {
        genres["pt"] = nextGenreId++;
    }
    genres["tc"] = nextGenreId++;
    genres["wb"] = nextGenreId;
}
/// <summary>
/// Resolves the pretrained embeddings path from <c>coref.neural.embeddingsPath</c>,
/// falling back to a language-specific default.
/// </summary>
/// <param name="props">Coref properties.</param>
/// <returns>The configured path, or the default for the configured language.</returns>
public static string PretrainedEmbeddingsPath(Properties props)
{
    // Anything other than Chinese falls back to the English resource name.
    string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
    string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languageName + "-embeddings.ser.gz";
    return PropertiesUtils.GetString(props, "coref.neural.embeddingsPath", fallbackPath);
}
/// <summary>
/// Resolves the neural coref model path from <c>coref.neural.modelPath</c>,
/// falling back to a default chosen by language and CoNLL mode.
/// </summary>
/// <param name="props">Coref properties.</param>
/// <returns>The configured path, or the default for the configured language and input type.</returns>
public static string ModelPath(Properties props)
{
    // Anything other than Chinese falls back to the English resource name.
    string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
    string variantSuffix = CorefProperties.Conll(props) ? "-model-conll" : "-model-default";
    string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languageName + variantSuffix + ".ser.gz";
    return PropertiesUtils.GetString(props, "coref.neural.modelPath", fallbackPath);
}
/// <summary>
/// Creates a mention finder for the language configured in <paramref name="props"/>
/// by delegating to the three-argument constructor.
/// </summary>
/// <param name="headFinder">Head finder passed through to the delegated constructor.</param>
/// <param name="props">Coref properties; only the language is read from them here.</param>
// NOTE(review): the leading "true" is a flag of the delegated constructor, which is not
// visible from here - confirm its meaning against that overload.
public RuleBasedCorefMentionFinder(IHeadFinder headFinder, Properties props) : this(true, headFinder, CorefProperties.GetLanguage(props)) { }
/// <summary>
/// Creates a hybrid mention finder, loading the mention detection classifier unless
/// the properties indicate that mention detection is being trained.
/// </summary>
/// <param name="headFinder">Head finder stored on the instance.</param>
/// <param name="props">Coref properties supplying the language, the training flag and the model location.</param>
/// <exception cref="System.TypeLoadException"/>
/// <exception cref="System.IO.IOException"/>
public HybridCorefMentionFinder(IHeadFinder headFinder, Properties props)
{
    this.headFinder = headFinder;
    this.lang = CorefProperties.GetLanguage(props);

    if (CorefProperties.IsMentionDetectionTraining(props))
    {
        // No classifier is loaded while mention detection is being trained.
        mdClassifier = null;
    }
    else
    {
        mdClassifier = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(CorefProperties.GetMentionDetectionModel(props));
    }
}
/// <summary>
/// Creates the coref annotator: builds the <c>CorefSystem</c> and, unless
/// <c>coref.useCustomMentionDetection</c> is set, the default mention annotator.
/// </summary>
/// <param name="props">
/// Coref properties. <c>coref.printConLLLoadingMessage</c> is temporarily set to
/// "false" while the coref system is built and is removed again afterwards,
/// whether or not construction succeeds.
/// </param>
/// <exception cref="System.Exception">
/// Thrown when the configuration is unsupported (hybrid algorithm with English) or
/// when the coref system cannot be created; the original failure is the inner exception.
/// </exception>
public CorefAnnotator(Properties props)
{
    this.props = props;
    try
    {
        // if user tries to run with coref.language = ENGLISH and coref.algorithm = hybrid, throw Exception
        // we do not support those settings at this time
        if (CorefProperties.Algorithm(props).Equals(CorefProperties.CorefAlgorithmType.Hybrid) && CorefProperties.GetLanguage(props).Equals(Locale.English))
        {
            string unsupportedMessage = "Error: coref.algorithm=hybrid is not supported for English, " + "please change coref.algorithm or coref.language";
            log.Error(unsupportedMessage);
            // Carry the reason in the exception so the wrapped failure is not message-less.
            throw new Exception(unsupportedMessage);
        }

        // suppress the CoNLL loading message while the coref system is built
        props.SetProperty("coref.printConLLLoadingMessage", "false");
        try
        {
            corefSystem = new CorefSystem(props);
        }
        finally
        {
            // Always undo the temporary override so a failed construction does not
            // leave the caller's properties modified.
            props.Remove("coref.printConLLLoadingMessage");
        }
    }
    catch (Exception e)
    {
        log.Error("Error creating CorefAnnotator...terminating pipeline construction!");
        log.Error(e);
        // Wrap with a message and keep the original failure as the inner exception.
        throw new Exception("Error creating CorefAnnotator...terminating pipeline construction!", e);
    }

    // unless custom mention detection is set, just use the default coref mention detector
    performMentionDetection = !PropertiesUtils.GetBool(props, "coref.useCustomMentionDetection", false);
    if (performMentionDetection)
    {
        mentionAnnotator = new CorefMentionAnnotator(props);
    }
}
/// <summary>
/// Finds coref mentions for the document and records them on the annotation.
/// </summary>
/// <remarks>
/// The mentions are stored per sentence and document-wide, each mention gets its
/// sentence number and a document-wide id, every token gets the ids of the mentions
/// covering it, and mappings between entity mentions and coref mentions are stored.
/// </remarks>
/// <param name="annotation">The document annotation to read from and write to.</param>
public virtual void Annotate(Annotation annotation)
{
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

    string docID = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));
    if (docID == null)
    {
        docID = string.Empty;
    }

    // CAUTION: this special case could introduce a really hard to find bug.
    // It is needed for Chinese coreference: nested mentions must be kept for
    // newswire text or there is a big performance drop.
    bool keepNestedMentions =
        docID.Contains("nw")
        && (CorefProperties.Conll(corefProperties) || corefProperties.GetProperty("coref.input.type", "raw").Equals("conll"))
        && CorefProperties.GetLanguage(corefProperties) == Locale.Chinese
        && PropertiesUtils.GetBool(corefProperties, "coref.specialCaseNewswire");
    corefProperties.SetProperty("removeNestedMentions", keepNestedMentions ? "false" : "true");

    IList<IList<Mention>> mentions = md.FindMentions(annotation, dictionaries, corefProperties);

    // start with an empty document-wide list of coref mentions
    annotation.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), new List<Mention>());

    // every token starts with an empty set of coref mention ids
    foreach (CoreLabel docToken in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        docToken.Set(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation), new ArraySet<int>());
    }

    // running id, which is also the mention's position in the document-wide list
    int nextMentionId = 0;
    for (int sentenceIndex = 0; sentenceIndex < sentences.Count; sentenceIndex++)
    {
        ICoreMap sentence = sentences[sentenceIndex];
        IList<Mention> sentenceMentions = mentions[sentenceIndex];

        sentence.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), sentenceMentions);
        Sharpen.Collections.AddAll(annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)), sentenceMentions);

        // record the sentence each mention belongs to
        foreach (Mention sentenceMention in sentenceMentions)
        {
            sentenceMention.sentNum = sentenceIndex;
        }

        // assign ids and tag the covered tokens with them
        foreach (Mention mention in sentenceMentions)
        {
            mention.mentionID = nextMentionId;
            for (int tokenOffset = mention.startIndex; tokenOffset < mention.endIndex; tokenOffset++)
            {
                CoreLabel coveredToken = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[tokenOffset];
                coveredToken.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)).Add(nextMentionId);
            }
            nextMentionId++;
        }
    }

    // synch coref mentions to entity mentions
    Dictionary<int, int> corefToEntity = new Dictionary<int, int>();
    Dictionary<int, int> entityToCoref = new Dictionary<int, int>();
    foreach (CoreLabel label in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        // tokens without an entity mention index have nothing to synch
        if (label.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation)) == null)
        {
            continue;
        }

        int entityMentionIndex = label.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation));
        ICoreMap entityMention = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[entityMentionIndex];

        foreach (int corefMentionIndex in label.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)))
        {
            Mention candidate = annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation))[corefMentionIndex];
            if (!SynchCorefMentionEntityMention(annotation, candidate, entityMention))
            {
                continue;
            }
            entityToCoref[entityMentionIndex] = corefMentionIndex;
            corefToEntity[corefMentionIndex] = entityMentionIndex;
        }
    }

    // store mappings between entity mentions and coref mentions in annotation
    annotation.Set(typeof(CoreAnnotations.CorefMentionToEntityMentionMappingAnnotation), corefToEntity);
    annotation.Set(typeof(CoreAnnotations.EntityMentionToCorefMentionMappingAnnotation), entityToCoref);
}