Beispiel #1
0
        /// <summary>
        /// Returns the cached <c>coreNLP</c> pipeline when one already exists; otherwise builds
        /// a pipeline from a new <c>Properties</c> object constructed from
        /// <paramref name="props"/>, stores it in <c>coreNLP</c> and returns it.
        /// </summary>
        /// <param name="props">Coref settings used to choose the annotator list.</param>
        /// <returns>The cached or newly created pipeline.</returns>
        private StanfordCoreNLP GetStanfordCoreNLP(Properties props)
        {
            if (coreNLP != null)
            {
                return coreNLP;
            }

            Properties pipelineSettings = new Properties(props);

            // The leading annotators depend on whether the input is CoNLL data.
            string annotatorList;
            if (CorefProperties.Conll(props))
            {
                annotatorList = CorefProperties.GetLanguage(props) == Locale.Chinese ? "lemma, ner" : "lemma";
            }
            else
            {
                annotatorList = "pos, lemma, ner, " + (CorefProperties.UseConstituencyParse(props) ? "parse" : "depparse");
            }

            // The mention annotator is appended only when gold mentions are not used.
            if (!CorefProperties.UseGoldMentions(props))
            {
                annotatorList += ", coref.mention";
            }

            pipelineSettings.SetProperty("annotators", annotatorList);
            pipelineSettings.SetProperty("ner.applyFineGrained", "false");

            StanfordCoreNLP created = new StanfordCoreNLP(pipelineSettings, false);
            coreNLP = created;
            return created;
        }
Beispiel #2
0
        /// <summary>
        /// Creates a <c>CoNLLDocumentReader</c> for the input path configured in
        /// <paramref name="props"/>.
        /// </summary>
        /// <param name="props">Coref settings: input path, loading-message flag, file filter and language.</param>
        /// <returns>The reader, or <c>null</c> when no input path is configured.</returns>
        private static IDocReader GetDocumentReader(Properties props)
        {
            string inputPath = CorefProperties.GetInputPath(props);
            if (inputPath == null)
            {
                return null;
            }

            CoNLLDocumentReader.Options readerOptions = new CoNLLDocumentReader.Options();

            // Only override the option when the property explicitly disables the message.
            bool showLoadingMessage = PropertiesUtils.GetBool(props, "coref.printConLLLoadingMessage", true);
            if (!showLoadingMessage)
            {
                readerOptions.printConLLLoadingMessage = false;
            }

            readerOptions.annotateTokenCoref = false;
            readerOptions.SetFilter(props.GetProperty("coref.conllFileFilter", ".*_auto_conll$"));
            readerOptions.lang = CorefProperties.GetLanguage(props);

            return new CoNLLDocumentReader(inputPath, readerOptions);
        }
 /// <summary>
 /// Stores the dictionaries and, for CoNLL input, builds the genre-code-to-index table.
 /// For non-CoNLL input <c>genres</c> is set to <c>null</c>.
 /// </summary>
 /// <param name="props">Coref settings; decides CoNLL mode and language.</param>
 /// <param name="dictionaries">Dictionaries kept on this instance.</param>
 public CategoricalFeatureExtractor(Properties props, Dictionaries dictionaries)
 {
     this.dictionaries = dictionaries;
     conll = CorefProperties.Conll(props);

     if (!conll)
     {
         genres = null;
         return;
     }

     genres = new Dictionary<string, int>();
     genres["bc"] = 0;
     genres["bn"] = 1;
     genres["mz"] = 2;
     genres["nw"] = 3;

     // "pt" is only registered for English, which shifts the indexes that follow it.
     int nextIndex = 4;
     bool isEnglish = CorefProperties.GetLanguage(props) == Locale.English;
     if (isEnglish)
     {
         genres["pt"] = nextIndex;
         nextIndex++;
     }
     genres["tc"] = nextIndex;
     genres["wb"] = nextIndex + 1;
 }
        /// <summary>
        /// Returns the value of the <c>coref.neural.embeddingsPath</c> property, falling back to
        /// a language-specific default path ("chinese" or "english" embeddings).
        /// </summary>
        /// <param name="props">Coref settings.</param>
        /// <returns>The configured or default embeddings path.</returns>
        public static string PretrainedEmbeddingsPath(Properties props)
        {
            string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
            string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languageName + "-embeddings.ser.gz";

            return PropertiesUtils.GetString(props, "coref.neural.embeddingsPath", fallbackPath);
        }
        /// <summary>
        /// Returns the value of the <c>coref.neural.modelPath</c> property, falling back to a
        /// default path chosen by language ("chinese"/"english") and by whether CoNLL mode is on
        /// ("-model-conll"/"-model-default").
        /// </summary>
        /// <param name="props">Coref settings.</param>
        /// <returns>The configured or default model path.</returns>
        public static string ModelPath(Properties props)
        {
            string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
            string modelVariant = CorefProperties.Conll(props) ? "-model-conll" : "-model-default";
            string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languageName + modelVariant + ".ser.gz";

            return PropertiesUtils.GetString(props, "coref.neural.modelPath", fallbackPath);
        }
Beispiel #6
0
 /// <summary>
 /// Convenience constructor: delegates to the three-argument constructor, passing
 /// <c>true</c> as its first argument, the given head finder, and the language read from
 /// <paramref name="props"/> via <c>CorefProperties.GetLanguage</c>.
 /// </summary>
 /// <param name="headFinder">Head finder forwarded to the delegated constructor.</param>
 /// <param name="props">Coref settings; only the language is read here.</param>
 // NOTE(review): the meaning of the leading `true` flag is defined by the delegated
 // constructor, which is not visible here — confirm before changing it.
 public RuleBasedCorefMentionFinder(IHeadFinder headFinder, Properties props)
     : this(true, headFinder, CorefProperties.GetLanguage(props))
 {
 }
Beispiel #7
0
 /// <exception cref="System.TypeLoadException"/>
 /// <exception cref="System.IO.IOException"/>
 /// <summary>
 /// Stores the head finder and the language read from <paramref name="props"/>, then sets
 /// <c>mdClassifier</c>: <c>null</c> when mention-detection training is enabled, otherwise the
 /// object loaded from the configured mention-detection model location.
 /// </summary>
 /// <param name="headFinder">Head finder kept on this instance.</param>
 /// <param name="props">Coref settings.</param>
 public HybridCorefMentionFinder(IHeadFinder headFinder, Properties props)
 {
     this.headFinder = headFinder;
     this.lang = CorefProperties.GetLanguage(props);

     if (CorefProperties.IsMentionDetectionTraining(props))
     {
         // No classifier is loaded while training mention detection.
         mdClassifier = null;
     }
     else
     {
         mdClassifier = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(CorefProperties.GetMentionDetectionModel(props));
     }
 }
Beispiel #8
0
 /// <summary>
 /// Builds the coref annotator: validates the algorithm/language combination, creates the
 /// <c>CorefSystem</c>, and creates the default mention annotator unless
 /// <c>coref.useCustomMentionDetection</c> is set.
 /// </summary>
 /// <param name="props">
 /// Coref settings. The <c>coref.printConLLLoadingMessage</c> property is temporarily set to
 /// "false" while the coref system is created and is removed again afterwards.
 /// </param>
 /// <exception cref="System.Exception">
 /// Thrown when the settings are unsupported (hybrid algorithm with English) or when the
 /// coref system cannot be created; the original failure is available as the inner exception.
 /// </exception>
 public CorefAnnotator(Properties props)
 {
     this.props = props;
     try
     {
         // if user tries to run with coref.language = ENGLISH and coref.algorithm = hybrid, throw Exception
         // we do not support those settings at this time
         if (CorefProperties.Algorithm(props).Equals(CorefProperties.CorefAlgorithmType.Hybrid) && CorefProperties.GetLanguage(props).Equals(Locale.English))
         {
             string unsupportedMessage = "Error: coref.algorithm=hybrid is not supported for English, " + "please change coref.algorithm or coref.language";
             log.Error(unsupportedMessage);
             // Carry the reason in the exception itself so it is not lost if logging is off.
             throw new Exception(unsupportedMessage);
         }
         // suppress the loading message while the coref system is being built
         props.SetProperty("coref.printConLLLoadingMessage", "false");
         try
         {
             corefSystem = new CorefSystem(props);
         }
         finally
         {
             // Always undo the temporary override, even when construction fails, so the
             // caller's properties are not left modified.
             props.Remove("coref.printConLLLoadingMessage");
         }
     }
     catch (Exception e)
     {
         string failureMessage = "Error creating CorefAnnotator...terminating pipeline construction!";
         log.Error(failureMessage);
         log.Error(e);
         // Wrap with a message and keep the cause as InnerException.
         throw new Exception(failureMessage, e);
     }
     // unless custom mention detection is set, just use the default coref mention detector
     performMentionDetection = !PropertiesUtils.GetBool(props, "coref.useCustomMentionDetection", false);
     if (performMentionDetection)
     {
         mentionAnnotator = new CorefMentionAnnotator(props);
     }
 }
        /// <summary>
        /// Runs mention detection on <paramref name="annotation"/> and records the results:
        /// a document-wide and per-sentence list of coref mentions, a set of coref mention
        /// indexes on every token, and two mappings between coref mention indexes and entity
        /// mention indexes.
        /// </summary>
        /// <param name="annotation">The document annotation to read from and write to.</param>
        public virtual void Annotate(Annotation annotation)
        {
            IList<ICoreMap> sentenceList = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // TO DO: be careful, this could introduce a really hard to find bug.
            // Needed for Chinese coreference: removeNested has to be "false" for newswire
            // text, otherwise there is a big performance drop.
            string documentId = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));
            if (documentId == null)
            {
                documentId = string.Empty;
            }

            // Evaluated as one short-circuiting expression, in the original operand order.
            bool keepNestedMentions =
                documentId.Contains("nw")
                && (CorefProperties.Conll(corefProperties) || corefProperties.GetProperty("coref.input.type", "raw").Equals("conll"))
                && CorefProperties.GetLanguage(corefProperties) == Locale.Chinese
                && PropertiesUtils.GetBool(corefProperties, "coref.specialCaseNewswire");
            corefProperties.SetProperty("removeNestedMentions", keepNestedMentions ? "false" : "true");

            IList<IList<Mention>> mentionsBySentence = md.FindMentions(annotation, dictionaries, corefProperties);

            // Start the document-wide mention list empty.
            annotation.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), new List<Mention>());

            int nextMentionId = 0;
            int sentenceIndex = 0;

            // Give every token an empty set of coref mention ids.
            foreach (CoreLabel docToken in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                docToken.Set(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation), new ArraySet<int>());
            }

            foreach (ICoreMap currentSentence in sentenceList)
            {
                IList<Mention> sentenceMentions = mentionsBySentence[sentenceIndex];
                currentSentence.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), sentenceMentions);
                Sharpen.Collections.AddAll(annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)), sentenceMentions);

                // Record the sentence number on each mention of this sentence.
                foreach (Mention sentenceMention in sentenceMentions)
                {
                    sentenceMention.sentNum = sentenceIndex;
                }

                // Move on to the next per-sentence mention list.
                sentenceIndex++;

                // Hand out document-wide ids and tag the covered tokens with them.
                foreach (Mention mention in sentenceMentions)
                {
                    mention.mentionID = nextMentionId;
                    for (int tokenOffset = mention.startIndex; tokenOffset < mention.endIndex; tokenOffset++)
                    {
                        CoreLabel coveredToken = currentSentence.Get(typeof(CoreAnnotations.TokensAnnotation))[tokenOffset];
                        coveredToken.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)).Add(nextMentionId);
                    }
                    nextMentionId++;
                }
            }

            // Synchronize coref mentions with entity mentions.
            Dictionary<int, int> corefToEntity = new Dictionary<int, int>();
            Dictionary<int, int> entityToCoref = new Dictionary<int, int>();

            foreach (CoreLabel docToken_1 in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                // Tokens without an entity mention index take no part in the mapping.
                if (docToken_1.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation)) == null)
                {
                    continue;
                }

                int entityMentionIndex = docToken_1.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation));
                ICoreMap entityMention = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[entityMentionIndex];

                foreach (int corefMentionIndex in docToken_1.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)))
                {
                    Mention corefCandidate = annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation))[corefMentionIndex];
                    if (SynchCorefMentionEntityMention(annotation, corefCandidate, entityMention))
                    {
                        entityToCoref[entityMentionIndex] = corefMentionIndex;
                        corefToEntity[corefMentionIndex] = entityMentionIndex;
                    }
                }
            }

            // Store both mappings on the annotation.
            annotation.Set(typeof(CoreAnnotations.CorefMentionToEntityMentionMappingAnnotation), corefToEntity);
            annotation.Set(typeof(CoreAnnotations.EntityMentionToCorefMentionMappingAnnotation), entityToCoref);
        }