Esempio n. 1
0
 /// <summary>
 /// Creates a feature extractor that keeps references to the supplied
 /// collaborators and caches two settings read from <paramref name="props"/>.
 /// </summary>
 /// <param name="props">Properties queried through <c>CorefProperties</c>.</param>
 /// <param name="dictionaries">Stored as-is in the <c>dictionaries</c> field.</param>
 /// <param name="compressor">Stored as-is in the <c>compressor</c> field.</param>
 /// <param name="vocabulary">Stored as-is in the <c>vocabulary</c> field.</param>
 public FeatureExtractor(Properties props, Dictionaries dictionaries, Compressor <string> compressor, ICollection <string> vocabulary)
 {
     // Collaborators are stored by reference; nothing is copied.
     this.dictionaries = dictionaries;
     this.compressor = compressor;
     this.vocabulary = vocabulary;

     // Read both flags once so later code does not have to consult the properties again.
     bool conllInput = CorefProperties.Conll(props);
     bool constituencyParse = CorefProperties.UseConstituencyParse(props);

     // useDocSource simply mirrors the CoNLL flag.
     this.useDocSource = conllInput;
     this.useConstituencyParse = constituencyParse;
 }
Esempio n. 2
0
 /// <summary>
 /// Builds the neural coreference algorithm: reads its numeric settings from
 /// <paramref name="props"/>, loads the serialized model and the pretrained
 /// embeddings, and creates the embedding and categorical feature extractors.
 /// </summary>
 /// <param name="props">Properties queried for thresholds and resource paths.</param>
 /// <param name="dictionaries">Forwarded to the categorical feature extractor.</param>
 public NeuralCorefAlgorithm(Properties props, Dictionaries dictionaries)
 {
     // Scalar settings.
     greedyness = NeuralCorefProperties.Greedyness(props);
     maxMentionDistance = CorefProperties.MaxMentionDistance(props);
     maxMentionDistanceWithStringMatch = CorefProperties.MaxMentionDistanceWithStringMatch(props);

     // The model must be loaded before the embedding extractor, which reads
     // the model's word embeddings.
     var modelLocation = NeuralCorefProperties.ModelPath(props);
     model = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref model", modelLocation);

     // Constructor arguments are evaluated in the same left-to-right order as
     // the original single-expression call.
     bool conllInput = CorefProperties.Conll(props);
     var embeddingsLocation = NeuralCorefProperties.PretrainedEmbeddingsPath(props);
     var pretrainedEmbeddings = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref embeddings", embeddingsLocation);
     var modelWordEmbeddings = model.GetWordEmbeddings();
     embeddingExtractor = new EmbeddingExtractor(conllInput, pretrainedEmbeddings, modelWordEmbeddings);

     featureExtractor = new CategoricalFeatureExtractor(props, dictionaries);
 }
 /// <summary>
 /// Creates a data exporter that writes to two files: one for the exported
 /// data and one for the gold clusters.
 /// </summary>
 /// <param name="props">Properties from which the CoNLL flag is read.</param>
 /// <param name="dictionaries">Stored as-is in the <c>dictionaries</c> field.</param>
 /// <param name="dataPath">Path handed to <c>IOUtils.GetPrintWriter</c> for the data writer.</param>
 /// <param name="goldClusterPath">Path handed to <c>IOUtils.GetPrintWriter</c> for the gold-cluster writer.</param>
 /// <exception cref="System.Exception">
 /// Thrown, wrapping the original failure, when either writer cannot be created.
 /// </exception>
 public NeuralCorefDataExporter(Properties props, Dictionaries dictionaries, string dataPath, string goldClusterPath)
 {
     this.dictionaries = dictionaries;
     conll = CorefProperties.Conll(props);

     try
     {
         // NOTE(review): if opening the second writer fails, the first one is
         // not closed here - confirm whether that matters to callers.
         dataWriter = IOUtils.GetPrintWriter(dataPath);
         goldClusterWriter = IOUtils.GetPrintWriter(goldClusterPath);
     }
     catch (Exception cause)
     {
         // Keep the original failure as the inner exception.
         throw new Exception("Error creating data exporter", cause);
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Returns the cached pipeline, creating it on first use from a copy of
        /// <paramref name="props"/> with the "annotators" and
        /// "ner.applyFineGrained" properties overridden.
        /// </summary>
        /// <param name="props">Base properties; only consulted when the pipeline has not been built yet.</param>
        /// <returns>The shared <c>StanfordCoreNLP</c> instance held in <c>coreNLP</c>.</returns>
        private StanfordCoreNLP GetStanfordCoreNLP(Properties props)
        {
            if (coreNLP == null)
            {
                Properties pipelineProps = new Properties(props);

                // Base annotator list depends on the CoNLL flag.
                string annotators;
                if (CorefProperties.Conll(props))
                {
                    // With the CoNLL flag set, only Chinese adds "ner" to the list.
                    annotators = CorefProperties.GetLanguage(props) == Locale.Chinese ? "lemma, ner" : "lemma";
                }
                else
                {
                    string parser = CorefProperties.UseConstituencyParse(props) ? "parse" : "depparse";
                    annotators = "pos, lemma, ner, " + parser;
                }

                // Mention detection is appended unless gold mentions are used.
                if (!CorefProperties.UseGoldMentions(props))
                {
                    annotators += ", coref.mention";
                }

                pipelineProps.SetProperty("annotators", annotators);
                // Set identically in both configurations.
                pipelineProps.SetProperty("ner.applyFineGrained", "false");

                coreNLP = new StanfordCoreNLP(pipelineProps, false);
            }
            return coreNLP;
        }
Esempio n. 5
0
        /// <summary>
        /// Reads the next input document from <c>reader</c>, annotates it with the
        /// pipeline from <c>GetStanfordCoreNLP</c>, and converts it with <c>MakeDocument</c>.
        /// </summary>
        /// <returns>The built document, or <c>null</c> when the reader returns <c>null</c>.</returns>
        /// <exception cref="System.Exception"/>
        public virtual Document NextDoc()
        {
            InputDoc nextInput = reader.NextDoc();
            if (nextInput == null)
            {
                // The reader returned nothing, so there is no document to build.
                return null;
            }

            bool keepTrees = CorefProperties.UseConstituencyParse(props);
            if (!keepTrees)
            {
                // Strip any tree annotation from each sentence before the pipeline runs.
                foreach (ICoreMap sent in nextInput.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                {
                    sent.Remove(typeof(TreeCoreAnnotations.TreeAnnotation));
                }
            }

            GetStanfordCoreNLP(props).Annotate(nextInput.annotation);

            if (CorefProperties.Conll(props))
            {
                // Flag set only when the CoNLL property is on.
                nextInput.annotation.Set(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation), true);
            }

            return MakeDocument(nextInput);
        }
 /// <summary>
 /// Creates a categorical feature extractor. When the CoNLL flag is set, a
 /// genre-code-to-index table is built; otherwise <c>genres</c> is left <c>null</c>.
 /// </summary>
 /// <param name="props">Properties from which the CoNLL flag and the language are read.</param>
 /// <param name="dictionaries">Stored as-is in the <c>dictionaries</c> field.</param>
 public CategoricalFeatureExtractor(Properties props, Dictionaries dictionaries)
 {
     this.dictionaries = dictionaries;
     conll = CorefProperties.Conll(props);

     if (!conll)
     {
         // No genre table without the CoNLL flag.
         genres = null;
         return;
     }

     // English has one extra genre ("pt"); the codes after it shift down by
     // one for every other language.
     bool english = CorefProperties.GetLanguage(props) == Locale.English;
     string[] genreCodes = english
         ? new string[] { "bc", "bn", "mz", "nw", "pt", "tc", "wb" }
         : new string[] { "bc", "bn", "mz", "nw", "tc", "wb" };

     // Each code maps to its position in the list above.
     Dictionary <string, int> genreIndex = new Dictionary <string, int>();
     for (int i = 0; i < genreCodes.Length; i++)
     {
         genreIndex[genreCodes[i]] = i;
     }
     genres = genreIndex;
 }
        /// <summary>
        /// Resolves the neural coref model path: the value of the
        /// "coref.neural.modelPath" property, or a default derived from the
        /// language and the CoNLL flag.
        /// </summary>
        /// <param name="props">Properties to consult.</param>
        /// <returns>The result of <c>PropertiesUtils.GetString</c> with the computed default.</returns>
        public static string ModelPath(Properties props)
        {
            // Anything other than Chinese uses the English model name.
            string language = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
            string variant = CorefProperties.Conll(props) ? "-model-conll" : "-model-default";
            string defaultPath = "edu/stanford/nlp/models/coref/neural/" + language + variant + ".ser.gz";

            return PropertiesUtils.GetString(props, "coref.neural.modelPath", defaultPath);
        }
        /// <summary>
        /// Runs mention detection on <paramref name="annotation"/> and stores the results on it:
        /// a document-level and per-sentence list of coref mentions, a set of coref mention
        /// indexes on every token, and two dictionaries mapping coref mention indexes to
        /// entity mention indexes and back.
        /// </summary>
        /// <remarks>
        /// Side effect: every call overwrites the "removeNestedMentions" entry of
        /// <c>corefProperties</c> before mention detection runs.
        /// </remarks>
        /// <param name="annotation">The document annotation to read from and write to.</param>
        public virtual void Annotate(Annotation annotation)
        {
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            // TODO: be careful, this could introduce a really hard to find bug
            // (corefProperties is mutated below on every call).
            // This is necessary for Chinese coreference:
            // removeNested needs to be set to "false" for newswire text or there is a big performance drop.
            string docID = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));

            // Treat a missing document id as empty so the Contains check below is safe.
            if (docID == null)
            {
                docID = string.Empty;
            }
            // Special case: doc id contains "nw", input is CoNLL (by flag or by "coref.input.type"),
            // language is Chinese, and "coref.specialCaseNewswire" is enabled.
            if (docID.Contains("nw") && (CorefProperties.Conll(corefProperties) || corefProperties.GetProperty("coref.input.type", "raw").Equals("conll")) && CorefProperties.GetLanguage(corefProperties) == Locale.Chinese && PropertiesUtils.GetBool(corefProperties
                                                                                                                                                                                                                                                        , "coref.specialCaseNewswire"))
            {
                corefProperties.SetProperty("removeNestedMentions", "false");
            }
            else
            {
                corefProperties.SetProperty("removeNestedMentions", "true");
            }
            // One list of mentions per sentence; indexed below by sentence position.
            IList <IList <Mention> > mentions = md.FindMentions(annotation, dictionaries, corefProperties);

            // build list of coref mentions in this document
            annotation.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), new List <Mention>());
            // initialize indexes:
            // mentionIndex is the running document-wide mention counter,
            // currIndex is the position of the current sentence.
            int mentionIndex = 0;
            int currIndex    = 0;

            // initialize each token with an empty set of corresponding coref mention id's
            foreach (CoreLabel token in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                token.Set(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation), new ArraySet <int>());
            }
            foreach (ICoreMap sentence in sentences)
            {
                IList <Mention> mentionsForThisSentence = mentions[currIndex];
                // Record the mentions on the sentence and append them to the document-level list.
                sentence.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), mentionsForThisSentence);
                Sharpen.Collections.AddAll(annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)), mentionsForThisSentence);
                // set sentNum correctly for each coref mention
                foreach (Mention corefMention in mentionsForThisSentence)
                {
                    corefMention.sentNum = currIndex;
                }
                // increment to next list of mentions
                currIndex++;
                // assign latest mentionID, annotate tokens with coref mention info
                foreach (Mention m in mentionsForThisSentence)
                {
                    // IDs are handed out in the same order the mentions were appended above.
                    m.mentionID = mentionIndex;
                    // go through all the tokens corresponding to this coref mention
                    // (startIndex inclusive, endIndex exclusive) and
                    // annotate them with the index into the document wide coref mention list
                    for (int corefMentionTokenIndex = m.startIndex; corefMentionTokenIndex < m.endIndex; corefMentionTokenIndex++)
                    {
                        CoreLabel currToken = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[corefMentionTokenIndex];
                        currToken.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)).Add(mentionIndex);
                    }
                    mentionIndex++;
                }
            }
            // synch coref mentions to entity mentions
            Dictionary <int, int> corefMentionToEntityMentionMapping = new Dictionary <int, int>();
            Dictionary <int, int> entityMentionToCorefMentionMapping = new Dictionary <int, int>();

            foreach (CoreLabel token_1 in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                // Only tokens that carry an entity mention index take part in the synch.
                if (token_1.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation)) != null)
                {
                    int      tokenEntityMentionIndex = token_1.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation));
                    ICoreMap tokenEntityMention      = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[tokenEntityMentionIndex];
                    // Try every coref mention that covers this token.
                    foreach (int candidateCorefMentionIndex in token_1.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)))
                    {
                        Mention candidateTokenCorefMention = annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation))[candidateCorefMentionIndex];
                        if (SynchCorefMentionEntityMention(annotation, candidateTokenCorefMention, tokenEntityMention))
                        {
                            // Indexer assignment overwrites: a later match replaces an earlier one for the same key.
                            entityMentionToCorefMentionMapping[tokenEntityMentionIndex]    = candidateCorefMentionIndex;
                            corefMentionToEntityMentionMapping[candidateCorefMentionIndex] = tokenEntityMentionIndex;
                        }
                    }
                }
            }
            // store mappings between entity mentions and coref mentions in annotation
            annotation.Set(typeof(CoreAnnotations.CorefMentionToEntityMentionMappingAnnotation), corefMentionToEntityMentionMapping);
            annotation.Set(typeof(CoreAnnotations.EntityMentionToCorefMentionMappingAnnotation), entityMentionToCorefMentionMapping);
        }
Esempio n. 9
0
 /// <summary>
 /// Builds the default path of a statistical coref model, appending
 /// "_conll" to the model name when the CoNLL flag is set.
 /// </summary>
 /// <param name="props">Properties from which the CoNLL flag is read.</param>
 /// <param name="modelName">Base name of the model file, without suffix or extension.</param>
 /// <returns>The concatenated model path ending in ".ser.gz".</returns>
 private static string GetDefaultModelPath(Properties props, string modelName)
 {
     string conllSuffix = CorefProperties.Conll(props) ? "_conll" : string.Empty;

     return "edu/stanford/nlp/models/coref/statistical/" + modelName + conllSuffix + ".ser.gz";
 }