Example #1
0
        /// <summary>
        /// Runs the training sequence: preprocesses the "train" and "dev" data, builds the
        /// classification, ranking and anaphoricity pairwise models, trains them with the data
        /// path set to "train", tests them with the data path set to "dev", and finally invokes
        /// the clusterer's own training.
        /// </summary>
        /// <param name="props">Properties used for the training path, the dictionaries and the coref input selection.</param>
        /// <exception cref="System.Exception"/>
        public static void DoTraining(Properties props)
        {
            SetTrainingPath(props);
            Dictionaries dicts = new Dictionaries(props);

            // --- preprocessing: train split first, then dev ---
            SetDataPath("train");
            wordCountsFile = trainingPath + "train/word_counts.ser";
            CorefProperties.SetInput(props, CorefProperties.Dataset.Train);
            Preprocess(props, dicts, true);

            SetDataPath("dev");
            CorefProperties.SetInput(props, CorefProperties.Dataset.Dev);
            Preprocess(props, dicts, false);

            SetDataPath("train");
            // the dictionaries are not referenced again in this method, so drop the local reference
            dicts = null;

            // --- model construction ---
            PairwiseModel classifier = PairwiseModel
                                       .NewBuilder(ClassificationModel, MetaFeatureExtractor.NewBuilder().Build())
                                       .Build();
            PairwiseModel ranker = PairwiseModel
                                   .NewBuilder(RankingModel, MetaFeatureExtractor.NewBuilder().Build())
                                   .Build();
            PairwiseModel anaphoricity = PairwiseModel
                                         .NewBuilder(AnaphoricityModel, MetaFeatureExtractor.AnaphoricityMFE())
                                         .TrainingExamples(5000000)
                                         .Build();

            // --- training (ranking model first, as in the original ordering) ---
            PairwiseModelTrainer.TrainRanking(ranker);
            PairwiseModelTrainer.TrainClassification(classifier, false);
            PairwiseModelTrainer.TrainClassification(anaphoricity, true);

            // --- testing against the dev data path ---
            SetDataPath("dev");
            PairwiseModelTrainer.Test(classifier, predictionsName, false);
            PairwiseModelTrainer.Test(ranker, predictionsName, false);
            PairwiseModelTrainer.Test(anaphoricity, predictionsName, true);

            new Clusterer().DoTraining(ClusteringModelName);
        }
Example #2
0
        /// <summary>Main method of mention detection.</summary>
        /// <remarks>
        /// For every sentence this collects premarked mentions, named-entity mentions and
        /// dependency-derived NP/PRP mentions, then calls <c>FindHead</c> per sentence and
        /// <c>RemoveSpuriousMentions</c> for the whole document. Unless the properties request
        /// mention-detection training, the resulting mentions are passed to the mention classifier.
        /// </remarks>
        /// <param name="doc">Annotated document; its sentence list drives the extraction.</param>
        /// <param name="dict">Dictionaries handed to the filtering and classification steps.</param>
        /// <param name="props">Coref properties consulted for nested-mention removal and training mode.</param>
        /// <returns>One mention list per sentence, in sentence order.</returns>
        public override IList <IList <Mention> > FindMentions(Annotation doc, Dictionaries dict, Properties props)
        {
            IList <ICoreMap>               sentences       = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));
            IList <IList <Mention> >       result          = new List <IList <Mention> >();
            ICollection <string>           neStrings       = Generics.NewHashSet();
            IList <ICollection <IntPair> > spanSetsPerSent = Generics.NewArrayList();

            // pass 1: per-sentence candidate extraction
            foreach (ICoreMap sentence in sentences)
            {
                IList <Mention>       sentenceMentions = new List <Mention>();
                ICollection <IntPair> mentionSpans     = Generics.NewHashSet();
                ICollection <IntPair> neSpans          = Generics.NewHashSet();

                result.Add(sentenceMentions);
                ExtractPremarkedEntityMentions(sentence, sentenceMentions, mentionSpans, neSpans);
                HybridCorefMentionFinder.ExtractNamedEntityMentions(sentence, sentenceMentions, mentionSpans, neSpans);
                ExtractNPorPRPFromDependency(sentence, sentenceMentions, mentionSpans, neSpans);
                AddNamedEntityStrings(sentence, neStrings, neSpans);
                spanSetsPerSent.Add(mentionSpans);
            }

            //    extractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);

            // pass 2: head finding, sentence by sentence
            int sentenceIndex = 0;
            foreach (ICoreMap sentence in sentences)
            {
                FindHead(sentence, result[sentenceIndex]);
                sentenceIndex++;
            }

            // mention selection based on document-wise info
            RemoveSpuriousMentions(doc, result, dict, CorefProperties.RemoveNestedMentions(props), lang);

            // classification is skipped when this run is for mention-detection training
            bool trainingMentionDetection = CorefProperties.IsMentionDetectionTraining(props);
            if (!trainingMentionDetection)
            {
                mdClassifier.ClassifyMentions(result, dict, props);
            }
            return result;
        }
Example #3
0
        /// <summary>When mention boundaries are given</summary>
        /// <remarks>
        /// Copies the gold mentions of each sentence into a fresh list, calls <c>FindHead</c> and
        /// <c>SetBarePlural</c> on that list, and finally calls <c>RemoveSpuriousMentions</c> for the
        /// whole document. The span sets that earlier versions built per sentence were never read,
        /// so they are no longer computed.
        /// </remarks>
        /// <param name="allGoldMentions">Gold mentions, one list per sentence, indexed like the document's sentence list.</param>
        /// <param name="doc">Annotated document supplying the sentences.</param>
        /// <param name="dict">Dictionaries passed to the spurious-mention filter.</param>
        /// <param name="props">Coref properties; consulted for the nested-mention removal setting.</param>
        /// <returns>One mention list per entry of <paramref name="allGoldMentions"/>.</returns>
        public virtual IList <IList <Mention> > FilterPredictedMentions(IList <IList <Mention> > allGoldMentions, Annotation doc, Dictionaries dict, Properties props)
        {
            IList <IList <Mention> > predictedMentions = new List <IList <Mention> >();

            for (int i = 0; i < allGoldMentions.Count; i++)
            {
                ICoreMap        s        = doc.Get(typeof(CoreAnnotations.SentencesAnnotation))[i];
                IList <Mention> mentions = new List <Mention>();
                predictedMentions.Add(mentions);
                // work on a copy so the caller's gold lists are not the lists stored in the result
                Sharpen.Collections.AddAll(mentions, allGoldMentions[i]);
                FindHead(s, mentions);
                SetBarePlural(mentions);
            }
            RemoveSpuriousMentions(doc, predictedMentions, dict, CorefProperties.RemoveNestedMentions(props), lang);
            return(predictedMentions);
        }
Example #4
0
 /// <summary>
 /// Creates the coref annotator: builds the underlying <c>CorefSystem</c> and, unless
 /// <c>coref.useCustomMentionDetection</c> is set to true, a default <c>CorefMentionAnnotator</c>.
 /// </summary>
 /// <param name="props">Pipeline properties; stored on the annotator and passed to the coref system.</param>
 /// <exception cref="System.Exception">
 /// Thrown when the coref system cannot be constructed, including the unsupported combination of
 /// the hybrid algorithm with English. The original failure is attached as the inner exception.
 /// </exception>
 public CorefAnnotator(Properties props)
 {
     this.props = props;
     try
     {
         // if user tries to run with coref.language = ENGLISH and coref.algorithm = hybrid, throw Exception
         // we do not support those settings at this time
         if (CorefProperties.Algorithm(props).Equals(CorefProperties.CorefAlgorithmType.Hybrid) && CorefProperties.GetLanguage(props).Equals(Locale.English))
         {
             string unsupported = "Error: coref.algorithm=hybrid is not supported for English, " + "please change coref.algorithm or coref.language";
             log.Error(unsupported);
             // carry the explanation in the exception too, not only in the log
             throw new Exception(unsupported);
         }
         // temporarily set the loading-message flag to "false" while the coref system is built
         props.SetProperty("coref.printConLLLoadingMessage", "false");
         try
         {
             corefSystem = new CorefSystem(props);
         }
         finally
         {
             // undo the temporary setting even when construction fails, so the caller's
             // properties are not left modified
             props.Remove("coref.printConLLLoadingMessage");
         }
     }
     catch (Exception e)
     {
         string failure = "Error creating CorefAnnotator...terminating pipeline construction!";
         log.Error(failure);
         log.Error(e);
         // wrap with a message and keep the cause as the inner exception
         throw new Exception(failure, e);
     }
     // unless custom mention detection is set, just use the default coref mention detector
     performMentionDetection = !PropertiesUtils.GetBool(props, "coref.useCustomMentionDetection", false);
     if (performMentionDetection)
     {
         mentionAnnotator = new CorefMentionAnnotator(props);
     }
 }
        /// <summary>
        /// Creates the mention finder selected by <c>CorefProperties.MdType</c>, records its name
        /// in <c>mdName</c>, and, for the hybrid and rule-based finders, registers the tree and
        /// begin/end index annotations as additional requirements.
        /// </summary>
        /// <param name="props">Coref properties that select the mention detection type.</param>
        /// <param name="headFinder">Head finder passed to the hybrid and rule-based finders.</param>
        /// <returns>The mention finder instance; the rule-based finder is the fallback for any other type.</returns>
        /// <exception cref="System.TypeLoadException"/>
        /// <exception cref="System.IO.IOException"/>
        private CorefMentionFinder GetMentionFinder(Properties props, IHeadFinder headFinder)
        {
            CorefProperties.MentionDetectionType detectionType = CorefProperties.MdType(props);

            if (detectionType == CorefProperties.MentionDetectionType.Dependency)
            {
                // dependency-based detection adds no extra annotation requirements
                mdName = "dependency";
                return new DependencyCorefMentionFinder(props);
            }

            if (detectionType == CorefProperties.MentionDetectionType.Hybrid)
            {
                mdName = "hybrid";
                mentionAnnotatorRequirements.Add(typeof(TreeCoreAnnotations.TreeAnnotation));
                mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.BeginIndexAnnotation));
                mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.EndIndexAnnotation));
                return new HybridCorefMentionFinder(headFinder, props);
            }

            // MentionDetectionType.Rule and every remaining value fall through to the rule-based finder
            mentionAnnotatorRequirements.Add(typeof(TreeCoreAnnotations.TreeAnnotation));
            mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.BeginIndexAnnotation));
            mentionAnnotatorRequirements.Add(typeof(CoreAnnotations.EndIndexAnnotation));
            mdName = "rule";
            return new RuleBasedCorefMentionFinder(headFinder, props);
        }
Example #6
0
 /// <summary>
 /// Stores the properties and dictionaries, obtains the document reader and head finder, and
 /// creates a rule-based mention finder only when gold mentions are in use.
 /// </summary>
 /// <param name="props">Coref properties used for the reader, head finder and gold-mention setting.</param>
 /// <param name="dictionaries">Dictionaries kept for later document preprocessing.</param>
 /// <exception cref="System.TypeLoadException"/>
 /// <exception cref="System.IO.IOException"/>
 public DocumentMaker(Properties props, Dictionaries dictionaries)
 {
     this.props = props;
     dict       = dictionaries;
     reader     = GetDocumentReader(props);
     headFinder = CorefProperties.GetHeadFinder(props);

     // a mention finder is only created when gold mentions are used; otherwise md is null
     if (CorefProperties.UseGoldMentions(props))
     {
         md = new RuleBasedCorefMentionFinder(headFinder, props);
     }
     else
     {
         md = null;
     }
 }
Example #7
0
 /// <summary>
 /// Stores the supplied collaborators and reads two switches from the properties:
 /// whether CoNLL input is used and whether the constituency parse is used.
 /// </summary>
 /// <param name="props">Coref properties queried for the two switches.</param>
 /// <param name="dictionaries">Dictionaries kept on the extractor.</param>
 /// <param name="compressor">String compressor kept on the extractor.</param>
 /// <param name="vocabulary">Vocabulary collection kept on the extractor.</param>
 public FeatureExtractor(Properties props, Dictionaries dictionaries, Compressor <string> compressor, ICollection <string> vocabulary)
 {
     // collaborators passed in by the caller
     this.dictionaries = dictionaries;
     this.compressor   = compressor;
     this.vocabulary   = vocabulary;

     // switches derived from the properties
     useDocSource         = CorefProperties.Conll(props);
     useConstituencyParse = CorefProperties.UseConstituencyParse(props);
 }
        /// <summary>
        /// Entry point: loads properties from the file named by the first argument, selects the
        /// training dataset as input, and runs a <c>MentionDetectionEvaluator</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is passed as the value of <c>-props</c>.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            // forward the first argument as "-props <file>"
            string[]   propsArguments = { "-props", args[0] };
            Properties settings       = StringUtils.ArgsToProperties(propsArguments);
            Dictionaries dicts        = new Dictionaries(settings);

            CorefProperties.SetInput(settings, CorefProperties.Dataset.Train);

            MentionDetectionEvaluator evaluator = new MentionDetectionEvaluator();
            evaluator.Run(settings, dicts);
        }
Example #9
0
 /// <summary>
 /// Reads the greedyness and mention-distance settings, loads the coref model and the
 /// pretrained embeddings from the configured paths, and creates the embedding and
 /// categorical feature extractors.
 /// </summary>
 /// <param name="props">Properties supplying the settings and the model/embedding paths.</param>
 /// <param name="dictionaries">Dictionaries passed to the categorical feature extractor.</param>
 public NeuralCorefAlgorithm(Properties props, Dictionaries dictionaries)
 {
     // scalar settings
     greedyness         = NeuralCorefProperties.Greedyness(props);
     maxMentionDistance = CorefProperties.MaxMentionDistance(props);
     maxMentionDistanceWithStringMatch = CorefProperties.MaxMentionDistanceWithStringMatch(props);

     // the model must be loaded before the embedding extractor, which reads the model's word embeddings
     string modelPath = NeuralCorefProperties.ModelPath(props);
     model = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref model", modelPath);

     bool useConll             = CorefProperties.Conll(props);
     var  pretrainedEmbeddings = IOUtils.ReadObjectAnnouncingTimingFromURLOrClasspathOrFileSystem(log, "Loading coref embeddings", NeuralCorefProperties.PretrainedEmbeddingsPath(props));
     embeddingExtractor = new EmbeddingExtractor(useConll, pretrainedEmbeddings, model.GetWordEmbeddings());

     featureExtractor = new CategoricalFeatureExtractor(props, dictionaries);
 }
        /// <summary>
        /// Selects <paramref name="dataset"/> as the coref input, makes sure the output directory
        /// and its <c>data_raw</c> and <c>gold</c> subdirectories exist, and runs a
        /// <c>NeuralCorefDataExporter</c> that writes to files named after the lower-cased dataset.
        /// </summary>
        /// <param name="outputPath">Root directory for the exported data.</param>
        /// <param name="dataset">Dataset to export; its lower-cased name is used as the file name in both subdirectories.</param>
        /// <param name="props">Coref properties; modified by <c>CorefProperties.SetInput</c>.</param>
        /// <param name="dictionaries">Dictionaries passed to the exporter.</param>
        /// <exception cref="System.Exception"/>
        public static void ExportData(string outputPath, CorefProperties.Dataset dataset, Properties props, Dictionaries dictionaries)
        {
            CorefProperties.SetInput(props, dataset);
            string dataPath        = outputPath + "/data_raw/";
            string goldClusterPath = outputPath + "/gold/";
            // file names must not depend on the current culture, so lower-case invariantly (and only once)
            string datasetName     = dataset.ToString().ToLowerInvariant();

            IOUtils.EnsureDir(new File(outputPath));
            IOUtils.EnsureDir(new File(dataPath));
            IOUtils.EnsureDir(new File(goldClusterPath));
            new Edu.Stanford.Nlp.Coref.Neural.NeuralCorefDataExporter(props, dictionaries, dataPath + datasetName, goldClusterPath + datasetName).Run(props, dictionaries);
        }
Example #11
0
        /// <summary>Main method of mention detection.</summary>
        /// <remarks>
        /// Main method of mention detection.
        /// Extract all NP, PRP or NE, and filter out by manually written patterns.
        /// The per-sentence extraction is followed by head finding and bare-plural marking, and
        /// then by a language-specific spurious-mention filter: the English filter when the language
        /// is English and liberal mention detection is off, one of two Chinese filters when the
        /// language is Chinese, and no filter otherwise.
        /// </remarks>
        /// <param name="doc">Annotated document; its sentence list drives the extraction.</param>
        /// <param name="dict">Dictionaries passed to the spurious-mention filters.</param>
        /// <param name="props">Coref properties consulted for liberal mention detection and nested-mention removal.</param>
        /// <returns>One mention list per sentence, in sentence order.</returns>
        public override IList <IList <Mention> > FindMentions(Annotation doc, Dictionaries dict, Properties props)
        {
            IList <IList <Mention> >       predictedMentions  = new List <IList <Mention> >();
            ICollection <string>           neStrings          = Generics.NewHashSet();
            IList <ICollection <IntPair> > mentionSpanSetList = Generics.NewArrayList();
            IList <ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // extract premarked mentions, NP/PRP, named entity, enumerations
            foreach (ICoreMap s in sentences)
            {
                IList <Mention> mentions = new List <Mention>();
                predictedMentions.Add(mentions);
                ICollection <IntPair> mentionSpanSet     = Generics.NewHashSet();
                ICollection <IntPair> namedEntitySpanSet = Generics.NewHashSet();
                ExtractPremarkedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
                ExtractNamedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
                ExtractNPorPRP(s, mentions, mentionSpanSet, namedEntitySpanSet);
                ExtractEnumerations(s, mentions, mentionSpanSet, namedEntitySpanSet);
                AddNamedEntityStrings(s, neStrings, namedEntitySpanSet);
                mentionSpanSetList.Add(mentionSpanSet);
            }

            // read the liberal-MD switch once; it is consulted at several points below
            bool liberalMD = CorefProperties.LiberalMD(props);
            if (liberalMD)
            {
                ExtractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);
            }
            // find head
            // (the loop bound is declared here; the previous code referenced an undeclared 'sz')
            for (int i = 0, sz = sentences.Count; i < sz; i++)
            {
                FindHead(sentences[i], predictedMentions[i]);
                SetBarePlural(predictedMentions[i]);
            }
            // mention selection based on document-wise info
            if (lang == Locale.English && !liberalMD)
            {
                RemoveSpuriousMentionsEn(doc, predictedMentions, dict);
            }
            else if (lang == Locale.Chinese)
            {
                if (liberalMD)
                {
                    RemoveSpuriousMentionsZhSimple(doc, predictedMentions, dict);
                }
                else
                {
                    RemoveSpuriousMentionsZh(doc, predictedMentions, dict, CorefProperties.RemoveNestedMentions(props));
                }
            }
            return(predictedMentions);
        }
 /// <summary>
 /// Records the CoNLL switch and the dictionaries, then opens the two print writers used
 /// for the exported data and the gold clusters.
 /// </summary>
 /// <param name="props">Coref properties queried for the CoNLL switch.</param>
 /// <param name="dictionaries">Dictionaries kept on the exporter.</param>
 /// <param name="dataPath">Path the data writer is opened on.</param>
 /// <param name="goldClusterPath">Path the gold-cluster writer is opened on.</param>
 /// <exception cref="System.Exception">Wraps any failure to open either writer.</exception>
 public NeuralCorefDataExporter(Properties props, Dictionaries dictionaries, string dataPath, string goldClusterPath)
 {
     conll             = CorefProperties.Conll(props);
     this.dictionaries = dictionaries;

     try
     {
         // data writer first, gold-cluster writer second
         dataWriter        = IOUtils.GetPrintWriter(dataPath);
         goldClusterWriter = IOUtils.GetPrintWriter(goldClusterPath);
     }
     catch (Exception cause)
     {
         // surface any failure with a single message, keeping the cause attached
         throw new Exception("Error creating data exporter", cause);
     }
 }
Example #13
0
        /// <summary>
        /// Lists the annotation types this annotator needs. The base set is extended with the
        /// tree and category annotations unless dependency-based mention detection is configured,
        /// and with the coref mentions annotation when this annotator does not perform mention
        /// detection itself.
        /// </summary>
        /// <returns>An unmodifiable set of the required annotation types.</returns>
        public virtual ICollection <Type> Requires()
        {
            ICollection <Type> required = new HashSet <Type>(Arrays.AsList(
                typeof(CoreAnnotations.TextAnnotation),
                typeof(CoreAnnotations.TokensAnnotation),
                typeof(CoreAnnotations.CharacterOffsetBeginAnnotation),
                typeof(CoreAnnotations.CharacterOffsetEndAnnotation),
                typeof(CoreAnnotations.IndexAnnotation),
                typeof(CoreAnnotations.ValueAnnotation),
                typeof(CoreAnnotations.SentencesAnnotation),
                typeof(CoreAnnotations.SentenceIndexAnnotation),
                typeof(CoreAnnotations.PartOfSpeechAnnotation),
                typeof(CoreAnnotations.LemmaAnnotation),
                typeof(CoreAnnotations.NamedEntityTagAnnotation),
                typeof(CoreAnnotations.EntityTypeAnnotation),
                typeof(CoreAnnotations.MentionsAnnotation),
                typeof(CoreAnnotations.EntityMentionIndexAnnotation),
                typeof(CoreAnnotations.CoarseNamedEntityTagAnnotation),
                typeof(CoreAnnotations.FineGrainedNamedEntityTagAnnotation),
                typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation),
                typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation)));

            // every mention detection type other than Dependency also needs the tree and category annotations
            bool dependencyOnly = CorefProperties.MdType(this.props) == CorefProperties.MentionDetectionType.Dependency;
            if (!dependencyOnly)
            {
                required.Add(typeof(TreeCoreAnnotations.TreeAnnotation));
                required.Add(typeof(CoreAnnotations.CategoryAnnotation));
            }

            // when mention detection is not performed here, the coref mentions must already be present
            if (!performMentionDetection)
            {
                required.Add(typeof(CorefCoreAnnotations.CorefMentionsAnnotation));
            }

            return Java.Util.Collections.UnmodifiableSet(required);
        }
Example #14
0
        /// <summary>
        /// Builds a CoNLL document reader for the configured input path, or returns null when
        /// no input path is configured.
        /// </summary>
        /// <param name="props">
        /// Coref properties; read for the input path, the <c>coref.printConLLLoadingMessage</c> flag,
        /// the <c>coref.conllFileFilter</c> pattern (default <c>.*_auto_conll$</c>) and the language.
        /// </param>
        /// <returns>A configured <c>CoNLLDocumentReader</c>, or null if the input path is null.</returns>
        private static IDocReader GetDocumentReader(Properties props)
        {
            string inputPath = CorefProperties.GetInputPath(props);
            if (inputPath == null)
            {
                return null;
            }

            CoNLLDocumentReader.Options readerOptions = new CoNLLDocumentReader.Options();

            // only override the option when the property explicitly turns the message off;
            // otherwise the option keeps whatever value Options starts with
            bool printLoadingMessage = PropertiesUtils.GetBool(props, "coref.printConLLLoadingMessage", true);
            if (!printLoadingMessage)
            {
                readerOptions.printConLLLoadingMessage = false;
            }
            readerOptions.annotateTokenCoref = false;
            readerOptions.SetFilter(props.GetProperty("coref.conllFileFilter", ".*_auto_conll$"));
            readerOptions.lang = CorefProperties.GetLanguage(props);

            return new CoNLLDocumentReader(inputPath, readerOptions);
        }
Example #15
0
        /// <summary>
        /// Returns the cached pipeline, creating it on first use. The annotator list depends on
        /// whether CoNLL input is used, on the language, on the parse type, and on whether gold
        /// mentions are used (in which case <c>coref.mention</c> is left out).
        /// </summary>
        /// <param name="props">Coref properties the pipeline properties are derived from.</param>
        /// <returns>The shared <c>StanfordCoreNLP</c> instance.</returns>
        private StanfordCoreNLP GetStanfordCoreNLP(Properties props)
        {
            if (coreNLP == null)
            {
                Properties pipelineProps = new Properties(props);
                string     annotators;

                if (CorefProperties.Conll(props))
                {
                    // CoNLL input: only lemma (plus ner for Chinese) is requested
                    annotators = CorefProperties.GetLanguage(props) == Locale.Chinese ? "lemma, ner" : "lemma";
                }
                else
                {
                    string parser = CorefProperties.UseConstituencyParse(props) ? "parse" : "depparse";
                    annotators = "pos, lemma, ner, " + parser;
                }

                // mention detection is appended unless gold mentions are used
                if (!CorefProperties.UseGoldMentions(props))
                {
                    annotators += ", coref.mention";
                }

                pipelineProps.SetProperty("annotators", annotators);
                pipelineProps.SetProperty("ner.applyFineGrained", "false");
                coreNLP = new StanfordCoreNLP(pipelineProps, false);
            }
            return coreNLP;
        }
Example #16
0
        /// <summary>
        /// Reads the next input document, annotates it with the pipeline and turns it into a
        /// <c>Document</c>. Tree annotations are removed from every sentence first when the
        /// constituency parse is not in use.
        /// </summary>
        /// <returns>The next document, or null when the reader has no more input.</returns>
        /// <exception cref="System.Exception"/>
        public virtual Document NextDoc()
        {
            InputDoc nextInput = reader.NextDoc();
            if (nextInput == null)
            {
                return null;
            }

            bool keepTrees = CorefProperties.UseConstituencyParse(props);
            if (!keepTrees)
            {
                // strip tree annotations before the pipeline runs
                IList <ICoreMap> sentences = nextInput.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
                foreach (ICoreMap sentence in sentences)
                {
                    sentence.Remove(typeof(TreeCoreAnnotations.TreeAnnotation));
                }
            }

            GetStanfordCoreNLP(props).Annotate(nextInput.annotation);

            if (CorefProperties.Conll(props))
            {
                nextInput.annotation.Set(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation), true);
            }

            return MakeDocument(nextInput);
        }
 /// <summary>
 /// Stores the properties, loads the dictionaries and head finder, creates the mention finder
 /// and registers the annotation types this annotator requires.
 /// </summary>
 /// <remarks>
 /// NOTE(review): any exception raised during setup is only logged at info level and not
 /// rethrown, so fields assigned after the failing step keep their previous values.
 /// </remarks>
 /// <param name="props">Coref properties used for every setup step.</param>
 public CorefMentionAnnotator(Properties props)
 {
     try
     {
         corefProperties = props;
         dictionaries    = new Dictionaries(props);
         headFinder      = CorefProperties.GetHeadFinder(props);

         // GetMentionFinder also sets mdName, which is logged right after
         md = GetMentionFinder(props, headFinder);
         log.Info("Using mention detector type: " + mdName);

         // requirements shared by every mention detector type
         var sharedRequirements = Arrays.AsList(
             typeof(CoreAnnotations.TokensAnnotation),
             typeof(CoreAnnotations.SentencesAnnotation),
             typeof(CoreAnnotations.PartOfSpeechAnnotation),
             typeof(CoreAnnotations.NamedEntityTagAnnotation),
             typeof(CoreAnnotations.EntityTypeAnnotation),
             typeof(CoreAnnotations.IndexAnnotation),
             typeof(CoreAnnotations.TextAnnotation),
             typeof(CoreAnnotations.ValueAnnotation),
             typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation),
             typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
         Sharpen.Collections.AddAll(mentionAnnotatorRequirements, sharedRequirements);
     }
     catch (Exception failure)
     {
         log.Info("Error with building coref mention annotator!");
         log.Info(failure);
     }
 }
 /// <summary>
 /// Stores the dictionaries and the CoNLL switch. With CoNLL input a genre-to-index table is
 /// built (bc, bn, mz, nw, then pt for English only, then tc and wb); otherwise the table is null.
 /// </summary>
 /// <param name="props">Coref properties queried for the CoNLL switch and the language.</param>
 /// <param name="dictionaries">Dictionaries kept on the extractor.</param>
 public CategoricalFeatureExtractor(Properties props, Dictionaries dictionaries)
 {
     this.dictionaries = dictionaries;
     conll             = CorefProperties.Conll(props);

     if (!conll)
     {
         genres = null;
         return;
     }

     // genre ids are consecutive in insertion order; "pt" only takes a slot for English,
     // which shifts "tc" and "wb" up by one in that case
     genres = new Dictionary <string, int>();
     int nextGenreId = 0;
     genres["bc"] = nextGenreId++;
     genres["bn"] = nextGenreId++;
     genres["mz"] = nextGenreId++;
     genres["nw"] = nextGenreId++;
     if (CorefProperties.GetLanguage(props) == Locale.English)
     {
         genres["pt"] = nextGenreId++;
     }
     genres["tc"] = nextGenreId++;
     genres["wb"] = nextGenreId++;
 }
Example #19
0
        /// <summary>
        /// Builds a <c>Document</c> from an input document. With gold mentions enabled, a new
        /// mention is created for every gold mention span and heads are found with the mention
        /// finder; otherwise the mentions already stored on each sentence are used.
        /// </summary>
        /// <param name="input">Input document with its annotation and, optionally, gold mentions.</param>
        /// <returns>The preprocessed document.</returns>
        /// <exception cref="System.Exception"/>
        public virtual Document MakeDocument(InputDoc input)
        {
            IList <IList <Mention> > mentionsBySentence = new List <IList <Mention> >();

            bool             useGold   = CorefProperties.UseGoldMentions(props);
            IList <ICoreMap> sentences = input.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

            if (useGold)
            {
                int sentenceIndex = 0;
                foreach (ICoreMap sentence in sentences)
                {
                    IList <CoreLabel> tokens        = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                    IList <Mention>   goldForSentence = new List <Mention>();
                    mentionsBySentence.Add(goldForSentence);

                    foreach (Mention gold in input.goldMentions[sentenceIndex])
                    {
                        // copy the gold span [startIndex, endIndex) out of the sentence tokens
                        List <CoreLabel> span = new List <CoreLabel>(tokens.SubList(gold.startIndex, gold.endIndex));
                        goldForSentence.Add(new Mention(-1, gold.startIndex, gold.endIndex, tokens, null, null, span));
                    }
                    md.FindHead(sentence, goldForSentence);
                    sentenceIndex++;
                }
            }
            else
            {
                // use the mentions already attached to each sentence
                foreach (ICoreMap sentence in sentences)
                {
                    mentionsBySentence.Add(sentence.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)));
                }
            }

            Document result = new Document(input, mentionsBySentence);
            if (input.goldMentions != null)
            {
                FindGoldMentionHeads(result);
            }
            DocumentPreprocessor.Preprocess(result, dict, null, headFinder);
            return result;
        }
 /// <summary>
 /// Convenience constructor: reads the word counts path, ranking model path, mention distance
 /// limits and pairwise score thresholds from the properties and delegates to the full constructor.
 /// </summary>
 /// <param name="props">Properties supplying every derived argument.</param>
 /// <param name="dictionaries">Dictionaries forwarded unchanged.</param>
 public StatisticalCorefAlgorithm(Properties props, Dictionaries dictionaries)
     : this(
         props,
         dictionaries,
         StatisticalCorefProperties.WordCountsPath(props),
         StatisticalCorefProperties.RankingModelPath(props),
         CorefProperties.MaxMentionDistance(props),
         CorefProperties.MaxMentionDistanceWithStringMatch(props),
         StatisticalCorefProperties.PairwiseScoreThresholds(props))
 {
     // all work happens in the delegated constructor
 }
        /// <summary>
        /// Returns the pretrained embeddings path from <c>coref.neural.embeddingsPath</c>, falling
        /// back to the bundled Chinese or English embeddings depending on the configured language.
        /// </summary>
        /// <param name="props">Properties queried for the language and the optional path override.</param>
        /// <returns>The configured path, or the language-specific default.</returns>
        public static string PretrainedEmbeddingsPath(Properties props)
        {
            // every language other than Chinese falls back to the English embeddings
            string languageName = CorefProperties.GetLanguage(props) == Locale.Chinese ? "chinese" : "english";
            string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languageName + "-embeddings.ser.gz";

            return PropertiesUtils.GetString(props, "coref.neural.embeddingsPath", fallbackPath);
        }
        /// <summary>
        /// Finds coref mentions for the document and records them on the annotation, its sentences
        /// and its tokens, then stores the mappings between coref mentions and entity mentions.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method:
        /// the <c>removeNestedMentions</c> property of <c>corefProperties</c> is overwritten on every call;
        /// the document and each sentence receive a <c>CorefMentionsAnnotation</c>;
        /// every token receives a <c>CorefMentionIndexesAnnotation</c> set;
        /// each mention gets its <c>sentNum</c> and <c>mentionID</c> assigned;
        /// the two entity/coref mention mapping annotations are set on the document.
        /// </remarks>
        /// <param name="annotation">Document annotation to read sentences and tokens from and to write results to.</param>
        public virtual void Annotate(Annotation annotation)
        {
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            // TO DO: be careful, this could introduce a really hard to find bug
            // this is necessary for Chinese coreference
            // removeNested needs to be set to "false" for newswire text or big performance drop
            string docID = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));

            if (docID == null)
            {
                docID = string.Empty;
            }
            // nested-mention removal is switched off only when all of these hold: the doc id contains "nw",
            // the input is CoNLL, the language is Chinese, and coref.specialCaseNewswire is set
            if (docID.Contains("nw") && (CorefProperties.Conll(corefProperties) || corefProperties.GetProperty("coref.input.type", "raw").Equals("conll")) && CorefProperties.GetLanguage(corefProperties) == Locale.Chinese && PropertiesUtils.GetBool(corefProperties
                                                                                                                                                                                                                                                        , "coref.specialCaseNewswire"))
            {
                corefProperties.SetProperty("removeNestedMentions", "false");
            }
            else
            {
                corefProperties.SetProperty("removeNestedMentions", "true");
            }
            // one mention list per sentence; indexed below with currIndex
            IList <IList <Mention> > mentions = md.FindMentions(annotation, dictionaries, corefProperties);

            // build list of coref mentions in this document
            annotation.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), new List <Mention>());
            // initialize indexes
            // mentionIndex counts mentions across the whole document; currIndex counts sentences
            int mentionIndex = 0;
            int currIndex    = 0;

            // initialize each token with an empty set of corresponding coref mention id's
            foreach (CoreLabel token in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                token.Set(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation), new ArraySet <int>());
            }
            foreach (ICoreMap sentence in sentences)
            {
                IList <Mention> mentionsForThisSentence = mentions[currIndex];
                sentence.Set(typeof(CorefCoreAnnotations.CorefMentionsAnnotation), mentionsForThisSentence);
                // append this sentence's mentions to the document-wide list
                Sharpen.Collections.AddAll(annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)), mentionsForThisSentence);
                // set sentNum correctly for each coref mention
                // (currIndex has not been incremented yet, so sentNum is the zero-based sentence position)
                foreach (Mention corefMention in mentionsForThisSentence)
                {
                    corefMention.sentNum = currIndex;
                }
                // increment to next list of mentions
                currIndex++;
                // assign latest mentionID, annotate tokens with coref mention info
                foreach (Mention m in mentionsForThisSentence)
                {
                    m.mentionID = mentionIndex;
                    // go through all the tokens corresponding to this coref mention
                    // annotate them with the index into the document wide coref mention list
                    for (int corefMentionTokenIndex = m.startIndex; corefMentionTokenIndex < m.endIndex; corefMentionTokenIndex++)
                    {
                        CoreLabel currToken = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[corefMentionTokenIndex];
                        currToken.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)).Add(mentionIndex);
                    }
                    mentionIndex++;
                }
            }
            // synch coref mentions to entity mentions
            Dictionary <int, int> corefMentionToEntityMentionMapping = new Dictionary <int, int>();
            Dictionary <int, int> entityMentionToCorefMentionMapping = new Dictionary <int, int>();

            foreach (CoreLabel token_1 in annotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                // only tokens that belong to an entity mention are considered
                if (token_1.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation)) != null)
                {
                    int      tokenEntityMentionIndex = token_1.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation));
                    ICoreMap tokenEntityMention      = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[tokenEntityMentionIndex];
                    // try every coref mention covering this token; a later match overwrites an earlier one
                    foreach (int candidateCorefMentionIndex in token_1.Get(typeof(CorefCoreAnnotations.CorefMentionIndexesAnnotation)))
                    {
                        Mention candidateTokenCorefMention = annotation.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation))[candidateCorefMentionIndex];
                        if (SynchCorefMentionEntityMention(annotation, candidateTokenCorefMention, tokenEntityMention))
                        {
                            entityMentionToCorefMentionMapping[tokenEntityMentionIndex]    = candidateCorefMentionIndex;
                            corefMentionToEntityMentionMapping[candidateCorefMentionIndex] = tokenEntityMentionIndex;
                        }
                    }
                }
            }
            // store mappings between entity mentions and coref mentions in annotation
            annotation.Set(typeof(CoreAnnotations.CorefMentionToEntityMentionMappingAnnotation), corefMentionToEntityMentionMapping);
            annotation.Set(typeof(CoreAnnotations.EntityMentionToCorefMentionMappingAnnotation), entityMentionToCorefMentionMapping);
        }
        /// <summary>
        /// Runs the hybrid coreference system over every document supplied by the system's
        /// document maker, optionally writing CoNLL-style output files and scoring them.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// hides the listed Redwood debug channels; optionally redirects the Redwood log to the
        /// file named by <c>HybridCorefProperties.LogProp</c>; optionally reports memory usage;
        /// builds a <c>HybridCorefSystem</c> from <paramref name="props"/>; when
        /// <c>HybridCorefProperties.DoScore</c> is true, opens gold / predicted / coref-predicted
        /// output files under <c>CorefProperties.ConllOutputPath</c>; feeds each document to a
        /// <c>MulticoreWrapper</c> and drains its results through <c>LogOutput</c>; closes the
        /// writers; optionally prints elapsed time and memory usage; and finally, when scoring is
        /// enabled, evaluates both predicted files against the gold file via <c>CorefScorer</c>.
        /// The output writers are closed in a <c>finally</c> block so that a failure while
        /// opening a later file or while processing documents does not leak open file handles.
        /// </remarks>
        /// <param name="props">Configuration read through <c>HybridCorefProperties</c> and <c>CorefProperties</c>.</param>
        /// <exception cref="System.Exception"/>
        public static void RunCoref(Properties props)
        {
            /*
             * property, environment setting
             */
            Redwood.HideChannelsEverywhere("debug-cluster", "debug-mention", "debug-preprocessor", "debug-docreader", "debug-mergethres", "debug-featureselection", "debug-md");
            int    nThreads  = HybridCorefProperties.GetThreadCounts(props);
            // Timestamp with whitespace and ':' replaced by '-' so it can be embedded in file names.
            string timeStamp = Calendar.GetInstance().GetTime().ToString().ReplaceAll("\\s", "-").ReplaceAll(":", "-");
            Logger logger    = Logger.GetLogger(typeof(Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem).FullName);

            // set log file path
            if (props.Contains(HybridCorefProperties.LogProp))
            {
                File logFile = new File(props.GetProperty(HybridCorefProperties.LogProp));
                RedwoodConfiguration.Current().Handlers(RedwoodConfiguration.Handlers.File(logFile)).Apply();
                Redwood.Log("Starting coref log");
            }
            log.Info(props.ToString());
            if (HybridCorefProperties.CheckMemory(props))
            {
                CheckMemoryUsage();
            }
            Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem cs = new Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem(props);

            /*
             * output setting
             */
            // prepare conll output; paths and writers stay null when scoring is disabled
            string      goldOutput        = null;
            string      beforeCorefOutput = null;
            string      afterCorefOutput  = null;
            PrintWriter writerGold        = null;
            PrintWriter writerBeforeCoref = null;
            PrintWriter writerAfterCoref  = null;
            // Declared outside the try block because the elapsed-time report below needs it.
            DateTime    startTime         = null;

            try
            {
                if (HybridCorefProperties.DoScore(props))
                {
                    string pathOutput = CorefProperties.ConllOutputPath(props);
                    (new File(pathOutput)).Mkdir();
                    goldOutput        = pathOutput + "output-" + timeStamp + ".gold.txt";
                    beforeCorefOutput = pathOutput + "output-" + timeStamp + ".predicted.txt";
                    afterCorefOutput  = pathOutput + "output-" + timeStamp + ".coref.predicted.txt";
                    writerGold        = new PrintWriter(new FileOutputStream(goldOutput));
                    writerBeforeCoref = new PrintWriter(new FileOutputStream(beforeCorefOutput));
                    writerAfterCoref  = new PrintWriter(new FileOutputStream(afterCorefOutput));
                }
                // run coref
                MulticoreWrapper <Pair <Document, Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem>, StringBuilder[]> wrapper =
                    new MulticoreWrapper <Pair <Document, Edu.Stanford.Nlp.Coref.Hybrid.HybridCorefSystem>, StringBuilder[]>(nThreads, new _IThreadsafeProcessor_134());
                // conll output and logs
                if (HybridCorefProperties.CheckTime(props))
                {
                    startTime = new DateTime();
                    System.Console.Error.Printf("END-TO-END COREF Start time: %s\n", startTime);
                }
                // run processes
                int docCnt = 0;

                while (true)
                {
                    Document document = cs.docMaker.NextDoc();
                    if (document == null)
                    {
                        break;
                    }
                    wrapper.Put(Pair.MakePair(document, cs));
                    // Drain whatever results are already available after each submission.
                    docCnt = LogOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
                }
                // Finished reading the input. Wait for jobs to finish
                wrapper.Join();
                docCnt = LogOutput(wrapper, writerGold, writerBeforeCoref, writerAfterCoref, docCnt);
            }
            finally
            {
                // Always release the output files, even when processing failed part-way.
                // The writers are null when scoring is disabled (the pre-existing code already
                // passed null writers to this helper in that case).
                IOUtils.CloseIgnoringExceptions(writerGold);
                IOUtils.CloseIgnoringExceptions(writerBeforeCoref);
                IOUtils.CloseIgnoringExceptions(writerAfterCoref);
            }
            if (HybridCorefProperties.CheckTime(props))
            {
                System.Console.Error.Printf("END-TO-END COREF Elapsed time: %.3f seconds\n", (((new DateTime()).GetTime() - startTime.GetTime()) / 1000F));
            }
            //      System.err.printf("CORENLP PROCESS TIME TOTAL: %.3f seconds\n", cs.mentionExtractor.corenlpProcessTime);
            if (HybridCorefProperties.CheckMemory(props))
            {
                CheckMemoryUsage();
            }
            // scoring: runs only after the writers are closed, since it reads the files they wrote
            if (HybridCorefProperties.DoScore(props))
            {
                string summary = CorefScorer.GetEvalSummary(CorefProperties.GetScorerPath(props), goldOutput, beforeCorefOutput);
                CorefScorer.PrintScoreSummary(summary, logger, false);
                summary = CorefScorer.GetEvalSummary(CorefProperties.GetScorerPath(props), goldOutput, afterCorefOutput);
                CorefScorer.PrintScoreSummary(summary, logger, true);
                CorefScorer.PrintFinalConllScore(summary);
            }
        }
        /// <summary>Determines which neural coref model path to use for the given properties.</summary>
        /// <remarks>
        /// A default path is assembled from a language segment ("chinese" when
        /// <c>CorefProperties.GetLanguage(props)</c> equals <c>Locale.Chinese</c>, "english" otherwise)
        /// and a variant segment chosen by <c>CorefProperties.Conll(props)</c>. That default is then
        /// handed to <c>PropertiesUtils.GetString</c> together with the key "coref.neural.modelPath".
        /// </remarks>
        /// <param name="props">Properties consulted for language, CoNLL mode and the model path key.</param>
        /// <returns>The value produced by <c>PropertiesUtils.GetString</c> for "coref.neural.modelPath".</returns>
        public static string ModelPath(Properties props)
        {
            string languageSegment;
            if (CorefProperties.GetLanguage(props) == Locale.Chinese)
            {
                languageSegment = "chinese";
            }
            else
            {
                languageSegment = "english";
            }

            string variantSegment;
            if (CorefProperties.Conll(props))
            {
                variantSegment = "-model-conll";
            }
            else
            {
                variantSegment = "-model-default";
            }

            string fallbackPath = "edu/stanford/nlp/models/coref/neural/" + languageSegment + variantSegment + ".ser.gz";
            return PropertiesUtils.GetString(props, "coref.neural.modelPath", fallbackPath);
        }
Example #25
0
 /// <summary>
 /// Creates a mention finder from a head finder and a properties object by delegating to
 /// the three-argument constructor.
 /// </summary>
 /// <remarks>
 /// The first argument of the delegated call is always <c>true</c>
 /// (NOTE(review): the meaning of this flag is defined by the three-argument overload, which
 /// is not visible here; confirm). The language is taken from
 /// <c>CorefProperties.GetLanguage(props)</c>.
 /// </remarks>
 /// <param name="headFinder">Head finder forwarded unchanged to the delegated constructor.</param>
 /// <param name="props">Properties from which the language is read.</param>
 public RuleBasedCorefMentionFinder(IHeadFinder headFinder, Properties props)
     : this(true, headFinder, CorefProperties.GetLanguage(props))
 {
 }
Example #26
0
 /// <summary>
 /// Creates a hybrid mention finder, recording the head finder and language and, unless
 /// mention detection is being trained, loading the mention-detection classifier.
 /// </summary>
 /// <param name="headFinder">Head finder stored on this instance.</param>
 /// <param name="props">
 /// Properties supplying the language, the mention-detection-training flag and the
 /// mention-detection model location.
 /// </param>
 /// <exception cref="System.TypeLoadException"/>
 /// <exception cref="System.IO.IOException"/>
 public HybridCorefMentionFinder(IHeadFinder headFinder, Properties props)
 {
     this.headFinder = headFinder;
     this.lang       = CorefProperties.GetLanguage(props);

     bool trainingMentionDetector = CorefProperties.IsMentionDetectionTraining(props);
     if (trainingMentionDetector)
     {
         // No classifier is loaded while mention detection itself is being trained.
         mdClassifier = null;
     }
     else
     {
         mdClassifier = IOUtils.ReadObjectFromURLOrClasspathOrFileSystem(CorefProperties.GetMentionDetectionModel(props));
     }
 }
Example #27
0
 /// <summary>Builds the default path of a statistical coref model file.</summary>
 /// <param name="props">Properties consulted via <c>CorefProperties.Conll</c> to pick the file-name suffix.</param>
 /// <param name="modelName">Base name of the model, placed directly after the directory prefix.</param>
 /// <returns>
 /// "edu/stanford/nlp/models/coref/statistical/" + <paramref name="modelName"/> +
 /// ("_conll" when <c>CorefProperties.Conll(props)</c> is true, otherwise nothing) + ".ser.gz".
 /// </returns>
 private static string GetDefaultModelPath(Properties props, string modelName)
 {
     string conllSuffix = string.Empty;
     if (CorefProperties.Conll(props))
     {
         conllSuffix = "_conll";
     }

     string path = "edu/stanford/nlp/models/coref/statistical/" + modelName + conllSuffix + ".ser.gz";
     return path;
 }