/// <summary>
/// Creates the annotator from <paramref name="props"/>: builds the sieve coreference system,
/// a mention extractor backed by that system's dictionaries and semantics, and reads the
/// configuration flags used later by <c>Annotate</c>.
/// </summary>
/// <param name="props">
/// Configuration properties. Read here: <c>oldCorefFormat</c> (default <c>"false"</c>),
/// <c>Constants.AllowReparsingProp</c> and <c>dcoref.useCustomMentionDetection</c>
/// (default <c>false</c>). The same object is also handed to the coreference system and,
/// when default mention detection is used, to the mention annotator.
/// </param>
/// <exception cref="Exception">
/// Thrown when any step of the setup fails; the failure is logged first and is available
/// as <see cref="Exception.InnerException"/>.
/// </exception>
public DeterministicCorefAnnotator(Properties props)
{
    // for backward compatibility
    try
    {
        corefSystem = new SieveCoreferenceSystem(props);
        mentionExtractor = new MentionExtractor(corefSystem.Dictionaries(), corefSystem.Semantics());
        OldFormat = bool.Parse(props.GetProperty("oldCorefFormat", "false"));
        allowReparsing = PropertiesUtils.GetBool(props, Constants.AllowReparsingProp, Constants.AllowReparsing);

        // unless custom mention detection is set, just use the default coref mention detector
        performMentionDetection = !PropertiesUtils.GetBool(props, "dcoref.useCustomMentionDetection", false);
        if (performMentionDetection)
        {
            mentionAnnotator = new CorefMentionAnnotator(props);
        }
    }
    catch (Exception e)
    {
        log.Error("cannot create DeterministicCorefAnnotator!");
        log.Error(e);
        // System.Exception has no (Exception) constructor; wrap with a message and keep the
        // original failure as the inner exception so its stack trace is not lost.
        throw new Exception("cannot create DeterministicCorefAnnotator!", e);
    }
}
/// <summary>
/// Runs coreference resolution over <paramref name="annotation"/> and stores the resulting
/// chains under <c>CorefCoreAnnotations.CorefChainAnnotation</c>.
/// </summary>
/// <remarks>
/// The named entity tag granularity is set to "coarse" for the duration of the call and is
/// always set back to "fine" before the method exits, including on failure. Each sentence
/// additionally receives an <c>AlternativeDependenciesAnnotation</c> built from its tree.
/// If the annotation carries no <c>SentencesAnnotation</c>, an error is logged and no
/// coreference output is produced. When <c>OldFormat</c> is set, the obsolete coreference
/// annotations are added as well.
/// </remarks>
/// <param name="annotation">The document annotation to read from and write results to.</param>
public virtual void Annotate(Annotation annotation)
{
    // temporarily set the primary named entity tag to the coarse tag
    SetNamedEntityTagGranularity(annotation, "coarse");
    try
    {
        // Mention detection runs inside the try so that a failure here still restores the
        // fine-grained tags in the finally block below.
        if (performMentionDetection)
        {
            mentionAnnotator.Annotate(annotation);
        }

        // we are only supporting the new annotation standard for this Annotator!
        if (!annotation.ContainsKey(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            log.Error("this coreference resolution system requires SentencesAnnotation!");
            return;
        }

        // extract trees and sentence words
        IList<Tree> trees = new List<Tree>();
        IList<IList<CoreLabel>> sentences = new List<IList<CoreLabel>>();
        bool hasSpeakerAnnotations = false;
        foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            sentences.Add(tokens);
            Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            trees.Add(tree);

            // locking here is crucial for correct threading!
            // NOTE(review): no lock is taken in this method; the remark presumably refers to
            // the final `true` argument of MakeFromTree - confirm against its signature.
            SemanticGraph dependencies = SemanticGraphFactory.MakeFromTree(tree, SemanticGraphFactory.Mode.Collapsed, GrammaticalStructure.Extras.None, null, true);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation), dependencies);

            if (!hasSpeakerAnnotations)
            {
                // check for speaker annotations; one annotated token anywhere is enough
                foreach (CoreLabel t in tokens)
                {
                    if (t.Get(typeof(CoreAnnotations.SpeakerAnnotation)) != null)
                    {
                        hasSpeakerAnnotations = true;
                        break;
                    }
                }
            }

            MentionExtractor.MergeLabels(tree, tokens);
            MentionExtractor.InitializeUtterance(tokens);
        }

        if (hasSpeakerAnnotations)
        {
            annotation.Set(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation), true);
        }

        // extract all possible mentions
        // this is created for each new annotation because it is not threadsafe
        RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
        IList<IList<Mention>> allUnprocessedMentions = finder.ExtractPredictedMentions(annotation, 0, corefSystem.Dictionaries());

        // add the relevant info to mentions and order them for coref
        Document document = mentionExtractor.Arrange(annotation, sentences, trees, allUnprocessedMentions);
        IList<IList<Mention>> orderedMentions = document.GetOrderedMentions();

        IDictionary<int, CorefChain> result = corefSystem.CorefReturnHybridOutput(document);
        annotation.Set(typeof(CorefCoreAnnotations.CorefChainAnnotation), result);

        if (OldFormat)
        {
            IDictionary<int, CorefChain> oldResult = corefSystem.Coref(document);
            AddObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
        }
    }
    finally
    {
        // restore to the fine-grained
        // Exceptions are deliberately not caught: they propagate to the caller unchanged,
        // which is what the former `catch (Exception) { throw; }` clause did.
        SetNamedEntityTagGranularity(annotation, "fine");
    }
}