/// <summary>
        /// Command-line entry point. Loads a properties file from the
        /// <c>ExtractQuotesXMLScripts</c> folder under the base directory, reads the XML
        /// document named by its <c>file</c> property, runs the paragraph, quote-attribution
        /// and chapter annotators over that document, and then calls <c>Train</c>.
        /// </summary>
        /// <param name="args">
        /// Optional overrides: <c>args[0]</c> replaces the default base directory and
        /// <c>args[1]</c> replaces the default properties file name.
        /// </param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            // Defaults used when no command-line overrides are supplied.
            string home = "/home/mjfang/action_grammars/";
            string specificFile = "1PPDevUncollapsed.props";

            // make the first argument one for a base directory
            if (args.Length >= 1)
            {
                home = args[0];
            }
            if (args.Length >= 2)
            {
                specificFile = args[1];
            }
            System.Console.Out.WriteLine("Base directory: " + home);

            // Path.Combine only inserts a separator when one is missing, so a base directory
            // given without a trailing slash still resolves to the intended file.
            string propsPath = System.IO.Path.Combine(home, "ExtractQuotesXMLScripts", specificFile);
            Properties props = StringUtils.PropFileToProperties(propsPath);

            XMLToAnnotation.Data data = XMLToAnnotation.ReadXMLFormat(props.GetProperty("file"));

            // Paragraph annotation runs first, with the "paragraphBreak" property set to "one".
            Properties propsPara = new Properties();
            propsPara.SetProperty("paragraphBreak", "one");
            ParagraphAnnotator pa = new ParagraphAnnotator(propsPara, false);
            pa.Annotate(data.doc);

            // Forward the three settings read from the properties file to the
            // quote-attribution annotator (and later to Train).
            Properties annotatorProps = new Properties();
            annotatorProps.SetProperty("charactersPath", props.GetProperty("charactersPath"));
            //"characterList.txt"
            annotatorProps.SetProperty("booknlpCoref", props.GetProperty("booknlpCoref"));
            annotatorProps.SetProperty("modelPath", props.GetProperty("modelPath"));
            //"model.ser");
            QuoteAttributionAnnotator qaa = new QuoteAttributionAnnotator(annotatorProps);
            qaa.Annotate(data.doc);

            ChapterAnnotator ca = new ChapterAnnotator();
            ca.Annotate(data.doc);

            Train(data, annotatorProps);
        }
        // Example #2
        // 0
        /// <summary>
        /// Runs quote attribution over <paramref name="annotation"/>. The steps are:
        /// optionally build the character map from the annotation's entity mentions,
        /// annotate paragraphs, set up coref, annotate chapters, add enhanced sentences and
        /// dependency parses, run the configured quote-to-mention sieves and then the
        /// mention-to-speaker sieves, and finally copy canonical entity mention information
        /// onto each quote whose speaker mention resolves to one.
        /// </summary>
        /// <param name="annotation">The annotation passed to every annotator and sieve below.</param>
        public virtual void Annotate(Annotation annotation)
        {
            if (buildCharacterMapPerAnnotation && annotation.ContainsKey(typeof(CoreAnnotations.MentionsAnnotation)))
            {
                EntityMentionsToCharacterMap(annotation);
            }

            // 0. pre-preprocess the text with paragraph annotations
            // TODO: maybe move this out, definitely make it so that you can set paragraph breaks
            Properties propsPara = new Properties();
            propsPara.SetProperty("paragraphBreak", "one");
            ParagraphAnnotator pa = new ParagraphAnnotator(propsPara, false);
            pa.Annotate(annotation);

            // 1. preprocess the text
            // a) setup coref
            IDictionary <int, string> pronounCorefMap = QuoteAttributionUtils.SetupCoref(CorefPath, characterMap, annotation);

            // annotate chapter numbers in sentences. Useful for denoting chapter boundaries
            new ChapterAnnotator().Annotate(annotation);
            // to incorporate sentences across paragraphs
            QuoteAttributionUtils.AddEnhancedSentences(annotation);
            // annotate depparse of quote-removed sentences
            QuoteAttributionUtils.AnnotateForDependencyParse(annotation);
            Annotation preprocessed = annotation;

            // 2. Quote->Mention annotation: run each sieve named in the comma-separated list, in order
            IDictionary <string, QMSieve> qmSieves = GetQMMapping(preprocessed, pronounCorefMap);
            foreach (string sieveName in qmSieveList.Split(","))
            {
                qmSieves[sieveName].DoQuoteToMention(preprocessed);
            }

            // 3. Mention->Speaker annotation: same scheme as step 2
            IDictionary <string, MSSieve> msSieves = GetMSMapping(preprocessed, pronounCorefMap);
            foreach (string sieveName_1 in msSieveList.Split(","))
            {
                msSieves[sieveName_1].DoMentionToSpeaker(preprocessed);
            }

            // see if any speakers could be matched to a canonical entity mention
            foreach (ICoreMap quote in QuoteAnnotator.GatherQuotes(annotation))
            {
                // The index lookups are held as int? so that a missing annotation is detected.
                // Comparing a plain int against null is always true, which made the guards dead code.
                // NOTE(review): confirm that Get yields null/no value when the key is absent.
                int? firstSpeakerTokenIndex = quote.Get(typeof(QuoteAttributionAnnotator.MentionBeginAnnotation));
                if (!firstSpeakerTokenIndex.HasValue)
                {
                    continue;
                }

                CoreLabel firstSpeakerToken = annotation.Get(typeof(CoreAnnotations.TokensAnnotation))[firstSpeakerTokenIndex.Value];
                int? entityMentionIndex = firstSpeakerToken.Get(typeof(CoreAnnotations.EntityMentionIndexAnnotation));
                if (!entityMentionIndex.HasValue)
                {
                    continue;
                }

                ICoreMap entityMention = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[entityMentionIndex.Value];
                int? canonicalEntityMentionIndex = entityMention.Get(typeof(CoreAnnotations.CanonicalEntityMentionIndexAnnotation));
                if (!canonicalEntityMentionIndex.HasValue)
                {
                    continue;
                }

                ICoreMap canonicalEntityMention = annotation.Get(typeof(CoreAnnotations.MentionsAnnotation))[canonicalEntityMentionIndex.Value];
                // add canonical entity mention info to quote
                quote.Set(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation), canonicalEntityMention.Get(typeof(CoreAnnotations.TextAnnotation)));

                // set first and last tokens of canonical entity mention; both values are read
                // from the respective token's TokenBeginAnnotation
                IList <CoreLabel> canonicalEntityMentionTokens = canonicalEntityMention.Get(typeof(CoreAnnotations.TokensAnnotation));
                CoreLabel canonicalEntityMentionFirstToken = canonicalEntityMentionTokens[0];
                CoreLabel canonicalEntityMentionLastToken = canonicalEntityMentionTokens[canonicalEntityMentionTokens.Count - 1];
                quote.Set(typeof(QuoteAttributionAnnotator.CanonicalMentionBeginAnnotation), canonicalEntityMentionFirstToken.Get(typeof(CoreAnnotations.TokenBeginAnnotation)));
                quote.Set(typeof(QuoteAttributionAnnotator.CanonicalMentionEndAnnotation), canonicalEntityMentionLastToken.Get(typeof(CoreAnnotations.TokenBeginAnnotation)));
            }
        }