Exemple #1
0
        /// <summary>
        /// Converts an ACE event mention into an <see cref="EventMention"/> attached to the given sentence.
        /// </summary>
        /// <remarks>
        /// Each role's argument is resolved through <paramref name="entityMap"/>. The extent of the
        /// resulting mention runs from the smallest argument start token to the largest argument end token.
        /// </remarks>
        /// <param name="aceEventMention">ACE event mention to convert</param>
        /// <param name="docId">document id; not used by this method</param>
        /// <param name="sentence">sentence the mention and its anchor object are attached to</param>
        /// <param name="entityMap">maps ACE entity mention ids to their already converted entity mentions</param>
        /// <param name="tokenOffset">value subtracted from the anchor's token positions</param>
        /// <returns>
        /// the converted mention, or <c>null</c> (after logging) when one of the arguments has no entry in
        /// <paramref name="entityMap"/>
        /// </returns>
        private EventMention ConvertAceEventMention(AceEventMention aceEventMention, string docId, ICoreMap sentence, IDictionary <string, EntityMention> entityMap, int tokenOffset)
        {
            // roles and convertedArgs are filled in lock-step so they stay index-aligned.
            IList<string>           roles         = new List<string>();
            IList<ExtractionObject> convertedArgs = new List<ExtractionObject>();
            int left  = int.MaxValue;
            int right = int.MinValue;

            foreach (string role in aceEventMention.GetRoles())
            {
                roles.Add(role);

                AceEntityMention arg   = aceEventMention.GetArg(role);
                string           argId = arg.GetId();

                // The dictionary indexer throws on a missing key, which would bypass the
                // recovery path below; TryGetValue lets a missing argument be reported instead.
                EntityMention converted;
                if (!entityMap.TryGetValue(argId, out converted) || converted == null)
                {
                    logger.Severe("READER ERROR: Failed to find event argument with id " + argId);
                    logger.Severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
                    return null;
                }
                convertedArgs.Add(converted);

                // Widen the event extent so it covers this argument.
                int argStart = converted.GetExtentTokenStart();
                if (argStart < left)
                {
                    left = argStart;
                }
                int argEnd = converted.GetExtentTokenEnd();
                if (argEnd > right)
                {
                    right = argEnd;
                }
            }

            // The anchor span is shifted by tokenOffset; its end is the anchor's end token plus one.
            AceCharSeq       anchor       = aceEventMention.GetAnchor();
            Span             anchorSpan   = new Span(anchor.GetTokenStart() - tokenOffset, anchor.GetTokenEnd() + 1 - tokenOffset);
            ExtractionObject anchorObject = new ExtractionObject(aceEventMention.GetId() + "-anchor", sentence, anchorSpan, "ANCHOR", null);

            return new EventMention(aceEventMention.GetId(), sentence, new Span(left, right), aceEventMention.GetParent().GetType(), aceEventMention.GetParent().GetSubtype(), anchorObject, convertedArgs, roles);
        }
Exemple #2
0
        /// <summary>
        /// Applies "--flag value ..." style arguments to an event.
        /// </summary>
        /// <remarks>
        /// An argument starting with "--" opens a flag; the arguments that follow, up to the next "--"
        /// argument, are that flag's values. The flags handled are "repeat", "desc", "mention" and
        /// "remind". Unknown flags and arguments that precede the first flag are skipped.
        /// </remarks>
        /// <param name="args">raw argument tokens</param>
        /// <param name="evt">event whose Repeat, Description, Mentions and ReminderTimestamp are updated</param>
        /// <param name="timezone">time zone id; must resolve through DateTimeZoneProviders.Tzdb when "remind" is present</param>
        /// <exception cref="InvalidTimeZoneException">"remind" was given and <paramref name="timezone"/> cannot be resolved</exception>
        /// <exception cref="DateTimeInPastException">the computed reminder time fails the IsFuture check</exception>
        /// <exception cref="FormatException">a user/role mention id or the recognized duration is not numeric</exception>
        private static void ParseEventInputFlags(string[] args, ref Event evt, string timezone)
        {
            int i = 0;

            while (i < args.Length)
            {
                // Flag markers are protocol tokens, so they are compared ordinally rather than by culture.
                if (!args[i].StartsWith("--", StringComparison.Ordinal))
                {
                    i++;
                    continue;
                }

                var key = args[i].Substring(2);
                i++;

                // Collect everything up to the next flag as this flag's values.
                var values = new List<string>();
                while (i < args.Length && !args[i].StartsWith("--", StringComparison.Ordinal))
                {
                    values.Add(args[i]);
                    i++;
                }

                switch (key)
                {
                case "repeat":
                    if (values.Count == 0)
                    {
                        evt.Repeat = RepeatType.None;
                    }
                    else
                    {
                        switch (values[0])
                        {
                        case "d":
                            evt.Repeat = RepeatType.Daily;
                            break;

                        case "w":
                            evt.Repeat = RepeatType.Weekly;
                            break;

                        case "m":
                            evt.Repeat = RepeatType.Monthly;
                            break;

                        case "mw":
                            evt.Repeat = RepeatType.MonthlyWeekday;
                            break;

                        case "n":
                            evt.Repeat = RepeatType.None;
                            break;

                        default:
                            // Unrecognised value: keep the current setting when it is one of the
                            // repeating kinds, otherwise reset it to None.
                            if (evt.Repeat != RepeatType.Daily && evt.Repeat != RepeatType.Weekly && evt.Repeat != RepeatType.Monthly && evt.Repeat != RepeatType.MonthlyWeekday)
                            {
                                evt.Repeat = RepeatType.None;
                            }
                            break;
                        }
                    }
                    break;

                case "desc":
                    evt.Description = string.Join(' ', values);
                    break;

                case "mention":
                    evt.Mentions = new List<EventMention>();
                    foreach (var token in values)
                    {
                        if (token == "@everyone")
                        {
                            // @everyone replaces whatever was collected so far and ends the scan.
                            evt.Mentions.Clear();
                            evt.Mentions.Add(new EventMention
                            {
                                Type = MentionType.Everyone
                            });

                            break;
                        }
                        else if (string.Equals(token, "rsvp", StringComparison.OrdinalIgnoreCase))
                        {
                            evt.Mentions.Add(new EventMention
                            {
                                Type = MentionType.RSVP
                            });
                        }
                        else if (token.StartsWith("<@", StringComparison.Ordinal) && token.EndsWith(">", StringComparison.Ordinal))
                        {
                            var    mention = new EventMention();
                            string mentionIdString;
                            if (token.StartsWith("<@&", StringComparison.Ordinal))
                            {
                                // "<@&id>" form
                                mention.Type    = MentionType.Role;
                                mentionIdString = token.Substring(3);
                            }
                            else
                            {
                                // "<@id>" or "<@!id>" form
                                mention.Type    = MentionType.User;
                                mentionIdString = token.StartsWith("<@!", StringComparison.Ordinal)
                                    ? token.Substring(3)
                                    : token.Substring(2);
                            }
                            mentionIdString  = mentionIdString.TrimEnd('>');
                            mention.TargetId = ulong.Parse(mentionIdString);
                            evt.Mentions.Add(mention);
                        }
                    }
                    break;

                case "remind":
                    var reminderString = string.Join(' ', values);

                    // The zone is only validated here; it is not used for the computation below.
                    if (DateTimeZoneProviders.Tzdb.GetZoneOrNull(timezone) == null)
                    {
                        throw new InvalidTimeZoneException();
                    }

                    var results = DateTimeRecognizer.RecognizeDateTime(reminderString, Culture.English);
                    if (results.Count > 0 && results.First().TypeName.StartsWith("datetimeV2", StringComparison.Ordinal))
                    {
                        var first            = results.First();
                        var resolutionValues = (IList<Dictionary<string, string>>)first.Resolution["values"];

                        // Only a duration ("... before the start") sets a reminder; other sub-types are ignored.
                        var subType = first.TypeName.Split('.').Last();
                        if (subType == "duration")
                        {
                            string secondsText = resolutionValues.Select(v => v["value"]).FirstOrDefault();

                            // The recognizer output is machine-formatted text, so it is parsed with the
                            // invariant culture; the current culture could misread a decimal point.
                            double seconds           = double.Parse(secondsText, System.Globalization.CultureInfo.InvariantCulture);
                            var    reminderTimestamp = evt.StartTimestamp.AddSeconds(-seconds);
                            if (!IsFuture(reminderTimestamp))
                            {
                                throw new DateTimeInPastException();
                            }
                            evt.ReminderTimestamp = reminderTimestamp;
                        }
                    }
                    break;

                default:
                    break;
                }
            }
        }
Exemple #3
0
        /// <summary>
        /// Parses one ACE document and turns each of its sentences into an annotated <see cref="ICoreMap"/>.
        /// </summary>
        /// <remarks>
        /// For every sentence the tokens are wrapped in CoreLabels, then the sentence's ACE entity,
        /// relation and event mentions are converted and attached. A one-token sentence whose token
        /// starts with "&lt;" and ends with "&gt;" is dropped, but its token still counts towards the
        /// running token offset.
        /// </remarks>
        /// <param name="prefix">
        /// prefix handed to AceDocument.ParseDocument (ACE file name without the ".apf.xml" extension)
        /// </param>
        /// <param name="corpus">not used by this method</param>
        /// <returns>one ICoreMap per retained sentence, in document order</returns>
        private IList <ICoreMap> ReadDocument(string prefix, Annotation corpus)
        {
            logger.Info("Reading document: " + prefix);

            // ACE2004 documents go through the overload that also takes the version.
            AceDocument aceDocument = aceVersion.Equals("ACE2004")
                ? AceDocument.ParseDocument(prefix, false, aceVersion)
                : AceDocument.ParseDocument(prefix, false);
            string docId = aceDocument.GetId();

            IList<ICoreMap> sentences = new List<ICoreMap>();
            // ACE entity mention id -> converted mention; shared by all sentences of the document.
            IDictionary<string, EntityMention> convertedEntities = Generics.NewHashMap();

            int nextTokenOffset = 0;
            for (int sentenceIndex = 0; sentenceIndex < aceDocument.GetSentenceCount(); sentenceIndex++)
            {
                IList<AceToken> tokens = aceDocument.GetSentence(sentenceIndex);

                // Offset of this sentence's first token. The running total advances whether or not
                // the sentence is kept.
                int tokenOffset = nextTokenOffset;
                nextTokenOffset += tokens.Count;

                IList<CoreLabel> words = new List<CoreLabel>();
                StringBuilder    text  = new StringBuilder();
                foreach (AceToken token in tokens)
                {
                    CoreLabel label = new CoreLabel();
                    label.SetWord(token.GetLiteral());
                    label.Set(typeof(CoreAnnotations.ValueAnnotation), label.Word());
                    label.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), token.GetByteStart());
                    label.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), token.GetByteEnd());

                    // Single space between literals, none before the first one.
                    if (words.Count > 0)
                    {
                        text.Append(" ");
                    }
                    words.Add(label);
                    text.Append(token.GetLiteral());
                }

                // Skip "sentences" that consist of a single SGML-tag-like token.
                if (words.Count == 1)
                {
                    string only = words[0].Word();
                    if (only.StartsWith("<") && only.EndsWith(">"))
                    {
                        continue;
                    }
                }

                string   sentenceText = text.ToString();
                ICoreMap sentence     = new Annotation(sentenceText);
                sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
                sentence.Set(typeof(CoreAnnotations.TokensAnnotation), words);
                logger.Info("Reading sentence: \"" + sentenceText + "\"");

                IList<AceEntityMention>   aceEntities  = aceDocument.GetEntityMentions(sentenceIndex);
                IList<AceRelationMention> aceRelations = aceDocument.GetRelationMentions(sentenceIndex);
                IList<AceEventMention>    aceEvents    = aceDocument.GetEventMentions(sentenceIndex);

                // Entity mentions come first: relation and event conversion look them up by id.
                foreach (AceEntityMention aceEntity in aceEntities)
                {
                    // Coreference id = id of the first entity that lists this mention; empty when none does.
                    string corefID = string.Empty;
                    foreach (string entityID in aceDocument.GetKeySetEntities())
                    {
                        if (aceDocument.GetEntity(entityID).GetMentions().Contains(aceEntity))
                        {
                            corefID = entityID;
                            break;
                        }
                    }

                    EntityMention entityMention = ConvertAceEntityMention(aceEntity, docId, sentence, tokenOffset, corefID);
                    entityCounts.IncrementCount(entityMention.GetType());
                    logger.Info("CONVERTED MENTION HEAD SPAN: " + entityMention.GetHead());
                    logger.Info("CONVERTED ENTITY MENTION: " + entityMention);
                    AnnotationUtils.AddEntityMention(sentence, entityMention);
                    convertedEntities[aceEntity.GetId()] = entityMention;
                }

                // TODO: make Entity objects as needed
                foreach (AceRelationMention aceRelation in aceRelations)
                {
                    RelationMention relationMention = ConvertAceRelationMention(aceRelation, docId, sentence, convertedEntities);
                    if (relationMention == null)
                    {
                        continue;
                    }
                    relationCounts.IncrementCount(relationMention.GetType());
                    logger.Info("CONVERTED RELATION MENTION: " + relationMention);
                    AnnotationUtils.AddRelationMention(sentence, relationMention);
                }

                // TODO: make Relation objects
                foreach (AceEventMention aceEvent in aceEvents)
                {
                    EventMention eventMention = ConvertAceEventMention(aceEvent, docId, sentence, convertedEntities, tokenOffset);
                    if (eventMention == null)
                    {
                        continue;
                    }
                    eventCounts.IncrementCount(eventMention.GetType());
                    logger.Info("CONVERTED EVENT MENTION: " + eventMention);
                    AnnotationUtils.AddEventMention(sentence, eventMention);
                }

                // TODO: make Event objects
                sentences.Add(sentence);
            }

            return sentences;
        }