/// <summary>
/// Converts an ACE event mention into an <see cref="EventMention"/> attached to <paramref name="sentence"/>.
/// </summary>
/// <remarks>
/// Each role of the ACE mention is resolved to an already-converted entity mention through
/// <paramref name="entityMap"/>. The event extent is the span from the smallest argument extent start
/// to the largest argument extent end. The anchor span is the ACE anchor's token range shifted by
/// <paramref name="tokenOffset"/>, with the end bound incremented by one.
/// </remarks>
/// <param name="aceEventMention">The ACE event mention to convert.</param>
/// <param name="docId">Document id; not referenced by this method.</param>
/// <param name="sentence">The sentence the resulting mention and its anchor are attached to.</param>
/// <param name="entityMap">Converted entity mentions keyed by ACE entity mention id.</param>
/// <param name="tokenOffset">Value subtracted from the anchor's token positions.</param>
/// <returns>
/// The converted event mention, or <c>null</c> when one of its arguments has no entry in
/// <paramref name="entityMap"/>.
/// </returns>
private EventMention ConvertAceEventMention(AceEventMention aceEventMention, string docId, ICoreMap sentence, IDictionary<string, EntityMention> entityMap, int tokenOffset)
{
    // Snapshot the role names into a list so roles and converted arguments share one iteration order.
    IList<string> roles = new List<string>(aceEventMention.GetRoles());
    IList<ExtractionObject> convertedArgs = new List<ExtractionObject>();

    // NOTE(review): with zero roles the extent stays (int.MaxValue, int.MinValue) - confirm this is intended.
    int left = int.MaxValue;
    int right = int.MinValue;

    foreach (string role in roles)
    {
        AceEntityMention arg = aceEventMention.GetArg(role);

        // The dictionary indexer throws KeyNotFoundException for an unknown id, which would bypass
        // the "ignore this mention" handling below; TryGetValue lets the missing case reach it.
        EntityMention converted;
        if (!entityMap.TryGetValue(arg.GetId(), out converted) || converted == null)
        {
            logger.Severe("READER ERROR: Failed to find event argument with id " + arg.GetId());
            logger.Severe("This happens because a few event mentions illegally span multiple sentences. Will ignore this mention.");
            return null;
        }

        convertedArgs.Add(converted);

        if (converted.GetExtentTokenStart() < left)
        {
            left = converted.GetExtentTokenStart();
        }

        if (converted.GetExtentTokenEnd() > right)
        {
            right = converted.GetExtentTokenEnd();
        }
    }

    AceCharSeq anchor = aceEventMention.GetAnchor();
    ExtractionObject anchorObject = new ExtractionObject(
        aceEventMention.GetId() + "-anchor",
        sentence,
        new Span(anchor.GetTokenStart() - tokenOffset, anchor.GetTokenEnd() + 1 - tokenOffset),
        "ANCHOR",
        null);

    EventMention em = new EventMention(
        aceEventMention.GetId(),
        sentence,
        new Span(left, right),
        aceEventMention.GetParent().GetType(),
        aceEventMention.GetParent().GetSubtype(),
        anchorObject,
        convertedArgs,
        roles);
    return em;
}
/// <summary>
/// Applies the <c>--key value ...</c> flags found in <paramref name="args"/> to <paramref name="evt"/>.
/// </summary>
/// <remarks>
/// A flag starts at a token beginning with <c>--</c> and owns every following token up to the next
/// <c>--</c> token. Recognized keys are <c>repeat</c>, <c>desc</c>, <c>mention</c> and <c>remind</c>;
/// any other key, and any token that is not part of a flag, is ignored.
/// </remarks>
/// <param name="args">Raw input tokens.</param>
/// <param name="evt">The event whose properties are updated in place.</param>
/// <param name="timezone">Time zone id; only looked up (in the Tzdb provider) when <c>--remind</c> is present.</param>
/// <exception cref="InvalidTimeZoneException">
/// <c>--remind</c> was given and <paramref name="timezone"/> is not a known Tzdb zone.
/// </exception>
/// <exception cref="DateTimeInPastException">
/// <c>--remind</c> resolved to a duration whose reminder time does not pass <c>IsFuture</c>.
/// </exception>
private static void ParseEventInputFlags(string[] args, ref Event evt, string timezone)
{
    // Signed index: it is compared against and bounded by the int array length.
    int i = 0;
    int argsLength = args.Length;

    while (i < argsLength)
    {
        // Ordinal matching: Substring(2) below assumes the prefix is exactly the two '-' characters.
        if (!args[i].StartsWith("--", StringComparison.Ordinal))
        {
            i++;
            continue;
        }

        var key = args[i].Substring(2);
        i++;

        // Collect every token up to (not including) the next flag.
        var values = new List<string>();
        while (i < argsLength && !args[i].StartsWith("--", StringComparison.Ordinal))
        {
            values.Add(args[i]);
            i++;
        }

        switch (key)
        {
            case "repeat":
                if (values.Count == 0)
                {
                    evt.Repeat = RepeatType.None;
                }
                else
                {
                    switch (values[0])
                    {
                        case "d":
                            evt.Repeat = RepeatType.Daily;
                            break;
                        case "w":
                            evt.Repeat = RepeatType.Weekly;
                            break;
                        case "m":
                            evt.Repeat = RepeatType.Monthly;
                            break;
                        case "mw":
                            evt.Repeat = RepeatType.MonthlyWeekday;
                            break;
                        case "n":
                            evt.Repeat = RepeatType.None;
                            break;
                        default:
                            // Unrecognized value: keep an existing repeating setting, otherwise reset to None.
                            if (evt.Repeat != RepeatType.Daily
                                && evt.Repeat != RepeatType.Weekly
                                && evt.Repeat != RepeatType.Monthly
                                && evt.Repeat != RepeatType.MonthlyWeekday)
                            {
                                evt.Repeat = RepeatType.None;
                            }

                            break;
                    }
                }

                break;

            case "desc":
                evt.Description = string.Join(' ', values.ToArray());
                break;

            case "mention":
                // Every --mention flag starts from an empty mention list.
                evt.Mentions = new List<EventMention>();
                foreach (var token in values)
                {
                    if (token == "@everyone")
                    {
                        // @everyone replaces anything collected so far and ends processing of this flag.
                        evt.Mentions.Clear();
                        evt.Mentions.Add(new EventMention { Type = MentionType.Everyone });
                        break;
                    }

                    if (string.Equals(token, "rsvp", StringComparison.OrdinalIgnoreCase))
                    {
                        evt.Mentions.Add(new EventMention { Type = MentionType.RSVP });
                    }
                    else if (token.StartsWith("<@", StringComparison.Ordinal) && token.EndsWith(">", StringComparison.Ordinal))
                    {
                        var mention = new EventMention();
                        string idText;
                        if (token.StartsWith("<@&", StringComparison.Ordinal))
                        {
                            // "<@&id>" form is treated as a role mention.
                            mention.Type = MentionType.Role;
                            idText = token.Substring(3);
                        }
                        else
                        {
                            // "<@id>" and "<@!id>" forms are both treated as user mentions.
                            mention.Type = MentionType.User;
                            idText = token.StartsWith("<@!", StringComparison.Ordinal)
                                ? token.Substring(3)
                                : token.Substring(2);
                        }

                        idText = idText.TrimEnd('>');

                        // The id is machine-formatted text, so parse it independently of the current culture.
                        // A non-numeric id still surfaces as FormatException, as before.
                        mention.TargetId = ulong.Parse(idText, System.Globalization.CultureInfo.InvariantCulture);
                        evt.Mentions.Add(mention);
                    }
                }

                break;

            case "remind":
                var reminderString = string.Join(' ', values);

                // The zone is only validated here; the resolved zone itself is not used further.
                var tz = DateTimeZoneProviders.Tzdb.GetZoneOrNull(timezone);
                if (tz == null)
                {
                    throw new InvalidTimeZoneException();
                }

                var results = DateTimeRecognizer.RecognizeDateTime(reminderString, Culture.English);
                if (results.Count > 0)
                {
                    var first = results.First();
                    if (first.TypeName.StartsWith("datetimeV2", StringComparison.Ordinal))
                    {
                        var resolutionValues = (IList<Dictionary<string, string>>)first.Resolution["values"];
                        var subType = first.TypeName.Split('.').Last();

                        // Only durations are handled; other recognized subtypes leave the event unchanged.
                        if (subType == "duration")
                        {
                            string secondsText = resolutionValues.Select(v => v["value"]).FirstOrDefault();

                            // Presumably the recognizer emits the seconds count in invariant format (TODO confirm);
                            // parsing with the current culture could misread '.' as a group separator.
                            double seconds = double.Parse(secondsText, System.Globalization.CultureInfo.InvariantCulture);
                            var reminderTimestamp = evt.StartTimestamp.AddSeconds(-seconds);
                            if (IsFuture(reminderTimestamp))
                            {
                                evt.ReminderTimestamp = reminderTimestamp;
                            }
                            else
                            {
                                throw new DateTimeInPastException();
                            }
                        }
                    }
                }

                break;

            default:
                break;
        }
    }
}
/// <summary>
/// Reads a single ACE*.apf.xml document and converts each of its sentences into an annotated
/// <see cref="ICoreMap"/>.
/// </summary>
/// <remarks>
/// For every sentence the tokens are turned into <c>CoreLabel</c>s, and the entity, relation and event
/// mentions of that sentence are converted and attached. A "sentence" made of a single token that
/// starts with "&lt;" and ends with "&gt;" is skipped. You probably should call parse() instead of
/// calling this method directly.
/// </remarks>
/// <param name="prefix">
/// prefix of ACE filename to read (e.g.
/// "/u/mcclosky/scr/data/ACE2005/english_test/bc/CNN_CF_20030827.1630.01"
/// ) (no ".apf.xml" extension)
/// </param>
/// <param name="corpus">Not referenced by this method.</param>
/// <returns>One annotated sentence per retained sentence of the document, in document order.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="Org.Xml.Sax.SAXException"/>
/// <exception cref="Javax.Xml.Parsers.ParserConfigurationException"/>
private IList<ICoreMap> ReadDocument(string prefix, Annotation corpus)
{
    logger.Info("Reading document: " + prefix);
    IList<ICoreMap> sentences = new List<ICoreMap>();

    // The ACE2004 format needs the version passed explicitly to the parser.
    AceDocument document = aceVersion.Equals("ACE2004")
        ? AceDocument.ParseDocument(prefix, false, aceVersion)
        : AceDocument.ParseDocument(prefix, false);
    string docId = document.GetId();

    // ACE entity mention id -> converted EntityMention, shared by relation and event conversion.
    IDictionary<string, EntityMention> mentionsById = Generics.NewHashMap();

    // Number of tokens in all sentences visited so far (including skipped ones).
    int tokensSoFar = 0;
    for (int sentIdx = 0; sentIdx < document.GetSentenceCount(); sentIdx++)
    {
        IList<AceToken> aceTokens = document.GetSentence(sentIdx);
        IList<CoreLabel> labels = new List<CoreLabel>();
        StringBuilder joined = new StringBuilder();

        for (int t = 0; t < aceTokens.Count; t++)
        {
            AceToken aceToken = aceTokens[t];
            CoreLabel label = new CoreLabel();
            label.SetWord(aceToken.GetLiteral());
            label.Set(typeof(CoreAnnotations.ValueAnnotation), label.Word());
            label.Set(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation), aceToken.GetByteStart());
            label.Set(typeof(CoreAnnotations.CharacterOffsetEndAnnotation), aceToken.GetByteEnd());
            labels.Add(label);

            // Literals are joined with single spaces to form the sentence text.
            if (t > 0)
            {
                joined.Append(" ");
            }

            joined.Append(aceToken.GetLiteral());
        }

        // skip "sentences" that are really just SGML tags (which come from using the RobustTokenizer)
        bool isLoneTag = false;
        if (labels.Count == 1)
        {
            string only = labels[0].Word();
            isLoneTag = only.StartsWith("<") && only.EndsWith(">");
        }

        if (isLoneTag)
        {
            tokensSoFar += aceTokens.Count;
            continue;
        }

        string sentenceText = joined.ToString();
        ICoreMap sentence = new Annotation(sentenceText);
        sentence.Set(typeof(CoreAnnotations.DocIDAnnotation), docId);
        sentence.Set(typeof(CoreAnnotations.TokensAnnotation), labels);
        logger.Info("Reading sentence: \"" + sentenceText + "\"");

        IList<AceEntityMention> aceEntities = document.GetEntityMentions(sentIdx);
        IList<AceRelationMention> aceRelations = document.GetRelationMentions(sentIdx);
        IList<AceEventMention> aceEvents = document.GetEventMentions(sentIdx);

        // convert entity mentions
        foreach (AceEntityMention aceEntity in aceEntities)
        {
            // The coref id is the id of the first entity whose mention list contains this mention.
            string corefId = string.Empty;
            foreach (string candidateId in document.GetKeySetEntities())
            {
                AceEntity candidate = document.GetEntity(candidateId);
                if (candidate.GetMentions().Contains(aceEntity))
                {
                    corefId = candidateId;
                    break;
                }
            }

            EntityMention entity = ConvertAceEntityMention(aceEntity, docId, sentence, tokensSoFar, corefId);
            entityCounts.IncrementCount(entity.GetType());
            logger.Info("CONVERTED MENTION HEAD SPAN: " + entity.GetHead());
            logger.Info("CONVERTED ENTITY MENTION: " + entity);
            AnnotationUtils.AddEntityMention(sentence, entity);
            mentionsById[aceEntity.GetId()] = entity;
        }

        // TODO: make Entity objects as needed

        // convert relation mentions
        foreach (AceRelationMention aceRelation in aceRelations)
        {
            RelationMention relation = ConvertAceRelationMention(aceRelation, docId, sentence, mentionsById);
            if (relation == null)
            {
                continue;
            }

            relationCounts.IncrementCount(relation.GetType());
            logger.Info("CONVERTED RELATION MENTION: " + relation);
            AnnotationUtils.AddRelationMention(sentence, relation);
        }

        // TODO: make Relation objects

        // convert EventMentions
        foreach (AceEventMention aceEvent in aceEvents)
        {
            EventMention evtMention = ConvertAceEventMention(aceEvent, docId, sentence, mentionsById, tokensSoFar);
            if (evtMention == null)
            {
                continue;
            }

            eventCounts.IncrementCount(evtMention.GetType());
            logger.Info("CONVERTED EVENT MENTION: " + evtMention);
            AnnotationUtils.AddEventMention(sentence, evtMention);
        }

        // TODO: make Event objects
        sentences.Add(sentence);
        tokensSoFar += aceTokens.Count;
    }

    return sentences;
}