/// <summary>
/// Converts an annotation string of the form
/// "[year]Y[month]M[day]D[hour]H[minute]M[second]S" into a <c>TimePoint</c>.
/// Every numeric component is optional; only the components that carry digits
/// are set on the result.
/// </summary>
/// <param name="str">The annotation text to parse.</param>
/// <returns>
/// The parsed time point, or null when <paramref name="str"/> is null, empty,
/// the bare template "YMDHMS", or does not contain the expected pattern.
/// </returns>
private static TimePoint AnnotateToTimePoint(string str)
{
    if (string.IsNullOrEmpty(str) || str == "YMDHMS")
    {
        return null;
    }

    Regex pointRegex = new Regex(@"(?<year>[0-9]*)Y(?<month>[0-9]*)M(?<day>[0-9]*)D(?<hour>[0-9]*)H(?<minute>[0-9]*)M(?<second>[0-9]*)S");
    Match match = pointRegex.Match(str);

    // Regex.Match never returns null; a failed match is reported through Success.
    if (!match.Success)
    {
        return null;
    }

    TimePoint ret = new TimePoint();

    string year = match.Groups["year"].Value;
    if (year != "")
    {
        ret.setYear(Convert.ToInt32(year));
    }

    string month = match.Groups["month"].Value;
    if (month != "")
    {
        ret.setMonth(Convert.ToInt32(month));
    }

    string day = match.Groups["day"].Value;
    if (day != "")
    {
        ret.setDay(Convert.ToInt32(day));
    }

    string hour = match.Groups["hour"].Value;
    if (hour != "")
    {
        ret.setHour(Convert.ToInt32(hour));
    }

    string minute = match.Groups["minute"].Value;
    if (minute != "")
    {
        ret.setMinute(Convert.ToInt32(minute));
    }

    string second = match.Groups["second"].Value;
    if (second != "")
    {
        ret.setSecond(Convert.ToInt32(second));
    }

    return ret;
}
/// <summary>
/// Loads everything that belongs to this record from disk: the tokenised raw
/// text (with wiki annotations), time and event entities, TLinks, section
/// times, section names and the key (admission/discharge) time points.
/// </summary>
/// <remarks>
/// The wiki file and the raw text file are read unconditionally and therefore
/// must exist; all other files are optional. When the section-entity file is
/// missing, admission/discharge dates are guessed from the text and the result
/// is written to that file.
/// </remarks>
public virtual void Load()
{
    Console.WriteLine("Loading " + Filename + " ...");
    string[] wikifile = File.ReadAllLines(@".\" + Filename + ".wiki");

    //Load words
    LoadWordsFromRawText(wikifile);

    //Load time entities (including normalization), then PMT entities
    entities = new List<Entity>();
    AppendEntitiesFromFile(filename + time_con_suffix);
    AppendEntitiesFromFile(filename + event_con_suffix);

    //remove duplicate entities
    RemoveDuplicateEntities();

    //Load TLinks
    LoadTLinksFromFile();

    //Load Section entities
    if (File.Exists(filename + section_entity_suffix))
    {
        LoadSectionTimesFromFile();
    }
    else
    {
        GuessSectionTimesFromText();
    }

    //Load Sections
    LoadSectionNames();

    //Load and generate "virtual" entities (special entities)
    LoadKeyTimePointEntities();
}

/// <summary>
/// Reads the raw text file and fills <c>texts</c> (one entry per line) and
/// <c>words</c> (one token list per line), attaching wiki data to each token.
/// </summary>
/// <param name="wikifile">Lines of the wiki file; only lines containing '|' are used.</param>
private void LoadWordsFromRawText(string[] wikifile)
{
    // Slice every wiki line once instead of once per token. Key = text before the
    // first '|', value = text after the last '|'. Lines without '|' are skipped
    // (Substring(0, -1) would throw), as are lines with nothing after the last '|'
    // because they can never supply wiki data.
    List<KeyValuePair<string, string>> wikiEntries = new List<KeyValuePair<string, string>>();
    foreach (string wikiLine in wikifile)
    {
        int firstBar = wikiLine.IndexOf('|');
        if (firstBar < 0)
        {
            continue;
        }

        int lastBar = wikiLine.LastIndexOf('|');
        if (wikiLine.Length > lastBar + 1)
        {
            wikiEntries.Add(new KeyValuePair<string, string>(
                wikiLine.Substring(0, firstBar),
                wikiLine.Substring(lastBar + 1)));
        }
    }

    char[] tokenSeparators = new char[] { ' ', '\t' };
    char[] wikiSeparators = new char[] { ' ', '_' };

    string[] lines = File.ReadAllLines(filename + raw_suffix);
    words = new List<List<Word>>();
    texts = new List<string>();

    for (int i = 0; i < lines.Length; i++)
    {
        texts.Add(lines[i]);
        string[] terms = lines[i].Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries);
        List<Word> lineWords = new List<Word>();

        for (int j = 0; j < terms.Length; j++)
        {
            // NOTE(review): this spot used to call terms[j].Replace(";", "") and
            // terms[j].Replace(":", "") without using the returned string, so tokens
            // were never altered. Tokens are still stored verbatim here; actually
            // stripping ';' / ':' would change the text every downstream consumer
            // sees and should be a deliberate decision.
            Word word = new Word(terms[j], new TimeExtractor.units.TextIdentifier(Filename, i + 1, j));
            word.Pos = j;

            // The last matching wiki line wins, so scan backward and stop at the first hit.
            string loweredTerm = terms[j].ToLower();
            for (int c = wikiEntries.Count - 1; c >= 0; c--)
            {
                if (wikiEntries[c].Key.Contains(loweredTerm))
                {
                    word.Wiki = wikiEntries[c].Value.Split(wikiSeparators, StringSplitOptions.RemoveEmptyEntries);
                    break;
                }
            }

            lineWords.Add(word);
        }

        words.Add(lineWords);
    }
}

/// <summary>
/// Appends one entity per line of <paramref name="path"/> to <c>entities</c>;
/// does nothing when the file does not exist.
/// </summary>
private void AppendEntitiesFromFile(string path)
{
    if (!File.Exists(path))
    {
        return;
    }

    foreach (string line in File.ReadAllLines(path))
    {
        Entity entity = EntityUtil.I2b2formToEntity(line, filename);
        entity.Fr = this;
        entities.Add(entity);
    }
}

/// <summary>
/// Reads the TLink file, if present, into <c>links</c>. <c>links</c> is left
/// untouched when the file does not exist.
/// </summary>
private void LoadTLinksFromFile()
{
    if (!File.Exists(filename + tlink_suffix))
    {
        return;
    }

    string[] lines = File.ReadAllLines(filename + tlink_suffix);
    links = new List<EntityLink>();
    foreach (string line in lines)
    {
        EntityLink link = EntityUtil.I2b2formToTLink(line, filename, this);
        if (link != null)
        {
            links.Add(link);
        }
    }
}

/// <summary>
/// Loads section entities from the section-entity file. The first entity
/// supplies <c>ad_tp</c> and the second supplies <c>dc_tp</c>.
/// </summary>
private void LoadSectionTimesFromFile()
{
    ad_tp = new TimePoint();
    dc_tp = new TimePoint();

    string[] lines = File.ReadAllLines(filename + section_entity_suffix);
    for (int i = 0; i < lines.Length; i++)
    {
        Entity entity = EntityUtil.I2b2formToEntity(lines[i], filename);
        entity.Fr = this;
        entity.is_section = true;
        entities.Add(entity);

        if (i == 0)
        {
            ad_tp = entity.getFirstTimePoint();
        }
        else if (i == 1)
        {
            dc_tp = entity.getFirstTimePoint();
        }
    }
}

/// <summary>
/// Very naive extraction of the admission and discharge dates: looks at the
/// first token of every line that follows an "admission date:",
/// "registration date:" or "discharge date:" header line, then stores what was
/// found in the section-entity file.
/// </summary>
private void GuessSectionTimesFromText()
{
    ad_tp = new TimePoint();
    dc_tp = new TimePoint();
    int adline = -1;
    int dcline = -1;

    // Three digit groups separated by '\', '/' or '-', or a compact 8-digit 4+2+2 form.
    Regex separatedDate = new Regex(@"^(?<d1>[0-9]+)(\\|\/|\-)(?<d2>[0-9]+)(\\|\/|\-)(?<d3>[0-9]+)$");
    Regex compactDate = new Regex(@"^(?<d1>[0-9]{4})(?<d2>[0-9]{2})(?<d3>[0-9]{2})$");

    for (int i = 1; i < words.Count; i++)
    {
        string header = texts[i - 1].ToLower().Replace(" ", "");
        bool isAdmission = header == "admissiondate:" || header == "registrationdate:";
        if (!isAdmission && header != "dischargedate:")
        {
            continue;
        }

        if (words[i].Count == 0)
        {
            continue;
        }

        // Only the first token of the line is considered.
        string candidate = words[i][0].WordText;
        Match match = separatedDate.Match(candidate);
        if (!match.Success)
        {
            // The two patterns cannot both match the same token.
            match = compactDate.Match(candidate);
        }

        if (!match.Success)
        {
            continue;
        }

        int d1 = Convert.ToInt32(match.Groups["d1"].Value);
        int d2 = Convert.ToInt32(match.Groups["d2"].Value);
        int d3 = Convert.ToInt32(match.Groups["d3"].Value);

        if (isAdmission)
        {
            ad_tp = get_tp_from_3digit(d1, d2, d3);
            adline = i;
        }
        else
        {
            dc_tp = get_tp_from_3digit(d1, d2, d3);
            dcline = i;
        }
    }

    //Store the section time into the section time file
    List<string> seccons = new List<string>();
    if (adline != -1)
    {
        seccons.Add(FormatSectionTimeLine(adline, "ADMISSION", ad_tp));
    }

    if (dcline != -1)
    {
        seccons.Add(FormatSectionTimeLine(dcline, "DISCHARGE", dc_tp));
    }

    // Written even when nothing was found, so the file exists on the next Load.
    File.WriteAllLines(filename + section_entity_suffix, seccons.ToArray());
}

/// <summary>
/// Builds one SECTIME line for the section-entity file from the first token of
/// text line <paramref name="lineIndex"/> (0-based; written 1-based).
/// </summary>
private string FormatSectionTimeLine(int lineIndex, string type, TimePoint tp)
{
    return "SECTIME=\"" + words[lineIndex][0].WordText + "\" "
        + (lineIndex + 1) + ":0 " + (lineIndex + 1) + ":0||type=\"" + type + "\"||dvalue=\""
        + append_zero(tp.getYear(), 4) + "-"
        + append_zero(tp.getMonth(), 2) + "-"
        + append_zero(tp.getDay(), 2) + "\"";
}

/// <summary>
/// Fills <c>sections</c> with the text after the last tab of each line of the
/// section file, or with the raw text lines when that file does not exist.
/// </summary>
private void LoadSectionNames()
{
    sections = new List<string>();
    if (File.Exists(filename + section_suffix))
    {
        foreach (string line in File.ReadAllLines(filename + section_suffix))
        {
            // LastIndexOf returns -1 when there is no tab, which keeps the whole line.
            int tab = line.LastIndexOf('\t');
            sections.Add(line.Substring(tab + 1));
        }
    }
    else
    {
        for (int i = 0; i < texts.Count; i++)
        {
            sections.Add(texts[i]);
        }
    }
}

/// <summary>
/// Creates the special entities and attaches the admission/discharge time
/// points read from the key-time-point file (empty time points when the file
/// or the corresponding entry is missing).
/// </summary>
private void LoadKeyTimePointEntities()
{
    OpEntity = new TimeEntity();
    AdEntity = new TimeEntity();
    DcEntity = new TimeEntity();
    TrEntity = new TimeEntity();

    TimePoint admissionTP = new TimePoint();
    TimePoint dischargeTP = new TimePoint();

    if (File.Exists(Filename + keytp_suffix))
    {
        string text = File.ReadAllText(Filename + keytp_suffix);
        ApplyKeyDate(text, @"admission=(?<year>[0-9]+)\s(?<month>[0-9]+)\s(?<day>[0-9]+)", admissionTP);
        ApplyKeyDate(text, @"discharge=(?<year>[0-9]+)\s(?<month>[0-9]+)\s(?<day>[0-9]+)", dischargeTP);
    }

    AdEntity.addTimePoint(admissionTP);
    DcEntity.addTimePoint(dischargeTP);
}

/// <summary>
/// Sets year, month and day on <paramref name="target"/> from the first match
/// of <paramref name="pattern"/> in <paramref name="text"/>; leaves it
/// untouched when there is no match.
/// </summary>
private static void ApplyKeyDate(string text, string pattern, TimePoint target)
{
    Match match = new Regex(pattern).Match(text);
    if (!match.Success)
    {
        return;
    }

    target.setYear(Convert.ToInt32(match.Groups["year"].Value));
    target.setMonth(Convert.ToInt32(match.Groups["month"].Value));
    target.setDay(Convert.ToInt32(match.Groups["day"].Value));
}