/// <summary>
/// Batch driver: runs one hard-coded operation (<c>op</c>) on every <c>*.txt</c> file found in
/// the folders <c>docs\Package 1</c> through <c>docs\Package 6</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Entity.Preprocess();

    // Selects which case of the switch below is executed for every file.
    int op = 3;

    // The optional pre-pass is currently disabled:
    //   if (op > 0) { TimeExtractor.Preprocessing.preprocess(); }

    for (int packageNo = 1; packageNo <= 6; packageNo++)
    {
        string folder = "docs\\Package " + packageNo;
        string[] files = Directory.GetFiles(folder, "*.txt");

        foreach (string file in files)
        {
            switch (op)
            {
                case 0:
                {
                    // Tag with stemmed PMT tags
                    string conceptFile = FindConceptFile(file);
                    if (conceptFile == null)
                    {
                        break;
                    }

                    // NOTE(review): every parsed entity is annotated, with no filtering by
                    // entity type - confirm that this is intended.
                    List<Entity> entities = new List<Entity>();
                    foreach (string con in File.ReadAllLines(conceptFile))
                    {
                        if (con.Length <= 0)
                        {
                            continue;
                        }
                        entities.Add(EntityUtil.I2b2formToEntity(con, file));
                    }

                    Annotator.Annotate(file, ".PMTstem.con", entities, true);
                    break;
                }

                case 1:
                {
                    // Tag with the time expressions found by the time extractor
                    List<Entity> timeEntities = new List<Entity>();
                    AddTimeEntities(file, timeEntities);
                    Annotator.Annotate(file, ".time.con", timeEntities, false);
                    break;
                }

                case 2:
                {
                    // Tag with PMT tags + guessed classifications
                    string conceptFile = FindConceptFile(file);
                    if (conceptFile == null)
                    {
                        break;
                    }

                    string sectionfile = file.Replace("txt", "section");
                    string[] sections = File.ReadAllLines(sectionfile);
                    string[] cons = File.ReadAllLines(conceptFile);

                    List<Entity> entities = new List<Entity>();
                    foreach (string con in cons)
                    {
                        if (con.Length <= 0)
                        {
                            continue;
                        }

                        PMTEntity entity = (PMTEntity)EntityUtil.I2b2formToEntity(con, file);

                        // Section number = text before the first tab on the entity's line of
                        // the .section file.
                        // NOTE(review): sectionNo is not used afterwards; the lookup is kept
                        // because it throws when the .section file is shorter than the text
                        // or a line has no tab - confirm whether it can be dropped.
                        int lineNumber = entity.startLoc.line;
                        int ptab = sections[lineNumber - 1].IndexOf("\t");
                        string sectionNo = sections[lineNumber - 1].Substring(0, ptab);

                        entities.Add(entity);
                    }

                    AddTimeEntities(file, entities);

                    // NOTE(review): all entities are annotated, with no filtering by type.
                    Annotator.Annotate(file, ".PMTrelation.con", entities, false);
                    break;
                }

                case 3:
                {
                    // From annotated time expression (including normalized) to concept files
                    string annotate_file = file.Replace(".txt", ".time.con");
                    string con_file = file.Replace(".txt", ".time-con");
                    Entity[] entities = Annotator.ReadAnnotate(annotate_file);
                    EntityUtil.ExportConcept(con_file, entities, false);
                    break;
                }

                case 4:
                {
                    // Update the original data
                    // WARNING: THINK BEFORE YOU DO THIS
                    string annotate_file = file.Replace(".txt", ".time.con");
                    string raw_file = file;
                    Annotator.UpdateOriginalData(annotate_file, raw_file);
                    break;
                }

                case 5:
                {
                    // Tag with standard PMT tags + revised time taggings (including normalizations)
                    string time_con_file = file.Replace(".txt", ".time-con");
                    string pmt_con_file = "concepts\\" + FileNameUtil.FileNameNoSuffix(file) + ".con";
                    Entity[] TIMEentities = EntityUtil.ImportConcept(time_con_file, "*.time-con", "*.txt");
                    Entity[] PMTentities = EntityUtil.ImportConcept(pmt_con_file, "*.con", "*.txt");

                    // Time entities first, then PMT entities.
                    List<Entity> entities = new List<Entity>();
                    entities.AddRange(TIMEentities);
                    entities.AddRange(PMTentities);

                    Annotator.Annotate(file, ".PMTrelation.con", entities, false);
                    break;
                }
            }
        }
    }
}

/// <summary>
/// Maps a text file path containing <c>&lt;digits&gt;.txt</c> to <c>concepts\&lt;digits&gt;.con</c>.
/// </summary>
/// <param name="file">Path of the text file being processed.</param>
/// <returns>
/// The concept file path, or <c>null</c> when <paramref name="file"/> has no
/// <c>&lt;digits&gt;.txt</c> part or the concept file does not exist (the latter is reported on
/// the console).
/// </returns>
private static string FindConceptFile(string file)
{
    Match match = Regex.Match(file, @"[0-9]+\.txt");

    // Regex.Match never returns null; a failed match is signalled through Success.
    if (!match.Success)
    {
        return null;
    }

    string concept_filename = "concepts\\" + match.Value.Replace("txt", "con");
    if (!File.Exists(concept_filename))
    {
        Console.WriteLine("Uoh, concept file \"" + concept_filename + "\" not found.");
        return null;
    }

    return concept_filename;
}

/// <summary>
/// Runs the time extractor on <paramref name="file"/> and appends one <c>TimeEntity</c> per
/// resulting sense group to <paramref name="target"/>.
/// </summary>
/// <param name="file">Path of the text file to analyse.</param>
/// <param name="target">List that receives the new entities.</param>
private static void AddTimeEntities(string file, List<Entity> target)
{
    TimeExtractor.tools.Init.setFilePath(file);
    TimeExtractor.TimeMapping.process(false);

    foreach (SenseGroup sg in TimeVariables.TIME_ENTITIES)
    {
        Entity entity = new TimeEntity();
        entity.text = sg.getWords()[0];
        entity.startLoc = sg.startLoc;
        entity.endLoc = sg.endLoc;
        entity.setTimePoint(sg.getTimePeriod().getFirstTimePoint());
        target.Add(entity);
    }
}
/// <summary>
/// Parses one <c>||</c>-separated annotation line into an <see cref="Entity"/>.
/// </summary>
/// <remarks>
/// The first field must look like <c>key="text" sline:scol eline:ecol</c>; the second field
/// carries a double-quoted label that selects the kind of entity that is built:
/// <list type="bullet">
/// <item><c>time</c>: a <c>TimeEntity</c> with one time point per following field, each of the
/// form <c>normalization=(y,m,d,h,min,s)</c>.</item>
/// <item>first field starting with <c>sectime</c>: a <c>TimeEntity</c> dated from the
/// <c>dvalue="yyyy-mm-dd"</c> attribute of the third field.</item>
/// <item><c>DATE</c>, <c>TIME</c>, <c>DURATION</c>, <c>FREQUENCY</c>, <c>RELATIVE</c>: a
/// <c>TimeEntity</c> with value, mode and type.</item>
/// <item><c>IMPLICIT</c>, <c>EXPLICIT</c>, <c>TREATMENT</c>, <c>CLINICAL_DEPT</c>,
/// <c>EVIDENTIAL</c>, <c>OCCURRENCE</c> (any casing): a <c>PMTEntity</c> with modality,
/// polarity and section-time relation.</item>
/// <item>anything else: a plain <c>PMTEntity</c> typed with the label as written.</item>
/// </list>
/// </remarks>
/// <param name="i2b2str">The annotation line to parse.</param>
/// <param name="filename">File name stored in the entity's start and end locations.</param>
/// <returns>The parsed entity, or <c>null</c> when the line contains <c>~~</c>.</returns>
/// <exception cref="IndexOutOfRangeException">The line has fewer fields than the selected
/// branch reads unconditionally (at least two; three for <c>sectime</c> lines).</exception>
/// <exception cref="FormatException">A numeric part (position, date, normalization) is
/// missing, so an empty string reaches <c>Convert.ToInt32</c>.</exception>
public static Entity I2b2formToEntity(string i2b2str, string filename)
{
    // Lines containing "~~" are not handled by this parser.
    if (i2b2str.Contains("~~"))
    {
        return null;
    }

    string[] terms = i2b2str.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);

    // The label is the first double-quoted word in the second field.
    string con = Regex.Match(terms[1], @"""(?<con>[A-Za-z_]+)""").Groups["con"].Value;

    // Invariant casing: the keywords are fixed ASCII tokens and must not depend on the
    // current culture (e.g. Turkish dotted/dotless I).
    string conUpper = con.ToUpperInvariant();

    Entity ret;
    if (con == "time")
    {
        ret = new TimeEntity();

        // Every field after the label must hold a six-part normalization tuple; a field
        // without one ends in a FormatException from GroupAsInt.
        Regex normRegex = new Regex(@"normalization=\((?<year>[0-9]+),(?<month>[0-9]+),(?<day>[0-9]+),(?<hour>[0-9]+),(?<minute>[0-9]+),(?<second>[0-9]+)\)");
        for (int i = 2; i < terms.Length; i++)
        {
            Match normMatch = normRegex.Match(terms[i]);
            int year = GroupAsInt(normMatch, "year");
            int month = GroupAsInt(normMatch, "month");
            int day = GroupAsInt(normMatch, "day");
            int hour = GroupAsInt(normMatch, "hour");
            int minute = GroupAsInt(normMatch, "minute");
            int second = GroupAsInt(normMatch, "second");
            ret.addTimePoint(new TimePoint(year, month, day, hour, minute, second));
        }
        ret.type = con;
    }
    else if (terms[0].StartsWith("sectime", StringComparison.OrdinalIgnoreCase))
    {
        Match dateMatch = Regex.Match(terms[2], @"dvalue=""(?<year>[0-9]+)-(?<month>[0-9]+)-(?<day>[0-9]+)""");
        int year = GroupAsInt(dateMatch, "year");
        int month = GroupAsInt(dateMatch, "month");
        int day = GroupAsInt(dateMatch, "day");

        TimeEntity te = new TimeEntity();
        te.mode = TimeEntityMode.NA;
        te.type = TimeEntityType.DATE;
        te.setTimePoint(new TimePoint(year, month, day));
        ret = te;
        ret.type = "time";
    }
    else if (con == "DATE" || con == "TIME" || con == "DURATION" || con == "FREQUENCY" || con == "RELATIVE")
    {
        TimeEntity te = new TimeEntity();

        // value: strips the first five characters and the last one from the third field
        // (presumably a val="..." wrapper - TODO confirm against the data).
        if (terms.Length > 2)
        {
            string val = terms[2].Substring(5, terms[2].Length - 6);
            ValGetTimeEntity(val, te);
        }

        // mode: read from the fourth field, so four fields are required. An unrecognised
        // word leaves the mode untouched.
        if (terms.Length > 3)
        {
            switch (TrailingQuotedWord(terms[3]))
            {
                case "NA": te.mode = TimeEntityMode.NA; break;
                case "APPROX": te.mode = TimeEntityMode.APPROX; break;
                case "END": te.mode = TimeEntityMode.END; break;
                case "LESS": te.mode = TimeEntityMode.LESS; break;
                case "MORE": te.mode = TimeEntityMode.MORE; break;
                case "MIDDLE": te.mode = TimeEntityMode.MIDDLE; break;
                case "START": te.mode = TimeEntityMode.START; break;
            }
        }

        // type
        switch (con)
        {
            case "DATE": te.type = TimeEntityType.DATE; break;
            case "TIME": te.type = TimeEntityType.TIME; break;
            case "DURATION": te.type = TimeEntityType.DURATION; break;
            case "FREQUENCY": te.type = TimeEntityType.FREQUENCY; break;
            default: te.type = TimeEntityType.RELATIVE_TP; break;
        }

        if (terms[terms.Length - 1].Contains("relative"))
        {
            te.is_relative_tp = true;
        }

        ret = te;
        ret.type = con;
    }
    else if (conUpper == "IMPLICIT" || conUpper == "EXPLICIT" || conUpper == "TREATMENT"
             || conUpper == "CLINICAL_DEPT" || conUpper == "EVIDENTIAL" || conUpper == "OCCURRENCE")
    {
        PMTEntity pe = new PMTEntity();
        pe.type = conUpper;

        // modality: anything other than the listed words (including no match) means PROPOSED.
        if (terms.Length > 2)
        {
            switch (TrailingQuotedWord(terms[2]))
            {
                case "CONDITIONAL": pe.modality = Modality.CONDITIONAL; break;
                case "FACTUAL":
                case "ACTUAL": pe.modality = Modality.FACTUAL; break;
                case "POSSIBLE": pe.modality = Modality.POSSIBLE; break;
                default: pe.modality = Modality.PROPOSED; break;
            }
        }

        // polarity: read from the fourth field, so four fields are required.
        // Anything other than POS means NEG.
        if (terms.Length > 3)
        {
            pe.polarity = TrailingQuotedWord(terms[3]) == "POS" ? Polarity.POS : Polarity.NEG;
        }

        // sec_time_rel: the whole trailing quoted text of the fifth field, upper-cased.
        if (terms.Length > 4)
        {
            Match secMatch = Regex.Match(terms[4], @"""(?<sec>.+)""$");
            pe.sec_time_rel = secMatch.Groups["sec"].Value.ToUpperInvariant();
        }

        ret = pe;
        ret.type = conUpper;
    }
    else
    {
        ret = new PMTEntity();
        ret.type = con;
    }

    // Surface text: the quoted part of the first field (empty when the field does not match).
    Regex textregex = new Regex(@"^[A-Za-z0-9]+\=""(?<text>.+)"" [0-9]+\:[0-9]+ [0-9]+\:[0-9]+$");
    ret.text = textregex.Match(terms[0]).Groups["text"].Value;

    // Start/end positions: "line:col line:col" in the first field.
    Regex posregex = new Regex(@"(?<sline>[0-9]+)[:](?<scol>[0-9]+) (?<eline>[0-9]+)[:](?<ecol>[0-9]+)");
    Match matchpos = posregex.Match(terms[0]);

    int sline = GroupAsInt(matchpos, "sline");
    int scol = GroupAsInt(matchpos, "scol");
    ret.startLoc = new TextIdentifier(filename, sline, scol);

    int eline = GroupAsInt(matchpos, "eline");
    int ecol = GroupAsInt(matchpos, "ecol");
    ret.endLoc = new TextIdentifier(filename, eline, ecol);

    return ret;
}

/// <summary>Converts the named regex group of <paramref name="match"/> to an integer.</summary>
/// <exception cref="FormatException">The group did not participate in the match, so its
/// value is the empty string.</exception>
private static int GroupAsInt(Match match, string groupName)
{
    return Convert.ToInt32(match.Groups[groupName].Value);
}

/// <summary>
/// Returns, upper-cased with invariant rules, the alphabetic word enclosed in double quotes at
/// the very end of <paramref name="term"/>, or the empty string when there is none.
/// </summary>
private static string TrailingQuotedWord(string term)
{
    Match match = Regex.Match(term, @"""(?<word>[A-Za-z]+)""$");
    return match.Groups["word"].Value.ToUpperInvariant();
}