Exemplo n.º 1
0
        /// <summary>
        /// Parses one "||"-separated i2b2 annotation line into an <see cref="Entity"/>.
        /// </summary>
        /// <remarks>
        /// Field 0 holds the quoted text followed by its "line:col line:col" span and field 1
        /// holds the quoted concept name. How the remaining fields are read depends on the concept:
        /// <list type="bullet">
        /// <item>"time": every further field must carry a normalization=(y,m,d,h,min,s) tuple.</item>
        /// <item>first field starting with "sectime": field 2 carries dvalue="y-m-d".</item>
        /// <item>DATE / TIME / DURATION / FREQUENCY / RELATIVE: field 2 is the value, field 3 the mode.</item>
        /// <item>IMPLICIT / EXPLICIT / TREATMENT / CLINICAL_DEPT / EVIDENTIAL / OCCURRENCE (any case):
        /// field 2 is the modality, field 3 the polarity, field 4 the section-time relation.</item>
        /// <item>anything else: a plain <see cref="PMTEntity"/> typed with the concept name.</item>
        /// </list>
        /// Numeric groups are converted with <see cref="Convert.ToInt32(string)"/>, so a field that
        /// does not match its expected pattern surfaces as a <see cref="FormatException"/>.
        /// </remarks>
        /// <param name="i2b2str">The annotation line to parse.</param>
        /// <param name="filename">File name recorded in the entity's start and end locations.</param>
        /// <returns>The parsed entity, or <c>null</c> when the line contains "~~".</returns>
        public static Entity I2b2formToEntity(string i2b2str, string filename)
        {
            // Lines containing "~~" are not parsed by this method.
            if (i2b2str.IndexOf("~~") != -1)
            {
                return null;
            }

            string[] terms = i2b2str.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);

            Match  conMatch = Regex.Match(terms[1], @"""(?<con>[A-Za-z_]+)""");
            string con      = conMatch.Groups["con"].Value;

            // The concept is restricted to [A-Za-z_] by the pattern above, so invariant casing is
            // exact and avoids culture surprises (e.g. Turkish dotted I) in the comparisons below.
            string conUpper = con.ToUpperInvariant();

            Entity ret;

            if (con == "time")
            {
                // Built once; the same pattern is applied to every remaining field.
                Regex normRegex = new Regex(@"normalization=\((?<year>[0-9]+),(?<month>[0-9]+),(?<day>[0-9]+),(?<hour>[0-9]+),(?<minute>[0-9]+),(?<second>[0-9]+)\)");

                ret = new TimeEntity();
                for (int i = 2; i < terms.Length; i++)
                {
                    Match     normMatch = normRegex.Match(terms[i]);
                    TimePoint tp        = new TimePoint(
                        Convert.ToInt32(normMatch.Groups["year"].Value),
                        Convert.ToInt32(normMatch.Groups["month"].Value),
                        Convert.ToInt32(normMatch.Groups["day"].Value),
                        Convert.ToInt32(normMatch.Groups["hour"].Value),
                        Convert.ToInt32(normMatch.Groups["minute"].Value),
                        Convert.ToInt32(normMatch.Groups["second"].Value));
                    ret.addTimePoint(tp);
                }
                ret.type = con;
            }
            else if (terms[0].StartsWith("sectime", StringComparison.OrdinalIgnoreCase))
            {
                Match dateMatch = Regex.Match(terms[2], @"dvalue=""(?<year>[0-9]+)-(?<month>[0-9]+)-(?<day>[0-9]+)""");

                int year  = Convert.ToInt32(dateMatch.Groups["year"].Value);
                int month = Convert.ToInt32(dateMatch.Groups["month"].Value);
                int day   = Convert.ToInt32(dateMatch.Groups["day"].Value);

                TimeEntity te = new TimeEntity();
                te.mode = TimeEntityMode.NA;
                te.type = TimeEntityType.DATE;
                te.setTimePoint(new TimePoint(year, month, day));

                // Assigned through the Entity-typed reference, exactly as before.
                ret      = te;
                ret.type = "time";
            }
            else if (con == "DATE" || con == "TIME" || con == "DURATION" || con == "FREQUENCY" || con == "RELATIVE")
            {
                TimeEntity te = new TimeEntity();

                // value (field 2): the first five characters and the last one are dropped
                // (presumably the val=" prefix and the closing quote -- confirm against the data).
                if (terms.Length > 2)
                {
                    string val = terms[2].Substring(5, terms[2].Length - 6);
                    ValGetTimeEntity(val, te);
                }

                // mode (field 3). The guard used to be Length > 2, which let a three-field
                // line index past the end of the array.
                if (terms.Length > 3)
                {
                    Match modeMatch = Regex.Match(terms[3], @"""(?<mod>[A-Za-z]+)""$");
                    switch (modeMatch.Groups["mod"].Value.ToUpperInvariant())
                    {
                    case "NA":
                        te.mode = TimeEntityMode.NA;
                        break;

                    case "APPROX":
                        te.mode = TimeEntityMode.APPROX;
                        break;

                    case "END":
                        te.mode = TimeEntityMode.END;
                        break;

                    case "LESS":
                        te.mode = TimeEntityMode.LESS;
                        break;

                    case "MORE":
                        te.mode = TimeEntityMode.MORE;
                        break;

                    case "MIDDLE":
                        te.mode = TimeEntityMode.MIDDLE;
                        break;

                    case "START":
                        te.mode = TimeEntityMode.START;
                        break;

                        // Unrecognised modes leave te.mode untouched.
                    }
                }

                // type: the declared concept overrides whatever ValGetTimeEntity inferred.
                switch (con)
                {
                case "DATE":
                    te.type = TimeEntityType.DATE;
                    break;

                case "TIME":
                    te.type = TimeEntityType.TIME;
                    break;

                case "DURATION":
                    te.type = TimeEntityType.DURATION;
                    break;

                case "FREQUENCY":
                    te.type = TimeEntityType.FREQUENCY;
                    break;

                default:
                    te.type = TimeEntityType.RELATIVE_TP;
                    break;
                }

                if (terms[terms.Length - 1].IndexOf("relative") != -1)
                {
                    te.is_relative_tp = true;
                }

                ret      = te;
                ret.type = con;
            }
            else if (conUpper == "IMPLICIT" || conUpper == "EXPLICIT" || conUpper == "TREATMENT" ||
                     conUpper == "CLINICAL_DEPT" || conUpper == "EVIDENTIAL" || conUpper == "OCCURRENCE")
            {
                PMTEntity pe = new PMTEntity();
                pe.type = conUpper;

                // modality (field 2); anything unrecognised counts as PROPOSED.
                if (terms.Length > 2)
                {
                    Match modMatch = Regex.Match(terms[2], @"""(?<mod>[A-Za-z]+)""$");
                    switch (modMatch.Groups["mod"].Value.ToUpperInvariant())
                    {
                    case "CONDITIONAL":
                        pe.modality = Modality.CONDITIONAL;
                        break;

                    case "FACTUAL":
                    case "ACTUAL":
                        pe.modality = Modality.FACTUAL;
                        break;

                    case "POSSIBLE":
                        pe.modality = Modality.POSSIBLE;
                        break;

                    default:
                        pe.modality = Modality.PROPOSED;
                        break;
                    }
                }

                // polarity (field 3); anything other than POS counts as NEG. The guard used to
                // be Length > 2, which let a three-field line index past the end of the array.
                if (terms.Length > 3)
                {
                    Match  polMatch = Regex.Match(terms[3], @"""(?<pol>[A-Za-z]+)""$");
                    string pol      = polMatch.Groups["pol"].Value.ToUpperInvariant();
                    pe.polarity = (pol == "POS") ? Polarity.POS : Polarity.NEG;
                }

                // sec_time_rel (field 4)
                if (terms.Length > 4)
                {
                    Match secMatch = Regex.Match(terms[4], @"""(?<sec>.+)""$");
                    pe.sec_time_rel = secMatch.Groups["sec"].Value.ToUpperInvariant();
                }

                ret      = pe;
                ret.type = conUpper;
            }
            else
            {
                ret      = new PMTEntity();
                ret.type = con;
            }

            // Common tail: quoted text and "sline:scol eline:ecol" span, both taken from field 0.
            Match textMatch = Regex.Match(terms[0], @"^[A-Za-z0-9]+\=""(?<text>.+)"" [0-9]+\:[0-9]+ [0-9]+\:[0-9]+$");
            ret.text = textMatch.Groups["text"].Value;

            Match posMatch = Regex.Match(terms[0], @"(?<sline>[0-9]+)[:](?<scol>[0-9]+) (?<eline>[0-9]+)[:](?<ecol>[0-9]+)");

            int sline = Convert.ToInt32(posMatch.Groups["sline"].Value);
            int scol  = Convert.ToInt32(posMatch.Groups["scol"].Value);
            ret.startLoc = new TextIdentifier(filename, sline, scol);

            int eline = Convert.ToInt32(posMatch.Groups["eline"].Value);
            int ecol  = Convert.ToInt32(posMatch.Groups["ecol"].Value);
            ret.endLoc = new TextIdentifier(filename, eline, ecol);

            return ret;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Batch driver: runs one hard-coded operation over every "*.txt" file found in
        /// "docs\Package 1" through "docs\Package 6".
        /// </summary>
        /// <remarks>
        /// Operations selected by the local <c>op</c>:
        /// 0 annotates with the concept file's entities (".PMTstem.con");
        /// 1 annotates with extracted time expressions (".time.con");
        /// 2 annotates with concept entities plus extracted time expressions (".PMTrelation.con");
        /// 3 exports an annotated ".time.con" file to a ".time-con" concept file;
        /// 4 writes annotations back into the raw file via Annotator.UpdateOriginalData;
        /// 5 merges ".time-con" and "concepts\*.con" entities and annotates (".PMTrelation.con").
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Entity.Preprocess();

            // Operation to run; edit and rebuild to switch.
            int op = 3;

            // The call to TimeExtractor.Preprocessing.preprocess() that was guarded by op > 0
            // was already commented out and stays disabled.

            for (int packageNo = 1; packageNo <= 6; packageNo++)
            {
                string   folder = "docs\\Package " + packageNo;
                string[] files  = Directory.GetFiles(folder, "*.txt");
                foreach (string file in files)
                {
                    switch (op)
                    {
                    case 0:
                    {
                        //Tag with stemmed PMT tags

                        string conceptFilename;
                        if (!TryGetConceptFile(file, out conceptFilename))
                        {
                            break;
                        }

                        List <Entity> entities = LoadConceptEntities(conceptFilename, file);
                        Annotator.Annotate(file, ".PMTstem.con", entities, true);

                        break;
                    }

                    case 1:
                    {
                        List <Entity> timeEntities = ExtractTimeEntities(file);
                        Annotator.Annotate(file, ".time.con", timeEntities, false);

                        break;
                    }

                    case 2:
                    {
                        //Tag with PMT tags + guessed classifications

                        string conceptFilename;
                        if (!TryGetConceptFile(file, out conceptFilename))
                        {
                            break;
                        }

                        // The former per-entity section-number lookup (".section" file plus three
                        // constant number tables) computed values that were never used, so it is
                        // gone; concept entities come first, time entities are appended after.
                        List <Entity> entities = LoadConceptEntities(conceptFilename, file);
                        entities.AddRange(ExtractTimeEntities(file));

                        Annotator.Annotate(file, ".PMTrelation.con", entities, false);

                        break;
                    }

                    case 3:
                    {
                        //From annotated time expression (including normalized) to concept files

                        string   annotate_file = file.Replace(".txt", ".time.con");
                        string   con_file      = file.Replace(".txt", ".time-con");
                        Entity[] entities      = Annotator.ReadAnnotate(annotate_file);
                        EntityUtil.ExportConcept(con_file, entities, false);

                        break;
                    }

                    case 4:
                    {
                        //Update the original data
                        //WARNING: THINK BEFORE YOU DO THIS

                        string annotate_file = file.Replace(".txt", ".time.con");
                        string raw_file      = file;
                        Annotator.UpdateOriginalData(annotate_file, raw_file);

                        break;
                    }

                    case 5:
                    {
                        //Tag with standard PMT tags + revised time taggings (including normalizations)

                        string time_con_file = file.Replace(".txt", ".time-con");
                        string pmt_con_file  = "concepts\\" + FileNameUtil.FileNameNoSuffix(file) + ".con";

                        Entity[] TIMEentities = EntityUtil.ImportConcept(time_con_file, "*.time-con", "*.txt");
                        Entity[] PMTentities  = EntityUtil.ImportConcept(pmt_con_file, "*.con", "*.txt");

                        // Time entities first, then PMT entities, as before.
                        List <Entity> entities = new List <Entity>();
                        entities.AddRange(TIMEentities);
                        entities.AddRange(PMTentities);

                        Annotator.Annotate(file, ".PMTrelation.con", entities, false);

                        break;
                    }
                    }
                }
            }
        }

        /// <summary>
        /// Maps a document path containing "N.txt" (N numeric) to "concepts\N.con" and checks
        /// that the concept file exists.
        /// </summary>
        /// <param name="file">Path of the document being processed.</param>
        /// <param name="conceptFilename">The derived concept path, or null when the name has no "N.txt" part.</param>
        /// <returns>True when the concept file exists; a missing file is reported on the console.</returns>
        private static bool TryGetConceptFile(string file, out string conceptFilename)
        {
            conceptFilename = null;

            // Regex.Match never returns null; a failed match is signalled through Success.
            Match match = Regex.Match(file, @"[0-9]+\.txt");
            if (!match.Success)
            {
                return false;
            }

            conceptFilename = "concepts\\" + match.Value.Replace("txt", "con");
            if (!File.Exists(conceptFilename))
            {
                Console.WriteLine("Uoh, concept file \"" + conceptFilename + "\" not found.");
                return false;
            }

            return true;
        }

        /// <summary>
        /// Parses every non-empty line of a concept file with EntityUtil.I2b2formToEntity.
        /// </summary>
        /// <param name="conceptFilename">Concept file to read.</param>
        /// <param name="file">Document path handed to the parser for location bookkeeping.</param>
        /// <returns>The parsed entities in file order; lines the parser returns null for are skipped.</returns>
        private static List <Entity> LoadConceptEntities(string conceptFilename, string file)
        {
            List <Entity> entities = new List <Entity>();
            foreach (string line in File.ReadAllLines(conceptFilename))
            {
                if (line.Length <= 0)
                {
                    continue;
                }

                Entity entity = EntityUtil.I2b2formToEntity(line, file);
                if (entity != null)
                {
                    entities.Add(entity);
                }
            }

            return entities;
        }

        /// <summary>
        /// Runs the time extractor on a document and wraps each resulting sense group in a TimeEntity.
        /// </summary>
        /// <param name="file">Path of the document to process.</param>
        /// <returns>One entity per element of TimeVariables.TIME_ENTITIES, in order.</returns>
        private static List <Entity> ExtractTimeEntities(string file)
        {
            TimeExtractor.tools.Init.setFilePath(file);
            TimeExtractor.TimeMapping.process(false);

            List <SenseGroup> sensegroups  = TimeVariables.TIME_ENTITIES;
            List <Entity>     timeEntities = new List <Entity>();
            foreach (SenseGroup sg in sensegroups)
            {
                Entity entity = new TimeEntity();
                entity.text     = sg.getWords()[0];
                entity.startLoc = sg.startLoc;
                entity.endLoc   = sg.endLoc;
                entity.setTimePoint(sg.getTimePeriod().getFirstTimePoint());
                timeEntities.Add(entity);
            }

            return timeEntities;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Interprets a normalized time value string and stores the result in <paramref name="e"/>.
        /// </summary>
        /// <remarks>
        /// The value is upper-cased and stripped of spaces and tabs, then tried against these
        /// forms in order; the first that applies wins:
        /// <list type="number">
        /// <item>"dur:+:KEY" / "dur:-:KEY": relative time point (duration, AFTER/BEFORE, key event).</item>
        /// <item>empty string: a default <see cref="TimePoint"/>.</item>
        /// <item>"Y-M-D", "Y-M", "Y": a date; missing parts use TimeConstants.DEFAULT_VALUE.</item>
        /// <item>"Y-M-DTh:m", "Th:m", "Thhmm", "Y-M-DThhmm": a time.</item>
        /// <item>a trailing "Th:m" on anything else is recorded as a time point and removed.</item>
        /// <item>"...R" or "...Rn": a frequency with n repeats (0 when absent) and empty duration.</item>
        /// <item>"R[n]P...": a frequency with n repeats whose "P..." part is parsed as a duration.</item>
        /// <item>anything else: a duration.</item>
        /// </list>
        /// </remarks>
        /// <param name="val">Raw value string.</param>
        /// <param name="e">Entity that receives type, time point, duration and related fields.</param>
        public static void ValGetTimeEntity(string val, TimeEntity e)
        {
            // Invariant casing: the keywords compared below are ASCII, and culture-specific
            // casing (e.g. Turkish dotted I) would stop "ADMISSION" and friends from matching.
            val = val.ToUpperInvariant().Replace(" ", "").Replace("\t", "");

            if (Regex.IsMatch(val, @"\:(\+|\-)\:"))
            {
                e.type = TimeEntityType.RELATIVE_TP;

                // Layout: <duration> ':' <sign> ':' <key event>
                int    firstColon = val.IndexOf(":");
                int    lastColon  = val.LastIndexOf(":");
                string durPart    = val.Substring(0, firstColon);
                string keyEvent   = val.Substring(lastColon + 1);
                string sign       = val.Substring(firstColon + 1, 1);

                e.duration = GetDurationExpression(durPart);
                e.relation = (sign == "-") ? TLinkType.BEFORE : TLinkType.AFTER;

                switch (keyEvent)
                {
                case "ADMISSION":
                    e.keyev = KeyTimeType.ADMISSION;
                    break;

                case "DISCHARGE":
                    e.keyev = KeyTimeType.DISCHARGE;
                    break;

                case "OPERATION":
                    e.keyev = KeyTimeType.OPERATION;
                    break;

                case "TRANSFER":
                    e.keyev = KeyTimeType.TRANSFER;
                    break;

                case "LIFE":
                case "BIRTH":
                    e.keyev = KeyTimeType.BIRTH;
                    break;

                case "OTHERS":
                case "THISTP":
                    e.keyev = KeyTimeType.OTHERS;
                    break;

                    // Unrecognised key events leave e.keyev untouched.
                }
                return;
            }

            if (val.Length == 0)
            {
                // No value at all: store a default time point and leave the type alone.
                e.setTimePoint(new TimePoint());
                return;
            }

            // ---- dates -------------------------------------------------------------------
            Match ymd = Regex.Match(val, @"^(?<year>[0-9]+)\-(?<month>[0-9]+)\-(?<day>[0-9]+)$");
            if (ymd.Success)
            {
                e.type = TimeEntityType.DATE;
                e.setTimePoint(new TimePoint(GroupAsInt32(ymd, "year"), GroupAsInt32(ymd, "month"), GroupAsInt32(ymd, "day")));
                return;
            }

            Match ym = Regex.Match(val, @"^(?<year>[0-9]+)\-(?<month>[0-9]+)$");
            if (ym.Success)
            {
                e.type = TimeEntityType.DATE;
                e.setTimePoint(new TimePoint(GroupAsInt32(ym, "year"), GroupAsInt32(ym, "month"), TimeConstants.DEFAULT_VALUE));
                return;
            }

            Match yearOnly = Regex.Match(val, @"^(?<year>[0-9]+)$");
            if (yearOnly.Success)
            {
                e.type = TimeEntityType.DATE;
                e.setTimePoint(new TimePoint(GroupAsInt32(yearOnly, "year"), TimeConstants.DEFAULT_VALUE, TimeConstants.DEFAULT_VALUE));
                return;
            }

            // ---- times -------------------------------------------------------------------
            Match dateTimeColon = Regex.Match(val, @"^(?<year>[0-9]+)\-(?<month>[0-9]+)\-(?<day>[0-9]+)T(?<hour>[0-9]+)\:(?<minute>[0-9]+)$");
            if (dateTimeColon.Success)
            {
                e.type = TimeEntityType.TIME;
                TimePoint tp = new TimePoint(GroupAsInt32(dateTimeColon, "year"), GroupAsInt32(dateTimeColon, "month"), GroupAsInt32(dateTimeColon, "day"));
                tp.setHour(GroupAsInt32(dateTimeColon, "hour"));
                tp.setMinute(GroupAsInt32(dateTimeColon, "minute"));
                e.setTimePoint(tp);
                return;
            }

            Match timeColon = Regex.Match(val, @"^T(?<hour>[0-9]+)\:(?<minute>[0-9]+)$");
            if (timeColon.Success)
            {
                e.type = TimeEntityType.TIME;
                TimePoint tp = new TimePoint();
                tp.setHour(GroupAsInt32(timeColon, "hour"));
                tp.setMinute(GroupAsInt32(timeColon, "minute"));
                e.setTimePoint(tp);
                return;
            }

            Match timeCompact = Regex.Match(val, @"^T(?<hour>[0-9][0-9])(?<minute>[0-9][0-9])$");
            if (timeCompact.Success)
            {
                e.type = TimeEntityType.TIME;
                TimePoint tp = new TimePoint();
                tp.setHour(GroupAsInt32(timeCompact, "hour"));
                tp.setMinute(GroupAsInt32(timeCompact, "minute"));
                e.setTimePoint(tp);
                return;
            }

            Match dateTimeCompact = Regex.Match(val, @"^(?<year>[0-9]+)\-(?<month>[0-9]+)\-(?<day>[0-9]+)T(?<hour>[0-9][0-9])(?<minute>[0-9][0-9])$");
            if (dateTimeCompact.Success)
            {
                e.type = TimeEntityType.TIME;
                TimePoint tp = new TimePoint(GroupAsInt32(dateTimeCompact, "year"), GroupAsInt32(dateTimeCompact, "month"), GroupAsInt32(dateTimeCompact, "day"));
                tp.setHour(GroupAsInt32(dateTimeCompact, "hour"));
                tp.setMinute(GroupAsInt32(dateTimeCompact, "minute"));
                e.setTimePoint(tp);
                return;
            }

            // ---- trailing time on a frequency/duration ------------------------------------
            // A "Th:m" suffix is recorded as the time point, then removed so that the rest of
            // the value can be read as a frequency or duration. The type is not set here.
            const string trailingTimePattern = @"T(?<hour>[0-9]+)\:(?<minute>[0-9]+)$";

            Match trailingTime = Regex.Match(val, trailingTimePattern);
            if (trailingTime.Success)
            {
                TimePoint tp = new TimePoint();
                tp.setHour(GroupAsInt32(trailingTime, "hour"));
                tp.setMinute(GroupAsInt32(trailingTime, "minute"));
                e.setTimePoint(tp);
            }

            val = Regex.Replace(val, trailingTimePattern, "");

            // ---- frequencies and durations ------------------------------------------------
            Match absFreq = Regex.Match(val, @"R(?<repeats>[0-9]*)$");
            if (absFreq.Success)
            {
                e.type         = TimeEntityType.FREQUENCY;
                e.repeat_times = 0;
                e.duration     = new Dictionary <DurationUnit, double>();

                string repeats = absFreq.Groups["repeats"].Value;
                if (repeats.Length > 0)
                {
                    e.repeat_times = Convert.ToInt32(repeats);
                }
                return;
            }

            if (val.StartsWith("R", StringComparison.Ordinal))
            {
                // Without a period marker, one is inserted right after each 'R' so that the
                // split below finds an (empty) repeat count and a "P..." duration.
                if (val.IndexOf("P") == -1)
                {
                    val = val.Replace("R", "RP");
                }

                e.type = TimeEntityType.FREQUENCY;

                int    periodAt   = val.IndexOf("P");
                string repeatText = val.Substring(1, periodAt - 1);
                if (repeatText == "")
                {
                    e.repeat_times = 0;
                }
                else
                {
                    // Machine-written data: parse with the invariant culture so that "1.5"
                    // means one and a half regardless of the machine's regional settings.
                    e.repeat_times = Convert.ToDouble(repeatText, System.Globalization.CultureInfo.InvariantCulture);
                }
                val = val.Substring(periodAt);
            }
            else
            {
                e.type = TimeEntityType.DURATION;
            }

            e.duration = GetDurationExpression(val);
        }

        /// <summary>
        /// Converts the named capture group of <paramref name="match"/> to an Int32.
        /// </summary>
        private static int GroupAsInt32(Match match, string groupName)
        {
            return Convert.ToInt32(match.Groups[groupName].Value);
        }