Exemple #1
0
        // Extracts the numeric "<digits>.txt" part of a document path; used to locate the
        // matching concept file. Regex instances are immutable, so one shared copy suffices.
        private static readonly Regex NumberedTextFileRegex = new Regex(@"[0-9]+\.txt");

        /// <summary>
        /// Batch entry point. Runs one hard-coded operation (<c>op</c>) against every
        /// <c>*.txt</c> file in the folders <c>docs\Package 1</c> through <c>docs\Package 6</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Entity.Preprocess();

            // Operation to run on each file; see RunOperation for the meaning of each value.
            // NOTE: the optional TimeExtractor.Preprocessing.preprocess() step (for op > 0)
            // is intentionally left disabled.
            int op = 3;

            for (int packageNo = 1; packageNo <= 6; packageNo++)
            {
                string   folder = "docs\\Package " + packageNo;
                string[] files  = Directory.GetFiles(folder, "*.txt");
                foreach (string file in files)
                {
                    RunOperation(op, file);
                }
            }
        }

        /// <summary>
        /// Dispatches a single document to the handler for the selected operation.
        /// Operation values without a handler are ignored.
        /// </summary>
        /// <param name="op">Operation number (0-5).</param>
        /// <param name="file">Path of the <c>.txt</c> document to process.</param>
        private static void RunOperation(int op, string file)
        {
            switch (op)
            {
            case 0:
                // Tag with stemmed PMT tags
                AnnotateStemmedConcepts(file);
                break;

            case 1:
                AnnotateTimeExpressions(file);
                break;

            case 2:
                // Tag with PMT tags + extracted time expressions
                AnnotateConceptsWithExtractedTimes(file);
                break;

            case 3:
                // From annotated time expression (including normalized) to concept files
                ExportAnnotatedTimes(file);
                break;

            case 4:
                // Update the original data
                // WARNING: THINK BEFORE YOU DO THIS
                UpdateOriginalFromAnnotation(file);
                break;

            case 5:
                // Tag with standard PMT tags + revised time taggings (including normalizations)
                AnnotateConceptsWithRevisedTimes(file);
                break;
            }
        }

        /// <summary>
        /// Resolves the concept file (<c>concepts\&lt;digits&gt;.con</c>) that belongs to a document.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        /// <returns>
        /// The concept file path, or <c>null</c> when the document name has no
        /// <c>&lt;digits&gt;.txt</c> part or the concept file does not exist (the latter is
        /// reported on the console).
        /// </returns>
        private static string FindConceptFile(string file)
        {
            Match match = NumberedTextFileRegex.Match(file);

            // Regex.Match never returns null; a failed match is signalled through Success.
            if (!match.Success)
            {
                return null;
            }

            string conceptFile = "concepts\\" + match.Value.Replace("txt", "con");
            if (!File.Exists(conceptFile))
            {
                Console.WriteLine("Uoh, concept file \"" + conceptFile + "\" not found.");
                return null;
            }

            return conceptFile;
        }

        /// <summary>
        /// Runs the time extractor on a document and converts every resulting sense group
        /// into a <c>TimeEntity</c>.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        /// <returns>One entity per element of <c>TimeVariables.TIME_ENTITIES</c>, in order.</returns>
        private static List<Entity> ExtractTimeEntities(string file)
        {
            TimeExtractor.tools.Init.setFilePath(file);
            TimeExtractor.TimeMapping.process(false);

            List<SenseGroup> senseGroups  = TimeVariables.TIME_ENTITIES;
            List<Entity>     timeEntities = new List<Entity>();
            foreach (SenseGroup sg in senseGroups)
            {
                Entity entity = new TimeEntity();
                entity.text     = sg.getWords()[0];
                entity.startLoc = sg.startLoc;
                entity.endLoc   = sg.endLoc;
                entity.setTimePoint(sg.getTimePeriod().getFirstTimePoint());
                timeEntities.Add(entity);
            }

            return timeEntities;
        }

        /// <summary>
        /// Operation 0: reads the document's concept file and annotates the document with
        /// those entities using the suffix <c>.PMTstem.con</c>.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        private static void AnnotateStemmedConcepts(string file)
        {
            string conceptFile = FindConceptFile(file);
            if (conceptFile == null)
            {
                return;
            }

            List<Entity> entities = new List<Entity>();
            foreach (string con in File.ReadAllLines(conceptFile))
            {
                if (con.Length <= 0)
                {
                    continue;
                }

                // I2b2formToEntity yields null for lines it does not parse; skip those
                // instead of handing null entries to the annotator.
                Entity entity = EntityUtil.I2b2formToEntity(con, file);
                if (entity != null)
                {
                    entities.Add(entity);
                }
            }

            Annotator.Annotate(file, ".PMTstem.con", entities, true);
        }

        /// <summary>
        /// Operation 1: extracts time expressions from the document and annotates it with
        /// them using the suffix <c>.time.con</c>.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        private static void AnnotateTimeExpressions(string file)
        {
            List<Entity> timeEntities = ExtractTimeEntities(file);
            Annotator.Annotate(file, ".time.con", timeEntities, false);
        }

        /// <summary>
        /// Operation 2: combines the entities from the document's concept file with freshly
        /// extracted time expressions and annotates the document using the suffix
        /// <c>.PMTrelation.con</c>.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        private static void AnnotateConceptsWithExtractedTimes(string file)
        {
            string conceptFile = FindConceptFile(file);
            if (conceptFile == null)
            {
                return;
            }

            List<Entity> entities = new List<Entity>();
            foreach (string con in File.ReadAllLines(conceptFile))
            {
                if (con.Length <= 0)
                {
                    continue;
                }

                // The cast is kept on purpose: every concept entry is expected to be a
                // PMTEntity, and anything else still fails with InvalidCastException.
                PMTEntity entity = (PMTEntity)EntityUtil.I2b2formToEntity(con, file);
                if (entity != null)
                {
                    entities.Add(entity);
                }
            }

            // Concept entities first, extracted time entities after them.
            entities.AddRange(ExtractTimeEntities(file));

            Annotator.Annotate(file, ".PMTrelation.con", entities, false);
        }

        /// <summary>
        /// Operation 3: reads the <c>.time.con</c> annotation of a document and exports its
        /// entities to the corresponding <c>.time-con</c> concept file.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        private static void ExportAnnotatedTimes(string file)
        {
            string   annotateFile = file.Replace(".txt", ".time.con");
            string   conFile      = file.Replace(".txt", ".time-con");
            Entity[] entities     = Annotator.ReadAnnotate(annotateFile);
            EntityUtil.ExportConcept(conFile, entities, false);
        }

        /// <summary>
        /// Operation 4: passes the <c>.time.con</c> annotation and the raw document to
        /// <c>Annotator.UpdateOriginalData</c>. WARNING: this targets the original data file.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        private static void UpdateOriginalFromAnnotation(string file)
        {
            string annotateFile = file.Replace(".txt", ".time.con");
            Annotator.UpdateOriginalData(annotateFile, file);
        }

        /// <summary>
        /// Operation 5: imports the document's <c>.time-con</c> entities and its
        /// <c>concepts\*.con</c> entities and annotates the document with both, using the
        /// suffix <c>.PMTrelation.con</c>.
        /// </summary>
        /// <param name="file">Path of the <c>.txt</c> document.</param>
        private static void AnnotateConceptsWithRevisedTimes(string file)
        {
            string timeConFile = file.Replace(".txt", ".time-con");
            string pmtConFile  = "concepts\\" + FileNameUtil.FileNameNoSuffix(file) + ".con";

            Entity[] timeEntities = EntityUtil.ImportConcept(timeConFile, "*.time-con", "*.txt");
            Entity[] pmtEntities  = EntityUtil.ImportConcept(pmtConFile, "*.con", "*.txt");

            // Time entities first, then PMT entities.
            List<Entity> entities = new List<Entity>();
            entities.AddRange(timeEntities);
            entities.AddRange(pmtEntities);

            Annotator.Annotate(file, ".PMTrelation.con", entities, false);
        }
Exemple #2
0
        // Patterns used by I2b2formToEntity. They are built once instead of on every call
        // (and, for the normalization pattern, on every loop iteration).
        private static readonly Regex ConceptNameRegex   = new Regex(@"""(?<con>[A-Za-z_]+)""");
        private static readonly Regex NormalizationRegex = new Regex(@"normalization=\((?<year>[0-9]+),(?<month>[0-9]+),(?<day>[0-9]+),(?<hour>[0-9]+),(?<minute>[0-9]+),(?<second>[0-9]+)\)");
        private static readonly Regex SectimeDateRegex   = new Regex(@"dvalue=""(?<year>[0-9]+)-(?<month>[0-9]+)-(?<day>[0-9]+)""");
        private static readonly Regex QuotedModRegex     = new Regex(@"""(?<mod>[A-Za-z]+)""$");
        private static readonly Regex QuotedPolRegex     = new Regex(@"""(?<pol>[A-Za-z]+)""$");
        private static readonly Regex QuotedSecRegex     = new Regex(@"""(?<sec>.+)""$");
        private static readonly Regex EntityTextRegex    = new Regex(@"^[A-Za-z0-9]+\=""(?<text>.+)"" [0-9]+\:[0-9]+ [0-9]+\:[0-9]+$");
        private static readonly Regex EntitySpanRegex    = new Regex(@"(?<sline>[0-9]+)[:](?<scol>[0-9]+) (?<eline>[0-9]+)[:](?<ecol>[0-9]+)");

        /// <summary>
        /// Parses one "||"-separated concept line into an <see cref="Entity"/>.
        /// </summary>
        /// <remarks>
        /// The first field supplies the quoted entity text and its
        /// <c>line:col line:col</c> span; the second field supplies the quoted concept name.
        /// The concept name (or a "sectime" prefix on the first field) decides which entity
        /// is built:
        /// <list type="bullet">
        /// <item><description><c>time</c>: a TimeEntity with one time point per remaining field.</description></item>
        /// <item><description>first field starts with <c>sectime</c>: a TimeEntity dated from the third field's <c>dvalue</c>.</description></item>
        /// <item><description><c>DATE</c>/<c>TIME</c>/<c>DURATION</c>/<c>FREQUENCY</c>/<c>RELATIVE</c>: a TimeEntity with value, mode and type.</description></item>
        /// <item><description><c>IMPLICIT</c>/<c>EXPLICIT</c>/<c>TREATMENT</c>/<c>CLINICAL_DEPT</c>/<c>EVIDENTIAL</c>/<c>OCCURRENCE</c> (any case): a PMTEntity with modality, polarity and optional sec_time_rel.</description></item>
        /// <item><description>anything else: a plain PMTEntity whose type is the concept name.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="i2b2str">The concept line to parse.</param>
        /// <param name="filename">File name stored in the entity's start and end locations.</param>
        /// <returns>The parsed entity, or <c>null</c> when the line contains "~~".</returns>
        /// <exception cref="IndexOutOfRangeException">The line has fewer fields than the selected entity kind requires.</exception>
        /// <exception cref="FormatException">A numeric part (span, date, normalization) is missing, so an empty string reaches <c>Convert.ToInt32</c>.</exception>
        public static Entity I2b2formToEntity(string i2b2str, string filename)
        {
            // Lines containing "~~" are not parsed by this method.
            if (i2b2str.IndexOf("~~") != -1)
            {
                return null;
            }

            string[] terms = i2b2str.Split(new string[] { "||" }, StringSplitOptions.RemoveEmptyEntries);

            string con = ConceptNameRegex.Match(terms[1]).Groups["con"].Value;

            // Concept names are machine identifiers, so case folding must not depend on the
            // current culture (e.g. Turkish 'i' -> 'İ' would break the comparisons below).
            string conUpper = con.ToUpperInvariant();

            Entity ret;
            if (con == "time")
            {
                ret = BuildNormalizedTimeEntity(terms, con);
            }
            else if (terms[0].StartsWith("sectime", StringComparison.OrdinalIgnoreCase))
            {
                ret = BuildSectionTimeEntity(terms);
            }
            else if (con == "DATE" || con == "TIME" || con == "DURATION" || con == "FREQUENCY" || con == "RELATIVE")
            {
                ret = BuildTemporalExpressionEntity(terms, con);
            }
            else if (conUpper == "IMPLICIT" || conUpper == "EXPLICIT" || conUpper == "TREATMENT" ||
                     conUpper == "CLINICAL_DEPT" || conUpper == "EVIDENTIAL" || conUpper == "OCCURRENCE")
            {
                ret = BuildAttributedPmtEntity(terms, conUpper);
            }
            else
            {
                ret      = new PMTEntity();
                ret.type = con;
            }

            // Text and span are taken from the first field for every entity kind.
            ret.text = EntityTextRegex.Match(terms[0]).Groups["text"].Value;

            Match span = EntitySpanRegex.Match(terms[0]);

            int sline = GroupAsInt(span, "sline");
            int scol  = GroupAsInt(span, "scol");
            ret.startLoc = new TextIdentifier(filename, sline, scol);

            int eline = GroupAsInt(span, "eline");
            int ecol  = GroupAsInt(span, "ecol");
            ret.endLoc = new TextIdentifier(filename, eline, ecol);

            return ret;
        }

        /// <summary>Converts the named capture group of a match to an integer.</summary>
        private static int GroupAsInt(Match match, string groupName)
        {
            return Convert.ToInt32(match.Groups[groupName].Value);
        }

        /// <summary>
        /// Builds the entity for concept <c>time</c>: every field from the third onward is
        /// read as a <c>normalization=(y,m,d,h,mi,s)</c> time point.
        /// </summary>
        private static Entity BuildNormalizedTimeEntity(string[] terms, string con)
        {
            Entity entity = new TimeEntity();
            for (int i = 2; i < terms.Length; i++)
            {
                Match norm = NormalizationRegex.Match(terms[i]);

                int year   = GroupAsInt(norm, "year");
                int month  = GroupAsInt(norm, "month");
                int day    = GroupAsInt(norm, "day");
                int hour   = GroupAsInt(norm, "hour");
                int minute = GroupAsInt(norm, "minute");
                int second = GroupAsInt(norm, "second");

                entity.addTimePoint(new TimePoint(year, month, day, hour, minute, second));
            }
            entity.type = con;
            return entity;
        }

        /// <summary>
        /// Builds the entity for a "sectime" line: a DATE TimeEntity whose time point is the
        /// <c>dvalue="yyyy-mm-dd"</c> found in the third field.
        /// </summary>
        private static Entity BuildSectionTimeEntity(string[] terms)
        {
            Match date = SectimeDateRegex.Match(terms[2]);

            int year  = GroupAsInt(date, "year");
            int month = GroupAsInt(date, "month");
            int day   = GroupAsInt(date, "day");

            TimeEntity te = new TimeEntity();
            te.mode = TimeEntityMode.NA;
            te.type = TimeEntityType.DATE;
            te.setTimePoint(new TimePoint(year, month, day));

            Entity entity = te;
            entity.type = "time";
            return entity;
        }

        /// <summary>
        /// Builds the entity for concepts DATE, TIME, DURATION, FREQUENCY and RELATIVE:
        /// value from the third field, mode from the fourth, type from the concept name.
        /// </summary>
        private static Entity BuildTemporalExpressionEntity(string[] terms, string con)
        {
            TimeEntity te = new TimeEntity();

            // value: third field with its first 5 and last 1 characters stripped
            if (terms.Length > 2)
            {
                string val = terms[2].Substring(5, terms[2].Length - 6);
                ValGetTimeEntity(val, te);
            }

            // mode: lives in the FOURTH field, so at least four fields are required
            // (the guard used to test for three and overran the array on three-field lines).
            if (terms.Length > 3)
            {
                string mod = QuotedModRegex.Match(terms[3]).Groups["mod"].Value.ToUpperInvariant();
                switch (mod)
                {
                case "NA":
                    te.mode = TimeEntityMode.NA;
                    break;

                case "APPROX":
                    te.mode = TimeEntityMode.APPROX;
                    break;

                case "END":
                    te.mode = TimeEntityMode.END;
                    break;

                case "LESS":
                    te.mode = TimeEntityMode.LESS;
                    break;

                case "MORE":
                    te.mode = TimeEntityMode.MORE;
                    break;

                case "MIDDLE":
                    te.mode = TimeEntityMode.MIDDLE;
                    break;

                case "START":
                    te.mode = TimeEntityMode.START;
                    break;

                    // Any other value leaves the mode as it was.
                }
            }

            // type
            switch (con)
            {
            case "DATE":
                te.type = TimeEntityType.DATE;
                break;

            case "TIME":
                te.type = TimeEntityType.TIME;
                break;

            case "DURATION":
                te.type = TimeEntityType.DURATION;
                break;

            case "FREQUENCY":
                te.type = TimeEntityType.FREQUENCY;
                break;

            default:
                te.type = TimeEntityType.RELATIVE_TP;
                break;
            }

            if (terms[terms.Length - 1].IndexOf("relative") != -1)
            {
                te.is_relative_tp = true;
            }

            Entity entity = te;
            entity.type = con;
            return entity;
        }

        /// <summary>
        /// Builds the PMTEntity for the attributed concepts (IMPLICIT, EXPLICIT, TREATMENT,
        /// CLINICAL_DEPT, EVIDENTIAL, OCCURRENCE): modality from the third field, polarity
        /// from the fourth and sec_time_rel from the fifth, each when present.
        /// </summary>
        private static Entity BuildAttributedPmtEntity(string[] terms, string conUpper)
        {
            PMTEntity pe = new PMTEntity();
            pe.type = conUpper;

            // modality
            if (terms.Length > 2)
            {
                string mode = QuotedModRegex.Match(terms[2]).Groups["mod"].Value.ToUpperInvariant();
                if (mode == "CONDITIONAL")
                {
                    pe.modality = Modality.CONDITIONAL;
                }
                else if (mode == "FACTUAL" || mode == "ACTUAL")
                {
                    pe.modality = Modality.FACTUAL;
                }
                else if (mode == "POSSIBLE")
                {
                    pe.modality = Modality.POSSIBLE;
                }
                else
                {
                    pe.modality = Modality.PROPOSED;
                }
            }

            // polarity: lives in the FOURTH field, so at least four fields are required
            // (the guard used to test for three and overran the array on three-field lines).
            if (terms.Length > 3)
            {
                string pol = QuotedPolRegex.Match(terms[3]).Groups["pol"].Value.ToUpperInvariant();
                pe.polarity = (pol == "POS") ? Polarity.POS : Polarity.NEG;
            }

            // sec_time_rel
            if (terms.Length > 4)
            {
                pe.sec_time_rel = QuotedSecRegex.Match(terms[4]).Groups["sec"].Value.ToUpperInvariant();
            }

            Entity entity = pe;
            entity.type = conUpper;
            return entity;
        }