Example #1
0
        /// <summary>
        /// Loads everything that belongs to this record from disk: the raw text
        /// (tokenised into <c>words</c> / <c>texts</c>), the time and event
        /// entities, the TLinks, the section time entities, the section labels
        /// and the key admission/discharge time points.
        /// </summary>
        /// <remarks>
        /// Side effects visible in this method:
        /// <list type="bullet">
        /// <item>Prints a "Loading ..." line to the console.</item>
        /// <item>When the section-entity file does not exist, the admission and
        /// discharge dates are guessed from the raw text and the file is written
        /// (possibly empty) so that the next call reads it instead.</item>
        /// <item><c>links</c> is only (re)assigned when the TLink file exists.</item>
        /// </list>
        /// The wiki file and the raw text file are read unconditionally, so the
        /// exceptions of <see cref="File.ReadAllLines(string)"/> propagate when
        /// either one is missing.
        /// </remarks>
        public virtual void Load()
        {
            string[] lines;

            Console.WriteLine("Loading " + Filename + " ...");
            string[] wikifile = File.ReadAllLines(@".\" + Filename + ".wiki");

            // Slice every wiki line once instead of once per word: the part before
            // the first '|' is the lookup key, the part after the last '|' is the
            // value. Lines without '|' or with an empty value keep a null key and
            // are skipped during lookup (a missing '|' used to throw in Substring).
            string[] wikiKeys  = new string[wikifile.Length];
            string[] wikiTails = new string[wikifile.Length];
            for (int c = 0; c < wikifile.Length; c++)
            {
                string entry    = wikifile[c];
                int    firstBar = entry.IndexOf('|');
                int    lastBar  = entry.LastIndexOf('|');
                if (firstBar < 0 || entry.Length <= lastBar + 1)
                {
                    continue;
                }
                wikiKeys[c]  = entry.Substring(0, firstBar);
                wikiTails[c] = entry.Substring(lastBar + 1);
            }

            char[]   tokenSeparators = { ' ', '\t' };
            char[]   wikiSeparators  = { ' ', '_' };
            string[] strippedMarks   = { ";", ":" };

            //Load words
            lines = File.ReadAllLines(filename + raw_suffix);
            words = new List<List<Word>>();
            texts = new List<string>();
            for (int i = 0; i < lines.Length; i++)
            {
                texts.Add(lines[i]);
                string[]   terms = lines[i].Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries);
                List<Word> lst   = new List<Word>();
                for (int j = 0; j < terms.Length; j++)
                {
                    // Strip ';' and ':' from tokens that are not just that mark.
                    // string.Replace returns a new string, so the result has to be
                    // kept; a token that would become empty is left unchanged.
                    // NOTE(review): marks inside a token are removed as well, as the
                    // original Replace calls were written - confirm this is wanted.
                    string term = terms[j];
                    foreach (string mark in strippedMarks)
                    {
                        if (term != mark && term.Contains(mark))
                        {
                            string stripped = term.Replace(mark, "");
                            if (stripped.Length > 0)
                            {
                                term = stripped;
                            }
                        }
                    }

                    Word word = new Word(term, new TimeExtractor.units.TextIdentifier(Filename, i + 1, j));
                    word.Pos = j;

                    // The last wiki line whose key contains the lower-cased token wins.
                    string lowered = term.ToLowerInvariant();
                    for (int c = 0; c < wikiKeys.Length; c++)
                    {
                        if (wikiKeys[c] != null && wikiKeys[c].Contains(lowered))
                        {
                            word.Wiki = wikiTails[c].Split(wikiSeparators, StringSplitOptions.RemoveEmptyEntries);
                        }
                    }

                    lst.Add(word);
                }
                words.Add(lst);
            }

            //Load time entities (including normalization)

            entities = new List<Entity>();

            if (File.Exists(filename + time_con_suffix))
            {
                lines = File.ReadAllLines(filename + time_con_suffix);
                foreach (string line in lines)
                {
                    Entity entity = EntityUtil.I2b2formToEntity(line, filename);
                    entity.Fr = this;
                    entities.Add(entity);
                }
            }

            //Load PMT entities

            if (File.Exists(filename + event_con_suffix))
            {
                lines = File.ReadAllLines(filename + event_con_suffix);
                foreach (string line in lines)
                {
                    Entity entity = EntityUtil.I2b2formToEntity(line, filename);
                    entity.Fr = this;
                    entities.Add(entity);
                }
            }

            //remove duplicate entities
            RemoveDuplicateEntities();

            //Load TLinks
            if (File.Exists(filename + tlink_suffix))
            {
                lines = File.ReadAllLines(filename + tlink_suffix);
                links = new List<EntityLink>();
                foreach (string line in lines)
                {
                    EntityLink link = EntityUtil.I2b2formToTLink(line, filename, this);
                    if (link != null)
                    {
                        links.Add(link);
                    }
                }
            }

            //Load Section entities

            ad_tp = new TimePoint();
            dc_tp = new TimePoint();

            if (File.Exists(filename + section_entity_suffix))
            {
                lines = File.ReadAllLines(filename + section_entity_suffix);
                int entityIndex = 0;
                foreach (string line in lines)
                {
                    Entity entity = EntityUtil.I2b2formToEntity(line, filename);
                    entity.Fr         = this;
                    entity.is_section = true;
                    entities.Add(entity);

                    // The first entry supplies the admission time point, the second
                    // one the discharge time point.
                    if (entityIndex == 0)
                    {
                        ad_tp = entity.getFirstTimePoint();
                    }
                    else if (entityIndex == 1)
                    {
                        dc_tp = entity.getFirstTimePoint();
                    }
                    entityIndex++;
                }
            }
            else
            {
                //very naive way to extract the admission date and discharge date
                int   adline = -1;
                int   dcline = -1;
                Regex regex  = new Regex(@"^(?<d1>[0-9]+)(\\|\/|\-)(?<d2>[0-9]+)(\\|\/|\-)(?<d3>[0-9]+)$");
                Regex regex2 = new Regex(@"^(?<d1>[0-9]{4})(?<d2>[0-9]{2})(?<d3>[0-9]{2})$");

                // A date is looked for in the first token of the line that follows
                // a header line, so line 0 can never be a candidate.
                for (int i = 1; i < words.Count; i++)
                {
                    string header      = texts[i - 1].ToLowerInvariant().Replace(" ", "");
                    bool   isAdmission = header == "admissiondate:" || header == "registrationdate:";
                    bool   isDischarge = header == "dischargedate:";
                    if ((!isAdmission && !isDischarge) || words[i].Count == 0)
                    {
                        continue;
                    }

                    // The two patterns cannot match the same token (one requires
                    // separators, the other forbids them), so try them in turn.
                    string s     = words[i][0].WordText;
                    Match  match = regex.Match(s);
                    if (!match.Success)
                    {
                        match = regex2.Match(s);
                    }
                    if (!match.Success)
                    {
                        continue;
                    }

                    // An over-long digit run does not fit into an int; treat it as
                    // "no date found" rather than aborting the whole load.
                    int d1, d2, d3;
                    if (!int.TryParse(match.Groups["d1"].Value, out d1) ||
                        !int.TryParse(match.Groups["d2"].Value, out d2) ||
                        !int.TryParse(match.Groups["d3"].Value, out d3))
                    {
                        continue;
                    }

                    if (isAdmission)
                    {
                        ad_tp  = get_tp_from_3digit(d1, d2, d3);
                        adline = i;
                    }
                    else
                    {
                        dc_tp  = get_tp_from_3digit(d1, d2, d3);
                        dcline = i;
                    }
                }

                //Store the section time into the section time file
                List<string> seccons = new List<string>();
                if (adline != -1)
                {
                    seccons.Add("SECTIME=\"" + words[adline][0].WordText + "\" " + (adline + 1) + ":0 " + (adline + 1) + ":0||type=\"ADMISSION\"||dvalue=\"" + append_zero(ad_tp.getYear(), 4) + "-" + append_zero(ad_tp.getMonth(), 2) + "-" + append_zero(ad_tp.getDay(), 2) + "\"");
                }
                if (dcline != -1)
                {
                    seccons.Add("SECTIME=\"" + words[dcline][0].WordText + "\" " + (dcline + 1) + ":0 " + (dcline + 1) + ":0||type=\"DISCHARGE\"||dvalue=\"" + append_zero(dc_tp.getYear(), 4) + "-" + append_zero(dc_tp.getMonth(), 2) + "-" + append_zero(dc_tp.getDay(), 2) + "\"");
                }
                File.WriteAllLines(filename + section_entity_suffix, seccons.ToArray());
            }

            //Load Sections

            sections = new List<string>();
            if (File.Exists(filename + section_suffix))
            {
                lines = File.ReadAllLines(filename + section_suffix);
                foreach (string line in lines)
                {
                    // Keep what follows the last tab; a line without a tab is kept
                    // whole because LastIndexOf returns -1.
                    int t = line.LastIndexOf('\t');
                    sections.Add(line.Substring(t + 1));
                }
            }
            else
            {
                // No section file: every raw text line stands in as its own section.
                sections.AddRange(texts);
            }

            //Load and generate "virtual" entities (special entities)

            OpEntity = new TimeEntity();
            AdEntity = new TimeEntity();
            DcEntity = new TimeEntity();
            TrEntity = new TimeEntity();

            TimePoint admissionTP = new TimePoint();
            TimePoint dischargeTP = new TimePoint();

            if (File.Exists(Filename + keytp_suffix))
            {
                string text = File.ReadAllText(Filename + keytp_suffix);

                Regex adRegex = new Regex(@"admission=(?<year>[0-9]+)\s(?<month>[0-9]+)\s(?<day>[0-9]+)");
                Match adMatch = adRegex.Match(text);
                if (adMatch.Success)
                {
                    admissionTP.setYear(Convert.ToInt32(adMatch.Groups["year"].Value));
                    admissionTP.setMonth(Convert.ToInt32(adMatch.Groups["month"].Value));
                    admissionTP.setDay(Convert.ToInt32(adMatch.Groups["day"].Value));
                }

                Regex dcRegex = new Regex(@"discharge=(?<year>[0-9]+)\s(?<month>[0-9]+)\s(?<day>[0-9]+)");
                Match dcMatch = dcRegex.Match(text);
                if (dcMatch.Success)
                {
                    dischargeTP.setYear(Convert.ToInt32(dcMatch.Groups["year"].Value));
                    dischargeTP.setMonth(Convert.ToInt32(dcMatch.Groups["month"].Value));
                    dischargeTP.setDay(Convert.ToInt32(dcMatch.Groups["day"].Value));
                }
            }

            // The time points stay at their defaults when the key file is missing
            // or does not contain the corresponding entry.
            AdEntity.addTimePoint(admissionTP);
            DcEntity.addTimePoint(dischargeTP);
        }