/// <summary>
/// Console entry point. Converts the input notes returned by
/// <c>SQLService.GetInputFileNames</c> to .txt files, runs the cTAKES batch script,
/// parses every cTAKES XML file that corresponds to a converted note, and finally
/// waits for a key press.
/// </summary>
/// <remarks>
/// Folder locations, the indications file and the optional <c>bypass_umls</c> value are
/// read from the application settings. When <c>bypass_umls</c> is non-empty,
/// "umlsmock.exe" is launched through <c>RunProcess</c> before processing, and
/// <c>StopDummyServer("umlsmock")</c> is called from a <c>finally</c> block so that it
/// runs on every exit path (success, nothing to process, or exception).
/// Any exception is logged through <c>LogHelper.SaveLogInfo</c> and its message is
/// written to the console.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string input_notes_folder = ConfigurationManager.AppSettings["input_notes_folder"];
    string cTakes_xml_folder = ConfigurationManager.AppSettings["cTakes_xml_folder"];
    string cTakes_bin_folder = ConfigurationManager.AppSettings["cTakes_bin_folder"];
    string parsed_service_folder = ConfigurationManager.AppSettings["parsed_service_folder"];
    string output_notes_html_folder = ConfigurationManager.AppSettings["output_notes_html_folder"];
    string bypass_umls = ConfigurationManager.AppSettings["bypass_umls"];
    string indications_file = ConfigurationManager.AppSettings["indications_file"];

    bool use_umls_mock = !string.IsNullOrEmpty(bypass_umls);

    try
    {
        if (use_umls_mock)
        {
            RunProcess(bypass_umls, "umlsmock.exe", false);
        }

        Dictionary<string, int> input_dict_files = SQLService.GetInputFileNames();
        // Maps "<name>.txt" to the note id of the input file it was converted from.
        Dictionary<string, int> converted_dict_files = new Dictionary<string, int>();

        if (input_dict_files != null && input_dict_files.Count > 0)
        {
            string[] input_file_entries = Directory.GetFiles(input_notes_folder);
            if (input_file_entries.Length > 0)
            {
                Console.WriteLine(new string('-', 70));
                Console.WriteLine("*** FILES CONVERSION ***");
                Console.WriteLine(new string('-', 70));
                Console.WriteLine();
                Console.WriteLine("Start the file conversion process\r\n\r\nPlease wait.........");

                foreach (string input_file in input_file_entries)
                {
                    string input_file_name = Path.GetFileName(input_file);
                    int input_note_id;
                    if (!input_dict_files.TryGetValue(input_file_name, out input_note_id))
                    {
                        // Files not listed by SQLService are ignored.
                        continue;
                    }

                    string new_file_txt = Path.GetFileNameWithoutExtension(input_file) + ".txt";
                    string input_file_path = Path.Combine(input_notes_folder, new_file_txt);
                    FilesHelper.ConvertToTxt(input_file, input_notes_folder, input_file_path);

                    // Two inputs that differ only by extension convert to the same .txt name.
                    // Dictionary.Add would throw and abort the whole run, so warn and keep the
                    // note id of the file converted last (its text is what is on disk).
                    if (converted_dict_files.ContainsKey(new_file_txt))
                    {
                        Console.WriteLine("Warning: {0} was produced by more than one input file; the last one is used.", new_file_txt);
                    }
                    converted_dict_files[new_file_txt] = input_note_id;
                }

                Console.WriteLine("Finish the file conversion process");
                Console.WriteLine();
            }

            Console.WriteLine(new string('-', 70));
            Console.WriteLine("*** cTAKES PROCESSING ***");
            Console.WriteLine(new string('-', 70));
            Console.WriteLine();

            RunProcess(cTakes_bin_folder, "runctakesCPE_CLI.bat", true);

            Parser parser = new Parser();
            string[] cTakes_file_entries = Directory.GetFiles(cTakes_xml_folder);

            if (cTakes_file_entries.Length > 0)
            {
                Console.WriteLine("Start the cTakes .xml files processing\r\n\r\nPlease wait.........");

                foreach (string xml_file in cTakes_file_entries)
                {
                    // The XML name minus its last extension is matched against the converted
                    // "<name>.txt" keys (presumably cTAKES emits "<name>.txt.xml" - confirm).
                    string file_without_ext = Path.GetFileNameWithoutExtension(xml_file);
                    int note_id;
                    if (converted_dict_files.TryGetValue(file_without_ext, out note_id))
                    {
                        parser.ParseXML(xml_file, input_notes_folder, cTakes_xml_folder, parsed_service_folder, output_notes_html_folder, indications_file, note_id);
                    }
                }

                Console.WriteLine("Finish the cTakes .xml files processing");
                Console.WriteLine();
                Console.WriteLine(new string('-', 70));
                Console.WriteLine();
                Console.WriteLine("THE PROCESS HAS BEEN FINISHED SUCCESSFULLY........");
                Console.WriteLine("PRESS ANY KEY TO CLOSE THE PROGRAM.........");
            }
            else
            {
                Console.WriteLine("NO OUTPUT HTML FILES........");
                Console.WriteLine("PRESS ANY KEY TO CLOSE THE PROGRAM........");
            }
        }
        else
        {
            Console.WriteLine("No files to read.");
        }
    }
    catch (Exception ex)
    {
        LogHelper.SaveLogInfo(string.Format("Exception: {0}", ex.Message), ex.StackTrace);
        Console.WriteLine(ex.Message);
    }
    finally
    {
        // Previously the mock was only stopped on the success path and in the catch block,
        // so it kept running when there were no input files or no cTAKES output.
        if (use_umls_mock)
        {
            StopDummyServer("umlsmock");
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Parses one cTAKES XML output file: for every DiseaseDisorderMention it collects the
/// covered token text and the referenced UmlsConcept codes, writes a plain-text report to
/// <paramref name="parsed_service_folder"/>, then hands the collected codes to
/// <c>SQLService</c> and <c>FilesHelper</c> to produce the HTML note and note conditions.
/// </summary>
/// <remarks>
/// Mentions whose span exactly matches a SemanticArgument or RomanNumeralAnnotation are
/// skipped. Nothing is written when the document has no sofa text or no mentions; a
/// console message is printed instead. All exceptions are caught and logged through
/// <c>LogHelper.SaveLogInfo</c>; none propagate to the caller.
/// The method assigns <c>lst_SNOMED_Codes</c>, <c>input_xml_fileName</c>,
/// <c>output_parsed_file</c>, <c>cTakesTextstring</c> and <c>lst_result</c>, which are
/// declared outside this block (presumably instance fields of the parser).
/// </remarks>
/// <param name="xml_file_name">Path of the cTAKES XML file to parse.</param>
/// <param name="input_notes_folder">Passed through to <c>FilesHelper.CreateHtmlFile</c>.</param>
/// <param name="cTakes_xml_folder">Passed to <c>FilesHelper.BackUpFiles</c> together with the XML file.</param>
/// <param name="parsed_service_folder">Folder that receives the text report; created if missing.</param>
/// <param name="output_notes_html_folder">Passed through to <c>FilesHelper.CreateHtmlFile</c>.</param>
/// <param name="indications_file">File read by <c>GetExcelRows</c>; rows must expose Code, Name, Confidence and Status.</param>
/// <param name="note_id">Note identifier forwarded to the HTML and SQL steps.</param>
public void ParseXML(string xml_file_name, string input_notes_folder, string cTakes_xml_folder, string parsed_service_folder, string output_notes_html_folder, string indications_file, int note_id)
{
    try
    {
        ConsoleSpinner spin = new ConsoleSpinner();
        lst_SNOMED_Codes = new List<dynamic>();
        input_xml_fileName = Path.GetFileName(xml_file_name);
        output_parsed_file = Path.Combine(parsed_service_folder, Path.GetFileNameWithoutExtension(xml_file_name));
        Console.WriteLine("\r\nProcessing of the file: {0}.....\r\n", input_xml_fileName);

        // NOTE(review): a null name already makes Path.Combine throw above (logged by the
        // catch below), so this guard effectively only filters the empty string.
        if (string.IsNullOrEmpty(xml_file_name))
        {
            return;
        }

        // Materialized once: these rows are consulted for every mention.
        var excel_rows = GetExcelRows(indications_file)
            .Select(dataRow => new
            {
                code = dataRow["Code"].ToString(),
                name = dataRow["Name"].ToString().ToLower(),
                confidence = dataRow["Confidence"].ToString(),
                status = dataRow["Status"].ToString()
            })
            .ToList();
        // Names are already lower-cased above; a set gives O(1) membership tests.
        var indicationNames = new HashSet<string>(excel_rows.Select(row => row.name));

        // CreateDirectory is a no-op when the folder already exists.
        Directory.CreateDirectory(parsed_service_folder);

        StringBuilder sb_descr = new StringBuilder();
        sb_descr.AppendLine(DateTime.Now.ToString());
        sb_descr.AppendLine(string.Format("Parsing of file: {0}", input_xml_fileName));
        sb_descr.AppendLine(new string('-', 80));
        sb_descr.AppendLine("Descriptions");
        sb_descr.AppendLine(new string('-', 80));
        sb_descr.AppendLine();

        XDocument document = XDocument.Load(xml_file_name);
        cTakesTextstring = document.Descendants("uima.cas.Sofa")
            .Select(r => r.Attribute("sofaString").Value)
            .FirstOrDefault();

        if (string.IsNullOrEmpty(cTakesTextstring))
        {
            // This message used to be attached to the "no mentions" branch instead.
            Console.WriteLine("cTakes text string is empty");
            return;
        }

        var positions = document.Descendants("org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention")
            .Select(r => new
            {
                begin = r.Attribute("begin").Value,
                end = r.Attribute("end").Value,
                arr_ref = r.Attribute("_ref_ontologyConceptArr").Value
            })
            .ToList();

        if (positions.Count == 0)
        {
            // As before, no report/HTML is produced for a note without mentions.
            Console.WriteLine("No DiseaseDisorderMention annotations found");
            return;
        }

        // The annotation lists below are scanned once per mention, so they are parsed a
        // single time here instead of being re-read from the XML on every enumeration.
        var sentences = document.Descendants("org.apache.ctakes.typesystem.type.textspan.Sentence")
            .Select(r => new
            {
                begin = int.Parse(r.Attribute("begin").Value),
                end = int.Parse(r.Attribute("end").Value)
            })
            .ToList();
        var treebankNode = document.Descendants("org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode")
            .Select(r => new
            {
                begin = int.Parse(r.Attribute("begin").Value),
                end = int.Parse(r.Attribute("end").Value),
                nodeValue = r.Attribute("nodeValue").Value.ToLower()
            })
            .ToList();
        var measurements = document.Descendants("org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation")
            .Select(r => new
            {
                begin = int.Parse(r.Attribute("begin").Value),
                end = int.Parse(r.Attribute("end").Value)
            })
            .ToList();
        var semantics = document.Descendants("org.apache.ctakes.typesystem.type.textsem.SemanticArgument")
            .Select(r => new
            {
                begin = int.Parse(r.Attribute("begin").Value),
                end = int.Parse(r.Attribute("end").Value)
            })
            .ToList();

        int descr_num = 1;
        // Span of the last mention that was reported; tokens inside it are not repeated.
        int prev_begin = -1;
        int prev_end = -1;

        foreach (var result in positions)
        {
            spin.Turn();
            int cur_begin = int.Parse(result.begin);
            int cur_end = int.Parse(result.end);

            // Tokens covered by this mention, excluding those inside the previous one.
            var cur_descr = treebankNode
                .Where(e => e.begin >= cur_begin && e.end <= cur_end
                    && !(e.begin >= prev_begin && e.end <= prev_end))
                .Select(e => e.nodeValue + " ")
                .ToList();

            if (cur_descr.Count == 0
                || semantics.Any(e => e.begin == cur_begin && e.end == cur_end)
                || measurements.Any(e => e.begin == cur_begin && e.end == cur_end))
            {
                continue;
            }

            string descr_val = string.Concat(cur_descr);

            // Sentences containing the mention, and indication names appearing in them.
            // All() is true for an empty list, so with no covering sentence every token
            // of the document is considered (same as the original query).
            var cur_sentence = sentences
                .Where(e => e.begin <= cur_begin && e.end >= cur_end)
                .ToList();
            var indications = treebankNode
                .Where(tb => cur_sentence.All(sent => sent.begin <= tb.begin && sent.end >= tb.end)
                    && indicationNames.Contains(tb.nodeValue))
                .Select(tb => tb.nodeValue)
                .ToList();

            // The first spreadsheet row matching any indication; identical for every code of
            // this mention, so it is resolved once outside the code loop.
            bool has_indication = indications.Count > 0;
            var matched_row = has_indication
                ? excel_rows.FirstOrDefault(row => indications.Contains(row.name))
                : null;
            string confidence = !has_indication ? "-" : (matched_row == null ? null : matched_row.confidence);
            string status = !has_indication ? "-" : (matched_row == null ? null : matched_row.status);
            string indication_suffix = matched_row == null
                ? null
                : " - " + matched_row.code + "(" + matched_row.name + ")";

            StringBuilder help_descr = new StringBuilder();
            help_descr.AppendLine(string.Format("Begin: {0}; End: {1}", cur_begin, cur_end));
            help_descr.AppendLine("Codes: ");

            // Ids listed in the FSArray referenced by the mention.
            var cas_arr = document.Descendants("uima.cas.FSArray")
                .Where(e => e.Attribute("_id").Value == result.arr_ref)
                .Descendants("i")
                .Select(e => e.Value);

            int code_num = 1;
            foreach (var id_itm in cas_arr)
            {
                spin.Turn();
                string currCode = document.Descendants("org.apache.ctakes.typesystem.type.refsem.UmlsConcept")
                    .Where(e => e.Attribute("_id").Value == id_itm)
                    .Select(e => e.Attribute("code").Value)
                    .FirstOrDefault();

                // fill dynamic list
                lst_SNOMED_Codes.Add(new
                {
                    snomed_code = currCode,
                    text_descr = descr_val.Trim(),
                    pos_start = cur_begin,
                    pos_end = cur_end,
                    confidence = confidence,
                    status = status
                });

                help_descr.AppendLine(string.Format("\t{0}) {1}", code_num, currCode + indication_suffix));
                code_num++;
            }

            if (sb_descr.ToString().Contains(descr_val))
            {
                // NOTE(review): Replace rewrites every occurrence of descr_val in the report
                // built so far, including matches inside other text; kept as is to preserve
                // the existing report format.
                sb_descr.Replace(descr_val, descr_val + "\n" + help_descr.ToString());
            }
            else
            {
                sb_descr.AppendLine(string.Format("{0}. Description: {1}", descr_num, descr_val));
                sb_descr.AppendLine(help_descr.ToString());
                sb_descr.AppendLine();
                descr_num++;
            }

            prev_begin = cur_begin;
            prev_end = cur_end;
        }

        using (StreamWriter sw = new StreamWriter(output_parsed_file))
        {
            sw.Write(sb_descr.ToString());
        }

        FilesHelper.BackUpFiles(cTakes_xml_folder, xml_file_name);

        string file_result_txt = Path.GetFileName(output_parsed_file);
        string file_result_html = Path.GetFileNameWithoutExtension(output_parsed_file) + ".htm";

        lst_result = SQLService.ExecuteCompareSQL(lst_SNOMED_Codes);
        FilesHelper.CreateHtmlFile(lst_result, input_notes_folder, output_notes_html_folder, file_result_txt, file_result_html, cTakesTextstring, note_id);
        SQLService.CreateNoteConditions(lst_result, file_result_html, note_id);
    }
    catch (Exception ex)
    {
        LogHelper.SaveLogInfo(string.Format("Exception: {0}", ex.Message), ex.StackTrace);
    }
}