/// <summary>
/// Cosmos DB trigger handler. For each modified document whose HL7 message type is ORU or MDM
/// and which contains OBX segments, concatenates the TX/FT observation values into one report,
/// extracts text from it (Cognitive Services first, TIKA as fallback), runs medical-entity
/// extraction on the result and uploads all collected entities to the search index.
/// </summary>
/// <param name="input">Documents delivered by the trigger; may be null or empty.</param>
/// <param name="log">Logger used for progress and error messages.</param>
public static void Run(
    [CosmosDBTrigger(
        databaseName: "%CosmosDBNAME%",
        collectionName: "%CosmosHL7Collection%",
        ConnectionStringSetting = "CosmosDBConnection",
        CreateLeaseCollectionIfNotExists = true,
        LeaseCollectionPrefix = "srchupd",
        LeaseCollectionName = "leases")] IReadOnlyList<Document> input,
    ILogger log)
{
    log.LogInformation("UpdateSearchIndexDiagReports function triggered");
    if (input != null && input.Count > 0)
    {
        SearchUtilities search = new SearchUtilities(log);
        log.LogInformation($"There were {input.Count} documents modified in DB..Running NLP/Seach pipeline for modified docs...");
        List<MedicalEntities> searcharr = new List<MedicalEntities>();

        foreach (Document d in input)
        {
            var obj = JObject.Parse(d.ToString());
            string msgtype = Utilities.getFirstField(obj["hl7message"]["MSH"]["MSH.9"]);

            // Null-safe, case-insensitive match. The same flag is reused for the document-type
            // lookup further down so that "mdm" and "MDM" are treated alike everywhere.
            bool isOru = string.Equals(msgtype, "ORU", System.StringComparison.OrdinalIgnoreCase);
            bool isMdm = string.Equals(msgtype, "MDM", System.StringComparison.OrdinalIgnoreCase);
            if (!isOru && !isMdm)
            {
                continue;
            }

            var obxToken = obj["hl7message"]["OBX"];
            if (obxToken == null)
            {
                continue;
            }

            // OBX is either an array of segments or a single segment object.
            StringBuilder builder = new StringBuilder();
            if (obxToken.Type == JTokenType.Array)
            {
                foreach (var obx in obxToken)
                {
                    AppendObxText(obx, builder);
                }
            }
            else
            {
                AppendObxText(obxToken, builder);
            }

            string report = builder.ToString();
            report = report.UnEscapeHL7();
            report = report.Replace(@"\\", @"\");
            byte[] reportBytes = Encoding.UTF8.GetBytes(report);

            string cogurl = Utilities.GetEnvironmentVariable("CogServicesOCRURL");
            string responseFromServer = NLPUtilities.ExtractTextUsingCogServices(reportBytes, cogurl, Utilities.GetEnvironmentVariable("CogServicesKey"));
            if (string.IsNullOrEmpty(responseFromServer))
            {
                responseFromServer = NLPUtilities.ExtractTextUsingTIKA(reportBytes, Utilities.GetEnvironmentVariable("TIKAServerurl"));
            }

            // Normalise a null result to empty so the checks below cannot dereference null and
            // one failed extraction does not abort the whole batch.
            responseFromServer = responseFromServer ?? string.Empty;

            if (responseFromServer.Length < 3)
            {
                log.LogError($"TIKA Server may have failed to parse content {(string)obj["id"]}");
            }
            else if (responseFromServer.StartsWith("TIMEOUT~", System.StringComparison.Ordinal))
            {
                // NOTE(review): the timeout marker text is still handed to entity extraction
                // below, exactly as before - confirm that this is intended.
                log.LogTrace("{\"id\":\"" + Utilities.getFirstField(obj["id"]) + "\",\"status\":\"Timeout\",\"readresulturl\":\"" + responseFromServer.Split("~")[1] + "\"}");
            }

            //Send Report to NLP
            CTakesRequest creq = new CTakesRequest()
            {
                Content = responseFromServer,
                CTAKESUrl = Utilities.GetEnvironmentVariable("CTAKESServerURL"),
                UMLSUser = Utilities.GetEnvironmentVariable("CTAKESUMLSUser"),
                UMLSPassword = Utilities.GetEnvironmentVariable("CTAKESUMLSPassword"),
                Format = Utilities.GetEnvironmentVariable("CTAKESFormat"),
            };
            var result = NLPUtilities.ExtractMedicalEntities(creq);
            result.Id = (string)obj["id"];
            result.Location = (string)obj["rhm"];
            if (string.IsNullOrEmpty(result.ParsedText))
            {
                result.ParsedText = responseFromServer;
            }

            // Only MDM messages carry a document type (TXA.2); tolerate a missing TXA segment.
            string doctype = "";
            if (isMdm)
            {
                var txa = obj["hl7message"]["TXA"];
                if (txa != null)
                {
                    doctype = Utilities.getFirstField(txa["TXA.2"]) ?? "";
                }
            }
            result.DocumentType = doctype;
            searcharr.Add(result);
        }

        if (searcharr.Count > 0)
        {
            search.UploadMedicalEntities(searcharr.ToArray());
        }
    }
    log.LogInformation("UpdateSearchIndexDiagReports function completed");
}

/// <summary>
/// Appends the OBX.5 value of <paramref name="obx"/> to <paramref name="builder"/> when the
/// segment's OBX.2 value type is TX or FT; other segments are ignored.
/// </summary>
private static void AppendObxText(JToken obx, StringBuilder builder)
{
    string valueType = Utilities.getFirstField(obx["OBX.2"]);
    if (valueType == "TX" || valueType == "FT")
    {
        builder.Append(Utilities.getFirstField(obx["OBX.5"]));
    }
}
/// <summary>
/// Blob trigger handler for files dropped under ingest/documents. Extracts text from the blob
/// (Cognitive Services first, TIKA as fallback), splits it into medical reports, runs
/// medical-entity extraction on each report and uploads the results to the search index.
/// Any exception is logged and swallowed.
/// </summary>
/// <param name="myBlob">Content stream of the triggering blob.</param>
/// <param name="name">Blob name relative to ingest/documents/ (bound from the trigger path).</param>
/// <param name="log">Logger used for progress and error messages.</param>
public static void Run(
    [BlobTrigger("%StorageAccountBlob%/ingest/documents/{name}", Connection = "StorageAccount")] Stream myBlob,
    string name,
    ILogger log)
{
    log.LogInformation("NLP Extract Entities File triggered by ingest/documents/" + name);
    try
    {
        // Derive the document id from the blob name: drop any folder prefix and the extension.
        string coid = name;
        int dirend = coid.LastIndexOf("/");
        if (dirend > -1)
        {
            coid = coid.Substring(dirend + 1);
        }
        int extbegin = coid.LastIndexOf(".");
        if (extbegin > -1)
        {
            coid = coid.Substring(0, extbegin);
        }
        string loc = "ingest/documents/" + name;

        byte[] byteArray = null;
        using (MemoryStream ms = new MemoryStream())
        {
            myBlob.CopyTo(ms);
            byteArray = ms.ToArray();
        }

        log.LogInformation("Calling CogServices/TIKA to Extract Text from hl7json/ingest/documents/" + name);
        string cogurl = Utilities.GetEnvironmentVariable("CogServicesOCRURL");
        log.LogInformation("Trying CogServices...");
        string responseFromServer = NLPUtilities.ExtractTextUsingCogServices(byteArray, cogurl, Utilities.GetEnvironmentVariable("CogServicesKey"));
        if (string.IsNullOrEmpty(responseFromServer))
        {
            log.LogInformation("No extract Trying TIKA...");
            responseFromServer = NLPUtilities.ExtractTextUsingTIKA(byteArray, Utilities.GetEnvironmentVariable("TIKAServerurl"));
        }

        // A null result used to surface only as a NullReferenceException from StartsWith;
        // report the real cause instead. Processing stops for this blob either way.
        if (responseFromServer == null)
        {
            log.LogError("No text could be extracted from ingest/documents/" + name);
            return;
        }

        if (responseFromServer.StartsWith("TIMEOUT~", System.StringComparison.Ordinal))
        {
            // NOTE(review): processing continues with the timeout marker text, as before -
            // confirm that this is intended.
            log.LogTrace("CogServiceExtract Timeout: {\"id\":\"" + coid + "\",\"status\":\"Timeout\",\"readresulturl\":\"" + responseFromServer.Split("~")[1] + "\"}");
        }

        log.LogInformation("Extracting Medical Reports from hl7json/ingest/documents/" + name);
        //Extract Reports From Content (Auto-Detect Medical Exchange Formats (CDA, HL7, FHIR))
        List<string> medreports = NLPUtilities.ExtractMedicalReportData(responseFromServer, log);
        List<MedicalEntities> retVal = new List<MedicalEntities>();
        foreach (string medreport in medreports)
        {
            CTakesRequest creq = new CTakesRequest()
            {
                Content = medreport,
                CTAKESUrl = Utilities.GetEnvironmentVariable("CTAKESServerURL"),
                UMLSUser = Utilities.GetEnvironmentVariable("CTAKESUMLSUser"),
                UMLSPassword = Utilities.GetEnvironmentVariable("CTAKESUMLSPassword"),
                Format = Utilities.GetEnvironmentVariable("CTAKESFormat"),
            };
            log.LogInformation("Calling CTAKES to extract medical entities from hl7json/ingest/documents/" + name);
            var result = NLPUtilities.ExtractMedicalEntities(creq);
            result.Id = coid;
            result.Location = loc;
            result.DocumentType = name;
            retVal.Add(result);
        }

        log.LogInformation("Updateing search index with content and medical entities from hl7json/ingest/documents/" + name);
        SearchUtilities su = new SearchUtilities(log);
        su.UploadMedicalEntities(retVal.ToArray());
        log.LogInformation("Succesfully Completed processing of hl7json/ingest/documents/" + name);
    }
    catch (System.Exception e)
    {
        log.LogError(e, e.Message);
    }
}
/// <summary>
/// Posts the request content to the configured CTAKES URL and converts the XML response into
/// a <see cref="MedicalEntities"/> instance (parsed text, concepts, and the disease/disorder,
/// medication, sign/symptom and anatomical-site mentions with their ontology concept ids).
/// </summary>
/// <param name="creq">Target URL, UMLS credentials, output format and the text to analyse.</param>
/// <returns>
/// The populated entities. This method is best-effort: on any failure the exception message is
/// written to the console and whatever was collected so far (possibly nothing) is returned.
/// </returns>
public static MedicalEntities ExtractMedicalEntities(CTakesRequest creq)
{
    var medicalEntities = new MedicalEntities();
    medicalEntities.DiseaseDisorderList = new List<Term>();
    medicalEntities.MedicationMentionList = new List<Term>();
    medicalEntities.SignSymptomMentionList = new List<Term>();
    medicalEntities.AnatomicalSiteMentionList = new List<Term>();
    medicalEntities.DiseaseDisorderConceptList = new List<OntologyConcept>();
    medicalEntities.MedicationMentionConceptList = new List<OntologyConcept>();
    medicalEntities.SignSymptomMentionConceptList = new List<OntologyConcept>();
    medicalEntities.AnatomicalSiteMentionConceptList = new List<OntologyConcept>();
    medicalEntities.ConceptNameDictionary = new List<Concept>();

    try
    {
        var request = (HttpWebRequest)WebRequest.Create(creq.CTAKESUrl);

        // Take a max of X KB of text
        var subText = creq.Content.Substring(0, Math.Min(500000, creq.Content.Length));

        // Every form value is URL-encoded so that credentials containing '&', '+', '%' or
        // spaces cannot corrupt the request body.
        // NOTE(review): the field names "umlsuser"/"umlspw" were reconstructed from a garbled
        // source line - confirm against the CTAKES endpoint.
        var postData = "q=" + HttpUtility.UrlEncode(subText, Encoding.ASCII);
        postData += "&format=" + HttpUtility.UrlEncode(creq.Format, Encoding.ASCII);
        postData += "&umlsuser=" + HttpUtility.UrlEncode(creq.UMLSUser, Encoding.ASCII);
        postData += "&umlspw=" + HttpUtility.UrlEncode(creq.UMLSPassword, Encoding.ASCII);
        var data = Encoding.ASCII.GetBytes(postData);

        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = data.Length;
        request.Timeout = 20 * 60 * 1000; // 20 min
        using (var stream = request.GetRequestStream())
        {
            stream.Write(data, 0, data.Length);
        }

        // Dispose the response and reader so the underlying connection is released.
        string responseString;
        using (var response = (HttpWebResponse)request.GetResponse())
        using (var reader = new StreamReader(response.GetResponseStream()))
        {
            responseString = reader.ReadToEnd();
        }

        if (responseString != "")
        {
            XmlDocument xml = new XmlDocument();
            xml.LoadXml(responseString);
            PopulateFromXml(xml, medicalEntities);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    return medicalEntities;
}

/// <summary>
/// Walks the child elements of the document's root element in order and fills
/// <paramref name="medicalEntities"/>. Mentions are resolved against the most recently seen
/// Sofa element's sofaString, so mentions that precede any Sofa element are skipped.
/// </summary>
private static void PopulateFromXml(XmlDocument xml, MedicalEntities medicalEntities)
{
    // DocumentElement locates the root whether or not an XML declaration precedes it.
    XmlElement root = xml.DocumentElement;
    if (root == null)
    {
        return;
    }

    string parsedText = "";
    foreach (XmlNode node in root.ChildNodes)
    {
        // Ignore comments, whitespace and other non-element nodes.
        var element = node as XmlElement;
        if (element == null)
        {
            continue;
        }

        switch (element.LocalName)
        {
            case "Sofa":
                parsedText = element.GetAttribute("sofaString");
                medicalEntities.ParsedText = parsedText;
                break;

            case "UmlsConcept":
                int id;
                if (!int.TryParse(element.GetAttribute("xmi:id"), out id))
                {
                    break; // concept without a usable id
                }
                Concept concept = new Concept()
                {
                    ConceptName = element.GetAttribute("preferredText"),
                    CUI = element.GetAttribute("cui"),
                    ConceptId = id,
                    Code = element.GetAttribute("code"),
                    CodingSchema = element.GetAttribute("codingScheme")
                };
                if (!medicalEntities.ConceptNameDictionary.Contains(concept))
                {
                    medicalEntities.ConceptNameDictionary.Add(concept);
                }
                break;

            case "DiseaseDisorderMention":
                AddMention(element, parsedText, medicalEntities.DiseaseDisorderList, medicalEntities.DiseaseDisorderConceptList);
                break;

            case "MedicationMention":
                AddMention(element, parsedText, medicalEntities.MedicationMentionList, medicalEntities.MedicationMentionConceptList);
                break;

            case "SignSymptomMention":
                AddMention(element, parsedText, medicalEntities.SignSymptomMentionList, medicalEntities.SignSymptomMentionConceptList);
                break;

            case "AnatomicalSiteMention":
                AddMention(element, parsedText, medicalEntities.AnatomicalSiteMentionList, medicalEntities.AnatomicalSiteMentionConceptList);
                break;
        }
    }
}

/// <summary>
/// Adds the lower-cased text covered by a mention's begin/end offsets to
/// <paramref name="terms"/> unless that term is already present, and records one
/// <see cref="OntologyConcept"/> per space-separated entry of its ontologyConceptArr attribute.
/// Mentions whose offsets are missing, malformed or outside the parsed text are skipped so a
/// single bad span no longer aborts the whole parse.
/// </summary>
private static void AddMention(XmlElement mention, string parsedText, ICollection<Term> terms, ICollection<OntologyConcept> concepts)
{
    int begin;
    int end;
    if (!int.TryParse(mention.GetAttribute("begin"), out begin) ||
        !int.TryParse(mention.GetAttribute("end"), out end))
    {
        return;
    }
    if (begin < 0 || end < begin || end > parsedText.Length)
    {
        return;
    }

    // Computed once instead of once per existing term inside the duplicate check.
    string term = parsedText.Substring(begin, end - begin).ToLower();
    if (terms.Any(t => t.term == term))
    {
        return;
    }

    Guid termId = Guid.NewGuid();
    terms.Add(new Term { termId = termId, term = term });

    string ontologyConceptArray = mention.GetAttribute("ontologyConceptArr");
    if (ontologyConceptArray.Length > 0)
    {
        foreach (var c in ontologyConceptArray.Split(' '))
        {
            concepts.Add(new OntologyConcept { conceptId = Guid.NewGuid(), termId = termId, ontologyConcept = c });
        }
    }
}
/// <summary>
/// HTTP POST handler. Extracts text from the request body (Cognitive Services first, TIKA as
/// fallback), splits it into medical reports, runs medical-entity extraction on each report
/// and returns the results as JSON. When the "updatesearch" query parameter is present the
/// results are also uploaded to the search index.
/// </summary>
/// <param name="req">
/// Incoming request. Optional query parameters read here: doctype, handwritten, id, location,
/// updatesearch. The body is the document to analyse.
/// </param>
/// <param name="log">Logger used for progress and error messages.</param>
/// <returns>
/// A JSON array of extracted entities; a JSON object with status "Timeout" and the
/// readresulturl when text extraction reports a timeout; or an internal-server-error result
/// when extraction yields nothing or an exception occurs.
/// </returns>
public static async Task<IActionResult> Run(
    [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
    ILogger log)
{
    log.LogInformation("NLP Extract Entities Http called");
    try
    {
        string doctype = req.Query["doctype"];
        if (doctype == null)
        {
            doctype = "Unkown Doc Type";
        }
        string handwritten = req.Query["handwritten"];
        string coid = req.Query["id"];
        if (coid == null)
        {
            coid = Guid.NewGuid().ToString();
        }
        string loc = req.Query["location"];
        if (loc == null)
        {
            loc = "Adhoc Document Addition";
        }
        string updatesearch = req.Query["updatesearch"];

        // Read the post data into byte array
        byte[] byteArray = null;
        using (var stream = new MemoryStream())
        {
            await req.Body.CopyToAsync(stream);
            byteArray = stream.ToArray();
        }

        string cogurl = Utilities.GetEnvironmentVariable("CogServicesOCRURL");
        if (handwritten != null)
        {
            cogurl += "?mode=Handwritten";
        }

        string responseFromServer = NLPUtilities.ExtractTextUsingCogServices(byteArray, cogurl, Utilities.GetEnvironmentVariable("CogServicesKey"));
        if (string.IsNullOrEmpty(responseFromServer))
        {
            responseFromServer = NLPUtilities.ExtractTextUsingTIKA(byteArray, Utilities.GetEnvironmentVariable("TIKAServerurl"));
        }

        // A null result used to fail with a NullReferenceException in StartsWith; the caller
        // still gets a 500, but the log now states the actual cause.
        if (responseFromServer == null)
        {
            log.LogError("No text could be extracted from the posted document " + coid);
            return new System.Web.Http.InternalServerErrorResult();
        }

        if (responseFromServer.StartsWith("TIMEOUT~", System.StringComparison.Ordinal))
        {
            // Build the JSON as an object rather than by string concatenation: "id" comes
            // straight from the query string, so quotes or backslashes in it must be escaped
            // by the serializer instead of breaking (or injecting into) the payload.
            var timeoutInfo = new JObject
            {
                ["id"] = coid,
                ["status"] = "Timeout",
                ["readresulturl"] = responseFromServer.Split("~")[1]
            };
            return new JsonResult(timeoutInfo);
        }

        //Extract Reports From Content (Auto-Detect Medical Exchange Formats (CDA, HL7, FHIR))
        List<string> medreports = NLPUtilities.ExtractMedicalReportData(responseFromServer, log);
        List<MedicalEntities> retVal = new List<MedicalEntities>();
        foreach (string medreport in medreports)
        {
            CTakesRequest creq = new CTakesRequest()
            {
                Content = medreport,
                CTAKESUrl = Utilities.GetEnvironmentVariable("CTAKESServerURL"),
                UMLSUser = Utilities.GetEnvironmentVariable("CTAKESUMLSUser"),
                UMLSPassword = Utilities.GetEnvironmentVariable("CTAKESUMLSPassword"),
                Format = Utilities.GetEnvironmentVariable("CTAKESFormat"),
            };
            var result = NLPUtilities.ExtractMedicalEntities(creq);
            result.Id = coid;
            result.Location = loc;
            result.DocumentType = doctype;
            retVal.Add(result);
        }

        if (updatesearch != null)
        {
            SearchUtilities su = new SearchUtilities(log);
            su.UploadMedicalEntities(retVal.ToArray());
        }
        return new JsonResult(retVal);
    }
    catch (System.Exception e)
    {
        log.LogError(e, e.Message);
        return new System.Web.Http.InternalServerErrorResult();
    }
}