/// <summary>
/// Looks up the <see cref="Hit"/> registered for the given ontology's pane and ontology ID,
/// creating and registering it (and the per-pane dictionary) when it does not exist yet.
/// </summary>
/// <param name="hits">Hits collected so far, keyed by pane number and then by ontology ID.</param>
/// <param name="ontology">Ontology settings supplying the pane number and the optional ID substitutions.</param>
/// <param name="ontologyID">
/// The raw ontology ID. It is rewritten in place by every configured <c>MatchText</c> substitution,
/// so on return it holds the ID actually used as the dictionary key.
/// </param>
/// <returns>The existing or newly created hit for the rewritten ontology ID.</returns>
private Hit GetHit(Dictionary<int, Dictionary<string, Hit>> hits, TextMinerServiceSettingsMinerOntology ontology, ref string ontologyID)
{
    // Apply the ontology's predefined text substitutions to the ID, in configuration order.
    if (ontology.MatchText != null)
    {
        foreach (TextMinerServiceSettingsMinerOntologyMatchText rule in ontology.MatchText)
        {
            ontologyID = ontologyID.Replace(rule.ResultTextID, rule.DBTextID);
        }
    }

    // Fetch the hits of this ontology's pane, registering an empty set on first use.
    Dictionary<string, Hit> paneHits;
    if (!hits.TryGetValue(ontology.OntogratorPane, out paneHits))
    {
        paneHits = new Dictionary<string, Hit>();
        hits[ontology.OntogratorPane] = paneHits;
    }

    Hit existing;
    if (paneHits.TryGetValue(ontologyID, out existing))
    {
        return existing;
    }

    Hit created = new Hit(ontologyID);
    paneHits[ontologyID] = created;
    return created;
}
/// <summary>
/// Splits <paramref name="text"/> into sentences and mines each sentence for ontology hits,
/// first against a hard-coded custom ontology (pane 4) and then against every enabled miner
/// in <c>Repository.Configuration.Miners</c>.
/// </summary>
/// <param name="text">The text to mine. A null or empty text yields an empty result.</param>
/// <returns>
/// The hits found, keyed by pane number and then by ontology ID. If <c>Alive</c> becomes false
/// while mining, the hits collected up to that point are returned.
/// </returns>
protected Dictionary<int, Dictionary<string, Hit>> MineText(string text)
{
    Dictionary<int, Dictionary<string, Hit>> hits = new Dictionary<int, Dictionary<string, Hit>>();

    if (!Alive || string.IsNullOrEmpty(text))
    {
        return hits;
    }

    // The external TextCleaner / SentenceSplitter service calls are disabled;
    // sentences are obtained with a plain split on '.', LF and CR instead.
    string[] sentences = text.Split(new char[] { '.', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

    // Hard-coded ontology for pane 4 ("Miscellaneous"). Not generic: it is meant to be removed
    // once a real ontology is configured for that pane. Built once per call, not per sentence.
    Dictionary<string, string[]> customOntology = BuildCustomOntology();
    TextMinerServiceSettingsMinerOntology customPane = new TextMinerServiceSettingsMinerOntology();
    customPane.OntogratorPane = 4;

    foreach (string rawSentence in sentences)
    {
        if (!Alive)
        {
            return hits;
        }

        string sentence = rawSentence.Trim();
        if (sentence.Length == 0)
        {
            continue;
        }

        sentence = CleanString(sentence);

        MineCustomOntology(hits, customOntology, customPane, sentence);

        if (Repository.Configuration.Miners == null)
        {
            continue;
        }

        foreach (TextMinerServiceSettingsMiner miner in Repository.Configuration.Miners)
        {
            if (!Alive)
            {
                return hits;
            }

            if (!miner.Enabled || miner.Name == MinerName.Unsupported)
            {
                continue;
            }

            string errorMsg;
            XmlDocument response = PostSentenceWithRetries(miner, sentence, out errorMsg);
            if (response == null)
            {
                EventLogWriter.WriteError("Error occurred during HTTP POST:\nURI: {0}\nArguments: {1}\nSentence: {2}\n{3}", miner.Uri, miner.Arguments, sentence, errorMsg);
                continue;
            }

            switch (miner.Name)
            {
                case MinerName.NERCTerminizer:
                    CollectTerminizerHits(hits, miner, response, sentence);
                    break;

                case MinerName.NCBOAnnotator:
                    CollectAnnotatorHits(hits, miner, response, sentence);
                    break;
            }
        }
    }

    return hits;
}

/// <summary>Builds the hard-coded pane 4 ontology: ontology ID mapped to the synonyms searched for in each sentence.</summary>
private static Dictionary<string, string[]> BuildCustomOntology()
{
    Dictionary<string, string[]> customOntology = new Dictionary<string, string[]>();
    customOntology.Add("Phase I", new string[] { "Phase 1", "Phase I " });
    customOntology.Add("Phase II", new string[] { "Phase 2", "Phase II ", "Phase IIa", "Phase IIb" });
    customOntology.Add("Phase II/III", new string[] { "Phase 2/3", "Phase II/III", "Phase 2/Phase 3" });
    customOntology.Add("Phase III", new string[] { "Phase 3", "Phase III " });
    customOntology.Add("Phase IV", new string[] { "Phase 4", "Phase IV " });
    customOntology.Add("Open", new string[] { " open ", "open label" });
    customOntology.Add("Blind", new string[] { "Blind" });
    customOntology.Add("Double-blind", new string[] { "Double-blind", "Double blind" });
    customOntology.Add("Triple-blind", new string[] { "Triple-blind", "Triple blind" });
    customOntology.Add("Randomization", new string[] { "Random" });
    return customOntology;
}

/// <summary>Records a hit for every custom ontology entry that has at least one synonym occurring in the sentence.</summary>
private void MineCustomOntology(Dictionary<int, Dictionary<string, Hit>> hits, Dictionary<string, string[]> customOntology, TextMinerServiceSettingsMinerOntology customPane, string sentence)
{
    foreach (KeyValuePair<string, string[]> entry in customOntology)
    {
        foreach (string synonym in entry.Value)
        {
            // Ordinal matching keeps the match length equal to synonym.Length (so the Substring
            // below is always in range) and makes the result independent of the thread culture.
            int pos = sentence.IndexOf(synonym, StringComparison.OrdinalIgnoreCase);
            if (pos < 0)
            {
                continue;
            }

            string ontologyID = entry.Key;
            // The keyword is the text as it appears in the sentence, preserving its casing.
            string keyword = sentence.Substring(pos, synonym.Length);
            Hit h = GetHit(hits, customPane, ref ontologyID);
            AddKeywordSentence(h, keyword, sentence);

            // Only the first matching synonym of an entry is recorded.
            break;
        }
    }
}

/// <summary>
/// Posts the sentence to the miner, retrying up to <c>miner.RetriesOnError</c> additional times
/// while no response is obtained. Returns null when every attempt failed; <paramref name="errorMsg"/>
/// then holds the accumulated exception text of the attempts that threw.
/// </summary>
private XmlDocument PostSentenceWithRetries(TextMinerServiceSettingsMiner miner, string sentence, out string errorMsg)
{
    XmlDocument response = null;
    int tries = 0;
    errorMsg = string.Empty;

    while (response == null && tries <= miner.RetriesOnError)
    {
        // Wait before every attempt except the first one.
        if (tries++ > 0)
        {
            Sleep(miner.IntervalOnRetry);
        }

        try
        {
            // EscapeDataString also escapes reserved characters such as '&', '=' and '+',
            // which would otherwise break the argument string the sentence is embedded in.
            response = HttpPost(
                miner.Uri,
                string.Format(miner.Arguments, Uri.EscapeDataString(sentence)),
                miner.ResponseTimeout
            );
        }
        catch (Exception ex)
        {
            errorMsg += string.Format("TRY {0}:\n{1}\n", tries, ex);
        }
    }

    return response;
}

/// <summary>Deserialises a Terminizer response and records a hit, with its keywords, for every matched term of a configured ontology.</summary>
private void CollectTerminizerHits(Dictionary<int, Dictionary<string, Hit>> hits, TextMinerServiceSettingsMiner miner, XmlDocument response, string sentence)
{
    TerminizerResult result = null;
    try
    {
        result = GetTerminizerResultFromXml(response);
    }
    catch (Exception ex)
    {
        EventLogWriter.WriteError("Error occurred serialising from Terminizer POST:\nHit: {0}\n{1}", response.InnerXml, ex);
    }

    // Without configured ontologies no term can be assigned to a pane.
    if (result == null || result.Items == null || miner.Ontologies == null)
    {
        return;
    }

    // Index the raw token texts by token index; matched terms refer to them for their keywords.
    Dictionary<int, string> tokens = new Dictionary<int, string>();
    foreach (object item in result.Items)
    {
        if (!(item is Token))
        {
            continue;
        }

        Token token = (Token)item;
        if (token.indexSpecified && token.RawText != null)
        {
            tokens[token.index] = token.RawText;
        }
    }

    foreach (object item in result.Items)
    {
        if (!(item is MatchedTermList))
        {
            continue;
        }

        MatchedTermList terms = (MatchedTermList)item;
        if (terms.MatchedTerm == null)
        {
            continue;
        }

        foreach (MatchedTermListMatchedTerm term in terms.MatchedTerm)
        {
            // The ontology is identified by the accession prefix before ':';
            // accessions without that separator cannot be attributed and are skipped.
            if (term.Accession == null)
            {
                continue;
            }

            int separator = term.Accession.IndexOf(':');
            if (separator < 0)
            {
                continue;
            }

            string ontid = term.Accession.Substring(0, separator);
            TextMinerServiceSettingsMinerOntology ont = null;
            foreach (TextMinerServiceSettingsMinerOntology ontology in miner.Ontologies)
            {
                if (ontology.ID == ontid)
                {
                    ont = ontology;
                    break;
                }
            }

            if (ont == null)
            {
                continue;
            }

            string ontologyID = term.Accession;

            // Create or get the hit object for this pane and ontology ID.
            Hit h = GetHit(hits, ont, ref ontologyID);

            if (term.TokenIndices == null)
            {
                continue;
            }

            // Add a keyword for every referenced token that is actually known.
            foreach (string index in term.TokenIndices.Split(','))
            {
                int i;
                string keyword;
                if (int.TryParse(index, out i) && tokens.TryGetValue(i, out keyword))
                {
                    AddKeywordSentence(h, keyword, sentence);
                }
            }
        }
    }
}

/// <summary>Deserialises an NCBO Annotator response and records a hit, with its keyword, for every annotation of a configured ontology.</summary>
private void CollectAnnotatorHits(Dictionary<int, Dictionary<string, Hit>> hits, TextMinerServiceSettingsMiner miner, XmlDocument response, string sentence)
{
    success result = null;
    try
    {
        result = GetNCBOAnnotatorResultFromXml(response);
    }
    catch (Exception ex)
    {
        EventLogWriter.WriteError("Error occurred serialising from Annotator POST:\nHit: {0}\n{1}", response.InnerXml, ex);
    }

    if (result == null
        || result.data == null
        || result.data.annotatorResultBean == null
        || result.data.annotatorResultBean.annotations == null
        || miner.Ontologies == null)
    {
        return;
    }

    foreach (successDataAnnotatorResultBeanAnnotationBean annotation in result.data.annotatorResultBean.annotations)
    {
        successDataAnnotatorResultBeanAnnotationBeanConcept concept = annotation.concept;

        // A concept without an ID cannot be used as a dictionary key.
        if (concept == null || concept.fullId == null)
        {
            continue;
        }

        TextMinerServiceSettingsMinerOntology ont = null;
        foreach (TextMinerServiceSettingsMinerOntology ontology in miner.Ontologies)
        {
            if (ontology.ID == concept.localOntologyId)
            {
                ont = ontology;
                break;
            }
        }

        if (ont == null)
        {
            continue;
        }

        string ontologyID = concept.fullId;

        // Create or get the hit object for this pane and ontology ID.
        Hit h = GetHit(hits, ont, ref ontologyID);

        // Add the matched term as keyword; only "mgrepContextBean" contexts carry one.
        if (annotation.context != null
            && annotation.context.@class == "mgrepContextBean"
            && annotation.context.term != null
            && annotation.context.term.name != null)
        {
            AddKeywordSentence(h, annotation.context.term.name, sentence);
        }
    }
}

/// <summary>Registers the sentence under the given keyword of the hit, creating the keyword entry if needed and avoiding duplicate sentences.</summary>
private static void AddKeywordSentence(Hit h, string keyword, string sentence)
{
    List<string> matchedSentences;
    if (!h.Keywords.TryGetValue(keyword, out matchedSentences))
    {
        matchedSentences = new List<string>();
        h.Keywords.Add(keyword, matchedSentences);
    }

    if (!matchedSentences.Contains(sentence))
    {
        matchedSentences.Add(sentence);
    }
}