Ejemplo n.º 1
0
        /// <summary>
        /// Returns the <see cref="Hit"/> registered for an ontology ID within the pane of the
        /// given ontology, creating and registering both the pane bucket and the hit when they
        /// do not exist yet.
        /// </summary>
        /// <param name="hits">Hits collected so far, keyed by pane and then by ontology ID.</param>
        /// <param name="ontology">Ontology settings supplying the pane and the optional text substitutions.</param>
        /// <param name="ontologyID">
        /// Ontology ID to look up. It is rewritten in place by every configured
        /// <c>MatchText</c> substitution before the lookup takes place.
        /// </param>
        /// <returns>The existing or newly created hit for the (rewritten) ontology ID.</returns>
        private Hit GetHit(Dictionary<int, Dictionary<string, Hit>> hits, TextMinerServiceSettingsMinerOntology ontology, ref string ontologyID)
        {
            // Apply the configured result-text -> database-text substitutions, in order.
            if (ontology.MatchText != null)
            {
                foreach (TextMinerServiceSettingsMinerOntologyMatchText rule in ontology.MatchText)
                    ontologyID = ontologyID.Replace(rule.ResultTextID, rule.DBTextID);
            }

            // Locate the bucket of hits belonging to this ontology's pane, creating it on first use.
            Dictionary<string, Hit> paneHits;
            bool paneKnown = hits.TryGetValue(ontology.OntogratorPane, out paneHits);
            if (!paneKnown)
            {
                paneHits = new Dictionary<string, Hit>();
                hits[ontology.OntogratorPane] = paneHits;
            }

            // Reuse the hit already recorded for this ID when there is one.
            Hit existing;
            if (paneHits.TryGetValue(ontologyID, out existing))
                return existing;

            Hit created = new Hit(ontologyID);
            paneHits[ontologyID] = created;
            return created;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Splits <paramref name="text"/> into sentences and collects ontology hits for each one,
        /// first from a hard-coded keyword table (pane 4) and then from every enabled, supported
        /// miner in <c>Repository.Configuration.Miners</c>.
        /// </summary>
        /// <param name="text">Text to mine. A null or empty value yields an empty result.</param>
        /// <returns>
        /// The hits found, keyed by pane and then by ontology ID. When <c>Alive</c> becomes false
        /// the hits gathered up to that point are returned immediately.
        /// </returns>
        protected Dictionary<int, Dictionary<string, Hit>> MineText(string text)
        {
            Dictionary<int, Dictionary<string, Hit>> hits = new Dictionary<int, Dictionary<string, Hit>>();

            if (!Alive || string.IsNullOrEmpty(text))
                return hits;

            // The remote TextCleaner and SentenceSplitter services that used to pre-process the
            // text went down, so sentences are split locally on full stops and line breaks.
            string[] sentences = text.Split(new char[] { '.', '\n', '\r' }, StringSplitOptions.RemoveEmptyEntries);

            // The custom keyword table and its pane descriptor do not depend on the sentence,
            // so build them once per call instead of once per sentence.
            Dictionary<string, string[]> customOntology = BuildCustomOntology();
            TextMinerServiceSettingsMinerOntology customPane = new TextMinerServiceSettingsMinerOntology();
            customPane.OntogratorPane = 4;

            foreach (string docSentence in sentences)
            {
                if (!Alive)
                    return hits;

                string sentence = docSentence.Trim();
                if (sentence == string.Empty)
                    continue;

                sentence = CleanString(sentence);

                // Custom hits for the Miscellaneous pane. Not generic: intended to be removed
                // once a proper ontology is configured for pane 4.
                CollectCustomHits(hits, customOntology, customPane, sentence);

                if (Repository.Configuration.Miners == null)
                    continue;

                foreach (TextMinerServiceSettingsMiner miner in Repository.Configuration.Miners)
                {
                    if (!Alive)
                        return hits;

                    if (!miner.Enabled || miner.Name == MinerName.Unsupported)
                        continue;

                    XmlDocument response = PostSentenceToMiner(miner, sentence);
                    if (response == null)
                        continue;

                    switch (miner.Name)
                    {
                        case MinerName.NERCTerminizer:
                            CollectTerminizerHits(hits, miner, response, sentence);
                            break;
                        case MinerName.NCBOAnnotator:
                            CollectAnnotatorHits(hits, miner, response, sentence);
                            break;
                    }
                }
            }

            return hits;
        }

        /// <summary>Builds the hard-coded keyword table: ontology ID mapped to the synonyms that trigger it.</summary>
        private static Dictionary<string, string[]> BuildCustomOntology()
        {
            Dictionary<string, string[]> customOntology = new Dictionary<string, string[]>();
            customOntology.Add("Phase I", new string[] { "Phase 1", "Phase I " });
            customOntology.Add("Phase II", new string[] { "Phase 2", "Phase II ", "Phase IIa", "Phase IIb" });
            customOntology.Add("Phase II/III", new string[] { "Phase 2/3", "Phase II/III", "Phase 2/Phase 3" });
            customOntology.Add("Phase III", new string[] { "Phase 3", "Phase III " });
            customOntology.Add("Phase IV", new string[] { "Phase 4", "Phase IV " });
            customOntology.Add("Open", new string[] { " open ", "open label" });
            customOntology.Add("Blind", new string[] { "Blind" });
            customOntology.Add("Double-blind", new string[] { "Double-blind", "Double blind" });
            customOntology.Add("Triple-blind", new string[] { "Triple-blind", "Triple blind" });
            customOntology.Add("Randomization", new string[] { "Random" });
            return customOntology;
        }

        /// <summary>
        /// Records a hit for every custom ontology entry that has at least one synonym occurring
        /// in <paramref name="sentence"/>; only the first matching synonym of an entry is recorded.
        /// </summary>
        private void CollectCustomHits(Dictionary<int, Dictionary<string, Hit>> hits, Dictionary<string, string[]> customOntology, TextMinerServiceSettingsMinerOntology customPane, string sentence)
        {
            foreach (KeyValuePair<string, string[]> entry in customOntology)
            {
                foreach (string synonym in entry.Value)
                {
                    // Ordinal matching keeps the result independent of the thread culture and
                    // guarantees the matched span is exactly synonym.Length characters long,
                    // which the Substring call below relies on.
                    int pos = sentence.IndexOf(synonym, StringComparison.OrdinalIgnoreCase);
                    if (pos < 0)
                        continue;

                    // GetHit takes the ID by reference, so hand it a copy of the key.
                    string ontologyID = entry.Key;
                    Hit h = GetHit(hits, customPane, ref ontologyID);

                    // The keyword is the text as it appears in the sentence (original casing).
                    RecordKeywordSentence(h, sentence.Substring(pos, synonym.Length), sentence);
                    break;
                }
            }
        }

        /// <summary>
        /// Posts <paramref name="sentence"/> to the miner, retrying up to <c>miner.RetriesOnError</c>
        /// additional times with a pause of <c>miner.IntervalOnRetry</c> between attempts.
        /// </summary>
        /// <returns>The miner response, or null (after logging the collected errors) when every attempt failed.</returns>
        private XmlDocument PostSentenceToMiner(TextMinerServiceSettingsMiner miner, string sentence)
        {
            XmlDocument response = null;
            int tries = 0;
            string errorMsg = string.Empty;

            while (response == null && tries <= miner.RetriesOnError)
            {
                if (tries++ > 0)
                    Sleep(miner.IntervalOnRetry);

                try
                {
                    // EscapeDataString (unlike EscapeUriString) also escapes reserved characters
                    // such as '&', '=' and '+', so the sentence cannot break the argument string.
                    response = HttpPost(
                        miner.Uri,
                        string.Format(miner.Arguments, Uri.EscapeDataString(sentence)),
                        miner.ResponseTimeout
                        );
                }
                catch (Exception ex)
                {
                    errorMsg += string.Format("TRY {0}:\n{1}\n", tries, ex);
                }
            }

            if (response == null)
                EventLogWriter.WriteError("Error occurred during HTTP POST:\nURI: {0}\nArguments: {1}\nSentence: {2}\n{3}", miner.Uri, miner.Arguments, sentence, errorMsg);

            return response;
        }

        /// <summary>Adds the hits contained in a Terminizer response to <paramref name="hits"/>.</summary>
        private void CollectTerminizerHits(Dictionary<int, Dictionary<string, Hit>> hits, TextMinerServiceSettingsMiner miner, XmlDocument response, string sentence)
        {
            TerminizerResult result = null;
            try
            {
                result = GetTerminizerResultFromXml(response);
            }
            catch (Exception ex)
            {
                EventLogWriter.WriteError("Error occurred serialising from Terminizer POST:\nHit: {0}\n{1}", response.InnerXml, ex);
            }

            // Without configured ontologies no term can ever be mapped to a pane.
            if (result == null || result.Items == null || miner.Ontologies == null)
                return;

            // Index the raw token texts by token index; matched terms refer to them by index.
            Dictionary<int, string> tokens = new Dictionary<int, string>();
            foreach (object o in result.Items)
            {
                if (!(o is Token))
                    continue;

                Token token = (Token)o;
                if (token.indexSpecified && token.RawText != null)
                    tokens[token.index] = token.RawText;
            }

            foreach (object o in result.Items)
            {
                if (!(o is MatchedTermList))
                    continue;

                MatchedTermList terms = (MatchedTermList)o;
                if (terms.MatchedTerm == null)
                    continue;

                foreach (MatchedTermListMatchedTerm term in terms.MatchedTerm)
                {
                    // The ontology is identified by the accession prefix before the first ':'.
                    // Skip terms whose accession is missing or has no prefix instead of
                    // letting Substring throw and abort the whole run.
                    string accession = term.Accession;
                    if (accession == null)
                        continue;

                    int colon = accession.IndexOf(':');
                    if (colon < 0)
                        continue;

                    TextMinerServiceSettingsMinerOntology ont = FindOntology(miner, accession.Substring(0, colon));
                    if (ont == null)
                        continue;

                    // Create or get the hit object for this pane and ontology ID
                    string ontologyID = accession;
                    Hit h = GetHit(hits, ont, ref ontologyID);

                    if (term.TokenIndices == null)
                        continue;

                    // Add a keyword for every referenced token that is actually known;
                    // unparsable or unknown indices are ignored.
                    foreach (string index in term.TokenIndices.Split(','))
                    {
                        int i;
                        string keyword;
                        if (int.TryParse(index, out i) && tokens.TryGetValue(i, out keyword))
                            RecordKeywordSentence(h, keyword, sentence);
                    }
                }
            }
        }

        /// <summary>Adds the hits contained in an NCBO Annotator response to <paramref name="hits"/>.</summary>
        private void CollectAnnotatorHits(Dictionary<int, Dictionary<string, Hit>> hits, TextMinerServiceSettingsMiner miner, XmlDocument response, string sentence)
        {
            success result = null;
            try
            {
                result = GetNCBOAnnotatorResultFromXml(response);
            }
            catch (Exception ex)
            {
                EventLogWriter.WriteError("Error occurred serialising from Annotator POST:\nHit: {0}\n{1}", response.InnerXml, ex);
            }

            if (result == null || result.data == null || result.data.annotatorResultBean == null || result.data.annotatorResultBean.annotations == null)
                return;

            // Without configured ontologies no concept can ever be mapped to a pane.
            if (miner.Ontologies == null)
                return;

            foreach (successDataAnnotatorResultBeanAnnotationBean annotation in result.data.annotatorResultBean.annotations)
            {
                successDataAnnotatorResultBeanAnnotationBeanConcept concept = annotation.concept;
                if (concept == null)
                    continue;

                TextMinerServiceSettingsMinerOntology ont = FindOntology(miner, concept.localOntologyId);
                if (ont == null)
                    continue;

                // Create or get the hit object for this pane and ontology ID
                string ontologyID = concept.fullId;
                Hit h = GetHit(hits, ont, ref ontologyID);

                // Only "mgrepContextBean" contexts with a named term contribute a keyword.
                if (annotation.context != null
                    && annotation.context.@class == "mgrepContextBean"
                    && annotation.context.term != null
                    && annotation.context.term.name != null)
                {
                    RecordKeywordSentence(h, annotation.context.term.name, sentence);
                }
            }
        }

        /// <summary>Returns the first ontology configured for <paramref name="miner"/> whose ID equals <paramref name="id"/>, or null.</summary>
        private static TextMinerServiceSettingsMinerOntology FindOntology(TextMinerServiceSettingsMiner miner, string id)
        {
            foreach (TextMinerServiceSettingsMinerOntology ontology in miner.Ontologies)
            {
                if (ontology.ID == id)
                    return ontology;
            }

            return null;
        }

        /// <summary>
        /// Registers <paramref name="sentence"/> under <paramref name="keyword"/> in the hit's
        /// keyword table, creating the keyword entry if needed and never storing a sentence twice.
        /// </summary>
        private static void RecordKeywordSentence(Hit hit, string keyword, string sentence)
        {
            List<string> matchedSentences;
            if (!hit.Keywords.TryGetValue(keyword, out matchedSentences))
            {
                matchedSentences = new List<string>();
                hit.Keywords.Add(keyword, matchedSentences);
            }

            if (!matchedSentences.Contains(sentence))
                matchedSentences.Add(sentence);
        }