/// <summary>
 /// Builds a Selectome query result from a status value and an optional gene.
 /// </summary>
 /// <param name="status">Outcome of the query; stored in <c>Result</c>.</param>
 /// <param name="gene">Gene to store in <c>pGene</c>; <c>null</c> when omitted.</param>
 public SelectomeQueryResult(QueryResult status, SelectomeGene gene = null)
 {
     Result = status;
     pGene = gene;
 }
Example #2
0
 /// <summary>
 /// Initializes a query result with the query status and, when available, the gene that goes with it.
 /// </summary>
 /// <param name="status">Status of the query; assigned to <c>Result</c>.</param>
 /// <param name="gene">Optional gene assigned to <c>pGene</c>; left <c>null</c> when not supplied.</param>
 public SelectomeQueryResult(QueryResult status, SelectomeGene gene = null)
 {
     Result = status;
     pGene = gene;
 }
        /// <summary>
        /// Parses a single Selectome DAS XML document and returns one <see cref="SelectomeQueryResult"/>.
        /// </summary>
        /// <remarks>
        /// Selection data (NOTE elements) and tree links (LINK elements) arrive as separate elements,
        /// so they are merged per taxa group in a dictionary while the document is read.
        /// A LINK that cannot be parsed, or whose taxa group has no selection data, is skipped.
        /// </remarks>
        /// <param name="webResponse">Response text; it is passed through ReplaceHtmlText before parsing.</param>
        /// <returns>
        /// <c>NoResultsFound</c> when no selection data was parsed;
        /// <c>NoVeterbrateTreeDataFound</c> when data was parsed but none for
        /// <c>SelectomeTaxaGroup.Euteleostomi</c>; otherwise <c>Success</c> with a
        /// <c>SelectomeGene</c> built from the parsed data and the last FEATURE label read.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// A LINK element names a different tree/sub-tree than the link already stored for its taxa group.
        /// </exception>
        private static SelectomeQueryResult ParseXML(string webResponse)
        {
            webResponse = ReplaceHtmlText(webResponse);
            //PARSE THE XML, example below:
            //<?xml version="1.0" standalone="yes"?>
            //<?xml-stylesheet type="text/xsl" href="features.xsl"?>
            //<!DOCTYPE DASGFF SYSTEM "http://www.biodas.org/dtd/dasgff.dtd">
            //<DASGFF>
            //  <GFF href="http://selectome.unil.ch/das/selectome/features">
            //<SEGMENT id="ENSCPOT00000004318" start="1">
            //<FEATURE id="ENSGT00550000074701_Euteleostomi_1_ENSCPOT00000004318" label="ENSCPOT00000004318">
            //<TYPE id="Positive selection" />
            //<METHOD id="codeml branch-site model" />
            //<NOTE>Positions under positive selection found in ENSCPOT00000004318
            //(sub-family ENSGT00550000074701.Euteleostomi.1)
            //in&#x3C;br/&#x3E;Sarcopterygii/Clupeocephala (57 amino-acid positions, pval = 0.00089362)</NOTE>
            //<LINK href="http://selectome.unil.ch/cgi-bin/subfamily.cgi?ac=ENSGT00550000074701&#x26;sub=1&#x26;tax=Euteleostomi" />
            //</FEATURE>
            //</SEGMENT>
            //  </GFF>
            //</DASGFF>

            //Selection results and links appear in different elements, so they are merged by taxa group.
            var    results  = new Dictionary <SelectomeTaxaGroup, SelectomeQuerySubResult>();
            string geneName = String.Empty;
            var    settings = new XmlReaderSettings {
                DtdProcessing = DtdProcessing.Ignore
            };

            using (var sr = new StringReader(webResponse))
            {
                using (XmlReader r = XmlReader.Create(sr, settings))
                {
                    //ReadInnerXml moves the reader past the closing tag, so the node it lands on
                    //has to be examined before Read() is called again.
                    bool alreadyAdvanced = false;
                    while (alreadyAdvanced || r.Read())
                    {
                        alreadyAdvanced = false;
                        if (r.NodeType != XmlNodeType.Element)
                        {
                            continue;
                        }

                        if (r.Name == "FEATURE")
                        {
                            geneName = r.GetAttribute("label");
                        }
                        else if (r.Name == "NOTE")
                        {
                            //Construct the data from the note
                            string innerText = r.ReadInnerXml();
                            alreadyAdvanced = true;
                            string[] groups = innerText.Split(NoteDivider, StringSplitOptions.RemoveEmptyEntries);
                            foreach (var selectionRes in groups.Where(x => x.Contains("selection")).Select(ProcessSelectomeData).ToArray())
                            {
                                results[selectionRes.Group] = selectionRes;
                            }
                        }
                        else if (r.Name == "LINK")
                        {
                            //example a la =<LINK href="http://selectome.unil.ch/cgi-bin/subfamily.cgi?ac=ENSGT00390000009030&#x26;sub=1&#x26;tax=Euteleostomi" />
                            string linkText = r.GetAttribute("href");
                            if (linkText == null)
                            {
                                continue;
                            }

                            //Splitting on '=' yields: [url?ac, <tree>&sub, <subtree>&tax, <taxon>]
                            var sp1 = linkText.Split('=');
                            if (sp1.Length < 4)
                            {
                                //Not in the ac/sub/tax form, so there is no tree to associate.
                                continue;
                            }
                            var treename    = sp1[1].Split('&')[0]; //ENSGT0039000000009446
                            var subTreeName = sp1[2].Split('&')[0]; //1

                            //NOTE(review): relies on NotSet being what FirstOrDefault yields when no name matches - confirm.
                            SelectomeTaxaGroup curGroup = (from kv in SelectomeConstantsAndEnums.GroupToNameList where sp1[3].Contains(kv.Value) select kv.Key).FirstOrDefault();
                            if (curGroup == SelectomeTaxaGroup.NotSet)
                            {
                                continue;
                            }

                            var sql = new SelectomeQueryLink(curGroup, treename, subTreeName);
                            SelectomeQuerySubResult existing;
                            if (!results.TryGetValue(curGroup, out existing))
                            {
                                //No NOTE produced selection data for this group, so there is nothing to attach
                                //the link to (indexing the dictionary here would throw KeyNotFoundException).
                                continue;
                            }

                            if (existing.RelatedLink == null)
                            {
                                existing.RelatedLink = sql;
                            }
                            else if (existing.RelatedLink.Group != sql.Group || existing.RelatedLink.SubTree != sql.SubTree || existing.RelatedLink.Tree != sql.Tree)
                            {
                                //Verify it isn't over-writing another gene
                                throw new ArgumentException("Over-writing past tree group, investigate this gene.\r\n" + webResponse);
                            }
                        }
                    }
                }
            }

            if (results.Count == 0)
            {
                return(new SelectomeQueryResult(QueryResult.NoResultsFound));
            }
            if (!results.ContainsKey(SelectomeTaxaGroup.Euteleostomi))
            {
                return(new SelectomeQueryResult(QueryResult.NoVeterbrateTreeDataFound));
            }
            SelectomeGene result = new SelectomeGene(results, geneName);
            return(new SelectomeQueryResult(QueryResult.Success, result));
        }
        /// <summary>
        /// Reads one Selectome DAS XML document and converts it into a <see cref="SelectomeQueryResult"/>.
        /// </summary>
        /// <remarks>
        /// NOTE elements supply the per-group selection data and LINK elements supply the tree links;
        /// both are collected into one dictionary keyed by taxa group. A LINK with no usable
        /// <c>href</c>, or for a taxa group that has no selection data, is ignored.
        /// </remarks>
        /// <param name="webResponse">Response text; run through ReplaceHtmlText before it is parsed.</param>
        /// <returns>
        /// A result with status <c>NoResultsFound</c> if nothing was collected,
        /// <c>NoVeterbrateTreeDataFound</c> if nothing was collected for
        /// <c>SelectomeTaxaGroup.Euteleostomi</c>, and otherwise <c>Success</c> together with a
        /// <c>SelectomeGene</c> created from the collected data and the last FEATURE label seen.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Two LINK elements for the same taxa group point at different trees or sub-trees.
        /// </exception>
        private static SelectomeQueryResult ParseXML(string webResponse)
        {
            webResponse = ReplaceHtmlText(webResponse);
            //PARSE THE XML, example below:
            //<?xml version="1.0" standalone="yes"?>
            //<?xml-stylesheet type="text/xsl" href="features.xsl"?>
            //<!DOCTYPE DASGFF SYSTEM "http://www.biodas.org/dtd/dasgff.dtd">
            //<DASGFF>
            //  <GFF href="http://selectome.unil.ch/das/selectome/features">
            //<SEGMENT id="ENSCPOT00000004318" start="1">
            //<FEATURE id="ENSGT00550000074701_Euteleostomi_1_ENSCPOT00000004318" label="ENSCPOT00000004318">
            //<TYPE id="Positive selection" />
            //<METHOD id="codeml branch-site model" />
            //<NOTE>Positions under positive selection found in ENSCPOT00000004318
            //(sub-family ENSGT00550000074701.Euteleostomi.1)
            //in&#x3C;br/&#x3E;Sarcopterygii/Clupeocephala (57 amino-acid positions, pval = 0.00089362)</NOTE>
            //<LINK href="http://selectome.unil.ch/cgi-bin/subfamily.cgi?ac=ENSGT00550000074701&#x26;sub=1&#x26;tax=Euteleostomi" />
            //</FEATURE>
            //</SEGMENT>
            //  </GFF>
            //</DASGFF>

            //Selection results and links come from different elements; the dictionary joins them per taxa group.
            var results = new Dictionary<SelectomeTaxaGroup, SelectomeQuerySubResult>();
            string geneName = String.Empty;
            var settings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Ignore };
            using (var sr = new StringReader(webResponse))
            {
                using (XmlReader r = XmlReader.Create(sr, settings))
                {
                    //Set after ReadInnerXml, which already positions the reader on the next node;
                    //that node must be handled before Read() advances again.
                    bool alreadyAdvanced = false;
                    while (alreadyAdvanced || r.Read())
                    {
                        alreadyAdvanced = false;
                        if (r.NodeType != XmlNodeType.Element)
                        {
                            continue;
                        }

                        switch (r.Name)
                        {
                            case "FEATURE":
                                geneName = r.GetAttribute("label");
                                break;

                            case "NOTE":
                                //Construct the data from the note
                                string innerText = r.ReadInnerXml();
                                alreadyAdvanced = true;
                                string[] groups = innerText.Split(NoteDivider, StringSplitOptions.RemoveEmptyEntries);
                                var tempResults = groups.Where(x => x.Contains("selection")).Select(ProcessSelectomeData).ToArray();
                                foreach (var selectionRes in tempResults)
                                {
                                    results[selectionRes.Group] = selectionRes;
                                }
                                break;

                            case "LINK":
                                //example a la =<LINK href="http://selectome.unil.ch/cgi-bin/subfamily.cgi?ac=ENSGT00390000009030&#x26;sub=1&#x26;tax=Euteleostomi" />
                                string linkText = r.GetAttribute("href");
                                //Pieces after splitting on '=': [url?ac, <tree>&sub, <subtree>&tax, <taxon>]
                                string[] sp1 = linkText == null ? new string[0] : linkText.Split('=');
                                if (sp1.Length < 4)
                                {
                                    //href is absent or not in the ac/sub/tax form; no tree can be derived from it.
                                    break;
                                }
                                var treename = sp1[1].Split('&')[0]; //ENSGT0039000000009446
                                var subTreeName = sp1[2].Split('&')[0]; //1

                                //NOTE(review): assumes NotSet is the value FirstOrDefault returns when no group name matches - confirm.
                                SelectomeTaxaGroup curGroup = (from kv in SelectomeConstantsAndEnums.GroupToNameList where sp1[3].Contains(kv.Value) select kv.Key).FirstOrDefault();
                                if (curGroup == SelectomeTaxaGroup.NotSet)
                                {
                                    break;
                                }

                                var sql = new SelectomeQueryLink(curGroup, treename, subTreeName);
                                SelectomeQuerySubResult old;
                                if (!results.TryGetValue(curGroup, out old))
                                {
                                    //This group has no selection data to carry the link; indexing
                                    //the dictionary for it would throw KeyNotFoundException.
                                    break;
                                }

                                if (old.RelatedLink == null)
                                {
                                    old.RelatedLink = sql;
                                }
                                else if (old.RelatedLink.Group != sql.Group || old.RelatedLink.SubTree != sql.SubTree || old.RelatedLink.Tree != sql.Tree)
                                {
                                    //Verify it isn't over-writing another gene
                                    throw new ArgumentException("Over-writing past tree group, investigate this gene.\r\n" + webResponse);
                                }
                                break;
                        }
                    }
                }
            }

            if (results.Count == 0)
            {
                return new SelectomeQueryResult(QueryResult.NoResultsFound);
            }
            if (!results.ContainsKey(SelectomeTaxaGroup.Euteleostomi))
            {
                return new SelectomeQueryResult(QueryResult.NoVeterbrateTreeDataFound);
            }
            SelectomeGene result = new SelectomeGene(results, geneName);
            return new SelectomeQueryResult(QueryResult.Success, result);
        }