/// <summary>
/// Returns the alignment details stored in PfamA_Reg_Full_Significant for the
/// given key, querying the database only the first time a key is requested
/// and serving later requests from the supplied cache.
/// </summary>
/// <param name="auto_pfama_reg_full">Value matched against the Auto_PfamA_Reg_full column.</param>
/// <param name="pfamAlignInfoHash">Cache keyed by <paramref name="auto_pfama_reg_full"/>; a new entry is added on every cache miss.</param>
/// <returns>
/// The cached or freshly loaded alignment info. When the query returns no
/// rows, a default-constructed <c>PfamAlignInfo</c> is cached and returned.
/// </returns>
private PfamAlignInfo GetPfamAlignInfo(int auto_pfama_reg_full, ref Dictionary<int, PfamAlignInfo> pfamAlignInfoHash)
{
    // Single hash lookup instead of ContainsKey followed by the indexer.
    PfamAlignInfo cachedInfo;
    if (pfamAlignInfoHash.TryGetValue(auto_pfama_reg_full, out cachedInfo))
    {
        return cachedInfo;
    }

    // The only value formatted into the query is an int, so it cannot carry SQL text.
    string queryString = string.Format("Select * From PfamA_Reg_Full_Significant Where Auto_PfamA_Reg_full = {0};", auto_pfama_reg_full);
    DataTable alignInfoTable = dbQuery.Query(mysqlConnect, queryString);

    PfamAlignInfo pfamAlignInfo = new PfamAlignInfo();
    if (alignInfoTable.Rows.Count > 0)
    {
        // Only the first returned row is used.
        DataRow alignRow = alignInfoTable.Rows[0];
        pfamAlignInfo.align_start = alignRow["ali_start"].ToString();
        pfamAlignInfo.align_end = alignRow["ali_end"].ToString();
        pfamAlignInfo.model_start = alignRow["model_start"].ToString();
        pfamAlignInfo.model_end = alignRow["model_end"].ToString();
        pfamAlignInfo.domain_bits_score = alignRow["domain_bits_score"].ToString();
        pfamAlignInfo.domain_evalue_score = alignRow["domain_evalue_score"].ToString();
        pfamAlignInfo.sequence_bits_score = alignRow["sequence_bits_score"].ToString();
        pfamAlignInfo.sequence_evalue_score = alignRow["sequence_evalue_score"].ToString();
    }

    // Empty results are cached as well, so a missing key is queried only once.
    pfamAlignInfoHash.Add(auto_pfama_reg_full, pfamAlignInfo);
    return pfamAlignInfo;
}
/// <summary>
/// Loads a Pfam result XML document and converts every location element found
/// under every match element into a <c>PfamAlignInfo</c>.
/// </summary>
/// <param name="resultXml">Location of the XML document; passed unchanged to <c>XmlDocument.Load</c>.</param>
/// <returns>
/// One entry per location element, in document order. Each entry carries the
/// accession, id, type and class attributes of its parent match element.
/// </returns>
/// <exception cref="Exception">
/// Thrown with the message "No Pfam alignments found." when the chain of
/// first-child nodes leading to the matches element is incomplete.
/// </exception>
public PfamAlignInfo[] ParsePfamResultXmlFile(string resultXml)
{
    XmlDocument xmlDoc = new XmlDocument();
    xmlDoc.Load(resultXml);

    // The document is navigated positionally: root -> first child -> first child
    // -> first child -> first child. Any missing link means there is nothing to
    // parse, so it is reported the same way as a missing protein node instead of
    // surfacing as a NullReferenceException.
    XmlNode resultsNode = xmlDoc.DocumentElement.FirstChild;
    XmlNode proteinNode = resultsNode == null ? null : resultsNode.FirstChild;
    XmlNode dbNode = proteinNode == null ? null : proteinNode.FirstChild;
    XmlNode matchesNode = dbNode == null ? null : dbNode.FirstChild;
    if (matchesNode == null)
    {
        throw new Exception("No Pfam alignments found.");
    }

    // The attribute values are machine-generated, so they are parsed with the
    // invariant culture; the current culture may use ',' as decimal separator.
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    List<PfamAlignInfo> alignInfoList = new List<PfamAlignInfo>();
    foreach (XmlNode matchNode in matchesNode.ChildNodes)
    {
        // Comments and other non-element nodes have no attributes.
        if (matchNode.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        string pfamAcc = matchNode.Attributes["accession"].InnerText;
        string pfamId = matchNode.Attributes["id"].InnerText;
        string type = matchNode.Attributes["type"].InnerText;
        string pfamClass = matchNode.Attributes["class"].InnerText;

        foreach (XmlNode locationNode in matchNode.ChildNodes)
        {
            if (locationNode.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            XmlAttributeCollection locationAttrs = locationNode.Attributes;

            PfamAlignInfo alignInfo = new PfamAlignInfo();
            alignInfo.pfamAcc = pfamAcc;
            alignInfo.pfamId = pfamId;
            alignInfo.type = type;
            alignInfo.pfamClass = pfamClass;
            alignInfo.startPos = Convert.ToInt32(locationAttrs["start"].InnerText, invariant);
            alignInfo.endPos = Convert.ToInt32(locationAttrs["end"].InnerText, invariant);
            alignInfo.hmmStartPos = Convert.ToInt32(locationAttrs["hmm_start"].InnerText, invariant);
            alignInfo.hmmEndPos = Convert.ToInt32(locationAttrs["hmm_end"].InnerText, invariant);
            alignInfo.bitScore = Convert.ToDouble(locationAttrs["bitscore"].InnerText, invariant);
            alignInfo.evalue = Convert.ToDouble(locationAttrs["evalue"].InnerText, invariant);
            alignInfo.mode = locationAttrs["mode"].InnerText;
            alignInfoList.Add(alignInfo);
        }
    }

    return alignInfoList.ToArray();
}
/// <summary>
/// Writes one tab-separated line per row of the pdb_pfamA_reg table to
/// "PdbSeqHmmAlignInfo.txt" in <c>pfamDataDir</c>.
/// </summary>
/// <remarks>
/// Columns, in order: pdb_id, Chain (trailing whitespace trimmed), three values
/// from <c>GetPfamAFamilyInfo</c>, two values from <c>GetPfamSequenceInfo</c>,
/// pdb_res_start, pdb_res_end, seq_start, seq_end, then the eight alignment
/// fields returned by <c>GetPfamAlignInfo</c>.
/// The database connection is opened if it is not already connected and is
/// always disconnected before the method returns, including on failure.
/// </remarks>
public void PrintPfamHmmAlignmentInfo()
{
    if (!mysqlConnect.IsConnected())
    {
        // NOTE(review): hard-coded local root account with an empty password;
        // consider moving the connection string to configuration.
        string connectString = "Driver={MySQL ODBC 5.1 Driver}; Server=localhost;Database=pfam;UID=root;PWD=;";
        mysqlConnect.ConnectString = connectString;
        mysqlConnect.ConnectToDatabase();
    }

    try
    {
        // 'using' closes the output file even when a query or conversion throws.
        using (StreamWriter dataWriter = new StreamWriter(Path.Combine(pfamDataDir, "PdbSeqHmmAlignInfo.txt")))
        {
            string queryString = "Select * From pdb_pfamA_reg;";
            DataTable pdbChainDomainTable = dbQuery.Query(mysqlConnect, queryString);

            // Per-run caches so each family, sequence and alignment is looked up once.
            Dictionary<int, string[]> pfamDefHash = new Dictionary<int, string[]>();
            Dictionary<int, string[]> pfamSeqInfoHash = new Dictionary<int, string[]>();
            Dictionary<int, PfamAlignInfo> pfamAlignInfoHash = new Dictionary<int, PfamAlignInfo>();

            foreach (DataRow chainRow in pdbChainDomainTable.Rows)
            {
                int auto_pfamseq = Convert.ToInt32(chainRow["auto_pfamseq"].ToString());
                int auto_pfama = Convert.ToInt32(chainRow["auto_pfama"].ToString());
                int auto_pfamA_reg_full = Convert.ToInt32(chainRow["Auto_Pfama_reg_full"].ToString());

                string[] pfamInfo = GetPfamAFamilyInfo(auto_pfama, ref pfamDefHash);
                string[] pfamSeqInfo = GetPfamSequenceInfo(auto_pfamseq, ref pfamSeqInfoHash);
                PfamAlignInfo pfamAlignInfo = GetPfamAlignInfo(auto_pfamA_reg_full, ref pfamAlignInfoHash);

                // string.Join renders null entries as empty strings, exactly as
                // the '+' operator does, so unset alignment fields stay blank.
                string dataLine = string.Join("\t", new string[]
                {
                    chainRow["pdb_id"].ToString(),
                    chainRow["Chain"].ToString().TrimEnd(),
                    pfamInfo[0],
                    pfamInfo[1],
                    pfamInfo[2],
                    pfamSeqInfo[0],
                    pfamSeqInfo[1],
                    chainRow["pdb_res_start"].ToString(),
                    chainRow["pdb_res_end"].ToString(),
                    chainRow["seq_start"].ToString(),
                    chainRow["seq_end"].ToString(),
                    pfamAlignInfo.align_start,
                    pfamAlignInfo.align_end,
                    pfamAlignInfo.model_start,
                    pfamAlignInfo.model_end,
                    pfamAlignInfo.domain_bits_score,
                    pfamAlignInfo.domain_evalue_score,
                    pfamAlignInfo.sequence_bits_score,
                    pfamAlignInfo.sequence_evalue_score
                });
                dataWriter.WriteLine(dataLine);
            }
        }
    }
    finally
    {
        // Matches the original behavior of always disconnecting at the end,
        // and additionally covers the failure path.
        mysqlConnect.DisconnectFromDatabase();
    }
}