/// <summary>
/// Adds a protein to <c>myProteins</c> unless an entry with the same ID is already present.
/// </summary>
/// <remarks>
/// IDs that start with "IPI" have everything from the first '.' to the end of the line
/// removed before they are compared and stored, so "IPI00001.1" and "IPI00001.2" resolve
/// to the single entry "IPI00001".
/// </remarks>
/// <param name="ID">Protein identifier.</param>
/// <param name="Description">Text stored on the new entry's <c>Description</c>.</param>
/// <exception cref="Exception">
/// Thrown with the message "Failed parsing result report" when the entry cannot be created;
/// the underlying error is available as the inner exception.
/// </exception>
private void AddProtein(string ID, string Description)
{
    try
    {
        string proteinId = ID;
        if (proteinId.StartsWith("IPI", StringComparison.Ordinal))
        {
            // Remove the decimal (version) suffix at the end of the accession.
            proteinId = Regex.Replace(proteinId, @"\..*", "");
        }

        // Compare using the normalized ID: the stored IDs are normalized, so checking the
        // raw ID would never match a versioned accession and would let duplicates through.
        if (myProteins.Exists(p => p.ID.Equals(proteinId)))
        {
            return;
        }

        ProteinInfo pt = new ProteinInfo();
        pt.ID = proteinId;
        pt.Description = Description;
        myProteins.Add(pt);
    }
    catch (Exception ex)
    {
        // Same exception type and message as before; the cause is kept for diagnostics.
        throw new Exception("Failed parsing result report", ex);
    }
}
/// <summary>
/// Load the Sequence to Protein mapping using the specified PHRP result file
/// </summary>
/// <remarks>
/// Each data row contributes one <c>ProteinInfo</c> to the list stored under its sequence ID.
/// When <c>MaxProteinsPerSeqID</c> is non-zero, additional proteins for an already-known
/// sequence ID are ignored once the list holds that many entries.
/// </remarks>
/// <param name="filePath">Path to the tab-delimited sequence to protein map file</param>
/// <param name="seqToProteinMap">Sequence to protein map; keys are sequence IDs</param>
/// <returns>True if successful; errors are reported by throwing, so false is never returned</returns>
/// <exception cref="Exception">
/// Thrown when anything fails while reading or parsing the file. The message names the file
/// and the original error is available as the inner exception.
/// </exception>
private bool LoadSeqToProteinMapping(string filePath, IDictionary<int, List<ProteinInfo>> seqToProteinMap)
{
    try
    {
        var headerLineParsed = false;

        // Initialize the column mapping with the default column order
        // Using a case-insensitive comparer
        var columnHeaders = new SortedDictionary<string, int>(StringComparer.OrdinalIgnoreCase)
        {
            { SEQ_PROT_MAP_COLUMN_Unique_Seq_ID, 0 },
            { SEQ_PROT_MAP_COLUMN_Cleavage_State, 1 },
            { SEQ_PROT_MAP_COLUMN_Terminus_State, 2 },
            { SEQ_PROT_MAP_COLUMN_Protein_Name, 3 },
            { SEQ_PROT_MAP_COLUMN_Protein_EValue, 4 },
            { SEQ_PROT_MAP_COLUMN_Protein_Intensity, 5 }
        };

        // Read the data from the sequence to protein map file
        using var reader = new StreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));

        while (!reader.EndOfStream)
        {
            var lineIn = reader.ReadLine();
            if (string.IsNullOrEmpty(lineIn))
            {
                continue;
            }

            var splitLine = lineIn.Split('\t');

            if (!headerLineParsed)
            {
                // Only the first non-empty line is considered as a possible header
                headerLineParsed = true;

                if (string.Equals(splitLine[0], SEQ_PROT_MAP_COLUMN_Unique_Seq_ID, StringComparison.OrdinalIgnoreCase))
                {
                    // Parse the header line to confirm the column ordering
                    ReaderFactory.ParseColumnHeaders(splitLine, columnHeaders);
                    continue;
                }
            }

            if (splitLine.Length < 3)
            {
                continue;
            }

            // Rows whose first column is not an integer sequence ID are skipped
            if (!int.TryParse(splitLine[0], out var seqID))
            {
                continue;
            }

            var proteinName = ReaderFactory.LookupColumnValue(splitLine, SEQ_PROT_MAP_COLUMN_Protein_Name, columnHeaders, string.Empty);
            if (string.IsNullOrEmpty(proteinName))
            {
                continue;
            }

            var cleavageState = (PeptideCleavageStateCalculator.PeptideCleavageState)ReaderFactory.LookupColumnValue(
                splitLine, SEQ_PROT_MAP_COLUMN_Cleavage_State, columnHeaders, 0);

            var terminusState = (PeptideCleavageStateCalculator.PeptideTerminusState)ReaderFactory.LookupColumnValue(
                splitLine, SEQ_PROT_MAP_COLUMN_Terminus_State, columnHeaders, 0);

            var proteinInfo = new ProteinInfo(proteinName, seqID, cleavageState, terminusState);

            if (seqToProteinMap.TryGetValue(seqID, out var proteins))
            {
                // Sequence already exists in seqToProteinMap; add the new protein info
                // (a limit of 0 means no limit)
                if (MaxProteinsPerSeqID == 0 || proteins.Count < MaxProteinsPerSeqID)
                {
                    proteins.Add(proteinInfo);
                }
            }
            else
            {
                // New Sequence ID
                seqToProteinMap.Add(seqID, new List<ProteinInfo> { proteinInfo });
            }
        }
    }
    catch (Exception ex)
    {
        // Pass the original exception along so its type and stack trace are not lost
        throw new Exception("Exception loading Seq to Protein Mapping from " + Path.GetFileName(filePath) + ": " + ex.Message, ex);
    }

    return true;
}
/// <summary>
/// Builds the protein-level report for a task. Reads the pipe-delimited "Result.txt" from the
/// task's upload folder, groups the rows flagged "Y" by protein accession, and writes an XML
/// report ("Result.txt.xml") and a tab-delimited table ("Result.txt.xls") beside it.
/// </summary>
/// <param name="TaskID">Task identifier; appended to the configured "UploadPath" to locate the task folder and written to the report root.</param>
/// <param name="FileName">Value written to the report root's "mzFile" attribute.</param>
/// <param name="searchModule">Value written to the report root's "LibModule" attribute.</param>
/// <param name="sc">Value written to the report root's "SearchFilter" attribute.</param>
/// <param name="distinctPeptides">Threshold handed to <c>IsPassFilter</c> for every protein.</param>
/// <returns>
/// Always true. Failures while writing the output files are logged through
/// <c>DBInterface.LogEvent</c>; failures while reading or parsing "Result.txt" propagate as exceptions.
/// </returns>
public bool CreateReport(string TaskID, string FileName, string searchModule, string sc, int distinctPeptides)
{
    Dictionary<string, ProteinInfo> Proteins = new Dictionary<string, ProteinInfo>();
    string strReportFile = WebConfigurationManager.ConnectionStrings["UploadPath"].ConnectionString + TaskID + "\\Result.txt";

    if (File.Exists(strReportFile))
    {
        // using blocks release the file handle even when a malformed line makes parsing throw.
        using (FileStream fs = File.Open(strReportFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Fields used from a "Y" line (pipe-delimited):
                //   0 flag, 1 scan, 2 peptide ID, 5 similarity score, 6 DetaM, 7 spectrum,
                //   8 peptide sequence, 9 modified type, 10 final score, 11 mz file,
                //   12 protein accession, 13 protein name, 14 protein length.
                // Lines with any other flag (e.g. "N") are ignored.
                string[] tokens = line.Split(new string[] { "|" }, StringSplitOptions.None);
                if (tokens[0] != "Y")
                {
                    continue;
                }

                string strIPI = tokens[12];

                ScanPeptide spinfo = new ScanPeptide();
                spinfo.ScanNO = tokens[1];
                spinfo.PeptideID = tokens[2];
                spinfo.SimilarityScore = tokens[5];
                spinfo.DetaM = double.Parse(tokens[6]);
                spinfo.SpectrumSeq = tokens[7];
                spinfo.PeptideSequence = tokens[8];
                spinfo.ModifiedType = tokens[9];
                spinfo.FinalScore = tokens[10];
                spinfo.mzFile = tokens[11];

                ProteinInfo PI;
                if (!Proteins.TryGetValue(strIPI, out PI))
                {
                    // First hit for this protein: the accession doubles as the COPa ID.
                    string strOrganism = this.lbSearchingModule.Text.ToLower().Contains("human")
                        ? "Homo sapiens (Human)"
                        : "Mus musculus (Mouse)";
                    int length = int.Parse(tokens[14]);
                    PI = new ProteinInfo(strIPI, strIPI, tokens[13], strOrganism, length);
                }

                PI.AddScanPeptide(spinfo);

                // Indexer assignment covers both insert and update, and stores the value back
                // after it was modified (the original code did the same on both paths).
                Proteins[strIPI] = PI;
            }
        }
    }

    // Write the protein view info to the XML report and the tab-delimited table.
    XmlWriterSettings settings = new XmlWriterSettings();
    settings.Indent = true;
    settings.IndentChars = (" ");

    string XSLFile = strReportFile + ".xls";
    strReportFile += ".xml";

    // Table columns: mzFile, Scan, PeptideSequence, ModifiedType, Protein Access Numbers, Protein Name,
    // Species, Protein COPaID, Peptide COPaID, Spectrum COPaID, Similarity Score, DetaM/Z, Final Score
    string ProteinLine = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}";

    try
    {
        using (StreamWriter sw = new StreamWriter(XSLFile))
        {
            sw.WriteLine(string.Format(ProteinLine, "mzFile", "Scan", "PeptideSequence", "ModifiedType", "Protein Access Numbers", "Protein Name", "Species", "Protein COPaID", "Peptide COPaID", "Spectrum COPaID", "Similarity Score", "DetaM/Z", "Final Score"));

            using (XmlWriter writer = XmlWriter.Create(strReportFile, settings))
            {
                writer.WriteStartElement("COPaReport");
                writer.WriteAttributeString("TaskID", TaskID);
                writer.WriteAttributeString("mzFile", FileName);
                writer.WriteAttributeString("LibModule", searchModule);
                writer.WriteAttributeString("SearchFilter", sc);
                // Counts every protein read, including those IsPassFilter rejects below.
                writer.WriteAttributeString("IDProteins", Proteins.Count.ToString());

                foreach (KeyValuePair<string, ProteinInfo> kvp in Proteins)
                {
                    ArrayList splists = kvp.Value.ScanPeptides;
                    if (!IsPassFilter(splists, distinctPeptides))
                    {
                        continue;
                    }

                    writer.WriteStartElement("Proteins");
                    writer.WriteAttributeString("IPI", kvp.Key);
                    writer.WriteAttributeString("COPaID", kvp.Value.COPaID);
                    writer.WriteAttributeString("ProteinName", kvp.Value.ProteinName);
                    writer.WriteAttributeString("Organism", kvp.Value.Organism);
                    writer.WriteAttributeString("SpectraCount", splists.Count.ToString());
                    writer.WriteAttributeString("NormalizCount", ((float)(splists.Count) / kvp.Value.ProteinLength).ToString());

                    foreach (ScanPeptide sp in splists)
                    {
                        writer.WriteStartElement("Scan-Peptide");
                        writer.WriteAttributeString("mzFile", sp.mzFile);
                        writer.WriteAttributeString("Scan", sp.ScanNO);
                        writer.WriteAttributeString("PeptideSequence", sp.PeptideSequence);
                        if (sp.ModifiedType != "")
                        {
                            writer.WriteAttributeString("ModifiedType", sp.ModifiedType);
                        }
                        writer.WriteAttributeString("Peptide", sp.PeptideID);
                        writer.WriteAttributeString("SimilarityScore", sp.SimilarityScore);
                        writer.WriteAttributeString("DetaMZ", sp.DetaM.ToString());
                        writer.WriteAttributeString("Spectrum", sp.SpectrumSeq);
                        writer.WriteAttributeString("FinalyScore", sp.FinalScore);
                        writer.WriteEndElement();

                        // One table row per scan/peptide, mirroring the XML element just written.
                        sw.WriteLine(ProteinLine, sp.mzFile, sp.ScanNO, sp.PeptideSequence, sp.ModifiedType, kvp.Key, kvp.Value.ProteinName, kvp.Value.Organism, kvp.Value.COPaID, sp.PeptideID, sp.SpectrumSeq, sp.SimilarityScore, sp.DetaM, sp.FinalScore);
                    }

                    writer.WriteEndElement();
                }

                writer.WriteEndElement();
                // Both writers are flushed and closed when their using blocks end.
            }
        }
    }
    catch (Exception ex)
    {
        // Writing the reports is best-effort: the failure is logged and the method still returns true.
        DBInterface.LogEvent(ex.ToString(), System.Diagnostics.EventLogEntryType.Error);
    }

    return true;
}
/// <summary>
/// Builds the protein-level report for a task. Reads the pipe-delimited "Result.txt" from the
/// task's upload folder, groups the rows flagged "Y" by protein accession, and writes an XML
/// report ("Result.txt.xml") and a tab-delimited table ("Result.txt.xls") beside it.
/// </summary>
/// <param name="TaskID">Task identifier; appended to the configured "UploadPath" to locate the task folder and written to the report root.</param>
/// <param name="FileName">Value written to the report root's "mzFile" attribute.</param>
/// <param name="searchModule">Value written to the report root's "LibModule" attribute.</param>
/// <param name="sc">Value written to the report root's "SearchFilter" attribute.</param>
/// <param name="distinctPeptides">Threshold handed to <c>IsPassFilter</c> for every protein.</param>
/// <returns>
/// Always true. Failures while writing the output files are logged through
/// <c>DBInterface.LogEvent</c>; failures while reading or parsing "Result.txt" propagate as exceptions.
/// </returns>
public bool CreateReport(string TaskID, string FileName, string searchModule, string sc, int distinctPeptides)
{
    Dictionary<string, ProteinInfo> Proteins = new Dictionary<string, ProteinInfo>();
    string strReportFile = WebConfigurationManager.ConnectionStrings["UploadPath"].ConnectionString + TaskID + "\\Result.txt";

    if (File.Exists(strReportFile))
    {
        // using blocks release the file handle even when a malformed line makes parsing throw.
        using (FileStream fs = File.Open(strReportFile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Fields used from a "Y" line (pipe-delimited):
                //   0 flag, 1 scan, 2 peptide ID, 5 similarity score, 6 DetaM, 7 spectrum,
                //   8 peptide sequence, 9 modified type, 10 final score, 11 mz file,
                //   12 protein accession, 13 protein name, 14 protein length.
                // Lines with any other flag (e.g. "N") are ignored.
                string[] tokens = line.Split(new string[] { "|" }, StringSplitOptions.None);
                if (tokens[0] != "Y")
                {
                    continue;
                }

                string strIPI = tokens[12];

                ScanPeptide spinfo = new ScanPeptide();
                spinfo.ScanNO = tokens[1];
                spinfo.PeptideID = tokens[2];
                spinfo.SimilarityScore = tokens[5];
                spinfo.DetaM = double.Parse(tokens[6]);
                spinfo.SpectrumSeq = tokens[7];
                spinfo.PeptideSequence = tokens[8];
                spinfo.ModifiedType = tokens[9];
                spinfo.FinalScore = tokens[10];
                spinfo.mzFile = tokens[11];

                ProteinInfo PI;
                if (!Proteins.TryGetValue(strIPI, out PI))
                {
                    // First hit for this protein: the accession doubles as the COPa ID.
                    string strOrganism = this.lbSearchingModule.Text.ToLower().Contains("human")
                        ? "Homo sapiens (Human)"
                        : "Mus musculus (Mouse)";
                    int length = int.Parse(tokens[14]);
                    PI = new ProteinInfo(strIPI, strIPI, tokens[13], strOrganism, length);
                }

                PI.AddScanPeptide(spinfo);

                // Indexer assignment covers both insert and update, and stores the value back
                // after it was modified (the original code did the same on both paths).
                Proteins[strIPI] = PI;
            }
        }
    }

    // Write the protein view info to the XML report and the tab-delimited table.
    XmlWriterSettings settings = new XmlWriterSettings();
    settings.Indent = true;
    settings.IndentChars = (" ");

    string XSLFile = strReportFile + ".xls";
    strReportFile += ".xml";

    // Table columns: mzFile, Scan, PeptideSequence, ModifiedType, Protein Access Numbers, Protein Name,
    // Species, Protein COPaID, Peptide COPaID, Spectrum COPaID, Similarity Score, DetaM/Z, Final Score
    string ProteinLine = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}\t{12}";

    try
    {
        using (StreamWriter sw = new StreamWriter(XSLFile))
        {
            sw.WriteLine(string.Format(ProteinLine, "mzFile", "Scan", "PeptideSequence", "ModifiedType", "Protein Access Numbers", "Protein Name", "Species", "Protein COPaID", "Peptide COPaID", "Spectrum COPaID", "Similarity Score", "DetaM/Z", "Final Score"));

            using (XmlWriter writer = XmlWriter.Create(strReportFile, settings))
            {
                writer.WriteStartElement("COPaReport");
                writer.WriteAttributeString("TaskID", TaskID);
                writer.WriteAttributeString("mzFile", FileName);
                writer.WriteAttributeString("LibModule", searchModule);
                writer.WriteAttributeString("SearchFilter", sc);
                // Counts every protein read, including those IsPassFilter rejects below.
                writer.WriteAttributeString("IDProteins", Proteins.Count.ToString());

                foreach (KeyValuePair<string, ProteinInfo> kvp in Proteins)
                {
                    ArrayList splists = kvp.Value.ScanPeptides;
                    if (!IsPassFilter(splists, distinctPeptides))
                    {
                        continue;
                    }

                    writer.WriteStartElement("Proteins");
                    writer.WriteAttributeString("IPI", kvp.Key);
                    writer.WriteAttributeString("COPaID", kvp.Value.COPaID);
                    writer.WriteAttributeString("ProteinName", kvp.Value.ProteinName);
                    writer.WriteAttributeString("Organism", kvp.Value.Organism);
                    writer.WriteAttributeString("SpectraCount", splists.Count.ToString());
                    writer.WriteAttributeString("NormalizCount", ((float)(splists.Count) / kvp.Value.ProteinLength).ToString());

                    foreach (ScanPeptide sp in splists)
                    {
                        writer.WriteStartElement("Scan-Peptide");
                        writer.WriteAttributeString("mzFile", sp.mzFile);
                        writer.WriteAttributeString("Scan", sp.ScanNO);
                        writer.WriteAttributeString("PeptideSequence", sp.PeptideSequence);
                        if (sp.ModifiedType != "")
                        {
                            writer.WriteAttributeString("ModifiedType", sp.ModifiedType);
                        }
                        writer.WriteAttributeString("Peptide", sp.PeptideID);
                        writer.WriteAttributeString("SimilarityScore", sp.SimilarityScore);
                        writer.WriteAttributeString("DetaMZ", sp.DetaM.ToString());
                        writer.WriteAttributeString("Spectrum", sp.SpectrumSeq);
                        writer.WriteAttributeString("FinalyScore", sp.FinalScore);
                        writer.WriteEndElement();

                        // One table row per scan/peptide, mirroring the XML element just written.
                        sw.WriteLine(ProteinLine, sp.mzFile, sp.ScanNO, sp.PeptideSequence, sp.ModifiedType, kvp.Key, kvp.Value.ProteinName, kvp.Value.Organism, kvp.Value.COPaID, sp.PeptideID, sp.SpectrumSeq, sp.SimilarityScore, sp.DetaM, sp.FinalScore);
                    }

                    writer.WriteEndElement();
                }

                writer.WriteEndElement();
                // Both writers are flushed and closed when their using blocks end.
            }
        }
    }
    catch (Exception ex)
    {
        // Writing the reports is best-effort: the failure is logged and the method still returns true.
        DBInterface.LogEvent(ex.ToString(), System.Diagnostics.EventLogEntryType.Error);
    }

    return true;
}