/// <summary>
/// Builds a space-separated list of "peptide/PTM" entries for the given peptide.
/// </summary>
/// <param name="peptide">Peptide whose variants are scanned for PTMs.</param>
/// <returns>
/// One "peptide/ptm " entry (note the trailing space) for every distinct PTM found in the
/// peptide variants, in order of first appearance; an empty string if there is none.
/// </returns>
private String GetPtmCsv (Peptide peptide)
{
    List<PTM> seen = new List<PTM>();
    String csv = "";

    foreach (List<PTM> variant in peptide.Variants)
    {
        foreach (PTM ptm in variant)
        {
            // A PTM shared by several variants is reported only once
            if (seen.Contains(ptm))
            {
                continue;
            }

            seen.Add(ptm);
            csv = csv + peptide.ToString() + "/" + ptm.ToString() + " ";
        }
    }

    return csv;
}
/// <summary>
/// Tells whether a peptide passes the current filter settings.
/// </summary>
/// <param name="f">Peptide to check.</param>
/// <returns>
/// true when every condition below holds; false as soon as one fails. The conditions are
/// evaluated in the listed order.
/// </returns>
private bool CheckPeptide( Peptide f )
{
    return
        // Must be related to at least one protein
        f.Proteins.Count != 0
        // When spectra are available, at least one PSM is required
        && (Spectra.Count == 0 || (f.Psm != null && f.Psm.Count != 0))
        // Decoys are rejected only if decoy filtering is enabled
        && !(FilterDecoys && f.Decoy)
        // Sequence must reach the minimum length
        && !(f.Sequence.Length < LengthThreshold)
        // PLGS sources additionally require a minimum confidence
        && !(Type == SourceType.Plgs && (int)f.Confidence < (int)PlgsThreshold);
}
/// <summary>
/// Rebuilds the spectrum list keeping only the PSMs accepted by CheckPsm, and rebuilds
/// the PSM list of every peptide accordingly.
/// </summary>
/// <remarks>
/// Does nothing when there are no spectra. Spectra left without any accepted PSM are
/// dropped. The confidence of a peptide is raised to the confidence of any of its accepted
/// PSMs that exceeds it.
/// </remarks>
private void FilterPsms()
{
    if (Spectra.Count == 0)
    {
        return;
    }

    // Remove previous relations: every peptide starts with an empty PSM list
    foreach (Peptide peptide in Peptides)
    {
        peptide.Psm = new List<PSM>();
    }

    List<Spectrum> kept = new List<Spectrum>();
    foreach (Spectrum source in Spectra)
    {
        if (source.Psm == null)
        {
            continue;
        }

        // Created lazily, only once the first valid PSM of this spectrum shows up
        Spectrum copy = null;
        foreach (PSM psm in source.Psm)
        {
            if (!CheckPsm(psm))
            {
                continue;
            }

            if (copy == null)
            {
                copy = new Spectrum();
                copy.File = source.File;
                copy.ID = source.ID;
                copy.SpectrumID = source.SpectrumID;
                copy.Psm = new List<PSM>();
            }
            copy.Psm.Add(psm);

            Peptide owner = psm.Peptide;
            if (!owner.Psm.Contains(psm))
            {
                owner.Psm.Add(psm);
            }
            if (owner.Confidence < psm.Confidence)
            {
                owner.Confidence = psm.Confidence;
            }
        }

        if (copy != null)
        {
            kept.Add(copy);
        }
    }

    Spectra = kept;
}
/// <summary>
/// Loads proteins, peptides and, when a PLGS threshold is active, peptide scores from an
/// XML document containing PROTEIN, PEPTIDE and MASS_MATCH elements.
/// </summary>
/// <param name="xmlpath">Path of the XML document to load.</param>
/// <exception cref="ApplicationException">
/// The sequence of a peptide is not contained in the sequence of the protein it references.
/// </exception>
private void LoadData( string xmlpath )
{
    XmlDocument doc = new XmlDocument();
    doc.Load( xmlpath );

    // Scores (and the QUERY_MASS_ID attribute) are only read when a threshold is requested
    bool useScores = PlgsThreshold != Peptide.ConfidenceType.NoThreshold;

    // Load proteins (file-local protein ID -> accession)
    SortedList<int, string> accessionById = new SortedList<int, string>();
    foreach( XmlElement element in doc.GetElementsByTagName("PROTEIN") )
    {
        int id = int.Parse( element.GetAttribute("ID") );
        if( accessionById.ContainsKey(id) )
        {
            continue;
        }
        string acc = element.GetElementsByTagName("ACCESSION")[0].InnerText;
        accessionById.Add( id, acc );

        // The protein may already be known from a previously loaded file
        if( m_SortedProteins.ContainsKey(acc) )
        {
            continue;
        }
        string entry = element.GetElementsByTagName("ENTRY")[0].InnerText;
        string desc = element.GetElementsByTagName("DESCRIPTION")[0].InnerText.Replace( '+', ' ' );
        // Invariant casing: sequences are data, not text in the user's language
        string seq = element.GetElementsByTagName("SEQUENCE")[0].InnerText.ToUpperInvariant();
        Protein p = new Protein( m_pid++, entry, acc, desc, seq );
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Load peptides (indexed by QUERY_MASS_ID when scores are needed)
    SortedList<int, Peptide> peptideByMassId = new SortedList<int, Peptide>();
    foreach( XmlElement element in doc.GetElementsByTagName("PEPTIDE") )
    {
        int id = int.Parse( element.GetAttribute("ID") );
        int pid = int.Parse( element.GetAttribute("PROT_ID") );
        int mid = 0;
        if( useScores )
        {
            mid = int.Parse( element.GetAttribute("QUERY_MASS_ID") );
        }
        string seq = element.GetAttribute("SEQUENCE").ToUpperInvariant();
        Peptide f = new Peptide( id, seq );
        f.Runs.Add( m_Run );

        foreach( XmlElement mod in element.GetElementsByTagName("MATCH_MODIFIER") )
        {
            // NAME has the form "name+residues"; the residues part is optional
            string[] nameParts = mod.GetAttribute("NAME").Split( new char[]{'+'} );
            string pos = mod.GetAttribute("POS");
            PTM ptm = new PTM();
            ptm.Name = nameParts[0];
            ptm.Pos = pos.Length == 0 ? -1 : int.Parse( pos );
            if( nameParts.Length > 1 )
            {
                ptm.Residues = nameParts[1];
            }
            f.AddPTM( ptm );
        }

        // Resolve the referenced protein with explicit lookups instead of catching
        // whatever exception a failed indexer access would raise
        Protein p = null;
        string accession;
        if( accessionById.TryGetValue(pid, out accession) && m_SortedProteins.ContainsKey(accession) )
        {
            p = m_SortedProteins[accession];
        }
        else
        {
            Notify( "Peptide '" + id + "' references unknown protein '" + pid + "'" );
        }

        if( p != null )
        {
            p.Peptides.Add( f );
            f.Proteins.Add( p );
            if( !p.Sequence.Contains(f.Sequence) )
            {
                throw new ApplicationException( "Inconsistent sequence data" );
            }
        }

        Peptides.Add( f );
        if( useScores )
        {
            peptideByMassId.Add( mid, f );
        }
    }

    if( !useScores )
    {
        return;
    }

    // Scores
    foreach( XmlElement element in doc.GetElementsByTagName("MASS_MATCH") )
    {
        int id = int.Parse( element.GetAttribute("ID") );
        double score = double.Parse( element.GetAttribute("SCORE"), m_Format );
        peptideByMassId[id].Score = score;
    }
}
/// <summary>
/// Loads a mzIdentML file: proteins, peptides (with their modifications) and the
/// peptide-protein relations of the identification items that pass the threshold.
/// </summary>
/// <param name="mzid">Path of the mzIdentML file.</param>
/// <exception cref="ApplicationException">
/// The file does not contain exactly one spectrum identification list.
/// </exception>
protected override void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // Proteins (DBSequence id -> accession)
    SortedList<string, string> accessionByRef = new SortedList<string, string>();
    foreach (PSIPIanalysissearchDBSequenceType dbSeq in m_mzid.ListProteins)
    {
        if (accessionByRef.ContainsKey(dbSeq.id))
        {
            continue;
        }
        string accession = dbSeq.accession;
        accessionByRef.Add(dbSeq.id, accession);

        // Already known protein: only the reference mapping is needed
        if (m_SortedProteins.ContainsKey(accession))
        {
            continue;
        }

        // Entry name and description come from optional cvParams
        string entry = "";
        FuGECommonOntologycvParamType entryParam = FuGECommonOntologycvParamType.Find("MS:1001352", dbSeq.cvParam);
        if (entryParam != null)
        {
            entry = entryParam.value;
        }
        string description = "";
        FuGECommonOntologycvParamType descParam = FuGECommonOntologycvParamType.Find("MS:1001088", dbSeq.cvParam);
        if (descParam != null)
        {
            description = descParam.value;
        }

        // The sequence is stored as found in the file (no case conversion)
        Protein protein = new Protein(m_pid++, entry, accession, description, dbSeq.seq);
        protein.DBRef = dbSeq.id;
        Proteins.Add(protein);
        m_SortedProteins.Add(accession, protein);
    }

    // Peptides (peptide id in the file -> Peptide)
    SortedList<string, Peptide> peptideByRef = new SortedList<string, Peptide>();
    int nextId = 1;
    foreach (PSIPIpolypeptidePeptideType pep in m_mzid.ListPeptides)
    {
        // The sequence is stored as found in the file (no case conversion)
        Peptide peptide = new Peptide(nextId++, pep.peptideSequence);
        peptide.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        peptideByRef.Add(pep.id, peptide);
        peptide.Runs.Add(m_Run);

        if (pep.Modification != null)
        {
            foreach (PSIPIpolypeptideModificationType mod in pep.Modification)
            {
                PTM ptm = new PTM();
                if (mod.locationSpecified)
                {
                    ptm.Pos = mod.location;
                }
                else
                {
                    ptm.Pos = -1;
                }
                if (mod.residues != null)
                {
                    foreach (string residue in mod.residues)
                    {
                        ptm.Residues = ptm.Residues + residue;
                    }
                }
                // The name is taken from the UNIMOD cvParam (the last one, if several)
                foreach (FuGECommonOntologycvParamType param in mod.cvParam)
                {
                    if (param.cvRef.Equals("UNIMOD"))
                    {
                        ptm.Name = param.name;
                    }
                }
                peptide.AddPTM(ptm);
            }
        }

        Peptides.Add(peptide);
    }

    // Relations
    if (m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1)
    {
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );
    }
    foreach (PSIPIanalysissearchSpectrumIdentificationResultType result in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult)
    {
        foreach (PSIPIanalysissearchSpectrumIdentificationItemType item in result.SpectrumIdentificationItem)
        {
            if (!item.passThreshold)
            {
                continue;
            }
            Peptide peptide = peptideByRef[item.Peptide_ref];
            if (item.PeptideEvidence == null)
            {
                continue;
            }
            peptide.Confidence = Peptide.ConfidenceType.PassThreshold;

            foreach (PSIPIanalysisprocessPeptideEvidenceType evidence in item.PeptideEvidence)
            {
                Protein protein = m_SortedProteins[accessionByRef[evidence.DBSequence_Ref]];
                // Each peptide-protein pair is linked only once
                if (peptide.Proteins.Contains(protein))
                {
                    continue;
                }
                peptide.Names.Add(evidence.DBSequence_Ref, evidence.id);
                protein.Peptides.Add(peptide);
                peptide.Proteins.Add(protein);
            }
        }
    }
}
/// <summary>
/// Builds a space-separated list of "peptide/PTM" entries for the given peptide.
/// </summary>
/// <param name="peptide">Peptide whose variants are scanned for PTMs.</param>
/// <returns>
/// One "peptide/ptm " entry (note the trailing space) for every distinct PTM found in the
/// peptide variants, in order of first appearance; an empty string if there is none.
/// </returns>
private String GetPtmCsv(Peptide peptide)
{
    String entries = "";
    List<PTM> reported = new List<PTM>();

    foreach (List<PTM> variant in peptide.Variants)
    {
        foreach (PTM ptm in variant)
        {
            // Skip PTMs already listed for a previous variant
            bool alreadyReported = reported.Contains(ptm);
            if (alreadyReported)
            {
                continue;
            }

            reported.Add(ptm);
            entries = entries + peptide.ToString() + "/" + ptm.ToString() + " ";
        }
    }

    return entries;
}
/// <summary>
/// Tells whether a PSM described by the given values passes the current filter settings.
/// </summary>
/// <param name="passThreshold">Pass-threshold flag of the PSM.</param>
/// <param name="rank">Rank of the PSM; 0 is rejected whenever a rank threshold is set.</param>
/// <param name="confidence">Confidence of the PSM.</param>
/// <param name="score">Score of the PSM, interpreted according to <paramref name="type"/>.</param>
/// <param name="type">Name of the score type.</param>
/// <returns>
/// true when every condition below holds; false as soon as one fails. The conditions are
/// evaluated in the listed order.
/// </returns>
protected bool CheckPsm( bool passThreshold, int rank, Peptide.ConfidenceType confidence, double score, string type )
{
    return
        // Pass-threshold flag, only if required
        !(RequirePassTh && !passThreshold)
        // Rank threshold (0 disables it)
        && !(RankThreshold != 0 && (rank == 0 || rank > RankThreshold))
        // Confidence threshold, only for source types between mzIdentML110 and mzIdentML120
        && !(Type >= SourceType.mzIdentML110 && Type <= SourceType.mzIdentML120
            && (int)confidence < (int)SeqThreshold)
        // Expectation values: greater than the threshold means rejected
        && !(type == "Mascot expectation value" && score > MascotThreshold)
        && !(type == "X!Tandem expect" && score > XTandemThreshold);
}
/// <summary>
/// Loads a mzIdentML file: proteins, peptides (with their modifications) and the
/// peptide-protein relations of the identification items that pass the threshold.
/// </summary>
/// <param name="mzid">Path of the mzIdentML file.</param>
/// <exception cref="ApplicationException">
/// The file does not contain exactly one spectrum identification list.
/// </exception>
override protected void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // Proteins (DBSequence id -> accession)
    SortedList<string, string> accessionByRef = new SortedList<string, string>();
    foreach (PSIPIanalysissearchDBSequenceType dbSeq in m_mzid.ListProteins)
    {
        if (accessionByRef.ContainsKey(dbSeq.id))
        {
            continue;
        }
        string accession = dbSeq.accession;
        accessionByRef.Add(dbSeq.id, accession);

        // Already known protein: only the reference mapping is needed
        if (m_SortedProteins.ContainsKey(accession))
        {
            continue;
        }

        // Entry name and description come from optional cvParams
        string entry = "";
        FuGECommonOntologycvParamType entryParam = FuGECommonOntologycvParamType.Find("MS:1001352", dbSeq.cvParam);
        if (entryParam != null)
        {
            entry = entryParam.value;
        }
        string description = "";
        FuGECommonOntologycvParamType descParam = FuGECommonOntologycvParamType.Find("MS:1001088", dbSeq.cvParam);
        if (descParam != null)
        {
            description = descParam.value;
        }

        // The sequence is stored as found in the file (no case conversion)
        Protein protein = new Protein(m_pid++, entry, accession, description, dbSeq.seq);
        protein.DBRef = dbSeq.id;
        Proteins.Add(protein);
        m_SortedProteins.Add(accession, protein);
    }

    // Peptides (peptide id in the file -> Peptide)
    SortedList<string, Peptide> peptideByRef = new SortedList<string, Peptide>();
    int nextId = 1;
    foreach (PSIPIpolypeptidePeptideType pep in m_mzid.ListPeptides)
    {
        // The sequence is stored as found in the file (no case conversion)
        Peptide peptide = new Peptide(nextId++, pep.peptideSequence);
        peptide.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        peptideByRef.Add(pep.id, peptide);
        peptide.Runs.Add(m_Run);

        if (pep.Modification != null)
        {
            foreach (PSIPIpolypeptideModificationType mod in pep.Modification)
            {
                PTM ptm = new PTM();
                if (mod.locationSpecified)
                {
                    ptm.Pos = mod.location;
                }
                else
                {
                    ptm.Pos = -1;
                }
                if (mod.residues != null)
                {
                    foreach (string residue in mod.residues)
                    {
                        ptm.Residues = ptm.Residues + residue;
                    }
                }
                // The name is taken from the UNIMOD cvParam (the last one, if several)
                foreach (FuGECommonOntologycvParamType param in mod.cvParam)
                {
                    if (param.cvRef.Equals("UNIMOD"))
                    {
                        ptm.Name = param.name;
                    }
                }
                peptide.AddPTM(ptm);
            }
        }

        Peptides.Add(peptide);
    }

    // Relations
    if (m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1)
    {
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );
    }
    foreach (PSIPIanalysissearchSpectrumIdentificationResultType result in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult)
    {
        foreach (PSIPIanalysissearchSpectrumIdentificationItemType item in result.SpectrumIdentificationItem)
        {
            if (!item.passThreshold)
            {
                continue;
            }
            Peptide peptide = peptideByRef[item.Peptide_ref];
            if (item.PeptideEvidence == null)
            {
                continue;
            }
            peptide.Confidence = Peptide.ConfidenceType.PassThreshold;

            foreach (PSIPIanalysisprocessPeptideEvidenceType evidence in item.PeptideEvidence)
            {
                Protein protein = m_SortedProteins[accessionByRef[evidence.DBSequence_Ref]];
                // Each peptide-protein pair is linked only once
                if (peptide.Proteins.Contains(protein))
                {
                    continue;
                }
                peptide.Names.Add(evidence.DBSequence_Ref, evidence.id);
                protein.Peptides.Add(peptide);
                peptide.Proteins.Add(protein);
            }
        }
    }
}
/// <summary>
/// Checks for the presence of the given peptide in the peptide list of the protein.
/// </summary>
/// <param name="fo">Peptide to look for.</param>
/// <returns>true if the peptide list contains <paramref name="fo"/>; otherwise false.</returns>
public bool HasPeptide( Peptide fo )
{
    bool found = Peptides.Contains( fo );
    return found;
}
/// <summary>
/// Creates a peptide for every peptide entry of the loaded mzIdentML file, including its
/// modifications, and appends it to the peptide list.
/// </summary>
/// <returns>The created peptides indexed by the peptide id used in the file.</returns>
private SortedList<string, Peptide> LoadPeptides()
{
    SortedList<string, Peptide> peptideByRef = new SortedList<string, Peptide>();
    int nextId = 1;

    foreach (PeptideType pep in m_mzid.ListPeptides)
    {
        Peptide peptide = new Peptide(nextId++, pep.PeptideSequence);
        peptide.Confidence = Peptide.ConfidenceType.NoThreshold;
        peptideByRef.Add(pep.id, peptide);
        peptide.Runs.Add(m_Run);

        if (pep.Modification != null)
        {
            foreach (ModificationType mod in pep.Modification)
            {
                PTM ptm = new PTM();

                // -1 marks a modification without a specified location
                if (mod.locationSpecified)
                {
                    ptm.Pos = mod.location;
                }
                else
                {
                    ptm.Pos = -1;
                }

                if (mod.residues != null)
                {
                    foreach (string residue in mod.residues)
                    {
                        ptm.Residues = ptm.Residues + residue;
                    }
                }

                // The name is taken from the UNIMOD cvParam (the last one, if several)
                foreach (CVParamType param in mod.cvParam)
                {
                    if (param.cvRef.Equals("UNIMOD"))
                    {
                        ptm.Name = param.name;
                    }
                }

                peptide.AddPTM(ptm);
            }
        }

        peptide.DBRef = pep.id;
        Peptides.Add(peptide);
    }

    return peptideByRef;
}
/// <summary>
/// Extracts the score, score type and confidence of a spectrum identification item.
/// </summary>
/// <param name="item">Identification item to inspect.</param>
/// <param name="score">Parsed score, or -1.0 when no supported score is found.</param>
/// <param name="type">Name of the score type, or "N/A" when no supported score is found.</param>
/// <param name="confidence">
/// Green, Yellow or Red for an MS:1001155 score, depending on the thresholds for the charge
/// state of the item; otherwise PassThreshold or NoThreshold according to the
/// pass-threshold flag of the item.
/// </param>
/// <remarks>
/// Only the first cvParam with a supported accession (MS:1001155, MS:1001172 or
/// MS:1001330) is used.
/// </remarks>
private void GetPsmScore( SpectrumIdentificationItemType item, out double score, out string type, out Peptide.ConfidenceType confidence )
{
    // Defaults, used when the item has no supported score
    score = -1.0;
    type = "N/A";
    if (item.passThreshold)
    {
        confidence = Peptide.ConfidenceType.PassThreshold;
    }
    else
    {
        confidence = Peptide.ConfidenceType.NoThreshold;
    }

    if (item.Items == null)
    {
        return;
    }

    foreach (AbstractParamType param in item.Items)
    {
        CVParamType cv = param as CVParamType;
        if (cv == null)
        {
            continue;
        }

        switch (cv.accession)
        {
            case "MS:1001155":
                score = double.Parse(cv.value, m_Format);
                type = "ProteomeDiscoverer/SEQUEST Confidence XCorr";
                // Thresholds are indexed by charge state (1-based in the file)
                if (score >= m_GreenTh[item.chargeState-1])
                {
                    confidence = Peptide.ConfidenceType.Green;
                }
                else if (score >= m_YellowTh[item.chargeState-1])
                {
                    confidence = Peptide.ConfidenceType.Yellow;
                }
                else
                {
                    confidence = Peptide.ConfidenceType.Red;
                }
                return;

            case "MS:1001172":
                score = double.Parse(cv.value, m_Format);
                type = "Mascot expectation value";
                return;

            case "MS:1001330":
                score = double.Parse(cv.value, m_Format);
                type = "X!Tandem expect";
                return;

            default:
                // Unsupported parameter, keep looking
                break;
        }
    }
}
/// <summary>
/// Removes peptides rejected by CheckPeptide, merges peptides sharing the same sequence
/// and, when a runs threshold greater than 1 is set, drops peptides seen in too few runs.
/// </summary>
/// <remarks>
/// Surviving peptides are renumbered starting at 1 and the peptide lists of the proteins
/// are rebuilt from them.
/// </remarks>
private void FilterPeptides()
{
    // Remove previous relations
    foreach (Protein protein in Proteins)
    {
        protein.Peptides.Clear();
    }

    List<Peptide> unique = new List<Peptide>();
    SortedList<string, Peptide> bySequence = new SortedList<string, Peptide>();
    int nextId = 1;

    foreach (Peptide f in Peptides)
    {
        // Rejected peptide: detach its PSMs from their spectra
        if (!CheckPeptide(f))
        {
            if (f.Psm != null)
            {
                foreach (PSM psm in f.Psm)
                {
                    psm.Spectrum.Psm.Remove(psm);
                }
            }
            continue;
        }

        // New peptide (first time this sequence is seen)
        Peptide kept;
        if (!bySequence.TryGetValue(f.Sequence, out kept))
        {
            f.ID = nextId++;
            bySequence.Add(f.Sequence, f);
            unique.Add(f);
            continue;
        }

        // Duplicated peptide: merge it into the one already kept
        if ((int)f.Confidence > (int)kept.Confidence)
        {
            kept.Confidence = f.Confidence;
        }
        if (!kept.Runs.Contains(f.Runs[0]))
        {
            kept.Runs.Add(f.Runs[0]);
        }
        kept.AddVariant(f.LastVariant);

        // Duplicated protein? (needed for PLGS)
        Protein candidate = f.Proteins[0];
        bool alreadyLinked = false;
        foreach (Protein linked in kept.Proteins)
        {
            if (linked.ID == candidate.ID)
            {
                alreadyLinked = true;
                break;
            }
        }
        if (!alreadyLinked)
        {
            kept.Proteins.Add(candidate);
        }

        // Merge the PSM lists and make every PSM point to the kept peptide
        if (kept.Psm == null)
        {
            kept.Psm = f.Psm;
        }
        else if (f.Psm != null)
        {
            kept.Psm.AddRange(f.Psm);
        }
        if (kept.Psm != null)
        {
            foreach (PSM psm in kept.Psm)
            {
                psm.Peptide = kept;
            }
        }
    }

    // Vote peptides
    if (RunsThreshold > 1)
    {
        Peptides = new List<Peptide>();
        foreach (Peptide f in unique)
        {
            if (f.Runs.Count >= RunsThreshold)
            {
                Peptides.Add(f);
            }
        }
    }
    else
    {
        Peptides = unique;
    }

    // Assigns the new peptides to their proteins
    foreach (Peptide f in Peptides)
    {
        foreach (Protein protein in f.Proteins)
        {
            protein.Peptides.Add(f);
        }
    }
}
/// <summary>
/// Writes an HTML table with the details of a protein followed by the details of each of
/// its peptides.
/// </summary>
/// <param name="w">Writer receiving the HTML.</param>
/// <param name="p">Protein to describe.</param>
/// <remarks>
/// NOTE(review): the Tag helper appears to be stateful (its string conversion is used both
/// to open and to close elements, and its Hold flag is toggled around each peptide), so the
/// order of the Tag calls below is deliberately kept untouched - confirm against Tag.
/// </remarks>
private void WriteProteinDetails( TextWriter w, Protein p )
{
    Tag row = new Tag( "tr", true );
    Tag cell = new Tag( "td", "colspan" );
    Tag head = new Tag( "th", "rowspan" );
    Tag link = new Tag( "a", "href" );
    int k;

    // Protein section
    w.WriteLine( "<table>\n<caption><a name=\"" + p.Accession + "\"/>Protein " + p.Accession + "</caption>" );
    w.WriteLine( row.Render(head.Render("Name") + cell.Render("3", p.EntryEx)) );
    w.WriteLine( row.Render(head.Render("Description") + cell.Render("3", p.Desc)) );
    w.WriteLine( row.Render(head.Render("Sequence") + cell.Render("3", "<pre>" + p.ParseSeq(10) + "</pre>")) );
    w.WriteLine( row.Render(head.Render("Evidence") + cell.Render("3", p.Evidence.ToString())) );

    // Comma separated links to the peptides described further down
    w.Write( row + head.Render("Peptide list") + "<td colspan=\"3\">" );
    if( p.Peptides.Count > 0 )
    {
        for( k = 0; k < p.Peptides.Count-1; k++ )
        {
            w.Write( link.Render("#" + p.Accession + "__" + p.Peptides[k].ID, p.Peptides[k].ToString()) + ", " );
        }
        // k is left at the last peptide, which gets no trailing comma
        w.Write( link.Render("#" + p.Accession + "__" + p.Peptides[k].ID, p.Peptides[k].ToString()) );
    }
    w.WriteLine( "</td>" + row );

    if( p.Peptides.Count == 0 )
    {
        w.WriteLine( "</table><br/>" );
        return;
    }

    // Peptides section: 8 rows per peptide, plus one for the PSMs when spectra are available
    int rowsPerPeptide = Spectra.Count != 0 ? 9 : 8;
    w.Write( row + head.Render((p.Peptides.Count*rowsPerPeptide).ToString(), "Peptides") );
    bool isFirst = true;
    foreach( Peptide pep in p.Peptides )
    {
        // The row of the first peptide has already been started by the "Peptides" header
        if( !isFirst )
        {
            w.Write( row.ToString() );
        }
        isFirst = false;

        w.Write( head.Render(rowsPerPeptide.ToString(), pep.ToString()) );
        w.WriteLine( head.Render("<a name=\"" + p.Accession + "__" + pep.ID + "\"/>Confidence") + cell.Render(pep.Confidence.ToString()) + row );
        row.Hold = true;

        w.WriteLine( row.Render(head.Render("Decoy") + cell.Render(pep.Decoy.ToString())) );

        // Runs, comma separated (k is left at the last element after the loop)
        w.Write( row + head.Render("Runs") + cell );
        for( k = 0; k < pep.Runs.Count-1; k++ )
        {
            w.Write( pep.Runs[k].ToString() + ", " );
        }
        w.WriteLine( pep.Runs[k].ToString() + cell + row );

        w.WriteLine( row.Render(head.Render("Relation") + cell.Render(pep.Relation.ToString())) );

        // Proteins, comma separated links
        w.Write( row + head.Render("Proteins") + cell );
        for( k = 0; k < pep.Proteins.Count-1; k++ )
        {
            w.Write( link.Render("#" + pep.Proteins[k].Accession, pep.Proteins[k].EntryEx) + ", " );
        }
        w.WriteLine( link.Render("#" + pep.Proteins[k].Accession, pep.Proteins[k].EntryEx) + cell.ToString() + row );

        w.WriteLine( row.Render(head.Render("Sequence") + cell.Render("<pre>" + pep.Sequence + "</pre>")) );
        w.WriteLine( row.Render(head.Render("Position") + cell.Render(pep.GetPositions(p))) );

        // PTMs: a single variant is written as is, several variants are numbered
        w.Write( row + head.Render("PTMs") + cell );
        if( pep.Variants.Count == 1 )
        {
            w.Write( Peptide.Variant2Str(pep.LastVariant) );
        }
        else
        {
            k = 1;
            foreach( List<PTM> variant in pep.Variants )
            {
                w.Write( "Variant #" + (k++) + ": " + Peptide.Variant2Str(variant) + "<br/>" );
            }
        }
        w.WriteLine( cell.ToString() + row );

        // PSMs, comma separated links (only when spectra are available)
        if( Spectra.Count != 0 )
        {
            w.Write( row + head.Render("PSMs") + cell );
            if( pep.Psm != null )
            {
                for( k = 0; k < pep.Psm.Count-1; k++ )
                {
                    w.Write( link.Render("#PSM" + pep.Psm[k].ID, pep.Psm[k].ID.ToString()) + ", " );
                }
                w.WriteLine( link.Render("#PSM" + pep.Psm[k].ID, pep.Psm[k].ID.ToString()) );
            }
            w.WriteLine( cell.ToString() + row );
        }
        row.Hold = false;
    }

    w.WriteLine( "</table><br/>" );
}