/// <summary>
/// Renders the peptides of a protein as a comma-separated list of HTML links.
/// Every link targets "#" + accession + "__" + peptide ID and shows the
/// peptide's string representation.
/// </summary>
/// <param name="p">Protein whose peptides are rendered</param>
/// <returns>The HTML fragment, or an empty string when the protein has no peptides</returns>
private string Peptides2Html( Protein p )
{
    int total = p.Peptides.Count;
    if( total == 0 )
    {
        return "";
    }

    Tag link = new Tag( "a", "href" );
    string html = "";
    int idx = 0;
    while( idx < total )
    {
        Peptide pep = p.Peptides[idx];
        html += link.Render( "#" + p.Accession + "__" + pep.ID, pep.ToString() );
        idx++;
        // A separator goes after every link but the last one
        if( idx < total )
        {
            html += ", ";
        }
    }
    return html;
}
/// <summary>
/// Gets the positions of the peptide in the specified protein.
/// </summary>
/// <param name="p">Protein whose sequence is searched</param>
/// <returns>
/// The 1-based start positions of every occurrence of this peptide's sequence
/// in the protein sequence (overlapping occurrences included), separated by ", ".
/// An empty string when either sequence is null/empty or there is no occurrence.
/// </returns>
public string GetPositions( Protein p )
{
    if( string.IsNullOrEmpty(Sequence) || string.IsNullOrEmpty(p.Sequence) )
    {
        return "";
    }

    string positions = "";
    // Sequences are plain residue symbols, so they are matched ordinally:
    // the IndexOf(string,int) overload would apply culture-sensitive rules.
    int pos = p.Sequence.IndexOf( Sequence, 0, System.StringComparison.Ordinal );
    while( pos != -1 )
    {
        if( positions.Length != 0 )
        {
            positions += ", ";
        }
        positions += (pos + 1);
        // Advance by a single residue so overlapping occurrences are also found
        pos = p.Sequence.IndexOf( Sequence, pos + 1, System.StringComparison.Ordinal );
    }
    return positions;
}
/// <summary>
/// Writes one protein as a single line of separator-delimited text.
/// Columns: ID, entry, accession, evidence, group, description,
/// space-separated peptides, sequence and space-separated PTM descriptions.
/// Values are written as they are; no quoting or escaping is applied.
/// </summary>
/// <param name="w">Destination writer</param>
/// <param name="p">Protein to write</param>
/// <param name="grp">Text for the group column</param>
/// <param name="sep">Column separator</param>
private void SaveCSVEntry( TextWriter w, Protein p, string grp, string sep )
{
    // Leading columns, each one followed by the separator
    string leading = p.ID + sep + p.Entry + sep + p.Accession + sep;
    leading = leading + p.Evidence.ToString() + sep + grp + sep + p.Desc + sep;
    w.Write( leading );

    // Peptide column: every peptide is followed by a blank
    for( int k = 0; k < p.Peptides.Count; k++ )
    {
        w.Write( p.Peptides[k].ToString() + ' ' );
    }

    // Sequence column
    w.Write( sep + p.Sequence + sep );

    // PTM column: peptides without PTM text contribute nothing
    foreach( Peptide pep in p.Peptides )
    {
        String ptmText = GetPtmCsv( pep );
        if( ptmText.Length == 0 )
        {
            continue;
        }
        w.Write( ptmText + " " );
    }

    w.WriteLine();
}
/// <summary>
/// Registers the proteins listed in the loaded mzIdentML file.
/// A protein object is only created for accessions that are not already
/// present in m_SortedProteins.
/// </summary>
/// <returns>Map from sequence id to accession for every distinct id of the file</returns>
private SortedList<string,string> LoadProteins()
{
    SortedList<string,string> idToAccession = new SortedList<string,string>();

    foreach( DBSequenceType dbSeq in m_mzid.ListProteins )
    {
        // Avoids duplicated entries in the same file
        if( idToAccession.ContainsKey(dbSeq.id) )
        {
            continue;
        }
        idToAccession.Add( dbSeq.id, dbSeq.accession );

        // Avoids duplicated entries between different files
        if( !m_SortedProteins.ContainsKey(dbSeq.accession) )
        {
            CVParamType entryCv = mzidFile1_1.FindCV( "MS:1001352", dbSeq.Items );
            string entry = "";
            if( entryCv != null )
            {
                entry = entryCv.value;
            }

            CVParamType descCv = mzidFile1_1.FindCV( "MS:1001088", dbSeq.Items );
            string desc = "";
            if( descCv != null )
            {
                desc = descCv.value;
            }

            Protein created = new Protein( m_pid++, entry, dbSeq.accession, desc, dbSeq.Seq );
            created.DBRef = dbSeq.id;
            Proteins.Add( created );
            m_SortedProteins.Add( created.Accession, created );
        }
    }

    return idToAccession;
}
/// <summary>
/// Checks whether every protein in the subset of the given group contains
/// every discriminating peptide found in any protein of that subset.
/// </summary>
/// <param name="g">Group whose Subset is examined</param>
/// <returns>true when no protein of the subset lacks any of those peptides</returns>
private bool IsIndistinguisable( Protein g )
{
    // Gather the discriminating peptides of all the members (repeats are kept)
    List<Peptide> collected = new List<Peptide>();
    foreach( Protein member in g.Subset )
    {
        foreach( Peptide candidate in member.Peptides )
        {
            if( candidate.Relation != Peptide.RelationType.Discriminating )
            {
                continue;
            }
            collected.Add( candidate );
        }
    }

    // A single peptide missing from a single member is enough to reject
    foreach( Protein member in g.Subset )
    {
        for( int k = 0; k < collected.Count; k++ )
        {
            if( member.Peptides.Contains(collected[k]) )
            {
                continue;
            }
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds a PDH (protein detection hypothesis) for the given protein.
/// </summary>
/// <param name="p">Protein providing the accession, the sequence reference and the peptides</param>
/// <param name="ev">Evidence reported in the confidence category parameter</param>
/// <returns>The populated hypothesis element</returns>
/// <remarks>
/// passThreshold is derived from p.Evidence, whereas the confidence category
/// parameter is derived from <paramref name="ev"/>.
/// </remarks>
private PSIPIanalysisprocessProteinDetectionHypothesisType BuildHypothesis( Protein p, Protein.EvidenceType ev )
{
    PSIPIanalysisprocessProteinDetectionHypothesisType hypothesis =
        new PSIPIanalysisprocessProteinDetectionHypothesisType();
    hypothesis.id = "PDH_" + p.Accession;
    hypothesis.DBSequence_ref = p.DBRef;

    // Non conclusive and filtered proteins do not pass the threshold
    hypothesis.passThreshold =
        p.Evidence != Protein.EvidenceType.NonConclusive &&
        p.Evidence != Protein.EvidenceType.Filtered;

    // One peptide hypothesis per peptide, referenced through this protein's DBRef
    List<PeptideHypothesisType> peptideRefs = new List<PeptideHypothesisType>();
    foreach( Peptide pep in p.Peptides )
    {
        PeptideHypothesisType item = new PeptideHypothesisType();
        item.PeptideEvidence_Ref = pep.Names[p.DBRef];
        peptideRefs.Add( item );
    }
    if( peptideRefs.Count > 0 )
    {
        hypothesis.PeptideHypothesis = peptideRefs.ToArray();
    }

    FuGECommonOntologycvParamType category = new FuGECommonOntologycvParamType(
        "Protein Inference Confidence Category", "MS:1001600", "PSI-MS" );
    category.value = ParseConfidence( ev );
    hypothesis.cvParam = new FuGECommonOntologycvParamType[] { category };

    return hypothesis;
}
/// <summary>
/// Gets the positions of the peptide in the specified protein.
/// </summary>
/// <param name="p">Protein whose sequence is searched</param>
/// <returns>
/// The 1-based start positions of every occurrence of this peptide's sequence
/// in the protein sequence (overlapping occurrences included), separated by ", ".
/// An empty string when either sequence is null/empty or there is no occurrence.
/// </returns>
public string GetPositions( Protein p )
{
    if( string.IsNullOrEmpty(Sequence) || string.IsNullOrEmpty(p.Sequence) )
    {
        return "";
    }

    string positions = "";
    // Sequences are plain residue symbols, so they are matched ordinally:
    // the IndexOf(string,int) overload would apply culture-sensitive rules.
    int pos = p.Sequence.IndexOf( Sequence, 0, System.StringComparison.Ordinal );
    while( pos != -1 )
    {
        if( positions.Length != 0 )
        {
            positions += ", ";
        }
        positions += (pos + 1);
        // Advance by a single residue so overlapping occurrences are also found
        pos = p.Sequence.IndexOf( Sequence, pos + 1, System.StringComparison.Ordinal );
    }
    return positions;
}
/// <summary>
/// Builds a PDH (protein detection hypothesis) for the given protein.
/// </summary>
/// <param name="p">Protein providing the accession, the sequence reference and the peptides</param>
/// <param name="ev">Evidence reported in the confidence category parameter</param>
/// <returns>The populated hypothesis element</returns>
/// <remarks>
/// passThreshold is derived from p.Evidence, whereas the confidence category
/// parameter is derived from <paramref name="ev"/>.
/// </remarks>
private PSIPIanalysisprocessProteinDetectionHypothesisType BuildHypothesis( Protein p, Protein.EvidenceType ev )
{
    PSIPIanalysisprocessProteinDetectionHypothesisType hypothesis =
        new PSIPIanalysisprocessProteinDetectionHypothesisType();
    hypothesis.id = "PDH_" + p.Accession;
    hypothesis.DBSequence_ref = p.DBRef;

    // Non conclusive and filtered proteins do not pass the threshold
    hypothesis.passThreshold =
        p.Evidence != Protein.EvidenceType.NonConclusive &&
        p.Evidence != Protein.EvidenceType.Filtered;

    // One peptide hypothesis per peptide, referenced through this protein's DBRef
    List<PeptideHypothesisType> peptideRefs = new List<PeptideHypothesisType>();
    foreach( Peptide pep in p.Peptides )
    {
        PeptideHypothesisType item = new PeptideHypothesisType();
        item.PeptideEvidence_Ref = pep.Names[p.DBRef];
        peptideRefs.Add( item );
    }
    if( peptideRefs.Count > 0 )
    {
        hypothesis.PeptideHypothesis = peptideRefs.ToArray();
    }

    FuGECommonOntologycvParamType category = new FuGECommonOntologycvParamType(
        "Protein Inference Confidence Category", "MS:1001600", "PSI-MS" );
    category.value = ParseConfidence( ev );
    hypothesis.cvParam = new FuGECommonOntologycvParamType[] { category };

    return hypothesis;
}
/// <summary>
/// Loads a mzIdentML file: registers its proteins and peptides and links each
/// peptide to the proteins referenced by its threshold-passing identifications.
/// </summary>
/// <param name="mzid">Path of the mzIdentML file</param>
/// <exception cref="ApplicationException">
/// The file does not contain exactly one spectrum identification list
/// </exception>
protected override void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // Proteins: sequence ids are unique per file, accessions are unique globally
    SortedList<string,string> idToAccession = new SortedList<string,string>();
    foreach( PSIPIanalysissearchDBSequenceType dbSeq in m_mzid.ListProteins )
    {
        if( idToAccession.ContainsKey(dbSeq.id) )
        {
            continue;
        }
        string accession = dbSeq.accession;
        idToAccession.Add( dbSeq.id, accession );
        if( m_SortedProteins.ContainsKey(accession) )
        {
            continue;
        }

        FuGECommonOntologycvParamType entryCv =
            FuGECommonOntologycvParamType.Find( "MS:1001352", dbSeq.cvParam );
        string entry = entryCv == null ? "" : entryCv.value;
        FuGECommonOntologycvParamType descCv =
            FuGECommonOntologycvParamType.Find( "MS:1001088", dbSeq.cvParam );
        string desc = descCv == null ? "" : descCv.value;

        // The sequence is stored as read (no case conversion)
        Protein protein = new Protein( m_pid++, entry, accession, desc, dbSeq.seq );
        protein.DBRef = dbSeq.id;
        Proteins.Add( protein );
        m_SortedProteins.Add( accession, protein );
    }

    // Peptides: numbered from 1 in file order and indexed by their element id
    SortedList<string,Peptide> peptideById = new SortedList<string,Peptide>();
    int nextId = 1;
    foreach( PSIPIpolypeptidePeptideType pepElement in m_mzid.ListPeptides )
    {
        // The sequence is stored as read (no case conversion)
        Peptide pep = new Peptide( nextId++, pepElement.peptideSequence );
        pep.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        peptideById.Add( pepElement.id, pep );
        pep.Runs.Add( m_Run );

        if( pepElement.Modification != null )
        {
            foreach( PSIPIpolypeptideModificationType mod in pepElement.Modification )
            {
                PTM ptm = new PTM();
                // -1 marks a modification without a specified location
                ptm.Pos = mod.locationSpecified ? mod.location : -1;
                if( mod.residues != null )
                {
                    foreach( string residue in mod.residues )
                    {
                        ptm.Residues += residue;
                    }
                }
                // The name comes from the UNIMOD parameter (the last one wins)
                foreach( FuGECommonOntologycvParamType param in mod.cvParam )
                {
                    if( param.cvRef.Equals("UNIMOD") )
                    {
                        ptm.Name = param.name;
                    }
                }
                pep.AddPTM( ptm );
            }
        }

        Peptides.Add( pep );
    }

    // Relations
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1 )
    {
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );
    }
    foreach( PSIPIanalysissearchSpectrumIdentificationResultType result in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult )
    {
        foreach( PSIPIanalysissearchSpectrumIdentificationItemType item in result.SpectrumIdentificationItem )
        {
            if( !item.passThreshold )
            {
                continue;
            }
            // The peptide is resolved before the evidence check, as an unknown reference must still fail
            Peptide identified = peptideById[item.Peptide_ref];
            if( item.PeptideEvidence == null )
            {
                continue;
            }
            identified.Confidence = Peptide.ConfidenceType.PassThreshold;

            foreach( PSIPIanalysisprocessPeptideEvidenceType evidence in item.PeptideEvidence )
            {
                Protein owner = m_SortedProteins[idToAccession[evidence.DBSequence_Ref]];
                if( !identified.Proteins.Contains(owner) )
                {
                    identified.Names.Add( evidence.DBSequence_Ref, evidence.id );
                    owner.Peptides.Add( identified );
                    identified.Proteins.Add( owner );
                }
            }
        }
    }
}
/// <summary>
/// Writes the table rows of the proteins having the given evidence.
/// A protein without subset produces one row; a protein with a subset produces
/// one row per subset member, the first of which also carries the row-spanning
/// entry and evidence cells.
/// </summary>
/// <param name="w">Destination writer</param>
/// <param name="tr">Row tag used to render every row; its Hold flag is set while
/// the rows after the first one of a subset are written and cleared afterwards</param>
/// <param name="evidence">Only proteins with this evidence are written</param>
private void WriteProteinList( TextWriter w, Tag tr, Protein.EvidenceType evidence )
{
    // Filtered proteins get a link tag without name
    string linkName = null;
    if( evidence != Protein.EvidenceType.Filtered )
    {
        linkName = "a";
    }
    Tag a = new Tag( linkName, "href" );
    Tag td = new Tag( "td" );
    Tag tdr = new Tag( "td", "rowspan" );
    Tag tdc = new Tag( "td", "colspan" );

    foreach( Protein p in Proteins )
    {
        if( p.Evidence != evidence )
        {
            continue;
        }

        if( p.Subset.Count == 0 )
        {
            string single =
                td.Render( a.Render("#" + p.Accession, p.EntryEx) ) +
                td.Render( p.Evidence.ToString() ) +
                tdc.Render( "2", Peptides2Html(p) ) +
                td.Render( p.Desc );
            w.Write( tr.Render(single) );
            continue;
        }

        string span = p.Subset.Count.ToString();
        for( int m = 0; m < p.Subset.Count; m++ )
        {
            Protein member = p.Subset[m];
            string cells = "";
            if( m == 0 )
            {
                // Entry and evidence span all the rows of the subset
                cells = tdr.Render( span, p.Entry ) + tdr.Render( span, p.Evidence.ToString() );
            }
            cells +=
                td.Render( a.Render("#" + member.Accession, member.EntryEx) + ": " ) +
                td.Render( Peptides2Html(member) ) +
                td.Render( member.Desc );
            w.WriteLine( tr.Render(cells) );
            if( m == 0 )
            {
                tr.Hold = true;
            }
        }
        tr.Hold = false;
    }
}
/// <summary>
/// Writes the HTML details table of a protein: name, description, sequence,
/// evidence and peptide list, followed by one block of rows per peptide
/// (confidence, decoy, runs, relation, proteins, sequence, position, PTMs and,
/// when spectra are available, PSMs).
/// </summary>
/// <param name="w">Destination writer</param>
/// <param name="p">Protein to describe</param>
/// <remarks>
/// NOTE(review): the tr/td tags are used both through Render() and through their
/// string conversion, which appears to emit opening and closing tags alternately.
/// The order of those calls is therefore kept untouched - confirm against Tag.
/// Empty Runs, Proteins or Psm lists produce an empty cell instead of an
/// out-of-range access.
/// </remarks>
private void WriteProteinDetails( TextWriter w, Protein p )
{
    Tag tr = new Tag( "tr", true );
    Tag td = new Tag( "td", "colspan" );
    Tag th = new Tag( "th", "rowspan" );
    Tag a = new Tag( "a", "href" );
    int i;

    // Protein header rows
    w.WriteLine( "<table>\n<caption><a name=\""+p.Accession+"\"/>Protein "+p.Accession+"</caption>" );
    w.WriteLine( tr.Render(th.Render("Name")+td.Render("3",p.EntryEx)) );
    w.WriteLine( tr.Render(th.Render("Description")+td.Render("3",p.Desc)) );
    w.WriteLine( tr.Render(th.Render("Sequence")+td.Render("3","<pre>"+p.ParseSeq(10)+"</pre>")) );
    w.WriteLine( tr.Render(th.Render("Evidence")+td.Render("3",p.Evidence.ToString())) );

    // Peptide list: links to the per-peptide anchors written below
    w.Write( tr+th.Render("Peptide list")+"<td colspan=\"3\">" );
    if( p.Peptides.Count > 0 )
    {
        for( i = 0; i < p.Peptides.Count-1; i++ )
        {
            w.Write( a.Render("#"+p.Accession+"__"+p.Peptides[i].ID,p.Peptides[i].ToString()) + ", " );
        }
        w.Write( a.Render("#"+p.Accession+"__"+p.Peptides[i].ID,p.Peptides[i].ToString()) );
    }
    w.WriteLine( "</td>"+tr );
    if( p.Peptides.Count == 0 )
    {
        w.WriteLine( "</table><br/>" );
        return;
    }

    // Rows per peptide: 8 fixed ones plus the PSMs row when there are spectra
    int rows = 8;
    if( Spectra.Count != 0 )
    {
        rows++;
    }
    w.Write( tr+th.Render((p.Peptides.Count*rows).ToString(),"Peptides") );
    bool first = true;
    foreach( Peptide f in p.Peptides )
    {
        // The row of the first peptide has already been opened together with the "Peptides" header
        if( first )
        {
            first = false;
        }
        else
        {
            w.Write( tr.ToString() );
        }
        w.Write( th.Render(rows.ToString(),f.ToString()) );
        w.WriteLine( th.Render("<a name=\""+p.Accession+"__"+f.ID+"\"/>Confidence")+td.Render(f.Confidence.ToString())+tr );
        tr.Hold = true;
        w.WriteLine( tr.Render(th.Render("Decoy")+td.Render(f.Decoy.ToString())) );

        // Runs
        w.Write( tr+th.Render("Runs")+td );
        for( i = 0; i < f.Runs.Count-1; i++ )
        {
            w.Write( f.Runs[i].ToString() + ", " );
        }
        if( f.Runs.Count > 0 )
        {
            w.Write( f.Runs[i].ToString() );
        }
        w.WriteLine( td.ToString()+tr );

        w.WriteLine( tr.Render(th.Render("Relation")+td.Render(f.Relation.ToString())) );

        // Proteins sharing the peptide
        w.Write( tr+th.Render("Proteins")+td );
        for( i = 0; i < f.Proteins.Count-1; i++ )
        {
            w.Write( a.Render("#"+f.Proteins[i].Accession,f.Proteins[i].EntryEx) + ", " );
        }
        if( f.Proteins.Count > 0 )
        {
            w.Write( a.Render("#"+f.Proteins[i].Accession,f.Proteins[i].EntryEx) );
        }
        w.WriteLine( td.ToString()+tr );

        w.WriteLine( tr.Render(th.Render("Sequence")+td.Render("<pre>"+f.Sequence+"</pre>")) );
        w.WriteLine( tr.Render(th.Render("Position")+td.Render(f.GetPositions(p))) );

        // PTMs: a single variant is written bare, several ones are numbered
        w.Write( tr+th.Render("PTMs")+td );
        if( f.Variants.Count == 1 )
        {
            w.Write( Peptide.Variant2Str(f.LastVariant) );
        }
        else
        {
            i = 1;
            foreach( List<PTM> v in f.Variants )
            {
                w.Write( "Variant #"+(i++)+": "+Peptide.Variant2Str(v)+"<br/>" );
            }
        }
        w.WriteLine( td.ToString()+tr );

        // PSMs
        if( Spectra.Count != 0 )
        {
            w.Write( tr+th.Render("PSMs")+td );
            if( f.Psm != null && f.Psm.Count > 0 )
            {
                for( i = 0; i < f.Psm.Count-1; i++ )
                {
                    w.Write( a.Render("#PSM"+f.Psm[i].ID,f.Psm[i].ID.ToString()) + ", " );
                }
                w.WriteLine( a.Render("#PSM"+f.Psm[i].ID,f.Psm[i].ID.ToString()) );
            }
            w.WriteLine( td.ToString()+tr );
        }
        tr.Hold = false;
    }
    w.WriteLine( "</table><br/>" );
}
/// <summary>
/// Writes the details of every protein having the given evidence.
/// For a protein with a subset, the details of each subset member are written
/// instead of those of the protein itself.
/// </summary>
/// <param name="w">Destination writer</param>
/// <param name="evidence">Only proteins with this evidence are written</param>
private void WriteProteinDetails( TextWriter w, Protein.EvidenceType evidence )
{
    foreach( Protein candidate in Proteins )
    {
        if( candidate.Evidence == evidence )
        {
            if( candidate.Subset.Count != 0 )
            {
                foreach( Protein member in candidate.Subset )
                {
                    WriteProteinDetails( w, member );
                }
            }
            else
            {
                WriteProteinDetails( w, candidate );
            }
        }
    }
}
/// <summary>
/// Registers the proteins listed in the loaded mzIdentML file.
/// A protein object is only created for accessions that are not already
/// present in m_SortedProteins.
/// </summary>
/// <returns>Map from sequence id to accession for every distinct id of the file</returns>
private SortedList<string, string> LoadProteins()
{
    SortedList<string,string> idToAccession = new SortedList<string,string>();

    foreach( DBSequenceType dbSeq in m_mzid.ListProteins )
    {
        // Avoids duplicated entries in the same file
        if( idToAccession.ContainsKey(dbSeq.id) )
        {
            continue;
        }
        idToAccession.Add( dbSeq.id, dbSeq.accession );

        // Avoids duplicated entries between different files
        if( !m_SortedProteins.ContainsKey(dbSeq.accession) )
        {
            CVParamType entryCv = mzidFile1_1.FindCV( "MS:1001352", dbSeq.Items );
            string entry = "";
            if( entryCv != null )
            {
                entry = entryCv.value;
            }

            CVParamType descCv = mzidFile1_1.FindCV( "MS:1001088", dbSeq.Items );
            string desc = "";
            if( descCv != null )
            {
                desc = descCv.value;
            }

            Protein created = new Protein( m_pid++, entry, dbSeq.accession, desc, dbSeq.Seq );
            created.DBRef = dbSeq.id;
            Proteins.Add( created );
            m_SortedProteins.Add( created.Accession, created );
        }
    }

    return idToAccession;
}
/// <summary>
/// Writes the HTML details table of a protein: name, description, sequence,
/// evidence and peptide list, followed by one block of rows per peptide
/// (confidence, decoy, runs, relation, proteins, sequence, position, PTMs and,
/// when spectra are available, PSMs).
/// </summary>
/// <param name="w">Destination writer</param>
/// <param name="p">Protein to describe</param>
/// <remarks>
/// NOTE(review): the tr/td tags are used both through Render() and through their
/// string conversion, which appears to emit opening and closing tags alternately.
/// The order of those calls is therefore kept untouched - confirm against Tag.
/// Empty Runs, Proteins or Psm lists produce an empty cell instead of an
/// out-of-range access.
/// </remarks>
private void WriteProteinDetails( TextWriter w, Protein p )
{
    Tag tr = new Tag( "tr", true );
    Tag td = new Tag( "td", "colspan" );
    Tag th = new Tag( "th", "rowspan" );
    Tag a = new Tag( "a", "href" );
    int i;

    // Protein header rows
    w.WriteLine( "<table>\n<caption><a name=\""+p.Accession+"\"/>Protein "+p.Accession+"</caption>" );
    w.WriteLine( tr.Render(th.Render("Name")+td.Render("3",p.EntryEx)) );
    w.WriteLine( tr.Render(th.Render("Description")+td.Render("3",p.Desc)) );
    w.WriteLine( tr.Render(th.Render("Sequence")+td.Render("3","<pre>"+p.ParseSeq(10)+"</pre>")) );
    w.WriteLine( tr.Render(th.Render("Evidence")+td.Render("3",p.Evidence.ToString())) );

    // Peptide list: links to the per-peptide anchors written below
    w.Write( tr+th.Render("Peptide list")+"<td colspan=\"3\">" );
    if( p.Peptides.Count > 0 )
    {
        for( i = 0; i < p.Peptides.Count-1; i++ )
        {
            w.Write( a.Render("#"+p.Accession+"__"+p.Peptides[i].ID,p.Peptides[i].ToString()) + ", " );
        }
        w.Write( a.Render("#"+p.Accession+"__"+p.Peptides[i].ID,p.Peptides[i].ToString()) );
    }
    w.WriteLine( "</td>"+tr );
    if( p.Peptides.Count == 0 )
    {
        w.WriteLine( "</table><br/>" );
        return;
    }

    // Rows per peptide: 8 fixed ones plus the PSMs row when there are spectra
    int rows = 8;
    if( Spectra.Count != 0 )
    {
        rows++;
    }
    w.Write( tr+th.Render((p.Peptides.Count*rows).ToString(),"Peptides") );
    bool first = true;
    foreach( Peptide f in p.Peptides )
    {
        // The row of the first peptide has already been opened together with the "Peptides" header
        if( first )
        {
            first = false;
        }
        else
        {
            w.Write( tr.ToString() );
        }
        w.Write( th.Render(rows.ToString(),f.ToString()) );
        w.WriteLine( th.Render("<a name=\""+p.Accession+"__"+f.ID+"\"/>Confidence")+td.Render(f.Confidence.ToString())+tr );
        tr.Hold = true;
        w.WriteLine( tr.Render(th.Render("Decoy")+td.Render(f.Decoy.ToString())) );

        // Runs
        w.Write( tr+th.Render("Runs")+td );
        for( i = 0; i < f.Runs.Count-1; i++ )
        {
            w.Write( f.Runs[i].ToString() + ", " );
        }
        if( f.Runs.Count > 0 )
        {
            w.Write( f.Runs[i].ToString() );
        }
        w.WriteLine( td.ToString()+tr );

        w.WriteLine( tr.Render(th.Render("Relation")+td.Render(f.Relation.ToString())) );

        // Proteins sharing the peptide
        w.Write( tr+th.Render("Proteins")+td );
        for( i = 0; i < f.Proteins.Count-1; i++ )
        {
            w.Write( a.Render("#"+f.Proteins[i].Accession,f.Proteins[i].EntryEx) + ", " );
        }
        if( f.Proteins.Count > 0 )
        {
            w.Write( a.Render("#"+f.Proteins[i].Accession,f.Proteins[i].EntryEx) );
        }
        w.WriteLine( td.ToString()+tr );

        w.WriteLine( tr.Render(th.Render("Sequence")+td.Render("<pre>"+f.Sequence+"</pre>")) );
        w.WriteLine( tr.Render(th.Render("Position")+td.Render(f.GetPositions(p))) );

        // PTMs: a single variant is written bare, several ones are numbered
        w.Write( tr+th.Render("PTMs")+td );
        if( f.Variants.Count == 1 )
        {
            w.Write( Peptide.Variant2Str(f.LastVariant) );
        }
        else
        {
            i = 1;
            foreach( List<PTM> v in f.Variants )
            {
                w.Write( "Variant #"+(i++)+": "+Peptide.Variant2Str(v)+"<br/>" );
            }
        }
        w.WriteLine( td.ToString()+tr );

        // PSMs
        if( Spectra.Count != 0 )
        {
            w.Write( tr+th.Render("PSMs")+td );
            if( f.Psm != null && f.Psm.Count > 0 )
            {
                for( i = 0; i < f.Psm.Count-1; i++ )
                {
                    w.Write( a.Render("#PSM"+f.Psm[i].ID,f.Psm[i].ID.ToString()) + ", " );
                }
                w.WriteLine( a.Render("#PSM"+f.Psm[i].ID,f.Psm[i].ID.ToString()) );
            }
            w.WriteLine( td.ToString()+tr );
        }
        tr.Hold = false;
    }
    w.WriteLine( "</table><br/>" );
}
/// <summary>
/// Loads the proteins (PROTEIN elements), the peptides (PEPTIDE elements) and,
/// unless PlgsThreshold is NoThreshold, the peptide scores (MASS_MATCH elements)
/// from an XML file.
/// </summary>
/// <param name="xmlpath">Path of the XML file</param>
/// <exception cref="ApplicationException">
/// The sequence of a peptide is not contained in the sequence of its protein
/// </exception>
private void LoadData( string xmlpath )
{
    SortedList<int,string> SortedAccession = new SortedList<int, string>();
    XmlDocument doc = new XmlDocument();
    doc.Load( xmlpath );

    // Load proteins
    XmlNodeList proteins = doc.GetElementsByTagName( "PROTEIN" );
    foreach( XmlElement element in proteins )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        if( SortedAccession.ContainsKey(id) )
        {
            continue;
        }
        string acc = element.GetElementsByTagName("ACCESSION")[0].InnerText;
        SortedAccession.Add( id, acc );
        // The accession may already be known from a previously loaded file
        if( m_SortedProteins.ContainsKey(acc) )
        {
            continue;
        }
        string entry = element.GetElementsByTagName("ENTRY")[0].InnerText;
        string desc = element.GetElementsByTagName("DESCRIPTION")[0].InnerText.Replace('+',' ');
        string seq = element.GetElementsByTagName("SEQUENCE")[0].InnerText.ToUpper();
        Protein p = new Protein(m_pid++, entry, acc, desc, seq);
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Load peptides
    SortedList<int,Peptide> SortedPeptides = new SortedList<int, Peptide>();
    XmlNodeList peptides = doc.GetElementsByTagName( "PEPTIDE" );
    foreach( XmlElement element in peptides )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        int pid = int.Parse(element.GetAttribute("PROT_ID"));
        // QUERY_MASS_ID is only needed to attach the scores afterwards
        int mid = 0;
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
        {
            mid = int.Parse(element.GetAttribute("QUERY_MASS_ID"));
        }
        string seq = element.GetAttribute("SEQUENCE").ToUpper();
        Peptide f = new Peptide(id, seq);
        XmlNodeList mods = element.GetElementsByTagName( "MATCH_MODIFIER" );
        f.Runs.Add( m_Run );
        foreach( XmlElement mod in mods )
        {
            PTM ptm = new PTM();
            // NAME holds the modification name, optionally followed by '+' and the residues
            string[] strs = mod.GetAttribute("NAME").Split(new char[]{'+'});
            ptm.Name = strs[0];
            string str = mod.GetAttribute("POS");
            // -1 marks a modification without position
            ptm.Pos = str.Length == 0 ? -1 : int.Parse(str);
            if( strs.Length > 1 )
            {
                ptm.Residues = strs[1];
            }
            f.AddPTM( ptm );
        }

        // Resolve the referenced protein; an unknown reference is reported, not fatal
        Protein p = null;
        string accession;
        if( SortedAccession.TryGetValue(pid, out accession) && m_SortedProteins.ContainsKey(accession) )
        {
            p = m_SortedProteins[accession];
        }
        else
        {
            Notify( "Peptide '" + id + "' references unknown protein '" + pid + "'" );
        }
        if( p != null )
        {
            p.Peptides.Add( f );
            f.Proteins.Add( p );
            if( !p.Sequence.Contains(f.Sequence) )
            {
                throw new ApplicationException( "Inconsistent sequence data" );
            }
        }
        Peptides.Add( f );
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
        {
            SortedPeptides.Add(mid,f);
        }
    }
    if( PlgsThreshold == Peptide.ConfidenceType.NoThreshold )
    {
        return;
    }

    // Scores: the ID of a MASS_MATCH is looked up among the QUERY_MASS_ID values of the peptides
    XmlNodeList scores = doc.GetElementsByTagName( "MASS_MATCH" );
    foreach( XmlElement element in scores )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        double score = double.Parse(element.GetAttribute("SCORE"), m_Format);
        SortedPeptides[id].Score = score;
    }
}
/// <summary>
/// Includes a protein in a group, creating the group when the protein has none,
/// and pulls into that group every still ungrouped protein with Group evidence
/// that shares a discriminating peptide with it.
/// </summary>
/// <param name="p">Protein to include</param>
/// <param name="id">Next free protein ID; incremented for every ID handed out</param>
/// <param name="proteins">List receiving the group when a new one is created</param>
private void AddToGroup( Protein p, ref int id, ref List<Protein> proteins )
{
    Protein group = p.Group;
    if( group == null )
    {
        // The new group takes the first ID and the protein the following one
        group = new Protein( id++, String.Format("GROUP{0:000}",m_gid++), "", p.Name, "" );
        group.Evidence = Protein.EvidenceType.Group;
        p.Group = group;
        p.ID = id++;
        group.Subset.Add( p );
        proteins.Add( group );
    }

    foreach( Peptide pep in p.Peptides )
    {
        if( pep.Relation == Peptide.RelationType.Discriminating )
        {
            foreach( Protein other in pep.Proteins )
            {
                if( other.Evidence != Protein.EvidenceType.Group || other.Group != null )
                {
                    continue;
                }
                other.ID = id++;
                other.Group = group;
                group.Subset.Add( other );
                group.Desc += " + " + other.Name;
            }
        }
    }
}
/// <summary>
/// Converts the evidence enum to its textual confidence category.
/// </summary>
/// <param name="e">Evidence to convert</param>
/// <returns>
/// "conclusive", "ambiguous group", "indistinguishable" or "non conclusive";
/// any other value is returned as its enum name
/// </returns>
public string ParseConfidence( Protein.EvidenceType e )
{
    if( e == Protein.EvidenceType.Conclusive )
    {
        return "conclusive";
    }
    if( e == Protein.EvidenceType.Group )
    {
        return "ambiguous group";
    }
    if( e == Protein.EvidenceType.Indistinguishable )
    {
        return "indistinguishable";
    }
    if( e == Protein.EvidenceType.NonConclusive )
    {
        return "non conclusive";
    }
    return e.ToString();
}
/// <summary>
/// Loads a mzIdentML file: registers its proteins and peptides and links each
/// peptide to the proteins referenced by its threshold-passing identifications.
/// </summary>
/// <param name="mzid">Path of the mzIdentML file</param>
/// <exception cref="ApplicationException">
/// The file does not contain exactly one spectrum identification list
/// </exception>
override protected void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // Proteins: sequence ids are unique per file, accessions are unique globally
    SortedList<string,string> idToAccession = new SortedList<string,string>();
    foreach( PSIPIanalysissearchDBSequenceType dbSeq in m_mzid.ListProteins )
    {
        if( idToAccession.ContainsKey(dbSeq.id) )
        {
            continue;
        }
        string accession = dbSeq.accession;
        idToAccession.Add( dbSeq.id, accession );
        if( m_SortedProteins.ContainsKey(accession) )
        {
            continue;
        }

        FuGECommonOntologycvParamType entryCv =
            FuGECommonOntologycvParamType.Find( "MS:1001352", dbSeq.cvParam );
        string entry = entryCv == null ? "" : entryCv.value;
        FuGECommonOntologycvParamType descCv =
            FuGECommonOntologycvParamType.Find( "MS:1001088", dbSeq.cvParam );
        string desc = descCv == null ? "" : descCv.value;

        // The sequence is stored as read (no case conversion)
        Protein protein = new Protein( m_pid++, entry, accession, desc, dbSeq.seq );
        protein.DBRef = dbSeq.id;
        Proteins.Add( protein );
        m_SortedProteins.Add( accession, protein );
    }

    // Peptides: numbered from 1 in file order and indexed by their element id
    SortedList<string,Peptide> peptideById = new SortedList<string,Peptide>();
    int nextId = 1;
    foreach( PSIPIpolypeptidePeptideType pepElement in m_mzid.ListPeptides )
    {
        // The sequence is stored as read (no case conversion)
        Peptide pep = new Peptide( nextId++, pepElement.peptideSequence );
        pep.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        peptideById.Add( pepElement.id, pep );
        pep.Runs.Add( m_Run );

        if( pepElement.Modification != null )
        {
            foreach( PSIPIpolypeptideModificationType mod in pepElement.Modification )
            {
                PTM ptm = new PTM();
                // -1 marks a modification without a specified location
                ptm.Pos = mod.locationSpecified ? mod.location : -1;
                if( mod.residues != null )
                {
                    foreach( string residue in mod.residues )
                    {
                        ptm.Residues += residue;
                    }
                }
                // The name comes from the UNIMOD parameter (the last one wins)
                foreach( FuGECommonOntologycvParamType param in mod.cvParam )
                {
                    if( param.cvRef.Equals("UNIMOD") )
                    {
                        ptm.Name = param.name;
                    }
                }
                pep.AddPTM( ptm );
            }
        }

        Peptides.Add( pep );
    }

    // Relations
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1 )
    {
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );
    }
    foreach( PSIPIanalysissearchSpectrumIdentificationResultType result in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult )
    {
        foreach( PSIPIanalysissearchSpectrumIdentificationItemType item in result.SpectrumIdentificationItem )
        {
            if( !item.passThreshold )
            {
                continue;
            }
            // The peptide is resolved before the evidence check, as an unknown reference must still fail
            Peptide identified = peptideById[item.Peptide_ref];
            if( item.PeptideEvidence == null )
            {
                continue;
            }
            identified.Confidence = Peptide.ConfidenceType.PassThreshold;

            foreach( PSIPIanalysisprocessPeptideEvidenceType evidence in item.PeptideEvidence )
            {
                Protein owner = m_SortedProteins[idToAccession[evidence.DBSequence_Ref]];
                if( !identified.Proteins.Contains(owner) )
                {
                    identified.Names.Add( evidence.DBSequence_Ref, evidence.id );
                    owner.Peptides.Add( identified );
                    identified.Proteins.Add( owner );
                }
            }
        }
    }
}
/// <summary>
/// Loads the proteins (PROTEIN elements), the peptides (PEPTIDE elements) and,
/// unless PlgsThreshold is NoThreshold, the peptide scores (MASS_MATCH elements)
/// from an XML file.
/// </summary>
/// <param name="xmlpath">Path of the XML file</param>
/// <exception cref="ApplicationException">
/// The sequence of a peptide is not contained in the sequence of its protein
/// </exception>
private void LoadData( string xmlpath )
{
    SortedList<int,string> SortedAccession = new SortedList<int, string>();
    XmlDocument doc = new XmlDocument();
    doc.Load( xmlpath );

    // Load proteins
    XmlNodeList proteins = doc.GetElementsByTagName( "PROTEIN" );
    foreach( XmlElement element in proteins )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        if( SortedAccession.ContainsKey(id) )
        {
            continue;
        }
        string acc = element.GetElementsByTagName("ACCESSION")[0].InnerText;
        SortedAccession.Add( id, acc );
        // The accession may already be known from a previously loaded file
        if( m_SortedProteins.ContainsKey(acc) )
        {
            continue;
        }
        string entry = element.GetElementsByTagName("ENTRY")[0].InnerText;
        string desc = element.GetElementsByTagName("DESCRIPTION")[0].InnerText.Replace('+',' ');
        string seq = element.GetElementsByTagName("SEQUENCE")[0].InnerText.ToUpper();
        Protein p = new Protein(m_pid++, entry, acc, desc, seq);
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Load peptides
    SortedList<int,Peptide> SortedPeptides = new SortedList<int, Peptide>();
    XmlNodeList peptides = doc.GetElementsByTagName( "PEPTIDE" );
    foreach( XmlElement element in peptides )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        int pid = int.Parse(element.GetAttribute("PROT_ID"));
        // QUERY_MASS_ID is only needed to attach the scores afterwards
        int mid = 0;
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
        {
            mid = int.Parse(element.GetAttribute("QUERY_MASS_ID"));
        }
        string seq = element.GetAttribute("SEQUENCE").ToUpper();
        Peptide f = new Peptide(id, seq);
        XmlNodeList mods = element.GetElementsByTagName( "MATCH_MODIFIER" );
        f.Runs.Add( m_Run );
        foreach( XmlElement mod in mods )
        {
            PTM ptm = new PTM();
            // NAME holds the modification name, optionally followed by '+' and the residues
            string[] strs = mod.GetAttribute("NAME").Split(new char[]{'+'});
            ptm.Name = strs[0];
            string str = mod.GetAttribute("POS");
            // -1 marks a modification without position
            ptm.Pos = str.Length == 0 ? -1 : int.Parse(str);
            if( strs.Length > 1 )
            {
                ptm.Residues = strs[1];
            }
            f.AddPTM( ptm );
        }

        // Resolve the referenced protein; an unknown reference is reported, not fatal
        Protein p = null;
        string accession;
        if( SortedAccession.TryGetValue(pid, out accession) && m_SortedProteins.ContainsKey(accession) )
        {
            p = m_SortedProteins[accession];
        }
        else
        {
            Notify( "Peptide '" + id + "' references unknown protein '" + pid + "'" );
        }
        if( p != null )
        {
            p.Peptides.Add( f );
            f.Proteins.Add( p );
            if( !p.Sequence.Contains(f.Sequence) )
            {
                throw new ApplicationException( "Inconsistent sequence data" );
            }
        }
        Peptides.Add( f );
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
        {
            SortedPeptides.Add(mid,f);
        }
    }
    if( PlgsThreshold == Peptide.ConfidenceType.NoThreshold )
    {
        return;
    }

    // Scores: the ID of a MASS_MATCH is looked up among the QUERY_MASS_ID values of the peptides
    XmlNodeList scores = doc.GetElementsByTagName( "MASS_MATCH" );
    foreach( XmlElement element in scores )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        double score = double.Parse(element.GetAttribute("SCORE"), m_Format);
        SortedPeptides[id].Score = score;
    }
}
/// <summary>
/// Checks whether every protein in the subset of the given group contains
/// every discriminating peptide found in any protein of that subset.
/// </summary>
/// <param name="g">Group whose Subset is examined</param>
/// <returns>true when no protein of the subset lacks any of those peptides</returns>
private bool IsIndistinguisable( Protein g )
{
    // Gather the discriminating peptides of all the members (repeats are kept)
    List<Peptide> collected = new List<Peptide>();
    foreach( Protein member in g.Subset )
    {
        foreach( Peptide candidate in member.Peptides )
        {
            if( candidate.Relation != Peptide.RelationType.Discriminating )
            {
                continue;
            }
            collected.Add( candidate );
        }
    }

    // A single peptide missing from a single member is enough to reject
    foreach( Protein member in g.Subset )
    {
        for( int k = 0; k < collected.Count; k++ )
        {
            if( member.Peptides.Contains(collected[k]) )
            {
                continue;
            }
            return false;
        }
    }

    return true;
}
/// <summary>
/// Writes one protein as a single line of separator-delimited text.
/// Columns: ID, entry, accession, evidence, group, description,
/// space-separated peptides, sequence and space-separated PTM descriptions.
/// Values are written as they are; no quoting or escaping is applied.
/// </summary>
/// <param name="w">Destination writer</param>
/// <param name="p">Protein to write</param>
/// <param name="grp">Text for the group column</param>
/// <param name="sep">Column separator</param>
private void SaveCSVEntry( TextWriter w, Protein p, string grp, string sep )
{
    // Leading columns, each one followed by the separator
    string leading = p.ID + sep + p.Entry + sep + p.Accession + sep;
    leading = leading + p.Evidence.ToString() + sep + grp + sep + p.Desc + sep;
    w.Write( leading );

    // Peptide column: every peptide is followed by a blank
    for( int k = 0; k < p.Peptides.Count; k++ )
    {
        w.Write( p.Peptides[k].ToString() + ' ' );
    }

    // Sequence column
    w.Write( sep + p.Sequence + sep );

    // PTM column: peptides without PTM text contribute nothing
    foreach( Peptide pep in p.Peptides )
    {
        String ptmText = GetPtmCsv( pep );
        if( ptmText.Length == 0 )
        {
            continue;
        }
        w.Write( ptmText + " " );
    }

    w.WriteLine();
}
/// <summary>
/// Renders the peptides of a protein as a comma-separated list of HTML links.
/// Every link targets "#" + accession + "__" + peptide ID and shows the
/// peptide's string representation.
/// </summary>
/// <param name="p">Protein whose peptides are rendered</param>
/// <returns>The HTML fragment, or an empty string when the protein has no peptides</returns>
private string Peptides2Html( Protein p )
{
    int total = p.Peptides.Count;
    if( total == 0 )
    {
        return "";
    }

    Tag link = new Tag( "a", "href" );
    string html = "";
    int idx = 0;
    while( idx < total )
    {
        Peptide pep = p.Peptides[idx];
        html += link.Render( "#" + p.Accession + "__" + pep.ID, pep.ToString() );
        idx++;
        // A separator goes after every link but the last one
        if( idx < total )
        {
            html += ", ";
        }
    }
    return html;
}
/// <summary>
/// Replaces the evidence-related parameters of a protein detection hypothesis.
/// Existing items whose name contains "PAnalyzer" or "leading" are dropped and
/// a leading/non-leading parameter plus a PAnalyzer evidence parameter are
/// appended. Nothing is changed for any evidence other than Conclusive,
/// Indistinguishable, Group and NonConclusive (e.g. filtered proteins).
/// </summary>
/// <param name="pdh">Hypothesis whose Items are rewritten</param>
/// <param name="evidence">Evidence to record</param>
private void UpdatePdhCvs( ProteinDetectionHypothesisType pdh, Protein.EvidenceType evidence )
{
    // Keep everything that is not a previous evidence or leading annotation
    List<AbstractParamType> kept = new List<AbstractParamType>();
    foreach( AbstractParamType param in pdh.Items )
    {
        bool replaced = param.name.Contains("PAnalyzer") || param.name.Contains("leading");
        if( !replaced )
        {
            kept.Add( param );
        }
    }

    // Only non conclusive proteins are reported as non-leading
    string evAccession;
    string evName;
    bool leading = true;
    switch( evidence )
    {
        case Protein.EvidenceType.Conclusive:
            evAccession = "MS:1002213";
            evName = "PAnalyzer:conclusive protein";
            break;
        case Protein.EvidenceType.Indistinguishable:
            evAccession = "MS:1002214";
            evName = "PAnalyzer:indistinguishable protein";
            break;
        case Protein.EvidenceType.Group:
            evAccession = "MS:1002216";
            evName = "PAnalyzer:ambiguous group member";
            break;
        case Protein.EvidenceType.NonConclusive:
            evAccession = "MS:1002215";
            evName = "PAnalyzer:non-conclusive protein";
            leading = false;
            break;
        default: // filtered
            return;
    }

    CVParamType ld = new CVParamType();
    ld.cvRef = "PSI-MS";
    ld.accession = leading ? "MS:1002401" : "MS:1002402";
    ld.name = leading ? "leading protein" : "non-leading protein";

    CVParamType ev = new CVParamType();
    ev.cvRef = "PSI-MS";
    ev.accession = evAccession;
    ev.name = evName;

    kept.Add( ld );
    kept.Add( ev );
    pdh.Items = kept.ToArray();
}