/// <summary>
/// Creates one <c>Peptide</c> for every entry of <c>m_mzid.ListPeptides</c>, appends it to
/// <c>Peptides</c> and indexes it by the identifier the entry carries in the loaded document.
/// </summary>
/// <returns>The created peptides, keyed by the <c>id</c> of their source entry.</returns>
/// <exception cref="ArgumentException">Two entries share the same <c>id</c> (raised by <c>SortedList.Add</c>).</exception>
private SortedList<string,Peptide> LoadPeptides()
{
    SortedList<string,Peptide> SortedPeptides = new SortedList<string, Peptide>();
    int id = 1; // internal peptide identifiers are assigned sequentially, starting at 1

    foreach( PeptideType pep in m_mzid.ListPeptides )
    {
        Peptide p = new Peptide( id++, pep.PeptideSequence );
        p.Confidence = Peptide.ConfidenceType.NoThreshold;
        SortedPeptides.Add( pep.id, p );
        p.Runs.Add( m_Run );

        if( pep.Modification != null )
        {
            foreach( ModificationType mod in pep.Modification )
            {
                PTM ptm = new PTM();

                // -1 marks a modification whose location is not specified
                ptm.Pos = mod.locationSpecified ? mod.location : -1;

                if( mod.residues != null )
                {
                    foreach( string residue in mod.residues )
                        ptm.Residues += residue;
                }

                // Guarded like the other optional collections above: a modification
                // without cvParam children simply yields an unnamed PTM instead of
                // a NullReferenceException.
                if( mod.cvParam != null )
                {
                    foreach( CVParamType param in mod.cvParam )
                    {
                        // Literal on the left keeps the comparison safe when cvRef is null.
                        // If several UNIMOD params are present, the last one wins.
                        if( "UNIMOD".Equals(param.cvRef) )
                            ptm.Name = param.name;
                    }
                }

                p.AddPTM( ptm );
            }
        }

        p.DBRef = pep.id;
        Peptides.Add( p );
    }

    return SortedPeptides;
}
/// <summary>
/// Appends a PTM to the current peptide variant, unless that variant already contains it
/// </summary>
/// <param name="ptm">Modification to register</param>
public void AddPTM( PTM ptm )
{
    // LastVariant is read exactly once, as in the original implementation
    List<PTM> variant = LastVariant;

    if( variant.Contains(ptm) )
        return;

    variant.Add( ptm );
}
/// <summary>
/// Loads proteins, peptides and, unless <c>PlgsThreshold</c> is <c>NoThreshold</c>, peptide scores
/// from the XML document found at <paramref name="xmlpath"/>.
/// </summary>
/// <param name="xmlpath">Path of the XML file to load</param>
/// <exception cref="ApplicationException">
/// A peptide sequence is not contained in the sequence of the protein it references.
/// </exception>
private void LoadData( string xmlpath )
{
    // Maps the numeric ID of each PROTEIN element to its accession
    SortedList<int,string> SortedAccession = new SortedList<int, string>();
    XmlDocument doc = new XmlDocument();
    doc.Load( xmlpath );

    // Load proteins
    XmlNodeList proteins = doc.GetElementsByTagName( "PROTEIN" );
    foreach( XmlElement element in proteins )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        if( SortedAccession.ContainsKey(id) )
            continue;
        string acc = element.GetElementsByTagName("ACCESSION")[0].InnerText;
        SortedAccession.Add( id, acc );

        // The accession may already be known; in that case only the ID mapping is new
        if( m_SortedProteins.ContainsKey(acc) )
            continue;
        string entry = element.GetElementsByTagName("ENTRY")[0].InnerText;
        string desc = element.GetElementsByTagName("DESCRIPTION")[0].InnerText.Replace('+',' ');
        string seq = element.GetElementsByTagName("SEQUENCE")[0].InnerText.ToUpper();
        Protein p = new Protein(m_pid++, entry, acc, desc, seq);
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Load peptides
    // Peptides indexed by QUERY_MASS_ID; only filled when scores will be read below
    SortedList<int,Peptide> SortedPeptides = new SortedList<int, Peptide>();
    XmlNodeList peptides = doc.GetElementsByTagName( "PEPTIDE" );
    foreach( XmlElement element in peptides )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        int pid = int.Parse(element.GetAttribute("PROT_ID"));
        int mid = 0;
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
            mid = int.Parse(element.GetAttribute("QUERY_MASS_ID"));
        string seq = element.GetAttribute("SEQUENCE").ToUpper();
        Peptide f = new Peptide(id, seq);
        XmlNodeList mods = element.GetElementsByTagName( "MATCH_MODIFIER" );
        f.Runs.Add( m_Run );
        foreach( XmlElement mod in mods )
        {
            PTM ptm = new PTM();

            // NAME has the form "<name>" or "<name>+<residues>"
            string[] strs = mod.GetAttribute("NAME").Split(new char[]{'+'});
            ptm.Name = strs[0];

            // A missing/empty POS attribute is stored as -1
            string str = mod.GetAttribute("POS");
            ptm.Pos = str.Length == 0 ? -1 : int.Parse(str);
            if( strs.Length > 1 )
                ptm.Residues = strs[1];
            f.AddPTM( ptm );
        }

        // Resolve the referenced protein with explicit lookups instead of a bare catch,
        // so that unrelated failures are no longer silently reported as an unknown protein.
        Protein p = null;
        string pacc;
        if( SortedAccession.TryGetValue(pid, out pacc) && m_SortedProteins.ContainsKey(pacc) )
            p = m_SortedProteins[pacc];
        else
            Notify( "Peptide '" + id + "' references unknown protein '" + pid + "'" );

        if( p != null )
        {
            p.Peptides.Add( f );
            f.Proteins.Add( p );
            if( !p.Sequence.Contains(f.Sequence) )
                throw new ApplicationException( "Inconsistent sequence data" );
        }
        Peptides.Add( f );
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
            SortedPeptides.Add(mid,f);
    }

    // Scores are only needed when a threshold is in use
    if( PlgsThreshold == Peptide.ConfidenceType.NoThreshold )
        return;

    // Scores
    // NOTE(review): the MASS_MATCH ID is used as key into the QUERY_MASS_ID index; the
    // indexer throws KeyNotFoundException when no peptide was registered under that id.
    XmlNodeList scores = doc.GetElementsByTagName( "MASS_MATCH" );
    foreach( XmlElement element in scores )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        double score = double.Parse(element.GetAttribute("SCORE"), m_Format);
        SortedPeptides[id].Score = score;
    }
}
/// <summary>
/// Loads a mzIdentML file
/// </summary>
/// <remarks>
/// Works in three passes: proteins, peptides (with their modifications) and finally the
/// peptide-protein relations found in the single supported spectrum identification list.
/// </remarks>
/// <param name="mzid">Path of the file, as passed on to <c>mzidFile1_0.Load</c></param>
/// <exception cref="ApplicationException">
/// The file contains no spectrum identification list, or more than one.
/// </exception>
protected override void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // Proteins
    // SortedAccession maps the id of each sequence entry to its accession
    SortedList<string,string> SortedAccession = new SortedList<string, string>();
    foreach( PSIPIanalysissearchDBSequenceType element in m_mzid.ListProteins )
    {
        if( SortedAccession.ContainsKey(element.id) )
            continue;
        string acc = element.accession;
        SortedAccession.Add( element.id, acc );

        // The accession may already be known; in that case only the id mapping is new
        if( m_SortedProteins.ContainsKey(acc) )
            continue;

        // Entry name and description come from cvParams; both default to "" when absent
        FuGECommonOntologycvParamType cv;
        cv = FuGECommonOntologycvParamType.Find( "MS:1001352", element.cvParam );
        string entry = cv == null ? "" : cv.value;
        cv = FuGECommonOntologycvParamType.Find( "MS:1001088", element.cvParam );
        string desc = cv == null ? "" : cv.value;
        string seq = element.seq; // kept as provided, no case normalization
        Protein p = new Protein(m_pid++, entry, acc, desc, seq);
        p.DBRef = element.id;
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Peptides
    SortedList<string,Peptide> SortedPeptides = new SortedList<string, Peptide>();
    int id = 1; // internal peptide identifiers are assigned sequentially, starting at 1
    foreach( PSIPIpolypeptidePeptideType element in m_mzid.ListPeptides )
    {
        string seq = element.peptideSequence; // kept as provided, no case normalization
        Peptide f = new Peptide(id++, seq);
        f.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        SortedPeptides.Add( element.id, f );
        f.Runs.Add( m_Run );
        if( element.Modification != null )
        {
            foreach( PSIPIpolypeptideModificationType mod in element.Modification )
            {
                PTM ptm = new PTM();

                // -1 marks a modification whose location is not specified
                ptm.Pos = mod.locationSpecified ? mod.location : -1;
                if( mod.residues != null )
                {
                    foreach( string residue in mod.residues )
                        ptm.Residues += residue;
                }

                // Guarded like the other optional collections: a modification without
                // cvParam children yields an unnamed PTM instead of a NullReferenceException.
                if( mod.cvParam != null )
                {
                    foreach( FuGECommonOntologycvParamType param in mod.cvParam )
                    {
                        // Literal on the left keeps the comparison safe when cvRef is null
                        if( "UNIMOD".Equals(param.cvRef) )
                            ptm.Name = param.name;
                    }
                }
                f.AddPTM( ptm );
            }
        }
        Peptides.Add( f );
    }

    // Relations
    // Exactly one spectrum identification list is supported; report "none" and "several"
    // separately so the message matches the actual problem.
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList == null
        || m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length == 0 )
        throw new ApplicationException( "No spectrum identification list found" );
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1 )
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );

    foreach( PSIPIanalysissearchSpectrumIdentificationResultType idres in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult )
    {
        foreach( PSIPIanalysissearchSpectrumIdentificationItemType item in idres.SpectrumIdentificationItem )
        {
            // Only identifications flagged as passing the threshold are linked to proteins
            if( !item.passThreshold )
                continue;
            Peptide f = SortedPeptides[item.Peptide_ref];
            if( item.PeptideEvidence == null )
                continue;
            f.Confidence = Peptide.ConfidenceType.PassThreshold;
            foreach( PSIPIanalysisprocessPeptideEvidenceType relation in item.PeptideEvidence )
            {
                Protein p = m_SortedProteins[SortedAccession[relation.DBSequence_Ref]];

                // Link each peptide-protein pair only once
                if( f.Proteins.Contains(p) )
                    continue;
                f.Names.Add( relation.DBSequence_Ref, relation.id );
                p.Peptides.Add( f );
                f.Proteins.Add( p );
            }
        }
    }
}
/// <summary>
/// Loads a mzIdentML file
/// </summary>
/// <remarks>
/// Proteins are read first, then peptides together with their modifications, and finally
/// peptides are linked to proteins through the single supported spectrum identification list.
/// </remarks>
/// <param name="mzid">Path of the file, as passed on to <c>mzidFile1_0.Load</c></param>
/// <exception cref="ApplicationException">
/// The file contains no spectrum identification list, or more than one.
/// </exception>
protected override void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // Proteins
    // SortedAccession: id of the sequence entry -> accession
    SortedList<string,string> SortedAccession = new SortedList<string, string>();
    foreach( PSIPIanalysissearchDBSequenceType element in m_mzid.ListProteins )
    {
        if( SortedAccession.ContainsKey(element.id) )
            continue;
        string acc = element.accession;
        SortedAccession.Add( element.id, acc );

        // A protein with this accession may already exist; then only the id mapping is added
        if( m_SortedProteins.ContainsKey(acc) )
            continue;

        // Entry name and description are taken from cvParams and default to ""
        FuGECommonOntologycvParamType cv;
        cv = FuGECommonOntologycvParamType.Find( "MS:1001352", element.cvParam );
        string entry = cv == null ? "" : cv.value;
        cv = FuGECommonOntologycvParamType.Find( "MS:1001088", element.cvParam );
        string desc = cv == null ? "" : cv.value;
        string seq = element.seq; // stored unchanged, no case normalization
        Protein p = new Protein(m_pid++, entry, acc, desc, seq);
        p.DBRef = element.id;
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Peptides
    SortedList<string,Peptide> SortedPeptides = new SortedList<string, Peptide>();
    int id = 1; // sequential internal identifiers, starting at 1
    foreach( PSIPIpolypeptidePeptideType element in m_mzid.ListPeptides )
    {
        string seq = element.peptideSequence; // stored unchanged, no case normalization
        Peptide f = new Peptide(id++, seq);
        f.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        SortedPeptides.Add( element.id, f );
        f.Runs.Add( m_Run );
        if( element.Modification != null )
        {
            foreach( PSIPIpolypeptideModificationType mod in element.Modification )
            {
                PTM ptm = new PTM();

                // -1 stands for "location not specified"
                ptm.Pos = mod.locationSpecified ? mod.location : -1;
                if( mod.residues != null )
                {
                    foreach( string residue in mod.residues )
                        ptm.Residues += residue;
                }

                // cvParam gets the same null guard as Modification and residues, so a
                // modification without cvParam children no longer throws.
                if( mod.cvParam != null )
                {
                    foreach( FuGECommonOntologycvParamType param in mod.cvParam )
                    {
                        // Null-safe comparison: a cvParam without cvRef is just ignored
                        if( "UNIMOD".Equals(param.cvRef) )
                            ptm.Name = param.name;
                    }
                }
                f.AddPTM( ptm );
            }
        }
        Peptides.Add( f );
    }

    // Relations
    // Only a single spectrum identification list is supported. "None" and "several" are
    // reported separately so that the error text describes the real situation.
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList == null
        || m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length == 0 )
        throw new ApplicationException( "No spectrum identification list found" );
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1 )
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );

    foreach( PSIPIanalysissearchSpectrumIdentificationResultType idres in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult )
    {
        foreach( PSIPIanalysissearchSpectrumIdentificationItemType item in idres.SpectrumIdentificationItem )
        {
            // Items that did not pass the threshold are not linked to any protein
            if( !item.passThreshold )
                continue;
            Peptide f = SortedPeptides[item.Peptide_ref];
            if( item.PeptideEvidence == null )
                continue;
            f.Confidence = Peptide.ConfidenceType.PassThreshold;
            foreach( PSIPIanalysisprocessPeptideEvidenceType relation in item.PeptideEvidence )
            {
                Protein p = m_SortedProteins[SortedAccession[relation.DBSequence_Ref]];

                // Each peptide-protein pair is linked at most once
                if( f.Proteins.Contains(p) )
                    continue;
                f.Names.Add( relation.DBSequence_Ref, relation.id );
                p.Peptides.Add( f );
                f.Proteins.Add( p );
            }
        }
    }
}
/// <summary>
/// Converts every entry of <c>m_mzid.ListPeptides</c> into a <c>Peptide</c>, adds it to
/// <c>Peptides</c> and returns an index of the new peptides by source identifier.
/// </summary>
/// <returns>The created peptides, keyed by the <c>id</c> of the entry they were built from.</returns>
/// <exception cref="ArgumentException">Two entries share the same <c>id</c> (raised by <c>SortedList.Add</c>).</exception>
private SortedList<string, Peptide> LoadPeptides()
{
    SortedList<string,Peptide> SortedPeptides = new SortedList<string, Peptide>();
    int id = 1; // sequential internal identifiers, starting at 1

    foreach( PeptideType pep in m_mzid.ListPeptides )
    {
        Peptide p = new Peptide( id++, pep.PeptideSequence );
        p.Confidence = Peptide.ConfidenceType.NoThreshold;
        SortedPeptides.Add( pep.id, p );
        p.Runs.Add( m_Run );

        if( pep.Modification != null )
        {
            foreach( ModificationType mod in pep.Modification )
            {
                PTM ptm = new PTM();

                // -1 stands for "location not specified"
                ptm.Pos = mod.locationSpecified ? mod.location : -1;

                if( mod.residues != null )
                {
                    foreach( string residue in mod.residues )
                        ptm.Residues += residue;
                }

                // cvParam gets the same null guard as Modification and residues, so a
                // modification without cvParam children produces an unnamed PTM rather
                // than a NullReferenceException.
                if( mod.cvParam != null )
                {
                    foreach( CVParamType param in mod.cvParam )
                    {
                        // Null-safe comparison; with several UNIMOD params the last one wins
                        if( "UNIMOD".Equals(param.cvRef) )
                            ptm.Name = param.name;
                    }
                }

                p.AddPTM( ptm );
            }
        }

        p.DBRef = pep.id;
        Peptides.Add( p );
    }

    return SortedPeptides;
}