/// <summary>
/// Aggregate the proteins based on the data from the mass tags.
/// </summary>
/// <remarks>
/// Proteins are de-duplicated by <c>Reference</c>: the first instance seen for a given
/// reference is kept in the map, and every later protein with the same reference causes
/// the kept instance to be linked with the mass tag instead. The kept proteins are then
/// numbered from 0 in map enumeration order and added to <c>Proteins</c>.
/// </remarks>
private void AggregateProteins()
{
    Dictionary<string, Protein> proteinMap = new Dictionary<string, Protein>();

    foreach (ConsensusTarget tag in m_massTags)
    {
        // Enumerate a snapshot. tag.AddProtein() is called inside the loop below; if
        // GetProteins() returns the tag's own list (not visible from this method), adding
        // to it while enumerating would throw InvalidOperationException.
        List<Protein> proteins = new List<Protein>(tag.GetProteins());

        foreach (Protein p in proteins)
        {
            Protein existingProtein;
            if (proteinMap.TryGetValue(p.Reference, out existingProtein))
            {
                existingProtein.AddConsensusTarget(tag); // Add the mass tag to the protein
                tag.AddProtein(existingProtein);         // Add the protein to the mass tag
            }
            else
            {
                // First time this reference is seen: this instance becomes the kept one.
                proteinMap.Add(p.Reference, p);
                p.AddConsensusTarget(tag);
            }
        }
    }

    int globalCount = 0;
    foreach (Protein p in proteinMap.Values)
    {
        p.Id = globalCount++;
        Proteins.Add(p);
    }
}
/// <summary>
/// Rebuilds <c>Proteins</c> from <c>allProteins</c>, keeping only the entries
/// whose protein has at least one proteoform.
/// </summary>
public void ClearEmptyProteins()
{
    Proteins.Clear();

    foreach (var entry in allProteins)
    {
        var candidate = entry.Value;

        // Skip proteins that carry no proteoforms.
        if (candidate.Proteoforms.Count <= 0)
        {
            continue;
        }

        Proteins.Add(entry.Key, candidate);
    }
}
/// <summary>
/// Adds a protein to <c>Proteins</c> and updates <c>RepresentativeProtein</c>.
/// </summary>
/// <param name="protein">
/// Protein to add. It becomes the representative protein when none is set yet,
/// or when its <c>Length</c> is strictly greater than the current representative's.
/// </param>
public void AddProtein(Protein protein)
{
    Proteins.Add(protein);

    bool replacesRepresentative =
        RepresentativeProtein == null ||
        protein.Length > RepresentativeProtein.Length;

    if (replacesRepresentative)
    {
        RepresentativeProtein = protein;
    }
}
/// <summary>
/// Registers the proteins listed in <c>m_mzid.ListProteins</c>.
/// </summary>
/// <remarks>
/// Each listed entry's id is mapped to its accession once. A new <c>Protein</c> is created
/// and added to <c>Proteins</c> and <c>m_SortedProteins</c> only when its accession is not
/// already present in <c>m_SortedProteins</c>.
/// </remarks>
/// <returns>Map from entry id to accession, with one entry per distinct id.</returns>
private SortedList<string,string> LoadProteins()
{
    SortedList<string,string> accessionById = new SortedList<string, string>();

    foreach( DBSequenceType dbSequence in m_mzid.ListProteins )
    {
        // Avoids duplicated entries in the same file
        if( accessionById.ContainsKey(dbSequence.id) )
        {
            continue;
        }
        accessionById.Add( dbSequence.id, dbSequence.accession );

        // Avoids duplicated entries between different files
        if( m_SortedProteins.ContainsKey(dbSequence.accession) )
        {
            continue;
        }

        // Both CV terms are optional; a missing term yields an empty string.
        string entry = "";
        CVParamType entryParam = mzidFile1_1.FindCV("MS:1001352", dbSequence.Items);
        if( entryParam != null )
        {
            entry = entryParam.value;
        }

        string desc = "";
        CVParamType descParam = mzidFile1_1.FindCV("MS:1001088", dbSequence.Items);
        if( descParam != null )
        {
            desc = descParam.value;
        }

        Protein protein = new Protein( m_pid++, entry, dbSequence.accession, desc, dbSequence.Seq );
        protein.DBRef = dbSequence.id;
        Proteins.Add( protein );
        m_SortedProteins.Add( protein.Accession, protein );
    }

    return accessionById;
}
/// <summary>
/// Add a Protein-Spectrum-Match identification.
/// </summary>
/// <remarks>
/// Identifications whose protein name is not a key of <c>allProteins</c> are ignored
/// (after <c>RemoveUnidentifiedScan</c> has been called for them). Otherwise the protein
/// is copied into <c>Proteins</c> on first use and the identification is added to it.
/// </remarks>
/// <param name="id">Protein-Spectrum-Match to add</param>
public void Add(PrSm id)
{
    RemoveUnidentifiedScan(id);

    var proteinName = id.ProteinName;

    if (allProteins.ContainsKey(proteinName))
    {
        // Lazily promote the protein into the active set the first time it is hit.
        if (!Proteins.ContainsKey(proteinName))
        {
            Proteins.Add(proteinName, allProteins[proteinName]);
        }

        Proteins[proteinName].Add(id);
    }
}
/// <summary>
/// Loads proteins and peptides (with their modifications) from an XML document and,
/// when <c>PlgsThreshold</c> is not <c>NoThreshold</c>, the peptide scores as well.
/// </summary>
/// <remarks>
/// PROTEIN elements are read first, then PEPTIDE elements, then MASS_MATCH elements.
/// A peptide that references a protein id not seen in this document is still added to
/// <c>Peptides</c>, but is not linked to any protein and a notification is issued.
/// </remarks>
/// <param name="xmlpath">Path of the XML file to load.</param>
/// <exception cref="ApplicationException">
/// A peptide's sequence is not contained in the sequence of the protein it references.
/// </exception>
private void LoadData( string xmlpath )
{
    SortedList<int,string> SortedAccession = new SortedList<int, string>();
    XmlDocument doc = new XmlDocument();
    doc.Load( xmlpath );

    // Load proteins
    XmlNodeList proteins = doc.GetElementsByTagName( "PROTEIN" );
    foreach( XmlElement element in proteins )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        if( SortedAccession.ContainsKey(id) )       // ID already seen in this document
            continue;
        string acc = element.GetElementsByTagName("ACCESSION")[0].InnerText;
        SortedAccession.Add( id, acc );
        if( m_SortedProteins.ContainsKey(acc) )     // accession already registered
            continue;
        string entry = element.GetElementsByTagName("ENTRY")[0].InnerText;
        string desc = element.GetElementsByTagName("DESCRIPTION")[0].InnerText.Replace('+',' ');
        string seq = element.GetElementsByTagName("SEQUENCE")[0].InnerText.ToUpper();
        Protein p = new Protein(m_pid++, entry, acc, desc, seq);
        Proteins.Add( p );
        m_SortedProteins.Add( acc, p );
    }

    // Load peptides
    // Peptides keyed by QUERY_MASS_ID; only filled when a threshold is active.
    SortedList<int,Peptide> SortedPeptides = new SortedList<int, Peptide>();
    XmlNodeList peptides = doc.GetElementsByTagName( "PEPTIDE" );
    foreach( XmlElement element in peptides )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        int pid = int.Parse(element.GetAttribute("PROT_ID"));
        int mid = 0;
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
            mid = int.Parse(element.GetAttribute("QUERY_MASS_ID"));
        string seq = element.GetAttribute("SEQUENCE").ToUpper();
        Peptide f = new Peptide(id, seq);
        XmlNodeList mods = element.GetElementsByTagName( "MATCH_MODIFIER" );
        f.Runs.Add( m_Run );
        foreach( XmlElement mod in mods )
        {
            PTM ptm = new PTM();
            // NAME is split on '+': the first part is the name, the optional second part the residues.
            string[] strs = mod.GetAttribute("NAME").Split(new char[]{'+'});
            ptm.Name = strs[0];
            string str = mod.GetAttribute("POS");
            ptm.Pos = str.Length == 0 ? -1 : int.Parse(str);   // -1 when POS is absent/empty
            if( strs.Length > 1 )
                ptm.Residues = strs[1];
            f.AddPTM( ptm );
        }

        // Resolve protein id -> accession -> protein without relying on exceptions,
        // so that unrelated failures are no longer silently swallowed.
        Protein p = null;
        string accession;
        if( !SortedAccession.TryGetValue(pid, out accession) ||
            !m_SortedProteins.TryGetValue(accession, out p) )
        {
            p = null;
            Notify( "Peptide '" + id + "' references unknown protein '" + pid + "'" );
        }

        if( p != null )
        {
            p.Peptides.Add( f );
            f.Proteins.Add( p );
            if( !p.Sequence.Contains(f.Sequence) )
                throw new ApplicationException( "Inconsistent sequence data" );
        }
        Peptides.Add( f );
        if( PlgsThreshold != Peptide.ConfidenceType.NoThreshold )
            SortedPeptides.Add(mid,f);
    }

    if( PlgsThreshold == Peptide.ConfidenceType.NoThreshold )
        return;

    // Scores
    XmlNodeList scores = doc.GetElementsByTagName( "MASS_MATCH" );
    foreach( XmlElement element in scores )
    {
        int id = int.Parse(element.GetAttribute("ID"));
        double score = double.Parse(element.GetAttribute("SCORE"), m_Format);
        SortedPeptides[id].Score = score;
    }
}
/// <summary>
/// Registers a protein this target occurs in and links the protein back to this target.
/// </summary>
/// <param name="protein">
/// Protein to add to <c>Proteins</c>; this instance is also added to its <c>Consensus</c> collection.
/// </param>
public void AddProtein(ProteinInformation protein)
{
    this.Proteins.Add(protein);

    // Keep the association navigable from the protein side as well.
    protein.Consensus.Add(this);
}
/// <summary>
/// Adds a protein to the <c>Proteins</c> collection of proteins evidenced.
/// </summary>
/// <param name="protein">Protein to add.</param>
public void AddProtein(ProteinInformation protein)
{
    this.Proteins.Add(protein);
}
/// <summary>
/// Loads a mzIdentML file
/// </summary>
/// <remarks>
/// Works in three passes over the parsed document: proteins, peptides (with their
/// modifications), and finally the peptide-to-protein relations taken from the
/// identification items that pass the threshold.
/// </remarks>
/// <param name="mzid">Path handed to <c>mzidFile1_0.Load</c>.</param>
/// <exception cref="ApplicationException">
/// The document does not contain exactly one spectrum identification list.
/// </exception>
override protected void Load( string mzid )
{
    m_mzid = new mzidFile1_0();
    m_mzid.Load( mzid );

    // ---- Proteins ----
    SortedList<string,string> accessionById = new SortedList<string, string>();
    foreach( PSIPIanalysissearchDBSequenceType dbSequence in m_mzid.ListProteins )
    {
        // Same id already seen in this file
        if( accessionById.ContainsKey(dbSequence.id) )
        {
            continue;
        }
        string accession = dbSequence.accession;
        accessionById.Add( dbSequence.id, accession );

        // Accession already registered
        if( m_SortedProteins.ContainsKey(accession) )
        {
            continue;
        }

        // Both CV terms are optional; a missing term yields an empty string.
        string entry = "";
        FuGECommonOntologycvParamType entryParam = FuGECommonOntologycvParamType.Find( "MS:1001352", dbSequence.cvParam );
        if( entryParam != null )
        {
            entry = entryParam.value;
        }

        string description = "";
        FuGECommonOntologycvParamType descParam = FuGECommonOntologycvParamType.Find( "MS:1001088", dbSequence.cvParam );
        if( descParam != null )
        {
            description = descParam.value;
        }

        Protein protein = new Protein(m_pid++, entry, accession, description, dbSequence.seq);
        protein.DBRef = dbSequence.id;
        Proteins.Add( protein );
        m_SortedProteins.Add( accession, protein );
    }

    // ---- Peptides ----
    SortedList<string,Peptide> peptideById = new SortedList<string, Peptide>();
    int nextPeptideId = 1;
    foreach( PSIPIpolypeptidePeptideType peptideElement in m_mzid.ListPeptides )
    {
        Peptide peptide = new Peptide(nextPeptideId++, peptideElement.peptideSequence);
        peptide.Confidence = Peptide.ConfidenceType.PassThreshold; // It will be filtered later if necessary
        peptideById.Add( peptideElement.id, peptide );
        peptide.Runs.Add( m_Run );

        if( peptideElement.Modification != null )
        {
            foreach( PSIPIpolypeptideModificationType modification in peptideElement.Modification )
            {
                PTM ptm = new PTM();
                ptm.Pos = modification.locationSpecified ? modification.location : -1;

                if( modification.residues != null )
                {
                    foreach( string residue in modification.residues )
                    {
                        ptm.Residues += residue;
                    }
                }

                // The last cvParam whose cvRef is "UNIMOD" wins as the PTM name.
                foreach( FuGECommonOntologycvParamType cvParam in modification.cvParam )
                {
                    if( cvParam.cvRef.Equals("UNIMOD") )
                    {
                        ptm.Name = cvParam.name;
                    }
                }

                peptide.AddPTM( ptm );
            }
        }

        Peptides.Add( peptide );
    }

    // ---- Relations ----
    if( m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList.Length != 1 )
    {
        throw new ApplicationException( "Multiple spectrum identification lists not supported" );
    }

    foreach( PSIPIanalysissearchSpectrumIdentificationResultType result in
        m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult )
    {
        foreach( PSIPIanalysissearchSpectrumIdentificationItemType item in result.SpectrumIdentificationItem )
        {
            if( !item.passThreshold )
            {
                continue;
            }

            // The peptide is looked up before the evidence check, as in the original flow.
            Peptide peptide = peptideById[item.Peptide_ref];
            if( item.PeptideEvidence != null )
            {
                peptide.Confidence = Peptide.ConfidenceType.PassThreshold;

                foreach( PSIPIanalysisprocessPeptideEvidenceType evidence in item.PeptideEvidence )
                {
                    Protein protein = m_SortedProteins[accessionById[evidence.DBSequence_Ref]];

                    // Link each peptide/protein pair only once.
                    if( !peptide.Proteins.Contains(protein) )
                    {
                        peptide.Names.Add( evidence.DBSequence_Ref, evidence.id );
                        protein.Peptides.Add( peptide );
                        peptide.Proteins.Add( protein );
                    }
                }
            }
        }
    }
}