/// <summary>
/// Extracts the first recognised PSM score from the CV params of a spectrum identification item.
/// </summary>
/// <param name="item">Spectrum identification item whose CV params are scanned</param>
/// <param name="score">Parsed score value, or -1.0 when no recognised score CV is present</param>
/// <param name="type">Human readable score name, or "N/A" when no recognised score CV is present</param>
/// <param name="confidence">
/// Defaults to PassThreshold/NoThreshold according to <c>item.passThreshold</c>; for the
/// SEQUEST XCorr score (MS:1001155) it is replaced by Green/Yellow/Red using the per-charge thresholds.
/// </param>
private void GetPsmScore( SpectrumIdentificationItemType item, out double score, out string type, out Peptide.ConfidenceType confidence )
{
    score = -1.0;
    type = "N/A";
    confidence = item.passThreshold ? Peptide.ConfidenceType.PassThreshold : Peptide.ConfidenceType.NoThreshold;
    if( item.Items == null )
        return;

    foreach( AbstractParamType param in item.Items )
    {
        CVParamType cv = param as CVParamType;
        if( cv == null )
            continue;

        if( cv.accession == "MS:1001155" )
        {
            score = double.Parse( cv.value, m_Format );
            type = "ProteomeDiscoverer/SEQUEST Confidence XCorr";

            // Thresholds are stored per charge state (index = charge - 1). Charge states
            // outside the stored range are clamped to the nearest available entry instead
            // of indexing out of bounds.
            // NOTE(review): assumes m_YellowTh has the same length as m_GreenTh - confirm.
            int idx = item.chargeState - 1;
            if( idx < 0 )
                idx = 0;
            else if( idx >= m_GreenTh.Length )
                idx = m_GreenTh.Length - 1;

            if( score >= m_GreenTh[idx] )
                confidence = Peptide.ConfidenceType.Green;
            else if( score >= m_YellowTh[idx] )
                confidence = Peptide.ConfidenceType.Yellow;
            else
                confidence = Peptide.ConfidenceType.Red;
            break;
        }
        else if( cv.accession == "MS:1001172" )
        {
            score = double.Parse( cv.value, m_Format );
            type = "Mascot expectation value";
            break;
        }
        else if( cv.accession == "MS:1001330" )
        {
            score = double.Parse( cv.value, m_Format );
            type = "X!Tandem expect";
            break;
        }
    }
}
/// <summary>
/// Get the instrument model CV param for the given instrument name.
/// </summary>
/// <remarks>
/// The name is upper-cased (culture-invariant) and looked up as an exact key first. If that
/// fails, the longest key contained in the upper-cased name is used. If nothing matches, a
/// generic "Thermo Fisher Scientific instrument model" CV param is returned.
/// NOTE(review): presumably the keys of InstrumentModels are stored upper-case - confirm.
/// </remarks>
/// <param name="instrumentName">the instrument name; null is treated as an empty name</param>
/// <returns>the instrument CV param</returns>
public static CVParamType getInstrumentModel(string instrumentName)
{
    // Upper-case once, culture-invariantly, and use the same key for the check and the lookup.
    string upperName = (instrumentName ?? string.Empty).ToUpperInvariant();

    if (OntologyMapping.InstrumentModels.ContainsKey(upperName))
    {
        return OntologyMapping.InstrumentModels[upperName];
    }

    // Longest key that is a substring of the name; ties resolve to the later key.
    var longestMatch = InstrumentModels.Where(pair => upperName.Contains(pair.Key))
                                       .Select(pair => pair.Key)
                                       .Aggregate("", (max, current) => max.Length > current.Length ? max : current);
    if (!string.IsNullOrEmpty(longestMatch))
    {
        return OntologyMapping.InstrumentModels[longestMatch];
    }

    // No known model matched: fall back to the generic vendor-level term.
    return new CVParamType
    {
        accession = "MS:1000483",
        name = "Thermo Fisher Scientific instrument model",
        cvRef = "MS",
        value = ""
    };
}
/// <summary>
/// Loads the ProteomeDiscoverer/SEQUEST XCorr confidence thresholds (charges 1 to 4) from the
/// additional search params of the first spectrum identification protocol.
/// </summary>
/// <remarks>
/// The threshold arrays are always re-created. The thresholds are announced through Notify and
/// SeqThreshold is set to Yellow only when all eight threshold CV terms were found.
/// </remarks>
private void LoadSeqScores()
{
    m_GreenTh = new double[4];
    m_YellowTh = new double[4];
    ParamListType ParamList = m_mzid.Data.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].AdditionalSearchParams;
    if( ParamList == null || ParamList.Items == null )
        return;

    // Slots 0-3: high confidence (green) charge 1-4; slots 4-7: medium confidence (yellow) charge 1-4.
    // Tracking each slot separately keeps repeated terms from standing in for missing ones.
    bool[] found = new bool[8];
    foreach( AbstractParamType param in ParamList.Items )
    {
        CVParamType cv = param as CVParamType;
        if( cv == null )
            continue;

        int slot;
        switch( cv.accession )
        {
            case "MS:1001712": slot = 0; break; // ProteomeDiscoverer:SEQUEST:FT High Confidence XCorr Charge1
            case "MS:1001713": slot = 1; break; // ProteomeDiscoverer:SEQUEST:FT High Confidence XCorr Charge2
            case "MS:1001714": slot = 2; break; // ProteomeDiscoverer:SEQUEST:FT High Confidence XCorr Charge3
            case "MS:1001715": slot = 3; break; // ProteomeDiscoverer:SEQUEST:FT High Confidence XCorr Charge4
            case "MS:1001716": slot = 4; break; // ProteomeDiscoverer:SEQUEST:FT Medium Confidence XCorr Charge1
            case "MS:1001717": slot = 5; break; // ProteomeDiscoverer:SEQUEST:FT Medium Confidence XCorr Charge2
            case "MS:1001718": slot = 6; break; // ProteomeDiscoverer:SEQUEST:FT Medium Confidence XCorr Charge3
            case "MS:1001719": slot = 7; break; // ProteomeDiscoverer:SEQUEST:FT Medium Confidence XCorr Charge4
            default: continue;                  // not a threshold term
        }

        double threshold = double.Parse( cv.value, m_Format );
        if( slot < 4 )
            m_GreenTh[slot] = threshold;
        else
            m_YellowTh[slot - 4] = threshold;
        found[slot] = true;
    }

    // Only use the thresholds when the complete set is available.
    foreach( bool present in found )
        if( !present )
            return;

    string yellow = "\t* Red-Yellow thresholds:";
    string green = "\t* Yellow-Green thresholds:";
    for( int i = 0; i < 4; i++ )
    {
        green += " " + m_GreenTh[i] + "(" + (i+1) + ")";
        yellow += " " + m_YellowTh[i] + "(" + (i+1) + ")";
    }
    Notify( "Using ProteomeDiscoverer/SEQUEST XCorr values:" );
    Notify( yellow );
    Notify( green );
    SeqThreshold = Peptide.ConfidenceType.Yellow;
}
/// <summary>
/// Replaces the PAnalyzer evidence and leading/non-leading CV params of a protein detection
/// hypothesis with the ones matching the given evidence type.
/// </summary>
/// <remarks>
/// Existing params whose name contains "PAnalyzer" or "leading" are dropped; all others are
/// kept, followed by the leading/non-leading term and the evidence term. For any evidence
/// type not handled below the hypothesis is left untouched.
/// </remarks>
/// <param name="pdh">Hypothesis whose Items array is rewritten</param>
/// <param name="evidence">Evidence category assigned to the protein</param>
private void UpdatePdhCvs(ProteinDetectionHypothesisType pdh, Protein.EvidenceType evidence)
{
    List<AbstractParamType> list = new List<AbstractParamType>();
    // A hypothesis without params (null Items) is treated as having none to keep.
    if (pdh.Items != null)
    {
        foreach (AbstractParamType item in pdh.Items)
        {
            // Params without a name cannot be the ones being replaced, so they are kept.
            if (item.name == null || (!item.name.Contains("PAnalyzer") && !item.name.Contains("leading")))
            {
                list.Add(item);
            }
        }
    }

    CVParamType ev = new CVParamType(), ld = new CVParamType();
    ev.cvRef = "PSI-MS";
    ld.cvRef = "PSI-MS";
    switch (evidence)
    {
        case Protein.EvidenceType.Conclusive:
            ev.accession = "MS:1002213";
            ev.name = "PAnalyzer:conclusive protein";
            ld.accession = "MS:1002401";
            ld.name = "leading protein";
            break;
        case Protein.EvidenceType.Indistinguishable:
            ev.accession = "MS:1002214";
            ev.name = "PAnalyzer:indistinguishable protein";
            ld.accession = "MS:1002401";
            ld.name = "leading protein";
            break;
        case Protein.EvidenceType.Group:
            ev.accession = "MS:1002216";
            ev.name = "PAnalyzer:ambiguous group member";
            ld.accession = "MS:1002401";
            ld.name = "leading protein";
            break;
        case Protein.EvidenceType.NonConclusive:
            ev.accession = "MS:1002215";
            ev.name = "PAnalyzer:non-conclusive protein";
            ld.accession = "MS:1002402";
            ld.name = "non-leading protein";
            break;
        default: // filtered
            return;
    }

    list.Add(ld);
    list.Add(ev);
    pdh.Items = list.ToArray();
}
/// <summary>
/// Builds the protein ambiguity groups, flags which of them pass the threshold and stores
/// the number of passing groups as the "count of identified proteins" CV param of the
/// protein detection list.
/// </summary>
/// <returns>The protein ambiguity groups returned by BuildPags</returns>
protected override List<ProteinAmbiguityGroupType> BuildProteinDetectionList()
{
    List<ProteinAmbiguityGroupType> pags = BuildPags();
    int identified = AddPassThreshold(pags);

    CVParamType countParam = new CVParamType
    {
        cvRef = "PSI-MS",
        accession = "MS:1002404",
        name = "count of identified proteins",
        value = identified.ToString()
    };

    // The detection list carries this count as its only param.
    m_mzid.Data.DataCollection.AnalysisData.ProteinDetectionList.Items = new AbstractParamType[] { countParam };
    return pags;
}
/// <summary>
/// Inspects the CV params of the first spectrum identification item of the first result of
/// the first identification list and records which additional score types are present.
/// </summary>
/// <remarks>
/// Sets XTandemAvailable when MS:1001330 is found and MascotAvailable when MS:1001172 is found.
/// Nothing is changed when that item carries no params.
/// </remarks>
private void CheckOtherScores()
{
    AbstractParamType[] items = m_mzid.Data.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0].Items;
    // An item without params has a null Items array; there is nothing to detect then.
    if( items == null )
        return;

    foreach( AbstractParamType param in items )
    {
        CVParamType cv = param as CVParamType;
        if( cv == null )
            continue;

        switch( cv.accession )
        {
            case "MS:1001330": // X!Tandem:expect
                XTandemAvailable = true;
                break;
            case "MS:1001172": // mascot:expectation value
                MascotAvailable = true;
                break;
        }
    }
}
/// <summary>
/// Initializes the object from the fields of an MzIdentML CV parameter.
/// </summary>
/// <remarks>
/// Copies the CV reference, accession, value, unit CV reference and unit accession, in that
/// order. The name and unit name of <paramref name="cvp"/> are not copied here.
/// </remarks>
/// <param name="cvp">MzIdentML CV parameter to read the fields from</param>
/// <param name="idata">Identification data object handed to the base constructor</param>
public CVParamObj(CVParamType cvp, IdentDataObj idata)
    : base(idata)
{
    this.CVRef = cvp.cvRef;
    this.Accession = cvp.accession;
    this._value = cvp.value;
    this.UnitCvRef = cvp.unitCvRef;
    this.UnitAccession = cvp.unitAccession;
}
// TODO: Support empty PDH in input mzid
/// <summary>
/// Rebuilds the protein ambiguity groups from the inferred proteins, reusing the protein
/// detection hypotheses already present in the loaded mzid.
/// </summary>
/// <remarks>
/// Existing hypotheses are indexed by their dBSequence_ref; when the same reference occurs
/// more than once the first hypothesis is kept. Proteins whose evidence is not one of the
/// four handled categories are skipped. A protein (or subset member) without a matching
/// hypothesis in the input still raises KeyNotFoundException (see TODO above).
/// </remarks>
/// <returns>One group per reported protein, with ids "PAG_1", "PAG_2", ...</returns>
protected virtual List<ProteinAmbiguityGroupType> BuildProteinDetectionList()
{
    int gid = 1;
    SortedList<string,ProteinDetectionHypothesisType> list = new SortedList<string, ProteinDetectionHypothesisType>();
    List<ProteinAmbiguityGroupType> groups = new List<ProteinAmbiguityGroupType>();

    // Index the input hypotheses by referenced DB sequence.
    if( m_mzid.Data.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup != null )
    {
        foreach( ProteinAmbiguityGroupType grp in m_mzid.Data.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup )
        {
            if( grp.ProteinDetectionHypothesis == null )
                continue;
            foreach( ProteinDetectionHypothesisType pdh in grp.ProteinDetectionHypothesis )
            {
                // SortedList.Add throws on duplicate keys, so keep only the first hypothesis.
                if( !list.ContainsKey(pdh.dBSequence_ref) )
                    list.Add( pdh.dBSequence_ref, pdh );
            }
        }
    }

    foreach( Protein p in Proteins )
    {
        ProteinAmbiguityGroupType g = new ProteinAmbiguityGroupType();
        CVParamType ev = new CVParamType();
        ev.accession = "MS:1001600";
        ev.cvRef = "PSI-MS";
        ev.name = "Protein Inference Confidence Category";
        switch( p.Evidence )
        {
            case Protein.EvidenceType.Conclusive:
                ev.value = "conclusive";
                break;
            case Protein.EvidenceType.Indistinguishable:
                ev.value = "indistinguishable";
                break;
            case Protein.EvidenceType.Group:
                ev.value = "ambiguous group";
                break;
            case Protein.EvidenceType.NonConclusive:
                ev.value = "non conclusive";
                break;
            default:
                continue; // not reported
        }

        g.id = "PAG_" + gid;
        gid++;
        if( p.Subset.Count == 0 )
        {
            // Single protein: the category is attached to the group itself.
            g.ProteinDetectionHypothesis = new ProteinDetectionHypothesisType[]{list[p.DBRef]};
            g.Items = new CVParamType[]{ev};
        }
        else
        {
            // Protein group: the category is attached to each member hypothesis.
            List<ProteinDetectionHypothesisType> listpdh = new List<ProteinDetectionHypothesisType>();
            foreach( Protein p2 in p.Subset )
            {
                ProteinDetectionHypothesisType pdh = list[p2.DBRef];
                pdh.Items = new CVParamType[]{ev};
                listpdh.Add( pdh );
            }
            g.ProteinDetectionHypothesis = listpdh.ToArray();
        }
        groups.Add( g );
    }
    return groups;
}
/// <summary>
/// Replaces the params of every group with a single "protein group passes threshold" CV param
/// and counts the groups that pass.
/// </summary>
/// <remarks>
/// A group passes when at least one of its hypotheses has passThreshold set and carries a
/// param named "leading protein". Groups without hypotheses, and hypotheses without params,
/// are treated as not passing.
/// </remarks>
/// <param name="groups">Groups to annotate; their Items arrays are overwritten</param>
/// <returns>Number of groups marked as passing</returns>
private int AddPassThreshold(List<ProteinAmbiguityGroupType> groups)
{
    int count = 0;
    foreach (ProteinAmbiguityGroupType pag in groups)
    {
        bool pass = false;
        if (pag.ProteinDetectionHypothesis != null)
        {
            foreach (ProteinDetectionHypothesisType pdh in pag.ProteinDetectionHypothesis)
            {
                // Items may be null for a hypothesis that carries no params.
                if (!pdh.passThreshold || pdh.Items == null)
                {
                    continue;
                }
                foreach (AbstractParamType item in pdh.Items)
                {
                    if (item.name == "leading protein")
                    {
                        pass = true;
                        break;
                    }
                }
                if (pass)
                {
                    break;
                }
            }
        }

        CVParamType cv = new CVParamType();
        cv.cvRef = "PSI-MS";
        cv.accession = "MS:1002415";
        cv.name = "protein group passes threshold";
        if (pass)
        {
            count++;
            cv.value = "true";
        }
        else
        {
            cv.value = "false";
        }
        pag.Items = new AbstractParamType[] { cv };
    }
    return count;
}
/// <summary>
/// Registers the DB sequences of the loaded mzid as proteins and returns a map from
/// DB sequence id to accession.
/// </summary>
/// <remarks>
/// A sequence id is mapped only once per file. A Protein is created only when its accession
/// is not yet in m_SortedProteins; entry name and description are read from the CV params
/// MS:1001352 and MS:1001088 and default to an empty string when absent.
/// </remarks>
/// <returns>Sorted map of DB sequence id to accession for this file</returns>
private SortedList<string,string> LoadProteins()
{
    SortedList<string,string> idToAccession = new SortedList<string, string>();
    foreach( DBSequenceType seq in m_mzid.ListProteins )
    {
        // Duplicated entry within the same file: ignore it completely.
        if( idToAccession.ContainsKey(seq.id) )
            continue;
        idToAccession.Add( seq.id, seq.accession );

        // Accessions already known from another file get no second Protein.
        if( !m_SortedProteins.ContainsKey(seq.accession) )
        {
            CVParamType entryCv = mzidFile1_1.FindCV( "MS:1001352", seq.Items );
            string entry = "";
            if( entryCv != null )
                entry = entryCv.value;

            CVParamType descCv = mzidFile1_1.FindCV( "MS:1001088", seq.Items );
            string desc = "";
            if( descCv != null )
                desc = descCv.value;

            Protein protein = new Protein( m_pid++, entry, seq.accession, desc, seq.Seq );
            protein.DBRef = seq.id;
            Proteins.Add( protein );
            m_SortedProteins.Add( protein.Accession, protein );
        }
    }
    return idToAccession;
}
/// <summary>
/// Looks up a CV term by accession in an array of params.
/// </summary>
/// <param name='acc'>
/// Accession number of the desired CV.
/// </param>
/// <param name='cvparams'>
/// Array of params to search; may be null.
/// </param>
/// <returns>
/// The first CV param whose accession equals <paramref name="acc"/>, or null when the array
/// is null or holds no such term.
/// </returns>
public static CVParamType FindCV(string acc, AbstractParamType[] cvparams)
{
    if (cvparams != null)
    {
        for (int i = 0; i < cvparams.Length; i++)
        {
            // Entries that are not CV params yield null here and are skipped.
            CVParamType term = cvparams[i] as CVParamType;
            if (term != null && term.accession == acc)
            {
                return term;
            }
        }
    }
    return null;
}
/// <summary>
/// Helper method for including an additional analysis software to the list
/// </summary>
/// <param name="id">Identifier of the software entry</param>
/// <param name="name">Software name; also used as the name of the CV param, if one is created</param>
/// <param name="version">Software version</param>
/// <param name="uri">Software URI</param>
/// <param name="cv">CV reference for the software name CV param; no CV param is created when null</param>
/// <param name="accession">Accession for the software name CV param; no CV param is created when null</param>
/// <param name="customizations">Customizations text; left unset when null</param>
public void AddAnalysisSoftware(
    string id, string name, string version, string uri,
    string cv, string accession, string customizations)
{
    AnalysisSoftwareType sw = new AnalysisSoftwareType();
    sw.id = id;
    sw.name = name;
    sw.version = version;
    sw.uri = uri;

    if (cv != null && accession != null)
    {
        CVParamType p = new CVParamType();
        p.cvRef = cv;
        p.accession = accession;
        p.name = name;

        // The SoftwareName container must exist before its Item can be assigned.
        if (sw.SoftwareName == null)
        {
            sw.SoftwareName = new ParamType();
        }
        sw.SoftwareName.Item = p;
    }

    if (customizations != null)
    {
        sw.Customizations = customizations;
    }

    ListSW.Add(sw);
}
/// <summary>
/// Save results to a mzIdentML file
/// </summary>
/// <remarks>
/// Does nothing when no mzid is loaded or when more than one input file is in use.
/// Otherwise the organization, author and analysis software entries are (re)registered,
/// replacing any existing entry with the same id. If the document has no protein detection
/// or no protein detection protocol, the method returns at that point without saving.
/// Otherwise the protein detection list and the bibliographic references are updated and
/// the document is written to <paramref name="fpath"/>.
/// </remarks>
/// <param name="fpath">Destination path handed to the mzid Save method</param>
public void SaveMzid( string fpath )
{
    if( m_mzid == null || m_InputFiles.Count > 1 )
        return;

    // ---- Organization ----
    OrganizationType organization = new OrganizationType
    {
        id = "UPV/EHU",
        name = "University of the Basque Country"
    };
    OrganizationType previousOrg = null;
    foreach( OrganizationType candidate in m_mzid.ListOrganizations )
    {
        if( candidate.id == organization.id )
        {
            previousOrg = candidate;
            break;
        }
    }
    if( previousOrg != null )
        m_mzid.ListOrganizations.Remove( previousOrg );
    organization.Item = new CVParamType
    {
        accession = "MS:1000588",
        name = "contact URL",
        cvRef = "PSI-MS",
        value = "http://www.ehu.es"
    };
    m_mzid.ListOrganizations.Add( organization );

    // ---- Software author ----
    PersonType author = new PersonType
    {
        id = "PAnalyzer_Author",
        firstName = "Gorka",
        lastName = "Prieto"
    };
    author.Item = new CVParamType
    {
        accession = "MS:1000589",
        name = "contact email",
        cvRef = "PSI-MS",
        value = "*****@*****.**"
    };
    AffiliationType affiliation = new AffiliationType { organization_ref = organization.id };
    author.Affiliation = new AffiliationType[]{ affiliation };
    PersonType previousAuthor = null;
    foreach( PersonType candidate in m_mzid.ListPeople )
    {
        if( candidate.id == author.id )
        {
            previousAuthor = candidate;
            break;
        }
    }
    if( previousAuthor != null )
        m_mzid.ListPeople.Remove( previousAuthor );
    m_mzid.ListPeople.Add( author );

    // ---- Analysis software ----
    AnalysisSoftwareType software = new AnalysisSoftwareType
    {
        id = m_Software.Name,
        name = m_Software.ToString(),
        uri = m_Software.Url,
        version = m_Software.Version
    };
    CVParamType softwareName = new CVParamType
    {
        name = "PAnalyzer",
        cvRef = "PSI-MS",
        accession = "MS:1002076"
    };
    software.SoftwareName = new ParamType();
    software.SoftwareName.Item = softwareName;
    RoleType contactRole = new RoleType();
    contactRole.cvParam = new CVParamType
    {
        accession = "MS:1001271",
        cvRef = "PSI-MS",
        name = "researcher"
    };
    software.ContactRole = new ContactRoleType();
    software.ContactRole.contact_ref = author.id;
    software.ContactRole.Role = contactRole;
    software.Customizations = m_Software.Customizations;
    AnalysisSoftwareType previousSoftware = null;
    foreach( AnalysisSoftwareType candidate in m_mzid.ListSW )
    {
        if( candidate.id == m_Software.Name )
        {
            previousSoftware = candidate;
            break;
        }
    }
    if( previousSoftware != null )
        m_mzid.ListSW.Remove( previousSoftware );
    m_mzid.ListSW.Add( software );

    // ---- Protein detection protocol ----
    // Without protein detection data there is nothing further to update; nothing is saved.
    if( m_mzid.Data.AnalysisCollection.ProteinDetection == null )
        return;
    if( m_mzid.Data.AnalysisProtocolCollection.ProteinDetectionProtocol == null )
        return;
    m_mzid.Data.AnalysisCollection.ProteinDetection.proteinDetectionList_ref = "PDL_PAnalyzer";
    m_mzid.Data.AnalysisCollection.ProteinDetection.proteinDetectionProtocol_ref = "PDP_PAnalyzer";
    m_mzid.Data.AnalysisProtocolCollection.ProteinDetectionProtocol.analysisSoftware_ref = software.id;
    m_mzid.Data.AnalysisProtocolCollection.ProteinDetectionProtocol.id = "PDP_PAnalyzer";

    // ---- Protein detection list ----
    m_mzid.Data.DataCollection.AnalysisData.ProteinDetectionList.id = "PDL_PAnalyzer";
    List<ProteinAmbiguityGroupType> ambiguityGroups = BuildProteinDetectionList();
    m_mzid.Data.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup = ambiguityGroups.ToArray();

    // ---- References ----
    BibliographicReferenceType paper = new BibliographicReferenceType();
    paper.authors = "Gorka Prieto, Kerman Aloria, Nerea Osinalde, Asier Fullaondo, Jesus M. Arizmendi and Rune Matthiesen";
    paper.doi = "10.1186/1471-2105-13-288";
    paper.id = "10.1186/1471-2105-13-288";
    paper.issue = "288";
    paper.title = "PAnalyzer: A software tool for protein inference in shotgun proteomics";
    paper.name = "PAnalyzer: A software tool for protein inference in shotgun proteomics";
    paper.publication = "BMC Bioinformatics";
    paper.publisher = "BioMed Central Ltd.";
    paper.volume = "13";
    paper.year = 2012;
    List<BibliographicReferenceType> references = new List<BibliographicReferenceType>();
    references.Add( paper );
    if( m_mzid.Data.BibliographicReference != null )
    {
        foreach( BibliographicReferenceType existing in m_mzid.Data.BibliographicReference )
        {
            // Keep every existing reference except a previous copy of the PAnalyzer paper.
            if( existing.doi == null || existing.doi != paper.doi )
                references.Add( existing );
        }
    }
    m_mzid.Data.BibliographicReference = references.ToArray();

    m_mzid.Save( fpath );
    Notify( "Saved to " + fpath );
}