public static void WriteMzIdentMl(IEnumerable <PeptideSpectralMatch> psms, List <EngineLayer.ProteinGroup> groups, List <ModificationWithMass> variableMods, List <ModificationWithMass> fixedMods, List <Protease> proteases, double qValueFilter, Tolerance productTolerance, Tolerance parentTolerance, int missedCleavages, string outputPath) { psms = psms.Where(p => p.FdrInfo.QValue <= qValueFilter && p.FdrInfo.QValueNotch <= qValueFilter); List <PeptideWithSetModifications> peptides = psms.SelectMany(i => i.CompactPeptides.SelectMany(c => c.Value.Item2)).Distinct().ToList(); List <Protein> proteins = peptides.Select(p => p.Protein).Distinct().ToList(); List <string> filenames = psms.Select(i => i.FullFilePath).Distinct().ToList(); Dictionary <string, string> database_reference = new Dictionary <string, string>(); List <string> databases = proteins.Select(p => p.DatabaseFilePath).Distinct().ToList(); UTF8Encoding utf8EmitBOM = new UTF8Encoding(false); XmlWriterSettings settings = new XmlWriterSettings() { NewLineChars = "\n", Indent = true, Encoding = utf8EmitBOM, }; XmlSerializer _indexedSerializer = new XmlSerializer(typeof(mzIdentML110.Generated.MzIdentMLType110)); var _mzid = new mzIdentML110.Generated.MzIdentMLType110() { version = "1.1.0", id = "", }; _mzid.Provider = new mzIdentML110.Generated.ProviderType() { id = "PROVIDER", ContactRole = new mzIdentML110.Generated.ContactRoleType() { contact_ref = "UWMadisonSmithGroup", Role = new mzIdentML110.Generated.RoleType() { cvParam = new mzIdentML110.Generated.CVParamType() { accession = "MS:1001271", name = "researcher", cvRef = "PSI-MS" }, }, }, }; _mzid.AuditCollection = new mzIdentML110.Generated.AbstractContactType[2]; _mzid.AuditCollection[0] = new mzIdentML110.Generated.PersonType() { id = "UWMadisonSmithGroupPerson", cvParam = new mzIdentML110.Generated.CVParamType[2] { new mzIdentML110.Generated.CVParamType() { accession = "MS:1000589", name = "contact email", cvRef = "PSI-MS", value = "*****@*****.**" }, new mzIdentML110.Generated.CVParamType() { accession = "MS:1000590", name = "affiliation name", cvRef = "PSI-MS", value = "UWMadisonSmithGroup" } } }; _mzid.AuditCollection[1] = new mzIdentML110.Generated.OrganizationType() { id = "UWMadisonSmithGroup", cvParam = new mzIdentML110.Generated.CVParamType[2] { new mzIdentML110.Generated.CVParamType() { accession = "MS:1000589", name = "contact email", cvRef = "PSI-MS", value = "*****@*****.**" }, new mzIdentML110.Generated.CVParamType() { accession = "MS:1000590", name = "affiliation name", cvRef = "PSI-MS", value = "UWMadisonSmithGroup" } } }; //cvlist: URLs of controlled vocabularies used within the file. _mzid.cvList = new mzIdentML110.Generated.cvType[4] { new mzIdentML110.Generated.cvType() { id = "PSI-MS", fullName = "Proteomics Standards Initiative Mass Spectrometry Vocabularies", uri = "https://github.com/HUPO-PSI/psi-ms-CV/blob/master/psi-ms.obo", version = "4.0.9" }, new mzIdentML110.Generated.cvType() { id = "PSI-MOD", fullName = "Proteomics Standards Initiative Modification Vocabularies", uri = "http://psidev.cvs.sourceforge.net/viewvc/psidev/psi/mod/data/PSI-MOD.obo", version = "1.2" }, new mzIdentML110.Generated.cvType() { id = "UNIMOD", fullName = "UNIT-ONTOLOGY", uri = "http://www.unimod.org/obo/unimod.obo" }, new mzIdentML110.Generated.cvType() { id = "UO", fullName = "UNIT-ONTOLOGY", uri = "http://www.unimod.org/obo/unimod.obo" } }; _mzid.AnalysisSoftwareList = new mzIdentML110.Generated.AnalysisSoftwareType[1] { new mzIdentML110.Generated.AnalysisSoftwareType() { id = "AS_MetaMorpheus", name = "MetaMorpheus", version = GlobalVariables.MetaMorpheusVersion, uri = "https://github.com/smith-chem-wisc/MetaMorpheus", SoftwareName = new mzIdentML110.Generated.ParamType() { Item = new mzIdentML110.Generated.CVParamType { accession = "MS:1002826", name = "MetaMorpheus", cvRef = "PSI-MS" } }, ContactRole = new mzIdentML110.Generated.ContactRoleType() { contact_ref = "UWMadisonSmithGroup", Role = new mzIdentML110.Generated.RoleType() { cvParam = new mzIdentML110.Generated.CVParamType() { accession = "MS:1001267", name = "software vendor", cvRef = "PSI-MS" } } } } }; _mzid.DataCollection = new mzIdentML110.Generated.DataCollectionType { AnalysisData = new mzIdentML110.Generated.AnalysisDataType() { SpectrumIdentificationList = new mzIdentML110.Generated.SpectrumIdentificationListType[1] { new mzIdentML110.Generated.SpectrumIdentificationListType { id = "SIL", SpectrumIdentificationResult = new mzIdentML110.Generated.SpectrumIdentificationResultType[psms.Count()] } } }, Inputs = new mzIdentML110.Generated.InputsType { SearchDatabase = new mzIdentML110.Generated.SearchDatabaseType[databases.Count()], SpectraData = new mzIdentML110.Generated.SpectraDataType[filenames.Count] } }; _mzid.SequenceCollection = new mzIdentML110.Generated.SequenceCollectionType { Peptide = new mzIdentML110.Generated.PeptideType[peptides.Count], DBSequence = new mzIdentML110.Generated.DBSequenceType[proteins.Count], PeptideEvidence = new mzIdentML110.Generated.PeptideEvidenceType[peptides.Count] }; _mzid.AnalysisCollection = new mzIdentML110.Generated.AnalysisCollectionType { SpectrumIdentification = new mzIdentML110.Generated.SpectrumIdentificationType[1] { new mzIdentML110.Generated.SpectrumIdentificationType { id = "SI", spectrumIdentificationList_ref = "SIL", spectrumIdentificationProtocol_ref = "SIP", InputSpectra = new mzIdentML110.Generated.InputSpectraType[filenames.Count], SearchDatabaseRef = new mzIdentML110.Generated.SearchDatabaseRefType[databases.Count] } } }; int database_index = 0; foreach (string database in databases) { _mzid.DataCollection.Inputs.SearchDatabase[database_index] = new mzIdentML110.Generated.SearchDatabaseType() { id = "SDB_" + database_index, location = database, DatabaseName = new mzIdentML110.Generated.ParamType { Item = new mzIdentML110.Generated.CVParamType { accession = "MS:1001073", name = "database type amino acid", cvRef = "PSI-MS" } } }; database_reference.Add(database, "SDB_" + database_index); _mzid.AnalysisCollection.SpectrumIdentification[0].SearchDatabaseRef[database_index] = new mzIdentML110.Generated.SearchDatabaseRefType() { searchDatabase_ref = "SDB_" + database_index }; database_index++; } int protein_index = 0; foreach (Protein protein in proteins) { _mzid.SequenceCollection.DBSequence[protein_index] = new mzIdentML110.Generated.DBSequenceType { id = "DBS_" + protein.Accession, lengthSpecified = true, length = protein.Length, searchDatabase_ref = database_reference[protein.DatabaseFilePath], accession = protein.Accession, Seq = protein.BaseSequence, cvParam = new mzIdentML110.Generated.CVParamType[1] { new mzIdentML110.Generated.CVParamType { accession = "MS:1001088", name = "protein description", cvRef = "PSI-MS", value = protein.FullDescription } }, name = protein.Name }; protein_index++; } Dictionary <string, int> spectral_ids = new Dictionary <string, int>(); //key is datafile, value is datafile's id int spectra_data_id = 0; foreach (string data_filepath in filenames) { bool thermoRawFile = Path.GetExtension(data_filepath) == ".raw"; string spectral_data_id = "SD_" + spectra_data_id; spectral_ids.Add(data_filepath, spectra_data_id); _mzid.AnalysisCollection.SpectrumIdentification[0].InputSpectra[spectra_data_id] = new mzIdentML110.Generated.InputSpectraType() { spectraData_ref = spectral_data_id }; _mzid.DataCollection.Inputs.SpectraData[spectra_data_id] = new mzIdentML110.Generated.SpectraDataType() { id = spectral_data_id, name = Path.GetFileNameWithoutExtension(data_filepath), location = data_filepath, FileFormat = new mzIdentML110.Generated.FileFormatType { cvParam = new mzIdentML110.Generated.CVParamType { accession = thermoRawFile ? "MS:1000563" : "MS:1000584", name = thermoRawFile ? "Thermo RAW format" : "mzML format", cvRef = "PSI-MS" } }, SpectrumIDFormat = new mzIdentML110.Generated.SpectrumIDFormatType { cvParam = new mzIdentML110.Generated.CVParamType { accession = thermoRawFile ? "MS:1000768" : "MS:1001530", name = thermoRawFile ? "Thermo nativeID format" : "mzML unique identifier", cvRef = "PSI-MS" } } }; spectra_data_id++; } int sir_id = 0; int pe_index = 0; int p_index = 0; Dictionary <PeptideWithSetModifications, int> peptide_evidence_ids = new Dictionary <PeptideWithSetModifications, int>(); Dictionary <string, Tuple <int, HashSet <string> > > peptide_ids = new Dictionary <string, Tuple <int, HashSet <string> > >(); //key is peptide sequence, value is <peptide id for that peptide, peptide evidences>, list of spectra id's Dictionary <Tuple <string, int>, Tuple <int, int> > psm_per_scan = new Dictionary <Tuple <string, int>, Tuple <int, int> >(); //key is <filename, scan numer> value is <scan result id, scan item id #'s (could be more than one ID per scan)> var unambiguousPsms = psms.Where(psm => psm.FullSequence != null); foreach (PeptideSpectralMatch psm in unambiguousPsms) { foreach (PeptideWithSetModifications peptide in psm.CompactPeptides.SelectMany(c => c.Value.Item2).Distinct()) { //if first peptide on list hasn't been added, add peptide and peptide evidence if (!peptide_ids.TryGetValue(peptide.Sequence, out Tuple <int, HashSet <string> > peptide_id)) { peptide_id = new Tuple <int, HashSet <string> >(p_index, new HashSet <string>()); p_index++; _mzid.SequenceCollection.Peptide[peptide_id.Item1] = new mzIdentML110.Generated.PeptideType { PeptideSequence = peptide.BaseSequence, id = "P_" + peptide_id.Item1, Modification = new mzIdentML110.Generated.ModificationType[peptide.NumMods] }; int mod_id = 0; foreach (KeyValuePair <int, ModificationWithMass> mod in peptide.AllModsOneIsNterminus) { _mzid.SequenceCollection.Peptide[peptide_id.Item1].Modification[mod_id] = new mzIdentML110.Generated.ModificationType() { location = mod.Key - 1, locationSpecified = true, monoisotopicMassDelta = mod.Value.monoisotopicMass, residues = new string[1] { peptide.BaseSequence[Math.Min(Math.Max(0, mod.Key - 2), peptide.Length - 1)].ToString() }, monoisotopicMassDeltaSpecified = true, cvParam = new mzIdentML110.Generated.CVParamType[1] { GetUnimodCvParam(mod.Value) } }; mod_id++; } peptide_ids.Add(peptide.Sequence, peptide_id); } if (!peptide_evidence_ids.ContainsKey(peptide)) { _mzid.SequenceCollection.PeptideEvidence[pe_index] = new mzIdentML110.Generated.PeptideEvidenceType() { id = "PE_" + pe_index, peptide_ref = "P_" + peptide_id.Item1, dBSequence_ref = "DBS_" + peptide.Protein.Accession, isDecoy = peptide.Protein.IsDecoy, startSpecified = true, start = peptide.OneBasedStartResidueInProtein, endSpecified = true, end = peptide.OneBasedEndResidueInProtein, pre = peptide.PreviousAminoAcid.ToString(), post = (peptide.OneBasedEndResidueInProtein < peptide.Protein.BaseSequence.Length) ? peptide.Protein[peptide.OneBasedEndResidueInProtein].ToString() : "-", }; peptide_evidence_ids.Add(peptide, pe_index); pe_index++; } } if (!psm_per_scan.TryGetValue(new Tuple <string, int>(psm.FullFilePath, psm.ScanNumber), out Tuple <int, int> scan_result_scan_item)) //check to see if scan has already been added { scan_result_scan_item = new Tuple <int, int>(sir_id, 0); _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[scan_result_scan_item.Item1] = new mzIdentML110.Generated.SpectrumIdentificationResultType() { id = "SIR_" + scan_result_scan_item.Item1, spectraData_ref = "SD_" + spectral_ids[psm.FullFilePath].ToString(), spectrumID = "scan=" + psm.ScanNumber.ToString(), SpectrumIdentificationItem = new mzIdentML110.Generated.SpectrumIdentificationItemType[500], cvParam = new mzIdentML110.Generated.CVParamType[1] { new mzIdentML110.Generated.CVParamType { name = "scan start time", cvRef = "PSI-MS", accession = "MS:1000016", value = psm.ScanRetentionTime.ToString() } } }; psm_per_scan.Add(new Tuple <string, int>(psm.FullFilePath, psm.ScanNumber), scan_result_scan_item); sir_id++; } else { psm_per_scan[new Tuple <string, int>(psm.FullFilePath, psm.ScanNumber)] = new Tuple <int, int>(scan_result_scan_item.Item1, scan_result_scan_item.Item2 + 1); scan_result_scan_item = psm_per_scan[new Tuple <string, int>(psm.FullFilePath, psm.ScanNumber)]; } foreach (PeptideWithSetModifications p in psm.CompactPeptides.SelectMany(c => c.Value.Item2).Distinct()) { peptide_ids[p.Sequence].Item2.Add("SII_" + scan_result_scan_item.Item1 + "_" + scan_result_scan_item.Item2); } _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[scan_result_scan_item.Item1].SpectrumIdentificationItem[scan_result_scan_item.Item2] = new mzIdentML110.Generated.SpectrumIdentificationItemType() { rank = 1, chargeState = psm.ScanPrecursorCharge, id = "SII_" + scan_result_scan_item.Item1 + "_" + scan_result_scan_item.Item2, experimentalMassToCharge = Math.Round(psm.ScanPrecursorMonoisotopicPeakMz, 5), passThreshold = psm.FdrInfo.QValue <= 0.01, //NOTE:ONLY CAN HAVE ONE PEPTIDE REF PER SPECTRUM IDENTIFICATION ITEM peptide_ref = "P_" + peptide_ids[psm.FullSequence].Item1, PeptideEvidenceRef = new mzIdentML110.Generated.PeptideEvidenceRefType[psm.CompactPeptides.SelectMany(c => c.Value.Item2).Distinct().Count()], cvParam = new mzIdentML110.Generated.CVParamType[2] { new mzIdentML110.Generated.CVParamType { name = "MetaMorpheus:score", cvRef = "PSI-MS", accession = "MS:1002827", value = psm.Score.ToString() }, new mzIdentML110.Generated.CVParamType { accession = "MS:1002354", name = "PSM-level q-value", cvRef = "PSI-MS", value = psm.FdrInfo.QValue.ToString() } } }; if (psm.PeptideMonisotopicMass.HasValue) { _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[scan_result_scan_item.Item1].SpectrumIdentificationItem[scan_result_scan_item.Item2].calculatedMassToCharge = Math.Round(psm.PeptideMonisotopicMass.Value.ToMz(psm.ScanPrecursorCharge), 5); _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[scan_result_scan_item.Item1].SpectrumIdentificationItem[scan_result_scan_item.Item2].calculatedMassToChargeSpecified = true; } int pe = 0; foreach (PeptideWithSetModifications p in psm.CompactPeptides.SelectMany(c => c.Value.Item2).Distinct()) { _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[scan_result_scan_item.Item1].SpectrumIdentificationItem[scan_result_scan_item.Item2].PeptideEvidenceRef[pe] = new mzIdentML110.Generated.PeptideEvidenceRefType { peptideEvidence_ref = "PE_" + peptide_evidence_ids[p] }; pe++; } } _mzid.AnalysisProtocolCollection = new mzIdentML110.Generated.AnalysisProtocolCollectionType() { SpectrumIdentificationProtocol = new mzIdentML110.Generated.SpectrumIdentificationProtocolType[1] { new mzIdentML110.Generated.SpectrumIdentificationProtocolType { id = "SIP", analysisSoftware_ref = "AS_MetaMorpheus", SearchType = new mzIdentML110.Generated.ParamType { Item = new mzIdentML110.Generated.CVParamType { accession = "MS:1001083", name = "ms-ms search", cvRef = "PSI-MS" } }, AdditionalSearchParams = new mzIdentML110.Generated.ParamListType() { //TODO: ADD SEARCH PARAMS? Items = new mzIdentML110.Generated.AbstractParamType[2] { new mzIdentML110.Generated.CVParamType { accession = "MS:1001211", cvRef = "PSI-MS", name = "parent mass type mono" }, new mzIdentML110.Generated.CVParamType { accession = "MS:1001255", name = "fragment mass type mono", cvRef = "PSI-MS" }, } }, ModificationParams = new mzIdentML110.Generated.SearchModificationType[fixedMods.Count + variableMods.Count], Enzymes = new mzIdentML110.Generated.EnzymesType() { Enzyme = new mzIdentML110.Generated.EnzymeType[proteases.Count] }, FragmentTolerance = new mzIdentML110.Generated.CVParamType[2] { new mzIdentML110.Generated.CVParamType { accession = "MS:1001412", name = "search tolerance plus value", value = productTolerance.Value.ToString(), cvRef = "PSI-MS", unitAccession = productTolerance is PpmTolerance? "UO:0000169": "UO:0000221", unitName = productTolerance is PpmTolerance? "parts per million" : "dalton", unitCvRef = "UO" }, new mzIdentML110.Generated.CVParamType { accession = "MS:1001413", name = "search tolerance minus value", value = productTolerance.Value.ToString(), cvRef = "PSI-MS", unitAccession = productTolerance is PpmTolerance? "UO:0000169": "UO:0000221", unitName = productTolerance is PpmTolerance? "parts per million" : "dalton", unitCvRef = "UO" } }, ParentTolerance = new mzIdentML110.Generated.CVParamType[2] { new mzIdentML110.Generated.CVParamType { accession = "MS:1001412", name = "search tolerance plus value", value = parentTolerance.Value.ToString(), cvRef = "PSI-MS", unitAccession = parentTolerance is PpmTolerance? "UO:0000169": "UO:0000221", unitName = parentTolerance is PpmTolerance? "parts per million" : "dalton", unitCvRef = "UO" }, new mzIdentML110.Generated.CVParamType { accession = "MS:1001413", name = "search tolerance minus value", value = parentTolerance.Value.ToString(), cvRef = "PSI-MS", unitAccession = parentTolerance is PpmTolerance? "UO:0000169": "UO:0000221", unitName = parentTolerance is PpmTolerance? "parts per million" : "dalton", unitCvRef = "UO" } }, Threshold = new mzIdentML110.Generated.ParamListType() { Items = new mzIdentML110.Generated.CVParamType[1] { new mzIdentML110.Generated.CVParamType { accession = "MS:1001448", name = "pep:FDR threshold", cvRef = "PSI-MS", value = "0.01" } } } } } }; int protease_index = 0; foreach (Protease protease in proteases) { _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].Enzymes.Enzyme[protease_index] = new mzIdentML110.Generated.EnzymeType() { id = "E_" + protease_index, name = protease.Name, semiSpecific = protease.CleavageSpecificity == CleavageSpecificity.Semi, missedCleavagesSpecified = true, missedCleavages = missedCleavages, SiteRegexp = protease.SiteRegexp, EnzymeName = new mzIdentML110.Generated.ParamListType() { Items = new mzIdentML110.Generated.AbstractParamType[1] { new mzIdentML110.Generated.CVParamType { accession = protease.PsiMsAccessionNumber, name = protease.PsiMsName, cvRef = "PSI-MS" } } } }; protease_index++; } int mod_index = 0; foreach (ModificationWithMass mod in fixedMods) { _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].ModificationParams[mod_index] = new mzIdentML110.Generated.SearchModificationType() { fixedMod = true, massDelta = (float)mod.monoisotopicMass, residues = mod.motif.ToString(), cvParam = new mzIdentML110.Generated.CVParamType[1] { GetUnimodCvParam(mod) } }; mod_index++; } foreach (ModificationWithMass mod in variableMods) { _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].ModificationParams[mod_index] = new mzIdentML110.Generated.SearchModificationType() { fixedMod = false, massDelta = (float)mod.monoisotopicMass, residues = mod.motif.ToString(), cvParam = new mzIdentML110.Generated.CVParamType[1] { GetUnimodCvParam(mod) } }; mod_index++; } _mzid.AnalysisProtocolCollection.ProteinDetectionProtocol = new mzIdentML110.Generated.ProteinDetectionProtocolType() { id = "PDP", analysisSoftware_ref = "AS_MetaMorpheus", Threshold = new mzIdentML110.Generated.ParamListType() { Items = new mzIdentML110.Generated.CVParamType[1] { new mzIdentML110.Generated.CVParamType { accession = "MS:1001447", name = "prot:FDR threshold", cvRef = "PSI-MS", value = "0.01" } } } }; if (groups != null) { _mzid.DataCollection.AnalysisData.ProteinDetectionList = new mzIdentML110.Generated.ProteinDetectionListType() { id = "PDL", ProteinAmbiguityGroup = new mzIdentML110.Generated.ProteinAmbiguityGroupType[groups.Count] }; int group_id = 0; int protein_id = 0; foreach (EngineLayer.ProteinGroup proteinGroup in groups) { _mzid.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup[group_id] = new mzIdentML110.Generated.ProteinAmbiguityGroupType() { id = "PAG_" + group_id, ProteinDetectionHypothesis = new mzIdentML110.Generated.ProteinDetectionHypothesisType[proteinGroup.Proteins.Count] }; int pag_protein_index = 0; foreach (Protein protein in proteinGroup.Proteins) { _mzid.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup[group_id].ProteinDetectionHypothesis[pag_protein_index] = new mzIdentML110.Generated.ProteinDetectionHypothesisType() { id = "PDH_" + protein_id, dBSequence_ref = "DBS_" + protein.Accession, passThreshold = proteinGroup.QValue <= 0.01, // hardcoded as 1% FDR but we could change this to the provided threshold PeptideHypothesis = new mzIdentML110.Generated.PeptideHypothesisType[proteinGroup.AllPeptides.Count], cvParam = new mzIdentML110.Generated.CVParamType[4] { new mzIdentML110.Generated.CVParamType { accession = "MS:1002828", name = "MetaMorpheus:protein score", cvRef = "PSI-MS", value = proteinGroup.ProteinGroupScore.ToString() }, new mzIdentML110.Generated.CVParamType { accession = "MS:1002373", name = "protein group-level q-value", cvRef = "PSI-MS", value = proteinGroup.QValue.ToString() }, new mzIdentML110.Generated.CVParamType { accession = "MS:1001093", name = "sequence coverage", cvRef = "PSI-MS", value = proteinGroup.SequenceCoveragePercent.First().ToString() }, new mzIdentML110.Generated.CVParamType { accession = "MS:1001097", name = "distinct peptide sequences", cvRef = "PSI-MS", value = proteinGroup.UniquePeptides.Count.ToString() } } }; int peptide_id = 0; foreach (PeptideWithSetModifications peptide in proteinGroup.AllPeptides) { if (peptide_evidence_ids.ContainsKey(peptide)) { if (peptide.Protein == protein) { _mzid.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup[group_id].ProteinDetectionHypothesis[pag_protein_index].PeptideHypothesis[peptide_id] = new mzIdentML110.Generated.PeptideHypothesisType() { peptideEvidence_ref = "PE_" + peptide_evidence_ids[peptide], SpectrumIdentificationItemRef = new mzIdentML110.Generated.SpectrumIdentificationItemRefType[peptide_ids[peptide.Sequence].Item2.Count], }; int i = 0; foreach (string sii in peptide_ids[peptide.Sequence].Item2) { _mzid.DataCollection.AnalysisData.ProteinDetectionList.ProteinAmbiguityGroup[group_id].ProteinDetectionHypothesis[pag_protein_index].PeptideHypothesis[peptide_id].SpectrumIdentificationItemRef[i] = new mzIdentML110.Generated.SpectrumIdentificationItemRefType() { spectrumIdentificationItem_ref = sii }; i++; } peptide_id++; } } } pag_protein_index++; protein_id++; } group_id++; } } XmlWriter writer = XmlWriter.Create(outputPath, settings); _indexedSerializer.Serialize(writer, _mzid); writer.Close(); }
public void Mzid110Test() { XmlSerializer _indexedSerializer = new XmlSerializer(typeof(mzIdentML110.Generated.MzIdentMLType110)); var _mzid = new mzIdentML110.Generated.MzIdentMLType110() { DataCollection = new mzIdentML110.Generated.DataCollectionType() }; _mzid.DataCollection.AnalysisData = new mzIdentML110.Generated.AnalysisDataType() { SpectrumIdentificationList = new mzIdentML110.Generated.SpectrumIdentificationListType[1] }; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0] = new mzIdentML110.Generated.SpectrumIdentificationListType() { SpectrumIdentificationResult = new mzIdentML110.Generated.SpectrumIdentificationResultType[1] }; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0] = new mzIdentML110.Generated.SpectrumIdentificationResultType() { spectrumID = "spectrum 2", SpectrumIdentificationItem = new mzIdentML110.Generated.SpectrumIdentificationItemType[50] }; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0] = new mzIdentML110.Generated.SpectrumIdentificationItemType() { experimentalMassToCharge = 1134.2609130203 + 0.000001 * 1134.2609130203 + 0.000001, calculatedMassToCharge = 1134.26091302033, calculatedMassToChargeSpecified = true, chargeState = 3, cvParam = new mzIdentML110.Generated.CVParamType[1] { new mzIdentML110.Generated.CVParamType() { accession = "MS:1002354", value = "0.05" } } }; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[1] = new mzIdentML110.Generated.SpectrumIdentificationItemType(); _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0].Fragmentation = new mzIdentML110.Generated.IonTypeType[1]; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0].Fragmentation[0] = new mzIdentML110.Generated.IonTypeType() { FragmentArray = new mzIdentML110.Generated.FragmentArrayType[1] }; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0].Fragmentation[0].FragmentArray[0] = new mzIdentML110.Generated.FragmentArrayType() { values = new float[3] { 200, 300, 400 } }; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0].PeptideEvidenceRef = new mzIdentML110.Generated.PeptideEvidenceRefType[1]; _mzid.DataCollection.AnalysisData.SpectrumIdentificationList[0].SpectrumIdentificationResult[0].SpectrumIdentificationItem[0].PeptideEvidenceRef[0] = new mzIdentML110.Generated.PeptideEvidenceRefType() { peptideEvidence_ref = "PE_1" }; _mzid.DataCollection.Inputs = new mzIdentML110.Generated.InputsType() { SpectraData = new mzIdentML110.Generated.SpectraDataType[1] }; _mzid.DataCollection.Inputs.SpectraData[0] = new mzIdentML110.Generated.SpectraDataType() { FileFormat = new mzIdentML110.Generated.FileFormatType() }; _mzid.DataCollection.Inputs.SpectraData[0].FileFormat.cvParam = new mzIdentML110.Generated.CVParamType() { name = "mzML format" }; _mzid.SequenceCollection = new mzIdentML110.Generated.SequenceCollectionType() { PeptideEvidence = new mzIdentML110.Generated.PeptideEvidenceType[1] }; _mzid.SequenceCollection.PeptideEvidence[0] = new mzIdentML110.Generated.PeptideEvidenceType() { endSpecified = true, startSpecified = true, isDecoy = false, start = 2, end = 34, dBSequence_ref = "DB_1", peptide_ref = "P_1", id = "PE_1", }; _mzid.SequenceCollection.Peptide = new mzIdentML110.Generated.PeptideType[1]; _mzid.SequenceCollection.Peptide[0] = new mzIdentML110.Generated.PeptideType() { id = "P_1", PeptideSequence = "GPEAPPPALPAGAPPPCTAVTSDHLNSLLGNILR", Modification = new mzIdentML110.Generated.ModificationType[1] }; _mzid.SequenceCollection.DBSequence = new mzIdentML110.Generated.DBSequenceType[1]; _mzid.SequenceCollection.DBSequence[0] = new mzIdentML110.Generated.DBSequenceType() { id = "DB_1", name = "Protein name", accession = "ACCESSION", }; _mzid.SequenceCollection.Peptide[0].Modification[0] = new mzIdentML110.Generated.ModificationType() { locationSpecified = true, location = 17, monoisotopicMassDeltaSpecified = true, monoisotopicMassDelta = 57.02146373, cvParam = new mzIdentML110.Generated.CVParamType[1] }; _mzid.SequenceCollection.Peptide[0].Modification[0].cvParam[0] = new mzIdentML110.Generated.CVParamType() { accession = "MS:1001460", name = "unknown modification", value = "Carbamidomethyl", cvRef = "PSI-MS" }; _mzid.AnalysisProtocolCollection = new mzIdentML110.Generated.AnalysisProtocolCollectionType() { SpectrumIdentificationProtocol = new mzIdentML110.Generated.SpectrumIdentificationProtocolType[1] }; _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0] = new mzIdentML110.Generated.SpectrumIdentificationProtocolType() { ParentTolerance = new mzIdentML110.Generated.CVParamType[1] }; _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].ParentTolerance[0] = new mzIdentML110.Generated.CVParamType() { unitName = "dalton", value = "0.1" }; _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].FragmentTolerance = new mzIdentML110.Generated.CVParamType[1]; _mzid.AnalysisProtocolCollection.SpectrumIdentificationProtocol[0].FragmentTolerance[0] = new mzIdentML110.Generated.CVParamType() { unitName = "dalton", value = "0.01" }; TextWriter writer = new StreamWriter("myIdentifications.mzid"); _indexedSerializer.Serialize(writer, _mzid); writer.Close(); var identifications = new MzidIdentifications("myIdentifications.mzid"); Assert.AreEqual(1134.26091302033, identifications.CalculatedMassToCharge(0, 0)); Assert.AreEqual(3, identifications.ChargeState(0, 0)); Assert.AreEqual(1, identifications.Count); Assert.AreEqual(1134.26091302033 + 0.000001 * 1134.2609130203 + 0.000001, identifications.ExperimentalMassToCharge(0, 0), 1e-10); Assert.IsFalse(identifications.IsDecoy(0, 0)); Assert.AreEqual("MS:1001460", identifications.ModificationAcession(0, 0, 0)); Assert.AreEqual("PSI-MS", identifications.ModificationDictionary(0, 0, 0)); Assert.AreEqual("Carbamidomethyl", identifications.ModificationValue(0, 0, 0)); Assert.AreEqual(17, identifications.ModificationLocation(0, 0, 0)); Assert.AreEqual(57.02146373, identifications.ModificationMass(0, 0, 0)); Assert.AreEqual("spectrum 2", identifications.Ms2SpectrumID(0)); Assert.AreEqual(1, identifications.NumModifications(0, 0)); Assert.AreEqual("GPEAPPPALPAGAPPPCTAVTSDHLNSLLGNILR", identifications.PeptideSequenceWithoutModifications(0, 0)); Assert.AreEqual(0.1, identifications.ParentTolerance.Value); Assert.AreEqual(0.01, identifications.FragmentTolerance.Value); Assert.AreEqual(.05, identifications.QValue(0, 0)); Assert.AreEqual("Protein name", identifications.ProteinFullName(0, 0)); Assert.AreEqual("ACCESSION", identifications.ProteinAccession(0, 0)); Assert.AreEqual(new float[3] { 200, 300, 400 }, identifications.MatchedIons(0, 0, 0)); Assert.AreEqual(3, identifications.MatchedIonCounts(0, 0, 0)); Assert.AreEqual("2", identifications.StartResidueInProtein(0, 0)); Assert.AreEqual("34", identifications.EndResidueInProtein(0, 0)); Assert.AreEqual(2, identifications.NumPSMsFromScan(0)); }