/// <summary>
/// Serializes crosslink spectral matches to a pepXML file named
/// <c>fileName + ".pep.XML"</c> inside <paramref name="outputFolder"/>, then reports the
/// written path through <c>FinishedWritingFile</c>. Returns without writing when
/// <paramref name="items"/> is empty.
/// </summary>
/// <param name="items">Matches to write; one spectrum_query (holding one search_hit) is produced per entry. The first entry's FullFilePath supplies the run-level names.</param>
/// <param name="proteinList">Not read by this method; kept for caller compatibility.</param>
/// <param name="databasePath">Written as the "database" parameter and as the search database local_path.</param>
/// <param name="variableModifications">Each is listed as a "Variable Modifications: ..." search parameter.</param>
/// <param name="fixedModifications">Each is listed as a "Fixed Modifications: ..." search parameter.</param>
/// <param name="localizeableModificationTypes">Not read by this method; kept for caller compatibility.</param>
/// <param name="outputFolder">Folder the output file is created in.</param>
/// <param name="fileName">Output file name without the ".pep.XML" suffix.</param>
/// <param name="nestedIds">Forwarded unchanged to <c>FinishedWritingFile</c>.</param>
public void WritePepXML_xl(List<CrosslinkSpectralMatch> items, List<Protein> proteinList, string databasePath, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, string outputFolder, string fileName, List<string> nestedIds)
{
    if (!items.Any())
    {
        return;
    }

    // Builds one name/value entry; used for search parameters and for score entries.
    pepXML.Generated.nameValueType NewParameter(string parameterName, string parameterValue)
    {
        return new pepXML.Generated.nameValueType { name = parameterName, value = parameterValue };
    }

    // The hit-level score pair that every kind of search hit carries.
    pepXML.Generated.nameValueType[] TotalScoreAndQValue(CrosslinkSpectralMatch csm)
    {
        return new pepXML.Generated.nameValueType[]
        {
            NewParameter("xlTotalScore", csm.XLTotalScore.ToString()),
            NewParameter("Qvalue", csm.FdrInfo.QValue.ToString())
        };
    }

    XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis));
    var _pepxml = new pepXML.Generated.msms_pipeline_analysis();

    _pepxml.date = DateTime.Now;
    _pepxml.summary_xml = items[0].FullFilePath + ".pep.XML";

    // Concatenate the cleavage-inducing / cleavage-preventing motifs of the protease.
    var digestionMotifs = CommonParameters.DigestionParams.Protease.DigestionMotifs;
    string proteaseC = string.Join(string.Empty, digestionMotifs.Select(m => m.InducingCleavage));
    string proteaseNC = string.Join(string.Empty, digestionMotifs.Select(m => m.PreventingCleavage));

    Crosslinker crosslinker = XlSearchParameters.Crosslinker;

    string fileNameNoExtension = Path.GetFileNameWithoutExtension(items[0].FullFilePath);
    string filePathNoExtension = Path.ChangeExtension(items[0].FullFilePath, null);

    // Distinct() returns a lazy sequence, so calling ToString() on it would yield the iterator's
    // type name rather than the residues; build the string from the characters instead.
    string modSites = new string(
        crosslinker.CrosslinkerModSites.ToCharArray()
            .Concat(crosslinker.CrosslinkerModSites2.ToCharArray())
            .Distinct()
            .ToArray());

    var para = new List<pepXML.Generated.nameValueType>
    {
        NewParameter("threads", CommonParameters.MaxThreadsToUsePerFile.ToString()),
        NewParameter("database", databasePath),
        NewParameter("MS_data_file", items[0].FullFilePath),
        NewParameter("Cross-link precursor Mass Tolerance", CommonParameters.PrecursorMassTolerance.ToString()),
        NewParameter("Cross-linker type", crosslinker.CrosslinkerName),
        NewParameter("Cross-linker mass", crosslinker.TotalMass.ToString()),
        NewParameter("Cross-linker cleavable", crosslinker.Cleavable.ToString()),
        NewParameter("Cross-linker cleavable long mass", crosslinker.CleaveMassLong.ToString()),
        NewParameter("Cross-linker cleavable short mass", crosslinker.CleaveMassShort.ToString()),
        NewParameter("Cross-linker xl site", modSites),
        NewParameter("Generate decoy proteins", XlSearchParameters.DecoyType.ToString()),
        NewParameter("MaxMissed Cleavages", CommonParameters.DigestionParams.MaxMissedCleavages.ToString()),
        NewParameter("Protease", CommonParameters.DigestionParams.Protease.Name),
        NewParameter("Initiator Methionine", CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString()),
        NewParameter("Max Modification Isoforms", CommonParameters.DigestionParams.MaxModificationIsoforms.ToString()),
        NewParameter("Min Peptide Len", CommonParameters.DigestionParams.MinPeptideLength.ToString()),
        NewParameter("Max Peptide Len", CommonParameters.DigestionParams.MaxPeptideLength.ToString()),
        NewParameter("Product Mass Tolerance", CommonParameters.ProductMassTolerance.ToString()),
        NewParameter("Ions to search", String.Join(", ", DissociationTypeCollection.ProductsFromDissociationType[CommonParameters.DissociationType]))
    };

    foreach (var fixedMod in fixedModifications)
    {
        para.Add(NewParameter("Fixed Modifications: " + fixedMod.IdWithMotif, fixedMod.MonoisotopicMass.ToString()));
    }

    foreach (var variableMod in variableModifications)
    {
        para.Add(NewParameter("Variable Modifications: " + variableMod.IdWithMotif, variableMod.MonoisotopicMass.ToString()));
    }

    para.Add(NewParameter("Localize All Modifications", "true"));

    _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1]
    {
        new pepXML.Generated.msms_pipeline_analysisMsms_run_summary
        {
            base_name = filePathNoExtension,
            raw_data_type = "raw",
            raw_data = ".mzML",
            sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme()
            {
                name = CommonParameters.DigestionParams.Protease.Name,
                specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1]
                {
                    new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity
                    {
                        cut = proteaseC,
                        no_cut = proteaseNC,
                    }
                }
            },
            search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary
                {
                    base_name = filePathNoExtension,
                    search_engine_version = GlobalVariables.MetaMorpheusVersion,
                    precursor_mass_type = pepXML.Generated.massType.monoisotopic,
                    fragment_mass_type = pepXML.Generated.massType.monoisotopic,
                    search_id = 1,
                    search_database = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database
                    {
                        local_path = databasePath,
                        type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA,
                    },
                    enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint
                    {
                        enzyme = CommonParameters.DigestionParams.Protease.Name,
                        max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(),
                        //min_number_termini = "2"
                    },
                    parameter = para.ToArray()
                }
            },
        }
    };

    _pepxml.msms_run_summary[0].spectrum_query = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[items.Count];

    var searchHits = new List<pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit>();

    for (int i = 0; i < items.Count; i++)
    {
        var item = items[i];
        var alphaPeptide = item.BestMatchingPeptides.First().Peptide;

        // Modifications of the alpha (or only) peptide.
        var mods = new List<pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
        foreach (var modification in alphaPeptide.AllModsOneIsNterminus)
        {
            mods.Add(new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
            {
                mass = modification.Value.MonoisotopicMass.Value,

                // convert from one-based to zero-based (N-term is zero in the pepXML output)
                position = (modification.Key - 1).ToString()
            });
        }

        if (item.CrossType == PsmCrossType.Single)
        {
            // Plain peptide: no crosslinker mass involved.
            searchHits.Add(new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)item.ScanPrecursorMass,
                massdiff = (item.ScanPrecursorMass - item.PeptideMonisotopicMass.Value).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na,
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                search_score = TotalScoreAndQValue(item),
            });
        }
        else if (item.CrossType == PsmCrossType.DeadEnd
            || item.CrossType == PsmCrossType.DeadEndH2O
            || item.CrossType == PsmCrossType.DeadEndNH2
            || item.CrossType == PsmCrossType.DeadEndTris)
        {
            // Dead-end: the quenched crosslinker is reported as an extra modification at the link position.
            double crosslinkerDeadEndMass;
            switch (item.CrossType)
            {
                case PsmCrossType.DeadEndNH2:
                    crosslinkerDeadEndMass = crosslinker.DeadendMassNH2;
                    break;

                case PsmCrossType.DeadEndTris:
                    crosslinkerDeadEndMass = crosslinker.DeadendMassTris;
                    break;

                default:
                    crosslinkerDeadEndMass = crosslinker.DeadendMassH2O;
                    break;
            }

            mods.Add(new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
            {
                mass = crosslinkerDeadEndMass,
                position = item.LinkPositions.First().ToString()
            });

            searchHits.Add(new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)item.ScanPrecursorMass,
                massdiff = (item.ScanPrecursorMass - item.PeptideMonisotopicMass.Value - crosslinkerDeadEndMass).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na,
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                search_score = TotalScoreAndQValue(item),
            });
        }
        else if (item.CrossType == PsmCrossType.Inter
            || item.CrossType == PsmCrossType.Intra
            || item.CrossType == PsmCrossType.Cross)
        {
            // Two linked peptides: the hit itself carries placeholders, the peptides live under xlink.
            var betaPeptide = item.BetaPeptide.BestMatchingPeptides.First().Peptide;

            var modsBeta = new List<pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
            foreach (var betaModification in betaPeptide.AllModsOneIsNterminus)
            {
                modsBeta.Add(new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
                {
                    mass = betaModification.Value.MonoisotopicMass.Value,

                    // convert from one-based to zero-based (N-term is zero in the pepXML output)
                    position = (betaModification.Key - 1).ToString()
                });
            }

            var alpha = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
            {
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)item.PeptideMonisotopicMass.Value,
                complement_mass = (float)(item.ScanPrecursorMass - alphaPeptide.MonoisotopicMass),
                designation = "alpha",
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                xlink_score = new pepXML.Generated.nameValueType[]
                {
                    NewParameter("xlscore", item.XLTotalScore.ToString()),
                    NewParameter("link", item.LinkPositions.First().ToString()),
                }
            };

            var beta = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
            {
                peptide = betaPeptide.BaseSequence,
                peptide_prev_aa = betaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = betaPeptide.NextAminoAcid.ToString(),
                protein = betaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)betaPeptide.MonoisotopicMass,
                complement_mass = (float)(item.ScanPrecursorMass - betaPeptide.MonoisotopicMass),
                designation = "beta",
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = modsBeta.ToArray() },
                xlink_score = new pepXML.Generated.nameValueType[]
                {
                    NewParameter("xlscore", item.BetaPeptide.Score.ToString()),
                    NewParameter("link", item.BetaPeptide.LinkPositions.First().ToString()),
                }
            };

            var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[2] { alpha, beta };

            searchHits.Add(new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = "-",
                peptide_prev_aa = "-",
                peptide_next_aa = "-",
                protein = "-",
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)item.ScanPrecursorMass,
                massdiff = (item.ScanPrecursorMass - betaPeptide.MonoisotopicMass - alphaPeptide.MonoisotopicMass - crosslinker.TotalMass).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.xl,
                xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink
                {
                    identifier = crosslinker.CrosslinkerName,
                    mass = (float)crosslinker.TotalMass,
                    linked_peptide = cross
                },
                search_score = TotalScoreAndQValue(item)
            });
        }
        else if (item.CrossType == PsmCrossType.Loop)
        {
            // Loop link: one peptide, two link positions recorded under xlink.
            var thePeptide = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
            {
                xlink_score = new pepXML.Generated.nameValueType[]
                {
                    NewParameter("link", item.LinkPositions.First().ToString()),
                    NewParameter("link", item.LinkPositions[1].ToString())
                }
            };

            var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[1] { thePeptide };

            searchHits.Add(new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)item.ScanPrecursorMass,
                massdiff = (item.ScanPrecursorMass - alphaPeptide.MonoisotopicMass - crosslinker.LoopMass).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.loop,
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink
                {
                    identifier = crosslinker.CrosslinkerName,
                    mass = (float)crosslinker.TotalMass,
                    linked_peptide = cross
                },
                search_score = TotalScoreAndQValue(item)
            });
        }
    }

    // searchHits[i] is paired with items[i]; this relies on every item having matched one of
    // the cross-type branches above so that both lists have the same length and order.
    for (int i = 0; i < items.Count; i++)
    {
        _pepxml.msms_run_summary[0].spectrum_query[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query()
        {
            spectrum = fileNameNoExtension + "." + items[i].ScanNumber.ToString(),
            start_scan = Convert.ToUInt32(items[i].ScanNumber),
            end_scan = Convert.ToUInt32(items[i].ScanNumber),
            precursor_neutral_mass = (float)items[i].ScanPrecursorMass,
            assumed_charge = items[i].ScanPrecursorCharge.ToString(),
            index = Convert.ToUInt32(i + 1),
            // NOTE(review): the x60 suggests ScanRetentionTime is in minutes - confirm.
            retention_time_sec = (float)(items[i].ScanRetentionTime * 60),
            search_result = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result
                {
                    search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1]
                    {
                        searchHits[i]
                    }
                }
            }
        };
    }

    string outputPath = Path.Combine(outputFolder, fileName + ".pep.XML");

    // using guarantees the file handle is released even if serialization throws.
    using (TextWriter writer = new StreamWriter(outputPath))
    {
        _indexedSerializer.Serialize(writer, _pepxml);
    }

    FinishedWritingFile(outputPath, nestedIds);
}
/// <summary>
/// Serializes the peptide spectral matches that pass the q-value filter to a pepXML file at
/// <paramref name="outputPath"/>. Returns without writing when no match passes the filter.
/// </summary>
/// <param name="psms">Candidate matches. Only those with <c>FdrInfo.QValue &lt;= qValueFilter</c> and <c>FdrInfo.QValueNotch &lt; qValueFilter</c> are written. The first surviving match's FullFilePath supplies the run-level names.</param>
/// <param name="database">Searched databases; only the first entry's FilePath is written.</param>
/// <param name="variableModifications">Each is listed as a "Variable Modifications: ..." search parameter.</param>
/// <param name="fixedModifications">Each is listed as a "Fixed Modifications: ..." search parameter.</param>
/// <param name="CommonParameters">Source of the digestion, tolerance and ion-type settings echoed into the file.</param>
/// <param name="outputPath">Full path of the file to create.</param>
/// <param name="qValueFilter">Threshold applied to the matches (see <paramref name="psms"/>).</param>
public static void WritePepXml(List<PeptideSpectralMatch> psms, List<DbForTask> database, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, CommonParameters CommonParameters, string outputPath, double qValueFilter)
{
    // TODO: needs a unit test

    // NOTE(review): QValue uses <= while QValueNotch uses < - confirm the asymmetry is intended.
    psms = psms.Where(p => p.FdrInfo.QValue <= qValueFilter && p.FdrInfo.QValueNotch < qValueFilter).ToList();

    if (!psms.Any())
    {
        return;
    }

    // Builds one name/value entry; used for search parameters and for score entries.
    pepXML.Generated.nameValueType NewParameter(string parameterName, string parameterValue)
    {
        return new pepXML.Generated.nameValueType { name = parameterName, value = parameterValue };
    }

    XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis));
    var _pepxml = new pepXML.Generated.msms_pipeline_analysis();

    _pepxml.date = DateTime.Now;
    _pepxml.summary_xml = psms[0].FullFilePath + ".pep.XML";

    string proteaseNC = string.Join(string.Empty, CommonParameters.DigestionParams.Protease.SequencesPreventingCleavage);
    string proteaseC = string.Join(string.Empty, CommonParameters.DigestionParams.Protease.SequencesInducingCleavage);

    string fileNameNoExtension = Path.GetFileNameWithoutExtension(psms[0].FullFilePath);
    string filePathNoExtension = Path.ChangeExtension(psms[0].FullFilePath, null);

    var para = new List<pepXML.Generated.nameValueType>
    {
        NewParameter("threads", CommonParameters.MaxThreadsToUsePerFile.ToString()),
        NewParameter("database", database.First().FilePath),
        NewParameter("MS_data_file", psms[0].FullFilePath),
        NewParameter("MaxMissed Cleavages", CommonParameters.DigestionParams.MaxMissedCleavages.ToString()),
        NewParameter("Protease", CommonParameters.DigestionParams.Protease.Name),
        NewParameter("Initiator Methionine", CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString()),
        NewParameter("Max Modification Isoforms", CommonParameters.DigestionParams.MaxModificationIsoforms.ToString()),
        NewParameter("Min Peptide Len", CommonParameters.DigestionParams.MinPeptideLength.ToString()),
        NewParameter("Max Peptide Len", CommonParameters.DigestionParams.MaxPeptideLength.ToString()),
        NewParameter("Product Mass Tolerance", CommonParameters.ProductMassTolerance.ToString()),
        NewParameter("Ions to search", "B " + CommonParameters.BIons.ToString() + " Y " + CommonParameters.YIons.ToString() + " C " + CommonParameters.CIons.ToString() + " Z " + CommonParameters.ZdotIons.ToString()),
        // NOTE(review): reports CommonParameters.QValueOutputFilter, not the qValueFilter
        // argument that was actually applied above - confirm callers always pass the same value.
        NewParameter("Q-value Filter", CommonParameters.QValueOutputFilter.ToString())
    };

    foreach (var item in fixedModifications)
    {
        para.Add(NewParameter("Fixed Modifications: " + item.id, item.monoisotopicMass.ToString()));
    }

    foreach (var item in variableModifications)
    {
        para.Add(NewParameter("Variable Modifications: " + item.id, item.monoisotopicMass.ToString()));
    }

    para.Add(NewParameter("Localize All Modifications", "true"));

    _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1]
    {
        new pepXML.Generated.msms_pipeline_analysisMsms_run_summary
        {
            base_name = filePathNoExtension,
            raw_data_type = "raw",
            raw_data = ".mzML", //TODO: use file format of spectra file used
            sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme()
            {
                name = CommonParameters.DigestionParams.Protease.Name,
                specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1]
                {
                    new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity
                    {
                        cut = proteaseC,
                        no_cut = proteaseNC,
                    }
                }
            },
            search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary
                {
                    base_name = filePathNoExtension,
                    // TODO: get MetaMorpheus recognized as a search engine type
                    //search_engine = pepXML.Generated.engineType.MetaMorpheus
                    search_engine_version = GlobalVariables.MetaMorpheusVersion,
                    precursor_mass_type = pepXML.Generated.massType.monoisotopic,
                    fragment_mass_type = pepXML.Generated.massType.monoisotopic,
                    search_id = 1,

                    //generate database information
                    //TODO: multiple databases
                    search_database = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database
                    {
                        local_path = database.First().FilePath,
                        type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA,
                    },
                    enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint
                    {
                        enzyme = CommonParameters.DigestionParams.Protease.Name,
                        max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(),
                    },
                    parameter = para.ToArray()
                }
            },
        }
    };

    var spectrumQueries = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[psms.Count];
    _pepxml.msms_run_summary[0].spectrum_query = spectrumQueries;

    for (int i = 0; i < psms.Count; i++)
    {
        var psm = psms[i];

        // The first peptide of the first compact-peptide entry supplies the flanking residues,
        // modifications and default protein accession for this hit.
        PeptideWithSetModifications peptide = psm.CompactPeptides.First().Value.Item2.First();

        var mods = new List<pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
        foreach (var mod in peptide.AllModsOneIsNterminus)
        {
            mods.Add(new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
            {
                mass = mod.Value.monoisotopicMass,
                // keys are shifted down by one before being written as positions
                position = (mod.Key - 1).ToString()
            });
        }

        // Materialized once because it is both joined and counted below.
        var proteinAccessions = psm.CompactPeptides
            .SelectMany(b => b.Value.Item2)
            .Select(b => b.Protein.Accession)
            .Distinct()
            .ToList();

        var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
        {
            // TODO: handle PSM ambiguity if pepXML supports it (base sequence, mod localization, protein)
            // TODO: add target/decoy/contaminant designation for each PSM
            // TODO: add amino acid substitution
            hit_rank = 1,
            peptide = psm.BaseSequence ?? "Ambiguous",
            peptide_prev_aa = peptide.PreviousAminoAcid.ToString(),
            peptide_next_aa = peptide.NextAminoAcid.ToString(),
            protein = peptide.Protein.Accession ?? string.Join("|", proteinAccessions),
            num_tot_proteins = (uint)proteinAccessions.Count,
            calc_neutral_pep_mass = (float)(psm.PeptideMonisotopicMass ?? double.NaN),
            massdiff = (psm.PeptideMonisotopicMass != null)
                ? (psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value).ToString()
                : "Ambiguous",

            // Always attach the collected modifications. The previous condition only attached
            // them when the list was empty, which dropped every real modification from the output.
            modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
            search_score = new pepXML.Generated.nameValueType[]
            {
                NewParameter("Score", psm.Score.ToString()),
                NewParameter("Qvalue", psm.FdrInfo.QValue.ToString())
            },
        };

        spectrumQueries[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query()
        {
            spectrum = fileNameNoExtension + "." + psm.ScanNumber.ToString(),
            start_scan = Convert.ToUInt32(psm.ScanNumber),
            end_scan = Convert.ToUInt32(psm.ScanNumber),
            precursor_neutral_mass = (float)psm.ScanPrecursorMass,
            assumed_charge = psm.ScanPrecursorCharge.ToString(),
            index = Convert.ToUInt32(i + 1),
            // NOTE(review): the x60 suggests ScanRetentionTime is in minutes - confirm.
            retention_time_sec = (float)(psm.ScanRetentionTime * 60),
            search_result = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result
                {
                    search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1]
                    {
                        searchHit
                    }
                }
            }
        };
    }

    // using guarantees the file handle is released even if serialization throws.
    using (TextWriter writer = new StreamWriter(outputPath))
    {
        _indexedSerializer.Serialize(writer, _pepxml);
    }
}
private void WritePepXML_xl(List <PsmCross> items, List <DbForTask> dbFilenameList, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <string> localizeableModificationTypes, string outputFolder, string fileName, List <string> nestedIds) { XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis)); var _pepxml = new pepXML.Generated.msms_pipeline_analysis(); #region Add element to pepXML _pepxml.date = DateTime.Now; _pepxml.summary_xml = items[0].FullFilePath + ".pep.xml"; string proteaseC = ""; string proteaseNC = ""; foreach (var x in CommonParameters.DigestionParams.Protease.SequencesInducingCleavage) { proteaseC += x; } foreach (var x in CommonParameters.DigestionParams.Protease.SequencesPreventingCleavage) { proteaseNC += x; } CrosslinkerTypeClass crosslinker = new CrosslinkerTypeClass().SelectCrosslinker(XlSearchParameters.CrosslinkerType); var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, XlSearchParameters.DecoyType, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> unknownModifications)).ToList(); uint proteinTot = Convert.ToUInt32(proteinList.Count); string fileNameNoExtension = Path.GetFileNameWithoutExtension(items[0].FullFilePath); string filePathNoExtension = Path.ChangeExtension(items[0].FullFilePath, null); var para = new List <pepXML.Generated.nameValueType>(); { para.Add(new pepXML.Generated.nameValueType { name = "threads", value = "" }); para.Add(new pepXML.Generated.nameValueType { name = "database", value = dbFilenameList[0].FilePath }); para.Add(new pepXML.Generated.nameValueType { name = "MS_data_file", value = items[0].FullFilePath }); para.Add(new pepXML.Generated.nameValueType { name = "Search with All Possible Beta Peptides", value = XlSearchParameters.CrosslinkSearchWithAllBeta.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-link Precusor Mass 
Tolence", value = XlSearchParameters.XlPrecusorMsTl.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker Type", value = crosslinker.CrosslinkerName }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker mass", value = crosslinker.TotalMass.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker cleavable", value = crosslinker.Cleavable.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker cleavable long mass", value = crosslinker.CleaveMassLong.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker cleavable short mass", value = crosslinker.CleaveMassShort.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker xl site", value = crosslinker.CrosslinkerModSite.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Generate decoy proteins", value = XlSearchParameters.DecoyType.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "MaxMissed Cleavages", value = CommonParameters.DigestionParams.MaxMissedCleavages.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Protease", value = CommonParameters.DigestionParams.Protease.Name }); para.Add(new pepXML.Generated.nameValueType { name = "Initiator Methionine", value = CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Max Modification Isoforms", value = CommonParameters.DigestionParams.MaxModificationIsoforms.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Min Peptide Len", value = CommonParameters.DigestionParams.MinPeptideLength.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Max Peptide Len", value = CommonParameters.DigestionParams.MaxPeptideLength.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Product Mass Tolerance", value = CommonParameters.ProductMassTolerance.ToString() }); 
para.Add(new pepXML.Generated.nameValueType { name = "Ions to search", value = "B " + CommonParameters.BIons.ToString() + " Y " + CommonParameters.YIons.ToString() + " C " + CommonParameters.CIons.ToString() + " Z " + CommonParameters.ZdotIons.ToString() }); para.Add(new pepXML.Generated.nameValueType { name = "Allowed Beta Precusor Mass Difference", value = XlSearchParameters.XlBetaPrecusorMsTl.ToString() }); foreach (var item in fixedModifications) { para.Add(new pepXML.Generated.nameValueType { name = "Fixed Modifications: " + item.id, value = item.monoisotopicMass.ToString() }); } foreach (var item in variableModifications) { para.Add(new pepXML.Generated.nameValueType { name = "Variable Modifications: " + item.id, value = item.monoisotopicMass.ToString() }); } para.Add(new pepXML.Generated.nameValueType { name = "Localize All Modifications", value = CommonParameters.LocalizeAll.ToString() }); } _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1] { new pepXML.Generated.msms_pipeline_analysisMsms_run_summary { base_name = filePathNoExtension, raw_data_type = "raw", raw_data = ".mzML", sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme() { name = CommonParameters.DigestionParams.Protease.Name, specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1] { new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity { cut = proteaseC, no_cut = proteaseNC, } } }, search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1] { new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary { base_name = filePathNoExtension, //search_engine = pepXML.Generated.engineType.Kojak, search_engine_version = GlobalVariables.MetaMorpheusVersion, precursor_mass_type = pepXML.Generated.massType.monoisotopic, fragment_mass_type = pepXML.Generated.massType.monoisotopic, search_id = 1, search_database = 
new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database { local_path = dbFilenameList[0].FilePath, type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA, }, enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint { enzyme = CommonParameters.DigestionParams.Protease.Name, max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(), //min_number_termini = "2" }, parameter = para.ToArray() } }, } }; _pepxml.msms_run_summary[0].spectrum_query = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[items.Count]; var searchHits = new List <pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit>(); for (int i = 0; i < items.Count; i++) { int modsFixedNum = items[i].CompactPeptides.First().Value.Item2.First().allModsOneIsNterminus.Count; var mods = new List <pepXML.Generated.modInfoDataTypeMod_aminoacid_mass> (); for (int j = 0; j < modsFixedNum; j++) { var mod = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass { mass = items[i].CompactPeptides.First().Value.Item2.First().allModsOneIsNterminus.Values.Select(p => p.monoisotopicMass).ToList()[j], position = items[i].CompactPeptides.First().Value.Item2.First().allModsOneIsNterminus.Keys.ToList()[j].ToString() }; mods.Add(mod); } if (items[i].CrossType == PsmCrossType.Singe) { var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit { hit_rank = 1, peptide = items[i].BaseSequence, peptide_prev_aa = items[i].CompactPeptides.First().Value.Item2.First().PreviousAminoAcid.ToString(), peptide_next_aa = items[i].CompactPeptides.First().Value.Item2.First().NextAminoAcid.ToString(), protein = items[i].CompactPeptides.First().Value.Item2.First().Protein.Accession, num_tot_proteins = 1, calc_neutral_pep_mass = 
(float)items[i].ScanPrecursorMonoisotopicPeakMz * items[i].ScanPrecursorCharge, massdiff = (items[i].ScanPrecursorMass - items[i].PeptideMonisotopicMass.Value).ToString(), xlink_typeSpecified = true, xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na, modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() }, search_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "xlTotalScore", value = items[i].XLTotalScore.ToString() }, new pepXML.Generated.nameValueType { name = "Qvalue", value = items[i].FdrInfo.QValue.ToString() } }, }; searchHits.Add(searchHit); } if (items[i].CrossType == PsmCrossType.DeadEnd || items[i].CrossType == PsmCrossType.DeadEndH2O || items[i].CrossType == PsmCrossType.DeadEndNH2 || items[i].CrossType == PsmCrossType.DeadEndTris) { double crosslinkerDeadEndMass = 0; switch (items[i].CrossType) { case PsmCrossType.DeadEndNH2: crosslinkerDeadEndMass = crosslinker.DeadendMassNH2; break; case PsmCrossType.DeadEndTris: crosslinkerDeadEndMass = crosslinker.DeadendMassTris; break; default: crosslinkerDeadEndMass = crosslinker.DeadendMassH2O; break; } var mod = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass { mass = crosslinkerDeadEndMass, position = items[i].XlPos.ToString() }; mods.Add(mod); var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit { hit_rank = 1, peptide = items[i].BaseSequence, peptide_prev_aa = items[i].CompactPeptides.First().Value.Item2.First().PreviousAminoAcid.ToString(), peptide_next_aa = items[i].CompactPeptides.First().Value.Item2.First().NextAminoAcid.ToString(), protein = items[i].CompactPeptides.First().Value.Item2.First().Protein.Accession, num_tot_proteins = 1, calc_neutral_pep_mass = (float)items[i].ScanPrecursorMonoisotopicPeakMz * items[i].ScanPrecursorCharge, massdiff = (items[i].ScanPrecursorMass - 
items[i].PeptideMonisotopicMass.Value - crosslinkerDeadEndMass).ToString(), xlink_typeSpecified = true, xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na, modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() }, search_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "xlTotalScore", value = items[i].XLTotalScore.ToString() }, new pepXML.Generated.nameValueType { name = "Qvalue", value = items[i].FdrInfo.QValue.ToString() } }, }; searchHits.Add(searchHit); } if (items[i].CrossType == PsmCrossType.Inter || items[i].CrossType == PsmCrossType.Intra || items[i].CrossType == PsmCrossType.Cross) { int modsFixedNumBeta = items[i].BetaPsmCross.CompactPeptides.First().Value.Item2.First().allModsOneIsNterminus.Count; var modsBeta = new List <pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>(); for (int j = 0; j < modsFixedNumBeta; j++) { var modBeta = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass { mass = items[i].BetaPsmCross.CompactPeptides.First().Value.Item2.First().allModsOneIsNterminus.Values.Select(p => p.monoisotopicMass).ToList()[j], position = items[i].BetaPsmCross.CompactPeptides.First().Value.Item2.First().allModsOneIsNterminus.Keys.ToList()[j].ToString() }; modsBeta.Add(modBeta); } var alpha = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide { peptide = items[i].BaseSequence, peptide_prev_aa = items[i].CompactPeptides.First().Value.Item2.First().PreviousAminoAcid.ToString(), peptide_next_aa = items[i].CompactPeptides.First().Value.Item2.First().NextAminoAcid.ToString(), protein = items[i].CompactPeptides.First().Value.Item2.First().Protein.Accession, num_tot_proteins = 1, calc_neutral_pep_mass = (float)items[i].PeptideMonisotopicMass.Value, complement_mass = (float)(items[i].ScanPrecursorMass - items[i].PeptideMonisotopicMass.Value), 
designation = "alpha", modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() }, xlink_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "xlscore", value = items[i].XLBestScore.ToString() }, new pepXML.Generated.nameValueType { name = "link", value = items[i].XlPos.ToString() }, } }; var beta = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide { peptide = items[i].BetaPsmCross.BaseSequence, peptide_prev_aa = items[i].BetaPsmCross.CompactPeptides.First().Value.Item2.First().PreviousAminoAcid.ToString(), peptide_next_aa = items[i].BetaPsmCross.CompactPeptides.First().Value.Item2.First().NextAminoAcid.ToString(), protein = items[i].BetaPsmCross.CompactPeptides.First().Value.Item2.First().Protein.Accession, num_tot_proteins = 1, calc_neutral_pep_mass = (float)items[i].BetaPsmCross.PeptideMonisotopicMass.Value, complement_mass = (float)(items[i].ScanPrecursorMass - items[i].PeptideMonisotopicMass.Value), designation = "beta", modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = modsBeta.ToArray() }, xlink_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "xlscore", value = items[i].BetaPsmCross.XLBestScore.ToString() }, new pepXML.Generated.nameValueType { name = "link", value = items[i].BetaPsmCross.XlPos.ToString() }, } }; var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[2] { alpha, beta }; var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit { hit_rank = 1, peptide = "-", peptide_prev_aa = "-", peptide_next_aa = "-", protein = "-", num_tot_proteins = 1, calc_neutral_pep_mass = (float)items[i].ScanPrecursorMonoisotopicPeakMz * items[i].ScanPrecursorCharge, massdiff = (items[i].ScanPrecursorMass - 
items[i].BetaPsmCross.PeptideMonisotopicMass.Value - items[i].PeptideMonisotopicMass.Value - crosslinker.TotalMass).ToString(), xlink_typeSpecified = true, xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.xl, xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink { identifier = crosslinker.CrosslinkerName, mass = (float)crosslinker.TotalMass, linked_peptide = cross }, search_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "xlTotalScore", value = items[i].XLTotalScore.ToString() }, new pepXML.Generated.nameValueType { name = "Qvalue", value = items[i].FdrInfo.QValue.ToString() } } }; searchHits.Add(searchHit); } if (items[i].CrossType == PsmCrossType.Loop) { var thePeptide = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide { xlink_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "link", value = items[i].XlPos.ToString() }, new pepXML.Generated.nameValueType { name = "link", value = items[i].XlPos2.ToString() } } }; var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[1] { thePeptide }; var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit { hit_rank = 1, peptide = items[i].BaseSequence, peptide_prev_aa = items[i].CompactPeptides.First().Value.Item2.First().PreviousAminoAcid.ToString(), peptide_next_aa = items[i].CompactPeptides.First().Value.Item2.First().NextAminoAcid.ToString(), protein = items[i].CompactPeptides.First().Value.Item2.First().Protein.Accession, num_tot_proteins = 1, calc_neutral_pep_mass = (float)items[i].ScanPrecursorMonoisotopicPeakMz * items[i].ScanPrecursorCharge, massdiff = (items[i].ScanPrecursorMass - items[i].PeptideMonisotopicMass.Value - 
crosslinker.LoopMass).ToString(), xlink_typeSpecified = true, xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.loop, modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() }, xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink { identifier = crosslinker.CrosslinkerName, mass = (float)crosslinker.TotalMass, linked_peptide = cross }, search_score = new pepXML.Generated.nameValueType[] { new pepXML.Generated.nameValueType { name = "xlTotalScore", value = items[i].XLTotalScore.ToString() }, new pepXML.Generated.nameValueType { name = "Qvalue", value = items[i].FdrInfo.QValue.ToString() } } }; searchHits.Add(searchHit); } } for (int i = 0; i < items.Count; i++) { _pepxml.msms_run_summary[0].spectrum_query[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query() { spectrum = fileNameNoExtension + "." + items[i].ScanNumber.ToString(), start_scan = Convert.ToUInt32(items[i].ScanNumber), end_scan = Convert.ToUInt32(items[i].ScanNumber), precursor_neutral_mass = (float)items[i].ScanPrecursorMonoisotopicPeakMz * items[i].ScanPrecursorCharge, assumed_charge = items[i].ScanPrecursorCharge.ToString(), index = Convert.ToUInt32(i + 1), retention_time_sec = (float)items[i].ScanRetentionTime, search_result = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1] { new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result { search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1] { searchHits[i] } } } }; } #endregion Add element to pepXML TextWriter writer = new StreamWriter(Path.Combine(outputFolder, fileName + ".pep.xml")); _indexedSerializer.Serialize(writer, _pepxml); writer.Close(); SucessfullyFinishedWritingFile(Path.Combine(outputFolder, fileName + ".pep.xml"), nestedIds); }