/// <summary>
/// Stamps every match of one scan with the scan's second-best XL total score and
/// returns the matches ordered best-first.
/// </summary>
/// <param name="csmsPerScan">All crosslink spectral matches belonging to a single scan. May be empty.</param>
/// <param name="commonParameters">Supplies <c>ScoreCutoff</c>, used as the second-best score when the scan has fewer than two matches.</param>
/// <returns>
/// A new list ordered by <c>XLTotalScore</c> descending; ties are broken by the alpha
/// full sequence concatenated with the beta full sequence (ascending).
/// </returns>
public static List<CrosslinkSpectralMatch> SortOneListCsmsSetSecondBestScore(List<CrosslinkSpectralMatch> csmsPerScan, CommonParameters commonParameters)
{
    // This possibly needs to be doubled for xlinks. But, since each list can be a mix of
    // xlinks and nonxlinks we just leave as is for now.
    double fallbackScore = commonParameters.ScoreCutoff;

    // Skip(1) instead of RemoveAt(0): an empty scan list no longer throws
    // ArgumentOutOfRangeException, it simply falls back to the cutoff.
    double secondBestScore = csmsPerScan
        .Select(s => s.XLTotalScore)
        .OrderByDescending(s => s)
        .Skip(1)
        .DefaultIfEmpty(fallbackScore)
        .First();

    foreach (CrosslinkSpectralMatch csm in csmsPerScan)
    {
        csm.SecondBestXlScore = secondBestScore;
    }

    return csmsPerScan
        .OrderByDescending(c => c.XLTotalScore)
        .ThenBy(c => c.FullSequence + (c.BetaPeptide != null ? c.BetaPeptide.FullSequence : ""))
        .ToList();
}
/// <summary>
/// Serializes crosslink spectral matches into a pepXML document written to
/// <c>outputFolder/fileName.pep.XML</c>. Does nothing when <paramref name="items"/> is empty.
/// </summary>
/// <param name="items">Matches to write; the first item's <c>FullFilePath</c> names the run.</param>
/// <param name="proteinList">Not read by this method.</param>
/// <param name="databasePath">Recorded as the search database path.</param>
/// <param name="variableModifications">Listed in the search-summary parameters.</param>
/// <param name="fixedModifications">Listed in the search-summary parameters.</param>
/// <param name="localizeableModificationTypes">Not read by this method.</param>
/// <param name="outputFolder">Directory the file is written to.</param>
/// <param name="fileName">Output file name without the <c>.pep.XML</c> suffix.</param>
/// <param name="CommonParameters">Digestion, tolerance and threading settings echoed into the file.</param>
/// <param name="XlSearchParameters">Supplies the crosslinker and decoy type.</param>
public static void WritePepXML_xl(List<CrosslinkSpectralMatch> items, List<Protein> proteinList, string databasePath, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, string outputFolder, string fileName, CommonParameters CommonParameters, XlSearchParameters XlSearchParameters)
{
    if (!items.Any())
    {
        return;
    }

    XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis));
    var _pepxml = new pepXML.Generated.msms_pipeline_analysis();

    _pepxml.date = DateTime.Now;
    _pepxml.summary_xml = items[0].FullFilePath + ".pep.XML";

    // Concatenate the cleavage-inducing / cleavage-preventing residues of every digestion motif.
    string proteaseC = string.Concat(CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.InducingCleavage));
    string proteaseNC = string.Concat(CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.PreventingCleavage));

    Crosslinker crosslinker = XlSearchParameters.Crosslinker;

    string fileNameNoExtension = Path.GetFileNameWithoutExtension(items[0].FullFilePath);
    string filePathNoExtension = Path.ChangeExtension(items[0].FullFilePath, null);

    // Build a real string from the distinct site characters. Calling ToString() on the
    // Distinct() iterator would emit the iterator's type name instead of the residues.
    string modSites = new string(crosslinker.CrosslinkerModSites.ToCharArray().Concat(crosslinker.CrosslinkerModSites2.ToCharArray()).Distinct().ToArray());

    var para = new List<pepXML.Generated.nameValueType>();
    {
        para.Add(new pepXML.Generated.nameValueType { name = "threads", value = CommonParameters.MaxThreadsToUsePerFile.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "database", value = databasePath });
        para.Add(new pepXML.Generated.nameValueType { name = "MS_data_file", value = items[0].FullFilePath });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-link precursor Mass Tolerance", value = CommonParameters.PrecursorMassTolerance.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker type", value = crosslinker.CrosslinkerName });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker mass", value = crosslinker.TotalMass.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker cleavable", value = crosslinker.Cleavable.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker cleavable long mass", value = crosslinker.CleaveMassLong.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker cleavable short mass", value = crosslinker.CleaveMassShort.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Cross-linker xl site", value = modSites });

        para.Add(new pepXML.Generated.nameValueType { name = "Generate decoy proteins", value = XlSearchParameters.DecoyType.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "MaxMissed Cleavages", value = CommonParameters.DigestionParams.MaxMissedCleavages.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Protease", value = CommonParameters.DigestionParams.Protease.Name });
        para.Add(new pepXML.Generated.nameValueType { name = "Initiator Methionine", value = CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Max Modification Isoforms", value = CommonParameters.DigestionParams.MaxModificationIsoforms.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Min Peptide Len", value = CommonParameters.DigestionParams.MinPeptideLength.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Max Peptide Len", value = CommonParameters.DigestionParams.MaxPeptideLength.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Product Mass Tolerance", value = CommonParameters.ProductMassTolerance.ToString() });
        para.Add(new pepXML.Generated.nameValueType { name = "Ions to search", value = String.Join(", ", DissociationTypeCollection.ProductsFromDissociationType[CommonParameters.DissociationType]) });

        foreach (var fixedMod in fixedModifications)
        {
            para.Add(new pepXML.Generated.nameValueType { name = "Fixed Modifications: " + fixedMod.IdWithMotif, value = fixedMod.MonoisotopicMass.ToString() });
        }
        foreach (var variableMod in variableModifications)
        {
            para.Add(new pepXML.Generated.nameValueType { name = "Variable Modifications: " + variableMod.IdWithMotif, value = variableMod.MonoisotopicMass.ToString() });
        }

        para.Add(new pepXML.Generated.nameValueType { name = "Localize All Modifications", value = "true" });
    }

    _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1]
    {
        new pepXML.Generated.msms_pipeline_analysisMsms_run_summary
        {
            base_name = filePathNoExtension,
            raw_data_type = "raw",
            raw_data = ".mzML",
            sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme()
            {
                name = CommonParameters.DigestionParams.Protease.Name,
                specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1]
                {
                    new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity
                    {
                        cut = proteaseC,
                        no_cut = proteaseNC,
                    }
                }
            },
            search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary
                {
                    base_name = filePathNoExtension,
                    search_engine_version = GlobalVariables.MetaMorpheusVersion,
                    precursor_mass_type = pepXML.Generated.massType.monoisotopic,
                    fragment_mass_type = pepXML.Generated.massType.monoisotopic,
                    search_id = 1,
                    search_database = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database
                    {
                        local_path = databasePath,
                        type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA,
                    },
                    enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint
                    {
                        enzyme = CommonParameters.DigestionParams.Protease.Name,
                        max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(),
                        //min_number_termini = "2"
                    },
                    parameter = para.ToArray()
                }
            },
        }
    };

    _pepxml.msms_run_summary[0].spectrum_query = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[items.Count];

    // One search hit is produced per item, in item order; the second loop below pairs
    // searchHits[i] with items[i].
    var searchHits = new List<pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit>();

    for (int i = 0; i < items.Count; i++)
    {
        var csm = items[i];
        var mods = new List<pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
        var alphaPeptide = csm.BestMatchingPeptides.First().Peptide;

        foreach (var modification in alphaPeptide.AllModsOneIsNterminus)
        {
            var mod = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
            {
                mass = modification.Value.MonoisotopicMass.Value,

                // convert from one-based to zero-based (N-term is zero in the pepXML output)
                position = (modification.Key - 1).ToString()
            };
            mods.Add(mod);
        }

        if (csm.CrossType == PsmCrossType.Single)
        {
            var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)csm.ScanPrecursorMass,
                massdiff = (csm.ScanPrecursorMass - csm.PeptideMonisotopicMass.Value).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na,
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                search_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "xlTotalScore", value = csm.XLTotalScore.ToString() },
                    new pepXML.Generated.nameValueType { name = "Qvalue", value = csm.FdrInfo.QValue.ToString() }
                },
            };
            searchHits.Add(searchHit);
        }
        else if (csm.CrossType == PsmCrossType.DeadEnd || csm.CrossType == PsmCrossType.DeadEndH2O || csm.CrossType == PsmCrossType.DeadEndNH2 || csm.CrossType == PsmCrossType.DeadEndTris)
        {
            // Pick the dead-end mass matching the quencher; plain DeadEnd and DeadEndH2O share the H2O mass.
            double crosslinkerDeadEndMass = 0;
            switch (csm.CrossType)
            {
                case PsmCrossType.DeadEndNH2:
                    crosslinkerDeadEndMass = crosslinker.DeadendMassNH2;
                    break;

                case PsmCrossType.DeadEndTris:
                    crosslinkerDeadEndMass = crosslinker.DeadendMassTris;
                    break;

                default:
                    crosslinkerDeadEndMass = crosslinker.DeadendMassH2O;
                    break;
            }

            var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)csm.ScanPrecursorMass,
                massdiff = (csm.ScanPrecursorMass - csm.PeptideMonisotopicMass.Value - crosslinkerDeadEndMass).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na,
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                search_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "xlTotalScore", value = csm.XLTotalScore.ToString() },
                    new pepXML.Generated.nameValueType { name = "Qvalue", value = csm.FdrInfo.QValue.ToString() }
                },
            };
            searchHits.Add(searchHit);
        }
        else if (csm.CrossType == PsmCrossType.Inter || csm.CrossType == PsmCrossType.Intra || csm.CrossType == PsmCrossType.Cross)
        {
            var betaPeptide = csm.BetaPeptide.BestMatchingPeptides.First().Peptide;
            var modsBeta = new List<pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();

            foreach (var mod in betaPeptide.AllModsOneIsNterminus)
            {
                var modBeta = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
                {
                    mass = mod.Value.MonoisotopicMass.Value,

                    // convert from one-based to zero-based (N-term is zero in the pepXML output)
                    position = (mod.Key - 1).ToString()
                };
                modsBeta.Add(modBeta);
            }

            var alpha = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
            {
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)csm.PeptideMonisotopicMass.Value,
                complement_mass = (float)(csm.ScanPrecursorMass - alphaPeptide.MonoisotopicMass),
                designation = "alpha",
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                xlink_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "xlscore", value = csm.XLTotalScore.ToString() },
                    new pepXML.Generated.nameValueType { name = "link", value = csm.LinkPositions.First().ToString() },
                }
            };

            var beta = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
            {
                peptide = betaPeptide.BaseSequence,
                peptide_prev_aa = betaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = betaPeptide.NextAminoAcid.ToString(),
                protein = betaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)betaPeptide.MonoisotopicMass,
                complement_mass = (float)(csm.ScanPrecursorMass - betaPeptide.MonoisotopicMass),
                designation = "beta",
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = modsBeta.ToArray() },
                xlink_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "xlscore", value = csm.BetaPeptide.Score.ToString() },
                    new pepXML.Generated.nameValueType { name = "link", value = csm.BetaPeptide.LinkPositions.First().ToString() },
                }
            };

            var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[2] { alpha, beta };

            var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = "-",
                peptide_prev_aa = "-",
                peptide_next_aa = "-",
                protein = "-",
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)csm.ScanPrecursorMass,
                massdiff = (csm.ScanPrecursorMass - betaPeptide.MonoisotopicMass - alphaPeptide.MonoisotopicMass - crosslinker.TotalMass).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.xl,
                xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink
                {
                    identifier = crosslinker.CrosslinkerName,
                    mass = (float)crosslinker.TotalMass,
                    linked_peptide = cross
                },
                search_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "xlTotalScore", value = csm.XLTotalScore.ToString() },
                    new pepXML.Generated.nameValueType { name = "Qvalue", value = csm.FdrInfo.QValue.ToString() }
                }
            };
            searchHits.Add(searchHit);
        }
        else if (csm.CrossType == PsmCrossType.Loop)
        {
            // A loop link has both link positions on the same peptide, so both are reported as "link" scores.
            var thePeptide = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
            {
                xlink_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "link", value = csm.LinkPositions.First().ToString() },
                    new pepXML.Generated.nameValueType { name = "link", value = csm.LinkPositions[1].ToString() }
                }
            };

            var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[1] { thePeptide };

            var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
            {
                hit_rank = 1,
                peptide = alphaPeptide.BaseSequence,
                peptide_prev_aa = alphaPeptide.PreviousAminoAcid.ToString(),
                peptide_next_aa = alphaPeptide.NextAminoAcid.ToString(),
                protein = alphaPeptide.Protein.Accession,
                num_tot_proteins = 1,
                calc_neutral_pep_mass = (float)csm.ScanPrecursorMass,
                massdiff = (csm.ScanPrecursorMass - alphaPeptide.MonoisotopicMass - crosslinker.LoopMass).ToString(),
                xlink_typeSpecified = true,
                xlink_type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.loop,
                modification_info = new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() },
                xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink
                {
                    identifier = crosslinker.CrosslinkerName,
                    mass = (float)crosslinker.TotalMass,
                    linked_peptide = cross
                },
                search_score = new pepXML.Generated.nameValueType[]
                {
                    new pepXML.Generated.nameValueType { name = "xlTotalScore", value = csm.XLTotalScore.ToString() },
                    new pepXML.Generated.nameValueType { name = "Qvalue", value = csm.FdrInfo.QValue.ToString() }
                }
            };
            searchHits.Add(searchHit);
        }
    }

    for (int i = 0; i < items.Count; i++)
    {
        _pepxml.msms_run_summary[0].spectrum_query[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query()
        {
            spectrum = fileNameNoExtension + "." + items[i].ScanNumber.ToString(),
            start_scan = Convert.ToUInt32(items[i].ScanNumber),
            end_scan = Convert.ToUInt32(items[i].ScanNumber),
            precursor_neutral_mass = (float)items[i].ScanPrecursorMass,
            assumed_charge = items[i].ScanPrecursorCharge.ToString(),
            index = Convert.ToUInt32(i + 1),

            // ScanRetentionTime is multiplied by 60 to fill the *_sec field.
            retention_time_sec = (float)(items[i].ScanRetentionTime * 60),
            search_result = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result
                {
                    search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1]
                    {
                        searchHits[i]
                    }
                }
            }
        };
    }

    // using guarantees the file handle is released even when serialization throws.
    using (TextWriter writer = new StreamWriter(Path.Combine(outputFolder, fileName + ".pep.XML")))
    {
        _indexedSerializer.Serialize(writer, _pepxml);
    }
}
/// <summary>
/// Runs the search task: loads modifications and proteins, records the settings as prose,
/// searches every spectra file with the configured engine (modern, non-specific or classic),
/// and hands the pooled PSMs to a <c>PostSearchAnalysisTask</c>.
/// </summary>
/// <param name="OutputFolder">Root folder for results; per-file results go under "Individual File Results".</param>
/// <param name="dbFilenameList">Databases passed to protein loading and index generation.</param>
/// <param name="currentRawFileList">Spectra files to search, processed in order.</param>
/// <param name="taskId">Identifier used for status and progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The result of running the post-search analysis.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    // disable quantification if a .mgf is being used
    if (SearchParameters.DoQuantification && currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
    {
        SearchParameters.DoQuantification = false;
    }

    // load modifications
    Status("Loading modifications...", taskId);
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();

    // what types of fragment ions to search for
    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons && CommonParameters.AddCompIons)
    {
        ionTypes.Add(ProductType.B);
    }
    else if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.BnoB1ions);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following search settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the search task
    MyTaskResults = new MyTaskResults(this);
    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();
    FlashLFQResults flashLfqResults = null;

    MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

    // Collect the distinct digestion parameters across all file-specific settings.
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
    HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);
    Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

    // file name (no extension) -> { number of MS2 scans in the file, number of MS2 scans with a specific mass }
    Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);
        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
        numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
        myFileManager.DoneWithFile(origDataFile);

        // One slot per MS2 scan; slots the engines leave null are dropped when pooling below.
        var fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // modern search
        if (SearchParameters.SearchType == SearchType.Modern)
        {
            for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
            {
                List<CompactPeptide> peptideIndex = null;

                // Slice of the protein list that belongs to this partition.
                List<Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                Status("Getting fragment dictionary...", new List<string> { taskId });
                var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List<string> { taskId });
                List<int>[] fragmentIndex = null;
                lock (indexLock)
                {
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
                }

                Status("Searching files...", taskId);

                new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
            }
        }
        // nonspecific search
        else if (SearchParameters.SearchType == SearchType.NonSpecific)
        {
            List<List<ProductType>> terminusSeparatedIons = ProductTypeMethods.SeparateIonsByTerminus(ionTypes);
            foreach (List<ProductType> terminusSpecificIons in terminusSeparatedIons)
            {
                for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                {
                    List<CompactPeptide> peptideIndex = null;
                    List<Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                    List<int>[] fragmentIndex = new List<int>[1];

                    Status("Getting fragment dictionary...", new List<string> { taskId });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List<string> { taskId });
                    lock (indexLock)
                    {
                        GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
                    }

                    Status("Getting precursor dictionary...", new List<string> { taskId });
                    List<CompactPeptide> peptideIndexPrecursor = null;
                    List<Protein> proteinListSubsetPrecursor = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));
                    List<int>[] fragmentIndexPrecursor = new List<int>[1];
                    var indexEnginePrecursor = new PrecursorIndexingEngine(proteinListSubsetPrecursor, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, 0, new List<string> { taskId });
                    lock (indexLock)
                    {
                        GenerateIndexes(indexEnginePrecursor, dbFilenameList, ref peptideIndexPrecursor, ref fragmentIndexPrecursor, taskId);
                    }

                    // The search engine receives only peptideIndex alongside both fragment
                    // indexes, so the two peptide indexes are required to be the same size.
                    if (peptideIndex.Count != peptideIndexPrecursor.Count)
                    {
                        throw new MetaMorpheusException("peptideIndex not identical between indexing engines");
                    }

                    Status("Searching files...", taskId);

                    new NonSpecificEnzymeSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, fragmentIndexPrecursor, terminusSpecificIons, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                }
            }
        }
        // classic search
        else
        {
            Status("Starting search...", thisId);
            new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, massDiffAcceptor, combinedParams, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
        }

        lock (psmLock)
        {
            allPsms.AddRange(fileSpecificPsms.Where(p => p != null));
        }

        completedFiles++;
        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        // Scale to a percentage before dividing: plain completedFiles / Count is integer
        // division and would report 0 for every file except the last.
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    // Bundle everything the post-search analysis needs.
    PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters();
    parameters.SearchTaskResults = MyTaskResults;
    parameters.SearchTaskId = taskId;
    parameters.SearchParameters = SearchParameters;
    parameters.ProteinList = proteinList;
    parameters.IonTypes = ionTypes;
    parameters.AllPsms = allPsms;
    parameters.FixedModifications = fixedModifications;
    parameters.VariableModifications = variableModifications;
    parameters.ListOfDigestionParams = ListOfDigestionParams;
    parameters.CurrentRawFileList = currentRawFileList;
    parameters.MyFileManager = myFileManager;
    parameters.NumNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);
    parameters.OutputFolder = OutputFolder;
    parameters.IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results");
    parameters.FlashLfqResults = flashLfqResults;
    parameters.FileSettingsList = fileSettingsList;
    parameters.NumMs2SpectraPerFile = numMs2SpectraPerFile;
    parameters.DatabaseFilenameList = dbFilenameList;

    PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask();
    postProcessing.Parameters = parameters;
    postProcessing.CommonParameters = CommonParameters;
    return postProcessing.Run();
}
/// <summary>
/// Sorts the matches inside every scan (setting each match's second-best score along the way)
/// and then orders the scans themselves by their top match.
/// </summary>
/// <param name="ListOfCsmsPerMS2Scan">One list of crosslink spectral matches per MS2 scan.</param>
/// <param name="commonParameters">Forwarded to <c>SortOneListCsmsSetSecondBestScore</c>.</param>
/// <returns>
/// Scan lists ordered by the first match's <c>XLTotalScore</c> descending, then by that match's
/// alpha full sequence concatenated with its beta full sequence, also descending.
/// </returns>
public static List<List<CrosslinkSpectralMatch>> SortListsOfCsms(List<List<CrosslinkSpectralMatch>> ListOfCsmsPerMS2Scan, CommonParameters commonParameters)
{
    // Sort within each scan first; materialize so the per-scan work happens exactly once.
    List<List<CrosslinkSpectralMatch>> sortedPerScan = ListOfCsmsPerMS2Scan
        .Select(scanCsms => SortOneListCsmsSetSecondBestScore(scanCsms, commonParameters))
        .ToList();

    return sortedPerScan
        .OrderByDescending(scanCsms => scanCsms.First().XLTotalScore)
        .ThenByDescending(scanCsms =>
        {
            CrosslinkSpectralMatch top = scanCsms.First();
            string betaSequence = top.BetaPeptide != null ? top.BetaPeptide.FullSequence : "";
            return top.FullSequence + betaSequence;
        })
        .ToList();
}
/// <summary>
/// Runs FDR analysis over the crosslinked (inter/intra) matches and over all remaining
/// matches, then performs the single-peptide and crosslink-specific FDR passes.
/// </summary>
/// <param name="allPsms">Every crosslink spectral match to be split into crosslinks and the rest.</param>
/// <param name="intraCsms">Intra-protein crosslinks passed to <c>DoCrosslinkFdrAnalysis</c>.</param>
/// <param name="interCsms">Inter-protein crosslinks passed to <c>DoCrosslinkFdrAnalysis</c>.</param>
/// <param name="commonParameters">Parameters forwarded to the FDR engines.</param>
/// <param name="taskId">Identifier used as the nested id for the FDR engines.</param>
public void ComputeXlinkQandPValues(List<CrosslinkSpectralMatch> allPsms, List<CrosslinkSpectralMatch> intraCsms, List<CrosslinkSpectralMatch> interCsms, CommonParameters commonParameters, string taskId)
{
    // Inter- and intra-protein crosslinks, best total score first.
    List<CrosslinkSpectralMatch> crossCsms = allPsms
        .Where(p => p.CrossType == PsmCrossType.Inter || p.CrossType == PsmCrossType.Intra)
        .OrderByDescending(p => p.XLTotalScore)
        .ToList();
    var crosslinkFdrEngine = new FdrAnalysisEngine(crossCsms.ToList<PeptideSpectralMatch>(), 0, commonParameters, this.FileSpecificParameters, new List<string> { taskId }, "crosslink");
    crosslinkFdrEngine.Run();

    // Everything that is neither inter nor intra, best score first.
    List<CrosslinkSpectralMatch> singles = allPsms
        .Where(p => p.CrossType != PsmCrossType.Inter && p.CrossType != PsmCrossType.Intra)
        .OrderByDescending(p => p.Score)
        .ToList();
    var psmFdrEngine = new FdrAnalysisEngine(singles.ToList<PeptideSpectralMatch>(), 0, commonParameters, this.FileSpecificParameters, new List<string> { taskId }, "PSM");
    psmFdrEngine.Run();
    SingleFDRAnalysis(singles, commonParameters, new List<string> { taskId });

    // calculate FDR
    DoCrosslinkFdrAnalysis(interCsms);
    DoCrosslinkFdrAnalysis(intraCsms);
}
/// <summary>
/// Runs the G-PTM-D task: searches every spectra file with a mass-difference acceptor built from
/// the G-PTM-D modifications, runs FDR analysis and the GPTMD engine on the pooled PSMs, and
/// writes new XML databases (non-contaminant and contaminant) containing the discovered modifications.
/// </summary>
/// <param name="OutputFolder">Folder that receives <c>GPTMD_Candidates.psmtsv</c> and the new XML databases.</param>
/// <param name="dbFilenameList">Input databases; used to load proteins and to name the output databases.</param>
/// <param name="currentRawFileList">Paths of the spectra files to search.</param>
/// <param name="taskId">Identifier used in status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results, including the list of newly written databases.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // TODO: print error messages loading GPTMD mods
    List<Modification> gptmdModifications = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
    IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    // write prose settings
    ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

    // temporary search type for writing prose
    // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
    MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);
    ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the G-PTM-D task
    Status("Running G-PTM-D...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewDatabases = new List<DbForTask>()
    };

    // NOTE(review): this set is built but never read in this method; kept because the helper it
    // calls is not visible here and may matter to callers - confirm before removing.
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
    HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

    MyFileManager myFileManager = new MyFileManager(true);

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        // Stop if canceled
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

        NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });

        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
        Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
        myFileManager.DoneWithFile(origDataFile);

        // one slot per scan; slots left null are filtered out below
        PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
        new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, proteinList, searchMode, combinedParams, new List<string> { taskId, "Individual Spectra Files", origDataFile }).Run();
        allPsms.AddRange(allPsmsArray.Where(p => p != null));
        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", origDataFile }));
    }
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    // keep one PSM per (file, scan, peptide mass): best score first, then smallest precursor mass error
    allPsms = allPsms.OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First()).ToList();

    new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List<string> { taskId }).Run();

    var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
    WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
    FinishedWritingFile(writtenFile, new List<string> { taskId });

    // get file-specific precursor mass tolerances for the GPTMD engine
    var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();
    for (int i = 0; i < currentRawFileList.Count; i++)
    {
        string filePath = currentRawFileList[i];
        Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance;
        if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null)
        {
            fileTolerance = fileSettingsList[i].PrecursorMassTolerance;
        }
        filePathToPrecursorMassTolerance.Add(filePath, fileTolerance);
    }

    // run GPTMD engine
    var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List<string> { taskId }).Run();

    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return MyTaskResults;
    }

    // write GPTMD databases
    if (dbFilenameList.Any(b => !b.IsContaminant))
    {
        List<string> databaseNames = new List<string>();
        foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
        {
            var dbName = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
            var theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
            bool compressed = theExtension.EndsWith("gz", StringComparison.Ordinal);
            // for a compressed file (e.g. ".xml.gz") strip the inner extension as well
            databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
        }
        string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

        FinishedWritingFile(outputXMLdbFullName, new List<string> { taskId });

        MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
        MyTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
        MyTaskResults.AddNiceText("Mods types and counts:");
        MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }
    if (dbFilenameList.Any(b => b.IsContaminant))
    {
        // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written
        //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");
        List<string> databaseNames = new List<string>();
        foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
        {
            var dbName = Path.GetFileName(contaminantDb.FilePath);
            int indexOfFirstDot = dbName.IndexOf('.');
            // A file name without any dot has no extension to strip; use it whole rather than
            // passing -1 to Substring, which would throw ArgumentOutOfRangeException.
            databaseNames.Add(indexOfFirstDot >= 0 ? dbName.Substring(0, indexOfFirstDot) : dbName);
        }
        string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

        FinishedWritingFile(outputXMLdbFullNameContaminants, new List<string> { taskId });

        MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
        MyTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
        MyTaskResults.AddNiceText("Mods types and counts:");
        MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }
    return MyTaskResults;
}
/// <summary>
/// Searches one spectra file with a tolerance-around-zero search mode, keeps the confident
/// target identifications, and hands them (with their scans) to the data point acquisition engine.
/// </summary>
/// <param name="myMsDataFile">Loaded spectra file.</param>
/// <param name="currentDataFile">Path of the spectra file; its name without extension is used in the nested IDs.</param>
/// <param name="variableModifications">Variable modifications passed to the search engine.</param>
/// <param name="fixedModifications">Fixed modifications passed to the search engine.</param>
/// <param name="proteinList">Proteins to search against.</param>
/// <param name="taskId">Identifier used in log/progress nesting.</param>
/// <param name="combinedParameters">Parameters used for scan extraction and the classic search.</param>
/// <param name="initPrecTol">Precursor tolerance; its kind (ppm vs. other) selects the search mode.</param>
/// <param name="initProdTol">Product tolerance; logged and forwarded to the acquisition engine.</param>
/// <returns>
/// The acquisition engine's results, or an empty result object when no PSM passes the
/// q-value (notch) &lt; 0.001, non-decoy, non-null-sequence filter.
/// </returns>
private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List<Modification> variableModifications, List<Modification> fixedModifications, List<Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
{
    string fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);

    // Every consumer gets its own fresh nested-ID list, exactly as if written inline at each call site.
    List<string> NestedIds()
    {
        return new List<string> { taskId, "Individual Spectra Files", fileNameWithoutExtension };
    }

    MassDiffAcceptor searchMode;
    if (initPrecTol is PpmTolerance)
    {
        searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
    }
    else
    {
        searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
    }

    var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters)
        .OrderBy(scan => scan.PrecursorMass)
        .ToArray();

    // One slot per scan; the search engine fills in the slots it finds a match for.
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

    Log("Searching with searchMode: " + searchMode, NestedIds());
    Log("Searching with productMassTolerance: " + initProdTol, NestedIds());

    var searchEngine = new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParameters, FileSpecificParameters, NestedIds());
    searchEngine.Run();

    // Keep one PSM per (file, scan, peptide mass): best score first, then smallest precursor mass error.
    List<PeptideSpectralMatch> allPsms = allPsmsArray
        .Where(psm => psm != null)
        .ToList()
        .OrderByDescending(psm => psm.Score)
        .ThenBy(psm => psm.PeptideMonisotopicMass.HasValue ? Math.Abs(psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(psm => (psm.FullFilePath, psm.ScanNumber, psm.PeptideMonisotopicMass))
        .Select(group => group.First())
        .ToList();

    var fdrEngine = new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, FileSpecificParameters, NestedIds());
    fdrEngine.Run();

    List<PeptideSpectralMatch> goodIdentifications = allPsms
        .Where(psm => psm.FdrInfo.QValueNotch < 0.001 && !psm.IsDecoy && psm.FullSequence != null)
        .ToList();

    if (!goodIdentifications.Any())
    {
        return new DataPointAquisitionResults(null, new List<PeptideSpectralMatch>(), new List<LabeledDataPoint>(), new List<LabeledDataPoint>(), 0, 0, 0, 0);
    }

    //get the deconvoluted ms2scans for the good identifications
    // The PSM array is index-aligned with the scan array, so a PSM's position identifies its scan.
    List<PeptideSpectralMatch> unfilteredPsms = allPsmsArray.ToList();
    List<Ms2ScanWithSpecificMass> goodScans = goodIdentifications
        .Select(psm => listOfSortedms2Scans[unfilteredPsms.IndexOf(psm)])
        .ToList();

    var acquisitionEngine = new DataPointAcquisitionEngine(
        goodIdentifications,
        goodScans,
        myMsDataFile,
        initPrecTol,
        initProdTol,
        CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
        CommonParameters,
        FileSpecificParameters,
        NestedIds());

    return (DataPointAquisitionResults)acquisitionEngine.Run();
}
/// <summary>
/// Runs the crosslink search task: for every spectra file, indexes each database partition,
/// runs the crosslink search engine, then resolves ambiguities, removes duplicates per scan,
/// picks the first candidate of each scan, and passes the result to the post-search analysis task.
/// </summary>
/// <param name="OutputFolder">Output folder forwarded to the post-search analysis.</param>
/// <param name="dbFilenameList">Input databases; used to load proteins and by the indexing engines.</param>
/// <param name="currentRawFileList">Paths of the spectra files to search.</param>
/// <param name="taskId">Identifier used in status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The results returned by the post crosslink-search analysis task.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);
    List<List<CrosslinkSpectralMatch>> ListOfCsmsPerMS2Scan = new List<List<CrosslinkSpectralMatch>>();

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    var crosslinker = XlSearchParameters.Crosslinker;

    MyFileManager myFileManager = new MyFileManager(true);

    // NOTE(review): this set is built but never read in this method; kept because the helper it
    // calls is not visible here and may matter to callers - confirm before removing.
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
    HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

    int completedFiles = 0;

    Status("Searching files...", taskId);

    ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
    ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
    ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
    ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
    ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");

    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

    // The "; " separator must be appended AFTER the join. Appending it to the Select(...) result
    // (as a misplaced parenthesis once did) stringifies the enumerable itself and writes a type
    // name into the prose instead of the modification IDs.
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");

    ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        // one candidate list per scan; slots left null are filtered out after the partition loop
        List<CrosslinkSpectralMatch>[] newCsmsPerMS2ScanPerFile = new List<CrosslinkSpectralMatch>[arrayOfMs2ScansSortedByMass.Length];

        // NOTE(review): the loop bound reads CommonParameters.TotalPartitions while the slice
        // arithmetic reads combinedParams.TotalPartitions - confirm the two can never differ.
        for (int currentPartition = 0; currentPartition < CommonParameters.TotalPartitions; currentPartition++)
        {
            List<PeptideWithSetModifications> peptideIndex = null;

            //When partition, the proteinList will be split for each Thread.
            int partitionStart = currentPartition * proteinList.Count / combinedParams.TotalPartitions;
            int partitionEnd = (currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(partitionStart, partitionEnd - partitionStart);

            Status("Getting fragment dictionary...", new List<string> { taskId });

            //Only reverse Decoy for crosslink search has been tested and are set as fixed parameter.
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, combinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
            List<int>[] fragmentIndex = null;
            List<int>[] precursorIndex = null;

            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

            //The second Fragment index is for 'MS1-HCD_MS1-ETD_MS2s' type of data. If LowCID is used for MS1, ion-index is not allowed to use.
            List<int>[] secondFragmentIndex = null;
            if (combinedParams.ChildScanDissociationType != DissociationType.LowCID
                && !CrosslinkSearchEngine.DissociationTypeGenerateSameTypeOfIons(combinedParams.DissociationType, combinedParams.ChildScanDissociationType))
            {
                //Because two different type of dissociation methods are used, the parameters are changed with different dissociation type.
                var secondCombinedParams = CommonParameters.CloneWithNewDissociationType(combinedParams.ChildScanDissociationType);
                var secondIndexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, secondCombinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                GenerateSecondIndexes(indexEngine, secondIndexEngine, dbFilenameList, ref secondFragmentIndex, proteinList, taskId);
            }

            Status("Searching files...", taskId);
            new CrosslinkSearchEngine(newCsmsPerMS2ScanPerFile, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, secondFragmentIndex, currentPartition, combinedParams, crosslinker,
                XlSearchParameters.RestrictToTopNHits, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.CrosslinkAtCleavageSite,
                XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + CommonParameters.TotalPartitions + "!", thisId));
            if (GlobalVariables.StopLoops)
            {
                break;
            }
        }

        ListOfCsmsPerMS2Scan.AddRange(newCsmsPerMS2ScanPerFile.Where(p => p != null).ToList());

        completedFiles++;
        // Report a percentage: plain integer division of the two counts is 0 for every file but the last.
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    List<List<CrosslinkSpectralMatch>> ListOfCsmsPerMS2ScanParsimony = new List<List<CrosslinkSpectralMatch>>();
    //For every Ms2Scans, each have a list of candidates psms. The allPsms from CrosslinkSearchEngine is the list (all ms2scans) of list (each ms2scan) of psm (all candidate psm).
    //The allPsmsList is same as allPsms after ResolveAmbiguities.
    foreach (var csmsPerScan in ListOfCsmsPerMS2Scan)
    {
        foreach (var csm in csmsPerScan)
        {
            csm.ResolveAllAmbiguities();
            if (csm.BetaPeptide != null)
            {
                csm.BetaPeptide.ResolveAllAmbiguities();
            }
        }
        ListOfCsmsPerMS2ScanParsimony.Add(RemoveDuplicateFromCsmsPerScan(csmsPerScan));
    }

    var filteredAllPsms = new List<CrosslinkSpectralMatch>();

    //For each ms2scan, try to find the best candidate psm from the psms list. Add it into filteredAllPsms
    //This function is for current usage, this can be replaced with PEP value.
    foreach (var csmsPerScan in ListOfCsmsPerMS2ScanParsimony)
    {
        filteredAllPsms.Add(csmsPerScan[0]);
    }

    PostXLSearchAnalysisTask postXLSearchAnalysisTask = new PostXLSearchAnalysisTask();

    return postXLSearchAnalysisTask.Run(OutputFolder, dbFilenameList, currentRawFileList, taskId, fileSettingsList, filteredAllPsms.OrderByDescending(p => p.XLTotalScore).ToList(), CommonParameters, XlSearchParameters, proteinList, variableModifications, fixedModifications, localizeableModificationTypes, MyTaskResults);
}
/// <summary>
/// Runs the calibration task: for every spectra file, searches for calibration data points
/// (retrying with wider tolerances), calibrates the file, re-searches to measure the result,
/// and writes a calibrated mzML plus a file-specific toml. Finally rewrites the experimental
/// design file, if one exists next to the first spectra file, with the calibrated file names.
/// </summary>
/// <param name="OutputFolder">Folder that receives the calibrated mzML, toml and experimental design files.</param>
/// <param name="dbFilenameList">Input databases used to load proteins.</param>
/// <param name="currentRawFileList">Paths of the spectra files to calibrate.</param>
/// <param name="taskId">Identifier used in status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>
/// The task results listing the new spectra files and tomls. Returned early, without processing
/// the remaining files, when a file still lacks enough data points after the fourth attempt.
/// </returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    // load modifications
    Status("Loading modifications...", new List<string> { taskId });
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();

    // what types of fragment ions to search for
    // NOTE(review): ionTypes is populated but not read again in this method.
    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.BnoB1ions);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the calibration task
    Status("Calibrating...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewSpectra = new List<string>(),
        NewFileSpecificTomls = new List<string>()
    };

    object lock1 = new object();

    var myFileManager = new MyFileManager(true);

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        // get filename stuff
        var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
        var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
        string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

        // mark the file as in-progress
        StartingDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MsDataFile myMsDataFile;

        // load the file
        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });
        lock (lock1)
        {
            myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks, CommonParameters);
        }

        // get datapoints to fit calibration function to
        Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
        DataPointAquisitionResults acquisitionResults = null;

        // NOTE(review): the retries widen the tolerances on CommonParameters, but each search reads
        // them from combinedParams. Whether the widening reaches the next attempt depends on
        // SetAllFileSpecificCommonParams returning the same object - confirm against that helper.
        for (int i = 1; i <= 5; i++)
        {
            acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

            // enough data points to calibrate?
            if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
            {
                break;
            }

            if (i == 1) // failed round 1
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                CommonParameters.ProductMassTolerance = new PpmTolerance(50);
            }
            else if (i == 2) // failed round 2
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                CommonParameters.ProductMassTolerance = new PpmTolerance(100);
            }
            else if (i == 3) // failed round 3
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                CommonParameters.ProductMassTolerance = new PpmTolerance(150);
            }
            else // failed round 4
            {
                if (acquisitionResults.Psms.Count < NumRequiredPsms)
                {
                    Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                }
                if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                }
                if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                }
                FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
                return MyTaskResults;
            }

            Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
        }

        // stats before calibration
        int prevPsmCount = acquisitionResults.Psms.Count;
        double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double preCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // generate calibration function and shift data points
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }).Run();

        // do another search to evaluate calibration results
        Status("Post-calibration search...", new List<string> { taskId, "Individual Spectra Files" });
        acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        // stats after calibration
        int postCalibrationPsmCount = acquisitionResults.Psms.Count;
        double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double postCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // did the data improve? (not used for anything yet...)
        bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

        // write toml settings for the calibrated file
        var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

        var fileSpecificParams = new FileSpecificParameters();

        // carry over file-specific parameters from the uncalibrated file to the calibrated one
        if (fileSettingsList[spectraFileIndex] != null)
        {
            fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
        }

        // don't write over ppm tolerances if they've been specified by the user already in the file-specific settings
        // otherwise, suggest 4 * interquartile range as the ppm tolerance
        if (fileSpecificParams.PrecursorMassTolerance == null)
        {
            fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
        }
        if (fileSpecificParams.ProductMassTolerance == null)
        {
            fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
        }

        Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

        FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // write the calibrated mzML file
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
        myFileManager.DoneWithFile(originalUncalibratedFilePath);

        // finished calibrating this file
        FinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        MyTaskResults.NewSpectra.Add(calibratedFilePath);
        MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
        FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
    }

    // re-write experimental design (if it has been defined) with new calibrated file names
    string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;
    assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

    if (File.Exists(assumedPathToExperDesign))
    {
        List<string> newExperimentalDesignOutput = new List<string>();
        var lines = File.ReadAllLines(assumedPathToExperDesign);

        for (int i = 0; i < lines.Length; i++)
        {
            // header of experimental design file
            if (i == 0)
            {
                newExperimentalDesignOutput.Add(lines[i]);
            }
            else
            {
                var split = lines[i].Split('\t');
                // first column is the file name: swap in the calibrated name, copy the other columns through
                string newline = Path.GetFileNameWithoutExtension(split[0]) + CalibSuffix + "\t";
                for (int j = 1; j < split.Length; j++)
                {
                    newline += split[j] + "\t";
                }

                newExperimentalDesignOutput.Add(newline);
            }
        }

        // Only write when a design was actually defined; writing unconditionally would drop an
        // empty design file into the output folder for runs that never had one.
        File.WriteAllLines(Path.Combine(OutputFolder, GlobalVariables.ExperimentalDesignFileName), newExperimentalDesignOutput);
    }

    // finished calibrating all files for the task
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    return MyTaskResults;
}
/// <summary>
/// Serializes the PSMs that pass the q-value filter into a pepXML document and writes it to
/// <paramref name="outputPath"/>. Nothing is written when no PSM passes the filter.
/// </summary>
/// <param name="psms">Candidate PSMs; only those with QValue &lt;= filter and QValueNotch &lt; filter are written.</param>
/// <param name="database">Databases used for the search; only the first entry's path is reported.</param>
/// <param name="variableModifications">Variable modifications, listed as search parameters.</param>
/// <param name="fixedModifications">Fixed modifications, listed as search parameters.</param>
/// <param name="CommonParameters">Search settings reported in the run and search summaries.</param>
/// <param name="outputPath">Full path of the pepXML file to create.</param>
/// <param name="qValueFilter">Q-value threshold applied to <paramref name="psms"/>.</param>
public static void WritePepXml(List<PeptideSpectralMatch> psms, List<DbForTask> database, List<Modification> variableModifications, List<Modification> fixedModifications, CommonParameters CommonParameters, string outputPath, double qValueFilter)
{
    // TODO: needs a unit test
    psms = psms.Where(p => p.FdrInfo.QValue <= qValueFilter && p.FdrInfo.QValueNotch < qValueFilter).ToList();

    if (!psms.Any())
    {
        return;
    }

    XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis));
    var _pepxml = new pepXML.Generated.msms_pipeline_analysis();

    _pepxml.date = DateTime.Now;
    _pepxml.summary_xml = psms[0].FullFilePath + ".pep.XML";

    // "cut" lists the residues that induce cleavage, "no_cut" the residues that prevent it.
    // The original built both strings from InducingCleavage.
    string proteaseC = string.Join(string.Empty, CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.InducingCleavage));
    string proteaseNC = string.Join(string.Empty, CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.PreventingCleavage));

    string fileNameNoExtension = Path.GetFileNameWithoutExtension(psms[0].FullFilePath);
    string filePathNoExtension = Path.ChangeExtension(psms[0].FullFilePath, null);

    // search parameters reported in the search summary
    var para = new List<pepXML.Generated.nameValueType>();
    para.Add(new pepXML.Generated.nameValueType { name = "threads", value = CommonParameters.MaxThreadsToUsePerFile.ToString() });
    para.Add(new pepXML.Generated.nameValueType { name = "database", value = database.First().FilePath });
    para.Add(new pepXML.Generated.nameValueType { name = "MS_data_file", value = psms[0].FullFilePath });
    para.Add(new pepXML.Generated.nameValueType { name = "MaxMissed Cleavages", value = CommonParameters.DigestionParams.MaxMissedCleavages.ToString() });
    para.Add(new pepXML.Generated.nameValueType { name = "Protease", value = CommonParameters.DigestionParams.Protease.Name });
    para.Add(new pepXML.Generated.nameValueType { name = "Initiator Methionine", value = CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString() });
    para.Add(new pepXML.Generated.nameValueType { name = "Max Modification Isoforms", value = CommonParameters.DigestionParams.MaxModificationIsoforms.ToString() });
    para.Add(new pepXML.Generated.nameValueType { name = "Min Peptide Len", value = CommonParameters.DigestionParams.MinPeptideLength.ToString() });
    para.Add(new pepXML.Generated.nameValueType { name = "Max Peptide Len", value = CommonParameters.DigestionParams.MaxPeptideLength.ToString() });
    para.Add(new pepXML.Generated.nameValueType { name = "Product Mass Tolerance", value = CommonParameters.ProductMassTolerance.ToString() });
    // TODO: check this
    para.Add(new pepXML.Generated.nameValueType { name = "Ions to search", value = string.Join(", ", DissociationTypeCollection.ProductsFromDissociationType[CommonParameters.DissociationType]) });
    // reports the task-level output filter, not the qValueFilter argument applied above
    para.Add(new pepXML.Generated.nameValueType { name = "Q-value Filter", value = CommonParameters.QValueOutputFilter.ToString() });

    foreach (var item in fixedModifications)
    {
        para.Add(new pepXML.Generated.nameValueType { name = "Fixed Modifications: " + item.IdWithMotif, value = item.MonoisotopicMass.ToString() });
    }

    foreach (var item in variableModifications)
    {
        para.Add(new pepXML.Generated.nameValueType { name = "Variable Modifications: " + item.IdWithMotif, value = item.MonoisotopicMass.ToString() });
    }

    para.Add(new pepXML.Generated.nameValueType { name = "Localize All Modifications", value = "true" });

    _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1]
    {
        new pepXML.Generated.msms_pipeline_analysisMsms_run_summary
        {
            base_name = filePathNoExtension,
            raw_data_type = "raw",
            raw_data = ".mzML", //TODO: use file format of spectra file used
            sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme()
            {
                name = CommonParameters.DigestionParams.Protease.Name,
                specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1]
                {
                    new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity
                    {
                        cut = proteaseC,
                        no_cut = proteaseNC,
                    }
                }
            },

            search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary
                {
                    base_name = filePathNoExtension,
                    // TODO: get MetaMorpheus recognized as a search engine type
                    //search_engine = pepXML.Generated.engineType.MetaMorpheus
                    search_engine_version = GlobalVariables.MetaMorpheusVersion,
                    precursor_mass_type = pepXML.Generated.massType.monoisotopic,
                    fragment_mass_type = pepXML.Generated.massType.monoisotopic,
                    search_id = 1,

                    //generate database information
                    //TODO: multiple databases
                    search_database = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database
                    {
                        local_path = database.First().FilePath,
                        type = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA,
                    },
                    enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint
                    {
                        enzyme = CommonParameters.DigestionParams.Protease.Name,
                        max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(),
                    },

                    parameter = para.ToArray()
                }
            },
        }
    };

    _pepxml.msms_run_summary[0].spectrum_query = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[psms.Count];

    // one search hit per PSM, in the same order as psms
    var searchHits = new List<pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit>();

    foreach (var psm in psms)
    {
        PeptideWithSetModifications peptide = psm.BestMatchingPeptides.First().Peptide;

        var mods = new List<pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
        foreach (var mod in peptide.AllModsOneIsNterminus)
        {
            var pepXmlMod = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
            {
                mass = (double)mod.Value.MonoisotopicMass,
                // AllModsOneIsNterminus keys are shifted by one relative to the position written here
                position = (mod.Key - 1).ToString()
            };
            mods.Add(pepXmlMod);
        }

        // materialized once because it is both counted and (possibly) joined below
        var proteinAccessions = psm.BestMatchingPeptides.Select(p => p.Peptide.Protein.Accession).Distinct().ToList();

        var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
        {
            // TODO: handle PSM ambiguity if pepXML supports it (base sequence, mod localization, protein)
            // TODO: add target/decoy/contaminant designation for each PSM
            // TODO: add amino acid substitution
            hit_rank = 1,
            peptide = ((psm.BaseSequence != null) ? psm.BaseSequence : "Ambiguous"),
            peptide_prev_aa = peptide.PreviousAminoAcid.ToString(),
            peptide_next_aa = peptide.NextAminoAcid.ToString(),
            protein = ((peptide.Protein.Accession != null) ? peptide.Protein.Accession : string.Join("|", proteinAccessions)),
            num_tot_proteins = (uint)proteinAccessions.Count(),
            calc_neutral_pep_mass = (float)((psm.PeptideMonisotopicMass != null) ? psm.PeptideMonisotopicMass : float.NaN),
            massdiff = ((psm.PeptideMonisotopicMass != null) ? (psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value).ToString() : "Ambiguous"),
            // emit modification info only when the peptide carries modifications
            // (the original condition was inverted and discarded them)
            modification_info = (mods.Count > 0 ? new pepXML.Generated.modInfoDataType { mod_aminoacid_mass = mods.ToArray() } : null),
            search_score = new pepXML.Generated.nameValueType[]
            {
                new pepXML.Generated.nameValueType { name = "Score", value = psm.Score.ToString() },
                new pepXML.Generated.nameValueType { name = "Qvalue", value = psm.FdrInfo.QValue.ToString() }
            },
        };
        searchHits.Add(searchHit);
    }

    for (int i = 0; i < psms.Count; i++)
    {
        _pepxml.msms_run_summary[0].spectrum_query[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query()
        {
            spectrum = fileNameNoExtension + "." + psms[i].ScanNumber.ToString(),
            start_scan = Convert.ToUInt32(psms[i].ScanNumber),
            end_scan = Convert.ToUInt32(psms[i].ScanNumber),
            precursor_neutral_mass = (float)psms[i].ScanPrecursorMass,
            assumed_charge = psms[i].ScanPrecursorCharge.ToString(),
            index = Convert.ToUInt32(i + 1),
            // ScanRetentionTime is multiplied by 60 to fill the *_sec field
            retention_time_sec = (float)(psms[i].ScanRetentionTime * 60),
            search_result = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result
                {
                    search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1]
                    {
                        searchHits[i]
                    }
                }
            }
        };
    }

    // dispose the writer even if serialization throws
    using (TextWriter writer = new StreamWriter(outputPath))
    {
        _indexedSerializer.Serialize(writer, _pepxml);
    }
}
/// <summary>
/// Searches the MS2 scans of one spectra file with the given initial tolerances, runs FDR analysis on
/// the best PSM per (file, scan, peptide mass), and passes the confident target identifications to the
/// data point acquisition engine.
/// </summary>
/// <param name="myMsDataFile">Loaded spectra file to search.</param>
/// <param name="currentDataFile">Path of the spectra file; its extension-less name is used in status ids.</param>
/// <param name="variableModifications">Variable modifications for the search.</param>
/// <param name="fixedModifications">Fixed modifications for the search.</param>
/// <param name="proteinList">Proteins to search against.</param>
/// <param name="taskId">Id of the running task, used as the root of the nested status ids.</param>
/// <param name="combinedParameters">Parameters used for scan extraction, the search and protein matching.</param>
/// <param name="initPrecTol">Precursor tolerance; a ppm tolerance selects a ppm search mode, anything else an absolute one.</param>
/// <param name="initProdTol">Product tolerance, logged and forwarded to data point acquisition.</param>
/// <returns>The acquisition results, or an empty result object when no identification passes the filter.</returns>
private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
{
    string spectraFileName = Path.GetFileNameWithoutExtension(currentDataFile);

    // each consumer receives its own list instance, as in the inline-constructed originals
    List<string> NestedIds() => new List<string> { taskId, "Individual Spectra Files", spectraFileName };

    MassDiffAcceptor searchMode;
    if (initPrecTol is PpmTolerance)
    {
        searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
    }
    else
    {
        searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
    }

    // map every enabled ion series to its calibration flag and its search product type
    var ionSeries = new (bool Enabled, FragmentTypes Fragment, ProductType Product)[]
    {
        (combinedParameters.BIons, FragmentTypes.b, ProductType.B),
        (combinedParameters.YIons, FragmentTypes.y, ProductType.Y),
        (combinedParameters.CIons, FragmentTypes.c, ProductType.C),
        (combinedParameters.ZdotIons, FragmentTypes.zdot, ProductType.Zdot),
    };

    FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;
    List<ProductType> productTypes = new List<ProductType>();
    foreach (var series in ionSeries)
    {
        if (series.Enabled)
        {
            fragmentTypesForCalibration |= series.Fragment;
            productTypes.Add(series.Product);
        }
    }

    var scansByPrecursorMass = GetMs2Scans(
            myMsDataFile,
            currentDataFile,
            combinedParameters.DoPrecursorDeconvolution,
            combinedParameters.UseProvidedPrecursorInfo,
            combinedParameters.DeconvolutionIntensityRatio,
            combinedParameters.DeconvolutionMaxAssumedChargeState,
            combinedParameters.DeconvolutionMassTolerance)
        .OrderBy(scan => scan.PrecursorMass)
        .ToArray();

    // the search engine fills this array; slots without a match stay null
    PeptideSpectralMatch[] psmPerScan = new PeptideSpectralMatch[scansByPrecursorMass.Length];

    Log("Searching with searchMode: " + searchMode, NestedIds());
    Log("Searching with productMassTolerance: " + initProdTol, NestedIds());

    new ClassicSearchEngine(psmPerScan, scansByPrecursorMass, variableModifications, fixedModifications, proteinList, productTypes, searchMode, combinedParameters, NestedIds()).Run();

    List<PeptideSpectralMatch> allPsms = psmPerScan.Where(psm => psm != null).ToList();

    var sequenceMatchingEngine = new SequencesToActualProteinPeptidesEngine(
        allPsms,
        proteinList,
        fixedModifications,
        variableModifications,
        productTypes,
        new List<DigestionParams> { combinedParameters.DigestionParams },
        combinedParameters.ReportAllAmbiguity,
        combinedParameters,
        NestedIds());
    var sequenceMatchingResults = (SequencesToActualProteinPeptidesEngineResults)sequenceMatchingEngine.Run();
    var compactPeptideToProteinPeptideMatching = sequenceMatchingResults.CompactPeptideToProteinPeptideMatching;

    foreach (PeptideSpectralMatch psm in allPsms)
    {
        psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
    }

    // best score first, ties broken by the smallest precursor mass error, then one PSM per
    // (file, scan, peptide mass) group
    var rankedPsms = allPsms
        .OrderByDescending(psm => psm.Score)
        .ThenBy(psm => psm.PeptideMonisotopicMass.HasValue
            ? Math.Abs(psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value)
            : double.MaxValue);
    allPsms = rankedPsms
        .GroupBy(psm => (psm.FullFilePath, psm.ScanNumber, psm.PeptideMonisotopicMass))
        .Select(group => group.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, NestedIds()).Run();

    List<PeptideSpectralMatch> goodIdentifications = allPsms
        .Where(psm => psm.FdrInfo.QValueNotch < 0.01 && !psm.IsDecoy && psm.FullSequence != null)
        .ToList();

    if (goodIdentifications.Count == 0)
    {
        return new DataPointAquisitionResults(null, new List<PeptideSpectralMatch>(), new List<LabeledDataPoint>(), new List<LabeledDataPoint>(), 0, 0, 0, 0);
    }

    var acquisitionEngine = new DataPointAcquisitionEngine(
        goodIdentifications,
        myMsDataFile,
        initPrecTol,
        initProdTol,
        CalibrationParameters.NumFragmentsNeededForEveryIdentification,
        CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
        CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
        fragmentTypesForCalibration,
        CommonParameters,
        NestedIds());

    return (DataPointAquisitionResults)acquisitionEngine.Run();
}
/// <summary>
/// Runs the step of the Neo workflow selected by <c>NeoType</c>: aggregating target/decoy search files,
/// aggregating normal and spliced results, generating spliced peptides, or (in the remaining case)
/// switching to the trans-spliced fusion database.
/// </summary>
/// <param name="OutputFolder">Output folder of this task; sibling task folders are located relative to it using backslash-separated "TaskN-..." names.</param>
/// <param name="dbFilenameList">Databases for the task.</param>
/// <param name="currentRawFileList">Spectra files; the first file's name is used to find PSM files of earlier tasks.</param>
/// <param name="taskId">Id of the running task, used for status reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed like <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results object created at the start of the run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);

    if (NeoType.Equals(NeoTaskType.AggregateTargetDecoyFiles))
    {
        // Paths that were not supplied are derived from the preceding search task folders:
        // decoy = previous task; target = the task before that (or the previous task when the
        // decoy path was supplied).
        if (NeoParameters.DecoyFilePath == null)
        {
            NeoParameters.DecoyFilePath = new DirectoryInfo(OutputFolder).Name;
            string taskString = NeoParameters.DecoyFilePath.Split('-')[0];
            // strip the 4-character "Task" prefix to get the task number
            int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
            taskNum--;
            NeoParameters.DecoyFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.DecoyFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
            if (NeoParameters.TargetFilePath == null)
            {
                NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name;
                taskNum--;
                NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
            }
        }
        if (NeoParameters.TargetFilePath == null)
        {
            NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name;
            string taskString = NeoParameters.TargetFilePath.Split('-')[0];
            int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
            taskNum--;
            NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
        }
        AggregateSearchFiles.Combine(NeoParameters.TargetFilePath, NeoParameters.DecoyFilePath, OutputFolder + "\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]));
    }
    else if (NeoType.Equals(NeoTaskType.AggregateNormalSplicedFiles))
    {
        //reset database
        dbFilenameList = StoredDatabases;
        string normalPath = "";
        string cisPath = new DirectoryInfo(OutputFolder).Name;
        string taskString = cisPath.Split('-')[0];
        int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
        // cis results come from two tasks back, trans results from the previous task
        taskNum -= 2;
        string transPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + (taskNum + 1) + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
        cisPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
        AggregateSearchFiles.RecursiveNeoAggregation(normalPath, cisPath, OutputFolder, "CisResults.psmtsv");
        AggregateSearchFiles.RecursiveNeoAggregation(normalPath, transPath, OutputFolder, "TransResults.psmtsv");
    }
    else if (NeoType.Equals(NeoTaskType.GenerateSplicedPeptides))
    {
        NeoMassCalculator.ImportMasses();
        MyFileManager myFileManager = new MyFileManager(true);

        //Import Spectra
        for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
        {
            var origDataFile = currentRawFileList[spectraFileIndex];
            CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

            var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
            NewCollection(Path.GetFileName(origDataFile), thisId);
            Status("Loading spectra file...", thisId);
            MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
            Status("Getting ms2 scans...", thisId);
            Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            //Import Database
            Status("Loading modifications...", taskId);
            List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List<string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();

            // load proteins
            List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.None, localizeableModificationTypes, combinedParams);

            //Read N and C files
            string nPath = NeoParameters.NFilePath;
            string cPath = NeoParameters.CFilePath;

            // Derive whichever terminus file was not supplied from the preceding task folders.
            bool deriveNPath = nPath == null;
            bool deriveCPath = cPath == null;
            if (deriveNPath || deriveCPath)
            {
                string taskHeader = "Task";
                string[] pathArray = OutputFolder.Split('\\');

                // parent folder of this task's output folder, with a trailing backslash
                string basePath = "";
                for (int i = 0; i < pathArray.Length - 1; i++)
                {
                    basePath += pathArray[i] + '\\';
                }

                string currentTaskText = pathArray[pathArray.Length - 1].Split('-')[0];
                currentTaskText = currentTaskText.Substring(taskHeader.Length, currentTaskText.Length - taskHeader.Length);
                int currentTaskNumber = Convert.ToInt16(currentTaskText);

                // A null header means "this path was supplied, do not search for it". The
                // original used "" here, which String.Contains matches against every directory,
                // so a supplied path was overwritten by the scan below.
                string nHeader = null;
                string cHeader = null;
                if (deriveCPath)
                {
                    cHeader = taskHeader + (currentTaskNumber - 1);
                    if (deriveNPath)
                    {
                        nHeader = taskHeader + (currentTaskNumber - 2);
                    }
                }
                else
                {
                    nHeader = taskHeader + (currentTaskNumber - 1);
                }

                // NOTE(review): substring match on the full path, so "Task1" also matches
                // "Task10-..."; kept as in the original.
                foreach (string s in Directory.GetDirectories(basePath))
                {
                    if (nHeader != null && s.Contains(nHeader))
                    {
                        nPath = s;
                    }
                    else if (cHeader != null && s.Contains(cHeader))
                    {
                        cPath = s;
                    }
                }

                // append the PSM file name only to folders found above, never to a supplied file path
                string fileName = Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                if (deriveNPath)
                {
                    nPath += "\\" + fileName;
                }
                if (deriveCPath)
                {
                    cPath += "\\" + fileName;
                }
            }

            Status("Importing Search Results...", taskId);
            List<NeoPsm> psms = ImportPsmtsv.ImportNeoPsms(nPath, cPath);

            //Splice
            Status("Splicing Fragments...", taskId);
            List<NeoPsm> candidates = NeoSplicePeptides.SplicePeptides(psms);

            //Find Ambiguity
            Status("Identifying Ambiguity...", taskId);
            NeoFindAmbiguity.FindAmbiguity(candidates, proteinList, arrayOfMs2ScansSortedByMass, dbFilenameList[0].FilePath);

            //Export Results
            Status("Exporting Results...", taskId);
            NeoExport.ExportAll(candidates, arrayOfMs2ScansSortedByMass, OutputFolder);

            //Switch databases (also used by LoadProteins for any following spectra file)
            string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixNC.fasta";
            dbFilenameList = new List<DbForTask>() { new DbForTask(outputFolder, false) };
        }
    }
    else //if SearchTransDb
    {
        string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixTS.fasta";
        dbFilenameList = new List<DbForTask>() { new DbForTask(outputFolder, false) };
    }

    return MyTaskResults;
}
/// <summary>
/// Post-search analysis for crosslink searches: splits the matches by crosslink type, computes
/// q/p-values, writes the CSV result files, adds 1% FDR target counts to the task summary and
/// optionally writes one pepXML file per spectra file.
/// </summary>
/// <param name="outputFolder">Folder that receives the result files.</param>
/// <param name="dbFilenameList">Databases used; the first entry's path is reported in pepXML.</param>
/// <param name="currentRawFileList">Spectra files of the task; pepXML output is written per file.</param>
/// <param name="taskId">Id of the running task, used for reporting.</param>
/// <param name="allPsms">All crosslink spectral matches of the search.</param>
/// <param name="commonParameters">Common search parameters.</param>
/// <param name="xlSearchParameters">Crosslink search parameters; <c>WritePepXml</c> enables pepXML output.</param>
/// <param name="proteinList">Proteins searched, forwarded to the pepXML writer.</param>
/// <param name="variableModifications">Variable modifications, forwarded to the pepXML writer.</param>
/// <param name="fixedModifications">Fixed modifications, forwarded to the pepXML writer.</param>
/// <param name="localizeableModificationTypes">Localizeable modification types, forwarded to the pepXML writer.</param>
/// <param name="MyTaskResults">Results object that receives the summary text; returned unchanged otherwise.</param>
/// <returns>The <paramref name="MyTaskResults"/> instance passed in.</returns>
public MyTaskResults Run(string outputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, List<CrosslinkSpectralMatch> allPsms, CommonParameters commonParameters, XlSearchParameters xlSearchParameters, List<Protein> proteinList, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, MyTaskResults MyTaskResults)
{
    // inter-crosslinks; different proteins are linked
    List<CrosslinkSpectralMatch> interCsms = allPsms.Where(p => p.CrossType == PsmCrossType.Inter).OrderByDescending(p => p.XLTotalScore).ToList();

    // intra-crosslinks; crosslinks within a protein
    List<CrosslinkSpectralMatch> intraCsms = allPsms.Where(p => p.CrossType == PsmCrossType.Intra).OrderByDescending(p => p.XLTotalScore).ToList();

    // non-crosslinked categories are ranked by Score rather than XLTotalScore
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).OrderByDescending(p => p.Score).ToList();
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).OrderByDescending(p => p.Score).ToList();
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd
        || p.CrossType == PsmCrossType.DeadEndH2O
        || p.CrossType == PsmCrossType.DeadEndNH2
        || p.CrossType == PsmCrossType.DeadEndTris).OrderByDescending(p => p.Score).ToList();

    ComputeXlinkQandPValues(allPsms, intraCsms, interCsms, commonParameters, taskId);

    WriteCsvFiles(outputFolder, interCsms, intraCsms, singlePsms, loopPsms, deadendPsms, taskId, xlSearchParameters);

    // a crosslink counts as a target only when neither of its two peptides is a decoy
    MyTaskResults.AddTaskSummaryText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));
    MyTaskResults.AddTaskSummaryText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));
    MyTaskResults.AddTaskSummaryText("Target single peptides within 1% FDR: " + singlePsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));
    MyTaskResults.AddTaskSummaryText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));
    MyTaskResults.AddTaskSummaryText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write pepXML
    if (xlSearchParameters.WritePepXml)
    {
        // pepXML receives target matches up to 5% FDR, ordered by scan number
        List<CrosslinkSpectralMatch> writeToXml = new List<CrosslinkSpectralMatch>();
        writeToXml.AddRange(intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml = writeToXml.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            List<CrosslinkSpectralMatch> csmsForFile = writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList();

            // WritePepXML_xl returns without writing when given no items, so do not report
            // a finished file that was never created
            if (!csmsForFile.Any())
            {
                continue;
            }

            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WriteFile.WritePepXML_xl(csmsForFile, proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, outputFolder, fileNameNoExtension, commonParameters, xlSearchParameters);
            FinishedWritingFile(Path.Combine(outputFolder, fileNameNoExtension + ".pep.XML"), new List<string> { taskId });
        }
    }

    return MyTaskResults;
}
/// <summary>
/// Creates a search task (task type <c>MyTask.Search</c>) with newly constructed
/// <c>CommonParameters</c> and <c>SearchParameters</c> objects.
/// </summary>
public SearchTask()
    : base(MyTask.Search)
{
    this.CommonParameters = new CommonParameters();
    this.SearchParameters = new SearchParameters();
}
/// <summary>
/// Calibrates every spectra file of the task. For each file it searches for calibration data points
/// (widening the tolerances after each failed round), calibrates twice, then writes the calibrated
/// mzML plus a file-specific toml with suggested tolerances. Finally it rewrites the experimental
/// design file, if one exists next to the first spectra file.
/// </summary>
/// <param name="OutputFolder">Folder that receives the calibrated mzML and toml files.</param>
/// <param name="dbFilenameList">Databases to load proteins from.</param>
/// <param name="currentRawFileList">Spectra files to calibrate.</param>
/// <param name="taskId">Id of the running task, used for status reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed like <paramref name="currentRawFileList"/>; entries may be null.</param>
/// <returns>Task results listing the new spectra files and file-specific tomls.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the calibration task
    Status("Calibrating...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewSpectra = new List<string>(),
        NewFileSpecificTomls = new List<string>()
    };

    var myFileManager = new MyFileManager(true);
    // names (without extension) to write into the new experimental design: the calibrated name on
    // success, the original name on failure
    List<string> spectraFilesAfterCalibration = new List<string>();

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops) { break; }

        bool couldNotFindEnoughDatapoints = false;

        // get filename stuff
        var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
        var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
        string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

        // mark the file as in-progress
        StartingDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        // load the file with the same combined (task + file-specific) parameters that drive the search
        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });
        var myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, combinedParams);

        // get datapoints to fit calibration function to
        Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
        DataPointAquisitionResults acquisitionResults = null;

        // NOTE(review): the widened tolerances below are written to CommonParameters, while each
        // search reads combinedParams; whether the two are the same object depends on
        // SetAllFileSpecificCommonParams — confirm that widening actually reaches the search.
        for (int i = 1; i <= 5; i++)
        {
            acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

            // enough data points to calibrate?
            if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
            {
                break;
            }

            if (i == 1) // failed round 1
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                CommonParameters.ProductMassTolerance = new PpmTolerance(50);
            }
            else if (i == 2) // failed round 2
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                CommonParameters.ProductMassTolerance = new PpmTolerance(100);
            }
            else if (i == 3) // failed round 3
            {
                CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                CommonParameters.ProductMassTolerance = new PpmTolerance(150);
            }
            else // failed round 4: give up on this file
            {
                if (acquisitionResults.Psms.Count < NumRequiredPsms)
                {
                    Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                }
                if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                }
                if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                {
                    Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                }
                couldNotFindEnoughDatapoints = true;
                FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
                break;
            }

            Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
        }

        if (couldNotFindEnoughDatapoints)
        {
            // release the loaded file on the failure path too; the success path does this after
            // writing the calibrated mzML
            myFileManager.DoneWithFile(originalUncalibratedFilePath);

            spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(currentRawFileList[spectraFileIndex]));
            ReportProgress(new ProgressEventArgs(100, "Failed to calibrate!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
            continue;
        }

        // stats before calibration
        int prevPsmCount = acquisitionResults.Psms.Count;
        double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double preCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // generate calibration function and shift data points
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        CalibrationEngine engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        engine.Run();

        //update file
        myMsDataFile = engine.CalibratedDataFile;

        // do another search to evaluate calibration results
        Status("Post-calibration search...", new List<string> { taskId, "Individual Spectra Files" });
        acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        //generate calibration function and shift data points AGAIN because it's fast and contributes new data
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        engine.Run();

        //update file
        myMsDataFile = engine.CalibratedDataFile;

        // write the calibrated mzML file
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
        myFileManager.DoneWithFile(originalUncalibratedFilePath);

        // stats after calibration (taken from the search that followed the first calibration)
        int postCalibrationPsmCount = acquisitionResults.Psms.Count;
        double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
        double postCalibrationProductErrorIqr = acquisitionResults.PsmProductIqrPpmError;

        // did the data improve? (not used for anything yet...)
        bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

        // write toml settings for the calibrated file
        var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

        var fileSpecificParams = new FileSpecificParameters();

        // carry over file-specific parameters from the uncalibrated file to the calibrated one
        if (fileSettingsList[spectraFileIndex] != null)
        {
            fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
        }

        //suggest 4 * interquartile range (plus the absolute median error) as the ppm tolerance
        fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
        fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

        Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

        FinishedWritingFile(newTomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // finished calibrating this file
        spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(calibratedFilePath));
        FinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });
        MyTaskResults.NewSpectra.Add(calibratedFilePath);
        MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
        FinishedDataFile(originalUncalibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilePath });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
    }

    // re-write experimental design (if it has been defined) with new calibrated file names;
    // skipped for an empty file list, where First() would throw
    if (currentRawFileList.Any())
    {
        string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;
        assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

        if (File.Exists(assumedPathToExperDesign))
        {
            WriteNewExperimentalDesignFile(assumedPathToExperDesign, OutputFolder, spectraFilesAfterCalibration);
        }
    }

    // finished calibrating all files for the task
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    return MyTaskResults;
}
/// <summary>
/// Runs the search task over every spectra file in <paramref name="currentRawFileList"/>.
/// Prepares SILAC labels (or clears them when quantification is off), loads modifications and
/// proteins, records the search settings as prose, searches each file with the modern,
/// non-specific or classic engine (chosen by <c>SearchParameters.SearchType</c>), and finally
/// hands the collected PSMs to a <c>PostSearchAnalysisTask</c>.
/// </summary>
/// <param name="OutputFolder">Folder passed on to post-search analysis for its output.</param>
/// <param name="dbFilenameList">Protein databases to load and index.</param>
/// <param name="currentRawFileList">Paths of the spectra files to search.</param>
/// <param name="taskId">Identifier used in status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The result of running the post-search analysis task.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    if (SearchParameters.DoQuantification)
    {
        // disable quantification if a .mgf is being used
        if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
        {
            SearchParameters.DoQuantification = false;
        }
        // if we're doing SILAC, assign and add the silac labels to the residue dictionary
        else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
        {
            char heavyLabel = 'a'; // char to assign

            // add the turnover labels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
            if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
            {
                // original silacLabels object is null, so we need to initialize it
                SearchParameters.SilacLabels = new List<SilacLabel>();
                if (SearchParameters.StartTurnoverLabel != null)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                    SearchParameters.SilacLabels.Add(SearchParameters.StartTurnoverLabel);
                }
                if (SearchParameters.EndTurnoverLabel != null)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                    SearchParameters.SilacLabels.Add(SearchParameters.EndTurnoverLabel);
                }
            }
            else
            {
                // change the silac residues to lower case amino acids (currently null)
                List<SilacLabel> updatedLabels = new List<SilacLabel>();
                for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    updatedLabels.Add(updatedLabel.updatedLabel);
                }
                SearchParameters.SilacLabels = updatedLabels;
            }
        }
    }

    // if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
    if (!SearchParameters.DoQuantification) // using "if" instead of "else", because DoQuantification can change if it's an mgf
    {
        SearchParameters.SilacLabels = null;
    }

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following search settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
        + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the search task
    MyTaskResults = new MyTaskResults(this);
    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    // generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
    int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast<FdrCategory>().Last() + 1); // +1 because it starts at zero
    List<PeptideSpectralMatch>[] allCategorySpecificPsms = new List<PeptideSpectralMatch>[numFdrCategories];
    for (int i = 0; i < numFdrCategories; i++)
    {
        allCategorySpecificPsms[i] = new List<PeptideSpectralMatch>();
    }

    FlashLfqResults flashLfqResults = null;

    MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

    // deferred query: it is only enumerated when the post-search parameters are built below
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);
    Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

    Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);
        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        // record { number of MS2 scans in the file, number of MS2 scan/precursor entries to search }
        numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // modern search
        if (SearchParameters.SearchType == SearchType.Modern)
        {
            for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
            {
                List<PeptideWithSetModifications> peptideIndex = null;
                List<Protein> proteinListSubset = proteinList.GetRange(
                    currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                    ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                Status("Getting fragment dictionary...", new List<string> { taskId });
                var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                    SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams,
                    this.FileSpecificParameters, SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                List<int>[] fragmentIndex = null;
                List<int>[] precursorIndex = null;

                lock (indexLock)
                {
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                }

                Status("Searching files...", taskId);

                new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams,
                    this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                if (GlobalVariables.StopLoops)
                {
                    break;
                }
            }
        }
        // nonspecific search
        else if (SearchParameters.SearchType == SearchType.NonSpecific)
        {
            PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; // generate an array of all possible locals
            for (int i = 0; i < numFdrCategories; i++) // only add if we're using for FDR, else ignore it as null.
            {
                fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
            }

            // create params for N, C, or both if semi
            List<CommonParameters> paramsToUse = new List<CommonParameters> { combinedParams };
            if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) // if semi, we need to do both N and C to hit everything
            {
                paramsToUse.Clear();
                List<FragmentationTerminus> terminiToUse = new List<FragmentationTerminus> { FragmentationTerminus.N, FragmentationTerminus.C };
                foreach (FragmentationTerminus terminus in terminiToUse) // set both termini
                {
                    paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                }
            }

            // Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
            List<int>[] coisolationIndex = new List<int>[] { new List<int>() };
            if (arrayOfMs2ScansSortedByMass.Length != 0)
            {
                int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                coisolationIndex = new List<int>[maxScanNumber + 1];
                for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                {
                    int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                    if (coisolationIndex[scanNumber] == null)
                    {
                        coisolationIndex[scanNumber] = new List<int> { i };
                    }
                    else
                    {
                        coisolationIndex[scanNumber].Add(i);
                    }
                }
                // drop the slots for scan numbers that had no entries
                coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();
            }

            // foreach terminus we're going to look at
            foreach (CommonParameters paramToUse in paramsToUse)
            {
                // foreach database partition
                for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                {
                    List<PeptideWithSetModifications> peptideIndex = null;

                    List<Protein> proteinListSubset = proteinList.GetRange(
                        currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                        ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                    List<int>[] fragmentIndex = null;
                    List<int>[] precursorIndex = null;

                    Status("Getting fragment dictionary...", new List<string> { taskId });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                        SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse,
                        this.FileSpecificParameters, SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                    lock (indexLock)
                    {
                        GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                    }

                    Status("Searching files...", taskId);

                    new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                        precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                        SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }
                }
            }

            lock (psmLock)
            {
                for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                {
                    if (allCategorySpecificPsms[i] != null)
                    {
                        allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                    }
                }
            }
        }
        // classic search
        else
        {
            Status("Starting search...", thisId);
            new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
        }

        lock (psmLock)
        {
            allPsms.AddRange(fileSpecificPsms);
        }

        completedFiles++;
        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        // Report a 0-100 percentage like every other progress call in this method. Multiply before
        // dividing: plain completedFiles / Count is integer division and stays 0 until the last file.
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

    // resolve category specific fdrs (for speedy semi and nonspecific);
    // this replaces whatever was accumulated in allPsms during the per-file loop
    if (SearchParameters.SearchType == SearchType.NonSpecific)
    {
        allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);
    }

    PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
    {
        SearchTaskResults = MyTaskResults,
        SearchTaskId = taskId,
        SearchParameters = SearchParameters,
        ProteinList = proteinList,
        AllPsms = allPsms,
        VariableModifications = variableModifications,
        FixedModifications = fixedModifications,
        ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
        CurrentRawFileList = currentRawFileList,
        MyFileManager = myFileManager,
        NumNotches = numNotches,
        OutputFolder = OutputFolder,
        IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
        FlashLfqResults = flashLfqResults,
        FileSettingsList = fileSettingsList,
        NumMs2SpectraPerFile = numMs2SpectraPerFile,
        DatabaseFilenameList = dbFilenameList
    };
    PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
    {
        Parameters = parameters,
        FileSpecificParameters = this.FileSpecificParameters,
        CommonParameters = CommonParameters
    };
    return postProcessing.Run();
}
/// <summary>
/// Rewrites the toml file at <paramref name="fileName"/> in place. Each line that mentions one
/// of the known setting names is replaced by the result of
/// <c>GetCorrectValue(settingName, tomlFileName, line)</c>, subject to the terminus / splice
/// conditions below; ion-type lines that do not apply to the requested terminus are forced to
/// <c>false</c>; all other lines are copied through unchanged.
/// </summary>
/// <param name="tomlFileName">File handed to <c>GetCorrectValue</c> as the source of replacement values.</param>
/// <param name="fileName">File that is read and then overwritten.</param>
/// <param name="ye5">Not used by this method.</param>
/// <param name="terminusType">Terminus being searched; decides which ion types stay enabled and which settings are replaced.</param>
/// <param name="spliceSearch">When true, the variable-modification list and protease lines are left as they are.</param>
private static void UpdateTomls(string tomlFileName, string fileName, CommonParameters ye5, TerminusType terminusType, bool spliceSearch)
{
    string[] oldTomlLines = File.ReadAllLines(fileName);
    List<string> newTomlLines = new List<string>();

    // The terminus tests are loop-invariant, so evaluate them once.
    bool noTerminus = terminusType.Equals(TerminusType.None);
    bool allowNTerminalIons = terminusType.Equals(TerminusType.N) || noTerminus;
    bool allowCTerminalIons = terminusType.Equals(TerminusType.C) || noTerminus;

    foreach (string line in oldTomlLines)
    {
        if (line.Contains("LocalizeAll") && noTerminus)
        {
            newTomlLines.Add(GetCorrectValue("LocalizeAll", tomlFileName, line));
        }
        else if (line.Contains("ListOfModsFixed"))
        {
            newTomlLines.Add(GetCorrectValue("ListOfModsFixed", tomlFileName, line));
        }
        else if (line.Contains("ListOfModsVariable") && noTerminus && !spliceSearch)
        {
            newTomlLines.Add(GetCorrectValue("ListOfModsVariable", tomlFileName, line));
        }
        else if (line.Contains("BIons"))
        {
            newTomlLines.Add(allowNTerminalIons ? GetCorrectValue("BIons", tomlFileName, line) : "BIons = false");
        }
        else if (line.Contains("YIons"))
        {
            newTomlLines.Add(allowCTerminalIons ? GetCorrectValue("YIons", tomlFileName, line) : "YIons = false");
        }
        else if (line.Contains("ZdotIons"))
        {
            newTomlLines.Add(allowCTerminalIons ? GetCorrectValue("ZdotIons", tomlFileName, line) : "ZdotIons = false");
        }
        else if (line.Contains("CIons"))
        {
            newTomlLines.Add(allowNTerminalIons ? GetCorrectValue("CIons", tomlFileName, line) : "CIons = false");
        }
        else if (line.Contains("ProductMassTolerance"))
        {
            newTomlLines.Add(GetCorrectValue("ProductMassTolerance", tomlFileName, line));
        }
        else if (line.Contains("PrecursorMassTolerance"))
        {
            newTomlLines.Add(GetCorrectValue("PrecursorMassTolerance", tomlFileName, line));
        }
        else if (line.Contains("MaxMissedCleavages"))
        {
            newTomlLines.Add(GetCorrectValue("MaxMissedCleavages", tomlFileName, line));
        }
        else if (line.Contains("InitiatorMethionineBehavior"))
        {
            newTomlLines.Add(GetCorrectValue("InitiatorMethionineBehavior", tomlFileName, line));
        }
        // NOTE(review): this condition was written with a double negation ("!!"), which is the same
        // as no negation at all. It is kept as "terminus is None"; confirm that a single "!" was
        // not what was intended.
        else if (line.Contains("MinPeptideLength") && noTerminus)
        {
            newTomlLines.Add(GetCorrectValue("MinPeptideLength", tomlFileName, line));
        }
        else if (line.Contains("MaxPeptideLength"))
        {
            newTomlLines.Add(GetCorrectValue("MaxPeptideLength", tomlFileName, line));
        }
        else if (line.Contains("MaxModificationIsoforms"))
        {
            newTomlLines.Add(GetCorrectValue("MaxModificationIsoforms", tomlFileName, line));
        }
        else if (line.Contains("MaxModsForPeptide"))
        {
            newTomlLines.Add(GetCorrectValue("MaxModsForPeptide", tomlFileName, line));
        }
        else if (line.Contains("SemiProteaseDigestion"))
        {
            newTomlLines.Add(GetCorrectValue("SemiProteaseDigestion", tomlFileName, line));
        }
        else if (line.Contains("TerminusTypeSemiProtease"))
        {
            newTomlLines.Add(GetCorrectValue("TerminusTypeSemiProtease", tomlFileName, line));
        }
        // this must be last, else other names including "Protease" will be overwritten and crash.
        else if (line.Contains("Protease") && noTerminus && !spliceSearch)
        {
            newTomlLines.Add(GetCorrectValue("Protease", tomlFileName, line));
        }
        else
        {
            newTomlLines.Add(line);
        }
    }

    using (StreamWriter file = new StreamWriter(fileName))
    {
        foreach (string line in newTomlLines)
        {
            file.WriteLine(line);
        }
    }
}
/// <summary>
/// Creates a crosslink search task (<c>MyTask.XLSearch</c>) whose common and
/// crosslink-specific parameters are both default-constructed.
/// </summary>
public XLSearchTask()
    : base(MyTask.XLSearch)
{
    this.CommonParameters = new CommonParameters();
    this.XlSearchParameters = new XlSearchParameters();
}
/// <summary>
/// Runs the crosslink search over every spectra file in <paramref name="currentRawFileList"/>.
/// Loads modifications and proteins, records the settings as prose, searches each database
/// partition of each file with <c>CrosslinkSearchEngine</c>, splits the crosslinked matches into
/// inter- and intra-protein sets, runs FDR analysis, and writes the result tables (plus optional
/// Percolator and pepXML output) into <paramref name="OutputFolder"/>.
/// </summary>
/// <param name="OutputFolder">Folder that receives the written result files.</param>
/// <param name="dbFilenameList">Protein databases to load and index.</param>
/// <param name="currentRawFileList">Paths of the spectra files to search.</param>
/// <param name="taskId">Identifier used in status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results object populated during the run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);
    List<CrosslinkSpectralMatch> allPsms = new List<CrosslinkSpectralMatch>();

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    var crosslinker = XlSearchParameters.Crosslinker;

    MyFileManager myFileManager = new MyFileManager(true);

    // NOTE(review): ListOfDigestionParams is not read again in this method. It is kept because
    // building it evaluates SetAllFileSpecificCommonParams for every file up front.
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
    HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

    int completedFiles = 0;

    Status("Searching files...", taskId);

    ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
    ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
    ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
    ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
    ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");

    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

    // The "; " suffix belongs outside string.Join. Inside, it was concatenated with the
    // enumerable itself, so the prose contained the enumerable's type name instead of the ids.
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");

    ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including "
        + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        CrosslinkSpectralMatch[] newPsms = new CrosslinkSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // The loop bound uses the same partition count as the slice arithmetic below, so the
        // slices always tile the whole protein list exactly once.
        int totalPartitions = combinedParams.TotalPartitions;
        for (int currentPartition = 0; currentPartition < totalPartitions; currentPartition++)
        {
            List<PeptideWithSetModifications> peptideIndex = null;

            int partitionStart = currentPartition * proteinList.Count / totalPartitions;
            int partitionEnd = (currentPartition + 1) * proteinList.Count / totalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(partitionStart, partitionEnd - partitionStart);

            Status("Getting fragment dictionary...", new List<string> { taskId });
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, currentPartition,
                UsefulProteomicsDatabases.DecoyType.Reverse, combinedParams, 30000.0, false,
                dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
            List<int>[] fragmentIndex = null;
            List<int>[] precursorIndex = null;

            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

            Status("Searching files...", taskId);
            new CrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams, crosslinker,
                XlSearchParameters.RestrictToTopNHits, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O,
                XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + totalPartitions + "!", thisId));
            if (GlobalVariables.StopLoops)
            {
                break;
            }
        }

        // scans without a match leave a null slot; keep only the real matches
        allPsms.AddRange(newPsms.Where(p => p != null));

        completedFiles++;

        // Report a 0-100 percentage like the other progress calls. Multiply before dividing:
        // plain completedFiles / Count is integer division and stays 0 until the last file.
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    allPsms = allPsms.OrderByDescending(p => p.XLTotalScore).ToList();

    var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).ToList();

    // inter-crosslinks; different proteins are linked
    var interCsms = allPsmsXL.Where(p => !p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();
    foreach (var item in interCsms)
    {
        item.CrossType = PsmCrossType.Inter;
    }

    // intra-crosslinks; crosslinks within a protein
    var intraCsms = allPsmsXL.Where(p => p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();
    foreach (var item in intraCsms)
    {
        item.CrossType = PsmCrossType.Intra;
    }

    // calculate FDR
    DoCrosslinkFdrAnalysis(interCsms);
    DoCrosslinkFdrAnalysis(intraCsms);
    SingleFDRAnalysis(allPsms, new List<string> { taskId });

    // calculate protein crosslink residue numbers
    foreach (var csm in allPsmsXL)
    {
        // alpha peptide crosslink residue in the protein
        csm.XlProteinPos = csm.OneBasedStartResidueInProtein.Value + csm.LinkPositions[0] - 1;

        // beta crosslink residue in protein
        csm.BetaPeptide.XlProteinPos = csm.BetaPeptide.OneBasedStartResidueInProtein.Value + csm.BetaPeptide.LinkPositions[0] - 1;
    }

    // write interlink CSMs
    if (interCsms.Any())
    {
        string file = Path.Combine(OutputFolder, "XL_Interlinks.tsv");
        WritePsmCrossToTsv(interCsms, file, 2);
        FinishedWritingFile(file, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (XlSearchParameters.WriteOutputForPercolator)
    {
        var interPsmsXLPercolator = interCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "XL_Interlinks_Percolator", crosslinker, new List<string> { taskId });
    }

    // write intralink CSMs
    if (intraCsms.Any())
    {
        string file = Path.Combine(OutputFolder, "XL_Intralinks.tsv");
        WritePsmCrossToTsv(intraCsms, file, 2);
        FinishedWritingFile(file, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (XlSearchParameters.WriteOutputForPercolator)
    {
        var intraPsmsXLPercolator = intraCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "XL_Intralinks_Percolator", crosslinker, new List<string> { taskId });
    }

    // write single peptides
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).ToList();
    if (singlePsms.Any())
    {
        string writtenFileSingle = Path.Combine(OutputFolder, "SinglePeptides" + ".tsv");
        WritePsmCrossToTsv(singlePsms, writtenFileSingle, 1);
        FinishedWritingFile(writtenFileSingle, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target single peptides within 1% FDR: " + singlePsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write loops
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).ToList();
    if (loopPsms.Any())
    {
        string writtenFileLoop = Path.Combine(OutputFolder, "Looplinks" + ".tsv");
        WritePsmCrossToTsv(loopPsms, writtenFileLoop, 1);
        FinishedWritingFile(writtenFileLoop, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write deadends
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd
        || p.CrossType == PsmCrossType.DeadEndH2O
        || p.CrossType == PsmCrossType.DeadEndNH2
        || p.CrossType == PsmCrossType.DeadEndTris).ToList();
    if (deadendPsms.Any())
    {
        string writtenFileDeadend = Path.Combine(OutputFolder, "Deadends" + ".tsv");
        WritePsmCrossToTsv(deadendPsms, writtenFileDeadend, 1);
        FinishedWritingFile(writtenFileDeadend, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write pepXML: target matches with q-value <= 0.05, one file per spectra file
    if (XlSearchParameters.WritePepXml)
    {
        List<CrosslinkSpectralMatch> writeToXml = new List<CrosslinkSpectralMatch>();
        writeToXml.AddRange(intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml = writeToXml.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WritePepXML_xl(writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList(), proteinList, dbFilenameList[0].FilePath, variableModifications,
                fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List<string> { taskId });
        }
    }

    return MyTaskResults;
}
/// <summary>
/// Creates a G-PTM-D task (<c>MyTask.Gptmd</c>) whose common and GPTMD-specific
/// parameters are both default-constructed.
/// </summary>
public GptmdTask()
    : base(MyTask.Gptmd)
{
    this.CommonParameters = new CommonParameters();
    this.GptmdParameters = new GptmdParameters();
}
/// <summary>
/// Returns the data file for <paramref name="origDataFile"/>, loading it on first use and
/// caching it in <c>MyMsDataFiles</c>. The reader is chosen by file extension: <c>.mzML</c>,
/// <c>.mgf</c>, and otherwise the Thermo reader when compiled with <c>NETFRAMEWORK</c>.
/// </summary>
/// <param name="origDataFile">Path of the spectra file; also the cache key.</param>
/// <param name="topNpeaks">Passed to <c>FilteringParams</c>.</param>
/// <param name="minRatio">Passed to <c>FilteringParams</c>.</param>
/// <param name="trimMs1Peaks">Passed to <c>FilteringParams</c>.</param>
/// <param name="trimMsMsPeaks">Passed to <c>FilteringParams</c>.</param>
/// <param name="commonParameters">Supplies <c>MaxThreadsToUsePerFile</c> for the mzML reader.</param>
/// <returns>The cached or newly loaded data file.</returns>
public MsDataFile LoadFile(string origDataFile, int? topNpeaks, double? minRatio, bool trimMs1Peaks, bool trimMsMsPeaks, CommonParameters commonParameters)
{
    // Fast path: already loaded (a present-but-null entry is treated as not loaded).
    if (MyMsDataFiles.TryGetValue(origDataFile, out MsDataFile value) && value != null)
    {
        return value;
    }

    // By now know that need to load this file!!!
    lock (FileLoadingLock) // Lock because reading is sequential
    {
        // Another caller may have loaded this same file while we were waiting for the lock;
        // check again so the file is not read a second time.
        if (MyMsDataFiles.TryGetValue(origDataFile, out MsDataFile loadedMeanwhile) && loadedMeanwhile != null)
        {
            return loadedMeanwhile;
        }

        // Only needed when a file is actually read, so it is built after the cache checks.
        FilteringParams filter = new FilteringParams(topNpeaks, minRatio, 1, trimMs1Peaks, trimMsMsPeaks);
        string extension = Path.GetExtension(origDataFile);

        if (extension.Equals(".mzML", StringComparison.OrdinalIgnoreCase))
        {
            MyMsDataFiles[origDataFile] = Mzml.LoadAllStaticData(origDataFile, filter, commonParameters.MaxThreadsToUsePerFile);
        }
        else if (extension.Equals(".mgf", StringComparison.OrdinalIgnoreCase))
        {
            MyMsDataFiles[origDataFile] = Mgf.LoadAllStaticData(origDataFile, filter);
        }
        else
        {
#if NETFRAMEWORK
            MyMsDataFiles[origDataFile] = ThermoStaticData.LoadAllStaticData(origDataFile, filter);
#else
            Warn("No capability for reading " + origDataFile);
#endif
        }

        // NOTE(review): when the extension is unsupported and nothing was stored above, this
        // lookup relies on an existing entry for the key; behavior for a missing key depends on
        // the collection type of MyMsDataFiles - confirm. Left unchanged on purpose.
        return MyMsDataFiles[origDataFile];
    }
}
/// <summary>
/// Post-search analysis for crosslink spectral matches: resolves protein position ambiguities,
/// labels crosslinks as inter or intra, runs the FDR analyses, writes one TSV per match category,
/// optionally writes Percolator input and pepXML, and adds per-category 1% FDR counts to the
/// task summary.
/// </summary>
/// <param name="OutputFolder">Folder that receives every output file.</param>
/// <param name="dbFilenameList">Databases for the task; the first entry's path is passed to the pepXML writer.</param>
/// <param name="currentRawFileList">Spectra files; one pepXML file is written per entry that has matches.</param>
/// <param name="taskId">Identifier attached to progress and file notifications.</param>
/// <param name="fileSettingsList">Not used by this method.</param>
/// <param name="allPsms">All matches to analyse; their <c>CrossType</c> is updated in place for crosslinks.</param>
/// <param name="commonParameters">Passed to the single-peptide FDR analysis and the pepXML writer.</param>
/// <param name="xlSearchParameters">Controls Percolator/pepXML output and supplies the crosslinker.</param>
/// <param name="proteinList">Passed to the pepXML writer.</param>
/// <param name="variableModifications">Passed to the pepXML writer.</param>
/// <param name="fixedModifications">Passed to the pepXML writer.</param>
/// <param name="localizeableModificationTypes">Passed to the pepXML writer.</param>
/// <param name="MyTaskResults">Results object that receives the summary text and is returned.</param>
/// <returns>The <paramref name="MyTaskResults"/> instance that was passed in.</returns>
public MyTaskResults Run(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList, List<CrosslinkSpectralMatch> allPsms, CommonParameters commonParameters, XlSearchParameters xlSearchParameters, List<Protein> proteinList, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, MyTaskResults MyTaskResults)
{
    foreach (var csm in allPsms)
    {
        csm.ResolveProteinPosAmbiguitiesForXl();
    }

    var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).ToList();

    // inter-crosslinks; different proteins are linked
    var interCsms = allPsmsXL.Where(p => !p.IsIntraCsm()).ToList();
    foreach (var item in interCsms)
    {
        item.CrossType = PsmCrossType.Inter;
    }

    // intra-crosslinks; crosslinks within a protein
    var intraCsms = allPsmsXL.Where(p => p.IsIntraCsm()).ToList();
    foreach (var item in intraCsms)
    {
        item.CrossType = PsmCrossType.Intra;
    }

    // calculate FDR
    DoCrosslinkFdrAnalysis(interCsms);
    DoCrosslinkFdrAnalysis(intraCsms);
    SingleFDRAnalysis(allPsms, commonParameters, new List<string> { taskId });

    // write interlink CSMs
    if (interCsms.Any())
    {
        string file = Path.Combine(OutputFolder, "XL_Interlinks.tsv");
        WriteFile.WritePsmCrossToTsv(interCsms, file, 2);
        FinishedWritingFile(file, new List<string> { taskId });
    }
    MyTaskResults.AddTaskSummaryText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (xlSearchParameters.WriteOutputForPercolator)
    {
        var interPsmsXLPercolator = interCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteFile.WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "XL_Interlinks_Percolator", xlSearchParameters.Crosslinker);
        FinishedWritingFile(Path.Combine(OutputFolder, "XL_Interlinks_Percolator.txt"), new List<string> { taskId });
    }

    // write intralink CSMs
    if (intraCsms.Any())
    {
        string file = Path.Combine(OutputFolder, "XL_Intralinks.tsv");
        WriteFile.WritePsmCrossToTsv(intraCsms, file, 2);
        FinishedWritingFile(file, new List<string> { taskId });
    }
    MyTaskResults.AddTaskSummaryText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (xlSearchParameters.WriteOutputForPercolator)
    {
        var intraPsmsXLPercolator = intraCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteFile.WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "XL_Intralinks_Percolator", xlSearchParameters.Crosslinker);
        FinishedWritingFile(Path.Combine(OutputFolder, "XL_Intralinks_Percolator.txt"), new List<string> { taskId });
    }

    // write single peptides
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).ToList();
    if (singlePsms.Any())
    {
        string writtenFileSingle = Path.Combine(OutputFolder, "SinglePeptides" + ".tsv");
        WriteFile.WritePsmCrossToTsv(singlePsms, writtenFileSingle, 1);
        FinishedWritingFile(writtenFileSingle, new List<string> { taskId });
    }
    MyTaskResults.AddTaskSummaryText("Target single peptides within 1% FDR: " + singlePsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write loops
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).ToList();
    if (loopPsms.Any())
    {
        string writtenFileLoop = Path.Combine(OutputFolder, "Looplinks" + ".tsv");
        WriteFile.WritePsmCrossToTsv(loopPsms, writtenFileLoop, 1);
        FinishedWritingFile(writtenFileLoop, new List<string> { taskId });
    }
    MyTaskResults.AddTaskSummaryText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write deadends
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd
        || p.CrossType == PsmCrossType.DeadEndH2O
        || p.CrossType == PsmCrossType.DeadEndNH2
        || p.CrossType == PsmCrossType.DeadEndTris).ToList();
    if (deadendPsms.Any())
    {
        string writtenFileDeadend = Path.Combine(OutputFolder, "Deadends" + ".tsv");
        WriteFile.WritePsmCrossToTsv(deadendPsms, writtenFileDeadend, 1);
        FinishedWritingFile(writtenFileDeadend, new List<string> { taskId });
    }
    MyTaskResults.AddTaskSummaryText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write pepXML
    if (xlSearchParameters.WritePepXml)
    {
        // Only target matches at or below a q-value of 0.05 go into the pepXML output.
        List<CrosslinkSpectralMatch> writeToXml = new List<CrosslinkSpectralMatch>();
        writeToXml.AddRange(intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml = writeToXml.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            var csmsForFile = writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList();
            if (csmsForFile.Count == 0)
            {
                // WritePepXML_xl returns without writing anything when it is given no matches,
                // so there is no file to report for this spectra file.
                continue;
            }

            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WriteFile.WritePepXML_xl(csmsForFile, proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, commonParameters, xlSearchParameters);
            FinishedWritingFile(Path.Combine(OutputFolder, fileNameNoExtension + ".pep.XML"), new List<string> { taskId });
        }
    }

    return MyTaskResults;
}
/// <summary>
/// Runs the glyco search: loads modifications and proteins, records the search settings in
/// <c>ProseCreatedWhileRunning</c>, searches every spectra file partition by partition, performs
/// glycan localization on the resulting matches, and hands them to
/// <c>PostGlycoSearchAnalysisTask</c>.
/// </summary>
/// <param name="OutputFolder">Folder passed on to the post-search analysis.</param>
/// <param name="dbFilenameList">Protein databases to load and index.</param>
/// <param name="currentRawFileList">Spectra files to search.</param>
/// <param name="taskId">Identifier attached to status and progress notifications.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The results returned by the post-search analysis task.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);
    List<List<GlycoSpectralMatch>> ListOfGsmsPerMS2Scan = new List<List<GlycoSpectralMatch>>();

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, _glycoSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    MyFileManager myFileManager = new MyFileManager(true);

    int completedFiles = 0;

    Status("Searching files...", taskId);

    // Record the search settings.
    ProseCreatedWhileRunning.Append("\n");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; \n");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; \n");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; \n");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue
        ? "maximum peptide length = unspecified; "
        : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; \n");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; \n");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; \n");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; \n");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; \n");
    ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; \n");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; \n");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. \n");

    if (_glycoSearchParameters.GlycoSearchType == GlycoSearchType.OGlycanSearch)
    {
        ProseCreatedWhileRunning.Append("The O-glycan database: " + _glycoSearchParameters.OGlycanDatabasefile + "\n");
    }
    else if (_glycoSearchParameters.GlycoSearchType == GlycoSearchType.NGlycanSearch)
    {
        // Report the N-glycan database here (previously the O-glycan file was printed under this label).
        ProseCreatedWhileRunning.Append("The N-glycan database: " + _glycoSearchParameters.NGlycanDatabasefile + "\n");
    }
    else
    {
        ProseCreatedWhileRunning.Append("The O-glycan database: " + _glycoSearchParameters.OGlycanDatabasefile + "\n");
        ProseCreatedWhileRunning.Append("The N-glycan database: " + _glycoSearchParameters.NGlycanDatabasefile + "\n");
    }

    ProseCreatedWhileRunning.Append("\n");

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        List<GlycoSpectralMatch>[] newCsmsPerMS2ScanPerFile = new List<GlycoSpectralMatch>[arrayOfMs2ScansSortedByMass.Length];

        // NOTE(review): the loop bound reads CommonParameters.TotalPartitions while the subset
        // bounds below read combinedParams.TotalPartitions - confirm the two always agree.
        for (int currentPartition = 0; currentPartition < CommonParameters.TotalPartitions; currentPartition++)
        {
            List<PeptideWithSetModifications> peptideIndex = null;

            //When partition, the proteinList will be split for each Thread.
            int subsetStart = currentPartition * proteinList.Count / combinedParams.TotalPartitions;
            int subsetEnd = (currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(subsetStart, subsetEnd - subsetStart);

            Status("Getting fragment dictionary...", new List<string> { taskId });

            //Only reverse Decoy for glyco search has been tested and are set as fixed parameter.
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, _glycoSearchParameters.DecoyType, combinedParams, this.FileSpecificParameters, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), TargetContaminantAmbiguity.RemoveContaminant, new List<string> { taskId });
            List<int>[] fragmentIndex = null;
            List<int>[] precursorIndex = null;
            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

            //The second Fragment index is for 'MS1-HCD_MS1-ETD_MS2s' type of data. If LowCID is used for MS1, ion-index is not allowed to use.
            List<int>[] secondFragmentIndex = null;
            //if (combinedParams.MS2ChildScanDissociationType != DissociationType.LowCID
            //&& !CrosslinkSearchEngine.DissociationTypeGenerateSameTypeOfIons(combinedParams.DissociationType, combinedParams.MS2ChildScanDissociationType))
            //{
            //    //Because two different type of dissociation methods are used, the parameters are changed with different dissociation type.
            //    var secondCombinedParams = CommonParameters.CloneWithNewDissociationType(combinedParams.MS2ChildScanDissociationType);
            //    var secondIndexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, _glycoSearchParameters.DecoyType, secondCombinedParams, this.FileSpecificParameters, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
            //    GenerateSecondIndexes(indexEngine, secondIndexEngine, dbFilenameList, ref secondFragmentIndex, proteinList, taskId);
            //}

            Status("Searching files...", taskId);
            new GlycoSearchEngine(newCsmsPerMS2ScanPerFile, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, secondFragmentIndex, currentPartition, combinedParams, this.FileSpecificParameters,
                _glycoSearchParameters.OGlycanDatabasefile, _glycoSearchParameters.NGlycanDatabasefile, _glycoSearchParameters.GlycoSearchType, _glycoSearchParameters.GlycoSearchTopNum, _glycoSearchParameters.MaximumOGlycanAllowed, _glycoSearchParameters.OxoniumIonFilt, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + CommonParameters.TotalPartitions + "!", thisId));
            if (GlobalVariables.StopLoops)
            {
                break;
            }
        }

        ListOfGsmsPerMS2Scan.AddRange(newCsmsPerMS2ScanPerFile.Where(p => p != null).ToList());

        completedFiles++;
        // Multiply before dividing: the plain integer quotient is 0 until the last file completes,
        // whereas the other progress reports in this method use 100 to mean "done".
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    //For every Ms2Scans, each have a list of candidates psms. The allPsms from GlycoSearchEngine is the list (all ms2scans) of list (each ms2scan) of psm (all candidate psm).
    //Currently, only keep the first scan for consideration.
    List<GlycoSpectralMatch> GsmPerScans = ListOfGsmsPerMS2Scan.Select(p => p.First()).ToList();

    var filteredAllPsms = new List<GlycoSpectralMatch>();

    //For each ms2scan, try to find the best candidate psm from the psms list. Do the localization analysis. Add it into filteredAllPsms.
    foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => p.ScanNumber))
    {
        var glycos = RemoveSimilarSequenceDuplicates(gsmsPerScan.OrderByDescending(p => p.Score).ToList());

        foreach (var glycoSpectralMatch in glycos)
        {
            if (glycoSpectralMatch.LocalizationGraphs != null)
            {
                // Collect the localized form of every highest-scoring path of every graph.
                List<Route> localizationCandidates = new List<Route>();

                foreach (var localizationGraph in glycoSpectralMatch.LocalizationGraphs)
                {
                    var allPathWithMaxScore = LocalizationGraph.GetAllHighestScorePaths(localizationGraph.array, localizationGraph.ChildModBoxes);

                    foreach (var path in allPathWithMaxScore)
                    {
                        var local = LocalizationGraph.GetLocalizedPath(localizationGraph, path);
                        local.ModBoxId = localizationGraph.ModBoxId;
                        localizationCandidates.Add(local);
                    }
                }

                glycoSpectralMatch.Routes = localizationCandidates;
            }

            if (glycoSpectralMatch.Routes != null)
            {
                glycoSpectralMatch.LocalizedGlycan = GlycoSpectralMatch.GetLocalizedGlycan(glycoSpectralMatch.Routes, out LocalizationLevel localLevel);
                glycoSpectralMatch.LocalizationLevel = localLevel;

                //Localization PValue.
                if (localLevel == LocalizationLevel.Level1 || localLevel == LocalizationLevel.Level2)
                {
                    List<Route> allRoutes = new List<Route>();
                    foreach (var graph in glycoSpectralMatch.LocalizationGraphs)
                    {
                        allRoutes.AddRange(LocalizationGraph.GetAllPaths_CalP(graph, glycoSpectralMatch.ScanInfo_p, glycoSpectralMatch.Thero_n));
                    }
                    glycoSpectralMatch.SiteSpeciLocalProb = LocalizationGraph.CalSiteSpecificLocalizationProbability(allRoutes, glycoSpectralMatch.LocalizationGraphs.First().ModPos);
                }
            }

            filteredAllPsms.Add(glycoSpectralMatch);
        }
    }

    PostGlycoSearchAnalysisTask postGlycoSearchAnalysisTask = new PostGlycoSearchAnalysisTask();
    postGlycoSearchAnalysisTask.FileSpecificParameters = this.FileSpecificParameters;
    return postGlycoSearchAnalysisTask.Run(OutputFolder, dbFilenameList, currentRawFileList, taskId, fileSettingsList, filteredAllPsms.OrderByDescending(p => p.Score).ToList(), CommonParameters, _glycoSearchParameters, proteinList, variableModifications, fixedModifications, localizeableModificationTypes, MyTaskResults);
}