/// <summary>
/// Post-search analysis for a crosslink search: splits the crosslinked matches into
/// inter- and intra-protein groups, runs FDR analysis, writes the result tables
/// (and optionally Percolator input and pepXML files) into <paramref name="OutputFolder"/>,
/// and records summary counts on <paramref name="MyTaskResults"/>.
/// </summary>
/// <param name="OutputFolder">Directory that receives every file written by this method.</param>
/// <param name="dbFilenameList">Search databases; only the first entry's path is used, for pepXML output.</param>
/// <param name="currentRawFileList">Spectra files; one pepXML file is written per entry when enabled.</param>
/// <param name="taskId">Identifier attached to every "finished writing file" notification.</param>
/// <param name="fileSettingsList">Not read by this method.</param>
/// <param name="allPsms">All spectral matches produced by the search.</param>
/// <param name="commonParameters">Forwarded to the single-peptide FDR analysis and the pepXML writer.</param>
/// <param name="xlSearchParameters">Supplies the Percolator/pepXML switches and the crosslinker.</param>
/// <param name="proteinList">Forwarded to the pepXML writer.</param>
/// <param name="variableModifications">Forwarded to the pepXML writer.</param>
/// <param name="fixedModifications">Forwarded to the pepXML writer.</param>
/// <param name="localizeableModificationTypes">Forwarded to the pepXML writer.</param>
/// <param name="MyTaskResults">Result object that receives the summary text; also the return value.</param>
/// <returns>The same <paramref name="MyTaskResults"/> instance that was passed in.</returns>
public MyTaskResults Run(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList, List<CrosslinkSpectralMatch> allPsms, CommonParameters commonParameters, XlSearchParameters xlSearchParameters, List<Protein> proteinList, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, MyTaskResults MyTaskResults)
{
    const double summaryQValueCutoff = 0.01;
    const double pepXmlQValueCutoff = 0.05;

    // A fresh id list is handed to every notification, exactly one per call.
    List<string> NestedIds()
    {
        return new List<string> { taskId };
    }

    // Writes a TSV table for a non-empty group of matches and announces the file.
    void WriteTableIfAny(List<CrosslinkSpectralMatch> matches, string fileName, int writeType)
    {
        if (!matches.Any())
        {
            return;
        }

        string tablePath = Path.Combine(OutputFolder, fileName);
        WriteFile.WritePsmCrossToTsv(matches, tablePath, writeType);
        FinishedWritingFile(tablePath, NestedIds());
    }

    // Writes Percolator input for a crosslink group when that output is switched on.
    void WritePercolatorInputIfRequested(List<CrosslinkSpectralMatch> crosslinks, string fileNameWithoutExtension)
    {
        if (!xlSearchParameters.WriteOutputForPercolator)
        {
            return;
        }

        var eligible = crosslinks
            .Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2)
            .OrderBy(p => p.ScanNumber)
            .ToList();
        WriteFile.WriteCrosslinkToTxtForPercolator(eligible, OutputFolder, fileNameWithoutExtension, xlSearchParameters.Crosslinker);
        FinishedWritingFile(Path.Combine(OutputFolder, fileNameWithoutExtension + ".txt"), NestedIds());
    }

    foreach (var match in allPsms)
    {
        match.ResolveProteinPosAmbiguitiesForXl();
    }

    var crosslinked = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).ToList();

    // inter-crosslinks; different proteins are linked
    var interCsms = crosslinked.FindAll(p => !p.IsIntraCsm());
    interCsms.ForEach(c => c.CrossType = PsmCrossType.Inter);

    // intra-crosslinks; crosslinks within a protein
    var intraCsms = crosslinked.FindAll(p => p.IsIntraCsm());
    intraCsms.ForEach(c => c.CrossType = PsmCrossType.Intra);

    // calculate FDR
    DoCrosslinkFdrAnalysis(interCsms);
    DoCrosslinkFdrAnalysis(intraCsms);
    SingleFDRAnalysis(allPsms, commonParameters, NestedIds());

    // interlink CSMs
    WriteTableIfAny(interCsms, "XL_Interlinks.tsv", 2);
    MyTaskResults.AddNiceText("Target inter-crosslinks within 1% FDR: "
        + interCsms.Count(p => p.FdrInfo.QValue <= summaryQValueCutoff && !p.IsDecoy && !p.BetaPeptide.IsDecoy));
    WritePercolatorInputIfRequested(interCsms, "XL_Interlinks_Percolator");

    // intralink CSMs
    WriteTableIfAny(intraCsms, "XL_Intralinks.tsv", 2);
    MyTaskResults.AddNiceText("Target intra-crosslinks within 1% FDR: "
        + intraCsms.Count(p => p.FdrInfo.QValue <= summaryQValueCutoff && !p.IsDecoy && !p.BetaPeptide.IsDecoy));
    WritePercolatorInputIfRequested(intraCsms, "XL_Intralinks_Percolator");

    // single peptides
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).ToList();
    WriteTableIfAny(singlePsms, "SinglePeptides.tsv", 1);
    MyTaskResults.AddNiceText("Target single peptides within 1% FDR: "
        + singlePsms.Count(p => p.FdrInfo.QValue <= summaryQValueCutoff && !p.IsDecoy));

    // loops
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).ToList();
    WriteTableIfAny(loopPsms, "Looplinks.tsv", 1);
    MyTaskResults.AddNiceText("Target loop-linked peptides within 1% FDR: "
        + loopPsms.Count(p => p.FdrInfo.QValue <= summaryQValueCutoff && !p.IsDecoy));

    // deadends (all four quench variants)
    var deadendPsms = allPsms
        .Where(p => p.CrossType == PsmCrossType.DeadEnd
            || p.CrossType == PsmCrossType.DeadEndH2O
            || p.CrossType == PsmCrossType.DeadEndNH2
            || p.CrossType == PsmCrossType.DeadEndTris)
        .ToList();
    WriteTableIfAny(deadendPsms, "Deadends.tsv", 1);
    MyTaskResults.AddNiceText("Target deadend peptides within 1% FDR: "
        + deadendPsms.Count(p => p.FdrInfo.QValue <= summaryQValueCutoff && !p.IsDecoy));

    // pepXML
    if (!xlSearchParameters.WritePepXml)
    {
        return MyTaskResults;
    }

    // Group order (intra, inter, single, loop, deadend) is kept because OrderBy is a
    // stable sort: matches sharing a scan number stay in this order.
    List<CrosslinkSpectralMatch> writeToXml = intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= pepXmlQValueCutoff)
        .Concat(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= pepXmlQValueCutoff))
        .Concat(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= pepXmlQValueCutoff))
        .Concat(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= pepXmlQValueCutoff))
        .Concat(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= pepXmlQValueCutoff))
        .OrderBy(p => p.ScanNumber)
        .ToList();

    foreach (var fullFilePath in currentRawFileList)
    {
        string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
        var matchesForFile = writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList();

        WriteFile.WritePepXML_xl(matchesForFile, proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, commonParameters, xlSearchParameters);
        FinishedWritingFile(Path.Combine(OutputFolder, fileNameNoExtension + ".pep.XML"), NestedIds());
    }

    return MyTaskResults;
}
/// <summary>
/// Runs the crosslink search task: loads modifications and proteins, searches every
/// spectra file partition by partition, classifies the crosslinked matches as inter- or
/// intra-protein, runs FDR analysis, and writes the result tables (plus optional
/// Percolator input and pepXML files) into <paramref name="OutputFolder"/>.
/// </summary>
/// <param name="OutputFolder">Directory that receives every file written by this task.</param>
/// <param name="dbFilenameList">Protein databases to load; the first entry's path is used for pepXML output.</param>
/// <param name="currentRawFileList">Spectra files to search, in order.</param>
/// <param name="taskId">Identifier used for status, progress and file notifications.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results carrying the summary counts.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);
    List<CrosslinkSpectralMatch> allPsms = new List<CrosslinkSpectralMatch>();

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    var crosslinker = XlSearchParameters.Crosslinker;
    MyFileManager myFileManager = new MyFileManager(true);
    int completedFiles = 0;

    Status("Searching files...", taskId);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
    ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
    ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
    ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
    ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue
        ? "maximum peptide length = unspecified; "
        : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    // The "; " terminator must sit OUTSIDE string.Join. Appending it to the Select(...)
    // sequence concatenates the iterator's type name rather than the modification ids.
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        CrosslinkSpectralMatch[] newPsms = new CrosslinkSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // The loop bound and the slice arithmetic below must use the same partition
        // count, otherwise the protein ranges would not tile the protein list.
        int totalPartitions = combinedParams.TotalPartitions;
        int proteinCount = proteinList.Count;

        for (int currentPartition = 0; currentPartition < totalPartitions; currentPartition++)
        {
            List<PeptideWithSetModifications> peptideIndex = null;

            int firstProtein = currentPartition * proteinCount / totalPartitions;
            int endProtein = (currentPartition + 1) * proteinCount / totalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(firstProtein, endProtein - firstProtein);

            Status("Getting fragment dictionary...", new List<string> { taskId });
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, combinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });

            List<int>[] fragmentIndex = null;
            List<int>[] precursorIndex = null;
            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

            Status("Searching files...", taskId);
            new CrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams, crosslinker, XlSearchParameters.RestrictToTopNHits, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + totalPartitions + "!", thisId));

            if (GlobalVariables.StopLoops)
            {
                break;
            }
        }

        allPsms.AddRange(newPsms.Where(p => p != null));

        completedFiles++;
        // Scale to 0-100 before dividing: plain integer division of the two counts
        // yields 0 for every file except the last one.
        ReportProgress(new ProgressEventArgs(100 * completedFiles / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    allPsms = allPsms.OrderByDescending(p => p.XLTotalScore).ToList();

    var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).ToList();

    // inter-crosslinks; different proteins are linked
    var interCsms = allPsmsXL.Where(p => !p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();
    foreach (var item in interCsms)
    {
        item.CrossType = PsmCrossType.Inter;
    }

    // intra-crosslinks; crosslinks within a protein
    var intraCsms = allPsmsXL.Where(p => p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();
    foreach (var item in intraCsms)
    {
        item.CrossType = PsmCrossType.Intra;
    }

    // calculate FDR
    DoCrosslinkFdrAnalysis(interCsms);
    DoCrosslinkFdrAnalysis(intraCsms);
    SingleFDRAnalysis(allPsms, new List<string> { taskId });

    // calculate protein crosslink residue numbers
    foreach (var csm in allPsmsXL)
    {
        // alpha peptide crosslink residue in the protein
        csm.XlProteinPos = csm.OneBasedStartResidueInProtein.Value + csm.LinkPositions[0] - 1;

        // beta crosslink residue in protein
        csm.BetaPeptide.XlProteinPos = csm.BetaPeptide.OneBasedStartResidueInProtein.Value + csm.BetaPeptide.LinkPositions[0] - 1;
    }

    // write interlink CSMs
    if (interCsms.Any())
    {
        string file = Path.Combine(OutputFolder, "XL_Interlinks.tsv");
        WritePsmCrossToTsv(interCsms, file, 2);
        FinishedWritingFile(file, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (XlSearchParameters.WriteOutputForPercolator)
    {
        var interPsmsXLPercolator = interCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "XL_Interlinks_Percolator", crosslinker, new List<string> { taskId });
    }

    // write intralink CSMs
    if (intraCsms.Any())
    {
        string file = Path.Combine(OutputFolder, "XL_Intralinks.tsv");
        WritePsmCrossToTsv(intraCsms, file, 2);
        FinishedWritingFile(file, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (XlSearchParameters.WriteOutputForPercolator)
    {
        var intraPsmsXLPercolator = intraCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "XL_Intralinks_Percolator", crosslinker, new List<string> { taskId });
    }

    // write single peptides
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).ToList();
    if (singlePsms.Any())
    {
        string writtenFileSingle = Path.Combine(OutputFolder, "SinglePeptides" + ".tsv");
        WritePsmCrossToTsv(singlePsms, writtenFileSingle, 1);
        FinishedWritingFile(writtenFileSingle, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target single peptides within 1% FDR: " + singlePsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write loops
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).ToList();
    if (loopPsms.Any())
    {
        string writtenFileLoop = Path.Combine(OutputFolder, "Looplinks" + ".tsv");
        WritePsmCrossToTsv(loopPsms, writtenFileLoop, 1);
        FinishedWritingFile(writtenFileLoop, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write deadends
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd
        || p.CrossType == PsmCrossType.DeadEndH2O
        || p.CrossType == PsmCrossType.DeadEndNH2
        || p.CrossType == PsmCrossType.DeadEndTris).ToList();
    if (deadendPsms.Any())
    {
        string writtenFileDeadend = Path.Combine(OutputFolder, "Deadends" + ".tsv");
        WritePsmCrossToTsv(deadendPsms, writtenFileDeadend, 1);
        FinishedWritingFile(writtenFileDeadend, new List<string> { taskId });
    }
    MyTaskResults.AddNiceText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write pepXML
    if (XlSearchParameters.WritePepXml)
    {
        List<CrosslinkSpectralMatch> writeToXml = new List<CrosslinkSpectralMatch>();
        writeToXml.AddRange(intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml = writeToXml.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WritePepXML_xl(writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList(), proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List<string> { taskId });
        }
    }

    return MyTaskResults;
}
/// <summary>
/// Runs the G-PTM-D task: searches every spectra file with a mass-difference acceptor
/// built from the candidate modifications, runs FDR analysis on the de-duplicated
/// matches, writes the candidate PSM table, runs the G-PTM-D engine, and writes new
/// XML databases (non-contaminant and contaminant separately) into
/// <paramref name="OutputFolder"/>.
/// </summary>
/// <param name="OutputFolder">Directory that receives the candidate table and the new databases.</param>
/// <param name="dbFilenameList">Input protein databases; their file names seed the output database names.</param>
/// <param name="currentRawFileList">Spectra files to search, in order.</param>
/// <param name="taskId">Identifier used for status, progress and file notifications.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>; entries may be null.</param>
/// <returns>The task results, including the list of newly written databases.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // TODO: print error messages loading GPTMD mods
    List<Modification> gptmdModifications = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
    IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    // write prose settings
    ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue
        ? "maximum peptide length = unspecified; "
        : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

    // temporary search type for writing prose
    // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
    MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);
    ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    // The sentence reports contaminants as a subset of the non-decoy entries, so
    // decoys are excluded from the contaminant count as well.
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant && !p.IsDecoy) + " contaminant sequences. ");

    // start the G-PTM-D task
    Status("Running G-PTM-D...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewDatabases = new List<DbForTask>()
    };

    MyFileManager myFileManager = new MyFileManager(true);

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        // Stop if canceled
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];
        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };

        // mark the file as in-progress
        StartingDataFile(origDataFile, thisId);

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
        new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, proteinList, searchMode, combinedParams, thisId).Run();
        allPsms.AddRange(allPsmsArray.Where(p => p != null));

        FinishedDataFile(origDataFile, thisId);
        ReportProgress(new ProgressEventArgs(100, "Done!", thisId));
    }

    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    // Best score first, ties broken by smallest precursor mass error; then keep only the
    // first match for each (file, scan, peptide mass) combination.
    allPsms = allPsms.OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List<string> { taskId }).Run();

    var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
    WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
    FinishedWritingFile(writtenFile, new List<string> { taskId });

    // get file-specific precursor mass tolerances for the GPTMD engine
    var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();
    for (int i = 0; i < currentRawFileList.Count; i++)
    {
        // fall back to the task-wide tolerance when the file has no override
        Tolerance fileTolerance = fileSettingsList[i]?.PrecursorMassTolerance ?? CommonParameters.PrecursorMassTolerance;
        filePathToPrecursorMassTolerance.Add(currentRawFileList[i], fileTolerance);
    }

    // run GPTMD engine
    var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List<string> { taskId }).Run();

    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return MyTaskResults;
    }

    // write GPTMD databases
    if (dbFilenameList.Any(b => !b.IsContaminant))
    {
        List<string> databaseNames = new List<string>();
        foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
        {
            var dbName = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
            var theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
            bool compressed = theExtension.EndsWith("gz", StringComparison.Ordinal);

            // for compressed input (e.g. ".xml.gz") strip the inner extension as well
            databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
        }

        string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");
        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);
        FinishedWritingFile(outputXMLdbFullName, new List<string> { taskId });

        MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
        MyTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Sum(b => b.Value));
        MyTaskResults.AddNiceText("Mods types and counts:");
        MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }

    if (dbFilenameList.Any(b => b.IsContaminant))
    {
        // do NOT use Path.GetFileNameWithoutExtension here because GPTMD on .xml.gz
        // would result in a .xml.xml file type being written
        List<string> databaseNames = new List<string>();
        foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
        {
            var dbName = Path.GetFileName(contaminantDb.FilePath);
            int indexOfFirstDot = dbName.IndexOf('.');

            // a file name without any dot is used whole; Substring(0, -1) would throw
            databaseNames.Add(indexOfFirstDot >= 0 ? dbName.Substring(0, indexOfFirstDot) : dbName);
        }

        string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");
        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);
        FinishedWritingFile(outputXMLdbFullNameContaminants, new List<string> { taskId });

        MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
        MyTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Sum(b => b.Value));
        MyTaskResults.AddNiceText("Mods types and counts:");
        MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }

    return MyTaskResults;
}
/// <summary>
/// Runs the G-PTM-D task: searches every spectra file against the loaded protein databases,
/// writes the candidate PSMs to "GPTMD_Candidates.psmtsv", and writes one modification-annotated
/// XML database for the non-contaminant inputs and one for the contaminant inputs.
/// </summary>
/// <param name="OutputFolder">Folder that receives the candidate PSM file and the new "*GPTMD.xml" databases.</param>
/// <param name="dbFilenameList">Protein databases to load; each entry's IsContaminant flag decides which output database it contributes to.</param>
/// <param name="currentRawFileList">Spectra files to search; they are processed with Parallel.For.</param>
/// <param name="taskId">Identifier passed along with every status, progress and file-written notification.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in step with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results, with newDatabases listing every database written by this run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
{
    myTaskResults = new MyTaskResults(this)
    {
        newDatabases = new List<DbForTask>()
    };

    Status("Loading modifications...", new List<string> { taskId });

    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();
    List<ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList();

    IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.B);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }

    Status("Loading proteins...", new List<string> { taskId });
    Dictionary<string, Modification> um = null;

    // Decoys are currently not being searched with DecoyType.None
    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out um)).ToList();

    proseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
    proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    if (CommonParameters.DigestionParams.MaxPeptideLength == null)
    {
        proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
    }
    else
    {
        proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    }
    proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

    // Puppet search mode used only for the prose and the notch count; the search mode
    // actually used for searching is built per file inside the parallel loop below.
    MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);
    proseCreatedWhileRunning.Append("parent mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");
    proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
    proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    Status("Running G-PTM-D...", new List<string> { taskId });

    HashSet<IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

    MyFileManager myFileManager = new MyFileManager(true);

    // Guards allPsms, which is appended to from several files concurrently.
    object psmListLock = new object();

    ParallelOptions parallelOptions = new ParallelOptions();
    if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
    {
        parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
    }

    Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

        NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);

        Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
        new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, false, combinedParams, combinedParams.ProductMassTolerance, new List<string> { taskId, "Individual Spectra Files", origDataFile }).Run();

        lock (psmListLock)
        {
            allPsms.AddRange(allPsmsArray);
        }

        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", origDataFile }));
    });
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    // Group and order psms
    SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngineTest = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, new List<string> { taskId });
    var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngineTest.Run();
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching;

    // The per-file result arrays may contain null slots, so null entries are skipped here
    // and filtered out below.
    foreach (var psm in allPsms)
    {
        if (psm != null)
        {
            psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest);
        }
    }

    // Keep one PSM per (file, scan number, peptide mass): the first after ordering by
    // descending score, then by ascending precursor mass error.
    allPsms = allPsms
        .Where(b => b != null)
        .OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, false, new List<string> { taskId }).Run();

    var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
    WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
    SucessfullyFinishedWritingFile(writtenFile, new List<string> { taskId });

    var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, CommonParameters.PrecursorMassTolerance, new List<string> { taskId }).Run();

    WriteAnnotatedDatabase(false, "Modifications added: ");
    WriteAnnotatedDatabase(true, "Contaminant modifications added: ");

    return myTaskResults;

    // Writes the annotated XML database for either the contaminant or the non-contaminant
    // input databases and records it, plus a modification summary, in myTaskResults.
    // Does nothing when no input database of the requested kind was supplied.
    void WriteAnnotatedDatabase(bool contaminants, string summaryLabel)
    {
        List<string> databaseNames = dbFilenameList
            .Where(db => db.IsContaminant == contaminants)
            .Select(db => NameBeforeFirstDot(db.FilePath))
            .ToList();
        if (databaseNames.Count == 0)
        {
            return;
        }

        string outputXmlDbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(protein => !protein.IsDecoy && protein.IsContaminant == contaminants).ToList(), outputXmlDbFullName);

        SucessfullyFinishedWritingFile(outputXmlDbFullName, new List<string> { taskId });

        myTaskResults.newDatabases.Add(new DbForTask(outputXmlDbFullName, contaminants));
        myTaskResults.AddNiceText(summaryLabel + newModsActuallyWritten.Select(entry => entry.Value).Sum());
        myTaskResults.AddNiceText("Mods types and counts:");
        myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(entry => entry.Value).Select(entry => "\t" + entry.Key + "\t" + entry.Value)));
    }

    // Returns the file name up to (not including) its first dot. Path.GetFileNameWithoutExtension
    // is deliberately NOT used: it strips only the last extension, so running GPTMD on an
    // ".xml.gz" database would produce an ".xml.xml" output name.
    // A file name without any dot is returned whole; the previous Substring(0, -1) call threw
    // ArgumentOutOfRangeException in that case.
    string NameBeforeFirstDot(string filePath)
    {
        string fileName = Path.GetFileName(filePath);
        int firstDot = fileName.IndexOf('.');
        return firstDot < 0 ? fileName : fileName.Substring(0, firstDot);
    }
}