/// <summary>
/// Runs the crosslink search task: loads modifications and proteins, searches every
/// spectra file partition by partition, classifies the resulting crosslink spectral
/// matches (inter, intra, single, loop, dead-end), runs FDR analysis and writes the
/// result files into <paramref name="OutputFolder"/>.
/// </summary>
/// <param name="OutputFolder">Folder that receives the .tsv, Percolator and pepXML outputs.</param>
/// <param name="dbFilenameList">Protein databases to load and index.</param>
/// <param name="currentRawFileList">Paths of the spectra files to search.</param>
/// <param name="taskId">Identifier used for status, progress and file-written notifications.</param>
/// <param name="fileSettingsList">File-specific settings; entry i is applied to spectra file i.</param>
/// <returns>The <c>MyTaskResults</c> instance populated with the summary text of this run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);
    List<CrosslinkSpectralMatch> allPsms = new List<CrosslinkSpectralMatch>();

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    var crosslinker = XlSearchParameters.Crosslinker;
    MyFileManager myFileManager = new MyFileManager(true);
    int completedFiles = 0;

    Status("Searching files...", taskId);

    // Human-readable description of the settings used for this run.
    ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
    ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
    ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
    ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
    ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue
        ? "maximum peptide length = unspecified; "
        : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    // The "; " separator must be appended to the joined string, not to the sequence being
    // joined; otherwise the sequence's type name is written instead of the modification ids.
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        // One slot per scan; the same array is handed to the search engine for every partition.
        CrosslinkSpectralMatch[] newPsms = new CrosslinkSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // Use the file-specific partition count everywhere (loop bound, protein slicing and
        // progress text) so the slices always cover the protein list exactly once.
        int totalPartitions = combinedParams.TotalPartitions;
        for (int currentPartition = 0; currentPartition < totalPartitions; currentPartition++)
        {
            List<PeptideWithSetModifications> peptideIndex = null;

            int partitionStart = currentPartition * proteinList.Count / totalPartitions;
            int partitionEnd = (currentPartition + 1) * proteinList.Count / totalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(partitionStart, partitionEnd - partitionStart);

            Status("Getting fragment dictionary...", new List<string> { taskId });
            // NOTE(review): proteins are loaded with XlSearchParameters.DecoyType, but the indexing
            // engine is always given DecoyType.Reverse - confirm this is intentional.
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, combinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
            List<int>[] fragmentIndex = null;
            List<int>[] precursorIndex = null;

            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

            Status("Searching files...", taskId);
            new CrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams, crosslinker,
                XlSearchParameters.RestrictToTopNHits, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + totalPartitions + "!", thisId));

            // NOTE(review): this only leaves the partition loop; the remaining spectra files
            // are still visited by the outer loop.
            if (GlobalVariables.StopLoops)
            {
                break;
            }
        }

        allPsms.AddRange(newPsms.Where(p => p != null));

        completedFiles++;
        // Report a percentage; plain integer division would yield 0 until the final file.
        ReportProgress(new ProgressEventArgs(100 * completedFiles / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    allPsms = allPsms.OrderByDescending(p => p.XLTotalScore).ToList();

    var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).ToList();

    // inter-crosslinks; different proteins are linked
    var interCsms = allPsmsXL.Where(p => !p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();
    foreach (var item in interCsms)
    {
        item.CrossType = PsmCrossType.Inter;
    }

    // intra-crosslinks; crosslinks within a protein
    var intraCsms = allPsmsXL.Where(p => p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();
    foreach (var item in intraCsms)
    {
        item.CrossType = PsmCrossType.Intra;
    }

    // calculate FDR
    DoCrosslinkFdrAnalysis(interCsms);
    DoCrosslinkFdrAnalysis(intraCsms);
    SingleFDRAnalysis(allPsms, new List<string> { taskId });

    // calculate protein crosslink residue numbers
    foreach (var csm in allPsmsXL)
    {
        // alpha peptide crosslink residue in the protein
        csm.XlProteinPos = csm.OneBasedStartResidueInProtein.Value + csm.LinkPositions[0] - 1;

        // beta crosslink residue in protein
        csm.BetaPeptide.XlProteinPos = csm.BetaPeptide.OneBasedStartResidueInProtein.Value + csm.BetaPeptide.LinkPositions[0] - 1;
    }

    // Writes the given matches to a TSV in the output folder and announces the file;
    // nothing is written when the list is empty.
    void WriteCsmsIfAny(List<CrosslinkSpectralMatch> csms, string fileName, int writeType)
    {
        if (!csms.Any())
        {
            return;
        }

        string file = Path.Combine(OutputFolder, fileName);
        WritePsmCrossToTsv(csms, file, writeType);
        FinishedWritingFile(file, new List<string> { taskId });
    }

    // write interlink CSMs
    WriteCsmsIfAny(interCsms, "XL_Interlinks.tsv", 2);
    MyTaskResults.AddNiceText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (XlSearchParameters.WriteOutputForPercolator)
    {
        var interPsmsXLPercolator = interCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "XL_Interlinks_Percolator", crosslinker, new List<string> { taskId });
    }

    // write intralink CSMs
    WriteCsmsIfAny(intraCsms, "XL_Intralinks.tsv", 2);
    MyTaskResults.AddNiceText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

    if (XlSearchParameters.WriteOutputForPercolator)
    {
        var intraPsmsXLPercolator = intraCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "XL_Intralinks_Percolator", crosslinker, new List<string> { taskId });
    }

    // write single peptides
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).ToList();
    WriteCsmsIfAny(singlePsms, "SinglePeptides.tsv", 1);
    MyTaskResults.AddNiceText("Target single peptides within 1% FDR: " + singlePsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write loops
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).ToList();
    WriteCsmsIfAny(loopPsms, "Looplinks.tsv", 1);
    MyTaskResults.AddNiceText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write deadends
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd
        || p.CrossType == PsmCrossType.DeadEndH2O
        || p.CrossType == PsmCrossType.DeadEndNH2
        || p.CrossType == PsmCrossType.DeadEndTris).ToList();
    WriteCsmsIfAny(deadendPsms, "Deadends.tsv", 1);
    MyTaskResults.AddNiceText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

    // write pepXML: non-decoy matches with q-value <= 0.05, one file per spectra file
    if (XlSearchParameters.WritePepXml)
    {
        List<CrosslinkSpectralMatch> writeToXml = new List<CrosslinkSpectralMatch>();
        writeToXml.AddRange(intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml.AddRange(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
        writeToXml = writeToXml.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WritePepXML_xl(writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList(), proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List<string> { taskId });
        }
    }

    return MyTaskResults;
}
/// <summary>
/// End-to-end check of the crosslink search with the DSSO crosslinker on two small fake
/// proteins: builds the index, searches the scans of <c>XLTestDataFile</c>, verifies the
/// number of indexed fragments, peptides and resulting matches, and writes a pepXML file.
/// </summary>
public static void XlTest_BSA_DSSO()
{
    // Generate parameters.
    var commonParameters = new CommonParameters(doPrecursorDeconvolution: false, dissociationType: DissociationType.EThcD,
        scoreCutoff: 1, digestionParams: new DigestionParams(minPeptideLength: 5), precursorMassTolerance: new PpmTolerance(10));

    var xlSearchParameters = new XlSearchParameters();

    // Create a database containing two proteins.
    var proteinList = new List<Protein> { new Protein("EKVLTSSAR", "Fake01"), new Protein("LSQKFPK", "Fake02") };

    ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
    Modification mod1 = new Modification(_originalId: "Oxidation of M", _modificationType: "Common Variable", _target: motif1, _locationRestriction: "Anywhere.", _monoisotopicMass: 15.99491461957);
    ModificationMotif.TryGetMotif("C", out ModificationMotif motif2);
    Modification mod2 = new Modification(_originalId: "Carbamidomethyl of C", _modificationType: "Common Fixed", _target: motif2, _locationRestriction: "Anywhere.", _monoisotopicMass: 57.02146372068994);
    var variableModifications = new List<Modification>() { mod1 };
    var fixedModifications = new List<Modification>() { mod2 };

    // Run index engine.
    var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.Reverse, commonParameters, 30000, false, new List<FileInfo>(), new List<string>());

    var indexResults = (IndexingResults)indexEngine.Run();

    var indexedFragments = indexResults.FragmentIndex.Where(p => p != null).SelectMany(v => v).ToList();
    Assert.AreEqual(82, indexedFragments.Count);
    Assert.AreEqual(3, indexResults.PeptideIndex.Count);

    // Get MS2 scans (note: read with default CommonParameters, not the ones built above).
    var myMsDataFile = new XLTestDataFile();
    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    // Generate crosslinker, which is DSSO here.
    Crosslinker crosslinker = GlobalVariables.Crosslinkers.First(p => p.CrosslinkerName == "DSSO");

    CrosslinkSpectralMatch[] possiblePsms = new CrosslinkSpectralMatch[listOfSortedms2Scans.Length];
    new CrosslinkSearchEngine(possiblePsms, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, commonParameters, crosslinker,
        xlSearchParameters.RestrictToTopNHits, xlSearchParameters.CrosslinkSearchTopNum, xlSearchParameters.XlQuench_H2O, xlSearchParameters.XlQuench_NH2, xlSearchParameters.XlQuench_Tris, new List<string> { }).Run();

    var newPsms = possiblePsms.Where(p => p != null).ToList();
    foreach (var item in newPsms)
    {
        item.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
    }

    // Test newPsms.
    Assert.AreEqual(3, newPsms.Count);

    // Test output.
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    var task = new XLSearchTask();
    task.WritePepXML_xl(newPsms, proteinList, null, variableModifications, fixedModifications, null, testDirectory, "pep.XML", new List<string> { });

    // Test PsmCross.XlCalculateTotalProductMasses (disabled; kept for the pending TO DO below).
    //var psmCrossAlpha = new CrosslinkSpectralMatch(digestedList[1], 0, 0, 0, listOfSortedms2Scans[0], commonParameters.DigestionParams, new List<MatchedFragmentIon>());
    //var psmCrossBeta = new CrosslinkSpectralMatch(digestedList[2], 0, 0, 0, listOfSortedms2Scans[0], commonParameters.DigestionParams, new List<MatchedFragmentIon>());
    //var linkPos = CrosslinkSpectralMatch.GetPossibleCrosslinkerModSites(crosslinker.CrosslinkerModSites.ToCharArray(), digestedList[1]);
    //var productMassesAlphaList = CrosslinkedPeptide.XlGetTheoreticalFragments(DissociationType.EThcD, false, crosslinker, linkPos, digestedList[2].MonoisotopicMass, digestedList[1]);
    //Assert.AreEqual(productMassesAlphaList.First().Value.Count, 50); //TO DO: The number here should be manually verified.

    // Clean up. The pepXML was written into the test directory, which is not guaranteed to be
    // the current working directory, so it is deleted by its full path.
    // NOTE(review): assumes WritePepXML_xl appends ".pep.xml" to the given name - confirm.
    File.Delete(Path.Combine(testDirectory, "pep.XML.pep.xml"));

    // No call in this test names these two files; the original relative deletes are kept as-is.
    File.Delete(@"singlePsms.tsv");
    File.Delete(@"allPsms.tsv");
}