/// <summary>
/// Initializes the two-pass crosslink search engine from the supplied scans,
/// peptide/fragment indexes and crosslink search settings.
/// </summary>
/// <param name="lp">Product types to search; also passed to <c>DetermineDissociationType</c>.</param>
/// <param name="currentPartition">Partition index supplied by the caller; stored as <c>currentPartition + 1</c>.</param>
/// <param name="XLPrecusorMsTl">
/// Precursor tolerance. Its concrete type selects the precursor search mode:
/// a <c>PpmTolerance</c> yields a ppm window, anything else an absolute window.
/// </param>
/// <param name="nestedIds">Forwarded to the base engine constructor.</param>
public TwoPassCrosslinkSearchEngine(
    List<PsmCross> globalPsmsCross,
    Ms2ScanWithSpecificMass[] listOfSortedms2Scans,
    List<CompactPeptide> peptideIndex,
    List<int>[] fragmentIndex,
    List<ProductType> lp,
    int currentPartition,
    ICommonParameters CommonParameters,
    bool addCompIons,
    Tolerance XLPrecusorMsTl,
    CrosslinkerTypeClass crosslinker,
    bool CrosslinkSearchTop,
    int CrosslinkSearchTopNum,
    bool quench_H2O,
    bool quench_NH2,
    bool quench_Tris,
    bool charge_2_3,
    bool charge_2_3_PrimeFragment,
    List<string> nestedIds)
    : base(nestedIds)
{
    this.globalPsmsCross = globalPsmsCross;
    this.listOfSortedms2Scans = listOfSortedms2Scans;
    this.peptideIndex = peptideIndex;
    this.fragmentIndex = fragmentIndex;
    this.lp = lp;
    this.currentPartition = currentPartition + 1;
    this.CommonParameters = CommonParameters;
    this.addCompIons = addCompIons;

    // Here use LowTheoreticalDiffAcceptor in practice doesn't work in 12/2/2017
    this.massDiffAcceptor = new OpenSearchMode();
    this.dissociationTypes = DetermineDissociationType(lp);

    this.XLPrecusorMsTl = XLPrecusorMsTl;

    // Honor the unit of the supplied tolerance. Previously the value was always
    // interpreted as ppm, so an absolute tolerance (e.g. 0.01) silently became a
    // 0.01 ppm window.
    if (XLPrecusorMsTl is PpmTolerance)
    {
        XLPrecusorSearchMode = new SinglePpmAroundZeroSearchMode(XLPrecusorMsTl.Value);
    }
    else
    {
        XLPrecusorSearchMode = new SingleAbsoluteAroundZeroSearchMode(XLPrecusorMsTl.Value);
    }

    this.crosslinker = crosslinker;
    this.CrosslinkSearchTop = CrosslinkSearchTop;
    this.CrosslinkSearchTopNum = CrosslinkSearchTopNum;
    this.quench_H2O = quench_H2O;
    this.quench_NH2 = quench_NH2;
    this.quench_Tris = quench_Tris;
    this.charge_2_3 = charge_2_3;
    this.charge_2_3_PrimeFragment = charge_2_3_PrimeFragment;
}
/// <summary>
/// Text-changed handler: clears the shared <c>AdditionalRequirements</c> collection and
/// refills it with the current text of every entry in <c>AddParamsBlocks</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Text_TextChanged(object sender, TextChangedEventArgs e)
{
    // Rebuild from scratch so edited or removed entries do not linger.
    ICommonParameters.GetCommonData().AdditionalRequirements.Clear();

    foreach (var paramsBlock in AddParamsBlocks)
    {
        ICommonParameters.GetCommonData().AdditionalRequirements.Add(paramsBlock.Text.Text);
    }
}
/// <summary>
/// Initializes the indexing engine by storing the supplied proteins, modifications,
/// digestion parameters and fragment-size limit.
/// </summary>
/// <param name="currentPartition">Partition index supplied by the caller; stored as <c>currentPartition + 1</c>.</param>
/// <param name="nestedIds">Forwarded to the base engine constructor.</param>
public IndexingEngine(
    List<Protein> proteinList,
    List<ModificationWithMass> variableModifications,
    List<ModificationWithMass> fixedModifications,
    List<ProductType> lp,
    int currentPartition,
    DecoyType decoyType,
    IEnumerable<IDigestionParams> CollectionOfDigestionParams,
    ICommonParameters commonParams,
    double maxFragmentSize,
    List<string> nestedIds)
    : base(nestedIds)
{
    // Database content and how it is partitioned / decoyed.
    this.proteinList = proteinList;
    this.decoyType = decoyType;
    this.currentPartition = currentPartition + 1;

    // Modifications applied to the proteins.
    this.fixedModifications = fixedModifications;
    this.variableModifications = variableModifications;

    // Digestion and fragmentation settings.
    this.CollectionOfDigestionParams = CollectionOfDigestionParams;
    this.commonParams = commonParams;
    this.lp = lp;
    this.maxFragmentSize = maxFragmentSize;
}
/// <summary>
/// Initializes the crosslink analysis engine by storing the PSMs to analyze, the
/// peptide-to-protein matching dictionary, the protein database, modifications,
/// crosslinker and output settings.
/// </summary>
/// <param name="nestedIds">Forwarded to the base engine constructor.</param>
public CrosslinkAnalysisEngine(
    List<PsmCross> newPsms,
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatching,
    List<Protein> proteinList,
    List<ModificationWithMass> variableModifications,
    List<ModificationWithMass> fixedModifications,
    List<ProductType> lp,
    string OutputFolder,
    CrosslinkerTypeClass crosslinker,
    TerminusType terminusType,
    ICommonParameters CommonParameters,
    List<string> nestedIds)
    : base(nestedIds)
{
    // Search results and the matching dictionary passed in alongside them.
    this.newPsms = newPsms;
    this.compactPeptideToProteinPeptideMatching = compactPeptideToProteinPeptideMatching;

    // Database and modifications.
    this.proteinList = proteinList;
    this.fixedModifications = fixedModifications;
    this.variableModifications = variableModifications;

    // Fragmentation / crosslinker settings.
    this.lp = lp;
    this.terminusType = terminusType;
    this.crosslinker = crosslinker;

    // General settings.
    this.CommonParameters = CommonParameters;
    this.OutputFolder = OutputFolder;
}
/// <summary>
/// Initializes the modern (index-based) search engine from the supplied scans,
/// peptide/fragment indexes and search settings.
/// </summary>
/// <param name="lp">Product types to search; also passed to <c>DetermineDissociationType</c>.</param>
/// <param name="currentPartition">Partition index supplied by the caller; stored as <c>currentPartition + 1</c>.</param>
/// <param name="nestedIds">Forwarded to the base engine constructor.</param>
public ModernSearchEngine(
    PeptideSpectralMatch[] globalPsms,
    Ms2ScanWithSpecificMass[] listOfSortedms2Scans,
    List<CompactPeptide> peptideIndex,
    List<int>[] fragmentIndex,
    List<ProductType> lp,
    int currentPartition,
    ICommonParameters CommonParameters,
    bool addCompIons,
    MassDiffAcceptor massDiffAcceptor,
    double maximumMassThatFragmentIonScoreIsDoubled,
    List<string> nestedIds)
    : base(nestedIds)
{
    // Scans and the PSM array handed in by the caller.
    this.listOfSortedms2Scans = listOfSortedms2Scans;
    this.globalPsms = globalPsms;

    // Pre-built indexes for the current partition.
    this.peptideIndex = peptideIndex;
    this.fragmentIndex = fragmentIndex;
    this.currentPartition = currentPartition + 1;

    // Fragmentation settings.
    this.lp = lp;
    dissociationTypes = DetermineDissociationType(lp);
    this.addCompIons = addCompIons;

    // Scoring / acceptance settings.
    this.CommonParameters = CommonParameters;
    this.massDiffAcceptor = massDiffAcceptor;
    this.maximumMassThatFragmentIonScoreIsDoubled = maximumMassThatFragmentIonScoreIsDoubled;
}
/// <summary>
/// Initializes the classic search engine from the supplied scans, protein database,
/// modifications and search settings.
/// </summary>
/// <param name="arrayOfSortedMS2Scans">MS2 scans; their <c>PrecursorMass</c> values are copied into a separate array.</param>
/// <param name="lp">Product types to search; also passed to <c>DetermineDissociationType</c>.</param>
/// <param name="nestedIds">Forwarded to the base engine constructor.</param>
public ClassicSearchEngine(
    PeptideSpectralMatch[] globalPsms,
    Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans,
    List<ModificationWithMass> variableModifications,
    List<ModificationWithMass> fixedModifications,
    List<Protein> proteinList,
    List<ProductType> lp,
    MassDiffAcceptor searchMode,
    bool addCompIons,
    ICommonParameters CommonParameters,
    Tolerance productMassTolerance,
    List<string> nestedIds)
    : base(nestedIds)
{
    // Scans, their precursor masses, and the PSM array handed in by the caller.
    peptideSpectralMatches = globalPsms;
    this.arrayOfSortedMS2Scans = arrayOfSortedMS2Scans;
    var precursorMasses = arrayOfSortedMS2Scans.Select(scan => scan.PrecursorMass);
    myScanPrecursorMasses = precursorMasses.ToArray();

    // Database and modifications.
    proteins = proteinList;
    this.variableModifications = variableModifications;
    this.fixedModifications = fixedModifications;

    // Fragmentation and matching settings.
    this.searchMode = searchMode;
    this.lp = lp;
    this.addCompIons = addCompIons;
    dissociationTypes = DetermineDissociationType(lp);
    commonParameters = CommonParameters;
    this.productMassTolerance = productMassTolerance;
}
/// <summary>
/// Initializes the non-specific enzyme search engine. Every argument except
/// <paramref name="fragmentIndexPrecursor"/> is passed straight to the base
/// constructor; the precursor fragment index is stored on this instance.
/// </summary>
/// <param name="fragmentIndexPrecursor">Additional index stored in <c>fragmentIndexPrecursor</c>.</param>
public NonSpecificEnzymeSearchEngine(
    PeptideSpectralMatch[] globalPsms,
    Ms2ScanWithSpecificMass[] listOfSortedms2Scans,
    List<CompactPeptide> peptideIndex,
    List<int>[] fragmentIndex,
    List<int>[] fragmentIndexPrecursor,
    List<ProductType> lp,
    int currentPartition,
    ICommonParameters CommonParameters,
    bool addCompIons,
    MassDiffAcceptor massDiffAcceptor,
    double maximumMassThatFragmentIonScoreIsDoubled,
    List<string> nestedIds)
    : base(
        globalPsms,
        listOfSortedms2Scans,
        peptideIndex,
        fragmentIndex,
        lp,
        currentPartition,
        CommonParameters,
        addCompIons,
        massDiffAcceptor,
        maximumMassThatFragmentIonScoreIsDoubled,
        nestedIds)
{
    this.fragmentIndexPrecursor = fragmentIndexPrecursor;
}
/// <summary>
/// Runs the crosslink search task: loads modifications and proteins, configures the
/// crosslinker, searches every spectra file (in parallel, partition by partition),
/// runs the crosslink analysis engine, then splits the PSMs into inter-/intra-protein
/// crosslinks, single, loop and dead-end peptides, performs FDR analysis on each group
/// and writes the outputs selected in <c>XlSearchParameters</c>.
/// </summary>
/// <param name="OutputFolder">Folder that receives all written result files.</param>
/// <param name="dbFilenameList">Protein databases to load.</param>
/// <param name="currentRawFileList">Spectra files to search.</param>
/// <param name="taskId">Identifier used for status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The <c>myTaskResults</c> instance created at the start of the run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
{
    // Placeholder used when a user-defined crosslinker mass was not supplied.
    const double missingUserDefinedMass = 9999;
    // q-value cutoff applied to PSMs before writing pepXML / grouped crosslink output.
    const double maxQValueForOutput = 0.05;

    myTaskResults = new MyTaskResults(this);
    List<PsmCross> allPsms = new List<PsmCross>();
    var compactPeptideToProteinPeptideMatch = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();

    // ---- Load modifications ----
    Status("Loading modifications...", taskId);
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

    // ---- Load proteins ----
    Status("Loading proteins...", new List<string> { taskId });
    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, XlSearchParameters.DecoyType, localizeableModificationTypes, b.IsContaminant, out Dictionary<string, Modification> unknownModifications)).ToList();

    // ---- Ion types ----
    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.BnoB1ions);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }
    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(ionTypes);

    // ---- Crosslinker ----
    var crosslinker = new CrosslinkerTypeClass();
    crosslinker.SelectCrosslinker(XlSearchParameters.CrosslinkerType);
    if (XlSearchParameters.CrosslinkerType == CrosslinkerType.UserDefined)
    {
        crosslinker.CrosslinkerName = XlSearchParameters.UdXLkerName;
        crosslinker.Cleavable = XlSearchParameters.UdXLkerCleavable;
        crosslinker.TotalMass = XlSearchParameters.UdXLkerTotalMass.HasValue ? (double)XlSearchParameters.UdXLkerTotalMass : missingUserDefinedMass;
        crosslinker.CleaveMassShort = XlSearchParameters.UdXLkerShortMass.HasValue ? (double)XlSearchParameters.UdXLkerShortMass : missingUserDefinedMass;
        // Fixed: this used to test UdXLkerShortMass.HasValue, which threw when the short
        // mass was set but the long mass was not, and ignored a long mass set on its own.
        crosslinker.CleaveMassLong = XlSearchParameters.UdXLkerLongMass.HasValue ? (double)XlSearchParameters.UdXLkerLongMass : missingUserDefinedMass;
        crosslinker.CrosslinkerModSite = XlSearchParameters.UdXLkerResidue;
        crosslinker.LoopMass = XlSearchParameters.UdXLkerLoopMass.HasValue ? (double)XlSearchParameters.UdXLkerLoopMass : missingUserDefinedMass;
        crosslinker.DeadendMassH2O = XlSearchParameters.UdXLkerDeadendMassH2O.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassH2O : missingUserDefinedMass;
        crosslinker.DeadendMassNH2 = XlSearchParameters.UdXLkerDeadendMassNH2.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassNH2 : missingUserDefinedMass;
        crosslinker.DeadendMassTris = XlSearchParameters.UdXLkerDeadendMassTris.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassTris : missingUserDefinedMass;
    }

    // ---- Parallel search setup ----
    ParallelOptions parallelOptions = new ParallelOptions();
    if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
    {
        parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
    }
    MyFileManager myFileManager = new MyFileManager(XlSearchParameters.DisposeOfFileWhenDone);

    HashSet<IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);

    // ---- Prose describing the settings used ----
    proseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
    proseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
    proseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
    proseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
    proseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSite + "; ");

    proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    if (CommonParameters.DigestionParams.MaxPeptideLength == null)
    {
        proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
    }
    else
    {
        proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    }
    proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

    proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");

    proseCreatedWhileRunning.Append("parent mass tolerance(s) = " + XlSearchParameters.XlPrecusorMsTl + "; ");
    proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // ---- Search every spectra file ----
    Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
    {
        var origDataFile = currentRawFileList[spectraFileIndex];
        ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        List<PsmCross> newPsms = new List<PsmCross>();

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);

        Status("Getting ms2 scans...", thisId);
        // NOTE: an earlier draft switched to GetCombinedMs2Scans for non-MS2 fragmentation
        // types (MS3 data); that path was disabled and is not implemented here.
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

        // Use the same partition count for the loop bound and for the slice arithmetic.
        // The loop used to run to CommonParameters.TotalPartitions while the slices were
        // computed from combinedParams.TotalPartitions, which skips proteins or overruns
        // the list whenever the two differ.
        var totalPartitions = combinedParams.TotalPartitions;
        for (int currentPartition = 0; currentPartition < totalPartitions; currentPartition++)
        {
            List<CompactPeptide> peptideIndex = null;
            int subsetStart = currentPartition * proteinList.Count / totalPartitions;
            int subsetEnd = (currentPartition + 1) * proteinList.Count / totalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(subsetStart, subsetEnd - subsetStart);

            // Generate indices for modern search
            Status("Getting fragment dictionary...", new List<string> { taskId });
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, ListOfDigestionParams, combinedParams, 30000.0, new List<string> { taskId });
            List<int>[] fragmentIndex = null;
            lock (indexLock)
            {
                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
            }

            Status("Searching files...", taskId);
            new TwoPassCrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, false, XlSearchParameters.XlPrecusorMsTl, crosslinker, XlSearchParameters.CrosslinkSearchTop, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, XlSearchParameters.XlCharge_2_3, XlSearchParameters.XlCharge_2_3_PrimeFragment, thisId).Run();
            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + totalPartitions + "!", thisId));
        }

        // The shared PSM list and the completed-file counter are both touched by every
        // parallel iteration, so update them under the same lock.
        int filesDone;
        lock (psmLock)
        {
            allPsms.AddRange(newPsms);
            filesDone = ++completedFiles;
        }

        // Report a real percentage; completedFiles / Count was integer division and
        // stayed at 0 until the very last file.
        ReportProgress(new ProgressEventArgs(100 * filesDone / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    });

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    // ---- Crosslink analysis ----
    Status("Crosslink analysis engine", taskId);
    new CrosslinkAnalysisEngine(allPsms, compactPeptideToProteinPeptideMatch, proteinList, variableModifications, fixedModifications, ionTypes, OutputFolder, crosslinker, terminusType, CommonParameters, new List<string> { taskId }).Run();

    allPsms = allPsms.Where(p => p != null).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteAllToTsv(allPsms, OutputFolder, "allPsms", new List<string> { taskId });
    }

    var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).Where(p => p.XLBestScore >= CommonParameters.ScoreCutoff && p.BetaPsmCross.XLBestScore >= CommonParameters.ScoreCutoff).ToList();
    foreach (var item in allPsmsXL)
    {
        if (item.OneBasedStartResidueInProtein.HasValue)
        {
            item.XlProteinPos = item.OneBasedStartResidueInProtein.Value + item.XlPos - 1;
        }
        if (item.BetaPsmCross.OneBasedStartResidueInProtein.HasValue)
        {
            item.BetaPsmCross.XlProteinPos = item.BetaPsmCross.OneBasedStartResidueInProtein.Value + item.BetaPsmCross.XlPos - 1;
        }
    }

    // Accession of the first protein of the first compact-peptide entry of a PSM.
    string FirstProteinAccession(PsmCross crossPsm) => crossPsm.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First();

    // A crosslink is intra-protein when the alpha and beta accessions are equal or one
    // contains the other; every other crosslink is inter-protein.
    bool LinksWithinOneProtein(PsmCross crossPsm)
    {
        string alphaAccession = FirstProteinAccession(crossPsm);
        string betaAccession = FirstProteinAccession(crossPsm.BetaPsmCross);
        return alphaAccession == betaAccession || alphaAccession.Contains(betaAccession) || betaAccession.Contains(alphaAccession);
    }

    // ---- Inter crosslinks ----
    var interPsmsXL = allPsmsXL.Where(p => !LinksWithinOneProtein(p)).OrderByDescending(p => p.XLQvalueTotalScore).ToList();
    foreach (var item in interPsmsXL)
    {
        item.CrossType = PsmCrossType.Inter;
    }
    var interPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(interPsmsXL).ToList();
    if (XlSearchParameters.XlOutCrosslink)
    {
        WriteCrosslinkToTsv(interPsmsXLFDR, OutputFolder, "xl_inter_fdr", new List<string> { taskId });
    }
    if (XlSearchParameters.XlOutPercolator)
    {
        var interPsmsXLPercolator = interPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "xl_inter_perc", crosslinker, new List<string> { taskId });
    }

    // ---- Intra crosslinks ----
    var intraPsmsXL = allPsmsXL.Where(p => LinksWithinOneProtein(p)).OrderByDescending(p => p.XLQvalueTotalScore).ToList();
    foreach (var item in intraPsmsXL)
    {
        item.CrossType = PsmCrossType.Intra;
    }
    var intraPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(intraPsmsXL).ToList();
    if (XlSearchParameters.XlOutCrosslink)
    {
        WriteCrosslinkToTsv(intraPsmsXLFDR, OutputFolder, "xl_intra_fdr", new List<string> { taskId });
    }
    if (XlSearchParameters.XlOutPercolator)
    {
        var intraPsmsXLPercolator = intraPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "xl_intra_perc", crosslinker, new List<string> { taskId });
    }

    // ---- Single peptides ----
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Singe && !p.FullSequence.Contains("Crosslink")).OrderByDescending(p => p.Score).ToList();
    var singlePsmsFDR = SingleFDRAnalysis(singlePsms).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteSingleToTsv(singlePsmsFDR, OutputFolder, "single_fdr", new List<string> { taskId });
    }

    // ---- Loop peptides ----
    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).OrderByDescending(p => p.XLTotalScore).ToList();
    var loopPsmsFDR = SingleFDRAnalysis(loopPsms).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteSingleToTsv(loopPsmsFDR, OutputFolder, "loop_fdr", new List<string> { taskId });
    }

    // ---- Dead-end peptides ----
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd || p.CrossType == PsmCrossType.DeadEndH2O || p.CrossType == PsmCrossType.DeadEndNH2 || p.CrossType == PsmCrossType.DeadEndTris).OrderByDescending(p => p.XLTotalScore).ToList();
    // Single PSMs whose sequence mentions "Crosslink" are appended after the sorted dead ends.
    deadendPsms.AddRange(allPsms.Where(p => p.CrossType == PsmCrossType.Singe && p.FullSequence.Contains("Crosslink")));
    var deadendPsmsFDR = SingleFDRAnalysis(deadendPsms).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteSingleToTsv(deadendPsmsFDR, OutputFolder, "deadend_fdr", new List<string> { taskId });
    }

    // Target-only crosslinks under the q-value cutoff, intra first then inter.
    // Deliberately left as a deferred query so nothing is evaluated unless an output
    // below actually asks for it.
    var confidentCrosslinks = intraPsmsXLFDR.Concat(interPsmsXLFDR).Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= maxQValueForOutput);

    // ---- pepXML output (one file per spectra file) ----
    if (XlSearchParameters.XlOutPepXML)
    {
        List<PsmCross> allPsmsFDR = new List<PsmCross>();
        allPsmsFDR.AddRange(confidentCrosslinks);
        allPsmsFDR.AddRange(singlePsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= maxQValueForOutput));
        allPsmsFDR.AddRange(loopPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= maxQValueForOutput));
        allPsmsFDR.AddRange(deadendPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= maxQValueForOutput));
        allPsmsFDR = allPsmsFDR.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WritePepXML_xl(allPsmsFDR.Where(p => p.FullFilePath == fullFilePath).ToList(), dbFilenameList, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List<string> { taskId });
        }
    }

    // ---- Grouped crosslink output ----
    if (XlSearchParameters.XlOutAll)
    {
        List<PsmCross> allPsmsXLFDR = confidentCrosslinks.OrderByDescending(p => p.XLQvalueTotalScore).ToList();
        var allPsmsXLFDRGroup = FindCrosslinks(allPsmsXLFDR);
        WriteCrosslinkToTsv(allPsmsXLFDRGroup, OutputFolder, "allPsmsXLFDRGroup", new List<string> { taskId });
    }

    return myTaskResults;
}
/// <summary>
/// Runs the calibration task. For every spectra file (in parallel) it acquires
/// calibration data points, runs the calibration engine on the loaded file, repeats
/// the search to measure the post-calibration mass errors, writes a TOML file with
/// suggested tolerances (4 x the post-calibration interquartile ranges) and writes
/// the calibrated mzML file.
/// </summary>
/// <param name="OutputFolder">Folder that receives the "-calib.mzml" and "-calib.toml" files.</param>
/// <param name="dbFilenameList">Protein databases to load.</param>
/// <param name="currentRawFileList">Spectra files to calibrate.</param>
/// <param name="taskId">Identifier used for status/progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>
/// The task results; <c>newSpectra</c> lists the calibrated file paths that were written.
/// Files without enough calibration data points are skipped with a warning.
/// </returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
{
    // Minimum number of MS1 and MS2 data points required to attempt calibration.
    const int minDataPoints = 4;

    myTaskResults = new MyTaskResults(this)
    {
        newSpectra = new List<string>()
    };

    Status("Loading modifications...", new List<string> { taskId });
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

    Status("Loading proteins...", new List<string> { taskId });
    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, UsefulProteomicsDatabases.DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out Dictionary<string, Modification> um)).ToList();

    // ---- Prose describing the settings used ----
    proseCreatedWhileRunning.Append("The following calibration settings were used: ");
    proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    if (CommonParameters.DigestionParams.MaxPeptideLength == null)
    {
        proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
    }
    else
    {
        proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    }
    proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
    proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    object lock1 = new object();
    // Guards myTaskResults.newSpectra, which is appended to from parallel iterations.
    object resultsLock = new object();

    ParallelOptions parallelOptions = new ParallelOptions();
    if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
    {
        parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
    }
    var myFileManager = new MyFileManager(true);

    Status("Calibrating...", new List<string> { taskId });

    Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
    {
        var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
        var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
        ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.mzml");
        var fileIds = new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension };

        IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile;

        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files" });
        // only load one file at a time
        lock (lock1)
        {
            myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks);
        }

        // get datapoints to fit calibration function to
        Status("Acquiring calibration data points...", new List<string> { taskId, "Individual Spectra Files" });
        var acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        // enough data points to calibrate with? Checked before any statistics are
        // computed so that a file without usable data only produces a warning.
        if (acquisitionResults.Item2 == null)
        {
            Warn("Could not find any datapoints to calibrate with!");
            // Release the file on this path too; it will not be used again.
            myFileManager.DoneWithFile(originalUncalibratedFilePath);
            return;
        }
        int ms1PointCount = acquisitionResults.Item2.Ms1List.Count;
        int ms2PointCount = acquisitionResults.Item2.Ms2List.Count;
        if (ms1PointCount < minDataPoints || ms2PointCount < minDataPoints)
        {
            // Only warn about the level that is actually short of data points; previously
            // both warnings were emitted even if only one of them applied.
            if (ms1PointCount < minDataPoints)
            {
                Warn("Could not find enough MS1 datapoints to calibrate (" + ms1PointCount + " found)");
            }
            if (ms2PointCount < minDataPoints)
            {
                Warn("Could not find enough MS2 datapoints to calibrate (" + ms2PointCount + " found)");
            }
            myFileManager.DoneWithFile(originalUncalibratedFilePath);
            return;
        }

        // stats before calibration
        int prevPsmCount = acquisitionResults.Item1.Count;
        var preCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass.Value) / p.PeptideMonisotopicMass.Value * 1e6).ToList();
        double preCalibrationPrecursorIqr = Statistics.InterquartileRange(preCalibrationPrecursorErrors);
        var preCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
        double preCalibrationProductIqr = Statistics.InterquartileRange(preCalibrationProductErrors);

        // generate calibration function and shift data points
        Status("Calibrating...", new List<string> { taskId, "Individual Spectra Files" });
        new CalibrationEngine(myMsDataFile, acquisitionResults.Item2, fileIds).Run();

        // The local reference myMsDataFile keeps being used below after the manager
        // has been told we are done with the file.
        myFileManager.DoneWithFile(originalUncalibratedFilePath);

        // do another search to evaluate calibration results
        Status("Post-calibration search...", new List<string> { taskId, "Individual Spectra Files" });
        acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

        // stats after calibration (computed exactly like the pre-calibration stats;
        // the precursor error previously omitted .Value here)
        int postCalibrationPsmCount = acquisitionResults.Item1.Count;
        var postCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass.Value) / p.PeptideMonisotopicMass.Value * 1e6).ToList();
        double postCalibrationPrecursorIqr = Statistics.InterquartileRange(postCalibrationPrecursorErrors);
        var postCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
        double postCalibrationProductIqr = Statistics.InterquartileRange(postCalibrationProductErrors);

        // did the data improve? (not used for anything yet...)
        bool improvement = ImprovGlobal(preCalibrationPrecursorIqr, preCalibrationProductIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorIqr, postCalibrationProductIqr);

        // write suggested tolerances for this file
        var tomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.toml");
        FileSpecificTolerances f = new FileSpecificTolerances
        {
            PrecursorMassTolerance = new PpmTolerance(4.0 * postCalibrationPrecursorIqr),
            ProductMassTolerance = new PpmTolerance(4.0 * postCalibrationProductIqr)
        };
        Toml.WriteFile(f, tomlFileName, tomlConfig);
        SucessfullyFinishedWritingFile(tomlFileName, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // write the calibrated MZML file
        MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);

        // all done
        SucessfullyFinishedWritingFile(calibratedFilePath, new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension });

        // List<T>.Add is not safe to call concurrently from several iterations.
        lock (resultsLock)
        {
            myTaskResults.newSpectra.Add(calibratedFilePath);
        }

        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension }));
    });

    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    return myTaskResults;
}
/// <summary>
/// Searches one spectra file with the classic search engine and collects the calibration data points
/// from the confident identifications.
/// </summary>
/// <remarks>
/// Steps: build a single-notch precursor search mode around zero from <paramref name="initPrecTol"/>,
/// run the classic search, resolve compact peptides to protein-linked peptides, run FDR analysis, keep
/// non-decoy PSMs with a notch q-value below 0.01 and a non-null full sequence, match product ions for
/// those PSMs, and finally run the DataPointAcquisitionEngine on them.
/// </remarks>
/// <param name="myMsDataFile">Loaded spectra file that is searched and from which scans are fetched.</param>
/// <param name="currentDataFile">Path of the spectra file; its extension-less name is used in status/log ids.</param>
/// <param name="variableModifications">Variable modifications passed to the search engines.</param>
/// <param name="fixedModifications">Fixed modifications passed to the search engines.</param>
/// <param name="proteinList">Proteins to search against.</param>
/// <param name="taskId">Id of the running task; first element of every nested id list.</param>
/// <param name="combinedParameters">Parameters supplying ion types, deconvolution and digestion settings.</param>
/// <param name="initPrecTol">Precursor tolerance; a PpmTolerance selects the ppm search mode, anything else the absolute one.</param>
/// <param name="initProdTol">Product mass tolerance used for searching and ion matching.</param>
/// <returns>
/// The PSMs that passed the filter together with the acquisition results, or an empty list and
/// <c>null</c> (after a warning) when no PSM passes the filter.
/// </returns>
private (List<PeptideSpectralMatch>, DataPointAquisitionResults) GetDataAcquisitionResults(IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile, string currentDataFile, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, string taskId, ICommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
{
    var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);

    // Each call hands out a freshly allocated id list, so no two consumers share one instance.
    List<string> FileIds() => new List<string> { taskId, "Individual Spectra Files", fileNameWithoutExtension };

    MassDiffAcceptor searchMode;
    if (initPrecTol is PpmTolerance)
    {
        searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
    }
    else
    {
        searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
    }

    // The same ion switches drive both the calibration fragment flags and the product types searched.
    FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;
    List<ProductType> lp = new List<ProductType>();
    if (combinedParameters.BIons)
    {
        fragmentTypesForCalibration |= FragmentTypes.b;
        lp.Add(ProductType.B);
    }
    if (combinedParameters.YIons)
    {
        fragmentTypesForCalibration |= FragmentTypes.y;
        lp.Add(ProductType.Y);
    }
    if (combinedParameters.CIons)
    {
        fragmentTypesForCalibration |= FragmentTypes.c;
        lp.Add(ProductType.C);
    }
    if (combinedParameters.ZdotIons)
    {
        fragmentTypesForCalibration |= FragmentTypes.zdot;
        lp.Add(ProductType.Zdot);
    }

    var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

    Log("Searching with searchMode: " + searchMode, FileIds());
    Log("Searching with productMassTolerance: " + initProdTol, FileIds());

    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchMode, false, combinedParameters, initProdTol, FileIds()).Run();

    List<PeptideSpectralMatch> allPsms = allPsmsArray.ToList();

    var sequenceResults = (SequencesToActualProteinPeptidesEngineResults)new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, lp, new List<IDigestionParams> { combinedParameters.DigestionParams }, combinedParameters.ReportAllAmbiguity, FileIds()).Run();
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatching = sequenceResults.CompactPeptideToProteinPeptideMatching;

    // Scans without a match leave null slots in the array; skip those.
    foreach (var candidate in allPsms)
    {
        if (candidate != null)
        {
            candidate.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
        }
    }

    // Best score first, ties broken by smallest precursor mass error; then keep one PSM per
    // (file, scan number, peptide mass) group.
    allPsms = allPsms
        .Where(b => b != null)
        .OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, searchMode.NumNotches, false, FileIds()).Run();

    List<PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

    if (!goodIdentifications.Any())
    {
        Warn("No PSMs below 1% FDR observed!");
        return (new List<PeptideSpectralMatch>(), null);
    }

    var dissociationTypes = MetaMorpheusEngine.DetermineDissociationType(lp);

    // Only the confident identifications are handed on (to the acquisition engine and to the caller),
    // so product ions are matched for those alone rather than for every PSM including decoys.
    foreach (var psm in goodIdentifications)
    {
        var theScan = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
        double thePrecursorMass = psm.ScanPrecursorMass;

        foreach (var productType in lp)
        {
            var ionMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { productType });
            Array.Sort(ionMasses);

            List<double> matchedIonMassesList = new List<double>();
            List<double> productMassErrorDaList = new List<double>();
            List<double> productMassErrorPpmList = new List<double>();
            LocalizationEngine.MatchIons(theScan, initProdTol, ionMasses, matchedIonMassesList, productMassErrorDaList, productMassErrorPpmList, thePrecursorMass, dissociationTypes, false);

            psm.MatchedIonDictOnlyMatches.Add(productType, matchedIonMassesList.ToArray());
            psm.ProductMassErrorDa.Add(productType, productMassErrorDaList.ToArray());
            psm.ProductMassErrorPpm.Add(productType, productMassErrorPpmList.ToArray());
        }
    }

    DataPointAquisitionResults currentResult = (DataPointAquisitionResults)new DataPointAcquisitionEngine(
        goodIdentifications,
        myMsDataFile,
        initPrecTol,
        initProdTol,
        CalibrationParameters.NumFragmentsNeededForEveryIdentification,
        CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
        CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
        fragmentTypesForCalibration,
        FileIds()).Run();

    return (goodIdentifications, currentResult);
}
protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList) { myTaskResults = new MyTaskResults(this); if (NeoType.Equals(NeoTaskType.AggregateTargetDecoyFiles)) { //getfolders if (NeoParameters.DecoyFilePath == null) { NeoParameters.DecoyFilePath = new DirectoryInfo(OutputFolder).Name; string taskString = NeoParameters.DecoyFilePath.Split('-')[0]; int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4)); taskNum--; NeoParameters.DecoyFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.DecoyFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; if (NeoParameters.TargetFilePath == null) { NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name; taskNum--; NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; } } if (NeoParameters.TargetFilePath == null) { NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name; string taskString = NeoParameters.TargetFilePath.Split('-')[0]; int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4)); taskNum--; NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; } AggregateSearchFiles.Combine(NeoParameters.TargetFilePath, NeoParameters.DecoyFilePath, OutputFolder + "\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0])); } else if (NeoType.Equals(NeoTaskType.AggregateNormalSplicedFiles)) { //reset database dbFilenameList = StoredDatabases; string normalPath = ""; string cisPath = new 
DirectoryInfo(OutputFolder).Name; string taskString = cisPath.Split('-')[0]; int taskNum = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4)); taskNum -= 2; string transPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + (taskNum + 1) + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; cisPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; AggregateSearchFiles.RecursiveNeoAggregation(normalPath, cisPath, OutputFolder, "CisResults.psmtsv"); AggregateSearchFiles.RecursiveNeoAggregation(normalPath, transPath, OutputFolder, "TransResults.psmtsv"); } else if (NeoType.Equals(NeoTaskType.GenerateSplicedPeptides)) { NeoMassCalculator.ImportMasses(); ParallelOptions parallelOptions = new ParallelOptions(); if (CommonParameters.MaxParallelFilesToAnalyze.HasValue) { parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value; } MyFileManager myFileManager = new MyFileManager(true); //Import Spectra Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex => { var origDataFile = currentRawFileList[spectraFileIndex]; ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]); var thisId = new List <string> { taskId, "Individual Spectra Files", origDataFile }; NewCollection(Path.GetFileName(origDataFile), thisId); Status("Loading spectra file...", thisId); IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks); Status("Getting ms2 scans...", thisId); Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, 
combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray(); //Import Database Status("Loading modifications...", taskId); #region Load modifications List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList(); List <ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList(); List <string> localizeableModificationTypes = CommonParameters.ListOfModTypesLocalize == null ? new List <string>() : CommonParameters.ListOfModTypesLocalize.ToList(); if (CommonParameters.LocalizeAll) { localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList(); } else { localizeableModificationTypes = GlobalVariables.AllModTypesKnown.Where(b => localizeableModificationTypes.Contains(b)).ToList(); } #endregion Load modifications var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.None, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> unknownModifications)).ToList(); //Read N and C files string nPath = NeoParameters.NFilePath; string cPath = NeoParameters.CFilePath; //if termini input if (nPath == null || cPath == null) { //if no termini input string taskHeader = "Task"; string[] pathArray = OutputFolder.Split('\\'); string basePath = ""; for (int i = 0; i < pathArray.Length - 1; i++) { basePath += pathArray[i] + '\\'; } string currentTaskNumber = pathArray[pathArray.Length - 1].Split('-')[0]; currentTaskNumber = currentTaskNumber.Substring(taskHeader.Length, currentTaskNumber.Length - taskHeader.Length); string NHeader = ""; string CHeader = ""; if (cPath == null) { 
CHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1); if (nPath == null) { NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 2); } } else { NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1); } foreach (string s in Directory.GetDirectories(basePath)) { if (s.Contains(NHeader)) { nPath = s; } else if (s.Contains(CHeader)) { cPath = s; } } string fileName = Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv"; nPath += "\\" + fileName; cPath += "\\" + fileName; } Status("Importing Search Results...", taskId); List <NeoPsm> psms = ImportPsmtsv.ImportNeoPsms(nPath, cPath); //Splice Status("Splicing Fragments...", taskId); List <NeoPsm> candidates = NeoSplicePeptides.SplicePeptides(psms); //Find Ambiguity Status("Identifying Ambiguity...", taskId); NeoFindAmbiguity.FindAmbiguity(candidates, proteinList, arrayOfMs2ScansSortedByMass, dbFilenameList[0].FilePath); //Export Results Status("Exporting Results...", taskId); NeoExport.ExportAll(candidates, arrayOfMs2ScansSortedByMass, OutputFolder); //Switch databases string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixNC.fasta"; dbFilenameList = new List <DbForTask>() { new DbForTask(outputFolder, false) }; });
/// <summary>
/// Creates a precursor indexing engine. The constructor adds no state of its own: every argument is
/// forwarded, in the same order, to the base-class constructor.
/// </summary>
public PrecursorIndexingEngine(
    List<Protein> proteinList,
    List<ModificationWithMass> variableModifications,
    List<ModificationWithMass> fixedModifications,
    List<ProductType> lp,
    int currentPartition,
    DecoyType decoyType,
    IEnumerable<IDigestionParams> CollectionOfDigestionParams,
    ICommonParameters commonParams,
    double maxFragmentSize,
    List<string> nestedIds)
    : base(
        proteinList,
        variableModifications,
        fixedModifications,
        lp,
        currentPartition,
        decoyType,
        CollectionOfDigestionParams,
        commonParams,
        maxFragmentSize,
        nestedIds)
{
}
/// <summary>
/// Runs the G-PTM-D task: searches every spectra file with a mass-shift-tolerant classic search, runs
/// FDR analysis, writes the candidate PSMs, runs the GptmdEngine and writes the resulting XML databases.
/// </summary>
/// <param name="OutputFolder">Folder that receives "GPTMD_Candidates.psmtsv" and the "...GPTMD.xml" databases.</param>
/// <param name="dbFilenameList">Input databases; contaminant and non-contaminant ones are written separately.</param>
/// <param name="currentRawFileList">Spectra files, processed in parallel.</param>
/// <param name="taskId">Id of this task; first element of every nested id list.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The task results, with every written database added to <c>newDatabases</c>.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
{
    myTaskResults = new MyTaskResults(this)
    {
        newDatabases = new List<DbForTask>()
    };

    Status("Loading modifications...", new List<string> { taskId });
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();

    // A missing localize list is treated as "no types" instead of throwing a NullReferenceException.
    List<string> localizeableModificationTypes = CommonParameters.LocalizeAll
        ? GlobalVariables.AllModTypesKnown.ToList()
        : (CommonParameters.ListOfModTypesLocalize?.ToList() ?? new List<string>());

    List<ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList();
    IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.B);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }

    Status("Loading proteins...", new List<string> { taskId });
    Dictionary<string, Modification> um = null;

    //Decoys are currently not being searched with DecoyType.None
    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out um)).ToList();

    proseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
    proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    if (CommonParameters.DigestionParams.MaxPeptideLength == null)
    {
        proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
    }
    else
    {
        proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    }
    proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    proseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

    //puppet searchmode for writing files. Actual searchmode is filespecific
    MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);
    proseCreatedWhileRunning.Append("parent mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

    // NOTE(review): the tolerance is followed by a literal " Da" regardless of its kind — confirm this
    // reads correctly for ppm tolerances.
    proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
    proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    Status("Running G-PTM-D...", new List<string> { taskId });

    HashSet<IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

    MyFileManager myFileManager = new MyFileManager(true);

    // Guards allPsms, which is appended to from several files in parallel.
    object psmListLock = new object();

    ParallelOptions parallelOptions = new ParallelOptions();
    if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
    {
        parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
    }

    Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
    {
        var origDataFile = currentRawFileList[spectraFileIndex];

        // Each call hands out a freshly allocated id list for this file.
        List<string> FileIds() => new List<string> { taskId, "Individual Spectra Files", origDataFile };

        ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

        NewCollection(Path.GetFileName(origDataFile), FileIds());
        StartingDataFile(origDataFile, FileIds());

        Status("Loading spectra file...", FileIds());
        IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);

        Status("Getting ms2 scans...", FileIds());
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
        new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, false, combinedParams, combinedParams.ProductMassTolerance, FileIds()).Run();

        lock (psmListLock)
        {
            allPsms.AddRange(allPsmsArray);
        }

        FinishedDataFile(origDataFile, FileIds());
        ReportProgress(new ProgressEventArgs(100, "Done!", FileIds()));
    });
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    // Group and order psms
    SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngineTest = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, new List<string> { taskId });
    var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngineTest.Run();
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching;

    // Scans without a match leave null entries; skip those.
    foreach (var candidate in allPsms)
    {
        if (candidate != null)
        {
            candidate.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest);
        }
    }

    // Best score first, ties broken by smallest precursor mass error; then keep one PSM per
    // (file, scan number, peptide mass) group.
    allPsms = allPsms
        .Where(b => b != null)
        .OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, false, new List<string> { taskId }).Run();

    var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
    WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
    SucessfullyFinishedWritingFile(writtenFile, new List<string> { taskId });

    var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, CommonParameters.PrecursorMassTolerance, new List<string> { taskId }).Run();

    // Returns the file name up to (not including) its first dot. Path.GetFileNameWithoutExtension is
    // deliberately NOT used: for ".xml.gz" inputs it would leave ".xml" in the name and the output
    // would end in ".xml.xml". A name without any dot is returned whole instead of throwing.
    string NameBeforeFirstDot(string dbFilePath)
    {
        var dbName = Path.GetFileName(dbFilePath);
        int indexOfFirstDot = dbName.IndexOf('.');
        return indexOfFirstDot < 0 ? dbName : dbName.Substring(0, indexOfFirstDot);
    }

    // Writes the non-decoy proteins of one kind (contaminant or not) with the G-PTM-D mods into a
    // single XML database, registers it as a new database and reports the per-mod counts.
    void WriteGptmdDatabase(bool contaminants, string summaryPrefix)
    {
        List<string> databaseNames = new List<string>();
        foreach (var db in dbFilenameList.Where(p => p.IsContaminant == contaminants))
        {
            databaseNames.Add(NameBeforeFirstDot(db.FilePath));
        }

        string outputXmlDbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant == contaminants).ToList(), outputXmlDbFullName);

        SucessfullyFinishedWritingFile(outputXmlDbFullName, new List<string> { taskId });

        myTaskResults.newDatabases.Add(new DbForTask(outputXmlDbFullName, contaminants));
        myTaskResults.AddNiceText(summaryPrefix + newModsActuallyWritten.Select(b => b.Value).Sum());
        myTaskResults.AddNiceText("Mods types and counts:");
        myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }

    if (dbFilenameList.Any(b => !b.IsContaminant))
    {
        WriteGptmdDatabase(false, "Modifications added: ");
    }
    if (dbFilenameList.Any(b => b.IsContaminant))
    {
        WriteGptmdDatabase(true, "Contaminant modifications added: ");
    }

    return myTaskResults;
}
/// <summary>
/// Rewrites the TOML file at <paramref name="fileName"/> in place. Lines that mention a known setting
/// are replaced with the result of GetCorrectValue for <paramref name="tomlFileName"/>, or forced to
/// "false" for ion types that do not apply to the given terminus; all other lines are kept unchanged.
/// </summary>
/// <remarks>
/// Checks run in a fixed order and the first match wins. "Protease" is tested last because other
/// setting names contain it. LocalizeAll, MinPeptideLength, ListOfModsVariable and Protease are only
/// replaced when <paramref name="terminusType"/> is None (the latter two also require a non-splice
/// search); otherwise the line is kept.
/// NOTE(review): the MinPeptideLength condition was written with a double negation ("!!"), which is a
/// no-op; it is kept as "terminus is None" here — confirm a single negation was not intended.
/// </remarks>
/// <param name="tomlFileName">Passed to GetCorrectValue as the source of replacement values.</param>
/// <param name="fileName">TOML file that is read and then overwritten.</param>
/// <param name="ye5">Not used by this method.</param>
/// <param name="terminusType">Terminus of the search the TOML is prepared for.</param>
/// <param name="spliceSearch">When true, ListOfModsVariable and Protease lines are left as they are.</param>
private static void UpdateTomls(string tomlFileName, string fileName, ICommonParameters ye5, TerminusType terminusType, bool spliceSearch)
{
    bool noTerminus = terminusType.Equals(TerminusType.None);
    bool nTerminusOrNone = terminusType.Equals(TerminusType.N) || noTerminus;
    bool cTerminusOrNone = terminusType.Equals(TerminusType.C) || noTerminus;

    // Maps one input line to its output line; order of the checks is significant.
    string Rewrite(string line)
    {
        if (line.Contains("LocalizeAll") && noTerminus)
        {
            return GetCorrectValue("LocalizeAll", tomlFileName, line);
        }
        if (line.Contains("ListOfModsFixed"))
        {
            return GetCorrectValue("ListOfModsFixed", tomlFileName, line);
        }
        if (line.Contains("ListOfModsVariable") && noTerminus && !spliceSearch)
        {
            return GetCorrectValue("ListOfModsVariable", tomlFileName, line);
        }
        if (line.Contains("BIons"))
        {
            return nTerminusOrNone ? GetCorrectValue("BIons", tomlFileName, line) : "BIons = false";
        }
        if (line.Contains("YIons"))
        {
            return cTerminusOrNone ? GetCorrectValue("YIons", tomlFileName, line) : "YIons = false";
        }
        if (line.Contains("ZdotIons"))
        {
            return cTerminusOrNone ? GetCorrectValue("ZdotIons", tomlFileName, line) : "ZdotIons = false";
        }
        if (line.Contains("CIons"))
        {
            return nTerminusOrNone ? GetCorrectValue("CIons", tomlFileName, line) : "CIons = false";
        }
        if (line.Contains("ProductMassTolerance"))
        {
            return GetCorrectValue("ProductMassTolerance", tomlFileName, line);
        }
        if (line.Contains("PrecursorMassTolerance"))
        {
            return GetCorrectValue("PrecursorMassTolerance", tomlFileName, line);
        }
        if (line.Contains("MaxMissedCleavages"))
        {
            return GetCorrectValue("MaxMissedCleavages", tomlFileName, line);
        }
        if (line.Contains("InitiatorMethionineBehavior"))
        {
            return GetCorrectValue("InitiatorMethionineBehavior", tomlFileName, line);
        }
        if (line.Contains("MinPeptideLength") && noTerminus)
        {
            return GetCorrectValue("MinPeptideLength", tomlFileName, line);
        }
        if (line.Contains("MaxPeptideLength"))
        {
            return GetCorrectValue("MaxPeptideLength", tomlFileName, line);
        }
        if (line.Contains("MaxModificationIsoforms"))
        {
            return GetCorrectValue("MaxModificationIsoforms", tomlFileName, line);
        }
        if (line.Contains("MaxModsForPeptide"))
        {
            return GetCorrectValue("MaxModsForPeptide", tomlFileName, line);
        }
        if (line.Contains("SemiProteaseDigestion"))
        {
            return GetCorrectValue("SemiProteaseDigestion", tomlFileName, line);
        }
        if (line.Contains("TerminusTypeSemiProtease"))
        {
            return GetCorrectValue("TerminusTypeSemiProtease", tomlFileName, line);
        }
        //this must be last, else other names including protease will be overwritten and crash.
        if (line.Contains("Protease") && noTerminus && !spliceSearch)
        {
            return GetCorrectValue("Protease", tomlFileName, line);
        }
        return line;
    }

    // The whole file is read and transformed before anything is written back.
    List<string> rewrittenLines = new List<string>();
    foreach (string tomlLine in File.ReadAllLines(fileName))
    {
        rewrittenLines.Add(Rewrite(tomlLine));
    }

    File.WriteAllLines(fileName, rewrittenLines);
}