/// <summary>
/// Resolves the compact peptides carried by <c>allPsms</c> to concrete
/// <see cref="PeptideWithSetModifications"/> objects and stores them in <c>CPWMtoPWSM</c>.
/// </summary>
/// <remarks>
/// Abbreviations used below: CP = CompactPeptide, CPWM = CompactPeptideWithModifiedMass,
/// PWSM = PeptideWithSetModifications.
/// <para>
/// 1. For every compact peptide found in <c>allPsms</c>, collect the precursor masses of the PSMs
///    that reference it.
/// 2. In parallel over <c>proteins</c>: digest each protein with every entry of
///    <c>collectionOfDigestionParams</c>, keep the digestion products whose compact form was
///    observed, and call <c>ComputePeptideIndexes</c> for each associated precursor mass to find
///    the position of the other terminus. Per-partition results are merged into
///    <c>CPWMtoPWSM</c> under a lock.
/// 3. For every PSM, swap and crop the masses of its compact peptide keys, re-key the matching
///    <c>CPWMtoPWSM</c> entries, and call <c>CompactCompactPeptides</c>.
/// </para>
/// </remarks>
/// <returns>A new <see cref="MetaMorpheusEngineResults"/> created for this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    // At this point we have spectrum-sequence matches without knowing which protein they belong
    // to and without knowing whether they are target or decoy.
    // Map every distinct compact peptide to the precursor masses of the PSMs that reference it.
    var compactPeptideToMassMatching = new Dictionary<CompactPeptideBase, List<double>>();
    foreach (var psm in allPsms)
    {
        if (psm == null)
        {
            continue;
        }

        foreach (var cp in psm.CompactPeptides)
        {
            if (compactPeptideToMassMatching.TryGetValue(cp.Key, out List<double> knownMasses))
            {
                knownMasses.Add(psm.ScanPrecursorMass);
            }
            else
            {
                compactPeptideToMassMatching.Add(cp.Key, new List<double> { psm.ScanPrecursorMass });
            }
        }
    }

    // Snapshot of the observed keys; every partition builds its own working dictionary from it.
    // The original key objects must be kept because they are later passed to the
    // CompactPeptideWithModifiedMass constructor.
    CompactPeptideBase[] observedCompactPeptides = compactPeptideToMassMatching.Keys.ToArray();

    // For TerminusType.N the peptide start is fixed and the end is searched for;
    // any other terminus type fixes the end and searches for the start.
    bool nTerminusIsKnown = terminusType == TerminusType.N;

    int totalProteins = proteins.Count;

    // Partitioner.Create throws ArgumentOutOfRangeException for an empty range,
    // so the parallel pass is skipped when there are no proteins.
    if (totalProteins > 0)
    {
        int proteinsSeen = 0;
        int oldProgress = 0;
        var mergeLock = new object();

        Parallel.ForEach(Partitioner.Create(0, totalProteins), range =>
        {
            var digestedMatches = DigestProteinRange(range.Item1, range.Item2);
            var resolvedMatches = LocateOtherTerminus(digestedMatches);

            lock (mergeLock)
            {
                MergeIntoGlobalDictionary(resolvedMatches);

                // Progress bookkeeping; the actual report call is disabled.
                proteinsSeen += range.Item2 - range.Item1;
                int newProgress = (int)((double)proteinsSeen / totalProteins * 100);
                if (newProgress > oldProgress)
                {
                    //ReportProgress(new ProgressEventArgs(newProgress, "In adding possible sources to peptide dictionary loop", nestedIds));
                    oldProgress = newProgress;
                }
            }
        });
    }

    // With CPWMtoPWSM filled, convert the PSM objects to the corrected compact-peptide masses.
    foreach (var psm in allPsms)
    {
        if (psm == null)
        {
            continue;
        }

        foreach (KeyValuePair<CompactPeptideBase, Tuple<int, HashSet<PeptideWithSetModifications>>> kvp in psm.CompactPeptides)
        {
            // A direct cast fails loudly (InvalidCastException) if a key is not a CPWM,
            // instead of the NullReferenceException an unchecked "as" cast would produce.
            var cpwm = (CompactPeptideWithModifiedMass)kvp.Key;

            // Change the CPWM to reflect the actual CP.
            cpwm.SwapMonoisotopicMassWithModifiedMass();
            if (CPWMtoPWSM.TryGetValue(kvp.Key, out HashSet<PeptideWithSetModifications> misplacedPWSMs))
            {
                cpwm.CropTerminalMasses(terminusType);
                if (CPWMtoPWSM.TryGetValue(kvp.Key, out HashSet<PeptideWithSetModifications> wellPlacedPWSMs))
                {
                    if (!ReferenceEquals(wellPlacedPWSMs, misplacedPWSMs))
                    {
                        wellPlacedPWSMs.UnionWith(misplacedPWSMs);
                    }
                }
                else
                {
                    CPWMtoPWSM.Add(kvp.Key, misplacedPWSMs);
                }
            }
        }

        psm.CompactCompactPeptides();
    }

    return new MetaMorpheusEngineResults(this);

    // Digests proteins[firstProtein .. lastProteinExclusive) and groups the digestion products
    // by the observed compact peptide they are equal to. Unobserved products are discarded.
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> DigestProteinRange(int firstProtein, int lastProteinExclusive)
    {
        Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> matchesByCompactPeptide =
            observedCompactPeptides.ToDictionary(key => key, key => new HashSet<PeptideWithSetModifications>());

        for (int proteinIndex = firstProtein; proteinIndex < lastProteinExclusive; proteinIndex++)
        {
            foreach (var digestionParam in collectionOfDigestionParams)
            {
                foreach (var digestionProduct in proteins[proteinIndex].Digest(digestionParam, fixedModifications, variableModifications))
                {
                    if (matchesByCompactPeptide.TryGetValue(new CompactPeptide(digestionProduct, terminusType), out HashSet<PeptideWithSetModifications> matches))
                    {
                        matches.Add(digestionProduct);
                    }
                }
            }
        }

        return matchesByCompactPeptide;
    }

    // For every large digestion product, tries each precursor mass associated with its compact
    // peptide and, when ComputePeptideIndexes returns a non-negative index, records the trimmed
    // peptide under a CPWM key built from the original compact peptide.
    Dictionary<CompactPeptideWithModifiedMass, HashSet<PeptideWithSetModifications>> LocateOtherTerminus(
        Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> matchesByCompactPeptide)
    {
        var trimmedByCpwm = new Dictionary<CompactPeptideWithModifiedMass, HashSet<PeptideWithSetModifications>>();

        foreach (KeyValuePair<CompactPeptideBase, HashSet<PeptideWithSetModifications>> candidate in matchesByCompactPeptide)
        {
            if (candidate.Value.Count == 0
                || !compactPeptideToMassMatching.TryGetValue(candidate.Key, out List<double> scanPrecursorMasses))
            {
                continue;
            }

            foreach (PeptideWithSetModifications largePeptide in candidate.Value)
            {
                // Starting mass contributed by a modification stored under key 1 of allModsOneIsNterminus.
                // NOTE(review): only the last neutral loss takes effect, and an empty neutralLosses
                // collection leaves the mass at 0 - behaviour kept as-is, confirm it is intended.
                double initialMass = 0;
                if (largePeptide.allModsOneIsNterminus.TryGetValue(1, out ModificationWithMass terminalMod))
                {
                    foreach (double neutralLoss in terminalMod.neutralLosses)
                    {
                        initialMass = terminalMod.monoisotopicMass - neutralLoss;
                    }
                }

                // Single-element array: it is passed to ComputePeptideIndexes and read back afterwards.
                double[] finalMass = new double[1];
                foreach (double precursorMass in scanPrecursorMasses)
                {
                    finalMass[0] = initialMass + waterMonoisotopicMass;

                    int index = nTerminusIsKnown
                        ? ComputePeptideIndexes(largePeptide, finalMass, 1, 1, precursorMass, massDiffAcceptor)
                        : ComputePeptideIndexes(largePeptide, finalMass, largePeptide.Length, -1, precursorMass, massDiffAcceptor);
                    if (index < 0)
                    {
                        continue;
                    }

                    int trimmedStart = nTerminusIsKnown
                        ? largePeptide.OneBasedStartResidueInProtein
                        : largePeptide.OneBasedStartResidueInProtein + index - 1;
                    int trimmedEnd = nTerminusIsKnown
                        ? largePeptide.OneBasedStartResidueInProtein + index - 1
                        : largePeptide.OneBasedEndResidueInProtein;
                    int trimmedLength = trimmedEnd - trimmedStart + 1;

                    foreach (IDigestionParams digestionParam in collectionOfDigestionParams)
                    {
                        bool longEnough = !digestionParam.MinPeptideLength.HasValue
                            || trimmedLength >= digestionParam.MinPeptideLength;
                        if (!longEnough)
                        {
                            continue;
                        }

                        // Generate the correct sequence and key it by the modified mass.
                        var trimmedPeptide = new PeptideWithSetModifications(largePeptide, trimmedStart, trimmedEnd);
                        var trimmedKey = new CompactPeptideWithModifiedMass(candidate.Key, finalMass[0]);
                        trimmedKey.SwapMonoisotopicMassWithModifiedMass();

                        if (trimmedByCpwm.TryGetValue(trimmedKey, out HashSet<PeptideWithSetModifications> trimmedSet))
                        {
                            trimmedSet.Add(trimmedPeptide);
                        }
                        else
                        {
                            trimmedByCpwm.Add(trimmedKey, new HashSet<PeptideWithSetModifications> { trimmedPeptide });
                        }
                    }
                }
            }
        }

        return trimmedByCpwm;
    }

    // Folds one partition's results into CPWMtoPWSM. Must be called while holding the merge lock.
    void MergeIntoGlobalDictionary(Dictionary<CompactPeptideWithModifiedMass, HashSet<PeptideWithSetModifications>> partitionResults)
    {
        foreach (KeyValuePair<CompactPeptideWithModifiedMass, HashSet<PeptideWithSetModifications>> partitionEntry in partitionResults)
        {
            if (CPWMtoPWSM.TryGetValue(partitionEntry.Key, out HashSet<PeptideWithSetModifications> knownPeptides))
            {
                knownPeptides.UnionWith(partitionEntry.Value);
            }
            else
            {
                CPWMtoPWSM.Add(partitionEntry.Key, new HashSet<PeptideWithSetModifications>(partitionEntry.Value));
            }
        }
    }
}
/// <summary>
/// Runs the nonspecific search over <c>listOfSortedms2Scans</c> in parallel and fills
/// <c>globalPsms</c> with the best-scoring candidate peptides for each scan.
/// </summary>
/// <remarks>
/// For each scan: (1) every peptide id found in the bins returned by <c>GetBinsToSearch</c> gets a
/// point in a per-thread scoring table; (2) the candidate set is restricted to peptide ids found
/// in the bins spanned by the precursor mass tolerance (shifted per product type, plus the
/// precursor index); (3) candidates are visited from the highest initial score downwards, and the
/// loop stops at the first score tier that yields a PSM or when the score cutoff is reached.
/// </remarks>
/// <returns>A new <see cref="MetaMorpheusEngineResults"/> created for this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    double progress = 0;
    int oldPercentProgress = 0;
    // Guards progress/oldPercentProgress, which are shared by all worker threads.
    var progressLock = new object();
    string progressMessage = "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions;
    ReportProgress(new ProgressEventArgs(oldPercentProgress, progressMessage, nestedIds));

    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

    // Partitioner.Create throws ArgumentOutOfRangeException for an empty range.
    if (listOfSortedms2Scans.Length == 0)
    {
        return new MetaMorpheusEngineResults(this);
    }

    Parallel.ForEach(Partitioner.Create(0, listOfSortedms2Scans.Length), new ParallelOptions { MaxDegreeOfParallelism = CommonParameters.MaxThreadsToUsePerFile }, range =>
    {
        byte[] scoringTable = new byte[peptideIndex.Count];
        HashSet<int> idsOfPeptidesPossiblyObserved = new HashSet<int>();

        for (int i = range.Item1; i < range.Item2; i++)
        {
            // Empty the scoring table to score the new scan (conserves memory compared to allocating a new array).
            Array.Clear(scoringTable, 0, scoringTable.Length);
            idsOfPeptidesPossiblyObserved.Clear();
            var scan = listOfSortedms2Scans[i];

            // Add a point for every peptide found in each bin to search.
            List<int> allBinsToSearch = GetBinsToSearch(scan);
            for (int j = 0; j < allBinsToSearch.Count; j++)
            {
                foreach (int peptideId in fragmentIndex[allBinsToSearch[j]])
                {
                    scoringTable[peptideId]++;
                }
            }

            // Populate the ids of possibly observed peptides with those containing allowed precursor masses.
            int obsPrecursorFloorMz = (int)Math.Floor(CommonParameters.PrecursorMassTolerance.GetMinimumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
            int obsPrecursorCeilingMz = (int)Math.Ceiling(CommonParameters.PrecursorMassTolerance.GetMaximumValue(scan.PrecursorMass) * fragmentBinsPerDalton);

            foreach (ProductType pt in lp)
            {
                int binShift;
                switch (pt)
                {
                    case ProductType.B:
                        binShift = bBinShift;
                        break;

                    case ProductType.Y:
                        binShift = 0;
                        break;

                    case ProductType.C:
                        binShift = cBinShift;
                        break;

                    case ProductType.Zdot:
                        binShift = zdotBinShift;
                        break;

                    default:
                        throw new NotImplementedException();
                }

                for (int precursorBin = obsPrecursorFloorMz; precursorBin <= obsPrecursorCeilingMz; precursorBin++)
                {
                    int shiftedBin = precursorBin - binShift;
                    // The shift can push the bin below zero; skip instead of indexing out of range.
                    if (shiftedBin >= 0 && shiftedBin < fragmentIndex.Length && fragmentIndex[shiftedBin] != null)
                    {
                        foreach (int peptideId in fragmentIndex[shiftedBin])
                        {
                            idsOfPeptidesPossiblyObserved.Add(peptideId);
                        }
                    }
                }
            }

            for (int precursorBin = obsPrecursorFloorMz; precursorBin <= obsPrecursorCeilingMz; precursorBin++)
            {
                if (precursorBin >= 0 && precursorBin < fragmentIndexPrecursor.Length && fragmentIndexPrecursor[precursorBin] != null)
                {
                    foreach (int peptideId in fragmentIndexPrecursor[precursorBin])
                    {
                        idsOfPeptidesPossiblyObserved.Add(peptideId);
                    }
                }
            }

            // Done with initial scoring; refine scores and create PSMs.
            if (idsOfPeptidesPossiblyObserved.Count > 0)
            {
                int maxInitialScore = idsOfPeptidesPossiblyObserved.Max(id => scoringTable[id]) + 1;
                while (maxInitialScore > CommonParameters.ScoreCutoff)
                {
                    maxInitialScore--;
                    int scoreTier = maxInitialScore;
                    foreach (int candidateId in idsOfPeptidesPossiblyObserved.Where(id => scoringTable[id] == scoreTier))
                    {
                        var candidatePeptide = peptideIndex[candidateId];
                        double[] fragmentMasses = candidatePeptide.ProductMassesMightHaveDuplicatesAndNaNs(lp).Distinct().Where(p => !Double.IsNaN(p)).OrderBy(p => p).ToArray();
                        // NOTE(review): the score is computed before the precursor check, as in the
                        // original; if CalculatePeptideScore has no side effects it could be deferred.
                        double peptideScore = CalculatePeptideScore(scan.TheScan, CommonParameters.ProductMassTolerance, fragmentMasses, scan.PrecursorMass, dissociationTypes, addCompIons, maximumMassThatFragmentIonScoreIsDoubled);

                        Tuple<int, double> notchAndPrecursor = Accepts(scan.PrecursorMass, candidatePeptide, terminusType, massDiffAcceptor);
                        if (notchAndPrecursor.Item1 < 0)
                        {
                            continue;
                        }

                        CompactPeptideWithModifiedMass cp = new CompactPeptideWithModifiedMass(candidatePeptide, notchAndPrecursor.Item2);
                        if (globalPsms[i] == null)
                        {
                            globalPsms[i] = new PeptideSpectralMatch(cp, notchAndPrecursor.Item1, peptideScore, i, scan);
                        }
                        else
                        {
                            globalPsms[i].AddOrReplace(cp, peptideScore, notchAndPrecursor.Item1, CommonParameters.ReportAllAmbiguity);
                        }
                    }

                    // Stop at the first score tier that produced a PSM for this scan.
                    if (globalPsms[i] != null)
                    {
                        break;
                    }
                }
            }

            // Report search progress. The counters are shared across worker threads, so they are
            // updated under a lock; reporting inside the lock keeps reported percentages increasing.
            lock (progressLock)
            {
                progress++;
                int percentProgress = (int)((progress / listOfSortedms2Scans.Length) * 100);
                if (percentProgress > oldPercentProgress)
                {
                    oldPercentProgress = percentProgress;
                    ReportProgress(new ProgressEventArgs(percentProgress, progressMessage, nestedIds));
                }
            }
        }
    });

    return new MetaMorpheusEngineResults(this);
}