/// <summary>
/// Checks that, with <c>modPeptidesAreUnique</c> set, parsimony leaves a modified peptide and
/// its unmodified counterpart as two separate dictionary entries, each holding one peptide.
/// </summary>
public static void ParsimonyLocalizeableTreatAsUnique()
{
    bool modPeptidesAreUnique = true;

    // set up mods
    ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
    var mod = new ModificationWithMass("Oxidation of M", "Common Variable", motif1, TerminusLocalization.Any, 15.99491461957);

    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(new List<ProductType> { ProductType.B, ProductType.Y });

    var protease = new Protease("kprotease", new List<string> { "K" }, new List<string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null);

    // modified version of protein
    var protein1 = new Protein("PEPTIDEM", "accession1");

    // unmodified version of protein
    var protein2 = new Protein("YYYKPEPTIDEM", "accession2");

    // protein1 is digested with the mod in the first modification list; protein2 with no mods at all.
    var pep1 = protein1.Digest(
        new DigestionParams { MinPeptideLength = null, Protease = protease },
        new List<ModificationWithMass> { mod },
        new List<ModificationWithMass>()).First();

    // The second digestion product of protein2 is the one compared against pep1.
    var pep2 = protein2.Digest(
        new DigestionParams { MinPeptideLength = null, Protease = protease },
        new List<ModificationWithMass>(),
        new List<ModificationWithMass>()).ToList()[1];

    // check to make sure mod is present
    Assert.That(pep1.Sequence != pep2.Sequence);
    Assert.That(pep1.NumMods == 1);
    Assert.That(pep2.NumMods == 0);

    // build the dictionary for input to parsimony
    var compactPeptideToProteinPeptideMatching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();
    compactPeptideToProteinPeptideMatching.Add(pep1.CompactPeptide(terminusType), new HashSet<PeptideWithSetModifications> { pep1 });
    compactPeptideToProteinPeptideMatching.Add(pep2.CompactPeptide(terminusType), new HashSet<PeptideWithSetModifications> { pep2 });

    // apply parsimony
    ProteinParsimonyEngine pae = new ProteinParsimonyEngine(compactPeptideToProteinPeptideMatching, modPeptidesAreUnique, new List<string>());
    pae.Run();

    // check to make sure both peptides are NOT associated with both proteins
    Assert.That(compactPeptideToProteinPeptideMatching.Count == 2);
    foreach (var kvp in compactPeptideToProteinPeptideMatching)
    {
        Assert.That(kvp.Value.Count == 1);
    }
}
/// <summary>
/// Creates the engine from the PSMs to resolve, the protein database, the modification lists,
/// the digestion parameter sets and the ambiguity-reporting flag. The supplied collections are
/// stored by reference; the terminus type is derived from <paramref name="ionTypes"/>.
/// </summary>
public SequencesToActualProteinPeptidesEngine(
    List<PeptideSpectralMatch> allPsms,
    List<Protein> proteinList,
    List<ModificationWithMass> fixedModifications,
    List<ModificationWithMass> variableModifications,
    List<ProductType> ionTypes,
    IEnumerable<IDigestionParams> collectionOfDigestionParams,
    bool reportAllAmbiguity,
    List<string> nestedIds)
    : base(nestedIds)
{
    // What is being matched against what.
    this.allPsms = allPsms;
    this.proteins = proteinList;

    // How the proteins are digested and modified.
    this.collectionOfDigestionParams = collectionOfDigestionParams;
    this.variableModifications = variableModifications;
    this.fixedModifications = fixedModifications;

    // Reporting and fragmentation settings.
    this.reportAllAmbiguity = reportAllAmbiguity;
    this.terminusType = ProductTypeMethod.IdentifyTerminusType(ionTypes);
}
/// <summary>
/// Creates the engine from the PSMs to resolve, the protein database, the modification lists,
/// the digestion parameter sets and the ambiguity-reporting flag. Common parameters and nested
/// ids are forwarded to the base engine; the terminus type is derived from
/// <paramref name="ionTypes"/>.
/// </summary>
public SequencesToActualProteinPeptidesEngine(
    List<PeptideSpectralMatch> allPsms,
    List<Protein> proteinList,
    List<ModificationWithMass> fixedModifications,
    List<ModificationWithMass> variableModifications,
    List<ProductType> ionTypes,
    IEnumerable<DigestionParams> collectionOfDigestionParams,
    bool reportAllAmbiguity,
    CommonParameters commonParameters,
    List<string> nestedIds)
    : base(commonParameters, nestedIds)
{
    // What is being matched against what.
    AllPsms = allPsms;
    Proteins = proteinList;

    // How the proteins are digested and modified.
    CollectionOfDigestionParams = collectionOfDigestionParams;
    VariableModifications = variableModifications;
    FixedModifications = fixedModifications;

    // Reporting and fragmentation settings.
    ReportAllAmbiguity = reportAllAmbiguity;
    TerminusType = ProductTypeMethods.IdentifyTerminusType(ionTypes);
}
/// <summary>
/// Annotates every identification with per-product-type matched ion masses and mass errors,
/// then, for identifications with exactly one compact peptide and a non-null full sequence,
/// scores the mass difference localized at each residue index.
/// </summary>
/// <returns>A <see cref="LocalizationEngineResults"/> wrapping this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

    // Pass 1: matched-ion bookkeeping, one dictionary entry per product type.
    foreach (var psm in allResultingIdentifications)
    {
        psm.MatchedIonDictOnlyMatches = new Dictionary<ProductType, double[]>();
        psm.ProductMassErrorDa = new Dictionary<ProductType, double[]>();
        psm.ProductMassErrorPpm = new Dictionary<ProductType, double[]>();

        var scan = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
        double precursorMass = psm.ScanPrecursorMass;

        foreach (var productType in lp)
        {
            // Theoretical masses for this single product type, from the first compact peptide.
            var theoreticalMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { productType });
            Array.Sort(theoreticalMasses);

            var matchedMasses = new List<double>();
            var errorsDa = new List<double>();
            var errorsPpm = new List<double>();
            MatchIons(scan, fragmentTolerance, theoreticalMasses, matchedMasses, errorsDa, errorsPpm, precursorMass, dissociationTypes, addCompIons);

            psm.MatchedIonDictOnlyMatches.Add(productType, matchedMasses.ToArray());
            psm.ProductMassErrorDa.Add(productType, errorsDa.ToArray());
            psm.ProductMassErrorPpm.Add(productType, errorsPpm.ToArray());
        }
    }

    // Pass 2: localization scores, only for identifications with a single compact peptide.
    foreach (var psm in allResultingIdentifications)
    {
        if (psm.NumDifferentCompactPeptides != 1)
        {
            continue;
        }

        var scan = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
        double precursorMass = psm.ScanPrecursorMass;
        if (psm.FullSequence == null)
        {
            continue;
        }

        var representative = psm.CompactPeptides.First().Value.Item2.First();
        var scoresByIndex = new List<double>();
        int index = 0;
        while (index < representative.Length)
        {
            // Place the precursor-vs-peptide mass difference at this index and rescore.
            PeptideWithSetModifications shifted = representative.Localize(index, psm.ScanPrecursorMass - representative.MonoisotopicMass);
            var shiftedMasses = shifted.CompactPeptide(terminusType).ProductMassesMightHaveDuplicatesAndNaNs(lp);
            Array.Sort(shiftedMasses);
            scoresByIndex.Add(CalculatePeptideScore(scan, fragmentTolerance, shiftedMasses, precursorMass, dissociationTypes, addCompIons, 0));
            index++;
        }

        psm.LocalizedScores = scoresByIndex;
    }

    return new LocalizationEngineResults(this);
}
/// <summary>
/// Creates a protease description: its name, the sequences that induce or prevent cleavage,
/// the cleavage terminus and specificity, and its PSI-MS accession, name and site regexp.
/// All arguments are stored as given.
/// </summary>
public Protease(
    string name,
    IEnumerable<string> sequencesInducingCleavage,
    IEnumerable<string> sequencesPreventingCleavage,
    TerminusType cleavageTerminus,
    CleavageSpecificity cleavageSpecificity,
    string psiMSAccessionNumber,
    string psiMSName,
    string siteRegexp)
{
    Name = name;

    // Cleavage behaviour.
    CleavageTerminus = cleavageTerminus;
    CleavageSpecificity = cleavageSpecificity;
    SequencesInducingCleavage = sequencesInducingCleavage;
    SequencesPreventingCleavage = sequencesPreventingCleavage;

    // PSI-MS metadata.
    PsiMsName = psiMSName;
    PsiMsAccessionNumber = psiMSAccessionNumber;
    SiteRegexp = siteRegexp;
}
/// <summary>
/// Creates the crosslink analysis engine. Every argument is stored on the matching property;
/// common parameters and nested ids are forwarded to the base engine.
/// </summary>
public CrosslinkAnalysisEngine(
    List<PsmCross> newPsms,
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatching,
    List<Protein> proteinList,
    List<ModificationWithMass> variableModifications,
    List<ModificationWithMass> fixedModifications,
    List<ProductType> lp,
    string outputFolder,
    CrosslinkerTypeClass crosslinker,
    TerminusType terminusType,
    CommonParameters commonParameters,
    List<string> nestedIds)
    : base(commonParameters, nestedIds)
{
    // PSMs and the peptide-to-protein map they are resolved through.
    NewPsms = newPsms;
    CompactPeptideToProteinPeptideMatching = compactPeptideToProteinPeptideMatching;

    // Database and modifications.
    ProteinList = proteinList;
    FixedModifications = fixedModifications;
    VariableModifications = variableModifications;

    // Fragmentation and crosslinker settings.
    ProductTypes = lp;
    TerminusType = terminusType;
    Crosslinker = crosslinker;

    OutputFolder = outputFolder;
}
/// <summary>
/// Returns the compact peptide for the given terminus type, creating it and storing it in
/// <c>compactPeptides</c> the first time that terminus type is requested.
/// </summary>
/// <param name="terminusType">Terminus type used as the lookup key.</param>
/// <returns>The stored or newly created compact peptide.</returns>
public CompactPeptide CompactPeptide(TerminusType terminusType)
{
    if (!compactPeptides.TryGetValue(terminusType, out CompactPeptide result))
    {
        // First request for this terminus type: build it and remember it.
        result = new CompactPeptide(this, terminusType);
        compactPeptides.Add(terminusType, result);
    }

    return result;
}
/// <summary>
/// Creates the crosslink analysis engine. Every argument is stored on the matching member;
/// only the nested ids are forwarded to the base engine.
/// </summary>
public CrosslinkAnalysisEngine(
    List<PsmCross> newPsms,
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> compactPeptideToProteinPeptideMatching,
    List<Protein> proteinList,
    List<ModificationWithMass> variableModifications,
    List<ModificationWithMass> fixedModifications,
    List<ProductType> lp,
    string OutputFolder,
    CrosslinkerTypeClass crosslinker,
    TerminusType terminusType,
    ICommonParameters CommonParameters,
    List<string> nestedIds)
    : base(nestedIds)
{
    // PSMs and the peptide-to-protein map they are resolved through.
    this.newPsms = newPsms;
    this.compactPeptideToProteinPeptideMatching = compactPeptideToProteinPeptideMatching;

    // Database and modifications.
    this.proteinList = proteinList;
    this.fixedModifications = fixedModifications;
    this.variableModifications = variableModifications;

    // Fragmentation and crosslinker settings.
    this.lp = lp;
    this.terminusType = terminusType;
    this.crosslinker = crosslinker;

    // Run-wide settings.
    this.CommonParameters = CommonParameters;
    this.OutputFolder = OutputFolder;
}
/// <summary>
/// Truncates the N- or C-terminal mass array (chosen by <paramref name="terminusType"/>) at the
/// first entry that is not below <c>MonoisotopicMassIncludingFixedMods</c>. If every entry is
/// below that mass the array is left untouched.
/// </summary>
/// <param name="terminusType"><c>TerminusType.N</c> selects the N-terminal masses; any other value selects the C-terminal masses.</param>
public void CropTerminalMasses(TerminusType terminusType)
{
    bool useNTerminus = terminusType == TerminusType.N;
    double[] source = useNTerminus ? NTerminalMasses : CTerminalMasses;
    List<double> kept = new List<double>();

    foreach (double mass in source)
    {
        if (mass < MonoisotopicMassIncludingFixedMods)
        {
            kept.Add(mass);
            continue;
        }

        // First mass that is not below the limit: store the prefix collected so far and stop.
        if (terminusType == TerminusType.N)
        {
            NTerminalMasses = kept.ToArray();
        }
        else
        {
            CTerminalMasses = kept.ToArray();
        }

        return;
    }
}
/// <summary>
/// Digests every protein with every digestion parameter set, collects the distinct compact
/// peptides, sorts them by monoisotopic mass and builds an index from singly-charged precursor
/// m/z bin to the positions of the peptides in the sorted list.
/// </summary>
/// <returns>An <see cref="IndexingResults"/> holding the sorted peptides and the index.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    double progress = 0;
    int oldPercentProgress = 0;
    object progressLock = new object();
    TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(ProductTypes);

    // digest database
    HashSet<CompactPeptide> peptideToId = new HashSet<CompactPeptide>();

    // Partitioner.Create throws when the range is empty, so skip digestion for an empty list.
    if (ProteinList.Count > 0)
    {
        Parallel.ForEach(Partitioner.Create(0, ProteinList.Count), new ParallelOptions { MaxDegreeOfParallelism = commonParameters.MaxThreadsToUsePerFile }, (range, loopState) =>
        {
            for (int i = range.Item1; i < range.Item2; i++)
            {
                // Stop loop if canceled
                if (GlobalVariables.StopLoops)
                {
                    loopState.Stop();
                    return;
                }

                foreach (var digestionParams in CollectionOfDigestionParams)
                {
                    foreach (var pepWithSetMods in ProteinList[i].Digest(digestionParams, FixedModifications, VariableModifications))
                    {
                        CompactPeptide compactPeptide = pepWithSetMods.CompactPeptide(terminusType);

                        // HashSet<T> is not safe to read while another thread writes, so the
                        // membership test and the insert both happen under the lock; Add is a
                        // no-op for a peptide that is already present.
                        lock (peptideToId)
                        {
                            peptideToId.Add(compactPeptide);
                        }
                    }
                }

                // The counters are shared by all workers; update and report them atomically.
                lock (progressLock)
                {
                    progress++;
                    var percentProgress = (int)((progress / ProteinList.Count) * 100);
                    if (percentProgress > oldPercentProgress)
                    {
                        oldPercentProgress = percentProgress;
                        ReportProgress(new ProgressEventArgs(percentProgress, "Digesting proteins for precursor...", nestedIds));
                    }
                }
            }
        });
    }

    // sort peptides by mass
    var peptidesSortedByMass = peptideToId.AsParallel().WithDegreeOfParallelism(commonParameters.MaxThreadsToUsePerFile).OrderBy(p => p.MonoisotopicMassIncludingFixedMods).ToList();
    peptideToId = null;

    // create fragment index: size it from the heaviest peptide whose mass is not NaN
    int maxFragmentMass = 0;
    for (int i = peptidesSortedByMass.Count - 1; i >= 0; i--)
    {
        if (!Double.IsNaN(peptidesSortedByMass[i].MonoisotopicMassIncludingFixedMods))
        {
            maxFragmentMass = (int)Math.Ceiling(Chemistry.ClassExtensions.ToMz(peptidesSortedByMass[i].MonoisotopicMassIncludingFixedMods, 1));
            break;
        }
    }

    var fragmentIndex = new List<int>[maxFragmentMass * FragmentBinsPerDalton + 1];

    // populate fragment index
    progress = 0;
    oldPercentProgress = 0;
    for (int i = 0; i < peptidesSortedByMass.Count; i++)
    {
        double mz = Chemistry.ClassExtensions.ToMz(peptidesSortedByMass[i].MonoisotopicMassIncludingFixedMods, 1);
        if (!Double.IsNaN(mz))
        {
            int fragmentBin = (int)Math.Round(mz * FragmentBinsPerDalton);
            if (fragmentIndex[fragmentBin] == null)
            {
                fragmentIndex[fragmentBin] = new List<int> { i };
            }
            else
            {
                fragmentIndex[fragmentBin].Add(i);
            }
        }

        progress++;
        var percentProgress = (int)((progress / peptidesSortedByMass.Count) * 100);
        if (percentProgress > oldPercentProgress)
        {
            oldPercentProgress = percentProgress;
            ReportProgress(new ProgressEventArgs(percentProgress, "Creating fragment index for precursor...", nestedIds));
        }
    }

    return new IndexingResults(peptidesSortedByMass, fragmentIndex, this);
}
/// <summary>
/// Runs the nonspecific search over all sorted MS2 scans in parallel. For each scan, candidate
/// peptides are scored through the fragment index, restricted to those reachable from the
/// precursor-mass bins, then rescored from the highest initial score downwards until a PSM is
/// recorded for the scan or the score cutoff is reached.
/// </summary>
/// <returns>A <see cref="MetaMorpheusEngineResults"/> wrapping this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    double progress = 0;
    int oldPercentProgress = 0;
    object progressLock = new object();
    ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions, nestedIds));
    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

    // Partitioner.Create throws when the range is empty, so there is nothing to do without scans.
    if (listOfSortedms2Scans.Length == 0)
    {
        return new MetaMorpheusEngineResults(this);
    }

    Parallel.ForEach(Partitioner.Create(0, listOfSortedms2Scans.Length), new ParallelOptions { MaxDegreeOfParallelism = CommonParameters.MaxThreadsToUsePerFile }, range =>
    {
        byte[] scoringTable = new byte[peptideIndex.Count];
        HashSet<int> idsOfPeptidesPossiblyObserved = new HashSet<int>();

        for (int i = range.Item1; i < range.Item2; i++)
        {
            // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
            Array.Clear(scoringTable, 0, scoringTable.Length);
            idsOfPeptidesPossiblyObserved.Clear();
            var scan = listOfSortedms2Scans[i];

            // get bins to add points to
            // NOTE(review): assumes GetBinsToSearch only returns indices of populated bins — confirm.
            List<int> allBinsToSearch = GetBinsToSearch(scan);
            for (int j = 0; j < allBinsToSearch.Count; j++)
            {
                fragmentIndex[allBinsToSearch[j]].ForEach(id => scoringTable[id]++);
            }

            // populate ids of possibly observed with those containing allowed precursor masses
            List<int> binsToSearch = new List<int>();
            int obsPrecursorFloorMz = (int)Math.Floor(CommonParameters.PrecursorMassTolerance.GetMinimumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
            int obsPrecursorCeilingMz = (int)Math.Ceiling(CommonParameters.PrecursorMassTolerance.GetMaximumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
            for (int fragmentBin = obsPrecursorFloorMz; fragmentBin <= obsPrecursorCeilingMz; fragmentBin++)
            {
                binsToSearch.Add(fragmentBin);
            }

            foreach (ProductType pt in lp)
            {
                int binShift;
                switch (pt)
                {
                    case ProductType.B:
                        binShift = bBinShift;
                        break;

                    case ProductType.Y:
                        binShift = 0;
                        break;

                    case ProductType.C:
                        binShift = cBinShift;
                        break;

                    case ProductType.Zdot:
                        binShift = zdotBinShift;
                        break;

                    default:
                        throw new NotImplementedException();
                }

                for (int j = 0; j < binsToSearch.Count; j++)
                {
                    // Subtracting the shift can push the bin below zero; skip such bins instead of indexing out of range.
                    int bin = binsToSearch[j] - binShift;
                    if (bin >= 0 && bin < fragmentIndex.Length && fragmentIndex[bin] != null)
                    {
                        fragmentIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                    }
                }
            }

            for (int j = 0; j < binsToSearch.Count; j++)
            {
                int bin = binsToSearch[j];
                if (bin >= 0 && bin < fragmentIndexPrecursor.Length && fragmentIndexPrecursor[bin] != null)
                {
                    fragmentIndexPrecursor[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                }
            }

            // done with initial scoring; refine scores and create PSMs
            if (idsOfPeptidesPossiblyObserved.Any())
            {
                int maxInitialScore = idsOfPeptidesPossiblyObserved.Max(id => scoringTable[id]) + 1;
                while (maxInitialScore > CommonParameters.ScoreCutoff)
                {
                    maxInitialScore--;
                    foreach (var id in idsOfPeptidesPossiblyObserved.Where(candidateId => scoringTable[candidateId] == maxInitialScore))
                    {
                        var candidatePeptide = peptideIndex[id];
                        double[] fragmentMasses = candidatePeptide.ProductMassesMightHaveDuplicatesAndNaNs(lp).Distinct().Where(p => !Double.IsNaN(p)).OrderBy(p => p).ToArray();
                        double peptideScore = CalculatePeptideScore(scan.TheScan, CommonParameters.ProductMassTolerance, fragmentMasses, scan.PrecursorMass, dissociationTypes, addCompIons, maximumMassThatFragmentIonScoreIsDoubled);

                        Tuple<int, double> notchAndPrecursor = Accepts(scan.PrecursorMass, candidatePeptide, terminusType, massDiffAcceptor);
                        if (notchAndPrecursor.Item1 >= 0)
                        {
                            CompactPeptideWithModifiedMass cp = new CompactPeptideWithModifiedMass(candidatePeptide, notchAndPrecursor.Item2);
                            if (globalPsms[i] == null)
                            {
                                globalPsms[i] = new PeptideSpectralMatch(cp, notchAndPrecursor.Item1, peptideScore, i, scan);
                            }
                            else
                            {
                                globalPsms[i].AddOrReplace(cp, peptideScore, notchAndPrecursor.Item1, CommonParameters.ReportAllAmbiguity);
                            }
                        }
                    }

                    // Stop lowering the score threshold once this scan has a PSM.
                    if (globalPsms[i] != null)
                    {
                        break;
                    }
                }
            }

            // report search progress; the counters are shared by all workers, so update them under a lock
            lock (progressLock)
            {
                progress++;
                var percentProgress = (int)((progress / listOfSortedms2Scans.Length) * 100);
                if (percentProgress > oldPercentProgress)
                {
                    oldPercentProgress = percentProgress;
                    ReportProgress(new ProgressEventArgs(percentProgress, "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions, nestedIds));
                }
            }
        }
    });

    return new MetaMorpheusEngineResults(this);
}
/// <summary>
/// Looks for a theoretical mass of <paramref name="peptide"/> that the search mode accepts for
/// the observed precursor mass. Terminal masses (N-terminal when
/// <paramref name="terminusType"/> is <c>TerminusType.N</c>, otherwise C-terminal) are tried
/// from the minimum peptide length upwards, then the peptide's total mass.
/// </summary>
/// <returns>
/// The accepted notch and the matching theoretical mass, or (-1, -1) when nothing is accepted.
/// </returns>
private Tuple<int, double> Accepts(double scanPrecursorMass, CompactPeptide peptide, TerminusType terminusType, MassDiffAcceptor searchMode)
{
    // all masses in N and CTerminalMasses are b-ion masses, which are one water away from a full peptide
    int firstIndex = CommonParameters.DigestionParams.MinPeptideLength ?? 0;
    var terminalMasses = terminusType == TerminusType.N ? peptide.NTerminalMasses : peptide.CTerminalMasses;

    for (int i = firstIndex; i < terminalMasses.Count(); i++)
    {
        double candidateMass = terminalMasses[i] + waterMonoisotopicMass;
        int candidateNotch = searchMode.Accepts(scanPrecursorMass, candidateMass);
        if (candidateNotch >= 0)
        {
            return new Tuple<int, double>(candidateNotch, candidateMass);
        }

        // Once the theoretical mass exceeds the observed mass, stop scanning the terminal masses.
        if (candidateMass > scanPrecursorMass)
        {
            break;
        }
    }

    // if the theoretical and experimental have the same mass
    if (terminalMasses.Count() > firstIndex)
    {
        double wholeMass = peptide.MonoisotopicMassIncludingFixedMods;
        int wholeNotch = searchMode.Accepts(scanPrecursorMass, wholeMass);
        if (wholeNotch >= 0)
        {
            return new Tuple<int, double>(wholeNotch, wholeMass);
        }
    }

    return new Tuple<int, double>(-1, -1);
}
/// <summary>
/// Annotates every identification with per-product-type matched ion series, m/z values, mass
/// errors and intensities, then, for identifications with exactly one compact peptide and a
/// non-null full sequence, scores the mass difference localized at each residue index. Both
/// passes stop early when <c>GlobalVariables.StopLoops</c> is set.
/// </summary>
/// <returns>A <see cref="LocalizationEngineResults"/> wrapping this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(ProductTypes);

    // Pass 1: matched-ion bookkeeping, one dictionary entry per product type.
    foreach (PeptideSpectralMatch match in AllResultingIdentifications)
    {
        // Stop loop if canceled
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        match.MatchedIonSeriesDict = new Dictionary<ProductType, int[]>();
        match.MatchedIonMassToChargeRatioDict = new Dictionary<ProductType, double[]>();
        match.ProductMassErrorDa = new Dictionary<ProductType, double[]>();
        match.ProductMassErrorPpm = new Dictionary<ProductType, double[]>();
        match.MatchedIonIntensitiesDict = new Dictionary<ProductType, double[]>();

        var scan = MyMsDataFile.GetOneBasedScan(match.ScanNumber);
        double precursorMass = match.ScanPrecursorMass;

        foreach (ProductType ionType in ProductTypes)
        {
            // Theoretical masses for this single product type, from the first compact peptide.
            var theoreticalMasses = match.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { ionType });
            Array.Sort(theoreticalMasses);

            var series = new List<int>();
            var mzValues = new List<double>();
            var errorsDa = new List<double>();
            var errorsPpm = new List<double>();
            var intensities = new List<double>();

            // populate the above lists
            MatchIonsOld(scan, commonParameters.ProductMassTolerance, theoreticalMasses, series, mzValues, errorsDa, errorsPpm, intensities, precursorMass, ionType, commonParameters.AddCompIons);

            match.MatchedIonSeriesDict.Add(ionType, series.ToArray());
            match.MatchedIonMassToChargeRatioDict.Add(ionType, mzValues.ToArray());
            match.ProductMassErrorDa.Add(ionType, errorsDa.ToArray());
            match.ProductMassErrorPpm.Add(ionType, errorsPpm.ToArray());
            match.MatchedIonIntensitiesDict.Add(ionType, intensities.ToArray());
        }
    }

    // Pass 2: localization scores, only for identifications with a single compact peptide.
    foreach (PeptideSpectralMatch match in AllResultingIdentifications)
    {
        if (match.NumDifferentCompactPeptides != 1)
        {
            continue;
        }

        // Stop loop if canceled
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var scan = MyMsDataFile.GetOneBasedScan(match.ScanNumber);
        double precursorMass = match.ScanPrecursorMass;
        if (match.FullSequence == null)
        {
            continue;
        }

        PeptideWithSetModifications representative = match.CompactPeptides.First().Value.Item2.First();
        var scoresByIndex = new List<double>();
        int index = 0;
        while (index < representative.Length)
        {
            // Place the precursor-vs-peptide mass difference at this index and rescore.
            PeptideWithSetModifications shifted = representative.Localize(index, match.ScanPrecursorMass - representative.MonoisotopicMass);
            var shiftedMasses = shifted.CompactPeptide(terminusType).ProductMassesMightHaveDuplicatesAndNaNs(ProductTypes);
            Array.Sort(shiftedMasses);
            scoresByIndex.Add(CalculatePeptideScoreOld(scan, commonParameters.ProductMassTolerance, shiftedMasses, precursorMass, DissociationTypes, commonParameters.AddCompIons, 0));
            index++;
        }

        match.LocalizedScores = scoresByIndex;
    }

    return new LocalizationEngineResults(this);
}
/// <summary>
/// Runs the crosslink search task: loads modifications and proteins, searches every spectra
/// file partition by partition, runs the crosslink analysis engine on the pooled PSMs, and
/// writes the inter-, intra-, single, loop and dead-end result files selected in
/// <c>XlSearchParameters</c>.
/// </summary>
/// <param name="OutputFolder">Folder passed to every writer.</param>
/// <param name="dbFilenameList">Databases to load proteins from; the first entry's path is also passed to the pepXML writer.</param>
/// <param name="currentRawFileList">Spectra files to search, in order.</param>
/// <param name="taskId">Identifier used for status and progress messages.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The <c>MyTaskResults</c> instance created at the start of the run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    MyTaskResults = new MyTaskResults(this);
    List<PsmCross> allPsms = new List<PsmCross>();
    var compactPeptideToProteinPeptideMatch = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();

    Status("Loading modifications...", taskId);
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
    List<string> localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

    List<ProductType> ionTypes = new List<ProductType>();
    if (CommonParameters.BIons)
    {
        ionTypes.Add(ProductType.BnoB1ions);
    }
    if (CommonParameters.YIons)
    {
        ionTypes.Add(ProductType.Y);
    }
    if (CommonParameters.ZdotIons)
    {
        ionTypes.Add(ProductType.Zdot);
    }
    if (CommonParameters.CIons)
    {
        ionTypes.Add(ProductType.C);
    }
    TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(ionTypes);

    var crosslinker = new CrosslinkerTypeClass();
    crosslinker.SelectCrosslinker(XlSearchParameters.CrosslinkerType);
    if (XlSearchParameters.CrosslinkerType == CrosslinkerType.UserDefined)
    {
        crosslinker = GenerateUserDefinedCrosslinker(XlSearchParameters);
    }

    MyFileManager myFileManager = new MyFileManager(XlSearchParameters.DisposeOfFileWhenDone);

    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
    HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);

    ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
    ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
    ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
    ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
    ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue
        ? "maximum peptide length = unspecified; "
        : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
    ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + XlSearchParameters.XlPrecusorMsTl + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        List<PsmCross> newPsms = new List<PsmCross>();

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);

        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);

        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

        // One partition count drives the loop bound, the slice arithmetic and the progress
        // text, so the slices always cover the protein list exactly once.
        int totalPartitions = combinedParams.TotalPartitions;
        for (int currentPartition = 0; currentPartition < totalPartitions; currentPartition++)
        {
            List<CompactPeptide> peptideIndex = null;

            int sliceStart = currentPartition * proteinList.Count / totalPartitions;
            int sliceEnd = (currentPartition + 1) * proteinList.Count / totalPartitions;
            List<Protein> proteinListSubset = proteinList.GetRange(sliceStart, sliceEnd - sliceStart);

            Status("Getting fragment dictionary...", new List<string> { taskId });
            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, ListOfDigestionParams, combinedParams, 30000.0, new List<string> { taskId });
            List<int>[] fragmentIndex = null;
            lock (indexLock)
            {
                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
            }

            Status("Searching files...", taskId);
            new TwoPassCrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, false, XlSearchParameters.XlPrecusorMsTl, crosslinker, XlSearchParameters.CrosslinkSearchTop, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, XlSearchParameters.XlCharge_2_3, XlSearchParameters.XlCharge_2_3_PrimeFragment, thisId).Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + totalPartitions + "!", thisId));
        }

        lock (psmLock)
        {
            allPsms.AddRange(newPsms.Where(p => p != null));
        }

        completedFiles++;

        // Multiply before dividing: plain integer division would report 0 until the last file.
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    Status("Crosslink analysis engine", taskId);
    new CrosslinkAnalysisEngine(allPsms, compactPeptideToProteinPeptideMatch, proteinList, variableModifications, fixedModifications, ionTypes, OutputFolder, crosslinker, terminusType, CommonParameters, new List<string> { taskId }).Run();

    if (XlSearchParameters.XlOutAll)
    {
        WriteAllToTsv(allPsms, OutputFolder, "allPsms", new List<string> { taskId });
    }

    // Crosslinked PSMs where both the alpha and the beta peptide reach the score cutoff.
    var allPsmsXL = allPsms
        .Where(p => p.CrossType == PsmCrossType.Cross)
        .Where(p => p.XLBestScore >= CommonParameters.ScoreCutoff && p.BetaPsmCross.XLBestScore >= CommonParameters.ScoreCutoff)
        .ToList();

    foreach (var item in allPsmsXL)
    {
        if (item.OneBasedStartResidueInProtein.HasValue)
        {
            item.XlProteinPos = item.OneBasedStartResidueInProtein.Value + item.XlPos - 1;
        }
        if (item.BetaPsmCross.OneBasedStartResidueInProtein.HasValue)
        {
            item.BetaPsmCross.XlProteinPos = item.BetaPsmCross.OneBasedStartResidueInProtein.Value + item.BetaPsmCross.XlPos - 1;
        }
    }

    // Write Inter Psms FDR: neither first accession contains the other.
    var interPsmsXL = allPsmsXL
        .Where(p =>
        {
            var alphaAccession = p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First();
            var betaAccession = p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First();
            return !alphaAccession.Contains(betaAccession) && !betaAccession.Contains(alphaAccession);
        })
        .OrderByDescending(p => p.XLQvalueTotalScore)
        .ToList();
    foreach (var item in interPsmsXL)
    {
        item.CrossType = PsmCrossType.Inter;
    }
    var interPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(interPsmsXL).ToList();
    if (XlSearchParameters.XlOutCrosslink)
    {
        WriteCrosslinkToTsv(interPsmsXLFDR, OutputFolder, "xl_inter_fdr", new List<string> { taskId });
    }
    if (XlSearchParameters.XlOutPercolator)
    {
        var interPsmsXLPercolator = interPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "xl_inter_perc", crosslinker, new List<string> { taskId });
    }

    // Write Intra Psms FDR: the first accessions are equal or one contains the other.
    var intraPsmsXL = allPsmsXL
        .Where(p =>
        {
            var alphaAccession = p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First();
            var betaAccession = p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First();
            return alphaAccession == betaAccession || alphaAccession.Contains(betaAccession) || betaAccession.Contains(alphaAccession);
        })
        .OrderByDescending(p => p.XLQvalueTotalScore)
        .ToList();
    foreach (var item in intraPsmsXL)
    {
        item.CrossType = PsmCrossType.Intra;
    }
    var intraPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(intraPsmsXL).ToList();
    if (XlSearchParameters.XlOutCrosslink)
    {
        WriteCrosslinkToTsv(intraPsmsXLFDR, OutputFolder, "xl_intra_fdr", new List<string> { taskId });
    }
    if (XlSearchParameters.XlOutPercolator)
    {
        var intraPsmsXLPercolator = intraPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList();
        WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "xl_intra_perc", crosslinker, new List<string> { taskId });
    }

    // Single peptides whose full sequence does not mention a crosslink.
    var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Singe && p.FullSequence != null && !p.FullSequence.Contains("Crosslink")).OrderByDescending(p => p.Score).ToList();
    var singlePsmsFDR = SingleFDRAnalysis(singlePsms).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteSingleToTsv(singlePsmsFDR, OutputFolder, "single_fdr", new List<string> { taskId });
    }

    var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).OrderByDescending(p => p.XLTotalScore).ToList();
    var loopPsmsFDR = SingleFDRAnalysis(loopPsms).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteSingleToTsv(loopPsmsFDR, OutputFolder, "loop_fdr", new List<string> { taskId });
    }

    // Dead-end PSMs, plus single PSMs whose full sequence mentions a crosslink.
    var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd || p.CrossType == PsmCrossType.DeadEndH2O || p.CrossType == PsmCrossType.DeadEndNH2 || p.CrossType == PsmCrossType.DeadEndTris).OrderByDescending(p => p.XLTotalScore).ToList();
    deadendPsms.AddRange(allPsms.Where(p => p.CrossType == PsmCrossType.Singe && p.FullSequence != null && p.FullSequence.Contains("Crosslink")).ToList());
    var deadendPsmsFDR = SingleFDRAnalysis(deadendPsms).ToList();
    if (XlSearchParameters.XlOutAll)
    {
        WriteSingleToTsv(deadendPsmsFDR, OutputFolder, "deadend_fdr", new List<string> { taskId });
    }

    if (XlSearchParameters.XlOutPepXML)
    {
        // Non-decoy results with q-value <= 0.05 from every category, ordered by scan number.
        List<PsmCross> allPsmsFDR = new List<PsmCross>();
        allPsmsFDR.AddRange(intraPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
        allPsmsFDR.AddRange(interPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
        allPsmsFDR.AddRange(singlePsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
        allPsmsFDR.AddRange(loopPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
        allPsmsFDR.AddRange(deadendPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
        allPsmsFDR = allPsmsFDR.OrderBy(p => p.ScanNumber).ToList();

        foreach (var fullFilePath in currentRawFileList)
        {
            string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
            WritePepXML_xl(allPsmsFDR.Where(p => p.FullFilePath == fullFilePath).ToList(), proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List<string> { taskId });
        }
    }

    if (XlSearchParameters.XlOutAll)
    {
        // Non-decoy intra- and inter-crosslinks with q-value <= 0.05, grouped by FindCrosslinks.
        List<PsmCross> allPsmsXLFDR = new List<PsmCross>();
        allPsmsXLFDR.AddRange(intraPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
        allPsmsXLFDR.AddRange(interPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());

        allPsmsXLFDR = allPsmsXLFDR.OrderByDescending(p => p.XLQvalueTotalScore).ToList();
        var allPsmsXLFDRGroup = FindCrosslinks(allPsmsXLFDR);
        WriteCrosslinkToTsv(allPsmsXLFDRGroup, OutputFolder, "allPsmsXLFDRGroup", new List<string> { taskId });
    }

    return MyTaskResults;
}
/// <summary>
/// Runs the classic search: every protein is digested, each resulting peptide is scored against
/// every scan returned by GetAcceptableScans for the peptide's mass, and qualifying hits are written
/// into peptideSpectralMatches (one slot per scan index).
/// </summary>
/// <returns>A <see cref="MetaMorpheusEngineResults"/> constructed from this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    Status("Getting ms2 scans...");

    // Progress counters are shared by all worker threads of the parallel loop below,
    // so they are only read or written while holding progressLock.
    var progressLock = new object();
    double proteinsSearched = 0;
    int oldPercentProgress = 0;
    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

    // one lock for each MS2 scan; a scan can only be accessed by one thread at a time
    var myLocks = new object[peptideSpectralMatches.Length];
    for (int i = 0; i < myLocks.Length; i++)
    {
        myLocks[i] = new object();
    }

    Status("Performing classic search...");

    // Partitioner.Create rejects an empty range, hence the guard
    if (proteins.Any())
    {
        Parallel.ForEach(Partitioner.Create(0, proteins.Count), partitionRange =>
        {
            for (int i = partitionRange.Item1; i < partitionRange.Item2; i++)
            {
                // digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance
                foreach (var peptide in proteins[i].Digest(commonParameters.DigestionParams, fixedModifications, variableModifications))
                {
                    var compactPeptide = peptide.CompactPeptide(terminusType);

                    var productMasses = compactPeptide.ProductMassesMightHaveDuplicatesAndNaNs(lp);
                    Array.Sort(productMasses);

                    foreach (ScanWithIndexAndNotchInfo scan in GetAcceptableScans(compactPeptide.MonoisotopicMassIncludingFixedMods, searchMode))
                    {
                        double scanPrecursorMass = scan.theScan.PrecursorMass;

                        var thisScore = CalculatePeptideScore(scan.theScan.TheScan, productMassTolerance, productMasses, scanPrecursorMass, dissociationTypes, addCompIons, 0);

                        bool meetsScoreCutoff = thisScore > commonParameters.ScoreCutoff;
                        bool scoreImprovement = peptideSpectralMatches[scan.scanIndex] == null
                            || (peptideSpectralMatches[scan.scanIndex].Score - PeptideSpectralMatch.tolForScoreDifferentiation) <= thisScore;

                        // this pre-check runs outside the lock; even if another thread improves the stored score
                        // in the meantime, the lock combined with the AddOrReplace method decides the final outcome
                        if ((meetsScoreCutoff && scoreImprovement) || commonParameters.CalculateEValue)
                        {
                            // lock the scan to prevent other threads from accessing it
                            lock (myLocks[scan.scanIndex])
                            {
                                if (peptideSpectralMatches[scan.scanIndex] == null)
                                {
                                    peptideSpectralMatches[scan.scanIndex] = new PeptideSpectralMatch(compactPeptide, scan.notch, thisScore, scan.scanIndex, scan.theScan);
                                }
                                else
                                {
                                    peptideSpectralMatches[scan.scanIndex].AddOrReplace(compactPeptide, thisScore, scan.notch, commonParameters.ReportAllAmbiguity);
                                }

                                // when e-values are requested, every score is recorded, including those below the cutoff
                                if (commonParameters.CalculateEValue)
                                {
                                    peptideSpectralMatches[scan.scanIndex].AddThisScoreToScoreDistribution(thisScore);
                                }
                            }
                        }
                    }
                }

                // report search progress (proteins searched so far out of total proteins in database);
                // the lock keeps the counter exact and the reported percentages strictly increasing
                lock (progressLock)
                {
                    proteinsSearched++;
                    var percentProgress = (int)((proteinsSearched / proteins.Count) * 100);

                    if (percentProgress > oldPercentProgress)
                    {
                        oldPercentProgress = percentProgress;
                        ReportProgress(new ProgressEventArgs(percentProgress, "Performing classic search... ", nestedIds));
                    }
                }
            }
        });
    }

    // remove peptides below the score cutoff that were stored to calculate expectation values
    if (commonParameters.CalculateEValue)
    {
        for (int i = 0; i < peptideSpectralMatches.Length; i++)
        {
            if (peptideSpectralMatches[i] != null && peptideSpectralMatches[i].Score < commonParameters.ScoreCutoff)
            {
                peptideSpectralMatches[i] = null;
            }
        }
    }

    return new MetaMorpheusEngineResults(this);
}
/// <summary>
/// Runs the classic search: every protein is digested, the theoretical fragments of each peptide are
/// matched against every scan returned by GetAcceptableScans for the peptide's mass, and qualifying
/// hits are written into PeptideSpectralMatches (one slot per scan index).
/// The parallel loop stops early when GlobalVariables.StopLoops is set.
/// </summary>
/// <returns>A <see cref="MetaMorpheusEngineResults"/> constructed from this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    Status("Getting ms2 scans...");

    // Progress counters are shared by all worker threads of the parallel loop below,
    // so they are only read or written while holding progressLock.
    var progressLock = new object();
    double proteinsSearched = 0;
    int oldPercentProgress = 0;
    TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(ProductTypes);

    // one lock for each MS2 scan; a scan can only be accessed by one thread at a time
    var myLocks = new object[PeptideSpectralMatches.Length];
    for (int i = 0; i < myLocks.Length; i++)
    {
        myLocks[i] = new object();
    }

    Status("Performing classic search...");

    // Partitioner.Create rejects an empty range, hence the guard
    if (Proteins.Any())
    {
        Parallel.ForEach(Partitioner.Create(0, Proteins.Count), new ParallelOptions { MaxDegreeOfParallelism = commonParameters.MaxThreadsToUsePerFile }, (partitionRange, loopState) =>
        {
            for (int i = partitionRange.Item1; i < partitionRange.Item2; i++)
            {
                // Stop loop if canceled
                if (GlobalVariables.StopLoops)
                {
                    loopState.Stop();
                    return;
                }

                // digest each protein into peptides and search for each peptide in all spectra within precursor mass tolerance
                foreach (var peptide in Proteins[i].Digest(commonParameters.DigestionParams, FixedModifications, VariableModifications))
                {
                    var peptideTheorIons = peptide.GetTheoreticalFragments(ProductTypes);
                    var compactPeptide = peptide.CompactPeptide(terminusType);

                    foreach (ScanWithIndexAndNotchInfo scan in GetAcceptableScans(compactPeptide.MonoisotopicMassIncludingFixedMods, SearchMode))
                    {
                        var matchedIons = MatchFragmentIons(scan.TheScan.TheScan.MassSpectrum, peptideTheorIons, commonParameters);

                        // optionally also match against a complementary spectrum generated for each dissociation type
                        if (commonParameters.AddCompIons)
                        {
                            foreach (var dissociationType in DissociationTypes)
                            {
                                MzSpectrum complementarySpectrum = GenerateComplementarySpectrum(scan.TheScan.TheScan.MassSpectrum, scan.TheScan.PrecursorMass, dissociationType);
                                matchedIons.AddRange(MatchFragmentIons(complementarySpectrum, peptideTheorIons, commonParameters));
                            }
                        }

                        double thisScore = CalculatePeptideScore(scan.TheScan.TheScan, matchedIons, 0);

                        bool meetsScoreCutoff = thisScore >= commonParameters.ScoreCutoff;

                        if (meetsScoreCutoff || commonParameters.CalculateEValue)
                        {
                            // lock the scan to prevent other threads from accessing it; the comparison against
                            // the stored PSM happens inside the lock, so it always sees the latest value
                            lock (myLocks[scan.ScanIndex])
                            {
                                bool scoreImprovement = PeptideSpectralMatches[scan.ScanIndex] == null
                                    || (thisScore - PeptideSpectralMatches[scan.ScanIndex].RunnerUpScore) > -PeptideSpectralMatch.ToleranceForScoreDifferentiation;

                                if (scoreImprovement)
                                {
                                    if (PeptideSpectralMatches[scan.ScanIndex] == null)
                                    {
                                        PeptideSpectralMatches[scan.ScanIndex] = new PeptideSpectralMatch(compactPeptide, scan.Notch, thisScore, scan.ScanIndex, scan.TheScan, commonParameters.DigestionParams);
                                    }
                                    else
                                    {
                                        PeptideSpectralMatches[scan.ScanIndex].AddOrReplace(compactPeptide, thisScore, scan.Notch, commonParameters.ReportAllAmbiguity);
                                    }

                                    //TODO: move this into the PeptideSpectralMatch constructor
                                    PeptideSpectralMatches[scan.ScanIndex].SetMatchedFragments(matchedIons);
                                }

                                // when e-values are requested, every score is recorded, including those below the cutoff;
                                // the PSM is never null here because a null slot always counts as a score improvement
                                if (commonParameters.CalculateEValue)
                                {
                                    PeptideSpectralMatches[scan.ScanIndex].AllScores.Add(thisScore);
                                }
                            }
                        }
                    }
                }

                // report search progress (proteins searched so far out of total proteins in database);
                // the lock keeps the counter exact and the reported percentages strictly increasing
                lock (progressLock)
                {
                    proteinsSearched++;
                    var percentProgress = (int)((proteinsSearched / Proteins.Count) * 100);

                    if (percentProgress > oldPercentProgress)
                    {
                        oldPercentProgress = percentProgress;
                        ReportProgress(new ProgressEventArgs(percentProgress, "Performing classic search... ", nestedIds));
                    }
                }
            }
        });
    }

    // remove peptides below the score cutoff that were stored to calculate expectation values
    if (commonParameters.CalculateEValue)
    {
        for (int i = 0; i < PeptideSpectralMatches.Length; i++)
        {
            if (PeptideSpectralMatches[i] != null && PeptideSpectralMatches[i].Score < commonParameters.ScoreCutoff)
            {
                PeptideSpectralMatches[i] = null;
            }
        }
    }

    return new MetaMorpheusEngineResults(this);
}
/// <summary>
/// Builds the fragment index: digests every protein with every entry of CollectionOfDigestionParams,
/// collects the distinct compact peptides, sorts them by MonoisotopicMassIncludingFixedMods, and then
/// records, for each fragment-mass bin, the indices (into the sorted list) of the peptides that
/// produce a fragment in that bin.
/// </summary>
/// <returns>An <see cref="IndexingResults"/> holding the sorted peptides and the fragment index.</returns>
/// <exception cref="MetaMorpheusException">
/// Thrown when allocating the fragment index array runs out of memory.
/// </exception>
protected override MetaMorpheusEngineResults RunSpecific()
{
    double progress = 0;
    int oldPercentProgress = 0;
    TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(ProductTypes);

    // digest database
    HashSet<CompactPeptide> peptideToId = new HashSet<CompactPeptide>();

    // guards the progress counters, which are shared by all worker threads of the parallel loop
    var progressLock = new object();

    // Partitioner.Create rejects an empty range, so an empty protein list skips digestion entirely
    if (ProteinList.Count > 0)
    {
        Parallel.ForEach(Partitioner.Create(0, ProteinList.Count), new ParallelOptions { MaxDegreeOfParallelism = commonParameters.MaxThreadsToUsePerFile }, (range, loopState) =>
        {
            for (int i = range.Item1; i < range.Item2; i++)
            {
                // Stop loop if canceled
                if (GlobalVariables.StopLoops)
                {
                    loopState.Stop();
                    return;
                }

                foreach (var digestionParams in CollectionOfDigestionParams)
                {
                    foreach (var pepWithSetMods in ProteinList[i].Digest(digestionParams, FixedModifications, VariableModifications))
                    {
                        CompactPeptide compactPeptide = pepWithSetMods.CompactPeptide(terminusType);

                        // HashSet is not safe to read while another thread writes to it, so the set is
                        // only touched inside the lock; Add leaves the set unchanged for a duplicate
                        lock (peptideToId)
                        {
                            peptideToId.Add(compactPeptide);
                        }
                    }
                }

                // the lock keeps the counter exact and the reported percentages strictly increasing
                lock (progressLock)
                {
                    progress++;
                    var percentProgress = (int)((progress / ProteinList.Count) * 100);

                    if (percentProgress > oldPercentProgress)
                    {
                        oldPercentProgress = percentProgress;
                        ReportProgress(new ProgressEventArgs(percentProgress, "Digesting proteins...", nestedIds));
                    }
                }
            }
        });
    }

    // sort peptides by mass
    var peptidesSortedByMass = peptideToId.AsParallel().WithDegreeOfParallelism(commonParameters.MaxThreadsToUsePerFile).OrderBy(p => p.MonoisotopicMassIncludingFixedMods).ToList();
    peptideToId = null;

    // create fragment index
    List<int>[] fragmentIndex;

    try
    {
        fragmentIndex = new List<int>[(int)Math.Ceiling(MaxFragmentSize) * FragmentBinsPerDalton + 1];
    }
    catch (OutOfMemoryException)
    {
        throw new MetaMorpheusException("Max fragment mass too large for indexing engine; try \"Classic Search\" mode, or make the maximum fragment mass smaller");
    }

    // populate fragment index
    progress = 0;
    oldPercentProgress = 0;
    for (int peptideId = 0; peptideId < peptidesSortedByMass.Count; peptideId++)
    {
        var validFragments = peptidesSortedByMass[peptideId].ProductMassesMightHaveDuplicatesAndNaNs(ProductTypes).Distinct().Where(p => !Double.IsNaN(p));

        foreach (var theoreticalFragmentMass in validFragments)
        {
            // only positive masses below MaxFragmentSize have a bin in the index
            if (theoreticalFragmentMass < MaxFragmentSize && theoreticalFragmentMass > 0)
            {
                int fragmentBin = (int)Math.Round(theoreticalFragmentMass * FragmentBinsPerDalton);

                // bins are created lazily, on the first peptide that lands in them
                if (fragmentIndex[fragmentBin] == null)
                {
                    fragmentIndex[fragmentBin] = new List<int> { peptideId };
                }
                else
                {
                    fragmentIndex[fragmentBin].Add(peptideId);
                }
            }
        }

        progress++;
        var percentProgress = (int)((progress / peptidesSortedByMass.Count) * 100);

        if (percentProgress > oldPercentProgress)
        {
            oldPercentProgress = percentProgress;
            ReportProgress(new ProgressEventArgs(percentProgress, "Creating fragment index...", nestedIds));
        }
    }

    return new IndexingResults(peptidesSortedByMass, fragmentIndex, this);
}
/// <summary>
/// Rewrites the file at <paramref name="fileName"/> line by line. Lines that mention one of the
/// recognised setting names are replaced by the result of GetCorrectValue for that setting
/// (or, for ion-type settings not allowed by <paramref name="terminusType"/>, by "<c>Name = false</c>");
/// all other lines are written back unchanged.
/// </summary>
/// <param name="tomlFileName">Passed through to GetCorrectValue.</param>
/// <param name="fileName">Path of the file that is read and then overwritten.</param>
/// <param name="ye5">Not used by this method.</param>
/// <param name="terminusType">Controls which settings are rewritten and which ion types stay enabled.</param>
/// <param name="spliceSearch">When true, the variable-mod list and protease lines are left as they are.</param>
private static void UpdateTomls(string tomlFileName, string fileName, CommonParameters ye5, TerminusType terminusType, bool spliceSearch)
{
    bool noTerminus = terminusType.Equals(TerminusType.None);
    bool nTerminalIonsAllowed = terminusType.Equals(TerminusType.N) || noTerminus;
    bool cTerminalIonsAllowed = terminusType.Equals(TerminusType.C) || noTerminus;

    // shorthand for the lookup that every rewritten setting goes through
    string Corrected(string settingName, string currentLine) => GetCorrectValue(settingName, tomlFileName, currentLine);

    var rewrittenLines = new List<string>();

    // The order of the checks matters: matching is by substring, and some setting names contain others.
    foreach (string tomlLine in File.ReadAllLines(@fileName))
    {
        string replacement;

        if (tomlLine.Contains("LocalizeAll") && noTerminus)
        {
            replacement = Corrected("LocalizeAll", tomlLine);
        }
        else if (tomlLine.Contains("ListOfModsFixed"))
        {
            replacement = Corrected("ListOfModsFixed", tomlLine);
        }
        else if (tomlLine.Contains("ListOfModsVariable") && noTerminus && !spliceSearch)
        {
            replacement = Corrected("ListOfModsVariable", tomlLine);
        }
        else if (tomlLine.Contains("BIons"))
        {
            replacement = nTerminalIonsAllowed ? Corrected("BIons", tomlLine) : "BIons = false";
        }
        else if (tomlLine.Contains("YIons"))
        {
            replacement = cTerminalIonsAllowed ? Corrected("YIons", tomlLine) : "YIons = false";
        }
        else if (tomlLine.Contains("ZdotIons"))
        {
            replacement = cTerminalIonsAllowed ? Corrected("ZdotIons", tomlLine) : "ZdotIons = false";
        }
        else if (tomlLine.Contains("CIons"))
        {
            replacement = nTerminalIonsAllowed ? Corrected("CIons", tomlLine) : "CIons = false";
        }
        else if (tomlLine.Contains("ProductMassTolerance"))
        {
            replacement = Corrected("ProductMassTolerance", tomlLine);
        }
        else if (tomlLine.Contains("PrecursorMassTolerance"))
        {
            replacement = Corrected("PrecursorMassTolerance", tomlLine);
        }
        else if (tomlLine.Contains("MaxMissedCleavages"))
        {
            replacement = Corrected("MaxMissedCleavages", tomlLine);
        }
        else if (tomlLine.Contains("InitiatorMethionineBehavior"))
        {
            replacement = Corrected("InitiatorMethionineBehavior", tomlLine);
        }
        // NOTE(review): this condition was previously written with a double negation ("!!"),
        // which evaluates the same as no negation at all - confirm that this is the intended test.
        else if (tomlLine.Contains("MinPeptideLength") && noTerminus)
        {
            replacement = Corrected("MinPeptideLength", tomlLine);
        }
        else if (tomlLine.Contains("MaxPeptideLength"))
        {
            replacement = Corrected("MaxPeptideLength", tomlLine);
        }
        else if (tomlLine.Contains("MaxModificationIsoforms"))
        {
            replacement = Corrected("MaxModificationIsoforms", tomlLine);
        }
        else if (tomlLine.Contains("MaxModsForPeptide"))
        {
            replacement = Corrected("MaxModsForPeptide", tomlLine);
        }
        else if (tomlLine.Contains("SemiProteaseDigestion"))
        {
            replacement = Corrected("SemiProteaseDigestion", tomlLine);
        }
        else if (tomlLine.Contains("TerminusTypeSemiProtease"))
        {
            replacement = Corrected("TerminusTypeSemiProtease", tomlLine);
        }
        // this check must stay last: other setting names contain "Protease" and would otherwise be caught here
        else if (tomlLine.Contains("Protease") && noTerminus && !spliceSearch)
        {
            replacement = Corrected("Protease", tomlLine);
        }
        else
        {
            replacement = tomlLine;
        }

        rewrittenLines.Add(replacement);
    }

    using (StreamWriter file = new StreamWriter(fileName))
    {
        foreach (string outputLine in rewrittenLines)
        {
            file.WriteLine(outputLine);
        }
    }
}
/// <summary>
/// Click handler for the save button: normalises the ion-type and protease selection for
/// non-specific / semi-specific searches, validates the text-box values, and then copies the
/// settings from the controls into TheTask.SearchParameters and TheTask.CommonParameters.
/// Returns without setting DialogResult when validation fails or the user cancels the
/// "Modern search recommended" prompt.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void SaveButton_Click(object sender, RoutedEventArgs e)
{
    if (nonSpecificSearchRadioButton1.IsChecked.Value || semiSpecificSearchRadioButton.IsChecked.Value)
    {
        // Only ion types from a single terminus are kept for these search types: when both N-terminal
        // (b/c) and C-terminal (y/zdot) ions are ticked, the N-terminal boxes are silently unticked.
        if ((bCheckBox.IsChecked.Value || cCheckBox.IsChecked.Value) && (yCheckBox.IsChecked.Value || zdotCheckBox.IsChecked.Value))
        {
            bCheckBox.IsChecked = false;
            cCheckBox.IsChecked = false;
        }

        // a "non-specific" protease is swapped for "singleN" or "singleC", depending on the ticked ion types
        if (((Protease)proteaseComboBox.SelectedItem).Name.Contains("non-specific"))
        {
            proteaseComboBox.SelectedItem = proteaseComboBox.Items.CurrentItem;
            string replacementProteaseName = (bCheckBox.IsChecked.Value || cCheckBox.IsChecked.Value) ? "singleN" : "singleC";

            for (int i = 0; i < proteaseComboBox.Items.Count; i++)
            {
                if (((Protease)proteaseComboBox.Items[i]).Name.Equals(replacementProteaseName))
                {
                    proteaseComboBox.SelectedItem = proteaseComboBox.Items[i];
                    break;
                }
            }
        }

        // a "semi-trypsin" protease is swapped for "trypsin" by walking the item list from the start
        if (((Protease)proteaseComboBox.SelectedItem).Name.Contains("semi-trypsin"))
        {
            proteaseComboBox.Items.MoveCurrentToFirst();
            proteaseComboBox.SelectedItem = proteaseComboBox.Items.CurrentItem;
            while (!((Protease)proteaseComboBox.SelectedItem).Name.Equals("trypsin"))
            {
                proteaseComboBox.Items.MoveCurrentToNext();
                proteaseComboBox.SelectedItem = proteaseComboBox.Items.CurrentItem;
            }
        }

        if (!addCompIonCheckBox.IsChecked.Value)
        {
            MessageBox.Show("Warning: Complementary ions are strongly recommended when using this algorithm.");
        }
    }

    if (!GlobalGuiSettings.CheckTaskSettingsValidity(precursorMassToleranceTextBox.Text, productMassToleranceTextBox.Text, missedCleavagesTextBox.Text,
        maxModificationIsoformsTextBox.Text, MinPeptideLengthTextBox.Text, MaxPeptideLengthTextBox.Text, maxThreadsTextBox.Text, minScoreAllowed.Text,
        peakFindingToleranceTextBox.Text, histogramBinWidthTextBox.Text, DeconvolutionMaxAssumedChargeStateTextBox.Text, TopNPeaksTextBox.Text,
        MinRatioTextBox.Text, numberOfDatabaseSearchesTextBox.Text, MaxModNumTextBox.Text, MaxFragmentMassTextBox.Text, QValueTextBox.Text))
    {
        return;
    }

    // ---- digestion settings ----
    Protease protease = (Protease)proteaseComboBox.SelectedItem;
    bool semiProteaseDigestion = semiSpecificSearchRadioButton.IsChecked.Value
        && protease.CleavageSpecificity != CleavageSpecificity.SingleN
        && protease.CleavageSpecificity != CleavageSpecificity.SingleC;
    TerminusType terminusTypeSemiProtease = (bCheckBox.IsChecked.Value || cCheckBox.IsChecked.Value) ? TerminusType.N : TerminusType.C;

    // empty text boxes mean "no limit" for missed cleavages and maximum peptide length
    int maxMissedCleavages = string.IsNullOrEmpty(missedCleavagesTextBox.Text) ? int.MaxValue : int.Parse(missedCleavagesTextBox.Text, NumberStyles.Any, CultureInfo.InvariantCulture);
    int minPeptideLengthValue = int.Parse(MinPeptideLengthTextBox.Text, NumberStyles.Any, CultureInfo.InvariantCulture);
    int maxPeptideLengthValue = string.IsNullOrEmpty(MaxPeptideLengthTextBox.Text) ? int.MaxValue : int.Parse(MaxPeptideLengthTextBox.Text, NumberStyles.Any, CultureInfo.InvariantCulture);
    int maxModificationIsoformsValue = int.Parse(maxModificationIsoformsTextBox.Text, CultureInfo.InvariantCulture);
    int maxModsForPeptideValue = int.Parse(MaxModNumTextBox.Text, CultureInfo.InvariantCulture);
    InitiatorMethionineBehavior initiatorMethionineBehavior = (InitiatorMethionineBehavior)initiatorMethionineBehaviorComboBox.SelectedIndex;

    DigestionParams digestionParamsToSave = new DigestionParams(
        protease: protease.Name,
        semiProteaseDigestion: semiProteaseDigestion,
        terminusTypeSemiProtease: terminusTypeSemiProtease,
        maxMissedCleavages: maxMissedCleavages,
        minPeptideLength: minPeptideLengthValue,
        maxPeptideLength: maxPeptideLengthValue,
        maxModificationIsoforms: maxModificationIsoformsValue,
        initiatorMethionineBehavior: initiatorMethionineBehavior,
        maxModsForPeptides: maxModsForPeptideValue);

    // ---- mass tolerances: combo-box index 0 selects an absolute tolerance, anything else ppm ----
    Tolerance ProductMassTolerance;
    if (productMassToleranceComboBox.SelectedIndex == 0)
    {
        ProductMassTolerance = new AbsoluteTolerance(double.Parse(productMassToleranceTextBox.Text, CultureInfo.InvariantCulture));
    }
    else
    {
        ProductMassTolerance = new PpmTolerance(double.Parse(productMassToleranceTextBox.Text, CultureInfo.InvariantCulture));
    }

    Tolerance PrecursorMassTolerance;
    if (precursorMassToleranceComboBox.SelectedIndex == 0)
    {
        PrecursorMassTolerance = new AbsoluteTolerance(double.Parse(precursorMassToleranceTextBox.Text, CultureInfo.InvariantCulture));
    }
    else
    {
        PrecursorMassTolerance = new PpmTolerance(double.Parse(precursorMassToleranceTextBox.Text, CultureInfo.InvariantCulture));
    }

    TheTask.SearchParameters.MaxFragmentSize = Double.Parse(MaxFragmentMassTextBox.Text, CultureInfo.InvariantCulture);

    // ---- selected modifications, as (parent display name, modification display name) pairs ----
    var listOfModsVariable = new List<(string, string)>();
    foreach (var heh in VariableModTypeForTreeViewObservableCollection)
    {
        listOfModsVariable.AddRange(heh.Children.Where(b => b.Use).Select(b => (b.Parent.DisplayName, b.DisplayName)));
    }

    var listOfModsFixed = new List<(string, string)>();
    foreach (var heh in FixedModTypeForTreeViewObservableCollection)
    {
        listOfModsFixed.AddRange(heh.Children.Where(b => b.Use).Select(b => (b.Parent.DisplayName, b.DisplayName)));
    }

    if (!GlobalGuiSettings.VariableModCheck(listOfModsVariable))
    {
        return;
    }

    bool TrimMs1Peaks = trimMs1.IsChecked.Value;
    bool TrimMsMsPeaks = trimMsMs.IsChecked.Value;

    // Parsed with the invariant culture, like every other numeric field in this handler, so that the
    // result does not depend on the machine's regional settings (e.g. "0.01" under a comma-decimal culture).
    int TopNpeaks = int.Parse(TopNPeaksTextBox.Text, CultureInfo.InvariantCulture);
    double MinRatio = double.Parse(MinRatioTextBox.Text, CultureInfo.InvariantCulture);

    // the requested thread count is honoured only when it is a number between 1 and the processor count;
    // empty or unparsable text falls back to the CommonParameters default
    bool parseMaxThreadsPerFile = int.TryParse(maxThreadsTextBox.Text, NumberStyles.Integer, CultureInfo.InvariantCulture, out int requestedThreads)
        && requestedThreads > 0
        && requestedThreads <= Environment.ProcessorCount;

    CommonParameters commonParamsToSave = new CommonParameters(
        taskDescriptor: OutputFileNameTextBox.Text != "" ? OutputFileNameTextBox.Text : "SearchTask",
        maxThreadsToUsePerFile: parseMaxThreadsPerFile ? requestedThreads : new CommonParameters().MaxThreadsToUsePerFile,
        useDeltaScore: deltaScoreCheckBox.IsChecked.Value,
        reportAllAmbiguity: allAmbiguity.IsChecked.Value,
        deconvolutionMaxAssumedChargeState: int.Parse(DeconvolutionMaxAssumedChargeStateTextBox.Text, CultureInfo.InvariantCulture),
        totalPartitions: int.Parse(numberOfDatabaseSearchesTextBox.Text, CultureInfo.InvariantCulture),
        doPrecursorDeconvolution: deconvolutePrecursors.IsChecked.Value,
        useProvidedPrecursorInfo: useProvidedPrecursor.IsChecked.Value,
        scoreCutoff: double.Parse(minScoreAllowed.Text, CultureInfo.InvariantCulture),
        calculateEValue: eValueCheckBox.IsChecked.Value,
        listOfModsFixed: listOfModsFixed,
        listOfModsVariable: listOfModsVariable,
        bIons: bCheckBox.IsChecked.Value,
        yIons: yCheckBox.IsChecked.Value,
        cIons: cCheckBox.IsChecked.Value,
        zDotIons: zdotCheckBox.IsChecked.Value,
        precursorMassTolerance: PrecursorMassTolerance,
        productMassTolerance: ProductMassTolerance,
        digestionParams: digestionParamsToSave,
        trimMs1Peaks: TrimMs1Peaks,
        trimMsMsPeaks: TrimMsMsPeaks,
        topNpeaks: TopNpeaks,
        minRatio: MinRatio,
        addCompIons: addCompIonCheckBox.IsChecked.Value,
        qValueOutputFilter: QValueCheckBox.IsChecked.Value ? double.Parse(QValueTextBox.Text, CultureInfo.InvariantCulture) : double.PositiveInfinity);

    // ---- search parameters ----
    if (classicSearchRadioButton.IsChecked.Value)
    {
        TheTask.SearchParameters.SearchType = SearchType.Classic;
    }
    else if (modernSearchRadioButton.IsChecked.Value)
    {
        TheTask.SearchParameters.SearchType = SearchType.Modern;
    }
    else //if (nonSpecificSearchRadioButton.IsChecked.Value)
    {
        TheTask.SearchParameters.SearchType = SearchType.NonSpecific;
    }

    TheTask.SearchParameters.DoParsimony = checkBoxParsimony.IsChecked.Value;
    TheTask.SearchParameters.NoOneHitWonders = checkBoxNoOneHitWonders.IsChecked.Value;
    TheTask.SearchParameters.DoQuantification = checkBoxQuantification.IsChecked.Value;
    TheTask.SearchParameters.Normalize = checkBoxNormalize.IsChecked.Value;
    TheTask.SearchParameters.MatchBetweenRuns = checkBoxMatchBetweenRuns.IsChecked.Value;
    TheTask.SearchParameters.ModPeptidesAreDifferent = modPepsAreUnique.IsChecked.Value;
    TheTask.SearchParameters.QuantifyPpmTol = double.Parse(peakFindingToleranceTextBox.Text, CultureInfo.InvariantCulture);
    TheTask.SearchParameters.SearchTarget = checkBoxTarget.IsChecked.Value;
    TheTask.SearchParameters.WriteMzId = ckbMzId.IsChecked.Value;
    //TheTask.SearchParameters.OutPepXML = ckbPepXML.IsChecked.Value;

    if (checkBoxDecoy.IsChecked.Value)
    {
        if (radioButtonReverseDecoy.IsChecked.Value)
        {
            TheTask.SearchParameters.DecoyType = DecoyType.Reverse;
        }
        else //if (radioButtonSlideDecoy.IsChecked.Value)
        {
            TheTask.SearchParameters.DecoyType = DecoyType.Slide;
        }
    }
    else
    {
        TheTask.SearchParameters.DecoyType = DecoyType.None;
    }

    // the checks are independent; if several boxes were checked, the last matching one below wins
    if (massDiffAcceptExact.IsChecked.HasValue && massDiffAcceptExact.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.Exact;
    }
    if (massDiffAccept1mm.IsChecked.HasValue && massDiffAccept1mm.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.OneMM;
    }
    if (massDiffAccept2mm.IsChecked.HasValue && massDiffAccept2mm.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.TwoMM;
    }
    if (massDiffAccept3mm.IsChecked.HasValue && massDiffAccept3mm.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.ThreeMM;
    }
    if (massDiffAccept187.IsChecked.HasValue && massDiffAccept187.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.ModOpen;
    }
    if (massDiffAcceptOpen.IsChecked.HasValue && massDiffAcceptOpen.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.Open;
    }
    if (massDiffAcceptCustom.IsChecked.HasValue && massDiffAcceptCustom.IsChecked.Value)
    {
        TheTask.SearchParameters.MassDiffAcceptorType = MassDiffAcceptorType.Custom;
        TheTask.SearchParameters.CustomMdac = customkMdacTextBox.Text;
    }

    // displays warning if classic search is enabled with an open search mode
    if (TheTask.SearchParameters.SearchType == SearchType.Classic &&
        (TheTask.SearchParameters.MassDiffAcceptorType == MassDiffAcceptorType.ModOpen || TheTask.SearchParameters.MassDiffAcceptorType == MassDiffAcceptorType.Open))
    {
        MessageBoxResult result = MessageBox.Show("We recommend using Modern Search mode when conducting open precursor mass searches to reduce search time.\n\n" + "Continue anyway?", "Modern search recommended", MessageBoxButton.OKCancel);

        if (result == MessageBoxResult.Cancel)
        {
            return;
        }
    }

    TheTask.SearchParameters.DoHistogramAnalysis = checkBoxHistogramAnalysis.IsChecked.Value;
    TheTask.SearchParameters.HistogramBinTolInDaltons = double.Parse(histogramBinWidthTextBox.Text, CultureInfo.InvariantCulture);

    TheTask.SearchParameters.WritePrunedDatabase = writePrunedDBCheckBox.IsChecked.Value;

    SetModSelectionForPrunedDB();

    TheTask.CommonParameters = commonParamsToSave;

    DialogResult = true;
}
/// <summary>
/// Parsimony test with modified peptides NOT treated as unique: the same oxidised peptide is
/// produced from two proteins, both peptides share one compact-peptide key, and after running
/// ProteinParsimonyEngine that single key must still map to both peptides with the same sequence.
/// </summary>
public static void ParsimonyVariableDontTreatAsUnique()
{
    const string proteaseName = "k Protease";
    bool modPeptidesAreUnique = false;

    // set up mods (this dictionary is not read anywhere in the test)
    var modDictionary = new Dictionary<int, List<Modification>>();
    ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
    var oxidation = new ModificationWithMass("Oxidation of M", "Common Variable", motif1, TerminusLocalization.Any, 15.99491461957);

    // register a protease that cleaves C-terminal of K
    var cleavageSites = new List<Tuple<string, TerminusType>> { new Tuple<string, TerminusType>("K", TerminusType.C) };
    var noCleavageSites = new List<Tuple<string, TerminusType>>();
    Protease protease = new Protease(proteaseName, cleavageSites, noCleavageSites, CleavageSpecificity.Full, null, null, null);
    ProteaseDictionary.Dictionary.Add(protease.Name, protease);

    TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(new List<ProductType> { ProductType.B, ProductType.Y });

    // both proteins contain the PEPTIDEM stretch; the second has an extra YYYK prefix
    var protein1 = new Protein("PEPTIDEM", "accession1");
    var protein2 = new Protein("YYYKPEPTIDEM", "accession2");

    // the mod is passed as the first modification list and the second list is left empty, for both digests
    var digestedFromProtein1 = protein1.Digest(
        new DigestionParams(protease: proteaseName, minPeptideLength: 1),
        new List<ModificationWithMass> { oxidation },
        new List<ModificationWithMass>());
    var pep1 = digestedFromProtein1.First();

    var digestedFromProtein2 = protein2.Digest(
        new DigestionParams(protease: proteaseName, minPeptideLength: 1),
        new List<ModificationWithMass> { oxidation },
        new List<ModificationWithMass>());
    var pep2 = digestedFromProtein2.ToList()[1];

    // check to make sure mod is present
    Assert.That(pep1.Sequence.Equals(pep2.Sequence));
    Assert.That(pep1.NumMods == 1);
    Assert.That(pep2.NumMods == 1);

    // build the dictionary for input to parsimony
    var peptideMatching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();
    var cp1 = pep1.CompactPeptide(terminusType);
    var cp2 = pep2.CompactPeptide(terminusType);
    Assert.That(cp1.Equals(cp2));

    var peptidesForKey = new HashSet<PeptideWithSetModifications> { pep1 };
    peptideMatching.Add(pep1.CompactPeptide(terminusType), peptidesForKey);
    Assert.That(peptideMatching.ContainsKey(cp2));
    peptideMatching[cp2].Add(pep2);

    // apply parsimony
    ProteinParsimonyEngine pae = new ProteinParsimonyEngine(peptideMatching, modPeptidesAreUnique, new CommonParameters(), new List<string>());
    pae.Run();

    // check to make sure both peptides are associated with both proteins
    Assert.That(peptideMatching.Count == 1);
    Assert.That(peptideMatching.First().Value.Count == 2);

    var seq = peptideMatching.First().Value.First().Sequence;
    foreach (var matchedPeptide in peptideMatching.First().Value)
    {
        Assert.That(matchedPeptide.Sequence.Equals(seq));
    }
}
/// <summary>
/// Builds a compact peptide from <paramref name="peptideWithSetModifications"/>.
/// N-terminal masses are computed for <c>TerminusType.None</c> and <c>TerminusType.N</c>,
/// C-terminal masses for <c>TerminusType.None</c> and <c>TerminusType.C</c>; a side that is not
/// requested is left as <c>null</c>.
/// </summary>
/// <param name="peptideWithSetModifications">Source peptide; also supplies the monoisotopic mass.</param>
/// <param name="terminusType">Selects which fragment-mass arrays are populated.</param>
public CompactPeptide(PeptideWithSetModifications peptideWithSetModifications, TerminusType terminusType)
{
    bool wantsNTerminal = terminusType == TerminusType.None || terminusType == TerminusType.N;
    bool wantsCTerminal = terminusType == TerminusType.None || terminusType == TerminusType.C;

    // N side: start index 0, step +1
    NTerminalMasses = wantsNTerminal
        ? ComputeFollowingFragmentMasses(peptideWithSetModifications, 0, 0, 1).ToArray()
        : null;

    // C side: start index Length + 1, step -1
    CTerminalMasses = wantsCTerminal
        ? ComputeFollowingFragmentMasses(peptideWithSetModifications, 0, peptideWithSetModifications.Length + 1, -1).ToArray()
        : null;

    MonoisotopicMassIncludingFixedMods = peptideWithSetModifications.MonoisotopicMass;
}