/// <summary>
/// Writes a small demonstration pepXML file ("example.pepXML") into
/// <c>Examples.BASE_DIRECTORY</c>: a search summary with three modifications
/// followed by a single spectrum carrying one peptide-spectral match.
/// </summary>
public static void WritePepXml()
{
    string outputPath = Path.Combine(Examples.BASE_DIRECTORY, "example.pepXML");
    Console.WriteLine("Writting to " + outputPath);

    using (PepXmlWriter pepXml = new PepXmlWriter(outputPath))
    {
        // Search summary section.
        pepXml.WriteSampleProtease(Protease.Trypsin);
        pepXml.StartSearchSummary("OMSSA", true, true);
        pepXml.WriteProteinDatabase("Resources/yeast_uniprot_120226.fasta");
        pepXml.WriteSearchProtease(Protease.Trypsin, 3);

        pepXml.WriteModification(
            ModificationDictionary.GetModification("Acetyl"),
            ModificationSites.K | ModificationSites.NPep);
        pepXml.WriteModification(
            ModificationDictionary.GetModification("CAM"),
            ModificationSites.C);
        pepXml.WriteModification(
            ModificationDictionary.GetModification("Phospho"),
            ModificationSites.S | ModificationSites.T | ModificationSites.Y,
            false);

        // Spectra section: one spectrum with one match.
        pepXml.SetCurrentStage(PepXmlWriter.Stage.Spectra, true);
        pepXml.StartSpectrum(15, 1.234, 523.4324, 3);

        Protein parentProtein = new Protein("", "Test Protein");
        PeptideSpectralMatch match = new PeptideSpectralMatch(PeptideSpectralMatchScoreType.OmssaEvalue)
        {
            Score = 1.5e-5,
            Peptide = new Peptide("DEREK", parentProtein),
            Charge = 3
        };

        pepXml.WritePSM(match);
        pepXml.EndSpectrum();
    }
}
/// <summary>
/// Scores a single peptide against a spectrum and returns the resulting match.
/// </summary>
/// <param name="massSpectrum">Spectrum supplying the experimental masses, intensities and total ion current.</param>
/// <param name="peptide">Peptide to fragment and score.</param>
/// <param name="fragmentTypes">Fragment types passed to <c>peptide.Fragment</c>.</param>
/// <param name="productMassTolerance">Tolerance forwarded to the array-based <c>Search</c> overload.</param>
/// <returns>A match of type <c>DefaultPsmScoreType</c> holding the peptide and its score.</returns>
public override PeptideSpectralMatch Search(IMassSpectrum massSpectrum, Peptide peptide, FragmentTypes fragmentTypes, Tolerance productMassTolerance)
{
    double[] observedMasses = massSpectrum.MassSpectrum.GetMasses();
    double[] observedIntensities = massSpectrum.MassSpectrum.GetIntensities();
    double totalIonCurrent = massSpectrum.MassSpectrum.GetTotalIonCurrent();

    // Theoretical fragments as singly charged m/z values, ascending.
    double[] theoreticalMzs =
        (from frag in peptide.Fragment(fragmentTypes)
         select Mass.MzFromMass(frag.MonoisotopicMass, 1) into mz
         orderby mz
         select mz).ToArray();

    return new PeptideSpectralMatch(DefaultPsmScoreType)
    {
        Peptide = peptide,
        Score = Search(observedMasses, observedIntensities, theoreticalMzs, productMassTolerance, totalIonCurrent)
    };
}
/// <summary>
/// Lazily converts every <c>OmssaPeptideSpectralMatch</c> record from the reader
/// into a <c>PeptideSpectralMatch</c>.
/// </summary>
/// <remarks>
/// For each record the peptide is built from the upper-cased sequence, fixed and
/// dynamic modifications are applied, and the parent protein is attached when the
/// defline is found in <c>_proteins</c>. Extra columns listed in <c>_extraColumns</c>
/// are copied as strings. When the text before the first '.' of the file name is a
/// key in <c>_dataFiles</c>, that data file is opened if needed and the spectrum is
/// attached to the match.
/// </remarks>
/// <returns>One match per record, yielded as the reader advances.</returns>
public override IEnumerable<PeptideSpectralMatch> ReadNextPsm()
{
    Protein prot;
    MSDataFile dataFile;
    foreach (OmssaPeptideSpectralMatch omssaPSM in _reader.GetRecords<OmssaPeptideSpectralMatch>())
    {
        // Invariant casing: the sequence is data, not UI text, so upper-casing must
        // not depend on the current culture (e.g. Turkish 'i' -> 'İ').
        Peptide peptide = new Peptide(omssaPSM.Sequence.ToUpperInvariant());
        SetFixedMods(peptide);
        SetDynamicMods(peptide, omssaPSM.Modifications);
        peptide.StartResidue = omssaPSM.StartResidue;
        peptide.EndResidue = omssaPSM.StopResidue;
        if (_proteins.TryGetValue(omssaPSM.Defline, out prot))
        {
            peptide.Parent = prot;
        }

        PeptideSpectralMatch psm = new PeptideSpectralMatch();

        // Iterating an empty collection is a no-op, so no Count guard is needed.
        foreach (string name in _extraColumns)
        {
            psm.AddExtraData(name, _reader.GetField<string>(name));
        }

        psm.Peptide = peptide;
        psm.Score = omssaPSM.EValue;
        psm.Charge = omssaPSM.Charge;
        psm.ScoreType = PeptideSpectralMatchScoreType.EValue;
        // Ordinal comparison: "DECOY" is a machine-generated prefix, not linguistic text.
        psm.IsDecoy = omssaPSM.Defline.StartsWith("DECOY", System.StringComparison.Ordinal);
        psm.SpectrumNumber = omssaPSM.SpectrumNumber;
        psm.FileName = omssaPSM.FileName;

        // Data files are keyed by the part of the file name before the first '.'.
        string[] filenameparts = psm.FileName.Split('.');
        if (_dataFiles.TryGetValue(filenameparts[0], out dataFile))
        {
            if (!dataFile.IsOpen)
            {
                dataFile.Open();
            }

            psm.Spectrum = dataFile[psm.SpectrumNumber] as MsnDataScan;
        }

        yield return psm;
    }
}
/// <summary>
/// Runs the classic search engine on a three-peak spectrum in which only the last
/// peak sits at a sorted fragment m/z of the "MMMM" peptide, and checks that the
/// resulting score lies strictly between 1 and 2.
/// </summary>
public static void TestLastPeaks()
{
    IDictionary<int, List<Modification>> noMods = new Dictionary<int, List<Modification>>();
    ModificationMotif.TryGetMotif("M", out ModificationMotif unusedMotif);
    Protein protein = new Protein("MMMM", null, null, null, noMods);

    var digestion = new DigestionParams(minPeptideLength: 1);
    PeptideWithSetModifications firstPeptide = protein
        .Digest(digestion, new List<Modification>(), new List<Modification>())
        .First();

    // Sorted neutral masses of all HCD fragments of the peptide.
    List<Product> products = new List<Product>();
    firstPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products);
    var sortedMasses = products.Select(product => product.NeutralMass).ToArray();
    Array.Sort(sortedMasses);

    double[] peakIntensities = { 1, 1, 1 };
    double[] peakMzs = { 1, 2, sortedMasses[4].ToMz(1) };
    var spectrum = new MzSpectrum(peakMzs, peakIntensities, false);
    var dataScan = new MsDataScan(
        spectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "",
        MZAnalyzerType.Unknown, spectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null,
        DissociationType.Unknown, 1, null);

    var psms = new PeptideSpectralMatch[1];
    var ms2Scans = new Ms2ScanWithSpecificMass[]
    {
        new Ms2ScanWithSpecificMass(dataScan, 0, 1, null, new CommonParameters())
    };

    var searchParameters = new CommonParameters(
        scoreCutoff: 1,
        productMassTolerance: new PpmTolerance(5),
        digestionParams: new DigestionParams(
            maxMissedCleavages: 0,
            minPeptideLength: 1,
            maxModificationIsoforms: int.MaxValue,
            initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));

    bool spectralLibraryRequested = false;
    var engine = new ClassicSearchEngine(
        psms, ms2Scans, new List<Modification>(), new List<Modification>(), null, null, null,
        new List<Protein> { protein }, new OpenSearchMode(), searchParameters, null, null,
        new List<string>(), spectralLibraryRequested);
    engine.Run();

    Assert.Less(psms[0].Score, 2);
    Assert.Greater(psms[0].Score, 1);
}
/// <summary>
/// Verifies that the tab-separated representation of a PSM always has the same
/// number of tab characters as the tab-separated header, at four stages: right
/// after construction, after matching to protein-linked peptides, after running
/// the localization engine, and after setting FDR values.
/// </summary>
public static void TestPsmHeader()
{
    DigestionParams digestionParams = new DigestionParams();
    PeptideWithSetModifications pepWithSetMods = new Protein("MQQQQQQQ", "accession1")
        .Digest(digestionParams, new List<ModificationWithMass>(), new List<ModificationWithMass>())
        .First();

    IMsDataFile<IMsDataScan<IMzSpectrum<IMzPeak>>> myMsDataFile = new TestDataFile(pepWithSetMods, "quadratic");
    IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>> scann = myMsDataFile.GetOneBasedScan(2) as IMsDataScanWithPrecursor<IMzSpectrum<IMzPeak>>;
    Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(scann, 4, 1, null);
    PeptideSpectralMatch psm = new PeptideSpectralMatch(pepWithSetMods.CompactPeptide(TerminusType.None), 1, 2, 3, scan);

    // NUnit expects (expected, actual): the header defines the expected column count.
    Assert.AreEqual(
        PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'),
        psm.ToString().Count(f => f == '\t'),
        "Tab count mismatch right after construction");

    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> matching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>
    {
        { pepWithSetMods.CompactPeptide(TerminusType.None), new HashSet<PeptideWithSetModifications> { pepWithSetMods } }
    };
    psm.MatchToProteinLinkedPeptides(matching);

    Assert.AreEqual(
        PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'),
        psm.ToString().Count(f => f == '\t'),
        "Tab count mismatch after MatchToProteinLinkedPeptides");

    Tolerance fragmentTolerance = new PpmTolerance(10);
    List<ProductType> lp = new List<ProductType> { ProductType.B };
    new LocalizationEngine(new List<PeptideSpectralMatch> { psm }, lp, myMsDataFile, fragmentTolerance, new List<string>(), false).Run();

    Assert.AreEqual(
        PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'),
        psm.ToString().Count(f => f == '\t'),
        "Tab count mismatch after LocalizationEngine");

    psm.SetFdrValues(6, 6, 6, 6, 6, 6, 0, 0, 0, false);

    Assert.AreEqual(
        PeptideSpectralMatch.GetTabSeparatedHeader().Count(f => f == '\t'),
        psm.ToString().Count(f => f == '\t'),
        "Tab count mismatch after SetFdrValues");
}
/// <summary>
/// Runs the modern (indexed) search engine on a spectrum containing two peaks that
/// are 1e-9 m/z apart at a fragment m/z of the "MMMM" peptide, and checks that the
/// resulting score lies strictly between 1 and 2.
/// </summary>
public static void TestVeryCloseExperimentalsModern()
{
    IDictionary<int, List<Modification>> noMods = new Dictionary<int, List<Modification>>();
    ModificationMotif.TryGetMotif("M", out ModificationMotif unusedMotif);
    Protein protein = new Protein("MMMM", null, null, null, noMods);

    var digestion = new DigestionParams(minPeptideLength: 1);
    var firstPeptide = protein
        .Digest(digestion, new List<ModificationWithMass>(), new List<ModificationWithMass>())
        .First();

    // Sorted b/y product masses of the peptide.
    var sortedMasses = firstPeptide
        .CompactPeptide(TerminusType.None)
        .ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { ProductType.B, ProductType.Y });
    Array.Sort(sortedMasses);

    double[] peakIntensities = { 1, 1, 1, 1 };
    double[] peakMzs =
    {
        1,
        2,
        sortedMasses[4].ToMz(1),
        sortedMasses[4].ToMz(1) + 1e-9
    };
    var spectrum = new MzSpectrum(peakMzs, peakIntensities, false);
    var dataScan = new MsDataScan(
        spectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "",
        MZAnalyzerType.Unknown, spectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null,
        DissociationType.Unknown, 1, null);

    var psms = new PeptideSpectralMatch[1];
    var ms2Scans = new Ms2ScanWithSpecificMass[]
    {
        new Ms2ScanWithSpecificMass(dataScan, 600, 1, null)
    };

    var searchParameters = new CommonParameters(
        productMassTolerance: new PpmTolerance(5),
        scoreCutoff: 1,
        digestionParams: new DigestionParams(
            maxMissedCleavages: 0,
            minPeptideLength: 1,
            maxModificationIsoforms: int.MaxValue,
            initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));

    // Build the peptide/fragment index that the modern engine searches.
    var indexer = new IndexingEngine(
        new List<Protein> { protein },
        new List<ModificationWithMass>(),
        new List<ModificationWithMass>(),
        new List<ProductType> { ProductType.B, ProductType.Y },
        1,
        DecoyType.Reverse,
        new List<DigestionParams> { searchParameters.DigestionParams },
        searchParameters,
        30000,
        new List<string>());
    var index = (IndexingResults)indexer.Run();

    var engine = new ModernSearchEngine(
        psms,
        ms2Scans,
        index.PeptideIndex,
        index.FragmentIndex,
        new List<ProductType> { ProductType.B, ProductType.Y },
        0,
        searchParameters,
        new OpenSearchMode(),
        0,
        new List<string>());
    engine.Run();

    Assert.Less(psms[0].Score, 2);
    Assert.Greater(psms[0].Score, 1);
}
/// <summary>
/// Runs the classic search engine on a four-peak spectrum whose first three peaks
/// are placed at sorted b/y product m/z values (indices 0, 2 and 4) of the modified
/// "MMMM" peptide, and expects exactly three matched fragment ions.
/// </summary>
public static void TestIdenticalPeaks()
{
    IDictionary<int, List<Modification>> mods = new Dictionary<int, List<Modification>>();
    ModificationMotif.TryGetMotif("M", out ModificationMotif motif);
    mods.Add(1, new List<Modification> { new ModificationWithMass("Hehe", null, motif, TerminusLocalization.NProt, 18.010565, null, null, null, null) });

    var prot = new Protein("MMMM", null, null, null, mods);
    DigestionParams digestionParams = new DigestionParams(minPeptideLength: 1);
    var ye = prot.Digest(digestionParams, new List<ModificationWithMass>(), new List<ModificationWithMass>()).First();

    var massArray = ye.CompactPeptide(TerminusType.None).ProductMassesMightHaveDuplicatesAndNaNs(new List<ProductType> { ProductType.B, ProductType.Y });
    Array.Sort(massArray);

    double[] intensities = new double[] { 1, 1, 1, 1 };
    double[] mz = new double[] { massArray[0].ToMz(1), massArray[2].ToMz(1), massArray[4].ToMz(1), 10000 };
    MzSpectrum massSpectrum = new MzSpectrum(mz, intensities, false);
    MsDataScan scan = new MsDataScan(massSpectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "", MZAnalyzerType.Unknown, massSpectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null, DissociationType.Unknown, 1, null);

    PeptideSpectralMatch[] globalPsms = new PeptideSpectralMatch[1];
    Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans = { new Ms2ScanWithSpecificMass(scan, 0, 0, null) };
    CommonParameters CommonParameters = new CommonParameters(
        productMassTolerance: new PpmTolerance(5),
        scoreCutoff: 1,
        digestionParams: new DigestionParams(
            maxMissedCleavages: 0,
            minPeptideLength: 1,
            initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));

    ClassicSearchEngine cse = new ClassicSearchEngine(globalPsms, arrayOfSortedMS2Scans, new List<ModificationWithMass>(), new List<ModificationWithMass>(), new List<Protein> { prot }, new List<ProductType> { ProductType.B, ProductType.Y }, new OpenSearchMode(), CommonParameters, new List<string>());
    cse.Run();

    // NUnit expects (expected, actual); the reversed order produced a misleading
    // "Expected: <actual> But was: 3" message on failure.
    Assert.AreEqual(3, globalPsms[0].MatchedFragmentIons.Count);
}
/// <summary>
/// Runs the classic search engine on a four-peak spectrum whose first three peaks
/// are placed at sorted HCD fragment m/z values (indices 0, 2 and 4) of the modified
/// "MMMM" peptide, and expects exactly three matched fragment ions.
/// </summary>
public static void TestIdenticalPeaks()
{
    ModificationMotif.TryGetMotif("M", out ModificationMotif methionineMotif);
    IDictionary<int, List<Modification>> modsByPosition = new Dictionary<int, List<Modification>>
    {
        {
            1,
            new List<Modification>
            {
                new Modification(
                    _originalId: "Hehe",
                    _target: methionineMotif,
                    _locationRestriction: "Anywhere.",
                    _monoisotopicMass: 18.010565)
            }
        }
    };

    Protein protein = new Protein("MMMM", null, null, null, modsByPosition);
    var digestion = new DigestionParams(minPeptideLength: 1);
    var firstPeptide = protein
        .Digest(digestion, new List<Modification>(), new List<Modification>())
        .First();

    // Sorted neutral masses of all HCD fragments of the peptide.
    List<Product> products = new List<Product>();
    firstPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products);
    var sortedMasses = products.Select(product => product.NeutralMass).ToArray();
    Array.Sort(sortedMasses);

    double[] peakIntensities = { 1, 1, 1, 1 };
    double[] peakMzs =
    {
        sortedMasses[0].ToMz(1),
        sortedMasses[2].ToMz(1),
        sortedMasses[4].ToMz(1),
        10000
    };
    var spectrum = new MzSpectrum(peakMzs, peakIntensities, false);
    var dataScan = new MsDataScan(
        spectrum, 1, 1, true, Polarity.Positive, 1, new MzRange(300, 2000), "",
        MZAnalyzerType.Unknown, spectrum.SumOfAllY, null, null, "scan=1", 0, null, null, 0, null,
        DissociationType.Unknown, 1, null);

    var psms = new PeptideSpectralMatch[1];
    var ms2Scans = new Ms2ScanWithSpecificMass[]
    {
        new Ms2ScanWithSpecificMass(dataScan, 0, 1, null, new CommonParameters())
    };

    var searchParameters = new CommonParameters(
        productMassTolerance: new PpmTolerance(5),
        scoreCutoff: 1,
        digestionParams: new DigestionParams(
            maxMissedCleavages: 0,
            minPeptideLength: 1,
            initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain));

    var engine = new ClassicSearchEngine(
        psms, ms2Scans, new List<Modification>(), new List<Modification>(), null, null, null,
        new List<Protein> { protein }, new OpenSearchMode(), searchParameters, null,
        new List<string>());
    engine.Run();

    Assert.AreEqual(3, psms[0].MatchedFragmentIons.Count);
}
/// <summary>
/// Runs the classic search engine with the "top-down" protease against the sliced
/// yeast top-down mzML test file and expects the first non-null PSM to have 47
/// matched fragment ions. Also checks that the analyte type is reported as
/// "Proteoform" once the parameters have been analysed.
/// </summary>
public static void TestClassicSearchEngineTopDown()
{
    CommonParameters commonParameters = new CommonParameters(
        digestionParams: new DigestionParams(protease: "top-down"),
        scoreCutoff: 1,
        assumeOrphanPeaksAreZ1Fragments: false);

    MetaMorpheusTask.DetermineAnalyteType(commonParameters);

    // test output file name (should be proteoform and not peptide)
    Assert.That(GlobalVariables.AnalyteType == "Proteoform");

    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();
    var proteinList = new List<Protein>
    {
        new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
    };

    // Pass each path segment separately so the platform's directory separator is
    // used; a literal '\' inside one segment only works on Windows.
    var myMsDataFile = Mzml.LoadAllStaticData(
        Path.Combine(TestContext.CurrentContext.TestDirectory, "TopDownTestData", "slicedTDYeast.mzML"));

    var searchMode = new SinglePpmAroundZeroSearchMode(5);

    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters())
        .OrderBy(b => b.PrecursorMass)
        .ToArray();

    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

    bool writeSpetralLibrary = false;
    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null,
        proteinList, searchMode, commonParameters, null, null, new List<string>(), writeSpetralLibrary).Run();

    var psm = allPsmsArray.FirstOrDefault(p => p != null);

    // Fail with a clear message instead of a NullReferenceException when nothing matched.
    Assert.IsNotNull(psm, "The search produced no PSM");
    // AreEqual reports the actual count on failure; a bare boolean assertion does not.
    Assert.AreEqual(47, psm.MatchedFragmentIons.Count);
}
/// <summary>
/// Scores every candidate peptide against one spectrum and collects the matches
/// in a container capped at <c>MaxMatchesPerSpectrum</c> entries.
/// </summary>
/// <param name="spectrum">Spectrum supplying the experimental masses, intensities and total ion current.</param>
/// <param name="peptides">Candidate peptides to fragment and score.</param>
/// <param name="fragmentTypes">Fragment types passed to <c>Peptide.Fragment</c>.</param>
/// <param name="productMassTolerance">Tolerance forwarded to the array-based <c>Search</c> overload.</param>
/// <returns>The container holding one match per peptide that it accepted.</returns>
public override SortedMaxSizedContainer<PeptideSpectralMatch> Search(IMassSpectrum spectrum, IEnumerable<Peptide> peptides, FragmentTypes fragmentTypes, Tolerance productMassTolerance)
{
    var topMatches = new SortedMaxSizedContainer<PeptideSpectralMatch>(MaxMatchesPerSpectrum);

    // Experimental data is read once and shared by all candidates.
    double[] observedMasses = spectrum.MassSpectrum.GetMasses();
    double[] observedIntensities = spectrum.MassSpectrum.GetIntensities();
    double totalIonCurrent = spectrum.MassSpectrum.GetTotalIonCurrent();

    foreach (var candidate in peptides)
    {
        var match = new PeptideSpectralMatch(DefaultPsmScoreType);
        match.Peptide = candidate;

        // Theoretical fragments as singly charged m/z values, ascending.
        double[] theoreticalMzs =
            (from frag in candidate.Fragment(fragmentTypes)
             select Mass.MzFromMass(frag.MonoisotopicMass, 1) into mz
             orderby mz
             select mz).ToArray();

        match.Score = Search(observedMasses, observedIntensities, theoreticalMzs, productMassTolerance, totalIonCurrent);
        topMatches.Add(match);
    }

    return topMatches;
}
/// <summary>
/// Collects labeled MS1 and MS2 data points for every entry of
/// <c>goodIdentifications</c>, processing index ranges in parallel.
/// </summary>
/// <remarks>
/// An identification is skipped when its full sequence is null, when its
/// representative peptide has no sequence-with-chemical-formulas, when any of its
/// modifications has a neutral-loss list other than a single 0, or when
/// <c>SearchMS2Spectrum</c> reports fewer identified fragments than
/// <c>numFragmentsNeededForEveryIdentification</c>. MS2 points are recorded for
/// every remaining identification; MS1 scans are searched only the first time a
/// given full sequence is seen.
/// </remarks>
/// <returns>
/// A <c>DataPointAquisitionResults</c> built from the two point lists and the four
/// counters accumulated here.
/// </returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    Status("Extracting data points:");
    // The final training point list
    int numMs1MassChargeCombinationsConsidered = 0;
    int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    int numMs2MassChargeCombinationsConsidered = 0;
    int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    List <LabeledMs1DataPoint> Ms1List = new List <LabeledMs1DataPoint>();
    List <LabeledMs2DataPoint> Ms2List = new List <LabeledMs2DataPoint>();

    int numIdentifications = goodIdentifications.Count;
    // Loop over identifications

    // Full sequences whose MS1 scans have already been searched.
    HashSet <string> sequences = new HashSet <string>();

    // lockObj guards Ms1List and the MS1 counters; lockObj2 guards Ms2List,
    // the MS2 counters and the sequences set.
    object lockObj = new object();
    object lockObj2 = new object();

    Parallel.ForEach(Partitioner.Create(0, numIdentifications), fff =>
    {
        // fff is a half-open index range [Item1, Item2) into goodIdentifications.
        for (int matchIndex = fff.Item1; matchIndex < fff.Item2; matchIndex++)
        {
            PeptideSpectralMatch identification = goodIdentifications[matchIndex];

            // Each identification has an MS2 spectrum attached to it.
            int ms2scanNumber = identification.ScanNumber;
            int peptideCharge = identification.ScanPrecursorCharge;
            if (identification.FullSequence == null)
            {
                continue;
            }

            var representativeSinglePeptide = identification.CompactPeptides.First().Value.Item2.First();

            // Get the peptide, don't forget to add the modifications!!!!
            var SequenceWithChemicalFormulas = representativeSinglePeptide.SequenceWithChemicalFormulas;
            // Skip when there is no formula sequence, or when any modification's
            // neutral losses are anything other than exactly one entry equal to 0.
            if (SequenceWithChemicalFormulas == null || representativeSinglePeptide.allModsOneIsNterminus.Any(b => b.Value.neutralLosses.Count != 1 || b.Value.neutralLosses.First() != 0))
            {
                continue;
            }
            Proteomics.Peptide coolPeptide = new Proteomics.Peptide(SequenceWithChemicalFormulas);

            // Tuple layout: (points, combinations considered, combinations ignored, fragments identified).
            var ms2tuple = SearchMS2Spectrum(myMsDataFile.GetOneBasedScan(ms2scanNumber) as IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> >, coolPeptide, peptideCharge, identification);

            // If MS2 has low evidence for peptide, skip and go to next one
            if (ms2tuple.Item4 < numFragmentsNeededForEveryIdentification)
            {
                continue;
            }

            lock (lockObj2)
            {
                Ms2List.AddRange(ms2tuple.Item1);
                numMs2MassChargeCombinationsConsidered += ms2tuple.Item2;
                numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms2tuple.Item3;
                // The MS2 points above are kept even when the MS1 search is skipped below.
                if (sequences.Contains(identification.FullSequence))
                {
                    continue; // Do not search same sequence multiple times in MS1 scans
                }
                sequences.Add(identification.FullSequence);
            }

            // Calculate isotopic distribution of the full peptide
            var dist = IsotopicDistribution.GetDistribution(coolPeptide.GetChemicalFormula(), fineResolutionForIsotopeDistCalculation, 0.001);

            double[] theoreticalMasses = dist.Masses.ToArray();
            double[] theoreticalIntensities = dist.Intensities.ToArray();

            // Sort intensities in descending order, reordering the masses alongside them.
            Array.Sort(theoreticalIntensities, theoreticalMasses, Comparer <double> .Create((x, y) => y.CompareTo(x)));

            // Search MS1 scans in both directions (-1 and +1) starting from the MS2 scan number.
            var ms1tupleBack = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, -1, peptideCharge, identification);

            var ms1tupleForward = SearchMS1Spectra(theoreticalMasses, theoreticalIntensities, ms2scanNumber, 1, peptideCharge, identification);

            lock (lockObj)
            {
                Ms1List.AddRange(ms1tupleBack.Item1);
                numMs1MassChargeCombinationsConsidered += ms1tupleBack.Item2;
                numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleBack.Item3;
                Ms1List.AddRange(ms1tupleForward.Item1);
                numMs1MassChargeCombinationsConsidered += ms1tupleForward.Item2;
                numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks += ms1tupleForward.Item3;
            }
        }
    });

    return(new DataPointAquisitionResults(this, Ms1List, Ms2List, numMs1MassChargeCombinationsConsidered, numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks ));
}
/// <summary>
/// Looks for the isotopologue peaks of each fragment of <paramref name="peptide"/>
/// in one MS2 scan and turns the matches into labeled MS2 data points.
/// </summary>
/// <param name="ms2DataScan">MS2 scan to search.</param>
/// <param name="peptide">Peptide whose fragments (of <c>fragmentTypesForCalibration</c>) are looked up.</param>
/// <param name="peptideCharge">Highest charge state tried for each fragment.</param>
/// <param name="identification">Identification stored on every averaged data point.</param>
/// <returns>
/// A tuple of: the averaged data points, the number of mass/charge combinations
/// considered, the number ignored because more than one peak fell inside the
/// tolerance window, and the number of fragments that produced at least one point.
/// </returns>
private (List <LabeledMs2DataPoint>, int, int, int) SearchMS2Spectrum(IMsDataScanWithPrecursor <IMzSpectrum <IMzPeak> > ms2DataScan, Proteomics.Peptide peptide, int peptideCharge, PeptideSpectralMatch identification)
{
    List <LabeledMs2DataPoint> result = new List <LabeledMs2DataPoint>();
    int numMs2MassChargeCombinationsConsidered = 0;
    int numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;
    int numFragmentsIdentified = 0;

    // Nothing to search in an empty spectrum.
    if (ms2DataScan.MassSpectrum.Size == 0)
    {
        return(result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
    }

    // Key: mz value, Value: error
    // Also ensures each experimental peak is used at most once within this scan.
    var addedPeaks = new Dictionary <double, double>();

    // NOTE(review): these two counters are written below but never read in this method.
    var countForThisMS2 = 0;
    var countForThisMS2a = 0;

    var scanWindowRange = ms2DataScan.ScanWindowRange;

    IHasChemicalFormula[] fragmentList = peptide.Fragment(fragmentTypesForCalibration, true).OfType <IHasChemicalFormula>().ToArray();

    foreach (var fragment in fragmentList)
    {
        bool fragmentIdentified = false;
        bool computedIsotopologues = false;
        double[] masses = new double[0];
        double[] intensities = new double[0];

        // First look for monoisotopic masses, do not compute distribution spectrum!
        // The isotopic distribution is computed only once a monoisotopic peak is
        // found within tolerance at some charge state.
        for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
        {
            var monoisotopicMZ = fragment.MonoisotopicMass.ToMz(chargeToLookAt);
            // Above the scan window: try the next (higher) charge.
            if (monoisotopicMZ > scanWindowRange.Maximum)
            {
                continue;
            }
            // Below the scan window: stop trying charges for this fragment.
            if (monoisotopicMZ < scanWindowRange.Minimum)
            {
                break;
            }
            var closestPeakMZ = ms2DataScan.MassSpectrum.GetClosestPeakXvalue(monoisotopicMZ);
            if (mzToleranceForMs2Search.Within(closestPeakMZ.Value, monoisotopicMZ) && !computedIsotopologues)
            {
                var dist = IsotopicDistribution.GetDistribution(fragment.ThisChemicalFormula, fineResolutionForIsotopeDistCalculation, 0.001);

                masses = dist.Masses.ToArray();
                intensities = dist.Intensities.ToArray();

                // Sort intensities in descending order, reordering the masses alongside them.
                Array.Sort(intensities, masses, Comparer <double> .Create((x, y) => y.CompareTo(x)));
                computedIsotopologues = true;
                break;
            }
        }

        if (computedIsotopologues)
        {
            bool startingToAdd = false;
            for (int chargeToLookAt = 1; chargeToLookAt <= peptideCharge; chargeToLookAt++)
            {
                // Same window checks as above, using the first and last entries of masses.
                if (masses.First().ToMz(chargeToLookAt) > scanWindowRange.Maximum)
                {
                    continue;
                }
                if (masses.Last().ToMz(chargeToLookAt) < scanWindowRange.Minimum)
                {
                    break;
                }
                var trainingPointsToAverage = new List <LabeledMs2DataPoint>();
                foreach (double a in masses)
                {
                    double theMZ = a.ToMz(chargeToLookAt);

                    var npwr = ms2DataScan.MassSpectrum.NumPeaksWithinRange(mzToleranceForMs2Search.GetMinimumValue(theMZ), mzToleranceForMs2Search.GetMaximumValue(theMZ));
                    // No peak for this isotopologue: stop walking the remaining ones.
                    if (npwr == 0)
                    {
                        break;
                    }
                    numMs2MassChargeCombinationsConsidered++;
                    // More than one candidate peak: count it as ignored and move on.
                    if (npwr > 1)
                    {
                        numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                        continue;
                    }

                    var closestPeakIndex = ms2DataScan.MassSpectrum.GetClosestPeakIndex(theMZ);
                    var closestPeakMZ = ms2DataScan.MassSpectrum.XArray[closestPeakIndex.Value];

                    if (!addedPeaks.ContainsKey(closestPeakMZ))
                    {
                        addedPeaks.Add(closestPeakMZ, Math.Abs(closestPeakMZ - theMZ));
                        // Intermediate point: only mz, log intensity and expected mz are filled in.
                        trainingPointsToAverage.Add(new LabeledMs2DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(ms2DataScan.MassSpectrum.YArray[closestPeakIndex.Value]), theMZ, null));
                    }
                }
                // If we started adding points and suddenly stopped, go to the next fragment; no need to look at higher charges
                if (trainingPointsToAverage.Count == 0 && startingToAdd)
                {
                    break;
                }
                // Too few isotopic peaks matched at this charge: record nothing (empty branch).
                if (trainingPointsToAverage.Count < Math.Min(minMS2isotopicPeaksNeededForConfirmedIdentification, intensities.Count()))
                {
                }
                else
                {
                    startingToAdd = true;
                    // Count each fragment at most once, regardless of how many charges match.
                    if (!fragmentIdentified)
                    {
                        fragmentIdentified = true;
                        numFragmentsIdentified += 1;
                    }

                    countForThisMS2 += trainingPointsToAverage.Count;
                    countForThisMS2a++;
                    // One averaged data point per fragment/charge combination.
                    result.Add(new LabeledMs2DataPoint(trainingPointsToAverage.Select(b => b.mz).Average(), ms2DataScan.RetentionTime, Math.Log(ms2DataScan.TotalIonCurrent), ms2DataScan.InjectionTime.HasValue ? Math.Log(ms2DataScan.InjectionTime.Value) : double.NaN, trainingPointsToAverage.Select(b => b.logIntensity).Average(), trainingPointsToAverage.Select(b => b.expectedMZ).Average(), identification));
                }
            }
        }
    }

    return(result, numMs2MassChargeCombinationsConsidered, numMs2MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks, numFragmentsIdentified);
}
/// <summary>
/// Walks scans of <c>myMsDataFile</c> in one direction from an MS2 scan, looking
/// for the peptide's isotopologue peaks in each MS1 scan and turning matches into
/// labeled MS1 data points. The walk stops at the file boundary, at an empty MS1
/// spectrum, or after the first MS1 scan that yields no data point.
/// </summary>
/// <param name="theoreticalMasses">Isotopologue masses; element 0 is used for the scan-window checks.</param>
/// <param name="theoreticalIntensities">Isotopologue intensities; element 0 is compared against 0.65.</param>
/// <param name="ms2spectrumIndex">One-based scan number of the MS2 scan the walk starts from.</param>
/// <param name="direction">Step added to the scan index each iteration; 1 starts at the MS2 scan itself, any other value starts one scan before it.</param>
/// <param name="peptideCharge">Charge of the identified peptide; the initial upper bound for charges tried.</param>
/// <param name="identification">Identification stored on every averaged data point.</param>
/// <returns>
/// A tuple of: the averaged data points, the number of mass/charge combinations
/// considered, and the number ignored because more than one peak fell inside the
/// tolerance window.
/// </returns>
private (List <LabeledMs1DataPoint>, int, int) SearchMS1Spectra(double[] theoreticalMasses, double[] theoreticalIntensities, int ms2spectrumIndex, int direction, int peptideCharge, PeptideSpectralMatch identification)
{
    List <LabeledMs1DataPoint> result = new List <LabeledMs1DataPoint>();
    int numMs1MassChargeCombinationsConsidered = 0;
    int numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks = 0;

    int theIndex;
    theIndex = direction == 1 ? ms2spectrumIndex : ms2spectrumIndex - 1;

    // Starts true so the loop is entered; reset for every MS1 scan examined.
    bool addedAscan = true;

    // Raised below whenever a peak is found at a higher charge than known so far.
    int highestKnownChargeForThisPeptide = peptideCharge;
    while (theIndex >= 1 && theIndex <= myMsDataFile.NumSpectra && addedAscan)
    {
        // NOTE(review): incremented below but never read in this method.
        int countForThisScan = 0;
        // Skip over non-MS1 scans without touching addedAscan.
        if (myMsDataFile.GetOneBasedScan(theIndex).MsnOrder > 1)
        {
            theIndex += direction;
            continue;
        }
        addedAscan = false;
        var fullMS1scan = myMsDataFile.GetOneBasedScan(theIndex);
        var scanWindowRange = fullMS1scan.ScanWindowRange;
        var fullMS1spectrum = fullMS1scan.MassSpectrum;
        if (fullMS1spectrum.Size == 0)
        {
            break;
        }

        bool startingToAddCharges = false;
        int chargeToLookAt = 1;
        do
        {
            // Above the scan window: try the next (higher) charge. The continue
            // jumps to the while condition at the bottom of the do-loop.
            if (theoreticalMasses[0].ToMz(chargeToLookAt) > scanWindowRange.Maximum)
            {
                chargeToLookAt++;
                continue;
            }
            // Below the scan window: stop trying charges for this scan.
            if (theoreticalMasses[0].ToMz(chargeToLookAt) < scanWindowRange.Minimum)
            {
                break;
            }
            var trainingPointsToAverage = new List <LabeledMs1DataPoint>();
            foreach (double a in theoreticalMasses)
            {
                double theMZ = a.ToMz(chargeToLookAt);

                var npwr = fullMS1spectrum.NumPeaksWithinRange(mzToleranceForMs1Search.GetMinimumValue(theMZ), mzToleranceForMs1Search.GetMaximumValue(theMZ));
                // No peak for this isotopologue: stop walking the remaining ones.
                if (npwr == 0)
                {
                    break;
                }
                numMs1MassChargeCombinationsConsidered++;
                // More than one candidate peak: count it as ignored and move on.
                if (npwr > 1)
                {
                    numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks++;
                    continue;
                }

                var closestPeakIndex = fullMS1spectrum.GetClosestPeakIndex(theMZ);
                var closestPeakMZ = fullMS1spectrum.XArray[closestPeakIndex.Value];

                highestKnownChargeForThisPeptide = Math.Max(highestKnownChargeForThisPeptide, chargeToLookAt);
                // Intermediate point: only mz, log intensity and expected mz are filled in.
                trainingPointsToAverage.Add(new LabeledMs1DataPoint(closestPeakMZ, double.NaN, double.NaN, double.NaN, Math.Log(fullMS1spectrum.YArray[closestPeakIndex.Value]), theMZ, null));
            }

            // If we started adding points and suddenly stopped, go to the next scan; no need to look at higher charges
            if (trainingPointsToAverage.Count == 0 && startingToAddCharges)
            {
                break;
            }
            // At or beyond the peptide's own charge with no usable match (none, or a
            // single point while theoreticalIntensities[0] < 0.65): stop trying charges.
            if ((trainingPointsToAverage.Count == 0 || (trainingPointsToAverage.Count == 1 && theoreticalIntensities[0] < 0.65)) && (peptideCharge <= chargeToLookAt))
            {
                break;
            }
            // Not enough evidence at this charge: record nothing (empty branch).
            if ((trainingPointsToAverage.Count == 1 && theoreticalIntensities[0] < 0.65) || trainingPointsToAverage.Count < Math.Min(minMS1isotopicPeaksNeededForConfirmedIdentification, theoreticalIntensities.Count()))
            {
            }
            else
            {
                addedAscan = true;
                startingToAddCharges = true;
                countForThisScan++;
                // One averaged data point per scan/charge combination.
                result.Add(new LabeledMs1DataPoint(trainingPointsToAverage.Select(b => b.mz).Average(), fullMS1scan.RetentionTime, Math.Log(fullMS1scan.TotalIonCurrent), fullMS1scan.InjectionTime.HasValue ? Math.Log(fullMS1scan.InjectionTime.Value) : double.NaN, trainingPointsToAverage.Select(b => b.logIntensity).Average(), trainingPointsToAverage.Select(b => b.expectedMZ).Average(), identification));
            }
            chargeToLookAt++;
        } while (chargeToLookAt <= highestKnownChargeForThisPeptide + 1);
        theIndex += direction;
    }
    return(result, numMs1MassChargeCombinationsConsidered, numMs1MassChargeCombinationsThatAreIgnoredBecauseOfTooManyPeaks);
}
/// <summary>
/// Registers a custom protease cleaving at "K", digests "MNNNKQQQ" with it, checks
/// the base sequences of the resulting peptides, wraps three of them in PSMs and
/// runs the FDR analysis engine over those PSMs.
/// </summary>
public static void TestAnalysisEngineTests()
{
    // Custom protease, registered globally under its own name.
    var digestionMotifs = new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) };
    var customProtease = new Protease("Custom Protease5", CleavageSpecificity.Full, null, null, digestionMotifs);
    ProteaseDictionary.Dictionary.Add(customProtease.Name, customProtease);

    var commonParameters = new CommonParameters(
        digestionParams: new DigestionParams(
            protease: customProtease.Name,
            maxMissedCleavages: 0,
            minPeptideLength: 1,
            maxModificationIsoforms: 1042),
        scoreCutoff: 1,
        productMassTolerance: new PpmTolerance(10));

    var fileSpecificParameters = new List<(string fileName, CommonParameters fileSpecificParameters)>
    {
        ("", commonParameters)
    };

    var localizeableMods = new List<Modification>();
    var variableMods = new List<Modification>();
    var fixedMods = new List<Modification>();

    // Fixed mods map to 0; variable and localizeable mods get consecutive ids from 1.
    var modIds = new Dictionary<Modification, ushort>();
    foreach (Modification fixedMod in fixedMods)
    {
        modIds.Add(fixedMod, 0);
    }
    int nextId = 1;
    foreach (Modification variableMod in variableMods)
    {
        modIds.Add(variableMod, (ushort)nextId);
        nextId++;
    }
    foreach (Modification localizeableMod in localizeableMods)
    {
        modIds.Add(localizeableMod, (ushort)nextId);
        nextId++;
    }

    var proteins = new List<Protein> { new Protein("MNNNKQQQ", "accession") };

    var lastPeptide = proteins.First().Digest(commonParameters.DigestionParams, fixedMods, variableMods).Last();
    var peptideSet1 = new HashSet<PeptideWithSetModifications> { lastPeptide };
    PeptideWithSetModifications peptide1 = peptideSet1.First();
    Assert.AreEqual("QQQ", peptideSet1.First().BaseSequence);

    var firstPeptide = proteins.First().Digest(commonParameters.DigestionParams, fixedMods, variableMods).First();
    var peptideSet2 = new HashSet<PeptideWithSetModifications> { firstPeptide };
    PeptideWithSetModifications peptide2 = peptideSet2.First();
    Assert.AreEqual("MNNNK", peptideSet2.First().BaseSequence);

    var secondPeptide = proteins.First().Digest(commonParameters.DigestionParams, fixedMods, variableMods).ToList()[1];
    var peptideSet3 = new HashSet<PeptideWithSetModifications> { secondPeptide };
    PeptideWithSetModifications peptide3 = peptideSet3.First();
    Assert.AreEqual("NNNK", peptideSet3.First().BaseSequence);

    // Three single-peak scans differing only in scan number, native id and precursor value.
    var scanA = new Ms2ScanWithSpecificMass(
        new MsDataScan(
            new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false),
            2, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN,
            null, null, "scan=1", double.NaN, null, null, double.NaN, null,
            DissociationType.AnyActivationType, 1, null),
        1, 1, null, new CommonParameters());
    var scanB = new Ms2ScanWithSpecificMass(
        new MsDataScan(
            new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false),
            3, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN,
            null, null, "scan=2", double.NaN, null, null, double.NaN, null,
            DissociationType.AnyActivationType, 1, null),
        2 + 132.040, 1, null, new CommonParameters());
    var scanC = new Ms2ScanWithSpecificMass(
        new MsDataScan(
            new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false),
            4, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN,
            null, null, "scan=3", double.NaN, null, null, double.NaN, null,
            DissociationType.AnyActivationType, 1, null),
        3, 1, null, new CommonParameters());

    var psmA = new PeptideSpectralMatch(peptide1, 0, 0, 0, scanA, commonParameters, new List<MatchedFragmentIon>());
    var psmB = new PeptideSpectralMatch(peptide2, 0, 0, 0, scanB, commonParameters, new List<MatchedFragmentIon>());
    var psmC = new PeptideSpectralMatch(peptide3, 0, 0, 0, scanC, commonParameters, new List<MatchedFragmentIon>());
    var psms = new List<PeptideSpectralMatch> { psmA, psmB, psmC };

    MsDataFile dataFile = new TestDataFile(new List<PeptideWithSetModifications>
    {
        peptideSet1.First(),
        peptideSet2.First(),
        peptideSet3.First()
    });

    var massSearchMode = new SinglePpmAroundZeroSearchMode(5);
    Action<List<PeptideSpectralMatch>, string, List<string>> psmCallback =
        (List<PeptideSpectralMatch> l, string s, List<string> sdf) => { };

    Tolerance deconvolutionTolerance = new PpmTolerance(5);

    var ms2ScansByMass = MetaMorpheusTask.GetMs2Scans(dataFile, null, new CommonParameters())
        .OrderBy(ms2Scan => ms2Scan.PrecursorMass)
        .ToArray();

    Action<BinTreeStructure, string> binCallback = (BinTreeStructure l, string s) =>
    {
        Assert.AreEqual(1, l.FinalBins.Count);
    };

    var fdrEngine = new FdrAnalysisEngine(psms, massSearchMode.NumNotches, commonParameters, fileSpecificParameters, new List<string> { "ff" });
    fdrEngine.Run();
}
/// <summary>
/// Runs the classic and modern search engines on two synthetic data sets and checks how many
/// PSMs fall within 1% FDR when FDR analysis is run with and without <c>useDeltaScore</c>.
/// </summary>
/// <remarks>
/// Scenario 1 expects 3 PSMs with delta scoring enabled and 0 with it disabled.
/// Scenario 2 expects 3 PSMs in both configurations.
/// </remarks>
public static void TestDeltaValues()
{
    CommonParameters commonParameters = new CommonParameters(scoreCutoff: 1, useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5));

    SearchParameters searchParameters = new SearchParameters
    {
        MassDiffAcceptorType = MassDiffAcceptorType.Exact,
    };

    List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
        .Where(b => commonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif)))
        .ToList();
    List<Modification> fixedModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
        .Where(b => commonParameters.ListOfModsFixed.Contains((b.ModificationType, b.IdWithMotif)))
        .ToList();

    // First peptide produced by digesting the protein with whatever parameters are current at call time.
    PeptideWithSetModifications FirstPeptide(Protein protein) =>
        protein.Digest(commonParameters.DigestionParams, fixedModifications, variableModifications).ToList()[0];

    // Single entry point for every FDR run, so the classic and modern arrays cannot be mixed up
    // by copy-paste. Reads the parameters that are current at call time.
    FdrAnalysisResults RunFdr(PeptideSpectralMatch[] psms) =>
        (FdrAnalysisResults)new FdrAnalysisEngine(psms.ToList(), 1, commonParameters, new List<string>()).Run();

    // Generate data for files
    Protein targetProtein1 = new Protein("TIDEANTHE", "accession1");
    Protein targetProtein2 = new Protein("TIDELVE", "accession2");
    Protein targetProtein3 = new Protein("TIDENIE", "accession3");
    Protein targetProteinLost = new Protein("PEPTIDEANTHE", "accession4");
    Protein decoyProteinFound = new Protein("PETPLEDQGTHE", "accessiond", isDecoy: true);

    MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
    {
        FirstPeptide(targetProtein1),
        FirstPeptide(targetProtein2),
        FirstPeptide(targetProtein3),
        FirstPeptide(decoyProteinFound)
    });

    var proteinList = new List<Protein> { targetProtein1, targetProtein2, targetProtein3, targetProteinLost, decoyProteinFound };

    var searchModes = new SinglePpmAroundZeroSearchMode(5);

    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    // check better when using delta
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, commonParameters, new List<string>()).Run();

    var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.None, commonParameters, 30000, false, new List<FileInfo>(), new List<string>());
    var indexResults = (IndexingResults)indexEngine.Run();
    MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(commonParameters.PrecursorMassTolerance, searchParameters.MassDiffAcceptorType, searchParameters.CustomMdac);

    PeptideSpectralMatch[] allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, commonParameters, massDiffAcceptor, 0, new List<string>()).Run();

    FdrAnalysisResults fdrResultsClassicDelta = RunFdr(allPsmsArray);
    FdrAnalysisResults fdrResultsModernDelta = RunFdr(allPsmsArrayModern);
    Assert.IsTrue(fdrResultsClassicDelta.PsmsWithin1PercentFdr == 3);
    Assert.IsTrue(fdrResultsModernDelta.PsmsWithin1PercentFdr == 3);

    commonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

    // check worse when using score
    // The modern result is computed from the modern PSM array; previously it re-used the classic
    // array, which made the second assertion a duplicate of the first.
    FdrAnalysisResults fdrResultsClassic = RunFdr(allPsmsArray);
    FdrAnalysisResults fdrResultsModern = RunFdr(allPsmsArrayModern);
    Assert.IsTrue(fdrResultsClassic.PsmsWithin1PercentFdr == 0);
    Assert.IsTrue(fdrResultsModern.PsmsWithin1PercentFdr == 0);

    // check that when delta is bad, we used the score
    // Generate data for files
    Protein decoyProtein1 = new Protein("TLEDAGGTHE", "accession1d", isDecoy: true);
    Protein decoyProtein2 = new Protein("TLEDLVE", "accession2d", isDecoy: true);
    Protein decoyProtein3 = new Protein("TLEDNIE", "accession3d", isDecoy: true);
    Protein decoyProteinShiny = new Protein("GGGGGG", "accessionShinyd", isDecoy: true);

    myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
    {
        FirstPeptide(targetProtein1),
        FirstPeptide(targetProtein2),
        FirstPeptide(targetProtein3),
        FirstPeptide(decoyProteinShiny),
    });

    proteinList = new List<Protein>
    {
        targetProtein1, decoyProtein1,
        targetProtein2, decoyProtein2,
        targetProtein3, decoyProtein3,
        decoyProteinShiny,
    };

    listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    // check no change when using delta
    // The classic search here runs with the score-only parameters assigned above; delta scoring
    // is switched back on only after it completes.
    allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, proteinList, searchModes, commonParameters, new List<string>()).Run();

    commonParameters = new CommonParameters(useDeltaScore: true, digestionParams: new DigestionParams(minPeptideLength: 5));

    indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, 1, DecoyType.None, commonParameters, 30000, false, new List<FileInfo>(), new List<string>());
    indexResults = (IndexingResults)indexEngine.Run();
    massDiffAcceptor = SearchTask.GetMassDiffAcceptor(commonParameters.PrecursorMassTolerance, searchParameters.MassDiffAcceptorType, searchParameters.CustomMdac);

    allPsmsArrayModern = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ModernSearchEngine(allPsmsArrayModern, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, commonParameters, massDiffAcceptor, 0, new List<string>()).Run();

    fdrResultsClassicDelta = RunFdr(allPsmsArray);
    fdrResultsModernDelta = RunFdr(allPsmsArrayModern);
    Assert.IsTrue(fdrResultsClassicDelta.PsmsWithin1PercentFdr == 3);
    Assert.IsTrue(fdrResultsModernDelta.PsmsWithin1PercentFdr == 3);

    commonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5));

    // check no change when using score
    fdrResultsClassic = RunFdr(allPsmsArray);
    fdrResultsModern = RunFdr(allPsmsArrayModern);
    Assert.IsTrue(fdrResultsClassic.PsmsWithin1PercentFdr == 3);
    Assert.IsTrue(fdrResultsModern.PsmsWithin1PercentFdr == 3);
}
/// <summary>
/// Searches a small HeLa mzML subset against a FASTA snippet, builds the PEP feature record
/// for the top-scoring PSM and checks each feature against the PSM it came from, then computes
/// PEP values for all PSMs and bounds how many are at or above 0.5.
/// </summary>
public static void TestComputePEPValue()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;

    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();

    string spectraPath = Path.Combine(testDirectory, @"TestData\TaGe_SA_HeLa_04_subset_longestSeq.mzML");
    MyFileManager fileManager = new MyFileManager(true);
    CommonParameters commonParams = new CommonParameters(digestionParams: new DigestionParams());
    var spectraFile = fileManager.LoadFile(spectraPath, commonParams);

    var precursorSearchMode = new SinglePpmAroundZeroSearchMode(5);
    string fastaPath = Path.Combine(testDirectory, @"TestData\hela_snip_for_unitTest.fasta");
    List<Protein> proteins = ProteinDbLoader.LoadProteinFasta(
        fastaPath,
        true,
        DecoyType.Reverse,
        false,
        ProteinDbLoader.UniprotAccessionRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotGeneNameRegex,
        ProteinDbLoader.UniprotOrganismRegex,
        out var fastaErrors,
        -1);

    var sortedMs2Scans = MetaMorpheusTask.GetMs2Scans(spectraFile, null, commonParams).OrderBy(b => b.PrecursorMass).ToArray();

    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[sortedMs2Scans.Length];
    new ClassicSearchEngine(allPsmsArray, sortedMs2Scans, variableModifications, fixedModifications, null, proteins, precursorSearchMode, commonParams, new List<string>()).Run();

    // The returned results object is not inspected; the engine is run before FdrInfo is read below.
    FdrAnalysisResults fdrResults = (FdrAnalysisResults)(new FdrAnalysisEngine(allPsmsArray.Where(p => p != null).ToList(), 1, commonParams, new List<string>()).Run());

    var scoredPsms = allPsmsArray.Where(p => p != null).ToList();
    var scoredPsmsOriginalCopy = allPsmsArray.Where(p => p != null).ToList();

    var accessionCounts = PEP_Analysis.GetAccessionCounts(scoredPsms);

    var topScore = scoredPsms.Max(n => n.Score);
    var topPsm = scoredPsms.First(n => n.Score == topScore);

    // Key: the '|'-joined full sequences of a PSM's best matching peptides; value: number of PSMs with that key.
    Dictionary<string, int> sequenceToPsmCount = scoredPsms
        .Select(psm => String.Join("|", psm.BestMatchingPeptides.Select(b => b.Peptide.FullSequence).ToList()))
        .GroupBy(sequence => sequence)
        .ToDictionary(group => group.Key, group => group.Count());

    var topPsmData = PEP_Analysis.CreateOnePsmDataFromPsm(topPsm, accessionCounts, sequenceToPsmCount);

    var topPeptide = topPsm.BestMatchingPeptides.Select(p => p.Peptide).First();

    Assert.That(topPsm.PeptidesToMatchingFragments.Count, Is.EqualTo(topPsmData.Ambiguity));
    Assert.That(topPsm.DeltaScore, Is.EqualTo(topPsmData.DeltaScore).Within(0.05));
    // The Intensity feature is compared against the fractional part of the score.
    Assert.That((float)(topPsm.Score - (int)topPsm.Score), Is.EqualTo(topPsmData.Intensity).Within(0.05));
    Assert.That(topPeptide.MissedCleavages, Is.EqualTo(topPsmData.MissedCleavagesCount));
    Assert.That(topPsm.BestMatchingPeptides.Select(p => p.Peptide).First().AllModsOneIsNterminus.Values.Count(), Is.EqualTo(topPsmData.ModsCount));
    Assert.That(topPsm.Notch ?? 0, Is.EqualTo(topPsmData.Notch));
    Assert.That(topPsm.PsmCount, Is.EqualTo(topPsmData.PsmCount));
    Assert.That(topPsm.ScanPrecursorCharge, Is.EqualTo(topPsmData.ScanPrecursorCharge));

    PEP_Analysis.ComputePEPValuesForAllPSMsGeneric(scoredPsms);

    int trueCount = allPsmsArray
        .Where(p => p != null)
        .Count(item => item.FdrInfo.PEP >= 0.5);

    // NOTE(review): as written this asserts 32 >= trueCount, i.e. an upper bound - confirm that is the intent.
    Assert.GreaterOrEqual(32, trueCount);
}
/// <summary>
/// Builds three modified peptides of the protein "MMKMMK", runs protein parsimony and
/// protein scoring on their PSMs, and checks the sequence-coverage display strings and the
/// modification occupancy summary of the first protein group.
/// </summary>
public static void TryFailSequenceCoverage()
{
    var prot1 = new Protein("MMKMMK", "prot1");
    DigestionParams digestionParams = new DigestionParams();

    // All five modifications share the type "mt" and a mass of 10; only id, motif and location differ.
    Modification MakeMod(string id, ModificationMotif motif, string location) =>
        new Modification(_originalId: id, _modificationType: "mt", _target: motif, _locationRestriction: location, _monoisotopicMass: 10);

    PeptideWithSetModifications MakePeptide(int startResidue, int endResidue, Dictionary<int, Modification> mods) =>
        new PeptideWithSetModifications(prot1, digestionParams, startResidue, endResidue, CleavageSpecificity.Unknown, "", 0, mods, 0);

    ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
    Modification mod1 = MakeMod("mod1", motifM, "N-terminal.");
    Modification mod2 = MakeMod("mod2", motifM, "Peptide N-terminal.");
    Modification mod3 = MakeMod("mod3", motifM, "Anywhere.");

    ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
    Modification mod4 = MakeMod("mod4", motifK, "Peptide C-terminal.");
    Modification mod5 = MakeMod("mod5", motifK, "C-terminal.");

    var modsFor1 = new Dictionary<int, Modification>
    {
        [1] = mod1,
        [3] = mod3,
        [5] = mod4,
    };
    var modsFor2 = new Dictionary<int, Modification>
    {
        [1] = mod2,
        [5] = mod5,
    };
    // Key 8 lies beyond the six residues this peptide spans.
    var modsFor3 = new Dictionary<int, Modification>
    {
        [1] = mod1,
        [5] = mod3,
        [8] = mod5,
    };

    var pwsm1 = MakePeptide(1, 3, modsFor1);
    var pwsm2 = MakePeptide(4, 6, modsFor2);
    var pwsm3 = MakePeptide(1, 6, modsFor3);

    HashSet<PeptideWithSetModifications> peptides = new HashSet<PeptideWithSetModifications>
    {
        pwsm1,
        pwsm2,
        pwsm3,
    };

    IScan scan = new ThisTestScan();

    PeptideSpectralMatch MakePsm(PeptideWithSetModifications peptide)
    {
        var match = new PeptideSpectralMatch(peptide, 0, 1, 0, scan, digestionParams, new List<MatchedFragmentIon>());
        match.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
        return match;
    }

    var psm1 = MakePsm(pwsm1);
    var psm2 = MakePsm(pwsm2);
    var psm3 = MakePsm(pwsm3);

    List<PeptideSpectralMatch> newPsms = new List<PeptideSpectralMatch> { psm1, psm2, psm3 };
    foreach (PeptideSpectralMatch match in newPsms)
    {
        match.ResolveAllAmbiguities();
    }

    ProteinParsimonyEngine parsimonyEngine = new ProteinParsimonyEngine(newPsms, true, new CommonParameters(), new List<string>());
    ProteinParsimonyResults parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();

    ProteinScoringAndFdrEngine scoringEngine = new ProteinScoringAndFdrEngine(parsimonyResults.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List<string>());
    scoringEngine.Run();

    parsimonyResults.ProteinGroups.First().CalculateSequenceCoverage();

    var coverageDisplay = parsimonyResults.ProteinGroups.First().SequenceCoverageDisplayList.First();
    Assert.AreEqual("MMKMMK", coverageDisplay);

    var coverageDisplayWithMods = parsimonyResults.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First();
    Assert.AreEqual("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]", coverageDisplayWithMods);

    var modInfo = parsimonyResults.ProteinGroups.First().ModsInfo.First();
    Assert.IsTrue(modInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]"));
    Assert.IsTrue(modInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]"));
    Assert.IsFalse(modInfo.Contains(@"#aa3"));
    Assert.IsTrue(modInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]"));
    Assert.IsFalse(modInfo.Contains(@"#aa5"));
    Assert.IsTrue(modInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]"));
}
/// <summary>
/// Smoke test: builds a <see cref="CommonParameters"/> through its long positional constructor,
/// indexes a single protein and runs the modern search engine with it. The method contains no
/// assertions; it passes as long as nothing throws.
/// </summary>
public static void AddCompIonsCommonParams()
{
    CommonParameters cp = new CommonParameters(null, DissociationType.HCD, DissociationType.Unknown, DissociationType.Unknown, null, true, true, 3, 12, true, true, 1, 5, 200, 0.01, null, null, false, false, true, false, null, null, null, -1, null, null, null, 1, true, 4, 1);

    var myMsDataFile = new TestDataFile();
    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();
    var proteinList = new List<Protein> { new Protein("MNNNKQQQ", null) };

    SearchParameters searchParameters = new SearchParameters
    {
        MassDiffAcceptorType = MassDiffAcceptorType.Exact,
        SearchTarget = true,
    };

    List<DigestionMotif> motifs = new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) };
    Protease protease = new Protease("Test", CleavageSpecificity.Full, null, null, motifs);
    // The protease registry is static, so Add() would throw ArgumentException if "Test" were
    // already registered (rerun in the same process, or another test using the same name).
    // Indexer assignment registers or replaces the entry.
    ProteaseDictionary.Dictionary[protease.Name] = protease;

    var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)>
    {
        ("", cp),
    };

    var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, new List<SilacLabel>(), null, null, 1, DecoyType.Reverse, cp, fsp, searchParameters.MaxFragmentSize, false, new List<FileInfo>(), TargetContaminantAmbiguity.RemoveContaminant, new List<string>());
    var indexResults = (IndexingResults)indexEngine.Run();

    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(cp.PrecursorMassTolerance, searchParameters.MassDiffAcceptorType, searchParameters.CustomMdac);

    // Original note here read "without complementary ions".
    // NOTE(review): confirm against the positional addCompIons argument passed to cp above.
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ModernSearchEngine(allPsmsArray, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, cp, fsp, massDiffAcceptor, searchParameters.MaximumMassThatFragmentIonScoreIsDoubled, new List<string>()).Run();
}
/// <summary>
/// Runs the classic search engine in four configurations (with/without a spectral library,
/// with/without the final boolean "write spectral library" flag) and checks that all four
/// produce the same target/decoy assignments for the leading PSMs.
/// </summary>
/// <remarks>
/// The explicit decoy protein "LSISNVAK" is added to the protein list after the first search,
/// so searches 2-4 see it while search 1 does not.
/// </remarks>
public static void TestReverseDecoyGenerationDuringSearch()
{
    CommonParameters commonParameters = new CommonParameters();
    MetaMorpheusTask.DetermineAnalyteType(commonParameters);

    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();
    var proteinList = new List<Protein>
    {
        new Protein("KKAEDGINK", ""),
        new Protein("AVNSISLK", ""),
        new Protein("EKAEAEAEK", ""),
        new Protein("DITANLR", ""),
        new Protein("QNAIGTAK", ""),
        new Protein("FHKSQLNK", ""),
        new Protein("KQVAQWNK", ""),
        new Protein("NTRIEELK", ""),
        new Protein("RQPAQPR", ""),
    };

    var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML"));
    var searchMode = new SinglePpmAroundZeroSearchMode(5);
    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    var path = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\myPrositLib.msp");
    var testLibrary = new SpectralLibrary(new List<string> { path });

    // Expected decoy flag and full sequence of the first seven non-null PSMs; identical in all four conditions.
    (bool isDecoy, string fullSequence)[] expectedHits =
    {
        (false, "DITANLR"),
        (true, "LSISNVAK"),
        (true, "LSISNVAK"),
        (false, "RQPAQPR"),
        (false, "KKAEDGINK"),
        (false, "EKAEAEAEK"),
        (false, "EKAEAEAEK"),
    };

    // Runs one classic search against the current protein list and returns its non-null PSMs.
    List<PeptideSpectralMatch> RunSearch(SpectralLibrary spectralLibrary, bool writeSpectralLibrary)
    {
        PeptideSpectralMatch[] allPsms = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
        new ClassicSearchEngine(allPsms, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, commonParameters, null, spectralLibrary, new List<string>(), writeSpectralLibrary).Run();
        return allPsms.Where(p => p != null).ToList();
    }

    // Checks the leading PSMs of one search against the shared expectation table.
    void AssertExpectedHits(List<PeptideSpectralMatch> psms)
    {
        for (int k = 0; k < expectedHits.Length; k++)
        {
            Assert.That(psms[k].IsDecoy == expectedHits[k].isDecoy && psms[k].FullSequence == expectedHits[k].fullSequence);
        }
    }

    // test when doing spectral library search without generating library
    var psm1 = RunSearch(testLibrary, false);
    AssertExpectedHits(psm1);

    proteinList.Add(new Protein("LSISNVAK", "", isDecoy: true));

    // test when doing spectral library search with generating library; non spectral search won't
    // generate decoy by "decoy on the fly", so proteinlist used by non spectral library search
    // would contain decoys
    var psm2 = RunSearch(testLibrary, true);
    AssertExpectedHits(psm2);

    // test when doing non spectral library search without generating library
    var psm3 = RunSearch(null, false);
    AssertExpectedHits(psm3);

    // test when doing non spectral library search with generating library
    var psm4 = RunSearch(null, true);
    AssertExpectedHits(psm4);

    // compare psm's target/decoy results in 4 conditions. they should be same as new decoy
    // methods shouldn't change the t/d results.
    // The middle term now compares psm3 with psm4; it previously compared psm3 with itself,
    // so the fourth condition was never part of the comparison.
    for (int i = 0; i < psm1.Count; i++)
    {
        Assert.That(psm1[i].FullSequence == psm2[i].FullSequence && psm3[i].FullSequence == psm4[i].FullSequence && psm2[i].FullSequence == psm3[i].FullSequence);
        Assert.That(psm1[i].IsDecoy == psm2[i].IsDecoy && psm3[i].IsDecoy == psm4[i].IsDecoy && psm2[i].IsDecoy == psm3[i].IsDecoy);
    }

    // compare MetaMorpheus scores; for some psms, they should have a little higher score when
    // "generating library" as they switch to all charges ions matching function.
    // Only conditions 1 and 2 are compared here.
    for (int j = 0; j < psm1.Count; j++)
    {
        if (psm1[j].FullSequence == psm2[j].FullSequence && psm1[j].MatchedFragmentIons.Count != psm2[j].MatchedFragmentIons.Count)
        {
            Assert.That(psm1[j].Score < psm2[j].Score);
        }
    }
}
/// <summary>
/// Searches a sliced top-down yeast file twice with the classic engine, once with the final
/// boolean flag set and once cleared, and compares matched-ion counts and scores of the first
/// PSM from each run. Every ion found only in the first run must lie within the product mass
/// tolerance of the theoretical product carrying the same annotation.
/// </summary>
public static void TestMatchIonsOfAllChargesTopDown()
{
    CommonParameters topDownParameters = new CommonParameters(
        digestionParams: new DigestionParams(protease: "top-down"),
        scoreCutoff: 1,
        assumeOrphanPeaksAreZ1Fragments: false);

    MetaMorpheusTask.DetermineAnalyteType(topDownParameters);

    // test output file name (should be proteoform and not peptide)
    Assert.That(GlobalVariables.AnalyteType == "Proteoform");

    var noVariableMods = new List<Modification>();
    var noFixedMods = new List<Modification>();
    var proteins = new List<Protein>
    {
        new Protein("MPKVYSYQEVAEHNGPENFWIIIDDKVYDVSQFKDEHPGGDEIIMDLGGQDATESFVDIGHSDEALRLLKGLYIGDVDKTSERVSVEKVSTSENQSKGSGTLVVILAILMLGVAYYLLNE", "P40312")
    };

    var dataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TopDownTestData\slicedTDYeast.mzML"));
    var precursorSearchMode = new SinglePpmAroundZeroSearchMode(5);
    Tolerance deconvolutionMassTolerance = new PpmTolerance(5);
    var sortedMs2Scans = MetaMorpheusTask.GetMs2Scans(dataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    // Runs one classic search and returns the first non-null PSM (or null if there is none).
    PeptideSpectralMatch FirstHit(bool lastEngineFlag)
    {
        PeptideSpectralMatch[] hits = new PeptideSpectralMatch[sortedMs2Scans.Length];
        new ClassicSearchEngine(hits, sortedMs2Scans, noVariableMods, noFixedMods, null, null, null, proteins, precursorSearchMode, topDownParameters, null, null, new List<string>(), lastEngineFlag).Run();
        return hits.FirstOrDefault(p => p != null);
    }

    // search by new method of looking for all charges
    var allChargesHit = FirstHit(true);
    Assert.That(allChargesHit.MatchedFragmentIons.Count == 62);

    // search by old method of looking for only one charge
    var oneChargeHit = FirstHit(false);
    Assert.That(oneChargeHit.MatchedFragmentIons.Count == 47);

    // compare 2 scores, they should have same integer but new search has a little higher score than old search
    Assert.That(allChargesHit.Score > oneChargeHit.Score);
    Assert.AreEqual(Math.Truncate(allChargesHit.Score), 47);
    Assert.AreEqual(Math.Truncate(oneChargeHit.Score), 47);

    // compare 2 results and evaluate the different matched ions
    var theoreticalProducts = new List<Product>();
    var extraIons = allChargesHit.MatchedFragmentIons.Except(oneChargeHit.MatchedFragmentIons);
    allChargesHit.BestMatchingPeptides.First().Peptide.Fragment(topDownParameters.DissociationType, topDownParameters.DigestionParams.FragmentationTerminus, theoreticalProducts);

    foreach (var ion in extraIons)
    {
        var sameAnnotationProducts = theoreticalProducts
            .Where(product => product.Annotation.ToString().Equals(ion.NeutralTheoreticalProduct.Annotation.ToString()));

        foreach (var product in sameAnnotationProducts)
        {
            // to see if the different matched ions are qualified
            Assert.That(topDownParameters.ProductMassTolerance.Within(ion.Mz.ToMass(ion.Charge), product.NeutralMass));
        }
    }
}
/// <summary>
/// Writes one <c>search_hit</c> element, with a nested <c>search_score</c> element, for the
/// given peptide spectral match.
/// </summary>
/// <param name="psm">The match to write; its peptide, score type and score are read.</param>
/// <param name="hitRank">Value written to the <c>hit_rank</c> attribute (defaults to 1).</param>
/// <remarks>
/// Numeric attribute values are formatted with the invariant culture, so the output always uses
/// '.' as the decimal separator regardless of the machine's regional settings.
/// </remarks>
public void WritePSM(PeptideSpectralMatch psm, int hitRank = 1)
{
    // Fully qualified so that no additional using directive is required.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    _writer.WriteStartElement("search_hit");
    _writer.WriteAttributeString("hit_rank", hitRank.ToString(invariant));
    _writer.WriteAttributeString("peptide", psm.Peptide.Sequence);

    // A missing flanking residue is written as "-".
    _writer.WriteAttributeString("peptide_prev_aa", (psm.Peptide.PreviousAminoAcid != null) ? psm.Peptide.PreviousAminoAcid.Letter.ToString() : "-");
    _writer.WriteAttributeString("peptide_next_aa", (psm.Peptide.NextAminoAcid != null) ? psm.Peptide.NextAminoAcid.Letter.ToString() : "-");

    double pepMonoMass = psm.Peptide.MonoisotopicMass;
    double massDifference = spectrumNeutralMass - pepMonoMass;
    _writer.WriteAttributeString("calc_neutral_pep_mass", pepMonoMass.ToString(invariant));
    _writer.WriteAttributeString("massdiff", massDifference.ToString(invariant));

    Protein protein = psm.Peptide.Parent as Protein;
    if (protein != null)
    {
        // NOTE(review): both attributes receive Description - confirm whether "protein" should
        // carry a separate identifier instead.
        _writer.WriteAttributeString("protein", protein.Description);
        _writer.WriteAttributeString("protein_descr", protein.Description);
    }

    _writer.WriteAttributeString("num_tot_proteins", "1");
    _writer.WriteAttributeString("is_rejected", "0");

    _writer.WriteStartElement("search_score");
    _writer.WriteAttributeString("name", Enum.GetName(typeof(PeptideSpectralMatchScoreType), psm.ScoreType));
    _writer.WriteAttributeString("value", psm.Score.ToString(invariant));
    _writer.WriteEndElement(); // search_score

    _writer.WriteEndElement(); // search_hit
}
/// <summary>
/// Searches a small bottom-up yeast file twice with the classic engine, once with the final
/// boolean flag set and once cleared, and compares matched-ion counts and scores. A second
/// scenario searches the protein "QXQ" (containing an unknown residue) with an open search
/// mode and a custom protease and checks the number of PSMs produced.
/// </summary>
public static void TestMatchIonsOfAllChargesBottomUp()
{
    CommonParameters commonParameters = new CommonParameters();
    MetaMorpheusTask.DetermineAnalyteType(commonParameters);

    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();
    var proteinList = new List<Protein>
    {
        new Protein("AAAHSSLK", ""),
        new Protein("RQPAQPR", ""),
        new Protein("EKAEAEAEK", "")
    };

    var myMsDataFile = Mzml.LoadAllStaticData(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\SmallCalibratible_Yeast.mzML"));
    var searchMode = new SinglePpmAroundZeroSearchMode(5);
    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    // search by new method of looking for all charges
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, commonParameters, null, null, new List<string>(), true).Run();
    var psm = allPsmsArray.Where(p => p != null).ToList();

    Assert.That(psm[1].MatchedFragmentIons.Count == 14);
    // there are ions with same product type and same fragment number but different charges
    Assert.That(psm[1].MatchedFragmentIons[8].NeutralTheoreticalProduct.ProductType == psm[1].MatchedFragmentIons[9].NeutralTheoreticalProduct.ProductType
        && psm[1].MatchedFragmentIons[8].NeutralTheoreticalProduct.FragmentNumber == psm[1].MatchedFragmentIons[9].NeutralTheoreticalProduct.FragmentNumber
        && psm[1].MatchedFragmentIons[8].Charge != psm[1].MatchedFragmentIons[9].Charge);
    Assert.That(psm[2].MatchedFragmentIons.Count == 14);
    Assert.That(psm[4].MatchedFragmentIons.Count == 16);

    // search by old method of looking for only one charge
    PeptideSpectralMatch[] allPsmsArray_oneCharge = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ClassicSearchEngine(allPsmsArray_oneCharge, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, commonParameters, null, null, new List<string>(), false).Run();
    var psm_oneCharge = allPsmsArray_oneCharge.Where(p => p != null).ToList();

    // compare 2 scores, they should have same integer part but new search has a little higher score than old search
    Assert.That(psm[1].Score > psm_oneCharge[1].Score);
    Assert.AreEqual(12, Math.Truncate(psm[1].Score));
    Assert.AreEqual(12, Math.Truncate(psm_oneCharge[1].Score));

    // compare 2 results and evaluate the different matched ions
    var peptideTheorProducts = new List<Product>();
    Assert.That(psm_oneCharge[1].MatchedFragmentIons.Count == 12);
    var differences = psm[1].MatchedFragmentIons.Except(psm_oneCharge[1].MatchedFragmentIons);
    psm[1].BestMatchingPeptides.First().Peptide.Fragment(commonParameters.DissociationType, commonParameters.DigestionParams.FragmentationTerminus, peptideTheorProducts);
    foreach (var ion in differences)
    {
        foreach (var product in peptideTheorProducts)
        {
            if (product.Annotation.ToString().Equals(ion.NeutralTheoreticalProduct.Annotation.ToString()))
            {
                // to see if the different matched ions are qualified
                Assert.That(commonParameters.ProductMassTolerance.Within(ion.Mz.ToMass(ion.Charge), product.NeutralMass));
            }
        }
    }

    // test specific condition: unknown fragment mass; this only happens rarely for sequences with unknown amino acids
    var myMsDataFile1 = new TestDataFile();
    var variableModifications1 = new List<Modification>();
    var fixedModifications1 = new List<Modification>();
    var proteinList1 = new List<Protein> { new Protein("QXQ", null) };
    var searchModes = new OpenSearchMode();

    // NOTE(review): the scans are taken from myMsDataFile (the yeast file), not from
    // myMsDataFile1 created above. Left unchanged because the expected PSM count below
    // depends on it - confirm which file was intended.
    var listOfSortedms2Scans1 = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    List<DigestionMotif> motifs = new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) };
    Protease protease = new Protease("Custom Protease3", CleavageSpecificity.Full, null, null, motifs);
    // The protease registry is static, so Add() would throw ArgumentException if this name were
    // already registered (rerun in the same process, or another test using the same name).
    // Indexer assignment registers or replaces the entry.
    ProteaseDictionary.Dictionary[protease.Name] = protease;

    CommonParameters commonParameters1 = new CommonParameters(
        digestionParams: new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1),
        scoreCutoff: 1,
        addCompIons: false);

    // NOTE(review): the file-specific entry uses the first scenario's parameters rather than
    // commonParameters1 - left unchanged, confirm intent.
    var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)>
    {
        ("", commonParameters),
    };

    PeptideSpectralMatch[] allPsmsArray1 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    bool writeSpectralLibrary = true;
    new ClassicSearchEngine(allPsmsArray1, listOfSortedms2Scans1, variableModifications1, fixedModifications1, null, null, null, proteinList1, searchModes, commonParameters1, fsp, null, new List<string>(), writeSpectralLibrary).Run();

    var psm1 = allPsmsArray1.Where(p => p != null).ToList();
    Assert.AreEqual(222, psm1.Count);
}
/// <summary>
/// Assigns cumulative target/decoy counts and q-values (overall and per notch) to every PSM in
/// <c>AllPsms</c>, treating the PSMs of each protease as a separate population. When
/// <c>CalculateEValue</c> is set, an e-value and e-score are computed for each PSM as well.
/// </summary>
/// <param name="myAnalysisResults">
/// Results object; its <c>DeltaScoreImprovement</c> flag is written when <c>UseDeltaScore</c> is set.
/// </param>
private void DoFalseDiscoveryRateAnalysis(FdrAnalysisResults myAnalysisResults)
{
    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return;
    }

    // Tie-breaker shared by every ordering below: absolute precursor mass error,
    // with PSMs lacking a peptide mass ranked last.
    Func<PeptideSpectralMatch, double> precursorMassError = b => b.PeptideMonisotopicMass.HasValue
        ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value)
        : double.MaxValue;

    // Key used to keep only the best-ranked PSM per (file, scan, peptide mass) while counting PSMs at the cutoff.
    Func<PeptideSpectralMatch, Tuple<string, int, double?>> scanAndMassKey =
        b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass);

    // calculate FDR on a per-protease basis (targets and decoys for a specific protease)
    var psmsGroupedByProtease = AllPsms.GroupBy(p => p.DigestionParams.Protease);

    foreach (var proteasePsms in psmsGroupedByProtease)
    {
        var psms = proteasePsms.ToList();

        // generate the null distribution for e-value calculations
        double globalMeanScore = 0;
        int globalMeanCount = 0;

        if (CalculateEValue && psms.Any())
        {
            List<double> combinedScores = new List<double>();

            foreach (PeptideSpectralMatch psm in psms)
            {
                // NOTE: sorts the PSM's score list in place (ascending), so its best score ends up last
                psm.AllScores.Sort();

                int countBeforeThisPsm = combinedScores.Count;
                combinedScores.AddRange(psm.AllScores);

                // remove the top scoring peptide of THIS psm; if the psm contributed no scores,
                // leave the list alone so an earlier psm's score is not dropped by mistake
                if (combinedScores.Count > countBeforeThisPsm)
                {
                    combinedScores.RemoveAt(combinedScores.Count - 1);
                }
            }

            if (combinedScores.Any())
            {
                globalMeanScore = combinedScores.Average();
                globalMeanCount = (int)((double)combinedScores.Count / psms.Count);
            }
            // else: should be a very rare case... if there are PSMs but each PSM only has one hit;
            // both values keep their initial 0
        }

        // Calculate delta scores for the psms (regardless of if we are using them)
        foreach (PeptideSpectralMatch psm in psms)
        {
            if (psm != null)
            {
                psm.CalculateDeltaScore(ScoreCutoff);
            }
        }

        // determine if Score or DeltaScore performs better
        if (UseDeltaScore)
        {
            const double qValueCutoff = 0.01; // optimize to get the most PSMs at a 1% FDR

            List<PeptideSpectralMatch> scoreSorted = psms
                .OrderByDescending(b => b.Score)
                .ThenBy(precursorMassError)
                .GroupBy(scanAndMassKey)
                .Select(b => b.First())
                .ToList();
            int ScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

            scoreSorted = psms
                .OrderByDescending(b => b.DeltaScore)
                .ThenBy(precursorMassError)
                .GroupBy(scanAndMassKey)
                .Select(b => b.First())
                .ToList();
            int DeltaScorePSMs = GetNumPSMsAtqValueCutoff(scoreSorted, qValueCutoff);

            // sort by best method
            myAnalysisResults.DeltaScoreImprovement = DeltaScorePSMs > ScorePSMs;
            psms = myAnalysisResults.DeltaScoreImprovement
                ? psms.OrderByDescending(b => b.DeltaScore).ThenBy(precursorMassError).ToList()
                : psms.OrderByDescending(b => b.Score).ThenBy(precursorMassError).ToList();
        }
        else // sort by score
        {
            psms = psms.OrderByDescending(b => b.Score).ThenBy(precursorMassError).ToList();
        }

        double cumulativeTarget = 0;
        double cumulativeDecoy = 0;

        // set up arrays for local FDRs; the extra last slot is used by PSMs whose Notch is null
        double[] cumulativeTargetPerNotch = new double[MassDiffAcceptorNumNotches + 1];
        double[] cumulativeDecoyPerNotch = new double[MassDiffAcceptorNumNotches + 1];

        // Assign FDR values to PSMs
        for (int i = 0; i < psms.Count; i++)
        {
            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                break;
            }

            PeptideSpectralMatch psm = psms[i];
            int notch = psm.Notch ?? MassDiffAcceptorNumNotches;

            if (psm.IsDecoy)
            {
                // the PSM can be ambiguous between a target and a decoy sequence
                // in that case, count it as the fraction of decoy hits
                // e.g. if the PSM matched to 1 target and 2 decoys, it counts as 2/3 decoy
                double decoyHits = 0;
                double totalHits = 0;
                var hits = psm.BestMatchingPeptides.GroupBy(p => p.Peptide.FullSequence);

                foreach (var hit in hits)
                {
                    if (hit.First().Peptide.Protein.IsDecoy)
                    {
                        decoyHits++;
                    }
                    totalHits++;
                }

                cumulativeDecoy += decoyHits / totalHits;
                cumulativeDecoyPerNotch[notch] += decoyHits / totalHits;
            }
            else
            {
                cumulativeTarget++;
                cumulativeTargetPerNotch[notch]++;
            }

            double qValue = Math.Min(1, cumulativeDecoy / cumulativeTarget);
            double qValueNotch = Math.Min(1, cumulativeDecoyPerNotch[notch] / cumulativeTargetPerNotch[notch]);

            double maximumLikelihood = 0;
            double eValue = 0;
            double eScore = 0;
            if (CalculateEValue)
            {
                eValue = GetEValue(psm, globalMeanCount, globalMeanScore, out maximumLikelihood);
                eScore = -Math.Log(eValue, 10);
            }

            psm.SetFdrValues(cumulativeTarget, cumulativeDecoy, qValue, cumulativeTargetPerNotch[notch], cumulativeDecoyPerNotch[notch], qValueNotch, maximumLikelihood, eValue, eScore, CalculateEValue);
        }

        // set q-value thresholds such that a lower scoring PSM can't have
        // a higher confidence than a higher scoring PSM
        // Populate min qValues
        double qValueThreshold = 1.0;
        double[] qValueNotchThreshold = new double[MassDiffAcceptorNumNotches + 1];
        for (int i = 0; i < qValueNotchThreshold.Length; i++)
        {
            qValueNotchThreshold[i] = 1.0;
        }

        // walk from the worst-ranked PSM upwards, carrying the running minimum q-value
        for (int i = psms.Count - 1; i >= 0; i--)
        {
            PeptideSpectralMatch psm = psms[i];

            // threshold q-values
            if (psm.FdrInfo.QValue > qValueThreshold)
            {
                psm.FdrInfo.QValue = qValueThreshold;
            }
            else if (psm.FdrInfo.QValue < qValueThreshold)
            {
                qValueThreshold = psm.FdrInfo.QValue;
            }

            // threshold notch q-values
            int notch = psm.Notch ?? MassDiffAcceptorNumNotches;
            if (psm.FdrInfo.QValueNotch > qValueNotchThreshold[notch])
            {
                psm.FdrInfo.QValueNotch = qValueNotchThreshold[notch];
            }
            else if (psm.FdrInfo.QValueNotch < qValueNotchThreshold[notch])
            {
                qValueNotchThreshold[notch] = psm.FdrInfo.QValueNotch;
            }
        }
    }

    if (AnalysisType == "PSM")
    {
        CountPsm();
    }
}
/// <summary>
/// Builds three modified peptides of protein "MMKMMK", runs protein parsimony and protein
/// scoring/FDR on them, and checks the first protein group's sequence-coverage strings and
/// per-residue modification occupancy text.
/// </summary>
public static void TryFailSequenceCoverage()
{
    var prot1 = new Protein("MMKMMK", "prot1");

    ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
    ModificationWithMass mod1 = new ModificationWithMass("mod1", "mt", motifM, TerminusLocalization.NProt, 10);
    ModificationWithMass mod2 = new ModificationWithMass("mod2", "mt", motifM, TerminusLocalization.NPep, 10);
    ModificationWithMass mod3 = new ModificationWithMass("mod3", "mt", motifM, TerminusLocalization.Any, 10);
    ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
    ModificationWithMass mod4 = new ModificationWithMass("mod4", "mt", motifK, TerminusLocalization.PepC, 10);
    ModificationWithMass mod5 = new ModificationWithMass("mod5", "mt", motifK, TerminusLocalization.ProtC, 10);

    // keys follow the constructor's "allModsOneIsNterminus" indexing
    Dictionary<int, ModificationWithMass> modsFor1 = new Dictionary<int, ModificationWithMass>
    {
        { 1, mod1 },
        { 3, mod3 },
        { 5, mod4 },
    };
    Dictionary<int, ModificationWithMass> modsFor2 = new Dictionary<int, ModificationWithMass>
    {
        { 1, mod2 },
        { 5, mod5 },
    };
    Dictionary<int, ModificationWithMass> modsFor3 = new Dictionary<int, ModificationWithMass>
    {
        { 1, mod1 },
        { 5, mod3 },
        { 8, mod5 }
    };

    DigestionParams digestionParams = new DigestionParams();
    var pwsm1 = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 3, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor1, numFixedMods: 0);
    var pwsm2 = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 4, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor2, numFixedMods: 0);
    var pwsm3 = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor3, numFixedMods: 0);

    // each compact peptide maps back to exactly the peptide it was derived from
    Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>> matching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>
    {
        { pwsm1.CompactPeptide(TerminusType.None), new HashSet<PeptideWithSetModifications> { pwsm1 } },
        { pwsm2.CompactPeptide(TerminusType.None), new HashSet<PeptideWithSetModifications> { pwsm2 } },
        { pwsm3.CompactPeptide(TerminusType.None), new HashSet<PeptideWithSetModifications> { pwsm3 } },
    };

    IScan scan = new ThisTestScan();

    var psm1 = new PeptideSpectralMatch(pwsm1.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);
    psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
    psm1.MatchToProteinLinkedPeptides(matching);

    var psm2 = new PeptideSpectralMatch(pwsm2.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);
    psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
    psm2.MatchToProteinLinkedPeptides(matching);

    var psm3 = new PeptideSpectralMatch(pwsm3.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);
    psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
    psm3.MatchToProteinLinkedPeptides(matching);

    List<PeptideSpectralMatch> newPsms = new List<PeptideSpectralMatch>
    {
        psm1,
        psm2,
        psm3,
    };

    ProteinParsimonyEngine ppe = new ProteinParsimonyEngine(matching, true, new CommonParameters(), new List<string>());
    ProteinParsimonyResults parsimonyResults = (ProteinParsimonyResults)ppe.Run();

    ProteinScoringAndFdrEngine psafe = new ProteinScoringAndFdrEngine(parsimonyResults.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List<string>());
    psafe.Run();

    // every assertion below inspects the first protein group, so look it up once
    var firstProteinGroup = parsimonyResults.ProteinGroups.First();
    firstProteinGroup.CalculateSequenceCoverage();

    var firstSequenceCoverageDisplayList = firstProteinGroup.SequenceCoverageDisplayList.First();
    Assert.AreEqual("MMKMMK", firstSequenceCoverageDisplayList);

    var firstSequenceCoverageDisplayListWithMods = firstProteinGroup.SequenceCoverageDisplayListWithMods.First();
    Assert.AreEqual("[mod1]-MM[mod3]KM[mod3]MK-[mod5]", firstSequenceCoverageDisplayListWithMods);

    var firstModInfo = firstProteinGroup.ModsInfo.First();
    Assert.IsTrue(firstModInfo.Contains(@"#aa1[mod1,info:occupancy=1.00(2/2)]"));
    Assert.IsTrue(firstModInfo.Contains(@"#aa2[mod3,info:occupancy=0.50(1/2)]"));
    Assert.IsFalse(firstModInfo.Contains(@"#aa3"));
    Assert.IsTrue(firstModInfo.Contains(@"#aa4[mod3,info:occupancy=0.50(1/2)]"));
    Assert.IsFalse(firstModInfo.Contains(@"#aa5"));
    Assert.IsTrue(firstModInfo.Contains(@"#aa6[mod5,info:occupancy=1.00(2/2)]"));
}
/// <summary>
/// Runs the nonspecific search: for every MS2 scan, tallies fragment-bin hits per indexed peptide,
/// narrows the candidates to peptides reachable from the scan's precursor mass window, and then
/// scores candidates from the highest initial tally downwards until a PSM is recorded in
/// <c>globalPsms</c> or the tally drops to the score cutoff.
/// </summary>
/// <returns>A <see cref="MetaMorpheusEngineResults"/> wrapping this engine.</returns>
/// <exception cref="NotImplementedException">
/// Thrown when <c>lp</c> contains a product type other than B, Y, C or Zdot.
/// </exception>
protected override MetaMorpheusEngineResults RunSpecific()
{
    double progress = 0;
    int oldPercentProgress = 0;
    // guards progress/oldPercentProgress, which are shared by all Parallel.ForEach workers
    object progressLock = new object();
    ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions, nestedIds));
    TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(lp);

    Parallel.ForEach(Partitioner.Create(0, listOfSortedms2Scans.Length), new ParallelOptions { MaxDegreeOfParallelism = CommonParameters.MaxThreadsToUsePerFile }, range =>
    {
        // per-worker scratch buffers, reused for every scan in this range
        byte[] scoringTable = new byte[peptideIndex.Count];
        HashSet<int> idsOfPeptidesPossiblyObserved = new HashSet<int>();

        for (int i = range.Item1; i < range.Item2; i++)
        {
            // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
            Array.Clear(scoringTable, 0, scoringTable.Length);
            idsOfPeptidesPossiblyObserved.Clear();
            var scan = listOfSortedms2Scans[i];

            // get bins to add points to
            List<int> allBinsToSearch = GetBinsToSearch(scan);

            for (int j = 0; j < allBinsToSearch.Count; j++)
            {
                fragmentIndex[allBinsToSearch[j]].ForEach(id => scoringTable[id]++);
            }

            // populate ids of possibly observed with those containing allowed precursor masses
            List<int> binsToSearch = new List<int>();
            int obsPrecursorFloorMz = (int)Math.Floor(CommonParameters.PrecursorMassTolerance.GetMinimumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
            int obsPrecursorCeilingMz = (int)Math.Ceiling(CommonParameters.PrecursorMassTolerance.GetMaximumValue(scan.PrecursorMass) * fragmentBinsPerDalton);
            for (int fragmentBin = obsPrecursorFloorMz; fragmentBin <= obsPrecursorCeilingMz; fragmentBin++)
            {
                binsToSearch.Add(fragmentBin);
            }

            foreach (ProductType pt in lp)
            {
                int binShift;
                switch (pt)
                {
                    case ProductType.B:
                        binShift = bBinShift;
                        break;

                    case ProductType.Y:
                        binShift = 0;
                        break;

                    case ProductType.C:
                        binShift = cBinShift;
                        break;

                    case ProductType.Zdot:
                        binShift = zdotBinShift;
                        break;

                    default:
                        throw new NotImplementedException();
                }

                for (int j = 0; j < binsToSearch.Count; j++)
                {
                    int bin = binsToSearch[j] - binShift;
                    // the shift can push the bin below zero; skip instead of indexing out of range
                    if (bin >= 0 && bin < fragmentIndex.Length && fragmentIndex[bin] != null)
                    {
                        fragmentIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                    }
                }
            }

            for (int j = 0; j < binsToSearch.Count; j++)
            {
                int bin = binsToSearch[j];
                if (bin >= 0 && bin < fragmentIndexPrecursor.Length && fragmentIndexPrecursor[bin] != null)
                {
                    fragmentIndexPrecursor[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                }
            }

            // done with initial scoring; refine scores and create PSMs
            if (idsOfPeptidesPossiblyObserved.Any())
            {
                // start one above the best tally because the loop decrements before use
                int maxInitialScore = idsOfPeptidesPossiblyObserved.Max(id => scoringTable[id]) + 1;
                while (maxInitialScore > CommonParameters.ScoreCutoff)
                {
                    maxInitialScore--;
                    foreach (var id in idsOfPeptidesPossiblyObserved.Where(id => scoringTable[id] == maxInitialScore))
                    {
                        var candidatePeptide = peptideIndex[id];
                        double[] fragmentMasses = candidatePeptide.ProductMassesMightHaveDuplicatesAndNaNs(lp).Distinct().Where(p => !Double.IsNaN(p)).OrderBy(p => p).ToArray();

                        double peptideScore = CalculatePeptideScore(scan.TheScan, CommonParameters.ProductMassTolerance, fragmentMasses, scan.PrecursorMass, dissociationTypes, addCompIons, maximumMassThatFragmentIonScoreIsDoubled);

                        Tuple<int, double> notchAndPrecursor = Accepts(scan.PrecursorMass, candidatePeptide, terminusType, massDiffAcceptor);
                        if (notchAndPrecursor.Item1 >= 0)
                        {
                            CompactPeptideWithModifiedMass cp = new CompactPeptideWithModifiedMass(candidatePeptide, notchAndPrecursor.Item2);

                            if (globalPsms[i] == null)
                            {
                                globalPsms[i] = new PeptideSpectralMatch(cp, notchAndPrecursor.Item1, peptideScore, i, scan);
                            }
                            else
                            {
                                globalPsms[i].AddOrReplace(cp, peptideScore, notchAndPrecursor.Item1, CommonParameters.ReportAllAmbiguity);
                            }
                        }
                    }

                    // stop at the first initial-score tier that produced a PSM
                    if (globalPsms[i] != null)
                    {
                        break;
                    }
                }
            }

            // report search progress; the counters are shared between workers, so update them under the lock
            lock (progressLock)
            {
                progress++;
                var percentProgress = (int)((progress / listOfSortedms2Scans.Length) * 100);

                if (percentProgress > oldPercentProgress)
                {
                    oldPercentProgress = percentProgress;
                    ReportProgress(new ProgressEventArgs(percentProgress, "Performing nonspecific search... " + currentPartition + "/" + CommonParameters.TotalPartitions, nestedIds));
                }
            }
        }
    });

    return new MetaMorpheusEngineResults(this);
}
/// <summary>
/// Runs the modern (indexed) search twice over the same test scans, once without and once with
/// complementary ions, and checks that the complementary-ion search scores the first scan higher
/// (by more than 3, but at most double).
/// </summary>
public static void TestCompIons_ModernSearch()
{
    var myMsDataFile = new TestDataFile();
    var variableModifications = new List<Modification>();
    var fixedModifications = new List<Modification>();

    var proteinList = new List<Protein> { new Protein("MNNNKQQQ", null) };

    SearchParameters searchParameters = new SearchParameters
    {
        MassDiffAcceptorType = MassDiffAcceptorType.Exact,
        SearchTarget = true,
    };

    List<DigestionMotif> motifs = new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) };
    Protease protease = new Protease("singleN4", CleavageSpecificity.Full, null, null, motifs);
    ProteaseDictionary.Dictionary.Add(protease.Name, protease);

    CommonParameters withoutCompIons = new CommonParameters(digestionParams: new DigestionParams(protease: protease.Name, minPeptideLength: 1), scoreCutoff: 1);
    var fsp = new List<(string fileName, CommonParameters fileSpecificParameters)>();
    fsp.Add(("", withoutCompIons));

    CommonParameters withCompIons = new CommonParameters(digestionParams: new DigestionParams(protease: protease.Name, minPeptideLength: 1), scoreCutoff: 1, addCompIons: true);
    var fspComp = new List<(string fileName, CommonParameters fileSpecificParameters)>();
    // file-specific parameters for the complementary-ion run must be the complementary-ion parameters
    fspComp.Add(("", withCompIons));

    var indexEngine = new IndexingEngine(proteinList, variableModifications, fixedModifications, null, null, null, 1, DecoyType.Reverse, withoutCompIons, fsp, searchParameters.MaxFragmentSize, false, new List<FileInfo>(), TargetContaminantAmbiguity.RemoveContaminant, new List<string>());
    var indexResults = (IndexingResults)indexEngine.Run();

    var listOfSortedms2Scans = MetaMorpheusTask.GetMs2Scans(myMsDataFile, null, new CommonParameters()).OrderBy(b => b.PrecursorMass).ToArray();

    MassDiffAcceptor massDiffAcceptor = SearchTask.GetMassDiffAcceptor(withoutCompIons.PrecursorMassTolerance, searchParameters.MassDiffAcceptorType, searchParameters.CustomMdac);

    // without complementary ions
    PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ModernSearchEngine(allPsmsArray, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, withoutCompIons, fsp, massDiffAcceptor, searchParameters.MaximumMassThatFragmentIonScoreIsDoubled, new List<string>()).Run();

    // with complementary ions
    PeptideSpectralMatch[] allPsmsArray2 = new PeptideSpectralMatch[listOfSortedms2Scans.Length];
    new ModernSearchEngine(allPsmsArray2, listOfSortedms2Scans, indexResults.PeptideIndex, indexResults.FragmentIndex, 0, withCompIons, fspComp, massDiffAcceptor, searchParameters.MaximumMassThatFragmentIonScoreIsDoubled, new List<string>()).Run();

    // both searches ran over the same scans, so the result arrays have the same size
    Assert.AreEqual(allPsmsArray.Length, allPsmsArray2.Length);

    Assert.That(allPsmsArray[0] != null);
    Assert.That(allPsmsArray2[0] != null);

    Assert.IsTrue(allPsmsArray2[0].Score > 1);

    Assert.AreEqual(allPsmsArray[0].ScanNumber, allPsmsArray2[0].ScanNumber);

    // complementary ions add matches: more than +3, but never more than double
    Assert.IsTrue(allPsmsArray2[0].Score <= allPsmsArray[0].Score * 2 && allPsmsArray2[0].Score > allPsmsArray[0].Score + 3);
}
/// <summary>
/// For every identified PSM, matches internal fragment ions of each ambiguous candidate peptide
/// against the PSM's scan. Candidates with fewer than (max - 1) matched internal ions are removed
/// from the PSM; for the remaining candidates the matched internal ions are appended to the PSM's
/// matched-fragment list for that peptide.
/// </summary>
/// <param name="fileSpecificPsms">PSMs indexed in parallel with <paramref name="arrayOfMs2ScansSortedByMass"/>; null entries are skipped.</param>
/// <param name="arrayOfMs2ScansSortedByMass">Scans; the scan at index i is used for the PSM at index i.</param>
/// <param name="combinedParams">Parameters supplying the dissociation type and fragment-matching settings.</param>
/// <param name="minInternalFragmentLength">Minimum length passed to <c>FragmentInternally</c>.</param>
public static void MatchInternalFragmentIons(PeptideSpectralMatch[] fileSpecificPsms, Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass, CommonParameters combinedParams, int minInternalFragmentLength)
{
    // for each PSM with an ID
    for (int index = 0; index < fileSpecificPsms.Length; index++)
    {
        PeptideSpectralMatch psm = fileSpecificPsms[index];
        if (psm == null || !psm.BestMatchingPeptides.Any())
        {
            continue;
        }

        // Get the scan
        Ms2ScanWithSpecificMass scanForThisPsm = arrayOfMs2ScansSortedByMass[index];

        // with Autodetect, fall back to the dissociation type recorded on the scan itself
        DissociationType dissociationType = combinedParams.DissociationType == DissociationType.Autodetect
            ? scanForThisPsm.TheScan.DissociationType.Value
            : combinedParams.DissociationType;

        // Get the theoretical peptides (copied out, because candidates may be removed from the PSM below)
        List<PeptideWithSetModifications> ambiguousPeptides = new List<PeptideWithSetModifications>();
        List<int> notches = new List<int>();
        foreach (var (notch, candidate) in psm.BestMatchingPeptides)
        {
            ambiguousPeptides.Add(candidate);
            notches.Add(notch);
        }

        // get matched ions for each peptide
        List<List<MatchedFragmentIon>> matchedIonsForAllAmbiguousPeptides = new List<List<MatchedFragmentIon>>();
        List<Product> internalFragments = new List<Product>();
        foreach (PeptideWithSetModifications peptide in ambiguousPeptides)
        {
            internalFragments.Clear();
            // use the resolved dissociation type so that Autodetect is never passed to the fragmenter
            peptide.FragmentInternally(dissociationType, minInternalFragmentLength, internalFragments);
            // TODO: currently, internal and terminal ions can match to the same observed peaks (much like how b- and y-ions can match to the same peaks). Investigate if we should change that...
            matchedIonsForAllAmbiguousPeptides.Add(MetaMorpheusEngine.MatchFragmentIons(scanForThisPsm, internalFragments, combinedParams));
        }

        // Find the max number of matched ions
        int maxNumMatchedIons = matchedIonsForAllAmbiguousPeptides.Max(x => x.Count);

        // remove peptides if they have fewer than max-1 matched ions, thus requiring at least two internal ions to disambiguate an ID
        // if not removed, then add the matched internal ions
        HashSet<PeptideWithSetModifications> peptidesWithInternalIonsAdded = new HashSet<PeptideWithSetModifications>();
        for (int peptideIndex = 0; peptideIndex < ambiguousPeptides.Count; peptideIndex++)
        {
            List<MatchedFragmentIon> matchedInternalIons = matchedIonsForAllAmbiguousPeptides[peptideIndex];
            PeptideWithSetModifications currentPwsm = ambiguousPeptides[peptideIndex];

            if (matchedInternalIons.Count + 1 < maxNumMatchedIons)
            {
                // if we should remove the theoretical, remove it
                psm.RemoveThisAmbiguousPeptide(notches[peptideIndex], currentPwsm);
            }
            else if (peptidesWithInternalIonsAdded.Add(currentPwsm))
            {
                // Add returns false when the peptide was already seen (e.g. same peptide under another notch),
                // so its internal ions are only appended once
                psm.PeptidesToMatchingFragments[currentPwsm].AddRange(matchedInternalIons);
            }
        }
    }
}
/// <summary>
/// Digests "MNNNSKQQQ" with a custom protease and one variable modification on S, checks the
/// expected digestion products, then builds three PSMs and verifies the modification-occupancy
/// text reported for the first protein group after parsimony and protein scoring.
/// </summary>
public static void TestPTMOutput()
{
    List<Modification> variableModifications = new List<Modification>();
    List<Modification> fixedModifications = new List<Modification>();

    ModificationMotif.TryGetMotif("S", out ModificationMotif motif);
    variableModifications.Add(new Modification(_originalId: "resMod", _modificationType: "HaHa", _target: motif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));

    var proteinList = new List<Protein> { new Protein("MNNNSKQQQ", "accession") };
    var protease = new Protease("CustomProtease", CleavageSpecificity.Full, null, null, new List<DigestionMotif> { new DigestionMotif("K", null, 1, null) });
    ProteaseDictionary.Dictionary.Add(protease.Name, protease);

    DigestionParams digestionParams = new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1);

    // digest once and check the products directly
    var firstProtDigest = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).ToList();
    Assert.AreEqual("QQQ", firstProtDigest.Last().FullSequence);
    Assert.AreEqual("MNNNSK", firstProtDigest[0].FullSequence);
    Assert.AreEqual("MNNNS[HaHa:resMod on S]K", firstProtDigest[1].FullSequence);
    Assert.AreEqual("NNNSK", firstProtDigest[2].FullSequence);
    Assert.AreEqual("NNNS[HaHa:resMod on S]K", firstProtDigest[3].FullSequence);

    var peptideList = new HashSet<PeptideWithSetModifications>();
    foreach (var protein in proteinList)
    {
        foreach (var peptide in protein.Digest(digestionParams, new List<Modification>(), variableModifications))
        {
            peptideList.Add(peptide);
        }
    }

    MsDataScan jdfk = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
    Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(jdfk, 2, 0, "File", new CommonParameters());

    // match2 and match3 deliberately use the same peptide (element 1); the expected occupancy below is 2 of 3
    var match1 = new PeptideSpectralMatch(peptideList.ElementAt(0), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
    match1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
    var match2 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
    match2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
    var match3 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
    match3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

    List<PeptideSpectralMatch> psms = new List<PeptideSpectralMatch> { match1, match2, match3 };
    psms.ForEach(p => p.ResolveAllAmbiguities());

    ProteinParsimonyEngine engine = new ProteinParsimonyEngine(psms, true, new CommonParameters(), new List<string> { "ff" });
    var cool = (ProteinParsimonyResults)engine.Run();
    var proteinGroups = cool.ProteinGroups;

    ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), new List<string>());
    f.Run();

    Assert.AreEqual("#aa5[resMod on S,info:occupancy=0.67(2/3)];", proteinGroups.First().ModsInfo[0]);
}
/// <summary>
/// Runs the search task: prepares SILAC/turnover labels, loads modifications, proteins and
/// spectral libraries, records the prose description of the settings, searches every spectra
/// file with the configured search type (modern, non-specific or classic) and finally hands
/// the collected PSMs to a <c>PostSearchAnalysisTask</c>.
/// </summary>
/// <param name="OutputFolder">Folder passed on to post-search analysis; also the parent of "Individual File Results".</param>
/// <param name="dbFilenameList">Databases used to load proteins, spectral libraries and to generate indexes.</param>
/// <param name="currentRawFileList">Paths of the spectra files to search, in processing order.</param>
/// <param name="taskId">Identifier used for status and progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The results returned by the post-search analysis task.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    if (SearchParameters.DoQuantification)
    {
        // disable quantification if a .mgf is being used
        if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
        {
            SearchParameters.DoQuantification = false;
        }
        //if we're doing SILAC, assign and add the silac labels to the residue dictionary
        else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
        {
            char heavyLabel = 'a'; //char to assign

            //add the Turnoverlabels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
            if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
            {
                //original silacLabels object is null, so we need to initialize it
                SearchParameters.SilacLabels = new List<SilacLabel>();
                if (SearchParameters.StartTurnoverLabel != null)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                    SearchParameters.SilacLabels.Add(SearchParameters.StartTurnoverLabel);
                }
                if (SearchParameters.EndTurnoverLabel != null)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                    SearchParameters.SilacLabels.Add(SearchParameters.EndTurnoverLabel);
                }
            }
            else
            {
                //change the silac residues to lower case amino acids (currently null)
                List<SilacLabel> updatedLabels = new List<SilacLabel>();
                for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    updatedLabels.Add(updatedLabel.updatedLabel);
                }
                SearchParameters.SilacLabels = updatedLabels;
            }
        }
    }

    //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
    if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
    {
        SearchParameters.SilacLabels = null;
    }

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);
    SanitizeProteinDatabase(proteinList, SearchParameters.TCAmbiguity);

    // load spectral libraries
    var spectralLibrary = LoadSpectralLibraries(taskId, dbFilenameList);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following search settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
        + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the search task
    MyTaskResults = new MyTaskResults(this);
    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
    int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast<FdrCategory>().Last() + 1); //+1 because it starts at zero
    List<PeptideSpectralMatch>[] allCategorySpecificPsms = new List<PeptideSpectralMatch>[numFdrCategories];
    for (int i = 0; i < numFdrCategories; i++)
    {
        allCategorySpecificPsms[i] = new List<PeptideSpectralMatch>();
    }

    FlashLfqResults flashLfqResults = null;

    MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

    // deferred query; it is enumerated once when the post-search parameters are built below
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);
    Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

    // file name (without extension) -> { number of MS2 scans in the file, number of MS2 scans with a specific mass }
    Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);
        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
        numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // modern search
        if (SearchParameters.SearchType == SearchType.Modern)
        {
            for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
            {
                List<PeptideWithSetModifications> peptideIndex = null;
                List<Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                    ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                Status("Getting fragment dictionary...", new List<string> { taskId });
                var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                    SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams, FileSpecificParameters,
                    SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List<string> { taskId });
                List<int>[] fragmentIndex = null;
                List<int>[] precursorIndex = null;

                lock (indexLock)
                {
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                }

                Status("Searching files...", taskId);

                new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition,
                    combinedParams, this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                if (GlobalVariables.StopLoops)
                {
                    break;
                }
            }
        }
        // nonspecific search
        else if (SearchParameters.SearchType == SearchType.NonSpecific)
        {
            PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
            for (int i = 0; i < numFdrCategories; i++) //only add if we're using for FDR, else ignore it as null.
            {
                fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
            }

            //create params for N, C, or both if semi
            List<CommonParameters> paramsToUse = new List<CommonParameters> { combinedParams };
            if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
            {
                paramsToUse.Clear();
                List<FragmentationTerminus> terminiToUse = new List<FragmentationTerminus> { FragmentationTerminus.N, FragmentationTerminus.C };
                foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                {
                    paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                }
            }

            //Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
            List<int>[] coisolationIndex = new List<int>[] { new List<int>() };
            if (arrayOfMs2ScansSortedByMass.Length != 0)
            {
                int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                coisolationIndex = new List<int>[maxScanNumber + 1];
                for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                {
                    int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                    if (coisolationIndex[scanNumber] == null)
                    {
                        coisolationIndex[scanNumber] = new List<int> { i };
                    }
                    else
                    {
                        coisolationIndex[scanNumber].Add(i);
                    }
                }
                // drop the scan numbers that had no ms2 scan attached
                coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();
            }

            //foreach terminus we're going to look at
            foreach (CommonParameters paramToUse in paramsToUse)
            {
                //foreach database partition
                for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                {
                    List<PeptideWithSetModifications> peptideIndex = null;

                    List<Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                        ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                    List<int>[] fragmentIndex = null;
                    List<int>[] precursorIndex = null;

                    Status("Getting fragment dictionary...", new List<string> { taskId });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                        SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse, FileSpecificParameters,
                        SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List<string> { taskId });
                    lock (indexLock)
                    {
                        GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                    }

                    Status("Searching files...", taskId);

                    new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                        precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                        SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }
                }
            }

            lock (psmLock)
            {
                for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                {
                    if (allCategorySpecificPsms[i] != null)
                    {
                        allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                    }
                }
            }
        }
        // classic search
        else
        {
            Status("Starting search...", thisId);
            var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId, SearchParameters.WriteSpectralLibrary);
            newClassicSearchEngine.Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
        }

        //look for internal fragments
        if (SearchParameters.MinAllowedInternalFragmentLength != 0)
        {
            MatchInternalFragmentIons(fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams, SearchParameters.MinAllowedInternalFragmentLength);
        }

        // calculate/set spectral angles if there is a spectral library being used
        if (spectralLibrary != null)
        {
            Status("Calculating spectral library similarity...", thisId);
        }
        // NOTE(review): invoked even when spectralLibrary is null; presumably the callee handles a null library - confirm
        SpectralLibrarySearchFunction.CalculateSpectralAngles(spectralLibrary, fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams);

        lock (psmLock)
        {
            allPsms.AddRange(fileSpecificPsms);
        }

        completedFiles++;
        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        // Multiply before dividing: plain completedFiles / Count is integer division and would
        // report 0 for every file except the last one (where it would report 1, not 100).
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    if (spectralLibrary != null)
    {
        spectralLibrary.CloseConnections();
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

    //resolve category specific fdrs (for speedy semi and nonspecific
    if (SearchParameters.SearchType == SearchType.NonSpecific)
    {
        allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);
    }

    PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
    {
        SearchTaskResults = MyTaskResults,
        SearchTaskId = taskId,
        SearchParameters = SearchParameters,
        ProteinList = proteinList,
        AllPsms = allPsms,
        VariableModifications = variableModifications,
        FixedModifications = fixedModifications,
        ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
        CurrentRawFileList = currentRawFileList,
        MyFileManager = myFileManager,
        NumNotches = numNotches,
        OutputFolder = OutputFolder,
        IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
        FlashLfqResults = flashLfqResults,
        FileSettingsList = fileSettingsList,
        NumMs2SpectraPerFile = numMs2SpectraPerFile,
        DatabaseFilenameList = dbFilenameList
    };
    PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
    {
        Parameters = parameters,
        FileSpecificParameters = this.FileSpecificParameters,
        CommonParameters = CommonParameters
    };
    return postProcessing.Run();
}
/// <summary>
/// Builds three PSMs from the digest of a small protein (the third one also receives a second
/// peptide via AddOrReplace), runs <c>FdrAnalysisEngine</c> over them with a two-notch
/// acceptor, and checks the per-notch and overall cumulative target/decoy counts.
/// </summary>
public static void FdrTestMethod()
{
    MassDiffAcceptor acceptor = new DotMassDiffAcceptor(null, new List<double> { 0, 1.0029 }, new PpmTolerance(5));
    List<string> noNestedIds = new List<string>();

    // digest the test protein with default digestion settings and no modifications
    Protein testProtein = new Protein("MNKNNKNNNKNNNNK", null);
    DigestionParams digestion = new DigestionParams();
    List<PeptideWithSetModifications> peptides = testProtein
        .Digest(digestion, new List<Modification>(), new List<Modification>())
        .ToList();
    PeptideWithSetModifications first = peptides[0];
    PeptideWithSetModifications second = peptides[1];
    PeptideWithSetModifications third = peptides[2];
    PeptideWithSetModifications fourth = peptides[3];

    TestDataFile dataFile = new TestDataFile(new List<PeptideWithSetModifications> { first, second, third });

    // scans 2, 4 and 6 of the test file are paired with the first three peptides
    MsDataScan rawScanA = dataFile.GetOneBasedScan(2);
    Ms2ScanWithSpecificMass ms2A = new Ms2ScanWithSpecificMass(rawScanA, first.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
    PeptideSpectralMatch matchA = new PeptideSpectralMatch(first, 0, 3, 0, ms2A, digestion, new List<MatchedFragmentIon>());

    MsDataScan rawScanB = dataFile.GetOneBasedScan(4);
    Ms2ScanWithSpecificMass ms2B = new Ms2ScanWithSpecificMass(rawScanB, second.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
    PeptideSpectralMatch matchB = new PeptideSpectralMatch(second, 1, 2, 1, ms2B, digestion, new List<MatchedFragmentIon>());

    MsDataScan rawScanC = dataFile.GetOneBasedScan(6);
    Ms2ScanWithSpecificMass ms2C = new Ms2ScanWithSpecificMass(rawScanC, third.MonoisotopicMass.ToMz(1), 1, null, new CommonParameters());
    PeptideSpectralMatch matchC = new PeptideSpectralMatch(third, 0, 1, 2, ms2C, digestion, new List<MatchedFragmentIon>());
    matchC.AddOrReplace(fourth, 1, 1, true, new List<MatchedFragmentIon>(), 0);

    var allMatches = new List<PeptideSpectralMatch> { matchA, matchB, matchC };
    allMatches.ForEach(match => match.ResolveAllAmbiguities());

    new FdrAnalysisEngine(allMatches, acceptor.NumNotches, new CommonParameters(), noNestedIds).Run();

    Assert.AreEqual(2, acceptor.NumNotches);

    // per-notch counters: every PSM sees zero decoys and one target in its notch
    for (int k = 0; k < 3; k++)
    {
        Assert.AreEqual(0, allMatches[k].FdrInfo.CumulativeDecoyNotch);
        Assert.AreEqual(1, allMatches[k].FdrInfo.CumulativeTargetNotch);
    }

    // overall counters: no decoys, and the target count grows by one per PSM
    for (int k = 0; k < 3; k++)
    {
        Assert.AreEqual(0, allMatches[k].FdrInfo.CumulativeDecoy);
        Assert.AreEqual(k + 1, allMatches[k].FdrInfo.CumulativeTarget);
    }
}
/// <summary>
/// Runs the modern (fragment-index based) search for the current database partition.
/// Scans are distributed over <c>MaxThreadsToUsePerFile</c> workers in an interleaved fashion;
/// each scan gets a first-pass indexed scoring followed by a refined scoring of every candidate
/// peptide, and the resulting matches are written into <c>PeptideSpectralMatches</c>.
/// </summary>
/// <returns>A <c>MetaMorpheusEngineResults</c> wrapping this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    double progress = 0;
    int oldPercentProgress = 0;

    // progress and oldPercentProgress are captured by every worker lambda below; without
    // mutual exclusion increments can be lost and duplicate/regressing percentages reported.
    object progressGate = new object();

    ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing modern search... " + CurrentPartition + "/" + commonParameters.TotalPartitions, nestedIds));

    byte byteScoreCutoff = (byte)commonParameters.ScoreCutoff;
    if (commonParameters.CalculateEValue)
    {
        // keep (almost) every candidate so low-scoring matches are available for e-value calculation
        byteScoreCutoff = 1;
    }

    int maxThreadsPerFile = commonParameters.MaxThreadsToUsePerFile;
    int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray();
    Parallel.ForEach(threads, (i) =>
    {
        byte[] scoringTable = new byte[PeptideIndex.Count];
        List<int> idsOfPeptidesPossiblyObserved = new List<int>();

        // worker i handles scans i, i + maxThreadsPerFile, i + 2 * maxThreadsPerFile, ...
        for (; i < ListOfSortedMs2Scans.Length; i += maxThreadsPerFile)
        {
            // Stop loop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
            Array.Clear(scoringTable, 0, scoringTable.Length);
            idsOfPeptidesPossiblyObserved.Clear();
            Ms2ScanWithSpecificMass scan = ListOfSortedMs2Scans[i];

            // get fragment bins for this scan
            List<int> allBinsToSearch = GetBinsToSearch(scan);

            // get allowed theoretical masses from the known experimental mass
            // note that this is the OPPOSITE of the classic search (which calculates experimental masses from theoretical values)
            // this is just PRELIMINARY precursor-mass filtering
            // additional checks are made later to ensure that the theoretical precursor mass is acceptable
            IEnumerable<AllowedIntervalWithNotch> notches = MassDiffAcceptor.GetAllowedPrecursorMassIntervalsFromObservedMass(scan.PrecursorMass);

            double lowestMassPeptideToLookFor = notches.Min(p => p.AllowedInterval.Minimum);
            double highestMassPeptideToLookFor = notches.Max(p => p.AllowedInterval.Maximum);

            // first-pass scoring
            IndexedScoring(allBinsToSearch, scoringTable, byteScoreCutoff, idsOfPeptidesPossiblyObserved, scan.PrecursorMass, lowestMassPeptideToLookFor, highestMassPeptideToLookFor, PeptideIndex, MassDiffAcceptor, MaxMassThatFragmentIonScoreIsDoubled, commonParameters.DissociationType);

            // done with indexed scoring; refine scores and create PSMs
            foreach (int id in idsOfPeptidesPossiblyObserved)
            {
                PeptideWithSetModifications peptide = PeptideIndex[id];

                List<Product> peptideTheorProducts = peptide.Fragment(commonParameters.DissociationType, FragmentationTerminus.Both).ToList();

                List<MatchedFragmentIon> matchedIons = MatchFragmentIons(scan, peptideTheorProducts, commonParameters);

                double thisScore = CalculatePeptideScore(scan.TheScan, matchedIons);
                int notch = MassDiffAcceptor.Accepts(scan.PrecursorMass, peptide.MonoisotopicMass);

                bool meetsScoreCutoff = thisScore >= commonParameters.ScoreCutoff;
                bool scoreImprovement = PeptideSpectralMatches[i] == null || (thisScore - PeptideSpectralMatches[i].RunnerUpScore) > -PeptideSpectralMatch.ToleranceForScoreDifferentiation;

                // when e-values are requested every candidate is recorded, regardless of the cutoff
                if ((meetsScoreCutoff && scoreImprovement) || commonParameters.CalculateEValue)
                {
                    if (PeptideSpectralMatches[i] == null)
                    {
                        PeptideSpectralMatches[i] = new PeptideSpectralMatch(peptide, notch, thisScore, i, scan, commonParameters.DigestionParams, matchedIons);
                    }
                    else
                    {
                        PeptideSpectralMatches[i].AddOrReplace(peptide, thisScore, notch, commonParameters.ReportAllAmbiguity, matchedIons, 0);
                    }

                    if (commonParameters.CalculateEValue)
                    {
                        PeptideSpectralMatches[i].AllScores.Add(thisScore);
                    }
                }
            }

            // report search progress (serialized: the counters are shared between workers)
            lock (progressGate)
            {
                progress++;
                var percentProgress = (int)((progress / ListOfSortedMs2Scans.Length) * 100);

                if (percentProgress > oldPercentProgress)
                {
                    oldPercentProgress = percentProgress;
                    ReportProgress(new ProgressEventArgs(percentProgress, "Performing modern search... " + CurrentPartition + "/" + commonParameters.TotalPartitions, nestedIds));
                }
            }
        }
    });

    // remove peptides below the score cutoff that were stored to calculate expectation values
    if (commonParameters.CalculateEValue)
    {
        for (int i = 0; i < PeptideSpectralMatches.Length; i++)
        {
            if (PeptideSpectralMatches[i] != null && PeptideSpectralMatches[i].Score < commonParameters.ScoreCutoff)
            {
                PeptideSpectralMatches[i] = null;
            }
        }
    }

    foreach (PeptideSpectralMatch psm in PeptideSpectralMatches.Where(p => p != null))
    {
        psm.ResolveAllAmbiguities();
    }

    return new MetaMorpheusEngineResults(this);
}
/// <summary>
/// Test fixture builder. Creates three two-residue proteins ("MA", "MG", "MA"), a fixed
/// "CH2 on Glycine" modification and a "CH2 on Alanine" modification that is attached either
/// as a database-annotated modification on protein3 (<paramref name="localizeable"/> = true)
/// or as a variable modification (false). It then builds three PSMs from the resulting compact
/// peptides, runs <c>SequencesToActualProteinPeptidesEngine</c> and returns the pieces.
/// </summary>
/// <param name="localizeable">Selects how the alanine modification is supplied (see summary).</param>
/// <returns>
/// Tuple of: the PSM list, the compact-peptide-to-peptide matching, the mass diff acceptor,
/// the "no one hit wonders" flag (always false), and the two distinct compact peptides.
/// </returns>
private static Tuple<List<PeptideSpectralMatch>, Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase> GetInfo(bool localizeable)
{
    DigestionParams digestion = new DigestionParams(
        maxMissedCleavages: 0,
        minPeptideLength: 1,
        maxModificationIsoforms: 2,
        initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain,
        maxModsForPeptides: 1);
    CommonParameters commonParams = new CommonParameters(digestionParams: digestion, scoreCutoff: 1);

    // Alanine = Glycine + CH2
    Protein alanineProtein = new Protein("MA", "protein1");
    Protein glycineProtein = new Protein("MG", "protein2");

    double ch2Mass = Chemistry.ChemicalFormula.ParseFormula("CH2").MonoisotopicMass;
    ModificationMotif.TryGetMotif("G", out ModificationMotif glycineMotif);
    ModificationMotif.TryGetMotif("A", out ModificationMotif alanineMotif);
    TerminusLocalization anywhere = TerminusLocalization.Any;

    List<ModificationWithMass> fixedMods = new List<ModificationWithMass>
    {
        new ModificationWithMass("CH2 on Glycine", null, glycineMotif, anywhere, ch2Mass)
    };
    ModificationWithMass ch2OnAlanine = new ModificationWithMass("CH2 on Alanine", null, alanineMotif, anywhere, ch2Mass);

    List<ModificationWithMass> variableMods;
    Protein thirdProtein;
    if (!localizeable)
    {
        // the alanine mod is only reachable as a variable modification
        variableMods = new List<ModificationWithMass> { ch2OnAlanine };
        thirdProtein = new Protein("MA", "protein3");
    }
    else
    {
        // the alanine mod is annotated on residue 2 of protein3 instead
        variableMods = new List<ModificationWithMass>();
        IDictionary<int, List<Modification>> annotatedMods = new Dictionary<int, List<Modification>>
        {
            { 2, new List<Modification> { ch2OnAlanine } }
        };
        thirdProtein = new Protein("MA", "protein3", oneBasedModifications: annotatedMods);
    }

    var digestedFirst = alanineProtein.Digest(commonParams.DigestionParams, fixedMods, variableMods).First();
    var digestedSecond = glycineProtein.Digest(commonParams.DigestionParams, fixedMods, variableMods).First();
    var digestedThird = thirdProtein.Digest(commonParams.DigestionParams, fixedMods, variableMods).Last();

    CompactPeptide compactA = new CompactPeptide(digestedFirst, TerminusType.None);
    CompactPeptide compactADuplicate = new CompactPeptide(digestedSecond, TerminusType.None);
    Assert.AreEqual(compactA, compactADuplicate);
    CompactPeptide compactB = new CompactPeptide(digestedThird, TerminusType.None);

    string noFilePath = null;
    int charge = 0;
    TestDataFile dataFile = new TestDataFile();
    MsDataScan rawScan = dataFile.GetOneBasedScan(2);
    Ms2ScanWithSpecificMass ms2Scan = new Ms2ScanWithSpecificMass(rawScan, 0, charge, noFilePath);

    int scanIdx = 0;
    double zeroScore = 0;
    int zeroNotch = 0;

    // every PSM shares the same scan/score/notch and gets all-zero FDR values
    PeptideSpectralMatch BuildPsm(CompactPeptide compact)
    {
        PeptideSpectralMatch built = new PeptideSpectralMatch(compact, zeroNotch, zeroScore, scanIdx, ms2Scan, commonParams.DigestionParams);
        built.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
        return built;
    }

    PeptideSpectralMatch psmA = BuildPsm(compactA);
    PeptideSpectralMatch psmASecond = BuildPsm(compactA);
    PeptideSpectralMatch psmB = BuildPsm(compactB);
    var psmList = new List<PeptideSpectralMatch> { psmA, psmASecond, psmB };

    MassDiffAcceptor acceptor = new SinglePpmAroundZeroSearchMode(5);
    SequencesToActualProteinPeptidesEngine matchingEngine = new SequencesToActualProteinPeptidesEngine(
        psmList,
        new List<Protein> { alanineProtein, glycineProtein, thirdProtein },
        fixedMods,
        variableMods,
        new List<ProductType> { ProductType.B, ProductType.Y },
        new List<DigestionParams> { commonParams.DigestionParams },
        commonParams.ReportAllAmbiguity,
        commonParams,
        new List<string>());
    var engineResults = (SequencesToActualProteinPeptidesEngineResults)matchingEngine.Run();
    var matching = engineResults.CompactPeptideToProteinPeptideMatching;

    Assert.AreEqual(2, matching.Count);

    // only the first PSM is linked to its peptides here
    psmA.MatchToProteinLinkedPeptides(matching);

    bool noOneHitWonders = false;

    return new Tuple<List<PeptideSpectralMatch>, Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>, MassDiffAcceptor, bool, CompactPeptideBase, CompactPeptideBase>(
        psmList, matching, acceptor, noOneHitWonders, compactA, compactB);
}
/// <summary>
/// Combines PSMs that were searched separately per FDR category into a single list.
/// FDR is first computed inside each category; q-values of the minor categories are then
/// raised to at least the q-value the same score would have in the major category (the one
/// with the most PSMs at q &lt;= 0.01); for every scan index only the best PSM across the
/// categories is kept, and FDR is finally recomputed per category on the survivors.
/// </summary>
/// <param name="AllPsms">
/// One list per FDR category (entries and whole lists may be null). The lists are indexed
/// by scan and are modified in place: losing PSMs are replaced by null.
/// </param>
/// <param name="numNotches">Number of notches handed to <c>FdrAnalysisEngine</c>.</param>
/// <param name="taskId">Identifier handed to <c>FdrAnalysisEngine</c> as its nested id.</param>
/// <param name="commonParameters">Parameters handed to <c>FdrAnalysisEngine</c>.</param>
/// <returns>
/// The best PSM per scan ordered by ascending q-value, then descending score; an empty list
/// when no category list is present.
/// </returns>
public static List<PeptideSpectralMatch> ResolveFdrCategorySpecificPsms(List<PeptideSpectralMatch>[] AllPsms, int numNotches, string taskId, CommonParameters commonParameters)
{
    //update all psms with peptide info
    foreach (List<PeptideSpectralMatch> psmArray in AllPsms)
    {
        if (psmArray == null)
        {
            continue;
        }
        foreach (PeptideSpectralMatch psm in psmArray)
        {
            if (psm != null)
            {
                psm.ResolveAllAmbiguities();
            }
        }
    }

    // first pass: per-category FDR on de-duplicated PSMs
    foreach (List<PeptideSpectralMatch> psmsArray in AllPsms)
    {
        if (psmsArray != null)
        {
            List<PeptideSpectralMatch> cleanedPsmsArray = psmsArray.Where(b => b != null).OrderByDescending(b => b.Score)
                .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(cleanedPsmsArray, numNotches, commonParameters, new List<string> { taskId }).Run();

            for (int i = 0; i < psmsArray.Count; i++)
            {
                //a psm without FdrInfo was grouped away in the cleanedPsmsArray, so drop it
                if (psmsArray[i] != null && psmsArray[i].FdrInfo == null)
                {
                    psmsArray[i] = null;
                }
            }
        }
    }

    int[] ranking = new int[AllPsms.Length]; //high int is good ranking
    List<int> indexesOfInterest = new List<int>();
    for (int i = 0; i < ranking.Length; i++)
    {
        if (AllPsms[i] != null)
        {
            ranking[i] = AllPsms[i].Where(x => x != null).Count(x => x.FdrInfo.QValue <= 0.01); //set ranking as number of psms above 1% FDR
            indexesOfInterest.Add(i);
        }
    }

    // nothing to resolve when every category is missing (indexing [0] below would throw)
    if (indexesOfInterest.Count == 0)
    {
        return new List<PeptideSpectralMatch>();
    }

    //get the index of the category with the highest ranking
    int majorCategoryIndex = indexesOfInterest[0];
    for (int i = 1; i < indexesOfInterest.Count; i++)
    {
        int currentCategoryIndex = indexesOfInterest[i];
        if (ranking[currentCategoryIndex] > ranking[majorCategoryIndex])
        {
            majorCategoryIndex = currentCategoryIndex;
        }
    }

    //update other category q-values
    //There's a chance of weird categories getting a random decoy before a random target, but we don't want to give that target a q value of zero.
    //We can't just take the q of the first decoy, because if the target wasn't random (score = 40), but there are no other targets before the decoy (score = 5), then we're incorrectly dinging the target
    //The current solution is such that if a minor category has a lower q value than its corresponding score in the major category, then its q-value is changed to what it would be in the major category
    List<PeptideSpectralMatch> majorCategoryPsms = AllPsms[majorCategoryIndex].Where(x => x != null).OrderByDescending(x => x.Score).ToList(); //get sorted major category
    for (int i = 0; i < indexesOfInterest.Count; i++)
    {
        int minorCategoryIndex = indexesOfInterest[i];
        if (minorCategoryIndex != majorCategoryIndex)
        {
            List<PeptideSpectralMatch> minorCategoryPsms = AllPsms[minorCategoryIndex].Where(x => x != null).OrderByDescending(x => x.Score).ToList(); //get sorted minor category
            int minorPsmIndex = 0;
            int majorPsmIndex = 0;
            while (minorPsmIndex < minorCategoryPsms.Count && majorPsmIndex < majorCategoryPsms.Count) //while in the lists
            {
                PeptideSpectralMatch majorPsm = majorCategoryPsms[majorPsmIndex];
                PeptideSpectralMatch minorPsm = minorCategoryPsms[minorPsmIndex];
                //major needs to be a lower score than the minor
                if (majorPsm.Score > minorPsm.Score)
                {
                    majorPsmIndex++;
                }
                else
                {
                    if (majorPsm.FdrInfo.QValue > minorPsm.FdrInfo.QValue)
                    {
                        minorPsm.FdrInfo.QValue = majorPsm.FdrInfo.QValue;
                    }
                    minorPsmIndex++;
                }
            }

            //wrap up if we hit the end of the major category: the remaining minor psms are compared
            //against the lowest-scoring major psm. Skipped when the major category has no psms at all,
            //because there is nothing to compare against (and indexing would go out of range).
            if (majorCategoryPsms.Count > 0)
            {
                PeptideSpectralMatch lastMajorPsm = majorCategoryPsms[majorCategoryPsms.Count - 1];
                while (minorPsmIndex < minorCategoryPsms.Count)
                {
                    PeptideSpectralMatch minorPsm = minorCategoryPsms[minorPsmIndex];
                    if (lastMajorPsm.FdrInfo.QValue > minorPsm.FdrInfo.QValue)
                    {
                        minorPsm.FdrInfo.QValue = lastMajorPsm.FdrInfo.QValue;
                    }
                    minorPsmIndex++;
                }
            }
        }
    }

    // assumes every non-null category list has the same length (one slot per scan)
    int numTotalSpectraWithPrecursors = AllPsms[indexesOfInterest[0]].Count;
    List<PeptideSpectralMatch> bestPsmsList = new List<PeptideSpectralMatch>();
    for (int i = 0; i < numTotalSpectraWithPrecursors; i++)
    {
        PeptideSpectralMatch bestPsm = null;
        double lowestQ = double.MaxValue;
        int bestIndex = -1;
        foreach (int index in indexesOfInterest) //foreach category
        {
            PeptideSpectralMatch currentPsm = AllPsms[index][i];
            if (currentPsm != null)
            {
                double currentQValue = currentPsm.FdrInfo.QValue;
                if (bestPsm == null //first candidate for this scan (also avoids dereferencing a null bestPsm on a tie)
                    || currentQValue < lowestQ //if the new one is better
                    || (currentQValue == lowestQ && currentPsm.Score > bestPsm.Score))
                {
                    if (bestIndex != -1)
                    {
                        //remove the old one so we don't use it for fdr later
                        AllPsms[bestIndex][i] = null;
                    }
                    bestPsm = currentPsm;
                    lowestQ = currentQValue;
                    bestIndex = index;
                }
                else
                {
                    //remove the current (worse) one so we don't use it for fdr later
                    AllPsms[index][i] = null;
                }
            }
        }
        if (bestPsm != null)
        {
            bestPsmsList.Add(bestPsm);
        }
    }

    //It's probable that psms from some categories were removed by psms from other categories.
    //however, the fdr is still affected by their presence, since it was calculated before their removal.
    foreach (List<PeptideSpectralMatch> psmsArray in AllPsms)
    {
        if (psmsArray != null)
        {
            List<PeptideSpectralMatch> cleanedPsmsArray = psmsArray.Where(b => b != null).OrderByDescending(b => b.Score)
                .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                .ToList();

            new FdrAnalysisEngine(cleanedPsmsArray, numNotches, commonParameters, new List<string> { taskId }).Run();
        }
    }

    return bestPsmsList.OrderBy(b => b.FdrInfo.QValue).ThenByDescending(b => b.Score).ToList();
}
/// <summary>
/// Runs the G-PTM-D task: searches every spectra file with a <c>ClassicSearchEngine</c>, keeps the
/// best PSM per (file, scan, peptide mass), runs FDR analysis, writes the candidate PSMs to
/// "GPTMD_Candidates.psmtsv", runs the <c>GptmdEngine</c>, and writes the resulting modifications
/// into new "...GPTMD.xml" databases (one for non-contaminant and one for contaminant inputs).
/// </summary>
/// <param name="OutputFolder">Folder that receives the PSM table and the new XML databases.</param>
/// <param name="dbFilenameList">Input protein databases; their file names are used to name the output databases.</param>
/// <param name="currentRawFileList">Spectra files to search, processed in order.</param>
/// <param name="taskId">Identifier used as the first element of every nested status/progress id list.</param>
/// <param name="fileSettingsList">Per-file parameter overrides, indexed in parallel with <paramref name="currentRawFileList"/>; entries may be null.</param>
/// <returns>The task results, whose <c>NewDatabases</c> lists every database written by this run.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // TODO: print error messages loading GPTMD mods
    List<Modification> gptmdModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
        .Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif)))
        .ToList();
    IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    // write prose settings
    ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue
        ? "maximum peptide length = unspecified; "
        : "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

    // temporary search type for writing prose
    // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
    MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);
    ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
        + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the G-PTM-D task
    Status("Running G-PTM-D...", new List<string> { taskId });
    MyTaskResults = new MyTaskResults(this)
    {
        NewDatabases = new List<DbForTask>()
    };

    MyFileManager myFileManager = new MyFileManager(true);

    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        // Stop if canceled
        if (GlobalVariables.StopLoops)
        {
            break;
        }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        // the precursor tolerance of the acceptor comes from the file-specific parameters
        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
        MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

        NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });

        Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

        Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

        myFileManager.DoneWithFile(origDataFile);

        // one slot per scan; the search engine leaves a slot null when nothing was matched
        PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
        new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParams, new List<string> { taskId, "Individual Spectra Files", origDataFile }).Run();
        allPsms.AddRange(allPsmsArray.Where(p => p != null));

        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });
        ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files", origDataFile }));
    }
    ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

    // sort best-first (score, then smallest precursor mass error) and keep the first PSM
    // of every (file, scan, peptide mass) group
    allPsms = allPsms.OrderByDescending(b => b.Score)
        .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
        .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
        .Select(b => b.First())
        .ToList();

    new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List<string> { taskId }).Run();

    var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
    WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
    FinishedWritingFile(writtenFile, new List<string> { taskId });

    // get file-specific precursor mass tolerances for the GPTMD engine
    var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();
    for (int i = 0; i < currentRawFileList.Count; i++)
    {
        string filePath = currentRawFileList[i];
        Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance;
        if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null)
        {
            fileTolerance = fileSettingsList[i].PrecursorMassTolerance;
        }
        filePathToPrecursorMassTolerance.Add(filePath, fileTolerance);
    }

    // run GPTMD engine
    var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List<string> { taskId }).Run();

    // Stop if canceled
    if (GlobalVariables.StopLoops)
    {
        return MyTaskResults;
    }

    // write GPTMD databases
    if (dbFilenameList.Any(b => !b.IsContaminant))
    {
        List<string> databaseNames = new List<string>();
        foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
        {
            var dbName = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
            var theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
            bool compressed = theExtension.EndsWith("gz", StringComparison.Ordinal);
            // a compressed file carries two extensions (e.g. ".xml.gz"), so strip one more
            databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
        }
        string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

        FinishedWritingFile(outputXMLdbFullName, new List<string> { taskId });

        MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
        MyTaskResults.AddTaskSummaryText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
        MyTaskResults.AddTaskSummaryText("Mods types and counts:");
        MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }
    if (dbFilenameList.Any(b => b.IsContaminant))
    {
        // do NOT use Path.GetFileNameWithoutExtension here: GPTMD on .xml.gz would result in a
        // .xml.xml file type being written, so everything after the first dot is dropped instead
        List<string> databaseNames = new List<string>();
        foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
        {
            var dbName = Path.GetFileName(contaminantDb.FilePath);
            int indexOfFirstDot = dbName.IndexOf('.');
            // a file name without any dot is used as-is (Substring(0, -1) would throw)
            databaseNames.Add(indexOfFirstDot >= 0 ? dbName.Substring(0, indexOfFirstDot) : dbName);
        }
        string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

        var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

        FinishedWritingFile(outputXMLdbFullNameContaminants, new List<string> { taskId });

        MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
        MyTaskResults.AddTaskSummaryText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
        MyTaskResults.AddTaskSummaryText("Mods types and counts:");
        MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
    }
    return MyTaskResults;
}
/// <summary>
/// Performs the indexed nonspecific search over <c>ListOfSortedMs2Scans</c> in parallel.
/// For every scan the fragment index is used to count matching fragment bins per peptide,
/// candidate peptides are restricted to those reachable from the allowed precursor mass
/// intervals, and candidates are then re-scored from the highest initial score down to the
/// score cutoff. Accepted matches are stored in (or merged into) the per-category PSM arrays
/// in <c>GlobalCategorySpecificPsms</c> at the scan's index.
/// </summary>
/// <returns>A <c>MetaMorpheusEngineResults</c> constructed from this engine.</returns>
protected override MetaMorpheusEngineResults RunSpecific()
{
    bool semiSpecificSearch = CommonParameters.DigestionParams.SearchModeType == CleavageSpecificity.Semi;

    // progress counters are shared by all worker threads; only touch them while holding progressLock
    object progressLock = new object();
    double progress = 0;
    int oldPercentProgress = 0;
    ReportProgress(new ProgressEventArgs(oldPercentProgress, "Performing nonspecific search... " + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds));

    int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile;
    int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray();
    Parallel.ForEach(threads, (i) =>
    {
        // per-worker scratch buffers, reused for every scan this worker handles
        byte[] scoringTable = new byte[PeptideIndex.Count];
        HashSet<int> idsOfPeptidesPossiblyObserved = new HashSet<int>();

        // worker i handles scans i, i + maxThreadsPerFile, i + 2 * maxThreadsPerFile, ...
        for (; i < ListOfSortedMs2Scans.Length; i += maxThreadsPerFile)
        {
            // Stop loop if canceled
            if (GlobalVariables.StopLoops)
            {
                return;
            }

            // empty the scoring table to score the new scan (conserves memory compared to allocating a new array)
            Array.Clear(scoringTable, 0, scoringTable.Length);
            idsOfPeptidesPossiblyObserved.Clear();
            Ms2ScanWithSpecificMass scan = ListOfSortedMs2Scans[i];

            // get bins to add points to
            List<int> allBinsToSearch = GetBinsToSearch(scan, FragmentIndex, CommonParameters.DissociationType);

            // the entire indexed scoring is done here
            for (int j = 0; j < allBinsToSearch.Count; j++)
            {
                FragmentIndex[allBinsToSearch[j]].ForEach(id => scoringTable[id]++);
            }

            // populate ids of possibly observed with those containing allowed precursor masses
            List<AllowedIntervalWithNotch> validIntervals = MassDiffAcceptor.GetAllowedPrecursorMassIntervalsFromObservedMass(scan.PrecursorMass).ToList(); // get all valid notches
            foreach (AllowedIntervalWithNotch interval in validIntervals)
            {
                int obsPrecursorFloorMz = (int)Math.Floor(interval.AllowedInterval.Minimum * FragmentBinsPerDalton);
                int obsPrecursorCeilingMz = (int)Math.Ceiling(interval.AllowedInterval.Maximum * FragmentBinsPerDalton);

                foreach (ProductType pt in ProductTypesToSearch)
                {
                    // shift the precursor bin range by the (water - product type) mass offset before probing the fragment index
                    int dissociationBinShift = (int)Math.Round((WaterMonoisotopicMass - DissociationTypeCollection.GetMassShiftFromProductType(pt)) * FragmentBinsPerDalton);
                    int lowestBin = obsPrecursorFloorMz - dissociationBinShift;
                    int highestBin = obsPrecursorCeilingMz - dissociationBinShift;
                    for (int bin = lowestBin; bin <= highestBin; bin++)
                    {
                        if (bin < FragmentIndex.Length && FragmentIndex[bin] != null)
                        {
                            FragmentIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                        }
                    }
                }

                for (int bin = obsPrecursorFloorMz; bin <= obsPrecursorCeilingMz; bin++) // no bin shift, since they're precursor masses
                {
                    if (bin < PrecursorIndex.Length && PrecursorIndex[bin] != null)
                    {
                        PrecursorIndex[bin].ForEach(id => idsOfPeptidesPossiblyObserved.Add(id));
                    }
                }
            }

            // done with initial scoring; refine scores and create PSMs
            if (idsOfPeptidesPossiblyObserved.Any())
            {
                // start one above the best initial score because the loop decrements before use
                int maxInitialScore = idsOfPeptidesPossiblyObserved.Max(id => scoringTable[id]) + 1;
                while (maxInitialScore > CommonParameters.ScoreCutoff) // go through all until we hit the end
                {
                    maxInitialScore--;
                    foreach (int peptideId in idsOfPeptidesPossiblyObserved.Where(candidate => scoringTable[candidate] == maxInitialScore))
                    {
                        PeptideWithSetModifications peptide = PeptideIndex[peptideId];
                        List<Product> peptideTheorProducts = peptide.Fragment(CommonParameters.DissociationType, CommonParameters.DigestionParams.FragmentationTerminus).ToList();

                        Tuple<int, PeptideWithSetModifications> notchAndUpdatedPeptide = Accepts(peptideTheorProducts, scan.PrecursorMass, peptide, CommonParameters.DigestionParams.FragmentationTerminus, MassDiffAcceptor, semiSpecificSearch);
                        int notch = notchAndUpdatedPeptide.Item1;
                        if (notch >= 0)
                        {
                            // re-fragment the updated peptide for both termini before the final scoring
                            peptide = notchAndUpdatedPeptide.Item2;
                            peptideTheorProducts = peptide.Fragment(CommonParameters.DissociationType, FragmentationTerminus.Both).ToList();
                            List<MatchedFragmentIon> matchedIons = MatchFragmentIons(scan, peptideTheorProducts, ModifiedParametersNoComp);

                            double thisScore = CalculatePeptideScore(scan.TheScan, matchedIons);
                            if (thisScore > CommonParameters.ScoreCutoff)
                            {
                                PeptideSpectralMatch[] localPeptideSpectralMatches = GlobalCategorySpecificPsms[(int)FdrClassifier.GetCleavageSpecificityCategory(peptide.CleavageSpecificityForFdrCategory)];
                                if (localPeptideSpectralMatches[i] == null)
                                {
                                    localPeptideSpectralMatches[i] = new PeptideSpectralMatch(peptide, notch, thisScore, i, scan, CommonParameters.DigestionParams, matchedIons);
                                }
                                else
                                {
                                    localPeptideSpectralMatches[i].AddOrReplace(peptide, thisScore, notch, CommonParameters.ReportAllAmbiguity, matchedIons, 0);
                                }
                            }
                        }
                    }
                }
            }

            // report search progress; the counters are updated under the lock so that no increment
            // is lost and each percentage is claimed by exactly one worker, while the event itself
            // is raised outside the lock so handlers never run while the lock is held
            int percentProgress;
            bool shouldReport = false;
            lock (progressLock)
            {
                progress++;
                percentProgress = (int)((progress / ListOfSortedMs2Scans.Length) * 100);
                if (percentProgress > oldPercentProgress)
                {
                    oldPercentProgress = percentProgress;
                    shouldReport = true;
                }
            }

            if (shouldReport)
            {
                ReportProgress(new ProgressEventArgs(percentProgress, "Performing nonspecific search... " + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds));
            }
        }
    });
    return new MetaMorpheusEngineResults(this);
}