/// <summary>
/// Exercises <c>MS2Item.InitTerminalLoss</c> on the peptide "-.EHSSL^AYWK.-" (charge 3)
/// after the '^' symbol has been given a mass of 7.017, and checks the number, sequence,
/// terminal flag and precursor spacing of selected terminal-loss entries.
/// </summary>
public void TestInitTerminalLoss()
{
    const double modificationMass = 7.017;
    const double tolerance = 0.001;
    const int charge = 3;

    var item = new MS2Item()
    {
        Charge = charge,
        Precursor = 376.52331,
        Peptide = "-.EHSSL^AYWK.-"
    };

    var aas = new Aminoacids();
    aas['^'].ResetMass(modificationMass, modificationMass);

    item.InitTerminalLoss(aas, 6, 2);

    var losses = item.TerminalLoss;
    Assert.AreEqual(12, losses.Count);

    // Entries 3 and 4 are flagged as N-terminal; they differ by the residue 'L' plus the '^' mass.
    Assert.AreEqual("LAYWK", losses[3].Sequence);
    Assert.IsTrue(losses[3].IsNterminal);
    Assert.AreEqual("AYWK", losses[4].Sequence);
    Assert.IsTrue(losses[4].IsNterminal);
    var nterminalGap = (losses[3].Precursor - losses[4].Precursor) * charge - aas['L'].MonoMass;
    Assert.AreEqual(modificationMass, nterminalGap, tolerance);

    // Entries 9 and 10 are flagged as not N-terminal; the same mass gap is expected between them.
    Assert.AreEqual("EHSSL", losses[9].Sequence);
    Assert.IsFalse(losses[9].IsNterminal);
    Assert.AreEqual("EHSS", losses[10].Sequence);
    Assert.IsFalse(losses[10].IsNterminal);
    var cterminalGap = (losses[9].Precursor - losses[10].Precursor) * charge - aas['L'].MonoMass;
    Assert.AreEqual(modificationMass, cterminalGap, tolerance);

    // Debug aid (disabled): dump every generated entry.
    //item.TerminalLoss.ForEach(m => Console.WriteLine(m.Precursor.ToString() + "\t" + m.Sequence));
}
/// <summary>
/// Appends terminal-extension candidates to <paramref name="predicted"/>.
/// A library peptide is considered only when its peptide string starts and/or ends with "-";
/// for each entry of <c>options.ExtensionDeltaMassList</c> whose shifted m/z falls inside the
/// query precursor window, one prediction is added per eligible terminus.
/// </summary>
/// <param name="predicted">List that receives the new predictions.</param>
/// <param name="query">Query MS2 item supplying the precursor m/z window.</param>
/// <param name="libms2">Library MS2 item whose peptide is being extended.</param>
/// <param name="ms3match">MS3 match summary stored on every prediction.</param>
protected void CheckTerminalExtension(List<SapPredicted> predicted, MS2Item query, MS2Item libms2, SapMatchedCount ms3match)
{
    var nterminalOpen = libms2.Peptide.StartsWith("-");
    var cterminalOpen = libms2.Peptide.EndsWith("-");
    if (!nterminalOpen && !cterminalOpen)
    {
        return;
    }

    foreach (var extension in options.ExtensionDeltaMassList)
    {
        // Mass shift converted to m/z using the library item's charge.
        var extendedMz = libms2.Precursor + extension.DeltaMass / libms2.Charge;
        var insideWindow = extendedMz >= query.MinPrecursorMz && extendedMz <= query.MaxPrecursorMz;
        if (!insideWindow)
        {
            continue;
        }

        var pureSequence = PeptideUtils.GetPureSequence(libms2.Peptide);

        if (nterminalOpen)
        {
            // Extension residues are prepended to the pure sequence.
            var nterminalVariant = new TargetVariant()
            {
                Source = PeptideUtils.GetPureSequence(libms2.Peptide),
                Target = new HashSet<string>(extension.Target.Select(t => t + pureSequence)),
                DeltaMass = extension.DeltaMass,
                TargetType = VariantType.NTerminalExtension
            };

            predicted.Add(new SapPredicted()
            {
                Ms2 = query,
                LibMs2 = libms2,
                Matched = ms3match,
                Target = nterminalVariant
            });
        }

        if (cterminalOpen)
        {
            // Extension residues are appended to the pure sequence.
            var cterminalVariant = new TargetVariant()
            {
                Source = PeptideUtils.GetPureSequence(libms2.Peptide),
                Target = new HashSet<string>(extension.Target.Select(t => pureSequence + t)),
                DeltaMass = extension.DeltaMass,
                TargetType = VariantType.CTerminalExtension
            };

            predicted.Add(new SapPredicted()
            {
                Ms2 = query,
                LibMs2 = libms2,
                Matched = ms3match,
                Target = cterminalVariant
            });
        }
    }
}
/// <summary>
/// Compares the MS3 spectra of a library item with those of a query item.
/// For each library MS3 spectrum, the first query MS3 spectrum whose precursor m/z lies in the
/// library spectrum's precursor window is taken; the library precursor m/z and the number of
/// matched ions for that pair are recorded.
/// </summary>
/// <param name="libms2">Library MS2 item (stored as <c>Item1</c> of the result).</param>
/// <param name="query">Query MS2 item (stored as <c>Item2</c> of the result).</param>
/// <returns>
/// A <c>SapMatchedCount</c> holding the matched library precursor m/z values and, in the same
/// order, the matched ion count of each pair.
/// </returns>
protected SapMatchedCount GetMS3MatchedCount(MS2Item libms2, MS2Item query)
{
    var matchedPrecursors = new List<double>();
    var matchedIonCounts = new List<int>();

    foreach (var libSpectrum in libms2.MS3Spectra)
    {
        foreach (var querySpectrum in query.MS3Spectra)
        {
            var precursorInWindow = querySpectrum.PrecursorMZ >= libSpectrum.MinPrecursorMz
                                    && querySpectrum.PrecursorMZ <= libSpectrum.MaxPrecursorMz;
            if (!precursorInWindow)
            {
                continue;
            }

            matchedPrecursors.Add(libSpectrum.PrecursorMZ);

            // Merge-style walk over both peak lists.
            // NOTE(review): this presumes both lists are ordered by ascending m/z - confirm.
            var sharedIons = 0;
            var libIndex = 0;
            var queryIndex = 0;
            while (libIndex < libSpectrum.Count && queryIndex < querySpectrum.Count)
            {
                if (querySpectrum[queryIndex].Mz < libSpectrum[libIndex].MinMatchMz)
                {
                    // Query peak lies below the library peak's match window.
                    queryIndex++;
                }
                else if (querySpectrum[queryIndex].Mz > libSpectrum[libIndex].MaxMatchMz)
                {
                    // Query peak lies above the library peak's match window.
                    libIndex++;
                }
                else
                {
                    // Inside the window: count the pair and consume both peaks.
                    sharedIons++;
                    libIndex++;
                    queryIndex++;
                }
            }

            matchedIonCounts.Add(sharedIons);

            // Only the first query spectrum that fits the window is used for this library spectrum.
            break;
        }
    }

    return new SapMatchedCount()
    {
        Item1 = libms2,
        Item2 = query,
        PrecursorMatched = matchedPrecursors,
        MS3Matched = matchedIonCounts
    };
}
/// <summary>
/// Writes one tab-separated line with twelve fields describing a query/library pair:
/// query file scans, query precursor, query charge, library precursor, matched precursors and
/// matched MS3 ion counts (each joined with ";"), the precursor difference multiplied by the
/// query charge, and the library item's file scans, peptide, score, expect value and proteins.
/// </summary>
/// <param name="sw">Writer that receives the line.</param>
/// <param name="query">Query MS2 item.</param>
/// <param name="libms2">Library MS2 item.</param>
/// <param name="ms3match">MS3 match summary for the pair.</param>
protected static void OutputIntervalResult(StreamWriter sw, MS2Item query, MS2Item libms2, SapMatchedCount ms3match)
{
    var queryScans = query.GetFileScans();
    var matchedPrecursors = ms3match.PrecursorMatched.ConvertAll(m => m.ToString()).Merge(";");
    var matchedIonCounts = ms3match.MS3Matched.ConvertAll(m => m.ToString()).Merge(";");
    var deltaMass = (query.Precursor - libms2.Precursor) * query.Charge;

    var fields = new object[]
    {
        queryScans,
        query.Precursor,
        query.Charge,
        libms2.Precursor,
        matchedPrecursors,
        matchedIonCounts,
        deltaMass,
        libms2.GetFileScans(),
        libms2.Peptide,
        libms2.Score,
        libms2.ExpectValue,
        libms2.Proteins
    };

    sw.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}\t{11}", fields);
}
/// <summary>
/// Appends substitution candidates to <paramref name="predicted"/>.
/// For every entry of the library item's amino acid composition, the substitutions registered
/// in <c>options.SubstitutionDeltaMassMap</c> are scanned and those whose shifted m/z falls
/// inside the query precursor window are added.
/// </summary>
/// <param name="predicted">List that receives the new predictions.</param>
/// <param name="query">Query MS2 item supplying the precursor m/z window and the charge.</param>
/// <param name="libms2">Library MS2 item supplying the base precursor and composition.</param>
/// <param name="ms3match">MS3 match summary stored on every prediction.</param>
protected void CheckSAP(List<SapPredicted> predicted, MS2Item query, MS2Item libms2, SapMatchedCount ms3match)
{
    foreach (var residue in libms2.AminoacidCompsition)
    {
        // The list has been ordered by delta mass, which is what makes the early break below valid.
        var substitutions = options.SubstitutionDeltaMassMap[residue];

        foreach (var substitution in substitutions)
        {
            // NOTE(review): the shift is divided by query.Charge here, whereas
            // CheckTerminalExtension divides by libms2.Charge - confirm which is intended.
            var candidateMz = libms2.Precursor + substitution.DeltaMass / query.Charge;

            if (candidateMz < query.MinPrecursorMz)
            {
                // Still below the window; later entries may reach it.
                continue;
            }
            else if (candidateMz > query.MaxPrecursorMz)
            {
                // Past the window; the remaining entries of this list are skipped.
                break;
            }

            var variant = new TargetVariant()
            {
                Source = substitution.Source,
                Target = substitution.Target,
                DeltaMass = substitution.DeltaMass,
                TargetType = substitution.TargetType
            };

            predicted.Add(new SapPredicted()
            {
                Ms2 = query,
                LibMs2 = libms2,
                Matched = ms3match,
                Target = variant
            });
        }
    }
}
/// <summary>
/// Appends terminal-loss candidates to <paramref name="predicted"/>.
/// Every entry of <c>libms2.TerminalLoss</c> whose precursor m/z lies inside the query
/// precursor window yields one prediction, typed as N- or C-terminal loss according to the
/// entry's <c>IsNterminal</c> flag.
/// </summary>
/// <param name="predicted">List that receives the new predictions.</param>
/// <param name="query">Query MS2 item supplying the precursor m/z window.</param>
/// <param name="libms2">Library MS2 item whose terminal-loss entries are tested.</param>
/// <param name="ms3match">MS3 match summary stored on every prediction.</param>
protected void CheckTerminalLoss(List<SapPredicted> predicted, MS2Item query, MS2Item libms2, SapMatchedCount ms3match)
{
    foreach (var loss in libms2.TerminalLoss)
    {
        var insideWindow = loss.Precursor >= query.MinPrecursorMz && loss.Precursor <= query.MaxPrecursorMz;
        if (!insideWindow)
        {
            continue;
        }

        var variant = new TargetVariant()
        {
            Source = PeptideUtils.GetPureSequence(libms2.Peptide),
            Target = new HashSet<string>(new[] { loss.Sequence }),
            // m/z difference scaled by the library charge.
            DeltaMass = (loss.Precursor - libms2.Precursor) * libms2.Charge,
            TargetType = loss.IsNterminal ? VariantType.NTerminalLoss : VariantType.CTerminalLoss
        };

        var prediction = new SapPredicted()
        {
            Ms2 = query,
            LibMs2 = libms2,
            Matched = ms3match,
            Target = variant
        };

        predicted.Add(prediction);
    }
}
/// <summary>
/// Scans every raw file in <paramref name="expRawfileMap"/> and returns the MS2 scans that are
/// directly followed by at least one non-empty MS3 scan. MS2 scans listed for the experiment in
/// <paramref name="expScanMap"/> are skipped.
/// </summary>
/// <param name="expRawfileMap">Maps an experiment name to the raw file to read.</param>
/// <param name="expScanMap">
/// Maps an experiment name to the scan numbers to exclude; experiments without an entry
/// exclude nothing.
/// </param>
/// <returns>MS2 items, each carrying the MS3 spectra collected from the scans that follow it.</returns>
/// <exception cref="UserTerminatedException">Thrown when cancellation is pending on the progress callback.</exception>
private List<MS2Item> GetCandidateMs2ItemList(Dictionary<string, string> expRawfileMap, Dictionary<string, HashSet<int>> expScanMap)
{
    var result = new List<MS2Item>();

    foreach (var entry in expRawfileMap)
    {
        var exp = entry.Key;
        var rawfile = entry.Value;

        // Single lookup instead of ContainsKey followed by the indexer.
        HashSet<int> scans;
        if (!expScanMap.TryGetValue(exp, out scans))
        {
            scans = new HashSet<int>();
        }

        Progress.SetMessage("Reading MS2/MS3 from {0} ...", rawfile);

        using (var reader = RawFileFactory.GetRawFileReader(rawfile, false))
        {
            var firstScan = reader.GetFirstSpectrumNumber();
            var lastScan = reader.GetLastSpectrumNumber();

            Progress.SetRange(firstScan, lastScan);

            // The final scan is not tested as an MS2 here: no scan follows it, so it could
            // never collect an MS3 spectrum and would be discarded anyway.
            for (int scan = firstScan; scan < lastScan; scan++)
            {
                var msLevel = reader.GetMsLevel(scan);
                if (msLevel != 2)
                {
                    continue;
                }

                if (scans.Contains(scan))
                {
                    continue;
                }

                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }

                Progress.SetPosition(scan);

                var ms2precursor = reader.GetPrecursorPeak(scan);
                var ms2 = new MS2Item()
                {
                    Precursor = ms2precursor.Mz,
                    Charge = ms2precursor.Charge,
                    FileScans = new SequestFilename[] { new SequestFilename(exp, scan, scan, ms2precursor.Charge, string.Empty) }.ToList()
                };

                // The last spectrum number is treated as inclusive so that a trailing MS3 scan
                // at the very end of the file is not dropped.
                for (int ms3scan = scan + 1; ms3scan <= lastScan; ms3scan++)
                {
                    var mslevel = reader.GetMsLevel(ms3scan);
                    if (mslevel != 3)
                    {
                        // Resume the outer loop at the first non-MS3 scan (the loop increment adds one).
                        scan = ms3scan - 1;
                        break;
                    }

                    var pkl = reader.GetPeakList(ms3scan);
                    if (pkl.Count == 0)
                    {
                        continue;
                    }

                    var ms3precursor = reader.GetPrecursorPeak(ms3scan);
                    pkl.PrecursorMZ = ms3precursor.Mz;
                    ms2.MS3Spectra.Add(new MS3Item(pkl));
                }

                if (ms2.MS3Spectra.Count > 0)
                {
                    result.Add(ms2);
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Builds the MS2/MS3 library from the identified peptides in <c>options.PeptideFile</c>.
/// Steps: read peptides and group them by experiment; for each peptide collect the MS3 scans
/// that directly follow its first scan in the matching raw file; merge items sharing peptide
/// and charge; write the uncombined library; combine MS3 spectra; initialize terminal-loss
/// entries; write the final library.
/// </summary>
/// <returns>The output file name followed by the uncombined output file name.</returns>
/// <exception cref="Exception">
/// Thrown when an experiment found in the peptide file has no raw file (matched by file name
/// without extension) in <c>options.RawFiles</c>.
/// </exception>
public override IEnumerable<string> Process()
{
    var format = new MascotPeptideTextFormat();
    var expPeptidesMap = format.ReadFromFile(options.PeptideFile).GroupBy(m => m.Query.FileScan.Experimental).ToDictionary(m => m.Key, m => m.ToList());
    var expRawfileMap = options.RawFiles.ToDictionary(m => Path.GetFileNameWithoutExtension(m));

    // Fail before any raw file is opened if an experiment has no raw file assigned.
    foreach (var exp in expPeptidesMap.Keys)
    {
        if (!expRawfileMap.ContainsKey(exp))
        {
            throw new Exception(string.Format("Raw file of {0} is not assigned in RawFiles.", exp));
        }
    }

    var ms2list = new List<MS2Item>();
    foreach (var exp in expPeptidesMap.Keys)
    {
        var rawfile = expRawfileMap[exp];
        var peptides = expPeptidesMap[exp];

        using (var reader = RawFileFactory.GetRawFileReader(rawfile, false))
        {
            var lastScan = reader.GetLastSpectrumNumber();

            Progress.SetRange(0, peptides.Count);
            Progress.SetMessage("Extracting MS2/MS3 information ...");

            int count = 0;
            foreach (var peptide in peptides)
            {
                count++;
                Progress.SetPosition(count);

                var ms2 = new MS2Item()
                {
                    Peptide = peptide.Peptide.Sequence,
                    Precursor = peptide.GetPrecursorMz(),
                    Charge = peptide.Query.Charge,
                    Modification = peptide.Modifications,
                    FileScans = new SequestFilename[] { peptide.Query.FileScan }.ToList(),
                    Score = peptide.Score,
                    ExpectValue = peptide.ExpectValue,
                    Proteins = peptide.GetProteins("/")
                };

                // Collect the run of MS3 scans that follows the MS2 scan. The last spectrum
                // number is treated as inclusive so an MS3 at the end of the file is kept.
                for (int ms3scan = peptide.Query.FileScan.FirstScan + 1; ms3scan <= lastScan; ms3scan++)
                {
                    var mslevel = reader.GetMsLevel(ms3scan);
                    if (mslevel != 3)
                    {
                        break;
                    }

                    var pkl = reader.GetPeakList(ms3scan);
                    if (pkl.Count == 0)
                    {
                        continue;
                    }

                    var precursor = reader.GetPrecursorPeak(ms3scan);
                    pkl.PrecursorMZ = precursor.Mz;
                    ms2.MS3Spectra.Add(new MS3Item(pkl));
                }

                // Items without any MS3 spectrum are not added to the library.
                if (ms2.MS3Spectra.Count > 0)
                {
                    ms2list.Add(ms2);
                }
            }
        }
    }

    Progress.SetMessage("Merging MS2 by peptide and charge ...");

    var ms2group = ms2list.GroupBy(m => string.Format("{0}:{1}", m.Peptide, m.Charge)).ToList();
    var ms2library = new List<MS2Item>();
    foreach (var g in ms2group)
    {
        var spectraCount = g.Count();
        if (spectraCount < options.MinIdentifiedSpectraPerPeptide)
        {
            continue;
        }

        // The first item of the group is reused as the merged entry and updated in place.
        var gitem = g.First();
        gitem.CombinedCount = spectraCount;
        gitem.Precursor = g.Average(m => m.Precursor);
        gitem.Score = g.Max(m => m.Score);
        gitem.ExpectValue = g.Min(m => m.ExpectValue);
        gitem.FileScans = (from gg in g from fs in gg.FileScans select fs).ToList();

        foreach (var ms2 in g.Skip(1))
        {
            gitem.MS3Spectra.AddRange(ms2.MS3Spectra);
        }

        ms2library.Add(gitem);
    }

    // Order by peptide, then by charge.
    ms2library.Sort((m1, m2) =>
    {
        var res = m1.Peptide.CompareTo(m2.Peptide);
        if (res == 0)
        {
            res = m1.Charge.CompareTo(m2.Charge);
        }
        return res;
    });

    new MS2ItemXmlFormat().WriteToFile(options.OutputUncombinedFile, ms2library);

    Progress.SetMessage("Combing MS3 by precursor ...");

    var builder = new BestSpectrumTopSharedPeaksBuilder(options.FragmentPPMTolerance, options.MaxFragmentPeakCount);
    ms2library.ForEach(m => m.CombineMS3Spectra(builder, options.PrecursorPPMTolerance));

    Progress.SetMessage("Initialize terminal loss ...");

    var aas = options.GetAminoacids();
    ms2library.ForEach(l => l.InitTerminalLoss(aas, options.MaxTerminalLossLength, options.MinSequenceLength));

    new MS2ItemXmlFormat().WriteToFile(options.OutputFile, ms2library);

    Progress.End();

    return new[] { options.OutputFile, options.OutputUncombinedFile };
}