/// <summary>
/// Rebuilds the isotopic envelope plot from the currently selected isotope proportions.
/// </summary>
private void BuildIsotopicProfilePlot()
{
    // Nothing to plot until a mass has been assigned.
    if (this.Mass.Equals(0.0))
    {
        return;
    }

    // Gather the per-element proportions (C, H, N, O, S) that drive the predictor.
    var carbon = this.IsotopeProportions["C"].GetProportions();
    var hydrogen = this.IsotopeProportions["H"].GetProportions();
    var nitrogen = this.IsotopeProportions["N"].GetProportions();
    var oxygen = this.IsotopeProportions["O"].GetProportions();
    var sulfur = this.IsotopeProportions["S"].GetProportions();

    var profilePredictor = new IsoProfilePredictor(
        carbon,
        hydrogen,
        nitrogen,
        oxygen,
        sulfur,
        this.RelativeIntensityThreshold);

    // Theoretical profile for the current mass/charge using the custom predictor.
    var model = new Averagine();
    var theoretical = model.GetTheoreticalIsotopeProfileInst(
        this.Mass,
        this.Charge,
        this.RelativeIntensityThreshold,
        profilePredictor);

    // Observed data points are converted to plain peaks before plotting.
    var observed = this.ObservedPeaks
        .Select(point => new Peak(point.Item.X, point.Item.Y))
        .ToList();

    this.IsotopicEnvelopePlotViewModel.BuildPlot(theoretical, observed, this.IsProfile);
}
/// <summary>
/// Creates the collection creator with a fresh <see cref="Averagine"/> instance and
/// stores that instance's averagine formula and tag formula.
/// </summary>
public MercuryDistCollectionCreator()
{
    var model = new Averagine();

    averagine = model;
    averagineFormula = model.AveragineFormula;
    tagFormula = model.TagFormula;
}
/// <summary>
/// Builds a theoretical isotope envelope for the given monoisotopic mass, keeping only
/// isotopes whose envelope value reaches <paramref name="relativeIntensityThreshold"/> and
/// whose ranking does not exceed <paramref name="maxNumOfIsotopes"/>.
/// </summary>
/// <param name="monoMass">Monoisotopic mass passed to the averagine envelope lookup.</param>
/// <param name="maxNumOfIsotopes">Largest ranking that is still kept.</param>
/// <param name="relativeIntensityThreshold">Smallest envelope value that is still kept.</param>
/// <exception cref="Exception">Thrown when the kept envelope values do not sum to a positive number.</exception>
public TheoreticalIsotopeEnvelope(double monoMass, int maxNumOfIsotopes, double relativeIntensityThreshold = 0.1)
{
    MonoMass = monoMass;

    var averagineEnvelope = Averagine.GetIsotopomerEnvelope(monoMass);
    var rankings = ArrayUtil.GetRankings(averagineEnvelope.Envelope);

    Isotopes = new List<Isotope>(maxNumOfIsotopes);

    // Select the isotopes that pass both filters and accumulate their total ratio.
    var totalRatio = 0d;
    for (var isotopeIndex = 0; isotopeIndex < averagineEnvelope.Envelope.Length; isotopeIndex++)
    {
        var ratio = averagineEnvelope.Envelope[isotopeIndex];
        if (ratio < relativeIntensityThreshold)
        {
            continue;
        }

        if (rankings[isotopeIndex] > maxNumOfIsotopes)
        {
            continue;
        }

        totalRatio += ratio;
        Isotopes.Add(new Isotope(isotopeIndex, ratio));
    }

    // Written as a negated comparison so that NaN is rejected as well.
    if (!(totalRatio > 0))
    {
        throw new Exception("Abnormal Theoretical Envelope");
    }

    var keptCount = Isotopes.Count;
    _probability = new double[keptCount];
    Ranking = new int[keptCount];
    IndexOrderByRanking = new int[keptCount];

    // Normalize the ratios and record the ranking <-> position mappings.
    for (var slot = 0; slot < keptCount; slot++)
    {
        var rank = rankings[Isotopes[slot].Index];

        _probability[slot] = Isotopes[slot].Ratio / totalRatio;
        Ranking[slot] = rank;
        IndexOrderByRanking[rank - 1] = slot;
    }
}
/// <summary>
/// Returns the MS2 scan numbers recorded for the most abundant isotope m/z of
/// <paramref name="sequenceMass"/> at every charge in [_minCharge, _maxCharge].
/// The result is cached per sequence-mass bin.
/// </summary>
/// <param name="sequenceMass">Sequence mass to look up.</param>
/// <returns>The matching MS2 scan numbers (possibly empty).</returns>
public IEnumerable<int> GetMatchingMs2ScanNums(double sequenceMass)
{
    var massBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(sequenceMass);

    // Serve from the cache when this mass bin has been resolved before.
    IList<int> cachedScanNums;
    if (_sequenceMassBinToScanNumsMap.TryGetValue(massBin, out cachedScanNums))
    {
        return cachedScanNums;
    }

    IList<int> matchedScanNums = new List<int>();
    var isotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;

    for (var charge = _minCharge; charge <= _maxCharge; charge++)
    {
        var isotopeMz = Ion.GetIsotopeMz(sequenceMass, charge, isotopeIndex);
        var mzBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(isotopeMz);

        IList<ChargeAndScanNum> candidates;
        if (_mostAbundantIsotopeMzIndexToChargeAndScanNums.TryGetValue(mzBin, out candidates))
        {
            foreach (var candidate in candidates)
            {
                // Only entries recorded with the same precursor charge count as matches.
                if (candidate.Charge != charge)
                {
                    continue;
                }

                matchedScanNums.Add(candidate.ScanNum);
            }
        }
    }

    _sequenceMassBinToScanNumsMap.Add(massBin, matchedScanNums);
    return matchedScanNums;
}
/// <summary>
/// For every charge from _maxCharge down to _minCharge, registers an LC-MS match for the
/// monoisotopic mass implied by <paramref name="peakMz"/> when a chromatogram exists at the
/// next isotope's m/z.
/// </summary>
/// <param name="peakMz">m/z of the peak, treated as the most abundant isotope.</param>
/// <param name="scanNum">Scan number passed to the chromatogram extraction.</param>
private void SetLcMsMatches(double peakMz, int scanNum)
{
    var peakXic = _run.GetPrecursorExtractedIonChromatogram(peakMz, _tolerance, scanNum);
    if (peakXic.Count < 2)
    {
        return;
    }

    for (var charge = _maxCharge; charge >= _minCharge; charge--)
    {
        // The next isotope peak must have a chromatogram for this charge to be plausible.
        var nextIsotopeXic = _run.GetPrecursorExtractedIonChromatogram(
            peakMz + Constants.C13MinusC12 / charge,
            _tolerance,
            scanNum);

        if (nextIsotopeXic.Any())
        {
            // Step back from the most abundant isotope to the monoisotopic mass.
            var abundantMass = (peakMz - Constants.Proton) * charge;
            var abundantIndex = Averagine.GetIsotopomerEnvelope(abundantMass).MostAbundantIsotopeIndex;
            var monoMass = abundantMass - abundantIndex * Constants.C13MinusC12;

            _lcMsMatchMap.SetMatches(
                monoMass,
                peakXic[0].ScanNum,
                peakXic[peakXic.Count - 1].ScanNum);
        }
    }
}
/// <summary>
/// Copy constructor: takes over the settings of <paramref name="fit"/> (not its working
/// variables) and then calls Init().
/// </summary>
/// <param name="fit">Scorer whose settings are copied.</param>
public IsotopicProfileFitScorer(IsotopicProfileFitScorer fit)
{
    // Plain settings are copied by value.
    this.CompleteFitThrash = fit.CompleteFitThrash;
    this.UseThrash = fit.UseThrash;
    this.ChargeCarrierMass = fit.ChargeCarrierMass;

    // Helper objects are duplicated through their own copy constructors.
    var averagineCopy = new Averagine(fit.AveragineObj);
    this.AveragineObj = averagineCopy;

    var distributionCopy = new MercuryIsotopeDistribution(fit.IsotopeDistribution);
    this.IsotopeDistribution = distributionCopy;

    this.Init();
}
/// <summary>
/// Evaluates every sink (modification combination) of <paramref name="seqGraph"/> against
/// <paramref name="spec"/> and returns the highest scoring match.
/// </summary>
/// <param name="seqGraph">Sequence graph whose sinks are evaluated.</param>
/// <param name="spec">Product spectrum supplying the isolation window and scan number.</param>
/// <param name="featureMass">Optional mass; when set, sinks outside _tolerance of it are skipped.</param>
/// <returns>The best match, or null when no sink passes all filters.</returns>
private FlankingMassMatch GetBestMatchInTheGraph(ShiftedSequenceGraph seqGraph, ProductSpectrum spec, double? featureMass)
{
    FlankingMassMatch bestMatch = null;
    var highestScore = double.NegativeInfinity;

    var sinkCompositions = seqGraph.GetSequenceCompositions();
    for (var sinkIndex = 0; sinkIndex < sinkCompositions.Length; sinkIndex++)
    {
        seqGraph.SetSink(sinkIndex);
        var sequenceMass = seqGraph.GetSinkSequenceCompositionWithH2O().Mass;

        // Optional mass filter.
        if (featureMass.HasValue && !_tolerance.IsWithin(sequenceMass, featureMass.Value))
        {
            continue;
        }

        // The most abundant isotope at the implied charge must fall inside the isolation window.
        var window = spec.IsolationWindow;
        var charge = (int)Math.Round(sequenceMass / (window.IsolationWindowTargetMz - Constants.Proton));
        var abundantIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;
        var abundantMz = Ion.GetIsotopeMz(sequenceMass, charge, abundantIndex);
        if (!spec.IsolationWindow.Contains(abundantMz))
        {
            continue;
        }

        // Optional MS1 evidence filter.
        if (_featureFinder != null
            && _featureFinder.GetMs1EvidenceScore(spec.ScanNum, sequenceMass, charge) < Ms1CorrThreshold)
        {
            continue;
        }

        var scored = seqGraph.GetScoreAndModifications(_ms2Scorer);
        var score = scored.Item1;
        if (score > highestScore)
        {
            highestScore = score;
            bestMatch = new FlankingMassMatch(
                score,
                sequenceMass - Composition.H2O.Mass - seqGraph.ShiftMass,
                charge,
                scored.Item2);
        }
    }

    return bestMatch;
}
/// <summary>
/// Visits the peaks of <paramref name="peakList"/> in the order given by IntensityComparer and,
/// for each charge from _maxCharge down to _minCharge, scores the averagine isotope profile
/// against <paramref name="specWindow"/>. At most <paramref name="numDeisotopedPeaksToGet"/>
/// deisotoped peaks are retained.
/// </summary>
/// <param name="specWindow">Peaks used for isotope lookup and correlation.</param>
/// <param name="peakList">Candidate peaks to deisotope.</param>
/// <param name="numDeisotopedPeaksToGet">Maximum number of deisotoped peaks to keep.</param>
/// <returns>The retained deisotoped peaks as a sorted set.</returns>
private IEnumerable<DeisotopedPeak> GetDeisotopedPeaks(List<Peak> specWindow, IEnumerable<Peak> peakList, int numDeisotopedPeaksToGet)
{
    var orderedPeaks = new List<Peak>(peakList);
    orderedPeaks.Sort(new IntensityComparer());

    var retained = new SortedSet<DeisotopedPeak>();

    foreach (var seedPeak in orderedPeaks)
    {
        var seedMz = seedPeak.Mz;

        // Correlation obtained per charge for this seed peak (index = charge).
        var corrByCharge = new double[_maxCharge + 1];

        for (var charge = _maxCharge; charge >= _minCharge; charge--)
        {
            // Require the next isotope peak to be present.
            var followingPeak = PeakListUtils.FindPeak(specWindow, seedMz + Constants.C13MinusC12 / charge, _tolerance);
            if (followingPeak == null)
            {
                continue;
            }

            // Treat the seed as the most abundant isotope and step back to the monoisotopic mass.
            var abundantMass = (seedMz - Constants.Proton) * charge;
            var abundantIndex = Averagine.GetIsotopomerEnvelope(abundantMass).MostAbundantIsotopeIndex;
            var monoMass = abundantMass - abundantIndex * Constants.C13MinusC12;

            var theoreticalProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, charge);
            var corr = PeakListUtils.GetPearsonCorrelation(specWindow, theoreticalProfile, _comparer);
            corrByCharge[charge] = corr;

            // Reject this charge when a multiple of it already scored above 80% of this correlation.
            var overshadowed = false;
            for (var factor = 2; factor <= _maxCharge / charge; factor++)
            {
                if (corrByCharge[charge * factor] > 0.8 * corr)
                {
                    overshadowed = true;
                    break;
                }
            }

            if (overshadowed)
            {
                continue;
            }

            retained.Add(new DeisotopedPeak(monoMass, charge, corr));

            // Keep the set bounded by evicting its minimum element.
            if (retained.Count > numDeisotopedPeaksToGet)
            {
                retained.Remove(retained.Min);
            }
        }
    }

    return retained;
}
/// <summary>
/// Processes the first peak of <paramref name="remainingPeakList"/>: for each charge from
/// _maxCharge down to _minCharge, correlates the averagine isotope profile with
/// <paramref name="specWindow"/> and appends the monoisotopic mass to
/// <paramref name="deisotopedMassList"/> when the correlation exceeds _corrThreshold.
/// The processed peak is then removed from the list.
/// </summary>
/// <param name="specWindow">Peaks used for isotope lookup and correlation.</param>
/// <param name="remainingPeakList">Pending peaks; the first one is consumed.</param>
/// <param name="deisotopedMassList">Receives the accepted monoisotopic masses.</param>
private void ApplyDeconvolution(List<Peak> specWindow, ref LinkedList<Peak> remainingPeakList, ref List<double> deisotopedMassList)
{
    if (!remainingPeakList.Any())
    {
        return;
    }

    var seedMz = remainingPeakList.First.Value.Mz;

    // Correlation obtained per charge for this seed peak (index = charge).
    var corrByCharge = new double[_maxCharge + 1];

    for (var charge = _maxCharge; charge >= _minCharge; charge--)
    {
        // Require the next isotope peak to be present.
        var followingPeak = PeakListUtils.FindPeak(specWindow, seedMz + Constants.C13MinusC12 / charge, _tolerance);
        if (followingPeak == null)
        {
            continue;
        }

        // Treat the seed as the most abundant isotope and step back to the monoisotopic mass.
        var abundantMass = (seedMz - Constants.Proton) * charge;
        var abundantIndex = Averagine.GetIsotopomerEnvelope(abundantMass).MostAbundantIsotopeIndex;
        var monoMass = abundantMass - abundantIndex * Constants.C13MinusC12;

        var theoreticalProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, charge);
        var corr = PeakListUtils.GetPearsonCorrelation(specWindow, theoreticalProfile, _comparer);
        corrByCharge[charge] = corr;

        // Reject this charge when a multiple of it already scored above 80% of this correlation.
        var accepted = true;
        for (var factor = 2; factor <= _maxCharge / charge; factor++)
        {
            if (corrByCharge[charge * factor] > 0.8 * corr)
            {
                accepted = false;
                break;
            }
        }

        if (accepted && corr > _corrThreshold)
        {
            deisotopedMassList.Add(monoMass);
        }
    }

    remainingPeakList.RemoveFirst();
}
/// <summary>
/// Calls Averagine.GetIsotopomerEnvelopeFromNominalMass for every nominal mass
/// from 1000 through 50000; the results are not inspected.
/// </summary>
public void TestAveragine()
{
    Utils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const int firstNominalMass = 1000;
    const int lastNominalMass = 50000;

    var mass = firstNominalMass;
    while (mass <= lastNominalMass)
    {
        Averagine.GetIsotopomerEnvelopeFromNominalMass(mass);
        mass++;
    }
}
/// <summary>
/// Prints the theoretical averagine isotope profile (m/z and intensity per line)
/// for a fixed monoisotopic mass and charge.
/// </summary>
public void TestAveragine()
{
    ShowStarting(MethodBase.GetCurrentMethod().Name);

    const int charge = 14;
    const double monoMass = 10247.5293287335;

    var isotopePeaks = Averagine.GetTheoreticalIsotopeProfile(monoMass, charge);

    Console.WriteLine("Isotope ions:");
    foreach (var isotopePeak in isotopePeaks)
    {
        var mz = isotopePeak.Mz;
        var intensity = isotopePeak.Intensity;
        Console.WriteLine("{0}\t{1}", mz, intensity);
    }

    Console.WriteLine();
}
/// <summary>
/// Computes the theoretical averagine isotope profile for this item's Mass and Charge,
/// using <paramref name="proportions"/> for the element identified by Element.Code and the
/// predictor's default proportions for the other elements (C, H, N, O, S).
/// </summary>
/// <param name="proportions">Isotope proportions to apply to the selected element.</param>
/// <returns>The theoretical isotope profile peaks.</returns>
public List<Peak> GetTheoreticalIsotopeProfile(double[] proportions)
{
    var selectedElement = Element.Code;

    // Substitute the supplied proportions only for the element this instance represents.
    var carbon = selectedElement == "C" ? proportions : IsoProfilePredictor.DefaultProbC;
    var hydrogen = selectedElement == "H" ? proportions : IsoProfilePredictor.DefaultProbH;
    var nitrogen = selectedElement == "N" ? proportions : IsoProfilePredictor.DefaultProbN;
    var oxygen = selectedElement == "O" ? proportions : IsoProfilePredictor.DefaultProbO;
    var sulfur = selectedElement == "S" ? proportions : IsoProfilePredictor.DefaultProbS;

    var predictor = new IsoProfilePredictor(carbon, hydrogen, nitrogen, oxygen, sulfur);
    var model = new Averagine();

    return model.GetTheoreticalIsotopeProfileInst(Mass, Charge, RelativeIntensityThreshold, predictor);
}
/// <summary>
/// Sums the MS2 spectra whose fragmentation scans are found at the most abundant isotope m/z
/// of <paramref name="monoIsotopicMass"/> for each charge, restricted to the given scan range
/// and (optionally) activation method.
/// </summary>
/// <param name="monoIsotopicMass">Monoisotopic mass of the precursor.</param>
/// <param name="minScanNum">min scan number, inclusive</param>
/// <param name="maxScanNum">max scan number, inclusive</param>
/// <param name="minCharge">min charge, inclusive</param>
/// <param name="maxCharge">max charge, inclusive</param>
/// <param name="activationMethod">Required activation method; Unknown accepts every scan.</param>
/// <returns>A product spectrum built from the summed peaks, tagged with <paramref name="activationMethod"/>.</returns>
public ProductSpectrum GetSummedMs2Spectrum(double monoIsotopicMass, int minScanNum, int maxScanNum, int minCharge, int maxCharge, ActivationMethod activationMethod = ActivationMethod.Unknown)
{
    var envelope = Averagine.GetIsotopomerEnvelope(monoIsotopicMass);
    var selectedScanNums = new List<int>();

    for (var charge = minCharge; charge <= maxCharge; charge++)
    {
        var abundantMz = Ion.GetIsotopeMz(monoIsotopicMass, charge, envelope.MostAbundantIsotopeIndex);

        foreach (var candidateScan in GetFragmentationSpectraScanNums(abundantMz))
        {
            // Scan range filter (both ends inclusive).
            if (candidateScan < minScanNum || candidateScan > maxScanNum)
            {
                continue;
            }

            // Activation filter; the spectrum is only fetched when a specific method was requested.
            if (activationMethod != ActivationMethod.Unknown
                && ((ProductSpectrum)GetSpectrum(candidateScan)).ActivationMethod != activationMethod)
            {
                continue;
            }

            selectedScanNums.Add(candidateScan);
        }
    }

    var summed = GetSummedSpectrum(selectedScanNums);
    var result = new ProductSpectrum(summed.Peaks, 0)
    {
        ActivationMethod = activationMethod
    };

    return result;
}
/// <summary>
/// Build the isotope plot showing theoretical isotopic profile and
/// actual isotopic profile.
/// This will calculate the theoretical using averagine from the provided monoisotopic mass.
/// </summary>
/// <param name="actual">Actual isotopic profile; each isotope's Index selects the theoretical point whose X it shares.</param>
/// <param name="mass">Monoisotopic mass, for calculating theoretical isotopic profile.</param>
/// <param name="charge">Charge used to compute the m/z of each theoretical isotope.</param>
public void BuildPlot(Isotope[] actual, double mass, int charge)
{
    // Calculate theoretical isotopic profile using averagine
    var theoEnvelope = Averagine.GetIsotopomerEnvelope(mass);
    var theoretical = new PeakDataPoint[theoEnvelope.Envelope.Length];

    // Calculate the neutral mass for each isotope index (theoretical)
    for (var isotopeIndex = 0; isotopeIndex < theoEnvelope.Envelope.Length; isotopeIndex++)
    {
        var intensity = theoEnvelope.Envelope[isotopeIndex];
        var mz = Ion.GetIsotopeMz(mass, charge, isotopeIndex);

        // Neutral mass of a z-fold protonated ion: M = z * (m/z) - z * m(proton).
        // The m/z term must not be scaled by the proton mass; doing so inflates every
        // X value by roughly 0.7%.
        var m = (mz * charge) - (charge * Constants.Proton);

        theoretical[isotopeIndex] = new PeakDataPoint(m, intensity, 0.0, 0.0, string.Empty);
    }

    // Create peak data points from the observed isotopes; each one reuses the X value of the
    // theoretical point with the same isotope index so the two series line up.
    var observed = actual
        .Select(i => new PeakDataPoint(theoretical[i.Index].X, i.Ratio, 0.0, 0.0, string.Empty) { Index = i.Index })
        .ToArray();

    BuildPlot(theoretical, observed, false);
}
/// <summary>
/// Initializes a new instance of the MainWindowViewModel class: creates the child view
/// models, builds the reactive commands, wires the data set / data reader subscriptions,
/// and starts a background Averagine warm-up call.
/// </summary>
/// <param name="dialogService">Service for view model friendly dialogs</param>
/// <param name="dataReader">Service for reading raw files, id files, and feature files</param>
public MainWindowViewModel(IMainDialogService dialogService, IDataReader dataReader)
{
    this.dialogService = dialogService;
    this.dataReader = dataReader;

    // Initialize child view models
    DataSets = new ReactiveList <DataSetViewModel> { ChangeTrackingEnabled = true };
    CreateSequenceViewModel = new CreateSequenceViewModel(this.dialogService);
    ScanViewModel = new ScanViewModel(this.dialogService, new List <PrSm>());

    // Remove the "Hide Unidentified Scans" filter from the ScanViewModel filters.
    // FirstOrDefault yields null when no filter has that name; Remove is then called with null.
    ScanViewModel.Filters.Remove(ScanViewModel.Filters.FirstOrDefault(f => f.Name == "Hide Unidentified Scans"));

    // Create commands for file operations
    OpenDataSetCommand = ReactiveCommand.CreateFromTask(async _ => await OpenDataSetImplementation());
    OpenRawFileCommand = ReactiveCommand.CreateFromTask(async _ => await OpenRawFileImplementation());
    OpenTsvFileCommand = ReactiveCommand.CreateFromTask(async _ => await OpenIdFileImplementation());
    OpenFeatureFileCommand = ReactiveCommand.CreateFromTask(async _ => await OpenFeatureFileImplementation());
    OpenFromDmsCommand = ReactiveCommand.CreateFromTask(async _ => await OpenFromDmsImplementation());

    // Create command to open settings window
    OpenSettingsCommand = ReactiveCommand.Create(() => this.dialogService.OpenSettings(new SettingsViewModel(this.dialogService)));

    // Create command to open isotopic profile viewer
    OpenIsotopicProfileViewerCommand = ReactiveCommand.Create(OpenIsotopicProfileViewer);
    //this.OpenIsotopicProfileViewer(new object());

    // Create command to open about box
    OpenAboutBoxCommand = ReactiveCommand.Create(() => this.dialogService.OpenAboutBox());

    // Create command to open new modification management window
    OpenManageModificationsCommand = ReactiveCommand.Create(ManageModificationsImplementation);

    // Create MSPathFinder search command
    RunMsPathFinderSearchCommand = ReactiveCommand.Create(RunMsPathFinderSearchImplementation);

    // Create export command; it can only execute while at least one data set is open
    ExportResultsCommand = ReactiveCommand.Create(ExportResultsImplementation, DataSets.WhenAnyValue(x => x.Count).Select(count => count > 0));

    // Create quit command
    QuitProgramCommand = ReactiveCommand.Create(() => this.dialogService.QuitProgram());

    // Start with the splash screen visible
    ShowSplash = true;

    // When a data set sets its ReadyToClose property to true, drop its PrSms from the
    // scan view model and remove it from the dataset list
    DataSets.ItemChanged.Where(x => x.PropertyName == "ReadyToClose")
        .Select(x => x.Sender).Where(sender => sender.ReadyToClose)
        .Subscribe(dataSet =>
        {
            ScanViewModel.RemovePrSmsFromRawFile(dataSet.Title);
            DataSets.Remove(dataSet);
        });

    // Show the splash screen when the data set being removed is the only one left.
    // NOTE(review): relies on BeforeItemsRemoved firing while Count still includes the
    // item being removed - confirm against ReactiveList semantics.
    DataSets.BeforeItemsRemoved.Subscribe(x => ShowSplash = DataSets.Count == 1);

    // If a dataset is opened, hide the splash screen
    DataSets.BeforeItemsAdded.Subscribe(x => ShowSplash = false);

    // Mirror the data reader's ReadingIdFiles flag into IdFileLoading
    this.dataReader.WhenAnyValue(x => x.ReadingIdFiles)
        .Subscribe(readingIdFiles => IdFileLoading = readingIdFiles);

    // When a non-null PrSm is selected in the scan view model, make all data sets show that PrSm
    ScanViewModel.WhenAnyValue(x => x.SelectedPrSm)
        .Where(selectedPrSm => selectedPrSm != null)
        .Subscribe(selectedPrSm =>
        {
            foreach (var dataSet in DataSets)
            {
                dataSet.SelectedPrSm = selectedPrSm;
            }
        });

    // Warm up InformedProteomics Averagine using arbitrary mass (fire-and-forget on the thread pool)
    Task.Run(() => Averagine.GetIsotopomerEnvelopeFromNominalMass(50000));
}
/// <summary>
/// Reads a raw file, determines precursor charge and monoisotopic m/z for every MS2 scan
/// from its preceding MS1 scan, and writes the processed MS2 spectra to an MGF file named
/// after the input file inside <paramref name="outputDir"/>.
/// </summary>
/// <param name="path">Path of the raw file to read.</param>
/// <param name="outputDir">Directory that receives the ".mgf" output file.</param>
/// <param name="type">Averagine type passed to the monoisotopic search.</param>
/// <param name="chargerType">Selects the charge determination algorithm (Patterson by default, Fourier, or combined).</param>
public void ParallelRun(string path, string outputDir, AveragineType type, ChargerType chargerType)
{
    string file = Path.GetFileNameWithoutExtension(path) + ".mgf";
    string output = Path.Combine(outputDir, file);

    ThermoRawSpectrumReader reader = new ThermoRawSpectrumReader();
    LocalMaximaPicking picking = new LocalMaximaPicking(ms1PrcisionPPM);
    reader.Init(path);

    // Group every MS2 scan under the most recent MS1 scan that precedes it.
    Dictionary<int, List<int>> scanGroup = new Dictionary<int, List<int>>();
    int current = -1;
    int start = reader.GetFirstScan();
    int end = reader.GetLastScan();
    // NOTE(review): the loop excludes `end`; confirm whether GetLastScan() is inclusive.
    for (int i = start; i < end; i++)
    {
        int msOrder = reader.GetMSnOrder(i);
        if (msOrder == 1)
        {
            current = i;
            scanGroup[i] = new List<int>();
        }
        else if (msOrder == 2 && current >= 0)
        {
            // An MS2 scan seen before any MS1 scan has no parent spectrum and is skipped
            // (previously this indexed scanGroup[-1] and threw KeyNotFoundException).
            scanGroup[current].Add(i);
        }
    }

    List<MS2Info> ms2Infos = new List<MS2Info>();

    // NOTE(review): `reader` and `picking` are shared by all worker threads - confirm they
    // are safe for concurrent use.
    Parallel.ForEach(scanGroup, (scanPair) =>
    {
        if (scanPair.Value.Count > 0)
        {
            ISpectrum ms1 = reader.GetSpectrum(scanPair.Key);
            foreach (int i in scanPair.Value)
            {
                double mz = reader.GetPrecursorMass(i, reader.GetMSnOrder(i));
                List<IPeak> ms1Peaks = FilterPeaks(ms1.GetPeaks(), mz, searchRange);
                if (ms1Peaks.Count == 0)
                {
                    continue;
                }

                // insert pseudo peaks for large gap
                List<IPeak> peaks = new List<IPeak>();
                double precision = 0.02;
                double last = ms1Peaks[0].GetMZ();
                foreach (IPeak peak in ms1Peaks)
                {
                    if (peak.GetMZ() - last > precision)
                    {
                        peaks.Add(new GeneralPeak(last + precision / 2, 0));
                        peaks.Add(new GeneralPeak(peak.GetMZ() - precision / 2, 0));
                    }
                    peaks.Add(peak);
                    last = peak.GetMZ();
                }
                List<IPeak> majorPeaks = picking.Process(peaks);

                // determine the precursor charge with the requested algorithm
                ICharger charger = new Patterson();
                if (chargerType == ChargerType.Fourier)
                {
                    charger = new Fourier();
                }
                else if (chargerType == ChargerType.Combined)
                {
                    charger = new PattersonFourierCombine();
                }
                int charge = charger.Charge(peaks, mz - searchRange, mz + searchRange);

                // find envelope cluster
                EnvelopeProcess envelope = new EnvelopeProcess();
                var cluster = envelope.Cluster(majorPeaks, mz, charge);
                if (cluster.Count == 0)
                {
                    continue;
                }

                // find monopeak
                Averagine averagine = new Averagine(type);
                BrainCSharp braincs = new BrainCSharp();
                MonoisotopicSearcher searcher = new MonoisotopicSearcher(averagine, braincs);
                MonoisotopicScore result = searcher.Search(mz, charge, cluster);

                // process the MS2 spectrum and collect it for the MGF output
                ISpectrum ms2 = reader.GetSpectrum(i);
                IProcess processer = new WeightedAveraging(new LocalNeighborPicking());
                ms2 = processer.Process(ms2);

                MS2Info ms2Info = new MS2Info
                {
                    PrecursorMZ = result.GetMZ(),
                    PrecursorCharge = charge,
                    Scan = ms2.GetScanNum(),
                    Retention = ms2.GetRetention(),
                    Peaks = ms2.GetPeaks()
                };

                // ms2Infos is shared between worker threads
                lock (resultLock)
                {
                    ms2Infos.Add(ms2Info);
                }
            }
        }
        // NOTE(review): this adds the total group count once per group - confirm intended.
        readingProgress.Add(scanGroup.Count);
    });

    ms2Infos = ms2Infos.OrderBy(m => m.Scan).ToList();

    // FileMode.Create truncates an existing file. OpenOrCreate would leave stale bytes at
    // the end whenever the new content is shorter than the previous file.
    using (FileStream ostrm = new FileStream(output, FileMode.Create, FileAccess.Write))
    {
        using (StreamWriter writer = new StreamWriter(ostrm))
        {
            foreach (MS2Info ms2 in ms2Infos)
            {
                WriteMGF(writer, path + ",SCANS=" + ms2.Scan.ToString() + ",PRECURSOR=" + ms2.PrecursorMZ, ms2.PrecursorMZ, ms2.PrecursorCharge,
                    ms2.Scan, ms2.Retention * 60, reader.GetActivation(ms2.Scan), ms2.Peaks);
                writer.Flush();
            }
        }
    }

    // update progress
    progress.Add();
}
/// <summary>
/// Deconvolutes <paramref name="peaks"/> greedily: peaks are visited from highest to lowest
/// intensity, the best scoring (charge, isotope offset) interpretation is kept for each
/// unused peak, and the peaks it explains are marked as used.
/// </summary>
/// <param name="peaks">Peak array; located with Array.BinarySearch, so presumably sorted in Peak's comparison order - verify against caller.</param>
/// <param name="minCharge">Smallest charge to try, inclusive.</param>
/// <param name="maxCharge">Largest charge to try, inclusive.</param>
/// <param name="isotopeOffsetTolerance">Isotope index offsets tried around the most abundant isotope; a negative value uses the envelope length.</param>
/// <param name="tolerance">Tolerance passed to the isotope peak lookup.</param>
/// <param name="corrScoreThreshold">Candidates with a lower correlation are dropped when their distance also exceeds 0.1.</param>
/// <returns>The accepted deconvoluted peaks, sorted.</returns>
public static List<DeconvolutedPeak> GetDeconvolutedPeaks_new(
    Peak[] peaks,
    int minCharge,
    int maxCharge,
    int isotopeOffsetTolerance,
    Tolerance tolerance,
    double corrScoreThreshold)
{
    var spectrum = new Spectrum(peaks, 0);
    var accepted = new List<DeconvolutedPeak>();
    var byIntensity = peaks.OrderByDescending(p => p.Intensity).ToArray();
    var peakUsed = new bool[peaks.Length];

    foreach (var seed in byIntensity)
    {
        // Skip seeds that an earlier, more intense interpretation already consumed.
        var seedIndex = Array.BinarySearch(peaks, seed);
        if (peakUsed[seedIndex])
        {
            continue;
        }

        var topScore = 0.0;
        DeconvolutedPeak topPeak = null;
        Tuple<Peak, int>[] topIsotopePeaks = null;

        for (var charge = minCharge; charge <= maxCharge; charge++)
        {
            var mass = seed.Mz * charge - (charge * Constants.Proton);
            if (mass > MaxMass)
            {
                continue;
            }

            var envelope = Averagine.GetIsotopomerEnvelope(mass);
            var abundantIndex = envelope.MostAbundantIsotopeIndex;
            var offsetSpan = isotopeOffsetTolerance < 0 ? envelope.Envelope.Length : isotopeOffsetTolerance;

            var lastIsotopeIndex = abundantIndex + offsetSpan;
            for (var isotopeIndex = abundantIndex - offsetSpan; isotopeIndex <= lastIsotopeIndex; isotopeIndex++)
            {
                var monoMass = Ion.GetMonoIsotopicMass(seed.Mz, charge, isotopeIndex);
                var isotopePeaks = GetAllIsotopePeaks(spectrum, monoMass, charge, envelope, tolerance, 0.1);
                if (isotopePeaks == null)
                {
                    continue;
                }

                var theoretical = envelope.Envelope;
                var intensities = new double[isotopePeaks.Length];
                var matchedCount = 0;

                for (var k = 0; k < isotopePeaks.Length; k++)
                {
                    var hit = isotopePeaks[k];

                    // Peaks already claimed by another interpretation are blanked out.
                    if (hit != null && peakUsed[hit.Item2])
                    {
                        isotopePeaks[k] = null;
                        hit = null;
                    }

                    if (hit == null)
                    {
                        intensities[k] = 0.0;
                    }
                    else
                    {
                        matchedCount++;
                        intensities[k] = (float)hit.Item1.Intensity;
                    }
                }

                var similarity = FitScoreCalculator.GetDistanceAndCorrelation(theoretical, intensities);
                var bcDist = similarity.Item1;
                var corr = similarity.Item2;
                var foundPeakRatio = matchedCount / ((double)theoretical.Length);

                // Interference defaults to 10 unless at least two isotope peaks remain.
                var interferenceScore = 10.0;
                var remaining = isotopePeaks.Where(p => p != null).ToArray();
                if (remaining.Length >= 2)
                {
                    var lowMz = remaining[0].Item1.Mz;
                    var highMz = remaining[remaining.Length - 1].Item1.Mz;
                    var peaksInRange = spectrum.Peaks.Where(p => p.Mz >= lowMz && p.Mz <= highMz).ToArray();
                    interferenceScore = CalculateInterferenceScore(peaksInRange, remaining);
                }

                // Guard the division below against a zero distance.
                bcDist = Math.Max(bcDist, double.Epsilon);
                if (corr < corrScoreThreshold && bcDist > 0.1)
                {
                    continue;
                }

                var offsetPenalty = Math.Abs(abundantIndex - isotopeIndex) + 1;
                var score = (foundPeakRatio * corr) / (bcDist * offsetPenalty * interferenceScore);

                if (score >= topScore)
                {
                    topScore = score;
                    topIsotopePeaks = isotopePeaks;
                    topPeak = new DeconvolutedPeak(
                        monoMass,
                        intensities[abundantIndex],
                        charge,
                        corr,
                        bcDist,
                        isotopePeaks.Where(p => p != null).Select(p => p.Item1).ToArray());
                }
            }
        }

        if (topPeak == null)
        {
            continue;
        }

        // Record the winner and mark every peak it explains as used.
        accepted.Add(topPeak);
        foreach (var explained in topIsotopePeaks)
        {
            if (explained == null)
            {
                continue;
            }

            topPeak.ObservedPeakIndices.Add(explained.Item2);
            peakUsed[explained.Item2] = true;
        }
    }

    accepted.Sort();
    return accepted;
}
/// <summary>
/// Deconvolutes every peak that has the highest intensity within +/- <paramref name="filteringWindowSize"/>
/// of its m/z, trying each charge and isotope offset against the peaks inside that window.
/// </summary>
/// <param name="scanNum">Scan number (included in any exceptions that are caught)</param>
/// <param name="peaks">Peak array; neighbours are scanned by index, so presumably sorted by m/z - verify against caller.</param>
/// <param name="minCharge">Smallest charge to try, inclusive.</param>
/// <param name="maxCharge">Largest charge to try, inclusive.</param>
/// <param name="isotopeOffsetTolerance">Isotope index offsets tried around the most abundant isotope.</param>
/// <param name="filteringWindowSize">Half-width (in m/z) of the local-maximum window.</param>
/// <param name="tolerance">Tolerance passed to the isotope peak lookup.</param>
/// <param name="corrScoreThreshold">Candidates with a lower correlation are dropped when their distance also exceeds 0.03.</param>
/// <returns>All accepted deconvoluted peaks, sorted.</returns>
/// <exception cref="Exception">Wraps any exception raised during processing, adding the scan number.</exception>
public static List<DeconvolutedPeak> GetDeconvolutedPeaks(
    int scanNum,
    Peak[] peaks,
    int minCharge,
    int maxCharge,
    int isotopeOffsetTolerance,
    double filteringWindowSize,
    Tolerance tolerance,
    double corrScoreThreshold)
{
    try
    {
        var accepted = new List<DeconvolutedPeak>();

        for (var center = 0; center < peaks.Length; center++)
        {
            var peak = peaks[center];
            var isLocalMax = true;

            // Walk left until the window edge or a more intense neighbour is found.
            var left = center - 1;
            for (; left >= 0; left--)
            {
                var neighbour = peaks[left];
                if ((peak.Mz - neighbour.Mz) > filteringWindowSize)
                {
                    break;
                }

                if (neighbour.Intensity > peak.Intensity)
                {
                    isLocalMax = false;
                    break;
                }
            }

            if (!isLocalMax)
            {
                continue;
            }

            // Walk right in the same manner.
            var right = center + 1;
            for (; right < peaks.Length; right++)
            {
                var neighbour = peaks[right];
                if ((neighbour.Mz - peak.Mz) > filteringWindowSize)
                {
                    break;
                }

                if (neighbour.Intensity > peak.Intensity)
                {
                    isLocalMax = false;
                    break;
                }
            }

            if (!isLocalMax)
            {
                continue;
            }

            // The peak is the local maximum; its window is [left + 1, right - 1].
            var windowPeaks = new Peak[right - left - 1];
            Array.Copy(peaks, left + 1, windowPeaks, 0, windowPeaks.Length);
            var windowSpectrum = new Spectrum(windowPeaks, 1);
            var centerMz = peak.Mz;

            for (var charge = maxCharge; charge >= minCharge; charge--)
            {
                var mass = (peak.Mz * charge) - charge * Constants.Proton;
                var abundantIndex = Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex;

                var lastIsotopeIndex = abundantIndex + isotopeOffsetTolerance;
                for (var isotopeIndex = abundantIndex - isotopeOffsetTolerance; isotopeIndex <= lastIsotopeIndex; isotopeIndex++)
                {
                    var monoMass = Ion.GetMonoIsotopicMass(centerMz, charge, isotopeIndex);
                    var envelope = Averagine.GetIsotopomerEnvelope(monoMass);
                    var isotopePeaks = windowSpectrum.GetAllIsotopePeaks(monoMass, charge, envelope, tolerance, 0.1);
                    if (isotopePeaks == null)
                    {
                        continue;
                    }

                    // Missing isotope peaks keep the array's default intensity of zero.
                    var intensities = new double[isotopePeaks.Length];
                    for (var k = 0; k < isotopePeaks.Length; k++)
                    {
                        var hit = isotopePeaks[k];
                        if (hit != null)
                        {
                            intensities[k] = (float)hit.Intensity;
                        }
                    }

                    var similarity = FitScoreCalculator.GetDistanceAndCorrelation(envelope.Envelope, intensities);
                    var bcDist = similarity.Item1;
                    var corr = similarity.Item2;

                    if (corr < corrScoreThreshold && bcDist > 0.03)
                    {
                        continue;
                    }

                    // This monoisotopic mass is valid; every passing candidate is kept.
                    accepted.Add(new DeconvolutedPeak(
                        monoMass,
                        intensities[abundantIndex],
                        charge,
                        corr,
                        bcDist,
                        isotopePeaks));
                }
            }
        }

        accepted.Sort();
        return accepted;
    }
    catch (Exception ex)
    {
        throw new Exception(string.Format("Error getting deconvoluted peaks for scan {0} in GetDeconvolutedPeaks: {1}", scanNum, ex.Message), ex);
    }
}
/// <summary>
/// Diagnostic test that cross-references the features of one dataset (.ms1ft file) with
/// MSPathFinder identifications and with sequence tags, then prints summary counts to the console.
/// The test is ignored when any of the hard-coded input files is missing.
/// </summary>
public void TestFeatureIdMatching()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    const double tolerancePpm = 13;

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    // Identifications passing the q-value threshold, ordered by mass so they can be binary searched
    var idList = resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
    var idMassList = idList.Select(id => id.Mass).ToList();
    var idFlag = new bool[idList.Count];

    // Parse sequence tags
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    const int minTagLength = 6;
    const int numProtMatches = 4;
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
    if (!File.Exists(featureFileName))
    {
        // Same treatment as the other inputs: skip instead of failing inside the parser
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, featureFileName);
    }

    var featureParser = new TsvFileParser(featureFileName);
    var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
    var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
    var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

    var numFeaturesWithId = 0;
    var numFeaturesWithMs2 = 0;
    var numFeaturesWithTags = 0;
    var numFeaturesWithMatchingTags = 0;
    // Counts features whose best protein is hit by at least numProtMatches tags
    var numFeaturesWithTwoOrMoreMatchingTags = 0;
    var numFeaturesWithNoIdAndMatchingTags = 0;

    var tolerance = new Tolerance(tolerancePpm);

    for (var i = 0; i < featureParser.NumData; i++)
    {
        var mass = monoMass[i];

        // Find Id
        var tolDa = tolerance.GetToleranceAsDa(mass, 1);
        var minMass = mass - tolDa;
        var maxMass = mass + tolDa;

        var index = idMassList.BinarySearch(mass);
        if (index < 0)
        {
            // Not found: the complement is the index of the first larger element
            index = ~index;
        }

        var matchedId = new List<MsPathFinderId>();

        // go down (scan bounds are inclusive, matching the upward search below)
        var curIndex = index - 1;
        while (curIndex >= 0)
        {
            var curId = idList[curIndex];
            if (curId.Mass < minMass)
            {
                break;
            }

            if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i] &&
                curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            --curIndex;
        }

        // go up
        curIndex = index;
        while (curIndex < idList.Count)
        {
            var curId = idList[curIndex];
            if (curId.Mass > maxMass)
            {
                break;
            }

            if (curId.Scan >= minScan[i] && curId.Scan <= maxScan[i] &&
                curId.Charge >= minCharge[i] && curId.Charge <= maxCharge[i])
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            ++curIndex;
        }

        var hasId = matchedId.Any();
        if (hasId)
        {
            ++numFeaturesWithId;
        }

        // Find MS2 scans whose isolation window contains the feature's most abundant isotope
        var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex;
        var tags = new List<SequenceTag>();
        var hasMs2 = false;
        for (var scanNum = minScan[i]; scanNum <= maxScan[i]; scanNum++)
        {
            var isolationWindow = run.GetIsolationWindow(scanNum);
            if (isolationWindow == null)
            {
                continue;
            }

            var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
            var charge = (int)Math.Round(mass / isolationWindowTargetMz);
            if (charge < minCharge[i] || charge > maxCharge[i])
            {
                continue;
            }

            var mz = Ion.GetIsotopeMz(mass, charge, mostAbundantIsotopeIndex);
            if (isolationWindow.Contains(mz))
            {
                tags.AddRange(tagParser.GetSequenceTags(scanNum));
                hasMs2 = true;
            }
        }

        if (hasMs2)
        {
            ++numFeaturesWithMs2;
        }

        if (tags.Any())
        {
            ++numFeaturesWithTags;
        }

        // Histogram: protein name -> number of tags matching it
        var protHist = new Dictionary<string, int>();
        var hasMatchedTag = false;
        foreach (var tag in tags)
        {
            var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence)
                .Select(idx => fastaDb.GetProteinName(idx))
                .ToArray();
            if (!matchedProteins.Any())
            {
                continue;
            }

            hasMatchedTag = true;
            foreach (var protein in matchedProteins)
            {
                int num;
                if (protHist.TryGetValue(protein, out num))
                {
                    protHist[protein] = num + 1;
                }
                else
                {
                    protHist[protein] = 1;
                }
            }
        }

        if (hasMatchedTag)
        {
            ++numFeaturesWithMatchingTags;
            if (!hasId)
            {
                ++numFeaturesWithNoIdAndMatchingTags;
            }
        }

        if (protHist.Any())
        {
            var maxOcc = protHist.Values.Max();
            if (maxOcc >= numProtMatches)
            {
                ++numFeaturesWithTwoOrMoreMatchingTags;
            }
        }
    }

    Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
    Console.WriteLine("NumId: {0}", idList.Count);
    Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

    // List the scans of identifications that were not matched to any feature
    for (var i = 0; i < idFlag.Length; i++)
    {
        if (!idFlag[i])
        {
            Console.WriteLine(idList[i].Scan);
        }
    }
}
/// <summary>
/// End-to-end smoke test: digests reversed protein sequences from a FASTA file,
/// builds the glycan table, then walks the scans of a raw file and runs the precursor / peptide /
/// glycan searches, stopping after the first MSn scan that yields results at every stage.
/// Depends on hard-coded local input files.
/// </summary>
public void Test1()
{
    string path = @"C:\Users\Rui Zhang\Downloads\ZC_20171218_C16_R1.raw";
    string fasta = @"C:\Users\Rui Zhang\Downloads\haptoglobin.fasta";

    // peptides
    IProteinReader proteinReader = new FastaReader();
    List<IProtein> proteins = proteinReader.Read(fasta);
    List<IProtein> decoyProteins = new List<IProtein>();
    foreach (IProtein protein in proteins)
    {
        IProtein p = new BaseProtein();
        p.SetSequence(Reverse(protein.Sequence()));
        decoyProteins.Add(p);
    }

    List<Proteases> proteases = new List<Proteases>() { Proteases.Trypsin, Proteases.GluC };

    HashSet<string> peptides = new HashSet<string>();
    ProteinDigest proteinDigest = new ProteinDigest(2, 5, proteases[0]);
    foreach (IProtein protein in decoyProteins)
    {
        peptides.UnionWith(proteinDigest.Sequences(protein.Sequence(), ProteinPTM.ContainsNGlycanSite));
    }

    // Digest the peptides found so far with each additional protease
    for (int i = 1; i < proteases.Count; i++)
    {
        proteinDigest.SetProtease(proteases[i]);
        // Snapshot: the set is modified while its former content is being digested
        List<string> peptidesList = peptides.ToList();
        foreach (string seq in peptidesList)
        {
            peptides.UnionWith(proteinDigest.Sequences(seq, ProteinPTM.ContainsNGlycanSite));
        }
    }

    Assert.True(peptides.Contains("KDNLTYVGDGETR"));

    // build glycan
    GlycanBuilder glycanBuilder = new GlycanBuilder();
    glycanBuilder.Build();

    // search
    ThermoRawSpectrumReader reader = new ThermoRawSpectrumReader();
    LocalMaximaPicking picking = new LocalMaximaPicking();
    IProcess process = new LocalNeighborPicking();
    reader.Init(path);

    double searchRange = 2;
    ISpectrum ms1 = null;
    List<IPeak> majorPeaks = new List<IPeak>();

    ISearch<string> oneSearcher = new BucketSearch<string>(ToleranceBy.PPM, 10);
    PrecursorMatch precursorMatcher = new PrecursorMatch(oneSearcher);
    precursorMatcher.Init(peptides.ToList(), glycanBuilder.GlycanMaps());

    ISearch<string> moreSearcher = new BucketSearch<string>(ToleranceBy.Dalton, 0.01);
    SequenceSearch sequenceSearcher = new SequenceSearch(moreSearcher);

    ISearch<int> extraSearcher = new BucketSearch<int>(ToleranceBy.Dalton, 0.01);
    GlycanSearch glycanSearcher = new GlycanSearch(extraSearcher, glycanBuilder.GlycanMaps());

    SearchAnalyzer searchAnalyzer = new SearchAnalyzer();

    // NOTE(review): the upper bound excludes the scan returned by GetLastScan() - confirm whether that value is inclusive
    for (int i = reader.GetFirstScan(); i < reader.GetLastScan(); i++)
    {
        if (reader.GetMSnOrder(i) < 2)
        {
            // Remember the most recent MS1 scan; later MSn scans are evaluated against it
            ms1 = reader.GetSpectrum(i);
            majorPeaks = picking.Process(ms1.GetPeaks());
            continue;
        }

        if (ms1 == null)
        {
            // MSn scan before any MS1 scan: there is no precursor spectrum to inspect
            continue;
        }

        double mz = reader.GetPrecursorMass(i, reader.GetMSnOrder(i));
        bool hasPeaksNearPrecursor = ms1.GetPeaks()
            .Any(peak => peak.GetMZ() > mz - searchRange && peak.GetMZ() < mz + searchRange);
        if (!hasPeaksNearPrecursor)
        {
            continue;
        }

        Patterson charger = new Patterson();
        int charge = charger.Charge(ms1.GetPeaks(), mz - searchRange, mz + searchRange);

        // find envelope cluster
        EnvelopeProcess envelope = new EnvelopeProcess();
        var cluster = envelope.Cluster(majorPeaks, mz, charge);
        if (cluster.Count == 0)
        {
            continue;
        }

        // find monopeak
        Averagine averagine = new Averagine(AveragineType.GlycoPeptide);
        BrainCSharp braincs = new BrainCSharp();
        MonoisotopicSearcher searcher = new MonoisotopicSearcher(averagine, braincs);
        MonoisotopicScore result = searcher.Search(mz, charge, cluster);
        double precursorMZ = result.GetMZ();

        // search
        ISpectrum ms2 = reader.GetSpectrum(i);
        ms2 = process.Process(ms2);

        // precursor match
        var pre_results = precursorMatcher.Match(precursorMZ, charge);
        if (pre_results.Count == 0)
        {
            continue;
        }

        // spectrum search
        var peptide_results = sequenceSearcher.Search(ms2.GetPeaks(), charge, pre_results);
        if (peptide_results.Count == 0)
        {
            continue;
        }

        var glycan_results = glycanSearcher.Search(ms2.GetPeaks(), charge, pre_results);
        if (glycan_results.Count == 0)
        {
            continue;
        }

        // One fully analyzed scan is enough for this smoke test
        searchAnalyzer.Analyze(i, ms2.GetPeaks(), peptide_results, glycan_results);
        break;
    }
}
/// <summary>
/// Register a feature: find the MS2 scans within the feature's scan range whose isolation window
/// contains the feature's most abundant isotope m/z, keep the ones closest in elution time to the
/// representative scan, and record them for every mass bin within tolerance of the feature mass.
/// </summary>
/// <param name="featureId">Feature identifier stored with every affected mass bin</param>
/// <param name="monoIsotopicMass">Monoisotopic mass of the feature</param>
/// <param name="minScanNum">First scan of the feature; clamped to the run's first LC scan</param>
/// <param name="maxScanNum">Last scan of the feature; clamped to the run's last LC scan</param>
/// <param name="repScanNum">Representative scan; the feature is ignored when it lies outside the clamped scan range</param>
/// <param name="minCharge">Minimum charge of the feature (currently not applied as a filter)</param>
/// <param name="maxCharge">Maximum charge of the feature (currently not applied as a filter)</param>
public void SetMatches(int featureId, double monoIsotopicMass, int minScanNum, int maxScanNum, int repScanNum, int minCharge, int maxCharge)
{
    if (minScanNum < _run.MinLcScan)
    {
        minScanNum = _run.MinLcScan;
    }

    if (maxScanNum > _run.MaxLcScan)
    {
        maxScanNum = _run.MaxLcScan;
    }

    // Reject a representative scan outside [minScanNum, maxScanNum].
    // This must be an OR: a scan cannot be below the minimum and above the maximum at once.
    if (repScanNum < minScanNum || repScanNum > maxScanNum)
    {
        return;
    }

    // Keys are elution time distance to the representative scan, values are scan number
    var registeredMs2Scans = new List<KeyValuePair<double, int>>();

    var repRt = _run.GetElutionTime(repScanNum);

    // Depends on the mass only, so it is looked up once rather than per scan
    var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(monoIsotopicMass).MostAbundantIsotopeIndex;

    for (var scanNum = minScanNum; scanNum <= maxScanNum; scanNum++)
    {
        if (!_scanToIsolationWindow.TryGetValue(scanNum, out var isolationWindow))
        {
            continue;
        }

        // Charge that would place this mass at the isolation window's target m/z
        var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
        var charge = (int)Math.Round(monoIsotopicMass / isolationWindowTargetMz);

        var mz = Ion.GetIsotopeMz(monoIsotopicMass, charge, mostAbundantIsotopeIndex);
        if (isolationWindow.Contains(mz))
        {
            var rt = _run.GetElutionTime(scanNum);
            registeredMs2Scans.Add(new KeyValuePair<double, int>(Math.Abs(rt - repRt), scanNum));
        }
    }

    // determine bit array: one bit per LC scan, set for the closest matching MS2 scans
    var bitArray = new BitArray(_run.MaxLcScan - _run.MinLcScan + 1);
    foreach (var e in registeredMs2Scans.OrderBy(x => x.Key).Take(_maxNumMs2ScansPerMass))
    {
        var scanNum = e.Value;
        bitArray.Set(scanNum - _run.MinLcScan, true);
    }

    var deltaMass = _tolerance.GetToleranceAsDa(monoIsotopicMass, 1);
    var minBinNum = GetBinNumber(monoIsotopicMass - deltaMass);
    var maxBinNum = GetBinNumber(monoIsotopicMass + deltaMass);
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        if (!_map.TryGetValue(binNum, out var scanBitArray))
        {
            // Store a private copy per bin. Sharing one instance across bins would let a later
            // Or() for one bin silently add scans to every other bin that shares the instance.
            _map.Add(binNum, new BitArray(bitArray));
            _binToFeatureMap.Add(binNum, new List<int> { featureId });
        }
        else
        {
            scanBitArray.Or(bitArray);
            _binToFeatureMap[binNum].Add(featureId);
        }
    }
}
/// <summary>
/// Parse one line of a feature file into a <see cref="Feature"/> made of a point at the
/// feature's minimum scan / minimum charge and a point at its maximum scan / maximum charge.
/// </summary>
/// <param name="line">The line from the feature file.</param>
/// <param name="delimeter">The delimiter used in feature file.</param>
/// <param name="headers">Maps column header names to column indices.</param>
/// <returns>Parsed feature.</returns>
/// <exception cref="KeyNotFoundException">
/// A required column is missing. "LikelihoodRatio" may be replaced by a "Probability" column.
/// </exception>
private static Feature ReadFeature(string line, char delimeter, IReadOnlyDictionary<string, int> headers)
{
    string[] requiredColumns =
    {
        "MonoMass", "Abundance", "LikelihoodRatio", "Envelope",
        "MinCharge", "MaxCharge", "MinScan", "MaxScan"
    };

    // Column that supplies the score; falls back to "Probability" when "LikelihoodRatio" is absent
    var scoreColumn = "LikelihoodRatio";
    foreach (var column in requiredColumns)
    {
        if (headers.ContainsKey(column))
        {
            continue;
        }

        var probabilityCanSubstitute = column == "LikelihoodRatio" && headers.ContainsKey("Probability");
        if (!probabilityCanSubstitute)
        {
            throw new KeyNotFoundException(string.Format("Missing expected column header \"{0}\" in feature file.", column));
        }

        scoreColumn = "Probability";
    }

    var fields = line.Split(delimeter);

    var monoMass = Convert.ToDouble(fields[headers["MonoMass"]]);
    var featureAbundance = Convert.ToDouble(fields[headers["Abundance"]]);
    var featureScore = Convert.ToDouble(fields[headers[scoreColumn]]);
    var envelope = ReadIsotopicEnvelope(fields[headers["Envelope"]]);
    var lowCharge = Convert.ToInt32(fields[headers["MinCharge"]]);
    var highCharge = Convert.ToInt32(fields[headers["MaxCharge"]]);

    // Optional columns: -1 / 0.0 when the file does not provide them
    var featureId = -1;
    int featureIdColumn;
    if (headers.TryGetValue("FeatureID", out featureIdColumn))
    {
        featureId = Convert.ToInt32(fields[featureIdColumn]);
    }

    var correlation = 0.0;
    int summedCorrColumn;
    if (headers.TryGetValue("SummedCorr", out summedCorrColumn))
    {
        correlation = Convert.ToDouble(fields[summedCorrColumn]);
    }

    // The m/z of each point is that of the most abundant theoretical isotope at the point's charge
    var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(monoMass).MostAbundantIsotopeIndex;
    var lowChargeProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, lowCharge, 0);
    var highChargeProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, highCharge, 0);

    var firstPoint = new Feature.FeaturePoint
    {
        Id = featureId,
        Mass = monoMass,
        Scan = Convert.ToInt32(fields[headers["MinScan"]]),
        Mz = lowChargeProfile[apexIsotopeIndex].Mz,
        Charge = lowCharge,
        Abundance = featureAbundance,
        Score = featureScore,
        Isotopes = envelope,
        Correlation = correlation
    };

    var lastPoint = new Feature.FeaturePoint
    {
        Id = featureId,
        Mass = monoMass,
        Scan = Convert.ToInt32(fields[headers["MaxScan"]]),
        Mz = highChargeProfile[apexIsotopeIndex].Mz,
        Charge = highCharge,
        Abundance = featureAbundance,
        Score = featureScore,
        Isotopes = envelope,
        Correlation = correlation
    };

    var feature = new Feature(firstPoint, lastPoint) { Id = featureId };
    return feature;
}
/// <summary>
/// Fill the task queue with one <c>SearchTask</c> per MS2 spectrum of the input file.
/// MGF input is enqueued as-is with the precursor m/z and charge stored in the file. For any
/// other input the Thermo raw reader is used: MS2 scans are grouped under their preceding MS1
/// scan, and for each MS2 scan the charge and monoisotopic precursor m/z are determined from the
/// MS1 peaks around the reported precursor before the processed spectrum is enqueued.
/// </summary>
void GenerateTasks()
{
    // Compare the extension case-insensitively so "FILE.MGF" is not handed to the raw reader
    bool isMgf = string.Equals(Path.GetExtension(msPath), ".mgf", System.StringComparison.OrdinalIgnoreCase);
    if (isMgf)
    {
        MGFSpectrumReader reader = new MGFSpectrumReader();
        reader.Init(msPath);

        Dictionary<int, MS2Spectrum> spectraData = reader.GetSpectrum();
        foreach (MS2Spectrum spectrum in spectraData.Values)
        {
            SearchTask searchTask = new SearchTask(spectrum, spectrum.PrecursorMZ(), spectrum.PrecursorCharge());
            tasks.Enqueue(searchTask);
            readingCounter.Add(spectraData.Count);
        }
    }
    else
    {
        ISpectrumReader reader = new ThermoRawSpectrumReader();
        LocalMaximaPicking picking = new LocalMaximaPicking();
        IProcess process = new WeightedAveraging(new LocalNeighborPicking());
        reader.Init(msPath);

        int start = reader.GetFirstScan();
        int end = reader.GetLastScan();

        // MS1 scan number -> MS2 scans acquired after it (and before the next MS1 scan)
        Dictionary<int, List<int>> scanGroup = new Dictionary<int, List<int>>();
        int current = -1;
        // NOTE(review): the upper bound excludes `end` - confirm whether GetLastScan() is inclusive
        for (int i = start; i < end; i++)
        {
            int msOrder = reader.GetMSnOrder(i);
            if (msOrder == 1)
            {
                current = i;
                scanGroup[i] = new List<int>();
            }
            else if (msOrder == 2 && current >= 0)
            {
                // MS2 scans seen before the first MS1 scan have no parent spectrum and are skipped
                scanGroup[current].Add(i);
            }
        }

        Parallel.ForEach(
            scanGroup,
            new ParallelOptions { MaxDegreeOfParallelism = SearchingParameters.Access.ThreadNums },
            (scanPair) =>
            {
                if (scanPair.Value.Count > 0)
                {
                    ISpectrum ms1 = reader.GetSpectrum(scanPair.Key);

                    foreach (int i in scanPair.Value)
                    {
                        double mz = reader.GetPrecursorMass(i, reader.GetMSnOrder(i));
                        List<IPeak> ms1Peaks = FilterPeaks(ms1.GetPeaks(), mz, searchRange);

                        if (ms1Peaks.Count == 0)
                        {
                            continue;
                        }

                        ICharger charger = new Patterson();
                        int charge = charger.Charge(ms1Peaks, mz - searchRange, mz + searchRange);

                        // insert pseudo peaks (zero intensity) on both sides of every large gap
                        List<IPeak> peaks = new List<IPeak>();
                        double precision = 0.02;
                        double last = ms1Peaks.First().GetMZ();
                        foreach (IPeak peak in ms1Peaks)
                        {
                            if (peak.GetMZ() - last > precision)
                            {
                                peaks.Add(new GeneralPeak(last + precision / 2, 0));
                                peaks.Add(new GeneralPeak(peak.GetMZ() - precision / 2, 0));
                            }
                            peaks.Add(peak);
                            last = peak.GetMZ();
                        }
                        List<IPeak> majorPeaks = picking.Process(peaks);

                        // find envelope cluster
                        EnvelopeProcess envelope = new EnvelopeProcess();
                        var cluster = envelope.Cluster(majorPeaks, mz, charge);
                        if (cluster.Count == 0)
                        {
                            continue;
                        }

                        // find monopeak
                        Averagine averagine = new Averagine(AveragineType.GlycoPeptide);
                        BrainCSharp braincs = new BrainCSharp();
                        MonoisotopicSearcher searcher = new MonoisotopicSearcher(averagine, braincs);
                        MonoisotopicScore result = searcher.Search(mz, charge, cluster);
                        double precursorMZ = result.GetMZ();

                        // search
                        ISpectrum ms2 = reader.GetSpectrum(i);
                        ms2 = process.Process(ms2);

                        SearchTask searchTask = new SearchTask(ms2, precursorMZ, charge);
                        tasks.Enqueue(searchTask);
                    }
                }

                // Reported once per MS1 group, including groups without MS2 scans
                readingCounter.Add(scanGroup.Count);
            });
    }
}
/// <summary>
/// For a precursor peak, try every charge from the maximum down to the minimum and register a
/// match in the LC-MS match map for each charge that passes three checks: the +1 isotope
/// chromatogram correlates with the peak's chromatogram, the averagine isotope profile correlates
/// with one of the supplied MS1 windows, and (when enabled) a neighboring charge state
/// chromatogram correlates with the peak's chromatogram.
/// </summary>
/// <param name="peakMz">m/z of the peak, treated as the most abundant isotope</param>
/// <param name="scanNum">Scan number used when extracting the chromatograms</param>
/// <param name="precursorSpecWindow">Peaks of the precursor spectrum window; may be null</param>
/// <param name="nextMs1SpecWindow">Peaks of the next MS1 spectrum window; may be null</param>
private void SetLcMsMatches(double peakMz, int scanNum, IList<Peak> precursorSpecWindow, IList<Peak> nextMs1SpecWindow)
{
    var peakXic = _run.GetPrecursorExtractedIonChromatogram(peakMz, _tolerance, scanNum);
    if (peakXic.Count < 2)
    {
        return;
    }

    for (var charge = _maxCharge; charge >= _minCharge; charge--)
    {
        // The next isotope peak must exist and co-elute with this peak
        var plusOneIsotopeMz = peakMz + Constants.C13MinusC12 / charge;
        var plusOneIsotopeXic = _run.GetPrecursorExtractedIonChromatogram(plusOneIsotopeMz, _tolerance, scanNum);
        if (!plusOneIsotopeXic.Any()
            || peakXic.GetCorrelation(plusOneIsotopeXic) < _mostAbundantPlusOneIsotopeCorrThreshold)
        {
            continue;
        }

        // Step back from the most abundant isotope to the monoisotopic mass
        var apexIsotopeMass = (peakMz - Constants.Proton) * charge;
        var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(apexIsotopeMass).MostAbundantIsotopeIndex;
        var monoIsotopicMass = apexIsotopeMass - apexIsotopeIndex * Constants.C13MinusC12;

        // Isotope correlation: the better of the two windows counts; a missing window scores 0
        var theoreticalProfile = Averagine.GetTheoreticalIsotopeProfile(monoIsotopicMass, charge);
        var precursorWindowCorr = precursorSpecWindow == null
            ? 0
            : PeakListUtils.GetPearsonCorrelation(precursorSpecWindow, theoreticalProfile, _comparer);
        var nextMs1WindowCorr = nextMs1SpecWindow == null
            ? 0
            : PeakListUtils.GetPearsonCorrelation(nextMs1SpecWindow, theoreticalProfile, _comparer);
        if (Math.Max(precursorWindowCorr, nextMs1WindowCorr) < _isotopeCorrThresholdThreshold)
        {
            continue;
        }

        // Charge correlation is only evaluated when its threshold is positive
        if (_chargeCorrThresholdThreshold > 0.0)
        {
            var higherChargeMz = Ion.GetIsotopeMz(monoIsotopicMass, charge + 1, apexIsotopeIndex);
            var higherChargeXic = _run.GetPrecursorExtractedIonChromatogram(higherChargeMz, _tolerance, scanNum);
            var higherChargeCorr = higherChargeXic.Count < 3 ? 0 : peakXic.GetCorrelation(higherChargeXic);

            // There is no charge state below 1, so that side scores 0 for singly charged ions
            double lowerChargeCorr = 0.0;
            if (charge > 1)
            {
                var lowerChargeMz = Ion.GetIsotopeMz(monoIsotopicMass, charge - 1, apexIsotopeIndex);
                var lowerChargeXic = _run.GetPrecursorExtractedIonChromatogram(lowerChargeMz, _tolerance, scanNum);
                lowerChargeCorr = lowerChargeXic.Count < 3 ? 0 : peakXic.GetCorrelation(lowerChargeXic);
            }

            if (Math.Max(higherChargeCorr, lowerChargeCorr) < _chargeCorrThresholdThreshold)
            {
                continue;
            }
        }

        // Register the mass for the scan range spanned by the peak's chromatogram
        var firstScanNum = peakXic[0].ScanNum;
        var lastScanNum = peakXic[peakXic.Count - 1].ScanNum;
        _lcMsMatchMap.SetMatches(monoIsotopicMass, firstScanNum, lastScanNum);
    }
}
/// <summary>
/// Constructor; creates the averagine formula creator, the Mercury isotope distribution
/// calculator and the peak detector used by this instance.
/// </summary>
public MercuryDistributionCreator()
{
    this.averagineFormulaCreator = new Averagine();

    this.decon2LSMercuryDistribution =
        new ProcessingTasks.Deconvoluters.HornDeconvolutor.ThrashV1.Mercury.MercuryIsotopeDistribution();

    this.peakDetector = new DeconToolsPeakDetector();
}
/// <summary>
/// Create a map of sequence masses and MS2 scans.
/// For every mass bin between <paramref name="minMass"/> and <paramref name="maxMass"/>, collects the
/// MS2 scans (within the bin's registered scan ranges) whose isolation window contains the most
/// abundant isotope m/z of the bin's mass, then merges the scans of all bins within
/// <paramref name="tolerance"/> of each bin. The scan-range map is released afterwards, so this
/// method can only be called once per instance.
/// </summary>
/// <param name="run">Run providing MS levels and spectra</param>
/// <param name="tolerance">Mass tolerance used to merge neighboring bins</param>
/// <param name="minMass">Lowest sequence mass to consider</param>
/// <param name="maxMass">Highest sequence mass to consider</param>
/// <exception cref="InvalidOperationException">The scan-range map is not available (e.g. the method was already called)</exception>
public void CreateSequenceMassToMs2ScansMap(LcMsRun run, Tolerance tolerance, double minMass, double maxMass)
{
    if (_map == null)
    {
        // _map is set to null at the end of this method; fail with a clear message instead of a NullReferenceException
        throw new InvalidOperationException("The scan range map is not available; CreateSequenceMassToMs2ScansMap can only be called once.");
    }

    // Make a bin to scan numbers map without considering tolerance
    var massBinToScanNumsMapNoTolerance = new Dictionary<int, List<int>>();
    var minBinNum = GetBinNumber(minMass);
    var maxBinNum = GetBinNumber(maxMass);
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        if (!_map.TryGetValue(binNum, out var scanRanges))
        {
            continue;
        }

        var sequenceMass = GetMass(binNum);

        // Depends on the bin's mass only; looked up on first use and reused for every scan of the bin
        int? mostAbundantIsotopeIndex = null;

        var ms2ScanNums = new List<int>();

        foreach (var scanRange in scanRanges)
        {
            for (var scanNum = scanRange.Min; scanNum <= scanRange.Max; scanNum++)
            {
                if (scanNum < run.MinLcScan || scanNum > run.MaxLcScan)
                {
                    continue;
                }

                if (run.GetMsLevel(scanNum) != 2)
                {
                    continue;
                }

                if (!(run.GetSpectrum(scanNum) is ProductSpectrum productSpec))
                {
                    continue;
                }

                // Charge that would place this mass at the isolation window's target m/z
                var isolationWindow = productSpec.IsolationWindow;
                var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
                var charge = (int)Math.Round(sequenceMass / isolationWindowTargetMz);

                if (mostAbundantIsotopeIndex == null)
                {
                    mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;
                }

                var mz = Ion.GetIsotopeMz(sequenceMass, charge, mostAbundantIsotopeIndex.Value);
                if (isolationWindow.Contains(mz))
                {
                    ms2ScanNums.Add(scanNum);
                }
            }
        }

        ms2ScanNums.Sort();
        massBinToScanNumsMapNoTolerance.Add(binNum, ms2ScanNums);
    }

    // Account for mass tolerance: each bin receives the scans of all bins within tolerance
    _sequenceMassBinToScanNumsMap = new Dictionary<int, IEnumerable<int>>();
    var sumScanNums = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var sequenceMass = GetMass(binNum);
        var deltaMass = tolerance.GetToleranceAsDa(sequenceMass, 1);

        var curMinBinNum = GetBinNumber(sequenceMass - deltaMass);
        var curMaxBinNum = GetBinNumber(sequenceMass + deltaMass);

        var ms2ScanNums = new HashSet<int>();
        for (var curBinNum = curMinBinNum; curBinNum <= curMaxBinNum; curBinNum++)
        {
            if (curBinNum < minBinNum || curBinNum > maxBinNum)
            {
                continue;
            }

            if (massBinToScanNumsMapNoTolerance.TryGetValue(curBinNum, out var existingMs2ScanNums))
            {
                ms2ScanNums.UnionWith(existingMs2ScanNums);
            }
        }

        _sequenceMassBinToScanNumsMap[binNum] = ms2ScanNums.ToArray();
        sumScanNums += ms2ScanNums.Count;
    }

    Console.WriteLine("#MS/MS matches per sequence: {0}", sumScanNums / (float)(maxBinNum - minBinNum + 1));

    // The scan ranges are no longer needed once the final map has been built
    _map = null;
}
/// <summary>
/// Build the mzML <c>Spectrum</c> element for one MS2 scan: scan header, processed peak data as
/// binary arrays, and a single precursor whose selected ion m/z is the monoisotopic m/z found
/// in the supplied MS1 spectrum and whose charge is determined from the MS1 peaks around the
/// reported precursor.
/// </summary>
/// <param name="reader">Raw file reader (passed by reference; it is not reassigned here)</param>
/// <param name="scan">Scan number of the MS2 scan</param>
/// <param name="type">Averagine model used for the monoisotopic peak search</param>
/// <param name="chargerType">Charge determination algorithm; anything other than Fourier or Combined uses Patterson</param>
/// <param name="picking">Peak picker applied to the gap-filled MS1 peaks</param>
/// <param name="process">Processing applied to the MS2 spectrum before it is written</param>
/// <param name="ms1">MS1 spectrum preceding the MS2 scan</param>
/// <returns>
/// The populated spectrum, or null when there is no MS1 spectrum, no MS1 peak near the precursor,
/// or no isotope envelope cluster could be found.
/// </returns>
public static Spectrum GetMS2Spectrum(ref ThermoRawSpectrumReader reader,
    int scan, AveragineType type, ChargerType chargerType,
    LocalMaximaPicking picking, IProcess process, ISpectrum ms1)
{
    // scan header
    Spectrum spectrum = new Spectrum
    {
        id = "scan=" + scan.ToString()
    };

    double dLowMass = 0;
    double dHighMass = 0;
    double dTIC = 0;
    double dBasePeakMass = 0;
    double dBasePeakIntensity = 0;
    reader.GetScanHeaderInfoForScanNum(scan, ref dLowMass, ref dHighMass,
        ref dTIC, ref dBasePeakMass, ref dBasePeakIntensity);
    SetScanHeader(spectrum, dLowMass, dHighMass, dTIC,
        dBasePeakMass, dBasePeakIntensity);

    // binary data
    spectrum.binaryDataArrayList = new BinaryDataArrayList();
    SetBinaryDataArrayHeader(spectrum.binaryDataArrayList);

    spectrum.cvParam[0] = new Component.CVParam()
    {
        cvRef = "MS",
        accession = "MS:1000511",
        name = "ms level",
        value = "2",
    };

    if (ms1 == null)
    {
        // No parent MS1 spectrum: the precursor cannot be characterized, so report "no spectrum"
        // the same way as the other unusable cases below
        return null;
    }

    double mz = reader.GetPrecursorMass(scan, reader.GetMSnOrder(scan));
    List<IPeak> ms1Peaks = FilterPeaks(ms1.GetPeaks(), mz, searchRange);

    if (ms1Peaks.Count == 0)
    {
        return null;
    }

    // insert pseudo peaks (zero intensity) on both sides of every large gap
    List<IPeak> peaks = new List<IPeak>();
    double precision = 0.02;
    double last = ms1Peaks.First().GetMZ();
    foreach (IPeak peak in ms1Peaks)
    {
        if (peak.GetMZ() - last > precision)
        {
            peaks.Add(new GeneralPeak(last + precision / 2, 0));
            peaks.Add(new GeneralPeak(peak.GetMZ() - precision / 2, 0));
        }
        peaks.Add(peak);
        last = peak.GetMZ();
    }
    List<IPeak> majorPeaks = picking.Process(peaks);

    ICharger charger = new Patterson();
    if (chargerType == ChargerType.Fourier)
    {
        charger = new Fourier();
    }
    else if (chargerType == ChargerType.Combined)
    {
        charger = new PattersonFourierCombine();
    }
    int charge = charger.Charge(peaks, mz - searchRange, mz + searchRange);

    // find envelope cluster
    EnvelopeProcess envelope = new EnvelopeProcess();
    var cluster = envelope.Cluster(majorPeaks, mz, charge);
    if (cluster.Count == 0)
    {
        return null;
    }

    // find monopeak
    Averagine averagine = new Averagine(type);
    BrainCSharp braincs = new BrainCSharp();
    MonoisotopicSearcher searcher = new MonoisotopicSearcher(averagine, braincs);
    MonoisotopicScore result = searcher.Search(mz, charge, cluster);

    // process spectrum
    ISpectrum ms2 = reader.GetSpectrum(scan);
    List<IPeak> ms2Peaks = process.Process(ms2).GetPeaks();

    byte[] mzBytes = ms2Peaks.SelectMany(p => BitConverter.GetBytes(p.GetMZ())).ToArray();
    byte[] intensityBytes = ms2Peaks.SelectMany(p => BitConverter.GetBytes(p.GetIntensity())).ToArray();
    spectrum.binaryDataArrayList.binaryDataArray[0].binary = mzBytes;
    spectrum.binaryDataArrayList.binaryDataArray[1].binary = intensityBytes;
    spectrum.defaultArrayLength = ms2Peaks.Count.ToString();

    spectrum.precursorList = new PrecursorList
    {
        count = "1",
        precursor = new Precursor[1]
    };
    for (int i = 0; i < spectrum.precursorList.precursor.Length; i++)
    {
        spectrum.precursorList.precursor[i] = new Precursor();
    }

    Precursor precursor = spectrum.precursorList.precursor[0];
    precursor.selectedIonList = new SelectedIonList
    {
        count = "1",
        selectedIon = new SelectedIon[1]
    };
    for (int i = 0; i < precursor.selectedIonList.selectedIon.Length; i++)
    {
        precursor.selectedIonList.selectedIon[i] = new SelectedIon();
    }

    SelectedIon selectedIon = precursor.selectedIonList.selectedIon[0];
    selectedIon.cvParam = new Component.CVParam[2];
    selectedIon.cvParam[0] = new Component.CVParam()
    {
        cvRef = "MS",
        accession = "MS:1000744",
        name = "selected ion m/z",
        // Always write a '.' decimal separator, whatever the current culture is
        value = result.GetMZ().ToString(System.Globalization.CultureInfo.InvariantCulture),
        unitCvRef = "MS",
        unitAccession = "MS:1000040",
        unitName = "m/z"
    };
    selectedIon.cvParam[1] = new Component.CVParam()
    {
        cvRef = "MS",
        accession = "MS:1000041",
        name = "charge state",
        value = charge.ToString()
    };

    precursor.activation = new Activation
    {
        cvParam = new Component.CVParam[1]
    };
    precursor.activation.cvParam[0] = ActivationCVParam(reader.GetActivation(scan));

    // Length of the Base64 text for n bytes is 4 * ceil(n / 3); computed directly so the
    // encoded strings do not have to be built just to measure them
    spectrum.binaryDataArrayList.binaryDataArray[0].encodedLength =
        (4 * ((mzBytes.Length + 2) / 3)).ToString();
    spectrum.binaryDataArrayList.binaryDataArray[1].encodedLength =
        (4 * ((intensityBytes.Length + 2) / 3)).ToString();

    return spectrum;
}
/// <summary>
/// For a fixed protein sequence, extracts the chromatogram of the most abundant isotope at every
/// charge from 2 to 60 and prints the intensities, either for all scans or (the default) only for
/// the column at the position of scan 161 among the scan numbers returned by
/// <c>GetScanNumbers(1)</c>. The test is ignored when the raw file is missing.
/// </summary>
public void TestGeneratingXicsOfAllCharges()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0);
    var mzComparer = new MzComparerWithBinning(27);

    const string protSequence =
        "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

    var sequenceGraph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (sequenceGraph == null)
    {
        return;
    }

    sequenceGraph.SetSink(0);
    var neutralComposition = sequenceGraph.GetSinkSequenceCompositionWithH2O() - Composition.Hydrogen;
    var theoreticalEnvelope = Averagine.GetIsotopomerEnvelope(neutralComposition.Mass);

    const bool SHOW_ALL_SCANS = false;

    // Index of the chromatogram point to print when only one scan is shown
    var targetColIndex = 0;

#pragma warning disable 0162
    if (SHOW_ALL_SCANS)
    {
        Console.WriteLine("Charge\t" + string.Join("\t", run.GetScanNumbers(1)));
    }
    else
    {
        // Just display data for scan 161: count the scans that come before it
        Console.WriteLine("Charge\t161");
        targetColIndex = run.GetScanNumbers(1).TakeWhile(scan => scan != 161).Count();
    }
#pragma warning restore 0162

    const int minCharge = 2;
    const int maxCharge = 60;
    foreach (var charge in Enumerable.Range(minCharge, maxCharge - minCharge + 1))
    {
        // Extract the chromatogram over the whole m/z bin that holds the most abundant isotope
        var chargedIon = new Ion(neutralComposition, charge);
        var apexIsotopeMz = chargedIon.GetIsotopeMz(theoreticalEnvelope.MostAbundantIsotopeIndex);
        var bin = mzComparer.GetBinNumber(apexIsotopeMz);
        var binStartMz = mzComparer.GetMzStart(bin);
        var binEndMz = mzComparer.GetMzEnd(bin);

        var xic = run.GetFullPrecursorIonExtractedIonChromatogram(binStartMz, binEndMz);
        Console.Write(charge + "\t");

#pragma warning disable 0162
        if (SHOW_ALL_SCANS)
        {
            Console.WriteLine(string.Join("\t", xic.Select(p => p.Intensity)));
        }
        else
        {
            Console.WriteLine(xic[targetColIndex].Intensity);
        }
#pragma warning restore 0162
    }
}
/// <summary>
/// Deconvolute a peak list into monoisotopic peaks, selecting the best peak within
/// +/- <paramref name="filteringWindowSize"/>: only peaks that are not exceeded in intensity by
/// any neighbor inside that window are used as seeds.
/// </summary>
/// <param name="peaks">Peaks to deconvolute; the neighbor scan stops at the first peak farther away than the window, so they are expected to be ordered by ascending m/z</param>
/// <param name="minCharge">Lowest charge state to try (inclusive)</param>
/// <param name="maxCharge">Highest charge state to try (inclusive); charges are tried from high to low</param>
/// <param name="isotopeOffsetTolerance">How many isotope indices on either side of the averagine most abundant isotope index are tried for the seed peak</param>
/// <param name="filteringWindowSize">Half-width (in m/z) of the window in which the seed peak must not be exceeded</param>
/// <param name="tolerance">Tolerance passed to the isotope peak lookup in the window spectrum</param>
/// <param name="corrScoreThreshold">A candidate is discarded only when its correlation is below this value AND its distance is above 0.03</param>
/// <returns>Sorted list of accepted deconvoluted peaks</returns>
public static List<DeconvolutedPeak> GetDeconvolutedPeaks(
    Peak[] peaks, int minCharge, int maxCharge,
    int isotopeOffsetTolerance, double filteringWindowSize,
    Tolerance tolerance, double corrScoreThreshold)
{
    var monoIsotopePeakList = new List<DeconvolutedPeak>();

    for (var peakIndex = 0; peakIndex < peaks.Length; peakIndex++)
    {
        var peak = peaks[peakIndex];

        // Check whether peak has the maximum intensity within the window
        var isBest = true;

        var prevIndex = peakIndex - 1;
        while (prevIndex >= 0)
        {
            var prevPeak = peaks[prevIndex];
            if ((peak.Mz - prevPeak.Mz) > filteringWindowSize)
            {
                break;
            }

            if (prevPeak.Intensity > peak.Intensity)
            {
                isBest = false;
                break;
            }

            prevIndex--;
        }

        if (!isBest)
        {
            continue;
        }

        var nextIndex = peakIndex + 1;
        while (nextIndex < peaks.Length)
        {
            var nextPeak = peaks[nextIndex];
            if ((nextPeak.Mz - peak.Mz) > filteringWindowSize)
            {
                break;
            }

            if (nextPeak.Intensity > peak.Intensity)
            {
                isBest = false;
                break;
            }

            nextIndex++;
        }

        if (!isBest)
        {
            continue;
        }

        // peak has the maximum intensity, window = [prevIndex+1,nextIndex-1]
        var window = new Peak[nextIndex - prevIndex - 1];
        Array.Copy(peaks, prevIndex + 1, window, 0, window.Length);
        var windowSpectrum = new Spectrum(window, 1);
        var peakMz = peak.Mz;

        for (var charge = maxCharge; charge >= minCharge; charge--)
        {
            // Neutral mass of the peak at this charge. With isotope index 0 the helper is expected
            // to only remove the charge carriers, so the estimate no longer includes the proton
            // masses that the plain product peak.Mz * charge carried along.
            // NOTE(review): confirm Ion.GetMonoIsotopicMass(mz, charge, 0) == (mz - proton) * charge.
            var mass = Ion.GetMonoIsotopicMass(peakMz, charge, 0);
            var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex;

            for (var isotopeIndex = mostAbundantIsotopeIndex - isotopeOffsetTolerance;
                 isotopeIndex <= mostAbundantIsotopeIndex + isotopeOffsetTolerance;
                 isotopeIndex++)
            {
                var monoIsotopeMass = Ion.GetMonoIsotopicMass(peakMz, charge, isotopeIndex);
                var isotopomerEnvelope = Averagine.GetIsotopomerEnvelope(monoIsotopeMass);
                var observedPeaks = windowSpectrum.GetAllIsotopePeaks(monoIsotopeMass, charge, isotopomerEnvelope, tolerance, 0.1);
                if (observedPeaks == null)
                {
                    continue;
                }

                // NOTE(review): the property is spelled "Envolope" here while the overload taking a
                // scan number reads "Envelope" - confirm which spelling the referenced type exposes.
                var envelop = isotopomerEnvelope.Envolope;

                // Missing isotope peaks count as intensity 0; present ones are narrowed to float
                var observedIntensities = new double[observedPeaks.Length];
                for (var i = 0; i < observedPeaks.Length; i++)
                {
                    var observedPeak = observedPeaks[i];
                    observedIntensities[i] = observedPeak != null ? (float)observedPeak.Intensity : 0.0;
                }

                var sim = FitScoreCalculator.GetDistanceAndCorrelation(envelop, observedIntensities);
                var bcDist = sim.Item1;
                var corr = sim.Item2;

                if (corr < corrScoreThreshold && bcDist > 0.03)
                {
                    continue;
                }

                // monoIsotopeMass is valid
                var deconvPeak = new DeconvolutedPeak(monoIsotopeMass, observedIntensities[mostAbundantIsotopeIndex], charge, corr, bcDist, observedPeaks);
                monoIsotopePeakList.Add(deconvPeak);
            }
        }
    }

    monoIsotopePeakList.Sort();
    return monoIsotopePeakList;
}