/// <summary>
/// Gets the MS2 scan numbers whose registered precursor charge and most abundant isotope m/z bin
/// match the given sequence mass. Results are cached per sequence mass bin.
/// </summary>
/// <param name="sequenceMass">Mass used to look up the averagine envelope and to compute the precursor m/z for every charge state.</param>
/// <returns>The matching MS2 scan numbers; the cached list instance is returned on repeated lookups of the same mass bin.</returns>
public IEnumerable<int> GetMatchingMs2ScanNums(double sequenceMass)
{
    var massBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(sequenceMass);

    // Serve from the per-bin cache when this mass bin has been resolved before.
    IList<int> matchingScans;
    if (_sequenceMassBinToScanNumsMap.TryGetValue(massBin, out matchingScans))
    {
        return matchingScans;
    }

    matchingScans = new List<int>();
    var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;

    for (var z = _minCharge; z <= _maxCharge; z++)
    {
        var apexMz = Ion.GetIsotopeMz(sequenceMass, z, apexIsotopeIndex);
        var mzBin = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(apexMz);

        IList<ChargeAndScanNum> candidates;
        if (_mostAbundantIsotopeMzIndexToChargeAndScanNums.TryGetValue(mzBin, out candidates))
        {
            foreach (var candidate in candidates)
            {
                // Only scans registered with the charge currently being probed count as matches.
                if (candidate.Charge != z)
                {
                    continue;
                }

                matchingScans.Add(candidate.ScanNum);
            }
        }
    }

    _sequenceMassBinToScanNumsMap.Add(massBin, matchingScans);
    return matchingScans;
}
/// <summary>
/// Registers LC-MS matches for a peak: for every charge state whose next isotope has a non-empty XIC,
/// the monoisotopic mass is estimated with averagine and recorded together with the scan range of the peak's XIC.
/// </summary>
/// <param name="peakMz">m/z of the peak, treated as the most abundant isotope.</param>
/// <param name="scanNum">Scan number passed to the XIC extraction.</param>
private void SetLcMsMatches(double peakMz, int scanNum)
{
    var peakXic = _run.GetPrecursorExtractedIonChromatogram(peakMz, _tolerance, scanNum);
    if (peakXic.Count < 2)
    {
        // A chromatogram with fewer than two points is ignored.
        return;
    }

    for (var z = _maxCharge; z >= _minCharge; z--)
    {
        // The isotope one C13-C12 spacing above the peak must also produce a chromatogram.
        var followingIsotopeMz = peakMz + Constants.C13MinusC12 / z;
        var followingIsotopeXic = _run.GetPrecursorExtractedIonChromatogram(followingIsotopeMz, _tolerance, scanNum);
        if (followingIsotopeXic.Any())
        {
            var apexMass = (peakMz - Constants.Proton) * z;
            var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(apexMass).MostAbundantIsotopeIndex;

            // Step back from the most abundant isotope to the monoisotopic mass.
            var monoMass = apexMass - apexIsotopeIndex * Constants.C13MinusC12;

            _lcMsMatchMap.SetMatches(monoMass, peakXic[0].ScanNum, peakXic[peakXic.Count - 1].ScanNum);
        }
    }
}
/// <summary>
/// Builds a theoretical isotope envelope from the averagine envelope of the given monoisotopic mass,
/// keeping only isotopes that reach the intensity threshold and rank within the requested count.
/// </summary>
/// <param name="monoMass">Monoisotopic mass.</param>
/// <param name="maxNumOfIsotopes">Isotopes ranked worse than this value are dropped.</param>
/// <param name="relativeIntensityThreshold">Isotopes whose averagine envelope value is below this threshold are dropped.</param>
/// <exception cref="Exception">Thrown when the retained isotopes do not sum to a positive value.</exception>
public TheoreticalIsotopeEnvelope(double monoMass, int maxNumOfIsotopes, double relativeIntensityThreshold = 0.1)
{
    MonoMass = monoMass;

    var averagine = Averagine.GetIsotopomerEnvelope(monoMass);
    var rankByIsotope = ArrayUtil.GetRankings(averagine.Envelope);

    Isotopes = new List<Isotope>(maxNumOfIsotopes);
    var totalRatio = 0d;
    for (var isotopeIndex = 0; isotopeIndex < averagine.Envelope.Length; isotopeIndex++)
    {
        var ratio = averagine.Envelope[isotopeIndex];
        var belowThreshold = ratio < relativeIntensityThreshold;
        var rankedTooLow = rankByIsotope[isotopeIndex] > maxNumOfIsotopes;
        if (belowThreshold || rankedTooLow)
        {
            continue;
        }

        totalRatio += ratio;
        Isotopes.Add(new Isotope(isotopeIndex, ratio));
    }

    // Written as a negated comparison so that NaN is rejected as well.
    if (!(totalRatio > 0))
    {
        throw new Exception("Abnormal Theoretical Envelope");
    }

    var retainedCount = Isotopes.Count;
    _probability = new double[retainedCount];
    Ranking = new int[retainedCount];
    IndexOrderByRanking = new int[retainedCount];

    for (var slot = 0; slot < Isotopes.Count; slot++)
    {
        var isotope = Isotopes[slot];
        var rank = rankByIsotope[isotope.Index];

        // Normalize so the retained isotopes sum to one.
        _probability[slot] = isotope.Ratio / totalRatio;
        Ranking[slot] = rank;

        // Rank is used as a 1-based position here, hence the -1.
        IndexOrderByRanking[rank - 1] = slot;
    }
}
/// <summary>
/// Deisotopes the given peaks: every peak, visited in the order produced by <c>IntensityComparer</c>,
/// is tested at each charge state against the averagine isotope profile, and the best-scoring
/// results are kept.
/// </summary>
/// <param name="specWindow">Peaks searched for isotope peaks and used for the correlation.</param>
/// <param name="peakList">Peaks to use as most-abundant-isotope candidates.</param>
/// <param name="numDeisotopedPeaksToGet">Maximum number of deisotoped peaks retained.</param>
/// <returns>The retained deisotoped peaks as a sorted set.</returns>
private IEnumerable<DeisotopedPeak> GetDeisotopedPeaks(List<Peak> specWindow, IEnumerable<Peak> peakList, int numDeisotopedPeaksToGet)
{
    var seedPeaks = new List<Peak>(peakList);
    seedPeaks.Sort(new IntensityComparer());

    var retained = new SortedSet<DeisotopedPeak>();
    foreach (var seedPeak in seedPeaks)
    {
        var seedMz = seedPeak.Mz;

        // Correlation per charge for this seed peak; entries of untested charges stay at 0.
        var corrByCharge = new double[_maxCharge + 1];

        for (var z = _maxCharge; z >= _minCharge; z--)
        {
            // Require the next isotope peak to be present in the window.
            var followingIsotopeMz = seedMz + Constants.C13MinusC12 / z;
            if (PeakListUtils.FindPeak(specWindow, followingIsotopeMz, _tolerance) == null)
            {
                continue;
            }

            var apexMass = (seedMz - Constants.Proton) * z;
            var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(apexMass).MostAbundantIsotopeIndex;
            var monoMass = apexMass - apexIsotopeIndex * Constants.C13MinusC12;

            var theoreticalProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, z);
            var correlation = PeakListUtils.GetPearsonCorrelation(specWindow, theoreticalProfile, _comparer);
            corrByCharge[z] = correlation;

            // Reject this charge when one of its multiples (already scored, since charges descend)
            // scored more than 80% of this correlation.
            var beatenByMultiple = false;
            var largestFactor = _maxCharge / z;
            for (var factor = 2; factor <= largestFactor && !beatenByMultiple; factor++)
            {
                beatenByMultiple = corrByCharge[z * factor] > 0.8 * correlation;
            }

            if (beatenByMultiple)
            {
                continue;
            }

            retained.Add(new DeisotopedPeak(monoMass, z, correlation));

            // Keep the set bounded by evicting its minimum element.
            if (retained.Count > numDeisotopedPeaksToGet)
            {
                retained.Remove(retained.Min);
            }
        }
    }

    return retained;
}
/// <summary>
/// Evaluates every sink (modification combination) of the sequence graph against the spectrum
/// and returns the highest-scoring match.
/// </summary>
/// <param name="seqGraph">Shifted sequence graph; its sink is changed while iterating.</param>
/// <param name="spec">Product spectrum whose isolation window constrains the precursor.</param>
/// <param name="featureMass">Optional feature mass; when given, sequence masses outside the tolerance are skipped.</param>
/// <returns>The best match, or null when no candidate passes the filters.</returns>
private FlankingMassMatch GetBestMatchInTheGraph(ShiftedSequenceGraph seqGraph, ProductSpectrum spec, double? featureMass)
{
    FlankingMassMatch bestMatch = null;
    var highestScore = double.NegativeInfinity;

    var compositions = seqGraph.GetSequenceCompositions();
    for (var sinkIndex = 0; sinkIndex < compositions.Length; sinkIndex++)
    {
        seqGraph.SetSink(sinkIndex);
        var sequenceMass = seqGraph.GetSinkSequenceCompositionWithH2O().Mass;

        // Optional mass filter against the supplied feature mass.
        if (featureMass.HasValue && !_tolerance.IsWithin(sequenceMass, featureMass.Value))
        {
            continue;
        }

        // Estimate the charge from the isolation window target, then require the most abundant
        // isotope at that charge to fall inside the isolation window.
        var charge = (int)Math.Round(sequenceMass / (spec.IsolationWindow.IsolationWindowTargetMz - Constants.Proton));
        var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;
        var apexMz = Ion.GetIsotopeMz(sequenceMass, charge, apexIsotopeIndex);
        if (!spec.IsolationWindow.Contains(apexMz))
        {
            continue;
        }

        // Optional MS1 evidence filter.
        if (_featureFinder != null &&
            _featureFinder.GetMs1EvidenceScore(spec.ScanNum, sequenceMass, charge) < Ms1CorrThreshold)
        {
            continue;
        }

        var scored = seqGraph.GetScoreAndModifications(_ms2Scorer);
        if (scored.Item1 > highestScore)
        {
            highestScore = scored.Item1;
            bestMatch = new FlankingMassMatch(
                scored.Item1,
                sequenceMass - Composition.H2O.Mass - seqGraph.ShiftMass,
                charge,
                scored.Item2);
        }
    }

    return bestMatch;
}
/// <summary>
/// Processes the first peak of <paramref name="remainingPeakList"/>: tests it at every charge state against
/// the averagine isotope profile, appends accepted monoisotopic masses, and removes the peak from the list.
/// </summary>
/// <param name="specWindow">Peaks searched for isotope peaks and used for the correlation.</param>
/// <param name="remainingPeakList">Peaks still to be processed; the first one is consumed. Nothing happens when empty.</param>
/// <param name="deisotopedMassList">Receives the monoisotopic masses whose correlation exceeds the correlation threshold.</param>
private void ApplyDeconvolution(List<Peak> specWindow, ref LinkedList<Peak> remainingPeakList, ref List<double> deisotopedMassList)
{
    if (remainingPeakList.Count == 0)
    {
        return;
    }

    var seedMz = remainingPeakList.First.Value.Mz;

    // Correlation per charge; entries of untested charges stay at 0.
    var corrByCharge = new double[_maxCharge + 1];

    for (var z = _maxCharge; z >= _minCharge; z--)
    {
        // Require the next isotope peak to be present in the window.
        var followingIsotopeMz = seedMz + Constants.C13MinusC12 / z;
        if (PeakListUtils.FindPeak(specWindow, followingIsotopeMz, _tolerance) == null)
        {
            continue;
        }

        var apexMass = (seedMz - Constants.Proton) * z;
        var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(apexMass).MostAbundantIsotopeIndex;
        var monoMass = apexMass - apexIsotopeIndex * Constants.C13MinusC12;

        var theoreticalProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, z);
        var correlation = PeakListUtils.GetPearsonCorrelation(specWindow, theoreticalProfile, _comparer);
        corrByCharge[z] = correlation;

        // A multiple of this charge that scored more than 80% of this correlation wins.
        var beatenByMultiple = false;
        var largestFactor = _maxCharge / z;
        for (var factor = 2; factor <= largestFactor && !beatenByMultiple; factor++)
        {
            beatenByMultiple = corrByCharge[z * factor] > 0.8 * correlation;
        }

        if (!beatenByMultiple && correlation > _corrThreshold)
        {
            deisotopedMassList.Add(monoMass);
        }
    }

    remainingPeakList.RemoveFirst();
}
/// <summary>
/// Get a summed MS2 spectrum from the dataset, with the provided limits
/// </summary>
/// <param name="monoIsotopicMass">Monoisotopic mass used to compute the most abundant isotope m/z per charge</param>
/// <param name="minScanNum">min scan number, inclusive</param>
/// <param name="maxScanNum">max scan number, inclusive</param>
/// <param name="minCharge">min charge, inclusive</param>
/// <param name="maxCharge">max charge, inclusive</param>
/// <param name="activationMethod">Activation method to require; <c>Unknown</c> accepts every scan</param>
/// <returns>A product spectrum holding the summed peaks, tagged with <paramref name="activationMethod"/></returns>
public ProductSpectrum GetSummedMs2Spectrum(double monoIsotopicMass,
    int minScanNum, int maxScanNum, int minCharge, int maxCharge,
    ActivationMethod activationMethod = ActivationMethod.Unknown)
{
    var apexIsotopeIndexSource = Averagine.GetIsotopomerEnvelope(monoIsotopicMass);
    var selectedScans = new List<int>();

    for (var z = minCharge; z <= maxCharge; z++)
    {
        var apexMz = Ion.GetIsotopeMz(monoIsotopicMass, z, apexIsotopeIndexSource.MostAbundantIsotopeIndex);

        foreach (var candidateScan in GetFragmentationSpectraScanNums(apexMz))
        {
            // Scan range filter (both ends inclusive).
            if (candidateScan < minScanNum || candidateScan > maxScanNum)
            {
                continue;
            }

            // The spectrum is only loaded when a specific activation method was requested.
            if (activationMethod != ActivationMethod.Unknown &&
                ((ProductSpectrum)GetSpectrum(candidateScan)).ActivationMethod != activationMethod)
            {
                continue;
            }

            selectedScans.Add(candidateScan);
        }
    }

    var summed = GetSummedSpectrum(selectedScans);
    var result = new ProductSpectrum(summed.Peaks, 0)
    {
        ActivationMethod = activationMethod
    };
    return result;
}
/// <summary>
/// Build the isotope plot showing theoretical isotopic profile and
/// actual isotopic profile.
/// This will calculate the theoretical using averagine from the provided monoisotopic mass.
/// </summary>
/// <param name="actual">Actual isotopic profile.</param>
/// <param name="mass">Monoisotopic mass, for calculating theoretical isotopic profile.</param>
/// <param name="charge">Charge used to compute each isotope's m/z, which is then converted back to a neutral mass for the X value.</param>
public void BuildPlot(Isotope[] actual, double mass, int charge)
{
    // Calculate theoretical isotopic profile using averagine
    var theoEnvelope = Averagine.GetIsotopomerEnvelope(mass);
    var theoretical = new PeakDataPoint[theoEnvelope.Envelope.Length];

    // Calculate the neutral mass for each isotope index (theoretical)
    for (var isotopeIndex = 0; isotopeIndex < theoEnvelope.Envelope.Length; isotopeIndex++)
    {
        var intensity = theoEnvelope.Envelope[isotopeIndex];
        var mz = Ion.GetIsotopeMz(mass, charge, isotopeIndex);

        // Neutral mass = m/z * z - z * proton. The proton mass must only appear in the
        // subtracted term; multiplying the first term by it inflates the mass by ~0.7%.
        var m = (mz * charge) - (charge * Constants.Proton);
        theoretical[isotopeIndex] = new PeakDataPoint(m, intensity, 0.0, 0.0, string.Empty);
    }

    // Create peak data points from isotopes; each observed isotope reuses the X value
    // of the theoretical point with the same isotope index (actual)
    var observed = actual.Select(i => new PeakDataPoint(theoretical[i.Index].X, i.Ratio, 0.0, 0.0, string.Empty) { Index = i.Index }).ToArray();

    BuildPlot(theoretical, observed, false);
}
public void TestFeatureIdMatching()
{
    // Reports, for the features of a hard-coded dataset, how many have a matching identification,
    // MS2 scans, sequence tags, and sequence tags that match the FASTA database.
    // The test is ignored when any of the input files is missing.
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string resultFilePath = @"H:\Research\QCShew_TopDown\Production\M1_V092\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var resultParser = new MsPathFinderParser(resultFilePath);
    const double qValueThreshold = 0.01;
    const double tolerancePpm = 13;

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    if (!File.Exists(rawFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
    }

    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    // Identifications passing the q-value threshold, sorted by mass so they can be binary-searched.
    var idList = resultParser.GetIdList().TakeWhile(id => id.QValue <= qValueThreshold).OrderBy(id => id.Mass).ToList();
    var idMassList = idList.Select(id => id.Mass).ToList();
    var idFlag = new bool[idList.Count];

    // Parse sequence tags
    var tagFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".seqtag");
    const int minTagLength = 6;
    const int numProtMatches = 4;
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.icsfldecoy.fasta";

    if (!File.Exists(tagFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, tagFileName);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    // The feature file gets the same existence check as the other inputs.
    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
    if (!File.Exists(featureFileName))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, featureFileName);
    }

    var featureParser = new TsvFileParser(featureFileName);
    var minScan = featureParser.GetData("MinScan").Select(s => Convert.ToInt32(s)).ToArray();
    var maxScan = featureParser.GetData("MaxScan").Select(s => Convert.ToInt32(s)).ToArray();
    var minCharge = featureParser.GetData("MinCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var maxCharge = featureParser.GetData("MaxCharge").Select(s => Convert.ToInt32(s)).ToArray();
    var monoMass = featureParser.GetData("MonoMass").Select(Convert.ToDouble).ToArray();

    var numFeaturesWithId = 0;
    var numFeaturesWithMs2 = 0;
    var numFeaturesWithTags = 0;
    var numFeaturesWithMatchingTags = 0;
    var numFeaturesWithTwoOrMoreMatchingTags = 0;
    var numFeaturesWithNoIdAndMatchingTags = 0;

    var tolerance = new Tolerance(tolerancePpm);

    for (var i = 0; i < featureParser.NumData; i++)
    {
        var featureIndex = i;
        var mass = monoMass[featureIndex];

        // One shared predicate, so both search directions use the same inclusive scan and charge bounds.
        Func<MsPathFinderId, bool> isInsideFeature = id =>
            id.Scan >= minScan[featureIndex] && id.Scan <= maxScan[featureIndex] &&
            id.Charge >= minCharge[featureIndex] && id.Charge <= maxCharge[featureIndex];

        // Find Id
        var tolDa = tolerance.GetToleranceAsDa(mass, 1);
        var minMass = mass - tolDa;
        var maxMass = mass + tolDa;
        var index = idMassList.BinarySearch(mass);
        if (index < 0)
        {
            // Not found: the complement is the insertion point.
            index = ~index;
        }

        var matchedId = new List<MsPathFinderId>();

        // go down
        var curIndex = index - 1;
        while (curIndex >= 0)
        {
            var curId = idList[curIndex];
            if (curId.Mass < minMass)
            {
                break;
            }

            if (isInsideFeature(curId))
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            --curIndex;
        }

        // go up
        curIndex = index;
        while (curIndex < idList.Count)
        {
            var curId = idList[curIndex];
            if (curId.Mass > maxMass)
            {
                break;
            }

            if (isInsideFeature(curId))
            {
                matchedId.Add(curId);
                idFlag[curIndex] = true;
            }

            ++curIndex;
        }

        var hasId = false;
        if (matchedId.Any())
        {
            ++numFeaturesWithId;
            hasId = true;
        }

        // Find MS2 scans
        var tags = new List<SequenceTag>();
        var hasMs2 = false;

        // Depends on the feature mass only, so it is looked up once per feature.
        var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex;

        for (var scanNum = minScan[featureIndex]; scanNum <= maxScan[featureIndex]; scanNum++)
        {
            var isolationWindow = run.GetIsolationWindow(scanNum);
            if (isolationWindow == null)
            {
                continue;
            }

            var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
            var charge = (int)Math.Round(mass / isolationWindowTargetMz);
            if (charge < minCharge[featureIndex] || charge > maxCharge[featureIndex])
            {
                continue;
            }

            var mz = Ion.GetIsotopeMz(mass, charge, mostAbundantIsotopeIndex);
            if (isolationWindow.Contains(mz))
            {
                tags.AddRange(tagParser.GetSequenceTags(scanNum));
                hasMs2 = true;
            }
        }

        if (hasMs2)
        {
            ++numFeaturesWithMs2;
        }

        if (tags.Any())
        {
            ++numFeaturesWithTags;
        }

        // Number of tag hits per protein name.
        var protHist = new Dictionary<string, int>();
        var hasMatchedTag = false;
        foreach (var tag in tags)
        {
            var matchedProteins = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence)
                .Select(idx => fastaDb.GetProteinName(idx))
                .ToArray();
            if (matchedProteins.Any())
            {
                hasMatchedTag = true;
                foreach (var protein in matchedProteins)
                {
                    int num;
                    if (protHist.TryGetValue(protein, out num))
                    {
                        protHist[protein] = num + 1;
                    }
                    else
                    {
                        protHist[protein] = 1;
                    }
                }
            }
        }

        if (hasMatchedTag)
        {
            ++numFeaturesWithMatchingTags;
            if (!hasId)
            {
                ++numFeaturesWithNoIdAndMatchingTags;
            }
        }

        if (protHist.Any())
        {
            var maxOcc = protHist.Values.Max();
            if (maxOcc >= numProtMatches)
            {
                ++numFeaturesWithTwoOrMoreMatchingTags;
            }
        }
    }

    Console.WriteLine("NumFeatures: {0}", featureParser.NumData);
    Console.WriteLine("NumId: {0}", idList.Count);
    Console.WriteLine("NumFeaturesWithId: {0} ({1})", numFeaturesWithId, numFeaturesWithId / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMs2: {0} ({1})", numFeaturesWithMs2, numFeaturesWithMs2 / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithTag: {0} ({1})", numFeaturesWithTags, numFeaturesWithTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMatchedTag: {0} ({1})", numFeaturesWithMatchingTags, numFeaturesWithMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithMoreThanOneMatchedTag: {0} ({1})", numFeaturesWithTwoOrMoreMatchingTags, numFeaturesWithTwoOrMoreMatchingTags / (float)featureParser.NumData);
    Console.WriteLine("NumFeaturesWithNoIdAndMatchedTag: {0} ({1})", numFeaturesWithNoIdAndMatchingTags, numFeaturesWithNoIdAndMatchingTags / (float)featureParser.NumData);

    // List the scans of identifications that were not matched to any feature.
    for (var i = 0; i < idFlag.Length; i++)
    {
        if (!idFlag[i])
        {
            Console.WriteLine(idList[i].Scan);
        }
    }
}
/// <summary>
/// Set the matches: registers the MS2 scans of a feature in every mass bin covered by the
/// feature's monoisotopic mass tolerance window, and records the feature ID for those bins.
/// </summary>
/// <param name="featureId">Feature ID stored for every affected mass bin</param>
/// <param name="monoIsotopicMass">Monoisotopic mass of the feature</param>
/// <param name="minScanNum">First scan of the feature; clamped to the run's minimum LC scan</param>
/// <param name="maxScanNum">Last scan of the feature; clamped to the run's maximum LC scan</param>
/// <param name="repScanNum">Representative scan; nothing is registered when it lies outside the clamped scan range</param>
/// <param name="minCharge">Not used by the current implementation</param>
/// <param name="maxCharge">Not used by the current implementation</param>
public void SetMatches(int featureId, double monoIsotopicMass, int minScanNum, int maxScanNum, int repScanNum, int minCharge, int maxCharge)
{
    if (minScanNum < _run.MinLcScan)
    {
        minScanNum = _run.MinLcScan;
    }

    if (maxScanNum > _run.MaxLcScan)
    {
        maxScanNum = _run.MaxLcScan;
    }

    // Out-of-range check: a value cannot be below the minimum AND above the maximum at once,
    // so the two comparisons must be combined with OR for the guard to be reachable.
    if (repScanNum < minScanNum || repScanNum > maxScanNum)
    {
        return;
    }

    // Keys are elution time differences to the representative scan, values are scan numbers
    var registeredMs2Scans = new List<KeyValuePair<double, int>>();

    var repRt = _run.GetElutionTime(repScanNum);

    // Depends on the mass only, so it is looked up once instead of once per scan.
    var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(monoIsotopicMass).MostAbundantIsotopeIndex;

    for (var scanNum = minScanNum; scanNum <= maxScanNum; scanNum++)
    {
        if (!_scanToIsolationWindow.TryGetValue(scanNum, out var isolationWindow))
        {
            continue;
        }

        // Estimate the precursor charge from the isolation window target and require the most
        // abundant isotope at that charge to fall inside the isolation window.
        var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
        var charge = (int)Math.Round(monoIsotopicMass / isolationWindowTargetMz);

        var mz = Ion.GetIsotopeMz(monoIsotopicMass, charge, mostAbundantIsotopeIndex);
        if (isolationWindow.Contains(mz))
        {
            var rt = _run.GetElutionTime(scanNum);
            registeredMs2Scans.Add(new KeyValuePair<double, int>(Math.Abs(rt - repRt), scanNum));
        }
    }

    // determine bit array: keep the scans closest in elution time to the representative scan
    var bitArray = new BitArray(_run.MaxLcScan - _run.MinLcScan + 1);
    foreach (var e in registeredMs2Scans.OrderBy(x => x.Key).Take(_maxNumMs2ScansPerMass))
    {
        var scanNum = e.Value;
        bitArray.Set(scanNum - _run.MinLcScan, true);
    }

    var deltaMass = _tolerance.GetToleranceAsDa(monoIsotopicMass, 1);
    var minBinNum = GetBinNumber(monoIsotopicMass - deltaMass);
    var maxBinNum = GetBinNumber(monoIsotopicMass + deltaMass);
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        if (!_map.TryGetValue(binNum, out var scanBitArray))
        {
            // Each bin gets its own copy. BitArray.Or mutates the stored instance, so sharing
            // one instance across bins would leak scans OR-ed into one bin into the others.
            _map.Add(binNum, new BitArray(bitArray));
            _binToFeatureMap.Add(binNum, new List<int> { featureId });
        }
        else
        {
            scanBitArray.Or(bitArray);
            _binToFeatureMap[binNum].Add(featureId);
        }
    }
}
/// <summary>
/// Parses one feature from a single line of the feature file.
/// </summary>
/// <param name="line">The line from the feature file.</param>
/// <param name="delimeter">The delimiter used in feature file.</param>
/// <param name="headers">Maps column header names to column indices.</param>
/// <returns>Parsed feature, built from a min point (MinScan/MinCharge) and a max point (MaxScan/MaxCharge).</returns>
/// <exception cref="KeyNotFoundException">
/// Thrown when a required column is missing; "LikelihoodRatio" may be substituted by "Probability".
/// </exception>
private static Feature ReadFeature(string line, char delimeter, IReadOnlyDictionary<string, int> headers)
{
    var requiredColumns = new[]
    {
        "MonoMass", "Abundance", "LikelihoodRatio", "Envelope",
        "MinCharge", "MaxCharge", "MinScan", "MaxScan"
    };

    // Column that supplies the score; falls back to "Probability" when "LikelihoodRatio" is absent.
    var scoreColumn = "LikelihoodRatio";
    foreach (var column in requiredColumns)
    {
        if (headers.ContainsKey(column))
        {
            continue;
        }

        if (column == "LikelihoodRatio" && headers.ContainsKey("Probability"))
        {
            scoreColumn = "Probability";
            continue;
        }

        throw new KeyNotFoundException(string.Format("Missing expected column header \"{0}\" in feature file.", column));
    }

    var fields = line.Split(delimeter);
    Func<string, string> field = columnName => fields[headers[columnName]];

    var mass = Convert.ToDouble(field("MonoMass"));
    var abundance = Convert.ToDouble(field("Abundance"));
    var score = Convert.ToDouble(field(scoreColumn));
    var isotopes = ReadIsotopicEnvelope(field("Envelope"));
    var minCharge = Convert.ToInt32(field("MinCharge"));
    var maxCharge = Convert.ToInt32(field("MaxCharge"));

    // Optional columns: FeatureID defaults to -1, SummedCorr to 0.0.
    var id = -1;
    int idColumn;
    if (headers.TryGetValue("FeatureID", out idColumn))
    {
        id = Convert.ToInt32(fields[idColumn]);
    }

    var summedCorr = 0.0;
    int summedCorrColumn;
    if (headers.TryGetValue("SummedCorr", out summedCorrColumn))
    {
        summedCorr = Convert.ToDouble(fields[summedCorrColumn]);
    }

    // The m/z of each point is taken from the theoretical profile at the most abundant isotope.
    var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex;
    var profileAtMinCharge = Averagine.GetTheoreticalIsotopeProfile(mass, minCharge, 0);
    var profileAtMaxCharge = Averagine.GetTheoreticalIsotopeProfile(mass, maxCharge, 0);

    var minPoint = new Feature.FeaturePoint
    {
        Id = id,
        Mass = mass,
        Scan = Convert.ToInt32(field("MinScan")),
        Mz = profileAtMinCharge[apexIsotopeIndex].Mz,
        Charge = minCharge,
        Abundance = abundance,
        Score = score,
        Isotopes = isotopes,
        Correlation = summedCorr
    };

    var maxPoint = new Feature.FeaturePoint
    {
        Id = id,
        Mass = mass,
        Scan = Convert.ToInt32(field("MaxScan")),
        Mz = profileAtMaxCharge[apexIsotopeIndex].Mz,
        Charge = maxCharge,
        Abundance = abundance,
        Score = score,
        Isotopes = isotopes,
        Correlation = summedCorr
    };

    var feature = new Feature(minPoint, maxPoint) { Id = id };
    return feature;
}
public void TestGeneratingXicsOfAllCharges()
{
    // Prints, for every charge from 2 to 60, the XIC intensity of the protein's most abundant
    // isotope m/z bin - either for all MS1 scans or (the default) for scan 161 only.
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + TestRawFilePath);
    }

    var run = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0);
    var mzBinner = new MzComparerWithBinning(27);

    const string protSequence =
        "AIPQSVEGQSIPSLAPMLERTTPAVVSVAVSGTHVSKQRVPDVFRYFFGPNAPQEQVQERPFRGLGSGVIIDADKGYIVTNNHVIDGADDIQVGLHDGREVKAKLIGTDSESDIALLQIEAKNLVAIKTSDSDELRVGDFAVAIGNPFGLGQTVTSGIVSALGRSGLGIEMLENFIQTDAAINSGNSGGALVNLKGELIGINTAIVAPNGGNVGIGFAIPANMVKNLIAQIAEHGEVRRGVLGIAGRDLDSQLAQGFGLDTQHGGFVNEVSAGSAAEKAGIKAGDIIVSVDGRAIKSFQELRAKVATMGAGAKVELGLIRDGDKKTVNVTLGEANQTTEKAAGAVHPMLQGASLENASKGVEITDVAQGSPAAMSGLQKGDLIVGINRTAVKDLKSLKELLKDQEGAVALKIVRGKSMLYLVLR";

    var graph = SequenceGraph.CreateGraph(new AminoAcidSet(), AminoAcid.ProteinNTerm, protSequence, AminoAcid.ProteinCTerm);
    if (graph == null)
    {
        return;
    }

    graph.SetSink(0);
    var neutral = graph.GetSinkSequenceCompositionWithH2O() - Composition.Hydrogen;
    var averagineEnvelope = Averagine.GetIsotopomerEnvelope(neutral.Mass);

    const bool SHOW_ALL_SCANS = false;

    // Column of the XIC that is printed when only a single scan is shown.
    var columnToPrint = 0;

#pragma warning disable 0162
    if (SHOW_ALL_SCANS)
    {
        Console.WriteLine("Charge\t" + string.Join("\t", run.GetScanNumbers(1)));
    }
    else
    {
        // Just display data for scan 161: count the MS1 scans that precede it.
        Console.WriteLine("Charge\t161");
        foreach (var ms1Scan in run.GetScanNumbers(1))
        {
            if (ms1Scan == 161)
            {
                break;
            }

            columnToPrint++;
        }
    }
#pragma warning restore 0162

    const int minCharge = 2;
    const int maxCharge = 60;
    for (var z = minCharge; z <= maxCharge; z++)
    {
        var precursor = new Ion(neutral, z);
        var apexMz = precursor.GetIsotopeMz(averagineEnvelope.MostAbundantIsotopeIndex);

        // Extract the chromatogram over the whole m/z bin that contains the most abundant isotope.
        var bin = mzBinner.GetBinNumber(apexMz);
        var binStartMz = mzBinner.GetMzStart(bin);
        var binEndMz = mzBinner.GetMzEnd(bin);
        var chromatogram = run.GetFullPrecursorIonExtractedIonChromatogram(binStartMz, binEndMz);

        Console.Write(z + "\t");

#pragma warning disable 0162
        if (SHOW_ALL_SCANS)
        {
            Console.WriteLine(string.Join("\t", chromatogram.Select(p => p.Intensity)));
        }
        else
        {
            Console.WriteLine(chromatogram[columnToPrint].Intensity);
        }
#pragma warning restore 0162
    }
}
/// <summary>
/// Registers LC-MS matches for a peak. A charge state is accepted only when the next isotope's XIC
/// correlates with the peak's XIC, the isotope profile correlates with at least one of the supplied
/// MS1 windows, and (when enabled) a neighboring charge state's XIC correlates with the peak's XIC.
/// </summary>
/// <param name="peakMz">m/z of the peak, treated as the most abundant isotope.</param>
/// <param name="scanNum">Scan number passed to the XIC extraction.</param>
/// <param name="precursorSpecWindow">Peaks of the precursor spectrum window; may be null (correlation counts as 0).</param>
/// <param name="nextMs1SpecWindow">Peaks of the next MS1 spectrum window; may be null (correlation counts as 0).</param>
private void SetLcMsMatches(double peakMz, int scanNum, IList<Peak> precursorSpecWindow, IList<Peak> nextMs1SpecWindow)
{
    var peakXic = _run.GetPrecursorExtractedIonChromatogram(peakMz, _tolerance, scanNum);
    if (peakXic.Count < 2)
    {
        return;
    }

    for (var z = _maxCharge; z >= _minCharge; z--)
    {
        // The next isotope must have a chromatogram that co-elutes with this peak.
        var followingIsotopeMz = peakMz + Constants.C13MinusC12 / z;
        var followingIsotopeXic = _run.GetPrecursorExtractedIonChromatogram(followingIsotopeMz, _tolerance, scanNum);
        if (!followingIsotopeXic.Any())
        {
            continue;
        }

        if (peakXic.GetCorrelation(followingIsotopeXic) < _mostAbundantPlusOneIsotopeCorrThreshold)
        {
            continue;
        }

        var apexMass = (peakMz - Constants.Proton) * z;
        var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(apexMass).MostAbundantIsotopeIndex;
        var monoMass = apexMass - apexIsotopeIndex * Constants.C13MinusC12;

        // Isotope correlation: the better of the two MS1 windows decides.
        var theoreticalProfile = Averagine.GetTheoreticalIsotopeProfile(monoMass, z);

        double precursorCorr = 0;
        if (precursorSpecWindow != null)
        {
            precursorCorr = PeakListUtils.GetPearsonCorrelation(precursorSpecWindow, theoreticalProfile, _comparer);
        }

        double nextMs1Corr = 0;
        if (nextMs1SpecWindow != null)
        {
            nextMs1Corr = PeakListUtils.GetPearsonCorrelation(nextMs1SpecWindow, theoreticalProfile, _comparer);
        }

        if (Math.Max(precursorCorr, nextMs1Corr) < _isotopeCorrThresholdThreshold)
        {
            continue;
        }

        // Charge correlation is only checked when its threshold is positive.
        if (_chargeCorrThresholdThreshold > 0.0)
        {
            // Neighboring charge XICs with fewer than three points contribute a correlation of 0.
            double corrAbove = 0;
            var mzAbove = Ion.GetIsotopeMz(monoMass, z + 1, apexIsotopeIndex);
            var xicAbove = _run.GetPrecursorExtractedIonChromatogram(mzAbove, _tolerance, scanNum);
            if (xicAbove.Count >= 3)
            {
                corrAbove = peakXic.GetCorrelation(xicAbove);
            }

            double corrBelow = 0.0;
            if (z > 1)
            {
                var mzBelow = Ion.GetIsotopeMz(monoMass, z - 1, apexIsotopeIndex);
                var xicBelow = _run.GetPrecursorExtractedIonChromatogram(mzBelow, _tolerance, scanNum);
                if (xicBelow.Count >= 3)
                {
                    corrBelow = peakXic.GetCorrelation(xicBelow);
                }
            }

            if (Math.Max(corrAbove, corrBelow) < _chargeCorrThresholdThreshold)
            {
                continue;
            }
        }

        _lcMsMatchMap.SetMatches(monoMass, peakXic[0].ScanNum, peakXic[peakXic.Count - 1].ScanNum);
    }
}
/// <summary>
/// Get the deconvoluted peaks, selecting the best peak within +/- filteringWindowSize
/// </summary>
/// <param name="scanNum">Scan number (included in any exceptions that are caught)</param>
/// <param name="peaks">Peaks to deconvolute; NOTE(review): the window search appears to assume ascending m/z order - confirm</param>
/// <param name="minCharge">Lowest charge state tested, inclusive</param>
/// <param name="maxCharge">Highest charge state tested, inclusive</param>
/// <param name="isotopeOffsetTolerance">How many isotope indices on either side of the most abundant one are tried</param>
/// <param name="filteringWindowSize">m/z distance within which a peak must be the most intense to be used as a seed</param>
/// <param name="tolerance">Tolerance used when collecting the observed isotope peaks</param>
/// <param name="corrScoreThreshold">A candidate is rejected when its correlation is below this value and its distance exceeds 0.03</param>
/// <returns>Sorted list of deconvoluted peaks</returns>
public static List<DeconvolutedPeak> GetDeconvolutedPeaks(
    int scanNum, Peak[] peaks,
    int minCharge, int maxCharge,
    int isotopeOffsetTolerance, double filteringWindowSize,
    Tolerance tolerance, double corrScoreThreshold)
{
    try
    {
        var deconvoluted = new List<DeconvolutedPeak>();

        for (var seedIndex = 0; seedIndex < peaks.Length; seedIndex++)
        {
            var seed = peaks[seedIndex];

            // Scan downwards: stop at the window edge, give up when a more intense peak is found.
            var dominated = false;
            var below = seedIndex - 1;
            for (; below >= 0; below--)
            {
                var neighbor = peaks[below];
                if ((seed.Mz - neighbor.Mz) > filteringWindowSize)
                {
                    break;
                }

                if (neighbor.Intensity > seed.Intensity)
                {
                    dominated = true;
                    break;
                }
            }

            if (dominated)
            {
                continue;
            }

            // Scan upwards with the same rules.
            var above = seedIndex + 1;
            for (; above < peaks.Length; above++)
            {
                var neighbor = peaks[above];
                if ((neighbor.Mz - seed.Mz) > filteringWindowSize)
                {
                    break;
                }

                if (neighbor.Intensity > seed.Intensity)
                {
                    dominated = true;
                    break;
                }
            }

            if (dominated)
            {
                continue;
            }

            // The seed has the maximum intensity; the window is [below + 1, above - 1].
            var windowPeaks = new Peak[above - below - 1];
            Array.Copy(peaks, below + 1, windowPeaks, 0, windowPeaks.Length);
            var windowSpectrum = new Spectrum(windowPeaks, 1);
            var seedMz = seed.Mz;

            for (var z = maxCharge; z >= minCharge; z--)
            {
                var seedMass = (seed.Mz * z) - z * Constants.Proton;
                var apexIsotopeIndex = Averagine.GetIsotopomerEnvelope(seedMass).MostAbundantIsotopeIndex;

                var firstIsotopeIndex = apexIsotopeIndex - isotopeOffsetTolerance;
                var lastIsotopeIndex = apexIsotopeIndex + isotopeOffsetTolerance;
                for (var isotopeIndex = firstIsotopeIndex; isotopeIndex <= lastIsotopeIndex; isotopeIndex++)
                {
                    // Treat the seed as isotope number isotopeIndex and derive the monoisotopic mass.
                    var monoMass = Ion.GetMonoIsotopicMass(seedMz, z, isotopeIndex);
                    var envelope = Averagine.GetIsotopomerEnvelope(monoMass);
                    var matchedPeaks = windowSpectrum.GetAllIsotopePeaks(monoMass, z, envelope, tolerance, 0.1);
                    if (matchedPeaks == null)
                    {
                        continue;
                    }

                    // Missing isotope peaks keep the default intensity of 0.
                    var matchedIntensities = new double[matchedPeaks.Length];
                    for (var k = 0; k < matchedPeaks.Length; k++)
                    {
                        var matched = matchedPeaks[k];
                        if (matched != null)
                        {
                            matchedIntensities[k] = (float)matched.Intensity;
                        }
                    }

                    var fit = FitScoreCalculator.GetDistanceAndCorrelation(envelope.Envelope, matchedIntensities);
                    var distance = fit.Item1;
                    var correlation = fit.Item2;

                    // Rejected only when BOTH the correlation and the distance criteria fail.
                    var rejected = correlation < corrScoreThreshold && distance > 0.03;
                    if (rejected)
                    {
                        continue;
                    }

                    deconvoluted.Add(new DeconvolutedPeak(
                        monoMass, matchedIntensities[apexIsotopeIndex], z, correlation, distance, matchedPeaks));
                }
            }
        }

        deconvoluted.Sort();
        return deconvoluted;
    }
    catch (Exception ex)
    {
        throw new Exception(string.Format("Error getting deconvoluted peaks for scan {0} in GetDeconvolutedPeaks: {1}", scanNum, ex.Message), ex);
    }
}
/// <summary>
/// Get the deconvoluted peaks that correspond to the provided peak list
/// </summary>
/// <remarks>
/// Peaks are visited from most to least intense. For every peak that has not already been
/// assigned to an earlier result, each charge state and isotope index is scored and the
/// best-scoring candidate (if any) is kept; the peaks it matched are then marked as used.
/// </remarks>
/// <param name="peaks">Peaks to deconvolute</param>
/// <param name="minCharge">Lowest charge state tested</param>
/// <param name="maxCharge">Highest charge state tested</param>
/// <param name="isotopeOffsetTolerance">
/// Number of isotope indices tested on each side of the theoretical most abundant isotope index;
/// a negative value means "use the length of the theoretical envelope"
/// </param>
/// <param name="tolerance">Tolerance handed to the isotope peak lookup</param>
/// <param name="corrScoreThreshold">A candidate is rejected when its correlation is below this value and its distance is above 0.1</param>
/// <returns>The selected deconvoluted peaks, sorted with the list's default ordering</returns>
public static List<DeconvolutedPeak> GetDeconvolutedPeaks_new(
    Peak[] peaks, int minCharge, int maxCharge,
    int isotopeOffsetTolerance,
    Tolerance tolerance, double corrScoreThreshold)
{
    var spectrum = new Spectrum(peaks, 0);
    var monoIsotopePeakList = new List<DeconvolutedPeak>();

    // Order the peak *indices* by descending intensity instead of ordering the peaks and
    // looking each one up again with Array.BinarySearch: the binary search returns a negative
    // value when the array is not ordered by the peak comparer and an arbitrary match when
    // two peaks compare equal, either of which would corrupt the peakUsed bookkeeping.
    // OrderByDescending is stable, so the visiting order is unchanged.
    var peakIndicesByIntensity = Enumerable.Range(0, peaks.Length)
        .OrderByDescending(index => peaks[index].Intensity)
        .ToArray();
    var peakUsed = new bool[peaks.Length];

    foreach (var peakIndex in peakIndicesByIntensity)
    {
        if (peakUsed[peakIndex])
        {
            continue;
        }

        var peak = peaks[peakIndex];

        var bestScore = 0.0;
        DeconvolutedPeak bestPeak = null;
        Tuple<Peak, int>[] bestObservedPeaks = null;

        for (var charge = minCharge; charge <= maxCharge; charge++)
        {
            var mass = peak.Mz * charge - (charge * Constants.Proton);
            if (mass > MaxMass)
            {
                continue;
            }

            var isotopomerEnvelope = Averagine.GetIsotopomerEnvelope(mass);
            var envelop = isotopomerEnvelope.Envelope;
            var mostAbundantIsotopeIndex = isotopomerEnvelope.MostAbundantIsotopeIndex;

            var offsetTolerance = isotopeOffsetTolerance;
            if (isotopeOffsetTolerance < 0)
            {
                offsetTolerance = envelop.Length;
            }

            for (var isotopeIndex = mostAbundantIsotopeIndex - offsetTolerance; isotopeIndex <= mostAbundantIsotopeIndex + offsetTolerance; isotopeIndex++)
            {
                var monoIsotopeMass = Ion.GetMonoIsotopicMass(peak.Mz, charge, isotopeIndex);
                var observedPeaks = GetAllIsotopePeaks(spectrum, monoIsotopeMass, charge, isotopomerEnvelope, tolerance, 0.1);

                if (observedPeaks == null)
                {
                    continue;
                }

                var observedIntensities = new double[observedPeaks.Length];
                var observedPeakCount = 0;

                for (var i = 0; i < observedPeaks.Length; i++)
                {
                    var observedPeak = observedPeaks[i];

                    // Item2 is used as an index into peakUsed (presumably the peak's position in
                    // the input array - confirm against GetAllIsotopePeaks). Peaks already claimed
                    // by an earlier result are dropped from this candidate.
                    if (observedPeak != null && peakUsed[observedPeak.Item2])
                    {
                        observedPeak = null;
                        observedPeaks[i] = null;
                    }

                    observedPeakCount += observedPeak != null ? 1 : 0;
                    observedIntensities[i] = observedPeak != null ? (float)observedPeak.Item1.Intensity : 0.0;
                }

                var sim = FitScoreCalculator.GetDistanceAndCorrelation(envelop, observedIntensities);
                var bcDist = sim.Item1;
                var corr = sim.Item2;
                var foundPeakRatio = observedPeakCount / ((double)envelop.Length);

                // Default used when fewer than two isotope peaks were observed
                var interferenceScore = 10.0;

                var filteredObserved = observedPeaks.Where(p => p != null).ToArray();
                if (filteredObserved.Length >= 2)
                {
                    var lowestObservedMz = filteredObserved[0].Item1.Mz;
                    var highestObservedMz = filteredObserved[filteredObserved.Length - 1].Item1.Mz;
                    var allPeaks = spectrum.Peaks.Where(p => p.Mz >= lowestObservedMz && p.Mz <= highestObservedMz).ToArray();
                    interferenceScore = CalculateInterferenceScore(allPeaks, filteredObserved);
                }

                // Keep the distance strictly positive so the score below never divides by zero
                bcDist = Math.Max(bcDist, double.Epsilon);

                if (corr < corrScoreThreshold && bcDist > 0.1)
                {
                    continue;
                }

                // Higher is better: rewards found peaks and correlation, penalizes distance,
                // offset from the most abundant isotope index, and interference
                var score = (foundPeakRatio * corr) / (bcDist * (Math.Abs(mostAbundantIsotopeIndex - isotopeIndex) + 1) * interferenceScore);

                // monoIsotopeMass is valid
                if (score >= bestScore)
                {
                    bestScore = score;
                    bestPeak = new DeconvolutedPeak(
                        monoIsotopeMass,
                        observedIntensities[mostAbundantIsotopeIndex],
                        charge,
                        corr,
                        bcDist,
                        filteredObserved.Select(p => p.Item1).ToArray());
                    bestObservedPeaks = observedPeaks;
                }
            }
        }

        if (bestPeak != null)
        {
            monoIsotopePeakList.Add(bestPeak);

            // Claim every peak of the winning envelope so later (less intense) peaks skip them
            foreach (var p in bestObservedPeaks)
            {
                if (p != null)
                {
                    bestPeak.ObservedPeakIndices.Add(p.Item2);
                    peakUsed[p.Item2] = true;
                }
            }
        }
    }

    monoIsotopePeakList.Sort();
    return monoIsotopePeakList;
}
/// <summary>
/// Create a map of sequence masses and MS2 scans
/// </summary>
/// <remarks>
/// Works in two passes over the mass bins between minMass and maxMass:
/// first, for each bin present in _map, collect the MS2 scans whose isolation window contains
/// the m/z of the bin's most abundant isotope; second, for each bin, merge the scans of all
/// bins that lie within the mass tolerance. The result is stored in _sequenceMassBinToScanNumsMap,
/// the average number of scans per bin is written to the console, and _map is set to null
/// (so a later call would fail on _map unless it is reassigned elsewhere).
/// </remarks>
/// <param name="run">Run that supplies MS levels, spectra and the valid LC scan range</param>
/// <param name="tolerance">Mass tolerance used to widen each bin to its neighboring bins</param>
/// <param name="minMass">Mass whose bin is the first bin processed</param>
/// <param name="maxMass">Mass whose bin is the last bin processed</param>
public void CreateSequenceMassToMs2ScansMap(LcMsRun run, Tolerance tolerance, double minMass, double maxMass)
{
    // Make a bin to scan numbers map without considering tolerance
    var massBinToScanNumsMapNoTolerance = new Dictionary<int, List<int>>();
    var minBinNum = GetBinNumber(minMass);
    var maxBinNum = GetBinNumber(maxMass);

    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        if (!_map.TryGetValue(binNum, out var scanRanges))
        {
            continue;
        }

        var sequenceMass = GetMass(binNum);

        // Depends only on the bin's mass, so look it up once per bin rather than once per MS2 scan
        var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex;

        var ms2ScanNums = new List<int>();

        foreach (var scanRange in scanRanges)
        {
            for (var scanNum = scanRange.Min; scanNum <= scanRange.Max; scanNum++)
            {
                if (scanNum < run.MinLcScan || scanNum > run.MaxLcScan)
                {
                    continue;
                }

                if (run.GetMsLevel(scanNum) != 2)
                {
                    continue;
                }

                if (!(run.GetSpectrum(scanNum) is ProductSpectrum productSpec))
                {
                    continue;
                }

                var isolationWindow = productSpec.IsolationWindow;

                // Charge state at which this mass would land closest to the isolation target
                var charge = (int)Math.Round(sequenceMass / isolationWindow.IsolationWindowTargetMz);
                var mz = Ion.GetIsotopeMz(sequenceMass, charge, mostAbundantIsotopeIndex);

                if (isolationWindow.Contains(mz))
                {
                    ms2ScanNums.Add(scanNum);
                }
            }
        }

        ms2ScanNums.Sort();
        massBinToScanNumsMapNoTolerance.Add(binNum, ms2ScanNums);
    }

    // Account for mass tolerance
    _sequenceMassBinToScanNumsMap = new Dictionary<int, IEnumerable<int>>();
    var sumScanNums = 0L;

    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var sequenceMass = GetMass(binNum);
        var deltaMass = tolerance.GetToleranceAsDa(sequenceMass, 1);

        var curMinBinNum = GetBinNumber(sequenceMass - deltaMass);
        var curMaxBinNum = GetBinNumber(sequenceMass + deltaMass);

        // A set, because neighboring bins can contribute the same scan
        var ms2ScanNums = new HashSet<int>();

        for (var curBinNum = curMinBinNum; curBinNum <= curMaxBinNum; curBinNum++)
        {
            if (curBinNum < minBinNum || curBinNum > maxBinNum)
            {
                continue;
            }

            if (!massBinToScanNumsMapNoTolerance.TryGetValue(curBinNum, out var existingMs2ScanNums))
            {
                continue;
            }

            ms2ScanNums.UnionWith(existingMs2ScanNums);
        }

        _sequenceMassBinToScanNumsMap[binNum] = ms2ScanNums.ToArray();
        sumScanNums += ms2ScanNums.Count;
    }

    Console.WriteLine("#MS/MS matches per sequence: {0}", sumScanNums / (float)(maxBinNum - minBinNum + 1));

    // The raw bin-to-scan-range map is released once the final map has been built
    _map = null;
}
/// <summary>
/// Get the deconvoluted peaks, selecting the best peak within +/- filteringWindowSize
/// </summary>
/// <param name="peaks">Peaks to deconvolute (the neighbor scan presumably expects ascending m/z order; confirm with callers)</param>
/// <param name="minCharge">Lowest charge state tested</param>
/// <param name="maxCharge">Highest charge state tested (charges are tried from high to low)</param>
/// <param name="isotopeOffsetTolerance">Number of isotope indices tested on each side of the theoretical most abundant isotope index</param>
/// <param name="filteringWindowSize">Maximum m/z difference for a neighboring peak to be part of the local window</param>
/// <param name="tolerance">Tolerance handed to the isotope peak lookup</param>
/// <param name="corrScoreThreshold">A candidate is rejected when its correlation is below this value and its distance is above 0.03</param>
/// <returns>The accepted deconvoluted peaks, sorted with the list's default ordering</returns>
public static List<DeconvolutedPeak> GetDeconvolutedPeaks(
    Peak[] peaks, int minCharge, int maxCharge,
    int isotopeOffsetTolerance, double filteringWindowSize,
    Tolerance tolerance, double corrScoreThreshold)
{
    var monoIsotopePeakList = new List<DeconvolutedPeak>();

    for (var peakIndex = 0; peakIndex < peaks.Length; peakIndex++)
    {
        var peak = peaks[peakIndex];

        // Check whether peak has the maximum intensity within the window
        var isBest = true;

        var prevIndex = peakIndex - 1;
        while (prevIndex >= 0)
        {
            var prevPeak = peaks[prevIndex];
            if ((peak.Mz - prevPeak.Mz) > filteringWindowSize)
            {
                break;
            }

            if (prevPeak.Intensity > peak.Intensity)
            {
                isBest = false;
                break;
            }

            prevIndex--;
        }

        if (!isBest)
        {
            continue;
        }

        var nextIndex = peakIndex + 1;
        while (nextIndex < peaks.Length)
        {
            var nextPeak = peaks[nextIndex];
            if ((nextPeak.Mz - peak.Mz) > filteringWindowSize)
            {
                break;
            }

            if (nextPeak.Intensity > peak.Intensity)
            {
                isBest = false;
                break;
            }

            nextIndex++;
        }

        if (!isBest)
        {
            continue;
        }

        // peak has the maximum intensity, window = [prevIndex+1,nextIndex-1]
        var window = new Peak[nextIndex - prevIndex - 1];
        Array.Copy(peaks, prevIndex + 1, window, 0, window.Length);
        var windowSpectrum = new Spectrum(window, 1);
        var peakMz = peak.Mz;

        for (var charge = maxCharge; charge >= minCharge; charge--)
        {
            // Convert m/z to a neutral mass by removing the charge-carrying protons before the
            // averagine lookup (the other deconvolution methods in this file do the same);
            // without the subtraction the mass is too large by charge * Constants.Proton
            var mass = (peak.Mz * charge) - charge * Constants.Proton;
            var mostAbundantIsotopeIndex = Averagine.GetIsotopomerEnvelope(mass).MostAbundantIsotopeIndex;

            for (var isotopeIndex = mostAbundantIsotopeIndex - isotopeOffsetTolerance; isotopeIndex <= mostAbundantIsotopeIndex + isotopeOffsetTolerance; isotopeIndex++)
            {
                var monoIsotopeMass = Ion.GetMonoIsotopicMass(peakMz, charge, isotopeIndex);
                var isotopomerEnvelope = Averagine.GetIsotopomerEnvelope(monoIsotopeMass);
                var observedPeaks = windowSpectrum.GetAllIsotopePeaks(monoIsotopeMass, charge, isotopomerEnvelope, tolerance, 0.1);

                if (observedPeaks == null)
                {
                    continue;
                }

                // NOTE(review): this member is spelled "Envolope" here while other methods in this
                // file read "Envelope" - confirm which name the envelope type actually exposes
                var envelop = isotopomerEnvelope.Envolope;

                var observedIntensities = new double[observedPeaks.Length];
                for (var i = 0; i < observedPeaks.Length; i++)
                {
                    var observedPeak = observedPeaks[i];
                    observedIntensities[i] = observedPeak != null ? (float)observedPeak.Intensity : 0.0;
                }

                var sim = FitScoreCalculator.GetDistanceAndCorrelation(envelop, observedIntensities);
                var bcDist = sim.Item1;
                var corr = sim.Item2;

                // Reject only when both measures fail
                if (corr < corrScoreThreshold && bcDist > 0.03)
                {
                    continue;
                }

                // monoIsotopeMass is valid
                var deconvPeak = new DeconvolutedPeak(monoIsotopeMass, observedIntensities[mostAbundantIsotopeIndex], charge, corr, bcDist, observedPeaks);
                monoIsotopePeakList.Add(deconvPeak);
            }
        }
    }

    monoIsotopePeakList.Sort();
    return monoIsotopePeakList;
}