/// <summary>
/// Gets the full product extracted ion chromatogram for a target m/z by turning the
/// tolerance at <paramref name="mz"/> into an explicit [min, max] m/z window.
/// </summary>
/// <param name="mz">target m/z at the center of the window</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <param name="precursorIonMz">precursor ion m/z, forwarded unchanged to the range-based overload</param>
/// <returns>the chromatogram produced by the (minMz, maxMz, precursorIonMz) overload</returns>
public new Xic GetFullProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    return GetFullProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz);
}
/// <summary>
/// Finds all peaks of <paramref name="peakList"/> inside the tolerance window around the given m/z.
/// </summary>
/// <param name="peakList">peaks to search</param>
/// <param name="mz">target m/z at the center of the window</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <returns>the result of the (peakList, minMz, maxMz) overload</returns>
public static IList<Peak> FindAllPeaks(List<Peak> peakList, double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    return FindAllPeaks(peakList, lowerMz, upperMz);
}
/// <summary>
/// Gets the extracted ion chromatogram (MS1 spectra only) for the m/z window centered on
/// <paramref name="mz"/>; the window half-width is the tolerance expressed in Th at that m/z.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the window</param>
/// <returns>XIC as a list of XicPeaks, as produced by the (minMz, maxMz) overload</returns>
public IList<XicPeak> GetExtractedIonChromatogram(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var windowStart = mz - halfWidth;
    var windowEnd = mz + halfWidth;

    return GetExtractedIonChromatogram(windowStart, windowEnd);
}
/// <summary>
/// Finds the maximum intensity peak inside the tolerance window around the given m/z.
/// </summary>
/// <param name="mz">m/z at the center of the window</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <returns>the peak returned by the (minMz, maxMz) overload</returns>
public Peak FindPeak(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    return FindPeak(lowerMz, upperMz);
}
/// <summary>
/// Finds the index of a peak inside the tolerance window around the given m/z.
/// </summary>
/// <param name="mz">m/z at the center of the window</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <returns>the index returned by the (minMz, maxMz) overload</returns>
public int FindPeakIndex(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    return FindPeakIndex(lowerMz, upperMz);
}
/// <summary>
/// Filters <c>Peaks</c> in place: a peak is dropped when its intensity lies strictly between the
/// lower and upper bound of the bucket returned by <c>GetMostAbundantIntensity</c> for the
/// peak's local m/z window; every other peak is kept.
/// </summary>
/// <remarks>
/// The local window is peak m/z +/- the tolerance built from the constant 10000
/// (presumably ppm - confirm against the Tolerance constructor), expressed in Th.
/// </remarks>
public void FilterNosieByIntensityHistogram()
{
    var filteredPeaks = new List<Peak>();
    var tolerance = new Tolerance(10000);
    var lastIndex = Peaks.Length - 1;

    // Window bounds only ever move forward as the peaks are visited in array order.
    var st = 0;
    var ed = 0;

    foreach (var peak in Peaks)
    {
        var mzWindowWidth = tolerance.GetToleranceAsTh(peak.Mz);
        var intensity = peak.Intensity;
        var mzStart = peak.Mz - mzWindowWidth;
        var mzEnd = peak.Mz + mzWindowWidth;

        // Advance each bound while the peak under it is still below the limit;
        // both bounds are clamped to the last valid index.
        while (st < lastIndex && Peaks[st].Mz < mzStart)
        {
            st++;
        }

        while (ed < lastIndex && Peaks[ed].Mz < mzEnd)
        {
            ed++;
        }

        var abundantIntensityBucket = GetMostAbundantIntensity(st, ed);
        if (abundantIntensityBucket.LowerBound < intensity && intensity < abundantIntensityBucket.UpperBound)
        {
            // Intensity falls inside the dominant bucket of its neighborhood: discard the peak.
            continue;
        }

        filteredPeaks.Add(peak);
    }

    filteredPeaks.Sort();
    Peaks = filteredPeaks.ToArray();
}
/// <summary>
/// Finds the most intense peak whose m/z lies strictly inside the tolerance window around
/// <paramref name="mz"/>.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <returns>the most intense peak in the window with intensity above zero, or null when there is none</returns>
internal RankedPeak FindPeak(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    // Binary-search for the window midpoint; a negative result is the bitwise complement
    // of the index where the probe would be inserted.
    var pivot = Array.BinarySearch(Peaks, new RankedPeak((lowerMz + upperMz) / 2, 0, 0));
    if (pivot < 0)
    {
        pivot = ~pivot;
    }

    RankedPeak strongest = null;
    var strongestIntensity = 0.0;

    // Scan toward lower m/z, starting just below the pivot.
    for (var i = pivot - 1; i >= 0 && i < Peaks.Length; i--)
    {
        var candidate = Peaks[i];
        if (candidate.Mz <= lowerMz)
        {
            break;
        }

        if (candidate.Intensity > strongestIntensity)
        {
            strongestIntensity = candidate.Intensity;
            strongest = candidate;
        }
    }

    // Scan toward higher m/z, starting at the pivot itself.
    for (var i = pivot; i >= 0 && i < Peaks.Length; i++)
    {
        var candidate = Peaks[i];
        if (candidate.Mz >= upperMz)
        {
            break;
        }

        if (candidate.Intensity > strongestIntensity)
        {
            strongestIntensity = candidate.Intensity;
            strongest = candidate;
        }
    }

    return strongest;
}
/// <summary>
/// Gets the extracted ion chromatogram (MS1 spectra only) for the tolerance window around the given m/z.
/// When <paramref name="targetScanNum"/> is non-negative, only XicPeaks around that scan are returned;
/// when it is negative, the call falls back to the plain (minMz, maxMz) overload.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <param name="targetScanNum">target scan number to generate the XIC around; a negative value requests the XIC without a target scan</param>
/// <param name="maxNumConsecutiveScansWithoutPeak">maximum number of consecutive scans without a peak (forwarded to the range-based overload)</param>
/// <returns>XIC around targetScanNum, or the XIC of the whole window when targetScanNum is negative</returns>
public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance, int targetScanNum, int maxNumConsecutiveScansWithoutPeak = 3)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    if (targetScanNum >= 0)
    {
        return GetPrecursorExtractedIonChromatogram(lowerMz, upperMz, targetScanNum, maxNumConsecutiveScansWithoutPeak);
    }

    return GetPrecursorExtractedIonChromatogram(lowerMz, upperMz);
}
/// <summary>
/// Gets the extracted ion chromatogram (MS1 spectra only) for the tolerance window around the given m/z.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance; converted to Th at <paramref name="mz"/> to get the window half-width</param>
/// <returns>XIC as an Xic object, as produced by the (minMz, maxMz) overload</returns>
public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var windowStart = mz - halfWidth;
    var windowEnd = mz + halfWidth;

    return GetPrecursorExtractedIonChromatogram(windowStart, windowEnd);
}
/// <summary>
/// For each Study3 dataset, attaches identifications to the loaded LC-MS features they fall into
/// and runs <c>FeatureFind</c> on the identifications that no feature accounted for.
/// </summary>
/// <remarks>
/// Input and output folders are hard-coded. A dataset is skipped when its
/// <c>.seqtag.ms1ft</c> output file already exists.
/// </remarks>
public void FindMissingLcMsFeatures()
{
    var mspfFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";
    var ms1ftFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";

    const int maxFrac1 = 3;
    const int maxFrac2 = 5;

    for (var frac1 = 1; frac1 <= maxFrac1; frac1++)
    {
        for (var frac2 = 1; frac2 <= maxFrac2; frac2++)
        {
            var datasets = GetDataSetNamesStudy3(frac1, frac2);
            var datasetCount = datasets.Count;
            var prsmReader = new ProteinSpectrumMatchReader();
            var tolerance = new Tolerance(12);

            for (var i = 0; i < datasetCount; i++)
            {
                var datasetName = datasets[i];
                var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, datasetName);
                var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasetName);
                var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasetName);
                var outPath = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasetName);

                // Already processed on an earlier run.
                if (File.Exists(outPath))
                {
                    continue;
                }

                var run = PbfLcMsRun.GetLcMsRun(rawFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1FtFile, run);
                var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                // isMatched[k] becomes true once identification k is claimed by any feature.
                var isMatched = new bool[prsmList.Count];

                for (var j = 0; j < features.Count; j++)
                {
                    var massTol = tolerance.GetToleranceAsTh(features[j].Mass);

                    for (var k = 0; k < prsmList.Count; k++)
                    {
                        var match = prsmList[k];

                        // The scan must lie strictly inside the feature's scan range...
                        var insideScanRange = features[j].MinScanNum < match.ScanNum
                                              && match.ScanNum < features[j].MaxScanNum;
                        if (!insideScanRange)
                        {
                            continue;
                        }

                        // ...and the masses must agree within the tolerance.
                        if (!(Math.Abs(features[j].Mass - match.Mass) < massTol))
                        {
                            continue;
                        }

                        features[j].ProteinSpectrumMatches.Add(match);
                        isMatched[k] = true;
                    }
                }

                var missingPrsm = new List<ProteinSpectrumMatch>();
                for (var k = 0; k < prsmList.Count; k++)
                {
                    if (isMatched[k])
                    {
                        continue;
                    }

                    missingPrsm.Add(prsmList[k]);
                }

                FeatureFind(missingPrsm, run, outPath);
                Console.WriteLine(outPath);
            }
        }
    }
}
/// <summary>
/// Collects, for each isotope of the theoretical envelope at the given charge, the most intense
/// observed peak inside that isotope's tolerance window.
/// </summary>
/// <param name="monoIsotopeMass">monoisotopic mass (not read by this implementation; isotope m/z values come from <paramref name="isotopeList"/>)</param>
/// <param name="charge">charge state used to compute isotope m/z values</param>
/// <param name="isotopeList">theoretical isotope envelope supplying the isotope count and m/z values</param>
/// <param name="tolerance">tolerance; converted to Th at each isotope m/z</param>
/// <returns>array with one slot per isotope; a slot stays null when no peak matched</returns>
public Ms1Peak[] GetAllIsotopePeaks(double monoIsotopeMass, int charge, TheoreticalIsotopeEnvelope isotopeList, Tolerance tolerance)
{
    var matched = new Ms1Peak[isotopeList.Size];

    // Isotope 0: binary-search to the lower edge of its window, then walk upward.
    var firstIsotopeMz = isotopeList.GetIsotopeMz(charge, 0);
    var halfWidth = tolerance.GetToleranceAsTh(firstIsotopeMz);
    var lowerMz = firstIsotopeMz - halfWidth;
    var upperMz = firstIsotopeMz + halfWidth;

    var startIndex = Array.BinarySearch(Peaks, new Ms1Peak(lowerMz, 0, 0));
    if (startIndex < 0)
    {
        // Negative result is the complement of the insertion point.
        startIndex = ~startIndex;
    }

    var strongestIndex = -1;
    var strongestIntensity = 0.0;

    for (var i = startIndex; i >= 0 && i < Peaks.Length; i++)
    {
        if (Peaks[i].Mz >= upperMz)
        {
            break;
        }

        if (Peaks[i].Intensity > strongestIntensity)
        {
            strongestIntensity = Peaks[i].Intensity;
            strongestIndex = i;
            matched[0] = (Ms1Peak)Peaks[strongestIndex];
        }
    }

    // Remaining isotopes continue from just past the isotope-0 match
    // (or from the search start when isotope 0 was not found).
    var cursor = (strongestIndex >= 0) ? strongestIndex + 1 : startIndex;

    for (var isotope = 1; isotope < isotopeList.Size; isotope++)
    {
        var isotopeMz = isotopeList.GetIsotopeMz(charge, isotope);
        halfWidth = tolerance.GetToleranceAsTh(isotopeMz);
        lowerMz = isotopeMz - halfWidth;
        upperMz = isotopeMz + halfWidth;

        for (var i = cursor; i < Peaks.Length; i++)
        {
            var peakMz = Peaks[i].Mz;

            if (peakMz > upperMz)
            {
                // Past this isotope's window: remember the position for the next isotope.
                cursor = i;
                break;
            }

            if (peakMz >= lowerMz)
            {
                var peak = Peaks[i];
                if (matched[isotope] == null || peak.Intensity > matched[isotope].Intensity)
                {
                    matched[isotope] = (Ms1Peak)peak;
                }
            }
        }
    }

    return matched;
}
/// <summary>
/// Builds the feature matrix for the mass of the given bin and grows LC-MS peak clusters from the
/// seed envelopes, expanding each seed through a queue of neighboring (charge row, scan column) cells.
/// </summary>
/// <param name="binNumber">bin number; converted to the target mass via <c>Comparer.GetMzAverage</c></param>
/// <returns>
/// the clusters that survived refinement; empty when the matrix has fewer than 2 rows or no columns
/// </returns>
private IList<LcMsPeakCluster> GetLcMs1PeakClusters(int binNumber)
{
    // Neighbor search covers this many charge rows on either side of the current cell's row.
    const int chargeNeighborGap = 4;
    var targetMass = Comparer.GetMzAverage(binNumber);
    BuildFeatureMatrix(targetMass); // should be called first

    var clusters = new List<LcMsPeakCluster>();

    // todo : bottom up dataset??
    if (_rows.Length < 2 || _cols.Length < 1) return clusters;

    // tempEnvelope holds the running summed envelope of the cluster being grown;
    // tempEnvelope2 is a backup used to roll back a rejected neighbor.
    var tempEnvelope = new double[_theoreticalEnvelope.Size];
    var tempEnvelope2 = new double[_theoreticalEnvelope.Size];
    var ms1ScanNums = Run.GetMs1ScanVector();
    var ms1ScanNumToIndex = Run.GetMs1ScanNumToIndex();
    var mostAbuInternalIndex = _theoreticalEnvelope.IndexOrderByRanking[0];
    var tolerance = new Tolerance(Comparer.Ppm*0.5);

    // Seeds are visited in ascending order of their key in _seedEnvelopes.
    foreach (var seed in _seedEnvelopes.OrderBy(s=>s.Key).Select(s=> s.Value))
    {
        var row = seed.Charge - _targetMinCharge;
        var col = ms1ScanNumToIndex[seed.ScanNum];

        // Skip seeds whose cell was already claimed by an earlier cluster.
        if (_featureMatrix[row][col].CheckedOutFlag) continue;

        var mostAbuMz = _theoreticalEnvelope.GetIsotopeMz(seed.Charge, mostAbuInternalIndex);
        var seedLocalWin = Ms1Spectra[col].GetLocalMzWindow(mostAbuMz);
        var poissonPvalue = seedLocalWin.GetPoissonTestPvalue(_featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope.Size);
        var rankSumPvalue = seedLocalWin.GetRankSumTestPvalue(_featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope.Size);

        // A seed qualifies when either test's p-value is below 0.01.
        var goodEnvelope = (rankSumPvalue < 0.01 || poissonPvalue < 0.01);
        if (!goodEnvelope) continue;

        var chargeCheck = CorrectChargeState(seed, Ms1Spectra[col]);
        if (!chargeCheck) continue;

        var seedMass = _featureMatrix[row][col].AccurateMass;
        var massTol = tolerance.GetToleranceAsTh(seedMass);
        var newCluster = new LcMsPeakCluster(Run, seed);

        // Start the running summed envelope from the seed's own peaks.
        Array.Clear(tempEnvelope, 0, tempEnvelope.Length);
        seed.Peaks.SumEnvelopeTo(tempEnvelope);

        var neighbors = new Queue<ObservedIsotopeEnvelope>();
        neighbors.Enqueue(seed); // pick a seed
        _featureMatrix[row][col].CheckedOutFlag = true;

        var summedBcDist = _featureMatrix[row][col].DivergenceDist;
        var summedCorr = _featureMatrix[row][col].CorrelationCoeff;

        while (neighbors.Count > 0)
        {
            var cell = neighbors.Dequeue();
            var charge = cell.Charge;

            // Row range: within chargeNeighborGap of the cell's row, clamped to the first/last entries of _rows.
            var minRw = (int)Math.Max(charge - _targetMinCharge - chargeNeighborGap, _rows.First());
            var maxRw = (int)Math.Min(charge - _targetMinCharge + chargeNeighborGap, _rows.Last());
            var currCol = ms1ScanNumToIndex[cell.ScanNum];

            // k = 0,1,2 visits columns currCol, currCol+1, currCol+2; k = 3,4 visits currCol-1, currCol-2.
            for (var k = 0; k < 5; k++)
            {
                var j = currCol;
                if (k < 3) j += k;
                else j -= (k - 2);

                if (j < _cols.First() || j > _cols.Last()) continue;

                for (var i = minRw; i <= maxRw; i++)
                {
                    if (_featureMatrix[i][j].CheckedOutFlag) continue;
                    if (!(_featureMatrix[i][j].AccurateMass > 0)) continue;
                    if (Math.Abs(seedMass - _featureMatrix[i][j].AccurateMass) > massTol) continue;

                    // Back up the running sum, then tentatively add this cell's envelope.
                    Array.Copy(tempEnvelope, tempEnvelope2, tempEnvelope2.Length);
                    _featureMatrix[i][j].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);

                    var newDivergence = _theoreticalEnvelope.GetBhattacharyyaDistance(tempEnvelope);
                    var newCorrelation = _theoreticalEnvelope.GetPearsonCorrelation(tempEnvelope);

                    // Accept when the cell is good on its own (distance < 0.02 or correlation > 0.7)
                    // or when adding it improves either summed score.
                    if (_featureMatrix[i][j].DivergenceDist < 0.02 ||_featureMatrix[i][j].CorrelationCoeff > 0.7 || newDivergence < summedBcDist || newCorrelation > summedCorr)
                    {
                        var envelope = new ObservedIsotopeEnvelope(_featureMatrix[i][j].AccurateMass, i + _targetMinCharge, ms1ScanNums[j], _featureMatrix[i][j].EnvelopePeaks, _theoreticalEnvelope);
                        neighbors.Enqueue(envelope);
                        newCluster.Expand(envelope);
                        _featureMatrix[i][j].CheckedOutFlag = true;
                        summedBcDist = newDivergence;
                        summedCorr = newCorrelation;
                    }
                    else
                    {
                        // Rejected: restore the running sum from the backup.
                        Array.Copy(tempEnvelope2, tempEnvelope, tempEnvelope.Length);
                    }
                }
            }
        }

        LcMsPeakCluster refinedCluster = null;
        if (summedCorr > 0.5 || summedBcDist < 0.15)
        {
            // re-update check-out map
            // (flags over the cluster's charge/scan rectangle are cleared before GetLcMsPeakCluster is called)
            SetCheckOutFlag(newCluster.MinCharge - _targetMinCharge, newCluster.MaxCharge - _targetMinCharge, ms1ScanNumToIndex[newCluster.MinScanNum], ms1ScanNumToIndex[newCluster.MaxScanNum], false);
            refinedCluster = GetLcMsPeakCluster(newCluster.RepresentativeMass, newCluster.RepresentativeCharge, newCluster.MinScanNum, newCluster.MaxScanNum, true);
        }

        // Keep the refined cluster when there is no scorer, or when it is flagged good enough
        // and its score reaches the scorer's threshold.
        if (refinedCluster != null && (_scorer == null || (_scorer != null && refinedCluster.GoodEnougth && refinedCluster.Score >= _scorer.ScoreThreshold)))
        {
            // Accepted: check out every row over the refined cluster's scan range.
            SetCheckOutFlag(_rows.First(), _rows.Last(), ms1ScanNumToIndex[refinedCluster.MinScanNum], ms1ScanNumToIndex[refinedCluster.MaxScanNum], true);
            clusters.Add(refinedCluster);
        }
        else
        {
            // Rejected: check out only the rectangle covered by the unrefined cluster.
            SetCheckOutFlag(newCluster.MinCharge - _targetMinCharge, newCluster.MaxCharge - _targetMinCharge, ms1ScanNumToIndex[newCluster.MinScanNum], ms1ScanNumToIndex[newCluster.MaxScanNum], true);
        }
    }
    return clusters;
}
/// <summary>
/// Checks whether the charge assigned to an observed envelope looks right by counting, among the
/// intense peaks spanned by the envelope, peak-to-peak m/z gaps that correspond to higher charges.
/// </summary>
/// <param name="envelope">observed isotope envelope under test</param>
/// <param name="spectrum">spectrum the envelope's peaks index into</param>
/// <returns>
/// false when some higher candidate charge collects enough supporting gaps to exceed both
/// thresholds; true otherwise (including any envelope with charge above 20)
/// </returns>
private bool CorrectChargeState(ObservedIsotopeEnvelope envelope, Ms1Spectrum spectrum)
{
    // high charge (> +20), just pass
    if (envelope.Charge > 20)
    {
        return true;
    }

    var peaks = spectrum.Peaks;
    var firstIndex = envelope.MinMzPeak.IndexInSpectrum;
    var lastIndex = envelope.MaxMzPeak.IndexInSpectrum;

    // Only peaks above 15% of the envelope's highest intensity take part.
    var intensityFloor = envelope.HighestIntensity * 0.15;

    var strongPeakCount = 0;
    for (var i = firstIndex; i <= lastIndex; i++)
    {
        if (peaks[i].Intensity > intensityFloor)
        {
            strongPeakCount++;
        }
    }

    // The envelope already accounts for most of the intense peaks in its span.
    if (envelope.NumberOfPeaks > strongPeakCount * 0.7)
    {
        return true;
    }

    var tolerance = new Tolerance(Comparer.Ppm * 0.5);
    var threshold = strongPeakCount * 0.5;
    var threshold2 = envelope.NumberOfPeaks + (envelope.TheoreticalEnvelope.Size - 1) * 0.7;
    var mzTol = tolerance.GetToleranceAsTh(peaks[firstIndex].Mz);

    var minCheckCharge = Math.Max(envelope.Charge * 2 - 1, 4);
    var maxCheckCharge = Math.Min(envelope.Charge * 5 + 1, 60);

    // Largest gap worth examining: the isotope spacing at the lowest candidate charge, plus tolerance.
    var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;

    // gapVotes[c - minCheckCharge] counts peak pairs whose spacing is compatible with charge c.
    var gapVotes = new int[maxCheckCharge - minCheckCharge + 1];

    for (var i = firstIndex; i <= lastIndex; i++)
    {
        if (!(peaks[i].Intensity > intensityFloor))
        {
            continue;
        }

        for (var j = i + 1; j <= lastIndex; j++)
        {
            if (!(peaks[j].Intensity > intensityFloor))
            {
                continue;
            }

            var gap = peaks[j].Mz - peaks[i].Mz;
            if (gap > maxDeltaMz)
            {
                break;
            }

            // Peaks are too close together (gap equals the tolerance, so the division below would blow up).
            if (Math.Abs(gap - mzTol) < float.Epsilon)
            {
                continue;
            }

            // Candidate charges whose spacing 1/c falls inside [gap - mzTol, gap + mzTol].
            var lowestCharge = Math.Round(1 / (gap + mzTol));
            var highestCharge = Math.Round(1 / (gap - mzTol));

            for (var c = lowestCharge; c <= highestCharge; c++)
            {
                if (c < minCheckCharge)
                {
                    continue;
                }

                if (c > maxCheckCharge)
                {
                    break;
                }

                var slot = (int)c - minCheckCharge;
                gapVotes[slot]++;

                var support = gapVotes[slot] + 1;
                if (support > threshold && support > threshold2)
                {
                    return false;
                }
            }
        }
    }

    return true;
}
/// <summary>
/// Filters <c>Peaks</c> in place, keeping a peak only when its intensity exceeds the median of the
/// distinct intensities in its local m/z window multiplied by <paramref name="signalToNoiseRatio"/>.
/// A peak whose window holds fewer than two peaks is always kept.
/// </summary>
/// <param name="signalToNoiseRatio">multiplier applied to the local median intensity</param>
/// <param name="windowPpm">value passed to the Tolerance constructor to size the local window</param>
public void FilterNosieByLocalWindow(double signalToNoiseRatio = 1.4826, int windowPpm = 10000)
{
    var filteredPeaks = new List<Peak>();
    var tolerance = new Tolerance(windowPpm);
    var lastIndex = Peaks.Length - 1;

    // [st, ed] is the current window; [prevSt, prevEd] is the window intensityValues currently reflects.
    var st = 0;
    var ed = 0;
    var prevSt = 0;
    var prevEd = 0;

    // SortedSet keeps one entry per distinct intensity, so the median is over distinct values.
    var intensityValues = new SortedSet<double>();

    foreach (var peak in Peaks)
    {
        var mzWindowWidth = tolerance.GetToleranceAsTh(peak.Mz);
        var mzStart = peak.Mz - mzWindowWidth;
        var mzEnd = peak.Mz + mzWindowWidth;

        // Bounds only move forward and are clamped to the last valid index.
        while (st < lastIndex && Peaks[st].Mz < mzStart)
        {
            st++;
        }

        while (ed < lastIndex && Peaks[ed].Mz < mzEnd)
        {
            ed++;
        }

        if (ed - st + 1 < 2)
        {
            filteredPeaks.Add(peak);
            continue;
        }

        if (intensityValues.Count < 1 || prevEd < st)
        {
            // First window, or no overlap with the tracked window: rebuild from scratch.
            // (Previously the disjoint case only emptied the set, leaving Median() with no data.)
            intensityValues.Clear();
            for (var i = st; i <= ed; i++)
            {
                intensityValues.Add(Peaks[i].Intensity);
            }
        }
        else
        {
            // Overlapping windows: drop only the peaks that left, [prevSt, st), and add the
            // peaks that entered, (prevEd, ed]. (Previously the removal ran up to ed and
            // discarded values that were still inside the window.)
            for (var i = prevSt; i < st; i++)
            {
                intensityValues.Remove(Peaks[i].Intensity);
            }

            for (var i = prevEd + 1; i <= ed; i++)
            {
                intensityValues.Add(Peaks[i].Intensity);
            }

            // NOTE(review): because the set stores distinct values, removing a departing peak also
            // removes an equal intensity belonging to a peak still in the window. A multiset would
            // be needed to make the incremental update exact for duplicate intensities.
        }

        var intensityMedian = intensityValues.Median();
        if (peak.Intensity > intensityMedian * signalToNoiseRatio)
        {
            filteredPeaks.Add(peak);
        }

        prevSt = st;
        prevEd = ed;
    }

    filteredPeaks.Sort();
    Peaks = filteredPeaks.ToArray();
}
/// <summary>
/// Locates the observed peak for the most abundant isotope, then walks outward in both directions
/// collecting the most intense peak inside each remaining isotope's tolerance window.
/// </summary>
/// <param name="monoIsotopeMass">monoisotopic mass used to compute isotope m/z values</param>
/// <param name="charge">charge state used to compute isotope m/z values</param>
/// <param name="envelope">isotopomer envelope supplying relative intensities and the most abundant isotope index</param>
/// <param name="tolerance">tolerance; converted to Th at each isotope m/z</param>
/// <param name="relativeIntensityThreshold">
/// walking in a direction stops at the first isotope whose relative intensity is below this value
/// </param>
/// <returns>
/// null when the most abundant isotope has no peak; otherwise an array indexed by isotope index,
/// with null in every slot that was not matched or not visited
/// </returns>
public Peak[] GetAllIsotopePeaks(double monoIsotopeMass, int charge, IsotopomerEnvelope envelope, Tolerance tolerance, double relativeIntensityThreshold)
{
    var apexIsotope = envelope.MostAbundantIsotopeIndex;
    var relativeIntensities = envelope.Envolope;

    var apexMz = Ion.GetIsotopeMz(monoIsotopeMass, charge, apexIsotope);
    var apexPeakIndex = FindPeakIndex(apexMz, tolerance);
    if (apexPeakIndex < 0)
    {
        return null;
    }

    var matched = new Peak[relativeIntensities.Length];
    matched[apexIsotope] = Peaks[apexPeakIndex];

    // Lower isotopes: scan the peak list downward from just below the apex peak.
    var cursor = apexPeakIndex - 1;
    for (var isotope = apexIsotope - 1; isotope >= 0; isotope--)
    {
        if (relativeIntensities[isotope] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = Ion.GetIsotopeMz(monoIsotopeMass, charge, isotope);
        var halfWidth = tolerance.GetToleranceAsTh(isotopeMz);
        var lowerMz = isotopeMz - halfWidth;
        var upperMz = isotopeMz + halfWidth;

        for (var i = cursor; i >= 0; i--)
        {
            var peakMz = Peaks[i].Mz;

            if (peakMz < lowerMz)
            {
                // Below this isotope's window: the next isotope resumes from here.
                cursor = i;
                break;
            }

            if (peakMz <= upperMz)
            {
                var peak = Peaks[i];
                if (matched[isotope] == null || peak.Intensity > matched[isotope].Intensity)
                {
                    matched[isotope] = peak;
                }
            }
        }
    }

    // Higher isotopes: scan the peak list upward from just above the apex peak.
    cursor = apexPeakIndex + 1;
    for (var isotope = apexIsotope + 1; isotope < relativeIntensities.Length; isotope++)
    {
        if (relativeIntensities[isotope] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = Ion.GetIsotopeMz(monoIsotopeMass, charge, isotope);
        var halfWidth = tolerance.GetToleranceAsTh(isotopeMz);
        var lowerMz = isotopeMz - halfWidth;
        var upperMz = isotopeMz + halfWidth;

        for (var i = cursor; i < Peaks.Length; i++)
        {
            var peakMz = Peaks[i].Mz;

            if (peakMz > upperMz)
            {
                // Above this isotope's window: the next isotope resumes from here.
                cursor = i;
                break;
            }

            if (peakMz >= lowerMz)
            {
                var peak = Peaks[i];
                if (matched[isotope] == null || peak.Intensity > matched[isotope].Intensity)
                {
                    matched[isotope] = peak;
                }
            }
        }
    }

    return matched;
}
/// <summary>
/// Checks whether this spectrum contains all isotope peaks of the ion whose relative intensity is
/// equal to or larger than the threshold.
/// </summary>
/// <remarks>
/// The most abundant isotope is located first; the search then walks to lower and to higher
/// isotopes, stopping in each direction at the first isotope below the threshold. An isotope is
/// missing when the scan passes its window, or when the peak list is exhausted before a peak in
/// its window is found.
/// </remarks>
/// <param name="ion">ion</param>
/// <param name="tolerance">tolerance</param>
/// <param name="relativeIntensityThreshold">relative intensity threshold of the theoretical isotope profile</param>
/// <returns>true if spectrum contains all ions; false otherwise.</returns>
public bool ContainsIon(Ion ion, Tolerance tolerance, double relativeIntensityThreshold)
{
    var baseIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    var baseIsotopMz = ion.GetIsotopeMz(baseIsotopeIndex);
    var baseIsotopePeakIndex = FindPeakIndex(baseIsotopMz, tolerance);
    if (baseIsotopePeakIndex < 0)
    {
        return false;
    }

    // go down
    var peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex - 1; isotopeIndex >= 0; isotopeIndex--)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var found = false;
        for (var i = peakIndex - 1; i >= 0; i--)
        {
            var peakMz = Peaks[i].Mz;
            if (peakMz < minMz)
            {
                // Walked past the window without a match.
                break;
            }

            if (peakMz <= maxMz)
            {
                // find match, move to prev isotope
                peakIndex = i;
                found = true;
                break;
            }
        }

        // Also covers running out of peaks, which used to be treated as a match.
        if (!found)
        {
            return false;
        }
    }

    // go up
    peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var found = false;
        for (var i = peakIndex + 1; i < Peaks.Length; i++)
        {
            var peakMz = Peaks[i].Mz;
            if (peakMz > maxMz)
            {
                // Walked past the window without a match.
                break;
            }

            if (peakMz >= minMz)
            {
                // find match, move to next isotope
                peakIndex = i;
                found = true;
                break;
            }
        }

        // Also covers running out of peaks, which used to be treated as a match.
        if (!found)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Times two identical passes of 100000 product XIC extractions against random
/// (m/z, precursor m/z) pairs on the in-memory run and prints the elapsed seconds of each pass.
/// The test is ignored when the raw file is not present.
/// </summary>
public void TestGeneratingProductManyXics()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = TestRawFilePath;
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath);
    var tolerance = new Tolerance(10);

    // Random queries: product m/z in [50, 1500), precursor m/z in [390, 810).
    var mzArr = new double[100000];
    var precursorMzArr = new double[mzArr.Length];
    var rnd = new Random();
    for (var q = 0; q < mzArr.Length; q++)
    {
        mzArr[q] = rnd.NextDouble() * 1450.0 + 50.0;
        precursorMzArr[q] = rnd.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    // method 1
    var sw = System.Diagnostics.Stopwatch.StartNew();
    for (var q = 0; q < mzArr.Length; q++)
    {
        var center = mzArr[q];
        var halfWidth = tolerance.GetToleranceAsTh(center);
        run.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzArr[q]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // method 2 (same call path as method 1 on this run)
    sw.Restart();
    for (var q = 0; q < mzArr.Length; q++)
    {
        var center = mzArr[q];
        var halfWidth = tolerance.GetToleranceAsTh(center);
        run.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzArr[q]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", sw.Elapsed.TotalSeconds);

    Console.WriteLine("Done");
}
/// <summary>
/// Filters <c>Peaks</c> in place, keeping a peak only when its intensity exceeds the median of the
/// distinct intensities in its local m/z window multiplied by <paramref name="signalToNoiseRatio"/>.
/// A peak whose window holds fewer than two peaks is always kept.
/// </summary>
/// <param name="signalToNoiseRatio">multiplier applied to the local median intensity</param>
/// <param name="windowPpm">value passed to the Tolerance constructor to size the local window</param>
public void FilterNosieByLocalWindow(double signalToNoiseRatio = 1.4826, int windowPpm = 10000)
{
    var filteredPeaks = new List<Peak>();
    var tolerance = new Tolerance(windowPpm);
    var lastIndex = Peaks.Length - 1;

    // [st, ed] is the current window; [prevSt, prevEd] is the window intensityValues currently reflects.
    var st = 0;
    var ed = 0;
    var prevSt = 0;
    var prevEd = 0;

    // SortedSet keeps one entry per distinct intensity, so the median is over distinct values.
    var intensityValues = new SortedSet<double>();

    foreach (var peak in Peaks)
    {
        var mzWindowWidth = tolerance.GetToleranceAsTh(peak.Mz);
        var mzStart = peak.Mz - mzWindowWidth;
        var mzEnd = peak.Mz + mzWindowWidth;

        // Bounds only move forward and are clamped to the last valid index.
        while (st < lastIndex && Peaks[st].Mz < mzStart)
        {
            st++;
        }

        while (ed < lastIndex && Peaks[ed].Mz < mzEnd)
        {
            ed++;
        }

        if (ed - st + 1 < 2)
        {
            filteredPeaks.Add(peak);
            continue;
        }

        if (intensityValues.Count < 1 || prevEd < st)
        {
            // First window, or no overlap with the tracked window: rebuild from scratch.
            // (Previously the disjoint case only emptied the set, leaving Median() with no data.)
            intensityValues.Clear();
            for (var i = st; i <= ed; i++)
            {
                intensityValues.Add(Peaks[i].Intensity);
            }
        }
        else
        {
            // Overlapping windows: drop only the peaks that left, [prevSt, st), and add the
            // peaks that entered, (prevEd, ed]. (Previously the removal ran up to ed and
            // discarded values that were still inside the window.)
            for (var i = prevSt; i < st; i++)
            {
                intensityValues.Remove(Peaks[i].Intensity);
            }

            for (var i = prevEd + 1; i <= ed; i++)
            {
                intensityValues.Add(Peaks[i].Intensity);
            }

            // NOTE(review): because the set stores distinct values, removing a departing peak also
            // removes an equal intensity belonging to a peak still in the window. A multiset would
            // be needed to make the incremental update exact for duplicate intensities.
        }

        var intensityMedian = intensityValues.Median();
        if (peak.Intensity > intensityMedian * signalToNoiseRatio)
        {
            filteredPeaks.Add(peak);
        }

        prevSt = st;
        prevEd = ed;
    }

    filteredPeaks.Sort();
    Peaks = filteredPeaks.ToArray();
}
/// <summary>
/// Times 100000 product XIC extractions against random (m/z, precursor m/z) pairs, first on the
/// in-memory run ("Method 1") and then on the PbfLcMsRun opened from the .raf path ("Method 2"),
/// printing the elapsed seconds of each pass. The test is ignored when either input file is missing.
/// </summary>
public void TestGeneratingProductXics()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath);

    const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf";
    if (!File.Exists(rafFilePath))
    {
        Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", methodName, rafFilePath);
    }

    var rafRun = new PbfLcMsRun(rafFilePath);
    var tolerance = new Tolerance(10);

    // Random queries: product m/z in [50, 1500), precursor m/z in [390, 810).
    var mzArr = new double[100000];
    var precursorMzArr = new double[mzArr.Length];
    var rnd = new Random();
    for (var q = 0; q < mzArr.Length; q++)
    {
        mzArr[q] = rnd.NextDouble() * 1450.0 + 50.0;
        precursorMzArr[q] = rnd.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    // method 1: in-memory run
    var sw = System.Diagnostics.Stopwatch.StartNew();
    for (var q = 0; q < mzArr.Length; q++)
    {
        var center = mzArr[q];
        var halfWidth = tolerance.GetToleranceAsTh(center);
        run.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzArr[q]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // method 2: run opened from the raf file
    sw.Restart();
    for (var q = 0; q < mzArr.Length; q++)
    {
        var center = mzArr[q];
        var halfWidth = tolerance.GetToleranceAsTh(center);
        rafRun.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzArr[q]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", sw.Elapsed.TotalSeconds);

    Console.WriteLine(@"Done");
}
/// <summary>
/// For every training dataset, reads the filtered identification rows, looks up the best matching
/// LC-MS feature for each row, and writes one line per row to a <c>.ms1ft</c> file plus envelope
/// peak statistics for the target and decoy versions of each matched feature.
/// </summary>
/// <remarks>
/// The test is ignored when the identification folder does not exist; datasets with a missing raw
/// or identification file are skipped with a warning. All three output writers are wrapped in
/// <c>using</c> so the files are flushed and closed even when processing throws.
/// </remarks>
public void ExtractLcMsFeaturesForTrainingSet()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string idFileFolder = @"D:\MassSpecFiles\training\FilteredIdResult";
    if (!Directory.Exists(idFileFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, idFileFolder);
    }

    var tolerance = new Tolerance(10);
    var tolerance2 = new Tolerance(20);

    // Running identifier handed to OutputEnvelopPeakStat; incremented once per matched feature.
    var id = 1;

    for (var d = 0; d < TrainSetFileLists.Length; d++)
    {
        var dataset = TrainSetFileLists[d];
        var dataname = Path.GetFileNameWithoutExtension(dataset);
        var filtedIdResultFile = string.Format(@"{0}\{1}.trainset.tsv", idFileFolder, dataname);
        var featureResult = string.Format(@"{0}\{1}.ms1ft", idFileFolder, dataname);

        if (!File.Exists(dataset))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", dataset);
            continue;
        }

        if (!File.Exists(filtedIdResultFile))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", filtedIdResultFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(dataset);

        using (var targetStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}.tsv", dataname)))
        using (var decoyStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}_decoy.tsv", dataname)))
        using (var writer = new StreamWriter(featureResult))
        {
            writer.Write("Ms2MinScan\tMs2MaxScan\tMs2MinCharge\tMs2MaxCharge\tMs2Mass\t");
            writer.Write("Mass\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMinTime\tMaxTime\tElution\tGood\n");

            var tsvParser = new TsvFileParser(filtedIdResultFile);
            var featureFinder = new LcMsPeakMatrix(run);

            // Column lookups do not depend on the row, so fetch each column once.
            var minScanColumn = tsvParser.GetData("MinScan");
            var maxScanColumn = tsvParser.GetData("MaxScan");
            var minChargeColumn = tsvParser.GetData("MinCharge");
            var maxChargeColumn = tsvParser.GetData("MaxCharge");
            var massColumn = tsvParser.GetData("Mass");

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                var minScan = int.Parse(minScanColumn[i]);
                var maxScan = int.Parse(maxScanColumn[i]);
                var minCharge = int.Parse(minChargeColumn[i]);
                var maxCharge = int.Parse(maxChargeColumn[i]);
                var mass = double.Parse(massColumn[i]);

                writer.Write(minScan); writer.Write("\t");
                writer.Write(maxScan); writer.Write("\t");
                writer.Write(minCharge); writer.Write("\t");
                writer.Write(maxCharge); writer.Write("\t");
                writer.Write(mass); writer.Write("\t");

                // Search the mass's own bin first, then the neighbor on the side the mass leans
                // toward, then the other neighbor.
                var binNum = featureFinder.Comparer.GetBinNumber(mass);
                var binMass = featureFinder.Comparer.GetMzAverage(binNum);
                var binNumList = (mass < binMass)
                    ? new int[] { binNum, binNum - 1, binNum + 1 }
                    : new int[] { binNum, binNum + 1, binNum - 1 };

                // Masses below 2000 use the wider tolerance.
                var massTh = (mass < 2000) ? tolerance2.GetToleranceAsTh(mass) : tolerance.GetToleranceAsTh(mass);
                var midScan = 0.5 * (minScan + maxScan);

                LcMsPeakCluster refinedFeature = null;
                foreach (var bi in binNumList)
                {
                    var features = featureFinder.FindFeatures(bi);
                    var highestAbu = 0d;

                    foreach (var feature in features)
                    {
                        // Mass must agree within tolerance.
                        if (!(Math.Abs(mass - feature.Mass) < massTh))
                        {
                            continue;
                        }

                        // The row's middle scan must fall strictly inside the feature's scan range.
                        if (!(feature.MinScanNum < midScan && midScan < feature.MaxScanNum))
                        {
                            continue;
                        }

                        // Among qualifying features keep the most abundant one.
                        if (feature.Abundance > highestAbu)
                        {
                            refinedFeature = feature;
                            highestAbu = feature.Abundance;
                        }
                    }

                    if (refinedFeature != null)
                    {
                        break;
                    }
                }

                if (refinedFeature != null)
                {
                    writer.Write(refinedFeature.Mass); writer.Write("\t");
                    writer.Write(refinedFeature.MinScanNum); writer.Write("\t");
                    writer.Write(refinedFeature.MaxScanNum); writer.Write("\t");
                    writer.Write(refinedFeature.MinCharge); writer.Write("\t");
                    writer.Write(refinedFeature.MaxCharge); writer.Write("\t");
                    writer.Write(refinedFeature.MinElutionTime); writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime); writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime - refinedFeature.MinElutionTime); writer.Write("\t");

                    // "Good" means the feature's scan range covers the row's whole scan range.
                    var good = (refinedFeature.MinScanNum <= minScan && refinedFeature.MaxScanNum >= maxScan);
                    writer.Write(good ? 1 : 0);
                    writer.Write("\n");

                    OutputEnvelopPeakStat(id, refinedFeature, targetStatWriter);

                    var chargeRange = featureFinder.GetDetectableMinMaxCharge(refinedFeature.RepresentativeMass, run.MinMs1Mz, run.MaxMs1Mz);
                    refinedFeature.UpdateWithDecoyScore(featureFinder.Ms1Spectra, chargeRange.Item1, chargeRange.Item2);
                    OutputEnvelopPeakStat(id, refinedFeature, decoyStatWriter);
                    id++;
                }
                else
                {
                    // No feature found: fill the nine feature columns with zeros.
                    for (var column = 0; column < 8; column++)
                    {
                        writer.Write(0);
                        writer.Write("\t");
                    }

                    writer.Write(0);
                    writer.Write("\n");
                }
            }
        }

        Console.WriteLine(dataname);
    }
}
/// <summary>
/// Builds a summed isotope envelope for one charge row of the feature matrix. A seed column is
/// picked inside [minCol, maxCol], the usable column range is probed forward and backward from
/// the seed, and the envelope is then re-summed outward from the seed inside that range.
/// </summary>
/// <remarks>
/// Side effects: writes <c>_corrProfileAcrossCharge[row]</c>, <c>_distProfileAcrossCharge[row]</c>
/// and, when a seed is found, <c>_summedEnvelopeColRange[row, 0..1]</c>.
/// </remarks>
/// <param name="targetMass">mass that candidate seed cells must match within tolerance</param>
/// <param name="row">row of the feature matrix to sum over</param>
/// <param name="minCol">first column considered when picking the seed</param>
/// <param name="maxCol">last column considered when picking the seed</param>
/// <returns>the summed envelope; all zeros when no seed column qualifies</returns>
private double[] GetSummedEnvelopeAtCharge(double targetMass, int row, int minCol, int maxCol)
{
    // Probing in one direction stops once this many consecutive columns failed to contribute.
    const int maxScanSkips = 2;
    // A cell at or beyond these per-cell scores is accepted regardless of the summed scores.
    const double goodEnoughBcDistance = 0.07;
    const double goodEnoughCorrCoeff = 0.7;

    var summedEnvelope = new double[_theoreticalEnvelope.Size];
    var seedCol = -1;
    var seedDist = 10.0d;
    var newMinCol = minCol;
    var newMaxCol = maxCol;
    //var tolerance = new Tolerance(5);
    var tolerance = new Tolerance(Comparer.Ppm * 0.5);
    var massTol = tolerance.GetToleranceAsTh(targetMass);

    // Seed selection: among existing, not-checked-out cells within the mass tolerance, take the
    // one with the smallest DivergenceDist (on ties the later column wins).
    for (var j = minCol; j <= maxCol; j++)
    {
        if (!_featureMatrix[row][j].Exist) continue;
        if (_featureMatrix[row][j].CheckedOutFlag) continue;
        if (Math.Abs(targetMass - _featureMatrix[row][j].AccurateMass) > massTol) continue;

        var bcDist = _featureMatrix[row][j].DivergenceDist;
        if (bcDist > seedDist) continue;

        //var signalToNoiseRatio = _featureMatrix[row][j].HighestIntensity / Ms1Spectra[j].MedianIntensity;
        //if (signalToNoiseRatio < 3) continue;
        seedCol = j;
        seedDist = bcDist;
    }

    var summedBcDist = 1.0d;
    var summedCorr = 0d;

    // Reset the per-row profiles; they keep these defaults when no seed is found.
    _corrProfileAcrossCharge[row] = 0d;
    _distProfileAcrossCharge[row] = 1.0d;

    if (seedCol < 0) return summedEnvelope;

    // going forward
    // n counts consecutive columns that were missing or whose addition was rejected.
    var tempEnvelope = new double[_theoreticalEnvelope.Size];
    var n = 0;
    for(var col = seedCol; col < NColumns; col++)
    {
        if (_featureMatrix[row][col].CheckedOutFlag) break;

        // Skip limit reached: this column becomes the new upper bound.
        if (n >= maxScanSkips)
        {
            newMaxCol = col;
            break;
        }

        if (!_featureMatrix[row][col].Exist)
        {
            n++;
            continue;
        }

        // Tentatively add this column to the working sum and score the result.
        _featureMatrix[row][col].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);
        var tempBcDist = _theoreticalEnvelope.GetBhattacharyyaDistance(tempEnvelope);
        var tempCorr = _theoreticalEnvelope.GetPearsonCorrelation(tempEnvelope);

        if (tempBcDist < summedBcDist || tempCorr > summedCorr || _featureMatrix[row][col].DivergenceDist < goodEnoughBcDistance || _featureMatrix[row][col].CorrelationCoeff > goodEnoughCorrCoeff)
        {
            summedBcDist = tempBcDist;
            summedCorr = tempCorr;
            Array.Copy(tempEnvelope, summedEnvelope, tempEnvelope.Length);
            n = 0;
        }
        else
        {
            // Rejected: roll the working sum back to the last accepted state.
            Array.Copy(summedEnvelope, tempEnvelope, tempEnvelope.Length);
            n++;
        }
    }

    // going backward
    // Same probing as above, restarted from empty sums and walking toward column 0.
    summedBcDist = 10.0d;
    summedCorr = 0d;
    Array.Clear(tempEnvelope, 0, tempEnvelope.Length);
    Array.Clear(summedEnvelope, 0, summedEnvelope.Length);
    n = 0;
    for (var col = seedCol; col >= 0; col--)
    {
        if (_featureMatrix[row][col].CheckedOutFlag) break;

        // Skip limit reached: this column becomes the new lower bound.
        if (n >= maxScanSkips)
        {
            newMinCol = col;
            break;
        }

        if (!_featureMatrix[row][col].Exist)
        {
            n++;
            continue;
        }

        _featureMatrix[row][col].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);
        var tempBcDist = _theoreticalEnvelope.GetBhattacharyyaDistance(tempEnvelope);
        var tempCorr = _theoreticalEnvelope.GetPearsonCorrelation(tempEnvelope);

        if (tempBcDist < summedBcDist || tempCorr > summedCorr || _featureMatrix[row][col].DivergenceDist < goodEnoughBcDistance || _featureMatrix[row][col].CorrelationCoeff > goodEnoughCorrCoeff)
        {
            summedBcDist = tempBcDist;
            summedCorr = tempCorr;
            Array.Copy(tempEnvelope, summedEnvelope, tempEnvelope.Length);
            n = 0;
        }
        else
        {
            Array.Copy(summedEnvelope, tempEnvelope, tempEnvelope.Length);
            n++;
        }
    }

    // construct summed envelope, given newMinCol and newMaxCol
    // The sum restarts from the seed cell alone; summedBcDist/summedCorr restart from 10 and 0.
    summedBcDist = 10.0d;
    summedCorr = 0d;
    Array.Clear(tempEnvelope, 0, tempEnvelope.Length);
    _featureMatrix[row][seedCol].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);
    Array.Copy(tempEnvelope, summedEnvelope, summedEnvelope.Length);

    var colShift = 0;
    var hitMinCol = false;
    var hitMaxCol = false;
    while (true)
    {
        // Stop once both ends of [newMinCol, newMaxCol] have been passed.
        if (hitMinCol && hitMaxCol) break;
        // NOTE(review): n is not reset before this loop, so it still carries the count left by
        // the backward pass - confirm whether that is intended.
        if (n > 3) break;
        colShift++;

        // colDir 0 looks colShift columns above the seed, colDir 1 the same distance below.
        for (var colDir = 0; colDir < 2; colDir++)
        {
            var col = (colDir == 0) ? seedCol + colShift : seedCol - colShift;

            if (col < newMinCol)
            {
                hitMinCol = true;
                continue;
            }

            if (col > newMaxCol)
            {
                hitMaxCol = true;
                continue;
            }

            if (!_featureMatrix[row][col].Exist) continue;

            _featureMatrix[row][col].EnvelopePeaks.SumEnvelopeTo(tempEnvelope);
            var tempBcDist = _theoreticalEnvelope.GetBhattacharyyaDistance(tempEnvelope);
            var tempCorr = _theoreticalEnvelope.GetPearsonCorrelation(tempEnvelope);

            // Here only an improvement of the summed scores is accepted (no per-cell shortcut).
            if (tempBcDist < summedBcDist || tempCorr > summedCorr)
            {
                summedBcDist = tempBcDist;
                summedCorr = tempCorr;
                Array.Copy(tempEnvelope, summedEnvelope, tempEnvelope.Length);
            }
            else
            {
                Array.Copy(summedEnvelope, tempEnvelope, tempEnvelope.Length);
                n++;
            }
        }
    }

    // Publish the final scores and the column range for this row.
    _corrProfileAcrossCharge[row] = summedCorr;
    _distProfileAcrossCharge[row] = summedBcDist;
    _summedEnvelopeColRange[row, 0] = newMinCol;
    _summedEnvelopeColRange[row, 1] = newMaxCol;

    return summedEnvelope;
}
/// <summary>
/// Filters <c>Peaks</c> in place using a local intensity histogram.
/// For every peak, a window of neighbouring peaks is selected with a <c>Tolerance(10000)</c>
/// around the peak m/z, and the peak is dropped when its intensity lies strictly inside the
/// bucket returned by <c>GetMostAbundantIntensity</c> for that window.
/// The surviving peaks are sorted and stored back into <c>Peaks</c>.
/// </summary>
public void FilterNosieByIntensityHistogram()
{
    var filteredPeaks = new List<Peak>();
    var tolerance = new Tolerance(10000);

    // Window bounds are shared across iterations and only move forward.
    // NOTE(review): this presumes Peaks is ordered by ascending m/z - confirm.
    var st = 0;
    var ed = 0;

    foreach (var peak in Peaks)
    {
        var mzWindowWidth = tolerance.GetToleranceAsTh(peak.Mz);
        var intensity = peak.Intensity;
        var mzStart = peak.Mz - mzWindowWidth;
        var mzEnd = peak.Mz + mzWindowWidth;

        // Advance the window start to the first peak at or above mzStart (never past the last index).
        while (st < Peaks.Length - 1 && Peaks[st].Mz < mzStart)
        {
            st++;
        }

        // Advance the window end to the first peak at or above mzEnd (never past the last index).
        while (ed < Peaks.Length - 1 && Peaks[ed].Mz < mzEnd)
        {
            ed++;
        }

        var abundantIntensityBucket = GetMostAbundantIntensity(st, ed);
        if (abundantIntensityBucket.LowerBound < intensity && intensity < abundantIntensityBucket.UpperBound)
        {
            // Intensity falls inside the most populated bucket of the window: treated as noise.
            continue;
        }

        filteredPeaks.Add(peak);
    }

    filteredPeaks.Sort();
    Peaks = filteredPeaks.ToArray();
}
//public const double SNRthreshold = 1.4826;
/// <summary>
/// Populates <c>_featureMatrix</c> for the given target mass and collects seed envelopes.
/// Each entry of <c>_rows</c> (row + <c>_targetMinCharge</c> is used as the charge) is processed in
/// parallel: MS1 peaks are assigned to cells per theoretical isotope, envelope similarity scores
/// are computed, and qualifying cells are added to <c>_seedEnvelopes</c>.
/// On return <c>_rows</c> and <c>_cols</c> contain only the rows/columns in which at least one cell
/// reached the peak-count cutoff.
/// </summary>
/// <param name="targetMass">target mass passed to <c>SetTargetMass</c> and used to derive the m/z search ranges</param>
private void BuildFeatureMatrix(double targetMass)
{
    InitFeatureMatrix();
    SetTargetMass(targetMass);

    // Plain bool arrays instead of BitArray: these flags are written from several worker threads.
    // BitArray packs bits into shared words and is not safe for concurrent writers, whereas
    // storing 'true' into distinct (or even the same) bool elements cannot lose an update.
    var observedRows = new bool[NRows];
    var observedCols = new bool[NColumns];

    var mostAbuInternalIdx = _theoreticalEnvelope.IndexOrderByRanking[0];
    var totalElutionLength = Run.GetElutionTime(Run.MaxLcScan);
    var elutionSamplingHalfLen = Math.Max(Math.Min(totalElutionLength * 0.003, 5.0), 0.5);
    var neighborHalfColumns = (int) Math.Max((elutionSamplingHalfLen / totalElutionLength) * NColumns, 5);
    var targetMassBinNum = Comparer.GetBinNumber(targetMass);
    var tolerance = new Tolerance(Comparer.Ppm * 0.5);
    var minMs1Mz = _ms1PeakList.First().Mz;
    var maxMs1Mz = _ms1PeakList.Last().Mz;
    var nPeaksCutoff = NumberOfPeaksCutoff;
    var bcSeedCutoff = GetSeedBcDistThreshold();
    var corrSeedCutoff = GetSeedCorrThreshold();
    var ms1ScanNums = Run.GetMs1ScanVector();

    var options = new ParallelOptions();
    if (_maxThreadCount > 0)
    {
        options.MaxDegreeOfParallelism = _maxThreadCount;
    }

    _seedEnvelopes.Clear();

    Parallel.ForEach(_rows, options, row =>
    {
        var charge = row + _targetMinCharge;
        for (var col = 0; col < NColumns; col++)
        {
            _featureMatrix[row][col].Init();
        }

        // Isotopes are visited in ranking order; k == 0 is the first-ranked (most abundant) isotope.
        for (var k = 0; k < _theoreticalEnvelope.Size; k++)
        {
            var i = _theoreticalEnvelope.IndexOrderByRanking[k];
            var isotopeIndex = _theoreticalEnvelope.Isotopes[i].Index;

            // The first-ranked isotope is searched within the target bin; the others use the
            // averages of the neighbouring bins as (wider) bounds.
            var isotopeMzLb = (k == 0)
                ? Ion.GetIsotopeMz(Comparer.GetMzStart(targetMassBinNum), charge, isotopeIndex)
                : Ion.GetIsotopeMz(Comparer.GetMzAverage(targetMassBinNum - 1), charge, isotopeIndex);
            var isotopeMzUb = (k == 0)
                ? Ion.GetIsotopeMz(Comparer.GetMzEnd(targetMassBinNum), charge, isotopeIndex)
                : Ion.GetIsotopeMz(Comparer.GetMzAverage(targetMassBinNum + 1), charge, isotopeIndex);

            if (isotopeMzLb < minMs1Mz || isotopeMzUb > maxMs1Mz)
            {
                continue;
            }

            var st = _ms1PeakList.BinarySearch(new Ms1Peak(isotopeMzLb, 0, 0));
            if (st < 0)
            {
                st = ~st; // not found: use the insertion point
            }

            for (var j = st; j < _ms1PeakList.Count; j++)
            {
                var ms1Peak = _ms1PeakList[j];
                if (ms1Peak.Mz > isotopeMzUb)
                {
                    break;
                }

                var col = ms1Peak.Ms1SpecIndex;

                if (k == 0) // most abundant peak
                {
                    // Keep the most intense candidate and derive the cell's accurate mass from it.
                    if (_featureMatrix[row][col].EnvelopePeaks[i] == null || ms1Peak.Intensity > _featureMatrix[row][col].EnvelopePeaks[i].Intensity)
                    {
                        _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                        _featureMatrix[row][col].AccurateMass = Ion.GetMonoIsotopicMass(ms1Peak.Mz, charge, isotopeIndex);
                    }
                }
                else
                {
                    if (!(_featureMatrix[row][col].AccurateMass > 0))
                    {
                        continue;
                    }

                    var expectedPeakMz = Ion.GetIsotopeMz(_featureMatrix[row][col].AccurateMass, charge, isotopeIndex);
                    if (Math.Abs(expectedPeakMz - ms1Peak.Mz) > tolerance.GetToleranceAsTh(ms1Peak.Mz))
                    {
                        continue;
                    }

                    // in case of existing isotope peaks, select peaks maximizing envelope similarity
                    if (_featureMatrix[row][col].EnvelopePeaks[i] != null)
                    {
                        if (_featureMatrix[row][col].CountActivePeaks == 1)
                        {
                            if (ms1Peak.Intensity > _featureMatrix[row][col].EnvelopePeaks[i].Intensity)
                            {
                                _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                            }
                        }
                        else
                        {
                            // Try the new peak and revert if the Bhattacharyya distance gets worse.
                            var tmpPeak = _featureMatrix[row][col].EnvelopePeaks[i];
                            var bc1 = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                            _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                            var bc2 = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                            if (bc1 < bc2)
                            {
                                _featureMatrix[row][col].EnvelopePeaks[i] = tmpPeak;
                            }
                        }
                    }
                    else
                    {
                        _featureMatrix[row][col].EnvelopePeaks[i] = ms1Peak;
                    }
                }
            }

            if (k == 0)
            {
                // for cells missing most abundant peaks
                for (var col = 0; col < NColumns; col++)
                {
                    if (_featureMatrix[row][col].Exist)
                    {
                        continue;
                    }

                    var highestIntensity = 0d;
                    var inferredAccurateMass = 0d;

                    // find the most intense abundant peak from neighboring cells
                    for (var j = Math.Max(col - neighborHalfColumns, 0); j <= Math.Min(col + neighborHalfColumns, NColumns - 1); j++)
                    {
                        var mostAbuPeak = _featureMatrix[row][j].EnvelopePeaks[mostAbuInternalIdx];
                        if (mostAbuPeak != null && mostAbuPeak.Intensity > highestIntensity)
                        {
                            highestIntensity = mostAbuPeak.Intensity;
                            inferredAccurateMass = _featureMatrix[row][j].AccurateMass;
                        }
                    }

                    _featureMatrix[row][col].AccurateMass = inferredAccurateMass;
                }
            }
        }

        for (var col = 0; col < NColumns; col++)
        {
            if (!(_featureMatrix[row][col].Exist))
            {
                continue;
            }

            if (_featureMatrix[row][col].CountActivePeaks >= nPeaksCutoff)
            {
                var corr = _theoreticalEnvelope.GetPearsonCorrelation(_featureMatrix[row][col].EnvelopePeaks);
                var bcDist = _theoreticalEnvelope.GetBhattacharyyaDistance(_featureMatrix[row][col].EnvelopePeaks);
                _featureMatrix[row][col].CorrelationCoeff = corr;
                _featureMatrix[row][col].DivergenceDist = bcDist;

                observedRows[row] = true;
                observedCols[col] = true;

                // collect seed envelopes
                // NOTE(review): 'corr < corrSeedCutoff' accepts LOW correlations, while CollectLcMsPeaks
                // rejects cells whose correlation is below its cutoff - confirm whether '>' was intended.
                var mostAbuPeak = _featureMatrix[row][col].EnvelopePeaks[mostAbuInternalIdx];
                if (mostAbuPeak != null && (bcDist < bcSeedCutoff || corr < corrSeedCutoff))
                {
                    var signalToNoiseRatio = mostAbuPeak.Intensity / Ms1Spectra[col].MedianIntensity;
                    if (signalToNoiseRatio > 3)
                    {
                        var seed = new ObservedIsotopeEnvelope(_featureMatrix[row][col].AccurateMass, row + _targetMinCharge, ms1ScanNums[col], _featureMatrix[row][col].EnvelopePeaks, _theoreticalEnvelope);
                        lock (_seedEnvelopes)
                        {
                            _seedEnvelopes.Add(new KeyValuePair<double, ObservedIsotopeEnvelope>(bcDist, seed));
                        }
                    }
                }
            }
            else
            {
                _featureMatrix[row][col].AccurateMass = 0d;
            }
        }
    }); // end of row loop

    // Keep only the rows and columns that were observed.
    var temp = new List<int>();
    for (var i = 0; i < observedRows.Length; i++)
    {
        if (observedRows[i])
        {
            temp.Add(i);
        }
    }
    _rows = temp.ToArray();

    temp.Clear();
    for (var i = 0; i < observedCols.Length; i++)
    {
        if (observedCols[i])
        {
            temp.Add(i);
        }
    }
    _cols = temp.ToArray();
}
/// <summary>
/// Looks for the isotope peaks of <paramref name="ion"/> in <c>_ms2Spec</c>, starting from the most
/// abundant isotope and walking to lower and then higher isotopes while their theoretical relative
/// intensity is at least <paramref name="relativeIntensityThreshold"/>.
/// </summary>
/// <param name="ion">ion whose isotopes are searched</param>
/// <param name="tolerance">m/z tolerance used for every isotope</param>
/// <param name="relativeIntensityThreshold">minimum theoretical relative intensity of an isotope to be required</param>
/// <param name="baseIsotopePeakIndex">index returned by <c>FindPeakIndex</c> for the most abundant isotope (negative when not found)</param>
/// <param name="nIsotopes">number of theoretical isotopes whose relative intensity is at least the threshold</param>
/// <param name="nMatchedIsotopes">number of isotopes matched before the method returned</param>
/// <returns>true if every required isotope was matched; false as soon as one is missing</returns>
private bool FindIon(Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int baseIsotopePeakIndex, out int nIsotopes, out int nMatchedIsotopes)
{
    var baseIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    var baseIsotopMz = ion.GetIsotopeMz(baseIsotopeIndex);
    baseIsotopePeakIndex = _ms2Spec.FindPeakIndex(baseIsotopMz, tolerance);

    // Count(predicate) counts the qualifying isotopes; Select(...).Count() would just return the envelope length.
    nIsotopes = isotopomerEnvelope.Count(x => x >= relativeIntensityThreshold);
    nMatchedIsotopes = 0;

    if (baseIsotopePeakIndex < 0)
    {
        return false;
    }

    nMatchedIsotopes++;

    // go down
    var peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex - 1; isotopeIndex >= 0; isotopeIndex--)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var matched = false;
        for (var i = peakIndex - 1; i >= 0; i--)
        {
            var peakMz = _ms2Spec.Peaks[i].Mz;
            if (peakMz < minMz)
            {
                break; // walked past the window without a match
            }

            if (peakMz <= maxMz) // found a match, move to the previous isotope
            {
                peakIndex = i;
                nMatchedIsotopes++;
                matched = true;
                break;
            }
        }

        // Also covers the case where the peak list ran out before reaching the window.
        if (!matched)
        {
            return false;
        }
    }

    // go up
    peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var matched = false;
        for (var i = peakIndex + 1; i < _ms2Spec.Peaks.Length; i++)
        {
            var peakMz = _ms2Spec.Peaks[i].Mz;
            if (peakMz > maxMz)
            {
                break; // walked past the window without a match
            }

            if (peakMz >= minMz) // found a match, move to the next isotope
            {
                peakIndex = i;
                nMatchedIsotopes++;
                matched = true;
                break;
            }
        }

        // Also covers the case where the peak list ran out before reaching the window.
        if (!matched)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Builds an <c>LcMsPeakCluster</c> from the feature-matrix cells in the given row/column rectangle.
/// Cells that are re-collected (forced, missing, or whose accurate mass differs from
/// <paramref name="targetMass"/> by more than the tolerance) are refilled from the MS1 spectrum
/// of their column. Cells that pass the peak-count and similarity cutoffs contribute an envelope.
/// </summary>
/// <param name="targetMass">mass used when re-collecting isotope peaks</param>
/// <param name="minRow">first row (inclusive); row + <c>_targetMinCharge</c> is used as the charge</param>
/// <param name="maxRow">last row (inclusive)</param>
/// <param name="minCol">first column (inclusive), indexing <c>Ms1Spectra</c></param>
/// <param name="maxCol">last column (inclusive)</param>
/// <param name="reCollectAllPeaks">when true every cell is re-initialized and re-collected</param>
/// <returns>
/// the cluster seeded with the envelope of lowest Bhattacharyya distance that contains the
/// most abundant isotope peak, or null when no such envelope was found
/// </returns>
private LcMsPeakCluster CollectLcMsPeaks(double targetMass, int minRow, int maxRow, int minCol, int maxCol, bool reCollectAllPeaks = false)
{
    var ms1ScanNums = Run.GetMs1ScanVector();
    var envelopes = new List<ObservedIsotopeEnvelope>();
    var bestBcDist = 100d;
    ObservedIsotopeEnvelope bestEnvelope = null;
    var mostAbuInternalIndex = _theoreticalEnvelope.IndexOrderByRanking[0];
    var tolerance = new Tolerance(Comparer.Ppm * 0.5);
    var massTol = tolerance.GetToleranceAsTh(targetMass);
    var nPeaksCutoff = NumberOfPeaksCutoff;
    var bcCutoff = GetSeedBcDistThreshold();
    var corrCutoff = GetSeedCorrThreshold();

    for (var i = minRow; i <= maxRow; i++)
    {
        for (var j = minCol; j <= maxCol; j++)
        {
            if (reCollectAllPeaks)
            {
                _featureMatrix[i][j].Init();
            }

            if (reCollectAllPeaks || !_featureMatrix[i][j].Exist || Math.Abs(_featureMatrix[i][j].AccurateMass - targetMass) > massTol)
            {
                var peaks = Ms1Spectra[j].GetAllIsotopePeaks(targetMass, i + _targetMinCharge, _theoreticalEnvelope, tolerance);

                // Only overwrite the cell when at least one isotope peak was observed.
                if (peaks.Any(p => p != null))
                {
                    _featureMatrix[i][j].DivergenceDist = _theoreticalEnvelope.GetBhattacharyyaDistance(peaks);
                    _featureMatrix[i][j].AccurateMass = targetMass;
                    _featureMatrix[i][j].CorrelationCoeff = _theoreticalEnvelope.GetPearsonCorrelation(peaks);
                    Array.Copy(peaks, _featureMatrix[i][j].EnvelopePeaks, peaks.Length);
                }
            }

            if (!_featureMatrix[i][j].Exist)
            {
                continue;
            }

            if (_featureMatrix[i][j].CountActivePeaks < nPeaksCutoff)
            {
                continue;
            }

            // exclude outliers: both similarity measures fail their cutoff
            if (_featureMatrix[i][j].DivergenceDist > bcCutoff && _featureMatrix[i][j].CorrelationCoeff < corrCutoff)
            {
                continue;
            }

            var envelope = new ObservedIsotopeEnvelope(_featureMatrix[i][j].AccurateMass, i + _targetMinCharge, ms1ScanNums[j], _featureMatrix[i][j].EnvelopePeaks, _theoreticalEnvelope);
            envelopes.Add(envelope);

            if (_featureMatrix[i][j].EnvelopePeaks[mostAbuInternalIndex] != null && _featureMatrix[i][j].DivergenceDist < bestBcDist)
            {
                bestBcDist = _featureMatrix[i][j].DivergenceDist;
                bestEnvelope = envelope;
            }
        }
    }

    if (bestEnvelope == null)
    {
        return null;
    }

    var cluster = new LcMsPeakCluster(Run, bestEnvelope);
    cluster.AddEnvelopes(minRow + _targetMinCharge, maxRow + _targetMinCharge, ms1ScanNums[minCol], ms1ScanNums[maxCol], envelopes);

    return cluster;
}
/// <summary>
/// Collects, for each theoretical isotope of the given mass and charge, the most intense peak of
/// this spectrum that falls within the tolerance window of that isotope.
/// The search starts at the most abundant isotope and proceeds outward in both directions,
/// stopping in a direction at the first isotope whose theoretical relative intensity is below
/// <paramref name="relativeIntensityThreshold"/>.
/// </summary>
/// <param name="monoIsotopeMass">monoisotopic mass</param>
/// <param name="charge">charge state</param>
/// <param name="envelope">theoretical isotopomer envelope</param>
/// <param name="tolerance">m/z tolerance</param>
/// <param name="relativeIntensityThreshold">minimum theoretical relative intensity for an isotope to be searched</param>
/// <returns>
/// array indexed by isotope index (null entries for isotopes that were not observed or not searched),
/// or null when the most abundant isotope peak is not found
/// </returns>
public Peak[] GetAllIsotopePeaks(double monoIsotopeMass, int charge, IsotopomerEnvelope envelope, Tolerance tolerance, double relativeIntensityThreshold)
{
    var apexIsotope = envelope.MostAbundantIsotopeIndex;
    var relativeIntensities = envelope.Envolope;

    var apexMz = Ion.GetIsotopeMz(monoIsotopeMass, charge, apexIsotope);
    var apexPeakIndex = FindPeakIndex(apexMz, tolerance);
    if (apexPeakIndex < 0)
    {
        return null;
    }

    var found = new Peak[relativeIntensities.Length];
    found[apexIsotope] = Peaks[apexPeakIndex];

    // Lower isotopes: scan the peak list towards smaller indices.
    var cursor = apexPeakIndex - 1;
    for (var iso = apexIsotope - 1; iso >= 0; iso--)
    {
        if (relativeIntensities[iso] < relativeIntensityThreshold)
        {
            break;
        }

        var targetMz = Ion.GetIsotopeMz(monoIsotopeMass, charge, iso);
        var halfWidth = tolerance.GetToleranceAsTh(targetMz);
        var lowerMz = targetMz - halfWidth;
        var upperMz = targetMz + halfWidth;

        var scan = cursor;
        while (scan >= 0)
        {
            var scanMz = Peaks[scan].Mz;
            if (scanMz < lowerMz)
            {
                // Left the window: the next isotope resumes from here.
                cursor = scan;
                break;
            }

            if (scanMz <= upperMz)
            {
                // Inside the window: keep the most intense candidate.
                var candidate = Peaks[scan];
                if (found[iso] == null || candidate.Intensity > found[iso].Intensity)
                {
                    found[iso] = candidate;
                }
            }

            scan--;
        }
    }

    // Higher isotopes: scan the peak list towards larger indices.
    cursor = apexPeakIndex + 1;
    for (var iso = apexIsotope + 1; iso < relativeIntensities.Length; iso++)
    {
        if (relativeIntensities[iso] < relativeIntensityThreshold)
        {
            break;
        }

        var targetMz = Ion.GetIsotopeMz(monoIsotopeMass, charge, iso);
        var halfWidth = tolerance.GetToleranceAsTh(targetMz);
        var lowerMz = targetMz - halfWidth;
        var upperMz = targetMz + halfWidth;

        var scan = cursor;
        while (scan < Peaks.Length)
        {
            var scanMz = Peaks[scan].Mz;
            if (scanMz > upperMz)
            {
                // Left the window: the next isotope resumes from here.
                cursor = scan;
                break;
            }

            if (scanMz >= lowerMz)
            {
                // Inside the window: keep the most intense candidate.
                var candidate = Peaks[scan];
                if (found[iso] == null || candidate.Intensity > found[iso].Intensity)
                {
                    found[iso] = candidate;
                }
            }

            scan++;
        }
    }

    return found;
}
/// <summary>
/// Loads every data set (raw .pbf run, two MSPathFinder result files and a ProMex .ms1ft file),
/// tags the features with matching PrSMs, aligns the features across data sets, fills in missing
/// features and writes the resulting cross-tab to a fixed output path.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";
    //CPTAC_Intact_CR32A_24Aug15_Bane_15-02-06-RZ

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var dataSetIndex = 0; dataSetIndex < NdataSet; dataSetIndex++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, GetDataSetNames(dataSetIndex));
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, GetDataSetNames(dataSetIndex));
        var mspFile2 = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, GetDataSetNames(dataSetIndex));
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, GetDataSetNames(dataSetIndex));
        Console.WriteLine(rawFile);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Combine the identifications from both result folders before merging.
        var primaryPrsms = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var secondaryPrsms = prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        primaryPrsms.AddRange(secondaryPrsms);
        var prsmList = MergePrsm(primaryPrsms);

        var features = LcMsFeatureAlignment.LoadProMexResult(dataSetIndex, ms1FtFile, run);

        // The protein name doubles as the protein id.
        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // tag features by PrSMs
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsTh(feature.Mass);
            foreach (var match in prsmList)
            {
                // The scan number must lie strictly inside the feature's scan range.
                if (!(feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum))
                {
                    continue;
                }

                if (Math.Abs(feature.Mass - match.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(match);
                }
            }
        }

        alignment.AddDataSet(dataSetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var dataSetIndex = 0; dataSetIndex < NdataSet; dataSetIndex++)
    {
        alignment.FillMissingFeatures(dataSetIndex);
        Console.WriteLine("{0} has been processed", GetDataSetNames(dataSetIndex));
    }

    OutputCrossTabWithId(outFilePath, alignment);
}
/// <summary>
/// Checks whether this spectrum contains all isotope peaks whose relative intensity is equal to or larger than the threshold
/// </summary>
/// <remarks>
/// The most abundant isotope is located first; the remaining isotopes are then searched outward
/// in both directions, each search starting from the previously matched peak. A direction stops
/// at the first isotope whose theoretical relative intensity is below the threshold.
/// </remarks>
/// <param name="ion">ion</param>
/// <param name="tolerance">tolerance</param>
/// <param name="relativeIntensityThreshold">relative intensity threshold of the theoretical isotope profile</param>
/// <returns>true if spectrum contains all ions; false otherwise.</returns>
public bool ContainsIon(Ion ion, Tolerance tolerance, double relativeIntensityThreshold)
{
    var baseIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    var baseIsotopMz = ion.GetIsotopeMz(baseIsotopeIndex);
    var baseIsotopePeakIndex = FindPeakIndex(baseIsotopMz, tolerance);
    if (baseIsotopePeakIndex < 0)
    {
        return false;
    }

    // go down
    var peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex - 1; isotopeIndex >= 0; isotopeIndex--)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var matched = false;
        for (var i = peakIndex - 1; i >= 0; i--)
        {
            var peakMz = Peaks[i].Mz;
            if (peakMz < minMz)
            {
                break; // walked past the window without a match
            }

            if (peakMz <= maxMz) // found a match, move to the previous isotope
            {
                peakIndex = i;
                matched = true;
                break;
            }
        }

        // A required isotope is missing, either because the scan left the window or because
        // the peak list ran out before reaching it.
        if (!matched)
        {
            return false;
        }
    }

    // go up
    peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var matched = false;
        for (var i = peakIndex + 1; i < Peaks.Length; i++)
        {
            var peakMz = Peaks[i].Mz;
            if (peakMz > maxMz)
            {
                break; // walked past the window without a match
            }

            if (peakMz >= minMz) // found a match, move to the next isotope
            {
                peakIndex = i;
                matched = true;
                break;
            }
        }

        // A required isotope is missing, either because the scan left the window or because
        // the peak list ran out before reaching it.
        if (!matched)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Collects, for each theoretical isotope of <paramref name="ion"/>, the most intense peak of
/// <paramref name="spec"/> inside the tolerance window of that isotope, together with the index of
/// that peak in <c>spec.Peaks</c>.
/// The search starts at the most abundant isotope and proceeds outward in both directions,
/// stopping in a direction at the first isotope whose theoretical relative intensity is below
/// <paramref name="relativeIntensityThreshold"/>.
/// </summary>
/// <param name="spec">spectrum to search</param>
/// <param name="ion">ion whose isotopes are searched</param>
/// <param name="tolerance">m/z tolerance</param>
/// <param name="relativeIntensityThreshold">minimum theoretical relative intensity for an isotope to be searched</param>
/// <param name="peakIndexList">
/// per-isotope index of the selected peak; entries of isotopes without a selected peak keep the
/// array default of 0
/// </param>
/// <returns>
/// array indexed by isotope index (null entries for isotopes without a selected peak),
/// or null when the most abundant isotope peak is not found
/// </returns>
public Peak[] GetAllIsotopePeaks(Spectrum spec, Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int[] peakIndexList)
{
    var apexIsotope = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var relativeIntensities = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();

    peakIndexList = new int[relativeIntensities.Length];

    var apexMz = ion.GetIsotopeMz(apexIsotope);
    var apexPeakIndex = spec.FindPeakIndex(apexMz, tolerance);
    if (apexPeakIndex < 0)
    {
        return null;
    }

    var found = new Peak[relativeIntensities.Length];
    found[apexIsotope] = spec.Peaks[apexPeakIndex];
    peakIndexList[apexIsotope] = apexPeakIndex;

    // Lower isotopes: scan the peak list towards smaller indices.
    var cursor = apexPeakIndex - 1;
    for (var iso = apexIsotope - 1; iso >= 0; iso--)
    {
        if (relativeIntensities[iso] < relativeIntensityThreshold)
        {
            break;
        }

        var targetMz = ion.GetIsotopeMz(iso);
        var halfWidth = tolerance.GetToleranceAsTh(targetMz);
        var lowerMz = targetMz - halfWidth;
        var upperMz = targetMz + halfWidth;

        var scan = cursor;
        while (scan >= 0)
        {
            var scanMz = spec.Peaks[scan].Mz;
            if (scanMz < lowerMz)
            {
                // Left the window: the next isotope resumes from here.
                cursor = scan;
                break;
            }

            if (scanMz <= upperMz)
            {
                // Inside the window: keep the most intense candidate and remember its index.
                var candidate = spec.Peaks[scan];
                if (found[iso] == null || candidate.Intensity > found[iso].Intensity)
                {
                    found[iso] = candidate;
                    peakIndexList[iso] = scan;
                }
            }

            scan--;
        }
    }

    // Higher isotopes: scan the peak list towards larger indices.
    cursor = apexPeakIndex + 1;
    for (var iso = apexIsotope + 1; iso < relativeIntensities.Length; iso++)
    {
        if (relativeIntensities[iso] < relativeIntensityThreshold)
        {
            break;
        }

        var targetMz = ion.GetIsotopeMz(iso);
        var halfWidth = tolerance.GetToleranceAsTh(targetMz);
        var lowerMz = targetMz - halfWidth;
        var upperMz = targetMz + halfWidth;

        var scan = cursor;
        while (scan < spec.Peaks.Length)
        {
            var scanMz = spec.Peaks[scan].Mz;
            if (scanMz > upperMz)
            {
                // Left the window: the next isotope resumes from here.
                cursor = scan;
                break;
            }

            if (scanMz >= lowerMz)
            {
                // Inside the window: keep the most intense candidate and remember its index.
                var candidate = spec.Peaks[scan];
                if (found[iso] == null || candidate.Intensity > found[iso].Intensity)
                {
                    found[iso] = candidate;
                    peakIndexList[iso] = scan;
                }
            }

            scan++;
        }
    }

    return found;
}
/// <summary>
/// Gets the extracted ion chromatogram around the specified m/z (using only MS2 spectra).
/// The tolerance is converted to Th at <paramref name="mz"/> and the resulting
/// [mz - tol, mz + tol] range is forwarded to the m/z-range overload.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance</param>
/// <param name="precursorIonMz">precursor m/z of the precursor ion</param>
/// <param name="minScanNum">minimum scan number (inclusive)</param>
/// <param name="maxScanNum">maximum scan number (inclusive)</param>
/// <returns>XIC as an Xic object</returns>
public Xic GetProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz, int minScanNum, int maxScanNum)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    return GetProductExtractedIonChromatogram(lowerMz, upperMz, precursorIonMz, minScanNum, maxScanNum);
}
/// <summary>
/// Loads the ProMex features (regular and .seqtag) of every data set, tags them with the PrSMs of
/// the data set's MSPathFinder result file when that file exists, aligns the features across
/// data sets, fills in missing features and writes the cross-tab to <paramref name="outFilePath"/>.
/// </summary>
/// <param name="datasets">data set names (used to build the file names)</param>
/// <param name="mspfFolder">folder containing the *_IcTda.tsv identification files</param>
/// <param name="ms1ftFolder">folder containing the *.ms1ft and *.seqtag.ms1ft feature files</param>
/// <param name="outFilePath">path of the cross-tab output file</param>
public void AlignFeatures(List<string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
{
    var nDataset = datasets.Count;
    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(12);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var dataSetIndex = 0; dataSetIndex < nDataset; dataSetIndex++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, datasets[dataSetIndex]);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasets[dataSetIndex]);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasets[dataSetIndex]);
        var ms1FtFile2 = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasets[dataSetIndex]);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Merge the regular and the seqtag feature lists.
        var features = LcMsFeatureAlignment.LoadProMexResult(dataSetIndex, ms1FtFile, run);
        var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(dataSetIndex, ms1FtFile2, run);
        features.AddRange(seqTagFeatures);

        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            // The protein name doubles as the protein id.
            foreach (var prsm in prsmList)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // tag features by PrSMs
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsTh(feature.Mass);
                foreach (var match in prsmList)
                {
                    // The scan number must lie strictly inside the feature's scan range.
                    if (!(feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum))
                    {
                        continue;
                    }

                    if (Math.Abs(feature.Mass - match.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(match);
                    }
                }
            }
        }

        alignment.AddDataSet(dataSetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var dataSetIndex = 0; dataSetIndex < nDataset; dataSetIndex++)
    {
        alignment.FillMissingFeatures(dataSetIndex);
        Console.WriteLine("{0} has been processed", datasets[dataSetIndex]);
    }

    AnalysisCompRef.OutputCrossTabWithId(outFilePath, alignment, datasets.ToArray());
}
/// <summary>
/// Checks whether the charge of <paramref name="envelope"/> is plausible by counting, among the
/// spectrum peaks between the envelope's lowest and highest m/z peak, pairs of peaks whose m/z
/// spacing corresponds to a higher candidate charge.
/// </summary>
/// <param name="envelope">observed isotope envelope to check</param>
/// <returns>
/// true when the charge is accepted (charge above 20, envelope peaks making up more than 70% of the
/// peaks in the range, or no candidate charge collecting enough gaps);
/// false when the range holds fewer than 10 peaks or a candidate charge exceeds both gap thresholds
/// </returns>
public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
{
    var checkCharge = envelope.Charge;
    if (checkCharge > 20)
    {
        return true; //high charge (> +20), just pass
    }

    var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
    var peakEndIndex = envelope.MaxMzPeak.IndexInSpectrum;
    var nPeaks = peakEndIndex - peakStartIndex + 1;

    if (nPeaks < 10)
    {
        return false;
    }

    if (envelope.NumberOfPeaks > nPeaks * 0.7)
    {
        return true;
    }

    var tolerance = new Tolerance(5);
    var threshold = nPeaks * 0.5;
    var mzTol = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

    var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
    var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
    var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;
    var nChargeGaps = new int[maxCheckCharge - minCheckCharge + 1];

    for (var i = peakStartIndex; i <= peakEndIndex; i++)
    {
        for (var j = i + 1; j <= peakEndIndex; j++)
        {
            var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;

            if (deltaMz > maxDeltaMz)
            {
                break;
            }

            // Candidate charges implied by this spacing, clamped to the checked range up front.
            // Without the clamp the upper bound 1 / (deltaMz - mzTol) becomes huge (or infinite)
            // when deltaMz is at or just above mzTol, and the loop would spin over charges that
            // are all outside the checked range anyway.
            var firstCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
            var lastCharge = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

            for (var c = firstCharge; c <= lastCharge; c++)
            {
                var k = (int)c - minCheckCharge;
                nChargeGaps[k]++;

                if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                {
                    return false;
                }
            }
        }
    }

    return true;
}