/// <summary>
/// Decides whether two LC-MS features belong to the same cluster.
/// </summary>
/// <param name="f1">First feature.</param>
/// <param name="f2">Second feature.</param>
/// <returns>
/// false when both features carry the same DataSetId or when their masses disagree;
/// otherwise the result of <c>f1.CoElutedByNet(f2, 0.01)</c>.
/// </returns>
public bool SameCluster(LcMsFeature f1, LcMsFeature f2)
{
    if (f1.DataSetId == f2.DataSetId)
    {
        return false;
    }

    // The smaller of the two per-feature tolerances is the one applied.
    var massTol = Math.Min(_tolerance.GetToleranceAsTh(f1.Mass), _tolerance.GetToleranceAsTh(f2.Mass));
    var massDiff = Math.Abs(f1.Mass - f2.Mass);

    var massMismatch = massDiff > massTol;
    if (massMismatch && _oneDaltonShift)
    {
        // With one-dalton shifts enabled, a difference of 1 is also accepted ...
        massMismatch = Math.Abs(massDiff - 1) > massTol;

        // ... and a difference of 2 as well when both masses exceed 10000.
        if (massMismatch && f1.Mass > 10000 && f2.Mass > 10000)
        {
            massMismatch = Math.Abs(massDiff - 2) > massTol;
        }
    }

    if (massMismatch)
    {
        return false;
    }

    // Masses agree: the features cluster only if they co-elute (second argument 0.01).
    return f1.CoElutedByNet(f2, 0.01);
}
/// <summary>
/// Decides whether two protein-spectrum matches belong to the same cluster, based on
/// mass agreement (tolerance value 10) and closeness in elution time.
/// </summary>
/// <param name="prsm1">First match; its mass also determines the mass tolerance.</param>
/// <param name="prsm2">Second match.</param>
/// <returns>true when both the mass and the elution-time criteria are met.</returns>
public bool SameCluster(ProteinSpectrumMatch prsm1, ProteinSpectrumMatch prsm2)
{
    var massTolerance = new Tolerance(10);
    if (Math.Abs(prsm1.Mass - prsm2.Mass) > massTolerance.GetToleranceAsTh(prsm1.Mass))
    {
        return false;
    }

    var elutionGap = Math.Abs(_run.GetElutionTime(prsm1.ScanNum) - _run.GetElutionTime(prsm2.ScanNum));

    // Identical sequence text allows a wider window (2% of _elutionLength) than differing text (0.5%).
    var allowedFraction = prsm1.SequenceText.Equals(prsm2.SequenceText) ? 0.02 : 0.005;

    return !(elutionGap > _elutionLength * allowedFraction);
}
public void TestFeatureAlignment()
{
    // Loads ProMex features and two sets of MSPathFinder results for each of the NdataSet datasets,
    // tags features with matching PrSMs, aligns the features across datasets, fills missing
    // features and writes a cross-tab to outFilePath.
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var datasetIndex = 0; datasetIndex < NdataSet; datasetIndex++)
    {
        var datasetName = GetDataSetNames(datasetIndex);
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, datasetName);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, datasetName);
        var mspFile2 = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, datasetName);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, datasetName);
        Console.WriteLine(rawFile);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Both identification result sets are pooled, then merged.
        var pooledPrsms = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var secondPrsms = prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        pooledPrsms.AddRange(secondPrsms);
        var prsmList = MergePrsm(pooledPrsms);

        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);

        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // Tag each feature with the PrSMs whose scan lies strictly inside the feature's
        // scan range and whose mass is within tolerance of the feature mass.
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsTh(feature.Mass);
            foreach (var prsm in prsmList)
            {
                var scanInside = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                if (scanInside && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(prsm);
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var datasetIndex = 0; datasetIndex < NdataSet; datasetIndex++)
    {
        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", GetDataSetNames(datasetIndex));
    }

    OutputCrossTabWithId(outFilePath, alignment);
}
/// <summary>
/// Checks whether the charge assigned to an isotope envelope is plausible, by counting
/// how many peak-to-peak m/z gaps inside the envelope's m/z span are consistent with
/// other (higher) charge states.
/// </summary>
/// <param name="envelope">
/// Envelope to check; its Charge, MinMzPeak, MaxMzPeak and NumberOfPeaks are read.
/// </param>
/// <returns>
/// true when the charge is above 20, when the envelope holds more than 70% of the peaks
/// in its span, or when no alternative charge collects enough supporting gaps;
/// false when the span has fewer than 10 peaks or an alternative charge exceeds both
/// the half-span threshold and 1.25 times the envelope's peak count.
/// </returns>
public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
{
    var checkCharge = envelope.Charge;
    if (checkCharge > 20)
    {
        return true; // high charge (> +20), just pass
    }

    var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
    var peakEndIndex = envelope.MaxMzPeak.IndexInSpectrum;
    var nPeaks = peakEndIndex - peakStartIndex + 1;

    if (nPeaks < 10)
    {
        return false;
    }

    if (envelope.NumberOfPeaks > nPeaks * 0.7)
    {
        return true;
    }

    var tolerance = new Tolerance(5);
    var threshold = nPeaks * 0.5;
    var mzTol = tolerance.GetToleranceAsTh(Spectrum.Peaks[peakStartIndex].Mz);

    // Range of alternative charges whose gap counts are tracked.
    var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
    var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);
    var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;
    var nChargeGaps = new int[maxCheckCharge - minCheckCharge + 1];

    for (var i = peakStartIndex; i <= peakEndIndex; i++)
    {
        for (var j = i + 1; j <= peakEndIndex; j++)
        {
            var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;
            if (deltaMz > maxDeltaMz)
            {
                break;
            }

            // Clamp the candidate charges to the tracked range up front. Without the clamp,
            // a gap only marginally larger than (or equal to) mzTol makes 1 / (deltaMz - mzTol)
            // huge or infinite, and the loop would spin through charges that are all skipped.
            // The set of counted charges is the same as before.
            var firstCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
            var lastCharge = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

            for (var c = firstCharge; c <= lastCharge; c++)
            {
                var k = (int)c - minCheckCharge;
                nChargeGaps[k]++;

                if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                {
                    return false;
                }
            }
        }
    }

    return true;
}
/// <summary>
/// Gets the extracted ion chromatogram of the specified m/z (using only MS1 spectra).
/// The m/z window is centred on <paramref name="mz"/> and extends by the tolerance on each side.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <returns>XIC as a list of XICPeaks</returns>
public IList<XicPeak> GetExtractedIonChromatogram(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    return GetExtractedIonChromatogram(mz - halfWidth, mz + halfWidth);
}
/// <summary>
/// Gets the extracted ion chromatogram of the specified m/z (using only MS2 spectra).
/// The m/z window is centred on <paramref name="mz"/> and extends by the tolerance on each side.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <param name="precursorIonMz">m/z of the precursor ion</param>
/// <param name="minScanNum">minimum scan number (inclusive)</param>
/// <param name="maxScanNum">maximum scan number (inclusive)</param>
/// <returns>XIC as an Xic object</returns>
public Xic GetProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz, int minScanNum, int maxScanNum)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    return GetProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz, minScanNum, maxScanNum);
}
/// <summary>
/// Finds all peaks in <paramref name="peakList"/> around the given m/z by delegating to the
/// range-based overload with a window of mz +/- tolerance.
/// </summary>
/// <param name="peakList">peaks to search</param>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <returns>whatever the range-based overload returns for the derived window</returns>
public static IList<Peak> FindAllPeaks(List<Peak> peakList, double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    return FindAllPeaks(peakList, mz - halfWidth, mz + halfWidth);
}
/// <summary>
/// Gets the full product extracted ion chromatogram around the given m/z by delegating to the
/// range-based overload with a window of mz +/- tolerance.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <param name="precursorIonMz">m/z of the precursor ion</param>
/// <returns>XIC as an Xic object</returns>
public new Xic GetFullProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    return GetFullProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz);
}
/// <summary>
/// Gets the extracted ion chromatogram of the specified m/z (using only MS1 spectra).
/// The m/z window is centred on <paramref name="mz"/> and extends by the tolerance on each side.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <returns>XIC as an Xic object</returns>
public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    return GetPrecursorExtractedIonChromatogram(mz - halfWidth, mz + halfWidth);
}
public void TestFeatureAlignment()
{
    // Loads ProMex features and MSPathFinder results for each run, tags features with matching
    // PrSMs, aligns the features across runs and writes a cross-tab to outFilePath.
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Quant\aligned\promex_crosstab.tsv";

    var runLabels = new string[]
    {
        "1x1", "1x2", "1x3", "1x4", "1x5",
        "5x1", "5x2", "5x3", "5x4", "5x5",
        "10x1", "10x2", "10x3", "10x4", "10x5",
    };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new SpikeInFeatureComparer(tolerance));

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", RawFolder, datasets[datasetIndex]);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, datasets[datasetIndex]);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, datasets[datasetIndex]);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);

        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // Tag each feature with the PrSMs whose scan lies strictly inside the feature's
        // scan range and whose mass is within tolerance of the feature mass.
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsTh(feature.Mass);
            foreach (var prsm in prsmList)
            {
                var scanInside = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                if (scanInside && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(prsm);
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    // Note: the per-dataset alignment.FillMissingFeatures(i) pass is not run in this test.

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
/// <summary>
/// Gets the extracted ion chromatogram of the specified m/z (using only MS1 spectra).
/// When <paramref name="targetScanNum"/> is non-negative, only the XIC around that scan is requested;
/// when it is negative, the whole XIC for the m/z window is requested instead.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <param name="targetScanNum">target scan number to generate xic; a negative value requests the full XIC</param>
/// <param name="maxNumConsecutiveScansWithoutPeak">
/// passed through unchanged to the range-based overload; per its name, the maximum number of
/// consecutive scans without a peak
/// </param>
/// <returns>XIC around targetScanNum, or the full XIC when targetScanNum is negative</returns>
public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance, int targetScanNum, int maxNumConsecutiveScansWithoutPeak = 3)
{
    var halfWidth = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    return targetScanNum < 0
        ? GetPrecursorExtractedIonChromatogram(lowerMz, upperMz)
        : GetPrecursorExtractedIonChromatogram(lowerMz, upperMz, targetScanNum, maxNumConsecutiveScansWithoutPeak);
}
/// <summary>
/// Adds an edge to the sequence tag graph for every ordered pair of deconvoluted peaks (i &lt; j)
/// whose mass gap matches the mass of at least one amino acid in _aminoAcidsArray.
/// The matching window is the gap +/- the tolerance evaluated at the mass of peak i.
/// </summary>
private void CollectSequenceTagGraphEdges()
{
    for (var i = 0; i < _deconvolutedPeaks.Count; i++)
    {
        var massTh = _tolerance.GetToleranceAsTh(_deconvolutedPeaks[i].Mass);

        for (var j = i + 1; j < _deconvolutedPeaks.Count; j++)
        {
            var massGap = _deconvolutedPeaks[j].Mass - _deconvolutedPeaks[i].Mass;
            var maxMassGap = massGap + massTh;
            var minMassGap = massGap - massTh;

            // The break assumes gaps only grow with j (peaks ordered by ascending mass) — TODO confirm.
            if (minMassGap > _maxAminoAcidMass)
            {
                break;
            }

            if (maxMassGap < _minAminoAcidMass)
            {
                continue;
            }

            // Allocate the edge only for pairs that survive the pruning above; previously an
            // edge object was created (and discarded) for every pair examined.
            var peakGap = new SequenceTagGraphEdge(i, j, massGap);

            foreach (var aa in _aminoAcidsArray)
            {
                var aaMass = aa.Composition.Mass;
                if (minMassGap < aaMass && aaMass < maxMassGap)
                {
                    // The mass error is only needed for residues that actually match.
                    peakGap.AddMatchedAminoAcid(aa, Math.Abs(peakGap.Mass - aaMass));
                }
            }

            if (peakGap.AminoAcidList.Count > 0)
            {
                AddEdge(peakGap);
            }
        }
    }
}
/// <summary>
/// Collects the peaks of this list whose m/z lies within mz +/- tolerance (bounds inclusive).
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <returns>matching peaks in list order; empty when none match</returns>
/// <remarks>
/// Relies on the list being ordered by ascending m/z, as the binary search and the early
/// exit on the upper bound both require.
/// </remarks>
public List<MSMSSpectrumPeak> GetPeaks(double mz, Tolerance tolerance)
{
    var tolTh = tolerance.GetToleranceAsTh(mz);
    var minMz = mz - tolTh;
    var maxMz = mz + tolTh;

    var matchList = new List<MSMSSpectrumPeak>();

    var start = BinarySearch(new MSMSSpectrumPeak(minMz, 1));
    if (start < 0)
    {
        // Not found: the complement is the index of the first element ordered after the probe.
        start = ~start;
    }

    // BinarySearch may return any one of several equal elements, and the probe's second
    // constructor argument may take part in the ordering. Step back so that every peak with
    // m/z >= minMz is included rather than only those after the landing position.
    while (start > 0 && this[start - 1].Mz >= minMz)
    {
        start--;
    }

    for (var i = start; i < Count; i++)
    {
        var p = this[i];
        if (p.Mz > maxMz)
        {
            break;
        }

        matchList.Add(p);
    }

    return matchList;
}
/// <summary>
/// Repeatedly takes the first feature of the sorted set and resolves the features that
/// overlap with it: overlapping features that are 1 or 2 mass units away with a similar score
/// are accepted alongside it; the remaining ones are re-scored after the taken feature's major
/// peaks are inactivated, and re-enter the set only if they still pass the score threshold.
/// </summary>
/// <param name="featureSet">candidate features; emptied by this method</param>
/// <returns>accepted features in the order they were taken</returns>
private IList<LcMsPeakCluster> RemoveOverlappedFeatures(SortedSet<LcMsPeakCluster> featureSet)
{
    var accepted = new List<LcMsPeakCluster>();
    var tol = new Tolerance(5);

    while (featureSet.Count >= 1)
    {
        var top = featureSet.First();
        featureSet.Remove(top);
        accepted.Add(top);

        var massTol = tol.GetToleranceAsTh(top.RepresentativeMass);
        var needRescoring = new List<LcMsPeakCluster>();

        foreach (var overlapped in top.OverlappedFeatures)
        {
            // Only features still waiting in the set are considered.
            if (!featureSet.Remove(overlapped))
            {
                continue;
            }

            var massDiff = Math.Abs(top.RepresentativeMass - overlapped.RepresentativeMass);
            var shiftedByOneOrTwo = Math.Abs(massDiff - 1.0) < massTol || Math.Abs(massDiff - 2.0) < massTol;

            if (shiftedByOneOrTwo && SimilarScore(top, overlapped))
            {
                accepted.Add(overlapped);
            }
            else
            {
                needRescoring.Add(overlapped);
            }
        }

        top.InActivateMajorPeaks();

        foreach (var candidate in needRescoring)
        {
            candidate.UpdateScore(_spectra);
            candidate.Score = _scorer.GetScore(candidate);

            // Candidates that fail here are dropped for good.
            if (candidate.Score > _scorer.ScoreThreshold && candidate.GoodEnougth)
            {
                featureSet.Add(candidate);
            }
        }
    }

    return accepted;
}
/// <summary>
/// Finds the most intense peak whose m/z lies strictly inside the window mz +/- tolerance.
/// </summary>
/// <param name="mz">target m/z</param>
/// <param name="tolerance">tolerance used to derive the half-width of the m/z window</param>
/// <returns>
/// the peak with the highest intensity above 0 inside the window, or null when there is none
/// </returns>
internal RankedPeak FindPeak(double mz, Tolerance tolerance)
{
    var tolTh = tolerance.GetToleranceAsTh(mz);
    var lowerMz = mz - tolTh;
    var upperMz = mz + tolTh;

    // Locate the window centre; a negative result is the complement of the insertion point.
    var pivot = Array.BinarySearch(Peaks, new RankedPeak((lowerMz + upperMz) / 2, 0, 0));
    if (pivot < 0)
    {
        pivot = ~pivot;
    }

    RankedPeak strongest = null;
    var strongestIntensity = 0.0;

    // Scan towards lower m/z until the lower bound is reached.
    for (var below = pivot - 1; below >= 0 && below < Peaks.Length; below--)
    {
        var candidate = Peaks[below];
        if (candidate.Mz <= lowerMz)
        {
            break;
        }

        if (candidate.Intensity > strongestIntensity)
        {
            strongestIntensity = candidate.Intensity;
            strongest = candidate;
        }
    }

    // Scan towards higher m/z until the upper bound is reached.
    for (var above = pivot; above >= 0 && above < Peaks.Length; above++)
    {
        var candidate = Peaks[above];
        if (candidate.Mz >= upperMz)
        {
            break;
        }

        if (candidate.Intensity > strongestIntensity)
        {
            strongestIntensity = candidate.Intensity;
            strongest = candidate;
        }
    }

    return strongest;
}
/// <summary>
/// Finds the observed peak for each isotope of a theoretical profile, starting from the peak
/// matched at <paramref name="mass"/> and walking upwards in steps of Constants.C13MinusC12.
/// </summary>
/// <param name="spectrum">Observed spectrum.</param>
/// <param name="isotopomerEnvelope">The theoretical isotopic profile; only its element count is used here.</param>
/// <param name="mass">Monoisotopic mass of the lipid; used directly as the m/z of the first isotope.</param>
/// <param name="tolerance">Peak tolerance.</param>
/// <returns>
/// One slot per isotope holding the most intense peak inside that isotope's tolerance window
/// (null slots where nothing matched); null when no peak matches the first isotope.
/// </returns>
/// <remarks>
/// This differs from the GetAllIsotopePeaks in <see cref="LipidUtil" /> in that it accepts the isotopomer envelope
/// as an argument rather than calculating it on its own. This way we only calculate it once.
/// </remarks>
private Peak[] GetAllIsotopePeaks(Spectrum spectrum, IReadOnlyCollection<double> isotopomerEnvelope, double mass, Tolerance tolerance)
{
    var peaks = spectrum.Peaks;
    var baseIsotopeIndex = 0;
    var baseIsotopeMz = mass;

    var basePeakIndex = spectrum.FindPeakIndex(baseIsotopeMz, tolerance);
    if (basePeakIndex < 0)
    {
        return null;
    }

    var observedPeaks = new Peak[isotopomerEnvelope.Count];
    observedPeaks[baseIsotopeIndex] = peaks[basePeakIndex];

    // Walk up through the heavier isotopes, resuming the peak scan where the previous one stopped.
    var resumeAt = basePeakIndex + 1;
    for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Count; isotopeIndex++)
    {
        var isotopeMz = baseIsotopeMz + isotopeIndex * Constants.C13MinusC12;
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var cursor = resumeAt;
        while (cursor < peaks.Length)
        {
            var candidate = peaks[cursor];

            if (candidate.Mz > maxMz)
            {
                // Past the window: the next isotope starts scanning from here.
                resumeAt = cursor;
                break;
            }

            // Inside the window: keep the most intense candidate for this isotope.
            if (candidate.Mz >= minMz)
            {
                var current = observedPeaks[isotopeIndex];
                if (current == null || candidate.Intensity > current.Intensity)
                {
                    observedPeaks[isotopeIndex] = candidate;
                }
            }

            cursor++;
        }
    }

    return observedPeaks;
}
/// <summary>
/// Compares this match with another one. Matches produced by the same search tool are equal
/// when their sequence texts are equal; matches from different tools are equal when their
/// masses agree within a tolerance of 10 (evaluated at this match's mass) and their first and
/// last residues coincide.
/// </summary>
/// <param name="other">Match to compare against; may be null.</param>
/// <returns>true when the matches are considered equal; false otherwise, including when <paramref name="other"/> is null.</returns>
/// <remarks>
/// NOTE(review): the cross-tool comparison derives the tolerance from this instance's mass only,
/// so the result is not guaranteed to be symmetric for masses right at the tolerance boundary.
/// </remarks>
public bool Equals(ProteinSpectrumMatch other)
{
    // A null argument is never equal; previously this threw a NullReferenceException.
    if (ReferenceEquals(other, null))
    {
        return false;
    }

    if (SearchToolType == other.SearchToolType)
    {
        return SequenceText.Equals(other.SequenceText);
    }

    var massDiff = Math.Abs(Mass - other.Mass);
    var tol = new Tolerance(10);

    return massDiff < tol.GetToleranceAsTh(Mass)
           && FirstResidue == other.FirstResidue
           && LastResidue == other.LastResidue;
}
public void TestFeatureAlignment()
{
    // Loads ProMex features for each run, tags them with MSPathFinder PrSMs when the result file
    // exists, aligns the features across runs, fills missing features and writes a cross-tab.
    const string outFilePath = @"\\protoapps\UserData\Jungkap\CompRef\aligned\promex_crosstab_temp.tsv";
    const string outFolder = @"\\protoapps\UserData\Jungkap\CompRef\aligned";

    var runLabels = new string[]
    {
        "32A", "32B", "32C", "32D", "32E", "32F", "32G",
        "33A", "33B", "33C", "33D", "33E", "33F", "33G"
    };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        var label = runLabels[datasetIndex];
        var rawFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.pbf", RawFolder, label);
        var mspFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ_IcTda.tsv", MsPfFolder, label);
        var ms1FtFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.ms1ft", Ms1FtFolder, label);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var features = LcMsFeatureAlignment.LoadProMexResult(datasetIndex, ms1FtFile, run);

        // Identification results are optional; features stay untagged when the file is absent.
        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            foreach (var prsm in prsmList)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag each feature with the PrSMs whose scan lies strictly inside the feature's
            // scan range and whose mass is within tolerance of the feature mass.
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsTh(feature.Mass);
                foreach (var prsm in prsmList)
                {
                    var scanInside = feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum;
                    if (scanInside && Math.Abs(feature.Mass - prsm.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(prsm);
                    }
                }
            }
        }

        alignment.AddDataSet(datasetIndex, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var datasetIndex = 0; datasetIndex < nDataset; datasetIndex++)
    {
        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", runLabels[datasetIndex]);
    }

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
public void TestGeneratingProductXics()
{
    // Times 100000 full product XIC extractions at random m/z values, first against an
    // in-memory run and then against a PbfLcMsRun opened on the .raf file. The test is
    // ignored when either input file is missing.
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath);

    const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf";
    if (!File.Exists(rafFilePath))
    {
        Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", methodName, rafFilePath);
    }

    var rafRun = new PbfLcMsRun(rafFilePath);
    var tolerance = new Tolerance(10);

    // Random product m/z values in [50, 1500) paired with precursor m/z values in [390, 810).
    var mzArr = new double[100000];
    var precursorMzArr = new double[mzArr.Length];
    var rnd = new Random();
    for (var n = 0; n < mzArr.Length; n++)
    {
        mzArr[n] = rnd.NextDouble() * 1450.0 + 50.0;
        precursorMzArr[n] = rnd.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    var sw = new System.Diagnostics.Stopwatch();

    // Method 1: in-memory run.
    sw.Start();
    for (var n = 0; n < mzArr.Length; n++)
    {
        var halfWidth = tolerance.GetToleranceAsTh(mzArr[n]);
        run.GetFullProductExtractedIonChromatogram(mzArr[n] - halfWidth, mzArr[n] + halfWidth, precursorMzArr[n]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // Method 2: run backed by the .raf file.
    sw.Reset();
    sw.Start();
    for (var n = 0; n < mzArr.Length; n++)
    {
        var halfWidth = tolerance.GetToleranceAsTh(mzArr[n]);
        rafRun.GetFullProductExtractedIonChromatogram(mzArr[n] - halfWidth, mzArr[n] + halfWidth, precursorMzArr[n]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", sw.Elapsed.TotalSeconds);

    Console.WriteLine(@"Done");
}
/// <summary>
/// Finds the observed peaks for the isotopes of an ion, starting at the most abundant isotope
/// and extending towards lighter and heavier isotopes for as long as their relative intensity
/// stays at or above <paramref name="relativeIntensityThreshold"/>.
/// </summary>
/// <param name="spec">Observed spectrum.</param>
/// <param name="ion">Ion whose composition supplies the isotopomer envelope and isotope m/z values.</param>
/// <param name="tolerance">Peak tolerance.</param>
/// <param name="relativeIntensityThreshold">Scanning in a direction stops at the first isotope below this relative intensity.</param>
/// <param name="peakIndexList">
/// Receives, per isotope, the spectrum index of the chosen peak. Slots for isotopes without a
/// chosen peak keep the array default of 0. Allocated even when the method returns null.
/// </param>
/// <returns>
/// One slot per isotope holding the most intense peak inside that isotope's tolerance window
/// (null slots where nothing was chosen); null when the most abundant isotope has no matching peak.
/// </returns>
public Peak[] GetAllIsotopePeaks(Spectrum spec, Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int[] peakIndexList)
{
    var apexIsotope = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    peakIndexList = new int[isotopomerEnvelope.Length];

    var apexMz = ion.GetIsotopeMz(apexIsotope);
    var apexPeakIndex = spec.FindPeakIndex(apexMz, tolerance);
    if (apexPeakIndex < 0)
    {
        return null;
    }

    var observedPeaks = new Peak[isotopomerEnvelope.Length];
    observedPeaks[apexIsotope] = spec.Peaks[apexPeakIndex];
    peakIndexList[apexIsotope] = apexPeakIndex;

    // Lighter isotopes: scan the spectrum downwards, resuming where the previous isotope stopped.
    var resumeAt = apexPeakIndex - 1;
    for (var isotopeIndex = apexIsotope - 1; isotopeIndex >= 0; isotopeIndex--)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var cursor = resumeAt;
        while (cursor >= 0)
        {
            var candidate = spec.Peaks[cursor];

            if (candidate.Mz < minMz)
            {
                // Below the window: the next (lighter) isotope starts scanning from here.
                resumeAt = cursor;
                break;
            }

            // Inside the window: keep the most intense candidate for this isotope.
            if (candidate.Mz <= maxMz)
            {
                var current = observedPeaks[isotopeIndex];
                if (current == null || candidate.Intensity > current.Intensity)
                {
                    observedPeaks[isotopeIndex] = candidate;
                    peakIndexList[isotopeIndex] = cursor;
                }
            }

            cursor--;
        }
    }

    // Heavier isotopes: scan the spectrum upwards in the same manner.
    resumeAt = apexPeakIndex + 1;
    for (var isotopeIndex = apexIsotope + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        var cursor = resumeAt;
        while (cursor < spec.Peaks.Length)
        {
            var candidate = spec.Peaks[cursor];

            if (candidate.Mz > maxMz)
            {
                // Above the window: the next (heavier) isotope starts scanning from here.
                resumeAt = cursor;
                break;
            }

            // Inside the window: keep the most intense candidate for this isotope.
            if (candidate.Mz >= minMz)
            {
                var current = observedPeaks[isotopeIndex];
                if (current == null || candidate.Intensity > current.Intensity)
                {
                    observedPeaks[isotopeIndex] = candidate;
                    peakIndexList[isotopeIndex] = cursor;
                }
            }

            cursor++;
        }
    }

    return observedPeaks;
}
public void TestQuantifyIdedProteoforms()
{
    // For every dataset with both a .pbf file and an MSAlign result table: builds one LC-MS
    // feature per group of PrSMs, merges features that agree in mass, elution and protein id,
    // and writes the surviving features to a .ms1ft file in promexOutFolder.
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    var nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = String.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var tolerance = new Tolerance(10);

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = String.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        var path = String.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

        // The protein id is the 5 characters following ProteinNamePrefix in the protein name.
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        // PrSM -> feature id; -1 marks a PrSM not yet assigned to any feature.
        var prsmToFeatureIdMap = new int[prsmList.Count];
        for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
        {
            prsmToFeatureIdMap[k] = -1;
        }

        // Feature -> PrSMs; featureToPrsm[n] belongs to featureList[n].
        var featureToPrsm = new List<ProteinSpectrumMatchSet>();
        var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
        var featureList = new List<LcMsPeakCluster>();
        var featureId = 0;

        for (var j = 0; j < prsmList.Count; j++)
        {
            if (prsmToFeatureIdMap[j] >= 0)
            {
                continue;
            }

            var match = prsmList[j];
            var minScanNum = match.ScanNum;
            var maxScanNum = match.ScanNum;
            var mass = match.Mass;
            var charge = match.Charge;
            var massTh = tolerance.GetToleranceAsTh(mass);
            var id1 = match.ProteinId;

            var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
            var prsmSet = new ProteinSpectrumMatchSet(i) { match };
            prsmToFeatureIdMap[j] = featureId;

            if (feature == null)
            {
                // No regular feature found: fall back to one built from noise peaks.
                feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
            }
            else
            {
                // Attach later PrSMs of the same protein that elute within the feature
                // (padded by etTol on both sides) and agree in mass.
                var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);
                for (var k = j + 1; k < prsmList.Count; k++)
                {
                    var otherMatch = prsmList[k];
                    var id2 = otherMatch.ProteinId;
                    var et2 = run.GetElutionTime(otherMatch.ScanNum);

                    // Fix: the upper bound previously used "MaxElutionTime - etTol", which cut the
                    // window short at the feature's end while extending it before the start.
                    if (id1.Equals(id2)
                        && feature.MinElutionTime - etTol < et2
                        && et2 < feature.MaxElutionTime + etTol
                        && Math.Abs(otherMatch.Mass - mass) < massTh)
                    {
                        prsmToFeatureIdMap[k] = featureId;
                        prsmSet.Add(otherMatch);
                    }
                }
            }

            featureId++;
            feature.Flag = 1;
            featureList.Add(feature);
            featureToPrsm.Add(prsmSet);
        }

        // Merge overlapping features: same mass (within tolerance), co-eluting, sharing a protein id.
        // Flag == 0 marks a feature that has been merged into another one.
        for (var j = 0; j < featureList.Count; j++)
        {
            var f1 = featureList[j];
            if (f1.Flag < 1)
            {
                continue;
            }

            var prsm1 = featureToPrsm[j];

            for (var k = j + 1; k < featureList.Count; k++)
            {
                var f2 = featureList[k];
                if (f2.Flag < 1)
                {
                    continue;
                }

                var prsm2 = featureToPrsm[k];

                if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsTh(f1.Mass))
                {
                    continue;
                }

                if (!f1.CoElutedByNet(f2, 0.005))
                {
                    continue;
                }

                if (!prsm1.ShareProteinId(prsm2))
                {
                    continue;
                }

                // The feature spanning more scans absorbs the PrSMs of the other one.
                if (f1.ScanLength > f2.ScanLength)
                {
                    prsm1.AddRange(prsm2);
                    prsm2.Clear();
                    f2.Flag = 0;
                }
                else
                {
                    prsm2.AddRange(prsm1);
                    prsm1.Clear();
                    f1.Flag = 0;
                }
            }
        }

        // Write the surviving features. The using block closes the file even if a write throws.
        var ms1ftFilePath = String.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
        using (var writer = new StreamWriter(ms1ftFilePath))
        {
            writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

            for (var j = 0; j < featureList.Count; j++)
            {
                var f1 = featureList[j];
                if (f1.Flag < 1)
                {
                    continue;
                }

                // Widen the feature to one scan before/after the PrSM scan range.
                var prsm1 = featureToPrsm[j];
                var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                f1.ExpandScanRange(minScanNum, maxScanNum);

                writer.Write("{0}\t", j + 1);
                writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
            }
        }

        Console.WriteLine(ms1ftFilePath);
    }
}
public void TestAlignFeatures()
{
    // For every dataset with a .pbf file, an MSAlign result table and a .ms1ft file: loads the
    // ProMex features, tags them with matching PrSMs and adds them to the alignment. After
    // aligning and refining abundances, writes one "_aligned.ms1ft" file per dataset.
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    var nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = String.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var tolerance = new Tolerance(10);
    var ftComparer = new UtexFeatureComparer(tolerance);
    var align = new LcMsFeatureAlignment(ftComparer);
    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var validCount = 0;

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = String.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        var path = String.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var ms1ftPath = String.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
        if (!File.Exists(ms1ftPath))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", ms1ftPath);
            continue;
        }

        validCount++;

        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

        // The protein id is the 5 characters following ProteinNamePrefix in the protein name.
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1ftPath, run);

        // Tag each feature with the PrSMs whose scan lies strictly inside the feature's
        // scan range and whose mass is within tolerance of the feature mass.
        for (var j = 0; j < features.Count; j++)
        {
            var massTol = tolerance.GetToleranceAsTh(features[j].Mass);
            foreach (var match in prsmList)
            {
                if (features[j].MinScanNum < match.ScanNum && match.ScanNum < features[j].MaxScanNum && Math.Abs(features[j].Mass - match.Mass) < massTol)
                {
                    features[j].ProteinSpectrumMatches.Add(match);
                }
            }
        }

        align.AddDataSet(i, features, run);
    }

    if (validCount == 0)
    {
        Assert.Ignore("No files found!");
    }

    align.AlignFeatures();
    Console.WriteLine("{0} alignments ", align.CountAlignedFeatures);
    align.RefineAbundance();

    var alignedFeatureList = align.GetAlignedFeatures();

    for (var i = 0; i < nDataset; i++)
    {
        var ms1ftPath = String.Format(@"{0}\{1}_aligned.ms1ft", promexOutFolder, dataset[i]);

        // The using block closes the file even if a write throws part-way through.
        using (var writer = new StreamWriter(ms1ftPath))
        {
            writer.Write(LcMsFeatureFinderLauncher.GetHeaderString());
            writer.WriteLine("\tIdedMs2ScanNums");

            for (var j = 0; j < alignedFeatureList.Count; j++)
            {
                writer.Write(j + 1);
                writer.Write("\t");

                if (alignedFeatureList[j][i] == null)
                {
                    // No feature for this dataset in this alignment: emit 15 zero-valued columns.
                    for (var k = 0; k < 14; k++)
                    {
                        writer.Write("0\t");
                    }

                    writer.Write("0\n");
                }
                else
                {
                    writer.Write(LcMsFeatureFinderLauncher.GetString(alignedFeatureList[j][i]));
                    writer.Write("\t");

                    // Last column: scan numbers of the identified PrSMs, ';'-separated (empty when none are attached).
                    if (alignedFeatureList[j][i].ProteinSpectrumMatches == null)
                    {
                        writer.Write("");
                    }
                    else
                    {
                        var scanNums = string.Join(";", alignedFeatureList[j][i].ProteinSpectrumMatches.Select(prsm => prsm.ScanNum));
                        writer.Write(scanNums);
                    }

                    writer.Write("\n");
                }
            }
        }
    }
}
public void TestGeneratingProductManyXics()
{
    // Times two passes of 100000 full product XIC extractions at random m/z values against
    // the same in-memory run. The test is ignored when the raw file is missing.
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string rawFilePath = TestRawFilePath;
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath);
    var tolerance = new Tolerance(10);

    // Random product m/z values in [50, 1500) paired with precursor m/z values in [390, 810).
    var mzArr = new double[100000];
    var precursorMzArr = new double[mzArr.Length];
    var rnd = new Random();
    for (var n = 0; n < mzArr.Length; n++)
    {
        mzArr[n] = rnd.NextDouble() * 1450.0 + 50.0;
        precursorMzArr[n] = rnd.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    var sw = new System.Diagnostics.Stopwatch();

    // First pass.
    sw.Start();
    for (var n = 0; n < mzArr.Length; n++)
    {
        var halfWidth = tolerance.GetToleranceAsTh(mzArr[n]);
        run.GetFullProductExtractedIonChromatogram(mzArr[n] - halfWidth, mzArr[n] + halfWidth, precursorMzArr[n]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", sw.Elapsed.TotalSeconds);

    // Second pass over the same inputs.
    sw.Reset();
    sw.Start();
    for (var n = 0; n < mzArr.Length; n++)
    {
        var halfWidth = tolerance.GetToleranceAsTh(mzArr[n]);
        run.GetFullProductExtractedIonChromatogram(mzArr[n] - halfWidth, mzArr[n] + halfWidth, precursorMzArr[n]);
    }
    sw.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", sw.Elapsed.TotalSeconds);

    Console.WriteLine("Done");
}
/// <summary>
/// Loads the ProMex features (regular and ".seqtag" files) of every dataset, attaches the
/// MSPathFinder identifications that fall inside each feature, aligns the features across
/// datasets, fills in missing features and writes the cross-tab to <paramref name="outFilePath"/>.
/// </summary>
/// <param name="datasets">Dataset names; also used to build the input file names.</param>
/// <param name="mspfFolder">Folder holding the "{dataset}_IcTda.tsv" identification files.</param>
/// <param name="ms1ftFolder">Folder holding the "{dataset}.ms1ft" and "{dataset}.seqtag.ms1ft" files.</param>
/// <param name="outFilePath">Path of the cross-tab output file.</param>
public void AlignFeatures(List<string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
{
    var datasetCount = datasets.Count;
    var idReader = new ProteinSpectrumMatchReader();
    var massTolerance = new Tolerance(12);
    var aligner = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(massTolerance));

    for (var d = 0; d < datasetCount; d++)
    {
        var name = datasets[d];
        var pbfFile = string.Format(@"{0}\{1}.pbf", PbfPath, name);
        var idFile = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, name);
        var featureFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, name);
        var seqTagFeatureFile = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, name);

        var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFile);

        // Regular features first, then the sequence-tag derived ones appended to the same list.
        var allFeatures = LcMsFeatureAlignment.LoadProMexResult(d, featureFile, lcmsRun);
        var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(d, seqTagFeatureFile, lcmsRun);
        allFeatures.AddRange(seqTagFeatures);

        if (File.Exists(idFile))
        {
            var identifications = idReader.LoadIdentificationResult(idFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            foreach (var prsm in identifications)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag features by PrSMs: scan strictly inside the feature's scan range
            // and mass difference strictly below the feature's mass tolerance.
            foreach (var feature in allFeatures)
            {
                var massWindow = massTolerance.GetToleranceAsTh(feature.Mass);
                foreach (var prsm in identifications)
                {
                    if (feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum)
                    {
                        if (Math.Abs(feature.Mass - prsm.Mass) < massWindow)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }
            }
        }

        aligner.AddDataSet(d, allFeatures, lcmsRun);
    }

    aligner.AlignFeatures();
    Console.WriteLine("{0} alignments ", aligner.CountAlignedFeatures);

    for (var d = 0; d < datasetCount; d++)
    {
        aligner.FillMissingFeatures(d);
        Console.WriteLine("{0} has been processed", datasets[d]);
    }

    AnalysisCompRef.OutputCrossTabWithId(outFilePath, aligner, datasets.ToArray());
}
/// <summary>
/// For every training dataset, looks up the LC-MS feature that best explains each filtered
/// identification row and writes one line per row to "{dataset}.ms1ft" in the id-result folder.
/// Envelope peak statistics for the chosen feature are written to the target file, and again
/// to the decoy file after <c>UpdateWithDecoyScore</c> has been applied.
/// The test is ignored when the id-result folder is missing; datasets whose raw file or
/// filtered id file is missing are skipped with a warning.
/// </summary>
public void ExtractLcMsFeaturesForTrainingSet()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string idFileFolder = @"D:\MassSpecFiles\training\FilteredIdResult";
    if (!Directory.Exists(idFileFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, idFileFolder);
    }

    var tolerance = new Tolerance(10);
    var tolerance2 = new Tolerance(20);
    var id = 1; // running id handed to OutputEnvelopPeakStat; shared across all datasets

    for (var d = 0; d < TrainSetFileLists.Length; d++)
    {
        var dataset = TrainSetFileLists[d];
        var dataname = Path.GetFileNameWithoutExtension(dataset);
        var filtedIdResultFile = string.Format(@"{0}\{1}.trainset.tsv", idFileFolder, dataname);
        var featureResult = string.Format(@"{0}\{1}.ms1ft", idFileFolder, dataname);

        if (!File.Exists(dataset))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", dataset);
            continue;
        }

        if (!File.Exists(filtedIdResultFile))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", filtedIdResultFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(dataset);

        // 'using' guarantees the three output files are flushed and closed even when
        // parsing or feature finding throws part-way through a dataset.
        using (var targetStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}.tsv", dataname)))
        using (var decoyStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}_decoy.tsv", dataname)))
        using (var writer = new StreamWriter(featureResult))
        {
            writer.Write("Ms2MinScan\tMs2MaxScan\tMs2MinCharge\tMs2MaxCharge\tMs2Mass\t");
            writer.Write("Mass\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMinTime\tMaxTime\tElution\tGood\n");

            var tsvParser = new TsvFileParser(filtedIdResultFile);
            var featureFinder = new LcMsPeakMatrix(run);

            // Column lookups do not depend on the row, so fetch them once.
            var minScanCol = tsvParser.GetData("MinScan");
            var maxScanCol = tsvParser.GetData("MaxScan");
            var minChargeCol = tsvParser.GetData("MinCharge");
            var maxChargeCol = tsvParser.GetData("MaxCharge");
            var massCol = tsvParser.GetData("Mass");

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                var minScan = int.Parse(minScanCol[i]);
                var maxScan = int.Parse(maxScanCol[i]);
                var minCharge = int.Parse(minChargeCol[i]);
                var maxCharge = int.Parse(maxChargeCol[i]);
                var mass = double.Parse(massCol[i]);

                // Echo the identification-side columns.
                writer.Write(minScan);
                writer.Write("\t");
                writer.Write(maxScan);
                writer.Write("\t");
                writer.Write(minCharge);
                writer.Write("\t");
                writer.Write(maxCharge);
                writer.Write("\t");
                writer.Write(mass);
                writer.Write("\t");

                // Search the mass's own bin first, then the neighbouring bin on the side
                // the mass leans towards, then the other neighbour.
                var binNum = featureFinder.Comparer.GetBinNumber(mass);
                var binMass = featureFinder.Comparer.GetMzAverage(binNum);
                var binNumList = (mass < binMass)
                    ? new int[] { binNum, binNum - 1, binNum + 1 }
                    : new int[] { binNum, binNum + 1, binNum - 1 };

                // Masses below 2000 use the wider tolerance (20 instead of 10).
                var massTh = (mass < 2000) ? tolerance2.GetToleranceAsTh(mass) : tolerance.GetToleranceAsTh(mass);
                var midScan = 0.5 * (minScan + maxScan);

                LcMsPeakCluster refinedFeature = null;
                foreach (var bi in binNumList)
                {
                    var highestAbu = 0d;
                    foreach (var feature in featureFinder.FindFeatures(bi))
                    {
                        if (!(Math.Abs(mass - feature.Mass) < massTh))
                        {
                            continue;
                        }

                        // Keep the most abundant feature whose scan range strictly
                        // contains the midpoint of the identification's scan range.
                        if (feature.MinScanNum < midScan && midScan < feature.MaxScanNum && feature.Abundance > highestAbu)
                        {
                            refinedFeature = feature;
                            highestAbu = feature.Abundance;
                        }
                    }

                    if (refinedFeature != null)
                    {
                        break;
                    }
                }

                if (refinedFeature != null)
                {
                    writer.Write(refinedFeature.Mass);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinScanNum);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxScanNum);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinCharge);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxCharge);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinElutionTime);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime - refinedFeature.MinElutionTime);
                    writer.Write("\t");

                    // "Good" = the feature's scan range covers the whole identification scan range.
                    var good = (refinedFeature.MinScanNum <= minScan && refinedFeature.MaxScanNum >= maxScan);
                    writer.Write(good ? 1 : 0);
                    writer.Write("\n");

                    OutputEnvelopPeakStat(id, refinedFeature, targetStatWriter);

                    var chargeRange = featureFinder.GetDetectableMinMaxCharge(refinedFeature.RepresentativeMass, run.MinMs1Mz, run.MaxMs1Mz);
                    refinedFeature.UpdateWithDecoyScore(featureFinder.Ms1Spectra, chargeRange.Item1, chargeRange.Item2);
                    OutputEnvelopPeakStat(id, refinedFeature, decoyStatWriter);
                    id++;
                }
                else
                {
                    // No feature found: fill the nine feature-side columns with zeros.
                    for (var col = 0; col < 8; col++)
                    {
                        writer.Write(0);
                        writer.Write("\t");
                    }

                    writer.Write(0);
                    writer.Write("\n");
                }
            }
        }

        Console.WriteLine(dataname);
    }
}
/// <summary>
/// For every Study3 dataset (3 x 5 fraction combinations) that does not yet have a
/// "{dataset}.seqtag.ms1ft" file, collects the identifications that fall inside no
/// ProMex feature and passes them to <c>FeatureFind</c> together with that output path.
/// </summary>
public void FindMissingLcMsFeatures()
{
    var mspfFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";
    var ms1ftFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";

    const int outerFractionCount = 3;
    const int innerFractionCount = 5;

    for (var outer = 1; outer <= outerFractionCount; outer++)
    {
        for (var inner = 1; inner <= innerFractionCount; inner++)
        {
            var datasets = GetDataSetNamesStudy3(outer, inner);
            var datasetCount = datasets.Count;
            var idReader = new ProteinSpectrumMatchReader();
            var massTolerance = new Tolerance(12);

            for (var d = 0; d < datasetCount; d++)
            {
                var name = datasets[d];
                var pbfFile = string.Format(@"{0}\{1}.pbf", PbfPath, name);
                var idFile = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, name);
                var featureFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, name);
                var seqTagFile = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, name);

                // Already produced on an earlier run: nothing to do for this dataset.
                if (File.Exists(seqTagFile))
                {
                    continue;
                }

                var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFile);
                var features = LcMsFeatureAlignment.LoadProMexResult(d, featureFile, lcmsRun);
                var identifications = idReader.LoadIdentificationResult(idFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                // covered[k] becomes true once identification k lies inside at least one feature.
                var covered = new bool[identifications.Count];
                foreach (var feature in features)
                {
                    var massWindow = massTolerance.GetToleranceAsTh(feature.Mass);
                    for (var k = 0; k < identifications.Count; k++)
                    {
                        var prsm = identifications[k];
                        if (feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum)
                        {
                            if (Math.Abs(feature.Mass - prsm.Mass) < massWindow)
                            {
                                feature.ProteinSpectrumMatches.Add(prsm);
                                covered[k] = true;
                            }
                        }
                    }
                }

                var uncovered = new List<ProteinSpectrumMatch>();
                for (var k = 0; k < identifications.Count; k++)
                {
                    if (covered[k])
                    {
                        continue;
                    }

                    uncovered.Add(identifications[k]);
                }

                FeatureFind(uncovered, lcmsRun, seqTagFile);
                Console.WriteLine(seqTagFile);
            }
        }
    }
}
/// <summary>
/// Aligns the ProMex features of the six IMER runs, tagging each feature with the
/// MS-GF+ identifications that fall inside it, and writes the resulting cross-tab.
/// </summary>
public void TestIMERFeatureAlignment()
{
    const string outFilePath = @"D:\MassSpecFiles\IMER\promex_crosstab.tsv";
    const string rawFolder = @"D:\MassSpecFiles\IMER";
    var runLabels = new string[] { "1", "2", "3", "4", "5", "6" };
    var runCount = runLabels.Length;

    var idReader = new ProteinSpectrumMatchReader();
    var massTolerance = new Tolerance(10);
    var aligner = new LcMsFeatureAlignment(new CompRefFeatureComparer(massTolerance));

    for (var r = 0; r < runCount; r++)
    {
        var label = runLabels[r];

        // Runs "2" and "3" carry 14 in the file name ("..._IMER_14May14_..."); all others carry 13.
        int dayTag;
        if (label.Equals("2") || label.Equals("3"))
        {
            dayTag = 14;
        }
        else
        {
            dayTag = 13;
        }

        var pbfFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33.pbf", rawFolder, label, dayTag);
        var idFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33_msgfdb_syn.txt", rawFolder, label, dayTag);
        var featureFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33.ms1ft", rawFolder, label, dayTag);

        Console.WriteLine(pbfFile);
        Console.WriteLine(File.Exists(pbfFile));

        var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFile);
        var features = LcMsFeatureAlignment.LoadProMexResult(r, featureFile, lcmsRun, 500, 15000);

        if (File.Exists(idFile))
        {
            var identifications = idReader.LoadIdentificationResult(idFile, ProteinSpectrumMatch.SearchTool.MsGfPlus);

            foreach (var prsm in identifications)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag features by PrSMs: scan strictly inside the feature's scan range
            // and mass difference strictly below the feature's mass tolerance.
            foreach (var feature in features)
            {
                var massWindow = massTolerance.GetToleranceAsTh(feature.Mass);
                foreach (var prsm in identifications)
                {
                    if (feature.MinScanNum < prsm.ScanNum && prsm.ScanNum < feature.MaxScanNum)
                    {
                        if (Math.Abs(feature.Mass - prsm.Mass) < massWindow)
                        {
                            feature.ProteinSpectrumMatches.Add(prsm);
                        }
                    }
                }
            }
        }

        aligner.AddDataSet(r, features, lcmsRun);
    }

    aligner.AlignFeatures();
    Console.WriteLine("{0} alignments ", aligner.CountAlignedFeatures);

    for (var r = 0; r < runCount; r++)
    {
        aligner.FillMissingFeatures(r);
        Console.WriteLine("{0} has been processed", runLabels[r]);
    }

    OutputCrossTabWithId(outFilePath, aligner, runLabels);
}
/// <summary>
/// Looks for the isotope envelope of <paramref name="ion"/> in the MS2 spectrum, starting at
/// the most abundant isotope and walking to lighter and then heavier isotopes.
/// </summary>
/// <param name="ion">Ion whose isotope peaks are searched for.</param>
/// <param name="tolerance">Mass tolerance applied around each theoretical isotope m/z.</param>
/// <param name="relativeIntensityThreshold">
/// Isotopes whose relative intensity is below this value are not searched for; each walk
/// stops at the first such isotope.
/// </param>
/// <param name="baseIsotopePeakIndex">
/// Index returned by <c>_ms2Spec.FindPeakIndex</c> for the most abundant isotope; negative when not found.
/// </param>
/// <param name="nIsotopes">Number of envelope entries with relative intensity at or above the threshold.</param>
/// <param name="nMatchedIsotopes">Number of isotopes (including the base isotope) matched to a peak.</param>
/// <returns>
/// <c>false</c> when the base isotope is not found, or when a walk reaches a peak that lies
/// beyond the tolerance window of a required isotope without having matched it; <c>true</c> otherwise.
/// </returns>
private bool FindIon(Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int baseIsotopePeakIndex, out int nIsotopes, out int nMatchedIsotopes)
{
    var baseIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    var baseIsotopMz = ion.GetIsotopeMz(baseIsotopeIndex);
    baseIsotopePeakIndex = _ms2Spec.FindPeakIndex(baseIsotopMz, tolerance);

    // Count only the isotopes that pass the threshold. The previous Select(...).Count()
    // projected every entry to a bool and therefore counted the whole envelope.
    nIsotopes = isotopomerEnvelope.Count(x => x >= relativeIntensityThreshold);
    nMatchedIsotopes = 0;

    if (baseIsotopePeakIndex < 0)
    {
        return (false);
    }

    nMatchedIsotopes++;

    // go down: lighter isotopes, scanning peaks to the left of the last matched peak
    var peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex - 1; isotopeIndex >= 0; isotopeIndex--)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        // NOTE(review): if the spectrum runs out of peaks before a peak at or below maxMz is
        // seen, the isotope stays unmatched but the walk continues and the method can still
        // return true. Confirm this is intended.
        for (var i = peakIndex - 1; i >= 0; i--)
        {
            var peakMz = _ms2Spec.Peaks[i].Mz;
            if (peakMz < minMz)
            {
                // Walked past the window without a match: a required isotope is missing.
                return (false);
            }

            if (peakMz <= maxMz) // found a match, move to the previous (lighter) isotope
            {
                peakIndex = i;
                nMatchedIsotopes++;
                break;
            }
        }
    }

    // go up: heavier isotopes, scanning peaks to the right of the base peak
    peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsTh(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        for (var i = peakIndex + 1; i < _ms2Spec.Peaks.Length; i++)
        {
            var peakMz = _ms2Spec.Peaks[i].Mz;
            if (peakMz > maxMz)
            {
                // Walked past the window without a match: a required isotope is missing.
                return (false);
            }

            if (peakMz >= minMz) // found a match, move to the next (heavier) isotope
            {
                peakIndex = i;
                nMatchedIsotopes++;
                break;
            }
        }
    }

    return (true);
}
/// <summary>
/// Maps MSPathFinder identifications (QValue at most 0.01) onto the rows of the aligned
/// feature table and writes, for every tagged row, the identification line followed by the
/// row's values from the columns named "0" to "31".
/// The test is ignored when the feature or MSP folder is missing.
/// </summary>
public void TestTagAlignedFeatures()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    var featureDir = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output";
    var mspDir = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output\MSP";
    var outFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output\aligned_features.tsv";
    var resultFile = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\Output\aligned_ids.tsv";

    if (!Directory.Exists(featureDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, featureDir);
    }

    if (!Directory.Exists(mspDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, mspDir);
    }

    var dataset = GetDataList(featureDir);

    // NOTE(review): the binary search below assumes the MonoMass column is sorted
    // in ascending order; confirm against the code that writes the aligned table.
    var tsvParser = new TsvFileParser(outFile);
    var monoMassCol = tsvParser.GetData("MonoMass");
    var massList = new List<double>();
    for (var i = 0; i < tsvParser.NumData; i++)
    {
        massList.Add(Double.Parse(monoMassCol[i]));
    }

    // Aligned-feature row index -> first identification row tagged to it.
    var featureIdMap = new Dictionary<int, string>();
    var tolerance = new Tolerance(12);
    var headers = new List<string>();

    for (var d = 0; d < dataset.Count; d++)
    {
        var data = dataset[d];
        var minScanColName = String.Format("{0}_minScan", d);
        var maxScanColName = String.Format("{0}_maxScan", d);

        var fname = String.Format(@"{0}\{1}_IcTda.tsv", mspDir, data);
        var idParser = new TsvFileParser(fname);
        var idRows = idParser.GetRows();
        if (headers.Count < 1)
        {
            headers.AddRange(idParser.GetHeaders());
        }

        // Column lookups do not depend on the row, so fetch them once per dataset.
        var minScanCol = tsvParser.GetData(minScanColName);
        var maxScanCol = tsvParser.GetData(maxScanColName);
        var scanCol = idParser.GetData("Scan");
        var massCol = idParser.GetData("Mass");
        var qValueCol = idParser.GetData("QValue");

        for (var i = 0; i < idParser.NumData; i++)
        {
            var scan = Int32.Parse(scanCol[i]);
            var mass = Double.Parse(massCol[i]);
            var qvalue = Double.Parse(qValueCol[i]);

            // Stops at the first row above the cut-off (presumably rows are ordered by QValue; verify).
            if (qvalue > 0.01)
            {
                break;
            }

            var massTol = tolerance.GetToleranceAsTh(mass);

            var idx = massList.BinarySearch(mass);
            if (idx < 0)
            {
                idx = ~idx;
            }

            // BinarySearch yields ~Count when the mass is above every aligned mass (or the list
            // is empty); clamp so the downward scan never indexes past the end of the list.
            var start = Math.Min(idx, massList.Count - 1);

            // Scan towards lower masses first, starting at the insertion point.
            var tagged = false;
            for (var j = start; j >= 0; j--)
            {
                if (Math.Abs(mass - massList[j]) > massTol)
                {
                    break;
                }

                // An empty cell is skipped: there are no scan bounds to compare against.
                if (minScanCol[j].Length < 1)
                {
                    continue;
                }

                if (Int32.Parse(minScanCol[j]) < scan && scan < Int32.Parse(maxScanCol[j]))
                {
                    tagged = true;
                    if (!featureIdMap.ContainsKey(j))
                    {
                        featureIdMap.Add(j, idRows[i]);
                    }

                    break;
                }
            }

            if (tagged)
            {
                continue;
            }

            // Nothing found below: scan towards higher masses.
            for (var j = start + 1; j < massList.Count; j++)
            {
                if (Math.Abs(mass - massList[j]) > massTol)
                {
                    break;
                }

                if (minScanCol[j].Length < 1)
                {
                    continue;
                }

                if (Int32.Parse(minScanCol[j]) < scan && scan < Int32.Parse(maxScanCol[j]))
                {
                    if (!featureIdMap.ContainsKey(j))
                    {
                        featureIdMap.Add(j, idRows[i]);
                    }

                    break;
                }
            }
        }
    }

    // NOTE(review): 32 is hard-coded; presumably the number of dataset columns in the aligned table. Confirm.
    const int columnCount = 32;

    // 'using' closes the result file even if a column lookup or write throws.
    using (var writer = new StreamWriter(resultFile))
    {
        writer.Write("AlignedFeatureID");
        writer.Write("\t");
        writer.Write(string.Join("\t", headers));
        for (var i = 0; i < columnCount; i++)
        {
            writer.Write("\t");
            writer.Write("{0}", i);
        }

        writer.Write("\n");

        var id = 1;
        foreach (var entry in featureIdMap)
        {
            writer.Write(id);
            writer.Write("\t");
            writer.Write(entry.Value);
            for (var i = 0; i < columnCount; i++)
            {
                writer.Write("\t");
                writer.Write("{0}", tsvParser.GetData(String.Format("{0}", i))[entry.Key]);
            }

            writer.Write("\n");
            id++;
        }
    }
}