/// <summary>
/// Decides whether two LC-MS features from different datasets should be placed in the same cluster.
/// </summary>
/// <param name="f1">First feature</param>
/// <param name="f2">Second feature</param>
/// <returns>
/// True when the masses agree within tolerance (optionally allowing a 1 Da offset, or a 1 or 2 Da
/// offset when both masses exceed 10000) and the features co-elute within a NET tolerance of 0.01
/// </returns>
public bool SameCluster(LcMsFeature f1, LcMsFeature f2)
{
    // Two features of the same dataset are never clustered together
    if (f1.DataSetId == f2.DataSetId)
    {
        return false;
    }

    // The tighter of the two per-feature tolerances is used for every mass comparison
    var tolTh = Math.Min(_tolerance.GetToleranceAsMz(f1.Mass), _tolerance.GetToleranceAsMz(f2.Mass));
    var delta = Math.Abs(f1.Mass - f2.Mass);

    var massAgrees = !(delta > tolTh);

    if (!massAgrees && _oneDaltonShift)
    {
        // Accept a one dalton offset between the two masses
        massAgrees = !(Math.Abs(delta - 1) > tolTh);

        // For large masses (both above 10000) a two dalton offset is accepted as well
        if (!massAgrees && f1.Mass > 10000 && f2.Mass > 10000)
        {
            massAgrees = !(Math.Abs(delta - 2) > tolTh);
        }
    }

    if (!massAgrees)
    {
        return false;
    }

    // Mass is compatible; the final decision rests on co-elution in normalized elution time
    return f1.CoElutedByNet(f2, 0.01);
}
/// <summary>
/// Loads the ProMex features of the CompRef runs, tags them with MSPathFinder identifications when
/// the identification file exists, aligns the features across runs and writes the cross-tab file.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\CompRef\aligned\promex_crosstab_temp.tsv";

    var runLabels = new[]
    {
        "32A", "32B", "32C", "32D", "32E", "32F", "32G",
        "33A", "33B", "33C", "33D", "33E", "33F", "33G"
    };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        var label = runLabels[dataId];
        var rawFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.pbf", RawFolder, label);
        var mspFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ_IcTda.tsv", MsPfFolder, label);
        var ms1FtFile = string.Format(@"{0}\CPTAC_Intact_CR{1}_24Aug15_Bane_15-02-06-RZ.ms1ft", Ms1FtFolder, label);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, ms1FtFile, run);

        // Identifications are optional; features are left untagged when the file is missing
        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            foreach (var prsm in prsmList)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag each feature with the PrSMs that fall strictly inside its scan range and match its mass
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var prsm in prsmList)
                {
                    if (prsm.ScanNum <= feature.MinScanNum || prsm.ScanNum >= feature.MaxScanNum)
                    {
                        continue;
                    }

                    if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(prsm);
                    }
                }
            }
        }

        alignment.AddDataSet(dataId, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        alignment.FillMissingFeatures(dataId);
        Console.WriteLine("{0} has been processed", runLabels[dataId]);
    }

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
/// <summary>
/// Decides whether two protein-spectrum matches belong to the same cluster, based on
/// mass agreement (10, in the default unit of <c>Tolerance</c>) and elution time proximity.
/// </summary>
/// <param name="prsm1">First match</param>
/// <param name="prsm2">Second match</param>
/// <returns>
/// False when the masses differ by more than the tolerance, or when the elution times differ by more than
/// 2% of <c>_elutionLength</c> (identical sequence text) or 0.5% of it (different sequence text); otherwise true
/// </returns>
public bool SameCluster(ProteinSpectrumMatch prsm1, ProteinSpectrumMatch prsm2)
{
    var massTolerance = new Tolerance(10);

    if (Math.Abs(prsm1.Mass - prsm2.Mass) > massTolerance.GetToleranceAsMz(prsm1.Mass))
    {
        return false;
    }

    var elutionTime1 = _run.GetElutionTime(prsm1.ScanNum);
    var elutionTime2 = _run.GetElutionTime(prsm2.ScanNum);
    var elutionGap = Math.Abs(elutionTime1 - elutionTime2);

    // Matches with the same sequence text are given a wider elution window
    var allowedFraction = prsm1.SequenceText.Equals(prsm2.SequenceText) ? 0.02 : 0.005;

    return !(elutionGap > _elutionLength * allowedFraction);
}
/// <summary>
/// Loads the ProMex features of every dataset (both the regular and the seqtag .ms1ft file), tags them
/// with MSPathFinder identifications when the result file exists, aligns the features across datasets
/// and writes the cross-tab file.
/// </summary>
/// <param name="datasets">Dataset names, used to build the file names</param>
/// <param name="mspfFolder">Folder holding the *_IcTda.tsv identification files</param>
/// <param name="ms1ftFolder">Folder holding the .ms1ft and .seqtag.ms1ft feature files</param>
/// <param name="outFilePath">Output path handed to <c>OutputCrossTabWithId</c></param>
private void AlignFeatures(List<string> datasets, string mspfFolder, string ms1ftFolder, string outFilePath)
{
    var nDataset = datasets.Count;
    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(12);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        var datasetName = datasets[dataId];
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, datasetName);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", mspfFolder, datasetName);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", ms1ftFolder, datasetName);
        var ms1FtFile2 = string.Format(@"{0}\{1}.seqtag.ms1ft", ms1ftFolder, datasetName);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Combine the features of both feature files into a single list
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, ms1FtFile, run);
        var seqTagFeatures = LcMsFeatureAlignment.LoadProMexResult(dataId, ms1FtFile2, run);
        features.AddRange(seqTagFeatures);

        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            foreach (var prsm in prsmList)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag each feature with the PrSMs that fall strictly inside its scan range and match its mass
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var prsm in prsmList)
                {
                    if (prsm.ScanNum <= feature.MinScanNum || prsm.ScanNum >= feature.MaxScanNum)
                    {
                        continue;
                    }

                    if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(prsm);
                    }
                }
            }
        }

        alignment.AddDataSet(dataId, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        alignment.FillMissingFeatures(dataId);
        Console.WriteLine("{0} has been processed", datasets[dataId]);
    }

    AnalysisCompRef.OutputCrossTabWithId(outFilePath, alignment, datasets);
}
/// <summary>
/// Checks whether the charge assigned to an isotope envelope is plausible by tallying, for the spectrum
/// peaks spanned by the envelope, how often the spacing between peaks is compatible with a higher charge.
/// </summary>
/// <param name="envelope">Observed isotope envelope to validate</param>
/// <returns>
/// True when the charge is above 20, when the envelope accounts for more than 70% of the spanned peaks,
/// or when no higher charge collects enough supporting peak gaps.
/// False when fewer than 10 peaks are spanned, or when some higher charge's gap count (plus one) exceeds
/// both half the spanned peaks and 1.25 times the envelope's number of peaks.
/// </returns>
public bool CheckChargeState(ObservedIsotopeEnvelope envelope)
{
    var checkCharge = envelope.Charge;
    if (checkCharge > 20)
    {
        return true; // high charge (> +20), just pass
    }

    var peakStartIndex = envelope.MinMzPeak.IndexInSpectrum;
    var peakEndIndex = envelope.MaxMzPeak.IndexInSpectrum;
    var nPeaks = peakEndIndex - peakStartIndex + 1;

    if (nPeaks < 10)
    {
        return false;
    }

    if (envelope.NumberOfPeaks > nPeaks * 0.7)
    {
        return true;
    }

    var tolerance = new Tolerance(5);
    var threshold = nPeaks * 0.5;
    var mzTol = tolerance.GetToleranceAsMz(Spectrum.Peaks[peakStartIndex].Mz);

    // Range of higher charges that are tallied
    var minCheckCharge = Math.Max(checkCharge * 2 - 1, 4);
    var maxCheckCharge = Math.Min(checkCharge * 5 + 1, 60);

    // Largest peak spacing that can still belong to one of the tallied charges
    var maxDeltaMz = Constants.C13MinusC12 / minCheckCharge + mzTol;
    var nChargeGaps = new int[maxCheckCharge - minCheckCharge + 1];

    for (var i = peakStartIndex; i <= peakEndIndex; i++)
    {
        for (var j = i + 1; j <= peakEndIndex; j++)
        {
            var deltaMz = Spectrum.Peaks[j].Mz - Spectrum.Peaks[i].Mz;
            if (deltaMz > maxDeltaMz)
            {
                break;
            }

            // Charges whose spacing (1 / c) is compatible with deltaMz +/- mzTol.
            // The range is clamped to the tallied charges: when deltaMz is equal to or barely above mzTol,
            // 1 / (deltaMz - mzTol) is infinite or enormous, and iterating up to it would practically never
            // finish even though every charge above maxCheckCharge is ignored anyway.
            var firstCharge = Math.Max(Math.Round(1 / (deltaMz + mzTol)), minCheckCharge);
            var lastCharge = Math.Min(Math.Round(1 / (deltaMz - mzTol)), maxCheckCharge);

            for (var c = firstCharge; c <= lastCharge; c++)
            {
                var k = (int)c - minCheckCharge;
                nChargeGaps[k]++;

                if (nChargeGaps[k] + 1 > threshold && nChargeGaps[k] + 1 > 1.25 * envelope.NumberOfPeaks)
                {
                    return false;
                }
            }
        }
    }

    return true;
}
/// <summary>
/// Loads the ProMex features of each dataset, tags them with the merged MSPathFinder identifications
/// from two result folders, aligns the features across datasets and writes the cross-tab file.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Lewy\aligned\promex_crosstab_temp.tsv";

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new AnalysisCompRef.CompRefFeatureComparer(tolerance));

    for (var dataId = 0; dataId < NdataSet; dataId++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", PbfPath, GetDataSetNames(dataId));
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, GetDataSetNames(dataId));
        var mspFile2 = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder2, GetDataSetNames(dataId));
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, GetDataSetNames(dataId));
        Console.WriteLine(rawFile);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        // Pool the identifications of both result files, then merge them
        var pooledPrsms = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var secondPrsms = prsmReader.LoadIdentificationResult(mspFile2, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        pooledPrsms.AddRange(secondPrsms);
        var prsmList = MergePrsm(pooledPrsms);

        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, ms1FtFile, run);

        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // Tag each feature with the PrSMs that fall strictly inside its scan range and match its mass
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsMz(feature.Mass);
            foreach (var prsm in prsmList)
            {
                if (prsm.ScanNum <= feature.MinScanNum || prsm.ScanNum >= feature.MaxScanNum)
                {
                    continue;
                }

                if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(prsm);
                }
            }
        }

        alignment.AddDataSet(dataId, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var dataId = 0; dataId < NdataSet; dataId++)
    {
        alignment.FillMissingFeatures(dataId);
        Console.WriteLine("{0} has been processed", GetDataSetNames(dataId));
    }

    OutputCrossTabWithId(outFilePath, alignment);
}
/// <summary>
/// Find all peaks whose m/z lies in the window centered on <paramref name="mz"/> with a half-width given by the tolerance
/// </summary>
/// <param name="peakList">Peaks to search</param>
/// <param name="mz">Center of the m/z window</param>
/// <param name="tolerance">Tolerance converted to an m/z half-width at <paramref name="mz"/></param>
/// <returns>The result of the range-based <c>FindAllPeaks</c> overload for the computed window</returns>
public static IList<Peak> FindAllPeaks(List<Peak> peakList, double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsMz(mz);
    return FindAllPeaks(peakList, mz - halfWidth, mz + halfWidth);
}
/// <summary>
/// Gets the extracted ion chromatogram (MS1 spectra only) for the m/z window centered on the target m/z
/// </summary>
/// <param name="mz">target m/z (center of the window)</param>
/// <param name="tolerance">tolerance, converted to an m/z half-width at the target m/z</param>
/// <returns>XIC as an Xic object</returns>
public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsMz(mz);
    return GetPrecursorExtractedIonChromatogram(mz - halfWidth, mz + halfWidth);
}
/// <summary>
/// Gets the extracted ion chromatogram (MS2 spectra only) for the m/z window centered on the target m/z
/// </summary>
/// <param name="mz">target m/z (center of the window)</param>
/// <param name="tolerance">tolerance, converted to an m/z half-width at the target m/z</param>
/// <param name="precursorIonMz">m/z of the precursor ion</param>
/// <param name="minScanNum">minimum scan number (inclusive)</param>
/// <param name="maxScanNum">maximum scan number (inclusive)</param>
/// <returns>XIC as an Xic object</returns>
public Xic GetProductExtractedIonChromatogram(double mz, Tolerance tolerance, double precursorIonMz, int minScanNum, int maxScanNum)
{
    var halfWidth = tolerance.GetToleranceAsMz(mz);
    return GetProductExtractedIonChromatogram(mz - halfWidth, mz + halfWidth, precursorIonMz, minScanNum, maxScanNum);
}
/// <summary>
/// Loads the ProMex features of the spike-in runs, tags them with MSPathFinder identifications,
/// aligns the features across runs and writes the cross-tab file.
/// The FillMissingFeatures pass is not executed by this method.
/// </summary>
public void TestFeatureAlignment()
{
    const string outFilePath = @"\\protoapps\UserData\Jungkap\Quant\aligned\promex_crosstab.tsv";

    var runLabels = new[]
    {
        "1x1", "1x2", "1x3", "1x4", "1x5",
        "5x1", "5x2", "5x3", "5x4", "5x5",
        "10x1", "10x2", "10x3", "10x4", "10x5"
    };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new SpikeInFeatureComparer(tolerance));

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        // NOTE(review): file names come from "datasets", which is declared outside this method, while the
        // loop bound and the output labels come from runLabels - confirm both have the same length and order
        var rawFile = string.Format(@"{0}\{1}.pbf", RawFolder, datasets[dataId]);
        var mspFile = string.Format(@"{0}\{1}_IcTda.tsv", MsPfFolder, datasets[dataId]);
        var ms1FtFile = string.Format(@"{0}\{1}.ms1ft", Ms1FtFolder, datasets[dataId]);

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, ms1FtFile, run);

        foreach (var prsm in prsmList)
        {
            prsm.ProteinId = prsm.ProteinName;
        }

        // Tag each feature with the PrSMs that fall strictly inside its scan range and match its mass
        foreach (var feature in features)
        {
            var massTol = tolerance.GetToleranceAsMz(feature.Mass);
            foreach (var prsm in prsmList)
            {
                if (prsm.ScanNum <= feature.MinScanNum || prsm.ScanNum >= feature.MaxScanNum)
                {
                    continue;
                }

                if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                {
                    feature.ProteinSpectrumMatches.Add(prsm);
                }
            }
        }

        alignment.AddDataSet(dataId, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
/// <summary>
/// Gets the extracted ion chromatogram (MS1 spectra only) for the m/z window centered on the target m/z.
/// When a non-negative target scan number is given, only XicPeaks around that scan are returned;
/// a negative target scan number yields the XIC of the whole window.
/// </summary>
/// <param name="mz">target m/z (center of the window)</param>
/// <param name="tolerance">tolerance, converted to an m/z half-width at the target m/z</param>
/// <param name="targetScanNum">target scan number to generate xic</param>
/// <param name="maxNumConsecutiveScansWithoutPeak">maximum number of consecutive scans without a peak</param>
/// <returns>XIC around targetScanNum, or the full XIC when targetScanNum is negative</returns>
public Xic GetPrecursorExtractedIonChromatogram(double mz, Tolerance tolerance, int targetScanNum, int maxNumConsecutiveScansWithoutPeak = 3)
{
    var halfWidth = tolerance.GetToleranceAsMz(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    if (targetScanNum >= 0)
    {
        return GetPrecursorExtractedIonChromatogram(lowerMz, upperMz, targetScanNum, maxNumConsecutiveScansWithoutPeak);
    }

    return GetPrecursorExtractedIonChromatogram(lowerMz, upperMz);
}
/// <summary>
/// Builds the edges of the sequence tag graph: for every ordered pair of deconvoluted peaks, an edge is
/// added when the mass gap between them matches at least one amino acid mass within tolerance.
/// </summary>
private void CollectSequenceTagGraphEdges()
{
    for (var lowIndex = 0; lowIndex < _deconvolutedPeaks.Count; lowIndex++)
    {
        // The tolerance is derived from the mass of the lower-index peak
        var tolTh = _tolerance.GetToleranceAsMz(_deconvolutedPeaks[lowIndex].Mass);

        for (var highIndex = lowIndex + 1; highIndex < _deconvolutedPeaks.Count; highIndex++)
        {
            var gap = _deconvolutedPeaks[highIndex].Mass - _deconvolutedPeaks[lowIndex].Mass;
            var gapUpper = gap + tolTh;
            var gapLower = gap - tolTh;
            var edge = new SequenceTagGraphEdge(lowIndex, highIndex, gap);

            // Gap already larger than any amino acid: stop scanning partners for this peak
            // (presumably the peaks are sorted by ascending mass - TODO confirm)
            if (gapLower > _maxAminoAcidMass)
            {
                break;
            }

            // Gap still smaller than any amino acid: try the next partner
            if (gapUpper < _minAminoAcidMass)
            {
                continue;
            }

            foreach (var aa in _aminoAcidsArray)
            {
                var aaMass = aa.Composition.Mass;
                if (gapLower < aaMass && aaMass < gapUpper)
                {
                    edge.AddMatchedAminoAcid(aa, Math.Abs(edge.Mass - aaMass));
                }
            }

            if (edge.AminoAcidList.Count > 0)
            {
                AddEdge(edge);
            }
        }
    }
}
/// <summary>
/// Repeatedly takes the first feature of the sorted set, keeps it, and resolves the features overlapping it:
/// overlapping features whose representative mass differs by 1 or 2 (within a tolerance of 5) and that have
/// a similar score are kept as well; the remaining ones are re-scored after the kept feature's major peaks
/// have been inactivated, and are returned to the set only when they still pass the score threshold.
/// </summary>
/// <param name="featureSet">Candidate features; the set is emptied by this method</param>
/// <returns>The retained features, in the order they were selected</returns>
private IList<LcMsPeakCluster> RemoveOverlappedFeatures(SortedSet<LcMsPeakCluster> featureSet)
{
    var retained = new List<LcMsPeakCluster>();
    var massTolerance = new Tolerance(5);

    while (featureSet.Count >= 1)
    {
        var top = featureSet.First();
        featureSet.Remove(top);
        retained.Add(top);

        var tolTh = massTolerance.GetToleranceAsMz(top.RepresentativeMass);
        var toRescore = new List<LcMsPeakCluster>();

        foreach (var overlapped in top.OverlappedFeatures)
        {
            // Only features that are still candidates are considered
            if (!featureSet.Remove(overlapped))
            {
                continue;
            }

            var delta = Math.Abs(top.RepresentativeMass - overlapped.RepresentativeMass);
            var offByOneOrTwo = Math.Abs(delta - 1.0) < tolTh || Math.Abs(delta - 2.0) < tolTh;

            if (offByOneOrTwo && SimilarScore(top, overlapped))
            {
                retained.Add(overlapped);
            }
            else
            {
                toRescore.Add(overlapped);
            }
        }

        top.InActivateMajorPeaks();

        // Re-evaluate the overlapping features now that the major peaks of the kept feature are inactive
        foreach (var candidate in toRescore)
        {
            candidate.UpdateScore(_spectra);
            candidate.Score = _scorer.GetScore(candidate);

            if (candidate.Score > _scorer.ScoreThreshold && candidate.GoodEnougth)
            {
                featureSet.Add(candidate);
            }
        }
    }

    return retained;
}
/// <summary>
/// Finds the most intense peak whose m/z lies strictly inside the tolerance window around <paramref name="mz"/>.
/// </summary>
/// <param name="mz">Center of the m/z window</param>
/// <param name="tolerance">Tolerance converted to an m/z half-width at <paramref name="mz"/></param>
/// <returns>The peak with the highest intensity above zero inside the window, or null when there is none</returns>
internal RankedPeak FindPeak(double mz, Tolerance tolerance)
{
    var halfWidth = tolerance.GetToleranceAsMz(mz);
    var lowerMz = mz - halfWidth;
    var upperMz = mz + halfWidth;

    // Locate the window center; a negative result is the complement of the insertion point
    var start = Array.BinarySearch(Peaks, new RankedPeak((lowerMz + upperMz) / 2, 0, 0));
    if (start < 0)
    {
        start = ~start;
    }

    RankedPeak best = null;
    var maxIntensity = 0.0;

    // Walk towards lower m/z until the window is left
    for (var i = start - 1; i >= 0 && i < Peaks.Length; i--)
    {
        var peak = Peaks[i];
        if (peak.Mz <= lowerMz)
        {
            break;
        }

        if (peak.Intensity > maxIntensity)
        {
            maxIntensity = peak.Intensity;
            best = peak;
        }
    }

    // Walk towards higher m/z until the window is left
    for (var i = start; i >= 0 && i < Peaks.Length; i++)
    {
        var peak = Peaks[i];
        if (peak.Mz >= upperMz)
        {
            break;
        }

        if (peak.Intensity > maxIntensity)
        {
            maxIntensity = peak.Intensity;
            best = peak;
        }
    }

    return best;
}
/// <summary>
/// Compares this match with another one.
/// Matches produced by the same search tool are equal when their sequence text is equal; matches from
/// different tools are equal when their masses agree within a tolerance of 10 and both the first and
/// the last residue are the same.
/// </summary>
/// <param name="other">Match to compare with; null is never equal</param>
/// <returns>True when the two matches are considered equal</returns>
public bool Equals(ProteinSpectrumMatch other)
{
    // Equality against null must answer false rather than throw
    if (ReferenceEquals(other, null))
    {
        return false;
    }

    if (SearchToolType == other.SearchToolType)
    {
        return SequenceText.Equals(other.SequenceText);
    }

    var massDiff = Math.Abs(Mass - other.Mass);
    var tol = new Tolerance(10);

    return massDiff < tol.GetToleranceAsMz(Mass)
           && FirstResidue == other.FirstResidue
           && LastResidue == other.LastResidue;
}
/// <summary>
/// Aligns observed peak list to theoretical peak list.
/// For every theoretical peak, the most intense observed peak within tolerance is selected; when no observed
/// peak qualifies, a placeholder peak with the theoretical m/z and zero intensity is used instead.
/// </summary>
/// <param name="observedPeaks">Observed peaks; peaks with an m/z of zero or less are ignored. Presumably sorted by ascending m/z - TODO confirm</param>
/// <param name="theoreticalPeaks">Theoretical peaks; presumably sorted by ascending m/z - TODO confirm</param>
/// <param name="tolerance">Matching tolerance; 10 ppm when null</param>
/// <returns>One peak per theoretical peak, in the order of <paramref name="theoreticalPeaks"/></returns>
public List<Peak> AlignObservedPeaks(IList<Peak> observedPeaks, IList<Peak> theoreticalPeaks, Tolerance tolerance = null)
{
    tolerance = tolerance ?? new Tolerance(10, ToleranceUnit.Ppm);

    // Remove empty peaks
    observedPeaks = observedPeaks.Where(peak => peak.Mz > 0.0).ToList();

    var alignedPeaks = new List<Peak>(theoreticalPeaks.Count);

    // Cursor into the observed peaks; it only moves forward across theoretical peaks
    var j = 0;
    foreach (var theoPeak in theoreticalPeaks)
    {
        var tolDa = tolerance.GetToleranceAsMz(theoPeak.Mz);
        var maxMz = theoPeak.Mz + tolDa;
        var selectedPeak = new Peak(theoPeak.Mz, 0);

        // Bounding the cursor by the list size (instead of clamping it to the last index) guarantees
        // termination when the last observed peak is at or below maxMz, and makes an empty observed
        // list yield placeholders rather than an out-of-range access
        while (j < observedPeaks.Count && observedPeaks[j].Mz <= maxMz)
        {
            var obsPeak = observedPeaks[j];
            var diff = Math.Abs(obsPeak.Mz - theoPeak.Mz);
            if (diff < tolDa && obsPeak.Intensity > selectedPeak.Intensity)
            {
                selectedPeak = obsPeak;
            }

            j++;
        }

        alignedPeaks.Add(selectedPeak);
    }

    return alignedPeaks;
}
/// <summary>
/// Collects, for each isotope of <paramref name="ion"/>, the most intense spectrum peak within tolerance,
/// starting at the most abundant isotope and walking outwards towards lower and higher isotope indices.
/// </summary>
/// <param name="spec">Spectrum to search</param>
/// <param name="ion">Ion whose isotopomer envelope is matched</param>
/// <param name="tolerance">Tolerance applied around each isotope m/z</param>
/// <param name="relativeIntensityThreshold">Walking in a direction stops at the first isotope whose relative intensity is below this value</param>
/// <param name="peakIndexList">Receives, per isotope index, the index of the matched peak in <c>spec.Peaks</c>; entries of unmatched isotopes keep the array default of 0</param>
/// <returns>The matched peak per isotope index (null where nothing matched), or null when the most abundant isotope has no matching peak</returns>
public Peak[] GetAllIsotopePeaks(Spectrum spec, Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int[] peakIndexList)
{
    var baseIsotope = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var envelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();

    peakIndexList = new int[envelope.Length];

    var baseMz = ion.GetIsotopeMz(baseIsotope);
    var basePeakIndex = spec.FindPeakIndex(baseMz, tolerance);
    if (basePeakIndex < 0)
    {
        return null;
    }

    var matched = new Peak[envelope.Length];
    matched[baseIsotope] = spec.Peaks[basePeakIndex];
    peakIndexList[baseIsotope] = basePeakIndex;

    // Towards lower isotope indices, scanning the spectrum towards lower m/z
    var cursor = basePeakIndex - 1;
    for (var isotope = baseIsotope - 1; isotope >= 0; isotope--)
    {
        if (envelope[isotope] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotope);
        var halfWidth = tolerance.GetToleranceAsMz(isotopeMz);
        var lowerMz = isotopeMz - halfWidth;
        var upperMz = isotopeMz + halfWidth;

        for (var i = cursor; i >= 0; i--)
        {
            var candidate = spec.Peaks[i];

            // Left the window: the next isotope resumes scanning from here
            if (candidate.Mz < lowerMz)
            {
                cursor = i;
                break;
            }

            if (candidate.Mz > upperMz)
            {
                continue;
            }

            // Inside the window: keep the most intense candidate
            if (matched[isotope] == null || candidate.Intensity > matched[isotope].Intensity)
            {
                matched[isotope] = candidate;
                peakIndexList[isotope] = i;
            }
        }
    }

    // Towards higher isotope indices, scanning the spectrum towards higher m/z
    cursor = basePeakIndex + 1;
    for (var isotope = baseIsotope + 1; isotope < envelope.Length; isotope++)
    {
        if (envelope[isotope] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotope);
        var halfWidth = tolerance.GetToleranceAsMz(isotopeMz);
        var lowerMz = isotopeMz - halfWidth;
        var upperMz = isotopeMz + halfWidth;

        for (var i = cursor; i < spec.Peaks.Length; i++)
        {
            var candidate = spec.Peaks[i];

            // Left the window: the next isotope resumes scanning from here
            if (candidate.Mz > upperMz)
            {
                cursor = i;
                break;
            }

            if (candidate.Mz < lowerMz)
            {
                continue;
            }

            // Inside the window: keep the most intense candidate
            if (matched[isotope] == null || candidate.Intensity > matched[isotope].Intensity)
            {
                matched[isotope] = candidate;
                peakIndexList[isotope] = i;
            }
        }
    }

    return matched;
}
/// <summary>
/// Loads the ProMex features of the six IMER runs (restricted by the arguments 500 and 15000), tags them
/// with MS-GF+ identifications when the result file exists, aligns the features across runs and writes
/// the cross-tab file.
/// </summary>
public void TestIMERFeatureAlignment()
{
    const string outFilePath = @"D:\MassSpecFiles\IMER\promex_crosstab.tsv";
    const string rawFolder = @"D:\MassSpecFiles\IMER";

    var runLabels = new[] { "1", "2", "3", "4", "5", "6" };
    var nDataset = runLabels.Length;

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(10);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        var label = runLabels[dataId];

        // Runs 2 and 3 carry "14May14" in their file names, all other runs "13May14"
        var day = (label.Equals("2") || label.Equals("3")) ? 14 : 13;

        var rawFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33.pbf", rawFolder, label, day);
        var mspFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33_msgfdb_syn.txt", rawFolder, label, day);
        var ms1FtFile = string.Format(@"{0}\Diabetes_iPSC_Beta_{1}_IMER_{2}May14_Alder_14-01-33.ms1ft", rawFolder, label, day);

        Console.WriteLine(rawFile);
        Console.WriteLine(File.Exists(rawFile));

        var run = PbfLcMsRun.GetLcMsRun(rawFile);
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, ms1FtFile, run, 500, 15000);

        // Identifications are optional; features are left untagged when the file is missing
        if (File.Exists(mspFile))
        {
            var prsmList = prsmReader.LoadIdentificationResult(mspFile, ProteinSpectrumMatch.SearchTool.MsGfPlus);

            foreach (var prsm in prsmList)
            {
                prsm.ProteinId = prsm.ProteinName;
            }

            // Tag each feature with the PrSMs that fall strictly inside its scan range and match its mass
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var prsm in prsmList)
                {
                    if (prsm.ScanNum <= feature.MinScanNum || prsm.ScanNum >= feature.MaxScanNum)
                    {
                        continue;
                    }

                    if (Math.Abs(feature.Mass - prsm.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(prsm);
                    }
                }
            }
        }

        alignment.AddDataSet(dataId, features, run);
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var dataId = 0; dataId < nDataset; dataId++)
    {
        alignment.FillMissingFeatures(dataId);
        Console.WriteLine("{0} has been processed", runLabels[dataId]);
    }

    OutputCrossTabWithId(outFilePath, alignment, runLabels);
}
/// <summary>
/// Entry point: reads a dataset info file, loads the ProMex features (and, when available, the MSPathFinder
/// identifications) of every dataset listed in it, aligns the features across datasets and writes
/// "&lt;input file name&gt;_crosstab.tsv" next to the input file.
/// </summary>
/// <param name="args">Command line arguments; the first one is the path of the dataset info file</param>
public static void Main(string[] args)
{
    if (args.Length == 0)
    {
        ShowSyntax();
        return;
    }

    // Parse file
    var inputFilePath = args[0];
    if (!File.Exists(inputFilePath))
    {
        ConsoleMsgUtils.ShowError("File not found: " + inputFilePath);
        return;
    }

    var datasets = DatasetInfo.ParseDatasetInfoFile(inputFilePath);
    if (datasets.Count == 0)
    {
        ConsoleMsgUtils.ShowError("No valid data found in the dataset info file");
        ShowSyntax();
        return;
    }

    var fileName = Path.GetFileNameWithoutExtension(inputFilePath);
    var directory = Path.GetDirectoryName(inputFilePath);
    var crosstabFilename = string.Format("{0}_crosstab.tsv", fileName);

    // Without a directory component the cross-tab file goes to the current directory
    var outputfilePath = string.IsNullOrWhiteSpace(directory)
        ? crosstabFilename
        : Path.Combine(directory, crosstabFilename);

    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(100);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    // Labels of the datasets that were actually added to the alignment; position == dataset id.
    // Tracking them separately keeps labels and dataset ids in step when a dataset is skipped.
    var alignedLabels = new System.Collections.Generic.List<string>();
    var skippedLabels = new System.Collections.Generic.List<string>();

    var dataId = 0;
    foreach (var dataset in datasets)
    {
        if (!File.Exists(dataset.RawFilePath))
        {
            ConsoleMsgUtils.ShowError("Instrument file not found: " + dataset.RawFilePath);
            skippedLabels.Add(dataset.Label);
            continue;
        }

        if (!File.Exists(dataset.Ms1FtFilePath))
        {
            ConsoleMsgUtils.ShowError("ProMex results file not found: " + dataset.Ms1FtFilePath);
            skippedLabels.Add(dataset.Label);
            continue;
        }

        Console.WriteLine("Opening " + dataset.RawFilePath);
        var run = PbfLcMsRun.GetLcMsRun(dataset.RawFilePath, 0, 0);

        Console.WriteLine("Opening " + dataset.Ms1FtFilePath);
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, dataset.Ms1FtFilePath, run);

        if (!string.IsNullOrWhiteSpace(dataset.MsPfIdFilePath) && File.Exists(dataset.MsPfIdFilePath))
        {
            Console.WriteLine("Opening " + dataset.MsPfIdFilePath);
            var prsmList = prsmReader.LoadIdentificationResult(dataset.MsPfIdFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            foreach (var match in prsmList)
            {
                match.ProteinId = match.ProteinName;
            }

            // tag features by PrSMs
            foreach (var feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var match in prsmList)
                {
                    if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum && Math.Abs(feature.Mass - match.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(match);
                    }
                }
            }
        }

        // Register the dataset under the same id its features were loaded with, then advance the id
        alignment.AddDataSet(dataId, features, run);
        alignedLabels.Add(dataset.Label);
        dataId++;
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    foreach (var label in skippedLabels)
    {
        ConsoleMsgUtils.ShowWarning(string.Format("Could not align {0}; features not found", label));
    }

    for (var datasetIndex = 0; datasetIndex < alignedLabels.Count; datasetIndex++)
    {
        alignment.FillMissingFeatures(datasetIndex);
        Console.WriteLine("{0} has been processed", alignedLabels[datasetIndex]);
    }

    if (alignedLabels.Count > 0)
    {
        OutputCrossTabWithId(outputfilePath, alignment, alignedLabels.ToArray());
    }
}
/// <summary>
/// Collects, for each isotope of a theoretical envelope at the given charge, the most intense peak
/// within tolerance, scanning the peak list once towards higher m/z.
/// </summary>
/// <param name="monoIsotopeMass">Not used by this method</param>
/// <param name="charge">Charge used to compute the isotope m/z values</param>
/// <param name="isotopeList">Theoretical isotope envelope</param>
/// <param name="tolerance">Tolerance applied around each isotope m/z</param>
/// <returns>Array with one entry per isotope; entries are null where no peak matched</returns>
public Ms1Peak[] GetAllIsotopePeaks(double monoIsotopeMass, int charge, TheoreticalIsotopeEnvelope isotopeList, Tolerance tolerance)
{
    var matched = new Ms1Peak[isotopeList.Size];

    var firstMz = isotopeList.GetIsotopeMz(charge, 0);
    var halfWidth = tolerance.GetToleranceAsMz(firstMz);
    var lowerMz = firstMz - halfWidth;
    var upperMz = firstMz + halfWidth;

    // Locate the lower edge of the first isotope's window; a negative result is the complement of the insertion point
    var start = Array.BinarySearch(Peaks, new Ms1Peak(lowerMz, 0, 0));
    if (start < 0)
    {
        start = ~start;
    }

    // First isotope: most intense peak (intensity above zero) below the upper edge of the window
    var bestIndex = -1;
    var maxIntensity = 0.0;
    for (var i = start; i >= 0 && i < Peaks.Length; i++)
    {
        if (Peaks[i].Mz >= upperMz)
        {
            break;
        }

        if (Peaks[i].Intensity > maxIntensity)
        {
            maxIntensity = Peaks[i].Intensity;
            bestIndex = i;
            matched[0] = (Ms1Peak)Peaks[i];
        }
    }

    // Remaining isotopes continue right after the first isotope's peak, or at the search start if it had none
    var cursor = bestIndex >= 0 ? bestIndex + 1 : start;

    for (var isotope = 1; isotope < isotopeList.Size; isotope++)
    {
        var isotopeMz = isotopeList.GetIsotopeMz(charge, isotope);
        halfWidth = tolerance.GetToleranceAsMz(isotopeMz);
        lowerMz = isotopeMz - halfWidth;
        upperMz = isotopeMz + halfWidth;

        for (var i = cursor; i < Peaks.Length; i++)
        {
            var candidate = Peaks[i];

            // Left the window: the next isotope resumes scanning from here
            if (candidate.Mz > upperMz)
            {
                cursor = i;
                break;
            }

            if (candidate.Mz < lowerMz)
            {
                continue;
            }

            // Inside the window: keep the most intense candidate
            if (matched[isotope] == null || candidate.Intensity > matched[isotope].Intensity)
            {
                matched[isotope] = (Ms1Peak)candidate;
            }
        }
    }

    return matched;
}
/// <summary>
/// Looks for the isotope peaks of <paramref name="ion"/> in the MS2 spectrum, starting at the most abundant
/// isotope and walking outwards in both directions while the isotopes stay at or above the intensity threshold.
/// </summary>
/// <param name="ion">Ion to look for</param>
/// <param name="tolerance">Tolerance applied around each isotope m/z</param>
/// <param name="relativeIntensityThreshold">Minimum relative intensity of the isotopes that are considered</param>
/// <param name="baseIsotopePeakIndex">Receives the index of the peak matching the most abundant isotope (negative when not found)</param>
/// <param name="nIsotopes">Receives the number of isotopes whose relative intensity is at or above the threshold</param>
/// <param name="nMatchedIsotopes">Receives the number of isotopes for which a peak was found</param>
/// <returns>
/// False when the most abundant isotope has no matching peak, or when, while scanning for another isotope,
/// a peak beyond that isotope's window is reached before a match; otherwise true
/// </returns>
private bool FindIon(Ion ion, Tolerance tolerance, double relativeIntensityThreshold, out int baseIsotopePeakIndex, out int nIsotopes, out int nMatchedIsotopes)
{
    var baseIsotopeIndex = ion.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    var isotopomerEnvelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    var baseIsotopMz = ion.GetIsotopeMz(baseIsotopeIndex);
    baseIsotopePeakIndex = _ms2Spec.FindPeakIndex(baseIsotopMz, tolerance);

    // Count only the isotopes that pass the threshold. Projecting with Select and then counting
    // would count every element of the envelope, regardless of the threshold.
    nIsotopes = isotopomerEnvelope.Count(x => x >= relativeIntensityThreshold);
    nMatchedIsotopes = 0;

    if (baseIsotopePeakIndex < 0)
    {
        return false;
    }

    nMatchedIsotopes++;

    // go down
    var peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex - 1; isotopeIndex >= 0; isotopeIndex--)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsMz(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        for (var i = peakIndex - 1; i >= 0; i--)
        {
            var peakMz = _ms2Spec.Peaks[i].Mz;
            if (peakMz < minMz)
            {
                // Passed the window without a match: the isotope is missing
                return false;
            }

            if (peakMz <= maxMz) // find match, move to prev isotope
            {
                peakIndex = i;
                nMatchedIsotopes++;
                break;
            }
        }
    }

    // go up
    peakIndex = baseIsotopePeakIndex;
    for (var isotopeIndex = baseIsotopeIndex + 1; isotopeIndex < isotopomerEnvelope.Length; isotopeIndex++)
    {
        if (isotopomerEnvelope[isotopeIndex] < relativeIntensityThreshold)
        {
            break;
        }

        var isotopeMz = ion.GetIsotopeMz(isotopeIndex);
        var tolTh = tolerance.GetToleranceAsMz(isotopeMz);
        var minMz = isotopeMz - tolTh;
        var maxMz = isotopeMz + tolTh;

        for (var i = peakIndex + 1; i < _ms2Spec.Peaks.Length; i++)
        {
            var peakMz = _ms2Spec.Peaks[i].Mz;
            if (peakMz > maxMz)
            {
                // Passed the window without a match: the isotope is missing
                return false;
            }

            if (peakMz >= minMz) // find match, move to next isotope
            {
                peakIndex = i;
                nMatchedIsotopes++;
                break;
            }
        }
    }

    return true;
}
/// <summary>
/// For each of the 32 UTEX datasets: loads the MSAlign identifications, builds one LC-MS feature per
/// group of identifications, merges features that agree in mass, co-elute and share a protein id,
/// and writes the surviving features to a .ms1ft file.
/// The test is ignored when the raw folder is missing or when no dataset could be processed.
/// </summary>
public void TestQuantifyIdedProteoforms()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    var nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var filesProcessed = 0;
    var tolerance = new Tolerance(10);

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);
        filesProcessed++;

        // The protein id is the 5 characters following the protein name prefix
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        // PrSM To Feature (-1 = not assigned yet)
        var prsmToFeatureIdMap = new int[prsmList.Count];
        for (var k = 0; k < prsmToFeatureIdMap.Length; k++)
        {
            prsmToFeatureIdMap[k] = -1;
        }

        // Feature To PrSM (parallel to featureList)
        var featureToPrsm = new List<ProteinSpectrumMatchSet>();
        var featureFinder = new LcMsPeakMatrix(run, new LcMsFeatureLikelihood());
        var featureList = new List<LcMsPeakCluster>();
        var featureId = 0;

        for (var j = 0; j < prsmList.Count; j++)
        {
            if (prsmToFeatureIdMap[j] >= 0)
            {
                continue;
            }

            var match = prsmList[j];
            var minScanNum = match.ScanNum;
            var maxScanNum = match.ScanNum;
            var mass = match.Mass;
            var charge = match.Charge;
            var massTh = tolerance.GetToleranceAsMz(mass);
            var id1 = match.ProteinId;

            var feature = featureFinder.GetLcMsPeakCluster(mass, charge, minScanNum, maxScanNum);
            var prsmSet = new ProteinSpectrumMatchSet(i) { match };
            prsmToFeatureIdMap[j] = featureId;

            if (feature == null)
            {
                // No regular feature found: fall back to a feature built from noise peaks
                feature = featureFinder.GetLcMsPeaksFromNoisePeaks(mass, charge, minScanNum, maxScanNum, charge, charge);
            }
            else
            {
                // Attach later PrSMs of the same protein that elute within the feature's elution range,
                // widened by etTol on both sides, and that match its mass
                var etTol = Math.Max(run.GetElutionTime(run.MaxLcScan) * 0.005, feature.ElutionLength * 0.2);
                for (var k = j + 1; k < prsmList.Count; k++)
                {
                    var otherMatch = prsmList[k];
                    var id2 = otherMatch.ProteinId;
                    var et2 = run.GetElutionTime(otherMatch.ScanNum);

                    // The upper bound adds etTol; subtracting it would narrow the window at the high
                    // end while widening it at the low end
                    if (id1.Equals(id2) &&
                        feature.MinElutionTime - etTol < et2 && et2 < feature.MaxElutionTime + etTol &&
                        Math.Abs(otherMatch.Mass - mass) < massTh)
                    {
                        prsmToFeatureIdMap[k] = featureId;
                        prsmSet.Add(otherMatch);
                    }
                }
            }

            featureId++;

            feature.Flag = 1;
            featureList.Add(feature);
            featureToPrsm.Add(prsmSet);
        }

        // Merge features that agree in mass, co-elute and share a protein id; Flag = 0 marks a merged-away feature
        for (var j = 0; j < featureList.Count; j++)
        {
            var f1 = featureList[j];
            if (f1.Flag < 1)
            {
                continue;
            }

            var prsm1 = featureToPrsm[j];

            for (var k = j + 1; k < featureList.Count; k++)
            {
                var f2 = featureList[k];
                if (f2.Flag < 1)
                {
                    continue;
                }

                var prsm2 = featureToPrsm[k];

                if (Math.Abs(f1.Mass - f2.Mass) > tolerance.GetToleranceAsMz(f1.Mass))
                {
                    continue;
                }

                if (!f1.CoElutedByNet(f2, 0.005))
                {
                    continue;
                }

                if (!prsm1.ShareProteinId(prsm2))
                {
                    continue;
                }

                // The feature with the longer scan length absorbs the PrSMs of the other one
                if (f1.ScanLength > f2.ScanLength)
                {
                    prsm1.AddRange(prsm2);
                    prsm2.Clear();
                    f2.Flag = 0;
                }
                else
                {
                    prsm2.AddRange(prsm1);
                    prsm1.Clear();
                    f1.Flag = 0;
                }
            }
        }

        // Write the surviving features; the using block closes the file even when writing fails
        var ms1ftFilePath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
        using (var writer = new StreamWriter(ms1ftFilePath))
        {
            writer.WriteLine(LcMsFeatureFinderLauncher.GetHeaderString());

            for (var j = 0; j < featureList.Count; j++)
            {
                var f1 = featureList[j];
                if (f1.Flag < 1)
                {
                    continue;
                }

                // Make sure the feature's scan range covers all of its PrSMs, plus one scan on each side
                var prsm1 = featureToPrsm[j];
                var minScanNum = run.GetPrevScanNum(prsm1.MinScanNum, 1);
                var maxScanNum = run.GetNextScanNum(prsm1.MaxScanNum, 1);
                f1.ExpandScanRange(minScanNum, maxScanNum);

                writer.Write("{0}\t", j + 1);
                writer.WriteLine(LcMsFeatureFinderLauncher.GetString(f1));
            }
        }

        Console.WriteLine(ms1ftFilePath);
    }

    if (filesProcessed == 0)
    {
        Assert.Ignore("Skipped since data files not found");
    }
}
/// <summary>
/// Times two identical passes of full product-ion XIC extraction over 100000 random
/// (product m/z, precursor m/z) pairs against the in-memory test run and prints the
/// elapsed seconds of each pass. Ignored when the test raw file is missing.
/// </summary>
public void TestGeneratingProductManyXics()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    var specFilePath = FilePaths.TestRawFilePath;
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    var lcmsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath);
    var tol = new Tolerance(10);

    // Random queries: product m/z in [50, 1500), precursor m/z in [390, 810).
    const int queryCount = 100000;
    var productMzs = new double[queryCount];
    var precursorMzs = new double[productMzs.Length];
    var random = new Random();
    for (var q = 0; q < productMzs.Length; q++)
    {
        productMzs[q] = random.NextDouble() * 1450.0 + 50.0;
        precursorMzs[q] = random.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    // First pass.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    for (var q = 0; q < productMzs.Length; q++)
    {
        var center = productMzs[q];
        var halfWidth = tol.GetToleranceAsMz(center);
        lcmsRun.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzs[q]);
    }
    timer.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", timer.Elapsed.TotalSeconds);

    // Second pass (same call, timed again from zero).
    timer.Restart();
    for (var q = 0; q < productMzs.Length; q++)
    {
        var center = productMzs[q];
        var halfWidth = tol.GetToleranceAsMz(center);
        lcmsRun.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzs[q]);
    }
    timer.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", timer.Elapsed.TotalSeconds);

    Console.WriteLine("Done");
}
/// <summary>
/// Times full product-ion XIC extraction for 100000 random (product m/z, precursor m/z)
/// pairs, first against the in-memory run built from <c>TestRawFilePath</c> ("Method 1")
/// and then against a <c>PbfLcMsRun</c> opened on the .raf file ("Method 2").
/// Ignored when either input file is missing.
/// </summary>
public void TestGeneratingProductXics()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, TestRawFilePath);
    }

    var inMemoryRun = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath);

    const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf";
    if (!File.Exists(rafFilePath))
    {
        Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", testName, rafFilePath);
    }

    var rafRun = new PbfLcMsRun(rafFilePath);
    var tol = new Tolerance(10);

    // Random queries: product m/z in [50, 1500), precursor m/z in [390, 810).
    const int queryCount = 100000;
    var productMzs = new double[queryCount];
    var precursorMzs = new double[productMzs.Length];
    var random = new Random();
    for (var q = 0; q < productMzs.Length; q++)
    {
        productMzs[q] = random.NextDouble() * 1450.0 + 50.0;
        precursorMzs[q] = random.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    // Method 1: in-memory run.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    for (var q = 0; q < productMzs.Length; q++)
    {
        var center = productMzs[q];
        var halfWidth = tol.GetToleranceAsMz(center);
        inMemoryRun.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzs[q]);
    }
    timer.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", timer.Elapsed.TotalSeconds);

    // Method 2: run opened on the .raf file, timed again from zero.
    timer.Restart();
    for (var q = 0; q < productMzs.Length; q++)
    {
        var center = productMzs[q];
        var halfWidth = tol.GetToleranceAsMz(center);
        rafRun.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzs[q]);
    }
    timer.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", timer.Elapsed.TotalSeconds);

    Console.WriteLine(@"Done");
}
/// <summary>
/// Builds training data for every dataset in <c>TrainSetFileLists</c>. For each row of the
/// filtered identification table it looks for the most abundant LC-MS feature that agrees
/// in mass and whose scan range contains the midpoint of the row's scan range, then writes
/// one line per row to a <c>.ms1ft</c> file plus envelope statistics (target and decoy) to
/// the statistics folder.
/// </summary>
/// <remarks>
/// Rows without a matching feature are written with zeros in all nine feature columns.
/// The test is ignored when the identification folder is missing; individual datasets are
/// skipped (with a warning) when their raw file or filtered result file is missing.
/// </remarks>
public void ExtractLcMsFeaturesForTrainingSet()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string idFileFolder = @"D:\MassSpecFiles\training\FilteredIdResult";
    if (!Directory.Exists(idFileFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, idFileFolder);
    }

    var tolerance = new Tolerance(10);
    var tolerance2 = new Tolerance(20);
    var id = 1;

    foreach (var dataset in TrainSetFileLists)
    {
        var dataname = Path.GetFileNameWithoutExtension(dataset);
        var filtedIdResultFile = string.Format(@"{0}\{1}.trainset.tsv", idFileFolder, dataname);
        var featureResult = string.Format(@"{0}\{1}.ms1ft", idFileFolder, dataname);

        if (!File.Exists(dataset))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", dataset);
            continue;
        }

        if (!File.Exists(filtedIdResultFile))
        {
            Console.WriteLine(@"Warning: Skipping since file not found: {0}", filtedIdResultFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(dataset);

        // 'using' guarantees that all three output files are closed even if processing throws.
        using (var targetStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}.tsv", dataname)))
        using (var decoyStatWriter = new StreamWriter(string.Format(@"D:\MassSpecFiles\training\statistics\{0}_decoy.tsv", dataname)))
        using (var writer = new StreamWriter(featureResult))
        {
            writer.Write("Ms2MinScan\tMs2MaxScan\tMs2MinCharge\tMs2MaxCharge\tMs2Mass\t");
            writer.Write("Mass\tMinScan\tMaxScan\tMinCharge\tMaxCharge\tMinTime\tMaxTime\tElution\tGood\n");

            var tsvParser = new TsvFileParser(filtedIdResultFile);
            var featureFinder = new LcMsPeakMatrix(run);

            // Column lookups do not depend on the row, so fetch each column once.
            var minScanColumn = tsvParser.GetData("MinScan");
            var maxScanColumn = tsvParser.GetData("MaxScan");
            var minChargeColumn = tsvParser.GetData("MinCharge");
            var maxChargeColumn = tsvParser.GetData("MaxCharge");
            var massColumn = tsvParser.GetData("Mass");

            for (var i = 0; i < tsvParser.NumData; i++)
            {
                var minScan = int.Parse(minScanColumn[i]);
                var maxScan = int.Parse(maxScanColumn[i]);
                var minCharge = int.Parse(minChargeColumn[i]);
                var maxCharge = int.Parse(maxChargeColumn[i]);
                var mass = double.Parse(massColumn[i]);

                writer.Write(minScan);
                writer.Write("\t");
                writer.Write(maxScan);
                writer.Write("\t");
                writer.Write(minCharge);
                writer.Write("\t");
                writer.Write(maxCharge);
                writer.Write("\t");
                writer.Write(mass);
                writer.Write("\t");

                // Search the mass bin of the identification first, then the neighbouring bin on the
                // side the mass lies towards, then the other neighbour.
                var binNum = featureFinder.Comparer.GetBinNumber(mass);
                var binMass = featureFinder.Comparer.GetMzAverage(binNum);
                var binNumList = (mass < binMass)
                    ? new[] { binNum, binNum - 1, binNum + 1 }
                    : new[] { binNum, binNum + 1, binNum - 1 };

                // Masses below 2000 use the wider tolerance (Tolerance(20)); others use Tolerance(10).
                var massTh = (mass < 2000) ? tolerance2.GetToleranceAsMz(mass) : tolerance.GetToleranceAsMz(mass);
                var midScan = 0.5 * (minScan + maxScan);

                LcMsPeakCluster refinedFeature = null;

                foreach (var bi in binNumList)
                {
                    var highestAbu = 0d;

                    foreach (var feature in featureFinder.FindFeatures(bi))
                    {
                        if (!(Math.Abs(mass - feature.Mass) < massTh))
                        {
                            continue;
                        }

                        // Keep the most abundant feature whose scan range strictly contains the midpoint.
                        if (feature.MinScanNum < midScan && midScan < feature.MaxScanNum && feature.Abundance > highestAbu)
                        {
                            refinedFeature = feature;
                            highestAbu = feature.Abundance;
                        }
                    }

                    if (refinedFeature != null)
                    {
                        break;
                    }
                }

                if (refinedFeature != null)
                {
                    writer.Write(refinedFeature.Mass);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinScanNum);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxScanNum);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinCharge);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxCharge);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MinElutionTime);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime);
                    writer.Write("\t");
                    writer.Write(refinedFeature.MaxElutionTime - refinedFeature.MinElutionTime);
                    writer.Write("\t");

                    // "Good" means the feature spans the whole scan range of the identification row.
                    var good = (refinedFeature.MinScanNum <= minScan && refinedFeature.MaxScanNum >= maxScan);
                    writer.Write(good ? 1 : 0);
                    writer.Write("\n");

                    // Target statistics are written before the feature is rescored as a decoy.
                    OutputEnvelopPeakStat(id, refinedFeature, targetStatWriter);

                    var chargeRange = featureFinder.GetDetectableMinMaxCharge(refinedFeature.RepresentativeMass, run.MinMs1Mz, run.MaxMs1Mz);
                    refinedFeature.UpdateWithDecoyScore(featureFinder.Ms1Spectra, chargeRange.Item1, chargeRange.Item2);
                    OutputEnvelopPeakStat(id, refinedFeature, decoyStatWriter);
                    id++;
                }
                else
                {
                    // No feature found: nine zero placeholders, one per feature column of the header.
                    writer.Write("0\t0\t0\t0\t0\t0\t0\t0\t0\n");
                }
            }
        }

        Console.WriteLine(dataname);
    }
}
/// <summary>
/// Loads the ProMex features and MSAlign PrSMs of the 32 UTEX datasets, tags each feature
/// with the PrSMs that fall inside its scan range and mass tolerance, aligns the features
/// across datasets, refines abundances, and writes one <c>_aligned.ms1ft</c> file per dataset.
/// </summary>
/// <remarks>
/// The test is ignored when the raw-file folder is missing or when no dataset had all three
/// of its input files (.pbf, MSAlign result table, .ms1ft).
/// </remarks>
public void TestAlignFeatures()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string rawFolder = @"\\proto-11\MSXML_Cache\PBF_Gen_1_193\2015_2";
    const string promexOutFolder = @"D:\MassSpecFiles\UTEX\MSAlign";
    const string msAlignResultFolder = @"D:\MassSpecFiles\UTEX\MSAlign";

    if (!Directory.Exists(rawFolder))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, rawFolder);
    }

    var nDataset = 32;
    var dataset = new string[nDataset];
    for (var i = 0; i < nDataset; i++)
    {
        dataset[i] = string.Format("Syn_utex2973_Top_{0,2:D2}_TopDown_7May15_Bane_14-09-01RZ", i + 1);
    }

    var tolerance = new Tolerance(10);
    var ftComparer = new UtexFeatureComparer(tolerance);
    var align = new LcMsFeatureAlignment(ftComparer);
    var prsmReader = new ProteinSpectrumMatchReader(0.01);
    var filesProcessed = 0;

    for (var i = 0; i < dataset.Length; i++)
    {
        var rawFile = string.Format(@"{0}\{1}.pbf", rawFolder, dataset[i]);
        if (!File.Exists(rawFile))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", rawFile);
            continue;
        }

        var run = PbfLcMsRun.GetLcMsRun(rawFile);

        var path = string.Format(@"{0}\{1}_MSAlign_ResultTable.txt", msAlignResultFolder, dataset[i]);
        if (!File.Exists(path))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", path);
            continue;
        }

        var ms1ftPath = string.Format(@"{0}\{1}.ms1ft", promexOutFolder, dataset[i]);
        if (!File.Exists(ms1ftPath))
        {
            Console.WriteLine(@"Warning: Skipping file not found: {0}", ms1ftPath);
            continue;
        }

        filesProcessed++;

        var prsmList = prsmReader.LoadIdentificationResult(path, ProteinSpectrumMatch.SearchTool.MsAlign);

        // The protein ID is the 5 characters that follow the prefix inside the protein name.
        for (var j = 0; j < prsmList.Count; j++)
        {
            var match = prsmList[j];
            match.ProteinId = match.ProteinName.Substring(
                match.ProteinName.IndexOf(ProteinNamePrefix) + ProteinNamePrefix.Length, 5);
        }

        var features = LcMsFeatureAlignment.LoadProMexResult(i, ms1ftPath, run);

        // Tag features with the PrSMs that lie strictly inside their scan range and within mass tolerance.
        for (var j = 0; j < features.Count; j++)
        {
            var massTol = tolerance.GetToleranceAsMz(features[j].Mass);
            foreach (var match in prsmList)
            {
                if (features[j].MinScanNum < match.ScanNum && match.ScanNum < features[j].MaxScanNum &&
                    Math.Abs(features[j].Mass - match.Mass) < massTol)
                {
                    features[j].ProteinSpectrumMatches.Add(match);
                }
            }
        }

        align.AddDataSet(i, features, run);
    }

    if (filesProcessed == 0)
    {
        Assert.Ignore("Skipped since input files not found");
    }

    align.AlignFeatures();
    Console.WriteLine("{0} alignments ", align.CountAlignedFeatures);
    align.RefineAbundance();

    var alignedFeatureList = align.GetAlignedFeatures();

    // NOTE(review): this loop indexes every dataset slot, including datasets skipped above;
    // it presumably relies on the alignment exposing one slot per dataset index - confirm.
    for (var i = 0; i < nDataset; i++)
    {
        var ms1ftPath = string.Format(@"{0}\{1}_aligned.ms1ft", promexOutFolder, dataset[i]);

        // 'using' guarantees the file is closed even if a write throws.
        using (var writer = new StreamWriter(ms1ftPath))
        {
            writer.Write(LcMsFeatureFinderLauncher.GetHeaderString());
            writer.WriteLine("\tIdedMs2ScanNums");

            for (var j = 0; j < alignedFeatureList.Count; j++)
            {
                writer.Write(j + 1);
                writer.Write("\t");

                var alignedFeature = alignedFeatureList[j][i];
                if (alignedFeature == null)
                {
                    // No feature for this dataset: 15 tab-separated zero placeholders.
                    for (var k = 0; k < 14; k++)
                    {
                        writer.Write("0\t");
                    }

                    writer.Write("0\n");
                    continue;
                }

                writer.Write(LcMsFeatureFinderLauncher.GetString(alignedFeature));
                writer.Write("\t");

                // The last column lists the scan numbers of the tagged PrSMs; empty when there is no PrSM set.
                if (alignedFeature.ProteinSpectrumMatches != null)
                {
                    var scanNums = string.Join(";", alignedFeature.ProteinSpectrumMatches.Select(prsm => prsm.ScanNum));
                    writer.Write(scanNums);
                }

                writer.Write("\n");
            }
        }
    }
}
/// <summary>
/// For every Study3 dataset (3 x 5 fraction combinations) collects the PrSMs that are not
/// covered by any ProMex feature (scan inside the feature's scan range and mass within
/// tolerance) and passes them to <c>FeatureFind</c>, which receives the
/// <c>.seqtag.ms1ft</c> output path.
/// </summary>
/// <remarks>
/// A dataset is skipped when one of its input files is missing or when its output file
/// already exists. The test is ignored when no dataset was processed.
/// </remarks>
public void FindMissingLcMsFeatures()
{
    const int firstFractionCount = 3;
    const int secondFractionCount = 5;

    var identificationFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";
    var featureFolder = @"D:\MassSpecFiles\CompRef_Kelleher\Study3";
    var processedCount = 0;

    for (var first = 1; first <= firstFractionCount; first++)
    {
        for (var second = 1; second <= secondFractionCount; second++)
        {
            var datasetNames = GetDataSetNamesStudy3(first, second);
            var datasetCount = datasetNames.Count;
            var reader = new ProteinSpectrumMatchReader();
            var massTolerance = new Tolerance(12);

            for (var d = 0; d < datasetCount; d++)
            {
                var name = datasetNames[d];
                var pbfFile = string.Format(@"{0}\{1}.pbf", PbfPath, name);
                var idFile = string.Format(@"{0}\{1}_IcTda.tsv", identificationFolder, name);
                var featureFile = string.Format(@"{0}\{1}.ms1ft", featureFolder, name);
                var resultFile = string.Format(@"{0}\{1}.seqtag.ms1ft", featureFolder, name);

                // Report the first missing input, checked in the order raw file, feature file, identification file.
                string missingInput = null;
                if (!File.Exists(pbfFile))
                {
                    missingInput = pbfFile;
                }
                else if (!File.Exists(featureFile))
                {
                    missingInput = featureFile;
                }
                else if (!File.Exists(idFile))
                {
                    missingInput = idFile;
                }

                if (missingInput != null)
                {
                    Console.WriteLine(@"Skipping dataset since file not found: " + missingInput);
                    continue;
                }

                if (File.Exists(resultFile))
                {
                    Console.WriteLine(@"Skipping dataset since results file already exists: " + resultFile);
                    continue;
                }

                processedCount++;

                var lcmsRun = PbfLcMsRun.GetLcMsRun(pbfFile);
                var featureSet = LcMsFeatureAlignment.LoadProMexResult(d, featureFile, lcmsRun);
                var prsms = reader.LoadIdentificationResult(idFile, ProteinSpectrumMatch.SearchTool.MsPathFinder);

                // covered[p] becomes true once PrSM p has been attached to at least one feature.
                var covered = new bool[prsms.Count];

                foreach (var ft in featureSet)
                {
                    var tolTh = massTolerance.GetToleranceAsMz(ft.Mass);

                    for (var p = 0; p < prsms.Count; p++)
                    {
                        var prsm = prsms[p];
                        var insideScanRange = ft.MinScanNum < prsm.ScanNum && prsm.ScanNum < ft.MaxScanNum;
                        if (!insideScanRange || !(Math.Abs(ft.Mass - prsm.Mass) < tolTh))
                        {
                            continue;
                        }

                        ft.ProteinSpectrumMatches.Add(prsm);
                        covered[p] = true;
                    }
                }

                // PrSMs that no feature covered.
                var uncovered = new List<ProteinSpectrumMatch>();
                for (var p = 0; p < prsms.Count; p++)
                {
                    if (covered[p])
                    {
                        continue;
                    }

                    uncovered.Add(prsms[p]);
                }

                FeatureFind(uncovered, lcmsRun, resultFile);
                Console.WriteLine(resultFile);
            }
        }
    }

    if (processedCount == 0)
    {
        Assert.Ignore(@"Skipping since data files were not found");
    }
}
/// <summary>
/// Tags aligned features with identifications. Reads the aligned-feature table
/// (<c>Output\aligned_features.tsv</c>), then for every dataset walks its <c>_IcTda.tsv</c>
/// rows (stopping at the first row with QValue &gt; 0.01) and attaches each row to the first
/// aligned feature that is within mass tolerance and whose scan range for that dataset
/// strictly contains the row's scan. The result is written to <c>Output\aligned_ids.tsv</c>.
/// </summary>
/// <remarks>
/// The test is ignored when the feature folder, the MSP folder or the aligned-feature table
/// is missing. The mass lookup uses a binary search, so the MonoMass column is assumed to be
/// sorted ascending - TODO confirm against the producer of the table.
/// </remarks>
public void TestTagAlignedFeatures()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var featureDir = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, "Output");
    var mspDir = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"Output\MSP");
    var outFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"Output\aligned_features.tsv");

    // No leading backslash here: Path.Combine returns only the second argument when it is rooted,
    // which previously dropped the test folder and wrote to the root of the current drive.
    var resultFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"Output\aligned_ids.tsv");

    if (!Directory.Exists(featureDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, featureDir);
    }

    if (!Directory.Exists(mspDir))
    {
        Assert.Ignore(@"Skipping test {0} since folder not found: {1}", methodName, mspDir);
    }

    if (!File.Exists(outFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, outFile);
    }

    var dataset = GetDataList(featureDir);
    var tsvParser = new TsvFileParser(outFile);

    var massList = new List<double>();
    var monoMassColumn = tsvParser.GetData("MonoMass");
    for (var i = 0; i < tsvParser.NumData; i++)
    {
        massList.Add(Double.Parse(monoMassColumn[i]));
    }

    // Aligned-feature row index -> raw identification row (the first identification wins).
    var featureIdMap = new Dictionary<int, string>();
    var tolerance = new Tolerance(12);
    var headers = new List<string>();

    for (var d = 0; d < dataset.Count; d++)
    {
        var data = dataset[d];

        // Per-dataset scan-range columns of the aligned-feature table; fetched once per dataset.
        var minScanColumn = tsvParser.GetData(string.Format("{0}_minScan", d));
        var maxScanColumn = tsvParser.GetData(string.Format("{0}_maxScan", d));

        var fname = string.Format(@"{0}\{1}_IcTda.tsv", mspDir, data);
        var idParser = new TsvFileParser(fname);
        var idRows = idParser.GetRows();

        if (headers.Count < 1)
        {
            headers.AddRange(idParser.GetHeaders());
        }

        var scanColumn = idParser.GetData("Scan");
        var massColumn = idParser.GetData("Mass");
        var qValueColumn = idParser.GetData("QValue");

        for (var i = 0; i < idParser.NumData; i++)
        {
            var scan = Int32.Parse(scanColumn[i]);
            var mass = Double.Parse(massColumn[i]);
            var qvalue = Double.Parse(qValueColumn[i]);

            // Stops at the first row above the threshold (rows are presumably ordered by QValue).
            if (qvalue > 0.01)
            {
                break;
            }

            var massTol = tolerance.GetToleranceAsMz(mass);

            // BinarySearch returns the index of a match, or the bitwise complement of the insertion
            // point. The insertion point may equal massList.Count and refers to the first element
            // that is LARGER than 'mass', so the downward scan must start one below it; starting at
            // the insertion point itself could index out of range or stop before checking the
            // closer, smaller neighbours.
            var idx = massList.BinarySearch(mass);
            int downStart;
            int upStart;
            if (idx >= 0)
            {
                downStart = idx;
                upStart = idx + 1;
            }
            else
            {
                upStart = ~idx;
                downStart = upStart - 1;
            }

            var found = false;

            // Scan towards smaller masses until the tolerance window is left.
            for (var j = downStart; j >= 0; j--)
            {
                if (Math.Abs(mass - massList[j]) > massTol)
                {
                    break;
                }

                // An empty cell means the aligned feature was not observed in this dataset.
                if (minScanColumn[j].Length < 1)
                {
                    continue;
                }

                if (Int32.Parse(minScanColumn[j]) < scan && scan < Int32.Parse(maxScanColumn[j]))
                {
                    found = true;
                    if (!featureIdMap.ContainsKey(j))
                    {
                        featureIdMap.Add(j, idRows[i]);
                    }

                    break;
                }
            }

            if (found)
            {
                continue;
            }

            // Scan towards larger masses until the tolerance window is left.
            for (var j = upStart; j < massList.Count; j++)
            {
                if (Math.Abs(mass - massList[j]) > massTol)
                {
                    break;
                }

                if (minScanColumn[j].Length < 1)
                {
                    continue;
                }

                if (Int32.Parse(minScanColumn[j]) < scan && scan < Int32.Parse(maxScanColumn[j]))
                {
                    if (!featureIdMap.ContainsKey(j))
                    {
                        featureIdMap.Add(j, idRows[i]);
                    }

                    break;
                }
            }
        }
    }

    // 'using' guarantees the result file is closed even if a write throws.
    using (var writer = new StreamWriter(resultFile))
    {
        writer.Write("AlignedFeatureID");
        writer.Write("\t");
        writer.Write(string.Join("\t", headers));

        // The output always carries the 32 columns named "0".."31" of the aligned-feature table.
        for (var i = 0; i < 32; i++)
        {
            writer.Write("\t");
            writer.Write("{0}", i);
        }

        writer.Write("\n");

        var id = 1;
        foreach (var entry in featureIdMap)
        {
            writer.Write(id);
            writer.Write("\t");
            writer.Write(entry.Value);

            for (var i = 0; i < 32; i++)
            {
                writer.Write("\t");
                writer.Write("{0}", tsvParser.GetData(string.Format("{0}", i))[entry.Key]);
            }

            writer.Write("\n");
            id++;
        }
    }
}
/// <summary>
/// Entry point: aligns the LC-MS features of all datasets listed in a dataset-info file and
/// writes a cross-tab next to that file as <c>&lt;name&gt;_crosstab.tsv</c>.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the dataset-info file.</param>
/// <exception cref="ArgumentException">Thrown when no dataset-info file path is supplied.</exception>
public static void Main(string[] args)
{
    if (args == null || args.Length < 1)
    {
        throw new ArgumentException("Expected the path of the dataset info file as the first argument.", "args");
    }

    // Parse file
    var inputFilePath = args[0];
    var datasets = DatasetInfo.ParseDatasetInfoFile(inputFilePath);

    // GetDirectoryName returns null for a root path; fall back to a relative output path then.
    var fileName = Path.GetFileNameWithoutExtension(inputFilePath);
    var directory = Path.GetDirectoryName(inputFilePath) ?? string.Empty;
    var outputfilePath = Path.Combine(directory, string.Format("{0}_crosstab.tsv", fileName));

    int nDataset = datasets.Count;
    var prsmReader = new ProteinSpectrumMatchReader();
    var tolerance = new Tolerance(100);
    var alignment = new LcMsFeatureAlignment(new CompRefFeatureComparer(tolerance));

    int dataId = 0;
    foreach (var dataset in datasets)
    {
        var run = PbfLcMsRun.GetLcMsRun(dataset.RawFilePath, 0, 0);

        // The same id must be used for loading the features and for registering the dataset.
        // The original incremented dataId inside the LoadProMexResult call, so AddDataSet
        // received an id that was one higher than the id stored with the features.
        var features = LcMsFeatureAlignment.LoadProMexResult(dataId, dataset.Ms1FtFilePath, run);

        if (File.Exists(dataset.MsPfIdFilePath))
        {
            var prsmList = prsmReader.LoadIdentificationResult(dataset.MsPfIdFilePath, ProteinSpectrumMatch.SearchTool.MsPathFinder);

            foreach (var match in prsmList)
            {
                match.ProteinId = match.ProteinName;
            }

            // Tag features with the PrSMs that lie strictly inside their scan range and within mass tolerance.
            foreach (LcMsFeature feature in features)
            {
                var massTol = tolerance.GetToleranceAsMz(feature.Mass);
                foreach (var match in prsmList)
                {
                    if (feature.MinScanNum < match.ScanNum && match.ScanNum < feature.MaxScanNum &&
                        Math.Abs(feature.Mass - match.Mass) < massTol)
                    {
                        feature.ProteinSpectrumMatches.Add(match);
                    }
                }
            }
        }

        alignment.AddDataSet(dataId, features, run);
        dataId++;
    }

    alignment.AlignFeatures();
    Console.WriteLine("{0} alignments ", alignment.CountAlignedFeatures);

    for (var i = 0; i < nDataset; i++)
    {
        alignment.FillMissingFeatures(i);
        Console.WriteLine("{0} has been processed", datasets[i].Label);
    }

    OutputCrossTabWithId(outputfilePath, alignment, datasets.Select(ds => ds.Label).ToArray());
}