public void TestGettingXicVector() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); if (!File.Exists(TestRawFilePath)) { Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath); } var run1 = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0); var run2 = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0); Assert.True(run1 != null && run2 != null); var comparer = new MzComparerWithBinning(27); const double minMz = 600.0; // 600.0 const double maxMz = 2000.0; // 2000.0 var minBinNum = comparer.GetBinNumber(minMz); var maxBinNum = comparer.GetBinNumber(maxMz); Console.WriteLine(@"NumBins: " + (maxBinNum - minBinNum)); var warnCount = 0; var sw = new Stopwatch(); sw.Start(); for (var binNum = minBinNum; binNum <= maxBinNum; binNum++) { var mzStart = comparer.GetMzStart(binNum); var mzEnd = comparer.GetMzEnd(binNum); var vec1 = run1.GetFullPrecursorIonExtractedIonChromatogramVector(mzStart, mzEnd); var vec2 = run2.GetFullPrecursorIonExtractedIonChromatogramVector(mzStart, mzEnd); Assert.True(vec1.Length == vec2.Length, "Extracted Ion Chromatogram vector length mismatch, {0} vs. {1}", vec1.Length, vec2.Length); var matchCount = 0; var misMatchCount = 0; var positiveCount = 0; for (var i = 0; i < vec2.Length; i++) { if (vec1[i] > 0 || vec2[i] > 0) { positiveCount++; } if (Math.Abs(vec1[i] - vec2[i]) < float.Epsilon) { matchCount++; } else { misMatchCount++; } } if (misMatchCount <= 0 || positiveCount == 0) { continue; } var fractionAgreement = 1 - misMatchCount / (double)positiveCount; if (fractionAgreement < 0.80 && misMatchCount > 2) { Console.WriteLine(@"{0}/{1} Xic values do not match for bin {2} ({3:0.00} m/z); {4:0.0}% agreement", misMatchCount, positiveCount, binNum, mzStart, fractionAgreement * 100); warnCount++; } } sw.Stop(); Console.WriteLine(@"{0:f4} sec", sw.Elapsed.TotalSeconds); Assert.IsTrue(warnCount < 10, "Too many Xic mismatch warnings: {0}", warnCount); }
public void TestMsAlignPlusResults() { var methodName = MethodBase.GetCurrentMethod().Name; TestUtils.ShowStarting(methodName); TopDownScorer.MaxCharge = 25; TopDownScorer.MinCharge = 8; const string specFilePath = @"C:\workspace\TopDown\E_coli_iscU_60_mock.raw"; const string msAlignPlusResultPath = @"C:\workspace\TopDown\E_coli_iscU_60_mock_MSAlign_ResultTable_sam.txt"; var dehydro = new SearchModification(Modification.PyroGluQ, 'C', SequenceLocation.Everywhere, false); var cysteinylC = new SearchModification(Modification.Cysteinyl, 'C', SequenceLocation.Everywhere, false); var glutathioneC = new SearchModification(Modification.Glutathione, 'C', SequenceLocation.Everywhere, false); var searchModifications = new List <SearchModification> { //pyroGluQ, dehydro, cysteinylC, glutathioneC, //oxM }; var aaSet = new AminoAcidSet(searchModifications, 0); var precursorTolerance = new Tolerance(10); var run = InMemoryLcMsRun.GetLcMsRun(specFilePath); var writer = new StreamWriter(msAlignPlusResultPath + ".txt"); var reader = new StreamReader(msAlignPlusResultPath); string s; while ((s = reader.ReadLine()) != null) { if (s.StartsWith("Data_file_name ")) { writer.WriteLine(s + "\tScore"); continue; } var token = s.Split('\t'); var annotation = token[13]; // Console.WriteLine("***\t" + annotation); var seqGraph = SequenceGraph.CreateGraph(aaSet, annotation); if (seqGraph == null) { writer.WriteLine(s + "\tN/A"); continue; } var protCompositions = seqGraph.GetSequenceCompositions(); var scorer = new TopDownScorer(protCompositions[0], run, precursorTolerance, null); var score = scorer.GetScore(); writer.WriteLine(s + "\t" + score); Console.WriteLine(score); } writer.Close(); reader.Close(); }
public void AnalyizeFusionDdaData() { var methodName = MethodBase.GetCurrentMethod().Name; TestUtils.ShowStarting(methodName); // Parameters //const double relativeIntensityThreshold = 0.7; const double precursorTolerancePpm = 20; const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw"; var run = InMemoryLcMsRun.GetLcMsRun(specFilePath); const double fdrThreshold = 0.01; var tolerance = new Tolerance(precursorTolerancePpm); var aaSet = new AminoAcidSet(Modification.Carbamidomethylation); const string resultFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv"; Console.WriteLine("IsDecoy\tPeptide\tScanNum\tCharge\tSpecEValue\tQValue\tPrecursorMz" + "\tTheo0\tTheo1\tTheo2\tTheo3" + "\tObs0\tCorr0\tObs1\tCorr1\tObs2\tCorr2\tObs3\tCorr3\tObs-1\tCorr-1\tObs0.5\tCorr0.5"); foreach (var line in File.ReadLines(resultFilePath)) { if (line.StartsWith("#")) { continue; } var token = line.Split('\t'); if (token.Length != 16) { continue; } var qValue = Convert.ToDouble(token[14]); if (qValue > fdrThreshold) { continue; } var peptide = token[8].Replace("C+57.021", "C"); var scanNum = Convert.ToInt32(token[2]); var charge = Convert.ToInt32(token[7]); var specEValue = Convert.ToDouble(token[12]); var protein = token[9]; var isDecoy = protein.StartsWith("XXX_"); var precursorIon = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge); var baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum); var baseIntensity = baseXic.GetSumIntensities(); Console.Write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", (isDecoy ? 1 : 0), peptide, scanNum, charge, specEValue, qValue, precursorIon.GetMonoIsotopicMz()); var isotopeIndices = new double[] { 0, 1, 2, 3, -1, 0.5 }; var theoIsotopes = precursorIon.GetIsotopes(0.01); var numIsotopes = 0; foreach (var theoIsotope in theoIsotopes) { Console.Write("\t" + theoIsotope.Ratio); if (++numIsotopes == 4) { break; } } foreach (var isotopeIndex in isotopeIndices) { var isotopeMz = precursorIon.GetIsotopeMz(isotopeIndex); var xic = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum); var relativeIntensity = xic.GetSumIntensities() / baseIntensity; var correlation = xic.GetCorrelation(baseXic); Console.Write("\t{0}\t{1}", relativeIntensity, correlation); } Console.WriteLine(); } }
public void TestGeneratingProductXics() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); if (!File.Exists(TestRawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath); // const string rafFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raf"; const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf"; if (!File.Exists(rafFilePath)) { Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", methodName, rafFilePath); } var rafRun = new PbfLcMsRun(rafFilePath); var tolerance = new Tolerance(10); var mzArr = new double[100000]; var precursorMzArr = new double[mzArr.Length]; var rnd = new Random(); for (var i = 0; i < mzArr.Length; i++) { mzArr[i] = rnd.NextDouble() * 1450.0 + 50.0; precursorMzArr[i] = rnd.NextDouble() * (810.0 - 390.0) + 390.0; } var sw = new System.Diagnostics.Stopwatch(); //double sec; // method 1 sw.Start(); for (var i = 0; i < mzArr.Length; i++) { var mz = mzArr[i]; var tolTh = tolerance.GetToleranceAsMz(mz); var minMz = mz - tolTh; var maxMz = mz + tolTh; var xic1 = run.GetFullProductExtractedIonChromatogram(minMz, maxMz, precursorMzArr[i]); //var xic2 = rafRun.GetFullProductExtractedIonChromatogram(minMz, maxMz, precursorMzArr[i]); //Assert.True(xic1.Equals(xic2)); } sw.Stop(); Console.WriteLine(@"Method 1: {0:f4} sec", sw.Elapsed.TotalSeconds); sw.Reset(); sw.Start(); for (var i = 0; i < mzArr.Length; i++) { var mz = mzArr[i]; var tolTh = tolerance.GetToleranceAsMz(mz); var minMz = mz - tolTh; var maxMz = mz + tolTh; rafRun.GetFullProductExtractedIonChromatogram(minMz, maxMz, precursorMzArr[i]); } sw.Stop(); Console.WriteLine(@"Method 2: {0:f4} sec", sw.Elapsed.TotalSeconds); Console.WriteLine(@"Done"); }
public void TestFusionDdaData() { var methodName = MethodBase.GetCurrentMethod().Name; TestUtils.ShowStarting(methodName); // Parameters const double relativeIntensityThreshold = 0.7; const double precursorTolerancePpm = 20; //const double isotopeRatioTolerance = 2; //const double correlationThreshold = 0.3; const double fdrThreshold = 0.01; const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw"; var run = InMemoryLcMsRun.GetLcMsRun(specFilePath); var sw = new System.Diagnostics.Stopwatch(); sw.Start(); var tolerance = new Tolerance(precursorTolerancePpm); var aaSet = new AminoAcidSet(Modification.Carbamidomethylation); const string resultFilePath = @"D:\Research\Data\UW\Fusion\oldResult\WT_D_DDA_130412065618_10ppm_TI2_SGD_Decoy.tsv"; var numTargets = 0; var numValidTargets = 0; var numDecoys = 0; var numValidDecoys = 0; foreach (var line in File.ReadLines(resultFilePath)) { if (line.StartsWith("#")) { continue; } var token = line.Split('\t'); if (token.Length != 16) { continue; } var qValue = Convert.ToDouble(token[14]); if (qValue > fdrThreshold) { continue; } var peptide = token[8].Replace("C+57.021", "C"); var scanNum = Convert.ToInt32(token[2]); var charge = Convert.ToInt32(token[7]); var protein = token[9]; var isDecoy = protein.StartsWith("XXX_"); if (isDecoy) { numDecoys++; } else { numTargets++; } var precursorIon = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge); var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex(); var baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum); var baseIntensity = baseXic.GetSumIntensities(); var isValid = true; foreach (var isotope in precursorIon.GetIsotopes(relativeIntensityThreshold)) { if (isotope.Index == basePeakIndex) { continue; } var isotopeMz = precursorIon.GetIsotopeMz(isotope.Index); var xic = run.GetPrecursorExtractedIonChromatogram(isotopeMz, tolerance, scanNum); if (xic.Count == 0) { isValid = false; break; } //if (xic.Count > 0) //{ // var isotopeRatio = xic.GetSumIntensities() / baseIntensity / isotope.Item2; // var correlation = xic.GetCorrelation(baseXic); // if (isotopeRatio > 0.8 && isotopeRatio < 1.2 // && correlation > 0.8) // { // isValid = true; // } //} // Check if isotope ratio is within tolerance //if (isotopeRatio > isotopeRatioTolerance || isotopeRatio < 1 / isotopeRatioTolerance) //{ // isValid = false; // //Console.WriteLine("Off ratio\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", isDecoy, peptide, scanNum, charge, precursorIon.GetMonoIsotopicMz(), isotopeMz, isotopeRatio); // break; //} // Check if correlation is high //if (correlation < correlationThreshold) //{ // isValid = false; // //Console.WriteLine("Low correlation\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", isDecoy, peptide, scanNum, charge, precursorIon.GetMonoIsotopicMz(), isotopeMz, correlation); // break; //} } if (isValid && !isDecoy) { numValidTargets++; } else if (isValid) { numValidDecoys++; } //Console.WriteLine("{0}\t{1}\t{2}", peptide, scanNum, charge); } Console.WriteLine("#Targets: {0}", numTargets); Console.WriteLine("#ValidTargets: {0}\t{1}", numValidTargets, numValidTargets / (double)numTargets); Console.WriteLine("#Decoys: {0}", numDecoys); Console.WriteLine("#ValidDecoys: {0}\t{1}", numValidDecoys, numValidDecoys / (double)numDecoys); sw.Stop(); Console.WriteLine(@"TimeForPrecursorValidation {0:f4} sec", sw.Elapsed.TotalSeconds); }
public IcBottomUpRescorer(string specFilePath, string icResultFilePath, string outputFilePath, AminoAcidSet aaSet, Tolerance tolerance) { _run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 0.0); Rescore(icResultFilePath, outputFilePath); }
public void TestMs1Filtering() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); const string resultFilePath = // @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.tsv"; @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.decoy.icresult"; if (!File.Exists(resultFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath); } const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw"; if (!File.Exists(rawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826); //const int minPrecursorCharge = 3; //const int maxPrecursorCharge = 30; //const int tolerancePpm = 15; var tolerance = new Tolerance(15); //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 0.7, 40); ////var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20); //ISequenceFilter ms1Filter = ms1BasedFilter; var tsvReader = new TsvFileParser(resultFilePath); var compositions = tsvReader.GetData("Composition"); var scanNums = tsvReader.GetData("ScanNum"); var charges = tsvReader.GetData("Charge"); var qValues = tsvReader.GetData("QValue"); var scores = tsvReader.GetData("Score"); //var sequences = tsvReader.GetData("Annotation"); //var hist = new int[11]; Console.WriteLine("ScanNum\tScore\tPrecursor\tNext\tSum\tNextIsotope\tLessCharge\tMoreCharge\tMax\tNumXicPeaks"); for (var i = 0; i < compositions.Count; i++) { if (qValues != null) { var qValue = Convert.ToDouble(qValues[i]); if (qValue > 0.01) { continue; } } var scanNum = Convert.ToInt32(scanNums[i]); var composition = Composition.Parse(compositions[i]); var charge = Convert.ToInt32(charges[i]); var precursorIon = new Ion(composition, charge); var spec = run.GetSpectrum(scanNum) as ProductSpectrum; var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } var score = Convert.ToDouble(scores[i]); var precursorScanNum = run.GetPrecursorScanNum(scanNum); var precursorSpec = run.GetSpectrum(precursorScanNum); var preIsotopeCorr = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1); var nextScanNum = run.GetNextScanNum(scanNum, 1); var nextSpec = run.GetSpectrum(nextScanNum); var nextIsotopeCorr = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1); var xicMostAbundant = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance, scanNum); var apexScanNum = xicMostAbundant.GetApexScanNum(); if (apexScanNum < run.MinLcScan) { apexScanNum = scanNum; } //var sumSpec = run.GetSummedMs1Spectrum(apexScanNum); // var apexIsotopeCorr = sumSpec.GetCorrScore(precursorIon, tolerance, 0.1); // var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0; var xicNextIsotope = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz() + Constants.C13MinusC12 / charge, tolerance, scanNum); var plusOneIsotopeCorr = xicMostAbundant.GetCorrelation(xicNextIsotope); var precursorIonChargeMinusOne = new Ion(composition, charge - 1); var xicChargeMinusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargeMinusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum); var chargeMinusOneCorr = xicMostAbundant.GetCorrelation(xicChargeMinusOne); var precursorIonChargePlusOne = new Ion(composition, charge + 1); var xicChargePlusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargePlusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum); var chargePlusOneCorr = xicMostAbundant.GetCorrelation(xicChargePlusOne); //var max = new[] {preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr}.Max(); //Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}", // scanNum, score, preIsotopeCorr, nextIsotopeCorr, apexIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr, max, xicMostAbundant.Count); } //Console.WriteLine("Histogram"); //for (var i = 0; i < hist.Length; i++) //{ // Console.WriteLine("{0:f1}\t{1}", i / 10.0, hist[i]); //} }
public void FilteringEfficiency() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); var sw = new System.Diagnostics.Stopwatch(); sw.Start(); const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw"; if (!File.Exists(rawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826); sw.Stop(); Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds); const int minPrecursorCharge = 3; const int maxPrecursorCharge = 30; const int tolerancePpm = 10; var tolerance = new Tolerance(tolerancePpm); sw.Reset(); sw.Start(); //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm); // //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20); //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run, // minPrecursorCharge, maxPrecursorCharge, // 0, 0, // 600.0, 1800.0, new Tolerance(tolerancePpm), null); //ms1BasedFilter.CachePrecursorMatchesBinCentric(); var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40); //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40); sw.Stop(); Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds); ISequenceFilter ms1Filter = ms1BasedFilter; sw.Reset(); sw.Start(); const double minProteinMass = 3000.0; const double maxProteinMass = 30000.0; var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass); var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass); var numComparisons = 0L; for (var binNum = minBinNum; binNum <= maxBinNum; binNum++) { var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum); numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count(); } sw.Stop(); Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds); const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult"; if (!File.Exists(resultFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath); } var tsvReader = new TsvFileParser(resultFilePath); var compositions = tsvReader.GetData("Composition"); var scanNums = tsvReader.GetData("ScanNum"); var charges = tsvReader.GetData("Charge"); var scores = tsvReader.GetData("Score"); var qvalues = tsvReader.GetData("QValue"); var sequences = tsvReader.GetData("Sequence"); var sequenceCount = new Dictionary <string, int>(); for (var i = 0; i < compositions.Count; i++) { if (qvalues != null) { var qValue = Convert.ToDouble(qvalues[i]); if (qValue > 0.01) { continue; } } else { var score = Convert.ToDouble(scores[i]); if (score < 13) { continue; } } var scanNum = Convert.ToInt32(scanNums[i]); var charge = Convert.ToInt32(charges[i]); var composition = Composition.Parse(compositions[i]); var precursorIon = new Ion(composition, charge); var spec = run.GetSpectrum(scanNum) as ProductSpectrum; var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } var sequence = sequences[i]; int count; if (sequenceCount.TryGetValue(sequence, out count)) { sequenceCount[sequence] = count + 1; } else { sequenceCount[sequence] = 1; } } //var sequences = tsvReader.GetData("Annotation"); var seqSet = new HashSet <string>(); var allSeqSet = new HashSet <string>(); var numUnfilteredSpecs = 0; var totalSpecs = 0; for (var i = 0; i < compositions.Count; i++) { if (qvalues != null) { var qValue = Convert.ToDouble(qvalues[i]); if (qValue > 0.01) { continue; } } else { var score = Convert.ToDouble(scores[i]); if (score < 13) { continue; } } var scanNum = Convert.ToInt32(scanNums[i]); var charge = Convert.ToInt32(charges[i]); var composition = Composition.Parse(compositions[i]); var precursorIon = new Ion(composition, charge); var spec = run.GetSpectrum(scanNum) as ProductSpectrum; var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } ++totalSpecs; var precursorScanNum = run.GetPrecursorScanNum(scanNum); var precursorSpec = run.GetSpectrum(precursorScanNum); var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1); var nextScanNum = run.GetNextScanNum(scanNum, 1); var nextSpec = run.GetSpectrum(nextScanNum); var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1); var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0; if (corr3 == 1) { numUnfilteredSpecs++; seqSet.Add(sequences[i]); } allSeqSet.Add(sequences[i]); //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance); ////xic.Display(); //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false); //var apexSpec = run.GetSpectrum(apexScanNum); //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1); var corrMax = new[] { corr1, corr2, corr3 }.Max(); Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]); } Console.WriteLine("TotalNumComparisons: {0}", numComparisons); Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1)); Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs); Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count); Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds); }
public void FilteringEfficiencyQcShew() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); var sw = new System.Diagnostics.Stopwatch(); sw.Start(); const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw"; if (!File.Exists(rawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826); sw.Stop(); Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds); const int minPrecursorCharge = 3; const int maxPrecursorCharge = 30; const int tolerancePpm = 10; var tolerance = new Tolerance(tolerancePpm); sw.Reset(); sw.Start(); var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40); //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40); sw.Stop(); Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds); ISequenceFilter ms1Filter = ms1BasedFilter; sw.Reset(); sw.Start(); const double minProteinMass = 3000.0; const double maxProteinMass = 30000.0; var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass); var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass); var numComparisons = 0L; for (var binNum = minBinNum; binNum <= maxBinNum; binNum++) { var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum); numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count(); } sw.Stop(); Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds); //const string prot = // "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA"; //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass; //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass))); const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv"; if (!File.Exists(resultFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath); } var tsvReader = new TsvFileParser(resultFilePath); var scanNums = tsvReader.GetData("Scan(s)"); var charges = tsvReader.GetData("Charge"); var scores = tsvReader.GetData("E-value"); var sequences = tsvReader.GetData("Peptide"); //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv"; //var tsvReader = new TsvFileParser(resultFilePath); //var scanNums = tsvReader.GetData("ScanNum"); //var charges = tsvReader.GetData("Charge"); //var scores = tsvReader.GetData("Score"); //var sequences = tsvReader.GetData("Sequence"); var aaSet = new AminoAcidSet(); var seqSet = new HashSet <string>(); var allSeqSet = new HashSet <string>(); var numUnfilteredSpecs = 0; var totalSpecs = 0; for (var i = 0; i < scores.Count; i++) { var score = Convert.ToDouble(scores[i]); if (score > 1E-4) { continue; } //if (score < 10) continue; var scanNum = Convert.ToInt32(scanNums[i]); var charge = Convert.ToInt32(charges[i]); var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]); if (sequence == null || sequence.Contains("(")) { continue; } //var sequence = sequences[i]; var composition = aaSet.GetComposition(sequence) + Composition.H2O; var precursorIon = new Ion(composition, charge); var spec = run.GetSpectrum(scanNum) as ProductSpectrum; var isValid = spec != null && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } ++totalSpecs; var precursorScanNum = run.GetPrecursorScanNum(scanNum); var precursorSpec = run.GetSpectrum(precursorScanNum); var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1); var nextScanNum = run.GetNextScanNum(scanNum, 1); var nextSpec = run.GetSpectrum(nextScanNum); var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1); var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0; if (corr3 == 1) { numUnfilteredSpecs++; seqSet.Add(sequences[i]); } allSeqSet.Add(sequences[i]); var corrMax = new[] { corr1, corr2, corr3 }.Max(); Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax); } Console.WriteLine("TotalNumComparisons: {0}", numComparisons); Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1)); Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs); Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count); Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds); }