/// <summary>
/// Loads a hard-coded raw file and reports how long it takes to build a
/// <c>ProductScorerBasedOnDeconvolutedSpectra</c> and deconvolute all product spectra.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestMs2Caching()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    var lcMsRun = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

    // Loading the run is deliberately outside the timed region.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    var productScorer = new ProductScorerBasedOnDeconvolutedSpectra(lcMsRun);
    productScorer.DeconvoluteAllProductSpectra();
    timer.Stop();

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Loads a hard-coded raw file and reports how long it takes to construct an
/// <c>Ms1IsotopeAndChargeCorrFilter</c> over it. Calls <c>Assert.Ignore</c> when the
/// raw file does not exist.
/// </summary>
public void TestPossibleSequenceMasses()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    var lcMsRun = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);

    // Only the filter construction is timed; the filter itself is not queried afterwards.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(lcMsRun, new Tolerance(10));
    timer.Stop();

    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Reads scan 810 from the ETD test raw file, builds the "z6" ion for a fixed
/// 54-residue suffix sequence, and asserts that the spectrum's fit score for that
/// ion is below 0.15. Calls <c>Assert.Ignore</c> when the ETD raw file does not exist.
/// </summary>
public void TestFitScoreCalculationEtd()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    if (!File.Exists(FilePaths.TestTopDownRawFilePathEtd))
    {
        // Report the file that was actually checked (the ETD file); the message
        // previously named the CID file, which pointed at the wrong missing input.
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + FilePaths.TestTopDownRawFilePathEtd);
    }

    const int scanNum = 810;
    var run = InMemoryLcMsRun.GetLcMsRunScanRange(FilePaths.TestTopDownRawFilePathEtd, scanNum, scanNum);
    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
    Assert.True(spec != null);

    const string suf54 = "ENIKTLPAKRNEQDQKQLIVPLADSLKPGTYTVDWHVVSVDGHKTKGHYTFSVK";
    var suf54Comp = new AminoAcidSet().GetComposition(suf54);
    Assert.True(suf54Comp != null);

    var ionType = new IonTypeFactory(10).GetIonType("z6");
    var ion = ionType.GetIon(suf54Comp);
    Console.WriteLine("MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

    var fitScore = spec.GetFitScore(ion, new Tolerance(15), 0.1);
    Console.WriteLine("FitScore: {0}", fitScore);
    Assert.True(fitScore < 0.15);
}
/// <summary>
/// Extracts the same product-ion chromatogram from an in-memory run and from a
/// <c>PbfLcMsRun</c> opened on a .raf file, and asserts that the two are equal.
/// Calls <c>Assert.Ignore</c> when either input file does not exist.
/// </summary>
public void TestGeneratingProductXic()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, TestRawFilePath);
    }

    var inMemoryRun = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath);

    const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf";
    if (!File.Exists(rafFilePath))
    {
        Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", testName, rafFilePath);
    }

    var rafRun = new PbfLcMsRun(rafFilePath);

    const double precursorIonMz = 815.16;
    const double productIonMz = 902.445;
    var tolerance = new Tolerance(10);

    var xicFromMemory = inMemoryRun.GetFullProductExtractedIonChromatogram(productIonMz, tolerance, precursorIonMz);
    var xicFromRaf = rafRun.GetFullProductExtractedIonChromatogram(productIonMz, tolerance, precursorIonMz);

    Assert.True(xicFromMemory.Equals(xicFromRaf));
    Console.WriteLine(@"Done");
}
/// <summary>
/// Loads a hard-coded raw file and constructs an <c>InformedBottomUpScorer</c> over it.
/// No scores are requested or asserted here. Calls <c>Assert.Ignore</c> when the raw
/// file does not exist.
/// </summary>
public void TestLogLikelihoodScoring()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    var aminoAcids = new AminoAcidSet();
    var lcMsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 0);

    // The scorer is only constructed; it is not used further in this method.
    var scorer = new InformedBottomUpScorer(lcMsRun, aminoAcids, 1, 2, new Tolerance(10));
}
/// <summary>
/// Reads scan 100 from the test raw file, displays it, and checks the m/z and
/// intensity of the peaks at indices 50, 500 and 1000 against fixed reference values.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestReadingDiaRawFile()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string rawFilePath = TestRawFilePath;
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    const int scanNumber = 100;
    const int maxPointsToDisplay = 50;

    var run = InMemoryLcMsRun.GetLcMsRunScanRange(rawFilePath, scanNumber);
    var spectrum = run.GetSpectrum(scanNumber);
    spectrum.Display(maxPointsToDisplay);

    // Each peak is fetched right before its own assertions so that the checks
    // run in the same order as the index accesses.
    var peak50 = spectrum.Peaks[50];
    Assert.IsTrue(Math.Abs(peak50.Mz - 414.75503540039062) < 0.0001, "Invalid m/z for peak at index 50");
    Assert.IsTrue(Math.Abs(spectrum.Peaks[50].Intensity - 1071.5673828125) < 0.01, "Invalid intensity for peak at index 50");

    var peak500 = spectrum.Peaks[500];
    Assert.IsTrue(Math.Abs(peak500.Mz - 578.1298828125) < 0.0001, "Invalid m/z for peak at index 500");
    Assert.IsTrue(Math.Abs(spectrum.Peaks[500].Intensity - 573.02374267578125) < 0.01, "Invalid intensity for peak at index 500");

    var peak1000 = spectrum.Peaks[1000];
    Assert.IsTrue(Math.Abs(peak1000.Mz - 974.17694091796875) < 0.0001, "Invalid m/z for peak at index 1000");
    Assert.IsTrue(Math.Abs(spectrum.Peaks[1000].Intensity - 678.13824462890625) < 0.01, "Invalid intensity for peak at index 1000");
}
/// <summary>
/// Loads scans 20000-20100 of the test raw file and cross-checks the run's scan
/// bookkeeping: each spectrum's MS level against <c>GetMsLevel</c>, and for every
/// MS2 scan the values of <c>GetPrecursorScanNum</c> and <c>GetNextScanNum</c> against
/// values found by scanning for the nearest MS1 scan on either side.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestReadingScanNums()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    if (!File.Exists(FilePaths.TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + FilePaths.TestRawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRunScanRange(FilePaths.TestRawFilePath, 20000, 20100);

    // First pass: record the MS level reported for every scan number.
    var msLevelByScan = new Dictionary<int, int>();
    foreach (var scan in run.AllScanNumbers)
    {
        msLevelByScan[scan] = run.GetMsLevel(scan);
    }

    // Second pass: verify spectra and the precursor/next-scan lookups.
    foreach (var scan in run.AllScanNumbers)
    {
        var spectrum = run.GetSpectrum(scan);
        Assert.True(spectrum.MsLevel == msLevelByScan[scan]);

        if (spectrum.MsLevel != 2)
        {
            continue;
        }

        // Walk backwards to the closest MS1 scan; 0 when none is found.
        var expectedPrecursorScan = 0;
        var earlier = scan - 1;
        while (earlier >= run.MinLcScan)
        {
            if (run.GetMsLevel(earlier) == 1)
            {
                expectedPrecursorScan = earlier;
                break;
            }

            earlier--;
        }

        Assert.True(run.GetPrecursorScanNum(scan) == expectedPrecursorScan);

        // Walk forwards to the closest MS1 scan; MaxLcScan + 1 when none is found.
        var expectedNextScan = run.MaxLcScan + 1;
        foreach (var later in run.AllScanNumbers.Where(x => x > scan))
        {
            if (run.GetMsLevel(later) == 1)
            {
                expectedNextScan = later;
                break;
            }
        }

        // Print the mismatch before the assertion fails to ease debugging.
        if (run.GetNextScanNum(scan) != expectedNextScan)
        {
            Console.WriteLine("{0}\t{1}\t{2}", scan, run.GetNextScanNum(scan), expectedNextScan);
        }

        Assert.True(run.GetNextScanNum(scan) == expectedNextScan);
    }

    Assert.True(run.GetNextScanNum(20025) == 20032);
    Console.WriteLine(run.GetNextScanNum(20025));
}
/// <summary>
/// Loads the spectrum file, creates the top-down scorer, and immediately rescores
/// the given MS-Align result file into the output file.
/// </summary>
/// <param name="specFilePath">Path of the spectrum file passed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
/// <param name="msAlignFilePath">Input path forwarded to <c>Rescore</c>.</param>
/// <param name="outputFilePath">Output path forwarded to <c>Rescore</c>.</param>
/// <param name="tolerance">Tolerance forwarded to the <c>InformedTopDownScorer</c>.</param>
/// <param name="ms2CorrThreshold">Threshold forwarded to the <c>InformedTopDownScorer</c>.</param>
/// <param name="minProductIonCharge">Minimum product ion charge forwarded to the scorer.</param>
/// <param name="maxProductIonCharge">Maximum product ion charge forwarded to the scorer.</param>
public MsAlignRescorer(
    string specFilePath,
    string msAlignFilePath,
    string outputFilePath,
    Tolerance tolerance,
    double ms2CorrThreshold = 0.7,
    int minProductIonCharge = 1,
    int maxProductIonCharge = 10)
{
    var lcMsRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
    var aminoAcids = new AminoAcidSet();

    _topDownScorer = new InformedTopDownScorer(
        lcMsRun,
        aminoAcids,
        minProductIonCharge,
        maxProductIonCharge,
        tolerance,
        ms2CorrThreshold);

    // The scorer must exist before Rescore runs.
    Rescore(msAlignFilePath, outputFilePath);
}
/// <summary>
/// Reads scan 5743 from the CID test raw file, verifies the elemental composition
/// and mass computed for a fixed protein sequence (plus water), prints the matched
/// isotope peaks for its charge-20 ion, and checks the fit, cosine and correlation
/// scores against fixed reference values.
/// Calls <c>Assert.Ignore</c> when the CID raw file does not exist.
/// </summary>
public void TestFitScoreCalculationCid()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    if (!File.Exists(FilePaths.TestTopDownRawFilePathCid))
    {
        Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + FilePaths.TestTopDownRawFilePathCid);
    }

    const int scanNum = 5743;
    var run = InMemoryLcMsRun.GetLcMsRunScanRange(FilePaths.TestTopDownRawFilePathCid, scanNum, scanNum);
    var spectrum = run.GetSpectrum(scanNum);
    Assert.True(spectrum != null);

    const string protein =
        "MRIILLGAPGAGKGTQAQFIMEKYGIPQISTGDMLRAAVKSGSELGKQAKDIMDAGKLVTDELVIALVKERIAQEDCRNGFLLDGFPRTIPQADAMKEAGIVVDYVLEFDVPDELIVDRIVGRRVHAASGRVYHVKFNPPKVEGKDDVTGEDLTTRKDDQEETVRKRLVEYHQMTAPLIGYYQKEAEAGNTKYAKVDGTQAVADVRAALEKILG";
    var proteinComposition = new AminoAcidSet().GetComposition(protein) + Composition.H2O;

    // Elemental counts and masses expected for the sequence above.
    Assert.True(proteinComposition != null);
    Assert.True(proteinComposition.C == 1035);
    Assert.True(proteinComposition.H == 1683);
    Assert.True(proteinComposition.N == 289);
    Assert.True(proteinComposition.O == 318);
    Assert.True(proteinComposition.P == 0);
    Assert.True(proteinComposition.S == 7);
    Assert.True(Math.Abs(proteinComposition.Mass - 23473.245267145) < 0.0000001);
    Assert.True(proteinComposition.NominalMass == 23461);

    var ion = new Ion(proteinComposition, 20);
    var envelope = ion.Composition.GetIsotopomerEnvelopeRelativeIntensities();
    Console.WriteLine(@"MonoMz: {0}, MonoMass: {1}", ion.GetMonoIsotopicMz(), ion.Composition.Mass);

    // One row per isotope: index, theoretical m/z, theoretical ratio, observed intensity
    // (0 when no peak was matched for that isotope).
    var matchedPeaks = spectrum.GetAllIsotopePeaks(ion, new Tolerance(15), 0.1);
    for (var isotopeIndex = 0; isotopeIndex < matchedPeaks.Length; isotopeIndex++)
    {
        var peak = matchedPeaks[isotopeIndex];
        var intensity = peak == null ? 0 : peak.Intensity;
        Console.WriteLine(@"{0,3}  {1,10:F4}  {2,10:F3}  {3,10:F3}", isotopeIndex, ion.GetIsotopeMz(isotopeIndex), envelope[isotopeIndex], intensity);
    }

    var fitScore = spectrum.GetFitScore(ion, new Tolerance(15), 0.1);
    var cosine = spectrum.GetConsineScore(ion, new Tolerance(15), 0.1);
    var corr = spectrum.GetCorrScore(ion, new Tolerance(15), 0.1);

    Console.WriteLine(@"FitScore: {0}", fitScore);
    Console.WriteLine(@"Cosine: {0}", cosine);
    Console.WriteLine(@"Corr: {0}", corr);

    Assert.True(Math.Abs(fitScore - 0.181194589537041) < 0.0001);
    Assert.True(Math.Abs(cosine - 0.917609346566222) < 0.0001);
    Assert.True(Math.Abs(corr - 0.808326778009839) < 0.0001);
}
/// <summary>
/// For every row of a hard-coded MS-GF+ tsv result file, scores the identified
/// peptide and a derived decoy (reversed, then mutated) with a <c>DiaRankScore</c>
/// scorer and writes both scores as a tab-separated "Target/Decoy" table.
/// Calls <c>Assert.Ignore</c> when the raw file or the tsv file does not exist.
/// </summary>
public void DiaRankScore()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string dataFile = @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\raw\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    const string tsvFile = @"\\protoapps\UserData\Wilkins\BottomUp\HCD_QCShew\tsv\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.tsv";

    if (!File.Exists(dataFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, dataFile);
    }

    if (!File.Exists(tsvFile))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, tsvFile);
    }

    var tsvParser = new TsvFileParser(tsvFile);
    var peptideColumn = tsvParser.GetData("Peptide");
    var chargeColumn = tsvParser.GetData("Charge");
    var scanColumn = tsvParser.GetData("ScanNum");

    var lcms = InMemoryLcMsRun.GetLcMsRun(dataFile, 0, 0);
    var rankScorer = new DiaRankScore(@"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QExactive_Tryp.txt");

    using (var scoreWriter = new StreamWriter(@"C:\Users\wilk011\Documents\DataFiles\TestFolder\HCD_QCShew_Score_2.txt"))
    {
        scoreWriter.WriteLine("Target\tDecoy");

        for (var row = 0; row < peptideColumn.Count; row++)
        {
            var peptideStr = peptideColumn[row];
            var charge = Convert.ToInt32(chargeColumn[row]);
            var scan = Convert.ToInt32(scanColumn[row]);

            var targetSequence = Sequence.GetSequenceFromMsGfPlusPeptideStr(peptideStr);

            // Decoy: reverse the parsed sequence, flatten it to a string, mutate
            // half as many positions as the target has residues, then re-parse.
            var decoySequence = Sequence.GetSequenceFromMsGfPlusPeptideStr(peptideStr);
            decoySequence.Reverse();
            var decoyStr = decoySequence.Aggregate("", (current, aa) => current + aa);
            decoyStr = SimpleStringProcessing.Mutate(decoyStr, targetSequence.Count / 2);
            decoySequence = Sequence.GetSequenceFromMsGfPlusPeptideStr(decoyStr);

            var targetScore = rankScorer.GetScore(targetSequence, charge, scan, lcms);
            var decoyScore = rankScorer.GetScore(decoySequence, charge, scan, lcms);
            scoreWriter.WriteLine("{0}\t{1}", targetScore, decoyScore);
        }
    }
}
/// <summary>
/// Creates an analysis object by storing the supplied run, peptide source,
/// amino acid set and charge range on the corresponding members.
/// </summary>
/// <param name="run">Stored in <c>Run</c>.</param>
/// <param name="peptideEnumerator">Stored in <c>PeptideEnumerator</c>.</param>
/// <param name="aminoAcidSet">Stored in <c>AminoAcidSet</c>.</param>
/// <param name="minCharge">Stored in <c>MinCharge</c>.</param>
/// <param name="maxCharge">Stored in <c>MaxCharge</c>.</param>
public InformedProteomicsAnalysis(
    InMemoryLcMsRun run,
    IEnumerable<string> peptideEnumerator,
    AminoAcidSet aminoAcidSet,
    int minCharge,
    int maxCharge)
{
    this.Run = run;
    this.PeptideEnumerator = peptideEnumerator;
    this.AminoAcidSet = aminoAcidSet;
    this.MinCharge = minCharge;
    this.MaxCharge = maxCharge;
}
/// <summary>
/// Stores the run and tolerance on this instance and replaces the missing-XIC
/// and XIC-correlation counters (target and decoy) with fresh, empty dictionaries.
/// </summary>
/// <param name="outFileName">Not read by the statements in this method.</param>
/// <param name="run">Stored in <c>_run</c>.</param>
/// <param name="tolerance">Stored in <c>_tolerance</c>.</param>
/// <param name="annotationFileName">Not read by the statements in this method.</param>
public void Train(string outFileName, InMemoryLcMsRun run, Tolerance tolerance, string annotationFileName)
{
    _run = run;
    _tolerance = tolerance;

    // Created but not read again in this method.
    // NOTE(review): presumably keyed by charge, then scan number (per the original
    // "charge, scan number, protein" remark) - confirm.
    var targetCompositions = new Dictionary<int, Dictionary<int, Composition>>();
    var decoyCompositions = new Dictionary<int, Dictionary<int, Composition>>();

    // Reset all counters.
    _missingXicCounterTarget = new Dictionary<int, int>();
    _missingXicCounterDecoy = new Dictionary<int, int>();

    _xicCorrScoreCounterTarget = new Dictionary<int, Dictionary<int, int>>();
    _xicCorrScoreCounterDecoy = new Dictionary<int, Dictionary<int, int>>();
}
/// <summary>
/// Stores the result path and tolerances, then loads every spectrum file into
/// memory and registers it in <c>Run</c> under its file name without extension.
/// </summary>
/// <param name="specFilePaths">Spectrum files to load; each is loaded as it is enumerated.</param>
/// <param name="msGfResultPath">Stored in <c>MsGfResultPath</c>.</param>
/// <param name="tolForBaseXic">Stored in <c>ToleranceForBaseXic</c>.</param>
/// <param name="tolFromBaseXic">Stored in <c>ToleranceFromBasicXic</c>.</param>
public MsGfPostProcessor(
    IEnumerable<string> specFilePaths,
    string msGfResultPath,
    Tolerance tolForBaseXic,
    Tolerance tolFromBaseXic)
{
    MsGfResultPath = msGfResultPath;
    ToleranceForBaseXic = tolForBaseXic;
    ToleranceFromBasicXic = tolFromBaseXic;

    Run = new Dictionary<string, LcMsRun>();

    foreach (var path in specFilePaths)
    {
        // The run is loaded before the key is derived, so a file is read even
        // when it ends up not being registered.
        var loadedRun = InMemoryLcMsRun.GetLcMsRun(path);
        var key = Path.GetFileNameWithoutExtension(path);
        if (key == null)
        {
            continue;
        }

        // A later file with the same base name overwrites the earlier entry.
        Run[key] = loadedRun;
    }
}
/// <summary>
/// Prints the theoretical isotopomer profile of a fixed peptide's precursor ion
/// and, for isotope indices -1 through 2, the extracted ion chromatogram's peak
/// count, its summed intensity relative to the most abundant isotope's XIC, and
/// its correlation with that XIC.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestXicGen()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string specFilePath = @"D:\Research\Data\UW\Fusion\WT_D_DDA_130412065618.raw";

    // Skip (rather than fail) when the machine-specific data file is absent,
    // matching the other tests that depend on local data files.
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath);

    var tolerance = new Tolerance(30);

    const string peptide = "AIANGQVDGFPTQEECR";
    const int targetScanNum = 37633;
    const int charge = 2;

    const int minIsotopeIndex = -1;
    const int maxIsotopeIndexExclusive = 3;

    var aaSet = new AminoAcidSet(Modification.Carbamidomethylation);
    var precursorIon = new Ion(aaSet.GetComposition(peptide) + Composition.H2O, charge);

    Console.WriteLine("Theoretical isotopomer profile:");
    foreach (var p in precursorIon.GetIsotopes(0.1))
    {
        Console.WriteLine("{0}\t{1}", precursorIon.GetIsotopeMz(p.Index), p.Ratio);
    }

    var xicArr = new Dictionary<int, Xic>();
    var basePeakIndex = precursorIon.Composition.GetMostAbundantIsotopeZeroBasedIndex();
    for (var i = minIsotopeIndex; i < maxIsotopeIndexExclusive; i++)
    {
        xicArr[i] = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetIsotopeMz(i), tolerance, targetScanNum);
    }

    // The most abundant isotope is not guaranteed to lie inside the index range
    // extracted above; extract its XIC on demand instead of failing with a
    // KeyNotFoundException.
    Xic baseXic;
    if (!xicArr.TryGetValue(basePeakIndex, out baseXic))
    {
        baseXic = run.GetPrecursorExtractedIonChromatogram(precursorIon.GetIsotopeMz(basePeakIndex), tolerance, targetScanNum);
    }

    for (var i = minIsotopeIndex; i < maxIsotopeIndexExclusive; i++)
    {
        Console.WriteLine("\nIndex: {0}", i);
        Console.WriteLine("m/z: {0}", precursorIon.GetIsotopeMz(i));
        Console.WriteLine("#XicPeaks: {0}", xicArr[i].Count);
        Console.WriteLine("Intensity: {0}", xicArr[i].GetSumIntensities() / baseXic.GetSumIntensities());
        Console.WriteLine("Correlation: {0}", xicArr[i].GetCorrelation(baseXic));
    }
}
/// <summary>
/// For every MS-GF+ match at 1% PSM-level FDR, checks whether the match's scan is
/// among the MS2 scans that the <c>Ms1IsotopeAndChargeCorrFilter</c> returns for the
/// peptide's mass, prints one row per match, and finally prints the fraction of
/// matches that survived the filter.
/// Calls <c>Assert.Ignore</c> when the result file or the raw file does not exist.
/// </summary>
public void TestMs1Filter()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string msgfPlusResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod.tsv";
    if (!File.Exists(msgfPlusResultPath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, msgfPlusResultPath);
    }

    var msgfPlusResults = new MsGfResults(msgfPlusResultPath);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 0);
    var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10), 1, 4, 400, 5000, 0.3, 0, 0);

    var matches = msgfPlusResults.GetMatchesAtPsmFdr(0.01);
    var aminoAcidSet = new AminoAcidSet();

    var totalPsms = 0;
    var survivingPsms = 0;

    Console.WriteLine("ScanNum\tPeptide\tSpecEValue\tFilter");
    foreach (var match in matches)
    {
        var scanNum = match.ScanNum;
        var peptide = match.Peptide;
        var specEValue = match.SpecEValue;

        var peptideComposition = new Sequence(peptide, aminoAcidSet).Composition + Composition.H2O;
        var peptideMass = peptideComposition.Mass;

        // The last output column is 1 when the scan passed the filter, otherwise 0.
        var survived = ms1Filter.GetMatchingMs2ScanNums(peptideMass).Contains(scanNum);

        ++totalPsms;
        if (survived)
        {
            ++survivingPsms;
        }

        Console.WriteLine("{0}\t{1}\t{2}\t{3}", scanNum, peptide, specEValue, survived ? 1 : 0);
    }

    Console.WriteLine("SuccessRage: {0}, {1}/{2}", survivingPsms / (float)totalPsms, survivingPsms, totalPsms);
}
/// <summary>
/// Stores the run and filter settings, precomputes the possible sequence masses,
/// and builds the map from sequence mass to MS2 scans for the given mass range.
/// </summary>
/// <param name="run">Run stored in <c>_run</c> and used to build the match map.</param>
/// <param name="minCharge">Stored in <c>_minCharge</c>.</param>
/// <param name="maxCharge">Stored in <c>_maxCharge</c>.</param>
/// <param name="ppmTolerance">Value used to create both the <c>Tolerance</c> and the <c>MzComparerWithTolerance</c>.</param>
/// <param name="minMass">Lower mass bound passed to <c>CreateSequenceMassToMs2ScansMap</c>.</param>
/// <param name="maxMass">Upper mass bound passed to <c>CreateSequenceMassToMs2ScansMap</c>.</param>
/// <param name="maxNumPeaksToConsider">Stored in <c>MaxNumPeaksToConsider</c>.</param>
public Ms1IsotopeMostAbundantPlusOneFilter(
    InMemoryLcMsRun run,
    int minCharge = 3,
    int maxCharge = 30,
    double ppmTolerance = 10,
    double minMass = 3000.0,
    double maxMass = 50000.0,
    int maxNumPeaksToConsider = 40)
{
    // Plain configuration.
    _run = run;
    _minCharge = minCharge;
    _maxCharge = maxCharge;
    MaxNumPeaksToConsider = maxNumPeaksToConsider;

    // Both helpers are built from the same ppm value.
    _tolerance = new Tolerance(ppmTolerance);
    _comparer = new MzComparerWithTolerance(ppmTolerance);

    // The match map is created before precomputation and finalized after it;
    // this ordering is kept exactly as in the original sequence of statements.
    _lcMsMatchMap = new LcMsMatchMap();
    PrecomputePossibleSequenceMasses();
    _lcMsMatchMap.CreateSequenceMassToMs2ScansMap(_run, _tolerance, minMass, maxMass);
}
/// <summary>
/// Scores one fixed peptide-spectrum match twice: first with the sequence-graph
/// fragment score based on deconvoluted spectra, then with an
/// <c>InformedBottomUpScorer</c>, printing both results.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestPsm()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(testName);

    const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    // The match under test.
    const char pre = 'R';
    const string sequence = "LENWPPASLADDL";
    const char post = 'A';
    const string annotation = "R.LENWPPASLADDL._";
    const int charge = 2;
    const int ms2ScanNum = 25534;

    var aminoAcids = new AminoAcidSet();
    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 0, 0);

    // Fast score from deconvoluted product spectra.
    var productScorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 2, 10, 0, 1.1);
    productScorer.DeconvoluteAllProductSpectra();
    var ms2Scorer = productScorer.GetMs2Scorer(ms2ScanNum);

    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);
    seqGraph.SetSink(0);
    var fastScore = seqGraph.GetFragmentScore(ms2Scorer);
    Console.WriteLine("Fast search score: " + fastScore);

    // Refined score for the same composition.
    var composition = seqGraph.GetSinkSequenceCompositionWithH2O();
    var informedScorer = new InformedBottomUpScorer(run, aminoAcids, 1, 15, new Tolerance(10));
    var refinedScore = informedScorer.GetScores(pre, sequence, post, composition, charge, ms2ScanNum);
    Console.WriteLine("RefinedScores: {0}", refinedScore);
}
/// <summary>
/// Loads scans 10000-10100 of the test raw file and prints, for every scan that
/// has an isolation window, the scan number together with the window's
/// monoisotopic m/z and charge (0.0 when either is not set).
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestReadingIsolationWindows()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    if (!File.Exists(TestLcMsRun.TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + testName + @" since file not found: " + TestLcMsRun.TestRawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRunScanRange(TestRawFilePath, 10000, 10100);

    for (var scan = run.MinLcScan; scan <= run.MaxLcScan; scan++)
    {
        var window = run.GetIsolationWindow(scan);
        if (window == null)
        {
            // Scans without an isolation window are not reported.
            continue;
        }

        Console.WriteLine("{0}\t{1}\t{2}", scan, window.MonoisotopicMz ?? 0.0, window.Charge ?? 0.0);
    }
}
/// <summary>
/// Reads scan 425 from the CID test raw file and, when it is a product spectrum,
/// displays it and prints its activation method, elution time, precursor scan,
/// isolation window and MS level. Finally prints the elapsed time.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestParsingSpectrumFile()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Started before the file check, so the reported time covers everything below.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    if (!File.Exists(TestTopDownRawFilePathCid))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, TestTopDownRawFilePathCid);
    }

    const int scanNum = 425;
    const int maxPointsToDisplay = 50;

    var run = InMemoryLcMsRun.GetLcMsRunScanRange(TestTopDownRawFilePathCid, scanNum, scanNum);
    var productSpectrum = run.GetSpectrum(scanNum) as ProductSpectrum;

    // Nothing is printed for the spectrum when the scan is not a ProductSpectrum.
    if (productSpectrum != null)
    {
        productSpectrum.Display(maxPointsToDisplay);
        var isolationWindow = productSpectrum.IsolationWindow;

        Console.WriteLine("ActivationMethod: {0}", productSpectrum.ActivationMethod);
        Console.WriteLine("Rt: {0}", productSpectrum.ElutionTime);
        Console.WriteLine("PrecursorScan: {0}", run.GetPrecursorScanNum(productSpectrum.ScanNum));
        Console.WriteLine("IsolationWindowTargetMz: {0}", isolationWindow.IsolationWindowTargetMz);
        Console.WriteLine("IsolationWindowLowerOffset: {0}", isolationWindow.IsolationWindowLowerOffset);
        Console.WriteLine("IsolationWindowUpperOffset: {0}", isolationWindow.IsolationWindowUpperOffset);
        Console.WriteLine("MsLevel: {0}", run.GetMsLevel(scanNum));
    }

    Console.WriteLine(@"Done. {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Compares a .pbf-backed run with an in-memory run loaded from the .raw file of
/// the same name: every spectrum must have the same peaks (m/z within 1e-8,
/// intensity within 0.001), and the full precursor XIC at m/z 655.01 must match
/// point by point.
/// Calls <c>Assert.Ignore</c> when the .pbf or the .raw file does not exist.
/// </summary>
public void TestReadingPbfFile()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string pbfFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\TopDown\ProductionQCShew\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.pbf";
    if (!File.Exists(pbfFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, pbfFilePath);
    }

    var pbfRun = new PbfLcMsRun(pbfFilePath);

    // The reference raw file sits next to the .pbf file and differs only by extension.
    var rawFilePath = Path.ChangeExtension(pbfFilePath, "raw");
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, rawFilePath);
    }

    Console.WriteLine(@"Loading .pbf into memory");
    var inMemoryRun = InMemoryLcMsRun.GetLcMsRun(rawFilePath);

    Console.WriteLine(@"Comparing spectra between .pbf and in-memory spectra");
    for (var scan = inMemoryRun.MinLcScan; scan <= inMemoryRun.MaxLcScan; scan++)
    {
        var expectedSpectrum = inMemoryRun.GetSpectrum(scan);
        var actualSpectrum = pbfRun.GetSpectrum(scan);

        Assert.IsTrue(expectedSpectrum.Peaks.Length == actualSpectrum.Peaks.Length);

        for (var peakIndex = 0; peakIndex < expectedSpectrum.Peaks.Length; peakIndex++)
        {
            var expectedPeak = expectedSpectrum.Peaks[peakIndex];
            var actualPeak = actualSpectrum.Peaks[peakIndex];

            Assert.True(expectedPeak.Equals(actualPeak));
            Assert.True(Math.Abs(expectedPeak.Mz - actualPeak.Mz) < 1e-8);
            Assert.True(Math.Abs(expectedPeak.Intensity - actualPeak.Intensity) < 0.001);
        }
    }

    Console.WriteLine(@"Comparing XICs");
    const double targetMz = 655.01;
    var tolerance = new Tolerance(10);
    var expectedXic = inMemoryRun.GetFullPrecursorIonExtractedIonChromatogram(targetMz, tolerance);
    var actualXic = pbfRun.GetFullPrecursorIonExtractedIonChromatogram(targetMz, tolerance);

    Assert.True(expectedXic.Count == actualXic.Count);
    for (var pointIndex = 0; pointIndex < expectedXic.Count; pointIndex++)
    {
        // Print the differing points before the assertion below fails.
        if (!expectedXic[pointIndex].Equals(actualXic[pointIndex]))
        {
            Console.WriteLine(@"{0} {1} {2}", pointIndex, expectedXic[pointIndex], actualXic[pointIndex]);
        }

        Assert.True(expectedXic[pointIndex].Equals(actualXic[pointIndex]));
    }

    Console.WriteLine(@"Done");
}
/// <summary>
/// Builds a sequence graph for a fixed protein annotation, asserts that its
/// charge-6 precursor ion passes the <c>Ms1ContainsIonFilter</c> for scan 4448, and
/// prints the fragment score computed with a <c>MatchedPeakCounter</c> together with
/// the elapsed time.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestMatchedPeakCounter()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Parameters
    var precursorIonTolerance = new Tolerance(15);
    var productIonTolerance = new Tolerance(15);
    var aminoAcids = new AminoAcidSet();

    const string protAnnotation = "_.MFQQEVTITAPNGLHTRPAAQFVKEAKGFTSEITVTSNGKSASAKSLFKLQTLGLTQGTVVTISAEGEDEQKAVEHLVKLMAELE._";

    // Create a sequence graph
    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, protAnnotation);
    Assert.NotNull(seqGraph, "Invalid sequence: {0}", protAnnotation);

    const string specFilePath = @"\\protoapps\UserData\Jungkap\Joshua\testData\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);

    // Timing starts after the run has been loaded.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    var precursorFilter = new Ms1ContainsIonFilter(run, precursorIonTolerance);

    var compositions = seqGraph.GetSequenceCompositions();
    Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, compositions.Length);

    const int charge = 6;
    const int modIndex = 0;
    const int ms2ScanNum = 4448;

    var seqComposition = compositions[modIndex];
    var peptideComposition = seqComposition + Composition.H2O;
    // Return value intentionally unused, as in the original call.
    peptideComposition.GetIsotopomerEnvelopeRelativeIntensities();

    Console.WriteLine("Composition: {0}, AveragineMass: {1}", seqComposition, seqComposition.Mass);
    seqGraph.SetSink(modIndex);

    var precursorIon = new Ion(peptideComposition, charge);
    Assert.True(precursorFilter.IsValid(precursorIon, ms2ScanNum));

    var productSpectrum = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    Assert.True(productSpectrum != null);

    var peakCounter = new MatchedPeakCounter(productSpectrum, productIonTolerance, 1, 10);
    var score = seqGraph.GetFragmentScore(peakCounter);

    Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", protAnnotation, charge, precursorIon.GetMostAbundantIsotopeMz(), ms2ScanNum, score);

    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Builds a sequence graph for a fixed protein annotation, asserts that its
/// charge-9 precursor ion passes the <c>Ms1ContainsIonFilter</c> for scan 3633, and
/// prints the fragment score computed with a <c>CorrMatchedPeakCounter</c> together
/// with the elapsed time.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestCorrMatchedPeakCounter()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    // Parameters
    var precursorIonTolerance = new Tolerance(10);
    var productIonTolerance = new Tolerance(10);
    var aminoAcids = new AminoAcidSet();

    const string protAnnotation = "_.TMNITSKQMEITPAIRQHVADRLAKLEKWQTHLINPHIILSKEPQGFIADATINTPNGHLVASAKHEDMYTAINELINKLERQLNKVQHKGEAR._";

    // Create a sequence graph
    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, protAnnotation);
    Assert.NotNull(seqGraph, "Invalid sequence: {0}", protAnnotation);

    const string specFilePath = @"\\proto-2\UnitTest_Files\InformedProteomics_TestFiles\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);

    // Timing starts after the run has been loaded.
    var timer = System.Diagnostics.Stopwatch.StartNew();

    var precursorFilter = new Ms1ContainsIonFilter(run, precursorIonTolerance);

    var compositions = seqGraph.GetSequenceCompositions();
    Console.WriteLine("Length: {0}\tNumCompositions: {1}", protAnnotation.Length - 4, compositions.Length);

    const int charge = 9;
    const int modIndex = 0;
    const int ms2ScanNum = 3633;

    var seqComposition = compositions[modIndex];
    var peptideComposition = seqComposition + Composition.H2O;
    // Return value intentionally unused, as in the original call.
    peptideComposition.GetIsotopomerEnvelopeRelativeIntensities();

    Console.WriteLine("Composition: {0}, AveragineMass: {1}", seqComposition, seqComposition.Mass);
    seqGraph.SetSink(modIndex);

    var precursorIon = new Ion(peptideComposition, charge);
    Assert.True(precursorFilter.IsValid(precursorIon, ms2ScanNum));

    var productSpectrum = run.GetSpectrum(ms2ScanNum) as ProductSpectrum;
    Assert.True(productSpectrum != null);

    var peakCounter = new CorrMatchedPeakCounter(productSpectrum, productIonTolerance, 1, 10);
    var score = seqGraph.GetFragmentScore(peakCounter);

    Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", protAnnotation, charge, precursorIon.GetMostAbundantIsotopeMz(), ms2ScanNum, score);

    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.Elapsed.TotalSeconds);
}
/// <summary>
/// Loads the spectrum file into <c>_run</c> and immediately rescores the given
/// result file into the output file.
/// </summary>
/// <param name="specFilePath">Path of the spectrum file passed to <c>InMemoryLcMsRun.GetLcMsRun</c>.</param>
/// <param name="icResultFilePath">Input path forwarded to <c>Rescore</c>.</param>
/// <param name="outputFilePath">Output path forwarded to <c>Rescore</c>.</param>
/// <param name="aaSet">Not read by the statements in this constructor.</param>
/// <param name="tolerance">Not read by the statements in this constructor.</param>
public IcBottomUpRescorer(
    string specFilePath,
    string icResultFilePath,
    string outputFilePath,
    AminoAcidSet aaSet,
    Tolerance tolerance)
{
    var loadedRun = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 0.0);
    _run = loadedRun;

    // The run is assigned before Rescore is invoked.
    Rescore(icResultFilePath, outputFilePath);
}
/// <summary>
/// Scores one fixed protein-spectrum match for every proteoform composition of a
/// sequence graph built with N-terminal acetylation plus a custom "AddVal"
/// modification on every standard residue (at most one modification per protein).
/// For each proteoform it prints the fast fragment score and the refined
/// <c>InformedTopDownScorer</c> result.
/// Calls <c>Assert.Ignore</c> when the raw file does not exist.
/// </summary>
public void TestPrSm()
{
    var testName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(testName);

    const string specFilePath = @"D:\Research\Data\Jon\AH_SF_mouseliver_3-1_Intact_2_6Feb14_Bane_PL011402.raw";
    if (!File.Exists(specFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", testName, specFilePath);
    }

    const int ms2ScanNum = 19011;
    const int charge = 7;
    const string annotation = "_.SKVSFKITLTSDPRLPYKVLSVPESTPFTAVLKFAAEEFKVPAATSAIITNDGIGINPAQTAGNVFLKHGSELRIIPRDRVGSC._";

    // Search modifications: one per standard residue, with the N-terminal
    // acetylation appended last.
    var acetylN = new SearchModification(Modification.Acetylation, '*', SequenceLocation.ProteinNTerm, true);
    var modVal = Modification.RegisterAndGetModification("AddVal", new Composition(5, 9, 1, 1, 0));
    var searchMods = AminoAcid.StandardAminoAcidCharacters
        .Select(residue => new SearchModification(modVal, residue, SequenceLocation.Everywhere, false))
        .ToList();
    searchMods.Add(acetylN);

    const int numMaxModsPerProtein = 1;
    var aminoAcids = new AminoAcidSet(searchMods, numMaxModsPerProtein);

    var seqGraph = SequenceGraph.CreateGraph(aminoAcids, annotation);
    Console.WriteLine("NumProteoforms: " + seqGraph.GetNumProteoformCompositions());

    var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 1.4826);
    var productScorer = new ProductScorerBasedOnDeconvolutedSpectra(run, 1, 15);
    // Result intentionally unused, as in the original call.
    productScorer.GetScorer(ms2ScanNum);
    var ms2Scorer = productScorer.GetMs2Scorer(ms2ScanNum);
    Assert.NotNull(ms2Scorer, "Scorer is null!");

    for (var proteoformIndex = 0; proteoformIndex < seqGraph.GetNumProteoformCompositions(); proteoformIndex++)
    {
        seqGraph.SetSink(proteoformIndex);
        Console.WriteLine("ModComb: " + seqGraph.GetModificationCombinations()[proteoformIndex]);

        var fastScore = seqGraph.GetFragmentScore(ms2Scorer);
        Console.WriteLine("Fast search score: " + fastScore);

        var composition = seqGraph.GetSinkSequenceCompositionWithH2O();

        // A new informed scorer is created for every proteoform, as before.
        var informedScorer = new InformedTopDownScorer(run, aminoAcids, 1, 30, new Tolerance(10));
        var refinedScore = informedScorer.GetScores(
            AminoAcid.ProteinNTerm,
            SimpleStringProcessing.GetStringBetweenDots(annotation),
            AminoAcid.ProteinCTerm,
            composition,
            charge,
            ms2ScanNum);

        Console.WriteLine("Modifications: {0}", refinedScore.Modifications);
        Console.WriteLine("Composition: {0}", composition);
        Console.WriteLine("RefinedScores: {0}", refinedScore);
    }
}
/// <summary>
/// Writes a tab-separated table with precursor and retention-time information
/// for every spiked-in peptide identified in an MS-GF+ result file.
/// </summary>
/// <remarks>
/// The raw file is looked up under a hard-coded directory using the result file's
/// base name, and the spiked-in peptide list is read from a hard-coded path.
/// A result row is skipped when it repeats the previous row's scan number, is
/// invalid, hits a decoy protein, exceeds the 1% peptide-level q-value threshold,
/// or yields a peptide (after stripping the fixed modification masses) that was
/// already seen. For accepted spiked-in peptides with a non-empty XIC, one output
/// row is written. The number of written peptides is printed at the end.
/// </remarks>
/// <param name="resultFilePath">Path of the MS-GF+ result file to read.</param>
/// <param name="outputFilePath">Path of the table to create.</param>
public void GeneratePrmInfo(string resultFilePath, string outputFilePath)
{
    Console.Write("Processing {0}", Path.GetFileName(resultFilePath));
    Console.Out.Flush();

    var rawFilePath = @"D:\Research\Data\EDRN\DDA\raw\" + Path.GetFileNameWithoutExtension(resultFilePath) + ".raw";

    // NOTE(review): the reader is never closed or disposed here - confirm whether
    // XCaliburReader exposes a Close/Dispose that should be called.
    var reader = new XCaliburReader(rawFilePath);
    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath);

    var tolerance = new Tolerance(10, ToleranceUnit.Ppm);

    const string spikedInPeptideFile = @"D:\Research\Data\EDRN\SpikedPeptides.txt";
    var spikedInPepSet = new HashSet<string>(File.ReadAllLines(spikedInPeptideFile));

    const double qValueThreshold = 0.01;

    var pepSet = new HashSet<string>();
    MsGfPlusHeaderInformation headerInfo = null;

    var numPeptides = 0;
    var prevScanNum = -1;

    using (var writer = new StreamWriter(outputFilePath))
    {
        writer.WriteLine("Peptide\tCharge\tMonoMz\tMostAbundantMz\tMs2ScanNum\tRtMs2\tRtApex\tRtStart\tRtEnd\tSpecEValue\tPepQValue");

        foreach (var line in File.ReadLines(resultFilePath))
        {
            // Header line: machine-readable marker, so compare ordinally rather
            // than with the current culture.
            if (line.StartsWith("#", StringComparison.Ordinal))
            {
                headerInfo = new MsGfPlusHeaderInformation(line);
                continue;
            }

            var match = new MsGfMatch(line, headerInfo);

            // Only the first row of each run of identical scan numbers is considered.
            if (match.ScanNum == prevScanNum)
            {
                continue;
            }

            prevScanNum = match.ScanNum;

            if (!match.IsValid || match.Protein.StartsWith(FastaDatabase.DecoyProteinPrefix, StringComparison.Ordinal))
            {
                continue;
            }

            if (match.PepQValue > qValueThreshold)
            {
                continue;
            }

            // Strip the modification mass annotations so the peptide can be
            // compared with the plain spiked-in sequences.
            var peptide = match.Peptide.Replace("C+57.021", "C").Replace("K+8.014", "K").Replace("R+10.008", "R");

            // HashSet.Add returns false for a peptide that was already seen.
            if (!pepSet.Add(peptide))
            {
                continue;
            }

            if (!spikedInPepSet.Contains(peptide))
            {
                continue;
            }

            var ion = new Ion(match.Formula, match.Charge);
            var mostAbundantIonMz = ion.GetMostAbundantIsotopeMz();
            var xic = run.GetPrecursorExtractedIonChromatogram(mostAbundantIonMz, tolerance, match.ScanNum);
            if (xic.Count == 0)
            {
                continue;
            }

            var minScan = xic.Min().ScanNum;
            var maxScan = xic.Max().ScanNum;

            writer.WriteLine(
                "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}",
                peptide,
                match.Charge,
                ion.GetMonoIsotopicMz(),
                mostAbundantIonMz,
                match.ScanNum,
                reader.RtFromScanNum(match.ScanNum),
                reader.RtFromScanNum(xic.GetApexScanNum()), // Rt apex
                reader.RtFromScanNum(minScan),              // Rt start
                reader.RtFromScanNum(maxScan),              // Rt end
                match.SpecEValue,
                match.PepQValue);

            ++numPeptides;
        }
    }

    Console.WriteLine("\t{0}", numPeptides);
}
// Benchmark: issues 100,000 random product-ion XIC queries, first against the in-memory run
// and then against the .raf-backed PbfLcMsRun, and prints the elapsed seconds of each pass.
// The test is ignored when either input file is missing.
public void TestGeneratingProductXics()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, TestRawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath);

    const string rafFilePath = @"H:\Research\Jarret\10mz\raw\Q_2014_0523_50_10_fmol_uL_10mz.raf";
    if (!File.Exists(rafFilePath))
    {
        Assert.Ignore(@"Skipping raf portion of test {0} since file not found: {1}", methodName, rafFilePath);
    }

    var rafRun = new PbfLcMsRun(rafFilePath);

    var tolerance = new Tolerance(10);

    // Random queries: product m/z in [50, 1500) paired with a precursor m/z in [390, 810).
    var mzArr = new double[100000];
    var precursorMzArr = new double[mzArr.Length];
    var rnd = new Random();
    for (var q = 0; q < mzArr.Length; q++)
    {
        mzArr[q] = rnd.NextDouble() * 1450.0 + 50.0;
        precursorMzArr[q] = rnd.NextDouble() * (810.0 - 390.0) + 390.0;
    }

    // Pass 1: in-memory run. The tolerance window is computed inside the timed loop.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    for (var q = 0; q < mzArr.Length; q++)
    {
        var center = mzArr[q];
        var halfWidth = tolerance.GetToleranceAsTh(center);
        run.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzArr[q]);
    }
    timer.Stop();
    Console.WriteLine(@"Method 1: {0:f4} sec", timer.Elapsed.TotalSeconds);

    // Pass 2: the same queries against the raf-backed run.
    timer.Restart();
    for (var q = 0; q < mzArr.Length; q++)
    {
        var center = mzArr[q];
        var halfWidth = tolerance.GetToleranceAsTh(center);
        rafRun.GetFullProductExtractedIonChromatogram(center - halfWidth, center + halfWidth, precursorMzArr[q]);
    }
    timer.Stop();
    Console.WriteLine(@"Method 2: {0:f4} sec", timer.Elapsed.TotalSeconds);

    Console.WriteLine(@"Done");
}
// For every confident identification in the result file (q-value <= 0.01 when a QValue column
// exists), prints MS1-level evidence for the precursor ion: isotope correlation scores in the
// precursor scan and in the next MS1 scan, and XIC correlations between the most abundant
// isotope and (a) the next isotope, (b) charge - 1 and (c) charge + 1.
// The test is ignored when the result file or the raw file is missing.
public void TestMs1Filtering()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    const string resultFilePath =
        @"C:\cygwin\home\kims336\Data\TopDown\raw\CorrMatches_N30\SBEP_STM_001_02272012_Aragon.decoy.icresult";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    var tolerance = new Tolerance(15);

    var tsvReader = new TsvFileParser(resultFilePath);
    var compositions = tsvReader.GetData("Composition");
    var scanNums = tsvReader.GetData("ScanNum");
    var charges = tsvReader.GetData("Charge");
    var qValues = tsvReader.GetData("QValue");
    var scores = tsvReader.GetData("Score");

    // Result files are machine-written, so numeric fields are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // The header lists exactly the columns written per row below. The former "Sum" column
    // (summed-MS1 correlation) is omitted because the code that produced it is disabled.
    Console.WriteLine("ScanNum\tScore\tPrecursor\tNext\tNextIsotope\tLessCharge\tMoreCharge\tMax\tNumXicPeaks");

    for (var i = 0; i < compositions.Count; i++)
    {
        // The q-value filter applies only when the result file has a QValue column.
        if (qValues != null && Convert.ToDouble(qValues[i], invariant) > 0.01)
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanNums[i], invariant);
        var composition = Composition.Parse(compositions[i]);
        var charge = Convert.ToInt32(charges[i], invariant);

        var precursorIon = new Ion(composition, charge);
        var mostAbundantMz = precursorIon.GetMostAbundantIsotopeMz();

        // Skip rows whose scan is not a product spectrum or whose isolation window misses the precursor.
        var isValid = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(mostAbundantMz);
        if (!isValid)
        {
            continue;
        }

        var score = Convert.ToDouble(scores[i], invariant);

        // Isotope correlation in the precursor scan.
        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var preIsotopeCorr = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // Isotope correlation in the next MS1 scan.
        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var nextIsotopeCorr = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        var xicMostAbundant = run.GetPrecursorExtractedIonChromatogram(mostAbundantMz, tolerance, scanNum);

        // XIC correlation with the next isotope peak (one C13 - C12 mass difference higher, scaled by charge).
        var xicNextIsotope = run.GetPrecursorExtractedIonChromatogram(mostAbundantMz + Constants.C13MinusC12 / charge, tolerance, scanNum);
        var plusOneIsotopeCorr = xicMostAbundant.GetCorrelation(xicNextIsotope);

        // XIC correlation with the same composition at charge - 1.
        var precursorIonChargeMinusOne = new Ion(composition, charge - 1);
        var xicChargeMinusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargeMinusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var chargeMinusOneCorr = xicMostAbundant.GetCorrelation(xicChargeMinusOne);

        // XIC correlation with the same composition at charge + 1.
        var precursorIonChargePlusOne = new Ion(composition, charge + 1);
        var xicChargePlusOne = run.GetPrecursorExtractedIonChromatogram(precursorIonChargePlusOne.GetMostAbundantIsotopeMz(), tolerance, scanNum);
        var chargePlusOneCorr = xicMostAbundant.GetCorrelation(xicChargePlusOne);

        var max = new[] { preIsotopeCorr, nextIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr }.Max();

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}",
            scanNum, score, preIsotopeCorr, nextIsotopeCorr, plusOneIsotopeCorr, chargeMinusOneCorr, chargePlusOneCorr, max, xicMostAbundant.Count);
    }
}
// Compares the full precursor XIC vectors produced by PbfLcMsRun and InMemoryLcMsRun for every
// m/z bin between 600 and 2000. A bin raises a warning when more than two values differ and
// agreement (relative to the number of positive entries) is below 80%; the test fails once
// ten or more bins raise a warning. The test is ignored when the raw file is missing.
public void TestGettingXicVector()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    if (!File.Exists(TestRawFilePath))
    {
        Assert.Ignore(@"Skipping test " + methodName + @" since file not found: " + TestRawFilePath);
    }

    var run1 = PbfLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0);
    var run2 = InMemoryLcMsRun.GetLcMsRun(TestRawFilePath, 0.0, 0.0);
    Assert.True(run1 != null && run2 != null);

    var comparer = new MzComparerWithBinning(27);
    const double minMz = 600.0;
    const double maxMz = 2000.0;

    var firstBin = comparer.GetBinNumber(minMz);
    var lastBin = comparer.GetBinNumber(maxMz);
    Console.WriteLine(@"NumBins: " + (lastBin - firstBin));

    var warnCount = 0;
    var timer = Stopwatch.StartNew();

    for (var bin = firstBin; bin <= lastBin; bin++)
    {
        var mzStart = comparer.GetMzStart(bin);
        var mzEnd = comparer.GetMzEnd(bin);

        var vec1 = run1.GetFullPrecursorIonExtractedIonChromatogramVector(mzStart, mzEnd);
        var vec2 = run2.GetFullPrecursorIonExtractedIonChromatogramVector(mzStart, mzEnd);

        Assert.True(vec1.Length == vec2.Length, "Extracted Ion Chromatogram vector length mismatch, {0} vs. {1}", vec1.Length, vec2.Length);

        var mismatches = 0;
        var positives = 0;
        for (var k = 0; k < vec2.Length; k++)
        {
            // An entry counts as positive when either run has signal there.
            if (vec1[k] > 0 || vec2[k] > 0)
            {
                positives++;
            }

            // NOTE(review): float.Epsilon is the smallest positive float, so this is effectively
            // an exact-equality check rather than a tolerance - confirm that is intended.
            if (!(Math.Abs(vec1[k] - vec2[k]) < float.Epsilon))
            {
                mismatches++;
            }
        }

        if (mismatches > 0 && positives != 0)
        {
            var fractionAgreement = 1 - mismatches / (double)positives;
            if (fractionAgreement < 0.80 && mismatches > 2)
            {
                Console.WriteLine(@"{0}/{1} Xic values do not match for bin {2} ({3:0.00} m/z); {4:0.0}% agreement",
                    mismatches, positives, bin, mzStart, fractionAgreement * 100);
                warnCount++;
            }
        }
    }

    timer.Stop();
    Console.WriteLine(@"{0:f4} sec", timer.Elapsed.TotalSeconds);

    Assert.IsTrue(warnCount < 10, "Too many Xic mismatch warnings: {0}", warnCount);
}
// Measures how selective and how sensitive the MS1-based sequence filter is:
//  1. counts, over all mass bins between 3,000 and 30,000, how many MS2 scans the filter returns;
//  2. for every confident identification in the result file, reports whether the filter keeps
//     the identified scan, together with isotope correlation scores of the precursor ion in
//     the precursor scan and in the next MS1 scan.
// The test is ignored when the raw file or the result file is missing.
public void FilteringEfficiency()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var sw = new System.Diagnostics.Stopwatch();

    sw.Start();
    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    sw.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    var tolerance = new Tolerance(tolerancePpm);

    sw.Reset();
    sw.Start();
    var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40);
    sw.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

    ISequenceFilter ms1Filter = ms1BasedFilter;

    sw.Reset();
    sw.Start();
    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;
    var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
    var numComparisons = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
        numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
    }
    sw.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var compositions = tsvReader.GetData("Composition");
    var scanNums = tsvReader.GetData("ScanNum");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("Score");
    var qvalues = tsvReader.GetData("QValue");
    var sequences = tsvReader.GetData("Sequence");

    // Result files are machine-written, so numeric fields are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // A row is rejected when its q-value exceeds 0.01; when the file has no QValue column,
    // it is rejected when its score is below 13.
    bool PassesIdThreshold(int row)
    {
        if (qvalues != null)
        {
            return !(Convert.ToDouble(qvalues[row], invariant) > 0.01);
        }

        return !(Convert.ToDouble(scores[row], invariant) < 13);
    }

    // True when the scan is a product spectrum whose isolation window contains the ion's most abundant isotope.
    bool IsolatesPrecursor(int ms2ScanNum, Ion ion)
    {
        return run.GetSpectrum(ms2ScanNum) is ProductSpectrum productSpec
               && productSpec.IsolationWindow.Contains(ion.GetMostAbundantIsotopeMz());
    }

    // First pass: number of accepted spectra per sequence (reported in the last output column).
    var sequenceCount = new Dictionary<string, int>();
    for (var i = 0; i < compositions.Count; i++)
    {
        if (!PassesIdThreshold(i))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanNums[i], invariant);
        var charge = Convert.ToInt32(charges[i], invariant);
        var composition = Composition.Parse(compositions[i]);
        var precursorIon = new Ion(composition, charge);

        if (!IsolatesPrecursor(scanNum, precursorIon))
        {
            continue;
        }

        var sequence = sequences[i];
        if (sequenceCount.TryGetValue(sequence, out var count))
        {
            sequenceCount[sequence] = count + 1;
        }
        else
        {
            sequenceCount[sequence] = 1;
        }
    }

    // Second pass: per-spectrum report and filter success statistics.
    var seqSet = new HashSet<string>();    // sequences with at least one spectrum kept by the filter
    var allSeqSet = new HashSet<string>(); // all accepted sequences
    var numUnfilteredSpecs = 0;
    var totalSpecs = 0;
    for (var i = 0; i < compositions.Count; i++)
    {
        if (!PassesIdThreshold(i))
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanNums[i], invariant);
        var charge = Convert.ToInt32(charges[i], invariant);
        var composition = Composition.Parse(compositions[i]);
        var precursorIon = new Ion(composition, charge);

        if (!IsolatesPrecursor(scanNum, precursorIon))
        {
            continue;
        }

        ++totalSpecs;

        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // 1 when the filter returns this scan for the identified mass, 0 otherwise.
        var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
        if (corr3 == 1)
        {
            numUnfilteredSpecs++;
            seqSet.Add(sequences[i]);
        }
        allSeqSet.Add(sequences[i]);

        var corrMax = new[] { corr1, corr2, corr3 }.Max();

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]);
    }

    Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
    Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

    // NOTE(review): the stopwatch was last stopped after the per-bin loop, so this repeats that
    // interval rather than the total run time - confirm which figure is wanted.
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}
// Same evaluation as FilteringEfficiency, but on the QC Shewanella intact-protein data set with
// identifications taken from an MSAlign result table: counts how many MS2 scans the MS1-based
// filter returns per mass bin, then reports for every identification with E-value <= 1E-4
// whether the filter keeps the identified scan.
// The test is ignored when the raw file or the result file is missing.
public void FilteringEfficiencyQcShew()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var sw = new System.Diagnostics.Stopwatch();

    sw.Start();
    const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw";
    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826);
    sw.Stop();
    Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const int minPrecursorCharge = 3;
    const int maxPrecursorCharge = 30;
    const int tolerancePpm = 10;
    var tolerance = new Tolerance(tolerancePpm);

    sw.Reset();
    sw.Start();
    var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40);
    sw.Stop();
    Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds);

    ISequenceFilter ms1Filter = ms1BasedFilter;

    sw.Reset();
    sw.Start();
    const double minProteinMass = 3000.0;
    const double maxProteinMass = 30000.0;
    var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass);
    var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass);
    var numComparisons = 0L;
    for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
    {
        var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum);
        numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count();
    }
    sw.Stop();
    Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds);

    const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv";
    if (!File.Exists(resultFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath);
    }

    var tsvReader = new TsvFileParser(resultFilePath);
    var scanNums = tsvReader.GetData("Scan(s)");
    var charges = tsvReader.GetData("Charge");
    var scores = tsvReader.GetData("E-value");
    var sequences = tsvReader.GetData("Peptide");

    // Result files are machine-written, so numeric fields are parsed independently of the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    var aaSet = new AminoAcidSet();

    var seqSet = new HashSet<string>();    // sequences with at least one spectrum kept by the filter
    var allSeqSet = new HashSet<string>(); // all accepted sequences
    var numUnfilteredSpecs = 0;
    var totalSpecs = 0;
    for (var i = 0; i < scores.Count; i++)
    {
        // The score column holds E-values here: smaller is better.
        var score = Convert.ToDouble(scores[i], invariant);
        if (score > 1E-4)
        {
            continue;
        }

        var scanNum = Convert.ToInt32(scanNums[i], invariant);
        var charge = Convert.ToInt32(charges[i], invariant);

        // Rows whose peptide string cannot be extracted, or still contains '(', are skipped.
        var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]);
        if (sequence == null || sequence.Contains("("))
        {
            continue;
        }

        // GetComposition's result is null-checked elsewhere in this file, so guard here as well
        // instead of failing on the addition below.
        var residueComposition = aaSet.GetComposition(sequence);
        if (residueComposition == null)
        {
            continue;
        }

        var composition = residueComposition + Composition.H2O;
        var precursorIon = new Ion(composition, charge);

        // Skip rows whose scan is not a product spectrum or whose isolation window misses the precursor.
        var isValid = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz());
        if (!isValid)
        {
            continue;
        }

        ++totalSpecs;

        var precursorScanNum = run.GetPrecursorScanNum(scanNum);
        var precursorSpec = run.GetSpectrum(precursorScanNum);
        var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        var nextScanNum = run.GetNextScanNum(scanNum, 1);
        var nextSpec = run.GetSpectrum(nextScanNum);
        var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1);

        // 1 when the filter returns this scan for the identified mass, 0 otherwise.
        var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0;
        if (corr3 == 1)
        {
            numUnfilteredSpecs++;
            seqSet.Add(sequences[i]);
        }
        allSeqSet.Add(sequences[i]);

        var corrMax = new[] { corr1, corr2, corr3 }.Max();

        Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax);
    }

    Console.WriteLine("TotalNumComparisons: {0}", numComparisons);
    Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1));
    Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs);
    Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count);

    // NOTE(review): the stopwatch was last stopped after the per-bin loop, so this repeats that
    // interval rather than the total run time - confirm which figure is wanted.
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds);
}