public void TestPossibleSequenceMasses() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); //const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\DataFiles\SBEP_STM_001_02272012_Aragon.raw"; const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw"; if (!File.Exists(rawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826); var sw = new System.Diagnostics.Stopwatch(); sw.Start(); //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, 3, 30, 15, 0.7, 1000); var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10)); //var masses = ms1BasedFilter.GetPossibleSequenceMasses(1113); //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, 3, 30, 15); //var masses = ms1BasedFilter.GetPossibleSequenceMasses(2819, 20); //foreach (var m in masses) //{ // Console.WriteLine(m); //} sw.Stop(); Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds); }
public void TestMs1Filter() { var methodName = MethodBase.GetCurrentMethod().Name; TestUtils.ShowStarting(methodName); const string msgfPlusResultPath = @"C:\cygwin\home\kims336\Data\QCShewQE\NoMod.tsv"; if (!File.Exists(msgfPlusResultPath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, msgfPlusResultPath); } var msgfPlusResults = new MsGfResults(msgfPlusResultPath); const string specFilePath = @"C:\cygwin\home\kims336\Data\QCShewQE\QC_Shew_13_04_A_17Feb14_Samwise_13-07-28.raw"; if (!File.Exists(specFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, specFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(specFilePath, 1.4826, 0); var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10), 1, 4, 400, 5000, 0.3, 0, 0); var matches = msgfPlusResults.GetMatchesAtPsmFdr(0.01); var aminoAcidSet = new AminoAcidSet(); var numPsms = 0; var numSurvived = 0; Console.WriteLine("ScanNum\tPeptide\tSpecEValue\tFilter"); foreach (var match in matches) { var scanNum = match.ScanNum; var peptide = match.Peptide; var specEValue = match.SpecEValue; var peptideMass = (new Sequence(peptide, aminoAcidSet).Composition + Composition.H2O).Mass; var survive = ms1Filter.GetMatchingMs2ScanNums(peptideMass).Contains(scanNum) ? 1 : 0; ++numPsms; numSurvived += survive; Console.WriteLine("{0}\t{1}\t{2}\t{3}", scanNum, peptide, specEValue, survive); } Console.WriteLine("SuccessRage: {0}, {1}/{2}", numSurvived / (float)numPsms, numSurvived, numPsms); }
public void FilteringEfficiency() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); var sw = new System.Diagnostics.Stopwatch(); sw.Start(); const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.raw"; if (!File.Exists(rawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826); sw.Stop(); Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds); const int minPrecursorCharge = 3; const int maxPrecursorCharge = 30; const int tolerancePpm = 10; var tolerance = new Tolerance(tolerancePpm); sw.Reset(); sw.Start(); //var ms1BasedFilter = new Ms1BasedFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm); // //var ms1BasedFilter = new Ms1IsotopeTopKFilter(run, minPrecursorCharge, maxPrecursorCharge, tolerancePpm, 20); //var ms1BasedFilter = new ProductScorerBasedOnDeconvolutedSpectra(run, // minPrecursorCharge, maxPrecursorCharge, // 0, 0, // 600.0, 1800.0, new Tolerance(tolerancePpm), null); //ms1BasedFilter.CachePrecursorMatchesBinCentric(); var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.5, 0.5, 0.5, 40); //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40); sw.Stop(); Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds); ISequenceFilter ms1Filter = ms1BasedFilter; sw.Reset(); sw.Start(); const double minProteinMass = 3000.0; const double maxProteinMass = 30000.0; var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass); var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass); var numComparisons = 0L; for (var binNum = minBinNum; binNum <= maxBinNum; binNum++) { var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum); numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count(); } sw.Stop(); Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds); const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon_4PTMs.icresult"; if (!File.Exists(resultFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath); } var tsvReader = new TsvFileParser(resultFilePath); var compositions = tsvReader.GetData("Composition"); var scanNums = tsvReader.GetData("ScanNum"); var charges = tsvReader.GetData("Charge"); var scores = tsvReader.GetData("Score"); var qvalues = tsvReader.GetData("QValue"); var sequences = tsvReader.GetData("Sequence"); var sequenceCount = new Dictionary <string, int>(); for (var i = 0; i < compositions.Count; i++) { if (qvalues != null) { var qValue = Convert.ToDouble(qvalues[i]); if (qValue > 0.01) { continue; } } else { var score = Convert.ToDouble(scores[i]); if (score < 13) { continue; } } var scanNum = Convert.ToInt32(scanNums[i]); var charge = Convert.ToInt32(charges[i]); var composition = Composition.Parse(compositions[i]); var precursorIon = new Ion(composition, charge); var isValid = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } var sequence = sequences[i]; if (sequenceCount.TryGetValue(sequence, out var count)) { sequenceCount[sequence] = count + 1; } else { sequenceCount[sequence] = 1; } } //var sequences = tsvReader.GetData("Annotation"); var seqSet = new HashSet <string>(); var allSeqSet = new HashSet <string>(); var numUnfilteredSpecs = 0; var totalSpecs = 0; for (var i = 0; i < compositions.Count; i++) { if (qvalues != null) { var qValue = Convert.ToDouble(qvalues[i]); if (qValue > 0.01) { continue; } } else { var score = Convert.ToDouble(scores[i]); if (score < 13) { continue; } } var scanNum = Convert.ToInt32(scanNums[i]); var charge = Convert.ToInt32(charges[i]); var composition = Composition.Parse(compositions[i]); var precursorIon = new Ion(composition, charge); var isValid = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } ++totalSpecs; var precursorScanNum = run.GetPrecursorScanNum(scanNum); var precursorSpec = run.GetSpectrum(precursorScanNum); var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1); var nextScanNum = run.GetNextScanNum(scanNum, 1); var nextSpec = run.GetSpectrum(nextScanNum); var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1); var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0; if (corr3 == 1) { numUnfilteredSpecs++; seqSet.Add(sequences[i]); } allSeqSet.Add(sequences[i]); //var xic = run.GetFullPrecursorIonExtractedIonChromatogram(precursorIon.GetMostAbundantIsotopeMz(), tolerance); ////xic.Display(); //var apexScanNum = xic.GetNearestApexScanNum(run.GetPrecursorScanNum(scanNum), false); //var apexSpec = run.GetSpectrum(apexScanNum); //var corr3 = apexSpec.GetCorrScore(precursorIon, tolerance, 0.1); var corrMax = new[] { corr1, corr2, corr3 }.Max(); Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax, sequenceCount[sequences[i]]); } Console.WriteLine("TotalNumComparisons: {0}", numComparisons); Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1)); Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs); Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count); Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds); }
public void FilteringEfficiencyQcShew() { var methodName = MethodBase.GetCurrentMethod().Name; Utils.ShowStarting(methodName); var sw = new System.Diagnostics.Stopwatch(); sw.Start(); const string rawFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402.raw"; if (!File.Exists(rawFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath); } var run = InMemoryLcMsRun.GetLcMsRun(rawFilePath, 1.4826, 1.4826); sw.Stop(); Console.WriteLine(@"Reading run: {0:f4} sec", sw.Elapsed.TotalSeconds); const int minPrecursorCharge = 3; const int maxPrecursorCharge = 30; const int tolerancePpm = 10; var tolerance = new Tolerance(tolerancePpm); sw.Reset(); sw.Start(); var ms1BasedFilter = new Ms1IsotopeAndChargeCorrFilter(run, new Tolerance(10.0), minPrecursorCharge, maxPrecursorCharge, 3000, 50000, 0.7, 0.7, 0.7, 40); //var ms1BasedFilter = new Ms1IsotopeCorrFilter(run, minPrecursorCharge, maxPrecursorCharge, 15, 0.5, 40); sw.Stop(); Console.WriteLine(@"Ms1 filter: {0:f4} sec", sw.Elapsed.TotalSeconds); ISequenceFilter ms1Filter = ms1BasedFilter; sw.Reset(); sw.Start(); const double minProteinMass = 3000.0; const double maxProteinMass = 30000.0; var minBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(minProteinMass); var maxBinNum = ProductScorerBasedOnDeconvolutedSpectra.GetBinNumber(maxProteinMass); var numComparisons = 0L; for (var binNum = minBinNum; binNum <= maxBinNum; binNum++) { var mass = ProductScorerBasedOnDeconvolutedSpectra.GetMz(binNum); numComparisons += ms1Filter.GetMatchingMs2ScanNums(mass).Count(); } sw.Stop(); Console.WriteLine(@"Calculating #matches per bin: {0:f4} sec", sw.Elapsed.TotalSeconds); //const string prot = // "ADVFHLGLTKAMLDGATLAIVPGDPERVKRIAELMDNATFLASHREYTSYLAYADGKPVVICSTGIGGPSTSIAVEELAQLGVNTFLRVGTTGAIQPHVNVGDVIVTQASVRLDGASLHFAPMEFPAVANFECTTAMVAACRDAGVEPHIGVTASSDTFYPGQERYDTVTGRVTRRFAGSMKEWQDMGVLNYEMESATLFTMCATQGWRAACVAGVIVNRTQQEIPDEATMKKTEVSAVSIVVAAAKKLLA"; //var protMass = (new AminoAcidSet().GetComposition(prot) + Composition.H2O).Mass; //Console.WriteLine("************ScanNums: " + string.Join("\t", ms1Filter.GetMatchingMs2ScanNums(protMass))); const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\MSAlign\NoMod.tsv"; if (!File.Exists(resultFilePath)) { Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, resultFilePath); } var tsvReader = new TsvFileParser(resultFilePath); var scanNums = tsvReader.GetData("Scan(s)"); var charges = tsvReader.GetData("Charge"); var scores = tsvReader.GetData("E-value"); var sequences = tsvReader.GetData("Peptide"); //const string resultFilePath = @"C:\cygwin\home\kims336\Data\TopDownQCShew\raw\QC_ShewIntact_2ug_3k_CID_4Apr14_Bane_PL011402_N30_C30.tsv"; //var tsvReader = new TsvFileParser(resultFilePath); //var scanNums = tsvReader.GetData("ScanNum"); //var charges = tsvReader.GetData("Charge"); //var scores = tsvReader.GetData("Score"); //var sequences = tsvReader.GetData("Sequence"); var aaSet = new AminoAcidSet(); var seqSet = new HashSet <string>(); var allSeqSet = new HashSet <string>(); var numUnfilteredSpecs = 0; var totalSpecs = 0; for (var i = 0; i < scores.Count; i++) { var score = Convert.ToDouble(scores[i]); if (score > 1E-4) { continue; } //if (score < 10) continue; var scanNum = Convert.ToInt32(scanNums[i]); var charge = Convert.ToInt32(charges[i]); var sequence = SimpleStringProcessing.GetStringBetweenDots(sequences[i]); if (sequence == null || sequence.Contains("(")) { continue; } //var sequence = sequences[i]; var composition = aaSet.GetComposition(sequence) + Composition.H2O; var precursorIon = new Ion(composition, charge); var isValid = run.GetSpectrum(scanNum) is ProductSpectrum spec && spec.IsolationWindow.Contains(precursorIon.GetMostAbundantIsotopeMz()); if (!isValid) { continue; } ++totalSpecs; var precursorScanNum = run.GetPrecursorScanNum(scanNum); var precursorSpec = run.GetSpectrum(precursorScanNum); var corr1 = precursorSpec.GetCorrScore(precursorIon, tolerance, 0.1); var nextScanNum = run.GetNextScanNum(scanNum, 1); var nextSpec = run.GetSpectrum(nextScanNum); var corr2 = nextSpec.GetCorrScore(precursorIon, tolerance, 0.1); var corr3 = ms1Filter.GetMatchingMs2ScanNums(composition.Mass).Contains(scanNum) ? 1 : 0; if (corr3 == 1) { numUnfilteredSpecs++; seqSet.Add(sequences[i]); } allSeqSet.Add(sequences[i]); var corrMax = new[] { corr1, corr2, corr3 }.Max(); Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", scanNum, precursorScanNum, corr1, nextScanNum, corr2, corr3, corrMax); } Console.WriteLine("TotalNumComparisons: {0}", numComparisons); Console.WriteLine("AverageNumComparisons: {0:f2}", numComparisons / (double)(maxBinNum - minBinNum + 1)); Console.WriteLine("SuccessRate: {0:f2} {1} / {2}", numUnfilteredSpecs / (double)totalSpecs, numUnfilteredSpecs, totalSpecs); Console.WriteLine("NumUniqueSequences: {0:f2}, {1} / {2}", seqSet.Count / (double)allSeqSet.Count, seqSet.Count, allSeqSet.Count); Console.WriteLine(@"Elapsed Time: {0:f4} sec", sw.Elapsed.TotalSeconds); }
public bool RunSearch(double corrThreshold) { var sw = new Stopwatch(); ErrorMessage = string.Empty; Console.Write(@"Reading raw file..."); sw.Start(); _run = InMemoryLcMsRun.GetLcMsRun(SpecFilePath, 1.4826, 1.4826); _bottomUpScorer = new InformedBottomUpScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance); sw.Stop(); var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec); sw.Reset(); Console.Write(@"Determining precursor masses..."); sw.Start(); var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(_run, PrecursorIonTolerance, MinPrecursorIonCharge, MaxPrecursorIonCharge, 400, 5000, corrThreshold, 0, 0); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec); sw.Reset(); Console.Write(@"Deconvoluting MS2 spectra..."); sw.Start(); _ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra( _run, MinProductIonCharge, MaxProductIonCharge, new Tolerance(10), 0 ); _ms2ScorerFactory.DeconvoluteAllProductSpectra(); sw.Stop(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec); // Target database var targetDb = new FastaDatabase(DatabaseFilePath); // string dirName = OutputDir ?? Path.GetDirectoryName(SpecFilePath); var baseName = Path.GetFileNameWithoutExtension(SpecFilePath); var targetOutputFilePath = Path.Combine(OutputDir, baseName + TargetFileExtension); var decoyOutputFilePath = Path.Combine(OutputDir, baseName + DecoyFileExtension); var tdaOutputFilePath = Path.Combine(OutputDir, baseName + TdaFileExtension); if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target)) { sw.Reset(); Console.Write(@"Reading the target database..."); sw.Start(); targetDb.Read(); sw.Stop(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec); sw.Reset(); Console.WriteLine(@"Searching the target database"); sw.Start(); var targetMatches = RunSearch(GetAnnotationsAndOffsets(targetDb), ms1Filter, false); sw.Stop(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Target database search elapsed time: {0:f4} sec", sec); sw.Reset(); Console.Write(@"Rescoring and writing target results..."); sw.Start(); WriteResultsToFile(targetMatches, targetOutputFilePath, targetDb); sw.Stop(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed time: {0:f4} sec", sec); } if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy)) { // Decoy database sw.Reset(); Console.Write(@"Reading the decoy database..."); sw.Start(); var decoyDb = targetDb.Decoy(Enzyme); decoyDb.Read(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec); sw.Reset(); Console.WriteLine(@"Searching the decoy database"); sw.Start(); var decoyMatches = RunSearch(GetAnnotationsAndOffsets(decoyDb), ms1Filter, true); sw.Stop(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Decoy database search elapsed Time: {0:f4} sec", sec); sw.Reset(); Console.Write(@"Rescoring and writing decoy results..."); sw.Start(); WriteResultsToFile(decoyMatches, decoyOutputFilePath, decoyDb); sw.Stop(); sec = sw.ElapsedTicks / (double)Stopwatch.Frequency; Console.WriteLine(@"Elapsed time: {0:f4} sec", sec); } if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both)) { var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath); if (fdrCalculator.HasError()) { ErrorMessage = fdrCalculator.ErrorMessage; Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage); return(false); } fdrCalculator.WriteTo(tdaOutputFilePath); } Console.WriteLine(@"Done"); return(true); }