/// <summary>
/// For every dataset that has result files in the hard-coded cache folder, runs
/// <c>FdrCalculator</c> on the "&lt;base&gt;.icresult" / "&lt;base&gt;.decoy.icresult" pair and
/// writes the merged output to "&lt;base&gt;.tsv".
/// </summary>
/// <exception cref="Exception">
/// Thrown when <c>FdrCalculator.HasError()</c> returns true for a dataset; processing stops there.
/// </exception>
public void MergeTargetDecoyFiles()
{
    const string dir = @"C:\cygwin\home\kims336\Data\TopDown\raw\Cache";
    const string targetSuffix = ".icresult";
    const string decoySuffix = ".decoy.icresult";

    // The search pattern matches both target and decoy files; a set collapses each pair into one base path.
    var rawFileNames = new HashSet<string>();
    foreach (var f in Directory.GetFiles(dir, "*" + targetSuffix))
    {
        // Strip the known suffix rather than cutting at the first '.' of the full path:
        // a dot in a directory name or inside the dataset name must not truncate the base path.
        string suffix;
        if (f.EndsWith(decoySuffix, StringComparison.OrdinalIgnoreCase))
        {
            suffix = decoySuffix;
        }
        else if (f.EndsWith(targetSuffix, StringComparison.OrdinalIgnoreCase))
        {
            suffix = targetSuffix;
        }
        else
        {
            // Defensive: ignore anything the file-system pattern matched unexpectedly.
            continue;
        }

        rawFileNames.Add(f.Substring(0, f.Length - suffix.Length));
    }

    foreach (var rawFileName in rawFileNames)
    {
        var targetResultFilePath = rawFileName + targetSuffix;
        var decoyResultFilePath = rawFileName + decoySuffix;
        var mergedResultFilePath = rawFileName + ".tsv";

        Console.Write(@"Creating {0}...", mergedResultFilePath);
        var fdrCalculator = new FdrCalculator(targetResultFilePath, decoyResultFilePath);
        if (fdrCalculator.HasError())
        {
            throw new Exception(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
        }

        fdrCalculator.WriteTo(mergedResultFilePath);
        Console.WriteLine(@"Done");
    }
}
/// <summary>
/// Runs <c>FdrCalculator</c> on a fixed target/decoy result pair and writes the merged
/// output next to them. Throws if the calculator reports an error.
/// </summary>
public void TestIcTopDown()
{
    // Active data set.
    const string targetTsv = @"D:\MassSpecFiles\training\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTarget.tsv";
    const string decoyTsv = @"D:\MassSpecFiles\training\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcDecoy.tsv";
    const string mergedTsv = @"D:\MassSpecFiles\training\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_test.tsv";

    // Alternative data sets (disabled):
    //const string targetResultPath = @"H:\Research\Charles\TopDown\Mod_M1\SBEP_STM_001_02272012_Aragon_IcTarget.tsv";
    //const string decoyResultPath = @"H:\Research\Charles\TopDown\Mod_M1\SBEP_STM_001_02272012_Aragon_IcDecoy.tsv";
    //const string tdaResultPath = @"H:\Research\Charles\TopDown\Mod_M1\SBEP_STM_001_02272012_Aragon_IcTda.tsv";
    //const string targetResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.icresult";
    //const string decoyResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.decoy.icresult";

    var calculator = new FdrCalculator(targetTsv, decoyTsv);
    if (!calculator.HasError())
    {
        calculator.WriteTo(mergedTsv);
        Console.WriteLine(@"Done");
        return;
    }

    throw new Exception(@"Error computing FDR: " + calculator.ErrorMessage);
}
/// <summary>
/// Announces the test via <c>TestUtils.ShowStarting</c>, then runs <c>FdrCalculator</c> on a fixed
/// pair of rescored target/decoy files and writes the merged output. Throws if the calculator
/// reports an error.
/// </summary>
public void RecomputeFdr()
{
    TestUtils.ShowStarting(MethodBase.GetCurrentMethod().Name);

    const string rescoredTarget = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTarget_Rescored.tsv";
    const string rescoredDecoy = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcDecoy_Rescored.tsv";
    const string rescoredMerged = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda_Rescored.tsv";

    // Alternative data set (disabled):
    //const string targetResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.icresult";
    //const string decoyResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.decoy.icresult";

    var calculator = new FdrCalculator(rescoredTarget, rescoredDecoy);
    if (!calculator.HasError())
    {
        calculator.WriteTo(rescoredMerged);
        Console.WriteLine("Done");
        return;
    }

    throw new Exception(@"Error computing FDR: " + calculator.ErrorMessage);
}
/// <summary>
/// Loads the spectrum file, builds the MS1 filter and the MS2 scorer, then searches the target
/// and/or decoy database (as selected by <c>RunTargetDecoyAnalysis</c>) and writes the result
/// files into <c>OutputDir</c>. When both modes are selected, the two result files are combined
/// by <c>FdrCalculator</c>. Timing for each stage is printed to the console.
/// </summary>
/// <param name="corrThreshold">Value passed to the <c>Ms1IsotopeAndChargeCorrFilter</c> constructor.</param>
/// <returns>
/// <c>false</c> when <c>FdrCalculator</c> reports an error (its message is copied to
/// <c>ErrorMessage</c>); otherwise <c>true</c>.
/// </returns>
public bool RunSearch(double corrThreshold)
{
    var timer = new Stopwatch();
    ErrorMessage = string.Empty;

    // Stage 1: load the run and create the bottom-up scorer.
    Console.Write(@"Reading raw file...");
    timer.Start();
    _run = InMemoryLcMsRun.GetLcMsRun(SpecFilePath, 1.4826, 1.4826);
    _bottomUpScorer = new InformedBottomUpScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

    // Stage 2: precursor filter (the timer is read while still running, as before).
    Console.Write(@"Determining precursor masses...");
    timer.Restart();
    var precursorFilter = new Ms1IsotopeAndChargeCorrFilter(
        _run, PrecursorIonTolerance, MinPrecursorIonCharge, MaxPrecursorIonCharge,
        400, 5000, corrThreshold, 0, 0);
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

    // Stage 3: deconvolute all product spectra up front.
    Console.Write(@"Deconvoluting MS2 spectra...");
    timer.Restart();
    _ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(
        _run, MinProductIonCharge, MaxProductIonCharge, new Tolerance(10), 0);
    _ms2ScorerFactory.DeconvoluteAllProductSpectra();
    timer.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

    // Target database
    var targetDatabase = new FastaDatabase(DatabaseFilePath);

    // string dirName = OutputDir ?? Path.GetDirectoryName(SpecFilePath);

    var datasetName = Path.GetFileNameWithoutExtension(SpecFilePath);
    var targetPath = Path.Combine(OutputDir, datasetName + TargetFileExtension);
    var decoyPath = Path.Combine(OutputDir, datasetName + DecoyFileExtension);
    var mergedPath = Path.Combine(OutputDir, datasetName + TdaFileExtension);

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
    {
        Console.Write(@"Reading the target database...");
        timer.Restart();
        targetDatabase.Read();
        timer.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

        Console.WriteLine(@"Searching the target database");
        timer.Restart();
        var targetHits = RunSearch(GetAnnotationsAndOffsets(targetDatabase), precursorFilter, false);
        timer.Stop();
        Console.WriteLine(@"Target database search elapsed time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

        Console.Write(@"Rescoring and writing target results...");
        timer.Restart();
        WriteResultsToFile(targetHits, targetPath, targetDatabase);
        timer.Stop();
        Console.WriteLine(@"Elapsed time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);
    }

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
    {
        // Decoy database (the timer is read while still running, as before).
        Console.Write(@"Reading the decoy database...");
        timer.Restart();
        var decoyDatabase = targetDatabase.Decoy(Enzyme);
        decoyDatabase.Read();
        Console.WriteLine(@"Elapsed Time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

        Console.WriteLine(@"Searching the decoy database");
        timer.Restart();
        var decoyHits = RunSearch(GetAnnotationsAndOffsets(decoyDatabase), precursorFilter, true);
        timer.Stop();
        Console.WriteLine(@"Decoy database search elapsed Time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);

        Console.Write(@"Rescoring and writing decoy results...");
        timer.Restart();
        WriteResultsToFile(decoyHits, decoyPath, decoyDatabase);
        timer.Stop();
        Console.WriteLine(@"Elapsed time: {0:f4} sec", timer.ElapsedTicks / (double)Stopwatch.Frequency);
    }

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
    {
        var fdr = new FdrCalculator(targetPath, decoyPath);
        if (fdr.HasError())
        {
            ErrorMessage = fdr.ErrorMessage;
            Console.WriteLine(@"Error computing FDR: " + fdr.ErrorMessage);
            return false;
        }

        fdr.WriteTo(mergedPath);
    }

    Console.WriteLine(@"Done");
    return true;
}
/// <summary>
/// Runs <c>FdrCalculator</c> on the QC_Shew target/decoy id files located through
/// <c>Utils.GetTestFile</c>, writes the merged result next to the target file, then reads that
/// file back and checks the header, the number of result rows, and the QValue of selected scans.
/// </summary>
/// <exception cref="Exception">Thrown when <c>FdrCalculator.HasError()</c> returns true.</exception>
public void TestIcTopDownFdr()
{
    // Number of data rows the merged result file is expected to contain.
    const int expectedResultCount = 2655;

    var methodName = MethodBase.GetCurrentMethod().Name;
    Utils.ShowStarting(methodName);

    var targetFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"IdFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTarget.tsv");
    var targetResultPath = Utils.GetTestFile(methodName, targetFile);

    var decoyFile = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"IdFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcDecoy.tsv");
    var decoyResultPath = Utils.GetTestFile(methodName, decoyFile);

    if (targetResultPath.DirectoryName == null)
    {
        Assert.Ignore("Cannot determine the parent directory of " + targetResultPath.FullName);
    }

    var tdaResultPath = Path.Combine(targetResultPath.DirectoryName, "QC_Shew_Intact_26Sep14_Bane_C2Column3_result.tsv");

    var fdrCalculator = new FdrCalculator(targetResultPath.FullName, decoyResultPath.FullName);
    if (fdrCalculator.HasError())
    {
        throw new Exception(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
    }

    fdrCalculator.WriteTo(tdaResultPath);

    using (var reader = new StreamReader(new FileStream(tdaResultPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
    {
        if (reader.EndOfStream)
        {
            Assert.Fail("Result file is empty: " + tdaResultPath);
        }

        var headerLine = reader.ReadLine();
        if (string.IsNullOrWhiteSpace(headerLine))
        {
            Assert.Fail("Header line is empty: " + tdaResultPath);
        }

        var headerColumns = headerLine.Split('\t');
        if (headerColumns.Length < 21)
        {
            Assert.Fail("Header line col count is less than 21: " + tdaResultPath);
        }

        // Map column name -> index (case-insensitive) so QValue can be located by name.
        var headerMap = new Dictionary<string, int>(StringComparer.InvariantCultureIgnoreCase);
        for (var i = 0; i < headerColumns.Length; i++)
        {
            headerMap.Add(headerColumns[i], i);
        }

        Console.WriteLine("Headers: " + string.Join(" ", headerColumns));

        if (!headerMap.TryGetValue("QValue", out var qvalueColIndex))
        {
            Assert.Fail("QValue not found in header line: " + tdaResultPath);
        }

        if (reader.EndOfStream)
        {
            Assert.Fail("Result file has a header line but no results: " + tdaResultPath);
        }

        // Scan number (first column) -> raw QValue text.
        var qValueByScan = new Dictionary<int, string>();

        while (!reader.EndOfStream)
        {
            var result = reader.ReadLine();
            if (string.IsNullOrWhiteSpace(result))
            {
                continue;
            }

            var dataColumns = result.Split('\t');
            if (dataColumns.Length < headerMap.Count)
            {
                Assert.Fail("Incomplete result line: " + result);
            }

            if (!int.TryParse(dataColumns[0], out var scanNumber))
            {
                Assert.Fail("Scan number is non-numeric: " + dataColumns[0]);
            }

            qValueByScan.Add(scanNumber, dataColumns[qvalueColIndex]);
        }

        // Log the count actually read (not the expected constant) so a failing run is diagnosable.
        Console.WriteLine("Result count: " + qValueByScan.Count);

        Assert.AreEqual(expectedResultCount, qValueByScan.Count, "Result count {0} does not match expected count", qValueByScan.Count);

        // Trailing comments keep the alternative values that were commented out in the original calls.
        VerifyQValue(tdaResultPath, qValueByScan, 1808, 0);         // 9.99e-308
        VerifyQValue(tdaResultPath, qValueByScan, 2565, 0);         // 1.323038e-38
        VerifyQValue(tdaResultPath, qValueByScan, 1682, 0);         // 1.912647e-12
        VerifyQValue(tdaResultPath, qValueByScan, 3045, 0.0116883); // 0.010666
        VerifyQValue(tdaResultPath, qValueByScan, 2668, 0.0227943); // 0.113394
    }

    Console.WriteLine(@"Done, see " + tdaResultPath);
}
/// <summary>
/// Runs the full search: loads the spectrum file, selects an MS1 sequence filter, pre-generates
/// deconvoluted MS/MS spectra, optionally generates sequence tags, searches the target and/or
/// decoy database (per <c>RunTargetDecoyAnalysis</c>), writes the result files into
/// <c>OutputDir</c>, and, when both modes are selected, combines them with <c>FdrCalculator</c>.
/// Stage timings are printed to the console.
/// </summary>
/// <param name="corrThreshold">Not referenced inside this method body.</param>
/// <param name="cancellationToken">
/// Optional token applied to the parallel deconvolution loop.
/// NOTE(review): the nested RunTagBasedSearch/RunSearch calls receive <c>null</c> in the argument
/// slot just before the progress object; confirm whether the token should be forwarded there.
/// </param>
/// <param name="progress">Optional progress sink; wrapped in a <c>ProgressData</c> instance.</param>
/// <returns>
/// <c>false</c> when <c>FdrCalculator</c> reports an error (its message is copied to
/// <c>ErrorMessage</c>); otherwise <c>true</c>.
/// </returns>
public bool RunSearch(double corrThreshold = 0.7, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
{
    // Get the Normalized spec file/folder path
    SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

    var prog = new Progress<ProgressData>();
    var progData = new ProgressData(progress);
    if (progress != null)
    {
        // Relay reports from nested operations through progData.
        prog = new Progress<ProgressData>(p =>
        {
            progData.Status = p.Status;
            progData.StatusInternal = p.StatusInternal;
            progData.Report(p.Percent);
        });
    }

    var sw = new Stopwatch();
    var swAll = new Stopwatch();
    swAll.Start();
    ErrorMessage = string.Empty;

    Console.Write(@"Reading raw file...");
    progData.Status = "Reading spectra file";
    progData.StepRange(10.0);
    sw.Start();

    _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

    _ms2ScanNums = _run.GetScanNumbers(2).ToArray();
    _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
    foreach (var ms2Scan in _ms2ScanNums)
    {
        var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
        if (ms2Spec == null)
        {
            continue;
        }

        _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
    }

    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    progData.StepRange(20.0);
    ISequenceFilter ms1Filter;
    if (this.ScanNumbers != null && this.ScanNumbers.Any())
    {
        ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
    }
    else if (string.IsNullOrWhiteSpace(FeatureFilePath))
    {
        // Checks whether SpecFileName.ms1ft exists
        var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
        if (!File.Exists(ms1FtFilePath))
        {
            Console.WriteLine(@"Running ProMex...");
            sw.Start();
            var param = new LcMsFeatureFinderInputParameter
            {
                InputPath = SpecFilePath,
                MinSearchMass = MinSequenceMass,
                MaxSearchMass = MaxSequenceMass,
                MinSearchCharge = MinPrecursorIonCharge,
                MaxSearchCharge = MaxPrecursorIonCharge,
                CsvOutput = false,
                ScoreReport = false,
                LikelihoodScoreThreshold = -10
            };
            var featureFinder = new LcMsFeatureFinderLauncher(param);
            featureFinder.Run();
        }

        sw.Reset();
        sw.Start();
        Console.Write(@"Reading ProMex results...");
        ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
    }
    else
    {
        sw.Reset();
        sw.Start();

        // Compare the extension ordinally: a culture-sensitive ToLower() can fail to match
        // (e.g. 'I' lower-cases to dotless 'ı' under Turkish culture, breaking ".MSALIGN").
        var extension = Path.GetExtension(FeatureFilePath);
        if (string.Equals(extension, ".csv", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading ICR2LS/Decon2LS results...");
            ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else if (string.Equals(extension, ".ms1ft", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading ProMex results...");
            ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
        }
        else if (string.Equals(extension, ".msalign", StringComparison.OrdinalIgnoreCase))
        {
            Console.Write(@"Reading MS-Align+ results...");
            ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
        }
        else
        {
            // Unrecognized feature file type: the search proceeds with a null filter.
            ms1Filter = null; //new Ms1FeatureMatrix(_run);
        }
    }

    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    // pre-generate deconvoluted spectra for scoring
    _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);

    _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
    sw.Reset();
    Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
    sw.Start();
    var pfeOptions = new ParallelOptions
    {
        MaxDegreeOfParallelism = MaxNumThreads,
        CancellationToken = cancellationToken ?? CancellationToken.None
    };
    Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
    {
        _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
    });
    sw.Stop();
    Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

    progData.StepRange(10.0);
    progData.Status = "Reading Fasta File";

    // Target database
    var targetDb = new FastaDatabase(DatabaseFilePath);
    targetDb.Read();

    // Generate sequence tags for all MS/MS spectra
    if (TagBasedSearch)
    {
        progData.StepRange(25.0);
        progData.Status = "Generating Sequence Tags";
        sw.Reset();
        Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
        sw.Start();
        var seqTagGen = GetSequenceTagGenerator();
        _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        _tagSearchEngine = new ScanBasedTagSearchEngine(
            _run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
            _ms2ScorerFactory2, ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
            MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
    }

    var specFileName = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
    var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
    var decoyOutputFilePath = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
    var tdaOutputFilePath = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

    progData.StepRange(60.0);
    progData.Status = "Running Target search";

    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
    {
        sw.Reset();
        Console.Write(@"Reading the target database...");
        sw.Start();
        targetDb.Read();
        sw.Stop();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // One match set per scan number, indexed 0..MaxLcScan.
        var targetMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];

        progData.MaxPercentage = 42.5;
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the target database");
            sw.Start();
            RunTagBasedSearch(targetMatches, targetDb, null, prog);
            Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 60.0;

        sw.Reset();
        Console.WriteLine(@"Searching the target database");
        sw.Start();
        RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
        Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
        sw.Start();
        var bestTargetMatches = RunGeneratingFunction(targetMatches);
        WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
        sw.Stop();
        Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(95.0); // total to 95%
    progData.Status = "Running Decoy search";
    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
    {
        // Decoy database
        sw.Reset();
        sw.Start();
        var decoyDb = targetDb.Decoy(null, true);

        Console.Write(@"Reading the decoy database...");
        decoyDb.Read();
        Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        progData.MaxPercentage = 77.5;
        var decoyMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
        if (TagBasedSearch)
        {
            sw.Reset();
            Console.WriteLine(@"Tag-based searching the decoy database");
            sw.Start();
            RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
            Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
        }

        progData.MaxPercentage = 95.0;

        sw.Reset();
        Console.WriteLine(@"Searching the decoy database");
        sw.Start();
        RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
        Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

        // calculate spectral e-value using generating function
        sw.Reset();
        Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
        sw.Start();
        var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
        WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
        sw.Stop();
        Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
    }

    progData.StepRange(100.0);
    progData.Status = "Writing combined results file";
    if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
    {
        // Add "Qvalue" and "PepQValue"
        var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
        if (fdrCalculator.HasError())
        {
            ErrorMessage = fdrCalculator.ErrorMessage;
            Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            return false;
        }

        fdrCalculator.WriteTo(tdaOutputFilePath);
    }

    progData.Report(100.0);

    Console.WriteLine(@"Done.");
    swAll.Stop();
    Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

    return true;
}