コード例 #1
0
        public void MergeTargetDecoyFiles()
        {
            const string dir          = @"C:\cygwin\home\kims336\Data\TopDown\raw\Cache";
            var          rawFileNames = new HashSet <string>();

            foreach (var f in Directory.GetFiles(dir, "*.icresult"))
            {
                rawFileNames.Add(f.Substring(0, f.IndexOf('.')));
            }

            foreach (var rawFileName in rawFileNames)
            {
                var targetResultFilePath = rawFileName + ".icresult";
                var decoyResultFilePath  = rawFileName + ".decoy.icresult";
                var mergedResultFilePath = rawFileName + ".tsv";

                Console.Write(@"Creating {0}...", mergedResultFilePath);
                var fdrCalculator = new FdrCalculator(targetResultFilePath, decoyResultFilePath);
                if (fdrCalculator.HasError())
                {
                    throw new Exception(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                }

                fdrCalculator.WriteTo(mergedResultFilePath);
                Console.WriteLine(@"Done");
            }
        }
コード例 #2
0
        public void TestIcTopDown()
        {
            //const string targetResultPath = @"H:\Research\Charles\TopDown\Mod_M1\SBEP_STM_001_02272012_Aragon_IcTarget.tsv";
            const string targetResultPath = @"D:\MassSpecFiles\training\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTarget.tsv";
            const string decoyResultPath  = @"D:\MassSpecFiles\training\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcDecoy.tsv";
            const string tdaResultPath    = @"D:\MassSpecFiles\training\test\QC_Shew_Intact_26Sep14_Bane_C2Column3_test.tsv";

            //const string decoyResultPath = @"H:\Research\Charles\TopDown\Mod_M1\SBEP_STM_001_02272012_Aragon_IcDecoy.tsv";
            //const string tdaResultPath = @"H:\Research\Charles\TopDown\Mod_M1\SBEP_STM_001_02272012_Aragon_IcTda.tsv";
            //const string targetResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.icresult";
            //const string decoyResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.decoy.icresult";
            var fdrCalculator = new FdrCalculator(targetResultPath, decoyResultPath);

            if (fdrCalculator.HasError())
            {
                throw new Exception(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            }

            fdrCalculator.WriteTo(tdaResultPath);
            Console.WriteLine(@"Done");
        }
コード例 #3
0
        public void RecomputeFdr()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string targetResultPath = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTarget_Rescored.tsv";
            const string decoyResultPath  = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcDecoy_Rescored.tsv";
            const string tdaResultPath    = @"D:\MassSpecFiles\training\Rescoring\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTda_Rescored.tsv";

            //const string targetResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.icresult";
            //const string decoyResultPath = @"C:\cygwin\home\kims336\Data\TopDown\raw\SBEP_STM_001_02272012_Aragon.decoy.icresult";
            var fdrCalculator = new FdrCalculator(targetResultPath, decoyResultPath);

            if (fdrCalculator.HasError())
            {
                throw new Exception(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            }

            fdrCalculator.WriteTo(tdaResultPath);
            Console.WriteLine("Done");
        }
コード例 #4
0
        public bool RunSearch(double corrThreshold)
        {
            var sw = new Stopwatch();

            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            sw.Start();
            _run            = InMemoryLcMsRun.GetLcMsRun(SpecFilePath, 1.4826, 1.4826);
            _bottomUpScorer = new InformedBottomUpScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;

            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            sw.Reset();
            Console.Write(@"Determining precursor masses...");
            sw.Start();
            var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(_run, PrecursorIonTolerance, MinPrecursorIonCharge, MaxPrecursorIonCharge,
                                                              400, 5000, corrThreshold, 0, 0);

            sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            sw.Reset();
            Console.Write(@"Deconvoluting MS2 spectra...");
            sw.Start();
            _ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(
                _run,
                MinProductIonCharge, MaxProductIonCharge,
                new Tolerance(10),
                0
                );
            _ms2ScorerFactory.DeconvoluteAllProductSpectra();
            sw.Stop();
            sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);

            //            string dirName = OutputDir ?? Path.GetDirectoryName(SpecFilePath);

            var baseName             = Path.GetFileNameWithoutExtension(SpecFilePath);
            var targetOutputFilePath = Path.Combine(OutputDir, baseName + TargetFileExtension);
            var decoyOutputFilePath  = Path.Combine(OutputDir, baseName + DecoyFileExtension);
            var tdaOutputFilePath    = Path.Combine(OutputDir, baseName + TdaFileExtension);

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                var targetMatches = RunSearch(GetAnnotationsAndOffsets(targetDb), ms1Filter, false);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Target database search elapsed time: {0:f4} sec", sec);

                sw.Reset();
                Console.Write(@"Rescoring and writing target results...");
                sw.Start();
                WriteResultsToFile(targetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed time: {0:f4} sec", sec);
            }

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                Console.Write(@"Reading the decoy database...");
                sw.Start();
                var decoyDb = targetDb.Decoy(Enzyme);
                decoyDb.Read();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                var decoyMatches = RunSearch(GetAnnotationsAndOffsets(decoyDb), ms1Filter, true);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.Write(@"Rescoring and writing decoy results...");
                sw.Start();
                WriteResultsToFile(decoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed time: {0:f4} sec", sec);
            }

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return(false);
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }

            Console.WriteLine(@"Done");
            return(true);
        }
コード例 #5
0
        public void TestIcTopDownFdr()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            Utils.ShowStarting(methodName);

            var targetFile       = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"IdFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcTarget.tsv");
            var targetResultPath = Utils.GetTestFile(methodName, targetFile);

            var decoyFile       = Path.Combine(Utils.DEFAULT_TEST_FILE_FOLDER, @"IdFiles\QC_Shew_Intact_26Sep14_Bane_C2Column3_IcDecoy.tsv");
            var decoyResultPath = Utils.GetTestFile(methodName, decoyFile);

            if (targetResultPath.DirectoryName == null)
            {
                Assert.Ignore("Cannot determine the parent directory of " + targetResultPath.FullName);
            }

            var tdaResultPath = Path.Combine(targetResultPath.DirectoryName, "QC_Shew_Intact_26Sep14_Bane_C2Column3_result.tsv");

            var fdrCalculator = new FdrCalculator(targetResultPath.FullName, decoyResultPath.FullName);

            if (fdrCalculator.HasError())
            {
                throw new Exception(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
            }

            fdrCalculator.WriteTo(tdaResultPath);

            using (var reader = new StreamReader(new FileStream(tdaResultPath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)))
            {
                if (reader.EndOfStream)
                {
                    Assert.Fail("Result file is empty: " + tdaResultPath);
                }

                var headerLine = reader.ReadLine();

                if (string.IsNullOrWhiteSpace(headerLine))
                {
                    Assert.Fail("Header line is empty: " + tdaResultPath);
                }

                var headerColumns = headerLine.Split('\t');

                if (headerColumns.Length < 21)
                {
                    Assert.Fail("Header line col count is less than 21: " + tdaResultPath);
                }

                var headerMap = new Dictionary <string, int>(StringComparer.InvariantCultureIgnoreCase);

                for (var i = 0; i < headerColumns.Length; i++)
                {
                    headerMap.Add(headerColumns[i], i);
                }

                Console.WriteLine("Headers: " + string.Join("  ", headerColumns));

                if (!headerMap.TryGetValue("QValue", out var qvalueColIndex))
                {
                    Assert.Fail("QValue not found in header line: " + tdaResultPath);
                }

                if (reader.EndOfStream)
                {
                    Assert.Fail("Result file has a header line but no results: " + tdaResultPath);
                }

                var qValueByScan = new Dictionary <int, string>();

                while (!reader.EndOfStream)
                {
                    var result = reader.ReadLine();
                    if (string.IsNullOrWhiteSpace(result))
                    {
                        continue;
                    }

                    var dataColumns = result.Split('\t');

                    if (dataColumns.Length < headerMap.Count)
                    {
                        Assert.Fail("Incomplete result line: " + result);
                    }

                    if (!int.TryParse(dataColumns[0], out var scanNumber))
                    {
                        Assert.Fail("Scan number is non-numeric: " + dataColumns[0]);
                    }

                    qValueByScan.Add(scanNumber, dataColumns[qvalueColIndex]);
                }

                Console.WriteLine("Result count: " + 2655);

                Assert.AreEqual(2655, qValueByScan.Count, "Result count {0} does not match expected count", qValueByScan.Count);

                VerifyQValue(tdaResultPath, qValueByScan, 1808, 0);         //9.99e-308);
                VerifyQValue(tdaResultPath, qValueByScan, 2565, 0);         //1.323038e-38);
                VerifyQValue(tdaResultPath, qValueByScan, 1682, 0);         // 1.912647e-12);
                VerifyQValue(tdaResultPath, qValueByScan, 3045, 0.0116883); //0.010666);
                VerifyQValue(tdaResultPath, qValueByScan, 2668, 0.0227943); //0.113394);
            }
            Console.WriteLine(@"Done, see " + tdaResultPath);
        }
コード例 #6
0
        public bool RunSearch(double corrThreshold = 0.7, CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
        {
            // Get the Normalized spec file/folder path
            SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

            var prog     = new Progress <ProgressData>();
            var progData = new ProgressData(progress);

            if (progress != null)
            {
                prog = new Progress <ProgressData>(p =>
                {
                    progData.Status         = p.Status;
                    progData.StatusInternal = p.StatusInternal;
                    progData.Report(p.Percent);
                });
            }

            var sw    = new Stopwatch();
            var swAll = new Stopwatch();

            swAll.Start();
            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            progData.Status = "Reading spectra file";
            progData.StepRange(10.0);
            sw.Start();

            _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

            _ms2ScanNums             = _run.GetScanNumbers(2).ToArray();
            _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
            foreach (var ms2Scan in _ms2ScanNums)
            {
                var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
                if (ms2Spec == null)
                {
                    continue;
                }
                _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
            }


            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(20.0);
            ISequenceFilter ms1Filter;

            if (this.ScanNumbers != null && this.ScanNumbers.Any())
            {
                ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
            }
            else if (string.IsNullOrWhiteSpace(FeatureFilePath))
            {
                // Checks whether SpecFileName.ms1ft exists
                var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
                if (!File.Exists(ms1FtFilePath))
                {
                    Console.WriteLine(@"Running ProMex...");
                    sw.Start();
                    var param = new LcMsFeatureFinderInputParameter
                    {
                        InputPath                = SpecFilePath,
                        MinSearchMass            = MinSequenceMass,
                        MaxSearchMass            = MaxSequenceMass,
                        MinSearchCharge          = MinPrecursorIonCharge,
                        MaxSearchCharge          = MaxPrecursorIonCharge,
                        CsvOutput                = false,
                        ScoreReport              = false,
                        LikelihoodScoreThreshold = -10
                    };
                    var featureFinder = new LcMsFeatureFinderLauncher(param);
                    featureFinder.Run();
                }
                sw.Reset();
                sw.Start();
                Console.Write(@"Reading ProMex results...");
                ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
            }
            else
            {
                sw.Reset();
                sw.Start();
                var extension = Path.GetExtension(FeatureFilePath);
                if (extension.ToLower().Equals(".csv"))
                {
                    Console.Write(@"Reading ICR2LS/Decon2LS results...");
                    ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else if (extension.ToLower().Equals(".ms1ft"))
                {
                    Console.Write(@"Reading ProMex results...");
                    ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
                }
                else if (extension.ToLower().Equals(".msalign"))
                {
                    Console.Write(@"Reading MS-Align+ results...");
                    ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else
                {
                    ms1Filter = null;  //new Ms1FeatureMatrix(_run);
                }
            }

            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);


            // pre-generate deconvoluted spectra for scoring
            _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass + 1000);

            _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet,
                                                            MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Reset();
            Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
            {
                _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
            });
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(10.0);
            progData.Status = "Reading Fasta File";

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);

            targetDb.Read();

            // Generate sequence tags for all MS/MS spectra
            if (TagBasedSearch)
            {
                progData.StepRange(25.0);
                progData.Status = "Generating Sequence Tags";
                sw.Reset();
                Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
                sw.Start();
                var seqTagGen = GetSequenceTagGenerator();
                _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
                                                                _ms2ScorerFactory2,
                                                                ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
                                                                MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);
            }

            var specFileName         = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
            var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
            var decoyOutputFilePath  = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
            var tdaOutputFilePath    = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

            progData.StepRange(60.0);
            progData.Status = "Running Target search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                var targetMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];

                progData.MaxPercentage = 42.5;
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the target database");
                    sw.Start();
                    RunTagBasedSearch(targetMatches, targetDb, null, prog);
                    Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 60.0;

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
                Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
                sw.Start();
                var bestTargetMatches = RunGeneratingFunction(targetMatches);
                WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(95.0); // total to 95%
            progData.Status = "Running Decoy search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                sw.Start();
                var decoyDb = targetDb.Decoy(null, true);

                Console.Write(@"Reading the decoy database...");
                decoyDb.Read();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                progData.MaxPercentage = 77.5;
                var decoyMatches = new SortedSet <DatabaseSequenceSpectrumMatch> [_run.MaxLcScan + 1];
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the decoy database");
                    sw.Start();
                    RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
                    Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 95.0;

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
                sw.Start();
                var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
                WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(100.0);
            progData.Status = "Writing combined results file";
            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                // Add "Qvalue" and "PepQValue"
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return(false);
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }
            progData.Report(100.0);

            Console.WriteLine(@"Done.");
            swAll.Stop();
            Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

            return(true);
        }