Beispiel #1
0
 public double GetScore(Sequence sequence, int charge, int scan, LcMsRun lcmsRun)
 {
     var mass = sequence.Composition.Mass + Composition.H2O.Mass;
     var spectrum = lcmsRun.GetSpectrum(scan);
     var ionTypes = _rankScore.GetIonTypes(charge, mass);
     var filteredSpectrum = SpectrumFilter.FilterIonPeaks(sequence, spectrum, ionTypes, _tolerance);
     var match = new SpectrumMatch(sequence, filteredSpectrum, charge);
     var score = 0.0;
     var rankedPeaks = new RankedPeaks(filteredSpectrum);
     foreach (var ionType in ionTypes)
     {
         var ions = match.GetCleavageIons(ionType);
         foreach (var ion in ions)
         {
             var rank = rankedPeaks.RankIon(ion, _tolerance);
             score += _rankScore.GetScore(ionType, rank, charge, mass);
         }
     }
     return score;
 }
Beispiel #2
0
        public void CreateSequenceMassToMs2ScansMap(LcMsRun run, Tolerance tolerance, double minMass, double maxMass)
        {
            // Make a bin to scan numbers map without considering tolerance
            var massBinToScanNumsMapNoTolerance = new Dictionary<int, List<int>>();
            var minBinNum = GetBinNumber(minMass);
            var maxBinNum = GetBinNumber(maxMass);
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                IList<IntRange> scanRanges;
                if (!_map.TryGetValue(binNum, out scanRanges)) continue;
                var sequenceMass = GetMass(binNum);
                var ms2ScanNums = new List<int>();

                foreach (var scanRange in scanRanges)
                {
                    for (var scanNum = scanRange.Min; scanNum <= scanRange.Max; scanNum++)
                    {
                        
                        if (scanNum < run.MinLcScan || scanNum > run.MaxLcScan) continue;
                        if (run.GetMsLevel(scanNum) == 2)
                        {
                            var productSpec = run.GetSpectrum(scanNum) as ProductSpectrum;
                            if (productSpec == null) continue;
                            var isolationWindow = productSpec.IsolationWindow;
                            var isolationWindowTargetMz = isolationWindow.IsolationWindowTargetMz;
                            var charge = (int)Math.Round(sequenceMass / isolationWindowTargetMz);
                            var mz = Ion.GetIsotopeMz(sequenceMass, charge,
                                Averagine.GetIsotopomerEnvelope(sequenceMass).MostAbundantIsotopeIndex);
                            if (productSpec.IsolationWindow.Contains(mz)) ms2ScanNums.Add(scanNum);
                        }
                    }
                }

                ms2ScanNums.Sort();
                massBinToScanNumsMapNoTolerance.Add(binNum, ms2ScanNums);
            }

            // Account for mass tolerance
            _sequenceMassBinToScanNumsMap = new Dictionary<int, IEnumerable<int>>();
            var sumScanNums = 0L;
            for (var binNum = minBinNum; binNum <= maxBinNum; binNum++)
            {
                var sequenceMass = GetMass(binNum);
                var deltaMass = tolerance.GetToleranceAsDa(sequenceMass, 1);

                var curMinBinNum = GetBinNumber(sequenceMass - deltaMass);
                var curMaxBinNum = GetBinNumber(sequenceMass + deltaMass);

                var ms2ScanNums = new HashSet<int>();
                for (var curBinNum = curMinBinNum; curBinNum <= curMaxBinNum; curBinNum++)
                {
                    if (curBinNum < minBinNum || curBinNum > maxBinNum) continue;
                    List<int> existingMs2ScanNums;
                    if (!massBinToScanNumsMapNoTolerance.TryGetValue(curBinNum, out existingMs2ScanNums)) continue;
                    foreach (var ms2ScanNum in existingMs2ScanNums)
                    {
                        ms2ScanNums.Add(ms2ScanNum);
                    }
                }
                _sequenceMassBinToScanNumsMap[binNum] = ms2ScanNums.ToArray();
                sumScanNums += ms2ScanNums.Count;
            }
            Console.WriteLine("#MS/MS matches per sequence: {0}", sumScanNums / (float)(maxBinNum-minBinNum+1));
            _map = null;
        }
Beispiel #3
0
        public bool RunSearch(double corrThreshold = 0.7, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
        {
            // Get the Normalized spec file/folder path
            SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

            var prog = new Progress<ProgressData>();
            var progData = new ProgressData(progress);
            if (progress != null)
            {
                prog = new Progress<ProgressData>(p =>
                {
                    progData.Status = p.Status;
                    progData.StatusInternal = p.StatusInternal;
                    progData.Report(p.Percent);
                });
            }

            var sw = new Stopwatch();
            var swAll = new Stopwatch();
            swAll.Start();
            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            progData.Status = "Reading spectra file";
            progData.StepRange(10.0);
            sw.Start();

            _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

            _ms2ScanNums = _run.GetScanNumbers(2).ToArray();
            _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
            foreach (var ms2Scan in _ms2ScanNums)
            {
                var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
                if (ms2Spec == null) continue;
                _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
            }

            
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(20.0);
            ISequenceFilter ms1Filter;
            if (this.ScanNumbers != null && this.ScanNumbers.Any())
            {
                ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
            }
            else if (string.IsNullOrWhiteSpace(FeatureFilePath))
            {
                // Checks whether SpecFileName.ms1ft exists
                var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
                if (!File.Exists(ms1FtFilePath))
                {
                    Console.WriteLine(@"Running ProMex...");
                    sw.Start();
                    var param = new LcMsFeatureFinderInputParameter
                    {
                        InputPath = SpecFilePath,
                        MinSearchMass = MinSequenceMass,
                        MaxSearchMass = MaxSequenceMass,
                        MinSearchCharge = MinPrecursorIonCharge,
                        MaxSearchCharge = MaxPrecursorIonCharge,
                        CsvOutput = false,
                        ScoreReport = false,
                        LikelihoodScoreThreshold = -10
                    };
                    var featureFinder = new LcMsFeatureFinderLauncher(param);
                    featureFinder.Run();
                }
                sw.Reset();
                sw.Start();
                Console.Write(@"Reading ProMex results...");
                ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
            }
            else
            {
                sw.Reset();
                sw.Start();
                var extension = Path.GetExtension(FeatureFilePath);
                if (extension.ToLower().Equals(".csv"))
                {
                    Console.Write(@"Reading ICR2LS/Decon2LS results...");
                    ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else if (extension.ToLower().Equals(".ms1ft"))
                {
                    Console.Write(@"Reading ProMex results...");
                    ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
                }
                else if (extension.ToLower().Equals(".msalign"))
                {
                    Console.Write(@"Reading MS-Align+ results...");
                    ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else ms1Filter = null; //new Ms1FeatureMatrix(_run);
            }

            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            
            // pre-generate deconvoluted spectra for scoring
            _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass+1000);

            _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet,
                                                               MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Reset();
            Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken = cancellationToken ?? CancellationToken.None
            };
            Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
            {
                _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
            });
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(10.0);
            progData.Status = "Reading Fasta File";

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);
            targetDb.Read();
            
            // Generate sequence tags for all MS/MS spectra
            if (TagBasedSearch)
            {
                progData.StepRange(25.0);
                progData.Status = "Generating Sequence Tags";
                sw.Reset();
                Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
                sw.Start();
                var seqTagGen = GetSequenceTagGenerator();
                _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
                                _ms2ScorerFactory2,
                                ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
                                MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);                
            }
            
            var specFileName = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
            var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
            var decoyOutputFilePath = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
            var tdaOutputFilePath = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

            progData.StepRange(60.0);
            progData.Status = "Running Target search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                var targetMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
                
                progData.MaxPercentage = 42.5;
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the target database");
                    sw.Start();
                    RunTagBasedSearch(targetMatches, targetDb, null, prog);
                    Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 60.0;

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
                Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
                sw.Start();
                var bestTargetMatches = RunGeneratingFunction(targetMatches);
                WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(95.0); // total to 95%
            progData.Status = "Running Decoy search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                sw.Start();
                var decoyDb = targetDb.Decoy(null, true);

                Console.Write(@"Reading the decoy database...");
                decoyDb.Read();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                progData.MaxPercentage = 77.5;
                var decoyMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the decoy database");
                    sw.Start();
                    RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
                    Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);                    
                }
                progData.MaxPercentage = 95.0;

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value usign generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
                sw.Start();
                var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
                WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(100.0);
            progData.Status = "Writing combined results file";
            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                // Add "Qvalue" and "PepQValue"
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return false;
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }
            progData.Report(100.0);

            Console.WriteLine(@"Done.");
            swAll.Stop();
            Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

            return true;
        }
        private List<ProductSpectrum> GetMatchedSpectrums(LcMsRun run, IList<int> ms2List ,Tuple<int,double, double, double, double,double> feature, int fileIndex)
        {
            var spectrumList = new List<ProductSpectrum>();
            var mass = feature.Item2;
            var minElution = feature.Item3;
            var maxElution = feature.Item4;
            var featureId = feature.Item1 - 1;
            var det = new Tuple<int, double, int>(-1, -1.0, -1);
            for (var i = 0; i < ms2List.Count; i++)
            {
                var scanElutionTime = run.GetElutionTime(ms2List[i]);
                if (scanElutionTime < minElution || scanElutionTime > maxElution) continue;
                
                var spectrum = run.GetSpectrum(ms2List[i]) as ProductSpectrum;
                var window = spectrum.IsolationWindow;
                var minMz = window.MinMz - .5;
                var maxMz = window.MaxMz + .5;
                var mzTable = GetFeatureMassTable(mass);
                 
                for (var j = 0; j < mzTable.Length; j++)
                {
                    var mz = mzTable[j];
                    if (mz < minMz || mz > maxMz) continue;
                    spectrumList.Add(spectrum);
                    if(!(det.Item1 > -1)) det = new Tuple<int, double, int>(ms2List[i],mz,j+2);
                    break;
                }
                _identifiedFeatures[featureId][fileIndex] = det;

            }
            return spectrumList;
        }
        private List<XYData> LoadSpectra(LcMsRun ipbReader, int scan)
        {
            var spec = ipbReader.GetSpectrum(scan);

            if (spec.Peaks == null)
                return new List<XYData>();

            //var data = new List<XYData>(spec.Peaks.Length);
            //for (var i = 0; i < spec.Peaks.Length; i++)
            //{
            //    data.Add(new XYData(spec.Peaks[i].Mz, spec.Peaks[i].Intensity));
            //}
            //return data;
            return spec.Peaks.Select(peak => new XYData(peak.Mz, peak.Intensity)).ToList();
        }
        private ScanSummary GetScanSummary(int scan, LcMsRun ipbReader)
        {
            var spec = ipbReader.GetSpectrum(scan, true);

            var summary = new ScanSummary
            {
                MsLevel = spec.MsLevel,
                Time = spec.ElutionTime,
                Scan = spec.ScanNum,
                //TotalIonCurrent = Convert.ToInt64(header.TotalIonCurrent), // Only used in PNNLOmics.Algorithms.Chromatograms.XicCreator.CreateXic(...)
                TotalIonCurrent = spec.Peaks.Select(peak => (long)peak.Intensity).Sum(), // Only used in PNNLOmics.Algorithms.Chromatograms.XicCreator.CreateXic(...)
                PrecursorMz = 0,
                CollisionType = CollisionType.Other,
            };

            if (spec is ProductSpectrum)
            {
                var pspec = spec as ProductSpectrum;
                if (pspec.IsolationWindow.MonoisotopicMass != null)
                {
                    summary.PrecursorMz = pspec.IsolationWindow.MonoisotopicMass.Value;
                }

                switch (pspec.ActivationMethod)
                {
                    case ActivationMethod.CID:
                        summary.CollisionType = CollisionType.Cid;
                        break;
                    case ActivationMethod.HCD:
                        summary.CollisionType = CollisionType.Hcd;
                        break;
                    case ActivationMethod.ETD:
                        summary.CollisionType = CollisionType.Etd;
                        break;
                    case ActivationMethod.ECD:
                        summary.CollisionType = CollisionType.Ecd;
                        break;
                    case ActivationMethod.PQD:
                        //summary.CollisionType = CollisionType.Hid; // HID? what is HID?
                        break;
                }
            }
            return summary;
        }