Пример #1
0
 /// <summary>
 /// Creates a feature map, deriving the last two arguments of the delegated constructor from the run:
 /// the elution time of <c>run.MinLcScan</c> minus 5 (never below 0) and
 /// the elution time of <c>run.MaxLcScan</c> plus 5.
 /// </summary>
 /// <param name="run">LcMsRun (used to determine the minimum and maximum elution time)</param>
 /// <param name="features">List of features</param>
 /// <param name="title">Plot title</param>
 /// <param name="minMass">Minimum mass</param>
 /// <param name="maxMass">Maximum mass</param>
 public LcMsFeatureMap(
     LcMsRun run,
     IEnumerable<LcMsFeature> features,
     string title,
     double minMass,
     double maxMass)
     : this(
         features,
         title,
         minMass,
         maxMass,
         Math.Max(0, run.GetElutionTime(run.MinLcScan) - 5),
         run.GetElutionTime(run.MaxLcScan) + 5)
 {
 }
Пример #2
0
 /// <summary>
 /// Looks up the MS2 scan numbers stored for the mass bin that the given sequence mass falls into.
 /// </summary>
 /// <param name="sequenceMass">Sequence mass; converted to a bin number with GetBinNumber</param>
 /// <param name="tolerance">Not read by this method</param>
 /// <param name="run">Not read by this method</param>
 /// <returns>The scan numbers stored for the bin, or a new empty array when the bin has no entry</returns>
 public IEnumerable<int> GetMatchingMs2ScanNums(double sequenceMass, Tolerance tolerance, LcMsRun run)
 {
     IEnumerable<int> scanNumsForBin;
     var binIsKnown = _sequenceMassBinToScanNumsMap.TryGetValue(GetBinNumber(sequenceMass), out scanNumsForBin);
     return binIsKnown ? scanNumsForBin : new int[0];
 }
Пример #3
0
        /// <summary>
        /// Stores the search inputs and settings, wraps the FASTA database in a SearchableDatabase,
        /// and initializes MinScan/MaxScan to the full int range.
        /// </summary>
        /// <param name="run">LC-MS run to search</param>
        /// <param name="seqTagFinder">Sequence tag finder</param>
        /// <param name="featureFinder">Peak matrix stored as the feature finder</param>
        /// <param name="fastaDb">FASTA database; wrapped in a SearchableDatabase</param>
        /// <param name="tolerance">Tolerance stored for the search</param>
        /// <param name="aaSet">Amino acid set</param>
        /// <param name="ms2ScorerFactory">Optional MS2 scorer factory (may be null)</param>
        /// <param name="minMatchedTagLength">Minimum matched tag length</param>
        /// <param name="maxSequenceMass">Maximum sequence mass</param>
        /// <param name="minProductIonCharge">Minimum product ion charge</param>
        /// <param name="maxProductIonCharge">Maximum product ion charge</param>
        public ScanBasedTagSearchEngine(
            LcMsRun run,
            ISequenceTagFinder seqTagFinder,
            LcMsPeakMatrix featureFinder,
            FastaDatabase fastaDb,
            Tolerance tolerance,
            AminoAcidSet aaSet,
            CompositeScorerFactory ms2ScorerFactory = null,
            int minMatchedTagLength = DefaultMinMatchedTagLength,
            double maxSequenceMass = 50000.0,
            int minProductIonCharge = 1,
            int maxProductIonCharge = 20)
        {
            // Inputs
            _run = run;
            _seqTagFinder = seqTagFinder;
            _featureFinder = featureFinder;
            _searchableDb = new SearchableDatabase(fastaDb);

            // Matching and scoring settings
            _aaSet = aaSet;
            _tolerance = tolerance;
            _ms2ScorerFactory = ms2ScorerFactory;
            _minMatchedTagLength = minMatchedTagLength;
            _maxSequenceMass = maxSequenceMass;
            _minProductIonCharge = minProductIonCharge;
            _maxProductIonCharge = maxProductIonCharge;

            // Widest possible scan range until a caller overrides it
            MinScan = int.MinValue;
            MaxScan = int.MaxValue;
        }
Пример #4
0
        /// <summary>
        /// Builds an LcMsChargeMap for the run, stores the likelihood ratio threshold,
        /// and then reads the given ms1ft file.
        /// </summary>
        /// <param name="run">LC-MS run handed to the charge map</param>
        /// <param name="massTolerance">Mass tolerance handed to the charge map</param>
        /// <param name="ms1FtFileName">Path of the file passed to Read</param>
        /// <param name="minLikelihoodRatio">Minimum likelihood ratio (default 0)</param>
        public Ms1FtFilter(LcMsRun run, Tolerance massTolerance, string ms1FtFileName, double minLikelihoodRatio = 0)
        {
            _minLikelihoodRatio = minLikelihoodRatio;
            _lcMsChargeMap = new LcMsChargeMap(run, massTolerance);

            // Read runs last, once both fields above are in place.
            Read(ms1FtFileName);
        }
Пример #5
0
 /// <summary>
 /// Stores the search inputs and settings, creates an Ms1FtFilter from the feature parser's
 /// ms1ft file name, and wraps the FASTA database in a SearchableDatabase.
 /// </summary>
 /// <param name="run">LC-MS run to search</param>
 /// <param name="featureParser">Feature parser; its Ms1FtFileName is read here, so it must not be null</param>
 /// <param name="ms2Scorer">MS2 scorer (another constructor in this file passes null)</param>
 /// <param name="tagParser">Sequence tag parser</param>
 /// <param name="fastaDb">FASTA database</param>
 /// <param name="tolerance">Tolerance; also handed to the Ms1FtFilter</param>
 /// <param name="aaSet">Amino acid set</param>
 /// <param name="maxSequenceMass">Maximum sequence mass</param>
 /// <param name="minProductIonCharge">Minimum product ion charge</param>
 /// <param name="maxProductIonCharge">Maximum product ion charge</param>
 public FeatureBasedTagSearchEngine(
     LcMsRun run,
     Ms1FtParser featureParser,
     ProductScorerBasedOnDeconvolutedSpectra ms2Scorer,
     SequenceTagParser tagParser,
     FastaDatabase fastaDb,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass = 50000.0,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 20)
 {
     // Plain references
     _run = run;
     _ms2Scorer = ms2Scorer;
     _featureParser = featureParser;
     _tagParser = tagParser;
     _fastaDb = fastaDb;
     _tolerance = tolerance;
     _aaSet = aaSet;

     // Numeric limits
     _maxSequenceMass = maxSequenceMass;
     _minProductIonCharge = minProductIonCharge;
     _maxProductIonCharge = maxProductIonCharge;

     // Derived helpers (filter first, then the searchable database)
     _ms1FtFilter = new Ms1FtFilter(run, tolerance, featureParser.Ms1FtFileName);
     _searchableDb = new SearchableDatabase(fastaDb);
 }
Пример #6
0
        /// <summary>
        /// Loads the MS1 spectra of the run, collects all their peaks into a sorted list,
        /// and allocates the per-row working arrays.
        /// </summary>
        /// <param name="run">LC-MS run providing the MS1 scan vector and spectra</param>
        /// <param name="scorer">Optional likelihood scorer (may be null)</param>
        /// <param name="minScanCharge">Stored as MinSearchCharge</param>
        /// <param name="maxScanCharge">Stored as MaxSearchCharge</param>
        /// <param name="maxThreadCount">Stored thread-count setting</param>
        public LcMsPeakMatrix(LcMsRun run, LcMsFeatureLikelihood scorer = null, int minScanCharge = 1, int maxScanCharge = 60, int maxThreadCount = 0)
        {
            Run = run;
            _maxThreadCount = maxThreadCount;

            _ms1PeakList = new List<Ms1Peak>();
            Ms1Spectra = new List<Ms1Spectrum>();
            var scanVector = run.GetMs1ScanVector();

            // Matrix dimensions: one column per MS1 scan, at most 35 rows.
            NColumns = scanVector.Length;
            MinSearchCharge = minScanCharge;
            MaxSearchCharge = maxScanCharge;
            NRows = Math.Min(35, MaxSearchCharge - MinSearchCharge + 1);

            // NOTE(review): at most ushort.MaxValue spectra are loaded, while NColumns above
            // uses the full scan count - confirm that the difference is intended.
            var spectraToLoad = Math.Min(scanVector.Length, ushort.MaxValue);
            for (var scanIndex = 0; scanIndex < spectraToLoad; scanIndex++)
            {
                var spectrum = run.GetMs1Spectrum(scanVector[scanIndex]);
                Ms1Spectra.Add(spectrum);
                foreach (var peak in spectrum.Peaks)
                {
                    _ms1PeakList.Add(peak as Ms1Peak);
                }
            }
            _ms1PeakList.Sort();

            // Per-row scratch buffers
            _distProfileAcrossCharge = new double[NRows];
            _corrProfileAcrossCharge = new double[NRows];
            _intensityAcrossCharge = new double[NRows];
            _summedEnvelopeColRange = new int[NRows, 2];

            _featureMatrix = null;
            _ms1Filter = null;
            _scorer = scorer;
            _seedEnvelopes = new List<KeyValuePair<double, ObservedIsotopeEnvelope>>();
            Comparer = new MzComparerWithBinning(27); // 16ppm
        }
Пример #7
0
        /// <summary>
        /// Stores the run and mass tolerance, creates an empty match map,
        /// and then reads the given MS-Deconv file.
        /// </summary>
        /// <param name="run">LC-MS run</param>
        /// <param name="massTolerance">Mass tolerance</param>
        /// <param name="msDeconvFileName">Path of the file passed to Read</param>
        public MsDeconvFilter(LcMsRun run, Tolerance massTolerance, string msDeconvFileName)
        {
            _lcMsMatchMap = new LcMsMatchMap();
            _massTolerance = massTolerance;
            _run = run;

            // All fields are set before the file is read.
            Read(msDeconvFileName);
        }
Пример #8
0
        /// <summary>
        /// Stores the run, mass tolerance and fit score threshold, creates an empty match map,
        /// and then reads the given isos file.
        /// </summary>
        /// <param name="run">LC-MS run</param>
        /// <param name="massTolerance">Mass tolerance</param>
        /// <param name="isosFileName">Path of the file passed to Read</param>
        /// <param name="fitScoreThreshold">Fit score threshold (default 1.0)</param>
        public IsosFilter(LcMsRun run, Tolerance massTolerance, string isosFileName, double fitScoreThreshold = 1.0)
        {
            _lcMsMatchMap = new LcMsMatchMap();
            _fitScoreThreshold = fitScoreThreshold;
            _massTolerance = massTolerance;
            _run = run;

            // All fields are set before the file is read.
            Read(isosFileName);
        }
Пример #9
0
 /// <summary>
 /// Stores the run, amino acid set, product charge range, tolerance and MS2 correlation
 /// threshold in the corresponding properties.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="aaSet">Amino acid set</param>
 /// <param name="minProductCharge">Minimum product charge</param>
 /// <param name="maxProductCharge">Maximum product charge</param>
 /// <param name="tolerance">Tolerance</param>
 /// <param name="ms2CorrThreshold">MS2 correlation threshold (default 0.7)</param>
 public InformedTopDownScorer(
     LcMsRun run,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance tolerance,
     double ms2CorrThreshold = 0.7)
 {
     Run = run;
     AminoAcidSet = aaSet;
     Tolerance = tolerance;
     Ms2CorrThreshold = ms2CorrThreshold;
     MinProductCharge = minProductCharge;
     MaxProductCharge = maxProductCharge;
 }
Пример #10
0
 /// <summary>
 /// Stores the run, amino acid set, product charge range and tolerance, and creates
 /// a RankScore (HCD / Orbitrap / Trypsin / Standard) plus an empty scored-spectrum cache.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="aaSet">Amino acid set</param>
 /// <param name="minProductCharge">Minimum product charge</param>
 /// <param name="maxProductCharge">Maximum product charge</param>
 /// <param name="tolerance">Tolerance</param>
 public InformedBottomUpScorer(
     LcMsRun run,
     AminoAcidSet aaSet,
     int minProductCharge,
     int maxProductCharge,
     Tolerance tolerance)
 {
     Run = run;
     AminoAcidSet = aaSet;
     Tolerance = tolerance;
     MinProductCharge = minProductCharge;
     MaxProductCharge = maxProductCharge;

     _scoredSpectra = new Dictionary<int, ScoredSpectrum>();
     _rankScorer = new RankScore(ActivationMethod.HCD, Ms2DetectorType.Orbitrap, Enzyme.Trypsin, Protocol.Standard);
 }
Пример #11
0
        /// <summary>
        /// Stores the run, tolerance and tag length limits, selects the amino acid array,
        /// and creates an empty scan-to-tag map.
        /// </summary>
        /// <param name="run">LC-MS run</param>
        /// <param name="tolerance">Tolerance</param>
        /// <param name="minTagLength">Minimum tag length (default 5)</param>
        /// <param name="maxTagLength">Maximum tag length (default 8)</param>
        /// <param name="aminoAcidsArray">Amino acids to use; AminoAcid.StandardAminoAcidArr when null</param>
        public SequenceTagGenerator(LcMsRun run, Tolerance tolerance, int minTagLength = 5, int maxTagLength = 8,
            AminoAcid[] aminoAcidsArray = null)
        {
            _run = run;
            _tolerance = tolerance;
            _maxTagLen = maxTagLength;
            _minTagLen = minTagLength;

            if (aminoAcidsArray != null)
            {
                _aminoAcids = aminoAcidsArray;
            }
            else
            {
                _aminoAcids = AminoAcid.StandardAminoAcidArr;
            }

            _ms2ScanToTagMap = new Dictionary<int, IList<SequenceTag>>();
        }
Пример #12
0
 /// <summary>
 /// Stores the run, charge range and ppm tolerance, creates an empty mass-bin map,
 /// and then runs PrecomputeMostAbundantMzToMatches.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="minCharge">Minimum charge (default 3)</param>
 /// <param name="maxCharge">Maximum charge (default 30)</param>
 /// <param name="ppmTolerance">Tolerance value used to build the Tolerance object (default 15)</param>
 public Ms1BasedFilter(
     LcMsRun run,
     int minCharge = 3, int maxCharge = 30,
     double ppmTolerance = 15)
 {
     _run = run;

     _maxCharge = maxCharge;
     _minCharge = minCharge;

     _ppmTolerance = ppmTolerance;
     _tolerance = new Tolerance(ppmTolerance);

     _sequenceMassBinToScanNumsMap = new Dictionary<int, IList<int>>();

     // Runs last so that every field above is available to it.
     PrecomputeMostAbundantMzToMatches();
 }
Пример #13
0
        /// <summary>
        /// Stores the run and settings, creates the empty lookup tables, and records the
        /// isolation window of every MS2 scan that has one.
        /// </summary>
        /// <param name="run">LC-MS run whose level-2 scans are enumerated</param>
        /// <param name="tolerance">Tolerance</param>
        /// <param name="maxNumMs2ScansPerMass">Stored limit; defaults to MaxNumMs2ScansPerFeature</param>
        public LcMsChargeMap(LcMsRun run, Tolerance tolerance, int maxNumMs2ScansPerMass = MaxNumMs2ScansPerFeature)
        {
            _run = run;
            _tolerance = tolerance;
            _maxNumMs2ScansPerMass = maxNumMs2ScansPerMass;
            _comparer = new MzComparerWithBinning(30);  // 2 ppm binning

            // Empty lookup tables
            _map = new Dictionary<int, BitArray>();
            _scanNumToMassBin = new Dictionary<int, List<int>>();
            _sequenceMassBinToScanNumsMap = new Dictionary<int, IEnumerable<int>>();

            // Isolation windows of MS2 scans; scans without a window are left out.
            _scanToIsolationWindow = new Dictionary<int, IsolationWindow>();
            foreach (var scanNum in _run.GetScanNumbers(2))
            {
                var window = _run.GetIsolationWindow(scanNum);
                if (window == null) continue;
                _scanToIsolationWindow.Add(scanNum, window);
            }
        }
Пример #14
0
        /// <summary>
        /// Stores the run, charge range, thresholds and ppm tolerance, creates the comparer and
        /// an empty mass-bin map, and then runs PrecomputePossibleSequenceMasses.
        /// </summary>
        /// <param name="run">LC-MS run</param>
        /// <param name="minCharge">Minimum charge (default 3)</param>
        /// <param name="maxCharge">Maximum charge (default 30)</param>
        /// <param name="ppmTolerance">Tolerance value used for both the Tolerance object and the comparer (default 15)</param>
        /// <param name="corrThreshold">Correlation threshold (default 0.5)</param>
        /// <param name="maxNumDeisotopedPeaksPerIsolationWindow">Stored in the property of the same name (default 40)</param>
        public Ms1IsotopeCorrFilter(
            LcMsRun run, 
            int minCharge = 3, int maxCharge = 30,
            double ppmTolerance = 15,
            double corrThreshold = 0.5,
            int maxNumDeisotopedPeaksPerIsolationWindow = 40)
        {
            _run = run;

            _maxCharge = maxCharge;
            _minCharge = minCharge;
            _corrThreshold = corrThreshold;
            MaxNumDeisotopedPeaksPerIsolationWindow = maxNumDeisotopedPeaksPerIsolationWindow;

            _ppmTolerance = ppmTolerance;
            _tolerance = new Tolerance(ppmTolerance);
            _comparer = new MzComparerWithPpmTolerance(ppmTolerance);

            _sequenceMassBinToScanNumsMap = new Dictionary<int, IList<int>>();

            // Runs last so that every field above is available to it.
            PrecomputePossibleSequenceMasses();
        }
Пример #15
0
 /// <summary>
 /// Scores a sequence against the spectrum of one scan by summing the rank score of every
 /// cleavage ion of every ion type returned for the charge and mass.
 /// </summary>
 /// <param name="sequence">Sequence to score; its composition mass plus water is used as the mass</param>
 /// <param name="charge">Charge passed to the rank scorer and the spectrum match</param>
 /// <param name="scan">Scan number whose spectrum is fetched from the run</param>
 /// <param name="lcmsRun">Run providing the spectrum</param>
 /// <returns>Sum of the individual ion scores (0.0 when there are no ions)</returns>
 public double GetScore(Sequence sequence, int charge, int scan, LcMsRun lcmsRun)
 {
     var massWithWater = sequence.Composition.Mass + Composition.H2O.Mass;
     var rawSpectrum = lcmsRun.GetSpectrum(scan);
     var ionTypes = _rankScore.GetIonTypes(charge, massWithWater);

     // Only the filtered spectrum is matched and ranked.
     var ionSpectrum = SpectrumFilter.FilterIonPeaks(sequence, rawSpectrum, ionTypes, _tolerance);
     var spectrumMatch = new SpectrumMatch(sequence, ionSpectrum, charge);
     var peakRanks = new RankedPeaks(ionSpectrum);

     var total = 0.0;
     foreach (var ionType in ionTypes)
     {
         foreach (var ion in spectrumMatch.GetCleavageIons(ionType))
         {
             total += _rankScore.GetScore(ionType, peakRanks.RankIon(ion, _tolerance), charge, massWithWater);
         }
     }
     return total;
 }
Пример #16
0
        /// <summary>
        /// Stores the inputs, derives the trimmed theoretical isotope envelope of the protein
        /// (composition plus water), and computes the XIC arrays.
        /// </summary>
        /// <param name="proteinComposition">Protein composition; water is added before use</param>
        /// <param name="run">LC-MS run</param>
        /// <param name="tolernace">Tolerance (parameter name kept as is for callers)</param>
        /// <param name="factory">Sub-score factory</param>
        public TopDownScorer(Composition proteinComposition, LcMsRun run, Tolerance tolernace, SubScoreFactory factory)
        {
            _run = run;
            _tolerance = tolernace;
            _factory = factory;
            _proteinCompositionPlusWater = proteinComposition + Composition.H2O;

            _maxIntensityIsotopeIndex = _proteinCompositionPlusWater.GetMostAbundantIsotopeZeroBasedIndex();
            var fullEnvelope = _proteinCompositionPlusWater.GetIsotopomerEnvelopeRelativeIntensities();

            // First isotope whose relative intensity exceeds MinIsotopeIntensity; stays 0 if none does.
            _minIsotopeIndex = 0;
            for (var isotope = 0; isotope < fullEnvelope.Length; isotope++)
            {
                if (fullEnvelope[isotope] > MinIsotopeIntensity)
                {
                    _minIsotopeIndex = isotope;
                    break;
                }
            }

            // Keep isotopes from _minIsotopeIndex up to NumberAfterMaxIsotopeIndex past the most
            // abundant one, capped at the envelope length.
            var endExclusive = Math.Min(_maxIntensityIsotopeIndex + NumberAfterMaxIsotopeIndex, fullEnvelope.Length);
            _isotopeEnvelope = new double[endExclusive - _minIsotopeIndex];
            for (var offset = 0; offset < _isotopeEnvelope.Length; offset++)
            {
                _isotopeEnvelope[offset] = fullEnvelope[_minIsotopeIndex + offset];
            }

            // Computed last, after the fields above are set; smoothing follows the raw XIC.
            _xicArray = GetXicArray();
            _smoothedXicArray = GetSmoothedXicArray();
        }
Пример #17
0
        /// <summary>
        /// Creates a peak cluster: forwards mass, charge, representative m/z, scan number and
        /// abundance to the base constructor, stores the run and theoretical envelope, and
        /// allocates the score arrays.
        /// </summary>
        /// <param name="run">LC-MS run</param>
        /// <param name="theoreticalIsotopeEnvelope">Theoretical envelope; its Size sets the length of RepresentativeSummedEnvelop</param>
        /// <param name="mass">Forwarded to the base constructor</param>
        /// <param name="charge">Forwarded to the base constructor</param>
        /// <param name="repMz">Forwarded to the base constructor</param>
        /// <param name="repScanNum">Forwarded to the base constructor</param>
        /// <param name="abundance">Forwarded to the base constructor</param>
        public LcMsPeakCluster(LcMsRun run, TheoreticalIsotopeEnvelope theoreticalIsotopeEnvelope, double mass, int charge, double repMz, int repScanNum, double abundance)
            : base(mass, charge, repMz, repScanNum, abundance)
        {
            // Every per-charge array below has this fixed length.
            const int pairLength = 2;

            _run = run;
            _initScore = false;
            Flag = 0;

            TheoreticalEnvelope = theoreticalIsotopeEnvelope;
            RepresentativeSummedEnvelop = new double[TheoreticalEnvelope.Size];

            AbundanceDistributionAcrossCharge = new double[pairLength];
            BestCorrelationScoreAcrossCharge = new double[pairLength];
            BestDistanceScoreAcrossCharge = new double[pairLength];
            BestIntensityScoreAcrossCharge = new double[pairLength];

            EnvelopeDistanceScoreAcrossCharge = new double[pairLength];
            EnvelopeCorrelationScoreAcrossCharge = new double[pairLength];
            EnvelopeIntensityScoreAcrossCharge = new double[pairLength];
            BestCharge = new int[pairLength];

            XicCorrelationBetweenBestCharges = new double[pairLength];
        }
Пример #18
0
 /// <summary>
 /// Convenience overload without an MS2 scorer: delegates to the ten-argument constructor,
 /// passing null as the scorer.
 /// </summary>
 /// <param name="run">LC-MS run to search</param>
 /// <param name="featureParser">Feature parser</param>
 /// <param name="tagParser">Sequence tag parser</param>
 /// <param name="fastaDb">FASTA database</param>
 /// <param name="tolerance">Tolerance</param>
 /// <param name="aaSet">Amino acid set</param>
 /// <param name="maxSequenceMass">Maximum sequence mass</param>
 /// <param name="minProductIonCharge">Minimum product ion charge</param>
 /// <param name="maxProductIonCharge">Maximum product ion charge</param>
 public FeatureBasedTagSearchEngine(
     LcMsRun run,
     Ms1FtParser featureParser,
     SequenceTagParser tagParser,
     FastaDatabase fastaDb,
     Tolerance tolerance,
     AminoAcidSet aaSet,
     double maxSequenceMass = 50000.0,
     int minProductIonCharge = 1,
     int maxProductIonCharge = 20)
     : this(run, featureParser, null, tagParser, fastaDb, tolerance, aaSet,
            maxSequenceMass, minProductIonCharge, maxProductIonCharge)
 {
 }
 /// <summary>
 /// Stores the run, charge range and correlation thresholds, creates the comparer and match map,
 /// runs PrecomputePossibleSequenceMasses, and finally builds the sequence-mass-to-MS2-scan map
 /// for the given mass range.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="tolerance">Tolerance; also used to build the comparer</param>
 /// <param name="minCharge">Minimum charge (default 3)</param>
 /// <param name="maxCharge">Maximum charge (default 30)</param>
 /// <param name="minMass">Lower mass bound passed to CreateSequenceMassToMs2ScansMap (default 3000)</param>
 /// <param name="maxMass">Upper mass bound passed to CreateSequenceMassToMs2ScansMap (default 50000)</param>
 /// <param name="isotopeCorrThreshold">Isotope correlation threshold (default 0.7)</param>
 /// <param name="chargeCorrThreshold">Charge correlation threshold (default 0.7)</param>
 /// <param name="mostAbundantPlusOneIsotopeCorrThreshold">Stored in the field of the same name (default 0.7)</param>
 /// <param name="maxNumPeaksToConsider">Stored in MaxNumPeaksToConsider (default 40)</param>
 public Ms1IsotopeAndChargeCorrFilter(
     LcMsRun run,
     Tolerance tolerance,
     int minCharge = 3, int maxCharge = 30,
     double minMass = 3000.0,
     double maxMass = 50000.0,
     double isotopeCorrThreshold = 0.7,
     double chargeCorrThreshold = 0.7,
     double mostAbundantPlusOneIsotopeCorrThreshold = 0.7,
     int maxNumPeaksToConsider = 40)
 {
     _run = run;
     _tolerance = tolerance;

     _maxCharge = maxCharge;
     _minCharge = minCharge;

     // Thresholds
     _chargeCorrThresholdThreshold = chargeCorrThreshold;
     _isotopeCorrThresholdThreshold = isotopeCorrThreshold;
     _mostAbundantPlusOneIsotopeCorrThreshold = mostAbundantPlusOneIsotopeCorrThreshold;
     MaxNumPeaksToConsider = maxNumPeaksToConsider;

     _comparer = new MzComparerWithTolerance(tolerance);
     _lcMsMatchMap = new LcMsMatchMap();

     // Precomputation runs first; the scan map is built afterwards.
     PrecomputePossibleSequenceMasses();
     _lcMsMatchMap.CreateSequenceMassToMs2ScansMap(_run, _tolerance, minMass, maxMass);
 }
Пример #20
0
        /// <summary>
        /// Creates a map from scan number to the best-scoring protein-spectrum match of that scan.
        /// </summary>
        /// <param name="run">LC-MS run, stored in Run</param>
        /// <param name="dataid">Data set identifier, stored in DataId</param>
        /// <param name="prsmList">
        /// Matches to index. When null, the scan map stays empty and ProteinSpectrumMatches is not assigned.
        /// </param>
        /// <param name="dataDesc">Optional description, stored in DataDesc</param>
        public ProteinSpectrumMathMap(LcMsRun run, int dataid, List<ProteinSpectrumMatch> prsmList, string dataDesc = "")
        {
            DataId = dataid;
            DataDesc = dataDesc;
            Run = run;
            _scanNumToMatchMap = new Dictionary<int, ProteinSpectrumMatch>();

            if (prsmList == null) return;

            ProteinSpectrumMatches = prsmList;

            foreach (var prsm in prsmList)
            {
                // A single lookup per match: keep the entry with the strictly higher score,
                // so the first match wins on ties.
                ProteinSpectrumMatch current;
                if (!_scanNumToMatchMap.TryGetValue(prsm.ScanNum, out current) || current.Score < prsm.Score)
                {
                    _scanNumToMatchMap[prsm.ScanNum] = prsm;
                }
            }
        }
Пример #21
0
 /// <summary>
 /// Creates a peak cluster from an observed isotope envelope by forwarding its theoretical
 /// envelope, monoisotopic mass, charge, representative peak m/z, scan number and abundance
 /// to the main constructor.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="observedEnvelope">Observed envelope; it and its RepresentativePeak are dereferenced, so neither may be null</param>
 public LcMsPeakCluster(LcMsRun run, ObservedIsotopeEnvelope observedEnvelope)
     : this(
         run,
         observedEnvelope.TheoreticalEnvelope,
         observedEnvelope.MonoMass,
         observedEnvelope.Charge,
         observedEnvelope.RepresentativePeak.Mz,
         observedEnvelope.ScanNum,
         observedEnvelope.Abundance)
 {
 }
Пример #22
0
        /// <summary>
        /// Runs the complete search: loads the spectrum file, builds the MS1 precursor filter,
        /// deconvolutes all product spectra, searches the target and/or decoy database depending on
        /// <c>RunTargetDecoyAnalysis</c>, writes the result files into <c>OutputDir</c> and, when the
        /// <c>Both</c> flag is set, computes the FDR and writes the combined file.
        /// Stage names and elapsed times are written to the console.
        /// </summary>
        /// <param name="corrThreshold">
        /// Passed to Ms1IsotopeAndChargeCorrFilter as its seventh positional argument
        /// (the isotope correlation threshold).
        /// </param>
        /// <returns>
        /// false when the FDR calculator reports an error (ErrorMessage is set to its message);
        /// true otherwise.
        /// </returns>
        public bool RunSearch(double corrThreshold)
        {
            var sw = new Stopwatch();
            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            sw.Start();
            _run = InMemoryLcMsRun.GetLcMsRun(SpecFilePath, 1.4826, 1.4826);
            _bottomUpScorer = new InformedBottomUpScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Stop();
            var sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            sw.Reset();
            Console.Write(@"Determining precursor masses...");
            sw.Start();
            var ms1Filter = new Ms1IsotopeAndChargeCorrFilter(_run, PrecursorIonTolerance, MinPrecursorIonCharge, MaxPrecursorIonCharge,
                400, 5000, corrThreshold, 0, 0);
            // Stop before reading the elapsed ticks, as every other stage does.
            sw.Stop();
            sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            sw.Reset();
            Console.Write(@"Deconvoluting MS2 spectra...");
            sw.Start();
            _ms2ScorerFactory = new ProductScorerBasedOnDeconvolutedSpectra(
                _run,
                MinProductIonCharge, MaxProductIonCharge,
                new Tolerance(10),
                0
                );
            _ms2ScorerFactory.DeconvoluteAllProductSpectra();
            sw.Stop();
            sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
            Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);

            // NOTE(review): OutputDir is used as is; Path.Combine throws ArgumentNullException
            // when it is null - confirm that callers always set it.
            var baseName = Path.GetFileNameWithoutExtension(SpecFilePath);
            var targetOutputFilePath = Path.Combine(OutputDir, baseName + TargetFileExtension);
            var decoyOutputFilePath = Path.Combine(OutputDir, baseName + DecoyFileExtension);
            var tdaOutputFilePath = Path.Combine(OutputDir, baseName + TdaFileExtension);

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                var targetMatches = RunSearch(GetAnnotationsAndOffsets(targetDb), ms1Filter, false);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Target database search elapsed time: {0:f4} sec", sec);

                sw.Reset();
                Console.Write(@"Rescoring and writing target results...");
                sw.Start();
                WriteResultsToFile(targetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed time: {0:f4} sec", sec);
            }

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                Console.Write(@"Reading the decoy database...");
                sw.Start();
                var decoyDb = targetDb.Decoy(Enzyme);
                decoyDb.Read();
                // Stop before reading the elapsed ticks, as every other stage does.
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                var decoyMatches = RunSearch(GetAnnotationsAndOffsets(decoyDb), ms1Filter, true);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f4} sec", sec);

                sw.Reset();
                Console.Write(@"Rescoring and writing decoy results...");
                sw.Start();
                WriteResultsToFile(decoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                sec = sw.ElapsedTicks / (double)Stopwatch.Frequency;
                Console.WriteLine(@"Elapsed time: {0:f4} sec", sec);
            }

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                // The FDR calculation reads the target and decoy result files written above.
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return false;
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }

            Console.WriteLine(@"Done");
            return true;
        }
Пример #23
0
        /// <summary>
        /// Builds a ScanBasedTagSearchEngine from a SequenceTagGenerator (tolerance 8) and a
        /// default LcMsPeakMatrix for the run, then runs the search.
        /// </summary>
        /// <param name="run">LC-MS run to search</param>
        /// <param name="fastaDb">FASTA database</param>
        /// <param name="tolerance">Tolerance handed to the search engine</param>
        /// <param name="aaSet">Amino acid set</param>
        private void TestTagBasedSearch(LcMsRun run, 
            FastaDatabase fastaDb, Tolerance tolerance, AminoAcidSet aaSet)
        {
            var tagGenerator = new SequenceTagGenerator(run, new Tolerance(8));
            var peakMatrix = new LcMsPeakMatrix(run);
            var engine = new ScanBasedTagSearchEngine(run, tagGenerator, peakMatrix, fastaDb, tolerance, aaSet);

            // To restrict the scan range while debugging, set engine.MinScan / engine.MaxScan here.
            engine.RunSearch();
        }
Пример #24
0
 /// <summary>
 /// Renders a feature map for the given features file and saves it as an image.
 /// The lower mass bound is Parameters.MinSearchMass minus 500 (never below 0);
 /// the upper bound is Parameters.MaxSearchMass.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="featuresFilePath">Path of the features file</param>
 /// <param name="imgFilePath">Path the image is saved to</param>
 private void CreateFeatureMapImage(LcMsRun run, string featuresFilePath, string imgFilePath)
 {
     var lowerMassBound = Math.Max(0, Parameters.MinSearchMass - 500);
     var featureMap = new LcMsFeatureMap(run, featuresFilePath, lowerMassBound, Parameters.MaxSearchMass);

     featureMap.SaveImage(imgFilePath);
     Console.WriteLine(@" - Feature map image output: {0}", imgFilePath);
 }
Пример #25
0
 /// <summary>
 /// Stores the run and the m/z tolerance in the Run and MzTolerance properties.
 /// </summary>
 /// <param name="run">LC-MS run</param>
 /// <param name="mzTolerance">m/z tolerance</param>
 public Ms1ContainsIonFilter(LcMsRun run, Tolerance mzTolerance)
 {
     MzTolerance = mzTolerance;
     Run = run;
 }
Пример #26
0
        /// <summary>
        /// Builds the map from sequence-mass bin to MS2 scan numbers in two passes:
        /// first per exact bin, then widened by the mass tolerance.
        /// Writes the average number of scans per bin to the console and sets _map to null when done.
        /// </summary>
        /// <param name="run">Run used to check scan range and MS level and to fetch product spectra</param>
        /// <param name="tolerance">Tolerance converted to Da for each bin mass</param>
        /// <param name="minMass">Mass whose bin number is the first bin processed</param>
        /// <param name="maxMass">Mass whose bin number is the last bin processed</param>
        public void CreateSequenceMassToMs2ScansMap(LcMsRun run, Tolerance tolerance, double minMass, double maxMass)
        {
            var firstBin = GetBinNumber(minMass);
            var lastBin = GetBinNumber(maxMass);

            // Pass 1: per exact bin, collect the MS2 scans whose isolation window contains the
            // most abundant isotope m/z of the bin mass.
            var exactBinToScans = new Dictionary<int, List<int>>();
            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                IList<IntRange> ranges;
                if (!_map.TryGetValue(bin, out ranges)) continue;

                var binMass = GetMass(bin);
                var scansInBin = new List<int>();

                foreach (var range in ranges)
                {
                    for (var scan = range.Min; scan <= range.Max; scan++)
                    {
                        if (scan < run.MinLcScan || scan > run.MaxLcScan) continue;
                        if (run.GetMsLevel(scan) != 2) continue;

                        var productSpec = run.GetSpectrum(scan) as ProductSpectrum;
                        if (productSpec == null) continue;

                        // The charge is estimated from the isolation window target m/z.
                        var targetMz = productSpec.IsolationWindow.IsolationWindowTargetMz;
                        var charge = (int)Math.Round(binMass / targetMz);
                        var mostAbundantIndex = Averagine.GetIsotopomerEnvelope(binMass).MostAbundantIsotopeIndex;
                        var mz = Ion.GetIsotopeMz(binMass, charge, mostAbundantIndex);

                        if (productSpec.IsolationWindow.Contains(mz)) scansInBin.Add(scan);
                    }
                }

                scansInBin.Sort();
                exactBinToScans.Add(bin, scansInBin);
            }

            // Pass 2: for every bin, merge the scans of all bins within the mass tolerance.
            _sequenceMassBinToScanNumsMap = new Dictionary<int, IEnumerable<int>>();
            var totalScans = 0L;
            for (var bin = firstBin; bin <= lastBin; bin++)
            {
                var binMass = GetMass(bin);
                var massWindow = tolerance.GetToleranceAsDa(binMass, 1);

                var lowBin = GetBinNumber(binMass - massWindow);
                var highBin = GetBinNumber(binMass + massWindow);

                var mergedScans = new HashSet<int>();
                for (var neighbor = lowBin; neighbor <= highBin; neighbor++)
                {
                    if (neighbor < firstBin || neighbor > lastBin) continue;

                    List<int> neighborScans;
                    if (exactBinToScans.TryGetValue(neighbor, out neighborScans))
                    {
                        mergedScans.UnionWith(neighborScans);
                    }
                }

                _sequenceMassBinToScanNumsMap[bin] = mergedScans.ToArray();
                totalScans += mergedScans.Count;
            }

            Console.WriteLine("#MS/MS matches per sequence: {0}", totalScans / (float)(lastBin - firstBin + 1));

            // The range map is dropped at the end; a second call would fail on the null _map.
            _map = null;
        }
Пример #27
0
        /// <summary>
        /// Finds the MS1 scans that bracket an elution time window.
        /// </summary>
        /// <param name="run">Run providing the MS1 scan vector and elution times</param>
        /// <param name="minTime">Lower time bound</param>
        /// <param name="maxTime">Upper time bound</param>
        /// <returns>
        /// Item1: the scan immediately before the first scan (from index 1 on) whose elution time
        /// exceeds minTime; Item2: the first scan (from index 1 on) whose elution time exceeds maxTime.
        /// Either item is -1 when no such scan is found.
        /// </returns>
        private Tuple<int, int> GetMinMaxMs1ScanNum(LcMsRun run, double minTime, double maxTime)
        {
            var scanVector = run.GetMs1ScanVector();
            var lowerScan = -1;
            var upperScan = -1;

            // Starts at index 1 because the lower bound refers to the preceding scan.
            for (var idx = 1; idx < scanVector.Length; idx++)
            {
                var elutionTime = run.GetElutionTime(scanVector[idx]);

                if (lowerScan < 0 && elutionTime > minTime)
                {
                    lowerScan = scanVector[idx - 1];
                }

                if (elutionTime > maxTime)
                {
                    upperScan = scanVector[idx];
                    break;
                }
            }

            return Tuple.Create(lowerScan, upperScan);
        }
Пример #28
0
        public bool RunSearch(double corrThreshold = 0.7, CancellationToken? cancellationToken = null, IProgress<ProgressData> progress = null)
        {
            // Get the Normalized spec file/folder path
            SpecFilePath = MassSpecDataReaderFactory.NormalizeDatasetPath(SpecFilePath);

            var prog = new Progress<ProgressData>();
            var progData = new ProgressData(progress);
            if (progress != null)
            {
                prog = new Progress<ProgressData>(p =>
                {
                    progData.Status = p.Status;
                    progData.StatusInternal = p.StatusInternal;
                    progData.Report(p.Percent);
                });
            }

            var sw = new Stopwatch();
            var swAll = new Stopwatch();
            swAll.Start();
            ErrorMessage = string.Empty;

            Console.Write(@"Reading raw file...");
            progData.Status = "Reading spectra file";
            progData.StepRange(10.0);
            sw.Start();

            _run = PbfLcMsRun.GetLcMsRun(SpecFilePath, 0, 0, prog);

            _ms2ScanNums = _run.GetScanNumbers(2).ToArray();
            _isolationWindowTargetMz = new double[_run.MaxLcScan + 1];
            foreach (var ms2Scan in _ms2ScanNums)
            {
                var ms2Spec = _run.GetSpectrum(ms2Scan) as ProductSpectrum;
                if (ms2Spec == null) continue;
                _isolationWindowTargetMz[ms2Scan] = ms2Spec.IsolationWindow.IsolationWindowTargetMz;
            }

            
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(20.0);
            ISequenceFilter ms1Filter;
            if (this.ScanNumbers != null && this.ScanNumbers.Any())
            {
                ms1Filter = new SelectedMsMsFilter(this.ScanNumbers);
            }
            else if (string.IsNullOrWhiteSpace(FeatureFilePath))
            {
                // Checks whether SpecFileName.ms1ft exists
                var ms1FtFilePath = MassSpecDataReaderFactory.ChangeExtension(SpecFilePath, LcMsFeatureFinderLauncher.FileExtension);
                if (!File.Exists(ms1FtFilePath))
                {
                    Console.WriteLine(@"Running ProMex...");
                    sw.Start();
                    var param = new LcMsFeatureFinderInputParameter
                    {
                        InputPath = SpecFilePath,
                        MinSearchMass = MinSequenceMass,
                        MaxSearchMass = MaxSequenceMass,
                        MinSearchCharge = MinPrecursorIonCharge,
                        MaxSearchCharge = MaxPrecursorIonCharge,
                        CsvOutput = false,
                        ScoreReport = false,
                        LikelihoodScoreThreshold = -10
                    };
                    var featureFinder = new LcMsFeatureFinderLauncher(param);
                    featureFinder.Run();
                }
                sw.Reset();
                sw.Start();
                Console.Write(@"Reading ProMex results...");
                ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, ms1FtFilePath, -10);
            }
            else
            {
                sw.Reset();
                sw.Start();
                var extension = Path.GetExtension(FeatureFilePath);
                if (extension.ToLower().Equals(".csv"))
                {
                    Console.Write(@"Reading ICR2LS/Decon2LS results...");
                    ms1Filter = new IsosFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else if (extension.ToLower().Equals(".ms1ft"))
                {
                    Console.Write(@"Reading ProMex results...");
                    ms1Filter = new Ms1FtFilter(_run, PrecursorIonTolerance, FeatureFilePath, -10);
                }
                else if (extension.ToLower().Equals(".msalign"))
                {
                    Console.Write(@"Reading MS-Align+ results...");
                    ms1Filter = new MsDeconvFilter(_run, PrecursorIonTolerance, FeatureFilePath);
                }
                else ms1Filter = null; //new Ms1FeatureMatrix(_run);
            }

            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            
            // pre-generate deconvoluted spectra for scoring
            _massBinComparer = new FilteredProteinMassBinning(AminoAcidSet, MaxSequenceMass+1000);

            _ms2ScorerFactory2 = new CompositeScorerFactory(_run, _massBinComparer, AminoAcidSet,
                                                               MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);
            sw.Reset();
            Console.WriteLine(@"Generating deconvoluted spectra for MS/MS spectra...");
            sw.Start();
            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken = cancellationToken ?? CancellationToken.None
            };
            Parallel.ForEach(_ms2ScanNums, pfeOptions, ms2ScanNum =>
            {
                _ms2ScorerFactory2.DeconvonluteProductSpectrum(ms2ScanNum);
            });
            sw.Stop();
            Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

            progData.StepRange(10.0);
            progData.Status = "Reading Fasta File";

            // Target database
            var targetDb = new FastaDatabase(DatabaseFilePath);
            targetDb.Read();
            
            // Generate sequence tags for all MS/MS spectra
            if (TagBasedSearch)
            {
                progData.StepRange(25.0);
                progData.Status = "Generating Sequence Tags";
                sw.Reset();
                Console.WriteLine(@"Generating sequence tags for MS/MS spectra...");
                sw.Start();
                var seqTagGen = GetSequenceTagGenerator();
                _tagMs2ScanNum = seqTagGen.GetMs2ScanNumsContainingTags().ToArray();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                _tagSearchEngine = new ScanBasedTagSearchEngine(_run, seqTagGen, new LcMsPeakMatrix(_run, ms1Filter), targetDb, ProductIonTolerance, AminoAcidSet,
                                _ms2ScorerFactory2,
                                ScanBasedTagSearchEngine.DefaultMinMatchedTagLength,
                                MaxSequenceMass, MinProductIonCharge, MaxProductIonCharge);                
            }
            
            var specFileName = MassSpecDataReaderFactory.RemoveExtension(Path.GetFileName(SpecFilePath));
            var targetOutputFilePath = Path.Combine(OutputDir, specFileName + TargetFileNameEnding);
            var decoyOutputFilePath = Path.Combine(OutputDir, specFileName + DecoyFileNameEnding);
            var tdaOutputFilePath = Path.Combine(OutputDir, specFileName + TdaFileNameEnding);

            progData.StepRange(60.0);
            progData.Status = "Running Target search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Target))
            {
                sw.Reset();
                Console.Write(@"Reading the target database...");
                sw.Start();
                targetDb.Read();
                sw.Stop();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                var targetMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
                
                progData.MaxPercentage = 42.5;
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the target database");
                    sw.Start();
                    RunTagBasedSearch(targetMatches, targetDb, null, prog);
                    Console.WriteLine(@"Target database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
                }
                progData.MaxPercentage = 60.0;

                sw.Reset();
                Console.WriteLine(@"Searching the target database");
                sw.Start();
                RunSearch(targetMatches, targetDb, ms1Filter, null, prog);
                Console.WriteLine(@"Target database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value using generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for target-spectrum matches");
                sw.Start();
                var bestTargetMatches = RunGeneratingFunction(targetMatches);
                WriteResultsToFile(bestTargetMatches, targetOutputFilePath, targetDb);
                sw.Stop();
                Console.WriteLine(@"Target-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(95.0); // total to 95%
            progData.Status = "Running Decoy search";

            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Decoy))
            {
                // Decoy database
                sw.Reset();
                sw.Start();
                var decoyDb = targetDb.Decoy(null, true);

                Console.Write(@"Reading the decoy database...");
                decoyDb.Read();
                Console.WriteLine(@"Elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                progData.MaxPercentage = 77.5;
                var decoyMatches = new SortedSet<DatabaseSequenceSpectrumMatch>[_run.MaxLcScan + 1];
                if (TagBasedSearch)
                {
                    sw.Reset();
                    Console.WriteLine(@"Tag-based searching the decoy database");
                    sw.Start();
                    RunTagBasedSearch(decoyMatches, decoyDb, null, prog);
                    Console.WriteLine(@"Decoy database tag-based search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);                    
                }
                progData.MaxPercentage = 95.0;

                sw.Reset();
                Console.WriteLine(@"Searching the decoy database");
                sw.Start();
                RunSearch(decoyMatches, decoyDb, ms1Filter, null, prog);
                Console.WriteLine(@"Decoy database search elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);

                // calculate spectral e-value using generating function
                sw.Reset();
                Console.WriteLine(@"Calculating spectral E-values for decoy-spectrum matches");
                sw.Start();
                var bestDecoyMatches = RunGeneratingFunction(decoyMatches);
                WriteResultsToFile(bestDecoyMatches, decoyOutputFilePath, decoyDb);
                sw.Stop();
                Console.WriteLine(@"Decoy-spectrum match E-value calculation elapsed Time: {0:f1} sec", sw.Elapsed.TotalSeconds);
            }

            progData.StepRange(100.0);
            progData.Status = "Writing combined results file";
            if (RunTargetDecoyAnalysis.HasFlag(DatabaseSearchMode.Both))
            {
                // Add "Qvalue" and "PepQValue"
                var fdrCalculator = new FdrCalculator(targetOutputFilePath, decoyOutputFilePath);
                if (fdrCalculator.HasError())
                {
                    ErrorMessage = fdrCalculator.ErrorMessage;
                    Console.WriteLine(@"Error computing FDR: " + fdrCalculator.ErrorMessage);
                    return false;
                }

                fdrCalculator.WriteTo(tdaOutputFilePath);
            }
            progData.Report(100.0);

            Console.WriteLine(@"Done.");
            swAll.Stop();
            Console.WriteLine(@"Total elapsed time for search: {0:f1} sec ({1:f2} min)", swAll.Elapsed.TotalSeconds, swAll.Elapsed.TotalMinutes);

            return true;
        }
Пример #29
0
        /// <summary>
        /// Collects the product spectra from <paramref name="ms2List"/> whose elution time lies within
        /// the feature's elution range and whose isolation window (widened by 0.5 on each side)
        /// contains at least one m/z value from the feature's mass table.
        /// </summary>
        /// <param name="run">Run used to look up elution times and spectra by scan number.</param>
        /// <param name="ms2List">Scan numbers to examine.</param>
        /// <param name="feature">
        /// Feature tuple: Item1 is the feature id (Item1 - 1 is used as the index into
        /// <c>_identifiedFeatures</c>), Item2 is the mass passed to <c>GetFeatureMassTable</c>,
        /// Item3 and Item4 are the inclusive minimum and maximum elution times.
        /// Item5 and Item6 are not read here.
        /// </param>
        /// <param name="fileIndex">Second index into <c>_identifiedFeatures</c> for this feature.</param>
        /// <returns>The matching spectra, in the order their scans appear in <paramref name="ms2List"/>.</returns>
        /// <remarks>
        /// Side effect: for every scan inside the elution range, <c>_identifiedFeatures[featureId][fileIndex]</c>
        /// is overwritten with the first match found so far, or with (-1, -1.0, -1) while none has been found.
        /// The entry is left untouched when no scan falls inside the elution range.
        /// </remarks>
        private List<ProductSpectrum> GetMatchedSpectrums(LcMsRun run, IList<int> ms2List ,Tuple<int,double, double, double, double,double> feature, int fileIndex)
        {
            var spectrumList = new List<ProductSpectrum>();
            var mass = feature.Item2;
            var minElution = feature.Item3;
            var maxElution = feature.Item4;
            var featureId = feature.Item1 - 1;

            // Sentinel meaning "no matching scan found yet".
            var det = new Tuple<int, double, int>(-1, -1.0, -1);

            // The table is computed from the feature mass only, so build it once instead of once per scan.
            // NOTE(review): assumes GetFeatureMassTable has no per-call side effects - confirm.
            var mzTable = GetFeatureMassTable(mass);

            for (var i = 0; i < ms2List.Count; i++)
            {
                var scanNum = ms2List[i];
                var scanElutionTime = run.GetElutionTime(scanNum);
                if (scanElutionTime < minElution || scanElutionTime > maxElution) continue;

                // "as" yields null when the scan is not a ProductSpectrum (or no spectrum is returned);
                // such scans cannot match, so they are skipped instead of being dereferenced.
                var spectrum = run.GetSpectrum(scanNum) as ProductSpectrum;
                if (spectrum != null)
                {
                    var window = spectrum.IsolationWindow;
                    var minMz = window.MinMz - .5;
                    var maxMz = window.MaxMz + .5;

                    for (var j = 0; j < mzTable.Length; j++)
                    {
                        var mz = mzTable[j];
                        if (mz < minMz || mz > maxMz) continue;

                        spectrumList.Add(spectrum);

                        // Only the first match overall is recorded.
                        // NOTE(review): j + 2 is presumably the charge state for table index j - confirm
                        // against GetFeatureMassTable.
                        if (det.Item1 <= -1) det = new Tuple<int, double, int>(scanNum, mz, j + 2);
                        break;
                    }
                }

                _identifiedFeatures[featureId][fileIndex] = det;
            }

            return spectrumList;
        }
Пример #30
0
 /// <summary>
 /// Builds the peak matrix through the overload that takes no MS1 filter, then
 /// stores <paramref name="ms1Filter"/> in <c>_ms1Filter</c>.
 /// </summary>
 /// <param name="run">Run forwarded to the chained constructor.</param>
 /// <param name="ms1Filter">Sequence filter kept in <c>_ms1Filter</c>.</param>
 /// <param name="scorer">Feature likelihood scorer forwarded to the chained constructor; defaults to null.</param>
 /// <param name="minScanCharge">Forwarded to the chained constructor; defaults to 1.</param>
 /// <param name="maxScanCharge">Forwarded to the chained constructor; defaults to 60.</param>
 /// <param name="maxThreadCount">Forwarded to the chained constructor; defaults to 0.</param>
 public LcMsPeakMatrix(
     LcMsRun run,
     ISequenceFilter ms1Filter,
     LcMsFeatureLikelihood scorer = null,
     int minScanCharge = 1,
     int maxScanCharge = 60,
     int maxThreadCount = 0)
     : this(run, scorer, minScanCharge, maxScanCharge, maxThreadCount)
 {
     _ms1Filter = ms1Filter;
 }