Пример #1
0
        /// <summary>
        /// Records a candidate match for its scan, in the bucket selected by the number of
        /// modifications the match carries.
        /// </summary>
        /// <remarks>
        /// A match is ignored when its score is below <c>_scoreCutoff</c> or when its modification
        /// count has no bucket in <c>_matchedSet</c>. Otherwise it is admitted when either the set
        /// still has room and the score exceeds <c>ScoreRatioCutoff</c> times the current best score,
        /// or the score beats the current worst retained score.
        /// After insertion the set is pruned against the best score <em>including the new match</em>
        /// and trimmed back to <c>NumMatchesPerSpectrum</c> entries by evicting the lowest ones.
        /// The code relies on <c>Min</c>/<c>Max</c> of the set being the lowest/highest scoring
        /// matches, i.e. it assumes the element ordering is by score (defined outside this method).
        /// </remarks>
        /// <param name="newMatch">Candidate match to record.</param>
        public void AddMatch(DatabaseSequenceSpectrumMatch newMatch)
        {
            if (newMatch.Score < _scoreCutoff)
            {
                return;
            }
            var scanIndex = _ms2ScanToIndexMap[newMatch.ScanNum];
            var modIndex  = (newMatch.Modifications == null) ? 0 : newMatch.Modifications.GetNumModifications();

            if (modIndex >= _matchedSet.Length)
            {
                return;
            }

            // Writers are serialized per modification-count bucket.
            lock (_matchedSet[modIndex])
            {
                var existingMatches = _matchedSet[modIndex][scanIndex];
                if (existingMatches == null)
                {
                    _matchedSet[modIndex][scanIndex] = new SortedSet<DatabaseSequenceSpectrumMatch> { newMatch };
                    return;
                }

                if (existingMatches.Count == 0)
                {
                    // A previous prune may have emptied the set; Min/Max would be null here.
                    existingMatches.Add(newMatch);
                    return;
                }

                var maxScore = existingMatches.Max.Score;
                var admittedWithRoom = existingMatches.Count < NumMatchesPerSpectrum &&
                                       maxScore * ScoreRatioCutoff < newMatch.Score;

                // Same admission rule as before: room + within ratio, or better than the current worst.
                if (!admittedWithRoom && !(newMatch.Score > existingMatches.Min.Score))
                {
                    return;
                }

                existingMatches.Add(newMatch);

                // Prune against the best score after insertion; using the pre-insert maximum would
                // leave stale low scorers behind whenever the new match becomes the best one.
                var bestScore  = newMatch.Score > maxScore ? newMatch.Score : maxScore;
                var pruneBelow = bestScore * ScoreRatioCutoff;
                existingMatches.RemoveWhere(mt => mt.Score < pruneBelow);

                // Enforce the per-spectrum cap; never evict the last remaining entry.
                while (existingMatches.Count > NumMatchesPerSpectrum && existingMatches.Count > 1)
                {
                    existingMatches.Remove(existingMatches.Min);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Re-scores the candidate matches of every MS2 scan, computes a spectral E-value for each
        /// surviving match via a generating function, and returns the match with the smallest
        /// E-value per scan.
        /// </summary>
        /// <param name="sortedMatches">Candidate matches, indexed by scan number; entries may be null.</param>
        /// <param name="cancellationToken">Optional token handed to the parallel E-value loop.</param>
        /// <param name="progress">Optional progress sink wrapped in a <c>ProgressData</c>.</param>
        /// <returns>
        /// An array with the same length as <paramref name="sortedMatches"/>, indexed by scan number,
        /// holding the selected match for each scan that kept at least one match (other entries stay null);
        /// or null when a scan with candidates has no <c>ProductSpectrum</c>.
        /// </returns>
        private DatabaseSequenceSpectrumMatch[] RunGeneratingFunction(SortedSet <DatabaseSequenceSpectrumMatch>[] sortedMatches, CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
        {
            var progData = new ProgressData(progress)
            {
                Status = "Calculating spectral E-values for matches"
            };

            // Build the per-scan score-distribution cache on first use: one list per MS2 scan number.
            if (_cachedScoreDistributions == null)
            {
                _cachedScoreDistributions = new LinkedList <Tuple <double, ScoreDistribution> > [_run.MaxLcScan + 1];
                foreach (var scanNum in _ms2ScanNums)
                {
                    _cachedScoreDistributions[scanNum] = new LinkedList <Tuple <double, ScoreDistribution> >();
                }
            }

            var sw = new Stopwatch();

            var topDownScorer = new InformedTopDownScorer(_run, AminoAcidSet, MinProductIonCharge, MaxProductIonCharge, ProductIonTolerance);

            // Rescore and Estimate #proteins for GF calculation
            var  matches           = new LinkedList <DatabaseSequenceSpectrumMatch> [sortedMatches.Length];
            long estimatedProteins = 0;

            foreach (var scanNum in _ms2ScanNums)
            {
                var prsms = sortedMatches[scanNum];
                if (prsms == null)
                {
                    continue;
                }
                var spec = _run.GetSpectrum(scanNum) as ProductSpectrum;
                if (spec == null)
                {
                    // Aborts the whole computation (not just this scan) when the spectrum is not a ProductSpectrum.
                    return(null);
                }

                foreach (var match in prsms)
                {
                    var sequence = match.Sequence;
                    var ion      = match.Ion;

                    // Re-scoring
                    var scores = topDownScorer.GetScores(spec, sequence, ion.Composition, ion.Charge, scanNum);
                    if (scores == null)
                    {
                        continue;
                    }

                    // The match object is updated in place with the re-scored values.
                    match.Score               = scores.Score;
                    match.ModificationText    = scores.Modifications;
                    match.NumMatchedFragments = scores.NumMatchedFrags;
                    // Only matches above the composite-scorer cutoff go on to the E-value stage.
                    if (match.Score > CompositeScorer.ScoreParam.Cutoff)
                    {
                        if (matches[scanNum] == null)
                        {
                            matches[scanNum] = new LinkedList <DatabaseSequenceSpectrumMatch>();
                        }
                        matches[scanNum].AddLast(match);
                    }
                }

                if (matches[scanNum] != null)
                {
                    estimatedProteins += matches[scanNum].Count;
                }
            }

            Console.WriteLine(@"Estimated matched proteins: " + estimatedProteins);

            var numProteins = 0;
            var lastUpdate  = DateTime.MinValue; // Force original update of 0%

            sw.Reset();
            sw.Start();

            // Only scans that kept at least one match take part in the E-value computation.
            var scanNums = _ms2ScanNums.Where(scanNum => matches[scanNum] != null).ToArray();

            var pfeOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            // Each iteration works on the match list and distribution cache of a single scan number.
            // NOTE(review): this is only free of cross-thread sharing if scanNums has no duplicates — confirm.
            Parallel.ForEach(scanNums, pfeOptions, scanNum =>
            {
                // Tracks the current step so a failure can be reported with context.
                var currentTask = "?";
                try
                {
                    var scoreDistributions = _cachedScoreDistributions[scanNum];
                    foreach (var match in matches[scanNum])
                    {
                        var currentIteration = "for scan " + scanNum + " and mass " + match.Ion.Composition.Mass;
                        currentTask          = "Calling GetMs2ScoringGraph " + currentIteration;

                        var graph = _ms2ScorerFactory2.GetMs2ScoringGraph(scanNum, match.Ion.Composition.Mass);
                        if (graph == null)
                        {
                            // NOTE(review): a skipped match keeps whatever SpecEvalue it already had and
                            // still takes part in the final per-scan selection below — confirm this is intended.
                            continue;
                        }

                        currentTask = "Calling ComputeGeneratingFunction " + currentIteration;

                        // Reuse a cached distribution whose mass lies within the precursor tolerance of this match.
                        var scoreDist = (from distribution in scoreDistributions
                                         where Math.Abs(distribution.Item1 - match.Ion.Composition.Mass) < PrecursorIonTolerance.GetToleranceAsTh(match.Ion.Composition.Mass)
                                         select distribution.Item2).FirstOrDefault();
                        if (scoreDist == null)
                        {
                            // Nothing cached for this mass: compute the distribution and remember it for this scan.
                            var gf = new GeneratingFunction(graph);
                            gf.ComputeGeneratingFunction();
                            scoreDist = gf.GetScoreDistribution();
                            scoreDistributions.AddLast(new Tuple <double, ScoreDistribution>(match.Ion.Composition.Mass, scoreDist));
                        }

                        currentTask      = "Calling GetSpectralEValue " + currentIteration + " and score " + (int)match.Score;
                        match.SpecEvalue = scoreDist.GetSpectralEValue(match.Score);

                        currentTask = "Reporting progress " + currentIteration;
                        SearchProgressReport(ref numProteins, ref lastUpdate, estimatedProteins, sw, progData);
                    }
                }
                catch (Exception ex)
                {
                    // Log and rethrow with the step description; the original exception is kept as the inner exception.
                    var errMsg = string.Format("Exception while {0}: {1}", currentTask, ex.Message);
                    Console.WriteLine(errMsg);
                    throw new Exception(errMsg, ex);
                }
            });

            var finalMatches = new DatabaseSequenceSpectrumMatch[matches.Length];

            // Per scan, keep the match with the smallest spectral E-value.
            foreach (var scanNum in scanNums)
            {
                finalMatches[scanNum] = matches[scanNum].OrderBy(m => m.SpecEvalue).First();
            }

            progData.StatusInternal = string.Empty;
            progData.Report(100.0);
            return(finalMatches);
        }
Пример #3
0
        /// <summary>
        /// Scores one database annotation against candidate MS2 scans for every allowed number of
        /// N-terminal cleavages and every proteoform composition of its sequence graph, and records
        /// each resulting match in <paramref name="matches"/>.
        /// </summary>
        /// <param name="annotationAndOffset">Annotation string and database offset of the protein.</param>
        /// <param name="sequenceFilter">Supplies candidate MS2 scan numbers for a sequence mass when <c>ScanNumbers</c> is null.</param>
        /// <param name="matches">Per-scan match sets, updated through <c>AddMatch</c>.</param>
        /// <param name="maxNumNTermCleavages">Largest number of N-terminal cleavages to try (inclusive).</param>
        /// <param name="isDecoy">Flag forwarded to every created match.</param>
        /// <param name="cancellationToken">Optional token handed to the parallel scan loop.</param>
        private void SearchForMatches(AnnotationAndOffset annotationAndOffset,
                                      ISequenceFilter sequenceFilter, SortedSet <DatabaseSequenceSpectrumMatch>[] matches, int maxNumNTermCleavages, bool isDecoy, CancellationToken?cancellationToken = null)
        {
            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            var fullAnnotation = annotationAndOffset.Annotation;
            var proteinOffset  = annotationAndOffset.Offset;

            // Drop the first two and the last two characters of the annotation to get the bare sequence.
            var bareSequence  = fullAnnotation.Substring(2, fullAnnotation.Length - 4);
            var sequenceGraph = SequenceGraph.CreateGraph(AminoAcidSet, AminoAcid.ProteinNTerm, bareSequence,
                                                          AminoAcid.ProteinCTerm);

            // Without a sequence graph there is nothing to score.
            if (seqGraphIsMissing(sequenceGraph))
            {
                return;
            }

            for (var cleavages = 0; cleavages <= maxNumNTermCleavages; cleavages++)
            {
                // The graph is cleaved cumulatively: one more residue per pass after the first.
                if (cleavages > 0)
                {
                    sequenceGraph.CleaveNTerm();
                }

                var proteoformCount = sequenceGraph.GetNumProteoformCompositions();
                var allModCombs     = sequenceGraph.GetModificationCombinations();

                for (var sinkIndex = 0; sinkIndex < proteoformCount; sinkIndex++)
                {
                    sequenceGraph.SetSink(sinkIndex);
                    var compositionWithWater = sequenceGraph.GetSinkSequenceCompositionWithH2O();
                    var mass                 = compositionWithWater.Mass;

                    var massOutOfRange = mass < MinSequenceMass || mass > MaxSequenceMass;
                    if (!massOutOfRange)
                    {
                        var sinkModifications = allModCombs[sinkIndex];
                        var candidateScans    = this.ScanNumbers ?? sequenceFilter.GetMatchingMs2ScanNums(mass);

                        Parallel.ForEach(candidateScans, parallelOptions, scan =>
                        {
                            // Ignore scans outside the range covered by the known MS2 scan numbers.
                            if (scan > _ms2ScanNums.Last() || scan < _ms2ScanNums.First())
                            {
                                return;
                            }

                            var scanScorer    = _ms2ScorerFactory2.GetMs2Scorer(scan);
                            var fragmentScore = sequenceGraph.GetFragmentScore(scanScorer);
                            var targetMz      = _isolationWindowTargetMz[scan];

                            // Only scans with a positive isolation-window target m/z produce a match.
                            if (targetMz > 0)
                            {
                                var chargeState = (int)Math.Round(mass / (targetMz - Constants.Proton));
                                var precursor   = new Ion(compositionWithWater, chargeState);

                                var cleavedSequence = bareSequence.Substring(cleavages);
                                var preIndex        = cleavages == 0 ? 0 : cleavages + 1;
                                var preResidue      = fullAnnotation[preIndex];
                                var postResidue     = fullAnnotation[fullAnnotation.Length - 1];

                                var candidate = new DatabaseSequenceSpectrumMatch(cleavedSequence, preResidue, postResidue, scan,
                                                                                  proteinOffset, cleavages, sinkModifications,
                                                                                  precursor, fragmentScore, isDecoy);

                                AddMatch(matches, scan, candidate);
                            }
                        });
                    }
                }
            }
        }

        /// <summary>Returns true when no sequence graph could be created.</summary>
        private static bool seqGraphIsMissing(SequenceGraph graph)
        {
            return graph == null;
        }
Пример #4
0
 /// <summary>
 /// Adds <paramref name="prsm"/> to the match set of scan <paramref name="ms2ScanNum"/>, keeping at
 /// most <c>NumMatchesPerSpectrum</c> entries per scan.
 /// </summary>
 /// <remarks>
 /// While the set has room the match is simply added. Once it is full, the match is only accepted
 /// when its score is greater than the score of the current <c>Min</c> element, which is then evicted.
 /// The eviction happens only if the insert actually succeeded: <c>SortedSet.Add</c> returns false when
 /// an element comparing equal is already present, and evicting in that case would shrink a full set.
 /// All access is serialized by locking the <paramref name="matches"/> array.
 /// </remarks>
 /// <param name="matches">Per-scan match sets, indexed by scan number; entries are created on demand.</param>
 /// <param name="ms2ScanNum">Scan number used as index into <paramref name="matches"/>.</param>
 /// <param name="prsm">Candidate match to add.</param>
 private void AddMatch(SortedSet <DatabaseSequenceSpectrumMatch>[] matches, int ms2ScanNum, DatabaseSequenceSpectrumMatch prsm)
 {
     lock (matches)
     {
         var existingMatches = matches[ms2ScanNum];
         if (existingMatches == null)
         {
             matches[ms2ScanNum] = new SortedSet<DatabaseSequenceSpectrumMatch> { prsm };
             return;
         }

         if (existingMatches.Count < NumMatchesPerSpectrum)
         {
             existingMatches.Add(prsm);
             return;
         }

         // Set is full: the newcomer must beat the current worst entry.
         var minScore = existingMatches.Min.Score;
         if (!(prsm.Score > minScore))
         {
             return;
         }

         // Evict the worst entry only when the newcomer was really inserted.
         if (existingMatches.Add(prsm))
         {
             existingMatches.Remove(existingMatches.Min);
         }
     }
 }
Пример #5
0
        /// <summary>
        /// Runs the tag search engine against <paramref name="db"/> for every scan in
        /// <c>_tagMs2ScanNum</c> and records each tag-derived sequence match in <paramref name="matches"/>.
        /// </summary>
        /// <param name="matches">Per-scan match sets, updated through <c>AddMatch</c>.</param>
        /// <param name="db">Database handed to the tag search engine; its <c>IsDecoy</c> flag is copied to each match.</param>
        /// <param name="cancellationToken">Optional token handed to the parallel scan loop.</param>
        /// <param name="progress">Optional progress sink wrapped in a <c>ProgressData</c>.</param>
        private void RunTagBasedSearch(SortedSet <DatabaseSequenceSpectrumMatch>[] matches, FastaDatabase db,
                                       CancellationToken?cancellationToken = null, IProgress <ProgressData> progress = null)
        {
            _tagSearchEngine.SetDatabase(db);

            var reporter = new ProgressData(progress)
            {
                Status = "Tag-based Searching for matches"
            };

            long totalSpectra = _tagMs2ScanNum.Length;
            Console.WriteLine(@"Number of spectra containing sequence tags: " + totalSpectra);

            var processedCount = 0;
            var lastReportTime = DateTime.MinValue; // Forces the initial 0% update

            var timer = Stopwatch.StartNew();

            var parallelOptions = new ParallelOptions
            {
                MaxDegreeOfParallelism = MaxNumThreads,
                CancellationToken      = cancellationToken ?? CancellationToken.None
            };

            Parallel.ForEach(_tagMs2ScanNum, parallelOptions, ms2ScanNum =>
            {
                foreach (var hit in _tagSearchEngine.RunSearch(ms2ScanNum))
                {
                    // Hits whose protein has no offset in the engine's database are dropped.
                    var proteinOffset = _tagSearchEngine.FastaDatabase.GetOffset(hit.ProteinName);
                    if (proteinOffset != null)
                    {
                        var tag          = hit.TagMatch;
                        var hitSequence  = hit.Sequence;
                        var startIndex   = tag.StartIndex;
                        var parsed       = Sequence.CreateSequence(hitSequence, tag.ModificationText, AminoAcidSet);
                        var precursor    = new Ion(parsed.Composition + Composition.H2O, tag.Charge);

                        var candidate = new DatabaseSequenceSpectrumMatch(hitSequence, hit.Pre, hit.Post,
                                                                          ms2ScanNum, (long)proteinOffset, startIndex,
                                                                          tag.Modifications,
                                                                          precursor, tag.Score, db.IsDecoy);
                        candidate.ModificationText = tag.ModificationText;

                        AddMatch(matches, ms2ScanNum, candidate);
                    }
                }

                // One progress tick per processed spectrum.
                SearchProgressReport(ref processedCount, ref lastReportTime, totalSpectra, timer, reporter, "spectra");
            });

            Console.WriteLine(@"Collected candidate matches: {0}", GetNumberOfMatches(matches));

            reporter.StatusInternal = string.Empty;
            reporter.Report(100.0);
        }