Beispiel #1
0
        private IEnumerable <TagSequenceMatch> GetMatches(IEnumerable <SequenceTag.SequenceTag> tags, ProductSpectrum spec, IScorer scorer)
        {
            // Match tags against the database
            var proteinsToTags = GetProteinToMatchedTagsMap(tags, _searchableDb, _aaSet, _tolerance, _tolerance);

            //var tagSequenceMatchList = new List<TagSequenceMatch>();

            // Extend matches
            foreach (var entry in proteinsToTags)
            {
                var proteinName     = entry.Key;
                var matchedTagSet   = entry.Value;
                var proteinSequence = matchedTagSet.Sequence;

                var tagFinder = new TagMatchFinder(spec, scorer, _featureFinder, proteinSequence, _tolerance, _aaSet, _maxSequenceMass);

                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    if (matchedTag.Length < _minMatchedTagLength)
                    {
                        continue;
                    }
                    if (matchedTag.NTermFlankingMass == null && matchedTag.CTermFlankingMass == null)
                    {
                        continue;
                    }

                    var matches = tagFinder.FindMatches(matchedTag).ToArray();
                    //var prevScore = double.NegativeInfinity;
                    //foreach (var match in matches.OrderByDescending(m => m.Score))
                    foreach (var match in matches)
                    {
                        var sequence = proteinSequence.Substring(match.StartIndex, match.EndIndex - match.StartIndex);
                        //re-scoring
                        var sequenceObj = Sequence.CreateSequence(sequence, match.ModificationText, _aaSet);
                        match.Score = sequenceObj.GetInternalCleavages().Sum(c => scorer.GetFragmentScore(c.PrefixComposition, c.SuffixComposition));

                        //var numMatches = matchedTag.Length * 2 + match.NTermScore + match.CTermScore;
                        //var score = match.NTermScore + match.CTermScore;
                        //score += (matchedTag.NumReliableNTermFlankingMasses > 0)
                        //  ? matchedTag.Length*CompositeScorer.ScoreParam.Prefix.ConsecutiveMatch
                        //: matchedTag.Length*CompositeScorer.ScoreParam.Suffix.ConsecutiveMatch;

                        // Poisson p-value score
                        //var n = (match.EndIndex - match.StartIndex - 1)*2;
                        //var lambda = numMatches / n;
                        //var pValue = 1 - Poisson.CDF(lambda, numMatches);
                        //var pScore = (pValue > 0) ? - Math.Log(pValue, 2) : 50.0;
                        //if (numMatches < 5) break;
                        //if (prevScore - numMatches > 2) break;
                        //prevScore = numMatches;

                        var pre  = match.StartIndex == 0 ? '-' : proteinSequence[match.StartIndex - 1];              // startIndex is inclusive
                        var post = match.EndIndex >= proteinSequence.Length ? '-' : proteinSequence[match.EndIndex]; // endIndex is Exclusive

                        yield return(new TagSequenceMatch(sequence, proteinName, match, pre, post));

                        //tagSequenceMatchList.Add(new TagSequenceMatch(sequence, proteinName, match, pre, post));
                    }
                }
            }
            //return tagSequenceMatchList;
        }
        private IEnumerable<TagSequenceMatch> GetMatches(IEnumerable<SequenceTag> tags, ProductSpectrum spec, IScorer scorer)
        {
            // Match tags against the database
            var proteinsToTags = GetProteinToMatchedTagsMap(tags, _searchableDb, _aaSet, _tolerance, _tolerance);
            //var tagSequenceMatchList = new List<TagSequenceMatch>();

            // Extend matches
            foreach (var entry in proteinsToTags)
            {
                var proteinName = entry.Key;
                var matchedTagSet = entry.Value;
                var proteinSequence = matchedTagSet.Sequence;

                var tagFinder = new TagMatchFinder(spec, scorer, _featureFinder, proteinSequence, _tolerance, _aaSet, _maxSequenceMass);

                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    if (matchedTag.Length < _minMatchedTagLength) continue;
                    if (matchedTag.NTermFlankingMass == null && matchedTag.CTermFlankingMass == null) continue;

                    var matches = tagFinder.FindMatches(matchedTag).ToArray();
                    //var prevScore = double.NegativeInfinity;
                    //foreach (var match in matches.OrderByDescending(m => m.Score))
                    foreach(var match in matches)
                    {
                        var sequence = proteinSequence.Substring(match.StartIndex, match.EndIndex - match.StartIndex);
                        //re-scoring
                        var sequenceObj = Sequence.CreateSequence(sequence, match.ModificationText, _aaSet);
                        match.Score = sequenceObj.GetInternalCleavages().Sum(c => scorer.GetFragmentScore(c.PrefixComposition, c.SuffixComposition));

                        //var numMatches = matchedTag.Length * 2 + match.NTermScore + match.CTermScore;
                        //var score = match.NTermScore + match.CTermScore;
                        //score += (matchedTag.NumReliableNTermFlankingMasses > 0)
                          //  ? matchedTag.Length*CompositeScorer.ScoreParam.Prefix.ConsecutiveMatch
                            //: matchedTag.Length*CompositeScorer.ScoreParam.Suffix.ConsecutiveMatch;
                        

                        // Poisson p-value score
                        //var n = (match.EndIndex - match.StartIndex - 1)*2;
                        //var lambda = numMatches / n;
                        //var pValue = 1 - Poisson.CDF(lambda, numMatches);
                        //var pScore = (pValue > 0) ? - Math.Log(pValue, 2) : 50.0;
                        //if (numMatches < 5) break;
                        //if (prevScore - numMatches > 2) break;
                        //prevScore = numMatches;

                        var pre = match.StartIndex == 0 ? '-' : proteinSequence[match.StartIndex - 1]; // startIndex is inclusive
                        var post = match.EndIndex >= proteinSequence.Length ? '-' : proteinSequence[match.EndIndex]; // endIndex is Exclusive

                        yield return new TagSequenceMatch(sequence, proteinName, match, pre, post);

                        //tagSequenceMatchList.Add(new TagSequenceMatch(sequence, proteinName, match, pre, post));
                    }
                }
            }
            //return tagSequenceMatchList;
        }