/// <summary>
/// Matches the given sequence tags against the searchable database, extends each
/// matched tag into candidate sequence matches, re-scores every candidate and
/// yields it as a <see cref="TagSequenceMatch"/>.
/// </summary>
/// <param name="tags">Sequence tags to match against the database.</param>
/// <param name="spec">Product spectrum passed on to the tag match finder.</param>
/// <param name="scorer">Scorer passed to the finder and used to re-score each match.</param>
/// <returns>
/// A lazily evaluated sequence (the method is an iterator) with one item per match found.
/// </returns>
private IEnumerable<TagSequenceMatch> GetMatches(IEnumerable<SequenceTag.SequenceTag> tags, ProductSpectrum spec, IScorer scorer)
{
    // Match tags against the database
    var tagsByProtein = GetProteinToMatchedTagsMap(tags, _searchableDb, _aaSet, _tolerance, _tolerance);

    // Extend matches
    foreach (var proteinEntry in tagsByProtein)
    {
        var proteinName = proteinEntry.Key;
        var tagSet = proteinEntry.Value;
        var residues = tagSet.Sequence;
        var finder = new TagMatchFinder(spec, scorer, _featureFinder, residues, _tolerance, _aaSet, _maxSequenceMass);

        foreach (var tag in tagSet.Tags)
        {
            // Skip tags that are too short, or that carry no flanking mass on either side.
            if (tag.Length < _minMatchedTagLength
                || (tag.NTermFlankingMass == null && tag.CTermFlankingMass == null))
            {
                continue;
            }

            var candidates = finder.FindMatches(tag).ToArray();
            foreach (var match in candidates)
            {
                var start = match.StartIndex; // inclusive
                var end = match.EndIndex;     // exclusive
                var sequence = residues.Substring(start, end - start);

                // Re-scoring: overwrite the match score with the summed fragment
                // scores over all internal cleavages of the matched sequence.
                var sequenceObj = Sequence.CreateSequence(sequence, match.ModificationText, _aaSet);
                var cleavages = sequenceObj.GetInternalCleavages();
                match.Score = cleavages.Sum(cleavage => scorer.GetFragmentScore(cleavage.PrefixComposition, cleavage.SuffixComposition));

                // Flanking residues; '-' is used when the match touches a sequence terminus.
                var pre = start != 0 ? residues[start - 1] : '-';
                var post = end < residues.Length ? residues[end] : '-';

                yield return new TagSequenceMatch(sequence, proteinName, match, pre, post);
            }
        }
    }
}
/// <summary>
/// Looks up the supplied sequence tags in the searchable database and, for every
/// protein with matched tags, extends those tags into sequence matches. Each match
/// is re-scored before being yielded as a <see cref="TagSequenceMatch"/>.
/// </summary>
/// <param name="tags">Sequence tags to match against the database.</param>
/// <param name="spec">Product spectrum forwarded to the tag match finder.</param>
/// <param name="scorer">Scorer forwarded to the finder and used for re-scoring.</param>
/// <returns>
/// An iterator producing one <see cref="TagSequenceMatch"/> per match; nothing is
/// computed until the result is enumerated.
/// </returns>
private IEnumerable<TagSequenceMatch> GetMatches(IEnumerable<SequenceTag> tags, ProductSpectrum spec, IScorer scorer)
{
    // Match tags against the database
    var proteinTagMap = GetProteinToMatchedTagsMap(tags, _searchableDb, _aaSet, _tolerance, _tolerance);

    // Extend matches
    foreach (var pair in proteinTagMap)
    {
        var proteinName = pair.Key;
        var matchedTags = pair.Value;
        var proteinSeq = matchedTags.Sequence;
        var matchFinder = new TagMatchFinder(spec, scorer, _featureFinder, proteinSeq, _tolerance, _aaSet, _maxSequenceMass);

        foreach (var candidateTag in matchedTags.Tags)
        {
            // Tags shorter than the configured minimum are ignored.
            if (candidateTag.Length < _minMatchedTagLength)
            {
                continue;
            }

            // So are tags with no flanking mass on either terminus.
            if (candidateTag.NTermFlankingMass == null && candidateTag.CTermFlankingMass == null)
            {
                continue;
            }

            var found = matchFinder.FindMatches(candidateTag).ToArray();
            foreach (var match in found)
            {
                var first = match.StartIndex;  // startIndex is inclusive
                var limit = match.EndIndex;    // endIndex is exclusive
                var matchedLength = limit - first;
                var sequence = proteinSeq.Substring(first, matchedLength);

                // Re-scoring: replace the score with the sum of fragment scores
                // across the internal cleavages of the matched sequence.
                var rebuilt = Sequence.CreateSequence(sequence, match.ModificationText, _aaSet);
                match.Score = rebuilt
                    .GetInternalCleavages()
                    .Sum(site => scorer.GetFragmentScore(site.PrefixComposition, site.SuffixComposition));

                // Residues just outside the match; '-' marks the start or end of the protein sequence.
                var pre = first != 0 ? proteinSeq[first - 1] : '-';
                var post = limit < proteinSeq.Length ? proteinSeq[limit] : '-';

                yield return new TagSequenceMatch(sequence, proteinName, match, pre, post);
            }
        }
    }
}