/// <summary>
/// Routes a matched tag to the search routine that fits the flanking masses it carries.
/// </summary>
/// <param name="matchedTag">tag whose N-term and C-term flanking masses select the routine</param>
/// <returns>
/// the matches produced by the selected routine, or an empty sequence when the tag
/// has neither flanking mass
/// </returns>
public IEnumerable<TagMatch> FindMatches(MatchedTag matchedTag)
{
    var hasNTermMass = matchedTag.NTermFlankingMass != null;
    var hasCTermMass = matchedTag.CTermFlankingMass != null;

    if (hasNTermMass)
    {
        if (hasCTermMass)
        {
            // Both sides known
            return FindMatchesWithFeatureMass(matchedTag);
        }

        // Only the N-term side is known
        return FindMatchesForwardAndBackward(matchedTag);
    }

    if (hasCTermMass)
    {
        // Only the C-term side is known
        return FindMatchesBackwardAndForward(matchedTag);
    }

    return Enumerable.Empty<TagMatch>();
}
/// <summary>
/// Adds a tag to this tag set, merging it into an already stored tag when one accepts it.
/// </summary>
/// <param name="tag">a matched tag to add</param>
/// <returns>true if the tag was merged into an existing tag; false if it was stored as a new tag</returns>
public bool Add(MatchedTag tag)
{
    // Stored tags are tried in order; the first successful merge ends the search.
    foreach (var storedTag in _tags)
    {
        if (TryMerge(storedTag, tag))
        {
            return true;
        }
    }

    // No stored tag accepted the merge, so keep the tag on its own.
    _tags.Add(tag);
    return false;
}
/// <summary>
/// Adds a tag to this tag set, merging it into an already stored tag when one accepts it.
/// </summary>
/// <param name="tag">a matched tag to add</param>
/// <returns>true if the tag was merged into an existing tag; false if it was stored as a new tag</returns>
public bool Add(MatchedTag tag)
{
    // Stored tags are tried in order; the first successful merge ends the search.
    foreach (var storedTag in _tags)
    {
        if (TryMerge(storedTag, tag))
        {
            return true;
        }
    }

    // No stored tag accepted the merge, so keep the tag on its own.
    _tags.Add(tag);
    return false;
}
/// <summary>
/// Folds another tag into this tag: widens the index range to cover both tags and
/// updates the running averages of the N-term and C-term flanking masses.
/// </summary>
/// <param name="tag">tag to fold into this tag</param>
/// <returns>this instance, after the update</returns>
public MatchedTag Add(MatchedTag tag)
{
    // ----- N-term side -----
    var mergedStart = Math.Min(StartIndex, tag.StartIndex);
    if (tag.IsNTermFlankingMassReliable)
    {
        // A reliable incoming mass is averaged against the reliable masses counted so far.
        var nTermSum = NumReliableNTermFlankingMasses * NTermFlankingMass + tag.NTermFlankingMass;
        NTermFlankingMass = nTermSum / (NumReliableNTermFlankingMasses + 1);
        NumReliableNTermFlankingMasses++;
    }
    else if (!IsNTermFlankingMassReliable)
    {
        // Neither side is reliable: average over every tag merged so far.
        // (When this tag is already reliable, an unreliable incoming mass is ignored.)
        var nTermSum = NumMergedSequenceTags * NTermFlankingMass + tag.NTermFlankingMass;
        NTermFlankingMass = nTermSum / (NumMergedSequenceTags + 1);
    }
    StartIndex = mergedStart;

    // ----- C-term side -----
    var mergedEnd = Math.Max(EndIndex, tag.EndIndex);
    if (tag.IsCTermFlankingMassReliable)
    {
        var cTermSum = NumReliableCTermFlankingMasses * CTermFlankingMass + tag.CTermFlankingMass;
        CTermFlankingMass = cTermSum / (NumReliableCTermFlankingMasses + 1);
        NumReliableCTermFlankingMasses++;
    }
    else if (!IsCTermFlankingMassReliable)
    {
        var cTermSum = NumMergedSequenceTags * CTermFlankingMass + tag.CTermFlankingMass;
        CTermFlankingMass = cTermSum / (NumMergedSequenceTags + 1);
    }
    EndIndex = mergedEnd;

    // The merged-tag count goes up only after both averages used the old count.
    NumMergedSequenceTags++;
    return this;
}
/// <summary>
/// Enumerates tag matches for a tag that carries both flanking masses. The feature mass
/// is the sum of both flanking masses, the tag mass and the mass of water; every backward
/// match is paired with every forward match found for it.
/// </summary>
/// <param name="matchedTag">tag with both N-term and C-term flanking masses set</param>
/// <returns>
/// one <see cref="TagMatch"/> per backward/forward pair whose combined mass does not exceed
/// the maximum sequence mass; nothing when either flanking mass is missing
/// </returns>
private IEnumerable<TagMatch> FindMatchesWithFeatureMass(MatchedTag matchedTag)
{
    if (matchedTag.NTermFlankingMass == null || matchedTag.CTermFlankingMass == null)
    {
        yield break;
    }

    var nTermFlankingMass = matchedTag.NTermFlankingMass.Value;
    var cTermFlankingMass = matchedTag.CTermFlankingMass.Value;

    var featureMass = nTermFlankingMass + matchedTag.Mass + cTermFlankingMass + Composition.H2O.Mass;
    var backwardShiftMass = matchedTag.Mass + nTermFlankingMass;
    var backwardGraph = new ShiftedSequenceGraph(
        _aaSet,
        backwardShiftMass,
        false,
        matchedTag.StartIndex,
        featureMass - MinSumModificationMasses);

    foreach (var backwardMatch in GetBackwardMatches(matchedTag, backwardGraph, featureMass))
    {
        // A fresh forward graph is built for every backward match, shifted by the mass
        // already explained by that match plus the tag itself.
        var forwardShiftMass = backwardMatch.Mass + matchedTag.Mass;
        var forwardGraph = new ShiftedSequenceGraph(
            _aaSet,
            forwardShiftMass,
            true,
            _proteinSequence.Length - matchedTag.EndIndex,
            featureMass - MinSumModificationMasses);

        foreach (var forwardMatch in GetForwardMatches(matchedTag, forwardGraph, featureMass))
        {
            var totalMass = forwardMatch.Mass + matchedTag.Mass + backwardMatch.Mass;
            if (totalMass > _maxSequenceMass)
            {
                continue;
            }

            // Forward modification instances are re-based by this offset before being listed.
            var offset = matchedTag.EndIndex - backwardMatch.Index - 1;
            var shiftedForwardMods = forwardMatch.Modifications
                .Select(m => m.GetModificationInstanceWithOffset(offset));
            var modStr = string.Join(",", backwardMatch.Modifications.Concat(shiftedForwardMods));

            // Backward modifications first, then forward ones.
            var modList = new List<Modification>();
            modList.AddRange(backwardMatch.Modifications.Select(m => m.Modification));
            modList.AddRange(forwardMatch.Modifications.Select(m => m.Modification));

            yield return new TagMatch(
                backwardMatch.Index,
                forwardMatch.Index,
                matchedTag.Length,
                backwardMatch.Charge,
                backwardMatch.Score,
                forwardMatch.Score,
                totalMass,
                new ModificationCombination(modList),
                modStr);
        }
    }
}
/// <summary>
/// Walks from the residue just before the tag toward the protein N-terminus, adding one
/// residue per step to the graph and yielding the best match found after each step.
/// </summary>
/// <param name="matchedTag">tag whose start index is where the walk begins</param>
/// <param name="backwardGraph">graph that receives the residues</param>
/// <param name="featureMass">optional feature mass forwarded to the match search</param>
/// <returns>
/// the non-null best matches, each with its Index set to the step position (never below 0);
/// enumeration ends as soon as the graph refuses a residue
/// </returns>
private IEnumerable<FlankingMassMatch> GetBackwardMatches(
    MatchedTag matchedTag,
    ShiftedSequenceGraph backwardGraph,
    double? featureMass = null)
{
    // Position -1 stands for the protein N-terminus itself.
    for (var position = matchedTag.StartIndex - 1; position >= -1; position--)
    {
        var insideProtein = position >= 0;
        var residue = insideProtein ? _proteinSequence[position] : AminoAcid.ProteinNTerm.Residue;
        var location = position > 0 ? SequenceLocation.Everywhere : SequenceLocation.ProteinNTerm;

        if (!backwardGraph.AddAminoAcid(residue, location))
        {
            yield break;
        }

        // No match is evaluated at position 0; the search resumes once the
        // N-terminus residue has been added on the following step.
        if (position != 0)
        {
            var bestMatch = GetBestMatchInTheGraph(backwardGraph, _spec, featureMass);
            if (bestMatch != null)
            {
                bestMatch.Index = Math.Max(position, 0);
                yield return bestMatch;
            }
        }
    }
}
// private readonly int _minProductIonCharge;
// private readonly int _maxProductIonCharge;

/// <summary>
/// Walks from the tag's end index toward the protein C-terminus, adding one residue per
/// step to the graph and yielding the best match found after each step.
/// </summary>
/// <param name="matchedTag">tag whose end index is where the walk begins</param>
/// <param name="forwardGraph">graph that receives the residues</param>
/// <param name="featureMass">optional feature mass forwarded to the match search</param>
/// <returns>
/// the non-null best matches, each with its Index set to one past the step position
/// (never above the sequence length); enumeration ends as soon as the graph refuses a residue
/// </returns>
private IEnumerable<FlankingMassMatch> GetForwardMatches(
    MatchedTag matchedTag,
    ShiftedSequenceGraph forwardGraph,
    double? featureMass = null)
{
    // Position == sequence length stands for the protein C-terminus itself.
    for (var position = matchedTag.EndIndex; position <= _proteinSequence.Length; position++)
    {
        var insideProtein = position < _proteinSequence.Length;
        var residue = insideProtein ? _proteinSequence[position] : AminoAcid.ProteinCTerm.Residue;
        var location = position < _proteinSequence.Length - 1
            ? SequenceLocation.Everywhere
            : SequenceLocation.ProteinCTerm;

        if (!forwardGraph.AddAminoAcid(residue, location))
        {
            yield break;
        }

        // No match is evaluated on the last residue; the search resumes once the
        // C-terminus residue has been added on the following step.
        if (position != _proteinSequence.Length - 1)
        {
            var bestMatch = GetBestMatchInTheGraph(forwardGraph, _spec, featureMass);
            if (bestMatch != null)
            {
                bestMatch.Index = Math.Min(position + 1, _proteinSequence.Length);
                yield return bestMatch;
            }
        }
    }
}
/// <summary>
/// Tries to merge <paramref name="newTag"/> into <paramref name="existingTag"/>.
/// Each flanking mass is first re-expressed relative to the merged range; the merge is
/// rejected when the two re-expressed masses disagree beyond the applicable tolerance,
/// or when only one of the two tags has a flanking mass on a given side.
/// </summary>
/// <param name="existingTag">tag already in the set; modified in place when the merge succeeds</param>
/// <param name="newTag">tag to merge in; only read</param>
/// <returns>true if the tags were merged; false if they were incompatible (nothing is modified)</returns>
private bool TryMerge(MatchedTag existingTag, MatchedTag newTag)
{
    // ----- N-term side -----
    var mergedStart = Math.Min(existingTag.StartIndex, newTag.StartIndex);
    double? mergedNTermMass = null;

    var existingNTermMass = existingTag.NTermFlankingMass;
    var newNTermMass = newTag.NTermFlankingMass;
    if (existingNTermMass != null && newNTermMass != null)
    {
        var nTermFromExisting = existingNTermMass.Value
                                - GetSequenceMass(newTag.StartIndex, existingTag.StartIndex);
        var nTermFromNew = newNTermMass.Value
                           - GetSequenceMass(existingTag.StartIndex, newTag.StartIndex);

        // Same reliability on both tags -> regular tolerance; mixed -> relaxed tolerance.
        var sameNTermReliability =
            existingTag.IsNTermFlankingMassReliable == newTag.IsNTermFlankingMassReliable;
        var nTermTolerance = sameNTermReliability ? _tolerance : _relaxedTolerance;
        if (!nTermTolerance.IsWithin(nTermFromExisting, nTermFromNew))
        {
            return false;
        }

        // Weight by reliable-mass counts when either side is reliable, otherwise by merged-tag counts.
        var anyNTermReliable =
            existingTag.IsNTermFlankingMassReliable || newTag.IsNTermFlankingMassReliable;
        var existingNTermWeight = anyNTermReliable
            ? existingTag.NumReliableNTermFlankingMasses
            : existingTag.NumMergedSequenceTags;
        var newNTermWeight = anyNTermReliable
            ? newTag.NumReliableNTermFlankingMasses
            : newTag.NumMergedSequenceTags;

        mergedNTermMass = (existingNTermWeight * nTermFromExisting + newNTermWeight * nTermFromNew)
                          / (existingNTermWeight + newNTermWeight);
    }
    else if (existingNTermMass != newNTermMass)
    {
        // Exactly one of the two tags has an N-term flanking mass.
        return false;
    }

    // ----- C-term side -----
    var mergedEnd = Math.Max(existingTag.EndIndex, newTag.EndIndex);
    double? mergedCTermMass = null;

    var existingCTermMass = existingTag.CTermFlankingMass;
    var newCTermMass = newTag.CTermFlankingMass;
    if (existingCTermMass != null && newCTermMass != null)
    {
        var cTermFromExisting = existingCTermMass.Value
                                - GetSequenceMass(existingTag.EndIndex, newTag.EndIndex);
        var cTermFromNew = newCTermMass.Value
                           - GetSequenceMass(newTag.EndIndex, existingTag.EndIndex);

        var sameCTermReliability =
            existingTag.IsCTermFlankingMassReliable == newTag.IsCTermFlankingMassReliable;
        var cTermTolerance = sameCTermReliability ? _tolerance : _relaxedTolerance;
        if (!cTermTolerance.IsWithin(cTermFromExisting, cTermFromNew))
        {
            return false;
        }

        var anyCTermReliable =
            existingTag.IsCTermFlankingMassReliable || newTag.IsCTermFlankingMassReliable;
        var existingCTermWeight = anyCTermReliable
            ? existingTag.NumReliableCTermFlankingMasses
            : existingTag.NumMergedSequenceTags;
        var newCTermWeight = anyCTermReliable
            ? newTag.NumReliableCTermFlankingMasses
            : newTag.NumMergedSequenceTags;

        mergedCTermMass = (existingCTermWeight * cTermFromExisting + newCTermWeight * cTermFromNew)
                          / (existingCTermWeight + newCTermWeight);
    }
    else if (existingCTermMass != newCTermMass)
    {
        // Exactly one of the two tags has a C-term flanking mass.
        return false;
    }

    // ----- Commit: nothing above this point has modified either tag -----
    // The mass must be extended before the indices change, because it is computed
    // from the old start/end of the existing tag.
    existingTag.Mass += GetSequenceMass(mergedStart, existingTag.StartIndex)
                        + GetSequenceMass(existingTag.EndIndex, mergedEnd);
    existingTag.StartIndex = mergedStart;
    existingTag.EndIndex = mergedEnd;
    existingTag.NTermFlankingMass = mergedNTermMass;
    existingTag.CTermFlankingMass = mergedCTermMass;
    existingTag.NumMergedSequenceTags += newTag.NumMergedSequenceTags;
    existingTag.NumReliableNTermFlankingMasses += newTag.NumReliableNTermFlankingMasses;
    existingTag.NumReliableCTermFlankingMasses += newTag.NumReliableCTermFlankingMasses;

    return true;
}
/// <summary>
/// Tries to merge <paramref name="newTag"/> into <paramref name="existingTag"/>.
/// Each flanking mass is first re-expressed relative to the merged range; the merge is
/// rejected when the two re-expressed masses disagree beyond the applicable tolerance,
/// or when only one of the two tags has a flanking mass on a given side.
/// </summary>
/// <param name="existingTag">tag already in the set; modified in place when the merge succeeds</param>
/// <param name="newTag">tag to merge in; only read</param>
/// <returns>true if the tags were merged; false if they were incompatible (nothing is modified)</returns>
private bool TryMerge(MatchedTag existingTag, MatchedTag newTag)
{
    // ----- N-term side -----
    var mergedStart = Math.Min(existingTag.StartIndex, newTag.StartIndex);
    double? mergedNTermMass = null;

    var existingNTermMass = existingTag.NTermFlankingMass;
    var newNTermMass = newTag.NTermFlankingMass;
    if (existingNTermMass != null && newNTermMass != null)
    {
        var nTermFromExisting = existingNTermMass.Value
                                - GetSequenceMass(newTag.StartIndex, existingTag.StartIndex);
        var nTermFromNew = newNTermMass.Value
                           - GetSequenceMass(existingTag.StartIndex, newTag.StartIndex);

        // Same reliability on both tags -> regular tolerance; mixed -> relaxed tolerance.
        var sameNTermReliability =
            existingTag.IsNTermFlankingMassReliable == newTag.IsNTermFlankingMassReliable;
        var nTermTolerance = sameNTermReliability ? _tolerance : _relaxedTolerance;
        if (!nTermTolerance.IsWithin(nTermFromExisting, nTermFromNew))
        {
            return false;
        }

        // Weight by reliable-mass counts when either side is reliable, otherwise by merged-tag counts.
        var anyNTermReliable =
            existingTag.IsNTermFlankingMassReliable || newTag.IsNTermFlankingMassReliable;
        var existingNTermWeight = anyNTermReliable
            ? existingTag.NumReliableNTermFlankingMasses
            : existingTag.NumMergedSequenceTags;
        var newNTermWeight = anyNTermReliable
            ? newTag.NumReliableNTermFlankingMasses
            : newTag.NumMergedSequenceTags;

        mergedNTermMass = (existingNTermWeight * nTermFromExisting + newNTermWeight * nTermFromNew)
                          / (existingNTermWeight + newNTermWeight);
    }
    else if (existingNTermMass != newNTermMass)
    {
        // Exactly one of the two tags has an N-term flanking mass.
        return false;
    }

    // ----- C-term side -----
    var mergedEnd = Math.Max(existingTag.EndIndex, newTag.EndIndex);
    double? mergedCTermMass = null;

    var existingCTermMass = existingTag.CTermFlankingMass;
    var newCTermMass = newTag.CTermFlankingMass;
    if (existingCTermMass != null && newCTermMass != null)
    {
        var cTermFromExisting = existingCTermMass.Value
                                - GetSequenceMass(existingTag.EndIndex, newTag.EndIndex);
        var cTermFromNew = newCTermMass.Value
                           - GetSequenceMass(newTag.EndIndex, existingTag.EndIndex);

        var sameCTermReliability =
            existingTag.IsCTermFlankingMassReliable == newTag.IsCTermFlankingMassReliable;
        var cTermTolerance = sameCTermReliability ? _tolerance : _relaxedTolerance;
        if (!cTermTolerance.IsWithin(cTermFromExisting, cTermFromNew))
        {
            return false;
        }

        var anyCTermReliable =
            existingTag.IsCTermFlankingMassReliable || newTag.IsCTermFlankingMassReliable;
        var existingCTermWeight = anyCTermReliable
            ? existingTag.NumReliableCTermFlankingMasses
            : existingTag.NumMergedSequenceTags;
        var newCTermWeight = anyCTermReliable
            ? newTag.NumReliableCTermFlankingMasses
            : newTag.NumMergedSequenceTags;

        mergedCTermMass = (existingCTermWeight * cTermFromExisting + newCTermWeight * cTermFromNew)
                          / (existingCTermWeight + newCTermWeight);
    }
    else if (existingCTermMass != newCTermMass)
    {
        // Exactly one of the two tags has a C-term flanking mass.
        return false;
    }

    // ----- Commit: nothing above this point has modified either tag -----
    // The mass must be extended before the indices change, because it is computed
    // from the old start/end of the existing tag.
    existingTag.Mass += GetSequenceMass(mergedStart, existingTag.StartIndex)
                        + GetSequenceMass(existingTag.EndIndex, mergedEnd);
    existingTag.StartIndex = mergedStart;
    existingTag.EndIndex = mergedEnd;
    existingTag.NTermFlankingMass = mergedNTermMass;
    existingTag.CTermFlankingMass = mergedCTermMass;
    existingTag.NumMergedSequenceTags += newTag.NumMergedSequenceTags;
    existingTag.NumReliableNTermFlankingMasses += newTag.NumReliableNTermFlankingMasses;
    existingTag.NumReliableCTermFlankingMasses += newTag.NumReliableCTermFlankingMasses;

    return true;
}
/// <summary>
/// Folds another tag into this tag: widens the index range to cover both tags and
/// updates the running averages of the N-term and C-term flanking masses.
/// </summary>
/// <param name="tag">tag to fold into this tag</param>
/// <returns>this instance, after the update</returns>
public MatchedTag Add(MatchedTag tag)
{
    // ----- N-term side -----
    var mergedStart = Math.Min(StartIndex, tag.StartIndex);
    if (tag.IsNTermFlankingMassReliable)
    {
        // A reliable incoming mass is averaged against the reliable masses counted so far.
        var nTermSum = NumReliableNTermFlankingMasses * NTermFlankingMass + tag.NTermFlankingMass;
        NTermFlankingMass = nTermSum / (NumReliableNTermFlankingMasses + 1);
        NumReliableNTermFlankingMasses++;
    }
    else if (!IsNTermFlankingMassReliable)
    {
        // Neither side is reliable: average over every tag merged so far.
        // (When this tag is already reliable, an unreliable incoming mass is ignored.)
        var nTermSum = NumMergedSequenceTags * NTermFlankingMass + tag.NTermFlankingMass;
        NTermFlankingMass = nTermSum / (NumMergedSequenceTags + 1);
    }
    StartIndex = mergedStart;

    // ----- C-term side -----
    var mergedEnd = Math.Max(EndIndex, tag.EndIndex);
    if (tag.IsCTermFlankingMassReliable)
    {
        var cTermSum = NumReliableCTermFlankingMasses * CTermFlankingMass + tag.CTermFlankingMass;
        CTermFlankingMass = cTermSum / (NumReliableCTermFlankingMasses + 1);
        NumReliableCTermFlankingMasses++;
    }
    else if (!IsCTermFlankingMassReliable)
    {
        var cTermSum = NumMergedSequenceTags * CTermFlankingMass + tag.CTermFlankingMass;
        CTermFlankingMass = cTermSum / (NumMergedSequenceTags + 1);
    }
    EndIndex = mergedEnd;

    // The merged-tag count goes up only after both averages used the old count.
    NumMergedSequenceTags++;
    return this;
}
/// <summary>
/// Exploratory test against a local data set: collects the sequence tags of the MS2 scans
/// that match one feature mass inside a scan window, maps them onto database proteins, and
/// prints the tags of the protein with the most tag hits before and after merging.
/// The test is ignored when the data set or the FASTA file is not found.
/// </summary>
public void TestFeatureId()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
    if (!File.Exists(dataSet))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, dataSet);
    }

    // Feature: 5236-5286 6-12 8480.3681 5
    const int minScanNum = 5236;
    const int maxScanNum = 5286;
    const double featureMass = 8480.3681;

    // Alternative features:
    //const int minScanNum = 7251;
    //const int maxScanNum = 7326;
    //const double featureMass = 32347.18;

    // const int minScanNum = 4451;
    // const int maxScanNum = 4541;
    // const double featureMass = 31267.95;

    var tolerance = new Tolerance(10);
    var relaxedTolerance = new Tolerance(20);

    const int minTagLength = 5;
    const int minMergedTagLength = 7;
    const int minNumTagMatches = 1;

    var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
    var run = PbfLcMsRun.GetLcMsRun(rawFileName);

    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();
    var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
    var filter = new Ms1FtFilter(run, tolerance, featureFileName);

    // Both scan bounds are exclusive.
    var ms2ScanNums = filter.GetMatchingMs2ScanNums(featureMass)
        .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
        .ToArray();

    const string tagFileName = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");
    const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";
    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);
    var tagParser = new SequenceTagParser(tagFileName, minTagLength);

    // Protein name -> every tag occurrence found in that protein.
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var ms2ScanNum in ms2ScanNums)
    {
        foreach (var tag in tagParser.GetSequenceTags(ms2ScanNum))
        {
            var hits = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
            foreach (var hit in hits)
            {
                var hitProtein = fastaDb.GetProteinName(hit);
                var hitStart = fastaDb.GetZeroBasedPositionInProtein(hit);
                var hitTag = new MatchedTag(tag, hitStart, featureMass);

                IList<MatchedTag> tagsOfProtein;
                if (!proteinsToTags.TryGetValue(hitProtein, out tagsOfProtein))
                {
                    tagsOfProtein = new List<MatchedTag>();
                    proteinsToTags.Add(hitProtein, tagsOfProtein);
                }
                tagsOfProtein.Add(hitTag);
            }
        }
    }

    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        // Prints one tag: flanking-mass differences against the protein's own
        // prefix/suffix masses, the covered sequence, start index and reliability flags.
        Action<MatchedTag> printTag = t =>
        {
            var seq = proteinSequence.Substring(t.StartIndex, t.EndIndex - t.StartIndex);
            var nTermMass = protein.GetMass(0, t.StartIndex);
            var cTermMass = protein.GetMass(t.EndIndex, protein.Count);
            Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                (t.NTermFlankingMass - nTermMass), seq, (t.CTermFlankingMass - cTermMass), t.StartIndex,
                t.IsNTermFlankingMassReliable, t.IsCTermFlankingMassReliable);
        };

        var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet, tolerance, relaxedTolerance);

        Console.WriteLine("********** Before merging");
        foreach (var matchedTag in entry.Value)
        {
            // Print first, then add: adding may merge the tag into an earlier one.
            printTag(matchedTag);
            matchedTagSet.Add(matchedTag);
        }

        Console.WriteLine("********** After merging");
        foreach (var matchedTag in matchedTagSet.Tags)
        {
            if (matchedTag.Length >= minMergedTagLength)
            {
                printTag(matchedTag);
            }
        }

        // Only the protein with the most tag hits is reported.
        break;
    }
}
/// <summary>
/// Exploratory test against local data: finds sequence tags in a single spectrum, maps
/// them onto database proteins, and prints every tag occurrence per protein (most hits
/// first) with its flanking masses and their differences from the protein's own
/// prefix/suffix masses.
/// The test is ignored when the raw file or the FASTA file is not found.
/// </summary>
public void TestGetProteinsWithTagMatchingSingleSpec()
{
    var methodName = MethodBase.GetCurrentMethod().Name;
    TestUtils.ShowStarting(methodName);

    // dataSet and minTagLength are only referenced by the commented-out
    // tag-file alternative below; they are kept so it can be re-enabled as is.
    const string dataSet = @"H:\Research\Lewy\raw\Lewy_intact_07";
    // const int scanNum = 5158;
    const int minTagLength = 7;
    const int minNumTagMatches = 1;
    var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();

    const int scanNum = 2;

    // Parse sequence tags
    //const string tagFileName = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");

    const string rawFilePath = "";
    const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

    if (!File.Exists(rawFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
    }

    if (!File.Exists(fastaFilePath))
    {
        Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
    }

    var fastaDb = new FastaDatabase(fastaFilePath);
    var searchableDb = new SearchableDatabase(fastaDb);

    //var tagParser = new SequenceTagParser(tagFileName, minTagLength);
    //var tags = tagParser.GetSequenceTags(scanNum);

    var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
    var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
    if (spec == null)
    {
        // The "as" cast yields null when the scan is missing or is not a product
        // spectrum; fail with a clear message instead of passing null on.
        Assert.Fail(@"Scan {0} of {1} could not be read as a ProductSpectrum", scanNum, rawFilePath);
    }

    var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
    var tags = tagFinder.GetAllSequenceTagString();

    // Protein name -> every tag occurrence found in that protein.
    var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
    foreach (var tag in tags)
    {
        var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
        foreach (var index in matchedIndices)
        {
            var protein = fastaDb.GetProteinName(index);

            // Zero-based on purpose: StartIndex is used below as a zero-based offset
            // into the protein sequence (Substring / GetMass). A one-based position
            // shifts every reported sequence and mass by one residue and can run
            // past the end of the protein.
            var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
            var matchedTag = new MatchedTag(tag, startIndex, 0.0);

            IList<MatchedTag> existingTags;
            if (proteinsToTags.TryGetValue(protein, out existingTags))
            {
                existingTags.Add(matchedTag);
            }
            else
            {
                proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
            }
        }
    }

    foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
    {
        // Entries are sorted by hit count, so everything after the first miss is below the threshold too.
        if (entry.Value.Count < minNumTagMatches)
        {
            break;
        }

        var proteinName = entry.Key;
        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
        var protein = new Sequence(proteinSequence, aminoAcidSet);
        Console.WriteLine(proteinName + "\t" + entry.Value.Count);

        foreach (var matchedTag in entry.Value)
        {
            var seq = proteinSequence.Substring(matchedTag.StartIndex,
                matchedTag.EndIndex - matchedTag.StartIndex);
            var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
            var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
            Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass), seq,
                matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
        }
    }
}
/// <summary>
/// Groups the database occurrences of the given sequence tags by protein.
/// Tags that occur at more than <c>MaxNumProteinMatchesPerTag</c> database positions are skipped.
/// </summary>
/// <param name="tags">sequence tags to look up</param>
/// <param name="searchableDb">database searched for each tag's sequence</param>
/// <param name="aaSet">amino acid set used to compute tag masses and to build the tag sets</param>
/// <param name="tolerance">tolerance handed to every created tag set</param>
/// <param name="relaxedTolerance">relaxed tolerance handed to every created tag set</param>
/// <returns>a map from protein name to the set of tags matched in that protein</returns>
public static Dictionary<string, MatchedTagSet> GetProteinToMatchedTagsMap(
    IEnumerable<SequenceTag> tags,
    SearchableDatabase searchableDb,
    AminoAcidSet aaSet,
    Tolerance tolerance,
    Tolerance relaxedTolerance)
{
    var fastaDb = searchableDb.FastaDatabase;
    var tagSetsByProtein = new Dictionary<string, MatchedTagSet>();

    foreach (var tag in tags)
    {
        var hits = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
        if (hits.Length > MaxNumProteinMatchesPerTag)
        {
            continue;
        }

        foreach (var hit in hits)
        {
            var proteinName = fastaDb.GetProteinName(hit);
            var positionInProtein = fastaDb.GetZeroBasedPositionInProtein(hit);
            var tagMass = aaSet.GetComposition(tag.Sequence).Mass;
            var matchedTag = new MatchedTag(tag, positionInProtein) { Mass = tagMass };

            // Create the protein's tag set on first sight, then add through one common path.
            MatchedTagSet tagSet;
            if (!tagSetsByProtein.TryGetValue(proteinName, out tagSet))
            {
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                tagSet = new MatchedTagSet(proteinSequence, aaSet, tolerance, relaxedTolerance);
                tagSetsByProtein.Add(proteinName, tagSet);
            }

            tagSet.Add(matchedTag);
        }
    }

    return tagSetsByProtein;
}