Ejemplo n.º 1
0
 /// <summary>
 /// Dispatches to the search routine that fits the flanking masses set on <paramref name="matchedTag"/>.
 /// </summary>
 /// <param name="matchedTag">the tag whose N- and C-terminal flanking masses select the search routine</param>
 /// <returns>
 /// the result of the selected routine, or an empty sequence when neither flanking mass is set
 /// </returns>
 public IEnumerable<TagMatch> FindMatches(MatchedTag matchedTag)
 {
     var hasNTermMass = matchedTag.NTermFlankingMass != null;
     var hasCTermMass = matchedTag.CTermFlankingMass != null;

     if (hasNTermMass && hasCTermMass)
     {
         // Both flanking masses are available.
         return FindMatchesWithFeatureMass(matchedTag);
     }

     if (hasNTermMass)
     {
         // Only the N-terminal flanking mass is available.
         return FindMatchesForwardAndBackward(matchedTag);
     }

     if (hasCTermMass)
     {
         // Only the C-terminal flanking mass is available.
         return FindMatchesBackwardAndForward(matchedTag);
     }

     return Enumerable.Empty<TagMatch>();
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Inserts a matched tag into this tag set, merging it into an already stored tag when possible.
 /// </summary>
 /// <param name="tag">the matched tag to insert</param>
 /// <returns>
 /// true if <paramref name="tag"/> was merged into a tag already in the set;
 /// false if it was stored as a new entry
 /// </returns>
 public bool Add(MatchedTag tag)
 {
     // Try the stored tags in order and stop at the first one that accepts the merge.
     foreach (var candidate in _tags)
     {
         if (TryMerge(candidate, tag))
         {
             return true;
         }
     }

     // Nothing accepted the merge: keep the tag as its own entry.
     _tags.Add(tag);
     return false;
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Puts a matched tag into this tag set, either by merging it into a stored tag or by storing it as is.
 /// </summary>
 /// <param name="tag">the matched tag to put into the set</param>
 /// <returns>
 /// true when a stored tag absorbed <paramref name="tag"/>; false when <paramref name="tag"/> was appended
 /// </returns>
 public bool Add(MatchedTag tag)
 {
     // Any() stops at the first stored tag for which TryMerge succeeds.
     var wasMerged = _tags.Any(stored => TryMerge(stored, tag));
     if (!wasMerged)
     {
         _tags.Add(tag);
     }

     return wasMerged;
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Folds another matched tag into this one: the index range is widened to cover both tags and
        /// the flanking masses are updated as running averages.
        /// </summary>
        /// <param name="tag">the tag to fold into this instance</param>
        /// <returns>this instance, after it has been updated</returns>
        public MatchedTag Add(MatchedTag tag)
        {
            // ----- N-terminal side -----
            var mergedStart = Math.Min(StartIndex, tag.StartIndex);

            if (tag.IsNTermFlankingMassReliable)
            {
                // Incoming mass is reliable: average it with the reliable masses counted so far.
                NTermFlankingMass = (NumReliableNTermFlankingMasses * NTermFlankingMass + tag.NTermFlankingMass) /
                                    (NumReliableNTermFlankingMasses + 1);
                NumReliableNTermFlankingMasses++;
            }
            else if (!IsNTermFlankingMassReliable)
            {
                // Neither mass is reliable: average over all merged tags.
                NTermFlankingMass = (NumMergedSequenceTags * NTermFlankingMass + tag.NTermFlankingMass) /
                                    (NumMergedSequenceTags + 1);
            }
            // Otherwise this tag already holds a reliable mass and the unreliable incoming one is ignored.

            StartIndex = mergedStart;

            // ----- C-terminal side -----
            var mergedEnd = Math.Max(EndIndex, tag.EndIndex);

            if (tag.IsCTermFlankingMassReliable)
            {
                // Incoming mass is reliable: average it with the reliable masses counted so far.
                CTermFlankingMass = (NumReliableCTermFlankingMasses * CTermFlankingMass + tag.CTermFlankingMass) /
                                    (NumReliableCTermFlankingMasses + 1);
                NumReliableCTermFlankingMasses++;
            }
            else if (!IsCTermFlankingMassReliable)
            {
                // Neither mass is reliable: average over all merged tags.
                CTermFlankingMass = (NumMergedSequenceTags * CTermFlankingMass + tag.CTermFlankingMass) /
                                    (NumMergedSequenceTags + 1);
            }
            // Otherwise this tag already holds a reliable mass and the unreliable incoming one is ignored.

            EndIndex = mergedEnd;

            // The merged-tag count is bumped last because the averages above use the previous count.
            NumMergedSequenceTags++;

            return this;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Lazily enumerates tag matches for a tag that has both flanking masses set, pairing every
        /// backward (N-terminal) extension with every forward (C-terminal) extension.
        /// </summary>
        /// <param name="matchedTag">the tag to extend; nothing is yielded unless both flanking masses are set</param>
        /// <returns>one <see cref="TagMatch"/> per backward/forward pair whose total mass does not exceed the maximum sequence mass</returns>
        private IEnumerable<TagMatch> FindMatchesWithFeatureMass(MatchedTag matchedTag)
        {
            var nTermFlank = matchedTag.NTermFlankingMass;
            var cTermFlank = matchedTag.CTermFlankingMass;
            if (!nTermFlank.HasValue || !cTermFlank.HasValue)
            {
                yield break;
            }

            // Feature mass = N-term flank + tag + C-term flank + one water.
            var featureMass = nTermFlank.Value + matchedTag.Mass + cTermFlank.Value + Composition.H2O.Mass;
            var shiftMass = matchedTag.Mass + nTermFlank.Value;

            var backwardGraph = new ShiftedSequenceGraph(
                _aaSet,
                shiftMass,
                false,
                matchedTag.StartIndex,
                featureMass - MinSumModificationMasses);

            foreach (var backwardMatch in GetBackwardMatches(matchedTag, backwardGraph, featureMass))
            {
                // A fresh forward graph is built for every backward match, shifted by that match plus the tag.
                var forwardGraph = new ShiftedSequenceGraph(
                    _aaSet,
                    backwardMatch.Mass + matchedTag.Mass,
                    true,
                    _proteinSequence.Length - matchedTag.EndIndex,
                    featureMass - MinSumModificationMasses);

                foreach (var forwardMatch in GetForwardMatches(matchedTag, forwardGraph, featureMass))
                {
                    var mass = forwardMatch.Mass + matchedTag.Mass + backwardMatch.Mass;
                    if (mass > _maxSequenceMass)
                    {
                        continue;
                    }

                    // Forward modifications are re-expressed with this offset before being listed.
                    var offset = matchedTag.EndIndex - backwardMatch.Index - 1;
                    var shiftedForwardMods =
                        forwardMatch.Modifications.Select(m => m.GetModificationInstanceWithOffset(offset));
                    var modStr = string.Join(",", backwardMatch.Modifications.Concat(shiftedForwardMods));

                    // Backward modifications first, then forward ones.
                    var modList = new List<Modification>();
                    foreach (var modInstance in backwardMatch.Modifications.Concat(forwardMatch.Modifications))
                    {
                        modList.Add(modInstance.Modification);
                    }

                    yield return new TagMatch(
                        backwardMatch.Index,
                        forwardMatch.Index,
                        matchedTag.Length,
                        backwardMatch.Charge,
                        backwardMatch.Score,
                        forwardMatch.Score,
                        mass,
                        new ModificationCombination(modList),
                        modStr);
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Lazily walks from the residue just before the tag towards the protein N-terminus, growing
        /// <paramref name="backwardGraph"/> one residue at a time and yielding the best match found after each step.
        /// </summary>
        /// <param name="matchedTag">the tag whose start index determines where the walk begins</param>
        /// <param name="backwardGraph">the graph that receives the residues; the walk ends as soon as it rejects one</param>
        /// <param name="featureMass">optional feature mass forwarded to the graph matching routine</param>
        /// <returns>the non-null matches, each with its index set to the position it was found at (never below 0)</returns>
        private IEnumerable<FlankingMassMatch> GetBackwardMatches(
            MatchedTag matchedTag,
            ShiftedSequenceGraph backwardGraph,
            double? featureMass = null
            )
        {
            var position = matchedTag.StartIndex - 1;
            while (position >= -1)
            {
                // Position -1 stands for the protein N-terminus residue.
                var residue = position < 0
                    ? AminoAcid.ProteinNTerm.Residue
                    : _proteinSequence[position];
                var location = position <= 0
                    ? SequenceLocation.ProteinNTerm
                    : SequenceLocation.Everywhere;

                if (!backwardGraph.AddAminoAcid(residue, location))
                {
                    yield break;
                }

                // No match is evaluated at position 0; the next step adds the N-terminus residue first.
                if (position != 0)
                {
                    var match = GetBestMatchInTheGraph(backwardGraph, _spec, featureMass);
                    if (match != null)
                    {
                        match.Index = Math.Max(position, 0);
                        yield return match;
                    }
                }

                position--;
            }
        }
Ejemplo n.º 7
0
//        private readonly int _minProductIonCharge;
//        private readonly int _maxProductIonCharge;

        /// <summary>
        /// Lazily walks from the tag's end index towards the protein C-terminus, growing
        /// <paramref name="forwardGraph"/> one residue at a time and yielding the best match found after each step.
        /// </summary>
        /// <param name="matchedTag">the tag whose end index determines where the walk begins</param>
        /// <param name="forwardGraph">the graph that receives the residues; the walk ends as soon as it rejects one</param>
        /// <param name="featureMass">optional feature mass forwarded to the graph matching routine</param>
        /// <returns>the non-null matches, each with its index set to one past the position it was found at (capped at the sequence length)</returns>
        private IEnumerable<FlankingMassMatch> GetForwardMatches(
            MatchedTag matchedTag,
            ShiftedSequenceGraph forwardGraph,
            double? featureMass = null
            )
        {
            var position = matchedTag.EndIndex;
            while (position <= _proteinSequence.Length)
            {
                var sequenceLength = _proteinSequence.Length;
                var lastResidueIndex = sequenceLength - 1;

                // A position equal to the sequence length stands for the protein C-terminus residue.
                var residue = position >= sequenceLength
                    ? AminoAcid.ProteinCTerm.Residue
                    : _proteinSequence[position];
                var location = position >= lastResidueIndex
                    ? SequenceLocation.ProteinCTerm
                    : SequenceLocation.Everywhere;

                if (!forwardGraph.AddAminoAcid(residue, location))
                {
                    yield break;
                }

                // No match is evaluated at the last residue; the next step adds the C-terminus residue first.
                if (position != lastResidueIndex)
                {
                    var match = GetBestMatchInTheGraph(forwardGraph, _spec, featureMass);
                    if (match != null)
                    {
                        match.Index = Math.Min(position + 1, sequenceLength);
                        yield return match;
                    }
                }

                position++;
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Tries to fold <paramref name="newTag"/> into <paramref name="existingTag"/>.
        /// Nothing is assigned until every compatibility check has passed; on success
        /// <paramref name="existingTag"/> is updated in place.
        /// </summary>
        /// <param name="existingTag">the tag already stored; receives the merged range, mass, flanking masses and counters</param>
        /// <param name="newTag">the tag to fold in; only read</param>
        /// <returns>
        /// false when a flanking mass is set on only one of the tags, or when the adjusted flanking
        /// masses disagree beyond the applicable tolerance; true after a successful merge
        /// </returns>
        private bool TryMerge(MatchedTag existingTag, MatchedTag newTag)
        {
            // ----- N-terminal side -----
            var mergedStart = Math.Min(existingTag.StartIndex, newTag.StartIndex);
            double? mergedNTermMass = null;

            var existingNTerm = existingTag.NTermFlankingMass;
            var newNTerm = newTag.NTermFlankingMass;
            if (existingNTerm == null || newNTerm == null)
            {
                // At least one mass is missing: compatible only if both are missing.
                if (existingNTerm != newNTerm)
                {
                    return false;
                }
            }
            else
            {
                // Subtract the sequence mass between the two start indices from each flanking mass
                // (presumably re-expressing both relative to the merged start -- GetSequenceMass is defined elsewhere).
                var nTermFromExisting = existingNTerm.Value -
                                        GetSequenceMass(newTag.StartIndex, existingTag.StartIndex);
                var nTermFromNew = newNTerm.Value -
                                   GetSequenceMass(existingTag.StartIndex, newTag.StartIndex);

                // The strict tolerance applies when both tags have the same reliability, the relaxed one otherwise.
                var sameNTermReliability =
                    existingTag.IsNTermFlankingMassReliable == newTag.IsNTermFlankingMassReliable;
                var nTermTolerance = sameNTermReliability ? _tolerance : _relaxedTolerance;
                if (!nTermTolerance.IsWithin(nTermFromExisting, nTermFromNew))
                {
                    return false;
                }

                // Weight by reliable-mass counts when either side is reliable, by merged-tag counts otherwise.
                var weighByReliableNTerm =
                    existingTag.IsNTermFlankingMassReliable || newTag.IsNTermFlankingMassReliable;
                var existingNTermWeight = weighByReliableNTerm
                    ? existingTag.NumReliableNTermFlankingMasses
                    : existingTag.NumMergedSequenceTags;
                var newNTermWeight = weighByReliableNTerm
                    ? newTag.NumReliableNTermFlankingMasses
                    : newTag.NumMergedSequenceTags;

                mergedNTermMass =
                    (existingNTermWeight * nTermFromExisting + newNTermWeight * nTermFromNew) /
                    (existingNTermWeight + newNTermWeight);
            }

            // ----- C-terminal side -----
            var mergedEnd = Math.Max(existingTag.EndIndex, newTag.EndIndex);
            double? mergedCTermMass = null;

            var existingCTerm = existingTag.CTermFlankingMass;
            var newCTerm = newTag.CTermFlankingMass;
            if (existingCTerm == null || newCTerm == null)
            {
                // At least one mass is missing: compatible only if both are missing.
                if (existingCTerm != newCTerm)
                {
                    return false;
                }
            }
            else
            {
                // Same adjustment as on the N-terminal side, using the end indices.
                var cTermFromExisting = existingCTerm.Value -
                                        GetSequenceMass(existingTag.EndIndex, newTag.EndIndex);
                var cTermFromNew = newCTerm.Value -
                                   GetSequenceMass(newTag.EndIndex, existingTag.EndIndex);

                var sameCTermReliability =
                    existingTag.IsCTermFlankingMassReliable == newTag.IsCTermFlankingMassReliable;
                var cTermTolerance = sameCTermReliability ? _tolerance : _relaxedTolerance;
                if (!cTermTolerance.IsWithin(cTermFromExisting, cTermFromNew))
                {
                    return false;
                }

                var weighByReliableCTerm =
                    existingTag.IsCTermFlankingMassReliable || newTag.IsCTermFlankingMassReliable;
                var existingCTermWeight = weighByReliableCTerm
                    ? existingTag.NumReliableCTermFlankingMasses
                    : existingTag.NumMergedSequenceTags;
                var newCTermWeight = weighByReliableCTerm
                    ? newTag.NumReliableCTermFlankingMasses
                    : newTag.NumMergedSequenceTags;

                mergedCTermMass =
                    (existingCTermWeight * cTermFromExisting + newCTermWeight * cTermFromNew) /
                    (existingCTermWeight + newCTermWeight);
            }

            // ----- Commit -----
            // The tag mass must be extended before the indices change, since it reads the old bounds.
            existingTag.Mass += GetSequenceMass(mergedStart, existingTag.StartIndex) + GetSequenceMass(existingTag.EndIndex, mergedEnd);
            existingTag.StartIndex = mergedStart;
            existingTag.EndIndex = mergedEnd;
            existingTag.NTermFlankingMass = mergedNTermMass;
            existingTag.CTermFlankingMass = mergedCTermMass;
            existingTag.NumMergedSequenceTags += newTag.NumMergedSequenceTags;
            existingTag.NumReliableNTermFlankingMasses += newTag.NumReliableNTermFlankingMasses;
            existingTag.NumReliableCTermFlankingMasses += newTag.NumReliableCTermFlankingMasses;

            return true;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Tries to fold <paramref name="newTag"/> into <paramref name="existingTag"/>.
        /// Nothing is assigned until every compatibility check has passed; on success
        /// <paramref name="existingTag"/> is updated in place.
        /// </summary>
        /// <param name="existingTag">the tag already stored; receives the merged range, mass, flanking masses and counters</param>
        /// <param name="newTag">the tag to fold in; only read</param>
        /// <returns>
        /// false when a flanking mass is set on only one of the tags, or when the adjusted flanking
        /// masses disagree beyond the applicable tolerance; true after a successful merge
        /// </returns>
        private bool TryMerge(MatchedTag existingTag, MatchedTag newTag)
        {
            // ----- N-terminal side -----
            var mergedStart = Math.Min(existingTag.StartIndex, newTag.StartIndex);
            double? mergedNTermMass = null;

            var existingNTerm = existingTag.NTermFlankingMass;
            var newNTerm = newTag.NTermFlankingMass;
            if (existingNTerm == null || newNTerm == null)
            {
                // At least one mass is missing: compatible only if both are missing.
                if (existingNTerm != newNTerm)
                {
                    return false;
                }
            }
            else
            {
                // Subtract the sequence mass between the two start indices from each flanking mass
                // (presumably re-expressing both relative to the merged start -- GetSequenceMass is defined elsewhere).
                var nTermFromExisting = existingNTerm.Value -
                                        GetSequenceMass(newTag.StartIndex, existingTag.StartIndex);
                var nTermFromNew = newNTerm.Value -
                                   GetSequenceMass(existingTag.StartIndex, newTag.StartIndex);

                // The strict tolerance applies when both tags have the same reliability, the relaxed one otherwise.
                var sameNTermReliability =
                    existingTag.IsNTermFlankingMassReliable == newTag.IsNTermFlankingMassReliable;
                var nTermTolerance = sameNTermReliability ? _tolerance : _relaxedTolerance;
                if (!nTermTolerance.IsWithin(nTermFromExisting, nTermFromNew))
                {
                    return false;
                }

                // Weight by reliable-mass counts when either side is reliable, by merged-tag counts otherwise.
                var weighByReliableNTerm =
                    existingTag.IsNTermFlankingMassReliable || newTag.IsNTermFlankingMassReliable;
                var existingNTermWeight = weighByReliableNTerm
                    ? existingTag.NumReliableNTermFlankingMasses
                    : existingTag.NumMergedSequenceTags;
                var newNTermWeight = weighByReliableNTerm
                    ? newTag.NumReliableNTermFlankingMasses
                    : newTag.NumMergedSequenceTags;

                mergedNTermMass =
                    (existingNTermWeight * nTermFromExisting + newNTermWeight * nTermFromNew) /
                    (existingNTermWeight + newNTermWeight);
            }

            // ----- C-terminal side -----
            var mergedEnd = Math.Max(existingTag.EndIndex, newTag.EndIndex);
            double? mergedCTermMass = null;

            var existingCTerm = existingTag.CTermFlankingMass;
            var newCTerm = newTag.CTermFlankingMass;
            if (existingCTerm == null || newCTerm == null)
            {
                // At least one mass is missing: compatible only if both are missing.
                if (existingCTerm != newCTerm)
                {
                    return false;
                }
            }
            else
            {
                // Same adjustment as on the N-terminal side, using the end indices.
                var cTermFromExisting = existingCTerm.Value -
                                        GetSequenceMass(existingTag.EndIndex, newTag.EndIndex);
                var cTermFromNew = newCTerm.Value -
                                   GetSequenceMass(newTag.EndIndex, existingTag.EndIndex);

                var sameCTermReliability =
                    existingTag.IsCTermFlankingMassReliable == newTag.IsCTermFlankingMassReliable;
                var cTermTolerance = sameCTermReliability ? _tolerance : _relaxedTolerance;
                if (!cTermTolerance.IsWithin(cTermFromExisting, cTermFromNew))
                {
                    return false;
                }

                var weighByReliableCTerm =
                    existingTag.IsCTermFlankingMassReliable || newTag.IsCTermFlankingMassReliable;
                var existingCTermWeight = weighByReliableCTerm
                    ? existingTag.NumReliableCTermFlankingMasses
                    : existingTag.NumMergedSequenceTags;
                var newCTermWeight = weighByReliableCTerm
                    ? newTag.NumReliableCTermFlankingMasses
                    : newTag.NumMergedSequenceTags;

                mergedCTermMass =
                    (existingCTermWeight * cTermFromExisting + newCTermWeight * cTermFromNew) /
                    (existingCTermWeight + newCTermWeight);
            }

            // ----- Commit -----
            // The tag mass must be extended before the indices change, since it reads the old bounds.
            existingTag.Mass += GetSequenceMass(mergedStart, existingTag.StartIndex) + GetSequenceMass(existingTag.EndIndex, mergedEnd);
            existingTag.StartIndex = mergedStart;
            existingTag.EndIndex = mergedEnd;
            existingTag.NTermFlankingMass = mergedNTermMass;
            existingTag.CTermFlankingMass = mergedCTermMass;
            existingTag.NumMergedSequenceTags += newTag.NumMergedSequenceTags;
            existingTag.NumReliableNTermFlankingMasses += newTag.NumReliableNTermFlankingMasses;
            existingTag.NumReliableCTermFlankingMasses += newTag.NumReliableCTermFlankingMasses;

            return true;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Absorbs another matched tag into this one by widening the covered index range and
        /// updating both flanking masses as running averages.
        /// </summary>
        /// <param name="tag">the tag to absorb</param>
        /// <returns>this instance, after it has been updated</returns>
        public MatchedTag Add(MatchedTag tag)
        {
            // ----- N-terminal side -----
            var widenedStart = Math.Min(StartIndex, tag.StartIndex);

            if (tag.IsNTermFlankingMassReliable)
            {
                // Reliable incoming mass: fold it into the average of reliable masses.
                NTermFlankingMass = (NumReliableNTermFlankingMasses * NTermFlankingMass + tag.NTermFlankingMass) /
                                    (NumReliableNTermFlankingMasses + 1);
                NumReliableNTermFlankingMasses++;
            }
            else if (!IsNTermFlankingMassReliable)
            {
                // No reliable mass on either side: fold it into the average over all merged tags.
                NTermFlankingMass = (NumMergedSequenceTags * NTermFlankingMass + tag.NTermFlankingMass) /
                                    (NumMergedSequenceTags + 1);
            }
            // An unreliable incoming mass never overrides a reliable one already held.

            StartIndex = widenedStart;

            // ----- C-terminal side -----
            var widenedEnd = Math.Max(EndIndex, tag.EndIndex);

            if (tag.IsCTermFlankingMassReliable)
            {
                // Reliable incoming mass: fold it into the average of reliable masses.
                CTermFlankingMass = (NumReliableCTermFlankingMasses * CTermFlankingMass + tag.CTermFlankingMass) /
                                    (NumReliableCTermFlankingMasses + 1);
                NumReliableCTermFlankingMasses++;
            }
            else if (!IsCTermFlankingMassReliable)
            {
                // No reliable mass on either side: fold it into the average over all merged tags.
                CTermFlankingMass = (NumMergedSequenceTags * CTermFlankingMass + tag.CTermFlankingMass) /
                                    (NumMergedSequenceTags + 1);
            }
            // An unreliable incoming mass never overrides a reliable one already held.

            EndIndex = widenedEnd;

            // Incremented last: both averages above rely on the count from before this merge.
            NumMergedSequenceTags++;

            return this;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Exploratory test: collects the sequence tags of all MS2 scans that match one feature,
        /// maps them onto proteins, and prints the tags of the best-supported protein before and
        /// after merging them with a <see cref="MatchedTagSet"/>. The test makes no assertions and
        /// is ignored when any of its input files is missing.
        /// </summary>
        public void TestFeatureId()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            // Base name of the data set (no extension); the actual input files are derived from it.
            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";
            const string tagFileName = dataSet + ".seqtag";
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");
            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");

            // Check the files that are actually handed to the readers below. Checking the
            // extension-less base name instead would skip the test even when all data is present.
            foreach (var requiredFile in new[] { rawFileName, featureFileName, tagFileName, fastaFilePath })
            {
                if (!File.Exists(requiredFile))
                {
                    Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, requiredFile);
                }
            }

            // Feature: 5236-5286	6-12	8480.3681	5
            // Other features this test has been run with:
            //   scans 7251-7326, mass 32347.18
            //   scans 4451-4541, mass 31267.95
            const int minScanNum = 5236;
            const int maxScanNum = 5286;
            const double featureMass = 8480.3681;

            var tolerance = new Tolerance(10);
            var relaxedTolerance = new Tolerance(20);

            const int minTagLength = 5;
            const int minMergedTagLength = 7;
            const int minNumTagMatches = 1;

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();
            var filter = new Ms1FtFilter(run, tolerance, featureFileName);

            // NOTE(review): both scan bounds are exclusive here -- confirm that is intended.
            var ms2ScanNums =
                filter.GetMatchingMs2ScanNums(featureMass)
                    .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
                    .ToArray();

            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser = new SequenceTagParser(tagFileName, minTagLength);

            // Group every tag occurrence by the protein it was found in.
            var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();
            foreach (var ms2ScanNum in ms2ScanNums)
            {
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                foreach (var tag in tags)
                {
                    var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                    foreach (var index in matchedIndices)
                    {
                        var protein = fastaDb.GetProteinName(index);
                        var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                        var matchedTag = new MatchedTag(tag, startIndex, featureMass);
                        IList<MatchedTag> existingTags;
                        if (proteinsToTags.TryGetValue(protein, out existingTags))
                        {
                            existingTags.Add(matchedTag);
                        }
                        else
                        {
                            proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
                        }
                    }
                }
            }

            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches) break;
                var proteinName = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet,
                    tolerance, relaxedTolerance);

                // Prints one tag: flanking-mass differences from the protein's own prefix/suffix
                // masses, the tag sequence, its start index and the two reliability flags.
                Action<MatchedTag> printTag = tagToPrint =>
                {
                    var seq = proteinSequence.Substring(tagToPrint.StartIndex,
                        tagToPrint.EndIndex - tagToPrint.StartIndex);
                    var nTermMass = protein.GetMass(0, tagToPrint.StartIndex);
                    var cTermMass = protein.GetMass(tagToPrint.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                        (tagToPrint.NTermFlankingMass - nTermMass), seq, (tagToPrint.CTermFlankingMass - cTermMass), tagToPrint.StartIndex,
                        tagToPrint.IsNTermFlankingMassReliable, tagToPrint.IsCTermFlankingMassReliable);
                };

                Console.WriteLine("********** Before merging");
                foreach (var matchedTag in entry.Value)
                {
                    // Print first: adding to the set may change a tag that was stored earlier.
                    printTag(matchedTag);
                    matchedTagSet.Add(matchedTag);
                }

                Console.WriteLine("********** After merging");
                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    if (matchedTag.Length < minMergedTagLength) continue;
                    printTag(matchedTag);
                }

                // Only the protein with the most tag matches is reported.
                break;
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Exploratory test: finds sequence tags in a single spectrum, maps them onto proteins and
        /// prints, per protein, every matched tag with its flanking masses. The test makes no
        /// assertions on the printed values and is ignored when an input file is missing.
        /// </summary>
        public void TestGetProteinsWithTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;
            TestUtils.ShowStarting(methodName);

            const int scanNum = 2;
            const int minNumTagMatches = 1;

            // NOTE(review): the raw file path is blank, so the existence check below always skips
            // this test. The unused constant removed from here pointed at the data set
            // H:\Research\Lewy\raw\Lewy_intact_07 -- confirm the intended raw file path.
            const string rawFilePath = "";

            const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var aminoAcidSet = AminoAcidSet.GetStandardAminoAcidSet();
            var fastaDb = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);

            // Tags are computed directly from the spectrum rather than parsed from a .seqtag file.
            var run = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var spec = run.GetSpectrum(scanNum) as ProductSpectrum;
            // The 'as' cast yields null when the scan is not a product spectrum; fail with a clear message.
            Assert.IsNotNull(spec, "Scan {0} is not a product spectrum", scanNum);
            var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
            var tags = tagFinder.GetAllSequenceTagString();

            // Group every tag occurrence by the protein it was found in.
            var proteinsToTags = new Dictionary<string, IList<MatchedTag>>();

            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                foreach (var index in matchedIndices)
                {
                    var protein = fastaDb.GetProteinName(index);
                    // Zero-based position: the start index is used below with Substring and GetMass(0, start).
                    var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                    var matchedTag = new MatchedTag(tag, startIndex, 0.0);
                    IList<MatchedTag> existingTags;
                    if (proteinsToTags.TryGetValue(protein, out existingTags))
                    {
                        existingTags.Add(matchedTag);
                    }
                    else
                    {
                        proteinsToTags.Add(protein, new List<MatchedTag> { matchedTag });
                    }
                }
            }

            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches) break;
                var proteinName = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);
                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                        matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                        seq,
                        matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                        matchedTag.StartIndex,
                        matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Builds a map from protein name to the set of matched tags found in that protein.
        /// </summary>
        /// <param name="tags">the sequence tags to look up</param>
        /// <param name="searchableDb">the database searched for each tag's sequence; its FASTA database supplies protein names, positions and sequences</param>
        /// <param name="aaSet">the amino acid set used to compute each tag's mass and passed to every new tag set</param>
        /// <param name="tolerance">the tolerance passed to every new tag set</param>
        /// <param name="relaxedTolerance">the relaxed tolerance passed to every new tag set</param>
        /// <returns>
        /// a dictionary keyed by protein name; tags with more than MaxNumProteinMatchesPerTag
        /// matched indices contribute nothing
        /// </returns>
        public static Dictionary<string, MatchedTagSet> GetProteinToMatchedTagsMap(
            IEnumerable<SequenceTag> tags, 
            SearchableDatabase searchableDb, 
            AminoAcidSet aaSet, 
            Tolerance tolerance,
            Tolerance relaxedTolerance)
        {
            var fastaDb = searchableDb.FastaDatabase;
            var tagSetsByProtein = new Dictionary<string, MatchedTagSet>();

            foreach (var tag in tags)
            {
                var hits = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                if (hits.Length > MaxNumProteinMatchesPerTag)
                {
                    // Too many matched indices for this tag: skip it entirely.
                    continue;
                }

                foreach (var hit in hits)
                {
                    var proteinName = fastaDb.GetProteinName(hit);
                    var startIndex = fastaDb.GetZeroBasedPositionInProtein(hit);
                    var tagMass = aaSet.GetComposition(tag.Sequence).Mass;
                    var matchedTag = new MatchedTag(tag, startIndex) { Mass = tagMass };

                    // Reuse the protein's tag set when one exists, otherwise create it.
                    MatchedTagSet tagSet;
                    var proteinSeen = tagSetsByProtein.TryGetValue(proteinName, out tagSet);
                    if (!proteinSeen)
                    {
                        var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                        tagSet = new MatchedTagSet(proteinSequence, aaSet, tolerance, relaxedTolerance);
                    }

                    tagSet.Add(matchedTag);

                    // A new set is registered only after its first tag has been added.
                    if (!proteinSeen)
                    {
                        tagSetsByProtein.Add(proteinName, tagSet);
                    }
                }
            }

            return tagSetsByProtein;
        }