Exemplo n.º 1
0
        /// <summary>
        /// Extends <paramref name="backwardGraph"/> one residue at a time, starting with the residue
        /// immediately before the tag and finishing with the protein N-terminal residue symbol
        /// (position -1), and yields the best match found in the graph after each extension.
        /// </summary>
        /// <param name="matchedTag">Tag whose <c>StartIndex</c> determines where the walk begins.</param>
        /// <param name="backwardGraph">Graph that receives the residues; it is mutated by this method.</param>
        /// <param name="featureMass">Optional feature mass forwarded to <c>GetBestMatchInTheGraph</c>.</param>
        /// <returns>
        /// Lazily produced matches whose <c>Index</c> is the position reached, clamped to zero.
        /// Enumeration ends as soon as the graph rejects a residue.
        /// </returns>
        private IEnumerable<FlankingMassMatch> GetBackwardMatches(
            MatchedTag matchedTag,
            ShiftedSequenceGraph backwardGraph,
            double? featureMass = null)
        {
            for (var pos = matchedTag.StartIndex - 1; pos >= -1; pos--)
            {
                // Position -1 lies before the sequence and stands for the protein N-terminus.
                var nextResidue = pos < 0
                    ? AminoAcid.ProteinNTerm.Residue
                    : _proteinSequence[pos];
                var residueLocation = pos <= 0
                    ? SequenceLocation.ProteinNTerm
                    : SequenceLocation.Everywhere;

                if (!backwardGraph.AddAminoAcid(nextResidue, residueLocation))
                {
                    yield break;
                }

                // The first residue (position 0) is added to the graph but no match is reported for it.
                if (pos != 0)
                {
                    var best = GetBestMatchInTheGraph(backwardGraph, _spec, featureMass);
                    if (best != null)
                    {
                        best.Index = Math.Max(pos, 0);
                        yield return best;
                    }
                }
            }
        }
Exemplo n.º 2
0
//        private readonly int _minProductIonCharge;
//        private readonly int _maxProductIonCharge;

        /// <summary>
        /// Extends <paramref name="forwardGraph"/> one residue at a time, starting at the tag's
        /// <c>EndIndex</c> and finishing with the protein C-terminal residue symbol (position equal
        /// to the sequence length), and yields the best match found in the graph after each extension.
        /// </summary>
        /// <param name="matchedTag">Tag whose <c>EndIndex</c> determines where the walk begins.</param>
        /// <param name="forwardGraph">Graph that receives the residues; it is mutated by this method.</param>
        /// <param name="featureMass">Optional feature mass forwarded to <c>GetBestMatchInTheGraph</c>.</param>
        /// <returns>
        /// Lazily produced matches whose <c>Index</c> is one past the position reached, capped at the
        /// sequence length. Enumeration ends as soon as the graph rejects a residue.
        /// </returns>
        private IEnumerable<FlankingMassMatch> GetForwardMatches(
            MatchedTag matchedTag,
            ShiftedSequenceGraph forwardGraph,
            double? featureMass = null)
        {
            for (var pos = matchedTag.EndIndex; pos <= _proteinSequence.Length; pos++)
            {
                // A position equal to the sequence length stands for the protein C-terminus.
                var nextResidue = pos >= _proteinSequence.Length
                    ? AminoAcid.ProteinCTerm.Residue
                    : _proteinSequence[pos];
                var residueLocation = pos >= _proteinSequence.Length - 1
                    ? SequenceLocation.ProteinCTerm
                    : SequenceLocation.Everywhere;

                if (!forwardGraph.AddAminoAcid(nextResidue, residueLocation))
                {
                    yield break;
                }

                // The last residue is added to the graph but no match is reported for it.
                if (pos != _proteinSequence.Length - 1)
                {
                    var best = GetBestMatchInTheGraph(forwardGraph, _spec, featureMass);
                    if (best != null)
                    {
                        best.Index = Math.Min(pos + 1, _proteinSequence.Length);
                        yield return best;
                    }
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Enumerates tag matches for a tag that has both flanking masses. The feature mass is taken
        /// as N-terminal flanking mass + tag mass + C-terminal flanking mass + the mass of H2O, and
        /// every backward match is combined with every forward match found for it.
        /// </summary>
        /// <param name="matchedTag">Tag to extend; nothing is yielded if either flanking mass is null.</param>
        /// <returns>
        /// Lazily produced <c>TagMatch</c> objects; combinations whose total mass exceeds
        /// <c>_maxSequenceMass</c> are skipped.
        /// </returns>
        private IEnumerable<TagMatch> FindMatchesWithFeatureMass(MatchedTag matchedTag)
        {
            var bothFlanksKnown = matchedTag.NTermFlankingMass != null && matchedTag.CTermFlankingMass != null;
            if (!bothFlanksKnown)
            {
                yield break;
            }

            var featureMass = (double)matchedTag.NTermFlankingMass + matchedTag.Mass +
                              (double)matchedTag.CTermFlankingMass + Composition.H2O.Mass;
            var backwardShift = matchedTag.Mass + (double)matchedTag.NTermFlankingMass;

            var backwardGraph = new ShiftedSequenceGraph(
                _aaSet,
                backwardShift,
                false,
                matchedTag.StartIndex,
                featureMass - MinSumModificationMasses);

            foreach (var backward in GetBackwardMatches(matchedTag, backwardGraph, featureMass))
            {
                // Each backward match gets its own forward graph, shifted by the mass covered so far.
                var forwardShift = backward.Mass + matchedTag.Mass;
                var forwardGraph = new ShiftedSequenceGraph(
                    _aaSet,
                    forwardShift,
                    true,
                    _proteinSequence.Length - matchedTag.EndIndex,
                    featureMass - MinSumModificationMasses);

                foreach (var forward in GetForwardMatches(matchedTag, forwardGraph, featureMass))
                {
                    var totalMass = forward.Mass + matchedTag.Mass + backward.Mass;
                    if (totalMass > _maxSequenceMass)
                    {
                        continue;
                    }

                    // Forward modification instances are re-based by this offset before being printed.
                    var offset = matchedTag.EndIndex - backward.Index - 1;
                    var shiftedForwardMods = forward.Modifications.Select(m => m.GetModificationInstanceWithOffset(offset));
                    var modStr = string.Join(",", backward.Modifications.Concat(shiftedForwardMods));

                    // Backward modifications first, then forward ones.
                    var modList = new List<Modification>();
                    modList.AddRange(backward.Modifications.Select(instance => instance.Modification));
                    modList.AddRange(forward.Modifications.Select(instance => instance.Modification));

                    yield return new TagMatch(
                        backward.Index,
                        forward.Index,
                        matchedTag.Length,
                        backward.Charge,
                        backward.Score,
                        forward.Score,
                        totalMass,
                        new ModificationCombination(modList),
                        modStr);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Looks up every tag in the searchable database and groups the resulting matched tags by
        /// protein name.
        /// </summary>
        /// <param name="tags">Sequence tags to search for.</param>
        /// <param name="searchableDb">Database used to locate tag sequences; its <c>FastaDatabase</c> supplies protein names, positions and sequences.</param>
        /// <param name="aaSet">Amino acid set used to compute each tag's mass and passed to new <c>MatchedTagSet</c> instances.</param>
        /// <param name="tolerance">Tolerance passed to new <c>MatchedTagSet</c> instances.</param>
        /// <param name="relaxedTolerance">Relaxed tolerance passed to new <c>MatchedTagSet</c> instances.</param>
        /// <returns>Map from protein name to the set of tags matched in that protein.</returns>
        public static Dictionary<string, MatchedTagSet> GetProteinToMatchedTagsMap(
            IEnumerable<SequenceTag.SequenceTag> tags,
            SearchableDatabase searchableDb,
            AminoAcidSet aaSet,
            Tolerance tolerance,
            Tolerance relaxedTolerance)
        {
            var database = searchableDb.FastaDatabase;
            var tagSetsByProtein = new Dictionary<string, MatchedTagSet>();

            foreach (var tag in tags)
            {
                var hits = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();

                // Tags that hit more proteins than the configured maximum are ignored entirely.
                if (hits.Length > MaxNumProteinMatchesPerTag)
                {
                    continue;
                }

                foreach (var hit in hits)
                {
                    var proteinName = database.GetProteinName(hit);
                    var positionInProtein = database.GetZeroBasedPositionInProtein(hit);
                    var tagMass = aaSet.GetComposition(tag.Sequence).Mass;
                    var matchedTag = new MatchedTag(tag, positionInProtein)
                    {
                        Mass = tagMass
                    };

                    MatchedTagSet tagSet;
                    if (!tagSetsByProtein.TryGetValue(proteinName, out tagSet))
                    {
                        // First tag for this protein: fall back to the protein name if no sequence is available.
                        var proteinSequence = database.GetProteinSequence(proteinName) ?? proteinName;
                        tagSet = new MatchedTagSet(proteinSequence, aaSet, tolerance, relaxedTolerance);
                        tagSetsByProtein.Add(proteinName, tagSet);
                    }

                    tagSet.Add(matchedTag);
                }
            }

            return tagSetsByProtein;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Chooses the search strategy for a matched tag according to which of its flanking masses are set.
 /// </summary>
 /// <param name="matchedTag">Tag whose N-terminal and C-terminal flanking masses are inspected.</param>
 /// <returns>
 /// The result of <c>FindMatchesWithFeatureMass</c> when both flanking masses are set,
 /// <c>FindMatchesForwardAndBackward</c> when only the N-terminal one is set,
 /// <c>FindMatchesBackwardAndForward</c> when only the C-terminal one is set,
 /// and an empty sequence when neither is set.
 /// </returns>
 public IEnumerable<TagMatch> FindMatches(MatchedTag matchedTag)
 {
     var hasNTermMass = matchedTag.NTermFlankingMass != null;
     var hasCTermMass = matchedTag.CTermFlankingMass != null;

     if (hasNTermMass)
     {
         if (hasCTermMass)
         {
             return FindMatchesWithFeatureMass(matchedTag);
         }

         return FindMatchesForwardAndBackward(matchedTag);
     }

     if (hasCTermMass)
     {
         return FindMatchesBackwardAndForward(matchedTag);
     }

     return Enumerable.Empty<TagMatch>();
 }
Exemplo n.º 6
0
        // Exploratory test: collects the sequence tags of the MS2 scans that match one feature mass,
        // maps them onto proteins in a FASTA database, and prints the tags of the protein with the
        // most tag matches before and after merging them in a MatchedTagSet.
        // The test is skipped (Assert.Ignore) when the raw file or the FASTA file is not present.
        public void TestFeatureId()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSet = @"H:\Research\QCShew_TopDown\Production\QC_Shew_Intact_26Sep14_Bane_C2Column3";

            // dataSet is an extension-less base path; the file that is actually opened is the .raw
            // file derived from it, so that is the path whose existence must be checked.
            // NOTE(review): if GetLcMsRun can also work from a previously converted file when the
            // .raw file is absent, extend this check accordingly.
            var rawFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".raw");

            if (!File.Exists(rawFileName))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFileName);
            }

            // Feature: 5236-5286	6-12	8480.3681	5
            const int    minScanNum  = 5236;
            const int    maxScanNum  = 5286;
            const double featureMass = 8480.3681;

            //const int minScanNum = 7251;
            //const int maxScanNum = 7326;
            //const double featureMass = 32347.18;

//            const int minScanNum = 4451;
//            const int maxScanNum = 4541;
//            const double featureMass = 31267.95;

            var tolerance        = new Tolerance(10);
            var relaxedTolerance = new Tolerance(20);

            const int minTagLength       = 5;
            const int minMergedTagLength = 7;
            const int minNumTagMatches   = 1;

            var run = PbfLcMsRun.GetLcMsRun(rawFileName);

            var aminoAcidSet    = AminoAcidSet.GetStandardAminoAcidSet();
            var featureFileName = MassSpecDataReaderFactory.ChangeExtension(dataSet, ".ms1ft");
            var filter          = new Ms1FtFilter(run, tolerance, featureFileName);

            // Only scans strictly between the two bounds are kept.
            var ms2ScanNums     =
                filter.GetMatchingMs2ScanNums(featureMass)
                .Where(scanNum => scanNum > minScanNum && scanNum < maxScanNum)
                .ToArray();

            const string tagFileName   = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");
            const string fastaFilePath = @"H:\Research\QCShew_TopDown\Production\ID_002216_235ACCEA.fasta";

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            var tagParser    = new SequenceTagParser(tagFileName, minTagLength);

            // Protein name -> every tag occurrence found in that protein.
            var proteinsToTags = new Dictionary <string, IList <MatchedTag> >();

            foreach (var ms2ScanNum in ms2ScanNums)
            {
                var tags = tagParser.GetSequenceTags(ms2ScanNum);
                foreach (var tag in tags)
                {
                    var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                    foreach (var index in matchedIndices)
                    {
                        var protein    = fastaDb.GetProteinName(index);
                        var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                        var matchedTag = new MatchedTag(tag, startIndex, featureMass);
                        IList <MatchedTag> existingTags;
                        if (proteinsToTags.TryGetValue(protein, out existingTags))
                        {
                            existingTags.Add(matchedTag);
                        }
                        else
                        {
                            proteinsToTags.Add(protein, new List <MatchedTag> {
                                matchedTag
                            });
                        }
                    }
                }
            }

            // Proteins are visited from most to fewest tag matches; the unconditional break at the
            // end of the loop body means only the top-ranked protein is reported.
            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }
                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);

                var matchedTagSet = new MatchedTagSet(proteinSequence, aminoAcidSet,
                                                      tolerance, relaxedTolerance);

                Console.WriteLine("********** Before merging");
                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    // Columns: N-term mass difference, tag sequence, C-term mass difference,
                    // start index, and the two flanking-mass reliability flags.
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);

                    matchedTagSet.Add(matchedTag);
                }

                Console.WriteLine("********** After merging");
                foreach (var matchedTag in matchedTagSet.Tags)
                {
                    // Tags shorter than the minimum merged length are not printed.
                    if (matchedTag.Length < minMergedTagLength)
                    {
                        continue;
                    }
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);
                    Console.WriteLine("\t{0}\t{1}\t{2}\t{3}\t{4}\t{5}",
                                      (matchedTag.NTermFlankingMass - nTermMass), seq, (matchedTag.CTermFlankingMass - cTermMass), matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }

                break;
            }
        }
Exemplo n.º 7
0
        // Exploratory test: finds sequence tags in a single spectrum, maps them onto proteins in a
        // FASTA database, and prints every matched tag per protein (most tag matches first).
        // The test is skipped (Assert.Ignore) when the raw file or the FASTA file is not present,
        // or when the requested scan is not a product spectrum.
        // NOTE(review): rawFilePath is an empty string, and File.Exists returns false for an empty
        // path, so as written this test is always skipped until a real path is filled in.
        public void TestGetProteinsWithTagMatchingSingleSpec()
        {
            var methodName = MethodBase.GetCurrentMethod().Name;

            TestUtils.ShowStarting(methodName);

            const string dataSet = @"H:\Research\Lewy\raw\Lewy_intact_07";
            //            const int scanNum = 5158;
            const int minTagLength     = 7;
            const int minNumTagMatches = 1;
            var       aminoAcidSet     = AminoAcidSet.GetStandardAminoAcidSet();

            const int scanNum = 2;
            // Parse sequence tags
            //const string tagFileName = dataSet + ".seqtag"; //"_MinLength3.seqtag"; //Path.ChangeExtension(dataSet, ".seqtag");

            const string rawFilePath = "";

            const string fastaFilePath = @"H:\Research\Lewy\ID_004858_0EE8CF61.fasta";

            if (!File.Exists(rawFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, rawFilePath);
            }

            if (!File.Exists(fastaFilePath))
            {
                Assert.Ignore(@"Skipping test {0} since file not found: {1}", methodName, fastaFilePath);
            }

            var fastaDb      = new FastaDatabase(fastaFilePath);
            var searchableDb = new SearchableDatabase(fastaDb);
            //var tagParser = new SequenceTagParser(tagFileName, minTagLength);
            //var tags = tagParser.GetSequenceTags(scanNum);
            var run       = PbfLcMsRun.GetLcMsRun(rawFilePath);
            var spec      = run.GetSpectrum(scanNum) as ProductSpectrum;

            // The "as" cast yields null when the scan is not a ProductSpectrum; skip with a clear
            // message instead of handing a null spectrum to the tag finder.
            if (spec == null)
            {
                Assert.Ignore(@"Skipping test {0} since scan {1} is not a product spectrum", methodName, scanNum);
            }

            var tagFinder = new SequenceTagFinder(spec, new Tolerance(5));
            var tags      = tagFinder.GetAllSequenceTagString();

            // Protein name -> every tag occurrence found in that protein.
            var proteinsToTags = new Dictionary <string, IList <MatchedTag> >();

            foreach (var tag in tags)
            {
                var matchedIndices = searchableDb.FindAllMatchedSequenceIndices(tag.Sequence).ToArray();
                foreach (var index in matchedIndices)
                {
                    var protein    = fastaDb.GetProteinName(index);

                    // The start index is used below as a zero-based offset (Substring, GetMass),
                    // so the zero-based position is required here.
                    var startIndex = fastaDb.GetZeroBasedPositionInProtein(index);
                    var matchedTag = new MatchedTag(tag, startIndex, 0.0);
                    IList <MatchedTag> existingTags;
                    if (proteinsToTags.TryGetValue(protein, out existingTags))
                    {
                        existingTags.Add(matchedTag);
                    }
                    else
                    {
                        proteinsToTags.Add(protein, new List <MatchedTag> {
                            matchedTag
                        });
                    }
                }
            }

            // Proteins are visited from most to fewest tag matches.
            foreach (var entry in proteinsToTags.OrderByDescending(e => e.Value.Count))
            {
                if (entry.Value.Count < minNumTagMatches)
                {
                    break;
                }
                var proteinName     = entry.Key;
                var proteinSequence = fastaDb.GetProteinSequence(proteinName);
                var protein         = new Sequence(proteinSequence, aminoAcidSet);
                Console.WriteLine(proteinName + "\t" + entry.Value.Count);
                foreach (var matchedTag in entry.Value)
                {
                    var seq = proteinSequence.Substring(matchedTag.StartIndex,
                                                        matchedTag.EndIndex - matchedTag.StartIndex);
                    var nTermMass = protein.GetMass(0, matchedTag.StartIndex);
                    var cTermMass = protein.GetMass(matchedTag.EndIndex, protein.Count);

                    // Columns: N-term flanking mass (difference), tag sequence, C-term flanking
                    // mass (difference), start index, and the two reliability flags.
                    Console.WriteLine("\t{0} ({1})\t{2}\t{3} ({4})\t{5}\t{6}\t{7}",
                                      matchedTag.NTermFlankingMass, (matchedTag.NTermFlankingMass - nTermMass),
                                      seq,
                                      matchedTag.CTermFlankingMass, (matchedTag.CTermFlankingMass - cTermMass),
                                      matchedTag.StartIndex,
                                      matchedTag.IsNTermFlankingMassReliable, matchedTag.IsCTermFlankingMassReliable);
                }
            }
        }