Ejemplo n.º 1
0
        /// <summary>
        /// Runs protein parsimony with <c>modPeptidesAreUnique</c> set to true on two peptides that
        /// differ only by one modification, and checks that each compact peptide is still mapped
        /// to exactly one peptide afterwards.
        /// </summary>
        public static void ParsimonyLocalizeableTreatAsUnique()
        {
            bool modPeptidesAreUnique = true;

            // set up the modification (oxidation targeting the "M" motif)
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
            var mod = new ModificationWithMass("Oxidation of M", "Common Variable", motif1, TerminusLocalization.Any, 15.99491461957);

            TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(new List<ProductType> {
                ProductType.B, ProductType.Y
            });

            var protease = new Protease("kprotease", new List<string> {
                "K"
            }, new List<string>(), TerminusType.C, CleavageSpecificity.Full, null, null, null);

            // modified version of protein
            var protein1 = new Protein("PEPTIDEM", "accession1");
            // unmodified version of protein
            var protein2 = new Protein("YYYKPEPTIDEM", "accession2");

            // pep1 is digested with the oxidation supplied; pep2 is digested without any modification.
            // For protein2 the second digestion product (index 1) is taken.
            var pep1 = protein1.Digest(new DigestionParams {
                MinPeptideLength = null, Protease = protease
            }, new List<ModificationWithMass> {
                mod
            }, new List<ModificationWithMass>()).First();
            var pep2 = protein2.Digest(new DigestionParams {
                MinPeptideLength = null, Protease = protease
            }, new List<ModificationWithMass>(), new List<ModificationWithMass>()).ToList()[1];

            // check to make sure mod is present on pep1 only
            Assert.That(pep1.Sequence != pep2.Sequence);
            Assert.That(pep1.NumMods == 1);
            Assert.That(pep2.NumMods == 0);

            // build the dictionary for input to parsimony: one entry per compact peptide
            var compactPeptideToProteinPeptideMatching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();

            compactPeptideToProteinPeptideMatching.Add(pep1.CompactPeptide(terminusType), new HashSet<PeptideWithSetModifications> {
                pep1
            });
            compactPeptideToProteinPeptideMatching.Add(pep2.CompactPeptide(terminusType), new HashSet<PeptideWithSetModifications> {
                pep2
            });

            // apply parsimony
            ProteinParsimonyEngine pae = new ProteinParsimonyEngine(compactPeptideToProteinPeptideMatching, modPeptidesAreUnique, new List<string>());

            pae.Run();

            // check to make sure both peptides are NOT associated with both proteins
            Assert.That(compactPeptideToProteinPeptideMatching.Count == 2);
            foreach (var kvp in compactPeptideToProteinPeptideMatching)
            {
                Assert.That(kvp.Value.Count == 1);
            }
        }
        /// <summary>
        /// Builds one PSM that matches the same peptide from a decoy protein and from a target
        /// protein, runs parsimony followed by protein scoring, and checks that a single
        /// non-decoy protein group containing one protein is reported.
        /// </summary>
        public static void ParsimonyWeirdCatch()
        {
            var decoyProtein = new Protein("MATSIK", "protein1", isDecoy: true);
            var targetProtein = new Protein("MATSIK", "protein2");

            IEnumerable<Modification> allKnownFixedModifications = new List<Modification>();
            var digestionParams = new DigestionParams(minPeptideLength: 5);
            List<Modification> variableModifications = new List<Modification>();

            var decoyPeptide = decoyProtein.Digest(digestionParams, allKnownFixedModifications, variableModifications).First();
            var targetPeptide = targetProtein.Digest(digestionParams, allKnownFixedModifications, variableModifications).First();

            // a minimal single-peak scan for the PSM to reference
            var spectrum = new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false);
            var dataScan = new MsDataScan(spectrum, 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            var ms2Scan = new Ms2ScanWithSpecificMass(dataScan, 2, 0, "File", new CommonParameters());

            // this PSM has a target and a decoy
            var ambiguousPsm = new PeptideSpectralMatch(decoyPeptide, 0, 1, 0, ms2Scan, new DigestionParams(), new List<MatchedFragmentIon>());
            ambiguousPsm.AddOrReplace(targetPeptide, 1, 0, true, null, 0);

            var psms = new List<PeptideSpectralMatch> { ambiguousPsm };

            foreach (PeptideSpectralMatch psm in psms)
            {
                psm.ResolveAllAmbiguities();
            }

            foreach (PeptideSpectralMatch psm in psms)
            {
                psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            }

            // apply parsimony
            var parsimonyEngine = new ProteinParsimonyEngine(psms, false, new CommonParameters(), new List<string>());
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();

            // score protein groups and merge indistinguishable ones
            var scoringEngine = new ProteinScoringAndFdrEngine(parsimonyResults.ProteinGroups, psms, false, true, true, new CommonParameters(), new List<string>());
            var scoringResults = (ProteinScoringAndFdrResults)scoringEngine.Run();

            // only target protein gets generated
            int groupCount = scoringResults.SortedAndScoredProteinGroups.Count;
            Assert.That(groupCount == 1);

            // looked up only after the count assertion so an empty result fails on the assertion above
            var onlyGroup = scoringResults.SortedAndScoredProteinGroups.First();
            Assert.That(onlyGroup.Proteins.Count == 1);
            Assert.That(!onlyGroup.IsDecoy);
        }
        /// <summary>
        /// Scores three PSMs (a filler, a decoy scoring 15.11 and a target scoring 15.1), runs
        /// parsimony and protein scoring, and checks that three protein groups are reported and
        /// that the group at index 1 has a q-value of 0.5.
        /// </summary>
        public static void TopPickedFdrTest()
        {
            // The decoy should beat out the target for FDR, while the target still gets written.
            var filler = new Protein("FILLR", "filler");
            var target = new Protein("KFDSA", "protein");
            var decoy = new Protein("ASDFK", "DECOY_protein", isDecoy: true);

            IEnumerable<Modification> allKnownFixedModifications = new List<Modification>();
            var digestionParams = new DigestionParams(minPeptideLength: 5);
            List<Modification> variableModifications = new List<Modification>();

            PeptideWithSetModifications fillerPeptide = filler.Digest(digestionParams, allKnownFixedModifications, variableModifications).First();
            PeptideWithSetModifications targetPeptide = target.Digest(digestionParams, allKnownFixedModifications, variableModifications).First();
            PeptideWithSetModifications decoyPeptide = decoy.Digest(digestionParams, allKnownFixedModifications, variableModifications).First();

            // a minimal single-peak scan shared by all PSMs
            var spectrum = new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false);
            var dataScan = new MsDataScan(spectrum, 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            var ms2Scan = new Ms2ScanWithSpecificMass(dataScan, 2, 0, "File", new CommonParameters());

            // same insertion order as before: filler, decoy, target
            var psms = new List<PeptideSpectralMatch>();
            psms.Add(new PeptideSpectralMatch(fillerPeptide, 0, 30, 0, ms2Scan, new DigestionParams(), new List<MatchedFragmentIon>()));
            psms.Add(new PeptideSpectralMatch(decoyPeptide, 0, 15.11, 0, ms2Scan, new DigestionParams(), new List<MatchedFragmentIon>()));
            psms.Add(new PeptideSpectralMatch(targetPeptide, 0, 15.1, 0, ms2Scan, new DigestionParams(), new List<MatchedFragmentIon>()));

            foreach (PeptideSpectralMatch psm in psms)
            {
                psm.ResolveAllAmbiguities();
            }

            foreach (PeptideSpectralMatch psm in psms)
            {
                psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            }

            // apply parsimony, then score the resulting groups
            var parsimonyEngine = new ProteinParsimonyEngine(psms, false, new CommonParameters(), new List<string>());
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();
            var scoringEngine = new ProteinScoringAndFdrEngine(parsimonyResults.ProteinGroups, psms, false, true, true, new CommonParameters(), new List<string>());
            var scoringResults = (ProteinScoringAndFdrResults)scoringEngine.Run();

            Assert.IsTrue(scoringResults.SortedAndScoredProteinGroups.Count == 3);
            Assert.IsTrue(scoringResults.SortedAndScoredProteinGroups[1].QValue == 0.5);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs protein parsimony with <c>modPeptidesAreUnique</c> set to false on two identically
        /// modified peptides that come from different proteins, and checks that they end up under
        /// one compact peptide that is associated with both of them.
        /// </summary>
        public static void ParsimonyVariableDontTreatAsUnique()
        {
            bool modPeptidesAreUnique = false;

            // set up the modification (oxidation targeting the "M" motif)
            ModificationMotif.TryGetMotif("M", out ModificationMotif motif1);
            var mod = new ModificationWithMass("Oxidation of M", "Common Variable", motif1, TerminusLocalization.Any, 15.99491461957);
            Protease protease = new Protease("k Protease", new List<Tuple<string, TerminusType>> {
                new Tuple<string, TerminusType>("K", TerminusType.C)
            }, new List<Tuple<string, TerminusType>>(), CleavageSpecificity.Full, null, null, null);

            // Indexer assignment instead of Add: Add throws if a protease with this name is already
            // registered (for example when the test is run again in the same process).
            // NOTE(review): assumes ProteaseDictionary.Dictionary exposes a settable indexer - confirm.
            ProteaseDictionary.Dictionary[protease.Name] = protease;
            TerminusType terminusType = ProductTypeMethods.IdentifyTerminusType(new List<ProductType> {
                ProductType.B, ProductType.Y
            });

            // modified version of protein
            var protein1 = new Protein("PEPTIDEM", "accession1");
            // unmodified version of protein
            var protein2 = new Protein("YYYKPEPTIDEM", "accession2");

            // both digests receive the same modification; for protein2 the second product (index 1) is taken
            var pep1 = protein1.Digest(new DigestionParams(protease: "k Protease", minPeptideLength: 1), new List<ModificationWithMass> {
                mod
            }, new List<ModificationWithMass>()).First();
            var pep2 = protein2.Digest(new DigestionParams(protease: "k Protease", minPeptideLength: 1), new List<ModificationWithMass> {
                mod
            }, new List<ModificationWithMass>()).ToList()[1];

            // check to make sure mod is present on both peptides
            Assert.That(pep1.Sequence.Equals(pep2.Sequence));
            Assert.That(pep1.NumMods == 1);
            Assert.That(pep2.NumMods == 1);

            // build the dictionary for input to parsimony
            var compactPeptideToProteinPeptideMatching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();
            var cp1 = pep1.CompactPeptide(terminusType);
            var cp2 = pep2.CompactPeptide(terminusType);

            Assert.That(cp1.Equals(cp2));

            // cp1 is reused as the key rather than recomputing the compact peptide
            compactPeptideToProteinPeptideMatching.Add(cp1, new HashSet<PeptideWithSetModifications> {
                pep1
            });
            Assert.That(compactPeptideToProteinPeptideMatching.ContainsKey(cp2));
            compactPeptideToProteinPeptideMatching[cp2].Add(pep2);

            // apply parsimony
            ProteinParsimonyEngine pae = new ProteinParsimonyEngine(compactPeptideToProteinPeptideMatching, modPeptidesAreUnique, new CommonParameters(), new List<string>());

            pae.Run();

            // check to make sure both peptides are associated with both proteins
            Assert.That(compactPeptideToProteinPeptideMatching.Count == 1);
            Assert.That(compactPeptideToProteinPeptideMatching.First().Value.Count == 2);
            var seq = compactPeptideToProteinPeptideMatching.First().Value.First().Sequence;

            foreach (var sequence in compactPeptideToProteinPeptideMatching.First().Value)
            {
                Assert.That(sequence.Sequence.Equals(seq));
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// End-to-end parsimony test: digests a set of artificial proteins with a protease that
        /// cleaves after every residue, builds one PSM per detected single-letter peptide, runs
        /// parsimony and protein scoring, and checks the surviving proteins, the sequence coverage
        /// bound and the contents of the first protein group.
        /// </summary>
        public static void TestParsimony()
        {
            // creates some proteins to test parsimony with
            string[] proteinSequences =
            {
                "AB--------",                      // 1: contains unique
                "--C-------",                      // 2: one hit wonder
                "---D---HHH--",                    // 3: subset
                "-B-D---HHH--",                    // 4: D should go to 4, not 3 (3 is subset)
                "-B--E-----",                      // 5: subsumable
                "----EFG---",                      // 6: indistinguishable from 10 (J will not be a "detected" PSM)
                "-----F----",                      // 7: lone pep shared w/ decoy
                "--------I-",                      // 8: I should go to 9, not 8
                "-B------I-",                      // 9: I should go to 9, not 8
                "----EFG--J"                       // 10: indistinguishable from 6 (J will not be a "detected" PSM)
            };

            var proteins = new List<Protein>();

            // accessions are the 1-based position of the sequence in the array above
            for (int i = 0; i < proteinSequences.Length; i++)
            {
                proteins.Add(new Protein(proteinSequences[i], (i + 1).ToString()));
            }
            proteins.Add(new Protein("-----F----*", "D1", isDecoy: true));
            proteins.Add(new Protein("-----F----**", "C1", isContaminant: true));
            proteins.Add(new Protein("----E----**", "C2", isContaminant: true));

            // create the protease: one digestion motif per letter used in the sequences, plus "-"
            List<DigestionMotif> digestionMotifs = new List<DigestionMotif>
            {
                new DigestionMotif("A", null, 1, null),
                new DigestionMotif("B", null, 1, null),
                new DigestionMotif("C", null, 1, null),
                new DigestionMotif("D", null, 1, null),
                new DigestionMotif("E", null, 1, null),
                new DigestionMotif("F", null, 1, null),
                new DigestionMotif("G", null, 1, null),
                new DigestionMotif("H", null, 1, null),
                new DigestionMotif("I", null, 1, null),
                new DigestionMotif("J", null, 1, null),
                new DigestionMotif("-", null, 1, null),
            };

            var protease = new Protease("test", CleavageSpecificity.Full, null, null, digestionMotifs);

            // Indexer assignment instead of Add: Add throws if a protease with this name is already
            // registered (for example when the test is run again in the same process).
            // NOTE(review): assumes ProteaseDictionary.Dictionary exposes a settable indexer - confirm.
            ProteaseDictionary.Dictionary[protease.Name] = protease;
            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, minPeptideLength: 1);

            // Base sequences that count as "detected". "J" and "-" are deliberately absent,
            // so peptides with those base sequences never become PSMs.
            var detectedBaseSequences = new HashSet<string> { "A", "B", "C", "D", "E", "F", "G", "H", "I" };

            // digest the proteins, keeping only the detected peptides
            var peptides = new HashSet<PeptideWithSetModifications>();

            foreach (Protein protein in proteins)
            {
                foreach (PeptideWithSetModifications peptide in protein.Digest(digestionParams, new List<Modification>(), new List<Modification>()))
                {
                    if (detectedBaseSequences.Contains(peptide.BaseSequence))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            // create PSMs for the peptides: one PSM per base sequence; further peptides with the
            // same base sequence are added to the existing PSM
            Dictionary<string, PeptideSpectralMatch> temp = new Dictionary<string, PeptideSpectralMatch>();

            MsDataScan fakeScan = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false),
                                                 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null,
                                                 null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);

            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(fakeScan, 2, 0, "File", new CommonParameters());

            foreach (var peptide in peptides)
            {
                if (temp.TryGetValue(peptide.BaseSequence, out var psm))
                {
                    psm.AddOrReplace(peptide, 1, 0, true, new List<MatchedFragmentIon>(), 0);
                }
                else
                {
                    temp.Add(peptide.BaseSequence, new PeptideSpectralMatch(peptide, 0, 1, 0, scan, digestionParams, new List<MatchedFragmentIon>()));
                }
            }

            List<PeptideSpectralMatch> psms = temp.Values.ToList();

            foreach (var psm in psms)
            {
                psm.ResolveAllAmbiguities();
                psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            }

            // run parsimony
            ProteinParsimonyEngine parsimonyEngine = new ProteinParsimonyEngine(psms, false, new CommonParameters(), new List<string>());
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();
            var proteinGroups = parsimonyResults.ProteinGroups;

            ProteinScoringAndFdrEngine proteinScoringAndFdrEngine = new ProteinScoringAndFdrEngine(proteinGroups, psms, true, false, true, new CommonParameters(), new List<string>());
            var proteinScoringAndFdrResults = (ProteinScoringAndFdrResults)proteinScoringAndFdrEngine.Run();

            proteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups;

            // select the PSMs' proteins
            List<string> parsimonyProteinSequences = psms.SelectMany(p => p.BestMatchingPeptides.Select(v => v.Peptide.Protein)).Select(v => v.BaseSequence).Distinct().ToList();

            // check that correct proteins are in parsimony list
            Assert.Contains("AB--------", parsimonyProteinSequences);
            Assert.Contains("--C-------", parsimonyProteinSequences);
            Assert.Contains("-B-D---HHH--", parsimonyProteinSequences);
            Assert.Contains("----E----**", parsimonyProteinSequences);
            Assert.Contains("-B------I-", parsimonyProteinSequences);
            Assert.Contains("----EFG---", parsimonyProteinSequences);
            Assert.Contains("----EFG--J", parsimonyProteinSequences);
            Assert.AreEqual(8, parsimonyProteinSequences.Count);

            // sequence coverage test: no reported coverage value may exceed 1.0
            foreach (var proteinGroup in proteinGroups)
            {
                foreach (var coverage in proteinGroup.SequenceCoveragePercent)
                {
                    Assert.That(coverage <= 1.0);
                }
            }

            // test protein groups
            Assert.AreEqual(3, proteinGroups.Count);
            Assert.AreEqual(1, proteinGroups.First().Proteins.Count);
            Assert.AreEqual("AB--------", proteinGroups.First().Proteins.First().BaseSequence);
            Assert.AreEqual(2, proteinGroups.First().AllPsmsBelowOnePercentFDR.Count);
            Assert.AreEqual(2, proteinGroups.First().ProteinGroupScore);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Digests one protein with a modification targeting the "S" motif, verifies the full
        /// sequences of the digestion products, then runs parsimony and protein scoring on three
        /// PSMs and checks the modification occupancy string reported for the first protein group.
        /// </summary>
        public static void TestPTMOutput()
        {
            List<Modification> variableModifications = new List<Modification>();
            List<Modification> fixedModifications = new List<Modification>();

            ModificationMotif.TryGetMotif("S", out ModificationMotif motif);
            variableModifications.Add(new Modification(_originalId: "resMod", _modificationType: "HaHa", _target: motif, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")));

            var proteinList = new List<Protein> {
                new Protein("MNNNSKQQQ", "accession")
            };
            var protease = new Protease("CustomProtease", CleavageSpecificity.Full, null, null, new List<DigestionMotif> {
                new DigestionMotif("K", null, 1, null)
            });

            // Indexer assignment instead of Add: Add throws if a protease with this name is already
            // registered (for example when the test is run again in the same process).
            // NOTE(review): assumes ProteaseDictionary.Dictionary exposes a settable indexer - confirm.
            ProteaseDictionary.Dictionary[protease.Name] = protease;

            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1);

            // digest once and check every product that the rest of the test relies on
            var firstProtDigest = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).ToList();

            Assert.AreEqual("QQQ", firstProtDigest.Last().FullSequence);
            Assert.AreEqual("MNNNSK", firstProtDigest[0].FullSequence);
            Assert.AreEqual("MNNNS[HaHa:resMod on S]K", firstProtDigest[1].FullSequence);
            Assert.AreEqual("NNNSK", firstProtDigest[2].FullSequence);
            Assert.AreEqual("NNNS[HaHa:resMod on S]K", firstProtDigest[3].FullSequence);

            // collect the distinct peptides of every protein, in digestion order
            var peptideList = new HashSet<PeptideWithSetModifications>();

            foreach (var protein in proteinList)
            {
                peptideList.UnionWith(protein.Digest(digestionParams, new List<Modification>(), variableModifications));
            }

            MsDataScan jdfk = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(jdfk, 2, 0, "File", new CommonParameters());

            // one PSM for the peptide at index 0 and two PSMs for the peptide at index 1;
            // the expected occupancy below is 2 out of 3
            var match1 = new PeptideSpectralMatch(peptideList.ElementAt(0), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
            match1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var match2 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
            match2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var match3 = new PeptideSpectralMatch(peptideList.ElementAt(1), 0, 10, 0, ms2scan, digestionParams, new List<MatchedFragmentIon>());
            match3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            List<PeptideSpectralMatch> psms = new List<PeptideSpectralMatch>
            {
                match1,
                match2,
                match3
            };

            psms.ForEach(p => p.ResolveAllAmbiguities());

            ProteinParsimonyEngine engine = new ProteinParsimonyEngine(psms, true, new CommonParameters(), new List<string> {
                "ff"
            });
            var cool = (ProteinParsimonyResults)engine.Run();
            var proteinGroups = cool.ProteinGroups;

            // scoring is run for its effect on the protein groups; the returned results are not needed
            ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), new List<string>());

            f.Run();

            Assert.AreEqual("#aa5[resMod on S,info:occupancy=0.67(2/3)];", proteinGroups.First().ModsInfo[0]);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds three modified peptides covering protein "MMKMMK" (residues 1-3, 4-6 and 1-6),
        /// runs parsimony and protein scoring on one PSM per peptide, and checks the sequence
        /// coverage display strings and the per-residue modification occupancy of the first
        /// protein group.
        /// </summary>
        public static void TryFailSequenceCoverage()
        {
            var prot1 = new Protein("MMKMMK", "prot1");

            // modifications on the "M" motif with different location restrictions
            ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
            Modification mod1 = new Modification(_originalId: "mod1", _modificationType: "mt", _target: motifM, _locationRestriction: "N-terminal.", _monoisotopicMass: 10);
            Modification mod2 = new Modification(_originalId: "mod2", _modificationType: "mt", _target: motifM, _locationRestriction: "Peptide N-terminal.", _monoisotopicMass: 10);
            Modification mod3 = new Modification(_originalId: "mod3", _modificationType: "mt", _target: motifM, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);

            // modifications on the "K" motif with different location restrictions
            ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
            Modification mod4 = new Modification(_originalId: "mod4", _modificationType: "mt", _target: motifK, _locationRestriction: "Peptide C-terminal.", _monoisotopicMass: 10);
            Modification mod5 = new Modification(_originalId: "mod5", _modificationType: "mt", _target: motifK, _locationRestriction: "C-terminal.", _monoisotopicMass: 10);

            // modification maps handed to the peptide constructor, one per peptide
            Dictionary<int, Modification> modsFor1 = new Dictionary<int, Modification>
            {
                { 1, mod1 },
                { 3, mod3 },
                { 5, mod4 },
            };
            Dictionary<int, Modification> modsFor2 = new Dictionary<int, Modification>
            {
                { 1, mod2 },
                { 5, mod5 },
            };
            Dictionary<int, Modification> modsFor3 = new Dictionary<int, Modification>
            {
                { 1, mod1 },
                { 5, mod3 },
                { 8, mod5 }
            };

            DigestionParams digestionParams = new DigestionParams();
            var pwsm1 = new PeptideWithSetModifications(prot1, digestionParams, 1, 3, CleavageSpecificity.Unknown, "", 0, modsFor1, 0);
            var pwsm2 = new PeptideWithSetModifications(prot1, digestionParams, 4, 6, CleavageSpecificity.Unknown, "", 0, modsFor2, 0);
            var pwsm3 = new PeptideWithSetModifications(prot1, digestionParams, 1, 6, CleavageSpecificity.Unknown, "", 0, modsFor3, 0);

            // one PSM per peptide, all referencing the same test scan
            IScan scan = new ThisTestScan();

            var psm1 = new PeptideSpectralMatch(pwsm1, 0, 1, 0, scan, digestionParams, new List<MatchedFragmentIon>());
            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var psm2 = new PeptideSpectralMatch(pwsm2, 0, 1, 0, scan, digestionParams, new List<MatchedFragmentIon>());
            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            var psm3 = new PeptideSpectralMatch(pwsm3, 0, 1, 0, scan, digestionParams, new List<MatchedFragmentIon>());
            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);

            List<PeptideSpectralMatch> newPsms = new List<PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3,
            };

            newPsms.ForEach(p => p.ResolveAllAmbiguities());

            ProteinParsimonyEngine parsimonyEngine = new ProteinParsimonyEngine(newPsms, true, new CommonParameters(), new List<string>());
            ProteinParsimonyResults parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();

            // scoring is run for its effect on the protein groups; the returned results are not needed
            ProteinScoringAndFdrEngine scoringEngine = new ProteinScoringAndFdrEngine(parsimonyResults.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List<string>());

            scoringEngine.Run();

            // every remaining check inspects the first protein group
            var firstGroup = parsimonyResults.ProteinGroups.First();

            firstGroup.CalculateSequenceCoverage();

            var firstSequenceCoverageDisplayList = firstGroup.SequenceCoverageDisplayList.First();

            Assert.AreEqual("MMKMMK", firstSequenceCoverageDisplayList);

            var firstSequenceCoverageDisplayListWithMods = firstGroup.SequenceCoverageDisplayListWithMods.First();

            Assert.AreEqual("[mod1 on M]-MM[mod3 on M]KM[mod3 on M]MK-[mod5 on K]", firstSequenceCoverageDisplayListWithMods);

            var firstModInfo = firstGroup.ModsInfo.First();

            // residues 3 and 5 must not appear in the mod info at all
            Assert.IsTrue(firstModInfo.Contains(@"#aa1[mod1 on M,info:occupancy=1.00(2/2)]"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa2[mod3 on M,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa3"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa4[mod3 on M,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa5"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa6[mod5 on K,info:occupancy=1.00(2/2)]"));
        }
Ejemplo n.º 8
0
        public static void TryFailSequenceCoverage()
        {
            var prot1 = new Protein("MMKMMK", "prot1");

            ModificationMotif.TryGetMotif("M", out ModificationMotif motifM);
            ModificationWithMass mod1 = new ModificationWithMass("mod1", "mt", motifM, TerminusLocalization.NProt, 10);
            ModificationWithMass mod2 = new ModificationWithMass("mod2", "mt", motifM, TerminusLocalization.NPep, 10);
            ModificationWithMass mod3 = new ModificationWithMass("mod3", "mt", motifM, TerminusLocalization.Any, 10);

            ModificationMotif.TryGetMotif("K", out ModificationMotif motifK);
            ModificationWithMass mod4 = new ModificationWithMass("mod4", "mt", motifK, TerminusLocalization.PepC, 10);
            ModificationWithMass mod5 = new ModificationWithMass("mod5", "mt", motifK, TerminusLocalization.ProtC, 10);

            Dictionary <int, ModificationWithMass> modsFor1 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod1 },
                { 3, mod3 },
                { 5, mod4 },
            };
            Dictionary <int, ModificationWithMass> modsFor2 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod2 },
                { 5, mod5 },
            };
            Dictionary <int, ModificationWithMass> modsFor3 = new Dictionary <int, ModificationWithMass>
            {
                { 1, mod1 },
                { 5, mod3 },
                { 8, mod5 }
            };

            DigestionParams digestionParams = new DigestionParams();
            var             pwsm1           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 3, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor1, numFixedMods: 0);
            var             pwsm2           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 4, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor2, numFixedMods: 0);
            var             pwsm3           = new PeptideWithSetModifications(protein: prot1, digestionParams: digestionParams, oneBasedStartResidueInProtein: 1, oneBasedEndResidueInProtein: 6, peptideDescription: "", missedCleavages: 0, allModsOneIsNterminus: modsFor3, numFixedMods: 0);

            HashSet <PeptideWithSetModifications> peptides = new HashSet <PeptideWithSetModifications>
            {
                pwsm1,
                pwsm2,
                pwsm3,
            };

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > matching = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >
            {
                { pwsm1.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm1
                  } },
                { pwsm2.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm2
                  } },
                { pwsm3.CompactPeptide(TerminusType.None), new HashSet <PeptideWithSetModifications> {
                      pwsm3
                  } },
            };

            IScan scan = new ThisTestScan();
            var   psm1 = new PeptideSpectralMatch(pwsm1.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm1.MatchToProteinLinkedPeptides(matching);
            var psm2 = new PeptideSpectralMatch(pwsm2.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm2.MatchToProteinLinkedPeptides(matching);
            var psm3 = new PeptideSpectralMatch(pwsm3.CompactPeptide(TerminusType.None), 0, 1, 0, scan, digestionParams);

            psm3.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            psm3.MatchToProteinLinkedPeptides(matching);

            List <PeptideSpectralMatch> newPsms = new List <PeptideSpectralMatch>
            {
                psm1,
                psm2,
                psm3,
            };

            ProteinParsimonyEngine  ppe  = new ProteinParsimonyEngine(matching, true, new CommonParameters(), new List <string>());
            ProteinParsimonyResults fjkd = (ProteinParsimonyResults)ppe.Run();

            ProteinScoringAndFdrEngine psafe = new ProteinScoringAndFdrEngine(fjkd.ProteinGroups, newPsms, true, true, true, new CommonParameters(), new List <string>());

            psafe.Run();

            fjkd.ProteinGroups.First().CalculateSequenceCoverage();

            var firstSequenceCoverageDisplayList = fjkd.ProteinGroups.First().SequenceCoverageDisplayList.First();

            Assert.AreEqual("MMKMMK", firstSequenceCoverageDisplayList);
            var firstSequenceCoverageDisplayListWithMods = fjkd.ProteinGroups.First().SequenceCoverageDisplayListWithMods.First();

            Assert.AreEqual("[mod1]-MM[mod3]KM[mod3]MK-[mod5]", firstSequenceCoverageDisplayListWithMods);

            var firstModInfo = fjkd.ProteinGroups.First().ModsInfo.First();

            Assert.IsTrue(firstModInfo.Contains(@"#aa1[mod1,info:occupancy=1.00(2/2)]"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa2[mod3,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa3"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa4[mod3,info:occupancy=0.50(1/2)]"));
            Assert.IsFalse(firstModInfo.Contains(@"#aa5"));
            Assert.IsTrue(firstModInfo.Contains(@"#aa6[mod5,info:occupancy=1.00(2/2)]"));
        }
        /// <summary>
        /// Builds two proteins ("ABCEFG" and "EFGABC"), digests each with two custom proteases
        /// ("testA" and "testB"), then runs peptide-to-protein matching and parsimony.
        /// Expects two matching entries (one per base sequence, two peptides each) and two
        /// protein groups whose peptides are attributed to "testA" and "testB" respectively.
        /// </summary>
        /// <remarks>
        /// Adds "testA" and "testB" to <c>ProteaseDictionary.Dictionary</c>.
        /// </remarks>
        public static void MultiProteaseIndistiguishableTest()
        {
            string[] proteinSequences = { "ABCEFG", "EFGABC" };

            // Cleavage motifs: "C" for the first protease and "G" for the second, both with TerminusType.C.
            var cleavageMotifsA = new List<Tuple<string, TerminusType>>
            {
                new Tuple<string, TerminusType>("C", TerminusType.C)
            };
            var cleavageMotifsB = new List<Tuple<string, TerminusType>>
            {
                new Tuple<string, TerminusType>("G", TerminusType.C)
            };

            var proteaseA = new Protease("testA", cleavageMotifsA, new List<Tuple<string, TerminusType>>(), CleavageSpecificity.Full, null, null, null);
            ProteaseDictionary.Dictionary.Add(proteaseA.Name, proteaseA);

            var proteaseB = new Protease("testB", cleavageMotifsB, new List<Tuple<string, TerminusType>>(), CleavageSpecificity.Full, null, null, null);
            ProteaseDictionary.Dictionary.Add(proteaseB.Name, proteaseB);

            var targetPeptides = new HashSet<PeptideWithSetModifications>();

            // Accessions are "1", "2", ... in the order of proteinSequences; all proteins share one gene-name list.
            var geneNames = new List<Tuple<string, string>>();
            List<Protein> proteins = proteinSequences
                .Select((sequence, index) => new Protein(sequence, (index + 1).ToString(), null, geneNames, new Dictionary<int, List<Modification>>()))
                .ToList();

            DigestionParams paramsA = new DigestionParams(protease: proteaseA.Name, minPeptideLength: 1);
            DigestionParams paramsB = new DigestionParams(protease: proteaseB.Name, minPeptideLength: 1);

            // For every protein, digest with protease A first and then protease B,
            // keeping only the peptides whose base sequence is "ABC" or "EFG".
            foreach (Protein protein in proteins)
            {
                foreach (DigestionParams parameters in new[] { paramsA, paramsB })
                {
                    foreach (var peptide in protein.Digest(parameters, new List<ModificationWithMass>(), new List<ModificationWithMass>()))
                    {
                        string baseSequence = peptide.BaseSequence;

                        if (baseSequence == "ABC" || baseSequence == "EFG")
                        {
                            targetPeptides.Add(peptide);
                        }
                    }
                }
            }

            // creates the initial dictionary of "peptide" and "virtual peptide" matches
            var seedMatching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();

            // Both arrays follow the enumeration order of targetPeptides.
            PeptideWithSetModifications[] orderedPeptides = targetPeptides.ToArray();
            CompactPeptide[] compactPeptides = orderedPeptides
                .Select(peptide => new CompactPeptide(peptide, TerminusType.None))
                .ToArray();

            seedMatching.Add(compactPeptides[0], new HashSet<PeptideWithSetModifications> { orderedPeptides[0], orderedPeptides[3] });
            seedMatching.Add(compactPeptides[1], new HashSet<PeptideWithSetModifications> { orderedPeptides[1], orderedPeptides[2] });

            // builds psm list to match to peptides
            var psms = new List<PeptideSpectralMatch>();

            var spectrum = new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false);
            MsDataScan dataScan = new MsDataScan(spectrum, 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dataScan, 2, 0, "File");

            // One PSM per "ABC"/"EFG" peptide, created with the digestion params the peptide itself carries.
            foreach (var entry in seedMatching)
            {
                foreach (var peptide in entry.Value)
                {
                    string baseSequence = peptide.BaseSequence;

                    if (baseSequence != "ABC" && baseSequence != "EFG")
                    {
                        continue;
                    }

                    if (peptide.DigestionParams == paramsA)
                    {
                        psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, paramsA));
                    }
                    else if (peptide.DigestionParams == paramsB)
                    {
                        psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, paramsB));
                    }
                }
            }

            var ionTypes = new List<ProductType> { ProductType.BnoB1ions, ProductType.Y };
            var digestionParamsSet = new HashSet<DigestionParams> { paramsA, paramsB };

            ModificationMotif.TryGetMotif("M", out ModificationMotif methionineMotif);
            ModificationWithMass oxidation = new ModificationWithMass("Oxidation of M", "Common Variable", methionineMotif, TerminusLocalization.Any, 15.99491461957);
            var variableMods = new List<ModificationWithMass> { oxidation };

            // Second lookup of the same motif; its result is not used by anything below.
            ModificationMotif.TryGetMotif("M", out ModificationMotif unusedMotif);
            var fixedMods = new List<ModificationWithMass> { oxidation };

            var matchingEngine = new SequencesToActualProteinPeptidesEngine(psms, proteins, fixedMods, variableMods, ionTypes, digestionParamsSet, true, new CommonParameters(), null);
            var matchingResults = (SequencesToActualProteinPeptidesEngineResults)matchingEngine.Run();
            var compactToProteinPeptides = matchingResults.CompactPeptideToProteinPeptideMatching;

            Assert.AreEqual(2, compactToProteinPeptides.Count);

            // Entry 0 is expected to hold the two "ABC" peptides, entry 1 the two "EFG" peptides.
            string[] expectedBaseSequences = { "ABC", "EFG" };

            for (int entryIndex = 0; entryIndex < expectedBaseSequences.Length; entryIndex++)
            {
                var linkedPeptides = compactToProteinPeptides.ElementAt(entryIndex).Value;

                Assert.AreEqual(2, linkedPeptides.Count);
                Assert.AreEqual(expectedBaseSequences[entryIndex], linkedPeptides.ElementAt(0).BaseSequence);
                Assert.AreEqual(expectedBaseSequences[entryIndex], linkedPeptides.ElementAt(1).BaseSequence);
            }

            ProteinParsimonyEngine parsimonyEngine = new ProteinParsimonyEngine(compactToProteinPeptides, false, new CommonParameters(), null);
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();

            List<ProteinGroup> proteinGroups = parsimonyResults.ProteinGroups;

            Assert.AreEqual(2, proteinGroups.Count);

            // Group 0 is expected to carry only "testA" peptides, group 1 only "testB" peptides.
            string[] expectedProteaseNames = { "testA", "testB" };

            for (int groupIndex = 0; groupIndex < expectedProteaseNames.Length; groupIndex++)
            {
                var proteinGroup = proteinGroups.ElementAt(groupIndex);
                string expectedProtease = expectedProteaseNames[groupIndex];

                Assert.AreEqual(2, proteinGroup.AllPeptides.Count);
                Assert.AreEqual(2, proteinGroup.UniquePeptides.Count);
                Assert.AreEqual("ABC", proteinGroup.AllPeptides.ElementAt(0).BaseSequence);
                Assert.AreEqual(expectedProtease, proteinGroup.AllPeptides.ElementAt(0).DigestionParams.Protease.Name);
                Assert.AreEqual("EFG", proteinGroup.AllPeptides.ElementAt(1).BaseSequence);
                Assert.AreEqual(expectedProtease, proteinGroup.AllPeptides.ElementAt(1).DigestionParams.Protease.Name);
                Assert.AreEqual("ABC", proteinGroup.UniquePeptides.ElementAt(0).BaseSequence);
                Assert.AreEqual("EFG", proteinGroup.UniquePeptides.ElementAt(1).BaseSequence);
            }
        }
        /// <summary>
        /// Runs parsimony with modified peptide forms NOT treated as unique. An oxidized and an
        /// unmodified peptide sharing one base sequence, taken from two different proteins, are
        /// expected to end up in a single protein group containing both proteins, leaving both
        /// PSMs without a protein accession.
        /// </summary>
        public static void ParsimonyDontTreatModifiedFormsAsUnique()
        {
            bool modPeptidesAreUnique = false;

            // set up mods (this dictionary is not read again in this test)
            var modDictionary = new Dictionary<int, List<Modification>>();

            ModificationMotif.TryGetMotif("M", out ModificationMotif methionineMotif);
            var oxidation = new Modification(
                _originalId: "Oxidation of M",
                _modificationType: "Common Variable",
                _target: methionineMotif,
                _locationRestriction: "Anywhere.",
                _monoisotopicMass: 15.99491461957);

            // protein that will carry the modification
            var modifiedProtein = new Protein("PEPTIDEM", "accession1");

            // protein that stays unmodified
            var unmodifiedProtein = new Protein("YYYKPEPTIDEM", "accession2");

            // the oxidation is supplied as a fixed mod for the first protein only
            var paramsForModified = new DigestionParams(protease: "trypsin", minPeptideLength: 1);
            var fixedModsForModified = new List<Modification> { oxidation };
            List<PeptideWithSetModifications> pwsmsFromProtein1 =
                modifiedProtein.Digest(paramsForModified, fixedModsForModified, new List<Modification>()).ToList();

            var paramsForUnmodified = new DigestionParams(protease: "trypsin", minPeptideLength: 1);
            List<PeptideWithSetModifications> pwsmsFromProtein2 =
                unmodifiedProtein.Digest(paramsForUnmodified, new List<Modification>(), new List<Modification>()).ToList();

            // check to make sure mod is present
            PeptideWithSetModifications modifiedPeptide = pwsmsFromProtein1[0];
            PeptideWithSetModifications unmodifiedPeptide = pwsmsFromProtein2[1];

            // full sequences should differ (one has a mod)
            bool fullSequencesDiffer = !modifiedPeptide.FullSequence.Equals(unmodifiedPeptide.FullSequence);
            Assert.That(fullSequencesDiffer);

            // base sequences should be equal
            bool baseSequencesMatch = modifiedPeptide.BaseSequence.Equals(unmodifiedPeptide.BaseSequence);
            Assert.That(baseSequencesMatch);

            // methionine was oxidized on the first protein, nothing was modified on the second
            Assert.That(modifiedPeptide.NumMods == 1);
            Assert.That(unmodifiedPeptide.NumMods == 0);

            // build PSMs for parsimony
            var spectrum = new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false);
            MsDataScan fakeScan = new MsDataScan(
                spectrum, 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null,
                null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);

            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(fakeScan, 2, 0, "File", new CommonParameters());

            PeptideSpectralMatch psm1 = new PeptideSpectralMatch(modifiedPeptide, 0, 10, 1, scan, new DigestionParams(), new List<MatchedFragmentIon>());
            psm1.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            psm1.ResolveAllAmbiguities();

            PeptideSpectralMatch psm2 = new PeptideSpectralMatch(unmodifiedPeptide, 0, 10, 2, scan, new DigestionParams(), new List<MatchedFragmentIon>());
            psm2.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
            psm2.ResolveAllAmbiguities();

            var psmsForParsimony = new List<PeptideSpectralMatch> { psm1, psm2 };

            // apply parsimony;
            // because the two chosen peptides are the same, we should end up with both protein accessions still in the list
            var parsimonyEngine = new ProteinParsimonyEngine(psmsForParsimony, modPeptidesAreUnique, new CommonParameters(), new List<string>());
            var proteinParsimonyResult = (ProteinParsimonyResults)parsimonyEngine.Run();

            // score protein groups and merge indistinguishable ones
            var scoringEngine = new ProteinScoringAndFdrEngine(proteinParsimonyResult.ProteinGroups, psmsForParsimony, false, true, true, new CommonParameters(), new List<string>());
            var scoringResults = (ProteinScoringAndFdrResults)scoringEngine.Run();

            int countOfProteinGroups = scoringResults.SortedAndScoredProteinGroups.Count;

            // because modified peptides were NOT considered as unique,
            // there should be one ambiguous protein group after parsimony,
            // and two protein accessions for each peptide
            Assert.AreEqual(1, countOfProteinGroups);
            Assert.AreEqual(2, scoringResults.SortedAndScoredProteinGroups.First().Proteins.Count);
            Assert.IsNull(psm1.ProteinAccession);
            Assert.IsNull(psm2.ProteinAccession);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Digests the protein "MNNNSKQQQ" with a custom protease and a variable "resMod"
        /// modification on S, runs parsimony and protein scoring over three PSMs, and checks the
        /// modification-occupancy string reported for the first protein group.
        /// </summary>
        /// <remarks>
        /// Adds "CustomProtease" to <c>ProteaseDictionary.Dictionary</c>.
        /// </remarks>
        public static void TestPTMOutput()
        {
            var variableModifications = new List<ModificationWithMass>();
            var fixedModifications = new List<ModificationWithMass>();

            ModificationMotif.TryGetMotif("S", out ModificationMotif serineMotif);
            variableModifications.Add(new ModificationWithMassAndCf("resMod", "HaHa", serineMotif, TerminusLocalization.Any, ChemicalFormula.ParseFormula("H")));

            var proteinList = new List<Protein> { new Protein("MNNNSKQQQ", "accession") };

            // Cleavage motif "K" with TerminusType.C.
            var cleavageMotifs = new List<Tuple<string, TerminusType>>
            {
                new Tuple<string, TerminusType>("K", TerminusType.C)
            };
            var protease = new Protease("CustomProtease", cleavageMotifs, new List<Tuple<string, TerminusType>>(), CleavageSpecificity.Full, null, null, null);

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            var compactPeptideToProteinPeptideMatching = new Dictionary<CompactPeptideBase, HashSet<PeptideWithSetModifications>>();

            // Not read again in this test.
            var modsDictionary = new Dictionary<ModificationWithMass, ushort>
            {
                { variableModifications.Last(), 1 }
            };

            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1);

            // Last peptide produced by the digest.
            var lastPeptide = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).Last();
            var value = new HashSet<PeptideWithSetModifications> { lastPeptide };
            var compactPeptide1 = new CompactPeptide(lastPeptide, TerminusType.None);

            Assert.AreEqual("QQQ", lastPeptide.Sequence);

            // Second, independent digest; its first four peptides are checked one by one.
            var firstProtDigest = proteinList.First().Digest(digestionParams, fixedModifications, variableModifications).ToList();

            var unmodifiedLong = firstProtDigest[0];
            var value2 = new HashSet<PeptideWithSetModifications> { unmodifiedLong };
            var compactPeptide2 = new CompactPeptide(unmodifiedLong, TerminusType.None);

            Assert.AreEqual("MNNNSK", unmodifiedLong.Sequence);

            var modifiedLong = firstProtDigest[1];
            var value2mod = new HashSet<PeptideWithSetModifications> { modifiedLong };
            var compactPeptide2mod = new CompactPeptide(modifiedLong, TerminusType.None);

            Assert.AreEqual("MNNNS[HaHa:resMod]K", modifiedLong.Sequence);

            var unmodifiedShort = firstProtDigest[2];
            var value3 = new HashSet<PeptideWithSetModifications> { unmodifiedShort };
            var compactPeptide3 = new CompactPeptide(unmodifiedShort, TerminusType.None);

            Assert.AreEqual("NNNSK", unmodifiedShort.Sequence);

            var modifiedShort = firstProtDigest[3];
            var value3mod = new HashSet<PeptideWithSetModifications> { modifiedShort };
            var compactPeptide3mod = new CompactPeptide(modifiedShort, TerminusType.None);

            Assert.AreEqual("NNNS[HaHa:resMod]K", modifiedShort.Sequence);

            // Third digest (with a fresh empty fixed-mod list), collected into a set in digest order.
            var peptideList = new HashSet<PeptideWithSetModifications>(
                proteinList.SelectMany(protein => protein.Digest(digestionParams, new List<ModificationWithMass>(), variableModifications)));

            // Insertion order is kept: unmodified entries first, then the modified ones.
            compactPeptideToProteinPeptideMatching.Add(compactPeptide1, value);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide2, value2);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide3, value3);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide2mod, value2mod);
            compactPeptideToProteinPeptideMatching.Add(compactPeptide3mod, value3mod);

            var parsimonyEngine = new ProteinParsimonyEngine(compactPeptideToProteinPeptideMatching, true, new CommonParameters(), new List<string> { "ff" });
            var parsimonyResults = (ProteinParsimonyResults)parsimonyEngine.Run();
            var proteinGroups = parsimonyResults.ProteinGroups;

            var spectrum = new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false);
            MsDataScan dataScan = new MsDataScan(spectrum, 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(dataScan, 2, 0, "File");

            // Neither of these two locals is consumed below.
            var lp = new List<ProductType> { ProductType.B, ProductType.Y };
            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            // Three PSMs: one for peptide 0 of peptideList and two for peptide 1.
            var psms = new List<PeptideSpectralMatch>();

            foreach (int peptideIndex in new[] { 0, 1, 1 })
            {
                var match = new PeptideSpectralMatch(peptideList.ElementAt(peptideIndex).CompactPeptide(TerminusType.None), 0, 10, 0, ms2scan, digestionParams);

                match.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
                psms.Add(match);
            }

            // Link the PSMs to peptides only after all of them exist and have FDR values.
            foreach (var match in psms)
            {
                match.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }

            var scoringEngine = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), new List<string>());

            scoringEngine.Run();

            Assert.AreEqual("#aa5[resMod,info:occupancy=0.67(2/3)];", proteinGroups.First().ModsInfo[0]);
        }
Ejemplo n.º 12
0
        public static void TestParsimony()
        {
            // creates some test proteins and digests them (simulating a protein database)
            string[] sequences = { "AB--------",   // 1: contains unique
                                   "--C-------",   // 2: one hit wonder
                                   "---D---HHH--", // 3: subset
                                   "-B-D---HHH--", // 4: D should go to 4, not 3 (3 is subset)
                                   "-B--E-----",   // 5: subsumable
                                   "----EFG---",   // 6: indistinguishable from 8 (J will not be a "detected" PSM)
                                   "-----F----",   // 7: lone pep shared w/ decoy
                                   "--------I-",   // 8: I should go to 9, not 8
                                   "-B------I-",   // 9: I should go to 9, not 8
                                   "----EFG--J"    // 10: indistinguishable from 6 (J will not be a "detected" PSM)
            };

            IEnumerable <Tuple <string, TerminusType> > sequencesInducingCleavage = new List <Tuple <string, TerminusType> > {
                new Tuple <string, TerminusType>("A", TerminusType.C), new Tuple <string, TerminusType>("B", TerminusType.C), new Tuple <string, TerminusType>("C", TerminusType.C), new Tuple <string, TerminusType>("D", TerminusType.C), new Tuple <string, TerminusType>("E", TerminusType.C), new Tuple <string, TerminusType>("F", TerminusType.C), new Tuple <string, TerminusType>("G", TerminusType.C), new Tuple <string, TerminusType>("H", TerminusType.C), new Tuple <string, TerminusType>("I", TerminusType.C), new Tuple <string, TerminusType>("J", TerminusType.C), new Tuple <string, TerminusType>("-", TerminusType.C)
            };
            var protease    = new Protease("test", sequencesInducingCleavage, new List <Tuple <string, TerminusType> >(), CleavageSpecificity.Full, null, null, null);
            var peptideList = new HashSet <PeptideWithSetModifications>();

            ProteaseDictionary.Dictionary.Add(protease.Name, protease);

            var p = new List <Protein>();
            List <Tuple <string, string> > gn = new List <Tuple <string, string> >();

            for (int i = 0; i < sequences.Length; i++)
            {
                p.Add(new Protein(sequences[i], (i + 1).ToString(), null, gn, new Dictionary <int, List <Modification> >()));
            }
            p.Add(new Protein("-----F----*", "D1", null, gn, new Dictionary <int, List <Modification> >(), isDecoy: true));
            p.Add(new Protein("-----F----**", "C1", null, gn, new Dictionary <int, List <Modification> >(), isContaminant: true));
            p.Add(new Protein("----E----**", "C2", null, gn, new Dictionary <int, List <Modification> >(), isContaminant: true));

            DigestionParams digestionParams = new DigestionParams(protease: protease.Name, minPeptideLength: 1);

            foreach (var protein in p)
            {
                foreach (var peptide in protein.Digest(digestionParams, new List <ModificationWithMass>(), new List <ModificationWithMass>()))
                {
                    switch (peptide.BaseSequence)
                    {
                    case "A": peptideList.Add(peptide); break;

                    case "B": peptideList.Add(peptide); break;

                    case "C": peptideList.Add(peptide); break;

                    case "D": peptideList.Add(peptide); break;

                    case "E": peptideList.Add(peptide); break;

                    case "F": peptideList.Add(peptide); break;

                    case "G": peptideList.Add(peptide); break;

                    case "H": peptideList.Add(peptide); break;

                    case "I": peptideList.Add(peptide); break;
                    }
                }
            }

            // creates the initial dictionary of "peptide" and "virtual peptide" matches
            var dictionary = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >();

            CompactPeptide[] peptides = new CompactPeptide[peptideList.Count];
            HashSet <PeptideWithSetModifications>[] virtualPeptideSets = new HashSet <PeptideWithSetModifications> [peptideList.Count];

            Dictionary <ModificationWithMass, ushort> modsDictionary = new Dictionary <ModificationWithMass, ushort>();

            // creates peptide list
            for (int i = 0; i < peptideList.Count; i++)
            {
                peptides[i] = new CompactPeptide(peptideList.ElementAt(i), TerminusType.None);
            }

            // creates protein list
            for (int i = 0; i < virtualPeptideSets.Length; i++)
            {
                virtualPeptideSets[i] = new HashSet <PeptideWithSetModifications>();

                foreach (var virtualPeptide in peptideList)
                {
                    string peptideBaseSequence = string.Join("", peptideList.ElementAt(i).BaseSequence.Select(b => char.ConvertFromUtf32(b)));

                    if (virtualPeptide.BaseSequence.Contains(peptideBaseSequence))
                    {
                        virtualPeptideSets[i].Add(virtualPeptide);
                    }
                }
            }

            // populates initial peptide-virtualpeptide dictionary
            for (int i = 0; i < peptides.Length; i++)
            {
                if (!dictionary.ContainsKey(peptides[i]))
                {
                    dictionary.Add(peptides[i], virtualPeptideSets[i]);
                }
            }

            // copy for comparison later
            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > initialDictionary = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >();

            foreach (var kvp in dictionary)
            {
                CompactPeptideBase cp = kvp.Key;
                HashSet <PeptideWithSetModifications> peps = new HashSet <PeptideWithSetModifications>();
                foreach (var pep in kvp.Value)
                {
                    peps.Add(pep);
                }

                initialDictionary.Add(cp, peps);
            }

            // apply parsimony to dictionary
            ProteinParsimonyEngine ae = new ProteinParsimonyEngine(dictionary, false, new CommonParameters(), new List <string>());
            var hah           = (ProteinParsimonyResults)ae.Run();
            var proteinGroups = hah.ProteinGroups;

            var parsimonyProteinList   = new List <Protein>();
            var parsimonyBaseSequences = new List <string>();

            foreach (var kvp in dictionary)
            {
                foreach (var virtualPeptide in kvp.Value)
                {
                    if (!parsimonyProteinList.Contains(virtualPeptide.Protein))
                    {
                        parsimonyProteinList.Add(virtualPeptide.Protein);
                        parsimonyBaseSequences.Add(virtualPeptide.Protein.BaseSequence);
                    }
                }
            }

            // builds psm list to match to peptides
            List <PeptideSpectralMatch> psms = new List <PeptideSpectralMatch>();

            MsDataScan dfb = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass scan = new Ms2ScanWithSpecificMass(dfb, 2, 0, "File");

            foreach (var kvp in dictionary)
            {
                foreach (var peptide in kvp.Value)
                {
                    switch (peptide.BaseSequence)
                    {
                    case "A": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 10, 0, scan, digestionParams)); break;

                    case "B": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 9, 0, scan, digestionParams)); break;

                    case "C": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 8, 0, scan, digestionParams)); break;

                    case "D": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 7, 0, scan, digestionParams)); break;

                    case "E": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 6, 0, scan, digestionParams)); break;

                    case "F": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 5, 0, scan, digestionParams)); break;

                    case "G": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 4, 0, scan, digestionParams)); break;

                    case "H": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 3, 0, scan, digestionParams)); break;

                    case "I": psms.Add(new PeptideSpectralMatch(peptide.CompactPeptide(TerminusType.None), 0, 2, 0, scan, digestionParams)); break;
                    }
                }
            }

            List <ProductType> lp = new List <ProductType> {
                ProductType.B, ProductType.Y
            };
            Tolerance fragmentTolerance = new AbsoluteTolerance(0.01);

            foreach (var hm in psms)
            {
                hm.MatchToProteinLinkedPeptides(initialDictionary);
                hm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0, 0, false);
            }

            ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, true, false, true, new CommonParameters(), new List <string>());
            var ok = (ProteinScoringAndFdrResults)f.Run();

            proteinGroups = ok.SortedAndScoredProteinGroups;

            //prints initial dictionary
            List <Protein> proteinList = new List <Protein>();

            foreach (var kvp in initialDictionary)
            {
                proteinList = new List <Protein>();
                foreach (var peptide in kvp.Value)
                {
                    if (!proteinList.Contains(peptide.Protein))
                    {
                        proteinList.Add(peptide.Protein);
                    }
                }
            }

            //prints parsimonious dictionary
            foreach (var kvp in dictionary)
            {
                proteinList = new List <Protein>();
                foreach (var peptide in kvp.Value)
                {
                    if (!proteinList.Contains(peptide.Protein))
                    {
                        proteinList.Add(peptide.Protein);
                    }
                }
            }

            // check that correct proteins are in parsimony list
            Assert.Contains("AB--------", parsimonyBaseSequences);
            Assert.Contains("--C-------", parsimonyBaseSequences);
            Assert.Contains("-B-D---HHH--", parsimonyBaseSequences);
            Assert.Contains("-----F----*", parsimonyBaseSequences);
            Assert.Contains("----E----**", parsimonyBaseSequences);
            Assert.Contains("-B------I-", parsimonyBaseSequences);
            Assert.Contains("----EFG---", parsimonyBaseSequences);
            Assert.Contains("----EFG--J", parsimonyBaseSequences);
            Assert.AreEqual(8, parsimonyProteinList.Count);

            // sequence coverage test
            foreach (var proteinGroup in proteinGroups)
            {
                foreach (var coverage in proteinGroup.SequenceCoveragePercent)
                {
                    Assert.That(coverage <= 1.0);
                }
            }

            // protein group tests
            Assert.AreEqual(4, proteinGroups.Count);
            Assert.AreEqual(1, proteinGroups.First().Proteins.Count);
            Assert.AreEqual("AB--------", proteinGroups.First().Proteins.First().BaseSequence);
            Assert.AreEqual(4, proteinGroups.First().AllPsmsBelowOnePercentFDR.Count);
            Assert.AreEqual(19, proteinGroups.First().ProteinGroupScore);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Checks the modification-occupancy summary (<c>ModsInfo</c>) reported for a protein group.
        /// The protein "MNNNSKQQQI" is digested with a custom protease built from a "K" digestion
        /// motif and three variable modifications (one targeting S, two targeting I). The digestion
        /// products are verified by full sequence, nine PSMs are built from them, protein parsimony
        /// and protein scoring/FDR are run, and the first <c>ModsInfo</c> entry of the first protein
        /// group is compared with the expected occupancy string.
        /// </summary>
        public static void TestPTMOutput()
        {
            // set up mods: one on S, two distinct ones on I
            ModificationMotif.TryGetMotif("S", out ModificationMotif motifS);
            ModificationMotif.TryGetMotif("I", out ModificationMotif motifI);

            var fixedModifications = new List<Modification>();
            var variableModifications = new List<Modification>
            {
                new Modification(_originalId: "resMod", _modificationType: "HaHa", _target: motifS, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")),
                new Modification(_originalId: "iModOne", _modificationType: "HaHa", _target: motifI, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H")),
                new Modification(_originalId: "iModTwo", _modificationType: "HaHa", _target: motifI, _locationRestriction: "Anywhere.", _chemicalFormula: ChemicalFormula.ParseFormula("H"))
            };

            var proteinList = new List<Protein>
            {
                new Protein("MNNNSKQQQI", "accession")
            };
            var protease = new Protease("CustomProtease", CleavageSpecificity.Full, null, null, new List<DigestionMotif>
            {
                new DigestionMotif("K", null, 1, null)
            });

            // The registry is a static member, so an entry left behind by an earlier run in the same
            // process would make Add throw on the duplicate key (assuming a standard Dictionary);
            // indexer assignment registers or replaces the entry instead.
            ProteaseDictionary.Dictionary[protease.Name] = protease;

            CommonParameters commonParameters = new CommonParameters(digestionParams: new DigestionParams(protease: protease.Name, maxMissedCleavages: 0, minPeptideLength: 1));
            var protDigest = proteinList.First().Digest(commonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

            // check that digestion produced the expected peptides (full sequences, in order)
            string[] expectedFullSequences =
            {
                "MNNNSK",
                "MNNNS[HaHa:resMod on S]K",
                "NNNSK",
                "NNNS[HaHa:resMod on S]K",
                "QQQI",
                "QQQI[HaHa:iModOne on I]",
                "QQQI[HaHa:iModTwo on I]"
            };

            for (int i = 0; i < expectedFullSequences.Length; i++)
            {
                Assert.AreEqual(expectedFullSequences[i], protDigest[i].FullSequence);
            }

            // peptides the PSMs are built from (digested again, this time with no fixed mods list)
            var peptideList = new HashSet<PeptideWithSetModifications>();

            foreach (var peptide in proteinList.SelectMany(protein => protein.Digest(commonParameters.DigestionParams, new List<Modification>(), variableModifications)))
            {
                peptideList.Add(peptide);
            }

            // a single dummy MS2 scan is shared by every PSM
            MsDataScan jdfk = new MsDataScan(new MzSpectrum(new double[] { 1 }, new double[] { 1 }, false), 0, 1, true, Polarity.Positive, double.NaN, null, null, MZAnalyzerType.Orbitrap, double.NaN, null, null, "scan=1", double.NaN, null, null, double.NaN, null, DissociationType.AnyActivationType, 0, null);
            Ms2ScanWithSpecificMass ms2scan = new Ms2ScanWithSpecificMass(jdfk, 2, 0, "File", new CommonParameters());

            // Builds a PSM (score 10, no matched fragment ions) for the peptide at the given position
            // in peptideList and gives it all-zero FDR values.
            PeptideSpectralMatch CreatePsm(int peptideIndex)
            {
                var psm = new PeptideSpectralMatch(peptideList.ElementAt(peptideIndex), 0, 10, 0, ms2scan, commonParameters, new List<MatchedFragmentIon>());
                psm.SetFdrValues(0, 0, 0, 0, 0, 0, 0, 0);
                return psm;
            }

            // Index 1 and indices 4-6 are each used twice on purpose; the repetition feeds the
            // occupancy fractions asserted below. NOTE(review): the indices presumably follow the
            // digestion order checked above (HashSet enumeration order) - confirm.
            List<PeptideSpectralMatch> psms = new List<PeptideSpectralMatch>
            {
                CreatePsm(0),
                CreatePsm(1),
                CreatePsm(1),
                CreatePsm(4),
                CreatePsm(4),
                CreatePsm(5),
                CreatePsm(5),
                CreatePsm(6),
                CreatePsm(6)
            };

            psms.ForEach(p => p.ResolveAllAmbiguities());

            // apply parsimony, then score the resulting protein groups
            ProteinParsimonyEngine engine = new ProteinParsimonyEngine(psms, true, new CommonParameters(), null, new List<string>
            {
                "ff"
            });
            var parsimonyResults = (ProteinParsimonyResults)engine.Run();
            var proteinGroups    = parsimonyResults.ProteinGroups;

            ProteinScoringAndFdrEngine f = new ProteinScoringAndFdrEngine(proteinGroups, psms, false, false, true, new CommonParameters(), null, new List<string>());

            f.Run();

            Assert.AreEqual("#aa5[resMod on S,info:occupancy=0.67(2/3)];#aa10[iModOne on I,info:occupancy=0.33(2/6)];#aa10[iModTwo on I,info:occupancy=0.33(2/6)]", proteinGroups.First().ModsInfo[0]);
        }