Example #1
0
        // Builds a proteome database from the test FASTA file, digests it with the
        // default enzyme, and checks that a sequence-prefix lookup finds a protein.
        public void TestProteomeDb()
        {
            const string sequencePrefix = "EDGWVK";
            const int maxResults = 100;

            using (TestFilesDir filesDir = new TestFilesDir(TestContext, ZIP_FILE))
            {
                string fastaFile = filesDir.GetTestPath("high_ipi.Human.20060111.fasta");
                string dbFile = filesDir.GetTestPath("test.protdb");

                using (ProteomeDb db = ProteomeDb.CreateProteomeDb(dbFile))
                {
                    Enzyme defaultEnzyme = EnzymeList.GetDefault();

                    // Import the proteins; the callback always returns true.
                    using (StreamReader fastaReader = new StreamReader(fastaFile))
                    {
                        db.AddFastaFile(fastaReader, (message, percent) => true);
                    }

                    // Digest the imported proteins, again with an always-true callback.
                    db.Digest(new ProteaseImpl(defaultEnzyme), (message, percent) => true);

                    // Look the digestion up by enzyme name and query it by peptide prefix.
                    Digestion enzymeDigestion = db.GetDigestion(defaultEnzyme.Name);
                    var prefixMatches = enzymeDigestion.GetProteinsWithSequencePrefix(sequencePrefix, maxResults);
                    Assert.IsTrue(prefixMatches.Count >= 1);
                }
            }
        }
Example #2
0
        // Builds a proteome database from the test FASTA file using silent progress
        // monitors, digests it with the default enzyme, and checks that a
        // sequence-prefix lookup finds at least one protein.
        public void TestProteomeDb()
        {
            const string sequencePrefix = "EDGWVK";
            const int maxResults = 100;
            const bool delayIndexing = true;

            using (TestFilesDir filesDir = new TestFilesDir(TestContext, ZIP_FILE))
            {
                string fastaFile = filesDir.GetTestPath("high_ipi.Human.20060111.fasta");
                string dbFile = filesDir.GetTestPath("test.protdb");

                using (ProteomeDb db = ProteomeDb.CreateProteomeDb(dbFile))
                {
                    Enzyme defaultEnzyme = EnzymeList.GetDefault();
                    // One status object is threaded by reference through both steps.
                    IProgressStatus progressStatus = new ProgressStatus(string.Empty);

                    // Import the proteins with indexing delayed.
                    using (StreamReader fastaReader = new StreamReader(fastaFile))
                    {
                        db.AddFastaFile(fastaReader, new SilentProgressMonitor(), ref progressStatus, delayIndexing);
                    }

                    // Digest the imported proteins.
                    db.Digest(
                        new ProteaseImpl(defaultEnzyme),
                        ProteomeDb.PROTDB_MAX_MISSED_CLEAVAGES,
                        new SilentProgressMonitor(),
                        ref progressStatus);

                    // Look the digestion up by enzyme name and query it by peptide prefix.
                    Digestion enzymeDigestion = db.GetDigestion(defaultEnzyme.Name);
                    var prefixMatches = enzymeDigestion.GetProteinsWithSequencePrefix(sequencePrefix, maxResults);
                    Assert.IsTrue(prefixMatches.Count >= 1);
                }
            }
        }
        // Verifies how the peptides of a mixed document respond to peptide settings
        // changes: enzyme, missed cleavages, length limits, N-terminal exclusion,
        // ragged-end exclusion, exclusion regexes, and auto-select being turned off.
        // After most changes the first peptide group (the peptide list) is asserted
        // to be the very same instance as before.
        public void SettingsChangePeptides()
        {
            SrmDocument docFasta = CreateMixedDoc();
            const int posList = 0;  // Peptide list is first peptide group.
            SrmSettings settings = docFasta.Settings;

            // Change enzymes, and verify expected peptide changes
            var enzymes = new EnzymeList();
            enzymes.AddDefaults();
            SrmDocument docCnbr = docFasta.ChangeSettings(settings.ChangePeptideSettings(
                p => p.ChangeEnzyme(enzymes["CNBr [M | P]"])));
            foreach (PeptideDocNode nodePeptide in docCnbr.Peptides)
            {
                // Peptides without a FASTA sequence are skipped.
                if (nodePeptide.Peptide.FastaSequence == null)
                    continue;
                Peptide peptide = nodePeptide.Peptide;
                char prev = peptide.PrevAA;
                if (prev != 'M')
                    Assert.Fail("Unexpected preceding cleavage at {0}", prev);
                string seq = peptide.Sequence;
                char last = seq[seq.Length - 1];
                // A peptide may end on a non-M residue only when its next AA is '-'.
                if (last != 'M' && peptide.NextAA != '-')
                    Assert.Fail("Unexpected cleavage at {0}", last);
            }
            Assert.IsTrue(docCnbr.PeptideCount < docFasta.PeptideCount);
            // Peptide list should not have changed.
            Assert.AreSame(docFasta.Children[posList], docCnbr.Children[posList]);

            // Change back to original enzyme, and make sure peptides are restored
            SrmDocument docFasta2 = docCnbr.ChangeSettings(settings);
            Assert.AreEqual(docFasta.RevisionIndex + 2, docFasta2.RevisionIndex);
            Assert.AreEqual(docFasta.PeptideCount, docFasta2.PeptideCount);
            Assert.AreEqual(docFasta.PeptideTransitionCount, docFasta2.PeptideTransitionCount);

            // Allow missed cleavages, and verify changes
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideSettings(
                p => p.ChangeDigestSettings(new DigestSettings(1, false))));
            Assert.IsTrue(docFasta.PeptideCount < docFasta2.PeptideCount);
            // TODO: Make minimum transition count work immediately
            //            Assert.IsTrue((docFasta2.PeptideCount - docFasta.PeptideCount) * 3 <
            //                docFasta2.TransitionCount - docFasta.TransitionCount);
            int missedCleavageCount = 0;
            var dictOrig = docFasta.Peptides.ToDictionary(node => node.Peptide);
            foreach (PeptideDocNode nodePeptide in docFasta2.Peptides)
            {
                // Make sure all zero-cleavage peptides are the same as the old document
                int missed = nodePeptide.Peptide.MissedCleavages;
                if (missed == 0)
                    Assert.AreEqual(nodePeptide, dictOrig[nodePeptide.Peptide]);

                // Count the number of new missed cleavages
                missedCleavageCount += missed;
            }
            Assert.AreEqual(docFasta2.PeptideCount - docFasta.PeptideCount, missedCleavageCount);
            // Peptide list should not have changed.
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Increase minimum peptide length
            const int minNew = 12;
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(f => f.ChangeMinPeptideLength(minNew)));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Length >= minNew);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Decrease maximum peptide length
            const int maxNew = 18;
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(f => f.ChangeMaxPeptideLength(maxNew)));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Length <= maxNew);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Increase n-term AA exclusion
            const int ntermStart = 50;
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExcludeNTermAAs(ntermStart)));
            CheckPeptides(docFasta, docFasta2, node =>
                node.Peptide.Begin.HasValue && node.Peptide.Begin.Value >= ntermStart);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Use ragged end exclusion
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideSettings(
                p => p.ChangeDigestSettings(new DigestSettings(0, true))));
            CheckPeptides(docFasta, docFasta2, IsNotRagged);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Check custom exclusions
            var exclusions = new PeptideExcludeList();
            exclusions.AddDefaults();

            // Exclude Cys
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new[] { exclusions["Cys"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf('C') == -1);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Exclude Met
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new[] { exclusions["Met"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf('M') == -1);

            // Exclude His
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new[] { exclusions["His"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf('H') == -1);

            // Exclude NXS/NXT
            Regex regexNx = new Regex("N.[ST]");
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new[] { exclusions["NXT/NXS"] })));
            CheckPeptides(docFasta, docFasta2, node => !regexNx.Match(node.Peptide.Sequence).Success);

            // Exclude RP/KP
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new[] { exclusions["RP/KP"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf("RP", StringComparison.Ordinal) == -1 &&
                                                       node.Peptide.Sequence.IndexOf("KP", StringComparison.Ordinal) == -1);

            // Custom exclude ^Q.*K$
            var excludeCustom = new PeptideExcludeRegex("Custom", "^Q.*K$");
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new[] { excludeCustom })));
            // Ordinal comparison, consistent with the RP/KP check above; the
            // single-string overloads would compare using the current culture.
            CheckPeptides(docFasta, docFasta2, node =>
                (!node.Peptide.Sequence.StartsWith("Q", StringComparison.Ordinal) ||
                 !node.Peptide.Sequence.EndsWith("K", StringComparison.Ordinal)));

            // Auto-picking off should keep any changes from occurring
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeAutoSelect(false).ChangeExclusions(new[] { exclusions["Cys"], exclusions["Met"], excludeCustom })));
            Assert.AreSame(docFasta.Children, docFasta2.Children);

            // Removing restriction with auto-picking off should not change anything
            settings = docFasta2.Settings;
            SrmDocument docFasta3 = docFasta2.ChangeSettings(settings.ChangePeptideFilter(
                f => f.ChangeExclusions(new PeptideExcludeRegex[0])));
            Assert.AreSame(docFasta2.Children, docFasta3.Children);
        }
Example #4
0
        // Verifies how the peptides of a mixed document respond to peptide settings
        // changes: enzyme, missed cleavages, length limits, N-terminal exclusion,
        // ragged-end exclusion, exclusion regexes, and auto-select being turned off.
        // After most changes the first peptide group (the peptide list) is asserted
        // to be the very same instance as before.
        public void SettingsChangePeptides()
        {
            SrmDocument docFasta = CreateMixedDoc();
            const int   posList  = 0; // Peptide list is first peptide group.
            SrmSettings settings = docFasta.Settings;

            // Change enzymes, and verify expected peptide changes
            var enzymes = new EnzymeList();

            enzymes.AddDefaults();
            SrmDocument docCnbr = docFasta.ChangeSettings(settings.ChangePeptideSettings(
                                                              p => p.ChangeEnzyme(enzymes["CNBr [M | P]"])));

            foreach (PeptideDocNode nodePeptide in docCnbr.Peptides)
            {
                // Peptides without a FASTA sequence are skipped.
                if (nodePeptide.Peptide.FastaSequence == null)
                {
                    continue;
                }
                Peptide peptide = nodePeptide.Peptide;
                char    prev    = peptide.PrevAA;
                if (prev != 'M')
                {
                    Assert.Fail("Unexpected preceding cleavage at {0}", prev);
                }
                string seq  = peptide.Sequence;
                char   last = seq[seq.Length - 1];
                // A peptide may end on a non-M residue only when its next AA is '-'.
                if (last != 'M' && peptide.NextAA != '-')
                {
                    Assert.Fail("Unexpected cleavage at {0}", last);
                }
            }
            Assert.IsTrue(docCnbr.PeptideCount < docFasta.PeptideCount);
            // Peptide list should not have changed.
            Assert.AreSame(docFasta.Children[posList], docCnbr.Children[posList]);

            // Change back to original enzyme, and make sure peptides are restored
            SrmDocument docFasta2 = docCnbr.ChangeSettings(settings);

            Assert.AreEqual(docFasta.RevisionIndex + 2, docFasta2.RevisionIndex);
            Assert.AreEqual(docFasta.PeptideCount, docFasta2.PeptideCount);
            Assert.AreEqual(docFasta.PeptideTransitionCount, docFasta2.PeptideTransitionCount);

            // Allow missed cleavages, and verify changes
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideSettings(
                                                    p => p.ChangeDigestSettings(new DigestSettings(1, false))));
            Assert.IsTrue(docFasta.PeptideCount < docFasta2.PeptideCount);
            // TODO: Make minimum transition count work immediately
            //            Assert.IsTrue((docFasta2.PeptideCount - docFasta.PeptideCount) * 3 <
            //                docFasta2.TransitionCount - docFasta.TransitionCount);
            int missedCleavageCount = 0;
            var dictOrig            = docFasta.Peptides.ToDictionary(node => node.Peptide);

            foreach (PeptideDocNode nodePeptide in docFasta2.Peptides)
            {
                // Make sure all zero-cleavage peptides are the same as the old document
                int missed = nodePeptide.Peptide.MissedCleavages;
                if (missed == 0)
                {
                    Assert.AreEqual(nodePeptide, dictOrig[nodePeptide.Peptide]);
                }

                // Count the number of new missed cleavages
                missedCleavageCount += missed;
            }
            Assert.AreEqual(docFasta2.PeptideCount - docFasta.PeptideCount, missedCleavageCount);
            // Peptide list should not have changed.
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Increase minimum peptide length
            const int minNew = 12;

            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(f => f.ChangeMinPeptideLength(minNew)));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Length >= minNew);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Decrease maximum peptide length
            const int maxNew = 18;

            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(f => f.ChangeMaxPeptideLength(maxNew)));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Length <= maxNew);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Increase n-term AA exclusion
            const int ntermStart = 50;

            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExcludeNTermAAs(ntermStart)));
            CheckPeptides(docFasta, docFasta2, node =>
                          node.Peptide.Begin.HasValue && node.Peptide.Begin.Value >= ntermStart);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Use ragged end exclusion
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideSettings(
                                                    p => p.ChangeDigestSettings(new DigestSettings(0, true))));
            CheckPeptides(docFasta, docFasta2, IsNotRagged);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Check custom exclusions
            var exclusions = new PeptideExcludeList();

            exclusions.AddDefaults();

            // Exclude Cys
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExclusions(new[] { exclusions["Cys"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf('C') == -1);
            Assert.AreSame(docFasta.Children[posList], docFasta2.Children[posList]);

            // Exclude Met
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExclusions(new[] { exclusions["Met"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf('M') == -1);

            // Exclude His
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExclusions(new[] { exclusions["His"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf('H') == -1);

            // Exclude NXS/NXT
            Regex regexNx = new Regex("N.[ST]");

            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExclusions(new[] { exclusions["NXT/NXS"] })));
            CheckPeptides(docFasta, docFasta2, node => !regexNx.Match(node.Peptide.Sequence).Success);

            // Exclude RP/KP
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExclusions(new[] { exclusions["RP/KP"] })));
            CheckPeptides(docFasta, docFasta2, node => node.Peptide.Sequence.IndexOf("RP", StringComparison.Ordinal) == -1 &&
                          node.Peptide.Sequence.IndexOf("KP", StringComparison.Ordinal) == -1);

            // Custom exclude ^Q.*K$
            var excludeCustom = new PeptideExcludeRegex("Custom", "^Q.*K$");

            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeExclusions(new[] { excludeCustom })));
            // Ordinal comparison, consistent with the RP/KP check above; the
            // single-string overloads would compare using the current culture.
            CheckPeptides(docFasta, docFasta2, node =>
                          (!node.Peptide.Sequence.StartsWith("Q", StringComparison.Ordinal) ||
                           !node.Peptide.Sequence.EndsWith("K", StringComparison.Ordinal)));

            // Auto-picking off should keep any changes from occurring
            docFasta2 = docFasta.ChangeSettings(settings.ChangePeptideFilter(
                                                    f => f.ChangeAutoSelect(false).ChangeExclusions(new[] { exclusions["Cys"], exclusions["Met"], excludeCustom })));
            Assert.AreSame(docFasta.Children, docFasta2.Children);

            // Removing restriction with auto-picking off should not change anything
            settings = docFasta2.Settings;
            SrmDocument docFasta3 = docFasta2.ChangeSettings(settings.ChangePeptideFilter(
                                                                 f => f.ChangeExclusions(new PeptideExcludeRegex[0])));

            Assert.AreSame(docFasta2.Children, docFasta3.Children);
        }