/// <summary>
        /// Digests every protein in the bundled yeast FASTA file, picks a pseudo-random subset of the
        /// resulting peptides to stand in for "identified" peptides, groups the proteins from that
        /// subset, and writes counts and timings to the console.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion and to the grouping call.</param>
        /// <param name="percentIdentified">Fraction of all digested peptides to treat as identified.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion and grouping calls.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
        {
            const string fastaPath = "Resources/yeast_uniprot_120226.fasta";
            const int shuffleSeed = 480912341;

            Stopwatch timer = new Stopwatch();
            timer.Start();

            // Capacities are pre-sized to avoid repeated list growth while loading.
            List<Peptide> allPeptides = new List<Peptide>(1000000);
            List<Protein> allProteins = new List<Protein>(7000);

            using (FastaReader fasta = new FastaReader(fastaPath))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(digested);
                    }
                    allProteins.Add(current);
                }
            }

            long loadMilliseconds = timer.ElapsedMilliseconds;
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, loadMilliseconds);

            // From here on the stopwatch measures only the selection and grouping steps.
            timer.Restart();

            // Fixed seed: ordering by a random key shuffles the peptides the same way on every run.
            Random rng = new Random(shuffleSeed);
            int identifiedCount = (int) (allPeptides.Count*percentIdentified);
            List<Peptide> identified = allPeptides.OrderBy(x => rng.Next()).Take(identifiedCount).ToList();

            List<ProteinGroup> groups = ProteinGroup.GroupProteins(allProteins, protease, identified, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identified.Count);
            Console.WriteLine();
            Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
        }
        /// <summary>
        /// Protein-grouping benchmark: digests the bundled yeast FASTA file, treats a pseudo-random
        /// fraction of the peptides as identified, groups the proteins, and reports counts, the total
        /// elapsed time (loading included) and the process working set.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion and to the grouping call.</param>
        /// <param name="percentIdentified">Fraction of all digested peptides to treat as identified.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion and grouping calls.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            const string fastaPath = "Resources/yeast_uniprot_120226.fasta";
            const int shuffleSeed = 480912341;

            Console.WriteLine("**Start Protein Grouping**");
            Stopwatch timer = new Stopwatch();
            timer.Start();

            List<Protein> allProteins = new List<Protein>();
            List<Peptide> allPeptides = new List<Peptide>();

            using (FastaReader fasta = new FastaReader(fastaPath))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(digested);
                    }
                    allProteins.Add(current);
                }
            }

            long loadMilliseconds = timer.ElapsedMilliseconds;
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, loadMilliseconds);

            // A fixed seed keeps the random selection reproducible between runs.
            Random rng = new Random(shuffleSeed);

            // Shuffle by a random sort key, then keep the leading fraction as the identified set.
            int identifiedCount = (int) (allPeptides.Count*percentIdentified);
            List<Peptide> identified = allPeptides.OrderBy(x => rng.Next()).Take(identifiedCount).ToList();

            List<ProteinGroup> groups = ProteinGroup.GroupProteins(allProteins, protease, identified, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();

            timer.Stop();
            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identified.Count);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);

            long workingSetMegabytes = System.Environment.WorkingSet/(1024*1024);
            Console.WriteLine("Memory used: {0:N0} MB", workingSetMegabytes);
            Console.WriteLine("**END Protein Grouping**");
        }
Exemple #3
0
 /// <summary>
 /// Groups the proteins read from a FASTA file using a single protease.
 /// </summary>
 /// <remarks>
 /// Implemented as an iterator so that the <see cref="FastaReader"/> stays open for as long as the
 /// result is being enumerated. Returning the inner sequence directly from inside the
 /// <c>using</c> block would dispose the reader before a lazily evaluated result is consumed.
 /// As with any iterator, the file is opened when enumeration starts, not when this method is called.
 /// </remarks>
 /// <param name="fastaFile">Path handed to the <see cref="FastaReader"/> constructor.</param>
 /// <param name="protease">The single protease; wrapped in a one-element array for the multi-protease overload.</param>
 /// <param name="observeredSequences">Observed sequences forwarded unchanged to the multi-protease overload.</param>
 /// <param name="peptideComparer">Equality comparer forwarded unchanged to the multi-protease overload.</param>
 /// <param name="MaxMissedCleavages">Maximum missed cleavages forwarded unchanged to the multi-protease overload.</param>
 /// <returns>The protein groups produced by the multi-protease overload.</returns>
 public static IEnumerable <ProteinGroup> GroupProteins(string fastaFile, IProtease protease, IEnumerable <IAminoAcidSequence> observeredSequences, IEqualityComparer <IAminoAcidSequence> peptideComparer, int MaxMissedCleavages = 3)
 {
     using (FastaReader fasta = new FastaReader(fastaFile))
     {
         // Yield from inside the using block: the reader is only disposed once the caller
         // has finished (or abandoned) the enumeration.
         foreach (ProteinGroup proteinGroup in GroupProteins(fasta.ReadNextProtein(), new[] { protease }, observeredSequences, peptideComparer, MaxMissedCleavages))
         {
             yield return proteinGroup;
         }
     }
 }
Exemple #4
0
        /// <summary>
        /// Morpheus search benchmark: digests the bundled yeast FASTA file into a set of distinct
        /// peptides, loads them into a <see cref="MorpheusSearchEngine"/>, and reports the elapsed
        /// time and the process working set.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = new Stopwatch();
            watch.Start();

            // Generate peptide candidates; the set drops peptides that compare equal.
            HashSet<Peptide> peptides = new HashSet<Peptide>();
            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();
            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);

            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet/(1024*1024));
            Console.WriteLine("**End Morpheus Search**");
        }
        /// <summary>
        /// Morpheus search benchmark: digests the bundled yeast FASTA file into a set of distinct
        /// peptides, loads them into a <see cref="MorpheusSearchEngine"/>, and reports the elapsed
        /// time and the process working set.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = new Stopwatch();

            watch.Start();

            // Generate peptide candidates; the set drops peptides that compare equal.
            HashSet <Peptide> peptides = new HashSet <Peptide>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();

            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance   = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);

            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Morpheus Search**");
        }
Exemple #6
0
        /// <summary>
        /// Digestion benchmark: digests every protein in the bundled yeast FASTA file, computes the
        /// m/z at charge 2 for each peptide, and reports counts, elapsed time and working set.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        /// <param name="storeSequenceString">Value assigned to the static <c>AminoAcidPolymer.StoreSequenceString</c> before digesting.</param>
        public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
        {
            const string fastaPath = "Resources/yeast_uniprot_120226.fasta";

            Console.WriteLine("**Start Digestion**");
            Stopwatch timer = new Stopwatch();
            timer.Start();

            List<Peptide> allPeptides = new List<Peptide>();
            List<Protein> allProteins = new List<Protein>();
            List<double> chargeTwoMzs = new List<double>();

            // Static switch: it stays set after this method returns.
            AminoAcidPolymer.StoreSequenceString = storeSequenceString;

            using (FastaReader fasta = new FastaReader(fastaPath))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(digested);

                        // Requesting the m/z forces the calculation of the mass and thus chemical formula,
                        // so that work is included in the timing.
                        chargeTwoMzs.Add(digested.ToMz(2));
                    }
                    allProteins.Add(current);
                }
            }

            timer.Stop();
            Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", allProteins.Count, allPeptides.Count, maxMissed);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);

            long workingSetMegabytes = Environment.WorkingSet / (1024 * 1024);
            Console.WriteLine("Memory used: {0:N0} MB", workingSetMegabytes);
            Console.WriteLine("**End Digestion**");
        }
        /// <summary>
        /// Runs protein grouping repeatedly while ramping the fraction of "identified" peptides from
        /// 0 up to 1 in steps of <paramref name="percentIdentifiedSteps"/>, printing the number of
        /// identified peptides and resulting protein groups for each step.
        /// </summary>
        /// <remarks>
        /// The ramp is driven by an integer step index rather than by repeatedly adding the step to a
        /// <c>double</c>. Accumulated rounding error (for example twenty additions of 0.05) can overshoot
        /// 1.0 and silently drop the final 100 % step.
        /// </remarks>
        /// <param name="protease">Protease passed to the digestion and to the grouping call.</param>
        /// <param name="percentIdentifiedSteps">Increment of the identified fraction between runs; must be greater than zero.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion and grouping calls.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="percentIdentifiedSteps"/> is zero, negative or NaN (the ramp would never terminate).
        /// </exception>
        public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            // Written as a negated comparison so that NaN is rejected as well.
            if (!(percentIdentifiedSteps > 0))
            {
                throw new ArgumentOutOfRangeException("percentIdentifiedSteps", percentIdentifiedSteps, "The step size must be greater than zero.");
            }

            List <Peptide> peps     = new List <Peptide>();
            List <Protein> proteins = new List <Protein>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peps.Add(peptide);
                    }
                    proteins.Add(protein);
                }
            }

            // Fixed seed to make it reproducible
            Random random = new Random(480912341);

            // Shuffle once; every step then takes a longer prefix of the same ordering.
            peps = peps.OrderBy(x => random.Next()).ToList();

            // Number of whole steps that fit into [0, 1]. The small epsilon keeps a quotient such as
            // 19.999999999999996 from being truncated to one step too few.
            int stepCount = (int)(1.0 / percentIdentifiedSteps + 1e-9);

            for (int step = 0; step <= stepCount; step++)
            {
                double percentIdentified = step * percentIdentifiedSteps;
                if (percentIdentified > 1.0)
                {
                    // Guard against a last-bit overshoot of the product.
                    percentIdentified = 1.0;
                }

                // Take the first x % to act as our identified peptides
                List <Peptide> identifiedPeptides = peps.Take((int)(peps.Count * percentIdentified)).ToList();

                List <ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();
                Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
            }
        }
Exemple #8
0
 /// <summary>
 /// Digestion benchmark: digests every protein in the bundled yeast FASTA file, computes the
 /// m/z at charge 2 for each peptide, and reports counts, elapsed time and working set.
 /// </summary>
 /// <param name="protease">Protease passed to the digestion.</param>
 /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
 /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
 /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
 /// <param name="storeSequenceString">Value assigned to the static <c>AminoAcidPolymer.StoreSequenceString</c> before digesting.</param>
 public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
 {
     const string fastaPath = "Resources/yeast_uniprot_120226.fasta";

     Console.WriteLine("**Start Digestion**");
     Stopwatch timer = new Stopwatch();
     timer.Start();

     List<Peptide> allPeptides = new List<Peptide>();
     List<Protein> allProteins = new List<Protein>();
     List<double> chargeTwoMzs = new List<double>();

     // Static switch: it stays set after this method returns.
     AminoAcidPolymer.StoreSequenceString = storeSequenceString;

     using (FastaReader fasta = new FastaReader(fastaPath))
     {
         foreach (Protein current in fasta.ReadNextProtein())
         {
             foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
             {
                 allPeptides.Add(digested);

                 // Requesting the m/z forces the calculation of the mass and thus chemical formula,
                 // so that work is included in the timing.
                 chargeTwoMzs.Add(digested.ToMz(2));
             }
             allProteins.Add(current);
         }
     }

     timer.Stop();
     Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", allProteins.Count, allPeptides.Count, maxMissed);
     Console.WriteLine("Time elapsed: {0}", timer.Elapsed);

     long workingSetMegabytes = System.Environment.WorkingSet / (1024 * 1024);
     Console.WriteLine("Memory used: {0:N0} MB", workingSetMegabytes);
     Console.WriteLine("**End Digestion**");
 }
Exemple #9
0
        /// <summary>
        /// Digests the bundled yeast FASTA file with the protease registered as "Trypsin",
        /// collects the monoisotopic mass of every peptide, and prints the elapsed time.
        /// </summary>
        public static void ExampleDigestion()
        {
            // Digestion settings for this example.
            const int maxMissedCleavages = 3;
            const int minPeptideLength = 5;
            const int maxPeptideLength = 50;
            const string fastaFilePath = "Resources/yeast_uniprot_120226.fasta";

            IProtease trypsin = Protease.GetProtease("Trypsin");
            List<double> peptideMasses = new List<double>();

            Stopwatch timer = new Stopwatch();
            timer.Start();

            using (FastaReader fasta = new FastaReader(fastaFilePath))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(trypsin, maxMissedCleavages, minPeptideLength, maxPeptideLength))
                    {
                        peptideMasses.Add(digested.MonoisotopicMass);
                    }
                }
            }

            // The collected masses are currently only gathered, not reported
            // (an average-mass printout used to follow here).
            timer.Stop();
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
        }
        /// <summary>
        /// Currently a no-op: the whole body is commented out, so calling this method does nothing
        /// and none of the parameters are used.
        /// </summary>
        /// <param name="protease">Unused while the body is disabled.</param>
        /// <param name="maxMissed">Unused while the body is disabled.</param>
        /// <param name="minLength">Unused while the body is disabled.</param>
        /// <param name="maxLength">Unused while the body is disabled.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            // NOTE(review): the disabled code below digested the FASTA file and built one protein group
            // per peptide via ProteinGroup.SetProteins / ProteinGroup.Group. Whether those members still
            // exist is not visible from here - confirm before re-enabling, or delete the dead code.

            //Console.WriteLine("**Start Protein Grouping**");
            //Stopwatch watch = new Stopwatch();
            //watch.Start();
            //List<Peptide> peps = new List<Peptide>();
            //List<Protein> prots = new List<Protein>();
            //using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            //{
            //    foreach (Protein protein in reader.ReadNextProtein())
            //    {
            //        foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
            //        {
            //            peps.Add(peptide);
            //        }
            //        prots.Add(protein);
            //    }
            //}

            //List<ProteinGroup> groups = new List<ProteinGroup>();
            //ProteinGroup.SetProteins(prots, protease, maxMissed, minLength, maxLength);
            //foreach (Peptide pep in peps)
            //{
            //    ProteinGroup pg = ProteinGroup.Group(pep);
            //    groups.Add(pg);
            //}

            //watch.Stop();
            //Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups using {2:N0} missed clevages", prots.Count, groups.Count, maxMissed);
            //Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            //Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet / (1024 * 1024));
            //Console.WriteLine("**End Digestion**");
        }
        /// <summary>
        /// Digests every protein in the bundled yeast FASTA file, picks a pseudo-random subset of the
        /// resulting peptides to stand in for "identified" peptides, groups the proteins from that
        /// subset, and writes counts and timings to the console.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion and to the grouping call.</param>
        /// <param name="percentIdentified">Fraction of all digested peptides to treat as identified.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion and grouping calls.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
        {
            const string fastaPath = "Resources/yeast_uniprot_120226.fasta";
            const int shuffleSeed = 480912341;

            Stopwatch timer = new Stopwatch();
            timer.Start();

            // Capacities are pre-sized to avoid repeated list growth while loading.
            List<Peptide> allPeptides = new List<Peptide>(1000000);
            List<Protein> allProteins = new List<Protein>(7000);

            using (FastaReader fasta = new FastaReader(fastaPath))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(digested);
                    }
                    allProteins.Add(current);
                }
            }

            long loadMilliseconds = timer.ElapsedMilliseconds;
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, loadMilliseconds);

            // From here on the stopwatch measures only the selection and grouping steps.
            timer.Restart();

            // Fixed seed: ordering by a random key shuffles the peptides the same way on every run.
            Random rng = new Random(shuffleSeed);
            int identifiedCount = (int)(allPeptides.Count * percentIdentified);
            List<Peptide> identified = allPeptides.OrderBy(x => rng.Next()).Take(identifiedCount).ToList();

            List<ProteinGroup> groups = ProteinGroup.GroupProteins(allProteins, protease, identified, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identified.Count);
            Console.WriteLine();
            Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
        }
Exemple #12
0
        /// <summary>
        /// Click handler for the digest button: looks up the protease selected in
        /// <c>lbxDigestion</c> and starts a background digestion of the organism selected in
        /// <c>lbxOrganisms</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnDigest_Click(object sender, EventArgs e)
        {
            // SelectedItem is null when nothing is selected; without this guard the
            // ToString() calls below would throw a NullReferenceException.
            if (lbxDigestion.SelectedItem == null || lbxOrganisms.SelectedItem == null)
            {
                return;
            }

            String    enzymeName = lbxDigestion.SelectedItem.ToString();
            IProtease protease   = Enzymes.AllEnzymes()[enzymeName];
            String    organism   = lbxOrganisms.SelectedItem.ToString();

            // The enzyme name doubles as the digestion name; no description is supplied.
            RunBackground(() =>
                          proteomeDb.GetOrganism(organism).Digest(protease, enzymeName, null, UpdateProgress));
        }
        /// <summary>
        /// Computes the protease termini count for every peptide of the spectrum and stores the
        /// largest of the distinct counts in <c>s.NumProteaseTermini</c>.
        /// </summary>
        /// <remarks>
        /// For each peptide the first and last characters of <c>Sequence</c> are passed as the
        /// before/after characters, together with <c>PureSequence</c> and '-' as the terminal character.
        /// <c>Max()</c> is called on the collected counts, so a spectrum without peptides is not handled here.
        /// </remarks>
        /// <param name="protease">Protease whose <c>GetProteaseTerminiCount</c> is evaluated.</param>
        /// <param name="s">Spectrum whose peptides are inspected and whose <c>NumProteaseTermini</c> is set.</param>
        public static void AssignNumProteaseTermini(this IProtease protease, IIdentifiedSpectrum s)
        {
            var distinctCounts = s.Peptides
                                  .Select(peptide =>
                                  {
                                      var leading  = peptide.Sequence[0];
                                      var trailing = peptide.Sequence[peptide.Sequence.Length - 1];
                                      return protease.GetProteaseTerminiCount(leading, peptide.PureSequence, trailing, '-');
                                  })
                                  .Distinct()
                                  .ToList();

            s.NumProteaseTermini = distinctCounts.Max();
        }
Exemple #14
0
 /// <summary>
 /// Creates the settings and, when a protease is supplied, looks up the digestion stored
 /// under that protease's name in the proteome database.
 /// </summary>
 /// <param name="proteomeDbPath">Stored in <c>ProteomeDbPath</c>; opened only when <paramref name="protease"/> is not null.</param>
 /// <param name="protease">Stored in <c>Protease</c>; may be null, in which case <c>Digestion</c> is not assigned here.</param>
 /// <param name="proteinMatchTypes">Stored in <c>MatchTypes</c>.</param>
 /// <param name="searchText">Stored in <c>SearchText</c>.</param>
 public ProteinMatchSettings(ProteomeDbPath proteomeDbPath, IProtease protease, ProteinMatchType proteinMatchTypes, String searchText)
 {
     this.ProteomeDbPath = proteomeDbPath;
     this.Protease = protease;

     if (null != protease)
     {
         // The database is opened only for the duration of the lookup.
         using (var db = proteomeDbPath.OpenProteomeDb())
         {
             this.Digestion = db.GetDigestion(protease.Name);
         }
     }

     this.MatchTypes = proteinMatchTypes;
     this.SearchText = searchText;
 }
Exemple #15
0
 /// <summary>
 /// Creates the settings and, when a protease is supplied, looks up the digestion stored
 /// under that protease's name in the proteome database.
 /// </summary>
 /// <param name="proteomeDbPath">Stored in <c>ProteomeDbPath</c>; opened only when <paramref name="protease"/> is not null.</param>
 /// <param name="protease">Stored in <c>Protease</c>; may be null, in which case <c>Digestion</c> is not assigned here.</param>
 /// <param name="proteinMatchTypes">Stored in <c>MatchTypes</c>.</param>
 /// <param name="searchText">Stored in <c>SearchText</c>.</param>
 public ProteinMatchSettings(ProteomeDbPath proteomeDbPath, IProtease protease, ProteinMatchType proteinMatchTypes, String searchText)
 {
     this.ProteomeDbPath = proteomeDbPath;
     this.Protease = protease;

     if (null != protease)
     {
         // The database is opened only for the duration of the lookup.
         using (var db = proteomeDbPath.OpenProteomeDb())
         {
             this.Digestion = db.GetDigestion(protease.Name);
         }
     }

     this.MatchTypes = proteinMatchTypes;
     this.SearchText = searchText;
 }
        /// <summary>
        /// Counts how many of the two ends of a peptide (0, 1 or 2) qualify as protease termini.
        /// </summary>
        /// <remarks>
        /// The leading end counts when the preceding character is 'M' and
        /// <paramref name="positionInProtein"/> is 2, or otherwise when the protease reports a cleavage
        /// site between <paramref name="beforeChar"/> and the first residue. The trailing end counts when
        /// the protease reports a cleavage site between the last residue and <paramref name="afterChar"/>.
        /// <paramref name="pureSeq"/> is indexed at both ends, so it is expected to be non-empty.
        /// </remarks>
        /// <param name="protease">Protease whose <c>IsCleavageSite</c> is consulted.</param>
        /// <param name="beforeChar">Character preceding the peptide.</param>
        /// <param name="pureSeq">Peptide sequence whose first and last characters are tested.</param>
        /// <param name="afterChar">Character following the peptide.</param>
        /// <param name="terminalChar">Terminal character forwarded to <c>IsCleavageSite</c>.</param>
        /// <param name="positionInProtein">Position of the peptide in the protein; only the value 2 is special-cased.</param>
        /// <returns>The number of qualifying ends.</returns>
        public static int GetNumProteaseTermini(this IProtease protease, char beforeChar, string pureSeq, char afterChar, char terminalChar, int positionInProtein)
        {
            // Short-circuit keeps the original behaviour: the protease is not consulted for the
            // leading end when the 'M' at position 2 rule already applies.
            bool leadingEndCounts = (beforeChar == 'M' && positionInProtein == 2)
                                    || protease.IsCleavageSite(beforeChar, pureSeq[0], terminalChar);

            bool trailingEndCounts = protease.IsCleavageSite(pureSeq[pureSeq.Length - 1], afterChar, terminalChar);

            return (leadingEndCounts ? 1 : 0) + (trailingEndCounts ? 1 : 0);
        }
Exemple #17
0
        /// <summary>
        /// Morpheus search benchmark: digests the bundled yeast FASTA file into a set of distinct
        /// peptides, loads them into a <see cref="MorpheusSearchEngine"/>, opens the bundled Thermo raw
        /// file, and reports the elapsed time and the process working set.
        /// </summary>
        /// <remarks>The actual spectrum search is currently commented out; the raw file is only opened and disposed.</remarks>
        /// <param name="protease">Protease passed to the digestion.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = new Stopwatch();
            watch.Start();

            // Generate peptide candidates; the set drops peptides that compare equal.
            HashSet<Peptide> peptides = new HashSet<Peptide>();
            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();
            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);
            using (MSDataFile msDataFile = new ThermoRawFile("Resources/ThermoRawFileMS1MS2.raw"))
            {
                //SortedMaxSizedContainer<PeptideSpectralMatch> psms = engine.Search(msDataFile.Where(scan => scan.MsnOrder > 1));

                //foreach (MSDataScan scan in msDataFile.Where(scan => scan.MsnOrder > 1))
                //{
                //    List<PeptideSpectralMatch> psms = engine.Search(scan);
                //    Console.WriteLine("{0} {1}", scan.SpectrumNumber, psms.Count);
                //}
            }
            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Morpheus Search**");
        }
        /// <summary>
        /// Protein-grouping benchmark: digests the bundled yeast FASTA file, treats a pseudo-random
        /// fraction of the peptides as identified, groups the proteins, and reports counts, the total
        /// elapsed time (loading included) and the process working set.
        /// </summary>
        /// <param name="protease">Protease passed to the digestion and to the grouping call.</param>
        /// <param name="percentIdentified">Fraction of all digested peptides to treat as identified.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion and grouping calls.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            const string fastaPath = "Resources/yeast_uniprot_120226.fasta";
            const int shuffleSeed = 480912341;

            Console.WriteLine("**Start Protein Grouping**");
            Stopwatch timer = new Stopwatch();
            timer.Start();

            List<Protein> allProteins = new List<Protein>();
            List<Peptide> allPeptides = new List<Peptide>();

            using (FastaReader fasta = new FastaReader(fastaPath))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(digested);
                    }
                    allProteins.Add(current);
                }
            }

            long loadMilliseconds = timer.ElapsedMilliseconds;
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, loadMilliseconds);

            // A fixed seed keeps the random selection reproducible between runs.
            Random rng = new Random(shuffleSeed);

            // Shuffle by a random sort key, then keep the leading fraction as the identified set.
            int identifiedCount = (int)(allPeptides.Count * percentIdentified);
            List<Peptide> identified = allPeptides.OrderBy(x => rng.Next()).Take(identifiedCount).ToList();

            List<ProteinGroup> groups = ProteinGroup.GroupProteins(allProteins, protease, identified, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();

            timer.Stop();
            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identified.Count);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);

            long workingSetMegabytes = System.Environment.WorkingSet / (1024 * 1024);
            Console.WriteLine("Memory used: {0:N0} MB", workingSetMegabytes);
            Console.WriteLine("**END Protein Grouping**");
        }
Exemple #19
0
 /// <summary>
 /// Single-protease convenience overload: wraps <paramref name="protease"/> in a one-element
 /// array and delegates to the multi-protease <c>GroupProteins</c> overload.
 /// </summary>
 /// <param name="proteins">Proteins forwarded unchanged.</param>
 /// <param name="protease">The single protease to group with.</param>
 /// <param name="observeredSequences">Observed sequences forwarded unchanged.</param>
 /// <param name="peptideComparer">Equality comparer forwarded unchanged.</param>
 /// <param name="MaxMissedCleavages">Maximum missed cleavages forwarded unchanged.</param>
 /// <param name="minPepPerProtein">
 /// NOTE(review): accepted but not forwarded to the delegate call, so it currently has no effect
 /// here - confirm whether the multi-protease overload should receive it.
 /// </param>
 /// <returns>Whatever the multi-protease overload returns.</returns>
 public static IEnumerable <ProteinGroup> GroupProteins(IEnumerable <Protein> proteins, IProtease protease, IEnumerable <IAminoAcidSequence> observeredSequences, IEqualityComparer <IAminoAcidSequence> peptideComparer, int MaxMissedCleavages = 3, int minPepPerProtein = 1)
 {
     IProtease[] singleProtease = { protease };

     return GroupProteins(proteins, singleProtease, observeredSequences, peptideComparer, MaxMissedCleavages);
 }
Exemple #20
0
 /// <summary>
 /// Digests this protein into peptides using a single protease.
 /// </summary>
 /// <remarks>Delegates to the multi-protease overload with a one-element protease array.</remarks>
 /// <param name="protease">The protease to digest with</param>
 /// <param name="maxMissedCleavages">The max number of missed cleavages generated, 0 means no missed cleavages</param>
 /// <param name="minLength">The minimum length (in amino acids) of the peptide</param>
 /// <param name="maxLength">The maximum length (in amino acids) of the peptide</param>
 /// <param name="initiatorMethonine">Forwarded unchanged to the multi-protease overload</param>
 /// <param name="includeModifications">Forwarded unchanged to the multi-protease overload</param>
 /// <param name="semiDigestion">Forwarded unchanged to the multi-protease overload</param>
 /// <returns>The digested peptides produced by the multi-protease overload</returns>
 public virtual IEnumerable <Peptide> Digest(IProtease protease, int maxMissedCleavages = 3, int minLength = 1, int maxLength = int.MaxValue, bool initiatorMethonine = true, bool includeModifications = false, bool semiDigestion = false)
 {
     IProtease[] singleProtease = { protease };

     return Digest(singleProtease, maxMissedCleavages, minLength, maxLength, initiatorMethonine, includeModifications, semiDigestion);
 }
Exemple #21
0
 /// <summary>
 /// Digests the base sequence of <paramref name="polymer"/> with a single protease by
 /// delegating to the multi-protease overload.
 /// </summary>
 /// <param name="polymer">Polymer whose <c>BaseSequence</c> is digested.</param>
 /// <param name="protease">The single protease; wrapped in a one-element array.</param>
 /// <param name="maxMissedCleavages">Forwarded unchanged.</param>
 /// <param name="minLength">Forwarded unchanged.</param>
 /// <param name="maxLength">Forwarded unchanged.</param>
 /// <param name="methionineInitiator">Forwarded unchanged.</param>
 /// <param name="semiDigestion">Forwarded unchanged.</param>
 /// <returns>The sequences produced by the multi-protease overload.</returns>
 public static IEnumerable <string> Digest(AminoAcidPolymer polymer, IProtease protease, int maxMissedCleavages, int minLength, int maxLength, bool methionineInitiator, bool semiDigestion)
 {
     var baseSequence = polymer.BaseSequence;
     IProtease[] singleProtease = { protease };

     return Digest(baseSequence, singleProtease, maxMissedCleavages, minLength, maxLength, methionineInitiator, semiDigestion);
 }
Exemple #22
0
 /// <summary>
 /// Digests <paramref name="sequence"/> with <paramref name="protease"/> using fixed defaults:
 /// 3, 1, <c>int.MaxValue</c>, <c>true</c> and <c>false</c> for the five remaining arguments of the
 /// full overload.
 /// </summary>
 /// <param name="sequence">Polymer to digest.</param>
 /// <param name="protease">Protease to digest with.</param>
 /// <returns>The sequences produced by the full overload.</returns>
 public static IEnumerable <string> Digest(AminoAcidPolymer sequence, IProtease protease)
 {
     // Names follow the argument order of the call below.
     const int  thirdArgument   = 3;
     const int  fourthArgument  = 1;
     const int  fifthArgument   = int.MaxValue;
     const bool sixthArgument   = true;
     const bool seventhArgument = false;

     return Digest(sequence, protease, thirdArgument, fourthArgument, fifthArgument, sixthArgument, seventhArgument);
 }
Exemple #23
0
        /// <summary>
        /// Digests every protein of this organism with <paramref name="protease"/> and stores the
        /// resulting peptides, and their links to the proteins, in the proteome database inside a
        /// single transaction.
        /// </summary>
        /// <remarks>
        /// Peptides longer than <c>MAX_PEPTIDE_LENGTH</c> are skipped. Each distinct peptide sequence is
        /// inserted once; further occurrences only add a peptide-to-protein link row.
        /// Both database commands are disposed on every exit path, including cancellation.
        /// </remarks>
        /// <param name="protease">Protease used for digestion; also supplies <c>MaxMissedCleavages</c> for the stored digestion.</param>
        /// <param name="name">Name stored on the new digestion record.</param>
        /// <param name="description">Description stored on the new digestion record.</param>
        /// <param name="progressMonitor">Progress callback; when it returns <c>false</c> the method returns null without committing.</param>
        /// <returns>The new <see cref="Digestion"/>, or <c>null</c> when the progress monitor cancelled the operation.</returns>
        public Digestion Digest(IProtease protease, String name, String description, ProgressMonitor progressMonitor)
        {
            DbOrganism       organism;
            DbDigestion      digestion;
            List <DbProtein> proteins;

            using (ISession session = ProteomeDb.OpenWriteSession())
            {
                organism = GetEntity(session);
                session.BeginTransaction();
                digestion = new DbDigestion
                {
                    Name               = name,
                    Description        = description,
                    Organism           = organism,
                    MaxMissedCleavages = protease.MaxMissedCleavages
                };
                session.Save(digestion);
                // NOTE(review): on the early returns below the transaction is never committed;
                // presumably disposing the session discards it - confirm.
                if (!progressMonitor.Invoke("Listing proteins", 0))
                {
                    return(null);
                }
                proteins = new List <DbProtein>(organism.Proteins);

                // Maps each peptide sequence already inserted to its database row id.
                Dictionary <String, long> digestedPeptideIds
                    = new Dictionary <string, long>();
                const String sqlPeptide =
                    "INSERT INTO ProteomeDbDigestedPeptide (Digestion, MissedCleavages, Sequence, Version) VALUES(@Digestion,@MissedCleavages,@Sequence,1);select last_insert_rowid();";
                const String sqlPeptideProtein =
                    "INSERT INTO ProteomeDbDigestedPeptideProtein (StartIndex, Peptide, Protein, Version) VALUES(?,?,?,1);";

                using (var commandPeptide = session.Connection.CreateCommand())
                {
                    commandPeptide.CommandText = sqlPeptide;
                    commandPeptide.Parameters.Add(new SQLiteParameter("@Digestion"));
                    commandPeptide.Parameters.Add(new SQLiteParameter("@MissedCleavages"));
                    commandPeptide.Parameters.Add(new SQLiteParameter("@Sequence"));

                    using (var commandProtein = session.Connection.CreateCommand())
                    {
                        commandProtein.CommandText = sqlPeptideProtein;
                        // NOTE(review): the statement uses positional '?' placeholders while these
                        // parameters carry names; values are assigned by index below - confirm the
                        // provider binds them in order.
                        commandProtein.Parameters.Add(new SQLiteParameter("@StartIndex"));
                        commandProtein.Parameters.Add(new SQLiteParameter("@Peptide"));
                        commandProtein.Parameters.Add(new SQLiteParameter("@Protein"));

                        for (int i = 0; i < proteins.Count; i++)
                        {
                            if (!progressMonitor.Invoke("Digesting " + proteins.Count
                                                        + " proteins", 100 * i / proteins.Count))
                            {
                                return(null);
                            }
                            Protein protein = new Protein(this, proteins[i]);
                            foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                            {
                                if (digestedPeptide.Sequence.Length > MAX_PEPTIDE_LENGTH)
                                {
                                    continue;
                                }
                                long digestedPeptideId;
                                if (!digestedPeptideIds.TryGetValue(digestedPeptide.Sequence, out digestedPeptideId))
                                {
                                    // First occurrence of this sequence: insert it and remember the new row id.
                                    ((SQLiteParameter)commandPeptide.Parameters[0]).Value = digestion.Id;
                                    ((SQLiteParameter)commandPeptide.Parameters[1]).Value = digestedPeptide.MissedCleavages;
                                    ((SQLiteParameter)commandPeptide.Parameters[2]).Value = digestedPeptide.Sequence;
                                    digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                                    digestedPeptideIds.Add(digestedPeptide.Sequence, digestedPeptideId);
                                }
                                ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptide.Index;
                                ((SQLiteParameter)commandProtein.Parameters[1]).Value = digestedPeptideId;
                                ((SQLiteParameter)commandProtein.Parameters[2]).Value = proteins[i].Id;
                                commandProtein.ExecuteNonQuery();
                            }
                        }
                    }
                }

                if (!progressMonitor.Invoke("Committing transaction", 99))
                {
                    return(null);
                }
                session.Transaction.Commit();
                progressMonitor.Invoke(
                    "Digested " + proteins.Count + " proteins into " + digestedPeptideIds.Count + " unique peptides",
                    100);
                return(new Digestion(this, digestion));
            }
        }
Exemple #24
0
 /// <summary>
 /// Digests this protein into peptides using a single protease.
 /// </summary>
 /// <remarks>Delegates to the multi-protease overload with a one-element protease array.</remarks>
 /// <param name="protease">The protease to digest with</param>
 /// <param name="maxMissedCleavages">The max number of missed cleavages generated, 0 means no missed cleavages</param>
 /// <param name="minLength">The minimum length (in amino acids) of the peptide</param>
 /// <param name="maxLength">The maximum length (in amino acids) of the peptide</param>
 /// <param name="initiatorMethonine">Forwarded unchanged to the multi-protease overload</param>
 /// <param name="includeModifications">Forwarded unchanged to the multi-protease overload</param>
 /// <param name="semiDigestion">Forwarded unchanged to the multi-protease overload</param>
 /// <returns>The digested peptides produced by the multi-protease overload</returns>
 public virtual IEnumerable<Peptide> Digest(IProtease protease, int maxMissedCleavages = 3, int minLength = 1, int maxLength = int.MaxValue, bool initiatorMethonine = true, bool includeModifications = false, bool semiDigestion = false)
 {
     IProtease[] singleProtease = { protease };

     return Digest(singleProtease, maxMissedCleavages, minLength, maxLength, initiatorMethonine, includeModifications, semiDigestion);
 }
Exemple #25
0
        public Digestion Digest(IProtease protease, ProgressMonitor progressMonitor)
        {
            using (ISession session = OpenWriteSession())
            {
                DbDigestion      dbDigestion       = GetDbDigestion(protease.Name);
                HashSet <string> existingSequences = new HashSet <string>();
                using (var transaction = session.BeginTransaction())
                {
                    if (dbDigestion != null)
                    {
                        if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                        {
                            return(new Digestion(this, dbDigestion));
                        }
                        if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                        {
                            return(null);
                        }
                        IQuery query = session.CreateQuery("SELECT P.Sequence FROM "                                          // Not L10N
                                                           + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion") // Not L10N
                                       .SetParameter("Digestion", dbDigestion);                                               // Not L10N
                        List <String> listSequences = new List <string>();
                        query.List(listSequences);
                        existingSequences.UnionWith(listSequences);
                        dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                        session.Update(dbDigestion);
                    }
                    else
                    {
                        dbDigestion = new DbDigestion
                        {
                            Name = protease.Name,
                            MinSequenceLength = MIN_SEQUENCE_LENGTH,
                            MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                        };
                        session.Save(dbDigestion);
                    }
                    if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_proteins, 0))
                    {
                        return(null);
                    }
                    List <DbProtein> proteins = new List <DbProtein>();
                    session.CreateCriteria(typeof(DbProtein)).List(proteins);
                    Dictionary <String, long> digestedPeptideIds
                        = new Dictionary <string, long>();
                    const String sqlPeptide =
                        "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();";     // Not L10N
                    using (var commandPeptide = session.Connection.CreateCommand())
                        using (var commandProtein = session.Connection.CreateCommand())
                        {
                            commandPeptide.CommandText = sqlPeptide;
                            commandPeptide.Parameters.Add(new SQLiteParameter());
                            commandPeptide.Parameters.Add(new SQLiteParameter());
                            const String sqlPeptideProtein =
                                "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
                            commandProtein.CommandText = sqlPeptideProtein;
                            commandProtein.Parameters.Add(new SQLiteParameter());
                            commandProtein.Parameters.Add(new SQLiteParameter());
                            commandProtein.Parameters.Add(new SQLiteParameter());
                            for (int i = 0; i < proteins.Count; i++)
                            {
                                var proteinSequences = new HashSet <string>();
                                if (!progressMonitor.Invoke(string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins, proteins.Count), 100 * i / proteins.Count))
                                {
                                    return(null);
                                }
                                Protein protein = new Protein(ProteomeDbPath, proteins[i]);

                                foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                                {
                                    if (digestedPeptide.Sequence.Length < dbDigestion.MinSequenceLength)
                                    {
                                        continue;
                                    }
                                    String truncatedSequence = digestedPeptide.Sequence.Substring(
                                        0, Math.Min(digestedPeptide.Sequence.Length, dbDigestion.MaxSequenceLength));
                                    if (existingSequences.Contains(truncatedSequence))
                                    {
                                        continue;
                                    }
                                    if (proteinSequences.Contains(truncatedSequence))
                                    {
                                        continue;
                                    }
                                    proteinSequences.Add(truncatedSequence);
                                    long digestedPeptideId;
                                    if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                                    {
                                        ((SQLiteParameter)commandPeptide.Parameters[0]).Value = dbDigestion.Id;
                                        ((SQLiteParameter)commandPeptide.Parameters[1]).Value = truncatedSequence;
                                        digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                                        digestedPeptideIds.Add(truncatedSequence, digestedPeptideId);
                                    }
                                    ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptideId;
                                    ((SQLiteParameter)commandProtein.Parameters[1]).Value = protein.Id;
                                    commandProtein.ExecuteNonQuery();
                                }
                            }
                        }
                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return(null);
                    }
                    transaction.Commit();

                    AnalyzeDb(session);
                    progressMonitor.Invoke(
                        string.Format(Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides,
                                      proteins.Count, digestedPeptideIds.Count),
                        100);
                }
                return(new Digestion(this, dbDigestion));
            }
        }
Exemple #26
0
        /// <summary>
        /// Digests every protein in the proteome database with <paramref name="protease"/> and stores the
        /// resulting peptide sequences (cut to at most the digestion's MaxSequenceLength characters) together
        /// with their peptide-to-protein links, all inside one transaction.
        /// </summary>
        /// <param name="protease">Supplies the digestion name and performs the per-protein digest.</param>
        /// <param name="progressMonitor">
        /// Invoked with a status message and a percentage. Whenever it returns false (other than on the
        /// final 100% report, whose result is ignored) this method returns null without committing.
        /// </param>
        /// <returns>
        /// A <see cref="Digestion"/> wrapping the stored record, or null when the progress monitor returned false.
        /// </returns>
        public Digestion Digest(IProtease protease, ProgressMonitor progressMonitor)
        {
            using (ISession session = OpenWriteSession())
            {
                DbDigestion dbDigestion = GetDbDigestion(protease.Name);
                var knownSequences = new HashSet<string>();
                using (var transaction = session.BeginTransaction())
                {
                    if (dbDigestion == null)
                    {
                        // No record for this protease yet: create one with the current length limits.
                        dbDigestion = new DbDigestion
                        {
                            Name = protease.Name,
                            MinSequenceLength = MIN_SEQUENCE_LENGTH,
                            MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                        };
                        session.Save(dbDigestion);
                    }
                    else
                    {
                        // A record that already covers the current maximum length needs no further work.
                        if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                        {
                            return new Digestion(this, dbDigestion);
                        }
                        if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                        {
                            return null;
                        }
                        // Load the sequences already stored for this digestion so they are not inserted again.
                        string hql = "SELECT P.Sequence FROM " // Not L10N
                                     + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion"; // Not L10N
                        var storedSequences = new List<string>();
                        session.CreateQuery(hql)
                            .SetParameter("Digestion", dbDigestion) // Not L10N
                            .List(storedSequences);
                        knownSequences.UnionWith(storedSequences);
                        dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                        session.Update(dbDigestion);
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_Digest_Listing_proteins, 0))
                    {
                        return null;
                    }
                    var proteins = new List<DbProtein>();
                    session.CreateCriteria(typeof(DbProtein)).List(proteins);
                    int proteinCount = proteins.Count;

                    // Row ids of the peptides inserted by this call, keyed by truncated sequence.
                    var digestedPeptideIds = new Dictionary<string, long>();
                    using (var peptideInsert = session.Connection.CreateCommand())
                    using (var linkInsert = session.Connection.CreateCommand())
                    {
                        peptideInsert.CommandText =
                            "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();"; // Not L10N
                        var digestionIdParam = new SQLiteParameter();
                        var sequenceParam = new SQLiteParameter();
                        peptideInsert.Parameters.Add(digestionIdParam);
                        peptideInsert.Parameters.Add(sequenceParam);

                        linkInsert.CommandText =
                            "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
                        var linkPeptideParam = new SQLiteParameter();
                        var linkProteinParam = new SQLiteParameter();
                        linkInsert.Parameters.Add(linkPeptideParam);
                        linkInsert.Parameters.Add(linkProteinParam);
                        // NOTE(review): a third parameter is registered although the statement has only two
                        // placeholders and nothing ever assigns it; kept so the command is set up exactly as before.
                        linkInsert.Parameters.Add(new SQLiteParameter());

                        for (int proteinIndex = 0; proteinIndex < proteinCount; proteinIndex++)
                        {
                            string digestingMessage = string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins, proteinCount);
                            if (!progressMonitor.Invoke(digestingMessage, 100 * proteinIndex / proteinCount))
                            {
                                return null;
                            }
                            // Truncated sequences already linked to the current protein.
                            var seenForProtein = new HashSet<string>();
                            Protein protein = new Protein(ProteomeDbPath, proteins[proteinIndex]);

                            foreach (DigestedPeptide digestedPeptide in protease.Digest(protein))
                            {
                                string fullSequence = digestedPeptide.Sequence;
                                if (fullSequence.Length < dbDigestion.MinSequenceLength)
                                {
                                    continue;
                                }
                                int keptLength = Math.Min(fullSequence.Length, dbDigestion.MaxSequenceLength);
                                string truncatedSequence = fullSequence.Substring(0, keptLength);

                                // Skip sequences stored by an earlier digest, and repeats within this protein
                                // (Add returns false when the sequence was already seen).
                                if (knownSequences.Contains(truncatedSequence) || !seenForProtein.Add(truncatedSequence))
                                {
                                    continue;
                                }

                                long digestedPeptideId;
                                if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                                {
                                    // First occurrence in this run: insert the peptide and remember its row id.
                                    digestionIdParam.Value = dbDigestion.Id;
                                    sequenceParam.Value = truncatedSequence;
                                    digestedPeptideId = Convert.ToInt64(peptideInsert.ExecuteScalar());
                                    digestedPeptideIds[truncatedSequence] = digestedPeptideId;
                                }
                                linkPeptideParam.Value = digestedPeptideId;
                                linkProteinParam.Value = protein.Id;
                                linkInsert.ExecuteNonQuery();
                            }
                        }
                    }

                    if (!progressMonitor.Invoke(Resources.ProteomeDb_AddFastaFile_Saving_changes, 99))
                    {
                        return null;
                    }
                    transaction.Commit();

                    AnalyzeDb(session);
                    string summary = string.Format(
                        Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides,
                        proteinCount, digestedPeptideIds.Count);
                    progressMonitor.Invoke(summary, 100);
                }
                return new Digestion(this, dbDigestion);
            }
        }
Exemple #27
0
        /// <summary>
        /// Digests every protein in the proteome database with <paramref name="protease"/> in two phases:
        /// first the proteins are digested in memory (in parallel batches, reported as 0-50% progress),
        /// then the resulting peptides and peptide-to-protein links are written inside one transaction
        /// (reported as 50-100% progress).
        /// </summary>
        /// <param name="protease">Supplies the digestion name and performs the digest of each protein sequence.</param>
        /// <param name="maxMissedCleavages">Passed through to <c>protease.DigestSequence</c>.</param>
        /// <param name="progressMonitor">Receives progress updates; also receives the final 100% status.</param>
        /// <param name="status">Progress status handed by reference to UpdateProgressAndCheckForCancellation.</param>
        /// <param name="delayDbIndexing">When true, the AnalyzeDb call after a successful commit is skipped.</param>
        /// <returns>
        /// The digestion for <paramref name="protease"/>. Null is returned when
        /// UpdateProgressAndCheckForCancellation returns false (presumably a cancellation request),
        /// when the database holds no proteins, or when the commit fails.
        /// </returns>
        public Digestion Digest(IProtease protease, int maxMissedCleavages, IProgressMonitor progressMonitor, ref IProgressStatus status, bool delayDbIndexing = false)
        {
            using (ISession session = OpenWriteSession())
            {
                DbDigestion      dbDigestion = GetDbDigestion(protease.Name, session);
                HashSet <string> existingSequences;  // TODO(bspratt) - the logic around this seems fishy, investigate.  Probably never actually been used.  Part of fix for issue #304, probably
                if (dbDigestion != null)
                {
                    // A stored digestion that already covers the current maximum length is returned as-is.
                    if (dbDigestion.MaxSequenceLength >= MAX_SEQUENCE_LENGTH)
                    {
                        return(new Digestion(this, dbDigestion));
                    }
                    if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_Digest_Listing_existing_peptides, 0))
                    {
                        return(null);
                    }
                    // Load the sequences already stored for this digestion; they are excluded from the new digest below.
                    IQuery query = session.CreateQuery("SELECT P.Sequence FROM "                                          // Not L10N
                                                       + typeof(DbDigestedPeptide) + " P WHERE P.Digestion = :Digestion") // Not L10N
                                   .SetParameter("Digestion", dbDigestion);                                               // Not L10N
                    List <String> listSequences = new List <string>();
                    query.List(listSequences);
                    existingSequences             = new HashSet <string>(listSequences);
                    // Only the in-memory object changes here; it is persisted by SaveOrUpdate inside the transaction below.
                    dbDigestion.MaxSequenceLength = MAX_SEQUENCE_LENGTH;
                }
                else
                {
                    // No record for this protease yet: build one (saved later, inside the transaction).
                    dbDigestion = new DbDigestion
                    {
                        Name = protease.Name,
                        MinSequenceLength = MIN_SEQUENCE_LENGTH,
                        MaxSequenceLength = MAX_SEQUENCE_LENGTH,
                    };
                    existingSequences = new HashSet <string>();
                }
                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_Digest_Listing_proteins, 0))
                {
                    return(null);
                }
                var dbProteins = new List <DbProtein>();
                session.CreateCriteria(typeof(DbProtein)).List(dbProteins);

                // Digest the proteins
                var proteinCount = dbProteins.Count;
                // Nothing to digest; returning here also keeps the divisions by proteinCount below safe.
                if (proteinCount == 0)
                {
                    return(null);
                }

                var       proteinsList       = new Protein[proteinCount];
                var       truncatedSequences = new HashSet <string> [proteinCount]; // One hashset of sequences for each protein of interest
                const int N_DIGEST_THREADS   = 16;                                  // Arbitrary value - do a progress/cancel check every nth protein
                string    message            = string.Format(Resources.ProteomeDb_Digest_Digesting__0__proteins, proteinCount);
                // Phase 1: digest in batches of N_DIGEST_THREADS proteins; progress runs from 0 to 50%.
                for (var i = 0; i < proteinCount; i += N_DIGEST_THREADS)
                {
                    var endRange = Math.Min(proteinCount, i + N_DIGEST_THREADS);
                    if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, message, 50 * endRange / proteinCount))
                    {
                        return(null);
                    }
                    // The Protein objects for this batch are created sequentially, before the parallel section.
                    for (int ii = i; ii < endRange; ii++)
                    {
                        var protein = new Protein(ProteomeDbPath, dbProteins[ii]);
                        proteinsList[ii] = protein;
                    }
                    // Each iteration writes only its own slot of truncatedSequences and only reads existingSequences.
                    Parallel.For(i, endRange, ii =>
                    {
                        var proteinSequences   = new HashSet <string>(); // We only save the first dbDigestion.MaxSequenceLength characters of each peptide so collisions are likely
                        truncatedSequences[ii] = proteinSequences;       // One hashset of sequences for each protein of interest

                        foreach (var digestedPeptide in protease.DigestSequence(proteinsList[ii].Sequence, maxMissedCleavages, null))
                        {
                            if (digestedPeptide.Sequence.Length < dbDigestion.MinSequenceLength)
                            {
                                continue;
                            }
                            var truncatedSequence = digestedPeptide.Sequence.Substring(
                                0, Math.Min(digestedPeptide.Sequence.Length, dbDigestion.MaxSequenceLength));
                            if (!existingSequences.Contains(truncatedSequence))
                            {
                                proteinSequences.Add(truncatedSequence);
                            }
                        }
                    });
                }

                // Now write to db
                if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, Resources.ProteomeDb_AddFastaFile_Saving_changes, 50))
                {
                    return(null);
                }
                // committed starts true and is cleared only when transaction.Commit() throws.
                bool committed = true;
                int  digestedPeptideIdsCount;
                try
                {
                    using (var transaction = session.BeginTransaction())
                    {
                        session.SaveOrUpdate(dbDigestion);

                        // Row ids of the peptides inserted by this call, keyed by truncated sequence.
                        Dictionary <String, long> digestedPeptideIds
                            = new Dictionary <string, long>();
                        const String sqlPeptide =
                            "INSERT INTO ProteomeDbDigestedPeptide (Digestion, Sequence) VALUES(?,?);select last_insert_rowid();";     // Not L10N
                        using (var commandPeptide = session.Connection.CreateCommand())
                            using (var commandProtein = session.Connection.CreateCommand())
                            {
                                commandPeptide.CommandText = sqlPeptide;
                                commandPeptide.Parameters.Add(new SQLiteParameter());
                                commandPeptide.Parameters.Add(new SQLiteParameter());
                                const String sqlPeptideProtein =
                                    "INSERT INTO ProteomeDbDigestedPeptideProtein (Peptide, Protein) VALUES(?,?);"; // Not L10N
                                commandProtein.CommandText = sqlPeptideProtein;
                                commandProtein.Parameters.Add(new SQLiteParameter());
                                commandProtein.Parameters.Add(new SQLiteParameter());
                                // NOTE(review): the statement has two placeholders and only Parameters[0] and [1]
                                // are assigned below, so this third parameter looks unused - confirm before removing.
                                commandProtein.Parameters.Add(new SQLiteParameter());
                                // Phase 2: one pass over the proteins; progress runs from 50 to 100%.
                                for (int i = 0; i < proteinCount; i++)
                                {
                                    var protein = proteinsList[i];
                                    if (!UpdateProgressAndCheckForCancellation(progressMonitor, ref status, message, 50 * (proteinCount + i) / proteinCount))
                                    {
                                        return(null);
                                    }
                                    foreach (var truncatedSequence in truncatedSequences[i])
                                    {
                                        long digestedPeptideId;
                                        // Insert a peptide row only the first time its sequence is seen in this run,
                                        // then reuse the returned row id for every protein that contains it.
                                        if (!digestedPeptideIds.TryGetValue(truncatedSequence, out digestedPeptideId))
                                        {
                                            ((SQLiteParameter)commandPeptide.Parameters[0]).Value = dbDigestion.Id;
                                            ((SQLiteParameter)commandPeptide.Parameters[1]).Value = truncatedSequence;
                                            digestedPeptideId = Convert.ToInt64(commandPeptide.ExecuteScalar());
                                            digestedPeptideIds.Add(truncatedSequence, digestedPeptideId);
                                        }
                                        ((SQLiteParameter)commandProtein.Parameters[0]).Value = digestedPeptideId;
                                        ((SQLiteParameter)commandProtein.Parameters[1]).Value = protein.Id;
                                        commandProtein.ExecuteNonQuery();
                                    }
                                }
                            }
                        // A failing commit is swallowed here and recorded in the flag; the method then returns null.
                        try
                        {
                            transaction.Commit();
                        }
                        catch (Exception)
                        {
                            committed = false;
                        }
                        digestedPeptideIdsCount = digestedPeptideIds.Count;
                    }
                }
                catch (Exception)
                {
                    // Exceptions raised after a failed commit (e.g. while the using block disposes the transaction)
                    // are suppressed; anything thrown while committed is still true is rethrown.
                    if (!committed)
                    {
                        return(null); // Interrupted
                    }
                    else
                    {
                        throw;
                    }
                }
                if (committed && !delayDbIndexing)
                {
                    AnalyzeDb(session); // This runs asynchronously, and interferes with writes
                }
                if (committed)
                {
                    // The final report publishes a fresh ProgressStatus at 100%; the ref status argument is not assigned here.
                    progressMonitor.UpdateProgress(new ProgressStatus(string.Format(Resources.ProteomeDb_Digest_Digested__0__proteins_into__1__unique_peptides, proteinCount, digestedPeptideIdsCount)).ChangePercentComplete(100));
                }
                return(committed ? new Digestion(this, dbDigestion) : null);
            }
        }
        /// <summary>
        /// Digests the proteins read from Resources/yeast_uniprot_120226.fasta, shuffles the resulting
        /// peptides with a fixed seed, and prints how many protein groups are produced as a growing
        /// fraction of those peptides is treated as identified (0, step, 2*step, ... up to 100%).
        /// </summary>
        /// <param name="protease">Protease used to digest each protein and to group the proteins.</param>
        /// <param name="percentIdentifiedSteps">
        /// Increment of the identified fraction between successive runs, expressed as a fraction of 1
        /// (0.05 = 5%). Must be a finite value greater than zero.
        /// </param>
        /// <param name="maxMissed">Maximum number of missed cleavages passed to the digest and to the grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to the digest.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digest.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="percentIdentifiedSteps"/> is zero, negative, NaN or infinite.
        /// </exception>
        public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            // A zero or negative step never reaches 100%, so the ramp would not terminate; NaN and
            // infinity cannot describe a ramp either. The negated comparison also rejects NaN.
            if (!(percentIdentifiedSteps > 0) || double.IsInfinity(percentIdentifiedSteps))
            {
                throw new ArgumentOutOfRangeException("percentIdentifiedSteps", percentIdentifiedSteps,
                    "The step size must be a finite value greater than zero.");
            }

            List<Peptide> peps = new List<Peptide>();
            List<Protein> proteins = new List<Protein>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    peps.AddRange(protein.Digest(protease, maxMissed, minLength, maxLength));
                    proteins.Add(protein);
                }
            }

            // Fixed seed to make it reproducible
            Random random = new Random(480912341);
            peps = peps.OrderBy(x => random.Next()).ToList();

            // Each fraction is derived from an integer step index instead of repeatedly adding the step
            // to a running double: accumulated rounding error can push the running sum just past 1.0,
            // which silently drops the final 100% run. The tolerance absorbs the remaining rounding error
            // of a single multiplication.
            const double tolerance = 1e-9;
            for (int step = 0; step * percentIdentifiedSteps <= 1 + tolerance; step++)
            {
                double percentIdentified = Math.Min(step * percentIdentifiedSteps, 1.0);

                // Take the first x % to act as our identified peptides
                List<Peptide> identifiedPeptides = peps.Take((int) (peps.Count*percentIdentified)).ToList();

                List<ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();
                Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
            }
        }