ReadNextProtein() public method

public ReadNextProtein ( ) : IEnumerable
return IEnumerable
示例#1
0
 /// <summary>
 /// Opens the FASTA file at <paramref name="fastaFile"/> and passes the sequence returned by
 /// <c>FastaReader.ReadNextProtein()</c> to the <c>LoadProteins</c> overload that accepts it.
 /// </summary>
 /// <param name="fastaFile">Path handed to the <c>FastaReader</c> constructor.</param>
 public void LoadProteins(string fastaFile)
 {
     using (FastaReader fasta = new FastaReader(fastaFile))
     {
         // The overload runs while the reader is still open; the reader is disposed on exit.
         var proteins = fasta.ReadNextProtein();
         LoadProteins(proteins);
     }
 }
示例#2
0
        /// <summary>
        /// Digests every protein in the bundled yeast FASTA file with <paramref name="protease"/>,
        /// loads the distinct peptides into a <c>MorpheusSearchEngine</c>, and prints the elapsed
        /// time and the process working set to the console.
        /// </summary>
        /// <param name="protease">Protease passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to <c>Protein.Digest</c>.</param>
        /// <param name="minLength">Minimum peptide length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum peptide length passed to <c>Protein.Digest</c>.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = Stopwatch.StartNew();

            // Generate peptide candidates. A HashSet is used so that peptides which compare
            // equal (per Peptide's own equality) are stored only once.
            HashSet<Peptide> peptides = new HashSet<Peptide>();
            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();
            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);

            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            // Environment.WorkingSet is in bytes; integer division converts it to whole megabytes.
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet/(1024*1024));
            Console.WriteLine("**End Morpheus Search**");
        }
        /// <summary>
        /// Digests every protein in the bundled yeast FASTA file, picks a reproducible random
        /// fraction of the resulting peptides as "identified", groups the proteins with
        /// <c>ProteinGroup.GroupProteins</c>, and prints counts, elapsed time and working set.
        /// </summary>
        /// <param name="protease">Protease used for digestion and passed on to the grouping call.</param>
        /// <param name="percentIdentified">Fraction of all peptides to treat as identified.</param>
        /// <param name="maxMissed">Maximum missed cleavages for digestion and grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum peptide length passed to <c>Protein.Digest</c>.</param>
        public static void Start(IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Protein Grouping**");
            Stopwatch timer = Stopwatch.StartNew();
            List<Peptide> allPeptides = new List<Peptide>();
            List<Protein> allProteins = new List<Protein>();

            using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    allPeptides.AddRange(current.Digest(protease, maxMissed, minLength, maxLength));
                    allProteins.Add(current);
                }
            }
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);

            // A constant seed keeps the random selection identical from run to run.
            Random rng = new Random(480912341);

            // Shuffle by sorting on random keys, then keep the leading fraction as the identified set.
            int identifiedCount = (int) (allPeptides.Count*percentIdentified);
            List<Peptide> identifiedPeptides = allPeptides
                .OrderBy(p => rng.Next())
                .Take(identifiedCount)
                .ToList();

            List<ProteinGroup> groups = ProteinGroup
                .GroupProteins(allProteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed)
                .ToList();

            timer.Stop();
            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identifiedPeptides.Count);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
            Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet/(1024*1024));
            Console.WriteLine("**END Protein Grouping**");
        }
        /// <summary>
        /// Example of protein grouping: digests the bundled yeast FASTA file, selects a
        /// reproducible random fraction of peptides as "identified", groups the proteins and
        /// prints the counts plus the time taken by the selection and grouping phase.
        /// </summary>
        /// <param name="protease">Protease used for digestion and passed on to the grouping call.</param>
        /// <param name="percentIdentified">Fraction of all peptides to treat as identified.</param>
        /// <param name="maxMissed">Maximum missed cleavages for digestion and grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum peptide length passed to <c>Protein.Digest</c>.</param>
        public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
        {
            Stopwatch timer = Stopwatch.StartNew();

            // Capacities are pre-sized up front to avoid repeated list growth while loading.
            List<Peptide> allPeptides = new List<Peptide>(1000000);
            List<Protein> allProteins = new List<Protein>(7000);

            using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein current in fasta.ReadNextProtein())
                {
                    foreach (Peptide digested in current.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(digested);
                    }
                    allProteins.Add(current);
                }
            }
            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);

            // From here on the stopwatch measures only selection and grouping.
            timer.Restart();

            Random rng = new Random(480912341);

            // Shuffle by sorting on random keys, then keep the leading fraction as the identified set.
            int identifiedCount = (int) (allPeptides.Count*percentIdentified);
            List<Peptide> identifiedPeptides = allPeptides
                .OrderBy(p => rng.Next())
                .Take(identifiedCount)
                .ToList();

            List<ProteinGroup> groups = ProteinGroup
                .GroupProteins(allProteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed)
                .ToList();

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, identifiedPeptides.Count);
            Console.WriteLine();
            Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
        }
示例#5
0
 /// <summary>
 /// Digests every protein in the bundled yeast FASTA file, computing the m/z of each peptide
 /// at charge 2, then prints protein/peptide counts, elapsed time and the process working set.
 /// </summary>
 /// <param name="protease">Protease passed to <c>Protein.Digest</c>.</param>
 /// <param name="maxMissed">Maximum missed cleavages passed to <c>Protein.Digest</c>.</param>
 /// <param name="minLength">Minimum peptide length passed to <c>Protein.Digest</c>.</param>
 /// <param name="maxLength">Maximum peptide length passed to <c>Protein.Digest</c>.</param>
 /// <param name="storeSequenceString">
 /// Value assigned to the static <c>AminoAcidPolymer.StoreSequenceString</c> before digestion;
 /// this method does not restore the previous value afterwards.
 /// </param>
 public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
 {
     Console.WriteLine("**Start Digestion**");
     Stopwatch timer = Stopwatch.StartNew();

     List<Peptide> digestedPeptides = new List<Peptide>();
     List<Protein> loadedProteins = new List<Protein>();
     List<double> mzValues = new List<double>();

     AminoAcidPolymer.StoreSequenceString = storeSequenceString;

     using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
     {
         foreach (Protein current in fasta.ReadNextProtein())
         {
             foreach (Peptide piece in current.Digest(protease, maxMissed, minLength, maxLength))
             {
                 digestedPeptides.Add(piece);

                 // Requesting the m/z forces the mass (and thus the chemical formula) to be calculated.
                 double mz = piece.ToMz(2);
                 mzValues.Add(mz);
             }
             loadedProteins.Add(current);
         }
     }

     timer.Stop();
     Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", loadedProteins.Count, digestedPeptides.Count, maxMissed);
     Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
     Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet / (1024 * 1024));
     Console.WriteLine("**End Digestion**");
 }
示例#6
0
        /// <summary>
        /// Writes a CSV file with a header row followed by one row per protein in the bundled
        /// yeast FASTA file: description, length and monoisotopic mass.
        /// </summary>
        /// <remarks>
        /// The description is quoted/escaped when it contains a comma, double quote or line break,
        /// and numbers are formatted with the invariant culture, so the output stays a valid CSV
        /// regardless of the protein descriptions or the machine's regional settings.
        /// </remarks>
        /// <param name="outputFilePath">Path of the CSV file to create; passed to <c>StreamWriter</c>.</param>
        public static void BasicCsvWriting(string outputFilePath)
        {
            Console.WriteLine("Writing file to: " + outputFilePath);

            // Numeric fields are formatted with the invariant culture so the decimal separator is
            // always '.'; a culture that uses ',' would otherwise split the mass into two columns.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Create a stream writer to output data to a stream. In this case, the stream points to a file path on the
            // computer and is saved as a file. The using statement is the same as doing the following:
            // StreamWriter writer = new StreamWriter(outputFilePath);
            // try { writer.WriteLine("Hello World"); }
            // finally { writer.Dispose(); } // flushes and closes the file, even if an exception is thrown
            using (StreamWriter writer = new StreamWriter(outputFilePath))
            {
                // Now that the stream is open, we can write a line of text to it to serve as the header row.
                // CSV formats are just text files with fields separated by commas.
                writer.WriteLine("Protein Name,# of Amino Acids,Mass (da)");

                // Open a connection to a fasta file (very similar syntax as the StreamWriter above)
                using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
                {
                    // Loop over each protein in the fasta file
                    foreach (Protein protein in reader.ReadNextProtein())
                    {
                        // StringBuilder objects are an effective tool for constructing strings to write to files.
                        StringBuilder sb = new StringBuilder();

                        // Free-text fields must be escaped: a description containing a comma or quote
                        // would otherwise shift every following column in that row.
                        sb.Append(EscapeCsvField(protein.Description));
                        sb.Append(','); // we need to add the delimiter after each field we add as well.

                        // Add the next item
                        sb.Append(Convert.ToString(protein.Length, invariant));
                        sb.Append(',');

                        // Convert the mass to text explicitly so the invariant culture is used.
                        sb.Append(Convert.ToString(protein.MonoisotopicMass, invariant));

                        // No delimiter is needed after the last field is added.

                        // Now to write this string to the file itself.
                        // We convert the StringBuilder object (named: sb) to a string, and then write it on its own line in the
                        // file writer (named: writer)
                        writer.WriteLine(sb.ToString());
                    }
                }
            } // The file is automatically written and closed after exiting the using {} block.
        }

        /// <summary>
        /// Escapes a single CSV field following RFC 4180: fields containing a comma, double quote,
        /// carriage return or line feed are wrapped in double quotes with embedded quotes doubled.
        /// </summary>
        /// <param name="field">Raw field text; <c>null</c> is treated as an empty field.</param>
        /// <returns>The field text, quoted and escaped only when required.</returns>
        private static string EscapeCsvField(string field)
        {
            if (string.IsNullOrEmpty(field))
            {
                return string.Empty;
            }

            if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
            {
                return field;
            }

            return "\"" + field.Replace("\"", "\"\"") + "\"";
        }
示例#7
0
        /// <summary>
        /// Digests every protein in the bundled yeast FASTA file with <paramref name="protease"/>,
        /// loads the distinct peptides into a <c>MorpheusSearchEngine</c>, opens the example Thermo
        /// raw file, and prints the elapsed time and the process working set to the console.
        /// </summary>
        /// <remarks>
        /// The actual search over the raw file is currently commented out, so the raw file is only
        /// opened and disposed.
        /// </remarks>
        /// <param name="protease">Protease passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to <c>Protein.Digest</c>.</param>
        /// <param name="minLength">Minimum peptide length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum peptide length passed to <c>Protein.Digest</c>.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = Stopwatch.StartNew();

            // Generate peptide candidates. A HashSet is used so that peptides which compare
            // equal (per Peptide's own equality) are stored only once.
            HashSet<Peptide> peptides = new HashSet<Peptide>();
            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();
            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);
            using (MSDataFile msDataFile = new ThermoRawFile("Resources/ThermoRawFileMS1MS2.raw"))
            {
                //SortedMaxSizedContainer<PeptideSpectralMatch> psms = engine.Search(msDataFile.Where(scan => scan.MsnOrder > 1));

                //foreach (MSDataScan scan in msDataFile.Where(scan => scan.MsnOrder > 1))
                //{
                //    List<PeptideSpectralMatch> psms = engine.Search(scan);
                //    Console.WriteLine("{0} {1}", scan.SpectrumNumber, psms.Count);
                //}
            }
            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            // Environment.WorkingSet is in bytes; integer division converts it to whole megabytes.
            Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Morpheus Search**");
        }
示例#8
0
 /// <summary>
 /// Example of digestion: digests every protein in the bundled yeast FASTA file with trypsin
 /// (up to 3 missed cleavages, peptide lengths 5 to 50), collects the monoisotopic mass of each
 /// peptide, and prints the elapsed time.
 /// </summary>
 public static void ExampleDigestion()
 {
     const string yeastFasta = "Resources/yeast_uniprot_120226.fasta";
     const int missedCleavageLimit = 3;
     const int shortestPeptide = 5;
     const int longestPeptide = 50;

     IProtease trypsin = Protease.GetProtease("Trypsin");
     List<double> peptideMasses = new List<double>();
     Stopwatch timer = Stopwatch.StartNew();

     using (FastaReader fasta = new FastaReader(yeastFasta))
     {
         foreach (Protein current in fasta.ReadNextProtein())
         {
             foreach (Peptide piece in current.Digest(trypsin, missedCleavageLimit, shortestPeptide, longestPeptide))
             {
                 double mass = piece.MonoisotopicMass;
                 peptideMasses.Add(mass);
             }
         }
     }

     // Reporting of the average mass is disabled:
     //Console.WriteLine("Average Peptide Mass = {0:F4}", peptideMasses.Average());
     timer.Stop();
     Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
 }
示例#9
0
 /// <summary>
 /// Opens the FASTA file at <paramref name="fastaFile"/> and passes the sequence returned by
 /// <c>FastaReader.ReadNextProtein()</c> to the <c>LoadProteins</c> overload that accepts it.
 /// </summary>
 /// <param name="fastaFile">Path handed to the <c>FastaReader</c> constructor.</param>
 public void LoadProteins(string fastaFile)
 {
     using (FastaReader fasta = new FastaReader(fastaFile))
     {
         // The overload runs while the reader is still open; the reader is disposed on exit.
         var proteins = fasta.ReadNextProtein();
         LoadProteins(proteins);
     }
 }
        /// <summary>
        /// Digests every protein in the bundled yeast FASTA file, shuffles the peptides with a
        /// fixed seed, and then runs protein grouping for an increasing fraction of "identified"
        /// peptides (0, step, 2*step, ... up to at most 1), printing the peptide and protein-group
        /// counts for each fraction.
        /// </summary>
        /// <param name="protease">Protease used for digestion and passed on to the grouping call.</param>
        /// <param name="percentIdentifiedSteps">Increment of the identified fraction per iteration; must be a finite value greater than zero.</param>
        /// <param name="maxMissed">Maximum missed cleavages for digestion and grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to <c>Protein.Digest</c>.</param>
        /// <param name="maxLength">Maximum peptide length passed to <c>Protein.Digest</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="percentIdentifiedSteps"/> is zero, negative, NaN or infinite.
        /// </exception>
        public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            // A non-positive (or NaN) step would never reach 1 and the ramp would loop forever.
            if (!(percentIdentifiedSteps > 0) || double.IsInfinity(percentIdentifiedSteps))
            {
                throw new ArgumentOutOfRangeException("percentIdentifiedSteps", percentIdentifiedSteps, "Step size must be a finite value greater than zero.");
            }

            List<Peptide> peps = new List<Peptide>();
            List<Protein> proteins = new List<Protein>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peps.Add(peptide);
                    }
                    proteins.Add(protein);
                }
            }

            // Fixed seed to make it reproducible
            Random random = new Random(480912341);
            peps = peps.OrderBy(x => random.Next()).ToList();

            // The fraction is derived from an integer step index instead of repeatedly adding the
            // step to a double: accumulated rounding error in a running sum can overshoot 1 and
            // silently drop the final (100 %) iteration. The small tolerance, relative to the step,
            // absorbs the rounding error of a single multiplication.
            double upperBound = 1.0 + percentIdentifiedSteps * 1e-6;
            for (int step = 0; step * percentIdentifiedSteps <= upperBound; step++)
            {
                // Clamp so the tolerance can never request more than 100 % of the peptides.
                double percentIdentified = Math.Min(step * percentIdentifiedSteps, 1.0);

                // Take the first x % to act as our identified peptides
                List<Peptide> identifiedPeptides = peps.Take((int) (peps.Count*percentIdentified)).ToList();

                List<ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();
                Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
            }
        }