Exemplo n.º 1
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c>, picks a random
        /// subset of the resulting peptides to stand in for identified peptides, groups the proteins
        /// against that subset, and prints counts and timings to the console.
        /// </summary>
        /// <param name="protease">Protease handed to <c>Protein.Digest</c> and <c>ProteinGroup.GroupProteins</c>.</param>
        /// <param name="percentIdentified">Fraction of all digested peptides to treat as identified (0.01 = 1 %).</param>
        /// <param name="maxMissed">Maximum missed cleavages, passed to both the digestion and the grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
        {
            Stopwatch timer = new Stopwatch();
            timer.Start();

            // Pre-sized so the lists do not have to grow repeatedly while loading.
            List<Peptide> allPeptides = new List<Peptide>(1000000);
            List<Protein> allProteins = new List<Protein>(7000);

            using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein entry in fasta.ReadNextProtein())
                {
                    foreach (Peptide fragment in entry.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        allPeptides.Add(fragment);
                    }

                    allProteins.Add(entry);
                }
            }

            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);

            // From here on the timer measures only the sampling and grouping work.
            timer.Restart();

            // Fixed seed so every run selects the same peptides.
            Random rng = new Random(480912341);

            // Shuffle by a random sort key, then keep the leading fraction as the identified peptides.
            int sampleSize = (int)(allPeptides.Count * percentIdentified);
            List<Peptide> sampledPeptides = allPeptides
                .OrderBy(p => rng.Next())
                .Take(sampleSize)
                .ToList();

            AminoAcidLeucineSequenceComparer sequenceComparer = new AminoAcidLeucineSequenceComparer();
            List<ProteinGroup> groups = ProteinGroup
                .GroupProteins(allProteins, protease, sampledPeptides, sequenceComparer, maxMissed)
                .ToList();

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, groups.Count, sampledPeptides.Count);
            Console.WriteLine();
            Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Groups the proteins read from a FASTA file by delegating to the overload that takes a protein
 /// sequence and a collection of proteases.
 /// </summary>
 /// <param name="fastaFile">Path of the FASTA file to read proteins from.</param>
 /// <param name="protease">The single protease to use; it is wrapped in a one-element array for the delegated call.</param>
 /// <param name="observeredSequences">Observed sequences forwarded unchanged to the delegated overload.</param>
 /// <param name="peptideComparer">Equality comparer forwarded unchanged to the delegated overload.</param>
 /// <param name="MaxMissedCleavages">Maximum missed cleavages forwarded unchanged to the delegated overload.</param>
 /// <returns>The protein groups, fully computed before this method returns.</returns>
 public static IEnumerable<ProteinGroup> GroupProteins(string fastaFile, IProtease protease, IEnumerable<IAminoAcidSequence> observeredSequences, IEqualityComparer<IAminoAcidSequence> peptideComparer, int MaxMissedCleavages = 3)
 {
     using (FastaReader fasta = new FastaReader(fastaFile))
     {
         // Materialise the result while the reader is still open. fasta.ReadNextProtein() is consumed
         // by the delegated call; if that call (or the reader) is lazily evaluated, returning its
         // result directly would let the caller enumerate it only after the reader has been disposed.
         return new List<ProteinGroup>(GroupProteins(fasta.ReadNextProtein(), new[] { protease }, observeredSequences, peptideComparer, MaxMissedCleavages));
     }
 }
Exemplo n.º 3
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c>, shuffles the
        /// peptides once with a fixed seed, and then runs protein grouping on a growing leading fraction
        /// of that shuffled list (0 %, one step, two steps, ... up to 100 %), printing the number of
        /// peptides and protein groups for each fraction.
        /// </summary>
        /// <param name="protease">Protease handed to <c>Protein.Digest</c> and <c>ProteinGroup.GroupProteins</c>.</param>
        /// <param name="percentIdentifiedSteps">Increment of the identified fraction per iteration (0.05 = 5 %). Must be a finite value greater than zero.</param>
        /// <param name="maxMissed">Maximum missed cleavages, passed to both the digestion and the grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="percentIdentifiedSteps"/> is not a finite positive number (the ramp would never terminate).
        /// </exception>
        public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            // Written as a negated comparison so that NaN is rejected as well.
            if (!(percentIdentifiedSteps > 0) || double.IsInfinity(percentIdentifiedSteps))
            {
                throw new ArgumentOutOfRangeException("percentIdentifiedSteps", percentIdentifiedSteps, "The step size must be a finite value greater than zero.");
            }

            List<Peptide> peps = new List<Peptide>();
            List<Protein> proteins = new List<Protein>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peps.Add(peptide);
                    }
                    proteins.Add(protein);
                }
            }

            // Fixed seed to make it reproducible
            Random random = new Random(480912341);

            // Shuffle once; every ramp step below then takes a prefix of the same ordering.
            peps = peps.OrderBy(x => random.Next()).ToList();

            // Slack for the rounding error in "step * percentIdentifiedSteps" when comparing against 1.
            const double tolerance = 1e-9;

            // The fraction is derived from an integer counter instead of repeatedly adding the step to a
            // double: accumulated rounding error could otherwise push the last value slightly above 1
            // and silently drop the final (100 %) iteration.
            for (int step = 0; ; step++)
            {
                double percentIdentified = step * percentIdentifiedSteps;
                if (percentIdentified > 1 + tolerance)
                {
                    break;
                }

                // Clamp so a value that is only "1" within the tolerance never asks for more than all peptides.
                percentIdentified = Math.Min(percentIdentified, 1.0);

                // Take the first x % to act as our identified peptides
                List<Peptide> identifiedPeptides = peps.Take((int)(peps.Count * percentIdentified)).ToList();

                List<ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();
                Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c> into a set of
        /// distinct peptides, loads them into a <c>MorpheusSearchEngine</c> configured with fixed mass
        /// tolerances, and prints the elapsed time and the process working set.
        /// </summary>
        /// <param name="protease">Protease handed to <c>Protein.Digest</c>.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Morpheus Search**");
            Stopwatch watch = new Stopwatch();

            watch.Start();

            // Generate peptide candidates. A set is used so that peptides which compare equal
            // (per Peptide's own equality) are stored only once.
            HashSet<Peptide> peptides = new HashSet<Peptide>();

            using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein protein in reader.ReadNextProtein())
                {
                    foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        peptides.Add(peptide);
                    }
                }
            }

            MSSearchEngine engine = new MorpheusSearchEngine();

            engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
            engine.ProductMassTolerance   = Tolerance.FromPPM(10);

            engine.LoadPeptides(peptides);

            watch.Stop();
            Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
            // Environment.WorkingSet is in bytes; the integer division converts it to whole megabytes.
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Morpheus Search**");
        }
Exemplo n.º 5
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c> with the protease
        /// registered under the name "Trypsin", collects the monoisotopic mass of each peptide, and
        /// prints how long the whole pass took.
        /// </summary>
        public static void ExampleDigestion()
        {
            // Digestion settings for this example.
            const string databasePath = "Resources/yeast_uniprot_120226.fasta";
            const int allowedMissedCleavages = 3;
            const int shortestPeptide = 5;
            const int longestPeptide = 50;

            IProtease enzyme = Protease.GetProtease("Trypsin");
            List<double> peptideMasses = new List<double>();

            // Only the read-and-digest pass is timed; the set-up above is excluded.
            Stopwatch timer = new Stopwatch();
            timer.Start();

            using (FastaReader fasta = new FastaReader(databasePath))
            {
                foreach (Protein entry in fasta.ReadNextProtein())
                {
                    foreach (Peptide fragment in entry.Digest(enzyme, allowedMissedCleavages, shortestPeptide, longestPeptide))
                    {
                        double mass = fragment.MonoisotopicMass;
                        peptideMasses.Add(mass);
                    }
                }
            }

            timer.Stop();
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c>, keeps all proteins
        /// and peptides in memory together with each peptide's m/z at charge 2, and prints counts,
        /// elapsed time and the process working set.
        /// </summary>
        /// <param name="protease">Protease handed to <c>Protein.Digest</c>.</param>
        /// <param name="maxMissed">Maximum missed cleavages passed to the digestion.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        /// <param name="storeSequenceString">Value assigned to the static <c>AminoAcidPolymer.StoreSequenceString</c> before digesting.</param>
        public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
        {
            Console.WriteLine("**Start Digestion**");

            Stopwatch timer = new Stopwatch();
            timer.Start();

            List<Peptide> digestedPeptides = new List<Peptide>();
            List<Protein> loadedProteins = new List<Protein>();
            List<double> doublyChargedMzs = new List<double>();

            // Static switch on the polymer type; it is set before any protein is read.
            AminoAcidPolymer.StoreSequenceString = storeSequenceString;

            using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein entry in fasta.ReadNextProtein())
                {
                    foreach (Peptide fragment in entry.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        digestedPeptides.Add(fragment);

                        // Asking for the m/z forces the mass (and thus the chemical formula) to be calculated.
                        doublyChargedMzs.Add(fragment.ToMz(2));
                    }

                    loadedProteins.Add(entry);
                }
            }

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", loadedProteins.Count, digestedPeptides.Count, maxMissed);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
            // Working set is reported in bytes; integer division yields whole megabytes.
            Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**End Digestion**");
        }
Exemplo n.º 7
0
        /// <summary>
        /// Writes a CSV file with one row per protein read from <c>Resources/yeast_uniprot_120226.fasta</c>.
        /// Each row holds the protein description, its length and its monoisotopic mass.
        /// </summary>
        /// <param name="outputFilePath">Path of the CSV file to create.</param>
        public static void BasicCsvWriting(string outputFilePath)
        {
            Console.WriteLine("Writing file to: " + outputFilePath);

            // Create a stream writer to output data to a stream. In this case, the stream points to a file path on the
            // computer and is saved as a file. The using statement is roughly the same as doing the following:
            // StreamWriter writer = new StreamWriter(outputFilePath);
            // try { writer.WriteLine("Hello World"); }
            // finally { writer.Dispose(); }
            using (StreamWriter writer = new StreamWriter(outputFilePath))
            {
                // Now that the stream is open, we can write a line of text to it to serve as the header row.
                // CSV formats are just text files with fields separated by commas.
                writer.WriteLine("Protein Name,# of Amino Acids,Mass (da)");

                // Open a connection to a fasta file (very similar syntax as the StreamWriter above)
                using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
                {
                    // Loop over each protein in the fasta file
                    foreach (Protein protein in reader.ReadNextProtein())
                    {
                        // StringBuilder objects are an effective tool for constructing strings to write to files.
                        StringBuilder sb = new StringBuilder();

                        // The description is free text and may itself contain commas or quotes, so it is
                        // escaped first; otherwise such a description would be split into extra columns.
                        sb.Append(EscapeCsvField(protein.Description));
                        sb.Append(','); // we need to add the delimiter after each field we add as well.

                        // Add the next item
                        sb.Append(protein.Length);
                        sb.Append(',');

                        // Format the mass with the invariant culture: under a culture that uses a decimal
                        // comma, the default formatting would put a comma inside the number and corrupt the row.
                        sb.Append(Convert.ToString(protein.MonoisotopicMass, System.Globalization.CultureInfo.InvariantCulture));

                        // No delimiter is needed after the last field is added.

                        // Now to write this string to the file itself.
                        // We convert the StringBuilder object (named: sb) to a string, and then write it on its own line in the
                        // file writer (named: writer)
                        writer.WriteLine(sb.ToString());
                    }
                }
            } // The file is automatically written and closed after exiting the using {} block.
        }

        /// <summary>
        /// Returns <paramref name="field"/> in a form that is safe to place in a comma-separated row:
        /// unchanged when it contains no comma, double quote or line break, otherwise wrapped in double
        /// quotes with embedded double quotes doubled. A null value becomes an empty field.
        /// </summary>
        private static string EscapeCsvField(string field)
        {
            if (string.IsNullOrEmpty(field))
            {
                return string.Empty;
            }

            if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
            {
                return field;
            }

            return "\"" + field.Replace("\"", "\"\"") + "\"";
        }
Exemplo n.º 8
0
        /// <summary>
        /// Digests every protein read from <c>Resources/yeast_uniprot_120226.fasta</c>, randomly selects a
        /// fraction of the peptides to act as identified peptides, groups the proteins against that
        /// selection, and prints counts, total elapsed time and the process working set.
        /// </summary>
        /// <param name="protease">Protease handed to <c>Protein.Digest</c> and <c>ProteinGroup.GroupProteins</c>.</param>
        /// <param name="percentIdentified">Fraction of all digested peptides to treat as identified (0.05 = 5 %).</param>
        /// <param name="maxMissed">Maximum missed cleavages, passed to both the digestion and the grouping.</param>
        /// <param name="minLength">Minimum peptide length passed to the digestion.</param>
        /// <param name="maxLength">Maximum peptide length passed to the digestion.</param>
        public static void Start(IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
        {
            Console.WriteLine("**Start Protein Grouping**");

            // A single timer spans loading and grouping; it is never restarted in between.
            Stopwatch timer = new Stopwatch();
            timer.Start();

            List<Protein> loadedProteins = new List<Protein>();
            List<Peptide> digestedPeptides = new List<Peptide>();

            using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
            {
                foreach (Protein entry in fasta.ReadNextProtein())
                {
                    foreach (Peptide fragment in entry.Digest(protease, maxMissed, minLength, maxLength))
                    {
                        digestedPeptides.Add(fragment);
                    }

                    loadedProteins.Add(entry);
                }
            }

            Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", digestedPeptides.Count, loadedProteins.Count, timer.ElapsedMilliseconds);

            // Seeded generator so the same peptides are selected on every run.
            Random rng = new Random(480912341);

            // Order by a random key and keep the leading fraction as the identified peptides.
            int identifiedCount = (int)(digestedPeptides.Count * percentIdentified);
            List<Peptide> identified = digestedPeptides
                .OrderBy(p => rng.Next())
                .Take(identifiedCount)
                .ToList();

            AminoAcidLeucineSequenceComparer sequenceComparer = new AminoAcidLeucineSequenceComparer();
            List<ProteinGroup> groups = ProteinGroup
                .GroupProteins(loadedProteins, protease, identified, sequenceComparer, maxMissed)
                .ToList();

            timer.Stop();

            Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", loadedProteins.Count, groups.Count, identified.Count);
            Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
            // Working set is reported in bytes; integer division yields whole megabytes.
            Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet / (1024 * 1024));
            Console.WriteLine("**END Protein Grouping**");
        }