/// <summary>
/// Digests every protein read from "Resources/yeast_uniprot_120226.fasta", shuffles the resulting
/// peptides with a fixed-seed random generator, treats the leading fraction as "identified" and
/// groups the proteins based on those peptides. Counts and timings are written to the console.
/// </summary>
/// <param name="protease">Protease handed to both the digestion and the grouping call.</param>
/// <param name="percentIdentified">Fraction of the digested peptides to treat as identified.</param>
/// <param name="maxMissed">Maximum missed cleavages passed to digestion and grouping.</param>
/// <param name="minLength">Minimum peptide length passed to digestion.</param>
/// <param name="maxLength">Maximum peptide length passed to digestion.</param>
public static void ExampleProteinGrouping(IProtease protease, double percentIdentified = 0.01, int maxMissed = 3, int minLength = 5, int maxLength = 50)
{
    Stopwatch timer = Stopwatch.StartNew();

    List<Peptide> allPeptides = new List<Peptide>(1000000);
    List<Protein> allProteins = new List<Protein>(7000);

    using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein entry in fasta.ReadNextProtein())
        {
            foreach (Peptide digested in entry.Digest(protease, maxMissed, minLength, maxLength))
            {
                allPeptides.Add(digested);
            }

            allProteins.Add(entry);
        }
    }

    Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);

    // From here on the timer measures only the sampling and grouping work.
    timer.Restart();

    // Fixed seed so the shuffled order is the same on every run.
    Random rng = new Random(480912341);

    // Shuffle, then keep the leading x % as the identified peptides.
    int identifiedCount = (int)(allPeptides.Count * percentIdentified);
    IEnumerable<Peptide> shuffled = allPeptides.OrderBy(x => rng.Next());
    List<Peptide> identifiedPeptides = shuffled.Take(identifiedCount).ToList();

    IEnumerable<ProteinGroup> grouping = ProteinGroup.GroupProteins(allProteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed);
    List<ProteinGroup> proteinGroups = grouping.ToList();

    timer.Stop();

    Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, proteinGroups.Count, identifiedPeptides.Count);
    Console.WriteLine();
    Console.WriteLine("Time elapsed: {0} ms", timer.ElapsedMilliseconds);
}
/// <summary>
/// Groups the proteins read from a FASTA file, using a single protease, by delegating to the
/// overload that accepts a protein sequence and a collection of proteases.
/// </summary>
/// <param name="fastaFile">Path of the FASTA file handed to <c>FastaReader</c>.</param>
/// <param name="protease">The single protease; wrapped in a one-element array for the delegate call.</param>
/// <param name="observeredSequences">Observed sequences forwarded to the grouping overload.</param>
/// <param name="peptideComparer">Equality comparer forwarded to the grouping overload.</param>
/// <param name="MaxMissedCleavages">Maximum missed cleavages forwarded to the grouping overload.</param>
/// <returns>The protein groups, fully materialised before the FASTA reader is disposed.</returns>
public static IEnumerable<ProteinGroup> GroupProteins(string fastaFile, IProtease protease, IEnumerable<IAminoAcidSequence> observeredSequences, IEqualityComparer<IAminoAcidSequence> peptideComparer, int MaxMissedCleavages = 3)
{
    using (FastaReader fasta = new FastaReader(fastaFile))
    {
        // Copy the result into a list while the reader is still open. Returning the sequence
        // directly would let a lazily evaluated result be enumerated only after the using block
        // has already disposed the reader it reads from.
        return new List<ProteinGroup>(
            GroupProteins(fasta.ReadNextProtein(), new[] { protease }, observeredSequences, peptideComparer, MaxMissedCleavages));
    }
}
/// <summary>
/// Digests every protein read from "Resources/yeast_uniprot_120226.fasta", shuffles the peptides
/// once with a fixed seed, and then runs protein grouping repeatedly while ramping the fraction of
/// "identified" peptides from 0 up to 1 in increments of <paramref name="percentIdentifiedSteps"/>.
/// One console line (peptide count and protein-group count) is written per step.
/// </summary>
/// <param name="protease">Protease handed to both the digestion and the grouping call.</param>
/// <param name="percentIdentifiedSteps">Increment of the identified fraction between steps; must be greater than zero.</param>
/// <param name="maxMissed">Maximum missed cleavages passed to digestion and grouping.</param>
/// <param name="minLength">Minimum peptide length passed to digestion.</param>
/// <param name="maxLength">Maximum peptide length passed to digestion.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="percentIdentifiedSteps"/> is zero, negative or NaN (the ramp would never reach 1).
/// </exception>
public static void StartRamp(IProtease protease, double percentIdentifiedSteps = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
{
    // A non-positive (or NaN) step can never advance the ramp towards 1.
    if (!(percentIdentifiedSteps > 0))
    {
        throw new ArgumentOutOfRangeException("percentIdentifiedSteps", percentIdentifiedSteps, "The step size must be greater than zero.");
    }

    List<Peptide> peps = new List<Peptide>();
    List<Protein> proteins = new List<Protein>();

    using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
            {
                peps.Add(peptide);
            }

            proteins.Add(protein);
        }
    }

    // Fixed seed to make it reproducible
    Random random = new Random(480912341);
    peps = peps.OrderBy(x => random.Next()).ToList();

    // Absorbs representation error in step * percentIdentifiedSteps so that a step size which
    // divides 1 evenly always reaches the final 100 % step.
    const double tolerance = 1e-9;

    // The fraction is derived from an integer step index rather than accumulated with +=, so
    // floating-point rounding error does not build up across iterations.
    for (int step = 0; ; step++)
    {
        double percentIdentified = step * percentIdentifiedSteps;
        if (percentIdentified > 1 + tolerance)
        {
            break;
        }

        // Never ask for more than 100 % of the peptides.
        percentIdentified = Math.Min(percentIdentified, 1.0);

        // Take the first x % to act as our identified peptides
        List<Peptide> identifiedPeptides = peps.Take((int)(peps.Count * percentIdentified)).ToList();

        List<ProteinGroup> proteinGroups = ProteinGroup.GroupProteins(proteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed).ToList();

        Console.WriteLine("{0} peptides {1} protein groups", identifiedPeptides.Count, proteinGroups.Count);
    }
}
/// <summary>
/// Builds the set of distinct peptide candidates by digesting every protein read from
/// "Resources/yeast_uniprot_120226.fasta", loads them into a <c>MorpheusSearchEngine</c>
/// configured with a 100 ppm precursor and 10 ppm product mass tolerance, and writes the elapsed
/// time and process working set to the console.
/// </summary>
/// <param name="protease">Protease used to digest each protein.</param>
/// <param name="maxMissed">Maximum missed cleavages passed to digestion.</param>
/// <param name="minLength">Minimum peptide length passed to digestion.</param>
/// <param name="maxLength">Maximum peptide length passed to digestion.</param>
public static void Start(IProtease protease, int maxMissed = 3, int minLength = 5, int maxLength = 35)
{
    Console.WriteLine("**Start Morpheus Search**");
    Stopwatch watch = new Stopwatch();
    watch.Start();

    // Generate peptide candidates; the HashSet keeps only peptides that are distinct
    // according to Peptide's own equality.
    HashSet<Peptide> peptides = new HashSet<Peptide>();
    using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein protein in reader.ReadNextProtein())
        {
            foreach (Peptide peptide in protein.Digest(protease, maxMissed, minLength, maxLength))
            {
                peptides.Add(peptide);
            }
        }
    }

    MSSearchEngine engine = new MorpheusSearchEngine();
    engine.PrecursorMassTolerance = Tolerance.FromPPM(100);
    engine.ProductMassTolerance = Tolerance.FromPPM(10);
    engine.LoadPeptides(peptides);

    watch.Stop();
    Console.WriteLine("Time elapsed: {0}", watch.Elapsed);
    Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
    Console.WriteLine("**End Morpheus Search**");
}
/// <summary>
/// Digests every protein read from "Resources/yeast_uniprot_120226.fasta" with the protease
/// registered under the name "Trypsin" (up to 3 missed cleavages, peptide length 5 to 50),
/// records each peptide's monoisotopic mass, and prints the elapsed time to the console.
/// </summary>
public static void ExampleDigestion()
{
    const string fastaFilePath = "Resources/yeast_uniprot_120226.fasta";
    const int maxMissedCleavages = 3;
    const int minPeptideLength = 5;
    const int maxPeptideLength = 50;

    IProtease trypsin = Protease.GetProtease("Trypsin");
    List<double> peptideMasses = new List<double>();

    Stopwatch timer = Stopwatch.StartNew();

    using (FastaReader fasta = new FastaReader(fastaFilePath))
    {
        foreach (Protein entry in fasta.ReadNextProtein())
        {
            IEnumerable<Peptide> digest = entry.Digest(trypsin, maxMissedCleavages, minPeptideLength, maxPeptideLength);
            foreach (Peptide fragment in digest)
            {
                // Reading the mass is the work being timed.
                peptideMasses.Add(fragment.MonoisotopicMass);
            }
        }
    }

    // Optional summary output, currently disabled:
    //Console.WriteLine("Average Peptide Mass = {0:F4}", peptideMasses.Average());

    timer.Stop();
    Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
}
/// <summary>
/// Digests every protein read from "Resources/yeast_uniprot_120226.fasta", keeping all proteins,
/// all peptides and each peptide's m/z at charge 2, then prints the counts, elapsed time and
/// process working set to the console.
/// </summary>
/// <param name="protease">Protease used to digest each protein.</param>
/// <param name="maxMissed">Maximum missed cleavages passed to digestion.</param>
/// <param name="minLength">Minimum peptide length passed to digestion.</param>
/// <param name="maxLength">Maximum peptide length passed to digestion.</param>
/// <param name="storeSequenceString">
/// Value assigned to the static <c>AminoAcidPolymer.StoreSequenceString</c> before digestion;
/// the assignment is not reverted when the method returns.
/// </param>
public static void Start(IProtease protease, int maxMissed = 1, int minLength = 0, int maxLength = int.MaxValue, bool storeSequenceString = true)
{
    Console.WriteLine("**Start Digestion**");
    Stopwatch timer = Stopwatch.StartNew();

    List<Peptide> allPeptides = new List<Peptide>();
    List<Protein> allProteins = new List<Protein>();
    List<double> mzValues = new List<double>();

    AminoAcidPolymer.StoreSequenceString = storeSequenceString;

    using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein entry in fasta.ReadNextProtein())
        {
            IEnumerable<Peptide> digest = entry.Digest(protease, maxMissed, minLength, maxLength);
            foreach (Peptide fragment in digest)
            {
                allPeptides.Add(fragment);

                // forces the calculation of the mass and thus chemical formula
                double mz = fragment.ToMz(2);
                mzValues.Add(mz);
            }

            allProteins.Add(entry);
        }
    }

    timer.Stop();

    Console.WriteLine("{0:N0} proteins produced {1:N0} peptides using {2:N0} missed cleavages", allProteins.Count, allPeptides.Count, maxMissed);
    Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
    Console.WriteLine("Memory used: {0:N0} MB", Environment.WorkingSet / (1024 * 1024));
    Console.WriteLine("**End Digestion**");
}
/// <summary>
/// Writes one CSV row per protein read from "Resources/yeast_uniprot_120226.fasta" to
/// <paramref name="outputFilePath"/>: description, length and monoisotopic mass, preceded by a
/// header row.
/// </summary>
/// <param name="outputFilePath">Path of the CSV file to create.</param>
public static void BasicCsvWriting(string outputFilePath)
{
    Console.WriteLine("Writing file to: " + outputFilePath);

    // Create a stream writer to output data to a stream. In this case, the stream points to a file
    // path on the computer and is saved as a file. The using statement guarantees the writer is
    // closed (and its contents flushed to disk) when the block exits, even if an exception is thrown.
    using (StreamWriter writer = new StreamWriter(outputFilePath))
    {
        // Now that the stream is open, we can write a line of text to it to serve as the header row.
        // CSV formats are just text files with fields separated by commas.
        writer.WriteLine("Protein Name,# of Amino Acids,Mass (da)");

        // Open a connection to a fasta file (very similar syntax as the StreamWriter above)
        using (FastaReader reader = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
        {
            // Loop over each protein in the fasta file
            foreach (Protein protein in reader.ReadNextProtein())
            {
                // StringBuilder objects are an effective tool for constructing strings to write to files.
                StringBuilder sb = new StringBuilder();

                // The description is free text and may itself contain commas or quotes, so it is
                // escaped to keep it inside a single CSV column.
                sb.Append(EscapeCsvField(protein.Description));
                sb.Append(','); // we need to add the delimiter after each field we add as well.

                // Add the next item
                sb.Append(protein.Length);
                sb.Append(',');

                // Format the mass with the invariant culture so the decimal separator is always '.'
                // and never a ',' that would be mistaken for a field delimiter.
                sb.Append(Convert.ToString(protein.MonoisotopicMass, System.Globalization.CultureInfo.InvariantCulture));
                // No delimiter is needed after the last field is added.

                // Now to write this string to the file itself.
                // We convert the StringBuilder object (named: sb) to a string, and then write it on
                // its own line in the file writer (named: writer)
                writer.WriteLine(sb.ToString());
            }
        }
    }

    // The file is automatically written and closed after exiting the using {} block.
}

/// <summary>
/// Returns <paramref name="field"/> in a form that is safe to place in one CSV column: values
/// containing a comma, double quote or line break are wrapped in double quotes with embedded
/// quotes doubled; a null value becomes an empty field; anything else is returned unchanged.
/// </summary>
private static string EscapeCsvField(string field)
{
    if (string.IsNullOrEmpty(field))
    {
        return string.Empty;
    }

    if (field.IndexOfAny(new[] { ',', '"', '\r', '\n' }) < 0)
    {
        return field;
    }

    return "\"" + field.Replace("\"", "\"\"") + "\"";
}
/// <summary>
/// Digests every protein read from "Resources/yeast_uniprot_120226.fasta", shuffles the peptides
/// with a fixed-seed random generator, treats the leading fraction as "identified" and groups the
/// proteins based on those peptides. Progress, counts, total elapsed time and the process working
/// set are written to the console.
/// </summary>
/// <param name="protease">Protease handed to both the digestion and the grouping call.</param>
/// <param name="percentIdentified">Fraction of the digested peptides to treat as identified.</param>
/// <param name="maxMissed">Maximum missed cleavages passed to digestion and grouping.</param>
/// <param name="minLength">Minimum peptide length passed to digestion.</param>
/// <param name="maxLength">Maximum peptide length passed to digestion.</param>
public static void Start(IProtease protease, double percentIdentified = 0.05, int maxMissed = 3, int minLength = 5, int maxLength = 35)
{
    Console.WriteLine("**Start Protein Grouping**");
    Stopwatch timer = Stopwatch.StartNew();

    List<Peptide> allPeptides = new List<Peptide>();
    List<Protein> allProteins = new List<Protein>();

    using (FastaReader fasta = new FastaReader("Resources/yeast_uniprot_120226.fasta"))
    {
        foreach (Protein entry in fasta.ReadNextProtein())
        {
            allPeptides.AddRange(entry.Digest(protease, maxMissed, minLength, maxLength));
            allProteins.Add(entry);
        }
    }

    // The timer keeps running, so the final elapsed time covers loading as well as grouping.
    Console.WriteLine("Loaded {0:N0} peptides from {1:N0} proteins in {2} ms", allPeptides.Count, allProteins.Count, timer.ElapsedMilliseconds);

    // Fixed seed to make it reproducible
    Random rng = new Random(480912341);

    // Shuffle, then keep the leading x % as the identified peptides.
    int identifiedCount = (int)(allPeptides.Count * percentIdentified);
    IEnumerable<Peptide> shuffled = allPeptides.OrderBy(x => rng.Next());
    List<Peptide> identifiedPeptides = shuffled.Take(identifiedCount).ToList();

    IEnumerable<ProteinGroup> grouping = ProteinGroup.GroupProteins(allProteins, protease, identifiedPeptides, new AminoAcidLeucineSequenceComparer(), maxMissed);
    List<ProteinGroup> proteinGroups = grouping.ToList();

    timer.Stop();

    Console.WriteLine("{0:N0} proteins produced {1:N0} protein groups from {2:N0} identified sequences", allProteins.Count, proteinGroups.Count, identifiedPeptides.Count);
    Console.WriteLine("Time elapsed: {0}", timer.Elapsed);
    Console.WriteLine("Memory used: {0:N0} MB", System.Environment.WorkingSet / (1024 * 1024));
    Console.WriteLine("**END Protein Grouping**");
}