Inheritance: AminoAcidPolymer
示例#1
0
        /// <summary>
        /// Digests a protein whose sequence is the same five residues repeated twice ("DEREKDEREK")
        /// with the "LysC" protease and asserts that two peptides come back.
        /// </summary>
        public void DuplicatePeptidesReturn()
        {
            Protein prot = new Protein("DEREKDEREK");

            // The second argument is presumably the maximum number of missed cleavages - TODO confirm.
            var peptides = prot.Digest(Protease.GetProtease("LysC"), 0).ToList();

            // Expected value goes first so that a failure is reported as "expected 2 but was N".
            Assert.AreEqual(2, peptides.Count);
        }
示例#2
0
        /// <summary>
        /// Example: writes "example.pepXML" into the examples base directory, containing the
        /// search summary (protease, database, modifications) followed by a single spectrum
        /// with one peptide spectral match.
        /// </summary>
        public static void WritePepXml()
        {
            string outputPath = Path.Combine(Examples.BASE_DIRECTORY, "example.pepXML");
            Console.WriteLine("Writting to " + outputPath);

            using (PepXmlWriter pepXml = new PepXmlWriter(outputPath))
            {
                // --- Search summary section ---
                pepXml.WriteSampleProtease(Protease.Trypsin);
                pepXml.StartSearchSummary("OMSSA", true, true);
                pepXml.WriteProteinDatabase("Resources/yeast_uniprot_120226.fasta");
                pepXml.WriteSearchProtease(Protease.Trypsin, 3);

                // Modifications; each lookup is kept next to the write that consumes it.
                var acetyl = ModificationDictionary.GetModification("Acetyl");
                pepXml.WriteModification(acetyl, ModificationSites.K | ModificationSites.NPep);

                var carbamidomethyl = ModificationDictionary.GetModification("CAM");
                pepXml.WriteModification(carbamidomethyl, ModificationSites.C);

                var phospho = ModificationDictionary.GetModification("Phospho");
                pepXml.WriteModification(phospho, ModificationSites.S | ModificationSites.T | ModificationSites.Y, false);

                // --- Spectra section ---
                pepXml.SetCurrentStage(PepXmlWriter.Stage.Spectra, true);
                pepXml.StartSpectrum(15, 1.234, 523.4324, 3);

                // A single match for the spectrum, scored with an OMSSA e-value.
                Protein parentProtein = new Protein("", "Test Protein");
                PeptideSpectralMatch match = new PeptideSpectralMatch(PeptideSpectralMatchScoreType.OmssaEvalue)
                {
                    Score = 1.5e-5,
                    Peptide = new Peptide("DEREK", parentProtein),
                    Charge = 3
                };
                pepXml.WritePSM(match);

                pepXml.EndSpectrum();
            }
        }
示例#3
0
        /// <summary>
        /// Digests "DEREKDEREK" with the "LysC" protease and asserts that the first two
        /// resulting peptides compare as equal.
        /// </summary>
        public void DuplicatePeptidesAreEqualivant()
        {
            Protein protein = new Protein("DEREKDEREK");
            var lysC = Protease.GetProtease("LysC");

            var digested = protein.Digest(lysC, 0).ToList();
            var firstPeptide = digested[0];
            var secondPeptide = digested[1];

            Assert.AreEqual(firstPeptide, secondPeptide);
        }
示例#4
0
 /// <summary>
 /// Writes the given protein by forwarding its sequence and description to the
 /// two-argument <c>Write</c> overload.
 /// </summary>
 /// <param name="protein">The protein whose sequence and description are written.</param>
 public void Write(Protein protein)
 {
     var sequence = protein.Sequence;
     var description = protein.Description;

     Write(sequence, description);
 }
示例#5
0
        /// <summary>
        /// Performs an in silico digestion of all the proteins found within the fasta file and maps the
        /// supplied unique peptides onto every protein that produces them. Peptides that cannot be mapped
        /// through digestion are retried with a brute force substring search over the same fasta file.
        /// </summary>
        /// <param name="fastaFile">The fasta filename to perform the digestion on</param>
        /// <param name="uniquePeptides">
        /// The unique peptides that were read in from the csv files. Lookups use the digested sequence with
        /// every 'I' replaced by 'L', so the keys are presumably leucine-normalized sequences (verify against caller).
        /// </param>
        /// <param name="proteases">The proteases used to digest each protein</param>
        /// <param name="semiDigestion">Perform a Semi Digestion</param>
        /// <returns>A list of every protein that had at least one of the unique peptides mapped to it</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when one or more peptides are still unmapped after the brute force search.
        /// </exception>
        private List<Protein> GetMappedProteinsFromFasta(string fastaFile, Dictionary<string, Peptide> uniquePeptides, IList<Protease> proteases, bool semiDigestion = false)
        {
            string fastaFileniceName = Path.GetFileName(fastaFile);

            // Build a comma separated list of the protease names for the log message
            StringBuilder sb = new StringBuilder();
            foreach (Protease protease in proteases)
            {
                sb.Append(protease.Name);
                sb.Append(',');
            }
            if (sb.Length > 0) { sb.Remove(sb.Length - 1, 1); }
            Log("Performing {0}{1} digestion on {2}...", semiDigestion ? "semi " : "", sb, fastaFileniceName);
            //Peptide.MappedCount = 0;
            int forwardProteins = 0, decoyProteins = 0, forwardProteinsMapped = 0, decoyProteinsMapped = 0, fastaCounter = 0, pepsMapped = 0;

            // Only consumed by the (currently disabled) progress reporting further down
            long totalBytes = new FileInfo(fastaFile).Length;

            // All proteins that have a peptide that was in the input files. Each protein is stored as both key
            // and value so the stored instance can be retrieved from an equal, freshly constructed one.
            Dictionary<Protein, Protein> proteins = new Dictionary<Protein, Protein>(1 << 13);

            // Min and Max length of peptides; a semi digestion leaves the length effectively unrestricted
            int minLength = semiDigestion ? 1 : _smallestPeptide - 1;
            int maxLength = semiDigestion ? int.MaxValue : _largestPeptide + 1;

            // Open the reader for the protein database in the .fasta format
            using (FastaReader reader = new FastaReader(fastaFile))
            {
                // Read in each protein one-by-one
                foreach (Fasta fasta in reader.ReadNextFasta())
                {
                    // The number of fasta (proteins) read in (for progress bar feedback)
                    fastaCounter++;

                    // Create a new protein from the fasta
                    Protein prot = new Protein(fasta.Description, fasta.Sequence);

                    // Check if the protein is a decoy protein or not
                    if (prot.IsDecoy)
                    {
                        decoyProteins++;
                    }
                    else
                    {
                        forwardProteins++;
                    }

                    // Loop over each protease
                    foreach (Protease protease in proteases)
                    {

                        // Digest the protein's sequence with the given protease and max missed cleavages, limiting it to the
                        // smallest and largest peptide observed (speed improvement).
                        // *Note each peptide sequence (pepSeq) has its I's replaced by L's only for the lookup below
                        foreach (string pepSeq in AminoAcidPolymer.Digest(prot.Sequence, protease, MaxMissedCleavages, minLength, maxLength, semiDigestion: semiDigestion))
                        {
                            // Is this one of the unique peptide sequences in the csv files? If not, we don't care about it
                            Peptide pep;
                            if (!uniquePeptides.TryGetValue(pepSeq.Replace('I', 'L'), out pep))
                                continue;

                            // Check to see if this protein has already been added to the collection of proteins hit
                            if(!proteins.ContainsKey(prot)) // true when the protein has not been recorded yet
                            {
                                proteins.Add(prot, prot);
                                if (prot.IsDecoy)
                                {
                                    decoyProteinsMapped++;
                                }
                                else
                                {
                                    forwardProteinsMapped++;
                                }
                            }

                            // Add the peptide to the protein (the original author notes this is internally hashed, so duplicates are not a concern)
                            prot.AddPeptide(pep);

                            // Mark that this peptide was successfully mapped, this is for error checking purposes
                            if (!pep.IsMapped)
                            {
                                pepsMapped++;
                                pep.IsMapped = true;
                            }
                            //pep.MarkAsMapped();
                        }

                    }

                    // Only call every 100 proteins otherwise you are wasting a lot of time refreshing and not doing actual work
                    //if (fastaCounter > 100)
                    //{
                    //    fastaCounter = 0;
                    //    ProgressUpdate((double)reader.Position / totalBytes);
                    //}
                }
            }

            // Check to see if every peptide is matched, if not try using a brute force search method instead
            if (uniquePeptides.Count > pepsMapped)
            {
                // Get all the peptides that weren't mapped
                List<Peptide> unMapedPeptides = uniquePeptides.Values.Where(p => !p.IsMapped).ToList();

                Log("[WARNING] Couldn't find every peptide using digestion method (wrong enzyme perhaps?), trying brute force search instead on the remaining {0} peptides...", unMapedPeptides.Count);

                ProgressUpdate(0.1);
                using (FastaReader reader = new FastaReader(fastaFile))
                {
                    fastaCounter = 0;

                    // Read in each protein one-by-one
                    foreach (Fasta fasta in reader.ReadNextFasta())
                    {
                        // Compare in leucine space: every I in the protein sequence is treated as an L
                        string seq = fasta.Sequence.Replace('I', 'L');

                        foreach (Peptide pep2 in unMapedPeptides)
                        {
                            if (!seq.Contains(pep2.LeucineSequence))
                                continue;

                            // Reuse the already recorded protein when there is one, otherwise record this new one
                            Protein prot = new Protein(fasta.Description, fasta.Sequence);
                            Protein realProt;
                            if (proteins.TryGetValue(prot, out realProt))
                            {
                                // Add the peptide to the protein
                                realProt.AddPeptide(pep2);
                            }
                            else
                            {
                                proteins.Add(prot, prot);
                                if (prot.IsDecoy)
                                {
                                    decoyProteinsMapped++;
                                }
                                else
                                {
                                    forwardProteinsMapped++;
                                }

                                // Add the peptide to the protein
                                prot.AddPeptide(pep2);
                            }

                            // Mark that this peptide was successfully mapped, this is for error checking purposes
                            pep2.IsMapped = true;
                            //pep2.MarkAsMapped();
                        }

                        fastaCounter++;

                        //// Only call every 100 proteins otherwise you are wasting a lot of time refreshing and not doing actual work
                        //if (fastaCounter > 100)
                        //{
                        //    fastaCounter = 0;
                        //    ProgressUpdate((double)reader.BaseStream.Position / totalBytes);
                        //}

                    }
                }

                // Still missing peptides?
                if (unMapedPeptides.Any(p => !p.IsMapped))
                {
                    int count = 0;
                    foreach (Peptide pep2 in unMapedPeptides)
                    {
                        if (pep2.IsMapped)
                            continue;
                        count++;
                        Log("[ERROR]\tPeptide {0} was not isMapped", pep2);
                    }
                    throw new ArgumentException(
                        string.Format(
                            "[ERROR] Unable to map every peptide ({0}) to {1}. You might be using either the wrong database, enzyme, or max missed cleavages!",
                            count, fastaFileniceName));
                }

            }

            // A database without any target (or without any decoy) proteins would otherwise divide 0 by 0
            // and report "NaN%"; report 0% for an empty category instead.
            double forwardPercentMapped = forwardProteins > 0 ? 100.0 * (double)forwardProteinsMapped / (double)forwardProteins : 0.0;
            double decoyPercentMapped = decoyProteins > 0 ? 100.0 * (double)decoyProteinsMapped / (double)decoyProteins : 0.0;

            Log("Every unique peptide was successfully mapped to at least one protein");
            Log("{0:N0} of {1:N0} ({2:F2}%) target proteins were mapped at least once", forwardProteinsMapped, forwardProteins, forwardPercentMapped);
            Log("{0:N0} of {1:N0} ({2:F2}%) decoy proteins were mapped at least once", decoyProteinsMapped, decoyProteins, decoyPercentMapped);

            // force the progress bar to go into marquee mode
            ProgressUpdate(0.0);

            // Return a list of all the proteins that were mapped at least once
            return proteins.Values.ToList();
        }
示例#6
0
        /// <summary>
        /// Writes "proteins_per_minute.csv" into <paramref name="outputDirectory"/>. For each time
        /// point 0, 1, 2, ... (at most 1000 rows) the file records how many peptides have a PSM with
        /// a retention time at or before that point, and how many protein groups built from those
        /// peptides pass FDR. Writing stops early once every peptide has been counted.
        /// </summary>
        /// <param name="allPeptides">Every peptide to consider; their ProteinGroups are cleared on each step.</param>
        /// <param name="proteins">The proteins whose peptides are regrouped at each time point.</param>
        /// <param name="outputDirectory">Directory that receives the csv file.</param>
        private void WriteProteinsPerMinute(List<Peptide> allPeptides, List<Protein> proteins, string outputDirectory)
        {
            string fileName = Path.Combine(outputDirectory, "proteins_per_minute.csv");
            Log("Writing file " + fileName);
            double maxPeptides = allPeptides.Count;

            using (StreamWriter writer = new StreamWriter(fileName))
            {
                writer.WriteLine("Time (min),Unique Peptides,Protein Groups");

                for (double minute = 0; minute < 1000; minute++)
                {
                    // Peptides with at least one PSM at or before the current time point
                    double cutoff = minute;
                    HashSet<Peptide> seenPeptides = new HashSet<Peptide>(
                        allPeptides.Where(pep => pep.PSMs.Any(psm => psm.RetentionTime <= cutoff)));

                    // Grouping is redone from scratch at every time point
                    foreach (Peptide peptide in allPeptides)
                    {
                        peptide.ProteinGroups.Clear();
                    }

                    List<Protein> seenProteins = new List<Protein>();
                    if (seenPeptides.Count > 0)
                    {
                        foreach (Protein protein in proteins)
                        {
                            // Created lazily: only proteins with at least one seen peptide get a copy
                            Protein copy = null;
                            foreach (Peptide peptide in protein.Peptides)
                            {
                                if (!seenPeptides.Contains(peptide))
                                {
                                    continue;
                                }

                                if (object.ReferenceEquals(copy, null))
                                {
                                    copy = new Protein(protein.Description, protein.Sequence);
                                    seenProteins.Add(copy);
                                }
                                copy.AddPeptide(peptide);
                            }
                        }
                    }

                    List<ProteinGroup> groups = GroupProteins(seenProteins, false);
                    int fdrGroups = groups.Count(g => g.PassesFDR);

                    writer.WriteLine(minute + "," + seenPeptides.Count + "," + fdrGroups);
                    ProgressUpdate(seenPeptides.Count / maxPeptides);

                    // Nothing more can change once every peptide has been seen
                    if (seenPeptides.Count >= maxPeptides)
                    {
                        break;
                    }
                }
            }
        }
示例#7
0
 /// <summary>
 /// Initializes the <c>_proteinA</c> fixture from a fixed amino acid sequence.
 /// </summary>
 public void Setup()
 {
     // The sequence is split into chunks purely for readability; the pieces are concatenated as-is.
     const string sequenceA =
         "MMRGFKQRLIKKTTGSSSSSSSKKKDKEKEKEKSSTTSSTSKKPASASSSSHGTTHSSASSTGSKSTTEKGKQSGSVPSQ" +
         "GKHHSSSTSKTKTATTPSSSSSSSRSSSVSRSGSSSTKKTSSRKGQEQSKQSQQPSQSQKQGSSSSSAAIMNPTPVLTVT" +
         "KDDKSTSGEDHAHPTLLGAVSAVPSSPISNASGTAVSSDVENGNSNNNNMNINTSNTQDANHASSQSIDIPRSSHSFERL" +
         "PTPTKLNPDTDLELIKTPQRHSSSRFEPSRYTPLTKLPNFNEVSPEERIPLFIAKVDQCNTMFDFNDPSFDIQGKEIKRS" +
         "TLDELIEFLVTNRFTYTNEMYAHVVNMFKINLFRPIPPPVNPVGDIYDPDEDEPVNELAWPHMQAVYEFFLRFVESPDFN" +
         "HQIAKQYIDQDFILKLLELFDSEDIRERDCLKTTLHRIYGKFLSLRSFIRRSMNNIFLQFIYETEKFNGVAELLEILGSI" +
         "INGFALPLKEEHKVFLVRILIPLHKVRCLSLYHPQLAYCIVQFLEKDPLLTEEVVMGLLRYWPKINSTKEIMFLNEIEDI" +
         "FEVIEPLEFIKVEVPLFVQLAKCISSPHFQVAEKVLSYWNNEYFLNLCIENAEVILPIIFPALYELTSQLELDTANGEDS" +
         "ISDPYMLVEQAINSGSWNRAIHAMAFKALKIFLETNPVLYENCNALYLSSVKETQQRKVQREENWSKLEEYVKNLRINND" +
         "KDQYTIKNPELRNSFNTASENNTLNEENENDCDSEIQ";

     _proteinA = new Protein(sequenceA);
 }
示例#8
0
        /// <summary>
        /// Runs a semi digestion with the "Trypsin" protease (arguments 0, 5, 10) on a fixed
        /// sequence and asserts that 17 peptides are produced.
        /// </summary>
        public void SemiTrypiticDigestion()
        {
            Protein protein = new Protein("MMRGFKQRLIKKTTGSSSSSSSKKKDKEKEKEKSSTTSSTSKKPASASSSSHGTTHSSASSTGSKSTTEKGKQSGSVPSQ");
            var trypsin = Protease.GetProtease("Trypsin");

            // Materialize the digest before counting, exactly as a list.
            int peptideCount = protein.Digest(trypsin, 0, 5, 10, semiDigestion: true).ToList().Count;

            Assert.AreEqual(17, peptideCount);
        }