Ejemplo n.º 1
0
        /// <summary>
        /// Calls <c>ProteinDbRetriever.RetrieveProteome</c> for proteome UP000008595 with several
        /// format / reviewed / compression / isoform combinations. For each supported combination the
        /// returned path must equal the expected path and the file must exist; the first download is
        /// additionally loaded with <c>ProteinDbLoader.LoadProteinFasta</c> and its content is checked.
        /// A directory that does not exist and the gff format must both produce a null return value.
        /// </summary>
        public static void TestRetrieveUniProtProteome()
        {
            const string proteomeId = "UP000008595";

            // Every supported case writes into the same directory, so it is computed only once.
            string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DatabaseTests");

            //UP000008595 is Uukuniemi virus (strain S23) (Uuk) which only has 4 proteins
            //fasta; reviewed; compressed; with isoforms
            string returnedFilePath = ProteinDbRetriever.RetrieveProteome(proteomeId, databaseDirectory, ProteinDbRetriever.ProteomeFormat.fasta, ProteinDbRetriever.Reviewed.yes, ProteinDbRetriever.Compress.yes, ProteinDbRetriever.IncludeIsoforms.yes);

            // The expected file names keep their literal backslash prefix: the assertions below require the
            // returned path to be exactly the directory followed by this string.
            string filepath = databaseDirectory + "\\UP000008595_reviewed_isoform.fasta.gz";

            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));

            // The loader's error list is not inspected by this test, so it is discarded.
            var proteinList = ProteinDbLoader.LoadProteinFasta(filepath, true, DecoyType.None, false, out _, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex,
                                                               ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex);

            Assert.AreEqual(4, proteinList.Count);
            Assert.IsTrue(proteinList.Any(p => p.Accession == "P33453"));

            // First(predicate) reports a missing accession as "no matching element" instead of the
            // NullReferenceException that FirstOrDefault().BaseSequence would raise.
            Assert.AreEqual("MLLAICSRTIRQQGLNCPPAVTFTSSHMRPPIPSFLLWTEGSDVLMDFDLDTIPAGSVTGSSIGPKFKIKTQAASSFVHDFTFAHWCDASDMPLRDHFPLVNDTFDHWTPDFISQRLDGSKVVVEFTTNRSDQEQSLISAFNTKVGKYEVALHNRSTTSSILFGVVV" +
                            "VSETTVVTNLNLNQQEVDELCFRFLVARAVHLEMTTKMIIPEYDDEDEDKRSREVKAAFHSVQPDWNVTEANFAPFSRRMFSNFAQMEPDKEYLAHIILDSLKQAQADLDGNHYLNESLTEQARLDRNREESLNMVKDFERDFNNAAQRSAWSHKSTVPFPGVIPKVSGDTTSLSRLVEL" +
                            "PVITGGSDATIRAWRSAYGSVSNGTVERCDEDVERERRAALCSLTVEELEESKALRMKYHRCKIDNGMMDKLDLAMQGVEAKEFKNHPSIIKKRSKSKKTFPLTADTRDIDLFLHHDDLMFNNEHSQTPPAAMIEAVKAGADAQSLHGLDKSANPWYASALWFLGLPIGLWLFMCTCIGVEL" +
                            "SISLKQHCGRQKFIIKKLRFFDIFLLIKPTNSGSHVFYSIAFPESAILGKLHRSQCFKGLQFEDGWFWTEFSSFKMSKLTNVVKCLSTGFNLFWFWRDYYEVPFWAGNEKDFQTGKQRANKMFKFCLLMLLEDKARTEEIATLSRYVMMEGFVSPPCIPKPQKMIEKLPNLARTKFQVWLISR" +
                            "MLQTIIRVSDYPFKITAGHKSANWTGMFNWVTGEPIESTQKLISLFYLGYLKNKEESPERNASIGMYKKILEYEDKHPGRYTYLGLGDPPSDDTRFHEYSISLLKHLCIHAEHDLRRNWGESFKAMISRDIVDAIASLDLERLATLKASSNFNEEWYQKRGDGKTYHRSKVLEKVSKYVKKSSSH" +
                            "VHHIMEECLRKVESQGCMHVCLFKKPQHGGLREIYVLGFEERVVQLVIETIARQICKRFKSETLTNPKQKLAIPETHGLRAVKTCGIHHETVATSDDAAKWNQCHHVTKFALMLCHFTDPLFHGFIIRGCSMFMKKRIMIDQSLIDIIDSHTTLETSDAYLQKIHRGYHGSLDDQPRWISRGGAFVQ" +
                            "TETGMMQGILHYTSSLLHTLLQEWLRTFSQRFIRTRVSVDQRPDVLVDVLQSSDDSGMMISFPSTDKGATGKYRYLSALIFKYKKVIGKYLGIYSSVKSTNNTLHLLEFNSEFFFHINHNRPLLRWITACDTISEQESLASRQEEMYNNLTSVLEGGGSFSLVSFCQFGQLLLHYTLLGMTVSPLFLEY" +
                            "IKLVSEIKDPSLGYFLMDHPFGSGLSGFKYNVWVAVQNSILGSRYRSLLEAIQNSDSAAPKKTLDTTTSGTFVQSTIIRFGDRKKWQRLVDRLNLPEDWLDVIDKNPEIVYRRPRDGFEVSLRIAEKVHSPGVSNSLSKGNCIIRVISSSVYILSRSILSDGLAWLYDEEEEVKRPLLYKVMNQPELDLHSRLTPA" +
                            "QLSTLFPMMAEFEKLQTHLRSYMKIEGEFISKKKVITQTRVNILETERFLRARPEDLIADKWFGFTRTRMTPRTFKEEWENLTSVFPWLTGNPSETLELSPFQHHVQLRNFFSRLDLKGRDIRIIGAPIKKSSGVSNVSTAIRDNFFPRFVLTHIPDEAAMERIEAAGILKHALFLTVTGPYTDQSKLDMCRDF" +
                            "ITSSEPITLKPNHGKTRTNVLSLFQDYFSKRGPDIIFNRIQMANCGVIGGFTSPQKPKEVDGKIVYTGDGVWRGIVDGFQIQLVITYMPKQKSNELKSITVNSDRCISALSSFCQSWCKEMGVFNTEDFSKTQRFSKASFFMHKFKISGSKQTLGAPIFIVSEKIFRPICWDPSKLEFRVRGNTLNLTY" +
                            "KEVNPGAGQRMFNILSYTVKDTDVSDENAFKLMSLSPRHKFHGREPSTSWICMRALPISTIDKLLERILNRERISGSIDNERLAECFKNVMESTLRRKGVFLSEFSRATQKMLDGLSRDMLDFFAEAGLNDDLLLEEEPWLSGLDTFMLDDEAYLEEYNLGPFGVFSVEQEMNTKYYHHLLLD" +
                            "SLVEDVIQKLSLDGLRKLFQEEEAPLEYKKEVIRLLNILQRDASQIKWKSRDLLSENMGLDVDDDMFG", proteinList.First(p => p.Accession == "P33453").BaseSequence);

            File.Delete(filepath);

            //fasta; unreviewed; non-compressed; no isoforms
            returnedFilePath = ProteinDbRetriever.RetrieveProteome(proteomeId, databaseDirectory, ProteinDbRetriever.ProteomeFormat.fasta, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            filepath         = databaseDirectory + "\\UP000008595_unreviewed.fasta";
            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));
            File.Delete(filepath);

            //xml; reviewed; compressed; no isoforms
            returnedFilePath = ProteinDbRetriever.RetrieveProteome(proteomeId, databaseDirectory, ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.yes, ProteinDbRetriever.Compress.yes, ProteinDbRetriever.IncludeIsoforms.no);
            filepath         = databaseDirectory + "\\UP000008595_reviewed.xml.gz";
            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));
            File.Delete(filepath);

            //xml; unreviewed; non-compressed; no isoforms
            returnedFilePath = ProteinDbRetriever.RetrieveProteome(proteomeId, databaseDirectory, ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            filepath         = databaseDirectory + "\\UP000008595_unreviewed.xml";
            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));
            File.Delete(filepath);

            //junk null return: the target directory does not exist
            returnedFilePath = ProteinDbRetriever.RetrieveProteome(proteomeId, "pathDoesNotExists", ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            Assert.IsNull(returnedFilePath);

            //we don't support filetypes other than fasta or xml currently
            //requesting gff or other file formats will return null for now.
            returnedFilePath = ProteinDbRetriever.RetrieveProteome(proteomeId, databaseDirectory, ProteinDbRetriever.ProteomeFormat.gff, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            Assert.IsNull(returnedFilePath);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Deletes the three <c>*_Test.dbc</c> files under the <c>TestFiles</c> directory.
 /// </summary>
 public static void ClassCleanup()
 {
     // Deleted in this order, one File.Delete call per entry.
     string[] generatedFiles =
     {
         @"TestFiles\AttackAnimKits_Test.dbc",
         @"TestFiles\CharacterFacialHairStyles_Test.dbc",
         @"TestFiles\WowError_Strings_Test.dbc",
     };

     foreach (string generatedFile in generatedFiles)
     {
         File.Delete(generatedFile);
     }
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads the UniMod, PSI-MOD and UniProt lists through <c>Loaders</c>, spot-checks individual
        /// entries, writes the UniProt and UniMod modifications to a text file, and reads that file back with
        /// <c>PtmListLoader.ReadModsFromFile</c> to confirm the same number of modifications is recovered.
        /// All files the test works with are deleted at the end.
        /// </summary>
        /// <remarks>delete mzLib\Test\bin\x64\Debug to update your local unimod list</remarks>
        public void FilesLoading()
        {
            Loaders.LoadElements();

            string testDirectory = TestContext.CurrentContext.TestDirectory;
            string uniModPath    = Path.Combine(testDirectory, "unimod_tables2.xml");
            string psiModPath    = Path.Combine(testDirectory, "PSI-MOD.obo2.xml");
            string uniProtPath   = Path.Combine(testDirectory, "ptmlist2.txt");
            string roundTripPath = Path.Combine(testDirectory, "test.txt");

            // UniModPTMs
            var unimodMods = Loaders.LoadUnimod(uniModPath).ToList();

            Assert.AreEqual(2702, unimodMods.Count); // UniMod PTM list may be updated at some point, causing the unit test to fail

            Modification testMod = unimodMods.First(m => m.OriginalId.Equals("HexNAc(2)"));

            // Total number of neutral-loss values across all dissociation types (0 when there are none).
            int neutralLossCount = testMod.NeutralLosses.Sum(lossesByType => lossesByType.Value.Count);

            Assert.AreEqual(2, neutralLossCount);

            // PsiMod PTMs
            var psiModDeserialized = Loaders.LoadPsiMod(psiModPath);

            // N6,N6,N6-trimethyllysine
            var trimethylLysine = psiModDeserialized.Items.OfType<UsefulProteomicsDatabases.Generated.oboTerm>().First(b => b.id.Equals("MOD:00083"));

            Assert.AreEqual("1+", trimethylLysine.xref_analog.First(b => b.dbname.Equals("FormalCharge")).name);

            // Phosphoserine
            Assert.IsFalse(psiModDeserialized.Items.OfType<UsefulProteomicsDatabases.Generated.oboTerm>().First(b => b.id.Equals("MOD:00046")).xref_analog.Any(b => b.dbname.Equals("FormalCharge")));

            Dictionary<string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);

            // UniProt PTMs
            var uniprotPtms = Loaders.LoadUniprot(uniProtPath, formalChargesDictionary).ToList();

            Assert.LessOrEqual(300, uniprotPtms.Count); // UniProt PTM list may be updated at some point, causing the unit test to fail

            // write UniProt and UniMod PTMs to a file
            using (StreamWriter writer = new StreamWriter(roundTripPath))
            {
                foreach (var ptm in uniprotPtms)
                {
                    writer.WriteLine(ptm.ToString());
                    writer.WriteLine("//");
                }
                foreach (var ptm in unimodMods)
                {
                    writer.WriteLine(ptm.ToString());
                    writer.WriteLine("//");
                }
            }

            // read in the file and make sure that it has the same number of PTMs
            // (the reader's error output is not inspected here, so it is discarded)
            var sampleModList = PtmListLoader.ReadModsFromFile(roundTripPath, out _).ToList();

            Assert.AreEqual(uniprotPtms.Count + unimodMods.Count, sampleModList.Count);

            // First modification read back whose IdWithMotif mentions "Acetyl".
            Modification thisMod = sampleModList.First(mod => mod.IdWithMotif != null && mod.IdWithMotif.Contains("Acetyl"));

            Assert.IsTrue(thisMod.MonoisotopicMass > 42);
            Assert.IsTrue(thisMod.MonoisotopicMass < 43);

            File.Delete(uniModPath);
            File.Delete(psiModPath);
            File.Delete(uniProtPath);
            // The file written above was previously left behind in the test directory.
            File.Delete(roundTripPath);
        }