Exemple #1
0
        /// <summary>
        /// Checks that the lookup returned by <c>ProteinDbRetriever.UniprotColumnsList</c>
        /// has an "Entry" key and that this key maps to "id".
        /// </summary>
        public static void TestDownloadListOfColumnsAvailableAtUniProt()
        {
            var columnLookup = ProteinDbRetriever.UniprotColumnsList();

            // The key must be present before its value is inspected.
            bool hasEntryColumn = columnLookup.Keys.Contains("Entry");
            Assert.IsTrue(hasEntryColumn);

            var entryColumnValue = columnLookup["Entry"];
            Assert.AreEqual("id", entryColumnValue);
        }
Exemple #2
0
        /// <summary>
        /// Downloads the list of available UniProt proteomes into the test's "DatabaseTests"
        /// folder, checks the returned path and the parsed contents, and then checks that
        /// bad inputs make both the download call and the list-parsing call return <c>null</c>.
        /// </summary>
        public static void TestDownloadAvailableUniProtProteomes()
        {
            string filepath           = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DatabaseTests");
            string downloadedFilePath = ProteinDbRetriever.DownloadAvailableUniProtProteomes(filepath);

            Dictionary<string, string> uniprotProteoms;

            try
            {
                // NOTE(review): the expected path is built with a literal backslash; presumably this
                // mirrors how the retriever builds the file name - confirm before switching to Path.Combine.
                Assert.AreEqual(filepath + "\\availableUniProtProteomes.txt.gz", downloadedFilePath);

                uniprotProteoms = ProteinDbRetriever.UniprotProteomesList(downloadedFilePath);

                Assert.IsTrue(uniprotProteoms.ContainsKey("UP000005640"));
                Assert.AreEqual("Homo sapiens (Human)", uniprotProteoms["UP000005640"]);
            }
            finally
            {
                // Clean up even when an assertion above fails, so a stale download is not left behind.
                // File.Exists returns false for null, so a failed download does not throw here.
                if (File.Exists(downloadedFilePath))
                {
                    File.Delete(downloadedFilePath);
                }
            }

            //return null for bad filepath
            filepath           = "bubba";
            downloadedFilePath = ProteinDbRetriever.DownloadAvailableUniProtProteomes(filepath);
            Assert.IsNull(downloadedFilePath);

            //bad file path returns null
            uniprotProteoms = ProteinDbRetriever.UniprotProteomesList("badFilePath");
            Assert.IsNull(uniprotProteoms);

            //wrong file extension returns null
            uniprotProteoms = ProteinDbRetriever.UniprotProteomesList(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"bad.fasta"));
            Assert.IsNull(uniprotProteoms);
        }
Exemple #3
0
        /// <summary>
        /// Retrieves proteome UP000008595 through <c>ProteinDbRetriever.RetrieveProteome</c> in several
        /// format / reviewed / compression / isoform combinations and checks, for each, the returned
        /// path and that the file exists. The first (fasta) download is also loaded and its contents
        /// checked. Finally checks that a non-existent directory and the gff format both return <c>null</c>.
        /// </summary>
        public static void TestRetrieveUniProtProteome()
        {
            // Every download in this test targets the same folder.
            string directory = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DatabaseTests");

            //UP000008595 is Uukuniemi virus (strain S23) (Uuk) which only has 4 proteins
            //fasta; reviewed; compressed; with isoforms
            string returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", directory, ProteinDbRetriever.ProteomeFormat.fasta, ProteinDbRetriever.Reviewed.yes, ProteinDbRetriever.Compress.yes, ProteinDbRetriever.IncludeIsoforms.yes);

            string filepath = directory + "\\UP000008595_reviewed_isoform.fasta.gz";

            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));

            // NOTE(review): UniprotFullNameRegex is passed for two consecutive arguments; confirm whether
            // the second one was meant to be a different regex.
            var proteinList = ProteinDbLoader.LoadProteinFasta(filepath, true, DecoyType.None, false, out _, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex,
                                                               ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex);

            Assert.AreEqual(4, proteinList.Count);
            Assert.IsTrue(proteinList.Any(p => p.Accession == "P33453"));
            Assert.AreEqual("MLLAICSRTIRQQGLNCPPAVTFTSSHMRPPIPSFLLWTEGSDVLMDFDLDTIPAGSVTGSSIGPKFKIKTQAASSFVHDFTFAHWCDASDMPLRDHFPLVNDTFDHWTPDFISQRLDGSKVVVEFTTNRSDQEQSLISAFNTKVGKYEVALHNRSTTSSILFGVVV" +
                            "VSETTVVTNLNLNQQEVDELCFRFLVARAVHLEMTTKMIIPEYDDEDEDKRSREVKAAFHSVQPDWNVTEANFAPFSRRMFSNFAQMEPDKEYLAHIILDSLKQAQADLDGNHYLNESLTEQARLDRNREESLNMVKDFERDFNNAAQRSAWSHKSTVPFPGVIPKVSGDTTSLSRLVEL" +
                            "PVITGGSDATIRAWRSAYGSVSNGTVERCDEDVERERRAALCSLTVEELEESKALRMKYHRCKIDNGMMDKLDLAMQGVEAKEFKNHPSIIKKRSKSKKTFPLTADTRDIDLFLHHDDLMFNNEHSQTPPAAMIEAVKAGADAQSLHGLDKSANPWYASALWFLGLPIGLWLFMCTCIGVEL" +
                            "SISLKQHCGRQKFIIKKLRFFDIFLLIKPTNSGSHVFYSIAFPESAILGKLHRSQCFKGLQFEDGWFWTEFSSFKMSKLTNVVKCLSTGFNLFWFWRDYYEVPFWAGNEKDFQTGKQRANKMFKFCLLMLLEDKARTEEIATLSRYVMMEGFVSPPCIPKPQKMIEKLPNLARTKFQVWLISR" +
                            "MLQTIIRVSDYPFKITAGHKSANWTGMFNWVTGEPIESTQKLISLFYLGYLKNKEESPERNASIGMYKKILEYEDKHPGRYTYLGLGDPPSDDTRFHEYSISLLKHLCIHAEHDLRRNWGESFKAMISRDIVDAIASLDLERLATLKASSNFNEEWYQKRGDGKTYHRSKVLEKVSKYVKKSSSH" +
                            "VHHIMEECLRKVESQGCMHVCLFKKPQHGGLREIYVLGFEERVVQLVIETIARQICKRFKSETLTNPKQKLAIPETHGLRAVKTCGIHHETVATSDDAAKWNQCHHVTKFALMLCHFTDPLFHGFIIRGCSMFMKKRIMIDQSLIDIIDSHTTLETSDAYLQKIHRGYHGSLDDQPRWISRGGAFVQ" +
                            "TETGMMQGILHYTSSLLHTLLQEWLRTFSQRFIRTRVSVDQRPDVLVDVLQSSDDSGMMISFPSTDKGATGKYRYLSALIFKYKKVIGKYLGIYSSVKSTNNTLHLLEFNSEFFFHINHNRPLLRWITACDTISEQESLASRQEEMYNNLTSVLEGGGSFSLVSFCQFGQLLLHYTLLGMTVSPLFLEY" +
                            "IKLVSEIKDPSLGYFLMDHPFGSGLSGFKYNVWVAVQNSILGSRYRSLLEAIQNSDSAAPKKTLDTTTSGTFVQSTIIRFGDRKKWQRLVDRLNLPEDWLDVIDKNPEIVYRRPRDGFEVSLRIAEKVHSPGVSNSLSKGNCIIRVISSSVYILSRSILSDGLAWLYDEEEEVKRPLLYKVMNQPELDLHSRLTPA" +
                            "QLSTLFPMMAEFEKLQTHLRSYMKIEGEFISKKKVITQTRVNILETERFLRARPEDLIADKWFGFTRTRMTPRTFKEEWENLTSVFPWLTGNPSETLELSPFQHHVQLRNFFSRLDLKGRDIRIIGAPIKKSSGVSNVSTAIRDNFFPRFVLTHIPDEAAMERIEAAGILKHALFLTVTGPYTDQSKLDMCRDF" +
                            "ITSSEPITLKPNHGKTRTNVLSLFQDYFSKRGPDIIFNRIQMANCGVIGGFTSPQKPKEVDGKIVYTGDGVWRGIVDGFQIQLVITYMPKQKSNELKSITVNSDRCISALSSFCQSWCKEMGVFNTEDFSKTQRFSKASFFMHKFKISGSKQTLGAPIFIVSEKIFRPICWDPSKLEFRVRGNTLNLTY" +
                            "KEVNPGAGQRMFNILSYTVKDTDVSDENAFKLMSLSPRHKFHGREPSTSWICMRALPISTIDKLLERILNRERISGSIDNERLAECFKNVMESTLRRKGVFLSEFSRATQKMLDGLSRDMLDFFAEAGLNDDLLLEEEPWLSGLDTFMLDDEAYLEEYNLGPFGVFSVEQEMNTKYYHHLLLD" +
                            "SLVEDVIQKLSLDGLRKLFQEEEAPLEYKKEVIRLLNILQRDASQIKWKSRDLLSENMGLDVDDDMFG", proteinList.First(p => p.Accession == "P33453").BaseSequence);

            File.Delete(filepath);

            //fasta; unreviewed; non-compressed; no isoforms
            returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", directory, ProteinDbRetriever.ProteomeFormat.fasta, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            filepath         = directory + "\\UP000008595_unreviewed.fasta";
            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));
            File.Delete(filepath);

            //xml; reviewed; compressed; no isoforms
            returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", directory, ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.yes, ProteinDbRetriever.Compress.yes, ProteinDbRetriever.IncludeIsoforms.no);
            filepath         = directory + "\\UP000008595_reviewed.xml.gz";
            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));
            File.Delete(filepath);

            //xml; unreviewed; non-compressed; no isoforms
            returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", directory, ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            filepath         = directory + "\\UP000008595_unreviewed.xml";
            Assert.AreEqual(filepath, returnedFilePath);
            Assert.IsTrue(File.Exists(filepath));
            File.Delete(filepath);

            //junk null return: the target directory does not exist
            returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", "pathDoesNotExists", ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            Assert.IsNull(returnedFilePath);

            //we don't support filetypes other than fasta or xml currently
            //requesting gff or other file formats will return null for now.
            returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", directory, ProteinDbRetriever.ProteomeFormat.gff, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
            Assert.IsNull(returnedFilePath);
        }
Exemple #4
0
 /// <summary>
 /// Assigns <c>AvailableUniProtProteomes</c> from the result of
 /// <c>ProteinDbRetriever.UniprotProteomesList</c> for the
 /// "availableUniProtProteomes.txt.gz" file in the "Proteomes" folder under <c>DataDir</c>.
 /// </summary>
 private static void LoadAvailableProteomes()
 {
     string proteomeListPath = Path.Combine(DataDir, @"Proteomes", @"availableUniProtProteomes.txt.gz");
     AvailableUniProtProteomes = ProteinDbRetriever.UniprotProteomesList(proteomeListPath);
 }