/// <summary>
/// Verifies that the column dictionary returned by
/// <c>ProteinDbRetriever.UniprotColumnsList()</c> contains the "Entry" key
/// and that this key maps to the value "id".
/// </summary>
public static void TestDownloadListOfColumnsAvailableAtUniProt()
{
    var columnLookup = ProteinDbRetriever.UniprotColumnsList();

    // The presence check runs first so a missing key is reported as a failed
    // assertion rather than as an exception from the indexer below.
    bool hasEntryColumn = columnLookup.Keys.Contains("Entry");
    Assert.IsTrue(hasEntryColumn);

    var entryValue = columnLookup["Entry"];
    Assert.AreEqual("id", entryValue);
}
/// <summary>
/// Exercises <c>ProteinDbRetriever.DownloadAvailableUniProtProteomes</c> and
/// <c>ProteinDbRetriever.UniprotProteomesList</c>.
/// The happy path downloads the proteome list into the "DatabaseTests" folder of
/// the test directory, checks the returned path, and checks that proteome
/// UP000005640 is listed as "Homo sapiens (Human)". The remaining cases expect
/// <c>null</c> for a bad download directory, a bad list path, and a file with the
/// wrong extension.
/// </summary>
public static void TestDownloadAvailableUniProtProteomes()
{
    string storageDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DatabaseTests");
    string downloadedFilePath = ProteinDbRetriever.DownloadAvailableUniProtProteomes(storageDirectory);

    try
    {
        // NOTE(review): the separator is kept as a literal backslash because the
        // assertion compares against the exact string returned by the retriever;
        // confirm how that path is built before switching to Path.Combine.
        Assert.AreEqual(storageDirectory + "\\availableUniProtProteomes.txt.gz", downloadedFilePath);

        Dictionary<string, string> uniprotProteoms = ProteinDbRetriever.UniprotProteomesList(downloadedFilePath);
        Assert.IsTrue(uniprotProteoms.Keys.Contains("UP000005640"));
        Assert.AreEqual("Homo sapiens (Human)", uniprotProteoms["UP000005640"]);
    }
    finally
    {
        // Remove the downloaded file even when an assertion above fails, so a
        // failed run does not leave it behind. File.Exists returns false for a
        // null path, which covers a failed download.
        if (File.Exists(downloadedFilePath))
        {
            File.Delete(downloadedFilePath);
        }
    }

    //return null for bad filepath
    string pathFromBadDirectory = ProteinDbRetriever.DownloadAvailableUniProtProteomes("bubba");
    Assert.IsNull(pathFromBadDirectory);

    //bad file path returns null
    Dictionary<string, string> listFromBadPath = ProteinDbRetriever.UniprotProteomesList("badFilePath");
    Assert.IsNull(listFromBadPath);

    //wrong file extension returns null
    string wrongExtensionPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", @"bad.fasta");
    Dictionary<string, string> listFromWrongExtension = ProteinDbRetriever.UniprotProteomesList(wrongExtensionPath);
    Assert.IsNull(listFromWrongExtension);
}
/// <summary>
/// Exercises <c>ProteinDbRetriever.RetrieveProteome</c> for proteome UP000008595
/// across several format / reviewed / compression / isoform combinations.
/// For each supported combination the test checks the returned file path and that
/// the file exists; for the first one it also loads the FASTA and checks the
/// protein count and the sequence of accession P33453. A non-existent storage
/// directory and the gff format are both expected to return <c>null</c>.
/// </summary>
public static void TestRetrieveUniProtProteome()
{
    string storageDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DatabaseTests");

    // Expected output files, one per supported combination below.
    // NOTE(review): the separator is kept as a literal backslash because the
    // assertions compare against the exact string returned by RetrieveProteome;
    // confirm how that path is built before switching to Path.Combine.
    string reviewedIsoformFasta = storageDirectory + "\\UP000008595_reviewed_isoform.fasta.gz";
    string unreviewedFasta = storageDirectory + "\\UP000008595_unreviewed.fasta";
    string reviewedXml = storageDirectory + "\\UP000008595_reviewed.xml.gz";
    string unreviewedXml = storageDirectory + "\\UP000008595_unreviewed.xml";

    const string expectedP33453Sequence =
        "MLLAICSRTIRQQGLNCPPAVTFTSSHMRPPIPSFLLWTEGSDVLMDFDLDTIPAGSVTGSSIGPKFKIKTQAASSFVHDFTFAHWCDASDMPLRDHFPLVNDTFDHWTPDFISQRLDGSKVVVEFTTNRSDQEQSLISAFNTKVGKYEVALHNRSTTSSILFGVVV" +
        "VSETTVVTNLNLNQQEVDELCFRFLVARAVHLEMTTKMIIPEYDDEDEDKRSREVKAAFHSVQPDWNVTEANFAPFSRRMFSNFAQMEPDKEYLAHIILDSLKQAQADLDGNHYLNESLTEQARLDRNREESLNMVKDFERDFNNAAQRSAWSHKSTVPFPGVIPKVSGDTTSLSRLVEL" +
        "PVITGGSDATIRAWRSAYGSVSNGTVERCDEDVERERRAALCSLTVEELEESKALRMKYHRCKIDNGMMDKLDLAMQGVEAKEFKNHPSIIKKRSKSKKTFPLTADTRDIDLFLHHDDLMFNNEHSQTPPAAMIEAVKAGADAQSLHGLDKSANPWYASALWFLGLPIGLWLFMCTCIGVEL" +
        "SISLKQHCGRQKFIIKKLRFFDIFLLIKPTNSGSHVFYSIAFPESAILGKLHRSQCFKGLQFEDGWFWTEFSSFKMSKLTNVVKCLSTGFNLFWFWRDYYEVPFWAGNEKDFQTGKQRANKMFKFCLLMLLEDKARTEEIATLSRYVMMEGFVSPPCIPKPQKMIEKLPNLARTKFQVWLISR" +
        "MLQTIIRVSDYPFKITAGHKSANWTGMFNWVTGEPIESTQKLISLFYLGYLKNKEESPERNASIGMYKKILEYEDKHPGRYTYLGLGDPPSDDTRFHEYSISLLKHLCIHAEHDLRRNWGESFKAMISRDIVDAIASLDLERLATLKASSNFNEEWYQKRGDGKTYHRSKVLEKVSKYVKKSSSH" +
        "VHHIMEECLRKVESQGCMHVCLFKKPQHGGLREIYVLGFEERVVQLVIETIARQICKRFKSETLTNPKQKLAIPETHGLRAVKTCGIHHETVATSDDAAKWNQCHHVTKFALMLCHFTDPLFHGFIIRGCSMFMKKRIMIDQSLIDIIDSHTTLETSDAYLQKIHRGYHGSLDDQPRWISRGGAFVQ" +
        "TETGMMQGILHYTSSLLHTLLQEWLRTFSQRFIRTRVSVDQRPDVLVDVLQSSDDSGMMISFPSTDKGATGKYRYLSALIFKYKKVIGKYLGIYSSVKSTNNTLHLLEFNSEFFFHINHNRPLLRWITACDTISEQESLASRQEEMYNNLTSVLEGGGSFSLVSFCQFGQLLLHYTLLGMTVSPLFLEY" +
        "IKLVSEIKDPSLGYFLMDHPFGSGLSGFKYNVWVAVQNSILGSRYRSLLEAIQNSDSAAPKKTLDTTTSGTFVQSTIIRFGDRKKWQRLVDRLNLPEDWLDVIDKNPEIVYRRPRDGFEVSLRIAEKVHSPGVSNSLSKGNCIIRVISSSVYILSRSILSDGLAWLYDEEEEVKRPLLYKVMNQPELDLHSRLTPA" +
        "QLSTLFPMMAEFEKLQTHLRSYMKIEGEFISKKKVITQTRVNILETERFLRARPEDLIADKWFGFTRTRMTPRTFKEEWENLTSVFPWLTGNPSETLELSPFQHHVQLRNFFSRLDLKGRDIRIIGAPIKKSSGVSNVSTAIRDNFFPRFVLTHIPDEAAMERIEAAGILKHALFLTVTGPYTDQSKLDMCRDF" +
        "ITSSEPITLKPNHGKTRTNVLSLFQDYFSKRGPDIIFNRIQMANCGVIGGFTSPQKPKEVDGKIVYTGDGVWRGIVDGFQIQLVITYMPKQKSNELKSITVNSDRCISALSSFCQSWCKEMGVFNTEDFSKTQRFSKASFFMHKFKISGSKQTLGAPIFIVSEKIFRPICWDPSKLEFRVRGNTLNLTY" +
        "KEVNPGAGQRMFNILSYTVKDTDVSDENAFKLMSLSPRHKFHGREPSTSWICMRALPISTIDKLLERILNRERISGSIDNERLAECFKNVMESTLRRKGVFLSEFSRATQKMLDGLSRDMLDFFAEAGLNDDLLLEEEPWLSGLDTFMLDDEAYLEEYNLGPFGVFSVEQEMNTKYYHHLLLD" +
        "SLVEDVIQKLSLDGLRKLFQEEEAPLEYKKEVIRLLNILQRDASQIKWKSRDLLSENMGLDVDDDMFG";

    try
    {
        //fasta; reviewed; compressed; isoforms
        //UP000008595 is Uukuniemi virus (strain S23) (Uuk) which only has 4 proteins
        string returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", storageDirectory, ProteinDbRetriever.ProteomeFormat.fasta, ProteinDbRetriever.Reviewed.yes, ProteinDbRetriever.Compress.yes, ProteinDbRetriever.IncludeIsoforms.yes);
        Assert.AreEqual(reviewedIsoformFasta, returnedFilePath);
        Assert.IsTrue(File.Exists(reviewedIsoformFasta));

        var proteinList = ProteinDbLoader.LoadProteinFasta(reviewedIsoformFasta, true, DecoyType.None, false, out var dbErrors,
            ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex,
            ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex);
        Assert.AreEqual(4, proteinList.Count);

        // Look the protein up once and assert on the result, so a missing
        // accession is reported as a failed assertion rather than as a
        // NullReferenceException when reading BaseSequence.
        var p33453 = proteinList.FirstOrDefault(p => p.Accession == "P33453");
        Assert.IsNotNull(p33453);
        Assert.AreEqual(expectedP33453Sequence, p33453.BaseSequence);
        File.Delete(reviewedIsoformFasta);

        //fasta; unreviewed; non-compressed; no isoforms
        returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", storageDirectory, ProteinDbRetriever.ProteomeFormat.fasta, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
        Assert.AreEqual(unreviewedFasta, returnedFilePath);
        Assert.IsTrue(File.Exists(unreviewedFasta));
        File.Delete(unreviewedFasta);

        //xml; reviewed; compressed; no isoforms
        returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", storageDirectory, ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.yes, ProteinDbRetriever.Compress.yes, ProteinDbRetriever.IncludeIsoforms.no);
        Assert.AreEqual(reviewedXml, returnedFilePath);
        Assert.IsTrue(File.Exists(reviewedXml));
        File.Delete(reviewedXml);

        //xml; unreviewed; non-compressed; no isoforms
        returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", storageDirectory, ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
        Assert.AreEqual(unreviewedXml, returnedFilePath);
        Assert.IsTrue(File.Exists(unreviewedXml));
        File.Delete(unreviewedXml);

        //junk null return
        returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", "pathDoesNotExists", ProteinDbRetriever.ProteomeFormat.xml, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
        Assert.IsNull(returnedFilePath);

        //we don't support filetypes other than fasta or xml currently
        //requesting gff or other file formats will return null for now.
        returnedFilePath = ProteinDbRetriever.RetrieveProteome("UP000008595", storageDirectory, ProteinDbRetriever.ProteomeFormat.gff, ProteinDbRetriever.Reviewed.no, ProteinDbRetriever.Compress.no, ProteinDbRetriever.IncludeIsoforms.no);
        Assert.IsNull(returnedFilePath);
    }
    finally
    {
        // Safety net: if an assertion above failed before its File.Delete ran,
        // remove whatever was downloaded so a failed run leaves nothing behind.
        foreach (string leftover in new[] { reviewedIsoformFasta, unreviewedFasta, reviewedXml, unreviewedXml })
        {
            if (File.Exists(leftover))
            {
                File.Delete(leftover);
            }
        }
    }
}
/// <summary>
/// Populates <c>AvailableUniProtProteomes</c> from the
/// "availableUniProtProteomes.txt.gz" file in the "Proteomes" folder under
/// <c>DataDir</c>, using <c>ProteinDbRetriever.UniprotProteomesList</c>.
/// </summary>
private static void LoadAvailableProteomes()
{
    string proteomeListPath = Path.Combine(DataDir, @"Proteomes", @"availableUniProtProteomes.txt.gz");
    AvailableUniProtProteomes = ProteinDbRetriever.UniprotProteomesList(proteomeListPath);
}