// Calls ProteinDbRetriever.RetrieveProteome for proteome UP000008595 with four
// format/reviewed/compress/isoform combinations, checking the returned path and
// that the file exists, then checks that a non-existent target directory and the
// gff format both yield a null return value.
public static void TestRetrieveUniProtProteome()
{
    string databaseDirectory = Path.Combine(TestContext.CurrentContext.TestDirectory, @"DatabaseTests");

    //UP000008595 is Uukuniemi virus (strain S23) (Uuk) which only has 4 proteins
    //fasta; reviewed; compressed; isoforms
    string returnedFilePath = ProteinDbRetriever.RetrieveProteome(
        "UP000008595",
        databaseDirectory,
        ProteinDbRetriever.ProteomeFormat.fasta,
        ProteinDbRetriever.Reviewed.yes,
        ProteinDbRetriever.Compress.yes,
        ProteinDbRetriever.IncludeIsoforms.yes);

    // NOTE(review): the expected path is built with a literal backslash because it is
    // compared for string equality with the value RetrieveProteome returns; confirm how
    // the retriever builds its path before switching this to Path.Combine.
    string expectedFilePath = databaseDirectory + "\\UP000008595_reviewed_isoform.fasta.gz";
    Assert.AreEqual(expectedFilePath, returnedFilePath);
    Assert.IsTrue(File.Exists(expectedFilePath));

    // NOTE(review): UniprotFullNameRegex is passed twice in a row here, exactly as in the
    // original call; confirm that this is intended.
    var proteinList = ProteinDbLoader.LoadProteinFasta(
        expectedFilePath,
        true,
        DecoyType.None,
        false,
        out _,
        ProteinDbLoader.UniprotAccessionRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotFullNameRegex,
        ProteinDbLoader.UniprotGeneNameRegex,
        ProteinDbLoader.UniprotOrganismRegex);

    Assert.AreEqual(4, proteinList.Count);

    // Look the protein up once; a missing accession now fails as an assertion instead of
    // a NullReferenceException when reading BaseSequence.
    var protein = proteinList.FirstOrDefault(p => p.Accession == "P33453");
    Assert.IsNotNull(protein);
    Assert.AreEqual(
        "MLLAICSRTIRQQGLNCPPAVTFTSSHMRPPIPSFLLWTEGSDVLMDFDLDTIPAGSVTGSSIGPKFKIKTQAASSFVHDFTFAHWCDASDMPLRDHFPLVNDTFDHWTPDFISQRLDGSKVVVEFTTNRSDQEQSLISAFNTKVGKYEVALHNRSTTSSILFGVVV" +
        "VSETTVVTNLNLNQQEVDELCFRFLVARAVHLEMTTKMIIPEYDDEDEDKRSREVKAAFHSVQPDWNVTEANFAPFSRRMFSNFAQMEPDKEYLAHIILDSLKQAQADLDGNHYLNESLTEQARLDRNREESLNMVKDFERDFNNAAQRSAWSHKSTVPFPGVIPKVSGDTTSLSRLVEL" +
        "PVITGGSDATIRAWRSAYGSVSNGTVERCDEDVERERRAALCSLTVEELEESKALRMKYHRCKIDNGMMDKLDLAMQGVEAKEFKNHPSIIKKRSKSKKTFPLTADTRDIDLFLHHDDLMFNNEHSQTPPAAMIEAVKAGADAQSLHGLDKSANPWYASALWFLGLPIGLWLFMCTCIGVEL" +
        "SISLKQHCGRQKFIIKKLRFFDIFLLIKPTNSGSHVFYSIAFPESAILGKLHRSQCFKGLQFEDGWFWTEFSSFKMSKLTNVVKCLSTGFNLFWFWRDYYEVPFWAGNEKDFQTGKQRANKMFKFCLLMLLEDKARTEEIATLSRYVMMEGFVSPPCIPKPQKMIEKLPNLARTKFQVWLISR" +
        "MLQTIIRVSDYPFKITAGHKSANWTGMFNWVTGEPIESTQKLISLFYLGYLKNKEESPERNASIGMYKKILEYEDKHPGRYTYLGLGDPPSDDTRFHEYSISLLKHLCIHAEHDLRRNWGESFKAMISRDIVDAIASLDLERLATLKASSNFNEEWYQKRGDGKTYHRSKVLEKVSKYVKKSSSH" +
        "VHHIMEECLRKVESQGCMHVCLFKKPQHGGLREIYVLGFEERVVQLVIETIARQICKRFKSETLTNPKQKLAIPETHGLRAVKTCGIHHETVATSDDAAKWNQCHHVTKFALMLCHFTDPLFHGFIIRGCSMFMKKRIMIDQSLIDIIDSHTTLETSDAYLQKIHRGYHGSLDDQPRWISRGGAFVQ" +
        "TETGMMQGILHYTSSLLHTLLQEWLRTFSQRFIRTRVSVDQRPDVLVDVLQSSDDSGMMISFPSTDKGATGKYRYLSALIFKYKKVIGKYLGIYSSVKSTNNTLHLLEFNSEFFFHINHNRPLLRWITACDTISEQESLASRQEEMYNNLTSVLEGGGSFSLVSFCQFGQLLLHYTLLGMTVSPLFLEY" +
        "IKLVSEIKDPSLGYFLMDHPFGSGLSGFKYNVWVAVQNSILGSRYRSLLEAIQNSDSAAPKKTLDTTTSGTFVQSTIIRFGDRKKWQRLVDRLNLPEDWLDVIDKNPEIVYRRPRDGFEVSLRIAEKVHSPGVSNSLSKGNCIIRVISSSVYILSRSILSDGLAWLYDEEEEVKRPLLYKVMNQPELDLHSRLTPA" +
        "QLSTLFPMMAEFEKLQTHLRSYMKIEGEFISKKKVITQTRVNILETERFLRARPEDLIADKWFGFTRTRMTPRTFKEEWENLTSVFPWLTGNPSETLELSPFQHHVQLRNFFSRLDLKGRDIRIIGAPIKKSSGVSNVSTAIRDNFFPRFVLTHIPDEAAMERIEAAGILKHALFLTVTGPYTDQSKLDMCRDF" +
        "ITSSEPITLKPNHGKTRTNVLSLFQDYFSKRGPDIIFNRIQMANCGVIGGFTSPQKPKEVDGKIVYTGDGVWRGIVDGFQIQLVITYMPKQKSNELKSITVNSDRCISALSSFCQSWCKEMGVFNTEDFSKTQRFSKASFFMHKFKISGSKQTLGAPIFIVSEKIFRPICWDPSKLEFRVRGNTLNLTY" +
        "KEVNPGAGQRMFNILSYTVKDTDVSDENAFKLMSLSPRHKFHGREPSTSWICMRALPISTIDKLLERILNRERISGSIDNERLAECFKNVMESTLRRKGVFLSEFSRATQKMLDGLSRDMLDFFAEAGLNDDLLLEEEPWLSGLDTFMLDDEAYLEEYNLGPFGVFSVEQEMNTKYYHHLLLD" +
        "SLVEDVIQKLSLDGLRKLFQEEEAPLEYKKEVIRLLNILQRDASQIKWKSRDLLSENMGLDVDDDMFG",
        protein.BaseSequence);

    File.Delete(expectedFilePath);

    //fasta; unreviewed; non-compressed; no isoforms
    returnedFilePath = ProteinDbRetriever.RetrieveProteome(
        "UP000008595",
        databaseDirectory,
        ProteinDbRetriever.ProteomeFormat.fasta,
        ProteinDbRetriever.Reviewed.no,
        ProteinDbRetriever.Compress.no,
        ProteinDbRetriever.IncludeIsoforms.no);
    expectedFilePath = databaseDirectory + "\\UP000008595_unreviewed.fasta";
    Assert.AreEqual(expectedFilePath, returnedFilePath);
    Assert.IsTrue(File.Exists(expectedFilePath));
    File.Delete(expectedFilePath);

    //xml; reviewed; compressed; no isoforms
    returnedFilePath = ProteinDbRetriever.RetrieveProteome(
        "UP000008595",
        databaseDirectory,
        ProteinDbRetriever.ProteomeFormat.xml,
        ProteinDbRetriever.Reviewed.yes,
        ProteinDbRetriever.Compress.yes,
        ProteinDbRetriever.IncludeIsoforms.no);
    expectedFilePath = databaseDirectory + "\\UP000008595_reviewed.xml.gz";
    Assert.AreEqual(expectedFilePath, returnedFilePath);
    Assert.IsTrue(File.Exists(expectedFilePath));
    File.Delete(expectedFilePath);

    //xml; unreviewed; non-compressed; no isoforms
    returnedFilePath = ProteinDbRetriever.RetrieveProteome(
        "UP000008595",
        databaseDirectory,
        ProteinDbRetriever.ProteomeFormat.xml,
        ProteinDbRetriever.Reviewed.no,
        ProteinDbRetriever.Compress.no,
        ProteinDbRetriever.IncludeIsoforms.no);
    expectedFilePath = databaseDirectory + "\\UP000008595_unreviewed.xml";
    Assert.AreEqual(expectedFilePath, returnedFilePath);
    Assert.IsTrue(File.Exists(expectedFilePath));
    File.Delete(expectedFilePath);

    //junk null return
    returnedFilePath = ProteinDbRetriever.RetrieveProteome(
        "UP000008595",
        "pathDoesNotExists",
        ProteinDbRetriever.ProteomeFormat.xml,
        ProteinDbRetriever.Reviewed.no,
        ProteinDbRetriever.Compress.no,
        ProteinDbRetriever.IncludeIsoforms.no);
    Assert.IsNull(returnedFilePath);

    //we don't support filetypes other than fasta or xml currently
    //requesting gff or other file formats will return null for now.
    returnedFilePath = ProteinDbRetriever.RetrieveProteome(
        "UP000008595",
        databaseDirectory,
        ProteinDbRetriever.ProteomeFormat.gff,
        ProteinDbRetriever.Reviewed.no,
        ProteinDbRetriever.Compress.no,
        ProteinDbRetriever.IncludeIsoforms.no);
    Assert.IsNull(returnedFilePath);
}
// Deletes the three .dbc files under TestFiles, in the order listed.
public static void ClassCleanup()
{
    string[] leftoverFiles =
    {
        @"TestFiles\AttackAnimKits_Test.dbc",
        @"TestFiles\CharacterFacialHairStyles_Test.dbc",
        @"TestFiles\WowError_Strings_Test.dbc",
    };

    foreach (string leftover in leftoverFiles)
    {
        File.Delete(leftover);
    }
}
//delete mzLib\Test\bin\x64\Debug to update your local unimod list
// Loads the UniMod, PSI-MOD and UniProt modification lists through Loaders, checks a few
// known entries, writes the UniProt and UniMod modifications to test.txt, reads that file
// back with PtmListLoader and checks that the modification count survives the round trip.
public void FilesLoading()
{
    Loaders.LoadElements();

    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string uniModPath = Path.Combine(testDirectory, "unimod_tables2.xml");
    string psiModPath = Path.Combine(testDirectory, "PSI-MOD.obo2.xml");
    string uniProtPath = Path.Combine(testDirectory, "ptmlist2.txt");

    // UniModPTMs
    var unimodMods = Loaders.LoadUnimod(uniModPath).ToList();
    Assert.AreEqual(2702, unimodMods.Count); // UniMod PTM list may be updated at some point, causing the unit test to fail

    // Total number of neutral-loss values across all dissociation types for HexNAc(2).
    Modification testMod = unimodMods.First(m => m.OriginalId.Equals("HexNAc(2)"));
    int neutralLossCount = testMod.NeutralLosses.Sum(item => item.Value.Count);
    Assert.AreEqual(2, neutralLossCount);

    // PsiMod PTMs
    var psiModDeserialized = Loaders.LoadPsiMod(psiModPath);

    // N6,N6,N6-trimethyllysine
    var trimethylLysine = psiModDeserialized.Items
        .OfType<UsefulProteomicsDatabases.Generated.oboTerm>()
        .First(b => b.id.Equals("MOD:00083"));
    Assert.AreEqual("1+", trimethylLysine.xref_analog.First(b => b.dbname.Equals("FormalCharge")).name);

    // Phosphoserine
    var phosphoserine = psiModDeserialized.Items
        .OfType<UsefulProteomicsDatabases.Generated.oboTerm>()
        .First(b => b.id.Equals("MOD:00046"));
    Assert.IsFalse(phosphoserine.xref_analog.Any(b => b.dbname.Equals("FormalCharge")));

    Dictionary<string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);

    // UniProt PTMs
    var uniprotPtms = Loaders.LoadUniprot(uniProtPath, formalChargesDictionary).ToList();
    Assert.LessOrEqual(300, uniprotPtms.Count); // UniProt PTM list may be updated at some point, causing the unit test to fail

    // write UniProt and UniMod PTMs to a file
    string roundTripPath = Path.Combine(testDirectory, "test.txt");
    using (StreamWriter w = new StreamWriter(roundTripPath))
    {
        foreach (var nice in uniprotPtms)
        {
            w.WriteLine(nice.ToString());
            w.WriteLine("//");
        }
        foreach (var nice in unimodMods)
        {
            w.WriteLine(nice.ToString());
            w.WriteLine("//");
        }
    }

    // read in the file and make sure that it has the same number of PTMs
    var sampleModList = PtmListLoader.ReadModsFromFile(roundTripPath, out _).ToList();
    Assert.AreEqual(uniprotPtms.Count + unimodMods.Count, sampleModList.Count);

    // First modification read back whose IdWithMotif mentions "Acetyl".
    var thisMod = sampleModList.First(mod => mod.IdWithMotif != null && mod.IdWithMotif.Contains("Acetyl"));
    Assert.IsTrue(thisMod.MonoisotopicMass > 42);
    Assert.IsTrue(thisMod.MonoisotopicMass < 43);

    // Remove the three list files so the next run starts without them.
    File.Delete(uniModPath);
    File.Delete(psiModPath);
    File.Delete(uniProtPath);
}