/// <summary>
/// Verifies that reading "sampleModFileDouble2.txt" from the DatabaseTests folder
/// yields exactly two modifications.
/// </summary>
public void CompactFormReading2()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sampleModFileDouble2.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath, out var errors);

    Assert.AreEqual(2, loadedMods.Count());
}
/// <summary>
/// Runs an EverythingRunnerEngine over four tasks (one calibration, one GPTMD and two
/// modern searches) against an mzML file and an XML database generated inside the test,
/// then deletes the generated artifacts.
/// </summary>
public static void TestEverythingRunner()
{
    // Register the modifications from every file in the "Mods" folder.
    foreach (string modsPath in Directory.GetFiles(@"Mods"))
    {
        var modsFromFile = PtmListLoader.ReadModsFromFile(modsPath, out var fmww);
        GlobalVariables.AddMods(modsFromFile, false);
    }

    var calibrationTask = new CalibrationTask
    {
        CommonParameters = new CommonParameters(digestionParams: new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain)),
        CalibrationParameters = new CalibrationParameters
        {
            WriteIntermediateFiles = true,
            NumFragmentsNeededForEveryIdentification = 6
        }
    };
    var gptmdTask = new GptmdTask
    {
        CommonParameters = new CommonParameters()
    };
    var parsimonySearchTask = new SearchTask
    {
        CommonParameters = new CommonParameters(),
        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            SearchTarget = true,
            SearchType = SearchType.Modern
        }
    };
    var plainSearchTask = new SearchTask
    {
        CommonParameters = new CommonParameters(),
        SearchParameters = new SearchParameters
        {
            SearchType = SearchType.Modern
        }
    };

    var tasksToRun = new List<(string, MetaMorpheusTask)>
    {
        ("task1", calibrationTask),
        ("task2", gptmdTask),
        ("task3", parsimonySearchTask),
        ("task4", plainSearchTask)
    };

    // Variable and fixed modifications are the known mods named by the calibration task's parameters.
    List<Modification> variableMods = GlobalVariables.AllModsKnown
        .OfType<Modification>()
        .Where(mod => calibrationTask.CommonParameters.ListOfModsVariable.Contains((mod.ModificationType, mod.IdWithMotif)))
        .ToList();
    List<Modification> fixedMods = GlobalVariables.AllModsKnown
        .OfType<Modification>()
        .Where(mod => calibrationTask.CommonParameters.ListOfModsFixed.Contains((mod.ModificationType, mod.IdWithMotif)))
        .ToList();

    // Generate data for files
    var parentProtein = new Protein("MPEPTIDEKANTHE", "accession1");
    var peptides = parentProtein.Digest(calibrationTask.CommonParameters.DigestionParams, fixedMods, variableMods).ToList();
    Assert.AreEqual(3, peptides.Count);

    PeptideWithSetModifications firstPeptide = peptides[0];
    PeptideWithSetModifications thirdPeptide = peptides[2];

    // A second protein carrying a modification at position 3; it is digested for spectra
    // but is not written to the database below.
    ModificationMotif.TryGetMotif("E", out ModificationMotif motif);
    var modsByPosition = new Dictionary<int, List<Modification>>
    {
        { 3, new List<Modification> { new Modification(_originalId: "21", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 21.981943) } }
    };
    var proteinNotInDatabase = new Protein("MPEPTIDEK", "accession2", "organism", new List<Tuple<string, string>>(), modsByPosition);
    peptides = proteinNotInDatabase.Digest(calibrationTask.CommonParameters.DigestionParams, fixedMods, variableMods).ToList();

    MsDataFile spectraFile = new TestDataFile(new List<PeptideWithSetModifications> { firstPeptide, thirdPeptide, peptides[1] });

    var proteinWithChain = new Protein(
        "MAACNNNCAA",
        "accession3",
        "organism",
        new List<Tuple<string, string>>(),
        new Dictionary<int, List<Modification>>(),
        new List<ProteolysisProduct> { new ProteolysisProduct(4, 8, "chain") },
        "name2",
        "fullname2");

    string mzmlName = @"ok.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(spectraFile, mzmlName, false);

    string xmlName = "okk.xml";
    ProteinDbWriter.WriteXmlDatabase(
        new Dictionary<string, HashSet<Tuple<int, Modification>>>(),
        new List<Protein> { parentProtein, proteinWithChain },
        xmlName);

    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestEverythingRunner");

    // RUN!
    var runner = new EverythingRunnerEngine(
        tasksToRun,
        new List<string> { mzmlName },
        new List<DbForTask> { new DbForTask(xmlName, false) },
        outputFolder);
    runner.Run();

    // Remove the generated spectra file, database and output folder.
    File.Delete(Path.Combine(TestContext.CurrentContext.TestDirectory, mzmlName));
    File.Delete(Path.Combine(TestContext.CurrentContext.TestDirectory, xmlName));
    Directory.Delete(outputFolder, true);
}
/// <summary>
/// Verifies that the given file in the DatabaseTests folder yields exactly two modifications.
/// </summary>
/// <param name="filename">Name of the modification file inside the DatabaseTests folder.</param>
public static void CompactFormReadingGeneral(string filename)
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", filename);
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath, out var errors);

    Assert.AreEqual(2, loadedMods.Count());
}
/// <summary>
/// Verifies that the given file in the DatabaseTests folder yields no modifications.
/// </summary>
/// <param name="filename">Name of the modification file inside the DatabaseTests folder.</param>
public static void SampleModFileLoadingFail1General(string filename)
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", filename);
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath, out var errors).ToList();

    Assert.AreEqual(0, loadedMods.Count);
}
/// <summary>
/// Rebuilds the theoretical database: clears previous results, loads modifications and
/// protein databases from the input files and the "Mods" folder, ranks the modifications,
/// builds the PTM-set lookup structures and finally calls make_theoretical_proteoforms.
/// Returns without doing anything when ready_to_make_database reports false.
/// </summary>
/// <param name="current_directory">Directory that contains the "Mods" folder to read from.</param>
public void get_theoretical_proteoforms(string current_directory)
{
    if (!ready_to_make_database(current_directory))
    {
        return;
    }

    //Clear out data from potential previous runs
    foreach (ProteoformCommunity community in Sweet.lollipop.decoy_proteoform_communities.Values)
    {
        community.theoretical_proteoforms = new TheoreticalProteoform[0];
    }
    theoretical_proteins.Clear();

    //Read the UniProt-XML and ptmlist
    var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(current_directory, "Mods", "PSI-MOD.obo.xml"));
    Dictionary<string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
    List<Modification> all_known_modifications = Sweet.lollipop.get_files(Sweet.lollipop.input_files, Purpose.PtmList)
        .SelectMany(file => PtmListLoader.ReadModsFromFile(file.complete_path, formalChargesDictionary, out List<(Modification, string)> filteredModificationsWithWarnings))
        .ToList();
    uniprotModifications = make_modification_dictionary(all_known_modifications);

    Parallel.ForEach(Sweet.lollipop.get_files(Sweet.lollipop.input_files, Purpose.ProteinDatabase).ToList(), database =>
    {
        if (database.extension == ".xml")
        {
            // One lock guards both the dictionary insert and the list append. all_known_modifications
            // is handed to LoadProteinXML here and appended to right after; guarding those two steps
            // with different locks would let one thread append while another is still reading the list.
            lock (theoretical_proteins)
            {
                theoretical_proteins.Add(database, ProteinDbLoader.LoadProteinXML(database.complete_path, true, DecoyType.None, all_known_modifications, database.ContaminantDB, Sweet.lollipop.mod_types_to_exclude, out Dictionary<string, Modification> um).ToArray());
                all_known_modifications.AddRange(ProteinDbLoader.GetPtmListFromProteinXml(database.complete_path).Where(m => !Sweet.lollipop.mod_types_to_exclude.Contains(m.ModificationType)));
            }
        }
        else if (database.extension == ".fasta")
        {
            lock (theoretical_proteins)
            {
                // NOTE(review): UniprotFullNameRegex is passed for two consecutive regex arguments;
                // confirm against the LoadProteinFasta signature whether the second one is intended.
                theoretical_proteins.Add(database, ProteinDbLoader.LoadProteinFasta(database.complete_path, true, DecoyType.None, database.ContaminantDB, ProteinDbLoader.UniprotAccessionRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotFullNameRegex, ProteinDbLoader.UniprotGeneNameRegex, ProteinDbLoader.UniprotOrganismRegex, out var dbErrors).ToArray());
            }
        }
    });

    foreach (string filename in Directory.GetFiles(Path.Combine(current_directory, "Mods")))
    {
        bool is_variable_file = filename.EndsWith("variable.txt");

        List<Modification> new_mods = !is_variable_file || Sweet.lollipop.methionine_oxidation ?
            PtmListLoader.ReadModsFromFile(filename, formalChargesDictionary, out List<(Modification, string)> filteredModificationsWithWarnings).ToList() :
            new List<Modification>(); // Empty variable modifications if not selected

        if (is_variable_file)
        {
            variableModifications = new_mods;
        }
        all_known_modifications.AddRange(new_mods);
    }

    // Remove duplicates before rebuilding the modification dictionary.
    all_known_modifications = new HashSet<Modification>(all_known_modifications).ToList();
    uniprotModifications = make_modification_dictionary(all_known_modifications);
    all_mods_with_mass = uniprotModifications.SelectMany(kv => kv.Value).Concat(variableModifications).ToList();
    Sweet.lollipop.modification_ranks = rank_mods(theoretical_proteins, variableModifications, all_mods_with_mass);
    unlocalized_lookup = make_unlocalized_lookup(all_mods_with_mass.Concat(new List<Modification> { new Ptm().modification }));

    // NOTE(review): this path is built from Environment.CurrentDirectory, not current_directory — confirm that is intended.
    load_unlocalized_names(Path.Combine(Environment.CurrentDirectory, "Mods", "stored_mods.modnames"));

    //this is for ptmsets --> used in RELATIONS
    all_possible_ptmsets = PtmCombos.generate_all_ptmsets(2, all_mods_with_mass, Sweet.lollipop.modification_ranks, Sweet.lollipop.mod_rank_first_quartile / 2).ToList();
    for (int i = 2; i <= Math.Max(ptmset_max_number_of_a_kind, Sweet.lollipop.max_ptms); i++) // the method above doesn't make 2 or more of a kind, so we make it here
    {
        all_possible_ptmsets.AddRange(all_mods_with_mass.Select(m => new PtmSet(Enumerable.Repeat(new Ptm(-1, m), i).ToList(), Sweet.lollipop.modification_ranks, Sweet.lollipop.mod_rank_first_quartile / 2)));
    }

    //Generate lookup table for ptm sets based on rounded mass of eligible PTMs -- used in forming ET relations
    possible_ptmset_dictionary = make_ptmset_dictionary();

    make_theoretical_proteoforms();
}
/// <summary>
/// Verifies that fully reading the given file in the DatabaseTests folder throws an
/// MzLibException whose message equals the expected text.
/// </summary>
/// <param name="filename">Name of the modification file inside the DatabaseTests folder.</param>
/// <param name="errorMessage">Exact message the thrown exception must carry.</param>
public static void SampleModFileLoadingFail3General(string filename, string errorMessage)
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", filename);
    var expectedFailure = Throws.TypeOf<MzLibException>().With.Property("Message").EqualTo(errorMessage);

    Assert.That(
        () => PtmListLoader.ReadModsFromFile(modFilePath, out var errors).ToList(),
        expectedFailure);
}
/// <summary>
/// Verifies that "sampleModFileFail_missingPosition.txt" yields no modifications.
/// The fixture is named for a missing position entry.
/// </summary>
public static void SampleModFileLoadingFail5General_missingPosition()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sampleModFileFail_missingPosition.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath).ToList();

    Assert.AreEqual(0, loadedMods.Count);
}
/// <summary>
/// Smoke test: reads "sampleModFile.txt" from the DatabaseTests folder and expects the
/// read to complete without throwing.
/// </summary>
public void SampleModFileLoading()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sampleModFile.txt");

    // ReadModsFromFile returns a sequence. Materialise it so the file is actually parsed
    // even if the loader is lazily evaluated; otherwise this test could never fail.
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath).ToList();

    Assert.IsNotNull(loadedMods);
}
/// <summary>
/// Verifies that the first modification read from "cfInNL.txt" is a
/// ModificationWithMassAndCf carrying exactly two neutral losses.
/// </summary>
public static void LoadModWithNl()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "cfInNL.txt");
    var firstMod = PtmListLoader.ReadModsFromFile(modFilePath).First();

    // Check the type explicitly so a wrong or missing modification fails with a clear
    // assertion message instead of a NullReferenceException from an unchecked 'as' cast.
    Assert.IsInstanceOf<ModificationWithMassAndCf>(firstMod);
    var modWithFormula = (ModificationWithMassAndCf)firstMod;

    Assert.AreEqual(2, modWithFormula.neutralLosses.Count);
}
/// <summary>
/// Verifies that "sampleModFileFail1.txt" yields no modifications.
/// Per the original author's note, the TG entry in this fixture is not valid.
/// </summary>
public static void SampleModFileLoadingFail1General()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sampleModFileFail1.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath).ToList();

    Assert.AreEqual(0, loadedMods.Count);
}
/// <summary>
/// Verifies that "sampleModFileDouble.txt" in the test directory yields exactly two modifications.
/// </summary>
public void CompactFormReading()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "sampleModFileDouble.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath);

    Assert.AreEqual(2, loadedMods.Count());
}
/// <summary>
/// Rebuilds the theoretical database: clears previous results, loads modifications and
/// protein XML databases from the input files and the "Mods" folder, ranks the modifications,
/// builds the PTM-set lookup structures, expands and orders the protein entries, and processes
/// target and decoy entries. Returns without doing anything when ready_to_make_database
/// reports false.
/// </summary>
/// <param name="current_directory">Directory that contains the "Mods" folder to read from.</param>
public void get_theoretical_proteoforms(string current_directory)
{
    if (!ready_to_make_database(current_directory))
    {
        return;
    }

    //Clear out data from potential previous runs
    foreach (ProteoformCommunity community in SaveState.lollipop.decoy_proteoform_communities.Values)
    {
        community.theoretical_proteoforms = new TheoreticalProteoform[0];
    }
    theoretical_proteins.Clear();

    //Read the UniProt-XML and ptmlist
    List<ModificationWithLocation> all_known_modifications = SaveState.lollipop.get_files(SaveState.lollipop.input_files, Purpose.PtmList).SelectMany(file => PtmListLoader.ReadModsFromFile(file.complete_path)).ToList();
    uniprotModifications = make_modification_dictionary(all_known_modifications);

    Parallel.ForEach(SaveState.lollipop.get_files(SaveState.lollipop.input_files, Purpose.ProteinDatabase).ToList(), database =>
    {
        // The out target is local to each invocation so worker threads do not share it.
        Dictionary<string, Modification> um;

        // One lock guards both the dictionary insert and the list append. all_known_modifications
        // is handed to LoadProteinXML here and appended to right after; guarding those two steps
        // with different locks would let one thread append while another is still reading the list.
        lock (theoretical_proteins)
        {
            theoretical_proteins.Add(database, ProteinDbLoader.LoadProteinXML(database.complete_path, false, all_known_modifications, database.ContaminantDB, SaveState.lollipop.mod_types_to_exclude, out um).ToArray());
            all_known_modifications.AddRange(ProteinDbLoader.GetPtmListFromProteinXml(database.complete_path).OfType<ModificationWithLocation>().Where(m => !SaveState.lollipop.mod_types_to_exclude.Contains(m.modificationType)));
        }
    });

    foreach (string filename in Directory.GetFiles(Path.Combine(current_directory, "Mods")))
    {
        bool is_variable_file = filename.EndsWith("variable.txt");

        var new_mods = !is_variable_file || SaveState.lollipop.methionine_oxidation ?
            PtmListLoader.ReadModsFromFile(filename) :
            new List<ModificationWithLocation>(); // Empty variable modifications if not selected

        if (is_variable_file)
        {
            variableModifications = new_mods.OfType<ModificationWithMass>().ToList();
        }

        if (filename.EndsWith("intact_mods.txt"))
        {
            // Set lookup instead of scanning a list for every candidate modification.
            HashSet<double> old_masses = new HashSet<double>(all_known_modifications.OfType<ModificationWithMass>().Select(m => m.monoisotopicMass));
            new_mods = new_mods.OfType<ModificationWithMass>().Where(m => !old_masses.Contains(m.monoisotopicMass)); // get rid of the unlocalized mods if they're already present
        }

        all_known_modifications.AddRange(new_mods);
    }

    // Remove duplicates before rebuilding the modification dictionary.
    all_known_modifications = new HashSet<ModificationWithLocation>(all_known_modifications).ToList();
    uniprotModifications = make_modification_dictionary(all_known_modifications);
    all_mods_with_mass = uniprotModifications.SelectMany(kv => kv.Value).OfType<ModificationWithMass>().Concat(variableModifications).ToList();
    SaveState.lollipop.modification_ranks = rank_mods(theoretical_proteins, variableModifications, all_mods_with_mass);
    unlocalized_lookup = make_unlocalized_lookup(all_mods_with_mass.Concat(new List<ModificationWithMass> { new Ptm().modification }));

    // NOTE(review): this path is built from Environment.CurrentDirectory, not current_directory — confirm that is intended.
    load_unlocalized_names(Path.Combine(Environment.CurrentDirectory, "Mods", "stored_mods.modnames"));

    //Generate all two-member sets and all three-member (or greater) sets of the same modification (three-member combinatorics gets out of hand for assignment)
    all_possible_ptmsets = PtmCombos.generate_all_ptmsets(Math.Min(2, SaveState.lollipop.max_ptms), all_mods_with_mass, SaveState.lollipop.modification_ranks, SaveState.lollipop.mod_rank_first_quartile / 2).ToList();
    for (int i = 2; i < SaveState.lollipop.max_ptms + 1; i++)
    {
        all_possible_ptmsets.AddRange(all_mods_with_mass.Select(m => new PtmSet(Enumerable.Repeat(new Ptm(-1, m), i).ToList(), SaveState.lollipop.modification_ranks, SaveState.lollipop.mod_rank_first_quartile / 2)));
    }

    //Generate lookup table for ptm sets based on rounded mass of eligible PTMs -- used in forming ET relations
    possible_ptmset_dictionary = make_ptmset_dictionary();

    expanded_proteins = expand_protein_entries(theoretical_proteins.Values.SelectMany(p => p).ToArray());
    aaIsotopeMassList = new AminoAcidMasses(SaveState.lollipop.carbamidomethylation, SaveState.lollipop.natural_lysine_isotope_abundance, SaveState.lollipop.neucode_light_lysine, SaveState.lollipop.neucode_heavy_lysine).AA_Masses;
    if (SaveState.lollipop.combine_identical_sequences)
    {
        expanded_proteins = group_proteins_by_sequence(expanded_proteins);
    }
    expanded_proteins = expanded_proteins.OrderBy(x => x.OneBasedPossibleLocalizedModifications.Count).ToArray(); // Take on harder problems first to use parallelization more effectively
    process_entries(expanded_proteins, variableModifications);
    process_decoys(expanded_proteins, variableModifications);

    if (SaveState.lollipop.combine_theoretical_proteoforms_byMass)
    {
        SaveState.lollipop.target_proteoform_community.theoretical_proteoforms = group_proteoforms_by_mass(SaveState.lollipop.target_proteoform_community.theoretical_proteoforms);
        foreach (ProteoformCommunity community in SaveState.lollipop.decoy_proteoform_communities.Values)
        {
            community.theoretical_proteoforms = group_proteoforms_by_mass(community.theoretical_proteoforms);
        }
    }
}
/// <summary>
/// Loads the element definitions, reads every PTM-list input file registered on
/// Sweet.lollipop, and returns the modification dictionary built from all of them.
/// </summary>
/// <returns>The result of make_modification_dictionary over all modifications read.</returns>
public static Dictionary<string, List<Modification>> read_mods()
{
    Loaders.LoadElements();

    var all_modifications = new List<Modification>();
    foreach (var ptm_file in Sweet.lollipop.get_files(Sweet.lollipop.input_files, Purpose.PtmList))
    {
        var mods_in_file = PtmListLoader.ReadModsFromFile(ptm_file.complete_path, out List<(Modification, string)> filteredModificationsWithWarnings);
        all_modifications.AddRange(mods_in_file);
    }

    return Sweet.lollipop.theoretical_database.make_modification_dictionary(all_modifications);
}
/// <summary>
/// Digests two test proteins with the "CNBr", "CNBr_old" and "CNBr_N" entries of the
/// DoubleProtease.tsv protease dictionary and checks which proteases carry a cleavage
/// modification and what monoisotopic masses the resulting peptides have.
/// </summary>
public static void CNBrProteinDigestion()
{
    var cleavageMods = PtmListLoader.ReadModsFromFile(Path.Combine(TestContext.CurrentContext.TestDirectory, "ModificationTests", "ProteaseMods.txt"), out var errors).ToList();
    var cTerminalMetProtein = new Protein("PEPTIDEMPEPTIDEM", null);
    var nTerminalMetProtein = new Protein("MPEPTIDEMPEPTIDE", null);

    string proteaseFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DoubleProtease.tsv");
    Assert.That(File.Exists(proteaseFilePath));

    var testProteases = ProteaseDictionary.LoadProteaseDictionary(proteaseFilePath, cleavageMods);
    ProteaseDictionary.Dictionary = ProteaseDictionary.LoadProteaseDictionary(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ProteolyticDigestion", "proteases.tsv"), cleavageMods);

    // All three digestions share the same settings and differ only in the protease name.
    DigestionParams ParamsFor(string proteaseName)
    {
        return new DigestionParams(
            protease: proteaseName,
            maxMissedCleavages: 0,
            minPeptideLength: 1,
            initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain);
    }

    var cnbr = testProteases["CNBr"];
    DigestionParams cnbrParams = ParamsFor(cnbr.Name);
    var noVariableMods1 = new List<Modification>();

    var cnbrOld = testProteases["CNBr_old"];
    DigestionParams cnbrOldParams = ParamsFor(cnbrOld.Name);
    var noVariableMods2 = new List<Modification>();

    var cnbrN = testProteases["CNBr_N"];
    DigestionParams cnbrNParams = ParamsFor(cnbrN.Name);

    var cnbrPeptides = cTerminalMetProtein.Digest(cnbrParams, new List<Modification>(), noVariableMods1).ToList();
    var cnbrOldPeptides = cTerminalMetProtein.Digest(cnbrOldParams, new List<Modification>(), noVariableMods2).ToList();
    var cnbrNPeptides = nTerminalMetProtein.Digest(cnbrNParams, new List<Modification>(), noVariableMods1).ToList();

    // "CNBr_N" carries a cleavage modification targeting M, so its two peptides differ in mass.
    Assert.AreNotEqual(null, cnbrN.CleavageMod);
    Assert.AreEqual("M", cnbrN.CleavageMod.Target.ToString());
    Assert.AreNotEqual(cnbrNPeptides[0].MonoisotopicMass, cnbrNPeptides[1].MonoisotopicMass);
    Assert.AreEqual(882.39707781799996, cnbrNPeptides[1].MonoisotopicMass);
    Assert.AreEqual(930.400449121, cnbrNPeptides[0].MonoisotopicMass);

    // "CNBr_old" has no cleavage modification; "CNBr" has one targeting M.
    Assert.AreEqual(null, cnbrOld.CleavageMod);
    Assert.AreNotEqual(null, cnbr.CleavageMod);
    Assert.AreEqual("M", cnbr.CleavageMod.Target.ToString());

    Assert.AreEqual(cnbrPeptides[1].MonoisotopicMass, cnbrOldPeptides[1].MonoisotopicMass);
    Assert.AreEqual(cnbrPeptides[1].MonoisotopicMass, cnbrOldPeptides[0].MonoisotopicMass);
    Assert.AreEqual(cnbrOldPeptides[0].MonoisotopicMass, cnbrOldPeptides[1].MonoisotopicMass);
    Assert.AreNotEqual(cnbrPeptides[0].MonoisotopicMass, cnbrPeptides[1].MonoisotopicMass);
    Assert.AreNotEqual(cnbrPeptides[0].MonoisotopicMass, cnbrOldPeptides[0].MonoisotopicMass);
    Assert.AreNotEqual(cnbrPeptides[0].MonoisotopicMass, cnbrOldPeptides[1].MonoisotopicMass);
    Assert.AreEqual(882.39707781799996, cnbrPeptides[0].MonoisotopicMass);
    Assert.AreEqual(930.400449121, cnbrPeptides[1].MonoisotopicMass);
}
/// <summary>
/// Verifies that "sampleModFileFail_missingChemicalFormulaAndMonoisotopicMass.txt" yields
/// no modifications. The fixture is named for a missing chemical formula and monoisotopic mass.
/// </summary>
public static void SampleModFileLoadingFail5General_missingMonoisotopicMassAndChemicalFormula()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sampleModFileFail_missingChemicalFormulaAndMonoisotopicMass.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath).ToList();

    Assert.AreEqual(0, loadedMods.Count);
}
/// <summary>
/// Loads the element, UniMod, PSI-MOD and UniProt fixture files, checks a few known
/// entries, writes all loaded modifications to "test.txt" and verifies that reading that
/// file back yields the expected number of modifications.
/// </summary>
public void FilesLoading()
{
    Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements2.dat"));

    var unimodMods = Loaders.LoadUnimod(Path.Combine(TestContext.CurrentContext.TestDirectory, "unimod_tables2.xml")).ToList();
    Assert.AreEqual(2639, unimodMods.Count); // UniMod PTM list may be updated at some point, causing the unit test to fail

    // HexNAc(2) is expected to carry two neutral-loss masses in total across its dissociation types.
    Modification hexNAc2 = unimodMods.FindAll(m => m.OriginalId.Equals("HexNAc(2)")).First();
    int neutralLossCount = hexNAc2.NeutralLosses.Sum(entry => entry.Value.Count);
    Assert.AreEqual(2, neutralLossCount);

    var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(TestContext.CurrentContext.TestDirectory, "PSI-MOD.obo2.xml"));

    // N6,N6,N6-trimethyllysine
    var trimethylLysine = psiModDeserialized.Items.OfType<UsefulProteomicsDatabases.Generated.oboTerm>().First(b => b.id.Equals("MOD:00083"));
    Assert.AreEqual("1+", trimethylLysine.xref_analog.First(b => b.dbname.Equals("FormalCharge")).name);

    // Phosphoserine
    var phosphoserine = psiModDeserialized.Items.OfType<UsefulProteomicsDatabases.Generated.oboTerm>().First(b => b.id.Equals("MOD:00046"));
    Assert.IsFalse(phosphoserine.xref_analog.Any(b => b.dbname.Equals("FormalCharge")));

    Dictionary<string, int> formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
    var uniprotPtms = Loaders.LoadUniprot(Path.Combine(TestContext.CurrentContext.TestDirectory, "ptmlist2.txt"), formalChargesDictionary).ToList();
    Assert.AreEqual(334, uniprotPtms.Count); // UniProt PTM list may be updated at some point, causing the unit test to fail

    // Write every loaded modification, UniProt entries first, each followed by a "//" line.
    string roundTripPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "test.txt");
    using (StreamWriter writer = new StreamWriter(roundTripPath))
    {
        void WriteEntries(IEnumerable<object> entries)
        {
            foreach (object entry in entries)
            {
                writer.WriteLine(entry.ToString());
                writer.WriteLine("//");
            }
        }

        WriteEntries(uniprotPtms);
        WriteEntries(unimodMods);
    }

    var sampleModList = PtmListLoader.ReadModsFromFile(Path.Combine(TestContext.CurrentContext.TestDirectory, "test.txt")).ToList();
    Assert.AreEqual(2973, sampleModList.Count);

    // The first modification whose id mentions "Acetyl" must have a mass between 42 and 43.
    List<Modification> acetylMods = sampleModList
        .Where(mod => mod.IdWithMotif != null && mod.IdWithMotif.Contains("Acetyl"))
        .ToList();
    var firstAcetyl = acetylMods.First();
    Assert.IsTrue(firstAcetyl.MonoisotopicMass > 42);
    Assert.IsTrue(firstAcetyl.MonoisotopicMass < 43);
}
/// <summary>
/// Loads the element definitions from "elements.dat" in the test directory, reads every
/// PTM-list input file registered on SaveState.lollipop, and returns the modification
/// dictionary built from all of them.
/// </summary>
/// <returns>The result of make_modification_dictionary over all modifications read.</returns>
public static Dictionary<string, List<Modification>> read_mods()
{
    Loaders.LoadElements(Path.Combine(TestContext.CurrentContext.TestDirectory, "elements.dat"));

    var all_modifications = new List<ModificationWithLocation>();
    foreach (var ptm_file in SaveState.lollipop.get_files(SaveState.lollipop.input_files, Purpose.PtmList))
    {
        all_modifications.AddRange(PtmListLoader.ReadModsFromFile(ptm_file.complete_path));
    }

    return SaveState.lollipop.theoretical_database.make_modification_dictionary(all_modifications);
}
/// <summary>
/// Verifies that "sampleModFileFail2.txt" in the DatabaseTests folder yields no modifications.
/// </summary>
public void SampleModFileLoadingFail2()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "sampleModFileFail2.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath);

    Assert.AreEqual(0, loadedMods.Count());
}
/// <summary>
/// Reads "sampleModFile.txt" from the test directory in full and prints the first
/// modification to the console.
/// </summary>
public void SampleModFileLoading()
{
    string modFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "sampleModFile.txt");
    var loadedMods = PtmListLoader.ReadModsFromFile(modFilePath).ToList();

    var firstMod = loadedMods.First();
    Console.WriteLine(firstMod.ToString());
}