/// <summary>
/// Smoke test: runs a search task (parsimony and quantification enabled) against an .mgf
/// spectra file and passes as long as nothing throws.
/// </summary>
/// <remarks>
/// Whenever a new feature is added that may require things an mgf does not have,
/// there should be a check that prevents mgfs from using that feature.
/// </remarks>
public static void TestLoadAndRunMgf()
{
    string spectraPath = @"TestData\ok.mgf";
    string databasePath = @"TestData\okk.xml";
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestLoadAndRunMgf");

    SearchTask searchTask = new SearchTask
    {
        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            DoQuantification = true
        }
    };

    var tasksToRun = new List<(string, MetaMorpheusTask)>();
    tasksToRun.Add(("task1", searchTask));

    var spectraFiles = new List<string> { spectraPath };
    var databases = new List<DbForTask> { new DbForTask(databasePath, false) };

    // run!
    var runner = new EverythingRunnerEngine(tasksToRun, spectraFiles, databases, outputFolder);
    runner.Run();

    // Just don't crash! There should also be at least one psm at 1% FDR, but can't check for that.
    Directory.Delete(outputFolder, true);
}
/// <summary>
/// Runs a GPTMD task followed by a classic search that writes a pruned database, then loads
/// the pruned XML and checks its protein count (18) and that it carries no modifications.
/// </summary>
public static void TestPrunedGeneration()
{
    // modification types whose mods are offered to GPTMD
    string[] gptmdModTypes = { "Common Artifact", "Common Biological", "Metal", "Less Common" };

    // GPTMD task
    GptmdTask gptmdTask = new GptmdTask
    {
        CommonParameters = new CommonParameters(),
        GptmdParameters = new GptmdParameters
        {
            ListOfModsGptmd = GlobalVariables.AllModsKnown
                .Where(mod => gptmdModTypes.Any(type => mod.ModificationType.Equals(type)))
                .Select(mod => (mod.ModificationType, mod.IdWithMotif))
                .ToList()
        }
    };

    // search task that writes the pruned database
    SearchTask searchTask = new SearchTask
    {
        CommonParameters = new CommonParameters(),
        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            SearchTarget = true,
            WritePrunedDatabase = true,
            SearchType = SearchType.Classic
        }
    };

    var tasksToRun = new List<(string, MetaMorpheusTask)>();
    tasksToRun.Add(("task1", gptmdTask));
    tasksToRun.Add(("task2", searchTask));

    string spectraPath = @"TestData\PrunedDbSpectra.mzml";
    string databasePath = @"TestData\DbForPrunedDb.fasta";
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedGeneration");

    var runner = new EverythingRunnerEngine(
        tasksToRun,
        new List<string> { spectraPath },
        new List<DbForTask> { new DbForTask(databasePath, false) },
        outputFolder);
    runner.Run();

    string prunedDbPath = Path.Combine(MySetUpClass.outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml");
    List<Protein> proteins = ProteinDbLoader.LoadProteinXML(prunedDbPath, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out var unknownMods);

    // Ensures that the protein output contains the correct number of proteins to match the following conditions:
    //   all proteins in DB have baseSequence!=null (not ambiguous)
    //   all proteins that belong to a protein group are written to DB
    Assert.AreEqual(18, proteins.Count);

    // count localized mods on each protein plus the mods attached to its sequence variations
    int totalNumberOfMods = 0;
    foreach (Protein protein in proteins)
    {
        int variationMods = protein.SequenceVariations.Sum(variation => variation.OneBasedModifications.Count);
        totalNumberOfMods += protein.OneBasedPossibleLocalizedModifications.Count + variationMods;
    }

    // tests that modifications are being done correctly
    Assert.AreEqual(0, totalNumberOfMods);

    Directory.Delete(outputFolder, true);
}
/// <summary>
/// Runs a default GPTMD task followed by a classic search that writes a pruned database,
/// then loads the pruned XML and checks its protein count (20) and that no localized
/// modifications were written.
/// </summary>
public static void TestPrunedGeneration()
{
    //Create GPTMD Task
    GptmdTask task1 = new GptmdTask
    {
        CommonParameters = new CommonParameters(),
    };

    //Create Search Task
    SearchTask task2 = new SearchTask
    {
        CommonParameters = new CommonParameters(),
        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            SearchTarget = true,
            WritePrunedDatabase = true,
            SearchType = SearchType.Classic
        }
    };

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>
    {
        ("task1", task1),
        ("task2", task2)
    };

    string mzmlName = @"TestData\PrunedDbSpectra.mzml";
    string fastaName = @"TestData\DbForPrunedDb.fasta";

    var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, Environment.CurrentDirectory);
    engine.Run();

    string final = Path.Combine(MySetUpClass.outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml");
    List<Protein> proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out var ok);

    // Ensures that the protein output contains the correct number of proteins to match the following conditions:
    //   all proteins in DB have baseSequence!=null (not ambiguous)
    //   all proteins that belong to a protein group are written to DB
    // NUnit's signature is AreEqual(expected, actual); the expected value goes first so
    // that failure messages report the two numbers the right way round.
    Assert.AreEqual(20, proteins.Count);

    int totalNumberOfMods = proteins.Sum(p => p.OneBasedPossibleLocalizedModifications.Count);

    //tests that modifications are being done correctly
    Assert.AreEqual(0, totalNumberOfMods);
}
/// <summary>
/// Runs a semi-specific (non-specific search type) search once per fragmentation terminus
/// (N, then C) and checks that each run writes an AllPSMs.psmtsv with exactly 13 lines.
/// </summary>
public static void SemiSpecificTest()
{
    List<FragmentationTerminus> terminiToTest = new List<FragmentationTerminus>
    {
        FragmentationTerminus.N,
        FragmentationTerminus.C
    };
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestSemiSpecific");
    string myFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml");
    string myDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta");

    // results land in <outputFolder>/<task name>/AllPSMs.psmtsv; derive the path from
    // outputFolder so it cannot drift from the folder handed to the engine
    string outputPath = Path.Combine(outputFolder, "TestSemiSpecific", "AllPSMs.psmtsv");

    foreach (FragmentationTerminus fragTerm in terminiToTest)
    {
        SearchTask searchTask = new SearchTask()
        {
            SearchParameters = new SearchParameters
            {
                SearchType = SearchType.NonSpecific,
                LocalFdrCategories = new List<FdrCategory>
                {
                    FdrCategory.FullySpecific,
                    FdrCategory.SemiSpecific
                }
            },
            CommonParameters = new CommonParameters(scoreCutoff: 4, addCompIons: true,
                digestionParams: new DigestionParams(searchModeType: CleavageSpecificity.Semi, fragmentationTerminus: fragTerm))
        };

        List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("TestSemiSpecific", searchTask) };

        var engine = new EverythingRunnerEngine(taskList, new List<string> { myFile }, new List<DbForTask> { new DbForTask(myDatabase, false) }, outputFolder);
        engine.Run();

        var output = File.ReadAllLines(outputPath);

        // If N is only producing 11 lines, then the c is not being searched with it.
        // If only 12 lines, maybe missed mono issue.
        // The constraint form reports the actual line count when the check fails.
        Assert.That(output.Length, Is.EqualTo(13));
    }

    Directory.Delete(outputFolder, true);
}
/// <summary>
/// Runs a semi-specific N-terminal search (score cutoff 11, minimum peptide length 7) with
/// mzID output on a tiny .mgf file, then checks the PSM file has 3 lines and that three
/// specific lines of the written mzID file have the expected content.
/// </summary>
public static void SemiSpecificFullAndSmallMatches()
{
    SearchTask searchTask = new SearchTask()
    {
        SearchParameters = new SearchParameters
        {
            WriteMzId = true,
            SearchType = SearchType.NonSpecific,
            LocalFdrCategories = new List<FdrCategory>
            {
                FdrCategory.FullySpecific,
                FdrCategory.SemiSpecific
            }
        },
        CommonParameters = new CommonParameters(addCompIons: true, scoreCutoff: 11,
            digestionParams: new DigestionParams(minPeptideLength: 7, searchModeType: CleavageSpecificity.Semi, fragmentationTerminus: FragmentationTerminus.N))
    };

    // One root directory for inputs, engine output and result lookup. Previously the engine
    // wrote to Environment.CurrentDirectory while results were read from the NUnit test
    // directory, which only works when the two happen to be the same folder.
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string myFile = Path.Combine(testDirectory, @"TestData\tinySemi.mgf");
    string myDatabase = Path.Combine(testDirectory, @"TestData\semiTest.fasta");

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("TestSemiSpecificSmall", searchTask) };

    var engine = new EverythingRunnerEngine(taskList, new List<string> { myFile }, new List<DbForTask> { new DbForTask(myDatabase, false) }, testDirectory);
    engine.Run();

    string taskOutputFolder = Path.Combine(testDirectory, "TestSemiSpecificSmall");

    var output = File.ReadAllLines(Path.Combine(taskOutputFolder, "AllPSMs.psmtsv"));
    Assert.That(output.Length, Is.EqualTo(3));

    // spot-check fixed line positions of the mzID file; each line must match exactly
    var mzId = File.ReadAllLines(Path.Combine(taskOutputFolder, "tinySemi.mzID"));
    Assert.That(mzId[115], Is.EqualTo(" <cvParam name=\"mzML format\" cvRef=\"PSI-MS\" accession=\"MS:1000584\" />"));
    Assert.That(mzId[118], Is.EqualTo(" <cvParam name=\"mzML unique identifier\" cvRef=\"PSI-MS\" accession=\"MS:1001530\" />"));
    Assert.That(mzId[97], Is.EqualTo(" <cvParam name=\"pep:FDR threshold\" value=\"0.01\" cvRef=\"PSI-MS\" accession=\"MS:1001448\" />"));
}
/// <summary>
/// Click handler that starts a run: warns and returns if no protein database was added,
/// rebuilds the in-run task collection, resolves the output folder (replacing the
/// <c>$DATETIME</c> placeholder with the current timestamp) and starts the
/// <c>EverythingRunnerEngine</c> on a new <see cref="Task"/>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void RunAllTasks_Click(object sender, RoutedEventArgs e)
{
    GlobalVariables.StopLoops = false;

    // nothing can run without at least one protein database
    bool hasDatabase = ProteinDbObservableCollection.Any();
    if (!hasDatabase)
    {
        GuiWarnHandler(null, new StringEventArgs("You need to add at least one protein database!", null));
        return;
    }

    // rebuild the in-run task collection, naming each entry "Task<n>-<task type>"
    DynamicTasksObservableCollection = new ObservableCollection<InRunTask>();
    int taskCount = StaticTasksObservableCollection.Count;
    int index = 0;
    while (index < taskCount)
    {
        var guruTask = StaticTasksObservableCollection[index].proteaseGuruTask;
        string displayName = $"Task{index + 1}-{guruTask.TaskType.ToString()}";
        DynamicTasksObservableCollection.Add(new InRunTask(displayName, guruTask));
        index++;
    }

    // output folder: defaults to a "$DATETIME" folder next to the first protein database
    if (string.IsNullOrEmpty(OutputFolderTextBox.Text))
    {
        string firstDatabasePath = ProteinDbObservableCollection.First().FilePath;
        string firstDatabaseFolder = System.IO.Path.GetDirectoryName(firstDatabasePath);
        OutputFolderTextBox.Text = System.IO.Path.Combine(firstDatabaseFolder, @"$DATETIME");
    }

    string timestamp = DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss", CultureInfo.InvariantCulture);
    string outputFolder = OutputFolderTextBox.Text.Replace("$DATETIME", timestamp);
    OutputFolderTextBox.Text = outputFolder;

    // everything is OK to run
    var tasksToRun = DynamicTasksObservableCollection
        .Select(inRunTask => (inRunTask.DisplayName, inRunTask.Task))
        .ToList();
    var databases = ProteinDbObservableCollection
        .Select(database => new DbForDigestion(database.FilePath))
        .ToList();

    EverythingRunnerEngine runner = new EverythingRunnerEngine(tasksToRun, databases, outputFolder);

    // run on a separate task; a fault is routed to EverythingRunnerExceptionHandler
    var runTask = new Task(runner.Run);
    runTask.ContinueWith(EverythingRunnerExceptionHandler, TaskContinuationOptions.OnlyOnFaulted);
    runTask.Start();
}
/// <summary>
/// Runs three search tasks loaded from toml files with different custom fragmentation ions
/// (b/y, c/zDot, b/c/zDot), then reads back the toml settings written by the run and checks
/// that each task kept exactly its own set of custom ions.
/// </summary>
public static void MultipleCustomFragmentations()
{
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestCustomFragmentations");
    string myFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\PrunedDbSpectra.mzml");
    string myDatabase = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\DbForPrunedDb.fasta");

    // create 3 search tasks with different custom fragmentation ions
    var task1 = Toml.ReadFile<SearchTask>(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\customBY.toml"), MetaMorpheusTask.tomlConfig);
    var task2 = Toml.ReadFile<SearchTask>(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\customCZ.toml"), MetaMorpheusTask.tomlConfig);
    var task3 = Toml.ReadFile<SearchTask>(Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestData\customBCZ.toml"), MetaMorpheusTask.tomlConfig);

    // run all tasks
    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>
    {
        ("TestSearchBY", task1),
        ("TestSearchCZ", task2),
        ("TestSearchBCZ", task3)
    };
    var engine = new EverythingRunnerEngine(taskList, new List<string> { myFile }, new List<DbForTask> { new DbForTask(myDatabase, false) }, outputFolder);
    engine.Run();

    // read generated toml settings and make sure custom fragmentations are handled properly for each task
    string outputPath = Path.Combine(outputFolder, @"Task Settings");
    var task1Settings = Toml.ReadFile<SearchTask>(Path.Combine(outputPath, @"TestSearchBYConfig.toml"), MetaMorpheusTask.tomlConfig);
    var task2Settings = Toml.ReadFile<SearchTask>(Path.Combine(outputPath, @"TestSearchCZConfig.toml"), MetaMorpheusTask.tomlConfig);
    var task3Settings = Toml.ReadFile<SearchTask>(Path.Combine(outputPath, @"TestSearchBCZConfig.toml"), MetaMorpheusTask.tomlConfig);

    var task1CustomIons = task1Settings.CommonParameters.CustomIons;
    var task2CustomIons = task2Settings.CommonParameters.CustomIons;
    var task3CustomIons = task3Settings.CommonParameters.CustomIons;

    // "contains each expected ion and has that many entries" is exactly an order-independent
    // equivalence check; written as a constraint so a failure lists the ions actually found
    Assert.That(task1CustomIons, Is.EquivalentTo(new[] { ProductType.b, ProductType.y }));
    Assert.That(task2CustomIons, Is.EquivalentTo(new[] { ProductType.c, ProductType.zDot }));
    Assert.That(task3CustomIons, Is.EquivalentTo(new[] { ProductType.c, ProductType.zDot, ProductType.b }));

    Directory.Delete(outputFolder, true);
}
/// <summary>
/// Runs a semi-specific N-terminal search (score cutoff 11, minimum peptide length 7) on a
/// tiny .mgf file and checks that the written AllPSMs.psmtsv has exactly 3 lines.
/// </summary>
public static void SemiSpecificFullAndSmallMatches()
{
    SearchTask searchTask = new SearchTask()
    {
        SearchParameters = new SearchParameters
        {
            SearchType = SearchType.NonSpecific,
            LocalFdrCategories = new List<FdrCategory>
            {
                FdrCategory.FullySpecific,
                FdrCategory.SemiSpecific
            }
        },
        CommonParameters = new CommonParameters(addCompIons: true, scoreCutoff: 11,
            digestionParams: new DigestionParams(minPeptideLength: 7, searchModeType: CleavageSpecificity.Semi, fragmentationTerminus: FragmentationTerminus.N))
    };

    // One root directory for inputs, engine output and result lookup. Previously the engine
    // wrote to Environment.CurrentDirectory while the result was read from the NUnit test
    // directory, which only works when the two happen to be the same folder.
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string myFile = Path.Combine(testDirectory, @"TestData\tinySemi.mgf");
    string myDatabase = Path.Combine(testDirectory, @"TestData\semiTest.fasta");

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("TestSemiSpecificSmall", searchTask) };

    var engine = new EverythingRunnerEngine(taskList, new List<string> { myFile }, new List<DbForTask> { new DbForTask(myDatabase, false) }, testDirectory);
    engine.Run();

    string outputPath = Path.Combine(testDirectory, "TestSemiSpecificSmall", "AllPSMs.psmtsv");
    var output = File.ReadAllLines(outputPath);

    // the constraint form reports the actual line count when the check fails
    Assert.That(output.Length, Is.EqualTo(3));
}
/// <summary>
/// Command-line entry point: validates the settings, optionally generates default tomls or
/// sets up the micro vignette, wires up the event handlers, loads the task tomls, checks
/// the experimental design file and runs all tasks through <c>EverythingRunnerEngine</c>.
/// </summary>
/// <param name="settings">Command-line settings (spectra, databases, task tomls, output folder, verbosity).</param>
/// <returns>
/// 0 on success (including after only generating default tomls);
/// 2 if validating the settings threw;
/// 3 if .raw files were supplied and the Thermo licence was not accepted;
/// 4 if the run itself threw;
/// 5 if the experimental design file is missing or unreadable and the run cannot continue.
/// </returns>
private static int Run(CommandLineSettings settings)
{
    int errorCode = 0;

    // Console messages are written at "minimal" and "normal" verbosity only.
    // Evaluated on every call so it always reflects the current settings value.
    bool ConsoleOutputEnabled() =>
        settings.Verbosity == CommandLineSettings.VerbosityType.minimal
        || settings.Verbosity == CommandLineSettings.VerbosityType.normal;

    if (ConsoleOutputEnabled())
    {
        Console.WriteLine("Welcome to MetaMorpheus");
    }

    if (settings.CustomDataDirectory != null)
    {
        GlobalVariables.UserSpecifiedDataDir = settings.CustomDataDirectory;
    }

    GlobalVariables.SetUpGlobalVariables();

    if (ConsoleOutputEnabled())
    {
        Console.WriteLine(GlobalVariables.MetaMorpheusVersion);
    }

    try
    {
        settings.ValidateCommandLineSettings();
        CommandLineSettings = settings;
    }
    catch (Exception e)
    {
        if (ConsoleOutputEnabled())
        {
            Console.WriteLine("MetaMorpheus encountered the following error:" + Environment.NewLine + e.Message);
        }
        errorCode = 2;
        return errorCode;
    }

    if (settings.GenerateDefaultTomls)
    {
        if (ConsoleOutputEnabled())
        {
            Console.WriteLine("Generating default tomls at location: " + settings.OutputFolder);
        }
        CommandLineSettings.GenerateDefaultTaskTomls(settings.OutputFolder);
        return errorCode;
    }

    // set up microvignette
    if (settings.RunMicroVignette)
    {
        // set up the spectra file
        settings.Spectra.Clear();
        settings.Spectra.Add(Path.Combine(GlobalVariables.DataDir, @"Data", "SmallCalibratible_Yeast.mzML"));

        // set up the database
        settings.Databases.Clear();
        settings.Databases.Add(Path.Combine(GlobalVariables.DataDir, @"Data", "SmallYeast.fasta"));

        // set up the tasks (calibration, GPTMD, search)
        settings.Tasks.Clear();
        CommandLineSettings.GenerateDefaultTaskTomls(settings.OutputFolder);
        settings.Tasks.Add(Path.Combine(settings.OutputFolder, "CalibrationTask.toml"));
        settings.Tasks.Add(Path.Combine(settings.OutputFolder, "GptmdTask.toml"));
        settings.Tasks.Add(Path.Combine(settings.OutputFolder, "SearchTask.toml"));
    }

    MetaMorpheusEngine.WarnHandler += WarnHandler;
    MetaMorpheusEngine.OutProgressHandler += MyEngine_outProgressHandler;
    MetaMorpheusEngine.StartingSingleEngineHander += MyEngine_startingSingleEngineHander;
    MetaMorpheusEngine.FinishedSingleEngineHandler += MyEngine_finishedSingleEngineHandler;

    MetaMorpheusTask.WarnHandler += WarnHandler;
    MetaMorpheusTask.LogHandler += LogHandler;
    MetaMorpheusTask.StartingSingleTaskHander += MyTaskEngine_startingSingleTaskHander;
    MetaMorpheusTask.FinishedSingleTaskHandler += MyTaskEngine_finishedSingleTaskHandler;
    MetaMorpheusTask.FinishedWritingFileHandler += MyTaskEngine_finishedWritingFileHandler;

    bool containsRawFiles = settings.Spectra.Select(v => Path.GetExtension(v).ToLowerInvariant()).Any(v => v == ".raw");
    if (containsRawFiles && !GlobalVariables.GlobalSettings.UserHasAgreedToThermoRawFileReaderLicence)
    {
        // write the Thermo RawFileReader licence agreement
        Console.WriteLine(ThermoRawFileReaderLicence.ThermoLicenceText);
        Console.WriteLine("\nIn order to search Thermo .raw files, you must agree to the above terms. Do you agree to the above terms? y/n\n");

        // Console.ReadLine returns null when no more input is available (e.g. stdin is
        // redirected or closed); that is treated as "not agreed" instead of crashing.
        string res = Console.ReadLine()?.ToLowerInvariant();
        if (res == "y")
        {
            var newGlobalSettings = new GlobalSettings
            {
                UserHasAgreedToThermoRawFileReaderLicence = true,
                WriteExcelCompatibleTSVs = GlobalVariables.GlobalSettings.WriteExcelCompatibleTSVs
            };

            Toml.WriteFile<GlobalSettings>(newGlobalSettings, Path.Combine(GlobalVariables.DataDir, @"settings.toml"));
            GlobalVariables.GlobalSettings = newGlobalSettings;
        }
        else
        {
            Console.WriteLine("Thermo licence has been declined. Exiting MetaMorpheus. You can still search .mzML and .mgf files without agreeing to the Thermo licence.");
            errorCode = 3;
            return errorCode;
        }
    }

    foreach (var db in settings.Databases)
    {
        // Every database that is not a .fasta file is read as a protein XML so that its
        // modifications become known. The comparison ignores case, matching how the
        // .raw extension is detected above.
        if (!Path.GetExtension(db).Equals(".fasta", StringComparison.OrdinalIgnoreCase))
        {
            GlobalVariables.AddMods(UsefulProteomicsDatabases.ProteinDbLoader.GetPtmListFromProteinXml(db).OfType<Modification>(), true);

            // print any error messages reading the mods to the console
            if (ConsoleOutputEnabled())
            {
                foreach (var error in GlobalVariables.ErrorsReadingMods)
                {
                    Console.WriteLine(error);
                }
            }
            GlobalVariables.ErrorsReadingMods.Clear();
        }
    }

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>();

    var tasks = settings.Tasks.ToList();
    for (int i = 0; i < tasks.Count; i++)
    {
        var filePath = tasks[i];

        // first pass only reads the task type; the toml is then re-read as the matching task class
        var toml = Toml.ReadFile(filePath, MetaMorpheusTask.tomlConfig);
        string taskType = toml.Get<string>("TaskType");

        switch (taskType)
        {
            case "Search":
                var searchTask = Toml.ReadFile<SearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "SearchTask", searchTask));
                break;

            case "Calibrate":
                var calibrationTask = Toml.ReadFile<CalibrationTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "CalibrationTask", calibrationTask));
                break;

            case "Gptmd":
                var gptmdTask = Toml.ReadFile<GptmdTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "GptmdTask", gptmdTask));
                break;

            case "XLSearch":
                var xlTask = Toml.ReadFile<XLSearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "XLSearchTask", xlTask));
                break;

            case "GlycoSearch":
                var glycoTask = Toml.ReadFile<GlycoSearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "GlycoSearchTask", glycoTask));
                break;

            default:
                if (ConsoleOutputEnabled())
                {
                    Console.WriteLine(taskType + " is not a known task type! Skipping.");
                }
                break;
        }
    }

    List<string> startingRawFilenameList = settings.Spectra.Select(b => Path.GetFullPath(b)).ToList();
    List<DbForTask> startingXmlDbFilenameList = settings.Databases.Select(b => new DbForTask(Path.GetFullPath(b), IsContaminant(b))).ToList();

    // check that experimental design is defined if normalization is enabled
    var searchTasks = taskList
        .Where(p => p.Item2.TaskType == MyTask.Search)
        .Select(p => (SearchTask)p.Item2);

    // the experimental design file is looked up next to the first spectra file
    string pathToExperDesign = Directory.GetParent(startingRawFilenameList.First()).FullName;
    pathToExperDesign = Path.Combine(pathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

    if (!File.Exists(pathToExperDesign))
    {
        if (searchTasks.Any(p => p.SearchParameters.Normalize))
        {
            if (ConsoleOutputEnabled())
            {
                Console.WriteLine("Experimental design file was missing! This must be defined to do normalization. Download a template from https://github.com/smith-chem-wisc/MetaMorpheus/wiki/Experimental-Design");
            }
            return 5;
        }
    }
    else
    {
        ExperimentalDesign.ReadExperimentalDesign(pathToExperDesign, startingRawFilenameList, out var errors);

        if (errors.Any())
        {
            if (searchTasks.Any(p => p.SearchParameters.Normalize))
            {
                // normalization needs a valid design: report the errors and stop
                if (ConsoleOutputEnabled())
                {
                    foreach (var error in errors)
                    {
                        Console.WriteLine(error);
                    }
                }
                return 5;
            }
            else
            {
                if (ConsoleOutputEnabled())
                {
                    Console.WriteLine("An experimental design file was found, but an error " +
                        "occurred reading it. Do you wish to continue with an empty experimental design? (This will delete your experimental design file) y/n" +
                        "\nThe error was: " + errors.First());

                    // a null answer (no more console input) counts as "no", so the user's
                    // file is never deleted without an explicit confirmation
                    string result = Console.ReadLine()?.ToLowerInvariant();
                    if (result == "y" || result == "yes")
                    {
                        File.Delete(pathToExperDesign);
                    }
                    else
                    {
                        return 5;
                    }
                }
                else
                {
                    // just continue on if verbosity is on "none"
                    File.Delete(pathToExperDesign);
                }
            }
        }
        else
        {
            if (ConsoleOutputEnabled())
            {
                Console.WriteLine("Read ExperimentalDesign.tsv successfully");
            }
        }
    }

    EverythingRunnerEngine a = new EverythingRunnerEngine(taskList, startingRawFilenameList, startingXmlDbFilenameList, settings.OutputFolder);

    try
    {
        a.Run();
    }
    catch (Exception e)
    {
        // report the innermost exception's message
        while (e.InnerException != null)
        {
            e = e.InnerException;
        }

        var message = "Run failed, Exception: " + e.Message;

        if (ConsoleOutputEnabled())
        {
            Console.WriteLine(message);
        }
        errorCode = 4;
    }

    return errorCode;
}
/// <summary>
/// Writes a one-protein XML database carrying a custom modification at positions 1 and 3,
/// checks how it is read back (target plus reverse decoy), generates an mzML file from the
/// peptide modified at position 3, runs a search that writes a pruned database and checks
/// that the pruned XML keeps only the modification at position 3.
/// </summary>
public static void TestPrunedDatabase()
{
    //Create Search Task
    SearchTask task1 = new SearchTask
    {
        SearchParameters = new SearchParameters
        {
            WritePrunedDatabase = true,
            SearchTarget = true,
            MassDiffAcceptorType = MassDiffAcceptorType.Exact,
            ModsToWriteSelection = new Dictionary<string, int>
            {
                { "ConnorModType", 1 }
            }
        },
        CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5))
    };

    //add task to task list
    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", task1) };

    ModificationMotif.TryGetMotif("P", out ModificationMotif motif);

    var connorMod = new Modification(_originalId: "ConnorMod on P", _modificationType: "ConnorModType", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);

    GlobalVariables.AddMods(new List<Modification> { connorMod }, false);

    //create modification lists
    List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
        .Where(b => task1.CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif))).ToList();

    //add the same modification to the Protein object at positions 1 and 3
    var dictHere = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification> { connorMod } },
        { 3, new List<Modification> { connorMod } }
    };

    //protein creation
    Protein TestProteinWithMod = new Protein("PEPTID", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);

    //First Write XML Database
    string xmlName = "okkk.xml";

    //Add Mod to list and write XML input database
    Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
    var Hash = new HashSet<Tuple<int, Modification>>
    {
        new Tuple<int, Modification>(3, connorMod)
    };
    modList.Add("test", Hash);
    ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithMod }, xmlName);

    //read the database back in
    var protein = ProteinDbLoader.LoadProteinXML(xmlName, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);

    //Dictionary 'ok' contains unknown modifications. There are no unknown modifications in this test.
    Assert.AreEqual(0, ok.Count);

    //One protein is read from the .xml database and one decoy is created. Therefore, the list of proteins contains 2 entries.
    Assert.AreEqual(2, protein.Count);

    //The original database had two localized mods on the protein. Therefore, both protein and decoy should have two mods.
    Assert.AreEqual(2, protein[0].OneBasedPossibleLocalizedModifications.Count);
    List<int> foundResidueIndicies = protein[0].OneBasedPossibleLocalizedModifications.Select(k => k.Key).ToList();
    List<int> expectedResidueIndices = new List<int>() { 1, 3 };
    Assert.That(foundResidueIndicies, Is.EquivalentTo(expectedResidueIndices));

    Assert.AreEqual(2, protein[1].OneBasedPossibleLocalizedModifications.Count);
    foundResidueIndicies = protein[1].OneBasedPossibleLocalizedModifications.Select(k => k.Key).ToList();
    expectedResidueIndices = new List<int>() { 4, 6 }; //originally modified residues are now at the end in the decoy
    Assert.That(foundResidueIndicies, Is.EquivalentTo(expectedResidueIndices));

    var digestedList = protein[0].Digest(task1.CommonParameters.DigestionParams, new List<Modification> { }, variableModifications).ToList();
    Assert.AreEqual(4, digestedList.Count);

    //Set Peptide with 1 mod at position 3
    PeptideWithSetModifications pepWithSetMods1 = digestedList[1];
    Assert.AreEqual("PEP[ConnorModType:ConnorMod on P]TID", pepWithSetMods1.FullSequence);

    //Finally Write MZML file
    MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> { pepWithSetMods1 });
    string mzmlName = @"hello.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

    //run!
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedDatabase");
    var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(xmlName, false) }, outputFolder);
    engine.Run();

    string final = Path.Combine(MySetUpClass.outputFolder, "task1", "okkkpruned.xml");
    var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);

    //check length
    Assert.AreEqual(1, proteins[0].OneBasedPossibleLocalizedModifications.Count);

    //check location (key)
    Assert.IsTrue(proteins[0].OneBasedPossibleLocalizedModifications.ContainsKey(3));

    List<Modification> listOfMods = proteins[0].OneBasedPossibleLocalizedModifications[3];

    //check count, Type, ID (expected value first, as NUnit's AreEqual(expected, actual) requires,
    //so failure messages label the two values correctly)
    Assert.AreEqual(1, listOfMods.Count);
    Assert.AreEqual("ConnorModType", listOfMods[0].ModificationType);
    Assert.AreEqual("ConnorMod on P", listOfMods[0].IdWithMotif);

    Directory.Delete(outputFolder, true);
    File.Delete(xmlName);
    File.Delete(mzmlName);
}
/// <summary>
/// Checks that the per-modification-type selection in <c>ModsToWriteSelection</c> controls
/// which modifications end up in the pruned database. Four custom mods (types "Mod",
/// "Common Fixed", "Glycan", "missing") are registered, a database with three of them and
/// spectra generated from a protein carrying all four are searched, and the pruned XML is
/// inspected at positions 2, 3 and 11.
/// </summary>
public static void TestUserModSelectionInPrunedDB()
{
    List<(string, string)> listOfModsFixed = new List<(string, string)>
    {
        ("Common Fixed", "Carbamidomethyl of C"),
        ("Common Fixed", "Carbamidomethyl of U")
    };

    //Create Search Task
    SearchTask task5 = new SearchTask
    {
        SearchParameters = new SearchParameters
        {
            WritePrunedDatabase = true,
            SearchTarget = true,
            MassDiffAcceptorType = MassDiffAcceptorType.Exact,
        },
        CommonParameters = new CommonParameters(listOfModsFixed: listOfModsFixed)
    };

    task5.SearchParameters.ModsToWriteSelection["Mod"] = 0;
    task5.SearchParameters.ModsToWriteSelection["Common Fixed"] = 1;
    task5.SearchParameters.ModsToWriteSelection["Glycan"] = 2;
    task5.SearchParameters.ModsToWriteSelection["missing"] = 3;

    //add task to task list
    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task5", task5) };

    ModificationMotif.TryGetMotif("P", out ModificationMotif motif);
    ModificationMotif.TryGetMotif("E", out ModificationMotif motif2);

    var connorMod = new Modification(_originalId: "ModToNotAppear", _modificationType: "Mod", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
    var connorMod2 = new Modification(_originalId: "Default(Mod in DB and Observed)", _modificationType: "Common Fixed", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
    var connorMod3 = new Modification(_originalId: "ModToAlwaysAppear", _modificationType: "Glycan", _target: motif, _locationRestriction: "Anywhere.", _monoisotopicMass: 10);
    var connorMod4 = new Modification(_originalId: "ModObservedNotinDB", _modificationType: "missing", _target: motif2, _locationRestriction: "Anywhere.", _monoisotopicMass: 5);

    GlobalVariables.AddMods(new List<Modification> { connorMod, connorMod2, connorMod3, connorMod4 }, false);

    //create modification lists
    List<Modification> variableModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
        .Where(b => task5.CommonParameters.ListOfModsVariable.Contains((b.ModificationType, b.IdWithMotif))).ToList();
    List<Modification> fixedModifications = GlobalVariables.AllModsKnown.OfType<Modification>()
        .Where(b => task5.CommonParameters.ListOfModsFixed.Contains((b.ModificationType, b.IdWithMotif))).ToList();

    //add fixed modification so a mod that is observed but not in the DB can be tested
    fixedModifications.Add(connorMod4);
    listOfModsFixed.Add((connorMod4.ModificationType, connorMod4.IdWithMotif));

    //modifications of the protein written to the searched database
    var dictHere = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification> { connorMod } },
        { 2, new List<Modification> { connorMod2 } }, //default
        { 3, new List<Modification> { connorMod3 } }  //always appear
    };

    //modifications of the protein used to generate the spectra
    var dictHere2 = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification> { connorMod } },
        { 2, new List<Modification> { connorMod2 } }, //default
        { 3, new List<Modification> { connorMod3 } }, //always appear
        { 4, new List<Modification> { connorMod4 } }  //observed
    };

    //protein creation (one for the database, one for the observed data)
    Protein TestProteinWithModForDB = new Protein("PPPPPPPPPPE", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);
    Protein TestProteinWithModObsevred = new Protein("PPPPPPPPPPE", "accession1", "organism", new List<Tuple<string, string>>(), dictHere2);

    //First Write XML Database
    string xmlName = "selectedMods.xml";
    string xmlName2 = "selectedModsObvs.xml";

    //Add Mod to list and write XML input database
    Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
    var Hash = new HashSet<Tuple<int, Modification>>
    {
        new Tuple<int, Modification>(1, connorMod),
        new Tuple<int, Modification>(2, connorMod2),
        new Tuple<int, Modification>(3, connorMod3),
        new Tuple<int, Modification>(4, connorMod4), //Observed Only
    };

    modList.Add("test", Hash);
    ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithModForDB }, xmlName);

    //Add Observed Only
    modList.Add("test2", Hash);
    ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithModObsevred }, xmlName2);

    //now create MZML data from the first five digestion products of the observed protein
    var protein = ProteinDbLoader.LoadProteinXML(xmlName2, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);
    var digestedList = protein[0].Digest(task5.CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

    MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications>
    {
        digestedList[0],
        digestedList[1],
        digestedList[2],
        digestedList[3],
        digestedList[4]
    });
    string mzmlName = @"newMzml.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

    //run!
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestUserModSelectionInPrunedDB");
    var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(xmlName, false) }, outputFolder);
    engine.Run();

    string final = Path.Combine(MySetUpClass.outputFolder, "task5", "selectedModspruned.xml");
    var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);
    var Dlist = proteins[0].GetVariantProteins().SelectMany(vp => vp.Digest(task5.CommonParameters.DigestionParams, fixedModifications, variableModifications)).ToList();

    // All assertions below put the expected value first, as NUnit's AreEqual(expected, actual)
    // requires, so failure messages label the two values correctly.
    Assert.AreEqual(1, Dlist[0].NumFixedMods);

    //check length
    Assert.AreEqual(3, proteins[0].OneBasedPossibleLocalizedModifications.Count);

    List<Modification> listOfLocalMods = new List<Modification>();
    listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[2]);
    listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[3]);
    listOfLocalMods.AddRange(proteins[0].OneBasedPossibleLocalizedModifications[11]);

    //check Type, count, ID
    Assert.AreEqual("Common Fixed", listOfLocalMods[0].ModificationType);
    Assert.AreEqual("missing", listOfLocalMods[2].ModificationType);

    //make sure that mod set not to show up is not in mod list
    Assert.IsFalse(listOfLocalMods.Contains(connorMod));

    Assert.AreEqual("Default(Mod in DB and Observed) on P", listOfLocalMods[0].IdWithMotif);
    Assert.AreEqual("ModToAlwaysAppear on P", listOfLocalMods[1].IdWithMotif);

    //Makes sure Mod that was not in the DB but was observed is in pruned DB
    Assert.AreEqual("ModObservedNotinDB on E", listOfLocalMods[2].IdWithMotif);
    Assert.AreEqual(3, listOfLocalMods.Count);

    Directory.Delete(outputFolder, true);
    File.Delete(mzmlName);
    File.Delete(xmlName);
    File.Delete(xmlName2);
}
/// <summary>
/// End-to-end smoke test: pushes a calibration task, a GPTMD task and two modern-search tasks
/// through <see cref="EverythingRunnerEngine"/> using a synthetic spectra file and a freshly
/// written XML database. The only in-method assertion is on the digestion count; otherwise the
/// test passes if the pipeline runs without throwing.
/// </summary>
public static void TestEverythingRunner()
{
    // Register every modification file found in the "Mods" folder
    foreach (var modFile in Directory.GetFiles(@"Mods"))
    {
        GlobalVariables.AddMods(PtmListLoader.ReadModsFromFile(modFile));
    }

    CalibrationTask task1 = new CalibrationTask
    {
        CommonParameters = new CommonParameters(digestionParams: new DigestionParams(maxMissedCleavages: 0, minPeptideLength: 1, initiatorMethionineBehavior: InitiatorMethionineBehavior.Retain)),
        CalibrationParameters = new CalibrationParameters
        {
            WriteIntermediateFiles = true,
            NumFragmentsNeededForEveryIdentification = 6,
        }
    };
    GptmdTask task2 = new GptmdTask
    {
        CommonParameters = new CommonParameters()
    };
    SearchTask task3 = new SearchTask
    {
        CommonParameters = new CommonParameters(),
        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            SearchTarget = true,
            SearchType = SearchType.Modern
        }
    };
    SearchTask task4 = new SearchTask
    {
        CommonParameters = new CommonParameters(),
        SearchParameters = new SearchParameters
        {
            SearchType = SearchType.Modern,
        }
    };

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>
    {
        ("task1", task1),
        ("task2", task2),
        ("task3", task3),
        ("task4", task4),
    };

    // Resolve the variable/fixed modifications named by the calibration task's common parameters
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => task1.CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
    List<ModificationWithMass> fixedModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => task1.CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();

    // Generate data for files
    Protein ParentProtein = new Protein("MPEPTIDEKANTHE", "accession1");

    var digestedList = ParentProtein.Digest(task1.CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

    Assert.AreEqual(3, digestedList.Count);

    PeptideWithSetModifications pepWithSetMods1 = digestedList[0];
    PeptideWithSetModifications pepWithSetMods2 = digestedList[2];

    // A second protein carrying a localized mod at position 3; it contributes a peptide to the
    // spectra file but is deliberately left out of the database written below.
    var dictHere = new Dictionary<int, List<Modification>>();
    ModificationMotif.TryGetMotif("E", out ModificationMotif motif);
    dictHere.Add(3, new List<Modification> { new ModificationWithMass("21", null, motif, TerminusLocalization.Any, 21.981943) });
    Protein ParentProteinToNotInclude = new Protein("MPEPTIDEK", "accession2", "organism", new List<Tuple<string, string>>(), dictHere);
    digestedList = ParentProteinToNotInclude.Digest(task1.CommonParameters.DigestionParams, fixedModifications, variableModifications).ToList();

    MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> { pepWithSetMods1, pepWithSetMods2, digestedList[1] });

    Protein proteinWithChain = new Protein("MAACNNNCAA", "accession3", "organism", new List<Tuple<string, string>>(), new Dictionary<int, List<Modification>>(), new List<ProteolysisProduct> { new ProteolysisProduct(4, 8, "chain") }, "name2", "fullname2");

    string mzmlName = @"ok.mzML";
    string xmlName = "okk.xml";

    // Give the engine a folder owned by this test instead of the bare working directory:
    // the generic task names ("task1", ...) are reused by other tests, and a dedicated
    // folder can be removed wholesale afterwards.
    string outputFolder = Path.Combine(Environment.CurrentDirectory, @"TestEverythingRunner");

    try
    {
        IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);
        ProteinDbWriter.WriteXmlDatabase(new Dictionary<string, HashSet<Tuple<int, Modification>>>(), new List<Protein> { ParentProtein, proteinWithChain }, xmlName);

        // RUN!
        var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(xmlName, false) }, outputFolder);
        engine.Run();
    }
    finally
    {
        // Clean up even when the run throws so later tests start from a clean directory.
        // Directory.Delete throws on a missing folder, File.Delete does not.
        if (Directory.Exists(outputFolder))
        {
            Directory.Delete(outputFolder, true);
        }
        File.Delete(mzmlName);
        File.Delete(xmlName);
    }
}
/// <summary>
/// Executes one MetaMorpheus command-line run described by <paramref name="settings"/>:
/// validates the settings, optionally writes default task tomls or sets up the micro-vignette,
/// wires the console event handlers, loads modifications from non-FASTA databases, reads the
/// task tomls and finally runs them through <see cref="EverythingRunnerEngine"/>.
/// </summary>
/// <param name="settings">Parsed command-line settings (spectra, databases, tasks, output folder, verbosity).</param>
/// <returns>
/// 0 when the run completes or default tomls were generated; 2 when settings validation throws;
/// 3 when the Thermo RawFileReader licence is declined; 4 when the engine run throws.
/// </returns>
private static int Run(CommandLineSettings settings)
{
    // Console output is produced only at the "minimal" and "normal" verbosity levels.
    // Evaluated on demand so every call site sees the current value of settings.Verbosity.
    bool ConsoleOutputEnabled() =>
        settings.Verbosity == CommandLineSettings.VerbosityType.minimal
        || settings.Verbosity == CommandLineSettings.VerbosityType.normal;

    if (ConsoleOutputEnabled())
    {
        Console.WriteLine("Welcome to MetaMorpheus");
        Console.WriteLine(GlobalVariables.MetaMorpheusVersion);
    }

    int errorCode = 0;

    try
    {
        settings.ValidateCommandLineSettings();
        CommandLineSettings = settings;
    }
    catch (Exception e)
    {
        if (ConsoleOutputEnabled())
        {
            Console.WriteLine("MetaMorpheus encountered the following error:" + Environment.NewLine + e.Message);
        }
        errorCode = 2;

        return errorCode;
    }

    if (settings.GenerateDefaultTomls)
    {
        if (ConsoleOutputEnabled())
        {
            Console.WriteLine("Generating default tomls at location: " + settings.OutputFolder);
        }
        CommandLineSettings.GenerateDefaultTaskTomls(settings.OutputFolder);

        return errorCode;
    }

    // set up microvignette
    if (settings.RunMicroVignette)
    {
        // set up the spectra file
        settings.Spectra.Clear();
        settings.Spectra.Add(Path.Combine(GlobalVariables.DataDir, @"Data", "SmallCalibratible_Yeast.mzML"));

        // set up the database
        settings.Databases.Clear();
        settings.Databases.Add(Path.Combine(GlobalVariables.DataDir, @"Data", "SmallYeast.fasta"));

        // set up the tasks (calibration, GPTMD, search)
        settings.Tasks.Clear();
        CommandLineSettings.GenerateDefaultTaskTomls(settings.OutputFolder);
        settings.Tasks.Add(Path.Combine(settings.OutputFolder, "CalibrationTask.toml"));
        settings.Tasks.Add(Path.Combine(settings.OutputFolder, "GptmdTask.toml"));
        settings.Tasks.Add(Path.Combine(settings.OutputFolder, "SearchTask.toml"));
    }

    // Route engine and task events to this program's console handlers
    MetaMorpheusEngine.WarnHandler += WarnHandler;
    MetaMorpheusEngine.OutProgressHandler += MyEngine_outProgressHandler;
    MetaMorpheusEngine.StartingSingleEngineHander += MyEngine_startingSingleEngineHander;
    MetaMorpheusEngine.FinishedSingleEngineHandler += MyEngine_finishedSingleEngineHandler;

    MetaMorpheusTask.WarnHandler += WarnHandler;
    MetaMorpheusTask.LogHandler += LogHandler;
    MetaMorpheusTask.StartingSingleTaskHander += MyTaskEngine_startingSingleTaskHander;
    MetaMorpheusTask.FinishedSingleTaskHandler += MyTaskEngine_finishedSingleTaskHandler;
    MetaMorpheusTask.FinishedWritingFileHandler += MyTaskEngine_finishedWritingFileHandler;

    bool containsRawFiles = settings.Spectra.Select(v => Path.GetExtension(v).ToLowerInvariant()).Any(v => v == ".raw");
    if (containsRawFiles && !GlobalVariables.GlobalSettings.UserHasAgreedToThermoRawFileReaderLicence)
    {
        // write the Thermo RawFileReader licence agreement
        Console.WriteLine(ThermoRawFileReader.ThermoRawFileReaderLicence.ThermoLicenceText);
        Console.WriteLine("\nIn order to search Thermo .raw files, you must agree to the above terms. Do you agree to the above terms? y/n\n");

        // Console.ReadLine returns null when standard input has no more lines (closed or
        // redirected stdin). Treat that as "not agreed" instead of crashing, and ignore
        // whitespace around the answer.
        string res = Console.ReadLine()?.Trim().ToLowerInvariant();
        if (res == "y")
        {
            // Persist the agreement; the Excel-compatibility setting is carried over unchanged
            var newGlobalSettings = new GlobalSettings
            {
                UserHasAgreedToThermoRawFileReaderLicence = true,
                WriteExcelCompatibleTSVs = GlobalVariables.GlobalSettings.WriteExcelCompatibleTSVs
            };

            Toml.WriteFile<GlobalSettings>(newGlobalSettings, Path.Combine(GlobalVariables.DataDir, @"settings.toml"));
            GlobalVariables.GlobalSettings = newGlobalSettings;
        }
        else
        {
            Console.WriteLine("Thermo licence has been declined. Exiting MetaMorpheus. You can still search .mzML and .mgf files without agreeing to the Thermo licence.");
            errorCode = 3;

            return errorCode;
        }
    }

    foreach (var db in settings.Databases)
    {
        // Every database that is not a FASTA file is read as protein XML to pick up its
        // modifications. The comparison ignores case so "X.FASTA" is not mis-read as XML,
        // matching the case-insensitive ".raw" check above.
        if (!Path.GetExtension(db).Equals(".fasta", StringComparison.OrdinalIgnoreCase))
        {
            GlobalVariables.AddMods(UsefulProteomicsDatabases.ProteinDbLoader.GetPtmListFromProteinXml(db).OfType<Modification>(), true);

            // print any error messages reading the mods to the console
            foreach (var error in GlobalVariables.ErrorsReadingMods)
            {
                if (ConsoleOutputEnabled())
                {
                    Console.WriteLine(error);
                }
            }

            GlobalVariables.ErrorsReadingMods.Clear();
        }
    }

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>();

    var tasks = settings.Tasks.ToList();
    for (int i = 0; i < tasks.Count; i++)
    {
        var filePath = tasks[i];

        // First pass reads the toml generically to discover its TaskType; the matching
        // case then re-reads it as the concrete task class.
        var toml = Toml.ReadFile(filePath, MetaMorpheusTask.tomlConfig);

        switch (toml.Get<string>("TaskType"))
        {
            case "Search":
                var searchTask = Toml.ReadFile<SearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "SearchTask", searchTask));
                break;

            case "Calibrate":
                var calibrationTask = Toml.ReadFile<CalibrationTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "CalibrationTask", calibrationTask));
                break;

            case "Gptmd":
                var gptmdTask = Toml.ReadFile<GptmdTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "GptmdTask", gptmdTask));
                break;

            case "XLSearch":
                var xlTask = Toml.ReadFile<XLSearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "XLSearchTask", xlTask));
                break;

            case "GlycoSearch":
                var glycoTask = Toml.ReadFile<GlycoSearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                taskList.Add(("Task" + (i + 1) + "GlycoSearchTask", glycoTask));
                break;

            default:
                if (ConsoleOutputEnabled())
                {
                    Console.WriteLine(toml.Get<string>("TaskType") + " is not a known task type! Skipping.");
                }
                break;
        }
    }

    List<string> startingRawFilenameList = settings.Spectra.Select(b => Path.GetFullPath(b)).ToList();
    List<DbForTask> startingXmlDbFilenameList = settings.Databases.Select(b => new DbForTask(Path.GetFullPath(b), IsContaminant(b))).ToList();

    EverythingRunnerEngine a = new EverythingRunnerEngine(taskList, startingRawFilenameList, startingXmlDbFilenameList, settings.OutputFolder);

    try
    {
        a.Run();
    }
    catch (Exception e)
    {
        // Report the innermost exception's message
        while (e.InnerException != null)
        {
            e = e.InnerException;
        }

        var message = "Run failed, Exception: " + e.Message;

        if (ConsoleOutputEnabled())
        {
            Console.WriteLine(message);
        }
        errorCode = 4;
    }

    return errorCode;
}
/// <summary>
/// Writes a one-protein XML database ("PEPTID") that carries a sequence variation at position 3
/// plus one modification keyed by the protein accession and one keyed by the variant accession,
/// verifies how the database reads back (target and reversed decoy), then builds a spectra file
/// from the modified variant peptide and runs an exact-mass search over it.
/// </summary>
public static void TestSearchPtmVariantDatabase()
{
    // Search task under test
    var searchTask = new SearchTask
    {
        SearchParameters = new SearchParameters
        {
            SearchTarget = true,
            MassDiffAcceptorType = MassDiffAcceptorType.Exact,
        },
        CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5))
    };

    List<(string, MetaMorpheusTask)> tasksToRun = new List<(string, MetaMorpheusTask)>
    {
        ("task1", searchTask)
    };

    // Known modifications that the task lists as variable
    List<Modification> variableMods = GlobalVariables.AllModsKnown
        .OfType<Modification>()
        .Where(mod => searchTask.CommonParameters.ListOfModsVariable.Contains((mod.ModificationType, mod.IdWithMotif)))
        .ToList();

    // Protein with a single sequence variation
    ModificationMotif.TryGetMotif("P", out ModificationMotif prolineMotif);
    ModificationMotif.TryGetMotif("K", out ModificationMotif lysineMotif);
    SequenceVariation variant = new SequenceVariation(3, "P", "K", @"1\t50000000\t.\tA\tG\t.\tPASS\tANN=G|||||||||||||||||||\tGT:AD:DP\t1/1:30,30:30");
    Protein proteinWithVariant = new Protein("PEPTID", "accession1", sequenceVariations: new List<SequenceVariation> { variant });
    string variantAccession = VariantApplication.GetAccession(proteinWithVariant, new[] { variant });

    // Input database: one mod at residue 1 under the protein accession,
    // one mod at residue 3 under the variant accession
    string databasePath = "oblm.xml";
    var modsOnProtein = new HashSet<Tuple<int, Modification>>
    {
        new Tuple<int, Modification>(1, new Modification(_originalId: "acetyl on P", _modificationType: "type", _target: prolineMotif, _monoisotopicMass: 42, _locationRestriction: "Anywhere.")),
    };
    var modsOnVariant = new HashSet<Tuple<int, Modification>>
    {
        new Tuple<int, Modification>(3, new Modification(_originalId: "acetyl on K", _modificationType: "type", _target: lysineMotif, _monoisotopicMass: 42, _locationRestriction: "Anywhere.")),
    };
    var modsByAccession = new Dictionary<string, HashSet<Tuple<int, Modification>>>
    {
        { proteinWithVariant.Accession, modsOnProtein },
        { variantAccession, modsOnVariant }
    };
    ProteinDbWriter.WriteXmlDatabase(modsByAccession, new List<Protein> { proteinWithVariant }, databasePath);

    // Read the database back and inspect target and decoy
    var loadedProteins = ProteinDbLoader.LoadProteinXML(databasePath, true, DecoyType.Reverse, null, false, null, out var unknownMods);
    var target = loadedProteins[0];
    var decoy = loadedProteins[1];
    Assert.AreEqual(0, unknownMods.Count);

    Assert.AreEqual(2, loadedProteins.Count); // target & decoy
    Assert.AreEqual(2, loadedProteins[0].OneBasedPossibleLocalizedModifications.Count);

    List<int> observedSites = target.OneBasedPossibleLocalizedModifications.Select(entry => entry.Key).ToList();
    List<int> expectedSites = new List<int> { 1, 3 };
    Assert.That(observedSites, Is.EquivalentTo(expectedSites));

    Assert.AreEqual(2, decoy.OneBasedPossibleLocalizedModifications.Count);
    observedSites = decoy.OneBasedPossibleLocalizedModifications.Select(entry => entry.Key).ToList();
    expectedSites = new List<int> { 4, 6 }; // modified residues sit at the other end of the reversed decoy
    Assert.That(observedSites, Is.EquivalentTo(expectedSites));

    // Kept only so the values can be inspected while debugging
    var unknownModsSnapshot = unknownMods;
    var digestionParamsSnapshot = searchTask.CommonParameters.DigestionParams;

    // Digest the first variant form of the target protein
    var variantPeptides = loadedProteins[0]
        .GetVariantProteins()[0]
        .Digest(searchTask.CommonParameters.DigestionParams, new List<Modification>(), variableMods)
        .ToList();
    Assert.AreEqual(4, variantPeptides.Count);

    // The second digestion product carries the variant-site modification
    PeptideWithSetModifications modifiedVariantPeptide = variantPeptides[1];
    Assert.AreEqual("PEK[type:acetyl on K]TID", modifiedVariantPeptide.FullSequence);

    // Spectra file containing just that peptide
    MsDataFile spectraFile = new TestDataFile(new List<PeptideWithSetModifications> { modifiedVariantPeptide });
    string spectraPath = @"hello.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(spectraFile, spectraPath, false);

    // Run the search; the test passes if it completes without throwing
    var runner = new EverythingRunnerEngine(
        tasksToRun,
        new List<string> { spectraPath },
        new List<DbForTask> { new DbForTask(databasePath, false) },
        Environment.CurrentDirectory);
    runner.Run();
}
/// <summary>
/// Runs a "top-down" search with internal fragment ions enabled twice over the same spectra file
/// and database: once with three variable modifications, once without "Acetylation on K".
/// Both runs must yield exactly one PSM, the first must be unambiguous (no "|" in the full
/// sequence), and both must report the same number of matched ions.
/// </summary>
public static void InternalFragmentIonTest()
{
    // Inputs and outputs are anchored to one directory. The engine used to be pointed at
    // Environment.CurrentDirectory while results were read from the test directory, which
    // only worked when the two coincided.
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string myFile = Path.Combine(testDirectory, @"TestData\InternalTest.mgf");
    string myDatabase = Path.Combine(testDirectory, @"TestData\InternalTest.fasta");
    string outputPath = Path.Combine(testDirectory, @"TestInternal\AllPSMs.psmtsv");

    // Runs one search with the given variable modifications and returns the PSMs it wrote.
    List<PsmFromTsv> RunSearch(List<(string, string)> variableMods)
    {
        SearchTask searchTask = new SearchTask()
        {
            SearchParameters = new SearchParameters
            {
                MinAllowedInternalFragmentLength = 1
            },
            CommonParameters = new CommonParameters(
                digestionParams: new DigestionParams("top-down"),
                listOfModsVariable: variableMods)
        };

        List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("TestInternal", searchTask) };
        var engine = new EverythingRunnerEngine(taskList, new List<string> { myFile }, new List<DbForTask> { new DbForTask(myDatabase, false) }, testDirectory);
        engine.Run();

        //read the psmtsv
        return PsmTsvReader.ReadTsv(outputPath, out var warnings);
    }

    List<PsmFromTsv> psms = RunSearch(new List<(string, string)>
    {
        ("Common Variable", "Oxidation on M"),
        ("Common Biological", "Acetylation on K"),
        ("Common Biological", "Acetylation on X")
    });
    Assert.AreEqual(1, psms.Count);
    //check that it's been disambiguated
    Assert.IsFalse(psms[0].FullSequence.Contains("|"));
    int numTotalFragments = psms[0].MatchedIons.Count;

    //test again but no variable acetyl on K. Make sure that internal fragments are still searched even without ambiguity
    psms = RunSearch(new List<(string, string)>
    {
        ("Common Variable", "Oxidation on M"),
        ("Common Biological", "Acetylation on X")
    });
    Assert.AreEqual(1, psms.Count);
    Assert.AreEqual(numTotalFragments, psms[0].MatchedIons.Count);

    Directory.Delete(Path.Combine(testDirectory, @"TestInternal"), true);
}
/// <summary>
/// Checks the pruned database written by a search task: the input protein carries "ConnorMod"
/// at positions 1 and 3, the spectra file contains only the peptide modified at position 3,
/// and the pruned XML is expected to list exactly one modification, at position 3.
/// </summary>
public static void TestPrunedDatabase()
{
    //Create Search Task
    SearchTask task1 = new SearchTask
    {
        SearchParameters = new SearchParameters
        {
            WritePrunedDatabase = true,
            SearchTarget = true,
            MassDiffAcceptorType = MassDiffAcceptorType.Exact,
            ModsToWriteSelection = new Dictionary<string, int>
            {
                { "ConnorModType", 1 }
            }
        },
        CommonParameters = new CommonParameters(digestionParams: new DigestionParams(minPeptideLength: 5))
    };

    //add task to task list
    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", task1) };

    // Register the custom modification so it is known globally
    ModificationMotif.TryGetMotif("P", out ModificationMotif motif);
    var connorMod = new ModificationWithMass("ConnorMod", "ConnorModType", motif, TerminusLocalization.Any, 10);
    GlobalVariables.AddMods(new List<ModificationWithLocation> { connorMod });

    //create modification lists
    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => task1.CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();

    //add modification to Protein object (same mod at positions 1 and 3)
    var dictHere = new Dictionary<int, List<Modification>>
    {
        { 1, new List<Modification> { connorMod } },
        { 3, new List<Modification> { connorMod } }
    };

    //protein Creation
    Protein TestProteinWithMod = new Protein("PEPTID", "accession1", "organism", new List<Tuple<string, string>>(), dictHere);

    //First Write XML Database
    string xmlName = "okkk.xml";

    //Add Mod to list and write XML input database
    Dictionary<string, HashSet<Tuple<int, Modification>>> modList = new Dictionary<string, HashSet<Tuple<int, Modification>>>();
    var Hash = new HashSet<Tuple<int, Modification>>
    {
        new Tuple<int, Modification>(3, connorMod)
    };
    modList.Add("test", Hash);
    ProteinDbWriter.WriteXmlDatabase(modList, new List<Protein> { TestProteinWithMod }, xmlName);

    //now write MZML file
    var protein = ProteinDbLoader.LoadProteinXML(xmlName, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out Dictionary<string, Modification> ok);
    var digestedList = protein[0].Digest(task1.CommonParameters.DigestionParams, new List<ModificationWithMass>(), variableModifications).ToList();
    Assert.AreEqual(4, digestedList.Count);

    //Set Peptide with 1 mod at position 3
    PeptideWithSetModifications pepWithSetMods1 = digestedList[1];

    //Finally Write MZML file
    Assert.AreEqual("PEP[ConnorModType:ConnorMod]TID", pepWithSetMods1.Sequence);
    MsDataFile myMsDataFile = new TestDataFile(new List<PeptideWithSetModifications> { pepWithSetMods1 });
    string mzmlName = @"hello.mzML";
    IO.MzML.MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, mzmlName, false);

    //run!
    var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(xmlName, false) }, Environment.CurrentDirectory);
    engine.Run();

    string final = Path.Combine(MySetUpClass.outputFolder, "task1", "okkkpruned.xml");

    var proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out ok);

    // NUnit's AreEqual takes (expected, actual); the order matters for failure messages.
    //check length
    Assert.AreEqual(1, proteins[0].OneBasedPossibleLocalizedModifications.Count);
    //check location (key)
    Assert.IsTrue(proteins[0].OneBasedPossibleLocalizedModifications.ContainsKey(3));
    List<Modification> listOfMods = proteins[0].OneBasedPossibleLocalizedModifications[3];
    //check Type, count, ID
    Assert.AreEqual("ConnorModType", listOfMods[0].modificationType);
    Assert.AreEqual("ConnorMod", listOfMods[0].id);
    Assert.AreEqual(1, listOfMods.Count);
}
/// <summary>
/// Command-line entry point. Parses the -t/-o/-m/-s/-d options, reports missing tomls or parse
/// errors on the console, and otherwise loads the single-task tomls and runs them through
/// <see cref="EverythingRunnerEngine"/>. Meta-task tomls are only inspected and reported as skipped.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
private static void Main(string[] args)
{
    Console.WriteLine("Welcome to MetaMorpheus");
    Console.WriteLine(GlobalVariables.MetaMorpheusVersion);

    // ---- option definitions ----
    var p = new FluentCommandLineParser<ApplicationArguments>();

    p.Setup(arg => arg.Tasks)
     .As('t', "tasks")
     .SetDefault(new List<string>())
     .WithDescription("Single-task TOMLs.");

    p.Setup(arg => arg.OutputFolder)
     .As('o', "outputFolder")
     .SetDefault(null)
     .WithDescription("Folder into which to place results.");

    p.Setup(arg => arg.MetaTasks)
     .As('m', "meta-task")
     .SetDefault(new List<string>())
     .WithDescription("Multi-task TOMLs.");

    p.Setup(arg => arg.Spectra)
     .As('s', "spectra")
     .Required()
     .WithDescription("Spectra to analyze.");

    p.Setup(arg => arg.Databases)
     .As('d', "databases")
     .Required()
     .WithDescription("Protein sequence databases (FASTA, XML).");

    p.SetupHelp("h", "help")
     .Callback(text => Console.WriteLine(text));

    var result = p.Parse(args);
    var parsedArgs = p.Object;

    // ---- early exits ----
    bool anyTomlGiven = parsedArgs.MetaTasks != null
        && (parsedArgs.MetaTasks.Count != 0 || parsedArgs.Tasks.Count != 0);
    if (!anyTomlGiven)
    {
        Console.WriteLine("Error Text: No toml file was specified. Use -t for tasks or -m for meta-tasks.");
        return;
    }

    if (result.HasErrors)
    {
        Console.WriteLine("Error Text:" + result.ErrorText);
        return;
    }

    // ---- console event wiring ----
    MetaMorpheusEngine.WarnHandler += WarnHandler;
    MetaMorpheusEngine.OutProgressHandler += MyEngine_outProgressHandler;
    MetaMorpheusEngine.StartingSingleEngineHander += MyEngine_startingSingleEngineHander;
    MetaMorpheusEngine.FinishedSingleEngineHandler += MyEngine_finishedSingleEngineHandler;

    MetaMorpheusTask.WarnHandler += WarnHandler;
    MetaMorpheusTask.LogHandler += LogHandler;
    MetaMorpheusTask.StartingSingleTaskHander += MyTaskEngine_startingSingleTaskHander;
    MetaMorpheusTask.FinishedSingleTaskHandler += MyTaskEngine_finishedSingleTaskHandler;
    MetaMorpheusTask.FinishedWritingFileHandler += MyTaskEngine_finishedWritingFileHandler;

    // ---- modifications from every non-".fasta" database ----
    foreach (var databasePath in parsedArgs.Databases)
    {
        if (Path.GetExtension(databasePath).Equals(".fasta"))
        {
            continue;
        }

        GlobalVariables.AddMods(UsefulProteomicsDatabases.ProteinDbLoader.GetPtmListFromProteinXml(databasePath).OfType<Modification>(), true);

        // print any error messages reading the mods to the console
        foreach (var readError in GlobalVariables.ErrorsReadingMods)
        {
            Console.WriteLine(readError);
        }
        GlobalVariables.ErrorsReadingMods.Clear();
    }

    // ---- single-task tomls ----
    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>();

    for (int i = 0; i < parsedArgs.Tasks.Count; i++)
    {
        string tomlPath = parsedArgs.Tasks[i];
        string labelPrefix = "Task" + (i + 1);

        // Generic read to learn the task type, then a typed read of the same file
        string taskType = Toml.ReadFile(tomlPath, MetaMorpheusTask.tomlConfig).Get<string>("TaskType");

        switch (taskType)
        {
            case "Search":
                taskList.Add((labelPrefix + "SearchTask", Toml.ReadFile<SearchTask>(tomlPath, MetaMorpheusTask.tomlConfig)));
                break;

            case "Calibrate":
                taskList.Add((labelPrefix + "CalibrationTask", Toml.ReadFile<CalibrationTask>(tomlPath, MetaMorpheusTask.tomlConfig)));
                break;

            case "Gptmd":
                taskList.Add((labelPrefix + "GptmdTask", Toml.ReadFile<GptmdTask>(tomlPath, MetaMorpheusTask.tomlConfig)));
                break;

            case "XLSearch":
                taskList.Add((labelPrefix + "XLSearchTask", Toml.ReadFile<XLSearchTask>(tomlPath, MetaMorpheusTask.tomlConfig)));
                break;

            default:
                Console.WriteLine(taskType + " is not a known task type! Skipping.");
                break;
        }
    }

    // ---- meta-task tomls: none of the known types is runnable here, so only report ----
    foreach (string tomlPath in parsedArgs.MetaTasks)
    {
        string taskType = Toml.ReadFile(tomlPath, MetaMorpheusTask.tomlConfig).Get<string>("TaskType");

        switch (taskType)
        {
            case "Search":
                Console.WriteLine("Search tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                break;

            case "Calibrate":
                Console.WriteLine("Calibrate tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                break;

            case "Gptmd":
                Console.WriteLine("Gptmd tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                break;

            case "XLSearch":
                Console.WriteLine("XLSearch tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                break;

            default:
                Console.WriteLine(taskType + " is not a known task type! Skipping.");
                break;
        }
    }

    // ---- inputs and output location ----
    List<string> startingRawFilenameList = parsedArgs.Spectra.Select(b => Path.GetFullPath(b)).ToList();
    List<DbForTask> startingXmlDbFilenameList = parsedArgs.Databases.Select(b => new DbForTask(Path.GetFullPath(b), IsContaminant(b))).ToList();

    // Without -o, results go to a "$DATETIME" folder beside the first spectra file
    string outputFolder = parsedArgs.OutputFolder
        ?? Path.Combine(Path.GetDirectoryName(startingRawFilenameList.First()), @"$DATETIME");

    EverythingRunnerEngine a = new EverythingRunnerEngine(taskList, startingRawFilenameList, startingXmlDbFilenameList, outputFolder);

    try
    {
        a.Run();
    }
    catch (Exception e)
    {
        // Walk down to the innermost exception and report its message
        Exception innermost = e;
        while (innermost.InnerException != null)
        {
            innermost = innermost.InnerException;
        }
        Console.WriteLine("Run failed, Exception: " + innermost.Message);
    }
}
/// <summary>
/// Command-line entry point. Parses the -t/-m/-s/-d options, loads single-task tomls and
/// expands "Neo" meta-task tomls into their component tasks, then runs everything through
/// <see cref="EverythingRunnerEngine"/>. Output goes to a "$DATETIME" folder under the directory
/// derived from the common leading path of all spectra files.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
private static void Main(string[] args)
{
    Console.WriteLine("Welcome to MetaMorpheus");
    Console.WriteLine(GlobalVariables.MetaMorpheusVersion);

    var p = new FluentCommandLineParser<ApplicationArguments>();

    p.Setup(arg => arg.Tasks)
     .As('t', "tasks")
     .SetDefault(new List<string>());

    p.Setup(arg => arg.MetaTasks)
     .As('m', "meta-task")
     .SetDefault(new List<string>());

    p.Setup(arg => arg.Spectra)
     .As('s', "spectra")
     .Required();

    p.Setup(arg => arg.Databases)
     .As('d', "databases")
     .Required();

    var result = p.Parse(args);

    if (p.Object.MetaTasks.Count != 0 || p.Object.Tasks.Count != 0)
    {
        if (!result.HasErrors)
        {
            // Route engine and task events to this program's console handlers
            MetaMorpheusEngine.WarnHandler += WarnHandler;
            MetaMorpheusEngine.OutProgressHandler += MyEngine_outProgressHandler;
            MetaMorpheusEngine.StartingSingleEngineHander += MyEngine_startingSingleEngineHander;
            MetaMorpheusEngine.FinishedSingleEngineHandler += MyEngine_finishedSingleEngineHandler;

            MetaMorpheusTask.WarnHandler += WarnHandler;
            MetaMorpheusTask.LogHandler += LogHandler;
            MetaMorpheusTask.StartingSingleTaskHander += MyTaskEngine_startingSingleTaskHander;
            MetaMorpheusTask.FinishedSingleTaskHandler += MyTaskEngine_finishedSingleTaskHandler;
            MetaMorpheusTask.FinishedWritingFileHandler += MyTaskEngine_finishedWritingFileHandler;

            // Databases that are not ".fasta" are read as protein XML to pick up their modifications
            foreach (var db in p.Object.Databases)
            {
                if (!Path.GetExtension(db).Equals(".fasta"))
                {
                    GlobalVariables.AddMods(UsefulProteomicsDatabases.ProteinDbLoader.GetPtmListFromProteinXml(db).OfType<ModificationWithLocation>());
                }
            }

            List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)>();

            // Single-task tomls (-t)
            for (int i = 0; i < p.Object.Tasks.Count; i++)
            {
                var filePath = p.Object.Tasks[i];

                // Generic read to learn the task type, then a typed read of the same file
                var uhum = Toml.ReadFile(filePath, MetaMorpheusTask.tomlConfig);

                switch (uhum.Get<string>("TaskType"))
                {
                    case "Search":
                        var ye1 = Toml.ReadFile<SearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                        taskList.Add(("Task" + (i + 1) + "SearchTask", ye1));
                        break;

                    case "Calibrate":
                        var ye2 = Toml.ReadFile<CalibrationTask>(filePath, MetaMorpheusTask.tomlConfig);
                        taskList.Add(("Task" + (i + 1) + "CalibrationTask", ye2));
                        break;

                    case "Gptmd":
                        var ye3 = Toml.ReadFile<GptmdTask>(filePath, MetaMorpheusTask.tomlConfig);
                        taskList.Add(("Task" + (i + 1) + "GptmdTask", ye3));
                        break;

                    case "XLSearch":
                        var ye4 = Toml.ReadFile<XLSearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                        taskList.Add(("Task" + (i + 1) + "XLSearchTask", ye4));
                        break;

                    case "Neo":
                        Console.WriteLine("Neo tasks are meta-tasks that rely on several other tasks. Please use -m for meta instead of -t. Skipping.");
                        break;

                    default:
                        Console.WriteLine(uhum.Get<string>("TaskType") + " is not a known task type! Skipping.");
                        break;
                }
            }

            // Meta-task tomls (-m)
            for (int i = 0; i < p.Object.MetaTasks.Count; i++)
            {
                var filePath = p.Object.MetaTasks[i];
                var uhum = Toml.ReadFile(filePath, MetaMorpheusTask.tomlConfig);
                switch (uhum.Get<string>("TaskType"))
                {
                    case "Search":
                        Console.WriteLine("Search tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                        break;

                    case "Calibrate":
                        Console.WriteLine("Calibrate tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                        break;

                    case "Gptmd":
                        Console.WriteLine("Gptmd tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                        break;

                    case "XLSearch":
                        Console.WriteLine("XLSearch tasks are individual tasks. Please use -t for task instead of -m. Skipping.");
                        break;

                    case "Neo":
                        var ye5 = Toml.ReadFile<NeoSearchTask>(filePath, MetaMorpheusTask.tomlConfig);
                        foreach (MetaMorpheusTask task in NeoLoadTomls.LoadTomls(ye5))
                        {
                            // Queue each expanded sub-task. This used to add ye5 itself on every
                            // iteration, so the tasks returned by LoadTomls were never scheduled.
                            // The label keeps the original scheme (running count + meta-task type).
                            taskList.Add(("Task" + (taskList.Count + 1) + ye5.TaskType, task));
                        }
                        break;

                    default:
                        Console.WriteLine(uhum.Get<string>("TaskType") + " is not a known task type! Skipping.");
                        break;
                }
            }

            List<string> startingRawFilenameList = p.Object.Spectra.Select(b => Path.GetFullPath(b)).ToList();
            List<DbForTask> startingXmlDbFilenameList = p.Object.Databases.Select(b => new DbForTask(Path.GetFullPath(b), IsContaminant(b))).ToList();

            // Candidate common prefixes of all spectra paths, longest first; the first one that
            // every path starts with determines the output directory.
            var MatchingChars =
                from len in Enumerable.Range(0, startingRawFilenameList.Min(s => s.Length)).Reverse()
                let possibleMatch = startingRawFilenameList.First().Substring(0, len)
                where startingRawFilenameList.All(f => f.StartsWith(possibleMatch, StringComparison.Ordinal))
                select possibleMatch;

            string outputFolder = Path.Combine(Path.GetDirectoryName(MatchingChars.First()), @"$DATETIME");

            EverythingRunnerEngine a = new EverythingRunnerEngine(taskList, startingRawFilenameList, startingXmlDbFilenameList, outputFolder);

            try
            {
                a.Run();
            }
            catch (Exception e)
            {
                // Report the innermost exception's message
                while (e.InnerException != null)
                {
                    e = e.InnerException;
                }
                var message = "Run failed, Exception: " + e.Message;
                Console.WriteLine(message);
            }
        }
        else
        {
            Console.WriteLine("Error Text:" + result.ErrorText);
        }
    }
    else
    {
        Console.WriteLine("Error Text: No toml file was specified. Use -t for tasks or -m for meta-tasks.");
    }
}