/// <summary>
/// Updates the status of the MaSuRCA assembler. If it has either errored out (-1) or is
/// completed (last step), it will not change the status.
/// </summary>
/// <param name="client">The current SSH client session.</param>
/// <param name="genomeModel">The model of the particular job.</param>
private static void CheckMasurcaStep(SshClient client, GenomeModel genomeModel)
{
    // Hoist values that are invariant for the duration of this call; the original
    // recomputed the step list and the remote output path on every use.
    var masurcaStepList = StepDescriptions.GetMasurcaStepList();
    string masurcaOutputPath = Accessors.GetMasurcaOutputPath(genomeModel.Seed);

    // Guard clause: skip the check if a prior error was recorded, the job already
    // errored out (-1), or MaSuRCA has reached its final step.
    if (!string.IsNullOrEmpty(ErrorHandling.error)
        || genomeModel.MasurcaCurrentStep == -1
        || genomeModel.MasurcaCurrentStep == masurcaStepList.Last().step)
    {
        return;
    }

    if (LinuxCommands.DirectoryHasFiles(client, masurcaOutputPath))
    {
        int currentMasurcaStep = LinuxCommands.GetCurrentStep(client, masurcaOutputPath, masurcaStepList);

        // Provided we didn't encounter an error, set the status of masurca and the job.
        if (currentMasurcaStep != -1)
        {
            genomeModel.MasurcaCurrentStep = currentMasurcaStep;
            genomeModel.MasurcaStatus = StepDescriptions.GetCurrentStepDescription(masurcaStepList, currentMasurcaStep);
        }
        else
        {
            StepDescriptions.SetMasurcaError(client, genomeModel);
        }
    }
    // Either masurca hasn't started or it has but no files have been created yet.
    else
    {
        genomeModel.MasurcaCurrentStep = 1;
        genomeModel.MasurcaStatus = StepDescriptions.GetCurrentStepDescription(masurcaStepList, 1);
    }
}
/// <summary>
/// Reads the MaSuRCA failure log for the given job off the remote host and returns its contents.
/// </summary>
/// <param name="client">The current SSH client session.</param>
/// <param name="seed">The seed (id) of the particular job.</param>
/// <returns>The raw text of the MaSuRCA failure log.</returns>
protected internal static string GetMasurcaError(SshClient client, int seed)
{
    using (var cmd = client.CreateCommand("cat " + Accessors.GetMasurcaFailureLogPath(seed, true)))
    {
        cmd.Execute();

        // Record any command error centrally before returning the output.
        ErrorHandling.CommandError(cmd);

        // SshCommand.Result is already a string; the original ToString() was a no-op.
        return cmd.Result;
    }
}
/// <summary>
/// Creates the initial file which is run at the beginning of each run. The primary function
/// of which is to download the user's data at runtime.
/// </summary>
/// <exception cref="IOException">Thrown when the init script already exists locally.</exception>
private void BuildInitConfig()
{
    // File Server
    string fileName = "init_" + seed + ".sh";
    string fullPath = localPath + fileName;

    // BigDog
    string jobDataDirectory = Accessors.GetJobDataPath(seed);

    // Guard clause: we have a problem since the file already exists.
    if (File.Exists(fullPath))
    {
        ErrorHandling.error = "Unfortunately, we couldn't create the necessary configuration files to submit your job. Please contact an administrator.";
        throw new IOException("Attempted to create \"" + fullPath + "\" but it already exists so we cannot create the file. Continuing is not advised. ");
    }

    try
    {
        using (TextWriter tw = new StreamWriter(fullPath))
        {
            tw.WriteLine("#!/bin/bash");

            // Note: Due to the fact that the compute nodes do not have internet access, an ssh
            // to the login node (which does) is required for the data to download successfully.
            tw.WriteLine("ssh login-0-0");

            // If we have sequential reads there will be only a single URL:
            if (dataSources.Count == 1)
            {
                tw.WriteLine("wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 5 -O "
                    + jobDataDirectory + "sequentialData.fastq " + dataSources[0].ToString());
            }
            // If we have any other type of reads there will be at least a left and right read:
            else
            {
                List<string> leftReads = new List<string>();
                List<string> rightReads = new List<string>();

                // Create the URL lists with the left and right reads split.
                HelperMethods.CreateUrlLists(dataSources, out leftReads, out rightReads);

                // The left/right download logic was previously duplicated verbatim; it now
                // lives in a single helper written once per side.
                WriteReadDownloads(tw, leftReads, "leftData_", "leftReads.fastq", jobDataDirectory);
                WriteReadDownloads(tw, rightReads, "rightData_", "rightReads.fastq", jobDataDirectory);
            }

            // Exit the ssh session for the login node.
            tw.WriteLine("exit");

            // DEBUG: COMMENTING OUT FOR TESTING PURPOSES.
            if (genomeModel.UseMasurca)
            {
                // Generate assembly file in the output directory because that is where the files will be generated by default.
                tw.WriteLine("/share/bio/masurca/bin/masurca " + Accessors.GetMasurcaScriptPath(seed)
                    + " -o " + Accessors.GetMasurcaOutputPath(seed) + "assembly.sh");

                // Run the assembler.
                // NOTE(review): "." + path yields "./share/..." when the output path is absolute;
                // if a shell source/execute was intended this should probably be ". " + path — confirm.
                tw.WriteLine("." + Accessors.GetMasurcaOutputPath(seed) + "assembly.sh");
            }

            if (genomeModel.UseSGA) { }

            if (genomeModel.UseWGS) { }

            tw.WriteLine("#EOF");

            InitConfigURL = Accessors.FTP_URL + urlPath + fileName;
        }
    }
    catch (Exception e)
    {
        ErrorHandling.error = e.Message;
    }
}

/// <summary>
/// Writes the wget/gunzip/concatenate shell commands for one side (left or right) of a
/// paired read set: download each URL, decompress if gzipped, then concatenate the pieces
/// into a single fastq file and remove them.
/// </summary>
/// <param name="tw">Writer for the init script being generated.</param>
/// <param name="urls">The data source URLs for this side.</param>
/// <param name="dataPrefix">File-name prefix for the downloaded pieces (e.g. "leftData_").</param>
/// <param name="combinedFileName">Name of the concatenated output (e.g. "leftReads.fastq").</param>
/// <param name="jobDataDirectory">Remote directory the job's data lives in.</param>
private static void WriteReadDownloads(TextWriter tw, List<string> urls, string dataPrefix, string combinedFileName, string jobDataDirectory)
{
    string concatFiles = "";

    for (int i = 0; i < urls.Count; i++)
    {
        string newDataName = dataPrefix + i;

        // If it is compressed, keep the .gz suffix on the download. BUG FIX: the original
        // saved the file without a suffix and ran "gunzip <bare name>", which fails twice
        // over — gunzip requires a known suffix, and the file was saved under
        // jobDataDirectory, not the shell's working directory. Add additional and more
        // concise compression checks here later.
        bool compressed = urls[i].ToString().Contains(".gz");
        string downloadName = compressed ? newDataName + ".gz" : newDataName;

        tw.WriteLine("wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 5 -O "
            + jobDataDirectory + downloadName + " " + urls[i].ToString());

        if (compressed)
        {
            // gunzip strips the .gz suffix, leaving jobDataDirectory/newDataName for the concat below.
            tw.WriteLine("gunzip " + jobDataDirectory + downloadName);
        }

        concatFiles = concatFiles + " " + jobDataDirectory + newDataName;
    }

    // Concat the pieces together into a single fastq file and delete the old files.
    tw.WriteLine("cat " + concatFiles + " > " + jobDataDirectory + combinedFileName);
    tw.WriteLine("rm " + jobDataDirectory + dataPrefix + "*");
}
/// <summary>
/// Creates all the necessary folders, downloads the config scripts, and adds the job to the scheduler on BigDog.
/// </summary>
/// <returns>Returns true only if a job gets successfully added to SGE.</returns>
public bool CreateJob()
{
    // The init.sh script will contain all the basic logic to download the data and initiate the job on the assembler(s).
    using (var client = new SshClient(Accessors.BD_IP, genomeModel.SSHUser, genomeModel.SSHPass))
    {
        // Set defaults
        //Accessors.masterPath = "/share/scratch/bioinfo/" + HelperMethods.GetUsername();
        string node = Accessors.BD_COMPUTE_NODE1; // default
        string wgetLogParameter = "--output-file=" + Accessors.GetJobLogPath(seed) + "wget.error";
        string initPath = Accessors.GetJobConfigPath(seed) + "init.sh";
        string masurcaPath = Accessors.GetJobConfigPath(seed) + "masurca_config.txt";
        string jobName = HelperMethods.GetUsername() + "-" + seed;

        try
        {
            client.Connect();

            // Each step below runs only if no prior step recorded an error via ErrorHandling.
            if (NoError()) { LinuxCommands.CreateDirectory(client, Accessors.USER_ROOT_JOB_DIRECTORY, "-p"); }
            if (NoError()) { LinuxCommands.CreateDirectory(client, Accessors.GetJobPath(seed), "-p"); }
            if (NoError()) { LinuxCommands.CreateDirectory(client, Accessors.GetJobDataPath(seed), "-p"); }
            if (NoError()) { LinuxCommands.CreateDirectory(client, Accessors.GetJobConfigPath(seed), "-p"); }
            if (NoError()) { LinuxCommands.CreateDirectory(client, Accessors.GetJobOutputPath(seed), "-p"); }

            if (NoError())
            {
                // Create subdirectories in the output directory to separate the outputs.
                if (genomeModel.UseMasurca) { LinuxCommands.CreateDirectory(client, Accessors.GetMasurcaOutputPath(seed), "-p"); }
                if (genomeModel.UseSGA) { LinuxCommands.CreateDirectory(client, Accessors.GetSgaOutputPath(seed), "-p"); }
                if (genomeModel.UseWGS) { LinuxCommands.CreateDirectory(client, Accessors.GetWgsOutputPath(seed), "-p"); }
            }

            if (NoError()) { LinuxCommands.CreateDirectory(client, Accessors.GetJobLogPath(seed), "-p"); }

            // Fetch both config scripts and normalize their line endings for the cluster.
            if (NoError()) { LinuxCommands.DownloadFile(client, initPath, InitConfigURL, wgetLogParameter); }
            if (NoError()) { LinuxCommands.RunDos2Unix(client, initPath); }
            if (NoError()) { LinuxCommands.DownloadFile(client, masurcaPath, MasurcaConfigURL, wgetLogParameter); }
            if (NoError()) { LinuxCommands.RunDos2Unix(client, masurcaPath); }

            if (NoError()) { LinuxCommands.ChangePermissions(client, Accessors.GetJobPath(seed), "777", "-R"); }

            if (NoError())
            {
                // Pick whichever compute node currently has the smaller load.
                node = LinuxCommands.GetNodeLoad(client, Accessors.BD_COMPUTE_NODE1) > LinuxCommands.GetNodeLoad(client, Accessors.BD_COMPUTE_NODE2)
                    ? Accessors.BD_COMPUTE_NODE2
                    : Accessors.BD_COMPUTE_NODE1;
            }

            if (NoError()) { LinuxCommands.AddJobToScheduler(client, Accessors.GetJobDataPath(seed), Accessors.GetJobLogPath(seed), node, jobName, initPath); }
            if (NoError()) { genomeModel.SGEJobId = LinuxCommands.SetJobNumber(client, genomeModel.SSHUser, jobName); }

            // The job was created successfully only if no step above recorded an error.
            return NoError();
        }
        // SSH Connection couldn't be established.
        catch (SocketException e)
        {
            ErrorHandling.error = "The SSH connection couldn't be established. " + e.Message;
            return false;
        }
        // Authentication failure.
        catch (SshAuthenticationException e)
        {
            ErrorHandling.error = "The credentials were entered incorrectly. " + e.Message;
            return false;
        }
        // The SSH connection was dropped.
        catch (SshConnectionException e)
        {
            ErrorHandling.error = "The connection was terminated unexpectedly. " + e.Message;
            return false;
        }
        catch (Exception e)
        {
            ErrorHandling.error = "There was an uncaught exception. " + e.Message;
            return false;
        }
    }
}
/// <summary>
/// Updates the status of a single job. But it does not perform the upload if that needs to happen.
/// Advances the job through as many overall steps as possible in one pass; stops when a step
/// reports it is still in progress, the job is complete, or an error is recorded.
/// </summary>
/// <param name="genomeModel">The model of the particular job.</param>
protected internal static void UpdateStatus(GenomeModel genomeModel)
{
    using (var client = new SshClient(CreatePrivateKeyConnectionInfo()))
    {
        try
        {
            client.Connect();

            /// TODO: Modify the code to skip entire sections if they have already been completed. This will be based off the CURRENT STEP stored in the model data.
            using (GenomeAssemblyDbContext db = new GenomeAssemblyDbContext())
            {
                bool continueUpdate = true;  // Determines whether we will continue checking the job status.
                bool DEBUG_MODE = false;     // Debug mode to skip some assembler steps.
                bool outOfRange = false;     // If the overall step is out of bounds, then we set this to true to attempt a correction.

                ErrorHandling.error = "";    // Reset the error flag.

                // Loop so that a job which finished several steps since the last check is
                // advanced all the way in a single update.
                while (continueUpdate && ErrorHandling.NoError())
                {
                    // Depending on the current step, this switch will determine if the state of the job needs to change.
                    switch (genomeModel.OverallCurrentStep)
                    {
                        // Queued step
                        case 1:
                            {
                                if (DEBUG_MODE) { genomeModel.NextStep(); break; }

                                // The conversion script running means the job has left the queue.
                                if (LinuxCommands.IsProcessRunning(client, "conversionScript.sh"))
                                {
                                    genomeModel.NextStep();
                                }
                                // If a conversion was never run or if we missed it, then check if the job has already started.
                                else if (LinuxCommands.JobRunningAlt(client, genomeModel.SSHUser))
                                {
                                    genomeModel.NextStep();
                                }
                                else
                                {
                                    continueUpdate = false;
                                }

                                break;
                            }

                        // Data conversion step
                        case 2:
                            {
                                if (DEBUG_MODE) { genomeModel.NextStep(); break; }

                                // Files appearing in the MaSuRCA output directory indicate the
                                // assembler has started, so conversion must be finished.
                                if (LinuxCommands.DirectoryHasFiles(client, Accessors.GetMasurcaOutputPath(genomeModel.Seed)))
                                {
                                    genomeModel.NextStep();
                                }
                                else
                                {
                                    continueUpdate = false;
                                }

                                break;
                            }

                        // Running assemblers step
                        case 3:
                            {
                                if (DEBUG_MODE) { genomeModel.NextStep(); break; }

                                // Refresh the per-assembler status; SGA and WGS are not implemented yet.
                                if (genomeModel.UseMasurca) { CheckMasurcaStep(client, genomeModel); }

                                if (genomeModel.UseSGA) { }

                                if (genomeModel.UseWGS) { }

                                if (genomeModel.IsAssemblyFinished())
                                {
                                    genomeModel.NextStep();
                                }
                                else
                                {
                                    continueUpdate = false;
                                }

                                break;
                            }

                        // Data analysis step
                        case 4:
                            {
                                // Until data analysis is implemented, we skip the step.
                                genomeModel.NextStep();
                                break;

                                //if (LinuxCommands.IsProcessRunning(client, "dataAnalysis.sh"))
                                //    continueUpdate = false;
                                //else
                                //{
                                //    // Has it finished?
                                //    if (LinuxCommands.FileExists(client, Accessors.GetJobOutputPath(genomeModel.Seed) + "dataAnalysisResult"))
                                //        genomeModel.NextStep();
                                //    else
                                //        LinuxCommands.RunDataAnalysis(client);
                                //}
                                //break;
                            }

                        // TODO: Create a more robust method in checking for a completed upload. Maybe connect to the FTP and compare file sizes and see if they are close.
                        // Uploading Data step
                        case 5:
                            {
                                //if (LinuxCommands.IsJobUploading(client, Accessors.USER_ROOT_JOB_DIRECTORY, Accessors.GetCompressedDataPath(genomeModel.Seed)))
                                //    continueUpdate = false;
                                //else if (LinuxCommands.FileExists(client, Accessors.GetCompressedDataPath(genomeModel.Seed)))
                                //    genomeModel.NextStep();
                                //else
                                //{
                                // NOTE(review): the guards above are commented out, so this kicks off
                                // an upload on every pass through step 5 — confirm that is intended.
                                LinuxCommands.UploadJobData(client, Accessors.USER_ROOT_JOB_DIRECTORY, Accessors.GetCompressedDataPath(genomeModel.Seed)
                                    , Accessors.GetRelativeJobDirectory(genomeModel.Seed), Accessors.GetRemoteDownloadLocation(genomeModel.Seed), true, "yr");

                                continueUpdate = false;
                                //}
                                break;
                            }

                        // Completed step
                        case 6:
                            {
                                continueUpdate = false;
                                break;
                            }

                        default:
                            {
                                // If we have attempted a correction and failed, throw in the towel.
                                if (outOfRange)
                                {
                                    throw new IndexOutOfRangeException("The current step of the program is out of bounds after an attempted correction. The current step: " + genomeModel.OverallCurrentStep);
                                }
                                else
                                {
                                    outOfRange = true;

                                    // Reset the state to default and have it run through the update method again.
                                    // NOTE(review): GetOverallStepList()[genomeModel.OverallCurrentStep] indexes
                                    // with the step number itself (now 1); if step numbers are 1-based this reads
                                    // the second entry — confirm whether index 0 was intended.
                                    genomeModel.OverallCurrentStep = 1;
                                    genomeModel.OverallStatus = StepDescriptions.GetOverallStepList()[genomeModel.OverallCurrentStep].ToString();
                                }

                                break;
                            }
                    }
                }
            }
        }
        // SSH Connection couldn't be established.
        catch (SocketException e)
        {
            ErrorHandling.error = "The SSH connection couldn't be established. " + e.Message;
        }
        // The SSH connection was dropped.
        catch (SshConnectionException e)
        {
            ErrorHandling.error = "The connection was terminated unexpectedly. " + e.Message;
        }
    }
}