/// <summary>
        /// Refreshes the masurca step and status stored on the job's model. The model is left untouched when an error is
        /// already recorded, when masurca has errored out (step -1), or when it has already reached its last step.
        /// </summary>
        /// <param name="client">The current SSH client session.</param>
        /// <param name="genomeModel">The model of the particular job.</param>
        private static void CheckMasurcaStep(SshClient client, GenomeModel genomeModel)
        {
            // Guard clauses: a pending error, a failed assembler, or a finished assembler all mean "nothing to update".
            if (!string.IsNullOrEmpty(ErrorHandling.error))
            {
                return;
            }

            if (genomeModel.MasurcaCurrentStep.Equals(-1))
            {
                return;
            }

            if (genomeModel.MasurcaCurrentStep.Equals(StepDescriptions.GetMasurcaStepList().Last().step))
            {
                return;
            }

            var outputPath = Accessors.GetMasurcaOutputPath(genomeModel.Seed);

            // Either masurca hasn't started or it has but no files have been created yet: report the first step.
            if (!LinuxCommands.DirectoryHasFiles(client, outputPath))
            {
                genomeModel.MasurcaCurrentStep = 1;
                genomeModel.MasurcaStatus      = StepDescriptions.GetCurrentStepDescription(StepDescriptions.GetMasurcaStepList(), 1);

                return;
            }

            int detectedStep = LinuxCommands.GetCurrentStep(client, outputPath, StepDescriptions.GetMasurcaStepList());

            // A detected step of -1 is treated as an assembler error.
            if (detectedStep == -1)
            {
                StepDescriptions.SetMasurcaError(client, genomeModel);

                return;
            }

            // No error was encountered, so record the step and its description on the model.
            genomeModel.MasurcaCurrentStep = detectedStep;
            genomeModel.MasurcaStatus      = StepDescriptions.GetCurrentStepDescription(StepDescriptions.GetMasurcaStepList(), detectedStep);
        }
        /// <summary>
        /// Runs <c>cat</c> on the masurca failure log of a job over the given SSH session and returns the command's result.
        /// </summary>
        /// <param name="client">The current SSH client session.</param>
        /// <param name="seed">The seed identifying the job whose failure log is read.</param>
        /// <returns>The result text of the executed command.</returns>
        protected internal static string GetMasurcaError(SshClient client, int seed)
        {
            var failureLogPath = Accessors.GetMasurcaFailureLogPath(seed, true);

            using (var catCommand = client.CreateCommand("cat " + failureLogPath))
            {
                catCommand.Execute();

                // Let the shared error handler inspect the finished command before the result is handed back.
                ErrorHandling.CommandError(catCommand);

                var logContents = catCommand.Result.ToString();

                return logContents;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates the initial shell script which is run at the beginning of each run. Its primary function is to download
        /// the user's data at runtime; when masurca is enabled it also generates and launches the assembly script.
        /// </summary>
        /// <remarks>
        /// On success <c>InitConfigURL</c> is set to the location of the written script. Any exception raised while writing
        /// the script is recorded in <c>ErrorHandling.error</c> rather than rethrown.
        /// </remarks>
        /// <exception cref="IOException">Thrown when the script file already exists at the target path.</exception>
        private void BuildInitConfig()
        {
            // File Server
            string fileName = "init_" + seed + ".sh";
            string fullPath = localPath + fileName;

            // BigDog
            string jobDataDirectory = Accessors.GetJobDataPath(seed);

            // We have a problem if the file already exists: never overwrite a previously generated script.
            if (File.Exists(fullPath))
            {
                ErrorHandling.error = "Unfortunately, we couldn't create the necessary configuration files to submit your job. Please contact an administrator.";

                throw new IOException("Attempted to create \"" + fullPath + "\" but it already exists so we cannot create the file. Continuing is not advised. ");
            }

            try
            {
                using (TextWriter tw = new StreamWriter(fullPath))
                {
                    tw.WriteLine("#!/bin/bash");

                    // Note: Due to the fact that the compute nodes do not have internet access, an ssh to the login node (which does) is required for the
                    // data to download successfully.
                    // NOTE(review): the commands below are written to this same script after the ssh line; confirm they actually
                    // execute on the login node rather than after the ssh session ends.
                    tw.WriteLine("ssh login-0-0");

                    // If we have sequential reads there will be only a single URL:
                    if (dataSources.Count == 1)
                    {
                        tw.WriteLine(WgetCommandPrefix + jobDataDirectory + "sequentialData.fastq " + ShellQuote(dataSources[0].ToString()));
                    }

                    // If we have any other type of reads there will be at least a left and right read:
                    else
                    {
                        List <string> leftReads;
                        List <string> rightReads;

                        // Create the URL lists with the left and right reads split.
                        HelperMethods.CreateUrlLists(dataSources, out leftReads, out rightReads);

                        // Download, decompress and merge each side into leftReads.fastq / rightReads.fastq.
                        WriteReadDownloads(tw, leftReads, "leftData_", jobDataDirectory, "leftReads.fastq");
                        WriteReadDownloads(tw, rightReads, "rightData_", jobDataDirectory, "rightReads.fastq");
                    }

                    // Exit the ssh session for the login node.
                    tw.WriteLine("exit");

                    if (genomeModel.UseMasurca)
                    {
                        // Generate assembly file in the output directory because that is where the files will be generated by default.
                        tw.WriteLine("/share/bio/masurca/bin/masurca " + Accessors.GetMasurcaScriptPath(seed)
                                     + " -o " + Accessors.GetMasurcaOutputPath(seed) + "assembly.sh");

                        // Run the assembler.
                        // NOTE(review): a "." is prepended to the output path; if that path is absolute this yields a relative
                        // "./..." path — confirm against Accessors.GetMasurcaOutputPath.
                        tw.WriteLine("." + Accessors.GetMasurcaOutputPath(seed) + "assembly.sh");
                    }

                    // Placeholder: nothing is written for SGA yet.
                    if (genomeModel.UseSGA)
                    {
                    }

                    // Placeholder: nothing is written for WGS yet.
                    if (genomeModel.UseWGS)
                    {
                    }

                    tw.WriteLine("#EOF");

                    InitConfigURL = Accessors.FTP_URL + urlPath + fileName;
                }
            }

            catch (Exception e)
            {
                ErrorHandling.error = e.Message;
            }
        }

        /// <summary>
        /// The wget invocation shared by every download line; the output path and the URL are appended to it.
        /// </summary>
        private const string WgetCommandPrefix = "wget --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 5 -O ";

        /// <summary>
        /// Writes the script lines that download every URL of one read side, decompress the gzip'd ones, concatenate the
        /// results into a single file, and remove the per-URL files afterwards.
        /// </summary>
        /// <param name="tw">The writer of the script being generated.</param>
        /// <param name="readUrls">The URLs belonging to this side of the reads.</param>
        /// <param name="dataPrefix">The per-URL file name prefix (the URL's index is appended to it).</param>
        /// <param name="jobDataDirectory">The directory on the cluster that receives the data.</param>
        /// <param name="mergedFileName">The name of the concatenated output file inside <paramref name="jobDataDirectory"/>.</param>
        private static void WriteReadDownloads(TextWriter tw, List <string> readUrls, string dataPrefix, string jobDataDirectory, string mergedFileName)
        {
            string concatFiles = "";

            for (int i = 0; i < readUrls.Count; i++)
            {
                string url      = readUrls[i].ToString();
                string dataPath = jobDataDirectory + dataPrefix + i;

                // If it is compressed, uncompress the data. Add additional and more concise checks here later.
                bool isCompressed = url.Contains(".gz");

                // gunzip only accepts files carrying a known suffix, so a compressed download must be saved as "<name>.gz";
                // decompressing it then leaves "<name>" in the same directory, which is what gets concatenated below.
                string downloadPath = isCompressed ? dataPath + ".gz" : dataPath;

                tw.WriteLine(WgetCommandPrefix + downloadPath + " " + ShellQuote(url));

                if (isCompressed)
                {
                    tw.WriteLine("gunzip " + downloadPath);
                }

                concatFiles = concatFiles + " " + dataPath;
            }

            // Concat the reads together into a single file and delete the per-URL files.
            tw.WriteLine("cat " + concatFiles + " > " + jobDataDirectory + mergedFileName);
            tw.WriteLine("rm " + jobDataDirectory + dataPrefix + "*");
        }

        /// <summary>
        /// Wraps a value in single quotes for use as one bash word, so characters such as <c>&amp;</c>, <c>?</c> or <c>;</c>
        /// in a user-supplied URL are passed to the command verbatim instead of being interpreted by the shell.
        /// </summary>
        /// <param name="value">The raw value to quote.</param>
        /// <returns>The single-quoted value, with embedded single quotes escaped.</returns>
        private static string ShellQuote(string value)
        {
            // Inside single quotes nothing is special except the quote itself: close, emit an escaped quote, reopen.
            return "'" + value.Replace("'", "'\\''") + "'";
        }
Beispiel #4
0
        /// <summary>
        /// Connects to BigDog over SSH, creates the job's folders, downloads the config scripts, and adds the job to the scheduler.
        /// Every stage only runs while no error has been recorded so far.
        /// </summary>
        /// <returns>Returns true only if a job gets successfully added to SGE.</returns>
        public bool CreateJob()
        {
            // The init.sh script will contain all the basic logic to download the data and initiate the job on the assembler(s).
            using (var client = new SshClient(Accessors.BD_IP, genomeModel.SSHUser, genomeModel.SSHPass))
            {
                string node             = Accessors.BD_COMPUTE_NODE1; // default
                string wgetLogParameter = "--output-file=" + Accessors.GetJobLogPath(seed) + "wget.error";
                string initPath         = Accessors.GetJobConfigPath(seed) + "init.sh";
                string masurcaPath      = Accessors.GetJobConfigPath(seed) + "masurca_config.txt";
                string jobName          = HelperMethods.GetUsername() + "-" + seed;

                // The stages of job creation, in execution order. Nothing runs here; each entry is invoked below.
                Action[] stages =
                {
                    // Directory layout for the job.
                    () => LinuxCommands.CreateDirectory(client, Accessors.USER_ROOT_JOB_DIRECTORY, "-p"),
                    () => LinuxCommands.CreateDirectory(client, Accessors.GetJobPath(seed), "-p"),
                    () => LinuxCommands.CreateDirectory(client, Accessors.GetJobDataPath(seed), "-p"),
                    () => LinuxCommands.CreateDirectory(client, Accessors.GetJobConfigPath(seed), "-p"),
                    () => LinuxCommands.CreateDirectory(client, Accessors.GetJobOutputPath(seed), "-p"),

                    // Create subdirectories in the output directory to separate the outputs.
                    () =>
                    {
                        if (genomeModel.UseMasurca)
                        {
                            LinuxCommands.CreateDirectory(client, Accessors.GetMasurcaOutputPath(seed), "-p");
                        }

                        if (genomeModel.UseSGA)
                        {
                            LinuxCommands.CreateDirectory(client, Accessors.GetSgaOutputPath(seed), "-p");
                        }

                        if (genomeModel.UseWGS)
                        {
                            LinuxCommands.CreateDirectory(client, Accessors.GetWgsOutputPath(seed), "-p");
                        }
                    },

                    () => LinuxCommands.CreateDirectory(client, Accessors.GetJobLogPath(seed), "-p"),

                    // Fetch the generated scripts and normalise their line endings.
                    () => LinuxCommands.DownloadFile(client, initPath, InitConfigURL, wgetLogParameter),
                    () => LinuxCommands.RunDos2Unix(client, initPath),
                    () => LinuxCommands.DownloadFile(client, masurcaPath, MasurcaConfigURL, wgetLogParameter),
                    () => LinuxCommands.RunDos2Unix(client, masurcaPath),

                    () => LinuxCommands.ChangePermissions(client, Accessors.GetJobPath(seed), "777", "-R"),

                    // Pick the second compute node only when the first one reports the larger load.
                    () =>
                    {
                        bool firstNodeIsBusier = LinuxCommands.GetNodeLoad(client, Accessors.BD_COMPUTE_NODE1) > LinuxCommands.GetNodeLoad(client, Accessors.BD_COMPUTE_NODE2);

                        node = firstNodeIsBusier ? Accessors.BD_COMPUTE_NODE2 : Accessors.BD_COMPUTE_NODE1;
                    },

                    () => LinuxCommands.AddJobToScheduler(client, Accessors.GetJobDataPath(seed), Accessors.GetJobLogPath(seed), node, jobName, initPath),

                    () => { genomeModel.SGEJobId = LinuxCommands.SetJobNumber(client, genomeModel.SSHUser, jobName); },
                };

                try
                {
                    client.Connect();

                    // Each stage is gated by its own error check, so the first recorded error skips everything after it.
                    foreach (Action stage in stages)
                    {
                        if (NoError())
                        {
                            stage();
                        }
                    }

                    // Success means that no stage recorded an error.
                    return NoError();
                }

                // SSH Connection couldn't be established.
                catch (SocketException e)
                {
                    ErrorHandling.error = "The SSH connection couldn't be established. " + e.Message;

                    return false;
                }

                // Authentication failure.
                catch (SshAuthenticationException e)
                {
                    ErrorHandling.error = "The credentials were entered incorrectly. " + e.Message;

                    return false;
                }

                // The SSH connection was dropped.
                catch (SshConnectionException e)
                {
                    ErrorHandling.error = "The connection was terminated unexpectedly. " + e.Message;

                    return false;
                }

                catch (Exception e)
                {
                    ErrorHandling.error = "There was an uncaught exception. " + e.Message;

                    return false;
                }
            }
        }
        /// <summary>
        /// Updates the status of a single job by advancing its overall step for as long as the state observed over SSH allows.
        /// The loop stops as soon as a step cannot be advanced yet or an error has been recorded.
        /// </summary>
        /// <remarks>
        /// Socket and SSH connection failures are recorded in <c>ErrorHandling.error</c> instead of being rethrown. The
        /// <see cref="IndexOutOfRangeException"/> raised for an uncorrectable step is not caught in this method.
        /// NOTE(review): the previous summary stated that this method does not perform the upload, yet step 5 below calls
        /// <c>LinuxCommands.UploadJobData</c> — confirm which behaviour is intended.
        /// </remarks>
        /// <param name="genomeModel">The model of the particular job.</param>
        protected internal static void UpdateStatus(GenomeModel genomeModel)
        {
            using (var client = new SshClient(CreatePrivateKeyConnectionInfo()))
            {
                try
                {
                    client.Connect();

                    // TODO: Modify the code to skip entire sections if they have already been completed. This will be based off the CURRENT STEP stored in the model data.
                    // NOTE(review): db is never referenced inside this block — confirm whether the model is persisted by the caller.
                    using (GenomeAssemblyDbContext db = new GenomeAssemblyDbContext())
                    {
                        bool continueUpdate = true;     // Determines whether we will continue checking the job status.
                        bool DEBUG_MODE     = false;    // Debug mode to skip some assembler steps.
                        bool outOfRange     = false;    // If the overall step is out of bounds, then we set this to true to attempt a correction.
                        ErrorHandling.error = "";       // Reset the error flag.

                        // Each pass re-reads the overall step, so a NextStep() call makes the next pass evaluate the following case.
                        while (continueUpdate && ErrorHandling.NoError())
                        {
                            // Depending on the current step, this switch will determine if the state of the job needs to change.
                            switch (genomeModel.OverallCurrentStep)
                            {
                            // Queued step
                            case 1:
                            {
                                if (DEBUG_MODE)
                                {
                                    genomeModel.NextStep();
                                    break;
                                }

                                // Advance when the conversion script is seen running.
                                if (LinuxCommands.IsProcessRunning(client, "conversionScript.sh"))
                                {
                                    genomeModel.NextStep();
                                }

                                // If a conversion was never run or if we missed it, then check if the job has already started.
                                else if (LinuxCommands.JobRunningAlt(client, genomeModel.SSHUser))
                                {
                                    genomeModel.NextStep();
                                }

                                // Neither check passed: stay on this step and stop updating.
                                else
                                {
                                    continueUpdate = false;
                                }

                                break;
                            }

                            // Data conversion step
                            case 2:
                            {
                                if (DEBUG_MODE)
                                {
                                    genomeModel.NextStep();
                                    break;
                                }

                                // Files in the masurca output directory are taken as the signal to move on to the assembler step.
                                if (LinuxCommands.DirectoryHasFiles(client, Accessors.GetMasurcaOutputPath(genomeModel.Seed)))
                                {
                                    genomeModel.NextStep();
                                }

                                else
                                {
                                    continueUpdate = false;
                                }

                                break;
                            }

                            // Running assemblers step
                            case 3:
                            {
                                if (DEBUG_MODE)
                                {
                                    genomeModel.NextStep();
                                    break;
                                }

                                if (genomeModel.UseMasurca)
                                {
                                    CheckMasurcaStep(client, genomeModel);
                                }

                                // No status check exists for SGA yet.
                                if (genomeModel.UseSGA)
                                {
                                }

                                // No status check exists for WGS yet.
                                if (genomeModel.UseWGS)
                                {
                                }

                                if (genomeModel.IsAssemblyFinished())
                                {
                                    genomeModel.NextStep();
                                }

                                else
                                {
                                    continueUpdate = false;
                                }

                                break;
                            }

                            // Data analysis step
                            case 4:
                            {
                                // Until data analysis is implemented, we skip the step.
                                genomeModel.NextStep();
                                break;

                                //if (LinuxCommands.IsProcessRunning(client, "dataAnalysis.sh"))
                                //    continueUpdate = false;

                                //else
                                //{
                                //    // Has it finished?
                                //    if (LinuxCommands.FileExists(client, Accessors.GetJobOutputPath(genomeModel.Seed) + "dataAnalysisResult"))
                                //        genomeModel.NextStep();

                                //    else
                                //        LinuxCommands.RunDataAnalysis(client);
                                //}

                                //break;
                            }

                            // TODO: Create a more robust method in checking for a completed upload. Maybe connect to the FTP and compare file sizes and see if they are close.
                            // Uploading Data step
                            case 5:
                            {
                                //if (LinuxCommands.IsJobUploading(client, Accessors.USER_ROOT_JOB_DIRECTORY, Accessors.GetCompressedDataPath(genomeModel.Seed)))
                                //    continueUpdate = false;

                                //else if (LinuxCommands.FileExists(client, Accessors.GetCompressedDataPath(genomeModel.Seed)))
                                //    genomeModel.NextStep();

                                //else
                                //{
                                // With the checks above disabled, the upload call is issued on every pass that reaches this step,
                                // and the step is never advanced from here.
                                LinuxCommands.UploadJobData(client, Accessors.USER_ROOT_JOB_DIRECTORY, Accessors.GetCompressedDataPath(genomeModel.Seed)
                                                            , Accessors.GetRelativeJobDirectory(genomeModel.Seed), Accessors.GetRemoteDownloadLocation(genomeModel.Seed), true, "yr");

                                continueUpdate = false;
                                //}


                                break;
                            }

                            // Completed step
                            case 6:
                            {
                                continueUpdate = false;

                                break;
                            }

                            // Any step outside 1-6: attempt a single reset to step 1, and give up if that has already been tried.
                            default:
                            {
                                // If we have attempted a correction and failed, throw in the towel.
                                if (outOfRange)
                                {
                                    throw new IndexOutOfRangeException("The current step of the program is out of bounds after an attempted correction. The current step: "
                                                                       + genomeModel.OverallCurrentStep);
                                }

                                else
                                {
                                    outOfRange = true;

                                    // Reset the state to default and have it run through the update method again.
                                    genomeModel.OverallCurrentStep = 1;
                                    genomeModel.OverallStatus      = StepDescriptions.GetOverallStepList()[genomeModel.OverallCurrentStep].ToString();
                                }

                                break;
                            }
                            }
                        }
                    }
                }

                // SSH Connection couldn't be established.
                catch (SocketException e)
                {
                    ErrorHandling.error = "The SSH connection couldn't be established. " + e.Message;
                }

                // The SSH connection was dropped.
                catch (SshConnectionException e)
                {
                    ErrorHandling.error = "The connection was terminated unexpectedly. " + e.Message;
                }
            }
        }