/// <summary>
/// Get allocated nodes per task.
/// Builds one <c>dig +short</c> lookup per allocation node of the task, runs them
/// in a single SSH command and returns the non-empty lines of the command output.
/// </summary>
/// <param name="connectorClient">Connector (cast to <c>SshClient</c>)</param>
/// <param name="taskInfo">Task information</param>
/// <returns>Non-empty output lines of the executed lookup command</returns>
/// <exception cref="Exception">Thrown when a <see cref="FormatException"/> occurs while running the command or reading its result</exception>
public virtual IEnumerable<string> GetAllocatedNodes(object connectorClient, SubmittedTaskInfo taskInfo)
{
    SshCommandWrapper command = null;
    var cluster = taskInfo.Specification.JobSpecification.Cluster;

    // Fully qualify every node name with the cluster domain (or the master node name as a fallback).
    var nodeNames = taskInfo.TaskAllocationNodes
        .Select(node => $"{node.AllocationNodeId}.{cluster.DomainName ?? cluster.MasterNodeName}");

    // One "dig +short <name>;" per node, concatenated into a single command line.
    string sshCommand = string.Concat(nodeNames.Select(name => $"dig +short {name};"));
    _log.Info($"Get allocation nodes of task \"{taskInfo.Id}\", command \"{sshCommand}\"");

    try
    {
        command = SshCommandUtils.RunSshCommand(new SshClientAdapter((SshClient)connectorClient), sshCommand);
        return command.Result
            .Split('\n')
            .Where(line => !string.IsNullOrEmpty(line))
            .ToList();
    }
    catch (FormatException e)
    {
        // "command" is still null when the failure happened before the assignment above;
        // null-conditional access keeps the original error from being masked by a NullReferenceException.
        throw new Exception(
            $"Exception thrown when retrieving allocation nodes used by running task (HPC job): \"{taskInfo.ScheduledJobId}\". " +
            $"Submission script result: \"{command?.Result}\".\n" +
            $"Submission script message: \"{command?.Error}\".\n" +
            $"Command line for job submission: \"{sshCommand}\"\n",
            e);
    }
}
/// <summary>
/// Submit job to scheduler.
/// Converts the job specification to a command (conversion is requested with the "sbatch" argument),
/// passes it Base64-encoded to the command-execution script, parses the scheduler job IDs from the
/// output and returns the actual information of the submitted tasks.
/// </summary>
/// <param name="connectorClient">Connector (cast to <c>SshClient</c>)</param>
/// <param name="jobSpecification">Job specification</param>
/// <param name="credentials">Credentials</param>
/// <returns>Information about the submitted tasks as returned by <c>GetActualTasksInfo</c></returns>
/// <exception cref="Exception">Thrown when a <see cref="FormatException"/> occurs during submission, including the case where fewer job IDs than tasks are returned</exception>
public virtual IEnumerable<SubmittedTaskInfo> SubmitJob(object connectorClient, JobSpecification jobSpecification, ClusterAuthenticationCredentials credentials)
{
    var schedulerJobIdClusterAllocationNamePairs = new List<(string ScheduledJobId, string ClusterAllocationName)>();
    SshCommandWrapper command = null;

    string sshCommand = (string)_convertor.ConvertJobSpecificationToJob(jobSpecification, "sbatch");
    _log.Info($"Submitting job \"{jobSpecification.Id}\", command \"{sshCommand}\"");

    // The command is handed over Base64-encoded to the execution script.
    string sshCommandBase64 = $"{_commands.InterpreterCommand} '{_commands.ExecutieCmdScriptPath} {Convert.ToBase64String(Encoding.UTF8.GetBytes(sshCommand))}'";

    try
    {
        command = SshCommandUtils.RunSshCommand(new SshClientAdapter((SshClient)connectorClient), sshCommandBase64);
        var jobIds = _convertor.GetJobIds(command.Result).ToList();

        // Each task is paired with the job ID at the same position; report a short ID list as a
        // format problem so it is wrapped with the diagnostic message below instead of surfacing
        // as a bare ArgumentOutOfRangeException.
        if (jobIds.Count < jobSpecification.Tasks.Count)
        {
            throw new FormatException(
                $"Scheduler returned {jobIds.Count} job ID(s) for {jobSpecification.Tasks.Count} submitted task(s).");
        }

        for (int i = 0; i < jobSpecification.Tasks.Count; i++)
        {
            schedulerJobIdClusterAllocationNamePairs.Add(
                (jobIds[i], jobSpecification.Tasks[i].ClusterNodeType.ClusterAllocationName));
        }

        return GetActualTasksInfo(connectorClient, jobSpecification.Cluster, schedulerJobIdClusterAllocationNamePairs);
    }
    catch (FormatException e)
    {
        // "command" is still null when the failure happened before the assignment above;
        // null-conditional access keeps the original error from being masked by a NullReferenceException.
        throw new Exception(
            $"Exception thrown when submitting a job: \"{jobSpecification.Name}\" to the cluster: \"{jobSpecification.Cluster.Name}\". " +
            $"Submission script result: \"{command?.Result}\".\n" +
            $"Submission script error message: \"{command?.Error}\".\n" +
            $"Command line for job submission: \"{sshCommandBase64}\".\n",
            e);
    }
}
/// <summary>
/// Get actual scheduler queue status.
/// Runs <c>sinfo -t alloc</c> for the partition given by the node type's queue (optionally limited
/// with <c>--clusters</c> when a cluster allocation name is set) and lets the convertor
/// turn the output into the node usage.
/// </summary>
/// <param name="connectorClient">Connector (cast to <c>SshClient</c>)</param>
/// <param name="nodeType">Cluster node type</param>
/// <returns>Usage produced by <c>_convertor.ReadQueueActualInformation</c> from the command output</returns>
/// <exception cref="Exception">Thrown when a <see cref="FormatException"/> occurs while running the command or reading its result</exception>
public virtual ClusterNodeUsage GetCurrentClusterNodeUsage(object connectorClient, ClusterNodeType nodeType)
{
    SshCommandWrapper command = null;

    // The "--clusters" option is only emitted when an allocation name is configured.
    var allocationCluster = string.IsNullOrEmpty(nodeType.ClusterAllocationName)
        ? string.Empty
        : $"--clusters={nodeType.ClusterAllocationName} ";

    var sshCommand = $"{_commands.InterpreterCommand} 'sinfo -t alloc {allocationCluster}--partition={nodeType.Queue} -h -o \"%.6D\"'";
    _log.Info($"Get usage of queue \"{nodeType.Queue}\", command \"{sshCommand}\"");

    try
    {
        command = SshCommandUtils.RunSshCommand(new SshClientAdapter((SshClient)connectorClient), sshCommand);
        return _convertor.ReadQueueActualInformation(nodeType, command.Result);
    }
    catch (FormatException e)
    {
        // "command" is still null when the failure happened before the assignment above;
        // null-conditional access keeps the original error from being masked by a NullReferenceException.
        throw new Exception(
            $"Exception thrown when retrieving usage of Cluster node: \"{nodeType.Name}\". " +
            $"Submission script result: \"{command?.Result}\".\n" +
            $"Submission script message: \"{command?.Error}\".\n" +
            $"Command line for queue usage: \"{sshCommand}\"\n",
            e);
    }
}
/// <summary>
/// Get actual scheduler queue status.
/// Runs <c>qstat -Q -f</c> for the queue of the given node type and lets the convertor
/// turn the output into the node usage.
/// </summary>
/// <param name="connectorClient">Connector (cast to <c>SshClient</c>)</param>
/// <param name="nodeType">Cluster node type</param>
/// <returns>Usage produced by <c>_convertor.ReadQueueActualInformation</c> from the command output</returns>
/// <exception cref="Exception">Thrown when a <see cref="FormatException"/> occurs while running the command or reading its result</exception>
public virtual ClusterNodeUsage GetCurrentClusterNodeUsage(object connectorClient, ClusterNodeType nodeType)
{
    SshCommandWrapper command = null;

    string sshCommand = $"{_commands.InterpreterCommand} 'qstat -Q -f {nodeType.Queue}'";
    _log.Info($"Get usage of queue \"{nodeType.Queue}\", command \"{sshCommand}\"");

    try
    {
        command = SshCommandUtils.RunSshCommand(new SshClientAdapter((SshClient)connectorClient), sshCommand);
        return _convertor.ReadQueueActualInformation(nodeType, command.Result);
    }
    catch (FormatException e)
    {
        // "command" is still null when the failure happened before the assignment above;
        // null-conditional access keeps the original error from being masked by a NullReferenceException.
        throw new Exception(
            $"Exception thrown when retrieving parameters of queue: \"{nodeType.Name}\". " +
            $"Submission script result: \"{command?.Result}\".\n" +
            $"Submission script message: \"{command?.Error}\".\n" +
            $"Command line for job submission: \"{sshCommand}\"\n",
            e);
    }
}