/// <summary>
/// Prepares the task's home folder on the farm node, copies the input files there and
/// starts the task through the node's execute service.
/// </summary>
/// <param name="task">Run context of the task to start.</param>
/// <returns>
/// The value returned by <c>ExecuteTaskOnFarm</c> followed by a newline and the node name.
/// </returns>
public virtual object Run(TaskRunContext task)
{
    var node = GetNode(task);
    var pack = PackageByName(node, task.PackageName);
    var taskId = task.TaskId;
    var farmId = task.Resource.Controller.FarmId;

    var esService = GetExecuteServiceClient(node);
    bool executed = false;

    try
    {
        var resorceHomeFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, farmId, CopyPhase.None);

        PrepareEnviroment(esService, pack, resorceHomeFolder, farmId);
        CopyInputFiles(task, resorceHomeFolder);

        string cmdLine = String.Format(task.CommandLine, pack.AppPath, taskId);
        Log.Info("cmdline = " + cmdLine);

        var result = esService.ExecuteTaskOnFarm(taskId, farmId, cmdLine);
        Log.Info("Exec done. Job id = " + result);
        executed = true;

        // Success path: close exactly as before, letting a failure to close propagate.
        esService.Close();

        return (result + "\n" + node.NodeName);
    }
    finally
    {
        if (!executed)
        {
            // Failure path: the client used to be leaked here. Close it on a best-effort
            // basis so that a failing Close() cannot hide the original exception.
            try
            {
                esService.Close();
            }
            catch (Exception closeError)
            {
                Log.Warn("Failed to close execute service client: " + closeError);
            }
        }
    }
}
/// <summary>
/// Creates the task's home folder on the node (over SSH) and uploads every input file of
/// the task into it, downloading each file from storage to a local temp file first.
/// </summary>
/// <param name="task">Run context of the task whose input files are copied.</param>
/// <param name="fileNames">
/// Receives the full paths of the uploaded files on the node; each path is preceded by a single space.
/// </param>
/// <returns>The task's home folder on the node.</returns>
public string CopyInputFiles(TaskRunContext task, out string fileNames)
{
    var node = GetNode(task);
    string clusterHomeFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, CopyPhase.None);

    try
    {
        Log.Info(Thread.CurrentThread.ManagedThreadId + " entered.");
        SshExec(node, "mkdir " + clusterHomeFolder);
        Log.Info(Thread.CurrentThread.ManagedThreadId + " exited.");
    }
    catch (Exception e)
    {
        // Best effort: a failed mkdir is only logged and the copy below is still attempted.
        Log.Warn(e.ToString());
    }

    Log.Info("Copying input files for task " + task.TaskId.ToString());

    fileNames = "";

    // Loop-invariant: the home folder with exactly one trailing '/'.
    string folderPrefix = clusterHomeFolder.TrimEnd(new[] { '/', '\\' }) + "/";

    foreach (var file in task.InputFiles)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            IOProxy.Storage.Download(file.StorageId, tmpFile);

            string fileOnCluster = folderPrefix + file.FileName;
            fileNames += " " + fileOnCluster;

            Log.Info("Copying file " + fileOnCluster);
            UploadFile(node, fileOnCluster, tmpFile);
        }
        finally
        {
            // GetTempFileName creates a real file on disk; remove it even when the
            // download or upload throws, otherwise temp files pile up.
            File.Delete(tmpFile);
        }
    }

    return (clusterHomeFolder);
}
/// <summary>
/// Queries the node over SSH for the task's state and lists the task's output folder.
/// </summary>
/// <param name="task">Run context of the task being checked.</param>
/// <param name="result">
/// Receives the lower-cased output of the state command, or a fixed error text when the
/// state command throws.
/// </param>
/// <returns>
/// <c>true</c> when the state output mentions the task's local id and the output folder
/// listing does not contain <c>ClavireFinishFileName</c>; otherwise <c>false</c>.
/// </returns>
protected override bool GetFromResourceTaskStateInfo(TaskRunContext task, out string result)
{
    var node = GetNode(task);

    try
    {
        result = SshExec(node, GetTaskStateCommand(), (string)task.LocalId, null).ToLowerInvariant();
    }
    catch (Exception e)
    {
        Log.Error(String.Format("Exception while updating task's {0} state: {1}", task.TaskId, e));
        result = "SshExec error while updating task's state";
    }

    string clusterFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, CopyPhase.Out);
    string folderListing = SshExec(node, SshUnixCommands.Ls, clusterFolder);

    // The state output was lower-cased above, so the local id must be matched ignoring
    // case; a case-sensitive search could never find an id with upper-case characters.
    bool mentionsLocalId = result.IndexOf(task.LocalId.ToString(), StringComparison.OrdinalIgnoreCase) >= 0;

    return (mentionsLocalId && !folderListing.Contains(ClavireFinishFileName));
}
/// <summary>
/// Asks Pilot for the job status and maps it to a <see cref="TaskStateInfo"/>.
/// When the job is no longer new/starting/running, its files are copied from the grid
/// folder to the FTP output folder before the final state is returned.
/// </summary>
/// <param name="task">Run context of the task being checked.</param>
/// <returns>
/// <c>Started</c> for "is new", "is running" and "is starting"; <c>Completed</c> for
/// "is finished"; <c>Failed</c> for anything else. The lower-cased status text is attached.
/// </returns>
public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
{
    lock (_gridLock)
    {
        RefreshCertificate();

        ulong taskId = task.TaskId;
        string localId = (string)task.LocalId;

        // Invariant lower-casing: the markers below are fixed ASCII text, so matching
        // must not depend on the current culture (e.g. Turkish 'I' -> dotless 'ı').
        string state = SshExec(PilotCommands.JobStatus, localId).ToLowerInvariant();

        // A "new" job is reported as Started as well (not as Scheduled).
        bool stillActive =
            state.Contains("is new") ||
            state.Contains("is running") ||
            state.Contains("is starting");

        if (stillActive)
        {
            return (new TaskStateInfo(TaskState.Started, state));
        }

        var node = GetNode(task);

        string ftpOutFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.Out);
        string ftpOutFolderFromResource = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);
        string gridFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, taskId, CopyPhase.None);

        // Outputs are copied for every non-active state, i.e. for failed jobs too.
        IOProxy.Ftp.MakePath(ftpOutFolderFromSystem);
        SshExec(PilotCommands.CopyFilesToGridFtp, gridFolder + " " + ftpOutFolderFromResource);

        TaskState finalState = state.Contains("is finished")
            ? TaskState.Completed
            : TaskState.Failed;

        return (new TaskStateInfo(finalState, state));
    }
}
/// <summary>
/// Copies all files reported by the execute service for the task from the farm to the
/// exchange output folder, which is either an "ftp://" URL or a file system path.
/// A failure on one file is logged and does not stop the remaining files.
/// </summary>
/// <param name="task">Run context of the task whose outputs are copied.</param>
/// <param name="farmId">Identifier of the farm, passed to the execute service and to <c>DownloadFile</c>.</param>
public void CopyOutputsToExchange(TaskRunContext task, string farmId)
{
    ulong taskId = task.TaskId;
    var node = GetNode(task);
    var pack = PackageByName(node, task.PackageName);

    // temporary hack: files are not pushed from resource => using path from resource for scp copying
    string outFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);

    // Make sure the target folder ends with the separator of its kind.
    bool copyingOutsToFtp = outFolderFromSystem.StartsWith("ftp://", StringComparison.Ordinal);
    if (copyingOutsToFtp && !outFolderFromSystem.EndsWith("/", StringComparison.Ordinal))
    {
        outFolderFromSystem += '/';
    }
    if (!copyingOutsToFtp && !outFolderFromSystem.EndsWith("\\", StringComparison.Ordinal))
    {
        outFolderFromSystem += '\\';
    }

    // The package's own local dir (formatted with the task id) wins over the node's default folder.
    string clusterFolderTemplate = !String.IsNullOrEmpty(pack.LocalDir)
        ? String.Format(pack.LocalDir, task.TaskId)
        : node.DataFolders.LocalFolder;

    string clusterFolder = IncarnationParams.IncarnatePath(clusterFolderTemplate, taskId, CopyPhase.Out);
    if (!clusterFolder.EndsWith("\\", StringComparison.Ordinal))
    {
        clusterFolder += "\\";
    }

    string[] fileNames;
    var exClient = GetExecuteServiceClient(node);
    try
    {
        fileNames = exClient.GetAllFileNames(farmId, taskId);
    }
    finally
    {
        // The client was never closed before. Close it on a best-effort basis so a failing
        // Close() neither hides an earlier exception nor aborts the copy.
        try
        {
            exClient.Close();
        }
        catch (Exception closeError)
        {
            Log.Warn("Failed to close execute service client: " + closeError);
        }
    }

    foreach (var output in task.ExpectedOutputFileNames)
    {
        Log.Info(output);
    }

    // Every distinct sub-directory that appears in the file names, plus the output folder itself.
    var dirStructure = fileNames
        .Where(name => name.Contains('/') || name.Contains('\\')) // inside subdir
        .Select(name => name.Remove(name.LastIndexOfAny(new[] { '\\', '/' })))
        .Distinct()
        .Select(file => outFolderFromSystem + file)
        .Union(new[] { outFolderFromSystem });

    foreach (string dir in dirStructure)
    {
        if (copyingOutsToFtp)
        {
            IOProxy.Ftp.MakePath(dir);
        }
        else
        {
            Log.Debug("Creating dir " + dir);
            Directory.CreateDirectory(dir);
        }
    }

    Log.Info("Copying output files");

    foreach (string fileName in fileNames)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            try
            {
                Log.Info("Copying file " + clusterFolder + fileName);
                DownloadFile(node, clusterFolder + fileName, tmpFile, taskId, farmId);

                if (copyingOutsToFtp)
                {
                    IOProxy.Ftp.UploadLocalFile(tmpFile, outFolderFromSystem, fileName, shouldCreatePath: false);
                }
                else
                {
                    File.Copy(tmpFile, outFolderFromSystem + fileName);
                }
            }
            finally
            {
                // Remove the temp file even when the download or copy failed.
                File.Delete(tmpFile);
            }

            Log.Info("File copied " + fileName);
        }
        catch (Exception e)
        {
            Log.Warn(String.Format("Exception on file '{0}' copy: {1}", clusterFolder + fileName, e));
        }
    }
}
/// <summary>
/// Submits the task to the grid through Pilot: installs the user's proxy certificate on
/// the helper host (when the task carries one), uploads a generated <c>run.sh</c> launcher,
/// copies the inputs from FTP to GridFTP, writes the job description to the helper host
/// and submits it.
/// </summary>
/// <param name="task">Run context of the task to start.</param>
/// <returns>The job URL extracted from the submit output, with "a" appended.</returns>
/// <remarks>
/// Any exception is logged and rethrown. When a user certificate was installed it is
/// overwritten on the helper host in all cases.
/// </remarks>
public object Run(TaskRunContext task)
{
    const string proxyCertPathOnHelper = "/tmp/x509up_u500";
    const string scriptName = "run.sh";
    const string endl = "\n";

    lock (_gridLock)
    {
        RefreshCertificate();

        if (task.UserCert != null)
        {
            Log.Info("Using user's certificate");

            string certLocalPath = Path.GetTempFileName();
            try
            {
                IOProxy.Storage.Download(task.UserCert, certLocalPath);
                PutFileOnHelper(certLocalPath, proxyCertPathOnHelper);
            }
            finally
            {
                // Never leave a copy of the user's certificate in the local temp folder.
                File.Delete(certLocalPath);
            }

            SshExec(PilotCommands.SetPermissionsOnProxyCertFile);
        }
        else
        {
            Log.Info("Using system's certificate");
        }

        try
        {
            long coresToUse = task.NodesConfig.Sum(cfg => cfg.Cores);
            var node = GetNode(task);
            var pack = node.PackageByName(task.PackageName);
            string packageName = task.PackageName.ToLowerInvariant();

            // todo : remove
            string commandLine = task.CommandLine;
            commandLine = commandLine.Replace("java -jar ", "");
            if (packageName == "cnm")
            {
                commandLine = commandLine.Replace("{0}", "ru.ifmo.hpc.main.ExtendedModel");
            }
            else if (packageName == "ism")
            {
                commandLine = commandLine.Replace("{0}", "ru.ifmo.hpc.main.SpreadModel");
            }
            else
            {
                commandLine = commandLine.Replace("{0}", "");
            }

            string ftpFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, task.TaskId, CopyPhase.In);
            string ftpFolderFromResource = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, task.TaskId, CopyPhase.In);
            string gridFtpFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, CopyPhase.None);

            SshExec(PilotCommands.MakeFolderOnGridFtp, gridFtpFolder);

            // First add the missing input file: the script that launches the package on
            // the cluster. It exports the package's environment variables and then runs
            // the application with the command line.
            var scriptContent = new StringBuilder();
            scriptContent.Append("#!/bin/bash" + endl);
            foreach (var pair in pack.EnvVars)
            {
                scriptContent.AppendFormat("export {0}={1}" + endl, pair.Key, pair.Value);
            }
            scriptContent.Append(pack.AppPath);
            scriptContent.Append(" " + commandLine);

            string scriptLocalPath = Path.GetTempFileName();
            try
            {
                File.WriteAllText(scriptLocalPath, scriptContent.ToString());
                IOProxy.Ftp.UploadLocalFile(scriptLocalPath, ftpFolderFromSystem, scriptName);
            }
            finally
            {
                File.Delete(scriptLocalPath);
            }

            // Copy the input files from FTP to GridFTP
            SshExec(PilotCommands.CopyFilesToGridFtp, ftpFolderFromResource + " " + gridFtpFolder);
            SshExec(PilotCommands.MakeFilesExecutableOnGridFtp, gridFtpFolder + "*");

            // Build the job description for the grid.
            // AppendFormat is used only where there are arguments; fixed text and file
            // names go through Append so that '{' or '}' in a name cannot break formatting.
            var jobFileContent = new StringBuilder();
            jobFileContent.AppendFormat(@"{{ ""version"": 2, ""description"": ""{0}""," + endl, task.TaskId);
            jobFileContent.AppendFormat(@" ""default_storage_base"": ""{0}""," + endl, gridFtpFolder);
            jobFileContent.Append(@" ""tasks"": [ { ""id"": ""a"", ""description"": ""task"", ""definition"": { ""version"": 2," + endl);
            jobFileContent.AppendFormat(@" ""executable"": ""{0}""," + endl, scriptName);

            // The launcher is always the first input entry. Joining all entries together
            // avoids the trailing comma (invalid JSON) when the task has no input files.
            string[] inputEntries = new[] { @"""run.sh"": ""run.sh""" }
                .Concat(task.InputFiles.Select(file => String.Format(@"""{0}"": ""{0}""", file.FileName)))
                .ToArray();

            jobFileContent.Append(@" ""input_files"": {" + endl);
            jobFileContent.Append(" " + String.Join("," + endl + " ", inputEntries));
            jobFileContent.Append(endl + " }," + endl);

            jobFileContent.Append(@" ""output_files"": {" + endl);
            if (packageName == "ism")
            {
                jobFileContent.Append(@" ""output.dat"": ""output.dat""" + endl);
            }
            else if (packageName == "orca")
            {
                jobFileContent.Append(@" ""orca.out"": ""orca.out""," + endl);
                jobFileContent.Append(@" ""eldens.cube"": ""eldens.cube""" + endl);
            }
            else
            {
                // std.out / std.err are declared separately below.
                string[] outputEntries = task.ExpectedOutputFileNames
                    .Where(name => name != "std.out" && name != "std.err")
                    .Select(name => String.Format(@"""{0}"": ""{0}""", name))
                    .ToArray();

                jobFileContent.Append(" " + String.Join("," + endl + " ", outputEntries) + endl);
            }
            jobFileContent.Append(" }," + endl);

            jobFileContent.Append(@" ""stdout"": ""std.out"", ""stderr"": ""std.err"", " + endl);
            jobFileContent.AppendFormat(@" ""count"": {0}" + endl, coresToUse);
            if (pack.Params.ContainsKey("requirements"))
            {
                jobFileContent.AppendFormat(@" ,""requirements"": {0}" + endl, pack.Params["requirements"]);
            }
            jobFileContent.Append(" } } ]," + endl);

            jobFileContent.AppendFormat(@" ""requirements"": {{ ""hostname"": [""{0}""]", node.NodeAddress);
            jobFileContent.Append("}" + endl + "}");

            // The format string used to lack {1}, so the JSON itself was never logged.
            Log.Debug(String.Format("Task's '{0}' grid job JSON: {1}", task.TaskId, jobFileContent));

            string jobFileName = "job_" + task.TaskId.ToString() + ".js";
            string jobFilePathOnHelper = JOBS_FOLDER_ON_HELPER + jobFileName;

            // Write the job description to the server running Pilot
            string jobFilePathLocal = Path.GetTempFileName();
            try
            {
                File.WriteAllText(jobFilePathLocal, jobFileContent.ToString());
                PutFileOnHelper(jobFilePathLocal, jobFilePathOnHelper);
            }
            finally
            {
                File.Delete(jobFilePathLocal);
            }

            // todo : remove files on helper and gridftp

            // Launch
            Log.Info(String.Format(
                "Trying to exec task {0} on grid cluster {1}",
                task.TaskId, node.NodeName
            ));

            string launchResult = SshExec(PilotCommands.SubmitJob, jobFilePathOnHelper, pilotUrl: node.Services.ExecutionUrl);

            int urlPos = launchResult.IndexOf("https://", StringComparison.Ordinal);
            if (urlPos < 0)
            {
                // Substring below still throws as before; log the raw output first so the
                // failure can be diagnosed.
                Log.Error(String.Format(
                    "Submit output for task {0} contains no job url: {1}",
                    task.TaskId, launchResult
                ));
            }

            // NOTE(review): the appended "a" presumably addresses the task declared with
            // id "a" in the job description above - confirm.
            string jobUrl = launchResult.Substring(urlPos).Trim() + "a";
            Log.Debug(jobUrl);

            Log.Info(String.Format(
                "Task {0} launched on grid with jobUrl = {1}",
                task.TaskId, jobUrl
            ));

            return (jobUrl);
        }
        catch (Exception e)
        {
            Log.Error(String.Format(
                "Error while starting task {0} in grid: {1}\n{2}",
                task.TaskId, e.Message, e.StackTrace
            ));

            throw;
        }
        finally
        {
            if (task.UserCert != null)
            {
                Log.Info("Wiping user's certificate");

                // Overwrite the proxy certificate on the helper host with placeholder text.
                string wipeLocalPath = Path.GetTempFileName();
                try
                {
                    File.WriteAllText(wipeLocalPath, "Wiped by Easis system");
                    PutFileOnHelper(wipeLocalPath, proxyCertPathOnHelper);
                }
                finally
                {
                    File.Delete(wipeLocalPath);
                }

                SshExec(PilotCommands.SetPermissionsOnProxyCertFile);
            }
        }
    }
}

/// <summary>
/// Uploads one local file to the helper SSH host over SCP and always closes the
/// connection once it has been opened.
/// </summary>
/// <param name="localPath">Path of the local file to upload.</param>
/// <param name="remotePath">Target path on the helper host.</param>
private void PutFileOnHelper(string localPath, string remotePath)
{
    var scp = new SSH.Scp(HELPER_SSH_HOST, HELPER_SSH_USER, HELPER_SSH_PASS);
    scp.Connect();
    try
    {
        scp.Recursive = true;
        scp.Put(localPath, remotePath);
    }
    finally
    {
        scp.Close();
    }
}
/// <summary>
/// Copies the task's output files from the cluster folder (listed over SSH) to the
/// exchange output folder, which is either an "ftp://" URL or a file system path.
/// Files whose names contain ".rst" or ".esav" are skipped. A failure on one file is
/// logged and does not stop the remaining files.
/// </summary>
/// <param name="task">Run context of the task whose outputs are copied.</param>
public void CopyOutputsToExchange(TaskRunContext task)
{
    ulong taskId = task.TaskId;
    var node = GetNode(task);
    var pack = PackageByName(node, task.PackageName);

    // temporary hack: files are not pushed from resource => using path from resource for scp copying
    string outFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);

    // Make sure the target folder ends with the separator of its kind.
    bool copyingOutsToFtp = outFolderFromSystem.StartsWith("ftp://", StringComparison.Ordinal);
    if (copyingOutsToFtp && !outFolderFromSystem.EndsWith("/", StringComparison.Ordinal))
    {
        outFolderFromSystem += '/';
    }
    if (!copyingOutsToFtp && !outFolderFromSystem.EndsWith("\\", StringComparison.Ordinal))
    {
        outFolderFromSystem += '\\';
    }

    // The package's own local dir (formatted with the task id) wins over the node's default folder.
    string clusterFolderTemplate = !String.IsNullOrEmpty(pack.LocalDir)
        ? String.Format(pack.LocalDir, task.TaskId)
        : node.DataFolders.LocalFolder;

    string clusterFolder = IncarnationParams.IncarnatePath(clusterFolderTemplate, taskId, CopyPhase.Out);
    if (!clusterFolder.EndsWith("/", StringComparison.Ordinal))
    {
        clusterFolder += "/";
    }

    // One relative path per output line. The list is materialized once because it is
    // enumerated twice below (directory structure, then the copy loop).
    string findOutput = SshExec(node, "cd " + clusterFolder + "; " + SSH_FIND_COMMAND, "");
    string[] fileNames = findOutput
        .Replace("./", "/")
        .Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries)
        .Where(st => !st.Contains(".rst") && !st.Contains(".esav"))
        .Select(st => st.Trim(new[] { '/', '\\' }))
        .ToArray();

    // Every distinct sub-directory that appears in the file names, plus the output folder itself.
    var dirStructure = fileNames
        .Where(name => name.Contains('/') || name.Contains('\\')) // inside subdir
        .Select(name => name.Remove(name.LastIndexOfAny(new[] { '\\', '/' })))
        .Distinct()
        .Select(file => outFolderFromSystem + file)
        .Union(new[] { outFolderFromSystem });

    foreach (string dir in dirStructure)
    {
        if (copyingOutsToFtp)
        {
            IOProxy.Ftp.MakePath(dir);
        }
        else
        {
            Log.Debug("Creating dir " + dir);
            Directory.CreateDirectory(dir);
        }
    }

    Log.Info("Copying output files");

    foreach (string fileName in fileNames)
    {
        string tmpFile = Path.GetTempFileName();
        try
        {
            try
            {
                Log.Info("Copying file " + clusterFolder + fileName);
                DownloadFile(node, clusterFolder + fileName, tmpFile);

                if (copyingOutsToFtp)
                {
                    IOProxy.Ftp.UploadLocalFile(tmpFile, outFolderFromSystem, fileName, shouldCreatePath: false);
                }
                else
                {
                    File.Copy(tmpFile, outFolderFromSystem + fileName);
                }
            }
            finally
            {
                // Remove the temp file even when the download or copy failed.
                File.Delete(tmpFile);
            }

            Log.Info("File copied " + fileName);
        }
        catch (Ssh.SshTransferException e)
        {
            Log.Warn(String.Format("During coping file {0} for task {1} from error was happend: {2}", fileName, taskId, e)); // todo : lolwut?
        }
        catch (Exception e)
        {
            Log.Warn(String.Format("Exception on file '{0}' copy: {1}", clusterFolder + fileName, e));
        }
    }
}