Beispiel #1
0
        /// <summary>
        /// Polls the resource for the task's state and wraps the answer in a <see cref="TaskStateInfo"/>.
        /// </summary>
        /// <param name="task">Run context of the task being polled.</param>
        /// <returns>
        /// A state marked <c>Started</c> when <c>GetFromResourceTaskStateInfo</c> returns true, otherwise
        /// <c>Completed</c> (after the outputs were copied to the exchange). The raw resource response is
        /// used as the state comment; process info and node name are attached in both cases.
        /// </returns>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            // Resolve the node once up front; it is only needed for the node name reported below.
            var node = GetNode(task);

            string result;
            TaskStateInfo info;

            if (GetFromResourceTaskStateInfo(task, out result))
            {
                info = new TaskStateInfo(TaskState.Started, result);
            }
            else
            {
                // todo : distinguish Aborted / Failed; every task that is not running is treated as completed
                CopyOutputsToExchange(task);

                info = new TaskStateInfo(TaskState.Completed, result);
            }

            //todo nbutakov change
            info.ProcessInfo = GetCurrentTaskInfo(task);
            info.NodeName = node.NodeName;
            return info;
        }
Beispiel #2
0
        /// <summary>
        /// Copies the task's input files to the node, prepares the launch script, executes it and
        /// returns the job id reported by the resource.
        /// </summary>
        /// <param name="task">Run context of the task to start.</param>
        /// <returns>The first non-empty line of the execution output, used as the job id.</returns>
        /// <remarks>
        /// The whole sequence runs while the task's operation lock is held. The lock is released in a
        /// <c>finally</c> block so that a failure in any step does not leave the operation locked.
        /// </remarks>
        public object Run(TaskRunContext task)
        {
            var   node   = GetNode(task);
            var   pack   = PackageByName(node, task.PackageName);
            ulong taskId = task.TaskId;

            Log.Info("Locking operation");
            var operationHolder = LockOperation(task.TaskId, TaskLock.WRITE_OPERATION_EXECUTED);

            try
            {
                string fileNames;
                string clusterHomeFolder = CopyInputFiles(task, out fileNames);

                // task.CommandLine is a format template: {0} = application path, {1} = task id, {2} = input files.
                string cmdLine = String.Format(task.CommandLine, pack.AppPath, taskId, fileNames.Trim());

                Log.Info("cmdline = " + cmdLine);

                String scriptPath;

                Log.Info("Preparing script");
                ScriptPrepare(pack, cmdLine, node, clusterHomeFolder, out scriptPath);
                Log.Info("Script prepared. Executing it.");

                var result = ExecuteRun(node, scriptPath);

                // Only the first non-empty output line is taken as the job id.
                string jobId = result.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries).First();

                Log.Info("Exec done. Job id = " + jobId);

                return jobId;
            }
            finally
            {
                // Release the lock on failure as well as on success.
                UnLockOperation(task.TaskId, operationHolder);
                Log.Info("Operation unlocked");
            }
        }
Beispiel #3
0
        /// <summary>
        /// Undoes the per-node bookkeeping of a task: for every node config of the task the submitted-task
        /// counter is decremented and the task's cores are subtracted from the reserved cores.
        /// Counters that would become negative are reset to zero after a warning.
        /// </summary>
        /// <param name="task">Task whose node reservations are rolled back.</param>
        private void RevokeTask(TaskRunContext task)
        {
            // Lock order: task first, then the node state cache.
            lock (task.Lock)
            lock (_nodeStateCacheLock)
            {
                var resourceNodes = _nodeStateCache[task.Resource.ResourceName];

                foreach (var requested in task.NodesConfig)
                {
                    var state = resourceNodes.Single(n => n.NodeName == requested.NodeName);

                    state.TasksSubmitted--;
                    state.CoresReserved -= requested.Cores;

                    // Clamp the submitted-task counter at zero.
                    if (state.TasksSubmitted < 0)
                    {
                        Log.Warn();
                        state.TasksSubmitted = 0;
                    }

                    // Clamp the reserved-core counter at zero.
                    if (state.CoresReserved < 0)
                    {
                        Log.Warn();
                        state.CoresReserved = 0;
                    }
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Polls the resource for the task's state and wraps the answer in a <see cref="TaskStateInfo"/>.
        /// </summary>
        /// <param name="task">Run context of the task being polled.</param>
        /// <returns>
        /// A state marked <c>Started</c> when <c>GetFromResourceTaskStateInfo</c> returns true, otherwise
        /// <c>Completed</c> (after the outputs were copied to the exchange). The raw resource response is
        /// used as the state comment; process info and node name are attached in both cases.
        /// </returns>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            // Resolve the node once up front; it is only needed for the node name reported below.
            var node = GetNode(task);

            string        result;
            TaskStateInfo info;

            if (GetFromResourceTaskStateInfo(task, out result))
            {
                info = new TaskStateInfo(TaskState.Started, result);
            }
            else
            {
                // todo : distinguish Aborted / Failed; every task that is not running is treated as completed
                CopyOutputsToExchange(task);

                info = new TaskStateInfo(TaskState.Completed, result);
            }

            //todo nbutakov change
            info.ProcessInfo = GetCurrentTaskInfo(task);
            info.NodeName    = node.NodeName;
            return(info);
        }
Beispiel #5
0
        /// <summary>
        /// Asks the resource for the task's state over SSH and reports whether the job is still active.
        /// </summary>
        /// <param name="task">Run context of the task being polled.</param>
        /// <param name="result">Receives the lower-cased response of the task-state command.</param>
        /// <returns>
        /// True when the response contains <c>job_state = r</c> or <c>job_state = q</c>; otherwise false.
        /// </returns>
        protected virtual bool GetFromResourceTaskStateInfo(TaskRunContext task, out string result)
        {
            var node = GetNode(task);

            // The response is lower-cased before matching, so only the lower-case state codes can ever occur.
            result = SshExec(node, GetTaskStateCommand(), (string)task.LocalId, new PbsErrorResolver()).ToLowerInvariant();
            return(result.Contains("job_state = r") || result.Contains("job_state = q"));
        }
Beispiel #6
0
        /// <summary>
        /// Runs the task-state command over SSH and maps the response to a <see cref="TaskStateInfo"/>
        /// by looking for known state tokens in the upper-cased response.
        /// </summary>
        /// <param name="task">Run context of the task being polled.</param>
        /// <returns>
        /// Started, Aborted or Failed when a matching token is found (checked in that order); otherwise
        /// Completed. Before Completed is returned the outputs are copied to the exchange, and an
        /// unrecognised response is logged as a warning.
        /// </returns>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            var node = GetNode(task);

            string response      = SshExec(node, SshCommands.GetTaskState, (string)task.LocalId);
            string upperResponse = response.ToUpperInvariant();

            string[] running   = new[] { "CONFIGURING", "COMPLETING", "PENDING", "RUNNING", "SUSPENDED" };
            string[] aborted   = new[] { "CANCELLED", "TIMEOUT" };
            string[] failed    = new[] { "FAILED", "NODE_FAIL", "PREEMPTED" };
            string[] completed = new[] { "COMPLETED", "Invalid job id specified".ToUpperInvariant() };

            if (running.Any(token => upperResponse.Contains(token)))
            {
                return new TaskStateInfo(TaskState.Started, response);
            }

            if (aborted.Any(token => upperResponse.Contains(token)))
            {
                return new TaskStateInfo(TaskState.Aborted, response);
            }

            if (failed.Any(token => upperResponse.Contains(token)))
            {
                return new TaskStateInfo(TaskState.Failed, response);
            }

            // Everything else is reported as completed; an unrecognised response is only logged.
            if (!completed.Any(token => upperResponse.Contains(token)))
            {
                Log.Warn("Wnknown responce from SLURM. Hoping task was completed: " + response);
            }

            CopyOutputsToExchange(task);
            return new TaskStateInfo(TaskState.Completed, response);
        }
        /// <summary>
        /// Starts a task on the resource named by its first node config: binds the resource and its
        /// controller to the task, acquires the requested nodes, runs the task through the controller and
        /// registers it in the task cache with state <c>Started</c>.
        /// </summary>
        /// <param name="task">Run context of the task to start; its Resource, Controller and LocalId are set here.</param>
        /// <remarks>
        /// Runs under the read side of <c>_resourcesLock</c>. Any exception is logged and rethrown; a failure
        /// after the consistency check additionally releases the task's node configs on the resource cache.
        /// </remarks>
        public void Run(TaskRunContext task)
        {
            _resourcesLock.EnterReadLock();

            try
            {
                Log.Info("Running task " + task.ToString());

                // The resource is taken from the first node config only.
                string resourceName  = task.NodesConfig.First().ResourceName;
                var    resourceCache =
                    ResourceCache.GetByName(resourceName);

                lock (resourceCache.StateLock)
                {
                    CheckNodeConfigConsistency(task.TaskId, task.NodesConfig, resourceCache.Resource);

                    task.Resource   = resourceCache.Resource;
                    task.Controller = resourceCache.Controller;
                }

                try
                {
                    // NOTE(review): Acquire sits inside the try, so Release in the catch below also runs when
                    // Acquire itself throws — confirm Release tolerates that.
                    resourceCache.Acquire(task.NodesConfig);  // todo : maybe move under resourceCache.StateLock?

                    Log.Info(String.Format("Trying to run task {0} on resource {1}", task.TaskId, task.Resource.ResourceName));

                    task.LocalId = task.Controller.Run(task);

                    Log.Info(String.Format("Task {0} ({1}) started on resource {2} with localId = {3}",
                                           task.TaskId, task.PackageName, task.Resource.ResourceName, task.LocalId
                                           ));

                    var state = new TaskStateInfo(TaskState.Started, task.LocalId.ToString());
                    TaskCache.AddTask(task, state);
                }
                catch (Exception e)
                {
                    // Give the nodes back before propagating the failure.
                    resourceCache.Release(task.NodesConfig);

                    Log.Error(String.Format("Unable to run task {0}: {1}", task.TaskId, e));
                    throw;
                }
            }
            catch (Exception e)
            {
                // Exceptions rethrown by the inner catch are logged a second time here.
                Log.Error(String.Format("Exception on Farm.Run(task {0}): {1}", task.TaskId, e));
                throw;
            }
            finally
            {
                _resourcesLock.ExitReadLock();
            }


            //todo for mock
            // Reached only when the run succeeded; executed outside the resources lock.
            if (CacheCollectorFactory.CheckMockMode())
            {
                CacheCollectorFactory.GetInstance().SendTask(task);
            }
        }
Beispiel #8
0
            /// <summary>
            /// Creates a cache entry for a task with the given initial state.
            /// </summary>
            /// <param name="context">Run context of the cached task.</param>
            /// <param name="state">Initial state stored for the task.</param>
            private TaskCache(TaskRunContext context, TaskStateInfo state)
            {
                Context   = context;
                StateInfo = state;

                // A fresh entry is not being updated, and its last update time is the moment of creation.
                _isUpdating     = false;
                _lastUpdateTime = DateTime.Now;
            }
Beispiel #9
0
            }                                                          // mutable

            /// <summary>
            /// Creates a cache entry for a task, building its state info from a state and a comment.
            /// </summary>
            /// <param name="context">Run context of the cached task.</param>
            /// <param name="state">Initial task state; defaults to <c>Started</c>.</param>
            /// <param name="stateComment">Comment stored with the state; defaults to an empty string.</param>
            private TaskCache(TaskRunContext context, TaskState state = TaskState.Started, string stateComment = "")
            {
                Context   = context;
                StateInfo = new TaskStateInfo(state, stateComment);

                _isUpdating     = false;
                // Back-dated by UPDATE_INTERVAL plus 50 ms — presumably so the entry counts as stale and is
                // refreshed on first access; confirm against the code that checks _lastUpdateTime.
                _lastUpdateTime = DateTime.Now - UPDATE_INTERVAL - TimeSpan.FromMilliseconds(50);
            }
Beispiel #10
0
            /// <summary>
            /// Creates a cache entry for the task and stores it in the cache under the task's id.
            /// </summary>
            /// <param name="context">Run context of the task to cache.</param>
            /// <param name="state">Initial task state; defaults to <c>Started</c>.</param>
            /// <param name="stateComment">Comment stored with the state; defaults to an empty string.</param>
            public static void AddTask(TaskRunContext context, TaskState state = TaskState.Started, string stateComment = "")
            {
                // The entry is built before the lock is taken; only the cache write is guarded.
                var entry = new TaskCache(context, state, stateComment);

                lock (_globalLock)
                {
                    _cache[context.TaskId] = entry;
                }
            }
Beispiel #11
0
        /// <summary>
        /// Creates a cache entry for the task, wired to the task cache collector obtained from
        /// <c>CacheCollectorFactory</c>, and stores it in the cache under the task's id.
        /// </summary>
        /// <param name="context">Run context of the task to cache.</param>
        /// <param name="state">Initial state info of the task.</param>
        public static void AddTask(TaskRunContext context, TaskStateInfo state)
        {
            var collector = CacheCollectorFactory.GetInstance().GetTaskCacheCollector();

            // The entry is built before the lock is taken (original note: construction autosaves).
            var entry = new TaskCache(context, state, collector);

            lock (_globalLock)
            {
                _cache[context.TaskId] = entry;
            }
        }
Beispiel #12
0
        /// <summary>
        /// Cancels the task's job by running the pilot cancel command over SSH.
        /// </summary>
        /// <param name="task">Run context of the task to abort; its LocalId is the job id.</param>
        public void Abort(TaskRunContext task)
        {
            lock (_gridLock)
            {
                RefreshCertificate();

                string jobId = (string) task.LocalId;

                // A trailing "/a" is stripped before the id is handed to the cancel command.
                if (jobId.EndsWith("/a"))
                {
                    jobId = jobId.Substring(0, jobId.Length - 2);
                }

                SshExec(PilotCommands.CancelJob, jobId);
            }
        }
        /// <summary>
        /// Stops the task's process through the node's execute service.
        /// </summary>
        /// <param name="task">
        /// Run context of the task to abort; the first newline-separated section of its LocalId is parsed
        /// as the process id.
        /// </param>
        public void Abort(TaskRunContext task)
        {
            var node           = GetNode(task);
            var executeService = GetExecuteServiceClient(node);

            // todo : string -> string[]
            string[] idSections = ((string)task.LocalId).Split(new[] { '\n' });
            if (idSections.Length > 2)
            {
                Log.Warn(String.Format("Too many sections in provided task id for win PC: {0}", task.LocalId));
            }

            int pid = int.Parse(idSections[0]);
            executeService.StopTaskRunning(pid);
        }
Beispiel #14
0
        private TaskStateInfo _stateInfo; //todo : BsonElement("StateInfo")

        #endregion Fields

        #region Constructors

        /// <summary>
        /// Creates a cache entry for a task, sets its initial state and remembers the optional collector.
        /// </summary>
        /// <param name="context">Run context of the cached task.</param>
        /// <param name="state">Initial state, applied through <c>SetState</c>.</param>
        /// <param name="collector">Optional global cache collector; may be null.</param>
        private TaskCache(TaskRunContext context, TaskStateInfo state, ITaskGlobalCacheCollector collector = null)
        {
            lock (_globalLock)
            {
                StateLock = new object(); // needs to be explicitly before SetState, which triggers Save (i.e. makes object publicly available in memory)

                _isUpdating = false;
                // Back-dated by UPDATE_INTERVAL plus 50 ms — presumably so the entry counts as stale and is
                // refreshed on first access; confirm against the code that checks _lastUpdateTime.
                _lastUpdateTime = DateTime.Now - UPDATE_INTERVAL - TimeSpan.FromMilliseconds(50);

                Context = context;
            }

            // NOTE(review): gcCollector is assigned only after SetState has run; if SetState/Save reads
            // gcCollector it will not see the collector passed in here — confirm this order is intended.
            SetState(state);
            gcCollector = collector;
        }
Beispiel #15
0
        /// <summary>
        /// Creates a cache entry for a task, sets its initial state and remembers the optional collector.
        /// </summary>
        /// <param name="context">Run context of the cached task.</param>
        /// <param name="state">Initial state, applied through <c>SetState</c>.</param>
        /// <param name="collector">Optional global cache collector; may be null.</param>
        private TaskCache(TaskRunContext context, TaskStateInfo state, ITaskGlobalCacheCollector collector = null)
        {
            lock (_globalLock)
            {
                StateLock = new object(); // needs to be explicitly before SetState, which triggers Save (i.e. makes object publicly available in memory)

                _isUpdating     = false;
                // Back-dated by UPDATE_INTERVAL plus 50 ms — presumably so the entry counts as stale and is
                // refreshed on first access; confirm against the code that checks _lastUpdateTime.
                _lastUpdateTime = DateTime.Now - UPDATE_INTERVAL - TimeSpan.FromMilliseconds(50);

                Context = context;
            }

            // NOTE(review): gcCollector is assigned only after SetState has run; if SetState/Save reads
            // gcCollector it will not see the collector passed in here — confirm this order is intended.
            SetState(state);
            gcCollector = collector;
        }
Beispiel #16
0
 /// <summary>
 /// Best-effort abort: runs a command for the task over SSH on its node and logs any failure
 /// instead of propagating it.
 /// </summary>
 /// <param name="task">Run context of the task to abort.</param>
 public void Abort(TaskRunContext task)
 {
     try
     {
         var targetNode = GetNode(task);

         // todo : this sends the task-state command; an abort command is probably intended here
         var command = GetTaskStateCommand();
         SshExec(targetNode, command, (string)task.LocalId);
     }
     catch (Exception e)
     {
         string failure = String.Format(
             "Failed to abort task {1} on resource {2}: {3}{0}{4}",
             Environment.NewLine, task.TaskId, task.Resource.ResourceName, e.Message, e.StackTrace);

         Log.Error(failure);
         // todo : throw;
     }
 }
Beispiel #17
0
        /// <summary>
        /// Reserves the task's nodes in the node state cache and starts the task through its controller.
        /// </summary>
        /// <param name="task">Task to submit; its LocalId and CachedRunInfo are set on success.</param>
        /// <remarks>
        /// Every requested node gets its submitted-task counter incremented and its cores reserved. If any
        /// node was found overloaded, an exception is thrown afterwards. On any exception the task is
        /// revoked through <c>RevokeTask</c>, the error is logged and the exception is rethrown.
        /// </remarks>
        private void SubmitTask(TaskRunContext task)
        {
            lock (task.Lock)
            {
                try
                {
                    lock (_nodeStateCacheLock)
                    {
                        var overloaded    = false;
                        var resourceNodes = _nodeStateCache[task.Resource.ResourceName];

                        foreach (var requested in task.NodesConfig)
                        {
                            var state = resourceNodes.Single(n => n.NodeName == requested.NodeName);

                            // NOTE(review): "<=" also flags a node whose available cores exactly match the
                            // request — confirm this is intended.
                            overloaded |= state.CoresAvailable <= requested.Cores;

                            // Counters are bumped even for overloaded nodes; RevokeTask undoes them in the catch.
                            state.TasksSubmitted++;
                            state.CoresReserved += requested.Cores;
                        }

                        if (overloaded)
                        {
                            Log.Error("Nodes overload for resource " + task.Resource.ResourceName);
                            throw new Exception("Wrong config for task " + task.TaskId.ToString() + ". Selected nodes are overloaded");
                        }
                    }

                    task.LocalId       = task.Controller.Run(task);
                    task.CachedRunInfo = new TaskRunInfo(TaskState.Started);
                }
                catch (Exception e)
                {
                    RevokeTask(task);

                    string failure = String.Format(
                        "Unable to run task {1}: {2}{0}{3}",
                        Environment.NewLine, task.TaskId, e.Message, e.StackTrace);
                    Log.Error(failure);

                    throw;
                }
            }
        }
Beispiel #18
0
 /// <summary>
 /// Best-effort abort: runs the SSH abort command for the task on its node and logs any failure
 /// instead of propagating it.
 /// </summary>
 /// <param name="task">Run context of the task to abort; its LocalId is passed to the command.</param>
 public void Abort(TaskRunContext task)
 {
     try
     {
         var targetNode = GetNode(task);
         SshExec(targetNode, SshCommands.Abort, (string) task.LocalId);
     }
     catch (Exception e)
     {
         string failure = String.Format(
             "Failed to abort task {1} on resource {2}: {3}{0}{4}",
             Environment.NewLine, task.TaskId, task.Resource.ResourceName, e.Message, e.StackTrace);

         Log.Error(failure);
         // todo : throw;
     }
 }
Beispiel #19
0
        /// <summary>
        /// Validates the task's node configuration, binds the resource and its controller to the task,
        /// submits the task and registers it in the tasks cache.
        /// </summary>
        /// <param name="task">Task to run; its Resource and Controller are set here.</param>
        /// <exception cref="ArgumentException">
        /// The node configs name more than one resource, or the resource is not in the resources cache.
        /// </exception>
        /// <exception cref="Exception">The task references nodes that are not known for the resource.</exception>
        public void Run(TaskRunContext task)
        {
            lock (task.Lock)
            {
                Log.Info("Running task " + task.ToString());

                // All node configs must target the resource named by the first one.
                string resourceName = task.NodesConfig.First().ResourceName;
                if (task.NodesConfig.Any(node => node.ResourceName != resourceName))
                {
                    Log.Error("Node configs have different resources: " + String.Join(", ", task.NodesConfig.Select(c => c.ResourceName)));
                    throw new ArgumentException("All node configs should have the same resource name");
                }

                lock (_resourcesCacheLock)
                {
                    if (!_resourcesCache.ContainsKey(resourceName))
                    {
                        Log.Error("No controller for resource " + resourceName);
                        throw new ArgumentException("No such resource controller");
                    }

                    // Materialized once: the deferred query would otherwise be re-run for the emptiness
                    // check and for each of the two messages below.
                    var unknownNodes = task.NodesConfig
                        .Select(n => n.NodeName)
                        .Except(_nodeStateCache[resourceName].Select(n => n.NodeName))
                        .ToArray();

                    if (unknownNodes.Length > 0)
                    {
                        string unknownNodeList = String.Join(", ", unknownNodes);

                        Log.Error(String.Format(
                                      "Task {0} has unknown nodes for resource {1}: {2}",
                                      task.TaskId, resourceName, unknownNodeList
                                      ));
                        throw new Exception("Wrong node config for task " + task.TaskId.ToString() + ": " + unknownNodeList);
                    }

                    task.Resource   = _resourcesCache[resourceName];
                    task.Controller = _controllers[resourceName];
                }

                // POSSIBLE DATA RACE?! if state is inside controller
                SubmitTask(task);

                lock (_tasksCacheLock)
                {
                    _tasksCache.Add(task.TaskId, task);
                }
            }
        }
        /// <summary>
        /// Creates the task's folder on the node and uploads every input file of the task into it.
        /// </summary>
        /// <param name="task">Run context whose input files are copied.</param>
        /// <param name="fileNames">
        /// Receives the uploaded files' paths on the cluster, each preceded by a single space
        /// (an empty string when the task has no input files).
        /// </param>
        /// <returns>The task's folder on the cluster, as produced by <c>IncarnationParams.IncarnatePath</c>.</returns>
        public string CopyInputFiles(TaskRunContext task, out string fileNames)
        {
            var node = GetNode(task);

            string clusterHomeFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, CopyPhase.None);

            try
            {
                Log.Info(Thread.CurrentThread.ManagedThreadId + " entered.");

                SshExec(node, "mkdir " + clusterHomeFolder);

                Log.Info(Thread.CurrentThread.ManagedThreadId + " exited.");
            }
            catch (Exception e)
            {
                // Deliberately best-effort: a failing mkdir is only logged.
                Log.Warn(e.ToString());
            }

            Log.Info("Copying input files for task " + task.TaskId.ToString());

            // Loop-invariant: folder without trailing separators, used as the prefix of every remote path.
            string remoteFolder = clusterHomeFolder.TrimEnd(new[] { '/', '\\' });

            fileNames = "";
            foreach (var file in task.InputFiles)
            {
                string tmpFile = Path.GetTempFileName();
                try
                {
                    IOProxy.Storage.Download(file.StorageId, tmpFile);

                    string fileOnCluster = remoteFolder + "/" + file.FileName;
                    fileNames += " " + fileOnCluster;

                    Log.Info("Copying file " + fileOnCluster);
                    UploadFile(node, fileOnCluster, tmpFile);
                }
                finally
                {
                    // Remove the local temp copy even when the download or upload fails.
                    File.Delete(tmpFile);
                }
            }

            return clusterHomeFolder;
        }
Beispiel #21
0
        /// <summary>
        /// Asks the resource for the task's state over SSH and reports whether the task is still active.
        /// </summary>
        /// <param name="task">Run context of the task being polled.</param>
        /// <param name="result">
        /// Receives the lower-cased response of the task-state command, or a fixed error text when that
        /// command threw.
        /// </param>
        /// <returns>
        /// True when the response mentions the task's local id and the listing of the task's output
        /// folder does not contain <c>ClavireFinishFileName</c>; otherwise false.
        /// </returns>
        protected override bool GetFromResourceTaskStateInfo(TaskRunContext task, out string result)
        {
            var node = GetNode(task);

            try
            {
                result = SshExec(node, GetTaskStateCommand(), (string)task.LocalId, null).ToLowerInvariant();
            }
            catch (Exception e)
            {
                Log.Error(String.Format("Exception while updating task's {0} state: {1}", task.TaskId, e));
                result = "SshExec error while updating task's state";
            }

            string clusterFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, CopyPhase.Out);
            string result2 = SshExec(node, SshUnixCommands.Ls, clusterFolder);

            // The response was lower-cased above, so the id has to be lower-cased too; otherwise an id
            // containing upper-case characters could never be found.
            string localIdToken = task.LocalId.ToString().ToLowerInvariant();

            return result.Contains(localIdToken) && !result2.Contains(ClavireFinishFileName);
        }
Beispiel #22
0
        /// <summary>
        /// Runs the task-state command over SSH and maps the response to a <see cref="TaskStateInfo"/>
        /// by looking for known state tokens in the upper-cased response.
        /// </summary>
        /// <param name="task">Run context of the task being polled.</param>
        /// <returns>
        /// Started, Aborted or Failed when a matching token is found (checked in that order); otherwise
        /// Completed. Before Completed is returned the outputs are copied to the exchange, and an
        /// unrecognised response is logged as a warning.
        /// </returns>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            var node = GetNode(task);

            string response      = SshExec(node, SshCommands.GetTaskState, (string)task.LocalId);
            string upperResponse = response.ToUpperInvariant();

            string[] running   = new[] { "CONFIGURING", "COMPLETING", "PENDING", "RUNNING", "SUSPENDED" };
            string[] aborted   = new[] { "CANCELLED", "TIMEOUT" };
            string[] failed    = new[] { "FAILED", "NODE_FAIL", "PREEMPTED" };
            string[] completed = new[] { "COMPLETED", "Invalid job id specified".ToUpperInvariant() };

            if (running.Any(token => upperResponse.Contains(token)))
            {
                return new TaskStateInfo(TaskState.Started, response);
            }

            if (aborted.Any(token => upperResponse.Contains(token)))
            {
                return new TaskStateInfo(TaskState.Aborted, response);
            }

            if (failed.Any(token => upperResponse.Contains(token)))
            {
                return new TaskStateInfo(TaskState.Failed, response);
            }

            // Everything else is reported as completed; an unrecognised response is only logged.
            if (!completed.Any(token => upperResponse.Contains(token)))
            {
                Log.Warn("Wnknown responce from SLURM. Hoping task was completed: " + response);
            }

            CopyOutputsToExchange(task);
            return new TaskStateInfo(TaskState.Completed, response);
        }
Beispiel #23
0
        /// <summary>
        /// Appends the process info of a task state to the buffer, grouped by task id and node name.
        /// Nothing is buffered when the task's controller is not a serviced one.
        /// </summary>
        /// <param name="context">Run context of the task; supplies the controller and the resource name.</param>
        /// <param name="taskId">Id of the task the sample belongs to.</param>
        /// <param name="info">State info whose ProcessInfo is buffered under its NodeName.</param>
        public void push(TaskRunContext context, ulong taskId, TaskStateInfo info)
        {
            Common.Utility.LogInfo("TaskCacheCollector.push taskId=" + taskId + " info=" + info.ProcessInfo.TimeSnapshot);

            if (!IsServicedController(context.Controller))
            {
                return;
            }

            var resName = context.Resource.ResourceName;

            lock (_lock)
            {
                // First sample for this task: create its per-node collection.
                if (!bufferTaskInfo.ContainsKey(taskId))
                {
                    bufferTaskInfo.Add(taskId, new TaskStatInfo(new Dictionary<string, List<ProcessStatInfo>>(), resName));
                }

                var perNode = bufferTaskInfo[taskId].ProcessInfoCollection;

                // First sample for this node: create its sample list.
                if (!perNode.ContainsKey(info.NodeName))
                {
                    perNode.Add(info.NodeName, new List<ProcessStatInfo>());
                }

                perNode[info.NodeName].Add(info.ProcessInfo);
            }
        }
Beispiel #24
0
        /// <summary>
        /// Asks the remote execution service of the task's node whether the task's process is still running.
        /// </summary>
        /// <param name="task">
        /// Run context of the task; its LocalId is expected to hold the process id and the node name,
        /// separated by a newline.
        /// </param>
        /// <returns>A state info with <c>Started</c> while the process runs, otherwise <c>Completed</c>.</returns>
        /// <exception cref="ArgumentException">The LocalId has no node-name section.</exception>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            string[] providedWords = ((string)task.LocalId).Split(new char[] { '\n' }); // todo : string -> string[]
            if (providedWords.Length > 2)
            {
                logger.Warn("Too many sections in provided task id for win PC: {0}", task.LocalId);
            }

            // Fail with a clear message instead of an IndexOutOfRangeException on the node-name lookup below.
            if (providedWords.Length < 2)
            {
                throw new ArgumentException(
                    String.Format("Local id of task '{0}' has no node name section: {1}", task.TaskId, task.LocalId),
                    "task");
            }

            string pid      = providedWords[0];
            string nodeName = providedWords[1];
            var    node     = task.Resource.Nodes.First(n => n.NodeName == nodeName);

            var rexService = GetREx(node.Services.ExecutionUrl);

            try
            {
                bool isRunning = rexService.IsProcessRunning(Int32.Parse(pid));
                rexService.Close();

                return(new TaskStateInfo(isRunning ? TaskState.Started : TaskState.Completed, ""));
            }
            catch (Exception e)
            {
                // Abort the client before the exception is propagated.
                rexService.Abort();
                logger.WarnException(string.Format("Exception while getting task '{0}' state (local id = {1}): ", task.TaskId, task.LocalId), e);

                throw;
            }
        }
Beispiel #25
0
        /// <summary>
        /// Validates the task's node configuration, binds the resource and its controller to the task,
        /// submits the task and registers it in the tasks cache.
        /// </summary>
        /// <param name="task">Task to run; its Resource and Controller are set here.</param>
        /// <exception cref="ArgumentException">
        /// The node configs name more than one resource, or the resource is not in the resources cache.
        /// </exception>
        /// <exception cref="Exception">The task references nodes that are not known for the resource.</exception>
        public void Run(TaskRunContext task)
        {
            lock (task.Lock)
            {
                Log.Info("Running task " + task.ToString());

                // All node configs must target the resource named by the first one.
                string resourceName = task.NodesConfig.First().ResourceName;
                if (task.NodesConfig.Any(node => node.ResourceName != resourceName))
                {
                    Log.Error("Node configs have different resources: " + String.Join(", ", task.NodesConfig.Select(c => c.ResourceName)));
                    throw new ArgumentException("All node configs should have the same resource name");
                }

                lock (_resourcesCacheLock)
                {
                    if (!_resourcesCache.ContainsKey(resourceName))
                    {
                        Log.Error("No controller for resource " + resourceName);
                        throw new ArgumentException("No such resource controller");
                    }

                    // Materialized once: the deferred query would otherwise be re-run for the emptiness
                    // check and for each of the two messages below.
                    var unknownNodes = task.NodesConfig
                        .Select(n => n.NodeName)
                        .Except(_nodeStateCache[resourceName].Select(n => n.NodeName))
                        .ToArray();

                    if (unknownNodes.Length > 0)
                    {
                        string unknownNodeList = String.Join(", ", unknownNodes);

                        Log.Error(String.Format(
                            "Task {0} has unknown nodes for resource {1}: {2}",
                            task.TaskId, resourceName, unknownNodeList
                        ));
                        throw new Exception("Wrong node config for task " + task.TaskId.ToString() + ": " + unknownNodeList);
                    }

                    task.Resource = _resourcesCache[resourceName];
                    task.Controller = _controllers[resourceName];
                }

                // POSSIBLE DATA RACE?! if state is inside controller
                SubmitTask(task);

                lock (_tasksCacheLock)
                {
                    _tasksCache.Add(task.TaskId, task);
                }
            }
        }
Beispiel #26
0
        public object Run(TaskRunContext task)
        {
            lock (_gridLock)
            {
                RefreshCertificate();
                //var incarnation = task.Incarnation;

                string tmpFileName = null;
                if (task.UserCert != null)
                {
                    Log.Info("Using user's certificate");
                    tmpFileName = Path.GetTempFileName();
                    IOProxy.Storage.Download(task.UserCert, tmpFileName);

                    var scpForCert = new SSH.Scp(HELPER_SSH_HOST, HELPER_SSH_USER, HELPER_SSH_PASS);
                    scpForCert.Connect();
                    scpForCert.Recursive = true;
                    scpForCert.Put(tmpFileName, "/tmp/x509up_u500");
                    scpForCert.Close();

                    File.Delete(tmpFileName);
                    SshExec(PilotCommands.SetPermissionsOnProxyCertFile);
                }
                else
                {
                    Log.Info("Using system's certificate");
                }

                try
                {
                    long coresToUse = task.NodesConfig.Sum(cfg => cfg.Cores);
                    var node = GetNode(task);
                    var pack = node.PackageByName(task.PackageName);

                    // todo : remove
                    string commandLine = task.CommandLine;
                    commandLine = commandLine.Replace("java -jar ", "");
                    if (task.PackageName.ToLowerInvariant() == "cnm")
                        commandLine = commandLine.Replace("{0}", "ru.ifmo.hpc.main.ExtendedModel");
                    else
                    if (task.PackageName.ToLowerInvariant() == "ism")
                        commandLine = commandLine.Replace("{0}", "ru.ifmo.hpc.main.SpreadModel");
                    else
                        //if (task.PackageName.ToLowerInvariant() == "orca")
                        commandLine = commandLine.Replace("{0}", "");

                    string ftpFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, task.TaskId, CopyPhase.In);
                    string ftpFolderFromResource = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, task.TaskId, CopyPhase.In);

                    string gridFtpFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, CopyPhase.None);
                    SshExec(PilotCommands.MakeFolderOnGridFtp, gridFtpFolder);

                    string endl = "\n";

                    // Сначала дописываем недостающий входной файл (скрипт запуска пакета на кластере)

                    string scriptName = pack.AppPath;

                    //if (pack.EnvVars.Any())
                    {
                        // Файл с установкой переменных окружения, если пакет их использует

                        scriptName = "run.sh";
                        var scriptContent = new StringBuilder();
                        scriptContent.Append("#!/bin/bash" + endl);
                        foreach (var pair in pack.EnvVars)
                            scriptContent.AppendFormat("export {0}={1}" + endl, pair.Key, pair.Value);

                        scriptContent.Append(pack.AppPath);

                        /*
                        if (task.PackageName.ToLowerInvariant() == "orca")
                        {
                            string[] args = commandLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                            for (int i = 0; i < args.Length; i++)
                            {
                                if (args[i] == "orca.out")
                                    scriptContent.Append(" >");

                                scriptContent.Append(" $" + (i + 1).ToString());
                            }
                        }
                        else*/
                        {
                            scriptContent.Append(" " + commandLine);
                        }

                        string scriptLocalPath = Path.GetTempFileName();
                        File.WriteAllText(scriptLocalPath, scriptContent.ToString());
                        IOProxy.Ftp.UploadLocalFile(scriptLocalPath, ftpFolderFromSystem, scriptName);

                        File.Delete(scriptLocalPath);
                    }

                    //IOProxy.Ftp.UploadLocalFile(DEFAULT_JOB_LAUNCHER_PATH, GetFtpInputFolder(taskId), Path.GetFileName(DEFAULT_JOB_LAUNCHER_PATH));

                    // Копируем входные файлы с ФТП на ГридФТП

                    SshExec(PilotCommands.CopyFilesToGridFtp, ftpFolderFromResource + " " + gridFtpFolder);
                    SshExec(PilotCommands.MakeFilesExecutableOnGridFtp, gridFtpFolder + "*");

                    // Формируем описание задания для грида

                    var jobFileContent = new StringBuilder();

                    jobFileContent.AppendFormat(@"{{ ""version"": 2, ""description"": ""{0}""," + endl, task.TaskId);
                    jobFileContent.AppendFormat(@"  ""default_storage_base"": ""{0}""," + endl, gridFtpFolder);
                    jobFileContent.AppendFormat(@"  ""tasks"": [ {{ ""id"": ""a"", ""description"": ""task"", ""definition"": {{ ""version"": 2," + endl);
                    jobFileContent.AppendFormat(@"      ""executable"": ""{0}""," + endl, scriptName);
                    //jobFileContent.AppendFormat(@"      ""arguments"": [ ""{0}"" ]," + endl, String.Join(@""", """, args));

                    jobFileContent.AppendFormat(@"      ""input_files"": {{" + endl);
                    if (scriptName == "run.sh") // todo : if no input files?
                        jobFileContent.AppendFormat(@"          ""run.sh"": ""run.sh""," + endl);
                    jobFileContent.AppendFormat(@"          " + String.Join(
                        "," + endl + "          ",
                        task.InputFiles.Select(
                            file => String.Format(@"""{0}"": ""{0}""", file.FileName)
                        )
                    ));
                    jobFileContent.AppendFormat(endl + @"      }}," + endl);

                    jobFileContent.AppendFormat(@"      ""output_files"": {{" + endl);

                    //if (task.PackageName.ToLowerInvariant() == "cnm")
                    //    jobFileContent.AppendFormat(@"          ""output.dat"": ""output.dat""" + endl);
                    //else
                    if (task.PackageName.ToLowerInvariant() == "ism")
                        jobFileContent.AppendFormat(@"          ""output.dat"": ""output.dat""" + endl);
                    else
                    if (task.PackageName.ToLowerInvariant() == "orca")
                    {
                        jobFileContent.AppendFormat(@"          ""orca.out"":    ""orca.out""," + endl);
                        jobFileContent.AppendFormat(@"          ""eldens.cube"": ""eldens.cube""" + endl);
                    }
                    else
                    {
                        jobFileContent.AppendFormat(@"          " + String.Join(
                            "," + endl + "          ",
                            task.ExpectedOutputFileNames
                                .Where(name => name != "std.out" && name != "std.err")
                                .Select(
                                    name => String.Format(@"""{0}"": ""{0}""", name)
                                )
                        ) + endl);
                    }

                    jobFileContent.AppendFormat(@"      }}," + endl);

                    jobFileContent.AppendFormat(@"      ""stdout"": ""std.out"", ""stderr"": ""std.err"", " + endl);
                    jobFileContent.AppendFormat(@"      ""count"": {0}" + endl, coresToUse);

                    if (pack.Params.ContainsKey("requirements"))
                        jobFileContent.AppendFormat(@"      ,""requirements"": {0}" + endl, pack.Params["requirements"]);

                    jobFileContent.AppendFormat(@"  }} }} ]," + endl);

                    jobFileContent.AppendFormat(@"  ""requirements"": {{ ""hostname"": [""{0}""]", node.NodeAddress);

                    //if (pack.Params.ContainsKey("requirements"))
                    //    jobFileContent.AppendFormat(@", {0}" + endl, pack.Params["requirements"]);

                    jobFileContent.AppendFormat(@"}}" + endl + "}}", node.NodeAddress);

                    Log.Debug(String.Format("Task's '{0}' grid job JSON: ", task.TaskId, jobFileContent));

                    string jobFileName = "job_" + task.TaskId.ToString() + ".js";
                    string jobFilePathOnHelper = JOBS_FOLDER_ON_HELPER + jobFileName;

                    //string jobFileContent = File.ReadAllText(DEFAULT_JOB_DESCR_PATH).Replace(GRIDFTP_PATH_TOKEN, taskFolderOnGridFtp);
                    string jobFilePathLocal = Path.GetTempFileName();
                    File.WriteAllText(jobFilePathLocal, jobFileContent.ToString());

                    // Записываем его на сервер с Пилотом

                    var scp = new SSH.Scp(HELPER_SSH_HOST, HELPER_SSH_USER, HELPER_SSH_PASS);

                    /*
                    var notifier = new JobDescriptionUploadNotifier(TaskId, Cluster, RunParams);
                    scp.OnTransferEnd += new SSH.FileTransferEvent(notifier.OnFinish); // todo : необязательно
                    */

                    scp.Connect();
                    scp.Recursive = true;
                    scp.Put(jobFilePathLocal, jobFilePathOnHelper);
                    scp.Close();

                    File.Delete(jobFilePathLocal); // todo : remove files on helper and gridftp

                    // Запускаем

                    Log.Info(String.Format(
                        "Trying to exec task {0} on grid cluster {1}",
                        task.TaskId, node.NodeName
                    ));

                    string launchResult = SshExec(PilotCommands.SubmitJob, jobFilePathOnHelper, pilotUrl: node.Services.ExecutionUrl);
                    int urlPos = launchResult.IndexOf("https://");
                    string jobUrl = launchResult.Substring(urlPos).Trim() + "a";
                    Log.Debug(jobUrl);

                    Log.Info(String.Format(
                        "Task {0} launched on grid with jobUrl = {1}",
                        task.TaskId, jobUrl
                    ));

                    return jobUrl;
                }
                catch (Exception e)
                {
                    Log.Error(String.Format(
                        "Error while starting task {0} in grid: {1}\n{2}",
                        task.TaskId, e.Message, e.StackTrace
                    ));

                    throw;
                }
                finally
                {
                    if (task.UserCert != null)
                    {
                        Log.Info("Wiping user's certificate");
                        tmpFileName = Path.GetTempFileName();
                        File.WriteAllText(tmpFileName, "Wiped by Easis system");

                        var scpForCert = new SSH.Scp(HELPER_SSH_HOST, HELPER_SSH_USER, HELPER_SSH_PASS);
                        scpForCert.Connect();
                        scpForCert.Recursive = true;
                        scpForCert.Put(tmpFileName, "/tmp/x509up_u500");
                        scpForCert.Close();

                        File.Delete(tmpFileName);
                        SshExec(PilotCommands.SetPermissionsOnProxyCertFile);
                    }
                }
            }
        }
Beispiel #27
0
        /// <summary>
        /// Asks the pilot for the status of the task's grid job and translates it
        /// into a <see cref="TaskStateInfo"/>.
        /// </summary>
        /// <param name="task">Task whose <c>LocalId</c> is passed to the job status command.</param>
        /// <returns>
        /// <c>Started</c> while the status text contains "is new", "is running" or "is starting";
        /// otherwise <c>Completed</c> when it contains "is finished" and <c>Failed</c> for anything else.
        /// The lower-cased status text is always attached as the state comment.
        /// </returns>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            lock (_gridLock)
            {
                RefreshCertificate();

                ulong taskId = task.TaskId;
                string jobId = (string) task.LocalId;
                string status = SshExec(PilotCommands.JobStatus, jobId).ToLower();

                // A "new" job is reported as Started, exactly like a running or starting one.
                bool stillActive =
                    status.Contains("is new") ||
                    status.Contains("is running") ||
                    status.Contains("is starting");

                if (stillActive)
                {
                    return new TaskStateInfo(TaskState.Started, status);
                }

                // The job is over (successfully or not): move its files to the exchange "out" folder.
                var node = GetNode(task);
                var folders = node.DataFolders;

                string outFolderFromSystem = IncarnationParams.IncarnatePath(folders.ExchangeUrlFromSystem, taskId, CopyPhase.Out);
                string outFolderFromResource = IncarnationParams.IncarnatePath(folders.ExchangeUrlFromResource, taskId, CopyPhase.Out);
                string jobFolderOnGrid = IncarnationParams.IncarnatePath(folders.LocalFolder, taskId, CopyPhase.None);

                IOProxy.Ftp.MakePath(outFolderFromSystem);
                SshExec(PilotCommands.CopyFilesToGridFtp, jobFolderOnGrid + " " + outFolderFromResource);

                TaskState finalState = status.Contains("is finished")
                    ? TaskState.Completed
                    : TaskState.Failed;

                return new TaskStateInfo(finalState, status);
            }
        }
        /// <summary>
        /// Checks through the node's execute service whether the task's process is still running.
        /// </summary>
        /// <param name="task">
        /// Task whose <c>LocalId</c> is a string of the form "&lt;pid&gt;\n&lt;node name&gt;".
        /// </param>
        /// <returns>
        /// A <c>Completed</c> state (after copying outputs to the exchange folder) when the process
        /// is no longer running; a default-constructed <see cref="TaskStateInfo"/> otherwise.
        /// </returns>
        /// <exception cref="FormatException">
        /// The local id does not contain both a pid and a node name, or the pid is not an integer.
        /// </exception>
        public TaskStateInfo GetTaskStateInfo(TaskRunContext task)
        {
            string[] providedWords = ((string)task.LocalId).Split(new[] { '\n' }); // todo : string -> string[]

            // Fail with a meaningful message instead of an IndexOutOfRangeException below.
            if (providedWords.Length < 2)
            {
                throw new FormatException(String.Format(
                    "Malformed task id for win PC (expected pid and node name separated by a line break): {0}",
                    task.LocalId
                ));
            }

            if (providedWords.Length > 2)
                Log.Warn(String.Format("Too many sections in provided task id for win PC: {0}", task.LocalId));

            string pid = providedWords[0];
            string nodeName = providedWords[1];

            var farmId = task.Resource.Controller.FarmId;

            var node = task.Resource.Nodes.First(n => n.NodeName == nodeName);
            Log.Info(String.Format("Getting task {0} info...", pid));
            var esClient = GetExecuteServiceClient(node);

            try
            {
                var isRunning = esClient.IsTaskRunning((int.Parse(pid)));
                esClient.Close();

                if (!isRunning)
                {
                    CopyOutputsToExchange(task, farmId);
                    return new TaskStateInfo(TaskState.Completed, "");
                }

                Log.Info(String.Format("task {0} running is : {1} ", pid, isRunning));

                return new TaskStateInfo();
            }
            catch (Exception e)
            {
                // Abort the client so a failed call does not leave it open, then let the caller see the error.
                esClient.Abort();
                Log.Warn(String.Format(
                    "Exception while getting task '{0}' state (local id = {1}): {2}",
                    task.TaskId, task.LocalId, e
                ));

                throw;
            }
        }
        /// <summary>
        /// Runs the task-info command on the task's node over SSH and converts the output
        /// into a <see cref="ProcessStatInfo"/>.
        /// </summary>
        /// <param name="task">Task whose node is queried.</param>
        /// <returns>The object produced by <c>ObtainInfo</c>, cast to <see cref="ProcessStatInfo"/>.</returns>
        public ProcessStatInfo GetCurrentTaskInfo(TaskRunContext task)
        {
            string commandOutput = SshExec(GetNode(task), GetTaskInfoCommand());
            object parsed = ObtainInfo(commandOutput, typeof (ProcessStatInfo));

            return (ProcessStatInfo) parsed;
        }
Beispiel #30
0
 /// <summary>
 /// Placeholder: aborting is not supported here, so the call only logs a warning.
 /// </summary>
 /// <param name="task">Task that was requested to be aborted (not used).</param>
 public void Abort(TaskRunContext task)
 {
     const string notImplementedMessage = "Abort is not implemented on windows controller!";
     logger.Warn(notImplementedMessage);
 }
 /// <summary>
 /// Resolves the node for a task by delegating to the (resource, nodes config) overload.
 /// </summary>
 /// <param name="task">Task providing the resource and the nodes configuration.</param>
 /// <returns>The node returned by the two-argument <c>GetNode</c> overload.</returns>
 protected ResourceNode GetNode(TaskRunContext task)
 {
     var resource = task.Resource;
     var nodesConfig = task.NodesConfig;

     return GetNode(resource, nodesConfig);
 }
            /// <summary>
            /// Pairs every resource node that is named in the task's nodes config with a
            /// random count taken from <c>Random.Next(2, 5)</c>.
            /// </summary>
            /// <param name="task">Task whose resource nodes and nodes config are matched by node name.</param>
            /// <returns>One (node, count) tuple per matching node, in the order of the resource's nodes.</returns>
            private List<Tuple<ResourceNode, int>> ExtractInfoCountPerNode(TaskRunContext task)
            {
                var generator = new Random();
                var countsPerNode = new List<Tuple<ResourceNode, int>>();

                foreach (var candidate in task.Resource.Nodes)
                {
                    bool usedByTask = task.NodesConfig.Any(cfg => cfg.NodeName == candidate.NodeName);
                    if (!usedByTask)
                    {
                        continue;
                    }

                    countsPerNode.Add(new Tuple<ResourceNode, int>(candidate, generator.Next(2, 5)));
                }

                return countsPerNode;
            }
 /// <summary>
 /// Extension point used for mocking: this base implementation has an empty body,
 /// overriding classes supply the actual behavior.
 /// </summary>
 /// <param name="task">Task to send (ignored here).</param>
 public virtual void SendTask(TaskRunContext task)
 {
 }
            /// <summary>
            /// Starts a repeated process that generates mock statistics for the task's nodes and
            /// pushes them into the task cache collector.
            /// </summary>
            /// <param name="task">Task for which mock data is generated.</param>
            /// <remarks>
            /// NOTE(review): the first lambda looks like the repeated action and the second like the
            /// stop condition of <c>Utility.CreateAndRunRepeatedProcess</c>; the invocation order of the
            /// two is not visible here - confirm against that helper.
            /// </remarks>
            public override void SendTask(TaskRunContext task)
            {
                //todo rewrite all this later
                if (taskCacheCollector == null)
                {
                    taskCacheCollector = base.GetTaskCacheCollector();
                }

                var data = ExtractInfoCountPerNode(task);
                var iter = data.GetEnumerator();

                // Number of pushes still to be made for the node the enumerator currently points at.
                int current = 0;

                // Value handed to GenerateTaskMockData; grows by a random step on every check.
                var t = 0;
                var angleRandom = new Random();

                Utility.CreateAndRunRepeatedProcess(1000, false, () =>
                {
                    Common.Utility.ExceptionablePlaceWrapper(() =>
                    {
                        var info = GenerateTaskMockData(iter.Current.Item1.NodeName, t);
                        taskCacheCollector.push(task, task.TaskId, info);
                    }, " Exception while creating and pushing task mock data for taskId=" + task.TaskId + " in MockCacheCollectorFactory",
                       " Mock statistic data for task with taskId=" + task.TaskId + " have been generated and pushed", false);
                }, () =>
                {
                    if (current == 0)
                    {
                        // Budget for the current node is used up: advance to the next node or finish.
                        if (!iter.MoveNext())
                        {
                            t = 0;
                            return true;
                        }
                        current = iter.Current.Item2;
                    }

                    t += angleRandom.Next(5, 20);
                    --current;
                    return false;
                });
            }
        /// <summary>
        /// Copies every file reported by the execute service for the task from the resource
        /// to the task's exchange "out" folder (an FTP URL or a local/UNC directory).
        /// </summary>
        /// <param name="task">Task whose outputs are copied.</param>
        /// <param name="farmId">Farm identifier passed to the execute service and to <c>DownloadFile</c>.</param>
        /// <remarks>
        /// Copying is best-effort per file: a failure for one file is logged as a warning and the
        /// remaining files are still processed. Temporary files are removed even when a copy fails.
        /// </remarks>
        public void CopyOutputsToExchange(TaskRunContext task, string farmId)
        {
            ulong taskId = task.TaskId;
            var node = GetNode(task);
            var pack = PackageByName(node, task.PackageName);

            // temporary hack: files are not pushed from resource => using path from resource for scp copying
            string outFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);
            //string outFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.Out);
            bool copyingOutsToFtp = outFolderFromSystem.StartsWith("ftp://");
            if (copyingOutsToFtp && !outFolderFromSystem.EndsWith("/"))
                outFolderFromSystem += '/';
            if (!copyingOutsToFtp && !outFolderFromSystem.EndsWith("\\"))
                outFolderFromSystem += '\\';

            string clusterFolder = IncarnationParams.IncarnatePath((!String.IsNullOrEmpty(pack.LocalDir)) ? String.Format(pack.LocalDir, task.TaskId) : node.DataFolders.LocalFolder, taskId, CopyPhase.Out);
            if (!clusterFolder.EndsWith("\\"))
                clusterFolder += "\\";

            // The client is needed only for the file listing; close it right away and abort it on
            // failure (same pattern the state-polling code uses) so it is never left open.
            string[] fileNames;
            var exClient = GetExecuteServiceClient(node);
            try
            {
                fileNames = exClient.GetAllFileNames(farmId, taskId);
                exClient.Close();
            }
            catch (Exception)
            {
                exClient.Abort();
                throw;
            }

            foreach (var output in task.ExpectedOutputFileNames)
            {
                Log.Info(output);
            }

            // Create the target folder itself plus every sub-directory that occurs in the file names.
            var dirStructure = fileNames
                .Where(name => name.Contains('/') || name.Contains('\\')) // inside subdir
                .Select(name => name.Remove(name.LastIndexOfAny(new[] { '\\', '/' })))
                .Distinct()
                .Select(file => outFolderFromSystem + file)
                .Union(new[] { outFolderFromSystem });
            foreach (string dir in dirStructure)
            {
                if (copyingOutsToFtp)
                    IOProxy.Ftp.MakePath(dir);
                else
                {
                    Log.Debug("Creating dir " + dir);
                    Directory.CreateDirectory(dir);
                }
            }

            Log.Info("Copying output files");
            foreach (string fileName in fileNames)
            {
                string tmpFile = Path.GetTempFileName();
                try
                {
                    try
                    {
                        Log.Info("Copying file " + clusterFolder + fileName);
                        DownloadFile(node, clusterFolder + fileName, tmpFile, taskId, farmId);

                        if (copyingOutsToFtp)
                            IOProxy.Ftp.UploadLocalFile(tmpFile, outFolderFromSystem, fileName, shouldCreatePath: false);
                        else
                            File.Copy(tmpFile, outFolderFromSystem + fileName);
                    }
                    finally
                    {
                        // Remove the temp file on success and on failure alike.
                        File.Delete(tmpFile);
                    }

                    Log.Info("File copied " + fileName);
                }
                catch (Exception e)
                {
                    Log.Warn(String.Format("Exception on file '{0}' copy: {1}", clusterFolder + fileName, e));
                }
            }
        }
Beispiel #36
0
            /// <summary>
            /// Creates a cache entry for the task and stores it under the task's id,
            /// replacing any entry already stored for that id.
            /// </summary>
            /// <param name="context">Run context of the task being cached.</param>
            /// <param name="state">Initial state recorded for the task.</param>
            /// <param name="stateComment">Comment stored together with the state.</param>
            public static void AddTask(TaskRunContext context, TaskState state = TaskState.Started, string stateComment = "")
            {
                // The entry is built outside the lock; only the dictionary write is guarded.
                var entry = new TaskCache(context, state, stateComment);

                lock (_globalLock)
                {
                    var key = context.TaskId;
                    _cache[key] = entry;
                }
            }
        /// <summary>
        /// Downloads each input file of the task from storage into a temporary file and
        /// uploads it into the task's home folder on the resource.
        /// </summary>
        /// <param name="task">Task whose <c>InputFiles</c> are copied.</param>
        /// <param name="resorceHomeFolder">Target folder on the resource; trailing slashes are ignored.</param>
        /// <remarks>
        /// A failed storage download is only logged and the upload still runs with whatever the
        /// temporary file contains (existing best-effort behavior). The temporary file is deleted
        /// even when the upload throws.
        /// </remarks>
        private void CopyInputFiles(TaskRunContext task, string resorceHomeFolder)
        {
            var node = GetNode(task);

            Log.Info("Copying input files for task " + task.TaskId);

            foreach (var file in task.InputFiles)
            {
                var tmpFile = Path.GetTempFileName();
                try
                {
                    try
                    {
                        IOProxy.Storage.Download(file.StorageId, tmpFile);
                    }
                    catch (Exception exp)
                    {
                        Log.Error("Error " + exp);
                    }

                    var fileOnResource = resorceHomeFolder.TrimEnd(new[] { '/', '\\' }) + "\\" + file.FileName;

                    Log.Info("Copying file " + fileOnResource);

                    UploadFile(node, fileOnResource, tmpFile, task.TaskId.ToString(), task.Resource.Controller.FarmId);
                }
                finally
                {
                    // Previously skipped when UploadFile threw, leaving the temp file behind.
                    File.Delete(tmpFile);
                }
            }

            Log.Info(String.Format("Copying input files for task {0} finished.", task.TaskId));
        }
        /// <summary>
        /// Prepares the task's environment on the resource, copies its input files and
        /// starts it through the node's execute service.
        /// </summary>
        /// <param name="task">Task to start.</param>
        /// <returns>
        /// The execute service's result and the node name, joined by a line break
        /// ("&lt;result&gt;\n&lt;node name&gt;").
        /// </returns>
        /// <remarks>
        /// The service client is closed on success and aborted when any step throws, so it is
        /// not left open on failure; the exception is rethrown unchanged.
        /// </remarks>
        public virtual object Run(TaskRunContext task)
        {
            var node = GetNode(task);
            var pack = PackageByName(node, task.PackageName);

            var taskId = task.TaskId;

            var farmId = task.Resource.Controller.FarmId;

            var esService = GetExecuteServiceClient(node);

            try
            {
                var resorceHomeFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, task.TaskId, farmId, CopyPhase.None);

                PrepareEnviroment(esService, pack, resorceHomeFolder, farmId);

                CopyInputFiles(task, resorceHomeFolder);

                string cmdLine = String.Format(task.CommandLine, pack.AppPath, taskId);

                Log.Info("cmdline = " + cmdLine);

                var result = esService.ExecuteTaskOnFarm(taskId, farmId, cmdLine);

                Log.Info("Exec done. Job id = " + result);

                esService.Close();

                return result + "\n" + node.NodeName;
            }
            catch (Exception)
            {
                esService.Abort();
                throw;
            }
        }
Beispiel #39
0
        /// <summary>
        /// Builds a Windows batch script for the task, uploads it to the task's FTP folder and
        /// asks the node's remote execution service to run it.
        /// </summary>
        /// <param name="task">Task to start.</param>
        /// <returns>The started process id and the node name, joined by a line break ("&lt;pid&gt;\n&lt;node name&gt;").</returns>
        /// <remarks>
        /// The script creates the working folder, copies the package start-up files and the shared
        /// input folder into it, sets the package environment variables, runs the command line,
        /// copies results to the shared output folder and writes <c>clavire_*</c> marker files
        /// around each phase. A non-negative integer in the debug-pause app setting inserts a
        /// <c>pause</c> at that line index of the script.
        /// </remarks>
        public object Run(TaskRunContext task)
        {
            ulong taskId = task.TaskId;
            var   node   = GetNode(task);

            string ftpFolder          = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.In);
            string jobFtpFolder       = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.None);
            string sharedInputFolder  = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.In);
            string sharedOutputFolder = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);
            string tmpFolder          = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, taskId, CopyPhase.None);

            IOProxy.Ftp.MakePath(ftpFolder);
            IOProxy.Ftp.MakePath(jobFtpFolder);

            string jobFileName = "job_" + taskId + ".cmd";

            logger.Info("Trying to exec task {0} on win PC {1}.{2}", taskId, node.ResourceName, node.NodeName);

            var pack = node.Packages.First(p => String.Equals(p.Name, task.PackageName, StringComparison.InvariantCultureIgnoreCase));

            // Folder paths without trailing separators, as used throughout the script.
            var    slashes         = new char[] { '/', '\\' };
            string tmpDir          = tmpFolder.TrimEnd(slashes);
            string sharedInputDir  = sharedInputFolder.TrimEnd(slashes);
            string sharedOutputDir = sharedOutputFolder.TrimEnd(slashes);

            string batchContent = "";

            batchContent += "mkdir " + tmpDir + Environment.NewLine;

            if (Path.IsPathRooted(tmpFolder)) // change drive if needed
            {
                batchContent += Path.GetPathRoot(tmpFolder).TrimEnd(slashes) + Environment.NewLine;
            }

            batchContent += String.Format(
                @"cd {0}" + Environment.NewLine,
                tmpDir
                );

            batchContent += "echo %time% > clavire_script_started" + Environment.NewLine;

            foreach (string copyPath in pack.CopyOnStartup)
            {
                batchContent += String.Format(
                    @"xcopy {0} {1}\ /z /s /e /c /i /h /r /y" + Environment.NewLine,
                    copyPath.TrimEnd(slashes),
                    tmpDir
                    );
            }

            batchContent += String.Format(
                @"xcopy {0} {1}\ /z /s /e /c /i /h /r /y" + Environment.NewLine,
                sharedInputDir,
                tmpDir
                );

            foreach (var envVar in pack.EnvVars)
            {
                batchContent += "set " + envVar.Key + "=" + envVar.Value + Environment.NewLine;
            }

            string commandLine = String.Format(task.CommandLine, pack.AppPath);

            batchContent += "echo %time% > clavire_task_started" + Environment.NewLine;
            batchContent += "cmd.exe /c " + commandLine + Environment.NewLine;
            batchContent += "echo %time% > clavire_task_finished" + Environment.NewLine;

            // Paths listed in CleanupIgnore are copied to the shared output folder before the cleanup below.
            foreach (string copyPath in pack.CleanupIgnore)
            {
                batchContent += String.Format(
                    @"xcopy {1} {0} /z /s /e /c /i /h /r /y" + Environment.NewLine,
                    (sharedOutputDir + "/" + copyPath.TrimStart(slashes)).Replace("/", "\\"),
                    (tmpDir + "/" + copyPath.TrimStart(slashes)).Replace("/", "\\")
                    );
            }

            foreach (string delPath in pack.Cleanup)
            {
                batchContent += String.Format(
                    @"rmdir /s /q {0}" + Environment.NewLine +
                    @"del /f /s /q {0}" + Environment.NewLine,
                    tmpFolder + delPath  // todo: delPath.TrimStart
                    );
            }

            batchContent += String.Format(
                @"xcopy {1} {0}\ /z /s /e /c /i /h /r /y" + Environment.NewLine,
                sharedOutputDir,
                tmpDir
                );

            batchContent += String.Format(
                @"echo %time% > clavire_script_finished" + Environment.NewLine +
                @"xcopy clavire_script_finished {1}\ /z /s /e /c /i /h /r /y" + Environment.NewLine +
                @"cd {0}" + Environment.NewLine +
                @"cd .." + Environment.NewLine +
                "",
                tmpDir,
                sharedOutputDir
                );

            // TryParse writes 0 to the out argument when parsing fails, which used to be taken
            // as "pause at line 0"; an unparsable setting now means "no pause".
            int pauseLine;
            if (!Int32.TryParse(Config.AppSettings[DEBUG_PAUSE_PARAM_NAME] ?? "-1", out pauseLine))
            {
                pauseLine = -1;
            }

            if (pauseLine >= 0)
            {
                var    batchLines      = batchContent.Replace("\r", "").Split(new[] { '\n' });
                string newBatchContent =
                    String.Join(Environment.NewLine, batchLines.Take(pauseLine)) + Environment.NewLine +
                    "pause" + Environment.NewLine +
                    String.Join(Environment.NewLine, batchLines.Skip(pauseLine));
                batchContent = newBatchContent;
            }

            IOProxy.Ftp.UploadFileContent(batchContent, jobFtpFolder, jobFileName);

            var rexService = GetREx(node.Services.ExecutionUrl);     // todo : close service client!
            int pid        = rexService.Exec(taskId);

            logger.Info("Task {0} ({1}) started on pc {2}.{3} with pid = {4}", taskId, pack.Name, node.ResourceName, node.NodeName, pid);

            return(pid + "\n" + node.NodeName);
        }
 /// <summary>
 /// No-op implementation: the body is empty, so the pushed state info is discarded.
 /// </summary>
 /// <param name="context">Run context of the task (ignored).</param>
 /// <param name="taskId">Id of the task (ignored).</param>
 /// <param name="info">State info being pushed (ignored).</param>
 public void push(TaskRunContext context, ulong taskId, TaskStateInfo info)
 {
 }
Beispiel #41
0
        /// <summary>
        /// Copies the task's input files to the cluster, generates the launch script and
        /// executes it over SSH while holding the task's write-operation lock.
        /// </summary>
        /// <param name="task">Task to start.</param>
        /// <returns>The job id: the last whitespace-separated token of the run command's output.</returns>
        /// <remarks>
        /// The operation lock is released in a <c>finally</c> block, so a failure while copying
        /// files, preparing the script or executing it no longer leaves the lock held.
        /// </remarks>
        public object Run(TaskRunContext task)
        {
            var node = GetNode(task);
            var pack = PackageByName(node, task.PackageName);
            ulong taskId = task.TaskId;

            Log.Info("Locking operation");
            var operationHolder = LockOperation(task.TaskId, TaskLock.WRITE_OPERATION_EXECUTED);

            string result;
            try
            {
                string fileNames;
                string clusterHomeFolder = CopyInputFiles(task, out fileNames);

                string cmdLine = String.Format(task.CommandLine, pack.AppPath, taskId, fileNames.Trim());
                Log.Debug("cmdline = " + cmdLine);

                Log.Info("Preparing script");
                string scriptPath = MakeScript(pack, cmdLine, node, clusterHomeFolder);

                Log.Info("Script prepared. Executing it.");
                result = SshExec(node, SshCommands.Run, scriptPath);
            }
            finally
            {
                UnLockOperation(task.TaskId, operationHolder);
                Log.Info("Operation unlocked");
            }

            string jobId = result.Split(new[] { '\r', '\n', ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries).Last();
            Log.Info("Exec done. Job id = " + jobId);
            return jobId;
        }
Beispiel #42
0
            /// <summary>
            /// Creates a cache entry from an already built state info; the entry is marked as
            /// not updating and its last update time is set to the current local time.
            /// </summary>
            /// <param name="context">Run context of the cached task.</param>
            /// <param name="state">State info stored for the task.</param>
            private TaskCache(TaskRunContext context, TaskStateInfo state)
            {
                // Cached payload.
                Context = context;
                StateInfo = state;

                // Update bookkeeping.
                _lastUpdateTime = DateTime.Now;
                _isUpdating = false;
            }
Beispiel #43
0
            /// <summary>
            /// Creates a cache entry from a state and a comment. The last update time is set to
            /// <c>UPDATE_INTERVAL</c> plus 50 ms before the current local time.
            /// </summary>
            /// <param name="context">Run context of the cached task.</param>
            /// <param name="state">State wrapped into a new <see cref="TaskStateInfo"/>.</param>
            /// <param name="stateComment">Comment stored together with the state.</param>
            private TaskCache(TaskRunContext context, TaskState state = TaskState.Started, string stateComment = "")
            {
                var initialInfo = new TaskStateInfo(state, stateComment);

                Context = context;
                StateInfo = initialInfo;

                _isUpdating = false;

                // Back-date the timestamp by the update interval and an extra 50 ms.
                var now = DateTime.Now;
                _lastUpdateTime = now - UPDATE_INTERVAL - TimeSpan.FromMilliseconds(50);
            }
Beispiel #44
0
        /// <summary>
        /// Reserves cores on the nodes selected for the task and starts the task through its controller.
        /// </summary>
        /// <param name="task">Task to submit; its <c>LocalId</c> and <c>CachedRunInfo</c> are set on success.</param>
        /// <remarks>
        /// Reservations are recorded for every selected node before the overload check fails, and
        /// any exception triggers <c>RevokeTask</c> before it is logged and rethrown.
        /// </remarks>
        private void SubmitTask(TaskRunContext task)
        {
            lock (task.Lock)
            {
                try
                {
                    lock (_nodeStateCacheLock)
                    {
                        var statesOfResource = _nodeStateCache[task.Resource.ResourceName];
                        bool overloaded = false;

                        foreach (var requested in task.NodesConfig)
                        {
                            var state = statesOfResource.Single(n => n.NodeName == requested.NodeName);

                            // Checked against the availability before this task's own reservation is added.
                            overloaded |= state.CoresAvailable <= requested.Cores;

                            state.TasksSubmitted++;
                            state.CoresReserved += requested.Cores;
                        }

                        if (overloaded)
                        {
                            Log.Error("Nodes overload for resource " + task.Resource.ResourceName);
                            throw new Exception("Wrong config for task " + task.TaskId.ToString() + ". Selected nodes are overloaded");
                        }
                    }

                    task.LocalId = task.Controller.Run(task);
                    task.CachedRunInfo = new TaskRunInfo(TaskState.Started);
                }
                catch (Exception e)
                {
                    // Undo the reservations first, then report and propagate the failure.
                    RevokeTask(task);

                    string failureReport = String.Format(
                        "Unable to run task {1}: {2}{0}{3}",
                        Environment.NewLine, task.TaskId, e.Message, e.StackTrace
                    );
                    Log.Error(failureReport);

                    throw;
                }
            }
        }
        /// <summary>
        /// Lists the files in the task's folder on the cluster over SSH and copies them to the
        /// task's exchange "out" folder (an FTP URL or a local/UNC directory).
        /// </summary>
        /// <param name="task">Task whose outputs are copied.</param>
        /// <remarks>
        /// Files whose names contain ".rst" or ".esav" are skipped. Copying is best-effort per file:
        /// a failure is logged as a warning and the remaining files are still processed.
        /// Temporary files are removed even when a copy fails.
        /// </remarks>
        public void CopyOutputsToExchange(TaskRunContext task)
        {
            ulong taskId = task.TaskId;
            var node = GetNode(task);
            var pack = PackageByName(node, task.PackageName);

            // temporary hack: files are not pushed from resource => using path from resource for scp copying
            string outFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);
            //string outFolderFromSystem = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.Out);
            bool copyingOutsToFtp = outFolderFromSystem.StartsWith("ftp://");
            if (copyingOutsToFtp && !outFolderFromSystem.EndsWith("/"))
                outFolderFromSystem += '/';
            if (!copyingOutsToFtp && !outFolderFromSystem.EndsWith("\\"))
                outFolderFromSystem += '\\';

            string clusterFolder = IncarnationParams.IncarnatePath((!String.IsNullOrEmpty(pack.LocalDir)) ? String.Format(pack.LocalDir, task.TaskId) : node.DataFolders.LocalFolder, taskId, CopyPhase.Out);
            if (!clusterFolder.EndsWith("/"))
                clusterFolder += "/";

            // Materialized once: the list is walked twice below (directories, then files).
            var fileNames = SshExec(node, "cd " + clusterFolder + "; " + SSH_FIND_COMMAND, "")
                            .Replace("./", "/").Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries)
                            .Where(st => !st.Contains(".rst") /*&& !st.Contains(".err")*/ && !st.Contains(".esav"))
                            .Select(st => st.Trim(new[] { '/', '\\' }))
                            .ToList();

            // Create the target folder itself plus every sub-directory that occurs in the file names.
            var dirStructure = fileNames
                .Where(name => name.Contains('/') || name.Contains('\\')) // inside subdir
                .Select(name => name.Remove(name.LastIndexOfAny(new[] { '\\', '/' })))
                .Distinct()
                .Select(file => outFolderFromSystem + file)
                .Union(new[] { outFolderFromSystem });
            foreach (string dir in dirStructure)
            {
                if (copyingOutsToFtp)
                    IOProxy.Ftp.MakePath(dir);
                else
                {
                    Log.Debug("Creating dir " + dir);
                    Directory.CreateDirectory(dir);
                }
            }

            Log.Info("Copying output files");
            foreach (string fileName in fileNames)
            {
                string tmpFile = Path.GetTempFileName();
                try
                {
                    try
                    {
                        Log.Info("Copying file " + clusterFolder + fileName);
                        DownloadFile(node, clusterFolder + fileName, tmpFile);

                        if (copyingOutsToFtp)
                            IOProxy.Ftp.UploadLocalFile(tmpFile, outFolderFromSystem, fileName, shouldCreatePath: false);
                        else
                            File.Copy(tmpFile, outFolderFromSystem + fileName);
                    }
                    finally
                    {
                        // Remove the temp file on success and on failure alike.
                        File.Delete(tmpFile);
                    }

                    Log.Info("File copied " + fileName);
                }
                catch (Ssh.SshTransferException e)
                {
                    Log.Warn(String.Format("During coping file {0} for task {1} from error was happend: {2}", fileName, taskId, e)); // todo : lolwut?
                }
                catch (Exception e)
                {
                    Log.Warn(String.Format("Exception on file '{0}' copy: {1}", clusterFolder + fileName, e));
                }
            }
        }
Beispiel #46
0
        /// <summary>
        /// Releases the per-node reservation of a task: decrements the submitted-task counter and
        /// the reserved-core counter of every node in the task's nodes config.
        /// </summary>
        /// <param name="task">Task whose reservations are released.</param>
        /// <remarks>
        /// A counter that drops below zero is reset to 0 and a warning naming the node and the
        /// task is logged, so the bookkeeping mismatch can be traced.
        /// </remarks>
        private void RevokeTask(TaskRunContext task)
        {
            lock (task.Lock)
            {
                lock (_nodeStateCacheLock)
                {
                    var nodeStates = _nodeStateCache[task.Resource.ResourceName];
                    foreach (var nodeConfig in task.NodesConfig)
                    {
                        var nodeState = nodeStates.Single(n => n.NodeName == nodeConfig.NodeName);

                        nodeState.TasksSubmitted--;
                        nodeState.CoresReserved -= nodeConfig.Cores;

                        if (nodeState.TasksSubmitted < 0)
                        {
                            Log.Warn(String.Format(
                                "Submitted tasks count became negative on node {0} while revoking task {1}; resetting to 0",
                                nodeConfig.NodeName, task.TaskId
                            ));
                            nodeState.TasksSubmitted = 0;
                        }

                        if (nodeState.CoresReserved < 0)
                        {
                            Log.Warn(String.Format(
                                "Reserved cores count became negative on node {0} while revoking task {1}; resetting to 0",
                                nodeConfig.NodeName, task.TaskId
                            ));
                            nodeState.CoresReserved = 0;
                        }
                    }
                }
            }
        }
 /// <summary>
 /// Resolves the node for a task by delegating to the (resource, nodes config) overload.
 /// </summary>
 /// <param name="task">Task providing the resource and the nodes configuration.</param>
 /// <returns>The node returned by the two-argument <c>GetNode</c> overload.</returns>
 protected ResourceNode GetNode(TaskRunContext task)
 {
     var resource = task.Resource;
     var nodesConfig = task.NodesConfig;

     return GetNode(resource, nodesConfig);
 }
        /// <summary>
        /// Fetches the process statistics recorded for a task since <paramref name="startTime"/>
        /// from the statistics service of the task's node.
        /// </summary>
        /// <param name="taskId">Task id; it is cast to <c>int</c> for the service call.</param>
        /// <param name="startTime">Lower bound passed to the statistics service.</param>
        /// <param name="task">Task used to resolve the node that is queried.</param>
        /// <returns>
        /// A dictionary with one entry (node name -> statistics), or an empty dictionary when the
        /// service answered with a <see cref="FaultException"/>.
        /// </returns>
        /// <remarks>
        /// A <see cref="FaultException"/> is logged and swallowed; any other exception is logged and
        /// rethrown with its original stack trace.
        /// </remarks>
        public Dictionary<string, List<ProcessStatInfo>> GetTaskInfoStartWith(ulong taskId, DateTime startTime, TaskRunContext task)
        {
            //todo ask about getting all nodes
            var node = GetNode(task);

            // NOTE(review): the client is never closed in this method - confirm who owns its lifetime.
            var client = GetStatisticsServiceClient(node);

            var result = new Dictionary<string, List<ProcessStatInfo>>();

            try
            {
                var data = client.GetAllTaskInfoStartedWith((int) taskId, startTime);
                result.Add(node.NodeName, data);
            }
            catch (FaultException ex)
            {
                //todo this Exception can be linked with state of a task
                //resolve it later
                Log.Error(" Exception while trying to get cacheable task info  " + ex.ToString());
            }
            catch (Exception ex)
            {
                Log.Error(" Exception while trying to get cacheable task info  " + ex.ToString());

                // "throw;" keeps the original stack trace, "throw ex;" would reset it.
                throw;
            }

            return result;
        }