/// <summary>
/// Asks the clusters service for the state of the cluster named by <c>node.ResourceName</c>
/// and reports how many cores of <paramref name="node"/> are currently available.
/// </summary>
/// <param name="node">
/// Node to inspect. <c>ResourceName</c> selects the cluster, <c>NodeName</c> is matched against
/// the <c>DNSName</c> of the node entries returned by the service.
/// </param>
/// <returns>
/// A state whose <c>CoresAvailable</c> equals <c>node.CoresCount</c> when the node is reported by the
/// service and has no task ids attached; otherwise <c>CoresAvailable</c> is 0.
/// </returns>
/// <exception cref="ClusterException">
/// The service returned a code other than <c>OperationSuccess</c>, or returned no cluster state.
/// </exception>
public override NodeState GetNodeState(ResourceNode node)
{
    var nodeState = new NodeState();
    var service = EntryPointProxy.GetClustersService();

    ClustersService.Code errCode;
    ClustersService.ClusterStateInfo clusterStateInfo;

    lock (_clustersServiceLock)
    {
        // GetClusterStateInfoFast is probably buggy! (doesn't update immediately after task launch)
        clusterStateInfo = service.GetClusterStateInfo(node.ResourceName, out errCode);
    }

    if (errCode != ServiceProxies.ClustersService.Code.OperationSuccess || clusterStateInfo == null)
    {
        throw new ClusterException(errCode);
    }

    // A node that cannot be found in the report (or a report without a node list) counts as busy.
    // This used to be detected through an exception thrown by First() and a catch-all handler;
    // FirstOrDefault() gives the same outcome without hiding unrelated failures.
    bool nodeIsFree = false;

    if (clusterStateInfo.Node != null && !String.IsNullOrEmpty(node.NodeName))
    {
        var nodeStateInfo = clusterStateInfo.Node.FirstOrDefault(stateInfo =>
            stateInfo != null &&
            !String.IsNullOrEmpty(stateInfo.DNSName) &&
            stateInfo.DNSName == node.NodeName
        );

        // The node is free only when it is reported and carries no task ids.
        nodeIsFree = nodeStateInfo != null &&
                     (nodeStateInfo.TaskID == null || nodeStateInfo.TaskID.Count == 0);
    }

    if (nodeIsFree)
    {
        nodeState.CoresAvailable = node.CoresCount;
    }
    else
    {
        nodeState.CoresAvailable = 0;
    }

    return nodeState;
}
/// <summary>
/// Cancels a task by calling <c>CancelTask</c> on the clusters service.
/// </summary>
/// <param name="providedTaskId">Provider-specific id of the task to cancel.</param>
/// <param name="resource">Not used by this implementation.</param>
/// <param name="nodesConfig">Not used by this implementation.</param>
/// <exception cref="ClusterException">
/// The service returned a code other than <c>OperationSuccess</c>.
/// </exception>
public override void Abort(string providedTaskId, Resource resource, IEnumerable<NodeConfig> nodesConfig)
{
    // todo : [5] implement ClustersProxy.Abort(clusterName)

    var clusters = EntryPointProxy.GetClustersService();
    ClustersService.Code cancelResult;

    // All calls to the clusters service in this provider are serialized through this lock.
    lock (_clustersServiceLock)
    {
        clusters.CancelTask(providedTaskId, out cancelResult);
    }

    if (cancelResult == ClustersService.Code.OperationSuccess)
    {
        return;
    }

    throw new ClusterException(cancelResult);
}
/// <summary>
/// Determines the state of a task running on a Windows PC node by asking the node's REx service
/// whether the task's process is still running.
/// </summary>
/// <param name="taskId">Not used by this implementation.</param>
/// <param name="providedTaskId">
/// Provider-specific id: the process id and the node name separated by a line feed.
/// </param>
/// <param name="resource">Resource whose <c>Nodes</c> are searched for the node name.</param>
/// <param name="nodesConfig">Not used by this implementation.</param>
/// <returns>
/// (<c>Completed</c>, "") when the process is no longer running; (<c>Started</c>, "") when it is
/// running or when any exception occurs while checking (the exception is logged as a warning).
/// </returns>
public override Tuple<TaskState, string> GetTaskState(ulong taskId, string providedTaskId, Resource resource, IEnumerable<NodeConfig> nodesConfig)
{
    lock (_pcLock)
    {
        try
        {
            string[] sections = providedTaskId.Split(new[] { '\n' });

            if (sections.Length > 2)
            {
                Log.Warn(String.Format("Too many sections in provided task id for win PC: {0}", providedTaskId));
            }

            string processId = sections[0];
            string targetNodeName = sections[1];

            var targetNode = resource.Nodes.First(candidate => candidate.NodeName == targetNodeName);

            using (var rex = EntryPointProxy.GetREx(targetNode.Services.ExecutionUrl))
            {
                if (rex.IsProcessRunning(Int32.Parse(processId)))
                {
                    return Tuple.Create(TaskState.Started, "");
                }

                // The process is gone, so the node is released for the next task.
                _nodeUsed[targetNode.NodeName] = false;
                return Tuple.Create(TaskState.Completed, "");
            }
        }
        catch (Exception failure)
        {
            // Best effort: any failure is logged and the task is still reported as started.
            string warning = String.Format(
                "Exception while getting task state (provided id = {0}): {1}\n{2}",
                providedTaskId, failure.Message, failure.StackTrace
            );
            Log.Warn(warning);

            return Tuple.Create(TaskState.Started, "");
        }
    }
}
//public static string GetNewProvidedTaskId()
//{
//    var service = EntryPointProxy.GetClustersService();
//    ClustersService.Code errCode;
//    ServiceProxies.ClustersService.TaskInfo taskInfo = null;
//    lock (_clustersServiceLock)
//    {
//        taskInfo = service.CreateTask(out errCode);
//    }
//    if (errCode == ServiceProxies.ClustersService.Code.OperationSuccess)
//        return taskInfo.TaskID;
//    else
//        throw new ClusterException(errCode);
//}

/// <summary>
/// Reads the task state from the clusters service and maps it to one of the task's possible states.
/// </summary>
/// <param name="taskId">Not used by this implementation.</param>
/// <param name="providedTaskId">Task's id specific to this resource provider.</param>
/// <param name="resource">Not used by this implementation.</param>
/// <param name="nodesConfig">Not used by this implementation.</param>
/// <returns>
/// Tuple (task state, comment). <c>Complete</c> maps to <c>Completed</c>, <c>Fail</c> to
/// <c>Failed</c> with the comment "Failed on cluster", <c>Execute</c> to <c>Started</c> and
/// <c>Cancel</c> to <c>Aborted</c>. Any other service state maps to <c>Defined</c> with the
/// service state's name as the comment.
/// </returns>
/// <exception cref="ClusterException">
/// The service returned a code other than <c>OperationSuccess</c>.
/// </exception>
public override Tuple<TaskState, string> GetTaskState(ulong taskId, string providedTaskId, Resource resource, IEnumerable<NodeConfig> nodesConfig)
{
    var clusters = EntryPointProxy.GetClustersService();

    ClustersService.Code resultCode;
    ClustersService.TaskInfo reported;

    lock (_clustersServiceLock)
    {
        reported = clusters.GetTaskState(providedTaskId, out resultCode);
    }

    if (resultCode != ServiceProxies.ClustersService.Code.OperationSuccess)
    {
        throw new ClusterException(resultCode);
    }

    var clusterState = reported.State;

    TaskState mappedState;
    string comment = "";

    if (clusterState == ClustersService.TaskState.Complete)
    {
        mappedState = TaskState.Completed;
    }
    else if (clusterState == ClustersService.TaskState.Fail)
    {
        mappedState = TaskState.Failed;
        comment = "Failed on cluster";
    }
    else if (clusterState == ClustersService.TaskState.Execute)
    {
        mappedState = TaskState.Started;
    }
    else if (clusterState == ClustersService.TaskState.Cancel)
    {
        mappedState = TaskState.Aborted;
    }
    else
    {
        // Every other service state is reported as Defined, with the raw state name as the comment.
        mappedState = TaskState.Defined;
        comment = clusterState.ToString();
    }

    return Tuple.Create(mappedState, comment);
}
/// <summary>
/// Launches a task on a cluster through the clusters service.
/// </summary>
/// <remarks>
/// The task description is first read back from the service by its id (the decimal form of
/// <paramref name="taskId"/>), then filled with the cluster name, command line, package name and
/// per-node core counts, logged, and passed to <c>ExecuteTask</c>.
/// </remarks>
/// <param name="taskId">Task id; its string form is used as the provider-specific id.</param>
/// <param name="incarnation">Supplies the package name and the command line format string.</param>
/// <param name="resource">Target resource; its <c>ResourceName</c> becomes the cluster name.</param>
/// <param name="nodesConfig">Per-node configuration; only entries with <c>Cores &gt; 0</c> are used.</param>
/// <returns>The provider-specific task id (<c>taskId.ToString()</c>).</returns>
/// <exception cref="ClusterException">
/// Reading the task description or executing the task returned a code other than <c>OperationSuccess</c>.
/// </exception>
public override string Run(ulong taskId, IncarnationParams incarnation, Resource resource, IEnumerable<NodeConfig> nodesConfig)
{
    string providedTaskId = taskId.ToString();

    var defaultNode = GetDefaultNodeSettings(resource, nodesConfig);
    var package = defaultNode.PackageByName(incarnation.PackageName);
    var clusters = EntryPointProxy.GetClustersService();

    ClustersService.Code lookupCode;
    ClustersService.TaskInfo task;

    lock (_clustersServiceLock)
    {
        task = clusters.GetTaskState(providedTaskId, out lookupCode);
    }

    if (lookupCode != ServiceProxies.ClustersService.Code.OperationSuccess)
    {
        throw new ClusterException(lookupCode);
    }

    task.ClusterName = resource.ResourceName;
    // The command line is a format string; {0} is replaced with the package's application path.
    task.CommandLine = String.Format(incarnation.CommandLine, package.AppPath);
    task.PackageName = incarnation.PackageName.ToUpperInvariant();

    /*
     * if (!String.IsNullOrEmpty(incarnation.StdInFile))
     *     taskInfo.StdinFileName = incarnation.StdInFile;
     * else
     *     taskInfo.StdinFileName = "";
     *
     * if (!String.IsNullOrEmpty(incarnation.StdOutFile))
     *     taskInfo.StdoutFileName = incarnation.StdOutFile;
     * else
     *     taskInfo.StdoutFileName = "";
     */

    // cores on nodes: {n, 0, 0} -> {n}
    task.NumberOfCores = new ClustersService.ArrayOfInt();
    task.NumberOfCores.AddRange(
        from conf in nodesConfig
        where conf.Cores > 0
        select conf.Cores
    );
    task.NumberOfNodes = task.NumberOfCores.Count;

    // Human-readable launch summary for the log.
    var launchLog = new StringWriter();
    launchLog.WriteLine(
        "Задача {0} ({1}) запускается на кластере {2}",
        task.TaskID, task.PackageName, task.ClusterName
    );
    launchLog.WriteLine(" Папка с файлами расчета: {0}", task.FTPPath);
    launchLog.WriteLine(" Строка запуска: {0}", task.CommandLine);
    launchLog.WriteLine(" Перенаправление вывода: {0}", task.StdoutFileName);
    launchLog.Write(" Количество ядер (по каждому узлу): ");

    foreach (int nodeCores in task.NumberOfCores)
    {
        launchLog.Write("{0} ", nodeCores);
    }

    Log.Info(launchLog.ToString());

    ClustersService.Code launchCode;

    lock (_clustersServiceLock)
    {
        launchCode = clusters.ExecuteTask(task);
    }

    if (launchCode != ServiceProxies.ClustersService.Code.OperationSuccess)
    {
        string message = String.Format(
            CONST.Dirty<string>("Ошибка интегратора управления кластерами при запуске задачи: {0}"),
            launchCode.ToString()
        );
        throw new ClusterException(message);
    }

    return providedTaskId;
}
/// <summary>
/// Starts a task on a Windows PC node: builds a batch job script, uploads it to the task's
/// exchange folder via FTP and calls <c>Exec</c> on the node's REx service.
/// </summary>
/// <remarks>
/// The generated script, in order:
/// creates the local temp folder and changes into it (switching drive when the path is rooted);
/// copies the package's <c>CopyOnStartup</c> paths and the shared input folder into it;
/// runs the command line through <c>cmd.exe /c</c>;
/// removes the package's <c>Cleanup</c> paths;
/// copies the temp folder to the shared output folder;
/// writes the <c>clavire_script_finished</c> marker and copies it to the shared output folder.
/// Marker files <c>clavire_script_started</c>, <c>clavire_task_started</c> and
/// <c>clavire_task_finished</c> are written along the way. The script does not remove the temp
/// folder itself.
/// </remarks>
/// <param name="taskId">Task id; used in folder paths and in the job file name <c>job_{taskId}.cmd</c>.</param>
/// <param name="incarnation">Supplies the package name and the command line format string.</param>
/// <param name="resource">Resource the default node settings are taken from.</param>
/// <param name="nodesConfig">Node configuration passed to <c>GetDefaultNodeSettings</c>.</param>
/// <returns>Provider-specific task id: the process id and the node name separated by a line feed.</returns>
/// <exception cref="Exception">The node is already marked as used by another task.</exception>
public override string Run(ulong taskId, IncarnationParams incarnation, Resource resource, IEnumerable<NodeConfig> nodesConfig)
{
    lock (_pcLock)
    {
        var node = GetDefaultNodeSettings(resource, nodesConfig);

        if (_nodeUsed[node.NodeName])
        {
            throw new Exception(String.Format("Could not run task {0} on node {1}: node used by another task", taskId, node.NodeName));
        }

        string ftpFolder = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.In);
        string jobFtpFolder = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromSystem, taskId, CopyPhase.None);
        string sharedInputFolder = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.In);
        string sharedOutputFolder = IncarnationParams.IncarnatePath(node.DataFolders.ExchangeUrlFromResource, taskId, CopyPhase.Out);
        string tmpFolder = IncarnationParams.IncarnatePath(node.DataFolders.LocalFolder, taskId, CopyPhase.None);

        IOProxy.Ftp.MakePath(ftpFolder);
        IOProxy.Ftp.MakePath(jobFtpFolder);

        string jobFileName = "job_" + taskId + ".cmd";

        Log.Info(String.Format(
            "Trying to exec task {0} on win PC {1}",
            taskId, node.NodeName
        ));

        var pack = node.Packages.First(p => String.Equals(p.Name, incarnation.PackageName, StringComparison.InvariantCultureIgnoreCase));

        // Paths are written to the script without trailing slashes; the xcopy lines add their own "\".
        char[] slashes = { '/', '\\' };
        string localDir = tmpFolder.TrimEnd(slashes);
        string sharedInputDir = sharedInputFolder.TrimEnd(slashes);
        string sharedOutputDir = sharedOutputFolder.TrimEnd(slashes);

        // xcopy <source> <target>\ with the switch set used by every copy step of the script.
        const string xcopyFormat = @"xcopy {0} {1}\ /z /s /e /c /i /h /r /y";

        // NOTE(review): paths and the command line are written unquoted, so values containing
        // spaces or shell metacharacters will change the meaning of the script - confirm the
        // inputs are trusted.
        var script = new System.Text.StringBuilder();

        script.AppendLine("mkdir " + localDir);

        if (Path.IsPathRooted(tmpFolder)) // change drive if needed
        {
            script.AppendLine(Path.GetPathRoot(tmpFolder).TrimEnd(slashes));
        }

        script.AppendLine("cd " + localDir);
        script.AppendLine("echo %time% > clavire_script_started");

        foreach (string copyPath in pack.CopyOnStartup)
        {
            script.AppendFormat(xcopyFormat, copyPath.TrimEnd(slashes), localDir).AppendLine();
        }

        script.AppendFormat(xcopyFormat, sharedInputDir, localDir).AppendLine();

        // todo : env vars on WinPc provider
        // The command line is a format string; {0} is replaced with the package's application path.
        string commandLine = String.Format(incarnation.CommandLine, pack.AppPath);

        script.AppendLine("echo %time% > clavire_task_started");
        script.AppendLine("cmd.exe /c " + commandLine);
        script.AppendLine("echo %time% > clavire_task_finished");

        foreach (string delPath in pack.Cleanup)
        {
            // Each cleanup entry is tried both as a directory and as a file pattern.
            string target = tmpFolder + delPath;
            script.AppendLine("rmdir /s /q " + target);
            script.AppendLine("del /f /s /q " + target);
        }

        script.AppendFormat(xcopyFormat, localDir, sharedOutputDir).AppendLine();

        script.AppendLine("ping localhost -n 3");
        script.AppendLine("echo %time% > clavire_script_finished");
        script.AppendFormat(xcopyFormat, "clavire_script_finished", sharedOutputDir).AppendLine();
        script.AppendLine("cd " + localDir);
        script.AppendLine("cd ..");

        IOProxy.Ftp.UploadFileContent(script.ToString(), jobFtpFolder, jobFileName);

        // The REx client is disposable (GetTaskState wraps it in using as well); release it
        // as soon as the process has been started.
        int pid;
        using (var rexService = EntryPointProxy.GetREx(node.Services.ExecutionUrl))
        {
            pid = rexService.Exec(taskId);
        }

        Log.Debug(String.Format(
            "Task {0} ({1}) started on pc {2} with pid = {3}",
            taskId, pack.Name, node.NodeName, pid
        ));

        _nodeUsed[node.NodeName] = true;

        return pid + "\n" + node.NodeName;
    }
}