/// <summary>
/// Tries to obtain a <see cref="SwiftProcess"/> for a task from an operating-system process id.
/// The OS process is looked up by id and is only wrapped when
/// <c>CheckTaskAndProcessMatch</c> confirms it belongs to <paramref name="task"/>.
/// </summary>
/// <returns><c>true</c> when a matching process was found and <paramref name="process"/> was created; otherwise <c>false</c>.</returns>
/// <param name="processId">Operating-system process id to look up.</param>
/// <param name="task">Task the process is expected to belong to.</param>
/// <param name="process">The created instance, or <c>null</c> when the method returns <c>false</c>.</param>
public static bool TryGetById(int processId, JobTask task, out SwiftProcess process)
{
    process = null;

    Process osProcess;
    try
    {
        osProcess = Process.GetProcessById(processId);
    }
    catch (Exception ex)
    {
        // A failed lookup is treated as "no such process" and logged at Info level only.
        LogWriter.Write(string.Format("根据进程Id查找进程失败:{0},{1}", task.BusinessId, processId), ex, LogLevel.Info);
        return false;
    }

    LogWriter.Write("已经根据进程Id找到进程:" + processId);

    // Defensive guard kept from the original implementation.
    if (osProcess == null)
    {
        return false;
    }

    // The Process handle is owned by this method until it is handed to the new SwiftProcess.
    // If that hand-over does not happen (mismatch or exception) the handle is released here.
    bool handedOver = false;
    try
    {
        if (!CheckTaskAndProcessMatch(task, osProcess, out SwiftProcessCommandLine _))
        {
            return false;
        }

        process = new SwiftProcess("ExecuteTask", task, osProcess);
        handedOver = true;
    }
    finally
    {
        if (!handedOver)
        {
            osProcess.Dispose();
        }
    }

    LogWriter.Write(string.Format("已创建出SwiftProcess实例:{0},{1}", processId, task.BusinessId));
    return true;
}
/// <summary>
/// Cleans up child processes that are no longer under control.
/// Per the original author's note, closing a process also ends the corresponding
/// system task, so system tasks need no separate cleanup here.
/// </summary>
private void CleanOutOfControlChildProcess()
{
    LogWriter.Write("开始清理脱离控制的子进程...", LogLevel.Trace);

    var localProcesses = SwiftProcess.GetAllLocalProcess();

    // Job-split processes: entries whose first field is "SplitJob".
    var splitJobEntries = from entry in localProcesses
                          where entry.GetValue(0).ToString() == "SplitJob"
                          select entry;
    if (splitJobEntries.Any())
    {
        CleanOutOfControlJobSplitProcess(splitJobEntries);
    }

    // Task-result merge processes: entries whose first field is "CollectTaskResult".
    var collectResultEntries = from entry in localProcesses
                               where entry.GetValue(0).ToString() == "CollectTaskResult"
                               select entry;
    if (collectResultEntries.Any())
    {
        CleanOutOfControlCollectTaskResultProcess(collectResultEntries);
    }

    // Task execution processes: entries whose first field is "ExecuteTask".
    var executeTaskEntries = from entry in localProcesses
                             where entry.GetValue(0).ToString() == "ExecuteTask"
                             select entry;
    if (executeTaskEntries.Any())
    {
        CleanOutOfControlTaskExecuteProcess(executeTaskEntries);
    }
}
/// <summary>
/// Tries to obtain a <see cref="SwiftProcess"/> for a job from an operating-system process id.
/// The OS process is looked up by id and is only wrapped when
/// <c>CheckJobAndProcessMatch</c> confirms it matches the job name, job id and method.
/// </summary>
/// <returns><c>true</c> when a matching process was found and <paramref name="process"/> was created; otherwise <c>false</c>.</returns>
/// <param name="processId">Operating-system process id to look up.</param>
/// <param name="job">Job the process is expected to belong to.</param>
/// <param name="method">Method name the process is expected to run; callers in this file pass "SplitJob" or "CollectTaskResult".</param>
/// <param name="process">The created instance, or <c>null</c> when the method returns <c>false</c>.</param>
public static bool TryGetById(int processId, JobBase job, string method, out SwiftProcess process)
{
    process = null;

    Process osProcess;
    try
    {
        osProcess = Process.GetProcessById(processId);
    }
    catch (Exception ex)
    {
        // A failed lookup is treated as "no such process" and logged at Info level only.
        LogWriter.Write(string.Format("根据进程Id查找进程失败:{0},{1}", job.BusinessId, processId), ex, LogLevel.Info);
        return false;
    }

    LogWriter.Write("已经根据进程Id找到进程:" + processId);

    // Defensive guard kept from the original implementation.
    if (osProcess == null)
    {
        return false;
    }

    // The Process handle is owned by this method until it is handed to the new SwiftProcess.
    // If that hand-over does not happen (mismatch or exception) the handle is released here.
    bool handedOver = false;
    try
    {
        if (!CheckJobAndProcessMatch(processId, job.Name, job.Id, method))
        {
            return false;
        }

        process = new SwiftProcess(method, job, osProcess);
        handedOver = true;
    }
    finally
    {
        if (!handedOver)
        {
            osProcess.Dispose();
        }
    }

    LogWriter.Write(string.Format("已创建出SwiftProcess实例:{0},{1}", processId, job.BusinessId));
    return true;
}
/// <summary>
/// Cleans up job-split processes that are no longer under control.
/// A process is killed when its job record no longer exists or the job is not in the
/// PlanMaking status; otherwise it is left running.
/// </summary>
/// <param name="jobSplitProcess">Process entries; index 1 is the process id, index 2 the job name, index 3 the job id.</param>
/// <param name="cancellationToken">Token passed through to the job record lookup.</param>
private void CleanOutOfControlJobSplitProcess(IEnumerable <string[]> jobSplitProcess, CancellationToken cancellationToken = default)
{
    foreach (string[] entry in jobSplitProcess)
    {
        int pid = int.Parse(entry[1]);
        string name = entry[2];
        string id = entry[3];
        LogWriter.Write(string.Format("正在处理:{0},{1}", name, id));

        var record = _cluster.ConfigCenter.GetJobRecord(name, id, _cluster, cancellationToken);

        if (record == null)
        {
            // The job record is gone, so the split process is abandoned.
            LogWriter.Write("作业记录不存在了,尝试关闭废弃的作业分割进程");
            SwiftProcess.KillAbandonedJobSplitProcess(pid, name, id);
        }
        else
        {
            LogWriter.Write(string.Format("作业记录存在"));

            if (record.Status == EnumJobRecordStatus.PlanMaking)
            {
                // The job is still making its plan; leave the process alone.
                LogWriter.Write(string.Format("作业在PlanMaking状态,将继续运行"));
            }
            else
            {
                // The job has left PlanMaking, so the split process is abandoned.
                LogWriter.Write("任务非PlanMaking状态,尝试关闭废弃的作业分割进程");
                SwiftProcess.KillAbandonedJobSplitProcess(pid, name, id);
            }
        }
    }
}
/// <summary>
/// Checks a job whose status is "task merging" and decides whether it should keep running.
/// </summary>
/// <returns><c>true</c> when the merge process was found and the job should continue to be monitored; <c>false</c> when the job status was updated instead.</returns>
/// <param name="job">Job to check.</param>
/// <param name="cancellationToken">Token passed through to the job status updates.</param>
private bool CheckTaskMergingJob(JobBase job, CancellationToken cancellationToken = default)
{
    int pid = job.GetProcessId("CollectTaskResult");

    // No process id: the node went down before the process was started, or the start failed.
    if (pid == -1)
    {
        LogWriter.Write("没有进程Id,启动任务合并进程前就挂掉了或者进程没有启动成功,将更新作业状态为TaskSynced");
        job.UpdateJobStatus(EnumJobRecordStatus.TaskSynced, cancellationToken);
        return false;
    }

    // Look up the merge process; it stays null when it cannot be found.
    SwiftProcess mergeProcess;
    SwiftProcess.TryGetById(pid, job, "CollectTaskResult", out mergeProcess);

    var mergeStatus = job.GetCollectTaskResultStatus();

    // The status file says the merge finished: update the job status. The process should have
    // exited by now; if it has not (possibly unreleased resources), force it to exit.
    if (mergeStatus.ErrCode == 0)
    {
        LogWriter.Write("任务合并状态文件标示已经执行完毕,准备更新作业状态为TaskMerged");
        job.UpdateJobStatus(EnumJobRecordStatus.TaskMerged, cancellationToken);

        if (mergeProcess != null)
        {
            job.KillCollectTaskResultProcess();
        }

        return false;
    }

    // The process was found by its id: keep monitoring the job.
    if (mergeProcess != null)
    {
        return true;
    }

    // The process is gone and the status file is not "finished": two cases remain.
    if (mergeStatus.ErrCode == 2 || mergeStatus.ErrCode == 4)
    {
        LogWriter.Write("找不到进程,状态为进行中或待执行,则回退到TaskSynced");
        job.UpdateJobStatus(EnumJobRecordStatus.TaskSynced, cancellationToken);
    }
    else
    {
        LogWriter.Write("找不到进程,状态为错误,则修改为TaskMergeFailed,需要调查");
        job.UpdateJobStatus(EnumJobRecordStatus.TaskMergeFailed, cancellationToken);
    }

    return false;
}
/// <summary>
/// Checks a job whose status is "plan making" and decides whether it should keep running.
/// </summary>
/// <returns><c>true</c> when the job-split process was found and the job should continue to be monitored; <c>false</c> when the job status was updated instead.</returns>
/// <param name="job">Job to check.</param>
/// <param name="cancellationToken">Token passed through to the job status updates.</param>
private bool CheckPlanMakingJob(JobBase job, CancellationToken cancellationToken = default)
{
    int pid = job.GetProcessId("SplitJob");

    // No process id: the job went down before the process was started, or the start failed.
    if (pid == -1)
    {
        LogWriter.Write("没有进程Id,启动作业分割进程前就挂掉了或者进程没有启动成功,将更新任务状态为初始");
        job.UpdateJobStatus(EnumJobRecordStatus.Pending, cancellationToken);
        return false;
    }

    // Look up the split process; it stays null when it cannot be found.
    SwiftProcess splitProcess;
    SwiftProcess.TryGetById(pid, job, "SplitJob", out splitProcess);

    var splitStatus = job.GetJobSplitStatus();

    // The status file says the split finished: update the job status. The process should have
    // exited by now; if it has not (possibly unreleased resources), force it to exit.
    if (splitStatus.ErrCode == 0)
    {
        LogWriter.Write("作业分割状态文件标示已经执行完毕,准备更新作业状态为计划制定完成");
        job.UpdateJobStatus(EnumJobRecordStatus.PlanMaked, cancellationToken);

        if (splitProcess != null)
        {
            job.KillJobSplitProcess();
        }

        return false;
    }

    // The process was found by its id: keep monitoring the job.
    if (splitProcess != null)
    {
        return true;
    }

    // The process is gone and the status file is not "finished": two cases remain.
    if (splitStatus.ErrCode == 2 || splitStatus.ErrCode == 4)
    {
        LogWriter.Write("找不到进程,状态为进行中或待执行,则回退到初始状态");
        job.UpdateJobStatus(EnumJobRecordStatus.Pending, cancellationToken);
    }
    else
    {
        LogWriter.Write("找不到进程,状态错误,则修改为失败状态,需要调查");
        job.UpdateJobStatus(EnumJobRecordStatus.PlanFailed, cancellationToken);
    }

    return false;
}
/// <summary>
/// Kills every job-split process in the given list; as the log message states,
/// a worker is not supposed to run job-split processes at all.
/// </summary>
/// <param name="jobSplitProcess">Process entries; index 1 is the process id, index 2 the job name, index 3 the job id.</param>
private void CleanOutOfControlJobSplitProcess(IEnumerable <string[]> jobSplitProcess)
{
    LogWriter.Write("Worker不应该执行作业分割进程,他们都应该被Kill");

    foreach (string[] entry in jobSplitProcess)
    {
        int pid = int.Parse(entry[1]);
        string name = entry[2];
        string id = entry[3];

        LogWriter.Write(string.Format("正在处理:{0},{1}", name, id));
        SwiftProcess.KillAbandonedJobSplitProcess(pid, name, id);
    }
}
/// <summary>
/// Kills an abandoned "CollectTaskResult" process and deletes its process file.
/// The OS process is killed only when <c>CheckJobAndProcessMatch</c> confirms it belongs to the
/// given job. The process file is deleted unless killing the process failed.
/// </summary>
/// <param name="processId">Operating-system process id recorded for the process.</param>
/// <param name="jobName">Job name.</param>
/// <param name="jobId">Job identifier.</param>
public static void KillAbandonedCollectTaskResultProcess(int processId, string jobName, string jobId)
{
    var businessId = JobBase.FormatBusinessId(jobName, jobId);

    Process osProcess = null;
    try
    {
        osProcess = Process.GetProcessById(processId);
    }
    catch (Exception ex)
    {
        // A failed lookup most likely means the process has already exited; carry on to file cleanup.
        LogWriter.Write(string.Format("根据进程Id查找进程异常,进程可能已经关闭了:{0},{1}", businessId, processId), ex, LogLevel.Info);
    }

    bool canDeleteProcessFile = true;

    // The using statement accepts a null resource and releases the Process handle on every path.
    using (osProcess)
    {
        if (osProcess != null
            && SwiftProcess.CheckJobAndProcessMatch(osProcess, jobName, jobId, "CollectTaskResult"))
        {
            try
            {
                osProcess.Kill();
                osProcess.WaitForExit();
                LogWriter.Write(string.Format("已关闭废弃的任务合并进程:{0},{1}", businessId, processId), LogLevel.Info);
            }
            catch (Exception ex)
            {
                // Keep the process file when the kill failed.
                canDeleteProcessFile = false;
                LogWriter.Write(string.Format("关闭废弃的任务合并进程异常:{0},{1}", businessId, processId), ex, LogLevel.Error);
            }
        }
    }

    if (!canDeleteProcessFile)
    {
        return;
    }

    // Reuse the business id computed above instead of formatting it a second time.
    var processPath = SwiftConfiguration.GetSwiftProcessPath("CollectTaskResult", businessId);
    try
    {
        File.Delete(processPath);
        LogWriter.Write(string.Format("进程文件已删除:{0}", processPath), LogLevel.Info);
    }
    catch (Exception ex)
    {
        LogWriter.Write(string.Format("删除废弃的任务合并进程文件异常:{0},{1}", businessId, processId), ex, LogLevel.Error);
    }
}
/// <summary>
/// Kills every task execution process in the given list; as the log message states,
/// a manager is not supposed to run task processes at all.
/// </summary>
/// <param name="taskExecuteProcess">Process entries; index 1 is the process id, index 2 the job name, index 3 the job id, index 4 the task id.</param>
private void CleanOutOfControlTaskExecuteProcess(IEnumerable <string[]> taskExecuteProcess)
{
    LogWriter.Write("Manager不应该执行任务进程,他们都应该被Kill");

    foreach (string[] entry in taskExecuteProcess)
    {
        int pid = int.Parse(entry[1]);
        string name = entry[2];
        string id = entry[3];
        int task = int.Parse(entry[4]);

        LogWriter.Write(string.Format("正在处理:{0},{1},{2}", name, id, task));
        SwiftProcess.KillAbandonedTaskProcess(pid, name, id, task);
    }
}
/// <summary>
/// Cleans up task-result merge processes. When this member is not the cluster manager,
/// every process in the list is killed; when it is the manager, nothing is done.
/// </summary>
/// <param name="collectTaskResultProcess">Process entries; index 1 is the process id, index 2 the job name, index 3 the job id.</param>
private void CleanOutOfControlCollectTaskResultProcess(IEnumerable <string[]> collectTaskResultProcess)
{
    // The manager's own merge processes are left untouched by this method.
    if (_member.Id == _cluster.Manager.Id)
    {
        return;
    }

    LogWriter.Write("非Manager节点不应该执行任务合并进程,他们都应该被Kill");

    foreach (string[] entry in collectTaskResultProcess)
    {
        int pid = int.Parse(entry[1]);
        string name = entry[2];
        string id = entry[3];

        LogWriter.Write(string.Format("正在处理:{0},{1}", name, id));
        SwiftProcess.KillAbandonedCollectTaskResultProcess(pid, name, id);
    }
}
/// <summary>
/// Checks a task whose status is "executing" and decides whether it should keep running.
/// </summary>
/// <returns><c>true</c> when the task process was found and the task should continue to be monitored; <c>false</c> when the task status was updated instead.</returns>
/// <param name="task">Task to check.</param>
/// <param name="cancellationToken">Token passed through to the process id lookup and the task status updates.</param>
private bool CheckExecutingJobTask(JobTask task, CancellationToken cancellationToken = default(CancellationToken))
{
    int pid = task.GetProcessId(cancellationToken);

    // No process id: the task went down before the process was started, or the start failed.
    if (pid == -1)
    {
        LogWriter.Write("没有进程Id,任务在启动进程前就挂掉了或者进程没有启动成功,将更新任务状态为准备");
        task.UpdateTaskStatus(EnumTaskStatus.Pending, cancellationToken);
        return false;
    }

    // Look up the task process; it stays null when it cannot be found.
    SwiftProcess taskProcess;
    SwiftProcess.TryGetById(pid, task, out taskProcess);

    var status = task.GetTaskExecuteStatus();

    // The status file says the task finished: mark it completed. The process should have
    // exited by now; if it has not (possibly unreleased resources), force it to exit.
    if (status.ErrCode == 0)
    {
        LogWriter.Write("任务执行状态文件标示已经执行完毕,准备更新任务状态完成");
        task.UpdateTaskStatus(EnumTaskStatus.Completed, cancellationToken);

        if (taskProcess != null)
        {
            task.KillProcess();
        }

        return false;
    }

    // The process was found by its id: keep monitoring the task.
    if (taskProcess != null)
    {
        return true;
    }

    // Not finished according to the status file, and the process is gone: reset the task.
    LogWriter.Write("找不到进程,将更新任务状态为准备");
    task.UpdateTaskStatus(EnumTaskStatus.Pending, cancellationToken);
    return false;
}
/// <summary>
/// Cleans up task execution processes that are no longer under control.
/// A process is killed when its job record no longer exists, when the task is not found in
/// this member's entry of the job's task plan, or when the task is not in the Executing
/// status; otherwise it is left running.
/// </summary>
/// <param name="taskExecuteProcess">Process entries; index 1 is the process id, index 2 the job name, index 3 the job id, index 4 the task id.</param>
/// <param name="cancellationToken">Token passed through to the job record lookup.</param>
private void CleanOutOfControlTaskExecuteProcess(IEnumerable <string[]> taskExecuteProcess, CancellationToken cancellationToken = default(CancellationToken))
{
    foreach (var processInfo in taskExecuteProcess)
    {
        var processId = int.Parse(processInfo[1]);
        var jobName = processInfo[2];
        var jobId = processInfo[3];
        var taskId = int.Parse(processInfo[4]);
        LogWriter.Write(string.Format("正在处理:{0},{1},{2}", jobName, jobId, taskId));

        var jobRecord = _cluster.ConfigCenter.GetJobRecord(jobName, jobId, _cluster, cancellationToken);

        // The job record is gone, so the task process is abandoned.
        if (jobRecord == null)
        {
            LogWriter.Write("作业记录不存在了,尝试关闭废弃的任务进程");
            SwiftProcess.KillAbandonedTaskProcess(processId, jobName, jobId, taskId);
            continue;
        }

        LogWriter.Write(string.Format("作业记录存在"));

        // Find the task among the tasks planned for this member. A missing plan, or a plan
        // entry without a task list, is treated the same as "task not assigned to me" rather
        // than throwing and aborting the cleanup of the remaining processes.
        var task = jobRecord.TaskPlan?
            .Where(plan => plan.Key == _member.Id && plan.Value != null)
            .SelectMany(plan => plan.Value)
            .FirstOrDefault(t => t.Id == taskId);

        // The task is no longer mine, so the task process is abandoned.
        if (task == null)
        {
            LogWriter.Write("任务被重新分走了,尝试关闭废弃的任务进程");
            SwiftProcess.KillAbandonedTaskProcess(processId, jobName, jobId, taskId);
            continue;
        }

        LogWriter.Write(string.Format("任务存在"));

        // The task has left the Executing status, so the task process is abandoned.
        if (task.Status != EnumTaskStatus.Executing)
        {
            LogWriter.Write("任务非Executing状态,尝试关闭废弃的任务进程");
            SwiftProcess.KillAbandonedTaskProcess(processId, jobName, jobId, taskId);
            continue;
        }

        LogWriter.Write(string.Format("任务在Executing状态,将继续运行"));
    }
}