/// <summary>
/// Resolves the exchange folder path for the given node and copy phase.
/// </summary>
/// <param name="nodeConfig">Node configuration; only its <c>NodeName</c> is read here.</param>
/// <param name="resource">Resource whose <c>Nodes</c> are searched for a node with the same name.</param>
/// <param name="phase">Copy phase forwarded to <c>IncarnationParams.IncarnatePath</c>.</param>
/// <returns>
/// The result of <c>IncarnationParams.IncarnatePath</c> applied to the matched node's
/// <c>DataFolders.ExchangeUrlFromSystem</c>, the current <c>TaskId</c> and <paramref name="phase"/>.
/// </returns>
private string GetFtpFolder(NodeConfig nodeConfig, Resource resource, CopyPhase phase)
{
    // First(...) is used without a fallback, so a missing node is reported as an exception
    // (presumably InvalidOperationException from LINQ - confirm against the type of Nodes).
    var matchingNode = resource.Nodes.First(candidate => candidate.NodeName == nodeConfig.NodeName);

    string exchangeUrlTemplate = matchingNode.DataFolders.ExchangeUrlFromSystem;

    return IncarnationParams.IncarnatePath(exchangeUrlTemplate, TaskId, phase);
}
/// <summary>
/// Creates a history sample and stores every argument in the member of the same name.
/// </summary>
/// <param name="package">Value stored in <c>Package</c>.</param>
/// <param name="resourceName">Value stored in <c>ResourceName</c>.</param>
/// <param name="nodesConfig">Value stored in <c>NodesConfig</c> (the array reference is kept, not copied).</param>
/// <param name="packParams">Value stored in <c>PackParams</c> (the dictionary reference is kept, not copied).</param>
/// <param name="modelCoefs">Value stored in <c>ModelCoefs</c> (the dictionary reference is kept, not copied).</param>
/// <param name="calcTime">Value stored in <c>CalcTime</c>.</param>
/// <param name="estimatorEngine">Value stored in <c>EstimatorEngine</c>.</param>
public HistorySample(
    string package,
    string resourceName,
    NodeConfig[] nodesConfig,
    Dictionary<string, string> packParams,
    Dictionary<string, double> modelCoefs,
    TimeSpan calcTime,
    PackageEngine estimatorEngine)
{
    this.Package = package;
    this.ResourceName = resourceName;
    this.NodesConfig = nodesConfig;

    this.PackParams = packParams;
    this.ModelCoefs = modelCoefs;

    this.CalcTime = calcTime;
    this.EstimatorEngine = estimatorEngine;
}
/// <summary>
/// Creates an empty schedule entry: no nodes, no fail reason and an empty set of modified parameters.
/// </summary>
private TaskSchedule()
{
    this.Nodes = new NodeConfig[0];
    this.FailReason = null;
    this.ModifiedParams = new Dictionary<string, string>();
}
/// <summary>
/// Builds a schedule for the given tasks.
/// Steps: (1) resolve per-task node permissions and emit a Fail entry for every task that has none,
/// (2) collect per-node estimations for every task, (3) ask the scheduler (<c>Reschedule</c>) for a schedule,
/// (4) if the scheduler result is not accepted, fall back to the simple allocator (<c>AllocateTasks</c>).
/// </summary>
/// <param name="tasks">Tasks to schedule. Enumerated several times; null or empty yields an empty result.</param>
/// <param name="resources">Resources to allocate. Enumerated several times; null or empty yields an empty result.</param>
/// <param name="dependencies">Task dependencies; only forwarded to <c>Reschedule</c>.</param>
/// <param name="estimations">
/// Receives the estimations collected per task id and node configuration
/// (an empty dictionary when the method returns early).
/// </param>
/// <returns>
/// An empty sequence when there are no tasks or no resources; otherwise a list holding the Fail entries
/// for tasks without permissions followed by the entries produced by the scheduler or by the allocator.
/// </returns>
public static IEnumerable<TaskSchedule> Build(
    IEnumerable<Task> tasks,
    IEnumerable<Resource> resources,
    IEnumerable<TaskDependency> dependencies,
    out Dictionary<ulong, Dictionary<NodeConfig, Estimation>> estimations)
{
    estimations = new Dictionary<ulong, Dictionary<NodeConfig, Estimation>>();

    if (tasks == null || !tasks.Any())
    {
        Log.Warn("No tasks to schedule");
        return Enumerable.Empty<TaskSchedule>();
    }

    if (resources == null || !resources.Any())
    {
        Log.Warn("No resources to allocate");
        return Enumerable.Empty<TaskSchedule>();
    }

    var schedule = new List<TaskSchedule>();

    Log.Info(String.Format("Checking permissions for tasks [{0}]", String.Join(", ", tasks.Select(t => t.TaskId.ToString()))));

    // Assigned by the out argument below, so no initial allocation is needed.
    Dictionary<ulong, string> failReasonForTask;
    var permissionsForTask = GetPermissionsForTasks(tasks, resources, out failReasonForTask);

    foreach (var task in tasks)
    {
        // todo : move to SimpleAllocator
        if (/*task.State == TaskState.ReadyToExecute &&*/ !permissionsForTask[task.TaskId].Any())
        {
            if (!failReasonForTask.ContainsKey(task.TaskId))
            {
                Log.Warn("Unknown permission error");
                failReasonForTask[task.TaskId] = "Can't run task bacause of unknown permission error";
            }

            schedule.Add(new TaskSchedule()
            {
                TaskId = task.TaskId,
                Action = ScheduledAction.Fail,
                FailReason = failReasonForTask[task.TaskId],
                Nodes = new NodeConfig[0],
            });
        }
    }

    Log.Debug(
        "Permissions for tasks are: " +
        String.Join("; ", tasks.Select(t =>
            String.Format("{0} -> {1}", t.TaskId, String.Join(", ", permissionsForTask[t.TaskId]))
        ))
    );

    // Get Estimations
    var estimationsForTask = new Dictionary<ulong, Dictionary<NodeConfig, Estimation>>();
    foreach (var task in tasks)
    {
        task.Time.AddToOverheads(TaskTimeOverheads.Estimation, () =>
        {
            // Materialized once: the node list is handed to the estimator and then iterated again below.
            var permittedNodes = resources
                .SelectMany(r => r.Nodes)
                .Where(n => permissionsForTask[task.TaskId].Contains(n.ResourceName + "." + n.NodeName))
                .ToList();

            // todo : use CoresAvailable if can run now, and CoresTotal if in the future
            Dictionary<NodeConfig, Estimation> taskEstimations =
                PackageBaseProxy.GetEstimationsByModel(task.PackageEngineState, resources, permittedNodes);
            estimationsForTask[task.TaskId] = taskEstimations;

            foreach (var node in permittedNodes)
            {
                // Matching is done by node name only, exactly as before.
                if (taskEstimations.Keys.Any(existing => existing.NodeName == node.NodeName))
                {
                    continue;
                }

                // No model estimation for this permitted node: add a single-core placeholder.
                var config = new NodeConfig
                {
                    NodeName = node.NodeName,
                    ResourceName = node.ResourceName,
                    Cores = 1,
                };

                // todo: [!] estimate from history
                // (was planned as: new HistoryEstimation(1337 + node.NodeName.Length))
                var placeholder = new Estimation(null, null);
                placeholder.CalcDuration = TimeSpan.FromSeconds(1.337 + node.NodeName.Length);
                taskEstimations[config] = placeholder;
            }
        });
    }

    estimations = estimationsForTask;

    try
    {
        // Logging only; a failure while formatting the estimations must not abort scheduling.
        Log.Debug(
            "Estimations for tasks are: " +
            String.Join("; ", tasks.Select(t =>
                String.Format("{0} -> {1}", t.TaskId, String.Join(", ",
                    estimationsForTask[t.TaskId]
                        .Select(pair => String.Format("{0} on {1}.{2}", pair.Value.CalcDuration, pair.Key.ResourceName, pair.Key.NodeName))
                ))
            ))
        );
    }
    catch (Exception estimEx)
    {
        Log.Warn(estimEx.ToString());
    }

    bool scheduledSuccessfully = false;

    try
    {
        Log.Info("Scheduling");

        // Stopwatch instead of DateTime.Now subtraction: not affected by wall-clock adjustments.
        var schedulingTimer = System.Diagnostics.Stopwatch.StartNew();
        var scheduleByScheduler = Reschedule(tasks, resources, dependencies, /*permissionsForTask,*/ estimationsForTask);
        schedulingTimer.Stop();
        var schedulingTime = schedulingTimer.Elapsed;

        // var schedulingTimeOnOneTask = TimeSpan.FromMilliseconds(schedulingTime.TotalMilliseconds / scheduleByScheduler.Count());
        Log.Debug(String.Format("Scheduling took {0} seconds", schedulingTime.TotalSeconds));

        // Guarded: a null schedule is a handled case below and must not surface here as a
        // NullReferenceException that gets logged as a scheduler exception.
        if (scheduleByScheduler != null)
        {
            foreach (var schedResult in scheduleByScheduler)
            {
                tasks.Single(t => t.TaskId == schedResult.TaskId)
                    //.Time.AddToOverheads(TaskTimeOverheads.Scheduler, schedulingTimeOnOneTask);
                    .Time.AddToOverheads(TaskTimeOverheads.Scheduler, schedulingTime);
            }
        }

        if (scheduleByScheduler != null && scheduleByScheduler.Any())
        {
            var nonPermittedSchedule = scheduleByScheduler.FirstOrDefault(s =>
                s.Action == ScheduledAction.Run &&
                s.Nodes.Select(n => n.ResourceName + "." + n.NodeName).Except(permissionsForTask[s.TaskId]).Any());

            if (nonPermittedSchedule != null)
            {
                // scheduled to run on non-permitted resource
                Log.Error(String.Format(
                    "Scheduler scheduled task {0} on non-permitted resource config: '{1}', nodes '{2}'",
                    nonPermittedSchedule.TaskId, nonPermittedSchedule.ResourceName,
                    String.Join("', '", nonPermittedSchedule.Nodes.Select(n => n.NodeName))
                ));

                scheduledSuccessfully = false;
            }
            else
            {
                // Sanity-check data. NOTE: the code that consumed insaneDecisions is commented out below,
                // so the deferred queries (busyNodes, overtargetedNodes, insaneDecisions) are never
                // enumerated; only the two ToDictionary calls actually execute.
                var busyNodes = resources.SelectMany(r => r.Nodes.Where(n =>
                    (n.CoresAvailable <= 0 || n.SubmissionsAvailable <= 0) &&
                    // n.CoresCount > 0 &&
                    !scheduleByScheduler.Any(s =>
                        s.Action == ScheduledAction.Abort &&
                        s.Nodes.Any(sn => sn.NodeName == n.NodeName && sn.ResourceName == n.ResourceName))
                )).Select(n => n.ResourceName + "." + n.NodeName);

                var nodeLimits = resources.SelectMany(r => r.Nodes.Select(n =>
                    new
                    {
                        ResourceName = n.ResourceName,
                        NodeName = n.NodeName,
                        CoresLimit = n.CoresAvailable,
                        TasksLimit = n.TasksSubmissionLimit
                    }
                )).ToDictionary(lim => lim.ResourceName + "." + lim.NodeName, lim => lim);

                var nodeAborts = scheduleByScheduler
                    .Where(s => s.Action == ScheduledAction.Abort)
                    .SelectMany(s => s.Nodes)
                    .GroupBy(s => s.ResourceName + "." + s.NodeName)
                    .ToDictionary(group => group.Key, group => new { Tasks = group.Count(), Cores = group.Sum(s => s.Cores) });

                var overtargetedNodes = scheduleByScheduler
                    .Where(s => s.Action == ScheduledAction.Run)
                    .SelectMany(s => s.Nodes)
                    .GroupBy(n => n.ResourceName + "." + n.NodeName)
                    .Where(
                        group =>
                            group.Count() > nodeLimits[group.Key].TasksLimit + (nodeAborts.ContainsKey(group.Key) ? nodeAborts[group.Key].Tasks : 0) ||
                            group.Sum(n => n.Cores) > nodeLimits[group.Key].CoresLimit + (nodeAborts.ContainsKey(group.Key) ? nodeAborts[group.Key].Cores : 0))
                    .Select(group => group.Key);

                // todo: disallow targeting same node twice in schedule for one task
                var insaneDecisions = scheduleByScheduler.Join(tasks, s => s.TaskId, t => t.TaskId, (s, t) =>
                    ((s.Action == ScheduledAction.Run && t.State != TaskState.ReadyToExecute) ||
                     (s.Action == ScheduledAction.Run && String.IsNullOrEmpty(s.ResourceName)) ||
                     (s.Action == ScheduledAction.Run && !s.Nodes.Any()) ||
                     //(s.Action == ScheduledAction.Run && s.Nodes.Any(n => n.Cores <= 0)) ||
                     (s.Action == ScheduledAction.Run && busyNodes.Intersect(s.Nodes.Select(conf => conf.ResourceName + "." + conf.NodeName)).Any()) ||
                     (s.Action == ScheduledAction.Run && overtargetedNodes.Intersect(s.Nodes.Select(conf => conf.ResourceName + "." + conf.NodeName)).Any()) ||
                     (s.Action == ScheduledAction.Abort && t.State != TaskState.Started))
                        ? new
                        {
                            action = s.Action,
                            state = t.State,
                            taskId = t.TaskId,
                            resourceName = s.ResourceName,
                            nodeNames = s.Nodes.Select(n => n.NodeName)
                        }
                        : null
                ).Where(r => r != null);

                // Disabled filtering of insane decisions (kept for reference):
                //
                // if (insaneDecisions.Any())
                // {
                //     if (busyNodes.Any())
                //         Log.Debug("Busy nodes: " + String.Join(", ", busyNodes));
                //
                //     if (overtargetedNodes.Any())
                //         Log.Debug("Overtargeted nodes: " + String.Join(", ", overtargetedNodes));
                //
                //     Log.Error(
                //         "Scheduler made some insane decisions: " +
                //         String.Join(", ",
                //             insaneDecisions.Select(d => String.Format(
                //                 "{0} {1} task '{2}' on {3}({4})",
                //                 d.action, d.state, d.taskId,
                //                 d.resourceName, String.Join(", ", d.nodeNames)
                //             ))
                //         ) +
                //         ". Ignoring them."
                //     );
                //
                //     //Log.Warn("NOT ignoring insane schedule for debug purposes"); // todo : [!] remove
                //     scheduleByScheduler = scheduleByScheduler.Where(s => !insaneDecisions.Any(d => d.taskId == s.TaskId)).ToArray();
                // }

                if (scheduleByScheduler.Any())
                {
                    schedule.AddRange(scheduleByScheduler);
                    scheduledSuccessfully = true;
                }
                else
                {
                    scheduledSuccessfully = false;
                }
            }
        }
        else if (tasks.All(t => t.State == TaskState.Started))
        {
            Log.Info("All tasks sento to the scheduler are running. Ignore null or empty schedule.");
            scheduledSuccessfully = true;
        }
        else
        {
            Log.Warn("Scheduler returned bad schedule (either null or empty).");
        }
    }
    catch (Exception e)
    {
        // Best effort: a scheduler failure is logged and the simple allocator is used instead.
        Log.Error(String.Format(
            "Exception in scheduler: {0}\n{1}",
            e.Message, e.StackTrace
        ));
    }

    //todo: remove it later
    // scheduledSuccessfully = false;

    if (!scheduledSuccessfully)
    {
        try
        {
            Log.Info("Using simple task allocator");
            var scheduleByAllocator = AllocateTasks(tasks, resources, /* permissionsForTask, */ estimationsForTask);
            schedule.AddRange(scheduleByAllocator);
        }
        catch (Exception e)
        {
            Log.Error(String.Format(
                "Exception in allocator: {0}\n{1}",
                e.Message, e.StackTrace
            ));
        }
    }

    return schedule;
}
/// <summary>
/// Produces the exchange folder path to use for the given node during the given copy phase.
/// </summary>
/// <param name="nodeConfig">Node configuration; its <c>NodeName</c> selects the node inside <paramref name="resource"/>.</param>
/// <param name="resource">Resource whose <c>Nodes</c> collection is searched.</param>
/// <param name="phase">Copy phase passed through to <c>IncarnationParams.IncarnatePath</c>.</param>
/// <returns>
/// Whatever <c>IncarnationParams.IncarnatePath</c> returns for the selected node's
/// <c>DataFolders.ExchangeUrlFromSystem</c>, the current <c>TaskId</c> and <paramref name="phase"/>.
/// </returns>
private string GetFtpFolder(NodeConfig nodeConfig, Resource resource, CopyPhase phase)
{
    // The first node with an equal NodeName wins; First(...) has no fallback, so an unknown
    // node name surfaces as an exception rather than a null path.
    var targetNode = resource.Nodes.First(item => item.NodeName == nodeConfig.NodeName);

    string folderTemplate = targetNode.DataFolders.ExchangeUrlFromSystem;

    return IncarnationParams.IncarnatePath(folderTemplate, TaskId, phase);
}