/// <summary>
/// Builds the diagnostic message used when a container is found to be
/// running beyond one of its memory limits.
/// </summary>
/// <param name="memTypeExceeded">Label of the memory kind that was exceeded; inserted verbatim into the message.</param>
/// <param name="currentVmemUsage">Current virtual-memory usage, forwarded to FormatUsageString.</param>
/// <param name="vmemLimit">Virtual-memory limit, forwarded to FormatUsageString.</param>
/// <param name="currentPmemUsage">Current physical-memory usage, forwarded to FormatUsageString.</param>
/// <param name="pmemLimit">Physical-memory limit, forwarded to FormatUsageString.</param>
/// <param name="pId">Process id printed in the message header.</param>
/// <param name="containerId">Container id printed in the header and in the dump caption.</param>
/// <param name="pTree">Process tree whose dump is appended to the message.</param>
/// <returns>The header, the usage summary and the process-tree dump as one string.</returns>
private string FormatErrorMessage(string memTypeExceeded, long currentVmemUsage, long vmemLimit, long currentPmemUsage, long pmemLimit, string pId, ContainerId containerId, ResourceCalculatorProcessTree pTree)
{
    // string.Format uses indexed {n} placeholders; Java-style "%s" would be
    // written out literally and the arguments silently dropped.
    string header = string.Format(
        "Container [pid={0},containerID={1}] is running beyond {2} memory limits. ",
        pId, containerId, memTypeExceeded);

    return header
        + "Current usage: "
        + this.FormatUsageString(currentVmemUsage, vmemLimit, currentPmemUsage, pmemLimit)
        + ". Killing container.\n"
        + "Dump of the process-tree for " + containerId + " :\n"
        + pTree.GetProcessTreeDump();
}
/// <summary>
/// Creates the bookkeeping record for one container by storing every
/// argument in the field of the same name.
/// </summary>
/// <param name="containerId">Id of the container this record belongs to.</param>
/// <param name="pid">Process id stored for the container.</param>
/// <param name="pTree">Process tree stored for the container.</param>
/// <param name="vmemLimit">Virtual-memory limit stored for the container.</param>
/// <param name="pmemLimit">Physical-memory limit stored for the container.</param>
/// <param name="cpuVcores">Number of CPU vcores stored for the container.</param>
public ProcessTreeInfo(
    ContainerId containerId,
    string pid,
    ResourceCalculatorProcessTree pTree,
    long vmemLimit,
    long pmemLimit,
    int cpuVcores)
{
    // Identity of the tracked container and its process.
    this.containerId = containerId;
    this.pid = pid;
    this.pTree = pTree;

    // Resource limits.
    this.cpuVcores = cpuVcores;
    this.pmemLimit = pmemLimit;
    this.vmemLimit = vmemLimit;
}
/// <summary>
/// Overload provided just for easy testing purposes: reads the
/// virtual-memory figures from <paramref name="pTree"/> and delegates to the
/// overload that takes the usage values directly.
/// </summary>
/// <param name="pTree">Process tree to read the virtual-memory sizes from.</param>
/// <param name="containerId">Container id passed through to the delegate overload.</param>
/// <param name="limit">Limit passed through to the delegate overload.</param>
/// <returns>Whatever the delegate overload returns for the sampled values.</returns>
internal virtual bool IsProcessTreeOverLimit(ResourceCalculatorProcessTree pTree, string containerId, long limit)
{
    // Arguments are evaluated left to right, so the total size is still
    // sampled before the aged size. Processes begin with an age of 1, hence
    // the second sample asks for processes more than 1 iteration old.
    return IsProcessTreeOverLimit(
        containerId,
        pTree.GetVirtualMemorySize(),
        pTree.GetVirtualMemorySize(1),
        limit);
}
/// <summary>
/// Decides whether monitoring should run at all. Each failed precondition
/// is logged at info level and makes the method return false.
/// </summary>
/// <returns>
/// false when the resource calculator plugin is null, when no process tree
/// can be obtained for the probe pid "0", or when neither the physical nor
/// the virtual memory check is enabled; true otherwise.
/// </returns>
private bool IsEnabled()
{
    bool pluginMissing = resourceCalculatorPlugin == null;
    if (pluginMissing)
    {
        Log.Info("ResourceCalculatorPlugin is unavailable on this system. " + this.GetType
            ().FullName + " is disabled.");
        return false;
    }

    // Probe with pid "0" purely to find out whether a process tree can be built.
    ResourceCalculatorProcessTree probeTree =
        ResourceCalculatorProcessTree.GetResourceCalculatorProcessTree("0", processTreeClass, conf);
    if (probeTree == null)
    {
        Log.Info("ResourceCalculatorProcessTree is unavailable on this system. " + this.GetType
            ().FullName + " is disabled.");
        return false;
    }

    // NOTE(review): "virutal" is misspelled in the message below; it is left
    // untouched so the emitted log text does not change.
    if (!IsPmemCheckEnabled() && !IsVmemCheckEnabled())
    {
        Log.Info("Neither virutal-memory nor physical-memory monitoring is " + "needed. Not running the monitor-thread");
        return false;
    }

    return true;
}
/// <summary>
/// Monitoring loop. Each pass:
/// 1. optionally debug-logs the pids of all tracked containers,
/// 2. moves everything queued in containersToBeAdded into trackingContainers,
/// 3. drops everything queued in containersToBeRemoved from trackingContainers,
/// 4. for every tracked container, resolves its pid if needed, refreshes its
///    process tree, records metrics when enabled, and dispatches a
///    ContainerKillEvent when the virtual or physical memory check reports
///    the tree as over its limit,
/// 5. sleeps for monitoringInterval.
/// The loop ends only when the sleep throws.
/// </summary>
public override void Run()
{
    while (true)
    {
        // Print the processTrees for debugging.
        if (ContainersMonitorImpl.Log.IsDebugEnabled())
        {
            StringBuilder tmp = new StringBuilder("[ ");
            foreach (ContainersMonitorImpl.ProcessTreeInfo p in this._enclosing.trackingContainers.Values)
            {
                tmp.Append(p.GetPID());
                tmp.Append(" ");
            }
            // StringBuilder has no Substring in .NET; ToString() yields the same full text.
            ContainersMonitorImpl.Log.Debug("Current ProcessTree list : " + tmp.ToString() + "]");
        }

        // Add new containers
        lock (this._enclosing.containersToBeAdded)
        {
            foreach (KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry in this._enclosing.containersToBeAdded)
            {
                ContainerId containerId = entry.Key;
                ContainersMonitorImpl.ProcessTreeInfo processTreeInfo = entry.Value;
                ContainersMonitorImpl.Log.Info("Starting resource-monitoring for " + containerId);
                this._enclosing.trackingContainers[containerId] = processTreeInfo;
            }
            this._enclosing.containersToBeAdded.Clear();
        }

        // Remove finished containers
        lock (this._enclosing.containersToBeRemoved)
        {
            foreach (ContainerId containerId in this._enclosing.containersToBeRemoved)
            {
                if (this._enclosing.containerMetricsEnabled)
                {
                    ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs,
                        this._enclosing.containerMetricsUnregisterDelayMs).Finished();
                }
                Sharpen.Collections.Remove(this._enclosing.trackingContainers, containerId);
                ContainersMonitorImpl.Log.Info("Stopping resource-monitoring for " + containerId);
            }
            this._enclosing.containersToBeRemoved.Clear();
        }

        // Now do the monitoring for the trackingContainers
        // Check memory usage and kill any overflowing containers
        long vmemStillInUsage = 0;
        long pmemStillInUsage = 0;

        // Iterate over a snapshot: IEnumerator<T> has no HasNext/Next/Remove,
        // and removing from a collection while enumerating it invalidates the
        // enumerator. Killed containers are removed from the live collection
        // instead.
        List<KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>> trackedSnapshot =
            new List<KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>>(this._enclosing.trackingContainers);
        foreach (KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry in trackedSnapshot)
        {
            ContainerId containerId = entry.Key;
            ContainersMonitorImpl.ProcessTreeInfo ptInfo = entry.Value;
            try
            {
                string pId = ptInfo.GetPID();

                // Initialize any uninitialized processTrees
                if (pId == null)
                {
                    // get pid from ContainerId
                    pId = this._enclosing.containerExecutor.GetProcessId(ptInfo.GetContainerId());

                    // pId will be null, either if the container is not spawned yet
                    // or if the container's pid is removed from ContainerExecutor
                    if (pId != null)
                    {
                        ContainersMonitorImpl.Log.Debug("Tracking ProcessTree " + pId + " for the first time");
                        ResourceCalculatorProcessTree pt = ResourceCalculatorProcessTree.GetResourceCalculatorProcessTree(
                            pId, this._enclosing.processTreeClass, this._enclosing.conf);
                        ptInfo.SetPid(pId);
                        ptInfo.SetProcessTree(pt);
                        if (this._enclosing.containerMetricsEnabled)
                        {
                            ContainerMetrics usageMetrics = ContainerMetrics.ForContainer(containerId,
                                this._enclosing.containerMetricsPeriodMs, this._enclosing.containerMetricsUnregisterDelayMs);
                            int cpuVcores = ptInfo.GetCpuVcores();
                            // ">> 20" divides the limits by 2^20 before they are recorded.
                            int vmemLimit = (int)(ptInfo.GetVmemLimit() >> 20);
                            int pmemLimit = (int)(ptInfo.GetPmemLimit() >> 20);
                            usageMetrics.RecordResourceLimit(vmemLimit, pmemLimit, cpuVcores);
                            usageMetrics.RecordProcessId(pId);
                        }
                    }
                }
                // End of initializing any uninitialized processTrees

                if (pId == null)
                {
                    // processTree cannot be tracked
                    continue;
                }

                ContainersMonitorImpl.Log.Debug("Constructing ProcessTree for : PID = " + pId + " ContainerId = " + containerId);
                ResourceCalculatorProcessTree pTree = ptInfo.GetProcessTree();
                pTree.UpdateProcessTree(); // update process-tree
                long currentVmemUsage = pTree.GetVirtualMemorySize();
                long currentPmemUsage = pTree.GetRssMemorySize();

                // if machine has 6 cores and 3 are used,
                // cpuUsagePercentPerCore should be 300% and
                // cpuUsageTotalCoresPercentage should be 50%
                float cpuUsagePercentPerCore = pTree.GetCpuUsagePercent();
                float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore
                    / this._enclosing.resourceCalculatorPlugin.GetNumProcessors();

                // Multiply by 1000 to avoid losing data when converting to int
                int milliVcoresUsed = (int)(cpuUsageTotalCoresPercentage * 1000
                    * this._enclosing.maxVCoresAllottedForContainers / this._enclosing.nodeCpuPercentageForYARN);

                // as processes begin with an age 1, we want to see if there
                // are processes more than 1 iteration old.
                long curMemUsageOfAgedProcesses = pTree.GetVirtualMemorySize(1);
                long curRssMemUsageOfAgedProcesses = pTree.GetRssMemorySize(1);
                long vmemLimit_1 = ptInfo.GetVmemLimit();
                long pmemLimit_1 = ptInfo.GetPmemLimit();

                // string.Format needs indexed {n} placeholders; "%s" is not substituted in .NET.
                ContainersMonitorImpl.Log.Info(
                    string.Format("Memory usage of ProcessTree {0} for container-id {1}: ", pId, containerId.ToString())
                    + this.FormatUsageString(currentVmemUsage, vmemLimit_1, currentPmemUsage, pmemLimit_1));

                // Add usage to container metrics
                if (this._enclosing.containerMetricsEnabled)
                {
                    ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs,
                        this._enclosing.containerMetricsUnregisterDelayMs).RecordMemoryUsage((int)(currentPmemUsage >> 20));
                    ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs,
                        this._enclosing.containerMetricsUnregisterDelayMs).RecordCpuUsage((int)cpuUsagePercentPerCore, milliVcoresUsed);
                }

                bool isMemoryOverLimit = false;
                string msg = string.Empty;
                int containerExitStatus = ContainerExitStatus.Invalid;
                if (this._enclosing.IsVmemCheckEnabled() && this._enclosing.IsProcessTreeOverLimit(
                    containerId.ToString(), currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit_1))
                {
                    // Container (the root process) is still alive and overflowing
                    // memory.
                    // Dump the process-tree and then clean it up.
                    msg = this.FormatErrorMessage("virtual", currentVmemUsage, vmemLimit_1, currentPmemUsage,
                        pmemLimit_1, pId, containerId, pTree);
                    isMemoryOverLimit = true;
                    containerExitStatus = ContainerExitStatus.KilledExceededVmem;
                }
                else
                {
                    if (this._enclosing.IsPmemCheckEnabled() && this._enclosing.IsProcessTreeOverLimit(
                        containerId.ToString(), currentPmemUsage, curRssMemUsageOfAgedProcesses, pmemLimit_1))
                    {
                        // Container (the root process) is still alive and overflowing
                        // memory.
                        // Dump the process-tree and then clean it up.
                        msg = this.FormatErrorMessage("physical", currentVmemUsage, vmemLimit_1, currentPmemUsage,
                            pmemLimit_1, pId, containerId, pTree);
                        isMemoryOverLimit = true;
                        containerExitStatus = ContainerExitStatus.KilledExceededPmem;
                    }
                }

                if (isMemoryOverLimit)
                {
                    // Virtual or physical memory over limit. Fail the container and
                    // remove the corresponding process tree
                    ContainersMonitorImpl.Log.Warn(msg);

                    // warn if not a leader
                    if (!pTree.CheckPidPgrpidForMatch())
                    {
                        ContainersMonitorImpl.Log.Error("Killed container process with PID " + pId + " but it is not a process group leader.");
                    }

                    // kill the container
                    this._enclosing.eventDispatcher.GetEventHandler().Handle(
                        new ContainerKillEvent(containerId, containerExitStatus, msg));

                    // Stop tracking it; safe because the loop walks a snapshot.
                    Sharpen.Collections.Remove(this._enclosing.trackingContainers, containerId);
                    ContainersMonitorImpl.Log.Info("Removed ProcessTree with root " + pId);
                }
                else
                {
                    // Accounting the total memory in usage for all containers that
                    // are still alive and within limits.
                    vmemStillInUsage += currentVmemUsage;
                    pmemStillInUsage += currentPmemUsage;
                }
            }
            catch (Exception e)
            {
                // Log the exception and proceed to the next container.
                ContainersMonitorImpl.Log.Warn("Uncaught exception in ContainerMemoryManager " + "while managing memory of " + containerId, e);
            }
        }

        try
        {
            Sharpen.Thread.Sleep(this._enclosing.monitoringInterval);
        }
        catch (Exception)
        {
            // Any exception from the sleep is treated as an interruption and ends the loop.
            ContainersMonitorImpl.Log.Warn(typeof(ContainersMonitorImpl).FullName + " is interrupted. Exiting.");
            break;
        }
    }
}
/// <summary>
/// Replaces the process tree stored in this instance.
/// </summary>
/// <param name="pTree">The process tree to store; assigned as-is, with no validation.</param>
public virtual void SetProcessTree(ResourceCalculatorProcessTree pTree)
{
    this.pTree = pTree;
}