/// <summary>
/// Verifies that the resource limits and the process id recorded on a
/// <c>ContainerMetrics</c> instance are published as exactly one metrics record
/// carrying the expected tag and limit gauges.
/// </summary>
public virtual void TestContainerMetricsLimit()
{
    string Err = "Error in number of records";

    // Mocked metrics system; Register(...) is stubbed so registration is inert.
    MetricsSystem system = Org.Mockito.Mockito.Mock<MetricsSystem>();
    Org.Mockito.Mockito.DoReturn(this).When(system).Register(Matchers.AnyString(), Matchers.AnyString(), Matchers.Any());

    MetricsCollectorImpl collector = new MetricsCollectorImpl();
    ContainerId containerId = Org.Mockito.Mockito.Mock<ContainerId>();

    // Second and third arguments are the metrics period and the unregister delay
    // (both in ms), matching how the monitoring thread calls ForContainer.
    ContainerMetrics metrics = ContainerMetrics.ForContainer(containerId, 100, 1);

    int anyPmemLimit = 1024;
    int anyVmemLimit = 2048;
    int anyVcores = 10;
    string anyProcessId = "1234";

    metrics.RecordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores);
    metrics.RecordProcessId(anyProcessId);

    // NOTE(review): presumably waits out the 100 ms metrics period so the next
    // GetMetrics call publishes a record - confirm against ContainerMetrics.
    Sharpen.Thread.Sleep(110);
    metrics.GetMetrics(collector, true);

    // NUnit's argument order is (expected, actual, message); the Java/JUnit order
    // (message, expected, actual) would treat the message as the expected value.
    NUnit.Framework.Assert.AreEqual(1, collector.GetRecords().Count, Err);

    MetricsRecord record = collector.GetRecords()[0];
    MetricsRecords.AssertTag(record, ContainerMetrics.ProcessidInfo.Name(), anyProcessId);
    MetricsRecords.AssertMetric(record, ContainerMetrics.PmemLimitMetricName, anyPmemLimit);
    MetricsRecords.AssertMetric(record, ContainerMetrics.VmemLimitMetricName, anyVmemLimit);
    MetricsRecords.AssertMetric(record, ContainerMetrics.VcoreLimitMetricName, anyVcores);

    collector.Clear();
}
/// <summary>
/// Monitoring loop. On every iteration it:
/// <list type="number">
/// <item>logs the tracked process ids when debug logging is enabled,</item>
/// <item>moves the entries of <c>containersToBeAdded</c> into <c>trackingContainers</c>,</item>
/// <item>drops the containers listed in <c>containersToBeRemoved</c>,</item>
/// <item>updates each tracked process tree, records usage metrics, and dispatches a
/// <c>ContainerKillEvent</c> for any container over its virtual or physical memory limit,</item>
/// <item>sleeps for <c>monitoringInterval</c>.</item>
/// </list>
/// The loop exits when the sleep throws.
/// </summary>
public override void Run()
{
    while (true)
    {
        // Print the processTrees for debugging.
        if (ContainersMonitorImpl.Log.IsDebugEnabled())
        {
            StringBuilder tmp = new StringBuilder("[ ");
            foreach (ContainersMonitorImpl.ProcessTreeInfo p in this._enclosing.trackingContainers.Values)
            {
                tmp.Append(p.GetPID());
                tmp.Append(" ");
            }
            // StringBuilder has no Substring member; ToString() yields the same full content.
            ContainersMonitorImpl.Log.Debug("Current ProcessTree list : " + tmp.ToString() + "]");
        }

        // Add new containers
        lock (this._enclosing.containersToBeAdded)
        {
            foreach (KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry in this._enclosing.containersToBeAdded)
            {
                ContainerId containerId = entry.Key;
                ContainersMonitorImpl.ProcessTreeInfo processTreeInfo = entry.Value;
                ContainersMonitorImpl.Log.Info("Starting resource-monitoring for " + containerId);
                this._enclosing.trackingContainers[containerId] = processTreeInfo;
            }
            this._enclosing.containersToBeAdded.Clear();
        }

        // Remove finished containers
        lock (this._enclosing.containersToBeRemoved)
        {
            foreach (ContainerId containerId in this._enclosing.containersToBeRemoved)
            {
                if (this._enclosing.containerMetricsEnabled)
                {
                    ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs,
                        this._enclosing.containerMetricsUnregisterDelayMs).Finished();
                }
                Sharpen.Collections.Remove(this._enclosing.trackingContainers, containerId);
                ContainersMonitorImpl.Log.Info("Stopping resource-monitoring for " + containerId);
            }
            this._enclosing.containersToBeRemoved.Clear();
        }

        // Now do the monitoring for the trackingContainers
        // Check memory usage and kill any overflowing containers
        // (the two totals below are accumulated but not read again in this method)
        long vmemStillInUsage = 0;
        long pmemStillInUsage = 0;

        // Iterate over a snapshot: IEnumerator<T> offers neither HasNext/Next nor
        // Remove, and a dictionary must not be modified while it is being enumerated.
        List<KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>> tracked =
            new List<KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo>>(this._enclosing.trackingContainers);

        foreach (KeyValuePair<ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry in tracked)
        {
            ContainerId containerId = entry.Key;
            ContainersMonitorImpl.ProcessTreeInfo ptInfo = entry.Value;
            try
            {
                string pId = ptInfo.GetPID();

                // Initialize any uninitialized processTrees
                if (pId == null)
                {
                    // get pid from ContainerId
                    // pId will be null, either if the container is not spawned yet
                    // or if the container's pid is removed from ContainerExecutor
                    pId = this._enclosing.containerExecutor.GetProcessId(ptInfo.GetContainerId());
                    if (pId != null)
                    {
                        ContainersMonitorImpl.Log.Debug("Tracking ProcessTree " + pId + " for the first time");
                        ResourceCalculatorProcessTree pt = ResourceCalculatorProcessTree.GetResourceCalculatorProcessTree(
                            pId, this._enclosing.processTreeClass, this._enclosing.conf);
                        ptInfo.SetPid(pId);
                        ptInfo.SetProcessTree(pt);

                        if (this._enclosing.containerMetricsEnabled)
                        {
                            ContainerMetrics usageMetrics = ContainerMetrics.ForContainer(containerId,
                                this._enclosing.containerMetricsPeriodMs, this._enclosing.containerMetricsUnregisterDelayMs);
                            int cpuVcores = ptInfo.GetCpuVcores();
                            // ">> 20" divides by 2^20 before the limits are recorded.
                            int vmemLimit = (int)(ptInfo.GetVmemLimit() >> 20);
                            int pmemLimit = (int)(ptInfo.GetPmemLimit() >> 20);
                            usageMetrics.RecordResourceLimit(vmemLimit, pmemLimit, cpuVcores);
                            usageMetrics.RecordProcessId(pId);
                        }
                    }
                }
                // End of initializing any uninitialized processTrees

                if (pId == null)
                {
                    // processTree cannot be tracked
                    continue;
                }

                ContainersMonitorImpl.Log.Debug("Constructing ProcessTree for : PID = " + pId + " ContainerId = " + containerId);
                ResourceCalculatorProcessTree pTree = ptInfo.GetProcessTree();
                pTree.UpdateProcessTree(); // update process-tree
                long currentVmemUsage = pTree.GetVirtualMemorySize();
                long currentPmemUsage = pTree.GetRssMemorySize();

                // if machine has 6 cores and 3 are used,
                // cpuUsagePercentPerCore should be 300% and
                // cpuUsageTotalCoresPercentage should be 50%
                float cpuUsagePercentPerCore = pTree.GetCpuUsagePercent();
                float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore
                    / this._enclosing.resourceCalculatorPlugin.GetNumProcessors();

                // Multiply by 1000 to avoid losing data when converting to int
                int milliVcoresUsed = (int)(cpuUsageTotalCoresPercentage * 1000
                    * this._enclosing.maxVCoresAllottedForContainers / this._enclosing.nodeCpuPercentageForYARN);

                // as processes begin with an age 1, we want to see if there
                // are processes more than 1 iteration old.
                long curMemUsageOfAgedProcesses = pTree.GetVirtualMemorySize(1);
                long curRssMemUsageOfAgedProcesses = pTree.GetRssMemorySize(1);
                long vmemLimit_1 = ptInfo.GetVmemLimit();
                long pmemLimit_1 = ptInfo.GetPmemLimit();

                // .NET composite formatting uses {0}/{1}; Java-style %s would be emitted literally.
                ContainersMonitorImpl.Log.Info(
                    string.Format("Memory usage of ProcessTree {0} for container-id {1}: ", pId, containerId.ToString())
                    + this.FormatUsageString(currentVmemUsage, vmemLimit_1, currentPmemUsage, pmemLimit_1));

                // Add usage to container metrics
                if (this._enclosing.containerMetricsEnabled)
                {
                    ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs,
                        this._enclosing.containerMetricsUnregisterDelayMs).RecordMemoryUsage((int)(currentPmemUsage >> 20));
                    ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs,
                        this._enclosing.containerMetricsUnregisterDelayMs).RecordCpuUsage((int)cpuUsagePercentPerCore, milliVcoresUsed);
                }

                bool isMemoryOverLimit = false;
                string msg = string.Empty;
                int containerExitStatus = ContainerExitStatus.Invalid;

                if (this._enclosing.IsVmemCheckEnabled() && this._enclosing.IsProcessTreeOverLimit(
                    containerId.ToString(), currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit_1))
                {
                    // Container (the root process) is still alive and overflowing
                    // memory.
                    // Dump the process-tree and then clean it up.
                    msg = this.FormatErrorMessage("virtual", currentVmemUsage, vmemLimit_1, currentPmemUsage,
                        pmemLimit_1, pId, containerId, pTree);
                    isMemoryOverLimit = true;
                    containerExitStatus = ContainerExitStatus.KilledExceededVmem;
                }
                else
                {
                    if (this._enclosing.IsPmemCheckEnabled() && this._enclosing.IsProcessTreeOverLimit(
                        containerId.ToString(), currentPmemUsage, curRssMemUsageOfAgedProcesses, pmemLimit_1))
                    {
                        // Container (the root process) is still alive and overflowing
                        // memory.
                        // Dump the process-tree and then clean it up.
                        msg = this.FormatErrorMessage("physical", currentVmemUsage, vmemLimit_1, currentPmemUsage,
                            pmemLimit_1, pId, containerId, pTree);
                        isMemoryOverLimit = true;
                        containerExitStatus = ContainerExitStatus.KilledExceededPmem;
                    }
                }

                if (isMemoryOverLimit)
                {
                    // Virtual or physical memory over limit. Fail the container and
                    // remove the corresponding process tree
                    ContainersMonitorImpl.Log.Warn(msg);

                    // warn if not a leader
                    if (!pTree.CheckPidPgrpidForMatch())
                    {
                        ContainersMonitorImpl.Log.Error("Killed container process with PID " + pId + " but it is not a process group leader.");
                    }

                    // kill the container
                    this._enclosing.eventDispatcher.GetEventHandler().Handle(
                        new ContainerKillEvent(containerId, containerExitStatus, msg));

                    // Safe to mutate here because the loop walks the snapshot, not the live map.
                    Sharpen.Collections.Remove(this._enclosing.trackingContainers, containerId);
                    ContainersMonitorImpl.Log.Info("Removed ProcessTree with root " + pId);
                }
                else
                {
                    // Accounting the total memory in usage for all containers that
                    // are still alive and within limits.
                    vmemStillInUsage += currentVmemUsage;
                    pmemStillInUsage += currentPmemUsage;
                }
            }
            catch (Exception e)
            {
                // Log the exception and proceed to the next container.
                ContainersMonitorImpl.Log.Warn("Uncaught exception in ContainerMemoryManager "
                    + "while managing memory of " + containerId, e);
            }
        }

        try
        {
            Sharpen.Thread.Sleep(this._enclosing.monitoringInterval);
        }
        catch (Exception)
        {
            // Any exception from the sleep is treated as an interruption and ends the loop.
            ContainersMonitorImpl.Log.Warn(typeof(ContainersMonitorImpl).FullName + " is interrupted. Exiting.");
            break;
        }
    }
}