Пример #1
0
        /// <summary>
        /// Records resource limits and a process id on a <c>ContainerMetrics</c> instance,
        /// collects its metrics, and checks that exactly one record is produced carrying
        /// the process-id tag and the three limit metrics that were recorded.
        /// </summary>
        public virtual void TestContainerMetricsLimit()
        {
            string        Err    = "Error in number of records";
            MetricsSystem system = Org.Mockito.Mockito.Mock <MetricsSystem>();

            // Stub Register(...) on the mocked metrics system so it returns this test instance.
            Org.Mockito.Mockito.DoReturn(this).When(system).Register(
                Matchers.AnyString(), Matchers.AnyString(), Matchers.Any());

            MetricsCollectorImpl collector   = new MetricsCollectorImpl();
            ContainerId          containerId = Org.Mockito.Mockito.Mock <ContainerId>();
            ContainerMetrics     metrics     = ContainerMetrics.ForContainer(containerId, 100, 1);
            int    anyPmemLimit = 1024;
            int    anyVmemLimit = 2048;
            int    anyVcores    = 10;
            string anyProcessId = "1234";

            // Note the argument order: virtual-memory limit first, then physical, then vcores.
            metrics.RecordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores);
            metrics.RecordProcessId(anyProcessId);

            // NOTE(review): 110 ms is presumably chosen to outlast the 100 passed to
            // ForContainer above (looks like a flush period in ms) -- confirm.
            Sharpen.Thread.Sleep(110);
            metrics.GetMetrics(collector, true);

            // NUnit takes (expected, actual, message); the message must come last.
            NUnit.Framework.Assert.AreEqual(1, collector.GetRecords().Count, Err);
            MetricsRecord record = collector.GetRecords()[0];

            MetricsRecords.AssertTag(record, ContainerMetrics.ProcessidInfo.Name(), anyProcessId);
            MetricsRecords.AssertMetric(record, ContainerMetrics.PmemLimitMetricName, anyPmemLimit);
            MetricsRecords.AssertMetric(record, ContainerMetrics.VmemLimitMetricName, anyVmemLimit);
            MetricsRecords.AssertMetric(record, ContainerMetrics.VcoreLimitMetricName, anyVcores);
            collector.Clear();
        }
Пример #2
0
 /// <summary>
 /// Monitoring loop. Each iteration: (1) optionally logs the tracked PIDs at debug
 /// level, (2) moves pending containers into the tracking map, (3) drops finished
 /// containers, (4) for every tracked container refreshes its process tree, records
 /// usage metrics when enabled, and dispatches a <c>ContainerKillEvent</c> if the
 /// virtual or physical memory check reports it over its limit, then (5) sleeps for
 /// the configured monitoring interval. The loop exits when the sleep throws.
 /// </summary>
 public override void Run()
 {
     while (true)
     {
         // Print the processTrees for debugging.
         if (ContainersMonitorImpl.Log.IsDebugEnabled())
         {
             StringBuilder tmp = new StringBuilder("[ ");
             foreach (ContainersMonitorImpl.ProcessTreeInfo p in this._enclosing.trackingContainers
                      .Values)
             {
                 tmp.Append(p.GetPID());
                 tmp.Append(" ");
             }
             ContainersMonitorImpl.Log.Debug("Current ProcessTree list : " + tmp.Substring(0,
                                                                                           tmp.Length) + "]");
         }
         // Add new containers: drain the pending-add map into the tracking map.
         lock (this._enclosing.containersToBeAdded)
         {
             foreach (KeyValuePair <ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry in
                      this._enclosing.containersToBeAdded)
             {
                 ContainerId containerId = entry.Key;
                 ContainersMonitorImpl.ProcessTreeInfo processTreeInfo = entry.Value;
                 ContainersMonitorImpl.Log.Info("Starting resource-monitoring for " + containerId);
                 this._enclosing.trackingContainers[containerId] = processTreeInfo;
             }
             this._enclosing.containersToBeAdded.Clear();
         }
         // Remove finished containers: mark their metrics finished (when enabled)
         // and stop tracking them.
         lock (this._enclosing.containersToBeRemoved)
         {
             foreach (ContainerId containerId in this._enclosing.containersToBeRemoved)
             {
                 if (this._enclosing.containerMetricsEnabled)
                 {
                     ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs
                                                   , this._enclosing.containerMetricsUnregisterDelayMs).Finished();
                 }
                 Sharpen.Collections.Remove(this._enclosing.trackingContainers, containerId);
                 ContainersMonitorImpl.Log.Info("Stopping resource-monitoring for " + containerId);
             }
             this._enclosing.containersToBeRemoved.Clear();
         }
         // Now do the monitoring for the trackingContainers
         // Check memory usage and kill any overflowing containers
         // NOTE(review): these two totals are accumulated below but never read
         // within this method.
         long vmemStillInUsage = 0;
         long pmemStillInUsage = 0;
         // An explicit iterator is used (rather than foreach) because entries of
         // over-limit containers are removed via it.Remove() while iterating.
         for (IEnumerator <KeyValuePair <ContainerId, ContainersMonitorImpl.ProcessTreeInfo>
                           > it = this._enclosing.trackingContainers.GetEnumerator(); it.HasNext();)
         {
             KeyValuePair <ContainerId, ContainersMonitorImpl.ProcessTreeInfo> entry = it.Next(
                 );
             ContainerId containerId = entry.Key;
             ContainersMonitorImpl.ProcessTreeInfo ptInfo = entry.Value;
             try
             {
                 string pId = ptInfo.GetPID();
                 // Initialize any uninitialized processTrees
                 if (pId == null)
                 {
                     // get pid from ContainerId
                     // pId will be null, either if the container is not spawned yet
                     // or if the container's pid is removed from ContainerExecutor
                     pId = this._enclosing.containerExecutor.GetProcessId(ptInfo.GetContainerId());
                     if (pId != null)
                     {
                         ContainersMonitorImpl.Log.Debug("Tracking ProcessTree " + pId + " for the first time"
                                                         );
                         ResourceCalculatorProcessTree pt = ResourceCalculatorProcessTree.GetResourceCalculatorProcessTree
                                                                (pId, this._enclosing.processTreeClass, this._enclosing.conf);
                         ptInfo.SetPid(pId);
                         ptInfo.SetProcessTree(pt);
                         if (this._enclosing.containerMetricsEnabled)
                         {
                             ContainerMetrics usageMetrics = ContainerMetrics.ForContainer(containerId, this._enclosing
                                                                                           .containerMetricsPeriodMs, this._enclosing.containerMetricsUnregisterDelayMs);
                             int cpuVcores = ptInfo.GetCpuVcores();
                             // >> 20 divides by 2^20; presumably bytes -> MB (confirm units).
                             int vmemLimit = (int)(ptInfo.GetVmemLimit() >> 20);
                             int pmemLimit = (int)(ptInfo.GetPmemLimit() >> 20);
                             usageMetrics.RecordResourceLimit(vmemLimit, pmemLimit, cpuVcores);
                             usageMetrics.RecordProcessId(pId);
                         }
                     }
                 }
                 // End of initializing any uninitialized processTrees
                 if (pId == null)
                 {
                     // processTree cannot be tracked yet; try again next iteration.
                     continue;
                 }
                 ContainersMonitorImpl.Log.Debug("Constructing ProcessTree for : PID = " + pId + " ContainerId = "
                                                 + containerId);
                 ResourceCalculatorProcessTree pTree = ptInfo.GetProcessTree();
                 // update process-tree
                 pTree.UpdateProcessTree();
                 long currentVmemUsage = pTree.GetVirtualMemorySize();
                 long currentPmemUsage = pTree.GetRssMemorySize();
                 // if machine has 6 cores and 3 are used,
                 // cpuUsagePercentPerCore should be 300% and
                 // cpuUsageTotalCoresPercentage should be 50%
                 float cpuUsagePercentPerCore       = pTree.GetCpuUsagePercent();
                 float cpuUsageTotalCoresPercentage = cpuUsagePercentPerCore / this._enclosing.resourceCalculatorPlugin
                                                      .GetNumProcessors();
                 // Multiply by 1000 to avoid losing data when converting to int
                 int milliVcoresUsed = (int)(cpuUsageTotalCoresPercentage * 1000 * this._enclosing
                                             .maxVCoresAllottedForContainers / this._enclosing.nodeCpuPercentageForYARN);
                 // as processes begin with an age 1, we want to see if there
                 // are processes more than 1 iteration old.
                 long curMemUsageOfAgedProcesses    = pTree.GetVirtualMemorySize(1);
                 long curRssMemUsageOfAgedProcesses = pTree.GetRssMemorySize(1);
                 long vmemLimit_1 = ptInfo.GetVmemLimit();
                 long pmemLimit_1 = ptInfo.GetPmemLimit();
                 // .NET composite formatting uses {0}/{1}; Java-style %s placeholders
                 // would be printed literally and the arguments dropped.
                 ContainersMonitorImpl.Log.Info(string.Format("Memory usage of ProcessTree {0} for container-id {1}: "
                                                              , pId, containerId.ToString()) + this.FormatUsageString(currentVmemUsage, vmemLimit_1
                                                                                                                      , currentPmemUsage, pmemLimit_1));
                 // Add usage to container metrics
                 if (this._enclosing.containerMetricsEnabled)
                 {
                     ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs
                                                   , this._enclosing.containerMetricsUnregisterDelayMs).RecordMemoryUsage((int)(currentPmemUsage
                                                                                                                                >> 20));
                     ContainerMetrics.ForContainer(containerId, this._enclosing.containerMetricsPeriodMs
                                                   , this._enclosing.containerMetricsUnregisterDelayMs).RecordCpuUsage((int)cpuUsagePercentPerCore
                                                                                                                       , milliVcoresUsed);
                 }
                 bool   isMemoryOverLimit = false;
                 string msg = string.Empty;
                 int    containerExitStatus = ContainerExitStatus.Invalid;
                 // The virtual-memory check takes precedence; the physical-memory check
                 // is only evaluated when the virtual one did not trip.
                 if (this._enclosing.IsVmemCheckEnabled() && this._enclosing.IsProcessTreeOverLimit
                         (containerId.ToString(), currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit_1
                         ))
                 {
                     // Container (the root process) is still alive and overflowing
                     // memory.
                     // Dump the process-tree and then clean it up.
                     msg = this.FormatErrorMessage("virtual", currentVmemUsage, vmemLimit_1, currentPmemUsage
                                                   , pmemLimit_1, pId, containerId, pTree);
                     isMemoryOverLimit   = true;
                     containerExitStatus = ContainerExitStatus.KilledExceededVmem;
                 }
                 else if (this._enclosing.IsPmemCheckEnabled() && this._enclosing.IsProcessTreeOverLimit
                              (containerId.ToString(), currentPmemUsage, curRssMemUsageOfAgedProcesses, pmemLimit_1
                              ))
                 {
                     // Container (the root process) is still alive and overflowing
                     // memory.
                     // Dump the process-tree and then clean it up.
                     msg = this.FormatErrorMessage("physical", currentVmemUsage, vmemLimit_1, currentPmemUsage
                                                   , pmemLimit_1, pId, containerId, pTree);
                     isMemoryOverLimit   = true;
                     containerExitStatus = ContainerExitStatus.KilledExceededPmem;
                 }
                 if (isMemoryOverLimit)
                 {
                     // Virtual or physical memory over limit. Fail the container and
                     // remove
                     // the corresponding process tree
                     ContainersMonitorImpl.Log.Warn(msg);
                     // warn if not a leader
                     if (!pTree.CheckPidPgrpidForMatch())
                     {
                         ContainersMonitorImpl.Log.Error("Killed container process with PID " + pId + " but it is not a process group leader."
                                                         );
                     }
                     // kill the container
                     this._enclosing.eventDispatcher.GetEventHandler().Handle(new ContainerKillEvent(containerId
                                                                                                     , containerExitStatus, msg));
                     it.Remove();
                     ContainersMonitorImpl.Log.Info("Removed ProcessTree with root " + pId);
                 }
                 else
                 {
                     // Accounting the total memory in usage for all containers that
                     // are still
                     // alive and within limits.
                     vmemStillInUsage += currentVmemUsage;
                     pmemStillInUsage += currentPmemUsage;
                 }
             }
             catch (Exception e)
             {
                 // Log the exception and proceed to the next container.
                 ContainersMonitorImpl.Log.Warn("Uncaught exception in ContainerMemoryManager " +
                                                "while managing memory of " + containerId, e);
             }
         }
         try
         {
             Sharpen.Thread.Sleep(this._enclosing.monitoringInterval);
         }
         catch (Exception)
         {
             // Any exception thrown by the sleep is treated as an interrupt and
             // terminates the monitoring loop.
             ContainersMonitorImpl.Log.Warn(typeof(ContainersMonitorImpl).FullName + " is interrupted. Exiting."
                                            );
             break;
         }
     }
 }