//---------------------------------------------------------------------
// Implementation

/// <summary>
/// Performs development related cluster checks with information on potential
/// problems being written to STDOUT.
/// </summary>
/// <param name="clusterLogin">Specifies the target cluster login.</param>
/// <param name="k8s">Specifies the cluster's Kubernetes client.</param>
/// <returns><c>true</c> when there are no problems, <c>false</c> otherwise.</returns>
public static async Task<bool> CheckAsync(ClusterLogin clusterLogin, IKubernetes k8s)
{
    Covenant.Requires<ArgumentNullException>(clusterLogin != null, nameof(clusterLogin));
    Covenant.Requires<ArgumentNullException>(k8s != null, nameof(k8s));

    // Every check is always executed (no short-circuiting) so that all of the
    // problems are reported in a single pass.  Each check is documented to
    // return [true] when it finds no problems.

    var error = false;

    if (!await CheckNodeContainerImagesAsync(clusterLogin, k8s))
    {
        error = true;
    }

    if (!await CheckPodPrioritiesAsync(clusterLogin, k8s))
    {
        error = true;
    }

    if (!await CheckResourcesAsync(clusterLogin, k8s))
    {
        error = true;
    }

    // The contract is "true == no problems", so the error flag must be negated.

    return !error;
}
/// <summary>
/// Verifies that all pod container specifications include resource requests and limits.
/// </summary>
/// <param name="clusterLogin">Specifies the target cluster login.</param>
/// <param name="k8s">Specifies the cluster's Kubernetes client.</param>
/// <param name="details">Optionally specifies that status should be written to STDOUT when there are no errors.</param>
/// <returns><c>true</c> when there are no problems, <c>false</c> otherwise.</returns>
public static async Task<bool> CheckResourcesAsync(ClusterLogin clusterLogin, IKubernetes k8s, bool details = false)
{
    Covenant.Requires<ArgumentNullException>(clusterLogin != null, nameof(clusterLogin));
    Covenant.Requires<ArgumentNullException>(k8s != null, nameof(k8s));

    Console.WriteLine();
    Console.WriteLine("===============================================================================");
    Console.WriteLine("Checking container resources...");

    // Build a dictionary that maps a pod reference [namespace/pod-owner-name]
    // to a list of resource information for each container in the pod.
    //
    // NOTE: The dictionary is keyed by the owner ID, so when several pods
    //       resolve to the same owner ID, the last pod listed wins.

    var podRefToContainerResources = new Dictionary<string, List<ContainerResources>>(StringComparer.InvariantCulture);

    foreach (var @namespace in (await k8s.ListNamespaceAsync()).Items)
    {
        foreach (var pod in (await k8s.ListNamespacedPodAsync(@namespace.Metadata.Name)).Items)
        {
            var podRef     = await GetOwnerIdAsync(k8s, pod);
            var containers = new List<ContainerResources>();

            foreach (var containerSpec in pod.Spec.Containers)
            {
                var containerResources = new ContainerResources()
                {
                    ContainerImage = containerSpec.Image
                };

                if (containerSpec.Resources != null)
                {
                    if (containerSpec.Resources.Requests != null)
                    {
                        containerResources.RequestCpu    = GetResourceQuantity("cpu", containerSpec.Resources.Requests);
                        containerResources.RequestMemory = GetResourceQuantity("memory", containerSpec.Resources.Requests);
                    }

                    // Limits must be read from the [Limits] map (not [Requests]), otherwise
                    // a container with requests but no limits would be reported as OK.

                    if (containerSpec.Resources.Limits != null)
                    {
                        containerResources.LimitCpu    = GetResourceQuantity("cpu", containerSpec.Resources.Limits);
                        containerResources.LimitMemory = GetResourceQuantity("memory", containerSpec.Resources.Limits);
                    }
                }

                containers.Add(containerResources);
            }

            podRefToContainerResources[podRef] = containers;
        }
    }

    var badPodSpecCount = podRefToContainerResources.Values.Count(containers => containers.Any(resources => resources.Error));

    if (badPodSpecCount > 0 || details)
    {
        if (badPodSpecCount > 0)
        {
            Console.WriteLine();
            Console.WriteLine($"ERROR: [{badPodSpecCount}] pod deployments have containers without resource requests and/or limits.");
            Console.WriteLine();
        }
        else
        {
            Console.WriteLine();
        }

        foreach (var item in podRefToContainerResources.OrderBy(item => item.Key))
        {
            var containers  = item.Value;
            var error       = containers.Any(container => container.Error);
            var errorMarker = error ? "-->" : " ";

            if (error || details)
            {
                Console.WriteLine($"{errorMarker} {item.Key}");
            }

            foreach (var container in containers)
            {
                var requestCpu    = container.RequestCpu != null ? container.RequestCpu.ToString() : "NULL";
                var requestMemory = container.RequestMemory != null ? container.RequestMemory.ToString() : "NULL";
                var limitCpu      = container.LimitCpu != null ? container.LimitCpu.ToString() : "NULL";
                var limitMemory   = container.LimitMemory != null ? container.LimitMemory.ToString() : "NULL";

                // A container is flagged when any of the four values is missing.

                var containerError = container.RequestCpu == null ||
                                     container.RequestMemory == null ||
                                     container.LimitCpu == null ||
                                     container.LimitMemory == null;

                var errorContainerMarker = containerError ? "-->" : " ";

                if (containerError || details)
                {
                    Console.WriteLine($" {errorContainerMarker} {container.ContainerImage} [request-cpu={requestCpu}] [request-memory={requestMemory}] [limit-cpu={limitCpu}] [limit-memory={limitMemory}]");
                }
            }
        }
    }
    else
    {
        Console.WriteLine();
        Console.WriteLine($"OK: Container resources are set correctly.");
    }

    // The contract is "true == no problems".

    return badPodSpecCount == 0;
}
/// <summary>
/// <para>
/// Verifies that all pods running in the cluster are assigned a PriorityClass greater than
/// or equal to <see cref="PriorityClass.NeonMin"/>, ensuring that our pods will not be evicted
/// before user pods which could cause serious problems, especially on small single node clusters.
/// </para>
/// <para>
/// Details about any issues will be written to STDOUT.
/// </para>
/// </summary>
/// <param name="clusterLogin">Specifies the target cluster login.</param>
/// <param name="k8s">Specifies the cluster's Kubernetes client.</param>
/// <param name="details">Optionally specifies that status should be written to STDOUT when there are no errors.</param>
/// <returns><c>true</c> when there are no problems, <c>false</c> otherwise.</returns>
/// <remarks>
/// <para>
/// Verifies that all pods running in the cluster are assigned a PriorityClass greater than
/// or equal to <see cref="PriorityClass.NeonMin"/>.  PriorityClass is used by the Kubernetes
/// scheduler and Kubelet to decide which pods to evict when a node encounters resource pressure.
/// Pods with lower priority classes will tend to be evicted first.
/// </para>
/// <para>
/// By default, pods will be created with <b>PriorityClass=0</b>.  Kubernetes ensures that
/// its own critical services have very high priority class values so they will be evicted
/// last.  neonKUBE deploys dozens of services that need to have priority classes higher
/// than most user services.  So we assign one of several priorities to our pods based on
/// their relative priority as defined here: <see cref="PriorityClass"/>.
/// </para>
/// <para>
/// This method is useful for ensuring that we've configured priorities for all of our pods
/// and also that we've done the same for pods created by third-party operators.
/// </para>
/// </remarks>
public static async Task<bool> CheckPodPrioritiesAsync(ClusterLogin clusterLogin, IKubernetes k8s, bool details = false)
{
    Covenant.Requires<ArgumentNullException>(clusterLogin != null, nameof(clusterLogin));
    Covenant.Requires<ArgumentNullException>(k8s != null, nameof(k8s));

    Console.WriteLine();
    Console.WriteLine("===============================================================================");
    Console.WriteLine("Checking pod priorities...");

    // Build a dictionary that maps the priority of all known priority class
    // priority values to the priority class name.  Note that we're assuming
    // here that no single priority value has more than one name (we'll just
    // choose one of the names in this case) and also build a dictionary that
    // maps all of the known priority class names to their values.

    var priorityToName = new Dictionary<int, string>();
    var nameToPriority = new Dictionary<string, int>(StringComparer.InvariantCultureIgnoreCase);

    foreach (var priorityClass in (await k8s.ListPriorityClassAsync()).Items)
    {
        priorityToName[priorityClass.Value]         = priorityClass.Metadata.Name;
        nameToPriority[priorityClass.Metadata.Name] = priorityClass.Value;
    }

    // Build a dictionary that maps the owner of a pod to a [PodPriorityInfo] with the
    // priority details.  The owner string indicates whether the pod is owned by a daemonset,
    // stateful set, deployment, is a standalone pod, or is something else along with
    // the owner's name.
    //
    // The priority is a pod level setting, so the owner is resolved once per pod
    // rather than once per container.

    var ownerToPriorityInfo = new Dictionary<string, PodPriorityInfo>(StringComparer.InvariantCultureIgnoreCase);

    foreach (var @namespace in (await k8s.ListNamespaceAsync()).Items)
    {
        foreach (var pod in (await k8s.ListNamespacedPodAsync(@namespace.Metadata.Name)).Items)
        {
            var ownerId = await GetOwnerIdAsync(k8s, pod);

            ownerToPriorityInfo[ownerId] = new PodPriorityInfo()
            {
                Owner             = ownerId,
                Priority          = pod.Spec.Priority,
                PriorityClassName = pod.Spec.PriorityClassName
            };
        }
    }

    // Normalize the priority info for each pod by trying to lookup the priority from
    // the priority class name or looking up the priority class name from the priority
    // value.

    foreach (var podPriorityInfo in ownerToPriorityInfo.Values)
    {
        if (!podPriorityInfo.Priority.HasValue && !string.IsNullOrEmpty(podPriorityInfo.PriorityClassName))
        {
            if (nameToPriority.TryGetValue(podPriorityInfo.PriorityClassName, out var foundPriority))
            {
                podPriorityInfo.Priority = foundPriority;
            }
        }
        else if (podPriorityInfo.Priority.HasValue && string.IsNullOrEmpty(podPriorityInfo.PriorityClassName))
        {
            if (priorityToName.TryGetValue(podPriorityInfo.Priority.Value, out var foundPriorityClass))
            {
                podPriorityInfo.PriorityClassName = foundPriorityClass;
            }
        }

        // Fall back to defaults so that every entry has a priority value and a
        // printable class name from here on.

        if (!podPriorityInfo.Priority.HasValue)
        {
            podPriorityInfo.Priority = 0;
        }

        if (string.IsNullOrEmpty(podPriorityInfo.PriorityClassName))
        {
            podPriorityInfo.PriorityClassName = "[NONE]";
        }
    }

    var badPodDeploymentCount = ownerToPriorityInfo.Values.Count(info => info.Priority < PriorityClass.NeonMin.Value);

    if (badPodDeploymentCount > 0 || details)
    {
        if (badPodDeploymentCount > 0)
        {
            Console.WriteLine();
            Console.WriteLine($"ERROR: [{badPodDeploymentCount}] pod deployments are deployed with [Priority<{PriorityClass.NeonMin.Value}]:");
            Console.WriteLine();
        }
        else
        {
            Console.WriteLine();
        }

        // [Max()] throws on an empty sequence, which can happen here when [details]
        // is requested and no pods were found.

        var ownerIdWidth = ownerToPriorityInfo.Count > 0
            ? ownerToPriorityInfo.Keys.Max(ownerId => ownerId.Length)
            : 0;

        foreach (var item in ownerToPriorityInfo
            .OrderByDescending(item => item.Value.Priority)
            .ThenBy(item => item.Key))
        {
            var priorityInfo   = item.Value;
            var ownerFormatted = item.Key + new string(' ', ownerIdWidth - item.Key.Length);   // right-pad to align the columns
            var priorityValue  = priorityInfo.Priority.Value.ToString("#,##0").Trim();
            var errorMarker    = priorityInfo.Priority.Value < PriorityClass.NeonMin.Value ? "-->" : " ";

            Console.WriteLine($"{errorMarker} {ownerFormatted} - {priorityInfo.PriorityClassName} ({priorityValue})");
        }
    }
    else
    {
        Console.WriteLine();
        Console.WriteLine($"OK: Pod priorities are set correctly.");
    }

    // The contract is "true == no problems".

    return badPodDeploymentCount == 0;
}
/// <summary>
/// <para>
/// Confirms that every container image currently present on the cluster nodes is listed
/// in the cluster manifest.  Images missing from the manifest need to be preloaded into the
/// node image.  This catches images pulled by pods started by third-party operators so that
/// node images remain self-contained, which improves the setup experience and supports
/// air-gapped clusters.
/// </para>
/// <para>
/// Details about any issues will be written to STDOUT.
/// </para>
/// </summary>
/// <param name="clusterLogin">Specifies the target cluster login.</param>
/// <param name="k8s">Specifies the cluster's Kubernetes client.</param>
/// <param name="details">Optionally specifies that status should be written to STDOUT even when there are no errors.</param>
/// <returns><c>true</c> when there are no problems, <c>false</c> otherwise.</returns>
/// <remarks>
/// <para>
/// neonKUBE clusters deploy all required images to CRI-O running on all cluster
/// nodes as well as the local Harbor registry.  This not only improves the cluster
/// setup experience but also makes air-gapped clusters possible.
/// </para>
/// </remarks>
public static async Task<bool> CheckNodeContainerImagesAsync(ClusterLogin clusterLogin, IKubernetes k8s, bool details = false)
{
    Covenant.Requires<ArgumentNullException>(clusterLogin != null, nameof(clusterLogin));
    Covenant.Requires<ArgumentNullException>(k8s != null, nameof(k8s));

    Console.WriteLine();
    Console.WriteLine("===============================================================================");
    Console.WriteLine("Checking local container images...");

    // Collect the source references of all images named by the cluster manifest.

    var manifestRefs = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);

    foreach (var manifestImage in KubeSetup.ClusterManifest.ContainerImages)
    {
        manifestRefs.Add(manifestImage.SourceRef);
    }

    // Walk the images reported by each node, keyed by the sorted, comma separated
    // list of the image's names.  An image counts as being in the manifest when
    // any one of its names is found there.

    var nodeList      = await k8s.ListNodeAsync();
    var statusByNames = new Dictionary<string, ImageStatus>(StringComparer.InvariantCultureIgnoreCase);
    var sbNames       = new StringBuilder();

    foreach (var node in nodeList.Items)
    {
        foreach (var nodeImage in node.Status.Images)
        {
            var inManifest = false;

            foreach (var imageName in nodeImage.Names)
            {
                if (manifestRefs.Contains(imageName))
                {
                    inManifest = true;
                    break;
                }
            }

            sbNames.Clear();

            foreach (var imageName in nodeImage.Names.OrderBy(name => name, StringComparer.InvariantCultureIgnoreCase))
            {
                sbNames.AppendWithSeparator(imageName, ", ");
            }

            var status = new ImageStatus()
            {
                ImageNames    = sbNames.ToString(),
                NotInManifest = !inManifest
            };

            statusByNames[status.ImageNames] = status;
        }
    }

    var badImageCount = statusByNames.Values.Count(status => status.NotInManifest);

    if (badImageCount == 0 && !details)
    {
        Console.WriteLine();
        Console.WriteLine($"OK: All container images are present in the cluster registry.");
    }
    else
    {
        Console.WriteLine();

        if (badImageCount > 0)
        {
            Console.WriteLine($"ERROR: [{badImageCount}] images are being pulled from external registries:");
            Console.WriteLine();
        }

        // List the offending images, or every image when details were requested.

        foreach (var status in statusByNames.Values.OrderBy(status => status.ImageNames))
        {
            if (!status.NotInManifest && !details)
            {
                continue;
            }

            var marker = status.NotInManifest ? "--> " : " ";

            Console.WriteLine($"{marker}{status.ImageNames}");
        }
    }

    return badImageCount == 0;
}
/// <inheritdoc/>
public override async Task RunAsync(CommandLine commandLine)
{
    if (commandLine.Arguments.Length < 1)
    {
        Console.Error.WriteLine("*** ERROR: [root@CLUSTER-NAME] argument is required.");
        Program.Exit(1);
    }

    Console.WriteLine();

    // Cluster prepare/setup uses the [ProfileClient] to retrieve secrets and profile values.
    // We need to inject an implementation for [PreprocessReader] so it will be able to
    // perform the lookups.

    NeonHelper.ServiceContainer.AddSingleton<IProfileClient>(new ProfileClient());

    // Parse the command line arguments and options.  Note that [--debug] also
    // implies [--upload-charts].

    var contextName       = KubeContextName.Parse(commandLine.Arguments[0]);
    var kubeCluster       = KubeHelper.Config.GetCluster(contextName.Cluster);
    var unredacted        = commandLine.HasOption("--unredacted");
    var debug             = commandLine.HasOption("--debug");
    var check             = commandLine.HasOption("--check");
    var uploadCharts      = commandLine.HasOption("--upload-charts") || debug;
    var clusterspace      = commandLine.GetOption("--clusterspace");
    var maxParallelOption = commandLine.GetOption("--max-parallel", "6");
    var disablePending    = commandLine.HasOption("--disable-pending");

    if (!int.TryParse(maxParallelOption, out var maxParallel) || maxParallel <= 0)
    {
        Console.Error.WriteLine($"*** ERROR: [--max-parallel={maxParallelOption}] is not valid.");
        Program.Exit(1);
    }

    clusterLogin = KubeHelper.GetClusterLogin(contextName);

    if (clusterLogin == null)
    {
        Console.Error.WriteLine($"*** ERROR: Be sure to prepare the cluster first via: neon cluster prepare...");
        Program.Exit(1);
    }

    if (string.IsNullOrEmpty(clusterLogin.SshPassword))
    {
        Console.Error.WriteLine($"*** ERROR: No cluster node SSH password found.");
        Program.Exit(1);
    }

    if (kubeCluster != null && !clusterLogin.SetupDetails.SetupPending)
    {
        // The kubeconfig already references this cluster and setup is not pending,
        // so confirm with the user (unless [--force] is present) before removing
        // the existing references.

        if (commandLine.GetOption("--force") == null && !Program.PromptYesNo($"One or more logins reference [{kubeCluster.Name}]. Do you wish to delete these?"))
        {
            Program.Exit(0);
        }

        // Remove the cluster from the kubeconfig and remove any
        // contexts that reference it.

        KubeHelper.Config.Clusters.Remove(kubeCluster);

        // Collect the matching contexts first so the collection isn't
        // modified while it's being enumerated.

        var delList = new List<KubeConfigContext>();

        foreach (var context in KubeHelper.Config.Contexts)
        {
            if (context.Properties.Cluster == kubeCluster.Name)
            {
                delList.Add(context);
            }
        }

        foreach (var context in delList)
        {
            KubeHelper.Config.Contexts.Remove(context);
        }

        if (KubeHelper.CurrentContext != null && KubeHelper.CurrentContext.Properties.Cluster == kubeCluster.Name)
        {
            KubeHelper.Config.CurrentContext = null;
        }

        KubeHelper.Config.Save();
    }

    kubeContext = new KubeConfigContext(contextName);

    KubeHelper.InitContext(kubeContext);

    // Create and run the cluster setup controller.

    var clusterDefinition = clusterLogin.ClusterDefinition;

    var controller = KubeSetup.CreateClusterSetupController(
        clusterDefinition,
        maxParallel:  maxParallel,
        unredacted:   unredacted,
        debugMode:    debug,
        uploadCharts: uploadCharts,
        clusterspace: clusterspace);

    controller.DisablePendingTasks = disablePending;

    controller.StatusChangedEvent +=
        status =>
        {
            status.WriteToConsole();
        };

    switch (await controller.RunAsync())
    {
        case SetupDisposition.Succeeded:

            // Setup is treated as failed when pending task groups were never awaited.

            var pendingGroups = controller.GetPendingGroups();

            if (pendingGroups.Count > 0)
            {
                Console.WriteLine($"*** ERROR: [{pendingGroups.Count}] pending task groups have not been awaited:");
                Console.WriteLine();

                foreach (var groupName in pendingGroups)
                {
                    Console.WriteLine($" {groupName}");
                }

                Program.Exit(1);
            }

            Console.WriteLine();
            Console.WriteLine($" [{clusterDefinition.Name}] cluster is ready.");
            Console.WriteLine();

            // Optionally run the development cluster checks (skipped in debug mode).

            if (check && !debug)
            {
                // Dispose the Kubernetes client once the checks are done, even
                // when a check throws.

                using (var k8s = new Kubernetes(KubernetesClientConfiguration.BuildConfigFromConfigFile(KubeHelper.KubeConfigPath)))
                {
                    if (!await ClusterChecker.CheckAsync(clusterLogin, k8s))
                    {
                        Program.Exit(1);
                    }
                }
            }

            Program.Exit(0);
            break;

        case SetupDisposition.Cancelled:

            Console.WriteLine(" *** CANCELLED: Cluster setup was cancelled.");
            Console.WriteLine();
            Console.WriteLine();
            Program.Exit(1);
            break;

        case SetupDisposition.Failed:

            Console.WriteLine();
            Console.WriteLine(" *** ERROR: Cluster setup failed. Examine the logs here:");
            Console.WriteLine();
            Console.WriteLine($" {KubeHelper.LogFolder}");
            Console.WriteLine();
            Program.Exit(1);
            break;

        default:

            throw new NotImplementedException();
    }
}