/// <inheritdoc/>
public override void AddUpdateSteps(SetupController<NodeDefinition> controller)
{
    base.Initialize(controller);

    controller.AddStep(GetStepLabel("elasticsearch"), (node, stepDelay) => UpdateElasticsearch(node));
    controller.AddStep(GetStepLabel("ceph-fuse"), (node, stepDelay) => UpdateCephFuse(node));

    // $todo(jeff.lill):
    //
    // Update these component scripts to remove this secret: neon-hivemq-neon
    //
    //      neon-hive-manager.sh
    //      neon-proxy-manager.sh
    //      neon-proxy-public.sh
    //      neon-proxy-private.sh
    //
    // Remove these Docker secrets after updating services:
    //
    //      neon-hivemq-neon
    //      neon-hivemq-sysadmin
    //      neon-hivemq-app
    //
    // We also need to reconfigure the AMQP private [neon-hivemq-ampq] traffic manager rule
    // as TCP because older builds incorrectly configured this as an HTTP proxy.

    controller.AddGlobalStep(GetStepLabel("hivemq-settings"), () => UpdateHiveMQSettings());
    controller.AddGlobalStep(GetStepLabel("hivemq cluster name"), () => UpdateHiveMQClusterName());
    controller.AddGlobalStep(GetStepLabel("rename log-retention-days"), () => UpdateLogRetentionDays());
    controller.AddGlobalStep(GetStepLabel("proxy cache services"), () => UpdateProxyCacheServices());
    controller.AddStep(GetStepLabel("edit proxy bridge scripts"), (node, stepDelay) => UpdateProxyBridgeScripts(node));
}
/// <inheritdoc/>
public override void AddUpdateSteps(SetupController<NodeDefinition> controller)
{
    base.Initialize(controller);

    controller.AddGlobalStep(GetStepLabel("make neon-registry LB rule private"), () => PrivateRegistryRule());
    controller.AddStep(GetStepLabel("remove docker python module"), (node, stepDelay) => RemoveDockerPython(node));
    controller.AddStep(GetStepLabel("edit /etc/hosts"), (node, stepDelay) => EditEtcHosts(node));
}
/// <summary>
/// Scans the hive and adds the steps to a <see cref="SetupController{NodeMetadata}"/> required
/// to update the hive to the most recent version.
/// </summary>
/// <param name="hive">The target hive proxy.</param>
/// <param name="controller">The setup controller.</param>
/// <param name="restartRequired">Returns as <c>true</c> if one or more cluster nodes will be restarted during the update.</param>
/// <param name="servicesOnly">Optionally indicates that only hive service and container images should be updated.</param>
/// <param name="serviceUpdateParallism">Optionally specifies the parallelism to use when updating services.</param>
/// <param name="imageTag">Optionally overrides the default image tag.</param>
/// <returns>The number of pending updates.</returns>
/// <exception cref="HiveException">Thrown if there was an error selecting the updates.</exception>
public static int AddHiveUpdateSteps(HiveProxy hive, SetupController<NodeDefinition> controller, out bool restartRequired, bool servicesOnly = false, int serviceUpdateParallism = 1, string imageTag = null)
{
    Covenant.Requires<ArgumentNullException>(hive != null);

    restartRequired = false;

    var pendingUpdateCount = 0;

    // Obtain and parse the current hive version.

    if (!SemanticVersion.TryParse(hive.Globals.Version, out var hiveVersion))
    {
        throw new HiveException($"Unable to retrieve or parse the hive version global [{HiveGlobals.Version}].");
    }

    if (!servicesOnly)
    {
        // Scan for the first update that applies.

        var firstUpdate = Updates
            .Where(u => u.FromVersion >= hiveVersion)
            .OrderBy(u => u.FromVersion)
            .FirstOrDefault();

        if (firstUpdate != null)
        {
            // Determine which updates apply. We're going to sort the available updates
            // in ascending order by [FromVersion] and then in descending order by [ToVersion]
            // to favor overlapping updates that advance the hive the most.

            var nextVersion = firstUpdate.FromVersion;

            foreach (var update in Updates
                .OrderBy(u => u.FromVersion)
                .ThenByDescending(u => u.ToVersion))
            {
                if (update.FromVersion >= nextVersion)
                {
                    pendingUpdateCount++;

                    update.Hive = hive;
                    nextVersion = update.ToVersion;

                    if (!servicesOnly)
                    {
                        update.AddUpdateSteps(controller);

                        if (update.RestartRequired)
                        {
                            restartRequired = true;
                        }
                    }
                }
            }
        }
    }

    var componentInfo    = hive.Headend.GetComponentInfo(hive.Globals.Version, ThisAssembly.Git.Branch);
    var systemContainers = HiveConst.DockerContainers;
    var systemServices   = HiveConst.DockerServices;
    var firstManager     = hive.FirstManager;

    if (hive.Definition.Docker.RegistryCache)
    {
        controller.AddGlobalStep("pull images to cache",
            () =>
            {
                foreach (var container in systemContainers)
                {
                    var image = GetUpdateImage(hive, componentInfo, container, imageTag);

                    if (image != null)
                    {
                        firstManager.Status = $"run: docker pull {image}";
                        firstManager.SudoCommand($"docker pull {image}");
                        firstManager.Status = string.Empty;
                    }
                }

                foreach (var service in systemServices)
                {
                    var image = GetUpdateImage(hive, componentInfo, service, imageTag);

                    if (image != null)
                    {
                        firstManager.Status = $"run: docker pull {image}";
                        firstManager.SudoCommand($"docker pull {image}");
                        firstManager.Status = string.Empty;
                    }
                }
            });
    }

    controller.AddStep("update services",
        (node, stepDelay) =>
        {
            // List the neonHIVE services actually running and only update those.
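            // A sample response from [docker service ls --format "{{.Name}}"] looks
            // like this (the service names below are illustrative):
            //
            //      neon-hive-manager
            //      neon-proxy-manager
            //      neon-proxy-public
            //      neon-proxy-private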
            var runningServices = new HashSet<string>();
            var response        = node.SudoCommand("docker service ls --format \"{{.Name}}\"");

            using (var reader = new StringReader(response.OutputText))
            {
                foreach (var service in reader.Lines())
                {
                    runningServices.Add(service);
                }
            }

            foreach (var service in systemServices.Where(s => runningServices.Contains(s)))
            {
                var image = GetUpdateImage(hive, componentInfo, service, imageTag);

                if (image != null)
                {
                    // $todo(jeff.lill):
                    //
                    // We should check the service image to see if we actually need to perform an
                    // upgrade. There's no point in restarting the service instances unnecessarily.
                    //
                    //      https://github.com/jefflill/NeonForge/issues/378

                    firstManager.Status = $"update: {image}";
                    node.SudoCommand($"docker service update --force --image {image} --update-parallelism {serviceUpdateParallism} {service}");
                    firstManager.Status = string.Empty;

                    // Update the service creation scripts on all manager nodes for all built-in
                    // services. Note that this depends on how [ServicesBase.CreateStartScript()]
                    // formatted the generated code at the top of the script.

                    foreach (var manager in hive.Managers)
                    {
                        UpdateStartScript(manager, service, $"{image}");
                    }
                }
            }
        },
        node => node == firstManager);

    controller.AddGlobalStep("update containers",
        () =>
        {
            // $todo(jeff.lill):
            //
            // We should check the service image to see if we actually need to perform an
            // upgrade. There's no point in restarting the service instances unnecessarily.
            //
            //      https://github.com/jefflill/NeonForge/issues/378

            // We're going to update containers on each node, one node at a time
            // and then stabilize for a period of time before moving on to the
            // next node. This will help keep clustered applications like HiveMQ
            // and databases like Couchbase that are deployed as containers happy
            // by not blowing all of the application instances away at the same
            // time while updating.
            //
            // Hopefully, there will be enough time after updating a clustered
            // application container for the container to rejoin the cluster
            // before we update the next node.

            foreach (var node in hive.Nodes)
            {
                // List the neonHIVE containers actually running and only update those.
                // Note that we're going to use the local script to start the container
                // so we don't need to hardcode the Docker options here. We won't restart
                // the container if the script doesn't exist.
                //
                // Note that we'll update and restart the containers in parallel if the
                // hive has a local registry, otherwise we'll just go with the user
                // specified parallelism to avoid overwhelming the network with image
                // downloads.

                // $todo(jeff.lill):
                //
                // A case could be made for having a central place for generating container
                // (and service) scripts for hive setup as well as situations like this.
                // It could also be possible then to be able to scan for and repair missing
                // or incorrect scripts.

                var runningContainers = new HashSet<string>();
                var response          = node.SudoCommand("docker ps --format \"{{.Names}}\"");

                using (var reader = new StringReader(response.OutputText))
                {
                    foreach (var container in reader.Lines())
                    {
                        runningContainers.Add(container);
                    }
                }

                foreach (var container in systemContainers.Where(s => runningContainers.Contains(s)))
                {
                    var image = GetUpdateImage(hive, componentInfo, container, imageTag);

                    if (image != null)
                    {
                        var scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, $"{container}.sh");

                        if (node.FileExists(scriptPath))
                        {
                            // The container has a creation script, so update the script, stop/remove the
                            // container and then run the script to restart the container.
                            UpdateStartScript(node, container, $"{image}");

                            node.Status = $"stop: {container}";
                            node.DockerCommand("docker", "rm", "--force", container);

                            node.Status = $"restart: {container}";
                            node.SudoCommand(scriptPath);
                        }
                        else
                        {
                            var warning = $"WARNING: Container script [{scriptPath}] is not present on this node so we can't update the [{container}] container.";

                            node.Status = warning;
                            node.Log(warning);
                            Thread.Sleep(TimeSpan.FromSeconds(5));
                        }
                    }
                }

                node.Status = $"stabilizing ({Program.WaitSeconds}s)";
                Thread.Sleep(TimeSpan.FromSeconds(Program.WaitSeconds));
                node.Status = "READY";
            }
        });

    return pendingUpdateCount;
}
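// A typical caller (sketch only; assumes a controller created elsewhere) combines
// [AddHiveUpdateSteps()] above with [AddRestartClusterStep()] below:
//
//      var controller = new SetupController<NodeDefinition>("update hive", hive.Nodes);
//      var pending    = AddHiveUpdateSteps(hive, controller, out var restartRequired);
//
//      if (pending > 0 && restartRequired)
//      {
//          AddRestartClusterStep(hive, controller);
//      }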
/// <summary>
/// Adds a global step that restarts the designated cluster nodes one-by-one.
/// </summary>
/// <param name="hive">The hive proxy.</param>
/// <param name="controller">The setup controller.</param>
/// <param name="predicate">
/// Optionally specifies the predicate to be used to select the hive nodes
/// to be rebooted. This defaults to <c>null</c> indicating that all nodes
/// will be rebooted.
/// </param>
/// <param name="stepLabel">
/// Optionally specifies the step label. This defaults to <b>restart nodes</b>.
/// </param>
/// <param name="stablizeTime">
/// The time to wait after the node has been restarted for things
/// to stabilize. This defaults to <see cref="Program.WaitSeconds"/>.
/// </param>
public static void AddRestartClusterStep(
    HiveProxy                       hive,
    SetupController<NodeDefinition> controller,
    Func<NodeDefinition, bool>      predicate    = null,
    string                          stepLabel    = null,
    TimeSpan                        stablizeTime = default(TimeSpan))
{
    Covenant.Requires<ArgumentNullException>(hive != null);
    Covenant.Requires<ArgumentNullException>(controller != null);

    predicate = predicate ?? (node => true);
    stepLabel = stepLabel ?? "restart nodes";

    if (stablizeTime <= TimeSpan.Zero)
    {
        stablizeTime = TimeSpan.FromSeconds(Program.WaitSeconds);
    }

    controller.AddGlobalStep(stepLabel,
        () =>
        {
            foreach (var node in hive.Nodes.Where(n => predicate(n.Metadata)))
            {
                node.Status = "restart pending";
            }

            // We're going to restart selected nodes by type in this order:
            //
            //      Managers
            //      Workers
            //      Pets

            var restartNode = new Action<SshProxy<NodeDefinition>>(
                node =>
                {
                    node.Status = "restart";
                    node.Reboot(wait: true);
                    node.Status = $"stabilize ({stablizeTime.TotalSeconds}s)";
                    Thread.Sleep(stablizeTime);
                    node.Status = "READY";
                });

            // Manager nodes.

            foreach (var node in hive.Nodes.Where(n => n.Metadata.IsManager && predicate(n.Metadata)))
            {
                restartNode(node);
            }

            // Worker nodes.

            foreach (var node in hive.Nodes.Where(n => n.Metadata.IsWorker && predicate(n.Metadata)))
            {
                restartNode(node);
            }

            // Pet nodes.

            foreach (var node in hive.Nodes.Where(n => n.Metadata.IsPet && predicate(n.Metadata)))
            {
                restartNode(node);
            }

            // Clear the node status.

            foreach (var node in hive.Nodes)
            {
                node.Status = string.Empty;
            }
        });
}
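// Example usage of [AddRestartClusterStep()] above (hypothetical): restart only
// the worker nodes, waiting 60 seconds for each to stabilize:
//
//      AddRestartClusterStep(hive, controller,
//          predicate:    node => node.IsWorker,
//          stablizeTime: TimeSpan.FromSeconds(60));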
/// <inheritdoc/>
public override void Run(CommandLine commandLine)
{
    if (commandLine.HasHelpOption)
    {
        Help();
        Program.Exit(0);
    }

    // Special-case handling of the [--remove-templates] option.

    if (commandLine.HasOption("--remove-templates"))
    {
        Console.WriteLine("Removing cached virtual machine templates.");

        foreach (var fileName in Directory.GetFiles(HiveHelper.GetVmTemplatesFolder(), "*.*", SearchOption.TopDirectoryOnly))
        {
            File.Delete(fileName);
        }

        Program.Exit(0);
    }

    // Implement the command.

    packageCacheUri = commandLine.GetOption("--package-cache");   // This overrides the hive definition, if specified.

    if (Program.HiveLogin != null)
    {
        Console.Error.WriteLine("*** ERROR: You are logged into a hive. You need to logout before preparing another.");
        Program.Exit(1);
    }

    if (commandLine.Arguments.Length == 0)
    {
        Console.Error.WriteLine($"*** ERROR: HIVE-DEF expected.");
        Program.Exit(1);
    }

    hiveDefPath = commandLine.Arguments[0];
    force       = commandLine.GetFlag("--force");

    HiveDefinition.ValidateFile(hiveDefPath, strict: true);

    var hiveDefinition = HiveDefinition.FromFile(hiveDefPath, strict: true);

    hiveDefinition.Provisioner = $"neon-cli:{Program.Version}";   // Identify this tool/version as the hive provisioner.

    // NOTE:
    //
    // Azure has implemented a more restrictive password policy and our
    // default machine password does not meet the requirements:
    //
    //      The supplied password must be between 6-72 characters long and must
    //      satisfy at least 3 of password complexity requirements from the following:
    //
    //          1. Contains an uppercase character
    //          2. Contains a lowercase character
    //          3. Contains a numeric digit
    //          4. Contains a special character
    //          5. Control characters are not allowed
    //
    // It's also probably not a great idea to use a static password when
    // provisioning VMs in public clouds because it might be possible for
    // somebody to use this fact to SSH into nodes while the hive is being
    // set up and before we set the secure password at the end.
    //
    // This is less problematic for non-cloud environments because it's
    // likely that the hosts won't initially be able to receive inbound
    // Internet traffic and besides, we need to have a known password
    // embedded into the VM templates.
    //
    // We're going to handle this for cloud environments by looking
    // at [Program.MachinePassword]. If this is set to the default
    // machine password then we're going to replace it with a randomly
    // generated password with a few extra characters to ensure that
    // it meets the target cloud's password requirements. We'll use
    // a non-default password if the operator specified one.

    if (hiveDefinition.Hosting.IsCloudProvider && Program.MachinePassword == HiveConst.DefaulVmTemplatePassword)
    {
        Program.MachinePassword = NeonHelper.GetRandomPassword(20);

        // Append a string that guarantees that the generated password meets
        // cloud minimum requirements.

        Program.MachinePassword += ".Aa0";
    }

    // Note that hive prepare starts new log files.

    hive = new HiveProxy(hiveDefinition, Program.CreateNodeProxy<NodeDefinition>, appendLog: false, useBootstrap: true, defaultRunOptions: RunOptions.LogOutput | RunOptions.FaultOnError);

    if (File.Exists(Program.GetHiveLoginPath(HiveConst.RootUser, hive.Definition.Name)))
    {
        Console.Error.WriteLine($"*** ERROR: A hive login named [{HiveConst.RootUser}@{hive.Definition.Name}] already exists.");
        Program.Exit(1);
    }

    Program.OSProperties = OSProperties.For(hiveDefinition.HiveNode.OperatingSystem);

    // Configure global options.
if (commandLine.HasOption("--unredacted")) { hive.SecureRunOptions = RunOptions.None; } //----------------------------------------------------------------- // $todo(jeff.lill): // // We're temporarily disabling redaction to make it easier to investigate // Vault setup issues. Remove this line before final launch. // // https://github.com/jefflill/NeonForge/issues/225 hive.SecureRunOptions = RunOptions.None; //----------------------------------------------------------------- // Assign the VPN client return subnets to the manager nodes if VPN is enabled. if (hive.Definition.Vpn.Enabled) { var vpnSubnet = NetworkCidr.Parse(hive.Definition.Network.VpnPoolSubnet); var prefixLength = 25; var nextVpnSubnetAddress = vpnSubnet.Address; // Note that we're not going to assign the first block of addresses in the // VPN subnet to any managers to prevent conflicts with addresses reserved // by some cloud platforms at the beginning of a subnet. Azure for example // reserves 4 IP addresses for DNS servers and platform provided VPNs. foreach (var manager in hive.Definition.SortedManagers) { var managerVpnSubnet = new NetworkCidr(NetHelper.AddressIncrement(nextVpnSubnetAddress, VpnOptions.ServerAddressCount), prefixLength); manager.VpnPoolSubnet = managerVpnSubnet.ToString(); nextVpnSubnetAddress = managerVpnSubnet.NextAddress; } } //----------------------------------------------------------------- // Try to ensure that no servers are already deployed on the IP addresses defined // for hive nodes because provisoning over an existing hive will likely // corrupt the existing hive and also probably prevent the new hive from // provisioning correctly. // // Note that we're not going to perform this check for the [Machine] hosting // environment because we're expecting the bare machines to be already running // with the assigned addresses and we're also not going to do this for cloud // environments because we're assuming that the hive will run in its own private // network so there'll ne no possibility of conflicts. if (hive.Definition.Hosting.Environment != HostingEnvironments.Machine && !hive.Definition.Hosting.IsCloudProvider) { Console.WriteLine(); Console.WriteLine("Scanning for IP address conflicts..."); Console.WriteLine(); var pingOptions = new PingOptions(ttl: 32, dontFragment: true); var pingTimeout = TimeSpan.FromSeconds(2); var pingConflicts = new List <NodeDefinition>(); var pingAttempts = 2; // I'm going to use up to 20 threads at a time here for simplicity // rather then doing this as async operations. var parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = 20 }; Parallel.ForEach(hive.Definition.NodeDefinitions.Values, parallelOptions, node => { using (var ping = new Ping()) { // We're going to try pinging up to [pingAttempts] times for each node // just in case the network it sketchy and we're losing reply packets. 
                    for (int i = 0; i < pingAttempts; i++)
                    {
                        var reply = ping.Send(node.PrivateAddress, (int)pingTimeout.TotalMilliseconds);

                        if (reply.Status == IPStatus.Success)
                        {
                            lock (pingConflicts)
                            {
                                pingConflicts.Add(node);
                            }

                            break;
                        }
                    }
                }
            });

        if (pingConflicts.Count > 0)
        {
            Console.Error.WriteLine($"*** ERROR: Cannot provision the hive because [{pingConflicts.Count}] other");
            Console.Error.WriteLine($"*** machines conflict with the following hive nodes:");
            Console.Error.WriteLine();

            foreach (var node in pingConflicts.OrderBy(n => NetHelper.AddressToUint(IPAddress.Parse(n.PrivateAddress))))
            {
                Console.Error.WriteLine($"{node.PrivateAddress,16}: {node.Name}");
            }

            Program.Exit(1);
        }
    }

    //-----------------------------------------------------------------
    // Perform basic environment provisioning. This creates basic hive components
    // such as virtual machines, networks, load balancers, public IP addresses, security
    // groups,... as required for the environment.

    hostingManager = new HostingManagerFactory(() => HostingLoader.Initialize()).GetManager(hive, Program.LogPath);

    if (hostingManager == null)
    {
        Console.Error.WriteLine($"*** ERROR: No hosting manager for the [{hive.Definition.Hosting.Environment}] hosting environment could be located.");
        Program.Exit(1);
    }

    hostingManager.HostUsername = Program.MachineUsername;
    hostingManager.HostPassword = Program.MachinePassword;
    hostingManager.ShowStatus   = !Program.Quiet;
    hostingManager.MaxParallel  = Program.MaxParallel;
    hostingManager.WaitSeconds  = Program.WaitSeconds;

    if (hostingManager.RequiresAdminPrivileges)
    {
        Program.VerifyAdminPrivileges($"Provisioning to [{hive.Definition.Hosting.Environment}] requires elevated administrator privileges.");
    }

    if (!hostingManager.Provision(force))
    {
        Program.Exit(1);
    }

    // Get the mounted drive prefix from the hosting manager.

    hive.Definition.DrivePrefix = hostingManager.DrivePrefix;

    // Ensure that the nodes have valid IP addresses.

    hive.Definition.ValidatePrivateNodeAddresses();

    var ipAddressToServer = new Dictionary<IPAddress, SshProxy<NodeDefinition>>();

    foreach (var node in hive.Nodes.OrderBy(n => n.Name))
    {
        SshProxy<NodeDefinition> duplicateServer;

        if (node.PrivateAddress == IPAddress.Any)
        {
            throw new ArgumentException($"Node [{node.Name}] has not been assigned an IP address.");
        }

        if (ipAddressToServer.TryGetValue(node.PrivateAddress, out duplicateServer))
        {
            throw new ArgumentException($"Nodes [{duplicateServer.Name}] and [{node.Name}] have the same IP address [{node.Metadata.PrivateAddress}].");
        }

        ipAddressToServer.Add(node.PrivateAddress, node);
    }

    //-----------------------------------------------------------------
    // Perform basic node provisioning including operating system updates & configuration,
    // and configure OpenVPN on the manager nodes so that hive setup will be
    // able to reach the nodes on all ports.

    // Write the operation begin marker to all hive node logs.

    hive.LogLine(logBeginMarker);

    var operation = $"Preparing [{hive.Definition.Name}] nodes";

    var controller = new SetupController<NodeDefinition>(operation, hive.Nodes)
    {
        ShowStatus  = !Program.Quiet,
        MaxParallel = Program.MaxParallel
    };

    if (!string.IsNullOrEmpty(packageCacheUri))
    {
        hive.Definition.PackageProxy = packageCacheUri;
    }

    // Prepare the nodes.
    controller.AddWaitUntilOnlineStep(timeout: TimeSpan.FromMinutes(15));

    hostingManager.AddPostProvisionSteps(controller);

    controller.AddStep("verify OS",
        (node, stepDelay) =>
        {
            Thread.Sleep(stepDelay);
            CommonSteps.VerifyOS(node);
        });

    controller.AddStep("prepare",
        (node, stepDelay) =>
        {
            Thread.Sleep(stepDelay);
            CommonSteps.PrepareNode(node, hive.Definition, shutdown: false);
        },
        stepStaggerSeconds: hive.Definition.Setup.StepStaggerSeconds);

    // Add any VPN configuration steps.

    if (hive.Definition.Vpn.Enabled)
    {
        controller.AddGlobalStep("vpn credentials", () => CreateVpnCredentials());

        controller.AddStep("vpn server",
            (node, stepDelay) =>
            {
                Thread.Sleep(stepDelay);
                ConfigManagerVpn(node);
            },
            node => node.Metadata.IsManager);

        // Add a step to establish a VPN connection if we're provisioning to a cloud.
        // We specifically don't want to do this if we're provisioning to an on-premise
        // datacenter because we're assuming that we're already directly connected to
        // the LAN while preparing and setting up the hive.

        if (hive.Definition.Hosting.IsCloudProvider)
        {
            controller.AddStep("vpn connect",
                (manager, stepDelay) =>
                {
                    Thread.Sleep(stepDelay);

                    // Create a hive login with just enough credentials to connect the VPN.
                    // Note that this isn't really a node specific command but I wanted to
                    // be able to display the connection status somewhere.

                    var vpnLogin = new HiveLogin()
                    {
                        Definition     = hive.Definition,
                        VpnCredentials = vpnCredentials
                    };

                    // Ensure that we don't have an old VPN client for the hive running.

                    HiveHelper.VpnClose(vpnLogin.Definition.Name);

                    // ...and then start a new one.

                    HiveHelper.VpnOpen(vpnLogin,
                        onStatus: message => manager.Status = $"{message}",
                        onError: message => manager.Status = $"ERROR: {message}");
                },
                n => n == hive.FirstManager);
        }

        // Perform any post-VPN setup provisioning required by the hosting provider.

        hostingManager.AddPostVpnSteps(controller);
    }

    if (!controller.Run())
    {
        // Write the operation end/failed marker to all hive node logs.

        hive.LogLine(logFailedMarker);

        Console.Error.WriteLine("*** ERROR: One or more configuration steps failed.");
        Program.Exit(1);
    }

    // Write the hive login file.

    var hiveLoginPath = Program.GetHiveLoginPath(HiveConst.RootUser, hive.Definition.Name);

    var hiveLogin = new HiveLogin()
    {
        Path                 = hiveLoginPath,
        Username             = HiveConst.RootUser,
        Definition           = hive.Definition,
        SshUsername          = Program.MachineUsername,
        SshPassword          = Program.MachinePassword,
        SshProvisionPassword = Program.MachinePassword,
        SetupPending         = true
    };

    if (hive.Definition.Vpn.Enabled)
    {
        hiveLogin.VpnCredentials = vpnCredentials;
    }

    // Generate the hive certificates.

    const int bitCount  = 2048;
    const int validDays = 365000;   // About 1,000 years.

    if (hiveLogin.HiveCertificate == null)
    {
        var hostnames = new string[]
            {
                $"{hive.Name}.nhive.io",
                $"*.{hive.Name}.nhive.io",
                $"*.neon-vault.{hive.Name}.nhive.io",
                $"*.neon-registry-cache.{hive.Name}.nhive.io",
                $"*.neon-hivemq.{hive.Name}.nhive.io"
            };

        hiveLogin.HiveCertificate = TlsCertificate.CreateSelfSigned(hostnames, bitCount, validDays,
            issuedBy: "neonHIVE",
            issuedTo: $"neonHIVE: {hiveDefinition.Name}");

        hiveLogin.HiveCertificate.FriendlyName = $"neonHIVE: {hiveLogin.Definition.Name}";
    }

    // Persist the certificates into the hive login.

    hiveLogin.Save();

    // Write the operation end marker to all hive node logs.

    hive.LogLine(logEndMarker);
}
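// Example invocation handled by the [Run()] method above (sketch only; the file
// name is hypothetical and the actual command syntax may differ):
//
//      neon hive prepare my-hive.json --package-cache=http://10.0.0.5:3142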
/// <inheritdoc/>
public override void Run(CommandLine commandLine)
{
    if (commandLine.HasHelpOption)
    {
        Help();
        Program.Exit(0);
    }

    // Special-case handling of the [--remove-templates] option.

    if (commandLine.HasOption("--remove-templates"))
    {
        Console.WriteLine("Removing cached virtual machine templates.");

        foreach (var fileName in Directory.GetFiles(KubeHelper.VmTemplatesFolder, "*.*", SearchOption.TopDirectoryOnly))
        {
            File.Delete(fileName);
        }

        Program.Exit(0);
    }

    // Implement the command.

    if (KubeHelper.CurrentContext != null)
    {
        Console.Error.WriteLine("*** ERROR: You are logged into a cluster. You need to logout before preparing another.");
        Program.Exit(1);
    }

    if (commandLine.Arguments.Length == 0)
    {
        Console.Error.WriteLine($"*** ERROR: CLUSTER-DEF expected.");
        Program.Exit(1);
    }

    clusterDefPath = commandLine.Arguments[0];
    force          = commandLine.GetFlag("--force");

    ClusterDefinition.ValidateFile(clusterDefPath, strict: true);

    var clusterDefinition = ClusterDefinition.FromFile(clusterDefPath, strict: true);

    clusterDefinition.Provisioner = $"neon-cli:{Program.Version}";   // Identify this tool/version as the cluster provisioner.

    // NOTE:
    //
    // Azure has a more restrictive password policy and our default
    // machine password does not meet the requirements:
    //
    //      The supplied password must be between 6-72 characters long and must
    //      satisfy at least 3 of password complexity requirements from the following:
    //
    //          1. Contains an uppercase character
    //          2. Contains a lowercase character
    //          3. Contains a numeric digit
    //          4. Contains a special character
    //          5. Control characters are not allowed
    //
    // It's also probably not a great idea to use a static password when
    // provisioning VMs in public clouds because it might be possible for
    // somebody to use this fact to SSH into nodes while the cluster is
    // being set up and before we set the secure password at the end.
    //
    // This is less problematic for non-cloud environments because it's
    // likely that the hosts won't initially be able to receive inbound
    // Internet traffic and besides, we need to have a known password
    // embedded into the VM templates.
    //
    // We're going to handle this for cloud environments by looking
    // at [Program.MachinePassword]. If this is set to the default
    // machine password then we're going to replace it with a randomly
    // generated password with a few extra characters to ensure that
    // it meets the target cloud's password requirements. We'll use
    // a non-default password if the operator specified one.

    if (clusterDefinition.Hosting.IsCloudProvider && Program.MachinePassword == KubeConst.DefaulVmTemplatePassword)
    {
        Program.MachinePassword = NeonHelper.GetCryptoRandomPassword(20);

        // Append a string that guarantees that the generated password meets
        // cloud minimum requirements.

        Program.MachinePassword += ".Aa0";
    }

    // NOTE: Cluster prepare starts new log files.

    cluster = new ClusterProxy(clusterDefinition, Program.CreateNodeProxy<NodeDefinition>, appendToLog: false, defaultRunOptions: RunOptions.LogOutput | RunOptions.FaultOnError);

    if (KubeHelper.Config.GetContext(cluster.Definition.Name) != null)
    {
        Console.Error.WriteLine($"*** ERROR: A context named [{cluster.Definition.Name}] already exists.");
        Program.Exit(1);
    }

    // Configure global options.
if (commandLine.HasOption("--unredacted")) { cluster.SecureRunOptions = RunOptions.None; } var failed = false; try { KubeHelper.Desktop.StartOperationAsync($"Preparing [{cluster.Name}]").Wait(); //----------------------------------------------------------------- // Try to ensure that no servers are already deployed on the IP addresses defined // for cluster nodes because provisoning over an existing cluster will likely // corrupt the existing cluster and also probably prevent the new cluster from // provisioning correctly. // // Note that we're not going to perform this check for the [Machine] hosting // environment because we're expecting the bare machines to be already running // with the assigned addresses and we're also not going to do this for cloud // environments because we're assuming that the cluster will run in its own // private network so there'll ne no possibility of conflicts. if (cluster.Definition.Hosting.Environment != HostingEnvironments.Machine && !cluster.Definition.Hosting.IsCloudProvider) { Console.WriteLine(); Console.WriteLine(" Scanning for IP address conflicts..."); Console.WriteLine(); var pingOptions = new PingOptions(ttl: 32, dontFragment: true); var pingTimeout = TimeSpan.FromSeconds(2); var pingConflicts = new List <NodeDefinition>(); var pingAttempts = 2; // I'm going to use up to 20 threads at a time here for simplicity // rather then doing this as async operations. var parallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = 20 }; Parallel.ForEach(cluster.Definition.NodeDefinitions.Values, parallelOptions, node => { using (var pinger = new Pinger()) { // We're going to try pinging up to [pingAttempts] times for each node // just in case the network it sketchy and we're losing reply packets. for (int i = 0; i < pingAttempts; i++) { var reply = pinger.SendPingAsync(node.PrivateAddress, (int)pingTimeout.TotalMilliseconds).Result; if (reply.Status == IPStatus.Success) { lock (pingConflicts) { pingConflicts.Add(node); } break; } } } }); if (pingConflicts.Count > 0) { Console.Error.WriteLine($"*** ERROR: Cannot provision the cluster because [{pingConflicts.Count}] other"); Console.Error.WriteLine($"*** machines conflict with the following cluster nodes:"); Console.Error.WriteLine(); foreach (var node in pingConflicts.OrderBy(n => NetHelper.AddressToUint(IPAddress.Parse(n.PrivateAddress)))) { Console.Error.WriteLine($"{node.PrivateAddress, 16}: {node.Name}"); } Program.Exit(1); } } //----------------------------------------------------------------- // Perform basic environment provisioning. This creates basic cluster components // such as virtual machines, networks, load balancers, public IP addresses, security // groups,... as required for the environment. 
        hostingManager = new HostingManagerFactory(() => HostingLoader.Initialize()).GetManager(cluster, Program.LogPath);

        if (hostingManager == null)
        {
            Console.Error.WriteLine($"*** ERROR: No hosting manager for the [{cluster.Definition.Hosting.Environment}] hosting environment could be located.");
            Program.Exit(1);
        }

        hostingManager.HostUsername = Program.MachineUsername;
        hostingManager.HostPassword = Program.MachinePassword;
        hostingManager.ShowStatus   = !Program.Quiet;
        hostingManager.MaxParallel  = Program.MaxParallel;
        hostingManager.WaitSeconds  = Program.WaitSeconds;

        if (hostingManager.RequiresAdminPrivileges)
        {
            Program.VerifyAdminPrivileges($"Provisioning to [{cluster.Definition.Hosting.Environment}] requires elevated administrator privileges.");
        }

        if (!hostingManager.Provision(force))
        {
            Program.Exit(1);
        }

        // Get the mounted drive prefix from the hosting manager.

        cluster.Definition.DrivePrefix = hostingManager.DrivePrefix;

        // Ensure that the nodes have valid IP addresses.

        cluster.Definition.ValidatePrivateNodeAddresses();

        var ipAddressToServer = new Dictionary<IPAddress, SshProxy<NodeDefinition>>();

        foreach (var node in cluster.Nodes.OrderBy(n => n.Name))
        {
            SshProxy<NodeDefinition> duplicateServer;

            if (node.PrivateAddress == IPAddress.Any)
            {
                throw new ArgumentException($"Node [{node.Name}] has not been assigned an IP address.");
            }

            if (ipAddressToServer.TryGetValue(node.PrivateAddress, out duplicateServer))
            {
                throw new ArgumentException($"Nodes [{duplicateServer.Name}] and [{node.Name}] have the same IP address [{node.Metadata.PrivateAddress}].");
            }

            ipAddressToServer.Add(node.PrivateAddress, node);
        }

        // We're going to use the masters as package caches unless the user
        // specifies something else.

        packageCaches = commandLine.GetOption("--package-cache");   // This overrides the cluster definition, if specified.

        if (!string.IsNullOrEmpty(packageCaches))
        {
            cluster.Definition.PackageProxy = packageCaches;
        }

        if (string.IsNullOrEmpty(cluster.Definition.PackageProxy))
        {
            var sbProxies = new StringBuilder();

            foreach (var master in cluster.Masters)
            {
                sbProxies.AppendWithSeparator($"{master.PrivateAddress}:{NetworkPorts.AppCacherNg}");
            }

            cluster.Definition.PackageProxy = sbProxies.ToString();
        }

        //-----------------------------------------------------------------
        // Prepare the cluster.

        // Write the operation begin marker to all cluster node logs.

        cluster.LogLine(logBeginMarker);

        var nodesText = cluster.Nodes.Count() == 1 ? "node" : "nodes";
        var operation = $"Preparing [{cluster.Definition.Name}] {nodesText}";

        var controller = new SetupController<NodeDefinition>(operation, cluster.Nodes)
        {
            ShowStatus  = !Program.Quiet,
            MaxParallel = Program.MaxParallel
        };

        controller.AddGlobalStep("setup details",
            () =>
            {
                using (var client = new HeadendClient())
                {
                    kubeSetupInfo = client.GetSetupInfoAsync(cluster.Definition).Result;
                }
            });

        // Prepare the nodes.

        controller.AddWaitUntilOnlineStep(timeout: TimeSpan.FromMinutes(15));
        hostingManager.AddPostProvisionSteps(controller);
        controller.AddStep("verify OS", CommonSteps.VerifyOS);

        controller.AddStep("prepare",
            (node, stepDelay) =>
            {
                Thread.Sleep(stepDelay);
                CommonSteps.PrepareNode(node, cluster.Definition, kubeSetupInfo, shutdown: false);
            },
            stepStaggerSeconds: cluster.Definition.Setup.StepStaggerSeconds);

        if (!controller.Run())
        {
            // Write the operation end/failed marker to all cluster node logs.

            cluster.LogLine(logFailedMarker);

            Console.Error.WriteLine("*** ERROR: One or more configuration steps failed.");
            Program.Exit(1);
        }

        // Persist the cluster context extension.
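        // Context names follow the Kubernetes [user@cluster] convention; for a
        // hypothetical cluster named [mycluster], this persists the extension
        // state under the [{KubeConst.RootUser}@mycluster] context name.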
        var contextExtensionsPath = KubeHelper.GetContextExtensionPath((KubeContextName)$"{KubeConst.RootUser}@{clusterDefinition.Name}");
        var contextExtension      = new KubeContextExtension(contextExtensionsPath)
        {
            ClusterDefinition = clusterDefinition,
            SshUsername       = Program.MachineUsername,
            SshPassword       = Program.MachinePassword,
            SetupDetails      = new KubeSetupDetails() { SetupPending = true }
        };

        contextExtension.Save();

        // Write the operation end marker to all cluster node logs.

        cluster.LogLine(logEndMarker);
    }
    catch
    {
        failed = true;
        throw;
    }
    finally
    {
        if (!failed)
        {
            KubeHelper.Desktop.EndOperationAsync($"Cluster [{cluster.Name}] has been prepared and is ready for setup.").Wait();
        }
        else
        {
            KubeHelper.Desktop.EndOperationAsync($"Cluster [{cluster.Name}] prepare has failed.", failed: true).Wait();
        }
    }
}
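// Example invocation handled by the [Run()] method above (sketch only; the file
// name is hypothetical and the actual command syntax may differ):
//
//      neon cluster prepare my-cluster.yaml --package-cache=http://10.0.0.5:3142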