/// <summary>
/// Starts a neonHIVE related Docker container on a node and also uploads a script
/// to make it easy to restart the container manually or for hive updates.
/// </summary>
/// <param name="node">The target hive node.</param>
/// <param name="containerName">Identifies the container.</param>
/// <param name="image">The Docker image to be used by the container.</param>
/// <param name="runOptions">Optional run options (defaults to <see cref="RunOptions.FaultOnError"/>).</param>
/// <param name="commands">The commands required to start the container.</param>
/// <remarks>
/// <para>
/// This method performs the following steps:
/// </para>
/// <list type="number">
/// <item>
/// Passes <paramref name="image"/> to <see cref="Program.ResolveDockerImage(string)"/> to
/// obtain the actual image to be started.
/// </item>
/// <item>
/// Generates the first few lines of the script file that set the
/// default image as the <c>TARGET_IMAGE</c> macro and then override
/// this with the script parameter (if there is one).
/// </item>
/// <item>
/// Appends the commands to the script, replacing any text that matches
/// <see cref="ImagePlaceholderArg"/> with <c>${TARGET_IMAGE}</c> to make it easy
/// for services to be upgraded later.
/// </item>
/// <item>
/// Uploads the generated script to the node at [<see cref="HiveHostFolders.Scripts"/>/<paramref name="containerName"/>.sh].
/// </item>
/// <item>
/// Starts the container by running the script.
/// </item>
/// </list>
/// </remarks>
public static void StartContainer(SshProxy<NodeDefinition> node, string containerName, string image, RunOptions runOptions = RunOptions.FaultOnError, params IBashCommandFormatter[] commands)
{
    Covenant.Requires<ArgumentNullException>(node != null);
    Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(containerName));
    Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(image));
    Covenant.Requires<ArgumentNullException>(commands != null);
    Covenant.Requires<ArgumentNullException>(commands.Length > 0);

    node.Status = $"start: {containerName}";

    // Generate the container start script.

    var script = CreateStartScript(containerName, image, true, commands);

    // Upload the script to the target node and set permissions.

    var scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, $"{containerName}.sh");

    node.UploadText(scriptPath, script);
    node.SudoCommand($"chmod 740 {scriptPath}");

    // Run the script without a parameter to start the container.

    node.IdempotentDockerCommand($"setup/{containerName}", null, runOptions, scriptPath);

    node.Status = string.Empty;
}
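// For reference, the generated start script is expected to look something like
// the following (a sketch only; the actual output of CreateStartScript() may
// differ and the container name/image shown are hypothetical):
//
//      #!/bin/bash
//      #------------------------------------------------------------------
//      # Set the default image and then allow it to be overridden by the
//      # first script parameter (used by hive updates).
//
//      TARGET_IMAGE=nhive/neon-log-host:latest
//
//      if [ ! -z "$1" ] ; then
//          TARGET_IMAGE=$1
//      fi
//
//      # The start commands follow, with any image references replaced
//      # by ${TARGET_IMAGE}:
//
//      docker run --detach --name neon-log-host ${TARGET_IMAGE}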
/// <summary>
/// <para>
/// Edits the [/etc/hosts] file on all hive nodes so that the line:
/// </para>
/// <code>
/// 127.0.1.1    {hostname}
/// </code>
/// <para>
/// is changed to:
/// </para>
/// <code>
/// {node.PrivateAddress}    {hostname}
/// </code>
/// <para>
/// HashiCorp Vault cannot restart with the old setting, complaining about a
/// <b>"missing API address"</b>.
/// </para>
/// </summary>
/// <param name="node">The target node.</param>
private void EditEtcHosts(SshProxy<NodeDefinition> node)
{
    node.InvokeIdempotentAction(GetIdempotentTag("edit-etc-hosts"),
        () =>
        {
            var etcHosts   = node.DownloadText("/etc/hosts");
            var sbEtcHosts = new StringBuilder();

            using (var reader = new StringReader(etcHosts))
            {
                foreach (var line in reader.Lines())
                {
                    if (line.StartsWith("127.0.1.1"))
                    {
                        var nodeAddress = node.PrivateAddress.ToString();
                        var separator   = new string(' ', Math.Max(16 - nodeAddress.Length, 1));

                        sbEtcHosts.AppendLine($"{nodeAddress}{separator}{node.Name}");
                    }
                    else
                    {
                        sbEtcHosts.AppendLine(line);
                    }
                }
            }

            node.UploadText("/etc/hosts", sbEtcHosts.ToString(), permissions: "644");
            node.SudoCommand("systemctl restart vault");
        });
}
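// For example, on a node named [manager-0] with private address [10.0.1.10]
// (hypothetical values), the line:
//
//      127.0.1.1       manager-0
//
// becomes:
//
//      10.0.1.10       manager-0
//
// after which Vault is restarted so it can obtain its API address.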
/// <summary>
/// Edits the [neon-proxy-public-bridge.sh] and [neon-proxy-private-bridge.sh]
/// scripts to remove the [VAULT_CREDENTIALS] environment variable so the new
/// .NET based proxy bridge image will work properly.
/// </summary>
/// <param name="node">The target node.</param>
private void UpdateProxyBridgeScripts(SshProxy<NodeDefinition> node)
{
    var scriptNames =
        new string[]
        {
            "neon-proxy-public-bridge.sh",
            "neon-proxy-private-bridge.sh"
        };

    foreach (var scriptName in scriptNames)
    {
        var scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, scriptName);
        var scriptText = node.DownloadText(scriptPath);
        var sbEdited   = new StringBuilder();

        using (var reader = new StringReader(scriptText))
        {
            foreach (var line in reader.Lines())
            {
                if (!line.Contains("--env VAULT_CREDENTIALS="))
                {
                    sbEdited.AppendLineLinux(line);
                }
            }
        }

        node.UploadText(scriptPath, sbEdited.ToString(), permissions: "700");
    }
}
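// The line being stripped from the bridge scripts looks something like this
// (a hypothetical example; the actual credentials secret name will vary):
//
//      --env VAULT_CREDENTIALS=neon-proxy-public-credentials \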
/// <summary>
/// Uploads a resource file to the remote server after performing any necessary preprocessing.
/// </summary>
/// <typeparam name="TMetadata">The node metadata type.</typeparam>
/// <param name="node">The remote node.</param>
/// <param name="hiveDefinition">The hive definition or <c>null</c>.</param>
/// <param name="file">The resource file.</param>
/// <param name="targetPath">The target path on the remote server.</param>
private static void UploadFile<TMetadata>(this SshProxy<TMetadata> node, HiveDefinition hiveDefinition, ResourceFiles.File file, string targetPath)
    where TMetadata : class
{
    using (var input = file.ToStream())
    {
        if (file.HasVariables)
        {
            // We need to expand any variables.  Note that undefined variables
            // (or all variables, when we don't have a hive definition) will
            // expand to the empty string.

            using (var msExpanded = new MemoryStream())
            {
                using (var writer = new StreamWriter(msExpanded))
                {
                    var preprocessReader =
                        new PreprocessReader(new StreamReader(input))
                        {
                            DefaultVariable = string.Empty,
                            ExpandVariables = true,
                            ProcessCommands = false,
                            StripComments   = false
                        };

                    if (hiveDefinition != null)
                    {
                        SetHiveVariables(preprocessReader, hiveDefinition, node.Metadata as NodeDefinition);
                    }

                    foreach (var line in preprocessReader.Lines())
                    {
                        writer.WriteLine(line);
                    }

                    writer.Flush();

                    msExpanded.Position = 0;
                    node.UploadText(targetPath, msExpanded, tabStop: 4, outputEncoding: Encoding.UTF8);
                }
            }
        }
        else
        {
            node.UploadText(targetPath, input, tabStop: 4, outputEncoding: Encoding.UTF8);
        }
    }
}
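// Typical usage, as in UploadResources() below: enumerate a resource folder
// and upload each file, letting UploadFile() handle the preprocessing:
//
//      foreach (var file in Program.LinuxFolder.GetFolder("setup").Files())
//      {
//          server.UploadFile(hiveDefinition, file, $"{HiveHostFolders.Setup}/{file.Name}");
//      }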
/// <summary>
/// Updates a service or container start script on a hive node with a new image.
/// </summary>
/// <param name="node">The target hive node.</param>
/// <param name="scriptName">The script name (without the <b>.sh</b>).</param>
/// <param name="image">The fully qualified image name.</param>
private static void UpdateStartScript(SshProxy<NodeDefinition> node, string scriptName, string image)
{
    var scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, $"{scriptName}.sh");

    node.Status = $"edit: {scriptPath}";

    if (node.FileExists(scriptPath))
    {
        var curScript   = node.DownloadText(scriptPath);
        var sbNewScript = new StringBuilder();

        // Scan for the generated code section and then replace the first
        // line that looks like:
        //
        //      TARGET_IMAGE=OLD-IMAGE
        //
        // with the new image and then upload the change.

        using (var reader = new StringReader(curScript))
        {
            var inGenerated = false;
            var wasEdited   = false;

            foreach (var line in reader.Lines())
            {
                if (wasEdited)
                {
                    sbNewScript.AppendLine(line);
                    continue;
                }

                if (!inGenerated && line.StartsWith(ServiceHelper.ParamSectionMarker))
                {
                    inGenerated = true;
                }

                if (inGenerated && line.StartsWith("TARGET_IMAGE="))
                {
                    sbNewScript.AppendLine($"TARGET_IMAGE={image}");
                    wasEdited = true;
                }
                else
                {
                    sbNewScript.AppendLine(line);
                }
            }
        }

        node.UploadText(scriptPath, sbNewScript.ToString(), permissions: "740");
    }

    node.Status = string.Empty;
}
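// For example, updating a (hypothetical) [neon-log-host] script to image
// [nhive/neon-log-host:18.10.0] rewrites the line:
//
//      TARGET_IMAGE=nhive/neon-log-host:latest
//
// within the generated parameter section as:
//
//      TARGET_IMAGE=nhive/neon-log-host:18.10.0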
/// <summary>
/// Updates the Elasticsearch container launch scripts to enable automatic
/// memory settings based on any cgroup limits.
/// </summary>
/// <param name="node">The target node.</param>
private void UpdateElasticsearch(SshProxy<NodeDefinition> node)
{
    // This method is called for all cluster nodes, even those
    // that aren't currently hosting Elasticsearch, so we can
    // update any scripts that may have been orphaned (for
    // consistency).
    //
    // The update consists of replacing the script line that
    // sets the [ES_JAVA_OPTS] environment variable with:
    //
    //      --env ES_JAVA_OPTS=-XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap \
    //
    // to ensure that this feature is enabled in favor of the
    // old hacked memory level settings.

    var scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, "neon-log-esdata.sh");

    node.InvokeIdempotentAction(GetIdempotentTag("neon-log-esdata"),
        () =>
        {
            if (node.FileExists(scriptPath))
            {
                node.Status = $"edit: {scriptPath}";

                var orgScript = node.DownloadText(scriptPath);
                var newScript = new StringBuilder();

                foreach (var line in new StringReader(orgScript).Lines())
                {
                    if (line.Contains("ES_JAVA_OPTS="))
                    {
                        newScript.AppendLine("    --env \"ES_JAVA_OPTS=-XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap\" \\");
                    }
                    else
                    {
                        newScript.AppendLine(line);
                    }
                }

                node.UploadText(scriptPath, newScript.ToString(), permissions: "");

                node.Status = string.Empty;
            }
        });
}
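// The line being replaced would have pinned the JVM heap explicitly,
// something like this (a hypothetical example):
//
//      --env "ES_JAVA_OPTS=-Xms1g -Xmx1g" \
//
// With [-XX:+UseCGroupMemoryLimitForHeap], the JVM sizes its heap from the
// container's cgroup memory limit instead of a hardcoded value.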
/// <summary>
/// Updates the <b>/etc/systemd/system/ceph-fuse-hivefs.service</b> unit to adjust restart
/// behavior: https://github.com/jefflill/NeonForge/issues/364
/// </summary>
/// <param name="node">The target node.</param>
private void UpdateCephFuse(SshProxy<NodeDefinition> node)
{
    node.InvokeIdempotentAction(GetIdempotentTag("ceph-fuse"),
        () =>
        {
            node.UploadText("/etc/systemd/system/ceph-fuse-hivefs.service",
@"[Unit]
Description=Ceph FUSE client (for /mnt/hivefs)
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
Conflicts=umount.target
PartOf=ceph-fuse.target

[Service]
EnvironmentFile=-/etc/default/ceph
Environment=CLUSTER=ceph
ExecStart=/usr/bin/ceph-fuse -f -o nonempty --cluster ${CLUSTER} /mnt/hivefs
TasksMax=infinity

# These settings configure the service to restart always after
# waiting 5 seconds between attempts for up to 365 days (effectively
# forever).  [StartLimitIntervalSec] is set to the number of seconds
# in a year and [StartLimitBurst] is set to the number of 5 second
# intervals in [StartLimitIntervalSec].

Restart=always
RestartSec=5
StartLimitIntervalSec=31536000
StartLimitBurst=6307200

[Install]
WantedBy=ceph-fuse.target
WantedBy=docker.service
",
                permissions: "644");

            // Tell systemd to regenerate its configuration.

            node.SudoCommand("systemctl daemon-reload");
        });
}
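// Sanity check on the unit file arithmetic: one year is
// 365 * 24 * 60 * 60 = 31,536,000 seconds, and at one restart attempt every
// 5 seconds that allows 31,536,000 / 5 = 6,307,200 attempts, matching the
// [StartLimitIntervalSec] and [StartLimitBurst] values above.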
/// <summary>
/// Uploads the setup and other scripts and tools for the target operating system to the server.
/// </summary>
/// <typeparam name="TMetadata">The server's metadata type.</typeparam>
/// <param name="server">The remote server.</param>
/// <param name="hiveDefinition">The hive definition or <c>null</c>.</param>
public static void UploadResources<TMetadata>(this SshProxy<TMetadata> server, HiveDefinition hiveDefinition = null)
    where TMetadata : class
{
    Covenant.Requires<ArgumentNullException>(server != null);

    //-----------------------------------------------------------------
    // Upload resource files to the setup folder.

    server.Status = $"clear: {HiveHostFolders.Setup}";
    server.SudoCommand($"rm -rf {HiveHostFolders.Setup}/*.*");

    // Upload the setup files.

    server.Status = "upload: setup files";

    foreach (var file in Program.LinuxFolder.GetFolder("setup").Files())
    {
        server.UploadFile(hiveDefinition, file, $"{HiveHostFolders.Setup}/{file.Name}");
    }

    // Make the setup scripts executable.

    server.SudoCommand($"chmod 744 {HiveHostFolders.Setup}/*");

    // Uncomment this if/when we have to upload source files.

#if FALSE
    //-----------------------------------------------------------------
    // Upload resource files to the source folder.  Note that we're going
    // to convert to Linux style line endings and we're going to convert
    // leading spaces into TABs (4 spaces == 1 TAB).

    // $hack(jeff.lill):
    //
    // This is hardcoded to assume that the source consists of a single level
    // folder with the source files.  If the folders nest any further, we'll
    // need to implement a recursive method to handle this properly.
    //
    // This code also assumes that the folder and file names do not include
    // any spaces.

    server.Status = $"clear: {HiveHostFolders.Source}";
    server.SudoCommand($"rm -rf {HiveHostFolders.Source}/*.*");

    // Upload the source files.

    server.Status = "upload: source files";

    foreach (var folder in Program.LinuxFolder.GetFolder("source").Folders())
    {
        foreach (var file in folder.Files())
        {
            var targetPath = $"{HiveHostFolders.Source}/{folder.Name}/{file.Name}";

            server.UploadText(targetPath, file.Contents, tabStop: -4);
            server.SudoCommand("chmod 664", targetPath);
        }
    }
#endif

    //-----------------------------------------------------------------
    // Upload files to the tools folder.

    server.Status = $"clear: {HiveHostFolders.Tools}";
    server.SudoCommand($"rm -rf {HiveHostFolders.Tools}/*.*");

    // Upload the tool files.  Note that we're going to strip out the [.sh]
    // file type to make these easier to run.

    server.Status = "upload: tool files";

    foreach (var file in Program.LinuxFolder.GetFolder("tools").Files())
    {
        server.UploadFile(hiveDefinition, file, $"{HiveHostFolders.Tools}/{file.Name.Replace(".sh", string.Empty)}");
    }

    // Make the scripts executable.

    server.SudoCommand($"chmod 744 {HiveHostFolders.Tools}/*");
}
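// Note that stripping the [.sh] extension means a tool resource such as
// [docker-prune.sh] (a hypothetical name) is uploaded as [.../tools/docker-prune],
// so operators can invoke it simply as:
//
//      docker-prune
//
// because the tools folder is appended to PATH by ConfigureEnvironmentVariables().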
/// <summary>
/// Customizes the OpenSSH configuration on a node.
/// </summary>
/// <param name="node">The target node.</param>
/// <param name="stepDelayed">Ignored.</param>
public static void ConfigureOpenSSH(SshProxy<NodeDefinition> node, TimeSpan stepDelayed)
{
    // Upload the OpenSSH server configuration and then restart OpenSSH
    // so the changes take effect.

    var openSshConfig =
@"# Package generated configuration file
# See the sshd_config(5) manpage for details

# What ports, IPs and protocols we listen for
Port 22
# Use these options to restrict which interfaces/protocols sshd will bind to
#ListenAddress ::
#ListenAddress 0.0.0.0
Protocol 2
# HostKeys for protocol version 2
HostKey /etc/ssh/ssh_host_rsa_key
#HostKey /etc/ssh/ssh_host_dsa_key
#HostKey /etc/ssh/ssh_host_ecdsa_key
#HostKey /etc/ssh/ssh_host_ed25519_key
#Privilege Separation is turned on for security
UsePrivilegeSeparation yes

# Lifetime and size of ephemeral version 1 server key
KeyRegenerationInterval 3600
ServerKeyBits 1024

# Logging
SyslogFacility AUTH
LogLevel INFO

# Authentication:
LoginGraceTime 120
PermitRootLogin prohibit-password
StrictModes yes

RSAAuthentication yes
PubkeyAuthentication yes
#AuthorizedKeysFile %h/.ssh/authorized_keys

# Don't read the user's ~/.rhosts and ~/.shosts files
IgnoreRhosts yes
# For this to work you will also need host keys in /etc/ssh_known_hosts
RhostsRSAAuthentication no
# similar for protocol version 2
HostbasedAuthentication no
# Uncomment if you don't trust ~/.ssh/known_hosts for RhostsRSAAuthentication
#IgnoreUserKnownHosts yes

# To enable empty passwords, change to yes (NOT RECOMMENDED)
PermitEmptyPasswords no

# Change to yes to enable challenge-response passwords (beware issues with
# some PAM modules and threads)
ChallengeResponseAuthentication no

# Change to no to disable tunnelled clear text passwords
#PasswordAuthentication yes

# Kerberos options
#KerberosAuthentication no
#KerberosGetAFSToken no
#KerberosOrLocalPasswd yes
#KerberosTicketCleanup yes

# GSSAPI options
#GSSAPIAuthentication no
#GSSAPICleanupCredentials yes

AllowTcpForwarding no

X11Forwarding no
X11DisplayOffset 10
PrintMotd no
PrintLastLog yes
TCPKeepAlive yes
#UseLogin no

#MaxStartups 10:30:60
#Banner /etc/issue.net

# Allow client to pass locale environment variables
AcceptEnv LANG LC_*

Subsystem sftp /usr/lib/openssh/sftp-server

# Set this to 'yes' to enable PAM authentication, account processing,
# and session processing.  If this is enabled, PAM authentication will
# be allowed through the ChallengeResponseAuthentication and
# PasswordAuthentication.  Depending on your PAM configuration,
# PAM authentication via ChallengeResponseAuthentication may bypass
# the setting of ""PermitRootLogin without-password"".
# If you just want the PAM account and session checks to run without
# PAM authentication, then enable this but set PasswordAuthentication
# and ChallengeResponseAuthentication to 'no'.
UsePAM yes

# Allow connections to be idle for up to 10 minutes (600 seconds)
# before terminating them.  This configuration pings the client every
# 30 seconds for up to 20 times without a response:
#
#   20*30 = 600 seconds

ClientAliveInterval 30
ClientAliveCountMax 20
";

    node.UploadText("/etc/ssh/sshd_config", openSshConfig);
    node.SudoCommand("systemctl restart sshd");
}
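// When hand-editing a configuration like this, it's prudent to validate it
// before restarting the daemon so a typo can't lock you out of the node:
//
//      sudo sshd -t
//
// [sshd -t] checks the configuration file for validity and exits non-zero
// on errors without actually starting the daemon.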
/// <summary>
/// Initializes a near virgin server with the basic capabilities required
/// for a cluster host node.
/// </summary>
/// <param name="node">The target cluster node.</param>
/// <param name="clusterDefinition">The cluster definition.</param>
/// <param name="kubeSetupInfo">Kubernetes setup details.</param>
/// <param name="shutdown">Optionally shuts down the node.</param>
public static void PrepareNode(SshProxy<NodeDefinition> node, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo, bool shutdown = false)
{
    Covenant.Requires<ArgumentNullException>(node != null);
    Covenant.Requires<ArgumentNullException>(clusterDefinition != null);
    Covenant.Requires<ArgumentNullException>(kubeSetupInfo != null);

    if (node.FileExists($"{KubeHostFolders.State}/setup/prepared"))
    {
        return;     // Already prepared
    }

    //-----------------------------------------------------------------
    // Ensure that the cluster host folders exist.

    node.CreateHostFolders();

    //-----------------------------------------------------------------
    // Package manager configuration.

    if (!clusterDefinition.NodeOptions.AllowPackageManagerIPv6)
    {
        // Restrict the [apt] package manager to using IPv4 to communicate
        // with the package mirrors, since IPv6 often doesn't work.

        node.UploadText("/etc/apt/apt.conf.d/99-force-ipv4-transport", "Acquire::ForceIPv4 \"true\";");
        node.SudoCommand("chmod 644 /etc/apt/apt.conf.d/99-force-ipv4-transport");
    }

    // Configure [apt] to retry.

    node.UploadText("/etc/apt/apt.conf.d/99-retries", $"APT::Acquire::Retries \"{clusterDefinition.NodeOptions.PackageManagerRetries}\";");
    node.SudoCommand("chmod 644 /etc/apt/apt.conf.d/99-retries");

    //-----------------------------------------------------------------
    // Other configuration.

    ConfigureOpenSSH(node, TimeSpan.Zero);
    node.UploadConfigFiles(clusterDefinition, kubeSetupInfo);
    node.UploadResources(clusterDefinition, kubeSetupInfo);

    ConfigureEnvironmentVariables(node, clusterDefinition);

    node.SudoCommand("safe-apt-get update");

    node.InvokeIdempotentAction("setup/prep-node",
        () =>
        {
            node.Status = "preparing";
            node.SudoCommand("setup-prep.sh");
            node.Reboot(wait: true);
        });

    // We need to upload the cluster configuration and initialize drives attached
    // to the node.  We're going to assume that these are not already initialized.

    // $todo(jeff.lill):
    //
    // We may need an option that allows an operator to pre-build a hardware
    // based drive array or something.  I'm going to defer this to later and
    // concentrate on commodity hardware and cloud deployments for now.

    CommonSteps.ConfigureEnvironmentVariables(node, clusterDefinition);

    node.Status = "setup: disk";
    node.SudoCommand("setup-disk.sh");

    // Clear any DHCP leases to be super sure that cloned node
    // VMs will obtain fresh IP addresses.

    node.Status = "clear: DHCP leases";
    node.SudoCommand("rm -f /var/lib/dhcp/*");

    // Indicate that the node has been fully prepared.

    node.SudoCommand($"touch {KubeHostFolders.State}/setup/prepared");

    // Shut down the node if requested.

    if (shutdown)
    {
        node.Status = "shutdown";
        node.SudoCommand("shutdown 0", RunOptions.Defaults | RunOptions.Shutdown);
    }
}
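// The preparation above is made idempotent in two ways: the method returns
// early when the [{KubeHostFolders.State}/setup/prepared] marker file already
// exists, and the reboot-inducing prep step is wrapped in
// InvokeIdempotentAction() so it won't repeat if setup is restarted.  In
// shell terms the marker pattern is roughly (a sketch, using the
// NEON_STATE_FOLDER variable configured below):
//
//      if [ ! -f $NEON_STATE_FOLDER/setup/prepared ] ; then
//          # ...perform the preparation steps...
//          touch $NEON_STATE_FOLDER/setup/prepared
//      fi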
/// <summary>
/// Configures the global environment variables that describe the configuration
/// of the server within the cluster.
/// </summary>
/// <param name="node">The server to be updated.</param>
/// <param name="clusterDefinition">The cluster definition.</param>
public static void ConfigureEnvironmentVariables(SshProxy<NodeDefinition> node, ClusterDefinition clusterDefinition)
{
    node.Status = "environment variables";

    // We're going to append the new variables to the existing Linux [/etc/environment] file.

    var sb = new StringBuilder();

    // Append all of the existing environment variables except for those
    // whose names start with "NEON_" to make the operation idempotent.
    //
    // Note that we're going to special case PATH to add any Neon
    // related directories.

    using (var currentEnvironmentStream = new MemoryStream())
    {
        node.Download("/etc/environment", currentEnvironmentStream);

        currentEnvironmentStream.Position = 0;

        using (var reader = new StreamReader(currentEnvironmentStream))
        {
            foreach (var line in reader.Lines())
            {
                if (line.StartsWith("PATH="))
                {
                    if (!line.Contains(KubeHostFolders.Bin))
                    {
                        sb.AppendLine(line + $":/snap/bin:{KubeHostFolders.Bin}");
                    }
                    else
                    {
                        sb.AppendLine(line);
                    }
                }
                else if (!line.StartsWith("NEON_"))
                {
                    sb.AppendLine(line);
                }
            }
        }
    }

    // Add the global cluster related environment variables.

    sb.AppendLine($"NEON_CLUSTER_PROVISIONER={clusterDefinition.Provisioner}");
    sb.AppendLine($"NEON_CLUSTER={clusterDefinition.Name}");
    sb.AppendLine($"NEON_DATACENTER={clusterDefinition.Datacenter.ToLowerInvariant()}");
    sb.AppendLine($"NEON_ENVIRONMENT={clusterDefinition.Environment.ToString().ToLowerInvariant()}");

    var sbPackageProxies = new StringBuilder();

    foreach (var proxyEndpoint in clusterDefinition.PackageProxy.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
    {
        sbPackageProxies.AppendWithSeparator(proxyEndpoint);
    }

    sb.AppendLine($"NEON_PACKAGE_PROXY={sbPackageProxies}");

    if (clusterDefinition.Hosting != null)
    {
        sb.AppendLine($"NEON_HOSTING={clusterDefinition.Hosting.Environment.ToMemberString().ToLowerInvariant()}");
    }

    sb.AppendLine($"NEON_NODE_NAME={node.Name}");

    if (node.Metadata != null)
    {
        sb.AppendLine($"NEON_NODE_ROLE={node.Metadata.Role}");
        sb.AppendLine($"NEON_NODE_IP={node.Metadata.PrivateAddress}");
        sb.AppendLine($"NEON_NODE_HDD={node.Metadata.Labels.StorageHDD.ToString().ToLowerInvariant()}");
    }

    sb.AppendLine($"NEON_ARCHIVE_FOLDER={KubeHostFolders.Archive(KubeConst.SysAdminUser)}");
    sb.AppendLine($"NEON_BIN_FOLDER={KubeHostFolders.Bin}");
    sb.AppendLine($"NEON_CONFIG_FOLDER={KubeHostFolders.Config}");
    sb.AppendLine($"NEON_EXEC_FOLDER={KubeHostFolders.Exec(KubeConst.SysAdminUser)}");
    sb.AppendLine($"NEON_SETUP_FOLDER={KubeHostFolders.Setup}");
    sb.AppendLine($"NEON_STATE_FOLDER={KubeHostFolders.State}");
    sb.AppendLine($"NEON_TMPFS_FOLDER={KubeHostFolders.Tmpfs}");

    // Kubernetes related variables for masters.

    if (node.Metadata != null && node.Metadata.IsMaster)
    {
        sb.AppendLine($"KUBECONFIG=/etc/kubernetes/admin.conf");
    }

    // Upload the new environment to the server.

    node.UploadText("/etc/environment", sb, tabStop: 4);
}
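// After this runs, [/etc/environment] will contain entries along these
// lines (all values shown are hypothetical):
//
//      PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin:/lib/neon/bin
//      NEON_CLUSTER=my-cluster
//      NEON_DATACENTER=dc0
//      NEON_NODE_NAME=master-0
//      NEON_NODE_ROLE=master
//      KUBECONFIG=/etc/kubernetes/admin.conf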
/// <summary>
/// Configures the global environment variables that describe the configuration
/// of the server within the hive.
/// </summary>
/// <param name="node">The server to be updated.</param>
/// <param name="hiveDefinition">The hive definition.</param>
public static void ConfigureEnvironmentVariables(SshProxy<NodeDefinition> node, HiveDefinition hiveDefinition)
{
    node.Status = "environment variables";

    // We're going to append the new variables to the existing Linux [/etc/environment] file.

    var sb = new StringBuilder();

    // Append all of the existing environment variables except for those
    // whose names start with "NEON_" to make the operation idempotent.
    //
    // Note that we're going to special case PATH to add any Neon
    // related directories.

    using (var currentEnvironmentStream = new MemoryStream())
    {
        node.Download("/etc/environment", currentEnvironmentStream);

        currentEnvironmentStream.Position = 0;

        using (var reader = new StreamReader(currentEnvironmentStream))
        {
            foreach (var line in reader.Lines())
            {
                if (line.StartsWith("PATH="))
                {
                    if (!line.Contains(HiveHostFolders.Tools))
                    {
                        sb.AppendLine(line + $":{HiveHostFolders.Tools}");
                    }
                    else
                    {
                        sb.AppendLine(line);
                    }
                }
                else if (!line.StartsWith("NEON_"))
                {
                    sb.AppendLine(line);
                }
            }
        }
    }

    // Add the global neonHIVE related environment variables.

    sb.AppendLine($"NEON_HIVE_PROVISIONER={hiveDefinition.Provisioner}");
    sb.AppendLine($"NEON_HIVE={hiveDefinition.Name}");
    sb.AppendLine($"NEON_DATACENTER={hiveDefinition.Datacenter.ToLowerInvariant()}");
    sb.AppendLine($"NEON_ENVIRONMENT={hiveDefinition.Environment.ToString().ToLowerInvariant()}");

    if (hiveDefinition.Hosting != null)
    {
        sb.AppendLine($"NEON_HOSTING={hiveDefinition.Hosting.Environment.ToMemberString().ToLowerInvariant()}");
    }

    sb.AppendLine($"NEON_NODE_NAME={node.Name}");
    sb.AppendLine($"NEON_NODE_FS={hiveDefinition.HiveFS.Enabled.ToString().ToLowerInvariant()}");

    if (node.Metadata != null)
    {
        sb.AppendLine($"NEON_NODE_ROLE={node.Metadata.Role}");
        sb.AppendLine($"NEON_NODE_IP={node.Metadata.PrivateAddress}");
        sb.AppendLine($"NEON_NODE_SSD={node.Metadata.Labels.StorageSSD.ToString().ToLowerInvariant()}");
        sb.AppendLine($"NEON_NODE_SWAP={node.Metadata.Labels.ComputeSwap.ToString().ToLowerInvariant()}");
    }

    var sbNameservers = new StringBuilder();

    foreach (var nameServer in hiveDefinition.Network.Nameservers)
    {
        sbNameservers.AppendWithSeparator(nameServer, ",");
    }

    sb.AppendLine($"NEON_UPSTREAM_DNS=\"{sbNameservers}\"");
    sb.AppendLine($"NEON_APT_PROXY={HiveHelper.GetPackageProxyReferences(hiveDefinition)}");
    sb.AppendLine($"NEON_ARCHIVE_FOLDER={HiveHostFolders.Archive}");
    sb.AppendLine($"NEON_BIN_FOLDER={HiveHostFolders.Bin}");
    sb.AppendLine($"NEON_CONFIG_FOLDER={HiveHostFolders.Config}");
    sb.AppendLine($"NEON_EXEC_FOLDER={HiveHostFolders.Exec}");
    sb.AppendLine($"NEON_SCRIPTS_FOLDER={HiveHostFolders.Scripts}");
    sb.AppendLine($"NEON_SECRETS_FOLDER={HiveHostFolders.Secrets}");
    sb.AppendLine($"NEON_SETUP_FOLDER={HiveHostFolders.Setup}");
    sb.AppendLine($"NEON_SOURCE_FOLDER={HiveHostFolders.Source}");
    sb.AppendLine($"NEON_STATE_FOLDER={HiveHostFolders.State}");
    sb.AppendLine($"NEON_TMPFS_FOLDER={HiveHostFolders.Tmpfs}");
    sb.AppendLine($"NEON_TOOLS_FOLDER={HiveHostFolders.Tools}");

    // Append the Consul and Vault addresses.

    // All nodes will be configured such that host processes using the HashiCorp Consul
    // CLI will access the Consul cluster via the local Consul instance.  This will be a
    // server for manager nodes and a proxy for workers and pets.
    if (hiveDefinition.Consul.Tls)
    {
        sb.AppendLine($"CONSUL_HTTP_SSL=true");
        sb.AppendLine($"CONSUL_HTTP_ADDR={hiveDefinition.Hostnames.Consul}:{hiveDefinition.Consul.Port}");
        sb.AppendLine($"CONSUL_HTTP_FULLADDR=https://{hiveDefinition.Hostnames.Consul}:{hiveDefinition.Consul.Port}");
    }
    else
    {
        sb.AppendLine($"CONSUL_HTTP_SSL=false");
        sb.AppendLine($"CONSUL_HTTP_ADDR={hiveDefinition.Hostnames.Consul}:{hiveDefinition.Consul.Port}");
        sb.AppendLine($"CONSUL_HTTP_FULLADDR=http://{hiveDefinition.Hostnames.Consul}:{hiveDefinition.Consul.Port}");
    }

    // All nodes will be configured such that host processes using the HashiCorp Vault
    // CLI will access the Vault cluster via the [neon-proxy-vault] proxy service
    // by default.

    sb.AppendLine($"VAULT_ADDR={hiveDefinition.VaultProxyUri}");

    if (node.Metadata != null)
    {
        if (node.Metadata.IsManager)
        {
            // Manager hosts may use the [VAULT_DIRECT_ADDR] environment variable to
            // access Vault without going through the [neon-proxy-vault] proxy.  This
            // points to the Vault instance running locally.
            //
            // This is useful when configuring Vault.

            sb.AppendLine($"VAULT_DIRECT_ADDR={hiveDefinition.GetVaultDirectUri(node.Name)}");
        }
        else
        {
            sb.AppendLine($"VAULT_DIRECT_ADDR=");
        }
    }

    // Upload the new environment to the server.

    node.UploadText("/etc/environment", sb.ToString(), tabStop: 4);
}
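// With TLS enabled, the appended Consul/Vault section would look something
// like this (hostnames and ports are hypothetical):
//
//      CONSUL_HTTP_SSL=true
//      CONSUL_HTTP_ADDR=consul.hive:8500
//      CONSUL_HTTP_FULLADDR=https://consul.hive:8500
//      VAULT_ADDR=https://vault.hive:8200
//      VAULT_DIRECT_ADDR=https://manager-0.vault.hive:8200    (managers only)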
/// <summary>
/// Configures OpenVPN on a manager node.
/// </summary>
/// <param name="manager">The manager.</param>
private void ConfigManagerVpn(SshProxy<NodeDefinition> manager)
{
    // Upload the setup and configuration files.
    //
    // NOTE:
    //
    // These steps are redundant and will be repeated during the
    // common node configuration, but we need some of the scripts
    // here, before that happens.

    manager.CreateHiveHostFolders();
    manager.UploadConfigFiles(hive.Definition);
    manager.UploadResources(hive.Definition);

    // Install OpenVPN.

    manager.Status = "vpn install";
    manager.SudoCommand("safe-apt-get update");
    manager.SudoCommand("safe-apt-get install -yq openvpn");

    // Configure OpenVPN.

    var nodesSubnet      = NetworkCidr.Parse(hive.Definition.Network.NodesSubnet);
    var vpnSubnet        = NetworkCidr.Parse(manager.Metadata.VpnPoolSubnet);
    var duplicateCN      = hive.Definition.Vpn.AllowSharedCredentials ? "duplicate-cn" : ";duplicate-cn";
    var vpnServerAddress = NetHelper.UintToAddress(NetHelper.AddressToUint(vpnSubnet.Address) + 1);

    var serverConf =
$@"#------------------------------------------------------------------------------
# OpenVPN config file customized for the [{manager.Name}] neonHIVE manager node.

# OpenVPN listening port.
port {NetworkPorts.OpenVPN}

# Enable TCP and/or UDP transports.
proto tcp
;proto udp

# Set packet tunneling mode.
dev tun

# SSL/TLS root certificate (ca), certificate
# (cert), and private key (key).  Each client
# and the server must have their own cert and
# key file.  The server and all clients will
# use the same ca file.
#
# See the [easy-rsa] directory for a series
# of scripts for generating RSA certificates
# and private keys.  Remember to use
# a unique Common Name for the server
# and each of the client certificates.
#
# Any X509 key management system can be used.
# OpenVPN can also use a PKCS #12 formatted key file
# (see [pkcs12] directive in man page).
ca ca.crt
cert server.crt
key server.key  # This file should be kept secret

# Diffie-Hellman parameters (2048-bit) generated via:
#
#   openssl dhparam -out dhparam.pem 2048
#
dh dhparam.pem

# The currently recommended topology.
topology subnet

# Configure server mode and supply a VPN subnet
# for OpenVPN to draw client addresses from.
# The server will take {vpnServerAddress} for itself,
# the rest will be made available to clients.
# Each client will be able to reach the server
# on {vpnServerAddress}.  Comment this line out if you are
# ethernet bridging.  See the man page for more info.
server {vpnSubnet.Address} {vpnSubnet.Mask}

# Maintain a record of client virtual IP address
# associations in this file.  If OpenVPN goes down or
# is restarted, reconnecting clients can be assigned
# the same virtual IP address from the pool that was
# previously assigned.
;ifconfig-pool-persist ipp.txt

# Push routes to the client to allow it
# to reach other private subnets behind
# the server.  Remember that these
# private subnets will also need
# to know to route the OpenVPN client
# address pool ({vpnSubnet.Address})
# back to this specific OpenVPN server.
push ""route {nodesSubnet.Address} {nodesSubnet.Mask}""

# Uncomment this directive if multiple clients
# might connect with the same certificate/key
# files or common names.  This is recommended
# only for testing purposes.  For production use,
# each client should have its own certificate/key
# pair.
{duplicateCN}

# The keepalive directive causes ping-like
# messages to be sent back and forth over
# the link so that each side knows when
# the other side has gone down.
# Ping every 10 seconds, assume that remote
# peer is down if no ping received during
# a 120 second time period.
keepalive 10 120

# For extra security beyond that provided
# by SSL/TLS, create an [HMAC firewall]
# to help block DoS attacks and UDP port flooding.
#
# Generate with:
#   openvpn --genkey --secret ta.key
#
# The server and each client must have
# a copy of this key.
# The second parameter should be '0'
# on the server and '1' on the clients.
tls-auth ta.key 0 # This file is secret

# Select a cryptographic cipher.
# This config item must be copied to
# the client config file as well.
cipher AES-256-CBC

# Enable compression on the VPN link.
# Don't enable this unless it is also
# enabled in the client config file.
#
# We're not enabling this due to the
# VORACLE security vulnerability:
#
#   https://community.openvpn.net/openvpn/wiki/VORACLE

# The maximum number of concurrently connected
# clients we want to allow.
max-clients {VpnOptions.ServerAddressCount - 2}

# This macro sets the TCP_NODELAY socket flag on
# the server as well as pushes it to connecting
# clients.  The TCP_NODELAY flag disables the Nagle
# algorithm on TCP sockets causing packets to be
# transmitted immediately with low latency, rather
# than waiting a short period of time in order to
# aggregate several packets into a larger containing
# packet.  In VPN applications over TCP, TCP_NODELAY
# is generally a good latency optimization.
tcp-nodelay

# It's a good idea to reduce the OpenVPN
# daemon's privileges after initialization.
#
# You can uncomment these on
# non-Windows systems.
;user nobody
;group nobody

# The persist options will try to avoid
# accessing certain resources on restart
# that may no longer be accessible because
# of the privilege downgrade.
persist-key
persist-tun

# Output a short status file showing
# current connections, truncated
# and rewritten every minute.
status openvpn-status.log

# By default, log messages will go to the syslog (or
# on Windows, if running as a service, they will go to
# the [\Program Files\OpenVPN\log] directory).
# Use log or log-append to override this default.
# [log] will truncate the log file on OpenVPN startup,
# while [log-append] will append to it.  Use one
# or the other (but not both).
log /var/log/openvpn.log
;log-append openvpn.log

# Set the appropriate level of log
# file verbosity.
#
# 0 is silent, except for fatal errors
# 4 is reasonable for general usage
# 5 and 6 can help to debug connection problems
# 9 is extremely verbose
verb 4

# Silence repeating messages.  At most 20
# sequential messages of the same message
# category will be output to the log.
;mute 20
";

    manager.Status = "vpn config";
    manager.SudoCommand("mkdir -p /etc/openvpn");
    manager.UploadText("/etc/openvpn/server.conf", serverConf);

    manager.UploadText("/etc/openvpn/ca.crt", vpnCaFiles.GetCert("ca"));
    manager.UploadText("/etc/openvpn/server.crt", vpnCaFiles.GetCert("server"));
    manager.UploadText("/etc/openvpn/server.key", vpnCaFiles.GetKey("server"));
    manager.SudoCommand("chmod 600", "/etc/openvpn/server.key");    // This is a secret!

    manager.UploadText("/etc/openvpn/ta.key", vpnCaFiles.GetTaKey());
    manager.SudoCommand("chmod 600", "/etc/openvpn/ta.key");        // This is a secret too!

    manager.UploadText("/etc/openvpn/dhparam.pem", vpnCaFiles.GetDHParam());

    // Initialize the [root] user's credentials.
    vpnCredentials =
        new VpnCredentials()
        {
            CaCert   = vpnCaFiles.GetCert("ca"),
            UserCert = vpnCaFiles.GetCert(HiveConst.RootUser),
            UserKey  = vpnCaFiles.GetKey(HiveConst.RootUser),
            TaKey    = vpnCaFiles.GetTaKey(),
            CaZipKey = VpnCaFiles.GenerateKey(),
            CaZip    = vpnCaFiles.ToZipBytes()
        };

    // Upload the initial (empty) Certificate Revocation List (CRL) file and then
    // upload an OpenVPN systemd unit drop-in so that it will recognize revoked certificates.

    manager.UploadText("/etc/openvpn/crl.pem", vpnCaFiles.GetFile("crl.pem"));
    manager.SudoCommand("chmod 664", "/etc/openvpn/crl.pem");    // OpenVPN needs to be able to read this after having its privileges downgraded.

    var openVpnUnit =
@"[Unit]
Description=OpenVPN connection to %i
PartOf=openvpn.service
ReloadPropagatedFrom=openvpn.service
Before=systemd-user-sessions.service
Documentation=man:openvpn(8)
Documentation=https://community.openvpn.net/openvpn/wiki/Openvpn23ManPage
Documentation=https://community.openvpn.net/openvpn/wiki/HOWTO

[Service]
PrivateTmp=true
KillMode=mixed
Type=forking
ExecStart=/usr/sbin/openvpn --daemon ovpn-%i --status /run/openvpn/%i.status 10 --cd /etc/openvpn --script-security 2 --config /etc/openvpn/%i.conf --writepid /run/openvpn/%i.pid --crl-verify /etc/openvpn/crl.pem
PIDFile=/run/openvpn/%i.pid
ExecReload=/bin/kill -HUP $MAINPID
WorkingDirectory=/etc/openvpn
ProtectSystem=yes
CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE
LimitNPROC=10
DeviceAllow=/dev/null rw
DeviceAllow=/dev/net/tun rw

[Install]
WantedBy=multi-user.target
";

    manager.UploadText("/etc/systemd/system/[email protected]", openVpnUnit);
    manager.SudoCommand("chmod 644 /etc/systemd/system/[email protected]");

    // Do a daemon-reload so systemd will be aware of the new drop-in.

    manager.SudoCommand("systemctl disable openvpn");
    manager.SudoCommand("systemctl daemon-reload");

    // Enable and restart OpenVPN.

    manager.SudoCommand("systemctl enable openvpn");
    manager.SudoCommand("systemctl restart openvpn");

    //-----------------------------------------------------------------
    // SPECIAL NOTE:
    //
    // I figured out that I need this lovely bit of code after banging my head on the desk for
    // 12 freaking days.  The problem was getting OpenVPN to work in Windows Azure (this will
    // also probably impact other cloud environments).
    //
    // Azure implements VNETs as layer 3 overlays.  This means that the host network interfaces
    // are not actually on an ethernet segment and the VPN default gateway is actually handling
    // all of the ARP packets, routing between the VNET subnets, load balancers, and the Internet.
    // This is problematic for OpenVPN traffic because the VPN client IP address space is not
    // part of the VNET which means the VNET gateway is not able to route packets from hive
    // hosts back to the manager's OpenVPN client addresses by default.
    //
    // The solution is to configure the managers with secondary NIC cards in a different subnet
    // and provision special Azure user-defined routes that direct VPN return packets to the
    // correct manager.
    //
    // I figured this part out the second day.  The problem was though that it simply didn't work.
    // From an external VPN client, I would try to ping a worker node through OpenVPN running on
    // a manager.  I'd see the ping traffic:
    //
    //      1. manager/tun0: request
    //      2. manager/eth1: request
    //      3. worker/eth0:  request
    //      4. worker/eth0:  reply
    //      5. manager/eth0: reply
    //      6. NOTHING!
    //         EXPECTED: manager/tun0: reply
    //
    // So the problem was that I could see the ICMP ping request hit the various interfaces
    // on the manager and be received by the worker.  I'd then see the worker send the reply,
    // and be routed via the user-defined Azure route back to the manager.  The problem was
    // that the packet was simply dropped there.  It never made it back to tun0 so OpenVPN
    // could forward it back to the client.
    //
    // After days and days of trying to learn about Linux routing, iptables and policy rules,
    // I finally ran across this posting for the second time:
    //
    //      https://unix.stackexchange.com/questions/21093/output-traffic-on-different-interfaces-based-on-destination-port
    //
    // This was the key.  I ran across this a few days ago and didn't read it closely enough.
    // It made more sense after learning more about this stuff.
    //
    // Linux has a built-in IP address spoofing filter enabled by default.  This filter has the
    // kernel discard any packets whose source address doesn't match the IP address/route implied
    // by the remote interface that transmitted the packet.  This is exactly what's happening
    // when Azure forwards the VPN return packets via the user-defined route.  I'd see return
    // packets hit eth0 on the manager, be processed by the low-level RAW and MANGLE iptables
    // and then they'd disappear.
    //
    // The solution is simply to disable the spoofing filter.  I'm going to go ahead and do this
    // for all interfaces which should be fine for hives hosted in cloud environments, because the
    // VNET/Load Balancer/Security Groups will be used to lock things down.  Local hives will
    // need to be manually placed behind a suitable router/firewall as well.
    //
    // For robustness, I'm going to deploy this as a service daemon that polls the filter state
    // for each interface every 5 seconds, and disables any enabled filters.  This will ensure
    // that the filters will always be disabled, even as interfaces are brought up and down.

    var disableSpoofUnit =
$@"[Unit]
Description=Disable Network Anti-Spoofing Filters
Documentation=
After=
Requires=
Before=

[Service]
Type=simple
ExecStart={HiveHostFolders.Bin}/disable-spoof-filters.sh

[Install]
WantedBy=multi-user.target
";

    var disableSpoofScript =
@"#!/bin/bash
#------------------------------------------------------------------------------
# This script is deployed as a service to ensure that the Linux anti-spoofing
# filters are disabled for the network interfaces on manager nodes hosting
# OpenVPN.  This is required to allow VPN return traffic from other nodes to
# be routed back to tun0 and ultimately, to connected VPN clients.
#
# Note that it appears that we need to disable the filter for all interfaces
# for this to actually work.

while :
do
    flush=false

    for f in /proc/sys/net/ipv4/conf/*/rp_filter
    do
        filter_enabled=$(cat $f)

        if [ ""$filter_enabled"" == ""1"" ] ; then
            echo 0 > $f
            flush=true
        fi
    done

    if [ ""$flush"" == ""true"" ] ; then
        echo 1 > /proc/sys/net/ipv4/route/flush
    fi

    sleep 5
done";

    manager.UploadText("/lib/systemd/system/disable-spoof-filters.service", disableSpoofUnit);
    manager.SudoCommand("chmod 644 /lib/systemd/system/disable-spoof-filters.service");

    manager.UploadText($"{HiveHostFolders.Bin}/disable-spoof-filters.sh", disableSpoofScript);
    manager.SudoCommand($"chmod 770 {HiveHostFolders.Bin}/disable-spoof-filters.sh");

    manager.SudoCommand("systemctl enable disable-spoof-filters");
    manager.SudoCommand("systemctl restart disable-spoof-filters");
}
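// To check the reverse-path filter state manually on a node (these are
// standard Linux sysctls):
//
//      sysctl net.ipv4.conf.all.rp_filter
//      cat /proc/sys/net/ipv4/conf/eth0/rp_filter
//
// A value of [1] means the strict reverse-path filter is enabled; the daemon
// above writes [0] to disable it and then flushes the kernel route cache.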