Ejemplo n.º 1
0
        /// <summary>
        /// Replaces the configuration files on a remote node with the files found
        /// in the local <b>conf</b> folder for the target operating system.
        /// </summary>
        /// <typeparam name="Metadata">The node metadata type.</typeparam>
        /// <param name="node">The remote node (must not be <c>null</c>).</param>
        /// <param name="hiveDefinition">The hive definition handed to each file upload, or <c>null</c>.</param>
        /// <remarks>
        /// The method removes the existing <b>*.*</b> files from the node's configuration
        /// folder, uploads each local file under its original name, then sets mode
        /// <b>644</b> on all <b>*.*</b> files and mode <b>744</b> on the <b>*.sh</b> scripts.
        /// </remarks>
        public static void UploadConfigFiles <Metadata>(this SshProxy <Metadata> node, HiveDefinition hiveDefinition = null)
            where Metadata : class
        {
            Covenant.Requires<ArgumentNullException>(node != null);

            var configFolder = HiveHostFolders.Config;

            // Remove the current contents of the configuration folder.  Note that
            // the [*.*] pattern only matches names that include a period.

            node.Status = $"clear: {configFolder}";
            node.SudoCommand($"rm -rf {configFolder}/*.*");

            // Copy every local configuration file to the node.

            node.Status = "upload: config files";

            var sourceFolder = Program.LinuxFolder.GetFolder("conf");

            foreach (var configFile in sourceFolder.Files())
            {
                var targetPath = $"{configFolder}/{configFile.Name}";

                node.UploadFile(hiveDefinition, configFile, targetPath);
            }

            // Lock the files down first and then make the scripts executable
            // (the second command overrides the first for the [*.sh] files).

            node.SudoCommand($"chmod 644 {configFolder}/*.*");
            node.SudoCommand($"chmod 744 {configFolder}/*.sh");

            node.Status = "copied";
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Upgrades Docker on a single node unless the node already runs the
        /// requested version (or a newer one).
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="stepDelay">The step delay (not used by this method).</param>
        /// <remarks>
        /// Swarm members are drained before the package is installed and are
        /// reactivated after Docker has been restarted.  The method then sleeps
        /// for <c>Program.WaitSeconds</c> seconds before returning.
        /// </remarks>
        private void UpdateDocker(SshProxy <NodeDefinition> node, TimeSpan stepDelay)
        {
            var installedVersion = node.GetDockerVersion();
            var requestedVersion = (SemanticVersion)version;

            if (installedVersion >= requestedVersion)
            {
                // Nothing to do: the node is already up to date.

                return;
            }

            // Move service tasks off of the node while Docker is being replaced.

            if (node.Metadata.InSwarm)
            {
                node.Status = "swarm: drain services";
                hive.Docker.DrainNode(node.Name);
            }

            // Refresh the package index and then install the Docker package.

            const string refreshCommand = "safe-apt-get update";

            node.Status = $"run: {refreshCommand}";
            node.SudoCommand(refreshCommand);

            var installCommand = $"safe-apt-get install -yq {dockerPackageUri}";

            node.Status = $"run: {installCommand}";
            node.SudoCommand(installCommand);

            node.Status = "restart: docker";
            node.SudoCommand("systemctl restart docker");

            // Return the node to ACTIVE mode (from DRAIN).

            if (node.Metadata.InSwarm)
            {
                node.Status = "swarm: activate";
                hive.Docker.ActivateNode(node.Name);
            }

            // Pause before returning so the node has some time to settle.

            var waitSeconds = Program.WaitSeconds;

            node.Status = $"stabilizing ({waitSeconds}s)";
            Thread.Sleep(TimeSpan.FromSeconds(waitSeconds));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Replaces the Docker installation on a hive node with the package
        /// downloaded from a URI.
        /// </summary>
        /// <param name="hive">The target hive.</param>
        /// <param name="node">The target node.</param>
        /// <param name="dockerPackageUri">The Docker Debian package URI.</param>
        /// <remarks>
        /// Swarm members are drained first and reactivated at the end.  Any exception
        /// raised along the way is caught and reported through <c>node.Fault()</c>
        /// instead of being propagated to the caller.
        /// </remarks>
        private static void UpdateDocker(HiveProxy hive, SshProxy <NodeDefinition> node, string dockerPackageUri)
        {
            // Temporary location of the downloaded package on the node.

            const string packagePath = "/tmp/docker.deb";

            try
            {
                if (node.Metadata.InSwarm)
                {
                    node.Status = "swarm: drain services";
                    hive.Docker.DrainNode(node.Name);
                }

                node.Status = "stop: docker";
                node.SudoCommand("systemctl stop docker").EnsureSuccess();

                node.Status = "download: docker package";
                node.SudoCommand($"curl {Program.CurlOptions} {dockerPackageUri} -o {packagePath}").EnsureSuccess();

                node.Status = "update: docker";
                node.SudoCommand($"gdebi {packagePath}").EnsureSuccess();

                // Removing the package is best effort: the result is not checked.

                node.SudoCommand($"rm {packagePath}");

                node.Status = "restart: docker";
                node.SudoCommand("systemctl start docker").EnsureSuccess();

                if (node.Metadata.InSwarm)
                {
                    node.Status = "swarm: activate";
                    hive.Docker.ActivateNode(node.Name);
                }
            }
            catch (Exception updateError)
            {
                node.Fault($"[docker] update failed: {NeonHelper.ExceptionError(updateError)}");
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs a Vault command on a node with <b>VAULT_TOKEN</b> set to the root
        /// Vault token, writes the command output to the console and then exits
        /// the program with the command's exit code.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="commandLine">The Vault command.</param>
        private void ExecuteOnNode(SshProxy <NodeDefinition> node, CommandLine commandLine)
        {
            // The command embeds the root token, so it is executed with
            // [RunOptions.Redact] (presumably to keep the token out of logs --
            // confirm against the RunOptions documentation).

            var vaultCommand = $"export VAULT_TOKEN={vaultCredentials.RootToken} && {remoteVaultPath} {commandLine}";
            var options      = RunOptions.IgnoreRemotePath | RunOptions.Redact;
            var result       = node.SudoCommand(vaultCommand, options);

            Console.WriteLine(result.AllText);
            Program.Exit(result.ExitCode);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Applies pending Linux distribution upgrades to a single node and
        /// then reboots it.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="stepDelay">The step delay (not used by this method).</param>
        /// <remarks>
        /// Swarm members are drained before the upgrade and reactivated after the
        /// reboot.  The method then sleeps for <c>Program.WaitSeconds</c> seconds.
        /// </remarks>
        private void UpdateLinux(SshProxy <NodeDefinition> node, TimeSpan stepDelay)
        {
            // Move service tasks off of the node before touching it.

            if (node.Metadata.InSwarm)
            {
                node.Status = "swarm: drain services";
                hive.Docker.DrainNode(node.Name);
            }

            const string upgradeCommand = "safe-apt-get dist-upgrade -yq";

            node.Status = $"run: {upgradeCommand}";
            node.SudoCommand(upgradeCommand);

            node.Reboot();

            // Return the node to ACTIVE mode (from DRAIN).

            if (node.Metadata.InSwarm)
            {
                node.Status = "swarm: activate";
                hive.Docker.ActivateNode(node.Name);
            }

            // Wait a bit so the node can rejoin the swarm, containers can
            // restart and service tasks can be redeployed.

            var waitSeconds = Program.WaitSeconds;

            node.Status = $"stabilizing ({waitSeconds}s)";
            Thread.Sleep(TimeSpan.FromSeconds(waitSeconds));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// <para>
        /// Edits the [/etc/hosts] file on a hive node so that the line:
        /// </para>
        /// <code>
        /// 127.0.1.1   {hostname}
        /// </code>
        /// <para>
        /// is changed to:
        /// </para>
        /// <code>
        /// {node.PrivateAddress} {hostname}
        /// </code>
        /// <para>
        /// Hashicorp Vault cannot restart with the old setting, complaining about a
        /// <b>"missing API address"</b>.
        /// </para>
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <remarks>
        /// The edit is wrapped in an idempotent action tagged <b>edit-etc-hosts</b>.
        /// After the file is uploaded (mode <b>644</b>), Vault is restarted.
        /// </remarks>
        private void EditEtcHosts(SshProxy <NodeDefinition> node)
        {
            node.InvokeIdempotentAction(GetIdempotentTag("edit-etc-hosts"),
                                        () =>
            {
                var etcHosts   = node.DownloadText("/etc/hosts");
                var sbEtcHosts = new StringBuilder();

                using (var reader = new StringReader(etcHosts))
                {
                    foreach (var line in reader.Lines())
                    {
                        // Host file entries are machine-readable text, so compare
                        // ordinally rather than with the current culture's rules.

                        if (line.StartsWith("127.0.1.1", StringComparison.Ordinal))
                        {
                            // Pad the address so the host name starts at column 16
                            // (always leaving at least one space).

                            var nodeAddress = node.PrivateAddress.ToString();
                            var separator   = new string(' ', Math.Max(16 - nodeAddress.Length, 1));

                            sbEtcHosts.AppendLine($"{nodeAddress}{separator}{node.Name}");
                        }
                        else
                        {
                            // All other lines are copied through unchanged.

                            sbEtcHosts.AppendLine(line);
                        }
                    }
                }

                node.UploadText("/etc/hosts", sbEtcHosts.ToString(), permissions: "644");
                node.SudoCommand("systemctl restart vault");
            });
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Verifies Consul health by checking that the Consul daemon is running.
        /// </summary>
        /// <param name="node">The manager node.</param>
        /// <param name="hiveDefinition">The hive definition (not used by this method).</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>Program.ServiceManager</c> is anything other than
        /// <c>ServiceManager.Systemd</c>.
        /// </exception>
        /// <remarks>
        /// A stopped daemon is reported through <c>node.Fault()</c> rather than
        /// by throwing an exception.
        /// </remarks>
        private static void CheckConsul(SshProxy <NodeDefinition> node, HiveDefinition hiveDefinition)
        {
            node.Status = "checking: consul";

            // Verify that the daemon is running.

            switch (Program.ServiceManager)
            {
                case ServiceManager.Systemd:
                {
                    // The service is considered healthy only when [systemctl status]
                    // reports it as active and running.

                    var output = node.SudoCommand("systemctl status consul", RunOptions.LogOutput).OutputText;

                    if (!output.Contains("Active: active (running)"))
                    {
                        node.Fault("Consul daemon is not running.");
                        return;
                    }

                    break;
                }

                default:

                    throw new NotImplementedException($"Support for [{nameof(ServiceManager)}.{Program.ServiceManager}] is not implemented.");
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Starts a neonHIVE related Docker container on a node via a generated
        /// start script, which is left on the node so the container can easily be
        /// restarted manually or during hive updates.
        /// </summary>
        /// <param name="node">The target hive node.</param>
        /// <param name="containerName">Identifies the container.</param>
        /// <param name="image">The Docker image to be used by the container.</param>
        /// <param name="runOptions">Optional run options (defaults to <see cref="RunOptions.FaultOnError"/>).</param>
        /// <param name="commands">The commands required to start the container (at least one).</param>
        /// <remarks>
        /// <para>
        /// This method performs the following steps:
        /// </para>
        /// <list type="number">
        ///     <item>
        ///     Generates the start script by calling <c>CreateStartScript()</c> with
        ///     <paramref name="containerName"/>, <paramref name="image"/> and
        ///     <paramref name="commands"/>.  Image resolution and placeholder
        ///     substitution are presumably handled there; see that method for details.
        ///     </item>
        ///     <item>
        ///     Uploads the script to [<see cref="HiveHostFolders.Scripts"/>/<paramref name="containerName"/>.sh]
        ///     and sets its mode to <b>740</b>.
        ///     </item>
        ///     <item>
        ///     Runs the uploaded script without a parameter as the idempotent Docker
        ///     command <b>setup/<paramref name="containerName"/></b> to start the container.
        ///     </item>
        /// </list>
        /// </remarks>
        public static void StartContainer(SshProxy <NodeDefinition> node, string containerName, string image, RunOptions runOptions = RunOptions.FaultOnError, params IBashCommandFormatter[] commands)
        {
            Covenant.Requires<ArgumentNullException>(node != null);
            Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(containerName));
            Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(image));
            Covenant.Requires<ArgumentNullException>(commands != null);
            Covenant.Requires<ArgumentNullException>(commands.Length > 0);

            node.Status = $"start: {containerName}";

            // Build the script that starts the container.

            var startScript = CreateStartScript(containerName, image, true, commands);

            // Copy the script to the node's scripts folder and restrict its permissions.

            var remoteScriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, $"{containerName}.sh");

            node.UploadText(remoteScriptPath, startScript);
            node.SudoCommand($"chmod 740 {remoteScriptPath}");

            // Start the container by running the script without a parameter.

            var actionId = $"setup/{containerName}";

            node.IdempotentDockerCommand(actionId, null, runOptions, remoteScriptPath);

            node.Status = string.Empty;
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Uninstalls the Docker python module from a node because it conflicts
 /// with Docker related Ansible playbooks.
 /// </summary>
 /// <param name="node">The target node.</param>
 /// <remarks>
 /// The uninstall runs as the <b>sysadmin</b> user inside an idempotent
 /// action tagged <b>remove-docker-py</b>.
 /// </remarks>
 private void RemoveDockerPython(SshProxy <NodeDefinition> node)
 {
     var actionTag = GetIdempotentTag("remove-docker-py");

     node.InvokeIdempotentAction(actionTag,
         () =>
         {
             const string uninstallCommand = "su sysadmin -c 'pip uninstall -y docker'";

             node.SudoCommand(uninstallCommand, RunOptions.LogOnErrorOnly);
         });
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Uploads the setup scripts and the tool files for the target operating
        /// system to the server.
        /// </summary>
        /// <typeparam name="TMetadata">The server's metadata type.</typeparam>
        /// <param name="server">The remote server.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <param name="kubeSetupInfo">The Kubernetes setup details.</param>
        /// <remarks>
        /// <para>
        /// Files from the local <b>setup</b> folder are uploaded to
        /// <c>KubeHostFolders.Setup</c> under their original names.  Files from the
        /// local <b>binary</b> folder are uploaded to <c>KubeHostFolders.Bin</c>
        /// with a trailing <b>.sh</b> file type removed so they're easier to run.
        /// Everything in both folders is then made executable (mode <b>744</b>).
        /// </para>
        /// <note>
        /// Only a trailing <b>.sh</b> is stripped from the tool names.  Text matching
        /// <b>.sh</b> elsewhere in a file name is left alone.
        /// </note>
        /// </remarks>
        public static void UploadResources <TMetadata>(this SshProxy <TMetadata> server, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo)
            where TMetadata : class
        {
            Covenant.Requires <ArgumentNullException>(server != null, nameof(server));
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null, nameof(clusterDefinition));
            Covenant.Requires <ArgumentNullException>(kubeSetupInfo != null, nameof(kubeSetupInfo));

            const string scriptExtension = ".sh";

            //-----------------------------------------------------------------
            // Upload resource files to the setup folder.

            server.Status = $"clear: {KubeHostFolders.Setup}";
            server.SudoCommand($"rm -rf {KubeHostFolders.Setup}/*.*");

            // Upload the setup files.

            server.Status = "upload: setup scripts";

            foreach (var file in Program.LinuxFolder.GetFolder("setup").Files())
            {
                server.UploadFile(clusterDefinition, kubeSetupInfo, file, $"{KubeHostFolders.Setup}/{file.Name}");
            }

            // Make the setup scripts executable.

            server.SudoCommand($"chmod 744 {KubeHostFolders.Setup}/*");

            //-----------------------------------------------------------------
            // Upload files to the bin folder.

            // NOTE(review): The [*.*] pattern only matches names that include a
            // period, so tools previously uploaded without their [.sh] file type
            // are not removed here (they are overwritten by the upload below when
            // they still exist locally).  Confirm whether stale tools matter.

            server.Status = $"clear: {KubeHostFolders.Bin}";
            server.SudoCommand($"rm -rf {KubeHostFolders.Bin}/*.*");

            // Upload the tool files.  Note that we're going to strip out the [.sh]
            // file type to make these easier to run.  Only a trailing [.sh] is
            // removed so names that merely contain that text aren't mangled.

            server.Status = "upload: binary files";

            foreach (var file in Program.LinuxFolder.GetFolder("binary").Files())
            {
                string toolName = file.Name;

                if (toolName.EndsWith(scriptExtension, StringComparison.Ordinal))
                {
                    toolName = toolName.Substring(0, toolName.Length - scriptExtension.Length);
                }

                server.UploadFile(clusterDefinition, kubeSetupInfo, file, $"{KubeHostFolders.Bin}/{toolName}");
            }

            // Make the scripts executable.

            server.SudoCommand($"chmod 744 {KubeHostFolders.Bin}/*");
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Makes the Docker <b>config.json</b> file for the node's root user match
        /// the one belonging to the node's login user (<c>node.Username</c>).
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <exception cref="HiveException">Thrown when the sync script returns a non-zero exit code.</exception>
        private void SyncDockerConf(SshProxy <NodeDefinition> node)
        {
            // The login for the [root] account needs to be managed as well
            // due to this issue:
            //
            //      https://github.com/jefflill/NeonForge/issues/265

            // $hack(jeff.lill):
            //
            // The script below simply copies the login user's Docker config
            // to [/root/.docker/config.json], deleting the root file when the
            // login user's file no longer exists.
            //
            // This is a bit of a hack because it assumes that the Docker config
            // for the two accounts never diverges, which is probably reasonable
            // given that these are managed hosts.
            //
            // The script also resets the owner and permissions on both
            // [.docker] directories and the files within them.

            var syncBundle = new CommandBundle("./sync.sh");
            var syncScript =
$@"#!/bin/bash

if [ ! -d /root/.docker ] ; then
    mkdir -p /root/.docker
fi

if [ -f /home/{node.Username}/.docker/config.json ] ; then
    cp /home/{node.Username}/.docker/config.json /root/.docker/config.json
else
    if [ -f /root/.docker/config.json ] ; then
        rm /root/.docker/config.json
    fi
fi

if [ -d /root/.docker ] ; then
    chown -R root:root /root/.docker
    chmod 660 /root/.docker/*
fi

if [ -d /home/{node.Username}/.docker ] ; then
    chown -R {node.Username}:{node.Username} /home/{node.Username}/.docker
    chmod 660 /home/{node.Username}/.docker/*
fi
";

            syncBundle.AddFile("sync.sh", syncScript, isExecutable: true);

            var result = node.SudoCommand(syncBundle);

            if (result.ExitCode == 0)
            {
                return;
            }

            throw new HiveException(result.ErrorSummary);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Upgrades Consul on a single node unless the node already runs the
        /// requested version (or a newer one).
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="stepDelay">The step delay (not used by this method).</param>
        /// <remarks>
        /// Consul is stopped, the release archive is downloaded from
        /// <b>releases.hashicorp.com</b> and installed to <b>/usr/local/bin</b>, and
        /// then Consul is restarted.  For manager nodes, the method also sleeps
        /// for <c>Program.WaitSeconds</c> seconds before returning.
        /// </remarks>
        private void UpdateConsul(SshProxy <NodeDefinition> node, TimeSpan stepDelay)
        {
            var installedVersion = node.GetConsulVersion();
            var requestedVersion = (SemanticVersion)version;

            if (installedVersion >= requestedVersion)
            {
                // Nothing to do: the node is already up to date.

                return;
            }

            node.Status = "stop: consul";
            node.SudoCommand("systemctl stop consul");

            node.Status = "update: consul";

            // The install script receives the version as its first argument ($1).

            var installBundle = new CommandBundle("./install.sh", version);
            var installScript =
$@"#!/bin/bash

set -euo pipefail

curl {Program.CurlOptions} https://releases.hashicorp.com/consul/$1/consul_$1_linux_amd64.zip -o /tmp/consul.zip 1>&2
unzip -u /tmp/consul.zip -d /tmp
cp /tmp/consul /usr/local/bin
chmod 770 /usr/local/bin/consul

rm /tmp/consul.zip
rm /tmp/consul 
";

            installBundle.AddFile("install.sh", installScript, isExecutable: true);
            node.SudoCommand(installBundle);

            node.Status = "restart: consul";
            node.SudoCommand("systemctl restart consul");

            // Only manager nodes get the pause after the restart.

            if (!node.Metadata.IsManager)
            {
                return;
            }

            var waitSeconds = Program.WaitSeconds;

            node.Status = $"stabilizing ({waitSeconds}s)";
            Thread.Sleep(TimeSpan.FromSeconds(waitSeconds));
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Verifies Docker health by running <b>docker info</b> on the node.
        /// </summary>
        /// <param name="node">The target hive node.</param>
        /// <param name="hiveDefinition">The hive definition (not used by this method).</param>
        /// <remarks>
        /// A non-zero exit code is reported through <c>node.Fault()</c> along
        /// with the command's output.
        /// </remarks>
        private static void CheckDocker(SshProxy <NodeDefinition> node, HiveDefinition hiveDefinition)
        {
            node.Status = "checking: docker";

            // A very basic ping: [docker info] only succeeds when the Docker
            // daemon appears to be running.

            var pingResult = node.SudoCommand("docker info");

            if (pingResult.ExitCode == 0)
            {
                return;
            }

            node.Fault($"Docker: {pingResult.AllText}");
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Upgrades Vault on a single node unless the node already runs the
        /// requested version (or a newer one).
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="stepDelay">The step delay (not used by this method).</param>
        /// <remarks>
        /// The release archive is downloaded from <b>releases.hashicorp.com</b> and
        /// the binary is installed to <b>/usr/local/bin/vault</b>.  For manager nodes,
        /// Vault is also restarted and unsealed, and the method then sleeps for
        /// <c>Program.WaitSeconds</c> seconds before returning.
        /// </remarks>
        private void UpdateVault(SshProxy <NodeDefinition> node, TimeSpan stepDelay)
        {
            var installedVersion = node.GetVaultVersion();
            var requestedVersion = (SemanticVersion)version;

            if (installedVersion >= requestedVersion)
            {
                // Nothing to do: the node is already up to date.

                return;
            }

            node.Status = "update: vault";

            // The install script receives the version as its first argument ($1).

            var installBundle = new CommandBundle("./install.sh", version);
            var installScript =
$@"#!/bin/bash

set -euo pipefail

curl {Program.CurlOptions} https://releases.hashicorp.com/vault/$1/vault_$1_linux_amd64.zip -o /tmp/vault.zip 1>&2
unzip -o /tmp/vault.zip -d /tmp
rm /tmp/vault.zip

mv /tmp/vault /usr/local/bin/vault
chmod 700 /usr/local/bin/vault
";

            installBundle.AddFile("install.sh", installScript, isExecutable: true);
            node.SudoCommand(installBundle);

            // The service is only restarted and unsealed on manager nodes.

            if (!node.Metadata.IsManager)
            {
                return;
            }

            node.Status = "restart: vault";
            node.SudoCommand("systemctl restart vault");

            node.Status = "unseal: vault";
            hive.Vault.Unseal();

            var waitSeconds = Program.WaitSeconds;

            node.Status = $"stabilizing ({waitSeconds}s)";
            Thread.Sleep(TimeSpan.FromSeconds(waitSeconds));
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Executes a <b>docker config create</b> command on a node, uploading the
        /// config data along with the command.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="rightCommandLine">The right split of the command line.</param>
        /// <remarks>
        /// <para>
        /// The config data is read from the file named by the last argument, or from
        /// standard input when that argument is <b>-</b>.  The command output is
        /// written to the console and the program exits with the command's exit code.
        /// </para>
        /// <note>
        /// A malformed command line is reported on standard error and the program
        /// exits with a non-zero exit code so that callers can detect the failure.
        /// </note>
        /// </remarks>
        private void ConfigCreate(SshProxy <NodeDefinition> node, CommandLine rightCommandLine)
        {
            // We're expecting a command like:
            //
            //      docker config create [OPTIONS] CONFIG file|-
            //
            // where CONFIG is the name of the configuration and [file]
            // is the path to the config file or [-] indicates that
            // the config is streaming in on stdin.
            //
            // We're going to run this as a command bundle that includes
            // the config file.

            if (rightCommandLine.Arguments.Length != 4)
            {
                Console.Error.WriteLine("*** ERROR: Expected: docker config create [OPTIONS] CONFIG file|-");

                // This is an error so the exit code must be non-zero.

                Program.Exit(1);
                return;     // Defensive: don't fall through if [Program.Exit()] ever returns.
            }

            string fileArg = rightCommandLine.Arguments[3];

            byte[] configData = fileArg == "-"
                ? NeonHelper.ReadStandardInputBytes()
                : File.ReadAllBytes(fileArg);

            // Create and execute a command bundle.  Note that we're going to hardcode
            // the config data path to [config.data], replacing the last command line
            // item (the original file argument).

            rightCommandLine.Items[rightCommandLine.Items.Length - 1] = "config.data";

            var bundle = new CommandBundle("docker", rightCommandLine.Items);

            bundle.AddFile("config.data", configData);

            var response = node.SudoCommand(bundle, RunOptions.None);

            Console.Write(response.AllText);
            Program.Exit(response.ExitCode);
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Rewrites the <b>/etc/systemd/system/ceph-fuse-hivefs.service</b> unit file to
        /// adjust its restart behavior: https://github.com/jefflill/NeonForge/issues/364
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <remarks>
        /// The unit file is uploaded with mode <b>644</b> and systemd is then asked
        /// to reload its configuration.  Both steps run inside an idempotent
        /// action tagged <b>ceph-fuse</b>.
        /// </remarks>
        private void UpdateCephFuse(SshProxy <NodeDefinition> node)
        {
            var actionTag = GetIdempotentTag("ceph-fuse");

            node.InvokeIdempotentAction(actionTag,
                () =>
                {
                    const string unitPath = "/etc/systemd/system/ceph-fuse-hivefs.service";

                    var unitText =
@"[Unit]
Description=Ceph FUSE client (for /mnt/hivefs)
After=network-online.target local-fs.target time-sync.target
Wants=network-online.target local-fs.target time-sync.target
Conflicts=umount.target
PartOf=ceph-fuse.target

[Service]
EnvironmentFile=-/etc/default/ceph
Environment=CLUSTER=ceph
ExecStart=/usr/bin/ceph-fuse -f -o nonempty --cluster ${CLUSTER} /mnt/hivefs
TasksMax=infinity

# These settings configure the service to restart always after
# waiting 5 seconds between attempts for up to a 365 days (effectively 
# forever).  [StartLimitIntervalSec] is set to the number of seconds 
# in a year and [StartLimitBurst] is set to the number of 5 second 
# intervals in [StartLimitIntervalSec].

Restart=always
RestartSec=5
StartLimitIntervalSec=31536000 
StartLimitBurst=6307200

[Install]
WantedBy=ceph-fuse.target
WantedBy=docker.service
";

                    node.UploadText(unitPath, unitText, permissions: "644");

                    // Have systemd pick up the modified unit file.

                    node.SudoCommand("systemctl daemon-reload");
                });
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Checks that the node is running the operating system the program
        /// is targeting.
        /// </summary>
        /// <param name="node">The target hive node.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when <c>Program.OSProperties.TargetOS</c> is anything other
        /// than <c>TargetOS.Ubuntu_16_04</c>.
        /// </exception>
        /// <remarks>
        /// A mismatch between the expected and the reported operating system is
        /// signalled through <c>node.Fault()</c> rather than by throwing.
        /// </remarks>
        public static void VerifyOS(SshProxy <NodeDefinition> node)
        {
            node.Status = "check: OS";

            var releaseInfo = node.SudoCommand("lsb_release -a");
            var targetOS    = Program.OSProperties.TargetOS;

            // Only Ubuntu 16.04 is currently supported as a target.

            if (targetOS != TargetOS.Ubuntu_16_04)
            {
                throw new NotImplementedException($"Support for [{nameof(TargetOS)}.{targetOS}] is not implemented.");
            }

            // The release information reported by the node must mention the
            // expected distribution.

            if (!releaseInfo.OutputText.Contains("Ubuntu 16.04"))
            {
                node.Fault("Expected [Ubuntu 16.04].");
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Runs a command on a hive manager node with <b>VAULT_TOKEN</b> set to
        /// the root Vault token.
        /// </summary>
        /// <param name="manager">The target manager.</param>
        /// <param name="command">The command (including the <b>vault</b>).</param>
        /// <param name="args">The optional arguments.</param>
        /// <returns>The command response.</returns>
        /// <remarks>
        /// <para>
        /// The command is wrapped in a small Bash script that exports the token
        /// before running it.  The response's <c>BashCommand</c> property is set to
        /// the Bash rendering of the uploaded bundle.
        /// </para>
        /// <note>
        /// This method does not fault or throw an exception if the command returns
        /// a non-zero exit code.
        /// </note>
        /// </remarks>
        public CommandResponse CommandNoFault(SshProxy <NodeDefinition> manager, string command, params object[] args)
        {
            Covenant.Requires<ArgumentNullException>(manager != null);
            Covenant.Requires<ArgumentNullException>(command != null);

            VerifyToken();

            // [innerCommand] is only used for its text rendering, which becomes
            // the last line of the wrapper script.

            var innerCommand  = new CommandBundle(command, args);
            var wrapperBundle = new CommandBundle("./vault-command.sh");
            var wrapperScript =
$@"#!/bin/bash
export VAULT_TOKEN={hive.HiveLogin.VaultCredentials.RootToken}
{innerCommand}
";

            wrapperBundle.AddFile("vault-command.sh", wrapperScript, isExecutable: true);

            var result = manager.SudoCommand(wrapperBundle, hive.SecureRunOptions);

            result.BashCommand = wrapperBundle.ToBash();

            return result;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Probes the node for its RAM size, processor count, and primary disk
        /// capacity and records what it finds in the corresponding node labels.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <remarks>
        /// Each probe is independent: a label is only assigned when its command
        /// exits with code zero and its output can be parsed, otherwise the label
        /// is left untouched.
        /// </remarks>
        private void SetLabels(SshProxy <NodeDefinition> node)
        {
            // RAM: read [/proc/meminfo] and pull out the [MemTotal] value (in kB).

            var memResponse = node.SudoCommand("cat /proc/meminfo");

            if (memResponse.ExitCode == 0)
            {
                var totalPattern = new Regex(@"^MemTotal:\s*(?<size>\d+)\s*kB", RegexOptions.Multiline);
                var totalMatch   = totalPattern.Match(memResponse.OutputText);

                if (totalMatch.Success && long.TryParse(totalMatch.Groups["size"].Value, out var totalKiB))
                {
                    // Linux reports somewhat less RAM than is physically installed.

                    node.Metadata.Labels.ComputeRam = (int)(totalKiB / 1024);  // KiB --> MiB
                }
            }

            // Cores: read [/proc/cpuinfo] and count the [processor : N] entries.

            var cpuResponse = node.SudoCommand("cat /proc/cpuinfo");

            if (cpuResponse.ExitCode == 0)
            {
                var corePattern = new Regex(@"^processor\s*:\s*\d+", RegexOptions.Multiline);
                var coreMatches = corePattern.Matches(cpuResponse.OutputText);

                node.Metadata.Labels.ComputeCores = coreMatches.Count;
            }

            // Disk: $hack(jeff.lill):
            //
            // There's no reliable way here to determine which block device hosts
            // the primary file system on every system, so the candidates below
            // are probed in order and the first one reporting a positive size wins.

            var candidateDevices = new string[] { "/dev/sda1", "/dev/sda", "/dev/xvda1", "/dev/xvda" };

            foreach (var candidate in candidateDevices)
            {
                var sizeResponse = node.SudoCommand($"lsblk -b --output SIZE -n -d {candidate}", RunOptions.LogOutput);

                if (sizeResponse.ExitCode != 0)
                {
                    continue;   // Device is not present; try the next candidate.
                }

                if (long.TryParse(sizeResponse.OutputText.Trim(), out var sizeBytes) && sizeBytes > 0)
                {
                    node.Metadata.Labels.StorageSize = ByteUnits.ToGiString(sizeBytes);
                    break;
                }
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Runs this step's command on the node, showing a (possibly shortened)
        /// description of the command as the node status while it executes.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown for combinations that are not supported: a Docker command that
        /// uploads files, or a command that is neither a Docker nor a sudo command.
        /// </exception>
        private void Execute(SshProxy <NodeDefinition> node)
        {
            const int    maxStatusLength = 80;
            const string ellipsis        = "...";

            var statusText = this.ToString();

            // Keep the status to at most 80 characters.  Longer text is cut at
            // the last space found at or before the point that leaves room for
            // the ellipsis, so that words aren't split when that's possible.

            if (statusText.Length > maxStatusLength)
            {
                var cutLimit = maxStatusLength - ellipsis.Length;
                var cutAt    = statusText.LastIndexOf(' ', cutLimit);

                // A space at index 0 (or no space at all) falls back to a hard cut.

                statusText = (cutAt > 0 ? statusText.Substring(0, cutAt) : statusText.Substring(0, cutLimit)) + ellipsis;
            }

            node.Status = statusText;

            // Commands without attached files can be executed directly;
            // anything else has to be uploaded and run as a bundle.

            var hasFiles = commandBundle.Count != 0;

            if (isDocker)
            {
                if (hasFiles)
                {
                    throw new NotImplementedException();
                }

                node.DockerCommand(commandBundle.Command, commandBundle.Args);
            }
            else if (Sudo)
            {
                if (hasFiles)
                {
                    node.SudoCommand(commandBundle);
                }
                else
                {
                    node.SudoCommand(commandBundle.Command, commandBundle.Args);
                }
            }
            else
            {
                throw new NotImplementedException();
            }

            StatusPause();

            node.Status = string.Empty;
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Executes a <b>docker deploy</b> or <b>docker stack deploy</b> command.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="rightCommandLine">The right split of the command line.</param>
        /// <remarks>
        /// <para>
        /// The file named by the first <b>--bundle-file</b>, <b>--compose-file</b>, or
        /// <b>-c</b> option (in either <c>OPTION PATH</c> or <c>OPTION=PATH</c> form) is
        /// read locally and attached to the command bundle.  The option's value in
        /// <paramref name="rightCommandLine"/> is rewritten in place to just the file
        /// name so it resolves relative to the bundle.
        /// </para>
        /// <para>
        /// The process exits with a non-zero code when no file was specified, otherwise
        /// it writes the command output to the console and exits with the exit code
        /// returned by the remote command.
        /// </para>
        /// </remarks>
        private void Deploy(SshProxy <NodeDefinition> node, CommandLine rightCommandLine)
        {
            string path = null;

            // First look for the space separated form:
            //
            //      --compose-file PATH
            //
            // An option without a (non-empty) value is ignored so that we
            // keep scanning rather than failing later on an empty path.

            for (int i = 0; i < rightCommandLine.Items.Length && path == null; i++)
            {
                switch (rightCommandLine.Items[i])
                {
                case "--bundle-file":
                case "--compose-file":
                case "-c":

                    if (i + 1 < rightCommandLine.Items.Length && !string.IsNullOrEmpty(rightCommandLine.Items[i + 1]))
                    {
                        path = rightCommandLine.Items[i + 1];

                        // Convert the command line argument to a bundle relative path.

                        rightCommandLine.Items[i + 1] = Path.GetFileName(path);
                    }
                    break;
                }
            }

            if (path == null)
            {
                // If that didn't work, try looking for arguments like:
                //
                //      --bundle-file=PATH

                var patterns =
                    new string[]
                {
                    "--bundle-file=",
                    "--compose-file=",
                    "-c="
                };

                for (int i = 0; i < rightCommandLine.Items.Length && path == null; i++)
                {
                    var item = rightCommandLine.Items[i];

                    foreach (var pattern in patterns)
                    {
                        // Option names are not linguistic text, so compare ordinally.

                        if (item.StartsWith(pattern, StringComparison.Ordinal) && item.Length > pattern.Length)
                        {
                            path = item.Substring(pattern.Length);

                            // Convert the command line argument to a bundle relative path.

                            rightCommandLine.Items[i] = pattern + Path.GetFileName(path);
                            break;
                        }
                    }
                }
            }

            if (path == null)
            {
                // This is a failure, so report it with a non-zero exit code.

                Console.Error.WriteLine("*** ERROR: No DAB or compose file specified.");
                Program.Exit(1);
                return;
            }

            var bundle = new CommandBundle("docker", rightCommandLine.Items);

            bundle.AddFile(Path.GetFileName(path), File.ReadAllText(path));

            var response = node.SudoCommand(bundle);

            Console.Write(response.AllText);
            Program.Exit(response.ExitCode);
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Verifies a worker node's NTP health by confirming that every master
        /// node appears in the worker's list of time sources.
        /// </summary>
        /// <param name="node">The worker node.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <remarks>
        /// The check is attempted up to six times, 30 seconds apart.  If it still
        /// fails, the local NTP service is restarted once and the check is repeated.
        /// The node is faulted only when the final attempt fails; a failure that
        /// clears up on a later attempt is not reported.
        /// </remarks>
        private static void CheckWorkerNtp(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition)
        {
            // We're going to use [ntpq -pw] to query the configured time sources.
            // We should get something back that looks like
            //
            //           remote           refid      st t when poll reach   delay   offset  jitter
            //           ==============================================================================
            //            LOCAL(0).LOCL.          10 l  45m   64    0    0.000    0.000   0.000
            //           * 10.0.1.5        198.60.22.240    2 u  111  128  377    0.062    3.409   0.608
            //           + 10.0.1.7        198.60.22.240    2 u  111  128  377    0.062    3.409   0.608
            //           + 10.0.1.7        198.60.22.240    2 u  111  128  377    0.062    3.409   0.608
            //
            // For worker nodes, we need to verify that each of the masters are answering
            // by confirming that their IP addresses are present.

            node.Status = "check: NTP";

            const int maxPasses = 2;    // Initial pass plus one pass after restarting NTP.
            const int maxTries  = 6;

            var retryDelay = TimeSpan.FromSeconds(30);
            var fault      = (string)null;

            for (var pass = 0; pass < maxPasses; pass++)
            {
                if (pass > 0)
                {
                    // $hack(jeff.lill):
                    //
                    // I've seen the NTP check fail on worker nodes, complaining
                    // that the connection attempt was rejected.  I manually restarted
                    // the node and then it worked.  I'm not sure if the rejected connection
                    // was being made to the local NTP service or from the local service
                    // to NTP running on the master.
                    //
                    // I'm going to assume that it was to the local NTP service and I'm
                    // going to try mitigating this by restarting the local NTP service
                    // and then re-running the tests.  I'm only going to do this once.

                    node.SudoCommand("systemctl restart ntp", node.DefaultRunOptions & ~RunOptions.FaultOnError);
                }

                for (var tries = 0; tries < maxTries; tries++)
                {
                    if (tries > 0)
                    {
                        // Only wait between attempts, never after the last one.

                        Thread.Sleep(retryDelay);
                    }

                    var output = node.SudoCommand("/usr/bin/ntpq -pw", RunOptions.LogOutput).OutputText;

                    // Each attempt starts clean so that an earlier failure
                    // can't be reported after a later attempt succeeds.

                    fault = null;

                    foreach (var master in clusterDefinition.SortedMasters)
                    {
                        // We're going to check the for presence of the master's IP address
                        // or its name, the latter because [ntpq] appears to attempt a reverse
                        // IP address lookup which will resolve into one of the DNS names defined
                        // in the local [/etc/hosts] file.

                        if (!output.Contains(master.PrivateAddress.ToString()) && !output.Contains(master.Name.ToLower()))
                        {
                            fault = $"NTP: Manager [{master.Name}/{master.PrivateAddress}] is not answering.";
                            break;
                        }
                    }

                    if (fault == null)
                    {
                        return;     // Every master is answering.
                    }
                }
            }

            node.Fault(fault);
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Replaces the node's OpenSSH server configuration with the settings
        /// below and restarts the <b>sshd</b> service so they take effect.
        /// </summary>
        /// <param name="node">The target node.</param>
        /// <param name="stepDelayed">Ignored.</param>
        public static void ConfigureOpenSSH(SshProxy <NodeDefinition> node, TimeSpan stepDelayed)
        {
            const string sshdConfigPath = "/etc/ssh/sshd_config";

            // The complete [sshd_config] file contents.  This is uploaded verbatim.

            var sshdConfigText = @"# Package generated configuration file
# See the sshd_config(5) manpage for details

# What ports, IPs and protocols we listen for
Port 22
# Use these options to restrict which interfaces/protocols sshd will bind to
#ListenAddress ::
#ListenAddress 0.0.0.0
Protocol 2
# HostKeys for protocol version 2
HostKey /etc/ssh/ssh_host_rsa_key
#HostKey /etc/ssh/ssh_host_dsa_key
#HostKey /etc/ssh/ssh_host_ecdsa_key
#HostKey /etc/ssh/ssh_host_ed25519_key
#Privilege Separation is turned on for security
UsePrivilegeSeparation yes

# Lifetime and size of ephemeral version 1 server key
KeyRegenerationInterval 3600
ServerKeyBits 1024

# Logging
SyslogFacility AUTH
LogLevel INFO

# Authentication:
LoginGraceTime 120
PermitRootLogin prohibit-password
StrictModes yes

RSAAuthentication yes
PubkeyAuthentication yes
#AuthorizedKeysFile	%h/.ssh/authorized_keys

# Don't read the user's ~/.rhosts and ~/.shosts files
IgnoreRhosts yes
# For this to work you will also need host keys in /etc/ssh_known_hosts
RhostsRSAAuthentication no
# similar for protocol version 2
HostbasedAuthentication no
# Uncomment if you don't trust ~/.ssh/known_hosts for RhostsRSAAuthentication
#IgnoreUserKnownHosts yes

# To enable empty passwords, change to yes (NOT RECOMMENDED)
PermitEmptyPasswords no

# Change to yes to enable challenge-response passwords (beware issues with
# some PAM modules and threads)
ChallengeResponseAuthentication no

# Change to no to disable tunnelled clear text passwords
#PasswordAuthentication yes

# Kerberos options
#KerberosAuthentication no
#KerberosGetAFSToken no
#KerberosOrLocalPasswd yes
#KerberosTicketCleanup yes

# GSSAPI options
#GSSAPIAuthentication no
#GSSAPICleanupCredentials yes

AllowTcpForwarding no
X11Forwarding no
X11DisplayOffset 10
PrintMotd no
PrintLastLog yes
TCPKeepAlive yes
#UseLogin no

#MaxStartups 10:30:60
#Banner /etc/issue.net

# Allow client to pass locale environment variables
AcceptEnv LANG LC_*

Subsystem sftp /usr/lib/openssh/sftp-server

# Set this to 'yes' to enable PAM authentication, account processing,
# and session processing. If this is enabled, PAM authentication will
# be allowed through the ChallengeResponseAuthentication and
# PasswordAuthentication.  Depending on your PAM configuration,
# PAM authentication via ChallengeResponseAuthentication may bypass
# the setting of ""PermitRootLogin without-password"".
# If you just want the PAM account and session checks to run without
# PAM authentication, then enable this but set PasswordAuthentication
# and ChallengeResponseAuthentication to 'no'.
UsePAM yes

# Allow connections to be idle for up to an 10 minutes (600 seconds)
# before terminating them.  This configuration pings the client every
# 30 seconds for up to 20 times without a response:
#
#   20*30 = 600 seconds

ClientAliveInterval 30
ClientAliveCountMax 20
TCPKeepAlive yes
";

            // Write the new configuration and then bounce the service.

            node.UploadText(sshdConfigPath, sshdConfigText);
            node.SudoCommand("systemctl restart sshd");
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Brings a freshly installed server up to the baseline required of a
        /// cluster host node: package manager settings, OpenSSH configuration,
        /// uploaded configuration files and resources, and the preparation and
        /// disk setup scripts.
        /// </summary>
        /// <param name="node">The target cluster node.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <param name="kubeSetupInfo">Kubernetes setup details.</param>
        /// <param name="shutdown">Optionally shuts down the node.</param>
        /// <remarks>
        /// This does nothing when the node already has the <b>setup/prepared</b>
        /// marker file in its state folder.  The marker is created at the end of
        /// a successful run.
        /// </remarks>
        public static void PrepareNode(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo, bool shutdown = false)
        {
            Covenant.Requires <ArgumentNullException>(node != null);
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null);
            Covenant.Requires <ArgumentNullException>(kubeSetupInfo != null);

            const string aptConfigFolder  = "/etc/apt/apt.conf.d";
            const string forceIPv4Path    = aptConfigFolder + "/99-force-ipv4-transport";
            const string retriesPath      = aptConfigFolder + "/99-retries";

            var preparedMarkerPath = $"{KubeHostFolders.State}/setup/prepared";

            if (node.FileExists(preparedMarkerPath))
            {
                return;     // Already prepared
            }

            //-----------------------------------------------------------------
            // Ensure that the cluster host folders exist.

            node.CreateHostFolders();

            //-----------------------------------------------------------------
            // Package manager configuration.

            if (!clusterDefinition.NodeOptions.AllowPackageManagerIPv6)
            {
                // Restrict the [apt] package manager to using IPv4 to communicate
                // with the package mirrors, since IPv6 often doesn't work.

                node.UploadText(forceIPv4Path, "Acquire::ForceIPv4 \"true\";");
                node.SudoCommand("chmod 644 " + forceIPv4Path);
            }

            // Configure [apt] to retry.

            node.UploadText(retriesPath, $"APT::Acquire::Retries \"{clusterDefinition.NodeOptions.PackageManagerRetries}\";");
            node.SudoCommand("chmod 644 " + retriesPath);

            //-----------------------------------------------------------------
            // Other configuration.

            ConfigureOpenSSH(node, TimeSpan.Zero);
            node.UploadConfigFiles(clusterDefinition, kubeSetupInfo);
            node.UploadResources(clusterDefinition, kubeSetupInfo);

            if (clusterDefinition != null)
            {
                ConfigureEnvironmentVariables(node, clusterDefinition);
            }

            node.SudoCommand("safe-apt-get update");

            // Run the preparation script and reboot; this is wrapped as an
            // idempotent action keyed on [setup/prep-node].

            node.InvokeIdempotentAction("setup/prep-node",
                                        () =>
            {
                node.Status = "preparing";
                node.SudoCommand("setup-prep.sh");
                node.Reboot(wait: true);
            });

            // We need to upload the cluster configuration and initialize drives attached
            // to the node.  We're going to assume that these are not already initialized.

            // $todo(jeff.lill):
            //
            // We may need an option that allows an operator to pre-build a hardware
            // based drive array or something.  I'm going to defer this to later and
            // concentrate on commodity hardware and cloud deployments for now.

            CommonSteps.ConfigureEnvironmentVariables(node, clusterDefinition);

            node.Status = "setup: disk";
            node.SudoCommand("setup-disk.sh");

            // Clear any DHCP leases to be super sure that cloned node
            // VMs will obtain fresh IP addresses.

            node.Status = "clear: DHCP leases";
            node.SudoCommand("rm -f /var/lib/dhcp/*");

            // Indicate that the node has been fully prepared.

            node.SudoCommand("touch " + preparedMarkerPath);

            // Shutdown the node if requested.

            if (!shutdown)
            {
                return;
            }

            node.Status = "shutdown";
            node.SudoCommand("shutdown 0", RunOptions.Defaults | RunOptions.Shutdown);
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Verifies a master node's NTP health by confirming that at least one
        /// time source other than the local clock is listed.
        /// </summary>
        /// <param name="node">The master node.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <remarks>
        /// The check is attempted up to six times, 30 seconds apart.  The node is
        /// faulted only when the final attempt fails, including the case where
        /// <b>ntpq</b> itself keeps returning a non-zero exit code.  A failure that
        /// clears up on a later attempt is not reported.
        /// </remarks>
        private static void CheckMasterNtp(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition)
        {
            // We're going to use [ntpq -pw] to query the configured time sources.
            // We should get something back that looks like
            //
            //      remote           refid      st t when poll reach   delay   offset  jitter
            //      ==============================================================================
            //       LOCAL(0).LOCL.          10 l  45m   64    0    0.000    0.000   0.000
            //      * clock.xmission. .GPS.            1 u  134  256  377   48.939   -0.549  18.357
            //      + 173.44.32.10    18.26.4.105      2 u  200  256  377   96.981   -0.623   3.284
            //      + pacific.latt.ne 44.24.199.34     3 u  243  256  377   41.457   -8.929   8.497
            //
            // For master nodes, we're simply going to verify that we have at least one external
            // time source answering.

            node.Status = "check: NTP";

            const int maxTries = 6;

            var retryDelay = TimeSpan.FromSeconds(30);
            var fault      = (string)null;

            for (int tryCount = 0; tryCount < maxTries; tryCount++)
            {
                if (tryCount > 0)
                {
                    // Only wait between attempts, never after the last one.

                    Thread.Sleep(retryDelay);
                }

                var response = node.SudoCommand("/usr/bin/ntpq -pw", RunOptions.LogOutput);

                if (response.ExitCode != 0)
                {
                    // Record this so that a persistently failing [ntpq]
                    // is reported rather than silently passing the check.

                    fault = $"NTP: [ntpq -pw] failed with exit code [{response.ExitCode}].";
                    continue;
                }

                using (var reader = response.OpenOutputTextReader())
                {
                    // Column header and table bar lines.

                    var headerLine = reader.ReadLine();
                    var barLine    = reader.ReadLine();

                    if (string.IsNullOrWhiteSpace(headerLine) || string.IsNullOrWhiteSpace(barLine) || barLine[0] != '=')
                    {
                        fault = "NTP: Invalid [ntpq -pw] response.";
                        continue;
                    }

                    // Count the non-empty lines that don't include [.LOCL.],
                    // the local clock.

                    var sourceCount = 0;

                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        if (line.Length > 0 && !line.Contains(".LOCL."))
                        {
                            sourceCount++;
                        }
                    }

                    if (sourceCount == 0)
                    {
                        fault = "NTP: No external sources are answering.";
                        continue;
                    }
                }

                // Everything looks good, so discard any fault recorded
                // by an earlier attempt.

                fault = null;
                break;
            }

            if (fault != null)
            {
                node.Fault(fault);
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Deploys RabbitMQ to a cluster node as a container.
        /// </summary>
        /// <param name="node">The target hive node.</param>
        /// <remarks>
        /// Nothing is deployed unless the node is labeled for HiveMQ.  After the
        /// container is started, this polls the broker's health check every two
        /// seconds for up to four minutes and faults the node if the broker never
        /// reports ready.
        /// </remarks>
        private void DeployHiveMQ(SshProxy <NodeDefinition> node)
        {
            // Deploy RabbitMQ only on the labeled nodes.

            if (!node.Metadata.Labels.HiveMQ)
            {
                return;
            }

            // Build a comma separated list of fully qualified RabbitMQ hostnames so we
            // can pass them as the CLUSTER_NODES environment variable.

            var rabbitNodes = hive.Definition.SortedNodes.Where(n => n.Labels.HiveMQ).ToList();
            var sbCluster   = new StringBuilder();

            foreach (var rabbitNode in rabbitNodes)
            {
                sbCluster.AppendWithSeparator($"{rabbitNode.Name}@{rabbitNode.Name}.{hive.Definition.Hostnames.HiveMQ}", ",");
            }

            var hipeCompileArgs = new List <string>();

            if (hive.Definition.HiveMQ.Precompile)
            {
                hipeCompileArgs.Add("--env");
                hipeCompileArgs.Add("RABBITMQ_HIPE_COMPILE=1");
            }

            // These go in their own list.  It's passed immediately after
            // [hipeCompileArgs] below, so the resulting argument order is
            // the same as when they were (mistakenly) appended to that list.

            var managementPluginArgs = new List <string>();

            if (node.Metadata.Labels.HiveMQManager)
            {
                managementPluginArgs.Add("--env");
                managementPluginArgs.Add("MANAGEMENT_PLUGIN=true");
            }

            // $todo(jeff.lill):
            //
            // I was unable to get TLS working correctly for RabbitMQ.  I'll come back
            // and revisit this later:
            //
            //      https://github.com/jefflill/NeonForge/issues/319

            // NOTE(review): The default broker credentials below are hard-coded
            // [sysadmin/password] — confirm these are changed after deployment
            // or source them from the hive definition.

            ServiceHelper.StartContainer(node, "neon-hivemq", hive.Definition.Image.HiveMQ, RunOptions.FaultOnError,
                                         new CommandBundle(
                                             "docker run",
                                             "--detach",
                                             "--name", "neon-hivemq",
                                             "--env", $"CLUSTER_NAME={hive.Definition.Name}",
                                             "--env", $"CLUSTER_NODES={sbCluster}",
                                             "--env", "CLUSTER_PARTITION_MODE=autoheal",
                                             "--env", $"NODENAME={node.Name}@{node.Name}.{hive.Definition.Hostnames.HiveMQ}",
                                             "--env", "RABBITMQ_USE_LONGNAME=true",
                                             "--env", "RABBITMQ_DEFAULT_USER=sysadmin",
                                             "--env", "RABBITMQ_DEFAULT_PASS=password",
                                             "--env", $"RABBITMQ_NODE_PORT={HiveHostPorts.HiveMQAMQP}",
                                             "--env", $"RABBITMQ_DIST_PORT={HiveHostPorts.HiveMQDIST}",
                                             "--env", $"RABBITMQ_MANAGEMENT_PORT={HiveHostPorts.HiveMQManagement}",
                                             "--env", $"RABBITMQ_ERLANG_COOKIE={hive.Definition.HiveMQ.ErlangCookie}",
                                             "--env", $"RABBITMQ_VM_MEMORY_HIGH_WATERMARK={hive.Definition.HiveMQ.RamHighWatermark}",
                                             hipeCompileArgs,
                                             managementPluginArgs,
                                             "--env", $"RABBITMQ_DISK_FREE_LIMIT={HiveDefinition.ValidateSize(hive.Definition.HiveMQ.DiskFreeLimit, typeof(HiveMQOptions), nameof(hive.Definition.HiveMQ.DiskFreeLimit))}",
                                             //"--env", $"RABBITMQ_SSL_CERTFILE=/etc/neon/certs/hive.crt",
                                             //"--env", $"RABBITMQ_SSL_KEYFILE=/etc/neon/certs/hive.key",
                                             "--env", $"ERL_EPMD_PORT={HiveHostPorts.HiveMQEPMD}",
                                             "--mount", "type=volume,source=neon-hivemq,target=/var/lib/rabbitmq",
                                             "--mount", "type=bind,source=/etc/neon/certs,target=/etc/neon/certs,readonly",
                                             "--publish", $"{HiveHostPorts.HiveMQEPMD}:{HiveHostPorts.HiveMQEPMD}",
                                             "--publish", $"{HiveHostPorts.HiveMQAMQP}:{HiveHostPorts.HiveMQAMQP}",
                                             "--publish", $"{HiveHostPorts.HiveMQDIST}:{HiveHostPorts.HiveMQDIST}",
                                             "--publish", $"{HiveHostPorts.HiveMQManagement}:{HiveHostPorts.HiveMQManagement}",
                                             "--memory", HiveDefinition.ValidateSize(hive.Definition.HiveMQ.RamLimit, typeof(HiveMQOptions), nameof(hive.Definition.HiveMQ.RamLimit)),
                                             "--restart", "always",
                                             ServiceHelper.ImagePlaceholderArg));

            // Wait for the RabbitMQ node to report that it's ready.

            var timeout  = TimeSpan.FromMinutes(4);
            var pollTime = TimeSpan.FromSeconds(2);

            node.Status = "hivemq: waiting";

            try
            {
                NeonHelper.WaitFor(
                    () =>
                {
                    // The health check is expected to fail while the broker is still
                    // starting, so don't let a non-zero exit code fault the node.

                    var readyResponse = node.SudoCommand($"docker exec neon-hivemq rabbitmqctl node_health_check -n {node.Name}@{node.Name}.{hive.Definition.Hostnames.HiveMQ}", node.DefaultRunOptions & ~RunOptions.FaultOnError);

                    return readyResponse.ExitCode == 0;
                },
                    timeout: timeout,
                    pollTime: pollTime);
            }
            catch (TimeoutException)
            {
                node.Fault($"RabbitMQ not ready after waiting [{timeout}].");
                return;
            }

            node.Status = "hivemq: ready";
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Performs the Docker registry cache related configuration of the node.
        /// </summary>
        public void Configure(SshProxy <NodeDefinition> node)
        {
            // NOTE:
            //
            // We're going to configure the certificates even if the registry cache
            // isn't enabled so it'll be easier to upgrade the hive later.

            // For managers, upload the individual cache certificate and
            // private key files for managers [cache.crt] and [cache.key] at
            // [/etc/neon-registry-cache/].  This directory will be
            // mapped into the cache container.
            //
            // Then create the cache's data volume and start the manager's
            // Registry cache container.

            if (node.Metadata.IsManager)
            {
                node.InvokeIdempotentAction("setup/registrycache",
                                            () =>
                {
                    // Copy the registry cache certificate and private key to
                    //
                    //      /etc/neon-registry-cache

                    node.Status = "run: registry-cache-server-certs.sh";

                    var copyCommand  = new CommandBundle("./registry-cache-server-certs.sh");
                    var sbCopyScript = new StringBuilder();

                    sbCopyScript.AppendLine("mkdir -p /etc/neon-registry-cache");
                    sbCopyScript.AppendLine("chmod 750 /etc/neon-registry-cache");

                    copyCommand.AddFile($"cache.crt", hive.HiveLogin.HiveCertificate.CertPem);
                    copyCommand.AddFile($"cache.key", hive.HiveLogin.HiveCertificate.KeyPem);

                    sbCopyScript.AppendLine($"cp cache.crt /etc/neon-registry-cache/cache.crt");
                    sbCopyScript.AppendLine($"cp cache.key /etc/neon-registry-cache/cache.key");
                    sbCopyScript.AppendLine($"chmod 640 /etc/neon-registry-cache/*");

                    copyCommand.AddFile("registry-cache-server-certs.sh", sbCopyScript.ToString(), isExecutable: true);
                    node.SudoCommand(copyCommand);

                    // Upload the cache certificates to every hive node at:
                    //
                    //      /etc/docker/certs.d/<hostname>:{HiveHostPorts.RegistryCache}/ca.crt
                    //
                    // and then have Linux reload the trusted certificates.

                    node.InvokeIdempotentAction("setup/registrycache-cert",
                                                () =>
                    {
                        node.Status = "upload: registry cache certs";

                        var uploadCommand  = new CommandBundle("./registry-cache-client-certs.sh");
                        var sbUploadScript = new StringBuilder();

                        uploadCommand.AddFile($"hive-neon-registry-cache.crt", hive.HiveLogin.HiveCertificate.CertPem);

                        foreach (var manager in hive.Definition.SortedManagers)
                        {
                            var cacheHostName = hive.Definition.GetRegistryCacheHost(manager);

                            sbUploadScript.AppendLine($"mkdir -p /etc/docker/certs.d/{cacheHostName}:{HiveHostPorts.DockerRegistryCache}");
                            sbUploadScript.AppendLine($"cp hive-neon-registry-cache.crt /etc/docker/certs.d/{cacheHostName}:{HiveHostPorts.DockerRegistryCache}/ca.crt");
                        }

                        uploadCommand.AddFile("registry-cache-client-certs.sh", sbUploadScript.ToString(), isExecutable: true);
                        node.SudoCommand(uploadCommand);
                    });

                    // Start the registry cache containers if enabled for the hive.

                    if (hive.Definition.Docker.RegistryCache)
                    {
                        // Create the registry data volume.

                        node.Status = "create: registry cache volume";
                        node.SudoCommand(new CommandBundle("docker-volume-create \"neon-registry-cache\""));

                        // Start the registry cache using the required Docker public registry
                        // credentials, if any.

                        var publicRegistryCredentials = hive.Definition.Docker.Registries.SingleOrDefault(r => HiveHelper.IsDockerPublicRegistry(r.Registry));

                        publicRegistryCredentials = publicRegistryCredentials ?? new RegistryCredentials()
                        {
                            Registry = HiveConst.DockerPublicRegistry
                        };
                        publicRegistryCredentials.Username = publicRegistryCredentials.Username ?? string.Empty;
                        publicRegistryCredentials.Password = publicRegistryCredentials.Password ?? string.Empty;

                        node.Status = "start: neon-registry-cache";

                        var registry = publicRegistryCredentials.Registry;

                        if (string.IsNullOrEmpty(registry) || registry.Equals("docker.io", StringComparison.InvariantCultureIgnoreCase))
                        {
                            registry = "registry-1.docker.io";
                        }

                        ServiceHelper.StartContainer(node, "neon-registry-cache", hive.Definition.Image.RegistryCache, RunOptions.FaultOnError | hive.SecureRunOptions,
                                                     new CommandBundle(
                                                         "docker run",
                                                         "--name", "neon-registry-cache",
                                                         "--detach",
                                                         "--restart", "always",
                                                         "--publish", $"{HiveHostPorts.DockerRegistryCache}:5000",
                                                         "--volume", "/etc/neon-registry-cache:/etc/neon-registry-cache:ro", // Registry cache certificates folder
                                                         "--volume", "neon-registry-cache:/var/lib/neon-registry-cache",
                                                         "--env", $"HOSTNAME={node.Name}.{hive.Definition.Hostnames.RegistryCache}",
                                                         "--env", $"REGISTRY=https://{registry}",
                                                         "--env", $"USERNAME={publicRegistryCredentials.Username}",
                                                         "--env", $"PASSWORD={publicRegistryCredentials.Password}",
                                                         "--env", "LOG_LEVEL=info",
                                                         ServiceHelper.ImagePlaceholderArg));
                    }
                });

                node.Status = string.Empty;
            }
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Uploads the setup scripts and the tools for the target operating system to the server.
        /// </summary>
        /// <typeparam name="TMetadata">The server's metadata type.</typeparam>
        /// <param name="server">The remote server (must not be <c>null</c>).</param>
        /// <param name="hiveDefinition">The hive definition or <c>null</c>.</param>
        /// <remarks>
        /// <para>
        /// Every file in the local <c>setup</c> folder is uploaded to <c>HiveHostFolders.Setup</c>
        /// under its original name and every file in the local <c>tools</c> folder is uploaded
        /// to <c>HiveHostFolders.Tools</c> with a trailing <c>.sh</c> extension removed.  The
        /// uploaded files in both folders are then marked with mode <c>744</c>.
        /// </para>
        /// <para>
        /// Uploading of source files is currently compiled out (<c>#if FALSE</c>).
        /// </para>
        /// </remarks>
        public static void UploadResources <TMetadata>(this SshProxy <TMetadata> server, HiveDefinition hiveDefinition = null)
            where TMetadata : class
        {
            Covenant.Requires <ArgumentNullException>(server != null);

            //-----------------------------------------------------------------
            // Upload resource files to the setup folder.

            // NOTE(review): The [*.*] glob only matches names that include a dot,
            // so any extensionless files in the folder are not removed here.

            server.Status = $"clear: {HiveHostFolders.Setup}";
            server.SudoCommand($"rm -rf {HiveHostFolders.Setup}/*.*");

            // Upload the setup files.

            server.Status = "upload: setup files";

            foreach (var file in Program.LinuxFolder.GetFolder("setup").Files())
            {
                server.UploadFile(hiveDefinition, file, $"{HiveHostFolders.Setup}/{file.Name}");
            }

            // Make the setup scripts executable.

            server.SudoCommand($"chmod 744 {HiveHostFolders.Setup}/*");

            // Uncomment this if/when we have to upload source files.

#if FALSE
            //-----------------------------------------------------------------
            // Upload resource files to the source folder.  Note that we're going
            // to convert to Linux style line endings and we're going to convert
            // leading spaces into TABs (4 spaces == 1 TAB).

            // $hack(jeff.lill):
            //
            // This is hardcoded to assume that the source consists of a single level
            // folder with the source files.  If the folders nest any further, we'll
            // need to implement a recursive method to handle this properly.
            //
            // This code also assumes that the folder and file names do not include
            // any spaces.

            server.Status = $"clear: {HiveHostFolders.Source}";
            server.SudoCommand($"rm -rf {HiveHostFolders.Source}/*.*");

            // Upload the source files.

            server.Status = "upload: source files";

            foreach (var folder in Program.LinuxFolder.GetFolder("source").Folders())
            {
                foreach (var file in folder.Files())
                {
                    var targetPath = $"{HiveHostFolders.Source}/{folder.Name}/{file.Name}";

                    server.UploadText(targetPath, file.Contents, tabStop: -4);
                    server.SudoCommand("chmod 664", targetPath);
                }
            }
#endif

            //-----------------------------------------------------------------
            // Upload files to the tools folder.

            // NOTE(review): The tools are uploaded below WITHOUT their [.sh] extension,
            // so the [*.*] glob used here will not match tools uploaded by an earlier
            // run.  Same-named tools are overwritten by the upload, but tools that no
            // longer exist locally are left behind.  Confirm whether the glob should
            // be widened to [*].

            server.Status = $"clear: {HiveHostFolders.Tools}";
            server.SudoCommand($"rm -rf {HiveHostFolders.Tools}/*.*");

            // Upload the tool files.  Note that we're going to strip off the [.sh]
            // file type to make these easier to run.  Only a trailing [.sh] is
            // removed: a blanket [Replace(".sh", "")] would also mangle names that
            // merely contain that sequence (e.g. [foo.shim.sh] --> [fooim]).

            server.Status = "upload: tool files";

            const string shellExtension = ".sh";

            foreach (var file in Program.LinuxFolder.GetFolder("tools").Files())
            {
                string toolName = file.Name;

                // The length check keeps a file named exactly [.sh] from ending up
                // with an empty target name.

                if (toolName.Length > shellExtension.Length && toolName.EndsWith(shellExtension, StringComparison.Ordinal))
                {
                    toolName = toolName.Substring(0, toolName.Length - shellExtension.Length);
                }

                server.UploadFile(hiveDefinition, file, $"{HiveHostFolders.Tools}/{toolName}");
            }

            // Make the scripts executable.

            server.SudoCommand($"chmod 744 {HiveHostFolders.Tools}/*");
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Configures OpenVPN on a manager node.
        /// </summary>
        /// <param name="manager">The manager.</param>
        /// <remarks>
        /// <para>
        /// This method performs these steps on the manager:
        /// </para>
        /// <list type="number">
        /// <item>Creates the hive host folders and uploads the config and resource files.</item>
        /// <item>Installs the <c>openvpn</c> package.</item>
        /// <item>
        /// Uploads <c>/etc/openvpn/server.conf</c> along with the CA certificate, server
        /// certificate/key, TLS auth key, DH parameters and the certificate revocation list
        /// obtained from <c>vpnCaFiles</c>.
        /// </item>
        /// <item>Initializes the <c>vpnCredentials</c> field with the root user's credentials.</item>
        /// <item>
        /// Uploads the <c>openvpn@.service</c> systemd template unit (which passes
        /// <c>--crl-verify</c> to OpenVPN) and then re-enables and restarts OpenVPN.
        /// </item>
        /// <item>
        /// Deploys and starts the <c>disable-spoof-filters</c> service which keeps the
        /// Linux reverse path (anti-spoofing) filters disabled.
        /// </item>
        /// </list>
        /// </remarks>
        private void ConfigManagerVpn(SshProxy <NodeDefinition> manager)
        {
            // Upload the setup and configuration files.
            //
            // NOTE:
            //
            // These steps are redundant and will be repeated during the
            // common node configuration, but we need some of the scripts
            // here, before that happens.

            manager.CreateHiveHostFolders();
            manager.UploadConfigFiles(hive.Definition);
            manager.UploadResources(hive.Definition);

            // Install OpenVPN.

            manager.Status = "vpn install";
            manager.SudoCommand("safe-apt-get update");
            manager.SudoCommand("safe-apt-get install -yq openvpn");

            // Configure OpenVPN.  The server address written into the config
            // comments is the first address after the VPN subnet's base address.

            var nodesSubnet      = NetworkCidr.Parse(hive.Definition.Network.NodesSubnet);
            var vpnSubnet        = NetworkCidr.Parse(manager.Metadata.VpnPoolSubnet);
            var duplicateCN      = hive.Definition.Vpn.AllowSharedCredentials ? "duplicate-cn" : ";duplicate-cn";
            var vpnServerAddress = NetHelper.UintToAddress(NetHelper.AddressToUint(vpnSubnet.Address) + 1);

            var serverConf =
                $@"#------------------------------------------------------------------------------
# OpenVPN config file customized for the [{manager.Name}] neonHIVE manager node.

# OpenVPN listening port.
port {NetworkPorts.OpenVPN}

# Enable TCP and/or UDP transports.
proto tcp
;proto udp

# Set packet tunneling mode.
dev tun

# SSL/TLS root certificate (ca), certificate
# (cert), and private key (key).  Each client
# and the server must have their own cert and
# key file.  The server and all clients will
# use the same ca file.
#
# See the [easy-rsa] directory for a series
# of scripts for generating RSA certificates
# and private keys.  Remember to use
# a unique Common Name for the server
# and each of the client certificates.
#
# Any X509 key management system can be used.
# OpenVPN can also use a PKCS #12 formatted key file
# (see [pkcs12] directive in man page).
ca ca.crt
cert server.crt
key server.key  # This file should be kept secret

# Diffie hellman parameters (2048-bit) generated via:
# 
#   openssl dhparam -out dhparam.pem 2048
# 
dh dhparam.pem

# The currently recommended topology.
topology subnet

# Configure server mode and supply a VPN subnet
# for OpenVPN to draw client addresses from.
# The server will take {vpnServerAddress} for itself,
# the rest will be made available to clients.
# Each client will be able to reach the server
# on {vpnServerAddress}. Comment this line out if you are
# ethernet bridging. See the man page for more info.
server {vpnSubnet.Address} {vpnSubnet.Mask}

# Maintain a record of client  virtual IP address
# associations in this file.  If OpenVPN goes down or
# is restarted, reconnecting clients can be assigned
# the same virtual IP address from the pool that was
# previously assigned.
;ifconfig-pool-persist ipp.txt

# Push routes to the client to allow it
# to reach other private subnets behind
# the server.  Remember that these
# private subnets will also need
# to know to route the OpenVPN client
# address pool ({vpnSubnet.Address})
# back to this specific OpenVPN server.
push ""route {nodesSubnet.Address} {nodesSubnet.Mask}""

# Uncomment this directive if multiple clients
# might connect with the same certificate/key
# files or common names.  This is recommended
# only for testing purposes.  For production use,
# each client should have its own certificate/key
# pair.
{duplicateCN}

# The keepalive directive causes ping-like
# messages to be sent back and forth over
# the link so that each side knows when
# the other side has gone down.
# Ping every 10 seconds, assume that remote
# peer is down if no ping received during
# a 120 second time period.
keepalive 10 120

# For extra security beyond that provided
# by SSL/TLS, create an [HMAC firewall]
# to help block DoS attacks and UDP port flooding.
#
# Generate with:
#   openvpn --genkey --secret ta.key
#
# The server and each client must have
# a copy of this key.
# The second parameter should be '0'
# on the server and '1' on the clients.
tls-auth ta.key 0 # This file is secret

# Select a cryptographic cipher.
# This config item must be copied to
# the client config file as well.
cipher AES-256-CBC 

# Enable compression on the VPN link.
# Don't enable this unless it is also
# enabled in the client config file.
#
# We're not enabling this due to the
# VORACLE security vulnerablity:
#
#   https://community.openvpn.net/openvpn/wiki/VORACLE
#

# The maximum number of concurrently connected
# clients we want to allow.
max-clients {VpnOptions.ServerAddressCount - 2}

# This macro sets the TCP_NODELAY socket flag on 
# the server as well as pushes it to connecting
# clients. The TCP_NODELAY flag disables the Nagle
# algorithm on TCP sockets causing packets to be
# transmitted immediately with low latency, rather
# than waiting a short period of time in order to 
# aggregate several packets into a larger containing
# packet. In VPN applications over TCP, TCP_NODELAY
# is generally a good latency optimization.
tcp-nodelay

# It's a good idea to reduce the OpenVPN
# daemon's privileges after initialization.
#
# You can uncomment this out on
# non-Windows systems.
;user nobody
;group nobody

# The persist options will try to avoid
# accessing certain resources on restart
# that may no longer be accessible because
# of the privilege downgrade.
persist-key
persist-tun

# Output a short status file showing
# current connections, truncated
# and rewritten every minute.
status openvpn-status.log

# By default, log messages will go to the syslog (ork
# on Windows, if running as a service, they will go to
# the [\Program Files\OpenVPN\log] directory).
# Use log or log-append to override this default.
# [log] will truncate the log file on OpenVPN startup,
# while [log-append] will append to it.  Use one
# or the other (but not both).
log         /var/log/openvpn.log
;log-append  openvpn.log

# Set the appropriate level of log
# file verbosity.
#
# 0 is silent, except for fatal errors
# 4 is reasonable for general usage
# 5 and 6 can help to debug connection problems
# 9 is extremely verbose
verb 4

# Silence repeating messages.  At most 20
# sequential messages of the same message
# category will be output to the log.
;mute 20
";

            manager.Status = "vpn config";
            manager.SudoCommand("mkdir -p /etc/openvpn");
            manager.UploadText("/etc/openvpn/server.conf", serverConf);

            manager.UploadText("/etc/openvpn/ca.crt", vpnCaFiles.GetCert("ca"));
            manager.UploadText("/etc/openvpn/server.crt", vpnCaFiles.GetCert("server"));
            manager.UploadText("/etc/openvpn/server.key", vpnCaFiles.GetKey("server"));
            manager.SudoCommand("chmod 600", "/etc/openvpn/server.key");    // This is a secret!

            manager.UploadText("/etc/openvpn/ta.key", vpnCaFiles.GetTaKey());
            manager.SudoCommand("chmod 600", "/etc/openvpn/ta.key");        // This is a secret too!

            manager.UploadText("/etc/openvpn/dhparam.pem", vpnCaFiles.GetDHParam());

            // Initialize the [root] user's credentials.

            vpnCredentials =
                new VpnCredentials()
            {
                CaCert   = vpnCaFiles.GetCert("ca"),
                UserCert = vpnCaFiles.GetCert(HiveConst.RootUser),
                UserKey  = vpnCaFiles.GetKey(HiveConst.RootUser),
                TaKey    = vpnCaFiles.GetTaKey(),
                CaZipKey = VpnCaFiles.GenerateKey(),
                CaZip    = vpnCaFiles.ToZipBytes()
            };

            // Upload the initial (empty) Certificate Revocation List (CRL) file and then
            // upload an OpenVPN systemd template unit whose [ExecStart] passes [--crl-verify]
            // so that OpenVPN will recognize revoked certificates.

            manager.UploadText("/etc/openvpn/crl.pem", vpnCaFiles.GetFile("crl.pem"));
            manager.SudoCommand("chmod 664", "/etc/openvpn/crl.pem");    // OpenVPN needs to be able to read this after having its privileges downgraded.

            var openVpnUnit =
                @"[Unit]
Description=OpenVPN connection to %i
PartOf=openvpn.service
ReloadPropagatedFrom=openvpn.service
Before=systemd-user-sessions.service
Documentation=man:openvpn(8)
Documentation=https://community.openvpn.net/openvpn/wiki/Openvpn23ManPage
Documentation=https://community.openvpn.net/openvpn/wiki/HOWTO

[Service]
PrivateTmp=true
KillMode=mixed
Type=forking
ExecStart=/usr/sbin/openvpn --daemon ovpn-%i --status /run/openvpn/%i.status 10 --cd /etc/openvpn --script-security 2 --config /etc/openvpn/%i.conf --writepid /run/openvpn/%i.pid --crl-verify /etc/openvpn/crl.pem
PIDFile=/run/openvpn/%i.pid
ExecReload=/bin/kill -HUP $MAINPID
WorkingDirectory=/etc/openvpn
ProtectSystem=yes
CapabilityBoundingSet=CAP_IPC_LOCK CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_CHROOT CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE
LimitNPROC=10
DeviceAllow=/dev/null rw
DeviceAllow=/dev/net/tun rw

[Install]
WantedBy=multi-user.target
";

            // The unit above uses [%i] instance specifiers, so it must be installed as
            // the [openvpn@.service] template unit.  The path is held in one place so the
            // upload and the permission change can't refer to different files.

            const string openVpnUnitPath = "/etc/systemd/system/openvpn@.service";

            manager.UploadText(openVpnUnitPath, openVpnUnit);
            manager.SudoCommand($"chmod 644 {openVpnUnitPath}");

            // Do a daemon-reload so systemd will be aware of the new unit file.

            manager.SudoCommand("systemctl disable openvpn");
            manager.SudoCommand("systemctl daemon-reload");

            // Enable and restart OpenVPN.

            manager.SudoCommand("systemctl enable openvpn");
            manager.SudoCommand("systemctl restart openvpn");

            //-----------------------------------------------------------------
            // SPECIAL NOTE:
            //
            // I figured out that I need this lovely bit of code after banging my head on the desk for
            // 12 freaking days.  The problem was getting OpenVPN to work in Windows Azure (this will
            // also probably impact other cloud environments).
            //
            // Azure implements VNETs as layer 3 overlays.  This means that the host network interfaces
            // are not actually on an ethernet segment and the VPN default gateway is actually handling
            // all of the ARP packets, routing between the VNET subnets, load balancers, and the Internet.
            // This is problematic for OpenVPN traffic because the VPN client IP address space is not
            // part of the VNET which means the VNET gateway is not able to route packets from hive
            // hosts back to the manager's OpenVPN client addresses by default.
            //
            // The solution is to configure the managers with secondary NIC cards in a different subnet
            // and provision special Azure user-defined routes that direct VPN return packets to the
            // correct manager.
            //
            // I figured this part out the second day.  The problem was though that it simply didn't work.
            // From an external VPN client, I would try to ping a worker node through OpenVPN running on
            // a manager.  I'd see the ping traffic:
            //
            //      1. manager/tun0: request
            //      2. manager/eth1: request
            //      3. worker/eth0: request
            //      4. worker/eth0: reply
            //      5. manager/eth0: reply
            //      6: NOTHING! EXPECTED: manager/tun0: reply
            //
            // So the problem was that I could see the ICMP ping request hit the various interfaces
            // on the manager and be received by the worker.  I'd then see the worker send the reply,
            // and be routed via the user-defined Azure rule back to the manager.  The problem was
            // that the packet was simply dropped there.  It never made it back to tun0 so OpenVPN
            // could forward it back to the client.
            //
            // After days and days of trying to learn about Linux routing, iptables and policy rules,
            // I finally ran across this posting for the second time:
            //
            //      https://unix.stackexchange.com/questions/21093/output-traffic-on-different-interfaces-based-on-destination-port
            //
            // This was the key.  I ran across this a few days ago and didn't read it closely enough.
            // It made more sense after learning more about this stuff.
            //
            // Linux has a built-in IP address spoofing filter enabled by default.  This filter has the
            // kernel discard any packets whose source address doesn't match the IP address/route implied
            // by the remote interface that transmitted the packet.  This is exactly what's happening
            // when Azure forwards the VPN return packets via the user-defined route.  I'd see return
            // packets hit eth0 on the manager, be processed by the low-level RAW and MANGLE iptables
            // and then they'd disappear.
            //
            // The solution is simply to disable the spoofing filter.  I'm going to go ahead and do this
            // for all interfaces which should be fine for hives hosted in cloud environments, because the
            // VNET/Load Balancer/Security Groups will be used to lock things down.  Local hives will
            // need to be manually placed behind a suitable router/firewall as well.
            //
            // For robustness, I'm going to deploy this as a service daemon that polls the filter state
            // for each interface every 5 seconds, and disables any enabled filters.  This will ensure
            // that the filters will always be disabled, even as interfaces are brought up and down.

            var disableSpoofUnit =
                $@"[Unit]
Description=Disable Network Anti-Spoofing Filters
Documentation=
After=
Requires=
Before=

[Service]
Type=simple
ExecStart={HiveHostFolders.Bin}/disable-spoof-filters.sh

[Install]
WantedBy=multi-user.target
";

            var disableSpoofScript =
                @"#!/bin/bash
#------------------------------------------------------------------------------
# This script is a deployed as a service to ensure that the Linux anti-spoofing
# filters are disabled for the network interfaces on manager nodes hosting
# OpenVPN.  This is required to allow VPN return traffic from other nodes to
# routed back to tun0 and ultimately, connected VPN clients.
#
# Note that it appears that we need to disable the filter for all interfaces
# for this to actually work.

while :
do
    flush=false

    for f in /proc/sys/net/ipv4/conf/*/rp_filter
    do
        filter_enabled=$(cat $f)

        if [ ""$filter_enabled"" == ""1"" ] ; then
            echo 0 > $f
            flush=true
        fi
    done

    if [ ""$flush"" == ""true"" ] ; then
      echo 1 > /proc/sys/net/ipv4/route/flush
    fi

    sleep 5
done";

            // Install the unit file and the polling script it launches, then
            // enable and (re)start the service.

            manager.UploadText("/lib/systemd/system/disable-spoof-filters.service", disableSpoofUnit);
            manager.SudoCommand("chmod 644 /lib/systemd/system/disable-spoof-filters.service");

            manager.UploadText($"{HiveHostFolders.Bin}/disable-spoof-filters.sh", disableSpoofScript);
            manager.SudoCommand($"chmod 770 {HiveHostFolders.Bin}/disable-spoof-filters.sh");

            manager.SudoCommand("systemctl enable disable-spoof-filters");
            manager.SudoCommand("systemctl restart disable-spoof-filters");
        }