Ejemplo n.º 1
        /// <summary>
        /// Verifies that a cluster worker node is healthy.  The NTP check is run
        /// only when the node has not already faulted and the node status is then
        /// set to <b>healthy</b>.
        /// </summary>
        /// <param name="node">The worker node to be checked (must not be <c>null</c> and must be a worker).</param>
        /// <param name="clusterDefinition">The cluster definition (must not be <c>null</c>).</param>
        public static void CheckWorker(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition)
        {
            // Pass the argument names so a failed check identifies the offending parameter.

            Covenant.Requires <ArgumentNullException>(node != null, nameof(node));
            Covenant.Requires <ArgumentException>(node.Metadata.IsWorker, nameof(node));
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null, nameof(clusterDefinition));

            // Don't bother checking NTP on a node that has already faulted.

            if (!node.IsFaulted)
            {
                CheckWorkerNtp(node, clusterDefinition);
            }

            // NOTE(review): The status is assigned even when the node is faulted;
            // presumably the proxy keeps reporting the fault in that case -- confirm.

            node.Status = "healthy";
        }
Ejemplo n.º 2
        /// <inheritdoc/>
        public override void Run(CommandLine commandLine)
        {
            // The first argument is the path to the cluster definition file.  Stop
            // after reporting the problem when it's missing, rather than falling
            // through and indexing an empty argument array.
            //
            // NOTE(review): If the tool has a dedicated "exit with error code" helper,
            // it should probably be called here instead of a plain return -- confirm.

            if (commandLine.Arguments.Length < 1)
            {
                Console.Error.WriteLine("*** ERROR: CLUSTER-DEF is required.");
                return;
            }

            // Parse and validate the cluster definition.  The parsed result is
            // discarded; only the validation performed while loading matters here.

            ClusterDefinition.FromFile(commandLine.Arguments[0], strict: true);

            Console.WriteLine("*** The cluster definition is OK.");
        }
Ejemplo n.º 3
        /// <summary>
        /// Uploads a resource file to the remote server after performing any necessary preprocessing.
        /// When <c>file.HasVariables</c> is set, the file is passed through a <see cref="PreprocessReader"/>
        /// with variable expansion enabled before being uploaded as UTF-8 text.
        /// </summary>
        /// <typeparam name="TMetadata">The node metadata type.</typeparam>
        /// <param name="node">The remote node.</param>
        /// <param name="clusterDefinition">
        /// The cluster definition or <c>null</c>.  Cluster variables are set on the
        /// preprocessor only when this is not <c>null</c>.
        /// </param>
        /// <param name="kubeSetupInfo">The Kubernetes setup details.</param>
        /// <param name="file">The resource file.</param>
        /// <param name="targetPath">The target path on the remote server.</param>
        private static void UploadFile <TMetadata>(this SshProxy <TMetadata> node, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo, ResourceFiles.File file, string targetPath)
            where TMetadata : class
            using (var input = file.ToStream())
                if (file.HasVariables)
                    // We need to expand any variables.  Note that if we don't have a
                    // cluster definition or for undefined variables, we're going to
                    // have the variables expand to the empty string.

                    using (var msExpanded = new MemoryStream())
                        using (var writer = new StreamWriter(msExpanded))
                            // Only variable expansion is enabled: preprocessor statements
                            // are not processed and comments are not stripped.

                            var preprocessReader =
                                new PreprocessReader(new StreamReader(input))
                                DefaultVariable   = string.Empty,
                                ExpandVariables   = true,
                                ProcessStatements = false,
                                StripComments     = false

                            // [node.Metadata] will be passed as [null] when the node
                            // metadata is not a [NodeDefinition].

                            if (clusterDefinition != null)
                                SetClusterVariables(preprocessReader, clusterDefinition, kubeSetupInfo, node.Metadata as NodeDefinition);

                            // NOTE(review): The loop body is not visible in this listing.
                            // Presumably each expanded line is written to [writer] and the
                            // writer is flushed before the stream is rewound -- confirm
                            // against the full source.

                            foreach (var line in preprocessReader.Lines())


                            // Rewind the expanded text so the upload reads it from the start.

                            msExpanded.Position = 0;
                            node.UploadText(targetPath, msExpanded, tabStop: 4, outputEncoding: Encoding.UTF8);

                    // NOTE(review): Judging by the indentation, this is the branch for files
                    // without variables (uploaded without preprocessing); the [else] keyword
                    // is not visible in this listing -- confirm.

                    node.UploadText(targetPath, input, tabStop: 4, outputEncoding: Encoding.UTF8);
Ejemplo n.º 4
        /// <inheritdoc/>
        public override async Task RunAsync(CommandLine commandLine)
        {
            // The first argument is the path to the cluster definition file.  Stop
            // after reporting the problem when it's missing, rather than falling
            // through and indexing an empty argument array.
            //
            // NOTE(review): If the tool has a dedicated "exit with error code" helper,
            // it should probably be called here instead of a plain return -- confirm.

            if (commandLine.Arguments.Length < 1)
            {
                Console.Error.WriteLine("*** ERROR: CLUSTER-DEF is required.");
                return;
            }

            // Parse and validate the cluster definition.  The parsed result is
            // discarded; only the validation performed while loading matters here.

            ClusterDefinition.FromFile(commandLine.Arguments[0], strict: true);

            Console.WriteLine("The cluster definition is OK.");

            // Nothing here is actually asynchronous; this await just satisfies the
            // [async] signature required by the base class.

            await Task.CompletedTask;
        }
Ejemplo n.º 5
        /// <summary>
        /// <para>
        /// Deploys a new test cluster as specified by the cluster definition passed or connects
        /// to a cluster previously deployed by this method when the cluster definition of the
        /// existing cluster and the definition passed here are the same.
        /// </para>
        /// </summary>
        /// <param name="clusterDefinition">The cluster definition model.  This must not be <c>null</c> and must not be locked.</param>
        /// <param name="options">
        /// Optionally specifies the options that <see cref="ClusterFixture"/> will use to
        /// manage the test cluster.  Default options are used when this is <c>null</c> and
        /// the fixture keeps its own clone of the options.
        /// </param>
        /// <returns>
        /// <para>
        /// The <see cref="TestFixtureStatus"/>:
        /// </para>
        /// <list type="table">
        /// <item>
        ///     <term><see cref="TestFixtureStatus.Disabled"/></term>
        ///     <description>
        ///     Returned when cluster unit testing is disabled due to the <c>NEON_CLUSTER_TESTING</c> environment
        ///     variable not being present on the current machine which means that <see cref="TestHelper.IsClusterTestingEnabled"/>
        ///     returns <c>false</c>.
        ///     </description>
        /// </item>
        /// <item>
        ///     <term><see cref="TestFixtureStatus.Started"/></term>
        ///     <description>
        ///     Returned when one of the <c>Start()</c> methods is called for the first time for the fixture
        ///     instance, indicating that an existing cluster has been connected or a new cluster has been deployed.
        ///     </description>
        /// </item>
        /// <item>
        ///     <term><see cref="TestFixtureStatus.AlreadyRunning"/></term>
        ///     <description>
        ///     Returned when one of the <c>Start()</c> methods has already been called by your test
        ///     class instance.
        ///     </description>
        /// </item>
        /// </list>
        /// </returns>
        /// <exception cref="NeonKubeException">
        /// Thrown when the cluster definition is locked, when an existing cluster is locked,
        /// or when the test cluster could not be prepared or set up.
        /// </exception>
        /// <remarks>
        /// <para>
        /// <b>IMPORTANT:</b> Only one <see cref="ClusterFixture"/> can be run at a time on
        /// any one computer.  This is due to the fact that cluster state like the kubeconfig,
        /// neonKUBE logins, logs and other files will be written to <b>~/.neonkube/spaces/$fixture/*</b>
        /// so multiple fixture instances will be confused when trying to manage these same files.
        /// </para>
        /// <para>
        /// This means that not only will running <see cref="ClusterFixture"/> based tests in parallel
        /// within the same instance of Visual Studio fail, but running these tests in different
        /// Visual Studio instances will also fail.
        /// </para>
        /// </remarks>
        public TestFixtureStatus StartWithClusterDefinition(ClusterDefinition clusterDefinition, ClusterFixtureOptions options = null)
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null, nameof(clusterDefinition));

            if (clusterDefinition.IsLocked)
                throw new NeonKubeException("Test clusters need to be unlocked.  Please set [isLocked: false] in your cluster definition.");

            // NOTE(review): The bodies of the next two checks are not visible in this
            // listing.  Per the <returns> documentation above they presumably return
            // [TestFixtureStatus.Disabled] and [TestFixtureStatus.AlreadyRunning]
            // respectively -- confirm against the full source.

            if (!TestHelper.IsClusterTestingEnabled)

            if (started)

            // Use default options when none were passed and keep a private clone.

            options ??= new ClusterFixtureOptions();
            this.options = options.Clone();

            // NOTE(review): The body of this check is not visible in this listing;
            // presumably it handles a cluster proxy left over from an earlier start -- confirm.

            if (this.Cluster != null)

            // Set the clusterspace mode, using any previously downloaded node image unless
            // the user specifies a custom image.  We're going to host the fixture state
            // files in this fixed folder:
            //      ~/.neonkube/spaces/$fixture/*

            clusterspaceFolder = KubeHelper.SetClusterSpaceMode(string.IsNullOrEmpty(options.ImageUriOrPath) ? KubeClusterspaceMode.EnabledWithSharedCache : KubeClusterspaceMode.Enabled, KubeHelper.ClusterspacePrefix("fixture"));

            // Figure out whether the user passed an image URI or file path to override
            // the default node image.

            var imageUriOrPath = options.ImageUriOrPath;
            var imageUri       = (string)null;
            var imagePath      = (string)null;

            if (string.IsNullOrEmpty(imageUriOrPath))
                imageUriOrPath = KubeDownloads.GetDefaultNodeImageUri(clusterDefinition.Hosting.Environment);

            // NOTE(review): Only one of [imageUri] or [imagePath] is presumably meant to
            // be assigned here (URI for http/https, path otherwise); the [else] between
            // the two assignments is not visible in this listing -- confirm.

            if (imageUriOrPath.StartsWith("http://", StringComparison.InvariantCultureIgnoreCase) || imageUriOrPath.StartsWith("https://", StringComparison.InvariantCultureIgnoreCase))
                imageUri = imageUriOrPath;
                imagePath = imageUriOrPath;

            // We need to deal with some scenarios here:
            //  1. No cluster context or login exists for the target cluster.
            //     A conflicting cluster may still exist though, having been deployed
            //     by another computer or perhaps the kubecontext/logins on the current
            //     machine may have been modified.  We need to be sure to remove any
            //     conflicting resources in this case.
            //  2. Cluster context and login exist on the current machine for the target
            //     cluster but the cluster is unhealthy or locked.  We'll abort for locked
            //     clusters and remove and redeploy for unhealthy clusters.
            //  3. Cluster context and login exist and the cluster is healthy.  In this case,
            //     we need to compare the deployed cluster version against the current version
            //     and remove/redeploy when the versions don't match.
            //  4. Cluster context and login exist and the cluster is healthy and cluster versions
            //     match.  In this case, we'll compare the existing cluster definition with that for
            //     the new cluster and also compare the cluster versions and if they match and
            //     [RemoveClusterOnStart=false] we'll just use the existing cluster.
            //  5. The current cluster matches the target but [RemoveClusterOnStart=true].
            //     We need to remove the current cluster in this case so we'll deploy a
            //     fresh one.

            // Determine whether a test cluster with the same name exists and if
            // its cluster definition matches the test cluster's definition.
            var clusterExists      = false;
            var clusterContextName = KubeContextName.Parse($"root@{clusterDefinition.Name}");
            var clusterContext     = KubeHelper.Config.GetContext(clusterContextName);
            var clusterLogin       = KubeHelper.GetClusterLogin(clusterContextName);

            // A login whose setup is still pending is not treated as an existing cluster.

            if (clusterContext != null && clusterLogin != null && !clusterLogin.SetupDetails.SetupPending)
                clusterExists = ClusterDefinition.AreSimilar(clusterDefinition, clusterLogin.ClusterDefinition);

            if (clusterExists && !options.RemoveClusterOnStart)
                // It looks like the test cluster may already exist.  We'll verify
                // that it's running, healthy, unlocked and the cluster versions match.
                // When all of these conditions are true, we'll use the existing cluster,
                // otherwise we'll remove the cluster as well as its context/login,
                // and deploy a new cluster below.

                using (var cluster = new ClusterProxy(clusterLogin.ClusterDefinition, new HostingManagerFactory()))

                    var isLocked      = cluster.IsLockedAsync().ResultWithoutAggregate();
                    var clusterInfo   = cluster.GetClusterInfoAsync().ResultWithoutAggregate();
                    var clusterHealth = cluster.GetClusterHealthAsync().ResultWithoutAggregate();

                    if (isLocked.HasValue && isLocked.Value)
                        throw new NeonKubeException($"Cluster is locked: {cluster.Name}");

                    if (clusterHealth.State == ClusterState.Healthy && clusterInfo.ClusterVersion == KubeVersions.NeonKube)
                        // We need to reset an existing cluster to ensure it's in a known state.
                        //
                        // NOTE(review): The reset call itself and the statement that leaves the
                        // method after the assignments below are not visible in this listing;
                        // presumably the method returns [TestFixtureStatus.Started] here -- confirm.


                        started   = true;
                        IsRunning = true;
                        Cluster   = new ClusterProxy(KubeHelper.CurrentContext, new HostingManagerFactory());


                    // NOTE(review): Presumably reached only when the existing cluster is
                    // unhealthy or its version doesn't match -- confirm.

                    cluster.RemoveAsync(removeOrphans: true).WaitWithoutAggregate();

                // NOTE(review): Judging by the comment and indentation, the statements
                // below form the [else] branch; the keyword is not visible in this listing.

                // There is no known existing cluster but there still might be a cluster
                // deployed by another machine or fragments of a partially deployed cluster,
                // so we need to do a preemptive cluster remove.

                using (var cluster = new ClusterProxy(clusterDefinition, new HostingManagerFactory()))
                    cluster.RemoveAsync(removeOrphans: true).WaitWithoutAggregate();

            // Provision the new cluster.

            WriteTestOutputLine($"PREPARE CLUSTER: {clusterDefinition.Name}");

                // NOTE(review): The indentation suggests this section is wrapped in a
                // construct that is not visible in this listing (possibly try/finally
                // around the log capture check at the end) -- confirm.

                var controller = KubeSetup.CreateClusterPrepareController(
                    clusterDefinition:   clusterDefinition,
                    nodeImageUri:        imageUri,
                    nodeImagePath:       imagePath,
                    maxParallel:         options.MaxParallel,
                    unredacted:          options.Unredacted,
                    neonCloudHeadendUri: options.NeonCloudHeadendUri);

                // The controller is run synchronously; a failed prepare aborts the start.

                switch (controller.RunAsync().ResultWithoutAggregate())
                case SetupDisposition.Succeeded:

                    WriteTestOutputLine("CLUSTER PREPARE: SUCCESS");

                case SetupDisposition.Failed:

                    WriteTestOutputLine("CLUSTER PREPARE: FAIL");
                    throw new NeonKubeException("Cluster prepare failed.");

                case SetupDisposition.Cancelled:

                    throw new NotImplementedException();

                // NOTE(review): The body of this check is not visible in this listing.

                if (options.CaptureDeploymentLogs)

            // Setup the cluster.

            WriteTestOutputLine($"SETUP CLUSTER: {clusterDefinition.Name}");

                // NOTE(review): As with the prepare step, the enclosing construct for
                // this section is not visible in this listing.

                var controller = KubeSetup.CreateClusterSetupController(
                    clusterDefinition: clusterDefinition,
                    maxParallel:       options.MaxParallel,
                    unredacted:        options.Unredacted);

                // The controller is run synchronously; a failed setup aborts the start.

                switch (controller.RunAsync().ResultWithoutAggregate())
                case SetupDisposition.Succeeded:

                    WriteTestOutputLine("CLUSTER SETUP: SUCCESS");

                case SetupDisposition.Failed:

                    WriteTestOutputLine("CLUSTER SETUP: FAILED");
                    throw new NeonKubeException("Cluster setup failed.");

                case SetupDisposition.Cancelled:

                    throw new NotImplementedException();

                // NOTE(review): The body of this check is not visible in this listing.

                if (options.CaptureDeploymentLogs)

            // NOTE: We just deployed brand new cluster so there's no need to reset it.

            started   = true;
            IsRunning = true;
            Cluster   = new ClusterProxy(KubeHelper.CurrentContext, new HostingManagerFactory());

Ejemplo n.º 6
        /// <summary>
        /// Initializes a near virgin server with the basic capabilities required
        /// for a cluster host node.  This does nothing when the node already has
        /// the <b>setup/prepared</b> marker file in its state folder; the marker
        /// is created once preparation completes.
        /// </summary>
        /// <param name="node">The target cluster node.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <param name="kubeSetupInfo">Kubernetes setup details.</param>
        /// <param name="shutdown">Optionally shuts down the node after it has been prepared.</param>
        public static void PrepareNode(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo, bool shutdown = false)
            Covenant.Requires <ArgumentNullException>(node != null);
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null);
            Covenant.Requires <ArgumentNullException>(kubeSetupInfo != null);

            if (node.FileExists($"{KubeHostFolders.State}/setup/prepared"))
                return;     // Already prepared

            // Ensure that the cluster host folders exist.
            //
            // NOTE(review): The statement(s) that create the folders are not visible
            // in this listing.


            // Package manager configuration.

            if (!clusterDefinition.NodeOptions.AllowPackageManagerIPv6)
                // Restrict the [apt] package manager to using IPv4 to communicate
                // with the package mirrors, since IPv6 often doesn't work.

                node.UploadText("/etc/apt/apt.conf.d/99-force-ipv4-transport", "Acquire::ForceIPv4 \"true\";");
                node.SudoCommand("chmod 644 /etc/apt/apt.conf.d/99-force-ipv4-transport");

            // Configure [apt] to retry.

            node.UploadText("/etc/apt/apt.conf.d/99-retries", $"APT::Acquire::Retries \"{clusterDefinition.NodeOptions.PackageManagerRetries}\";");
            node.SudoCommand("chmod 644 /etc/apt/apt.conf.d/99-retries");

            // Other configuration.

            ConfigureOpenSSH(node, TimeSpan.Zero);
            node.UploadConfigFiles(clusterDefinition, kubeSetupInfo);
            node.UploadResources(clusterDefinition, kubeSetupInfo);

            // NOTE(review): This null check looks redundant because [clusterDefinition]
            // was already required to be non-null at the top of the method.

            if (clusterDefinition != null)
                ConfigureEnvironmentVariables(node, clusterDefinition);

            node.SudoCommand("safe-apt-get update");

            // NOTE(review): The [() =>] fragment below is the tail of a call whose
            // beginning is not visible in this listing; the two statements after it
            // appear to be the lambda body -- confirm against the full source.

                                        () =>
                node.Status = "preparing";
                node.Reboot(wait: true);

            // We need to upload the cluster configuration and initialize drives attached
            // to the node.  We're going to assume that these are not already initialized.

            // $todo(jeff.lill):
            // We may need an option that allows an operator to pre-build a hardware
            // based drive array or something.  I'm going to defer this to later and
            // concentrate on commodity hardware and cloud deployments for now.

            // NOTE(review): The environment variables were already configured above
            // (before the reboot); confirm whether this second call is intentional.

            CommonSteps.ConfigureEnvironmentVariables(node, clusterDefinition);

            // NOTE(review): No disk setup statement is visible after this status update.

            node.Status = "setup: disk";

            // Clear any DHCP leases to be super sure that cloned node
            // VMs will obtain fresh IP addresses.

            node.Status = "clear: DHCP leases";
            node.SudoCommand("rm -f /var/lib/dhcp/*");

            // Indicate that the node has been fully prepared.

            node.SudoCommand($"touch {KubeHostFolders.State}/setup/prepared");

            // Shutdown the node if requested.

            if (shutdown)
                node.Status = "shutdown";
                node.SudoCommand("shutdown 0", RunOptions.Defaults | RunOptions.Shutdown);
Ejemplo n.º 7
        /// <summary>
        /// Configures the global environment variables that describe the configuration
        /// of the server within the cluster by downloading the node's
        /// <b>/etc/environment</b> file, rebuilding its contents and uploading the result.
        /// </summary>
        /// <param name="node">The server to be updated.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        public static void ConfigureEnvironmentVariables(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition)
            node.Status = "environment variables";

            // We're going to append the new variables to the existing Linux [/etc/environment] file.

            var sb = new StringBuilder();

            // Append all of the existing environment variables except for those
            // whose names start with "NEON_" to make the operation idempotent.
            // Note that we're going to special case PATH to add any Neon
            // related directories.

            using (var currentEnvironmentStream = new MemoryStream())
                node.Download("/etc/environment", currentEnvironmentStream);

                // Rewind so the downloaded file can be read from the beginning.

                currentEnvironmentStream.Position = 0;

                using (var reader = new StreamReader(currentEnvironmentStream))
                    foreach (var line in reader.Lines())
                        if (line.StartsWith("PATH="))
                            // Extend PATH only when the Neon bin folder isn't already present.
                            //
                            // NOTE(review): No branch that keeps an already extended PATH line
                            // is visible in this listing; presumably one exists -- confirm.

                            if (!line.Contains(KubeHostFolders.Bin))
                                sb.AppendLine(line + $":/snap/bin:{KubeHostFolders.Bin}");

                        // NOTE(review): The body of this branch is not visible in this listing;
                        // per the comment above it presumably copies the line unchanged.

                        else if (!line.StartsWith("NEON_"))

            // Add the global cluster related environment variables.
            //
            // NOTE(review): The statements that append these variables, as well as the
            // bodies of the loop and the conditionals below, are not visible in this listing.


            var sbPackageProxies = new StringBuilder();

            // [PackageProxy] is treated as a space separated list of endpoints.

            foreach (var proxyEndpoint in clusterDefinition.PackageProxy.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))


            if (clusterDefinition.Hosting != null)


            if (node.Metadata != null)


            // Kubernetes related variables for masters.
            //
            // NOTE(review): [node.Metadata] is dereferenced here without the null check
            // used just above -- confirm that it can never be null at this point.

            if (node.Metadata.IsMaster)

            // Upload the new environment to the server.

            node.UploadText("/etc/environment", sb, tabStop: 4);
Ejemplo n.º 8
        /// <summary>
        /// Verifies a master node's NTP health by running <b>ntpq -pw</b> on the node
        /// and inspecting the list of time sources reported, trying up to six times.
        /// </summary>
        /// <param name="node">The master node.</param>
        /// <param name="clusterDefinition">
        /// The cluster definition.  NOTE(review): this is not referenced by the statements
        /// visible in this listing.
        /// </param>
        private static void CheckMasterNtp(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition)
            // We're going to use [ntpq -pw] to query the configured time sources.
            // We should get something back that looks like
            //      remote           refid      st t when poll reach   delay   offset  jitter
            //      ==============================================================================
            //       LOCAL(0).LOCL.          10 l  45m   64    0    0.000    0.000   0.000
            //      * clock.xmission. .GPS.            1 u  134  256  377   48.939 - 0.549  18.357
            //      +      2 u  200  256  377   96.981 - 0.623   3.284
            //      + pacific.latt.ne     3 u  243  256  377   41.457 - 8.929   8.497
            // For master nodes, we're simply going to verify that we have at least one external
            // time source answering.

            node.Status = "check: NTP";

            var retryDelay = TimeSpan.FromSeconds(30);
            var fault      = (string)null;

            // NOTE(review): The statements that wait [retryDelay], retry and exit the
            // loop are not visible in this listing; the notes below mark where they
            // appear to be missing.

            for (int tryCount = 0; tryCount < 6; tryCount++)
                var response = node.SudoCommand("/usr/bin/ntpq -pw", RunOptions.LogOutput);

                // NOTE(review): The body for a failed command is not visible here;
                // presumably it waits and retries -- confirm.

                if (response.ExitCode != 0)

                using (var reader = response.OpenOutputTextReader())
                    string line;

                    // Column header and table bar lines.

                    line = reader.ReadLine();
                    if (string.IsNullOrWhiteSpace(line))
                        fault = "NTP: Invalid [ntpq -pw] response.";


                    line = reader.ReadLine();
                    if (string.IsNullOrWhiteSpace(line) || line[0] != '=')
                        fault = "NTP: Invalid [ntpq -pw] response.";


                    // Count the non-empty lines that don't include [.LOCL.],
                    // the local clock.

                    var sourceCount = 0;

                    // NOTE(review): The statement that increments [sourceCount] is not
                    // visible in this listing.

                    for (line = reader.ReadLine(); line != null; line = reader.ReadLine())
                        if (line.Length > 0 && !line.Contains(".LOCL."))

                    if (sourceCount == 0)
                        fault = "NTP: No external sources are answering.";


                    // Everything looks good.


            // NOTE(review): The body that reports [fault] is not visible in this listing.

            if (fault != null)
Ejemplo n.º 9
        /// <summary>
        /// Verifies a worker node's NTP health by running <b>ntpq -pw</b> on the node
        /// and confirming that every master's address or name appears in the output,
        /// trying up to six times.
        /// </summary>
        /// <param name="node">The worker node.</param>
        /// <param name="clusterDefinition">The cluster definition (used to enumerate the masters).</param>
        private static void CheckWorkerNtp(SshProxy <NodeDefinition> node, ClusterDefinition clusterDefinition)
            // We're going to use [ntpq -pw] to query the configured time sources.
            // We should get something back that looks like
            //           remote           refid      st t when poll reach   delay   offset  jitter
            //           ==============================================================================
            //            LOCAL(0).LOCL.          10 l  45m   64    0    0.000    0.000   0.000
            //           *    2 u  111  128  377    0.062    3.409   0.608
            //           +    2 u  111  128  377    0.062    3.409   0.608
            //           +    2 u  111  128  377    0.062    3.409   0.608
            // For worker nodes, we need to verify that each of the masters are answering
            // by confirming that their IP addresses are present.

            node.Status = "check: NTP";

            var retryDelay = TimeSpan.FromSeconds(30);
            var fault      = (string)null;
            var firstTry   = true;

            // NOTE(review): The [goto tryAgain] near the end of this method needs a
            // [tryAgain:] label which is not visible in this listing; presumably it
            // sits just before this loop -- confirm.  The statements that wait
            // [retryDelay] and exit the loops are not visible either.

            for (var tries = 0; tries < 6; tries++)
                var output = node.SudoCommand("/usr/bin/ntpq -pw", RunOptions.LogOutput).OutputText;

                foreach (var master in clusterDefinition.SortedMasters)
                    // We're going to check for the presence of the master's IP address
                    // or its name, the latter because [ntpq] appears to attempt a reverse
                    // IP address lookup which will resolve into one of the DNS names defined
                    // in the local [/etc/hosts] file.

                    if (!output.Contains(master.PrivateAddress.ToString()) && !output.Contains(master.Name.ToLower()))
                        fault = $"NTP: Manager [{master.Name}/{master.PrivateAddress}] is not answering.";


                    // Everything looks OK.


            if (fault != null)
                if (firstTry)
                    // $hack(jeff.lill):
                    // I've seen the NTP check fail on worker nodes, complaining
                    // that the connection attempt was rejected.  I manually restarted
                    // the node and then it worked.  I'm not sure if the rejected connection
                    // was being made to the local NTP service or from the local service
                    // to NTP running on the master.
                    // I'm going to assume that it was to the local NTP service and I'm
                    // going to try mitigating this by restarting the local NTP service
                    // and then re-running the tests.  I'm only going to do this once.

                    // [FaultOnError] is masked off for the restart command.

                    node.SudoCommand("systemctl restart ntp", node.DefaultRunOptions & ~RunOptions.FaultOnError);

                    firstTry = false;
                    goto tryAgain;

Ejemplo n.º 10
        /// <summary>
        /// Uploads the setup and other scripts and tools for the target operating system to the server.
        /// Files from the <b>setup</b> resource folder go to the setup folder and files from the
        /// <b>binary</b> resource folder go to the bin folder with any trailing <b>.sh</b> removed
        /// from their names.  The uploaded files are then made executable.
        /// </summary>
        /// <typeparam name="TMetadata">The server's metadata type.</typeparam>
        /// <param name="server">The remote server.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <param name="kubeSetupInfo">The Kubernetes setup details.</param>
        public static void UploadResources <TMetadata>(this SshProxy <TMetadata> server, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo)
            where TMetadata : class
        {
            Covenant.Requires <ArgumentNullException>(server != null, nameof(server));
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null, nameof(clusterDefinition));
            Covenant.Requires <ArgumentNullException>(kubeSetupInfo != null, nameof(kubeSetupInfo));

            // Upload resource files to the setup folder.

            server.Status = $"clear: {KubeHostFolders.Setup}";
            server.SudoCommand($"rm -rf {KubeHostFolders.Setup}/*.*");

            // Upload the setup files.

            server.Status = "upload: setup scripts";

            foreach (var file in Program.LinuxFolder.GetFolder("setup").Files())
            {
                server.UploadFile(clusterDefinition, kubeSetupInfo, file, $"{KubeHostFolders.Setup}/{file.Name}");
            }

            // Make the setup scripts executable.

            server.SudoCommand($"chmod 744 {KubeHostFolders.Setup}/*");

            // Upload files to the bin folder.
            //
            // NOTE(review): The [*.*] glob only matches names that contain a dot, so
            // tools previously uploaded below with their [.sh] extension removed will
            // not be cleared by this command -- confirm whether that's intended.

            server.Status = $"clear: {KubeHostFolders.Bin}";
            server.SudoCommand($"rm -rf {KubeHostFolders.Bin}/*.*");

            // Upload the tool files.  Note that we're going to strip out the [.sh]
            // file type to make these easier to run.

            server.Status = "upload: binary files";

            foreach (var file in Program.LinuxFolder.GetFolder("binary").Files())
            {
                // Remove only a trailing [.sh].  A plain [Replace()] would also remove
                // ".sh" from the middle of a name (e.g. "foo.shim" --> "fooim").

                var toolName = file.Name;

                if (toolName.EndsWith(".sh", StringComparison.Ordinal))
                {
                    toolName = toolName.Substring(0, toolName.Length - ".sh".Length);
                }

                server.UploadFile(clusterDefinition, kubeSetupInfo, file, $"{KubeHostFolders.Bin}/{toolName}");
            }

            // Make the scripts executable.

            server.SudoCommand($"chmod 744 {KubeHostFolders.Bin}/*");
        }
Ejemplo n.º 11
        /// <summary>
        /// Uploads the configuration files for the target operating system to the server.
        /// The files come from the <b>conf</b> resource folder and are uploaded to the node's
        /// configuration folder, after which file permissions are set: <b>644</b> for
        /// files matching <b>*.*</b> and then <b>744</b> for <b>*.sh</b> scripts.
        /// </summary>
        /// <typeparam name="TMetadata">The node metadata type.</typeparam>
        /// <param name="node">The remote node.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <param name="kubeSetupInfo">The Kubernetes setup details.</param>
        public static void UploadConfigFiles <TMetadata>(this SshProxy <TMetadata> node, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo)
            where TMetadata : class
        {
            Covenant.Requires <ArgumentNullException>(node != null, nameof(node));
            Covenant.Requires <ArgumentNullException>(clusterDefinition != null, nameof(clusterDefinition));
            Covenant.Requires <ArgumentNullException>(kubeSetupInfo != null, nameof(kubeSetupInfo));

            // Clear the contents of the configuration folder.
            //
            // NOTE(review): The [*.*] glob only matches names that contain a dot, so
            // any extensionless files in the folder are left in place -- confirm
            // whether that's intended.

            node.Status = $"clear: {KubeHostFolders.Config}";
            node.SudoCommand($"rm -rf {KubeHostFolders.Config}/*.*");

            // Upload the files.

            node.Status = "upload: config files";

            foreach (var file in Program.LinuxFolder.GetFolder("conf").Files())
            {
                node.UploadFile(clusterDefinition, kubeSetupInfo, file, $"{KubeHostFolders.Config}/{file.Name}");
            }

            // Secure the files and make the scripts executable.  The order matters:
            // the scripts are matched by both globs so [744] needs to be applied last.

            node.SudoCommand($"chmod 644 {KubeHostFolders.Config}/*.*");
            node.SudoCommand($"chmod 744 {KubeHostFolders.Config}/*.sh");

            node.Status = "copied";
        }
Ejemplo n.º 12
        /// <summary>
        /// Sets cluster definition related variables for a <see cref="PreprocessReader"/>.
        /// </summary>
        /// <param name="preprocessReader">The reader.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        /// <param name="kubeSetupInfo">The Kubernetes setup details.</param>
        /// <param name="nodeDefinition">The target node definition.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when the cluster's hosting environment is not implemented yet.
        /// </exception>
        private static void SetClusterVariables(PreprocessReader preprocessReader, ClusterDefinition clusterDefinition, KubeSetupInfo kubeSetupInfo, NodeDefinition nodeDefinition)
        {
            Covenant.Requires<ArgumentNullException>(preprocessReader != null, nameof(preprocessReader));
            Covenant.Requires<ArgumentNullException>(clusterDefinition != null, nameof(clusterDefinition));
            Covenant.Requires<ArgumentNullException>(kubeSetupInfo != null, nameof(kubeSetupInfo));

            // Generate the master node variables in sorted order.  The variable
            // names will be formatted as:
            //
            //      NEON_MASTER_#
            //
            // where [#] is the zero-based index of the node.  This is compatible
            // with the [getmaster] function included in the script.
            //
            // Each variable defines an associative array with [name] and [address]
            // properties.
            //
            // Then generate the NEON_MASTER_NAMES and NEON_MASTER_ADDRESSES arrays.
            //
            // NOTE: We need to use Linux-style line endings.

            var sbMasters                  = new StringBuilder();
            var sbMasterNamesArray         = new StringBuilder();
            var sbMasterAddressesArray     = new StringBuilder();
            var sbPeerMasterAddressesArray = new StringBuilder();
            var sbMasterNodesSummary       = new StringBuilder();
            var index                      = 0;
            var masterNameWidth            = 0;

            // Bash array literals need an opening parenthesis to match the
            // closing " )" appended after the loop.

            sbMasterNamesArray.Append("(");
            sbMasterAddressesArray.Append("(");
            sbPeerMasterAddressesArray.Append("(");

            foreach (var master in clusterDefinition.SortedMasters)
            {
                // Emit each master's associative array exactly once, giving
                // every master its own zero-based index.

                sbMasters.Append($"declare -x -A NEON_MASTER_{index}\n");
                sbMasters.Append($"NEON_MASTER_{index}=( [\"name\"]=\"{master.Name}\" [\"address\"]=\"{master.PrivateAddress}\" )\n");
                index++;

                sbMasterNamesArray.Append($" \"{master.Name}\"");
                sbMasterAddressesArray.Append($" \"{master.PrivateAddress}\"");

                if (master != nodeDefinition)
                {
                    sbPeerMasterAddressesArray.Append($" \"{master.PrivateAddress}\"");
                }

                masterNameWidth = Math.Max(master.Name.Length, masterNameWidth);
            }

            sbMasterNamesArray.Append(" )");
            sbMasterAddressesArray.Append(" )");
            sbPeerMasterAddressesArray.Append(" )");

            // NOTE(review): [sbPeerMasterAddressesArray] is built but nothing in this
            // method consumes it; presumably it should be published as a script
            // variable as well -- confirm against the scripts that read these values.

            foreach (var master in clusterDefinition.SortedMasters)
            {
                // Pad the names so the addresses line up in a column.

                var nameField = master.Name.PadRight(masterNameWidth);

                // The blanks below are just enough so that the "=" sign lines up
                // with the summary output from [cluster.conf.sh].  Only the first
                // line carries the variable name; the rest are continuation lines.

                if (sbMasterNodesSummary.Length == 0)
                {
                    sbMasterNodesSummary.Append($"    echo \"NEON_MASTER_NODES                 = {nameField}: {master.PrivateAddress}\" 1>&2\n");
                }
                else
                {
                    sbMasterNodesSummary.Append($"    echo \"                                     {nameField}: {master.PrivateAddress}\" 1>&2\n");
                }
            }

            sbMasters.Append($"declare -x NEON_MASTER_NAMES={sbMasterNamesArray}\n");
            sbMasters.Append($"declare -x NEON_MASTER_ADDRESSES={sbMasterAddressesArray}\n");

            // Generate the master and worker NTP time sources.  Masters use the
            // sources from the cluster definition and workers use the masters.

            var masterTimeSources = string.Empty;
            var workerTimeSources = string.Empty;

            if (clusterDefinition.TimeSources != null)
            {
                foreach (var source in clusterDefinition.TimeSources)
                {
                    if (string.IsNullOrWhiteSpace(source))
                    {
                        continue;   // Ignore blank entries.
                    }

                    if (masterTimeSources.Length > 0)
                    {
                        masterTimeSources += " ";
                    }

                    masterTimeSources += $"\"{source}\"";
                }
            }

            foreach (var master in clusterDefinition.SortedMasters)
            {
                if (workerTimeSources.Length > 0)
                {
                    workerTimeSources += " ";
                }

                workerTimeSources += $"\"{master.PrivateAddress}\"";
            }

            if (string.IsNullOrWhiteSpace(masterTimeSources))
            {
                // Default to a reasonable public time source.

                masterTimeSources = "\"pool.ntp.org\"";
            }

            // Set the variables.

            preprocessReader.Set("load-cluster-conf", KubeHostFolders.Config + "/cluster.conf.sh --echo-summary");
            preprocessReader.Set("load-cluster-conf-quiet", KubeHostFolders.Config + "/cluster.conf.sh");

            SetBashVariable(preprocessReader, "cluster.provisioner", clusterDefinition.Provisioner);

            SetBashVariable(preprocessReader, "node.driveprefix", clusterDefinition.DrivePrefix);

            SetBashVariable(preprocessReader, "neon.folders.archive", KubeHostFolders.Archive(KubeConst.SysAdminUser));
            SetBashVariable(preprocessReader, "neon.folders.bin", KubeHostFolders.Bin);
            SetBashVariable(preprocessReader, "neon.folders.exec", KubeHostFolders.Exec(KubeConst.SysAdminUser));
            SetBashVariable(preprocessReader, "neon.folders.config", KubeHostFolders.Config);
            SetBashVariable(preprocessReader, "neon.folders.setup", KubeHostFolders.Setup);
            SetBashVariable(preprocessReader, "neon.folders.state", KubeHostFolders.State);
            SetBashVariable(preprocessReader, "neon.folders.tmpfs", KubeHostFolders.Tmpfs);
            SetBashVariable(preprocessReader, "neon.folders.tools", KubeHostFolders.Bin);

            SetBashVariable(preprocessReader, "nodes.master.count", clusterDefinition.Masters.Count());
            preprocessReader.Set("nodes.masters", sbMasters);
            preprocessReader.Set("nodes.masters.summary", sbMasterNodesSummary);

            SetBashVariable(preprocessReader, "ntp.master.sources", masterTimeSources);

            // NOTE(review): [NewMethod] is defined elsewhere; presumably it publishes
            // the worker NTP sources -- confirm and consider giving it a descriptive name.

            NewMethod(preprocessReader, workerTimeSources);

            SetBashVariable(preprocessReader, "docker.packageuri", kubeSetupInfo.DockerPackageUbuntuUri);

            SetBashVariable(preprocessReader, "neon.kube.kubeadm.package_version", kubeSetupInfo.KubeAdmPackageUbuntuVersion);
            SetBashVariable(preprocessReader, "neon.kube.kubectl.package_version", kubeSetupInfo.KubeCtlPackageUbuntuVersion);
            SetBashVariable(preprocessReader, "neon.kube.kubelet.package_version", kubeSetupInfo.KubeletPackageUbuntuVersion);

            // Configure the variables for the [setup-disk.sh] script.

            switch (clusterDefinition.Hosting.Environment)
            {
                case HostingEnvironments.Aws:

                    throw new NotImplementedException("$todo(jefflill)");

                case HostingEnvironments.Azure:

                    // The primary Azure data drive is [/dev/sdb] so any mounted drive will be [/dev/sdc].

                    if (nodeDefinition.Azure.HardDriveCount == 0)
                    {
                        SetBashVariable(preprocessReader, "data.disk", "PRIMARY");
                    }
                    else
                    {
                        SetBashVariable(preprocessReader, "data.disk", "/dev/sdc");
                    }
                    break;

                case HostingEnvironments.Google:

                    throw new NotImplementedException("$todo(jefflill)");

                case HostingEnvironments.HyperV:
                case HostingEnvironments.HyperVLocal:
                case HostingEnvironments.Machine:
                case HostingEnvironments.Unknown:
                case HostingEnvironments.XenServer:

                    // VMs for all of these environments simply host their data on the
                    // primary OS disk only for now, the idea being that this disk
                    // can be sized up as necessary.  There are valid scenarios where
                    // folks would like the data on a different drive (e.g. for better
                    // performance).  I'm putting support for that on the backlog.

                    SetBashVariable(preprocessReader, "data.disk", "PRIMARY");
                    break;

                default:

                    throw new NotImplementedException($"The [{clusterDefinition.Hosting.Environment}] hosting environment is not implemented.");
            }
        }
Ejemplo n.º 13
        /// <inheritdoc/>
        public override void Run(CommandLine commandLine)
            // Prepares the infrastructure for a new cluster: validates the command line
            // and cluster definition, optionally scans for IP address conflicts, provisions
            // the hosting environment, prepares every node via a [SetupController] and then
            // persists a context extension that marks setup as pending.
            //
            // NOTE(review): brace-only lines and several short statements (early exits,
            // try/catch/finally, some loop and lambda bodies) are not visible in this copy
            // of the method, so control flow below must be confirmed against the project source.

            if (commandLine.HasHelpOption)

            // Special-case handling of the [--remove-templates] option.

            if (commandLine.HasOption("--remove-templates"))
                Console.WriteLine("Removing cached virtual machine templates.");

                // NOTE(review): the loop body is not visible here; given the message above
                // it presumably deletes each enumerated file -- confirm.
                foreach (var fileName in Directory.GetFiles(KubeHelper.VmTemplatesFolder, "*.*", SearchOption.TopDirectoryOnly))


            // Implement the command.

            // Preparing is refused while the operator is logged into another cluster.
            if (KubeHelper.CurrentContext != null)
                Console.Error.WriteLine("*** ERROR: You are logged into a cluster.  You need to logout before preparing another.");

            // The first positional argument is the cluster definition file path.
            if (commandLine.Arguments.Length == 0)
                Console.Error.WriteLine($"*** ERROR: CLUSTER-DEF expected.");

            clusterDefPath = commandLine.Arguments[0];
            force          = commandLine.GetFlag("--force");

            ClusterDefinition.ValidateFile(clusterDefPath, strict: true);

            var clusterDefinition = ClusterDefinition.FromFile(clusterDefPath, strict: true);

            clusterDefinition.Provisioner = $"neon-cli:{Program.Version}";  // Identify this tool/version as the cluster provisioner

            // NOTE:
            // Azure has a more restrictive password policy and our default
            // machine password does not meet the requirements:
            // The supplied password must be between 6-72 characters long and must
            // satisfy at least 3 of password complexity requirements from the following:
            //      1. Contains an uppercase character
            //      2. Contains a lowercase character
            //      3. Contains a numeric digit
            //      4. Contains a special character
            //      5. Control characters are not allowed
            // It's also probably not a great idea to use a static password when
            // provisioning VMs in public clouds because it might be possible for
            // somebody to use this fact to SSH into nodes while the cluster is
            // being setup and before we set the secure password at the end.
            // This is less problematic for non-cloud environments because it's
            // likely that the hosts won't initially be able to receive inbound
            // Internet traffic and besides, we need to have a known password
            // embedded into the VM templates.
            // We're going to handle this for cloud environments by looking
            // at [Program.MachinePassword].  If this is set to the default
            // machine password then we're going to replace it with a randomly
            // generated password with a few extra characters to ensure that
            // it meets the target cloud's password requirements.  We'll use
            // a non-default password if the operator specified one.

            if (clusterDefinition.Hosting.IsCloudProvider && Program.MachinePassword == KubeConst.DefaulVmTemplatePassword)
                Program.MachinePassword = NeonHelper.GetCryptoRandomPassword(20);

                // Append a string that guarantees that the generated password meets
                // cloud minimum requirements.

                Program.MachinePassword += ".Aa0";

            // NOTE: Cluster prepare starts new log files.

            cluster = new ClusterProxy(clusterDefinition, Program.CreateNodeProxy <NodeDefinition>, appendToLog: false, defaultRunOptions: RunOptions.LogOutput | RunOptions.FaultOnError);

            // A kubeconfig context with the cluster's name must not already exist.
            if (KubeHelper.Config.GetContext(cluster.Definition.Name) != null)
                Console.Error.WriteLine($"*** ERROR: A context named [{cluster.Definition.Name}] already exists.");

            // Configure global options.

            if (commandLine.HasOption("--unredacted"))
                cluster.SecureRunOptions = RunOptions.None;

            // Tracks whether the operation failed; read at the end to pick the
            // message passed to [KubeHelper.Desktop.EndOperationAsync].
            var failed = false;

                KubeHelper.Desktop.StartOperationAsync($"Preparing [{cluster.Name}]").Wait();

                // Try to ensure that no servers are already deployed on the IP addresses defined
                // for cluster nodes because provisioning over an existing cluster will likely
                // corrupt the existing cluster and also probably prevent the new cluster from
                // provisioning correctly.
                // Note that we're not going to perform this check for the [Machine] hosting
                // environment because we're expecting the bare machines to be already running
                // with the assigned addresses and we're also not going to do this for cloud
                // environments because we're assuming that the cluster will run in its own
                // private network so there'll be no possibility of conflicts.
                // NOTE(review): the second half of the condition below is not visible here;
                // per the comment above it presumably excludes cloud providers -- confirm.

                if (cluster.Definition.Hosting.Environment != HostingEnvironments.Machine &&
                    Console.WriteLine(" Scanning for IP address conflicts...");

                    var pingOptions   = new PingOptions(ttl: 32, dontFragment: true);
                    var pingTimeout   = TimeSpan.FromSeconds(2);
                    var pingConflicts = new List <NodeDefinition>();
                    var pingAttempts  = 2;

                    // I'm going to use up to 20 threads at a time here for simplicity
                    // rather than doing this as async operations.

                    var parallelOptions = new ParallelOptions()
                        MaxDegreeOfParallelism = 20

                    Parallel.ForEach(cluster.Definition.NodeDefinitions.Values, parallelOptions,
                                     node =>
                        using (var pinger = new Pinger())
                            // We're going to try pinging up to [pingAttempts] times for each node
                            // just in case the network is sketchy and we're losing reply packets.

                            for (int i = 0; i < pingAttempts; i++)
                                var reply = pinger.SendPingAsync(node.PrivateAddress, (int)pingTimeout.TotalMilliseconds).Result;

                                // A successful reply means some machine already answers on this
                                // address.  [pingConflicts] is shared across the parallel workers,
                                // hence the lock; the statements inside it are not visible here.
                                if (reply.Status == IPStatus.Success)
                                    lock (pingConflicts)


                    // Report the conflicting nodes ordered by IP address.
                    if (pingConflicts.Count > 0)
                        Console.Error.WriteLine($"*** ERROR: Cannot provision the cluster because [{pingConflicts.Count}] other");
                        Console.Error.WriteLine($"***        machines conflict with the following cluster nodes:");

                        foreach (var node in pingConflicts.OrderBy(n => NetHelper.AddressToUint(IPAddress.Parse(n.PrivateAddress))))
                            Console.Error.WriteLine($"{node.PrivateAddress, 16}:    {node.Name}");


                // Perform basic environment provisioning.  This creates basic cluster components
                // such as virtual machines, networks, load balancers, public IP addresses, security
                // groups,... as required for the environment.

                hostingManager = new HostingManagerFactory(() => HostingLoader.Initialize()).GetMaster(cluster, Program.LogPath);

                if (hostingManager == null)
                    Console.Error.WriteLine($"*** ERROR: No hosting manager for the [{cluster.Definition.Hosting.Environment}] hosting environment could be located.");

                // Pass the command line settings through to the hosting manager.
                hostingManager.HostUsername = Program.MachineUsername;
                hostingManager.HostPassword = Program.MachinePassword;
                hostingManager.ShowStatus   = !Program.Quiet;
                hostingManager.MaxParallel  = Program.MaxParallel;
                hostingManager.WaitSeconds  = Program.WaitSeconds;

                if (hostingManager.RequiresAdminPrivileges)
                    Program.VerifyAdminPrivileges($"Provisioning to [{cluster.Definition.Hosting.Environment}] requires elevated administrator privileges.");

                // NOTE(review): the handling for a failed provision is not visible here.
                if (!hostingManager.Provision(force))

                // Get the mounted drive prefix from the hosting manager.

                cluster.Definition.DrivePrefix = hostingManager.DrivePrefix;

                // Ensure that the nodes have valid IP addresses.


                // Maps each assigned address to the first node seen with it so that
                // duplicate assignments can be detected.
                var ipAddressToServer = new Dictionary <IPAddress, SshProxy <NodeDefinition> >();

                foreach (var node in cluster.Nodes.OrderBy(n => n.Name))
                    SshProxy <NodeDefinition> duplicateServer;

                    if (node.PrivateAddress == IPAddress.Any)
                        throw new ArgumentException($"Node [{node.Name}] has not been assigned an IP address.");

                    if (ipAddressToServer.TryGetValue(node.PrivateAddress, out duplicateServer))
                        throw new ArgumentException($"Nodes [{duplicateServer.Name}] and [{node.Name}] have the same IP address [{node.Metadata.PrivateAddress}].");

                    ipAddressToServer.Add(node.PrivateAddress, node);

                // We're going to use the masters as package caches unless the user
                // specifies something else.

                packageCaches = commandLine.GetOption("--package-cache");     // This overrides the cluster definition, if specified.

                if (!string.IsNullOrEmpty(packageCaches))
                    cluster.Definition.PackageProxy = packageCaches;

                if (string.IsNullOrEmpty(cluster.Definition.PackageProxy))
                    var sbProxies = new StringBuilder();

                    // NOTE(review): the loop body that fills [sbProxies] is not visible here.
                    foreach (var master in cluster.Masters)

                    cluster.Definition.PackageProxy = sbProxies.ToString();

                // Prepare the cluster.

                // Write the operation begin marker to all cluster node logs.


                var nodesText = cluster.Nodes.Count() == 1 ? "node" : "nodes";
                var operation = $"Preparing [{cluster.Definition.Name}] {nodesText}";

                var controller =
                    new SetupController <NodeDefinition>(operation, cluster.Nodes)
                    ShowStatus  = !Program.Quiet,
                    MaxParallel = Program.MaxParallel

                // Global step: fetch the setup details for this cluster definition
                // via [HeadendClient] before the node steps run.
                controller.AddGlobalStep("setup details",
                                         () =>
                    using (var client = new HeadendClient())
                        kubeSetupInfo = client.GetSetupInfoAsync(cluster.Definition).Result;

                // Prepare the nodes.

                controller.AddWaitUntilOnlineStep(timeout: TimeSpan.FromMinutes(15));
                controller.AddStep("verify OS", CommonSteps.VerifyOS);

                // NOTE(review): the beginning of the call that registers this per-node
                // step (its method name and step label) is not visible here.
                                   (node, stepDelay) =>
                    CommonSteps.PrepareNode(node, cluster.Definition, kubeSetupInfo, shutdown: false);
                                   stepStaggerSeconds: cluster.Definition.Setup.StepStaggerSeconds);

                if (!controller.Run())
                    // Write the operation end/failed marker to all cluster node logs.


                    Console.Error.WriteLine("*** ERROR: One or more configuration steps failed.");

                // Persist the cluster context extension.

                var contextExtensionsPath = KubeHelper.GetContextExtensionPath((KubeContextName)$"{KubeConst.RootUser}@{clusterDefinition.Name}");
                var contextExtension      = new KubeContextExtension(contextExtensionsPath)
                    ClusterDefinition = clusterDefinition,
                    SshUsername       = Program.MachineUsername,
                    SshPassword       = Program.MachinePassword,
                    SetupDetails      = new KubeSetupDetails()
                        SetupPending = true


                // Write the operation end marker to all cluster node logs.

                // NOTE(review): as shown here this assignment is unconditional, which would
                // make the success message below unreachable; presumably it belongs to an
                // exception handler that is not visible in this copy -- confirm.
                failed = true;
                if (!failed)
                    KubeHelper.Desktop.EndOperationAsync($"Cluster [{cluster.Name}] has been prepared and is ready for setup.").Wait();
                    KubeHelper.Desktop.EndOperationAsync($"Cluster [{cluster.Name}] prepare has failed.", failed: true).Wait();
Ejemplo n.º 14
        /// <inheritdoc/>
        public override async Task RunAsync(CommandLine commandLine)
            // Prepares a cluster: parses and validates the command line options, loads the
            // cluster definition, checks that the hosting environment has enough resources,
            // then builds and runs the prepare controller from
            // [KubeSetup.CreateClusterPrepareController] and reports the outcome.
            //
            // NOTE(review): brace-only lines and several short statements (early exits,
            // loop bodies, [break] statements, the event handler body) are not visible in
            // this copy of the method, so control flow must be confirmed against the
            // project source.

            if (commandLine.HasHelpOption)


            // Cluster prepare/setup uses the [ProfileClient] to retrieve secrets and profile values.
            // We need to inject an implementation for [PreprocessReader] so it will be able to
            // perform the lookups.

            NeonHelper.ServiceContainer.AddSingleton <IProfileClient>(new ProfileClient());

            // Handle the [--remove-templates] option.

            if (commandLine.HasOption("--remove-templates"))
                Console.WriteLine("Removing cached virtual machine templates.");

                // NOTE(review): the loop body is not visible here; given the message above
                // it presumably deletes each enumerated file -- confirm.
                foreach (var fileName in Directory.GetFiles(KubeHelper.NodeImageFolder, "*.*", SearchOption.TopDirectoryOnly))

            // Read the command line options.  [--headend-uri] falls back to
            // [KubeConst.NeonCloudHeadendUri] and [--max-parallel] defaults to "6".

            var nodeImageUri      = commandLine.GetOption("--node-image-uri");
            var nodeImagePath     = commandLine.GetOption("--node-image-path");
            var debug             = commandLine.HasOption("--debug");
            var baseImageName     = commandLine.GetOption("--base-image-name");
            var clusterspace      = commandLine.GetOption("--clusterspace");
            var headendUri        = commandLine.GetOption("--headend-uri") ?? KubeConst.NeonCloudHeadendUri;
            var maxParallelOption = commandLine.GetOption("--max-parallel", "6");
            var disablePending    = commandLine.HasOption("--disable-pending");

            // [--max-parallel] must parse as a positive integer.
            if (!int.TryParse(maxParallelOption, out var maxParallel) || maxParallel <= 0)
                Console.Error.WriteLine($"*** ERROR: [--max-parallel={maxParallelOption}] is not valid.");

            if (debug && string.IsNullOrEmpty(baseImageName))
                Console.Error.WriteLine($"*** ERROR: [--base-image-name] is required for [--debug] mode.");

            // Implement the command.

            // Preparing is refused while the operator is logged into another cluster.
            if (KubeHelper.CurrentContext != null)
                Console.Error.WriteLine("*** ERROR: You are logged into a cluster.  You need to logout before preparing another.");

            if (commandLine.Arguments.Length == 0)
                Console.Error.WriteLine($"*** ERROR: CLUSTER-DEF expected.");

            // Obtain the cluster definition.

            var clusterDefPath    = commandLine.Arguments[0];
            var clusterDefinition = (ClusterDefinition)null;

            ClusterDefinition.ValidateFile(clusterDefPath, strict: true);

            clusterDefinition = ClusterDefinition.FromFile(clusterDefPath, strict: true);

            // Do a quick sanity check to ensure that the hosting environment has enough
            // resources (memory and disk) to actually host the cluster.

            using (var cluster = new ClusterProxy(clusterDefinition, new HostingManagerFactory()))
                var status = await cluster.GetResourceAvailabilityAsync();

                if (!status.CanBeDeployed)
                    Console.Error.WriteLine($"*** ERROR: Insufficent resources available to deploy cluster.");

                    // List the constraints grouped by entity, with the entities ordered
                    // case-insensitively.
                    foreach (var entity in status.Constraints.Keys
                             .OrderBy(key => key, StringComparer.InvariantCultureIgnoreCase))

                        foreach (var constraint in status.Constraints[entity])
                            Console.Error.WriteLine($"    {constraint.ResourceType.ToString().ToUpperInvariant()}: {constraint.Details}");


            if (KubeHelper.IsOnPremiseHypervisorEnvironment(clusterDefinition.Hosting.Environment))
                // Use the default node image for the hosting environment unless [--node-image-uri]
                // or [--node-image-path] was specified.

                if (string.IsNullOrEmpty(nodeImageUri) && string.IsNullOrEmpty(nodeImagePath))
                    nodeImageUri = KubeDownloads.GetDefaultNodeImageUri(clusterDefinition.Hosting.Environment);

            // Parse any specified package cache endpoints.  The option value is a
            // space separated list of IPv4 endpoints.

            var packageCaches         = commandLine.GetOption("--package-caches", null);
            var packageCacheEndpoints = new List <IPEndPoint>();

            if (!string.IsNullOrEmpty(packageCaches))
                foreach (var item in packageCaches.Split(' ', StringSplitOptions.RemoveEmptyEntries))
                    // NOTE(review): the statement that adds a successfully parsed endpoint
                    // to [packageCacheEndpoints] is not visible here.
                    if (!NetHelper.TryParseIPv4Endpoint(item, out var endpoint))
                        Console.Error.WriteLine($"*** ERROR: [{item}] is not a valid package cache IPv4 endpoint.");


            // Create and run the cluster prepare controller.
            // NOTE(review): only named arguments are visible below; the cluster definition
            // is presumably passed as a leading argument that is missing from this copy.

            var controller = KubeSetup.CreateClusterPrepareController(
                nodeImageUri:           nodeImageUri,
                nodeImagePath:          nodeImagePath,
                maxParallel:            maxParallel,
                packageCacheEndpoints:  packageCacheEndpoints,
                unredacted:             commandLine.HasOption("--unredacted"),
                debugMode:              debug,
                baseImageName:          baseImageName,
                clusterspace:           clusterspace,
                neonCloudHeadendUri:    headendUri);

            controller.DisablePendingTasks = disablePending;

            // NOTE(review): the body of this status handler is not visible here.
            controller.StatusChangedEvent +=
                status =>

            // Run the controller and report the result for each disposition.
            switch (await controller.RunAsync())
            case SetupDisposition.Succeeded:

                // Pending task groups that were never awaited are reported as an
                // error even though the controller reported success.
                var pendingGroups = controller.GetPendingGroups();

                if (pendingGroups.Count > 0)
                    Console.WriteLine($"*** ERROR: [{pendingGroups.Count}] pending task groups have not been awaited:");

                    foreach (var groupName in pendingGroups)
                        Console.WriteLine($"   {groupName}");


                Console.WriteLine($" [{clusterDefinition.Name}] cluster is prepared.");

            case SetupDisposition.Cancelled:

                Console.WriteLine(" *** CANCELLED: Cluster prepare was cancelled.");

            case SetupDisposition.Failed:

                Console.WriteLine(" *** ERROR: Cluster prepare has failed.  Examine the logs here:");
                Console.WriteLine($" {KubeHelper.LogFolder}");


                // NOTE(review): presumably the [default:] branch for unexpected
                // dispositions; its label is not visible here.
                throw new NotImplementedException();

            await Task.CompletedTask;