Beispiel #1
0
        /// <summary>
        /// Upgrades the Docker engine on a single hive node: the Docker service is
        /// stopped, the Debian package is downloaded and installed, and the service is
        /// started again.  Nodes that are swarm members are drained before the upgrade
        /// and reactivated afterwards.  Any exception is converted into a node fault.
        /// </summary>
        /// <param name="hive">The hive that owns the node.</param>
        /// <param name="node">The node whose Docker installation is being updated.</param>
        /// <param name="dockerPackageUri">URI of the Docker Debian package to install.</param>
        private static void UpdateDocker(HiveProxy hive, SshProxy<NodeDefinition> node, string dockerPackageUri)
        {
            try
            {
                // Move workloads off swarm members before Docker goes down.

                if (node.Metadata.InSwarm)
                {
                    node.Status = "swarm: drain services";
                    hive.Docker.DrainNode(node.Name);
                }

                // Stop the engine.

                node.Status = "stop: docker";
                node.SudoCommand("systemctl stop docker").EnsureSuccess();

                // Fetch the new package to a temporary location.

                node.Status = "download: docker package";
                node.SudoCommand($"curl {Program.CurlOptions} {dockerPackageUri} -o /tmp/docker.deb").EnsureSuccess();

                // Install the package.  Removing the temporary file afterwards is
                // best effort, so its result is intentionally not checked.

                node.Status = "update: docker";
                node.SudoCommand("gdebi /tmp/docker.deb").EnsureSuccess();
                node.SudoCommand("rm /tmp/docker.deb");

                // Bring the engine back up.

                node.Status = "restart: docker";
                node.SudoCommand("systemctl start docker").EnsureSuccess();

                // Allow swarm members to accept workloads again.

                if (node.Metadata.InSwarm)
                {
                    node.Status = "swarm: activate";
                    hive.Docker.ActivateNode(node.Name);
                }
            }
            catch (Exception updateError)
            {
                node.Fault($"[docker] update failed: {NeonHelper.ExceptionError(updateError)}");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Checks that the Consul service is reported as running on a node and
        /// faults the node when it is not.
        /// </summary>
        /// <param name="node">The manager node.</param>
        /// <param name="hiveDefinition">The hive definition (not used by this check).</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when the configured service manager is anything other than systemd.
        /// </exception>
        private static void CheckConsul(SshProxy<NodeDefinition> node, HiveDefinition hiveDefinition)
        {
            node.Status = "checking: consul";

            // Only systemd is handled; every other service manager is unsupported.

            if (Program.ServiceManager != ServiceManager.Systemd)
            {
                throw new NotImplementedException();
            }

            // Verify that the daemon is running by inspecting the [systemctl status] output.

            var statusResponse = node.SudoCommand("systemctl status consul", RunOptions.LogOutput);
            var isRunning      = statusResponse.OutputText.Contains("Active: active (running)");

            if (!isRunning)
            {
                node.Fault("Consul deamon is not running.");
            }
        }
Beispiel #3
0
        /// <summary>
        /// Confirms that the XenServer does not already host a virtual machine whose
        /// name collides with one of the hive nodes about to be provisioned on it.
        /// The proxy is faulted on the first collision found.
        /// </summary>
        /// <param name="xenSshProxy">The XenServer SSH proxy.</param>
        private void VerifyReady(SshProxy<XenClient> xenSshProxy)
        {
            // $todo(jeff.lill):
            //
            // It would be nice to verify that XenServer actually has enough
            // resources (RAM, DISK, and perhaps CPU) here as well.

            var xenClient   = xenSshProxy.Metadata;
            var hostedNodes = GetHostedNodes(xenClient);

            xenSshProxy.Status = "check virtual machines";

            // Collect the names of the machines currently on the host.  Names are
            // compared case insensitively.

            var existingNames = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);

            foreach (var machine in xenClient.Machine.List())
            {
                existingNames.Add(machine.NameLabel);
            }

            // Fault on the first node whose VM name is already taken.

            foreach (var hostedNode in hostedNodes)
            {
                var candidateName = GetVmName(hostedNode);

                if (!existingNames.Contains(candidateName))
                {
                    continue;
                }

                xenSshProxy.Fault($"XenServer [{xenClient.Name}] already hosts a virtual machine named [{candidateName}].");
                return;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Verifies Vault health for a node.
        /// </summary>
        /// <remarks>
        /// NOTE: This check is currently disabled.  The method returns immediately,
        /// so the draft implementation below the <c>return</c> statement is
        /// unreachable and no Vault request is ever made.
        /// </remarks>
        /// <param name="node">The node.</param>
        /// <param name="hiveDefinition">The hive definition.</param>
        private static void CheckVault(SshProxy <NodeDefinition> node, HiveDefinition hiveDefinition)
        {
            // $todo(jeff.lill): Implement this.
            //
            // The early return below turns this method into a no-op.  Everything
            // after it is a draft that never executes.

            return;

            node.Status = "checking: vault";

            // This is a minimal health test that just verifies that Vault
            // is listening for requests.  We're going to ping the local
            // Vault instance at [/v1/sys/health].
            //
            // Note that this should return a 500 status code with some
            // JSON content.  The reason for this is because we have not
            // yet initialized and unsealed the vault.

            var targetUrl = $"https://{node.Metadata.PrivateAddress}:{hiveDefinition.Vault.Port}/v1/sys/health?standbycode=200";

            using (var client = new HttpClient())
            {
                try
                {
                    // Blocks synchronously on the HTTP request.

                    var response = client.GetAsync(targetUrl).Result;

                    // Only 200 and 500 responses are accepted.

                    if (response.StatusCode != HttpStatusCode.OK &&
                        response.StatusCode != HttpStatusCode.InternalServerError)
                    {
                        node.Fault($"Vault: Unexpected HTTP response status [{(int) response.StatusCode}={response.StatusCode}]");
                        return;
                    }

                    // The response body must be JSON.
                    //
                    // NOTE(review): [ContentType] may be null when the response has no
                    // Content-Type header; that would surface as a fault via the catch
                    // block below.  Confirm before enabling this code.

                    if (!response.Content.Headers.ContentType.MediaType.Equals("application/json", StringComparison.OrdinalIgnoreCase))
                    {
                        node.Fault($"Vault: Unexpected content type [{response.Content.Headers.ContentType.MediaType}]");
                        return;
                    }
                }
                catch (Exception e)
                {
                    // Any failure (connection errors included) is reported as a node fault.

                    node.Fault($"Vault: {NeonHelper.ExceptionError(e)}");
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Checks the operating system reported by the node and faults the node
        /// unless it is Ubuntu with a version of at least 18.04.
        /// </summary>
        /// <param name="node">The target cluster node.</param>
        /// <param name="stepDelay">Ignored.</param>
        public static void VerifyOS(SshProxy<NodeDefinition> node, TimeSpan stepDelay)
        {
            node.Status = "check: OS";

            // $todo(jeff.lill): We're currently hardcoded to Ubuntu 18.04.x

            var isUbuntu = node.OsName.Equals("Ubuntu", StringComparison.InvariantCultureIgnoreCase);

            // The version is only examined when the OS name matches.

            if (isUbuntu && !(node.OsVersion < Version.Parse("18.04")))
            {
                return;
            }

            node.Fault("Expected: Ubuntu 18.04.x");
        }
Beispiel #6
0
        /// <summary>
        /// Performs a basic Docker liveness check by running <c>docker info</c> on
        /// the node and faulting the node when the command exits with a non-zero code.
        /// </summary>
        /// <param name="node">The target hive node.</param>
        /// <param name="hiveDefinition">The hive definition (not used by this check).</param>
        private static void CheckDocker(SshProxy<NodeDefinition> node, HiveDefinition hiveDefinition)
        {
            node.Status = "checking: docker";

            // A successful [docker info] is taken as evidence that Docker is running.

            var infoResult = node.SudoCommand("docker info");

            if (infoResult.ExitCode == 0)
            {
                return;
            }

            node.Fault($"Docker: {infoResult.AllText}");
        }
Beispiel #7
0
        /// <summary>
        /// Runs <c>lsb_release -a</c> on the node and faults the node when the
        /// output does not identify the operating system the tool is targeting.
        /// </summary>
        /// <param name="node">The target hive node.</param>
        /// <exception cref="NotImplementedException">
        /// Thrown when the configured target OS is anything other than Ubuntu 16.04.
        /// </exception>
        public static void VerifyOS(SshProxy<NodeDefinition> node)
        {
            node.Status = "check: OS";

            var releaseInfo = node.SudoCommand("lsb_release -a");

            // Ubuntu 16.04 is the only target with a verification rule.

            if (Program.OSProperties.TargetOS != TargetOS.Ubuntu_16_04)
            {
                throw new NotImplementedException($"Support for [{nameof(TargetOS)}.{Program.OSProperties.TargetOS}] is not implemented.");
            }

            var isExpectedRelease = releaseInfo.OutputText.Contains("Ubuntu 16.04");

            if (!isExpectedRelease)
            {
                node.Fault("Expected [Ubuntu 16.04].");
            }
        }
Beispiel #8
0
        /// <summary>
        /// Provisions the virtual machines on the XenServer.  For every hive node
        /// hosted by the server this creates and starts the VM, waits for it to obtain
        /// a DHCP address, connects via SSH to write the static network configuration,
        /// resizes the primary partition and finally reboots the VM.
        /// </summary>
        /// <remarks>
        /// The proxy is faulted and provisioning stops when a virtual machine does not
        /// report an IP address within the timeout.
        /// </remarks>
        /// <param name="xenSshProxy">The XenServer SSH proxy.</param>
        private void ProvisionVirtualMachines(SshProxy<XenClient> xenSshProxy)
        {
            var xenHost = xenSshProxy.Metadata;

            foreach (var node in GetHostedNodes(xenHost))
            {
                var vmName      = GetVmName(node);
                var processors  = node.Metadata.GetVmProcessors(hive.Definition);
                var memoryBytes = node.Metadata.GetVmMemory(hive.Definition);
                var diskBytes   = node.Metadata.GetVmDisk(hive.Definition);

                xenSshProxy.Status = FormatVmStatus(vmName, "create virtual machine");

                // We need to create a raw drive if the node hosts a Ceph OSD.

                var extraDrives = new List<XenVirtualDrive>();

                if (node.Metadata.Labels.CephOSD)
                {
                    extraDrives.Add(
                        new XenVirtualDrive()
                        {
                            Size = node.Metadata.GetCephOSDDriveSize(hive.Definition)
                        });
                }

                var vm = xenHost.Machine.Create(vmName, hive.Definition.Hosting.XenServer.TemplateName,
                                                processors:                 processors,
                                                memoryBytes:                memoryBytes,
                                                diskBytes:                  diskBytes,
                                                snapshot:                   hive.Definition.Hosting.XenServer.Snapshot,
                                                extraDrives:                extraDrives,
                                                primaryStorageRepository:   hive.Definition.Hosting.XenServer.StorageRepository,
                                                extraStorageRespository:    hive.Definition.Hosting.XenServer.OsdStorageRepository);

                xenSshProxy.Status = FormatVmStatus(vmName, "start virtual machine");

                xenHost.Machine.Start(vm);

                // We need to wait for the virtual machine to start and obtain
                // an IP address via DHCP.

                var address = string.Empty;

                xenSshProxy.Status = FormatVmStatus(vmName, "fetch ip address");

                try
                {
                    // The predicate performs a single check and returns; polling and
                    // timeout enforcement are left to [WaitFor].  Looping inside the
                    // predicate would prevent the timeout from ever being honored.

                    NeonHelper.WaitFor(
                        () =>
                        {
                            var machine = xenHost.Machine.Find(vmName);

                            if (machine == null || string.IsNullOrEmpty(machine.Address))
                            {
                                return false;
                            }

                            address = machine.Address;
                            return true;
                        },
                        timeout: TimeSpan.FromSeconds(120),
                        pollTime: TimeSpan.FromSeconds(1));
                }
                catch (TimeoutException)
                {
                    // Without an address there is nothing to connect to, so stop here
                    // rather than failing later while parsing an empty address.

                    xenSshProxy.Fault("Timeout waiting for virtual machine to start and set an IP address.");
                    return;
                }

                // SSH into the VM using the DHCP address, configure the static IP
                // address and extend the primary partition and file system to fill
                // the drive and then reboot.

                var subnet    = NetworkCidr.Parse(hive.Definition.Network.PremiseSubnet);
                var gateway   = hive.Definition.Network.Gateway;
                var broadcast = hive.Definition.Network.Broadcast;

                // We're going to temporarily set the node to the current VM address
                // so we can connect via SSH.

                var savedNodeAddress = node.PrivateAddress;

                try
                {
                    node.PrivateAddress = IPAddress.Parse(address);

                    using (var nodeProxy = hive.GetNode(node.Name))
                    {
                        xenSshProxy.Status = FormatVmStatus(vmName, "connect");
                        nodeProxy.WaitForBoot();

                        // Replace the [/etc/network/interfaces] file to configure the static
                        // IP and then reboot to reinitialize networking subsystem.

                        var primaryInterface = node.GetNetworkInterface(node.PrivateAddress);

                        // Report the static address being configured, not the temporary
                        // DHCP address currently assigned to [node.PrivateAddress].

                        xenSshProxy.Status = FormatVmStatus(vmName, $"set static ip [{savedNodeAddress}]");

                        var interfacesText =
                            $@"# This file describes the network interfaces available on your system
# and how to activate them. For more information, see interfaces(5).

source /etc/network/interfaces.d/*

# The loopback network interface
auto lo
iface lo inet loopback

# The primary network interface
auto {primaryInterface}
iface {primaryInterface} inet static
address {savedNodeAddress}
netmask {subnet.Mask}
gateway {gateway}
broadcast {broadcast}
";
                        nodeProxy.UploadText("/etc/network/interfaces", interfacesText);

                        // Temporarily configure the public Google DNS servers as
                        // the name servers so DNS will work after we reboot with
                        // the static IP.  Note that hive setup will eventually
                        // configure the name servers specified in the hive
                        // definition.

                        // $todo(jeff.lill):
                        //
                        // Is there a good reason why we're not just configuring the
                        // DNS servers from the hive definition here???
                        //
                        // Using the Google DNS seems like it could break some hive
                        // network configurations (e.g. for hives that don't have
                        // access to the public Internet).  Totally private hives
                        // aren't really a supported scenario right now though because
                        // we assume we can use [apt-get]... to pull down packages.

                        var resolvBaseText =
                            $@"nameserver 8.8.8.8
nameserver 8.8.4.4
";
                        nodeProxy.UploadText("/etc/resolvconf/resolv.conf.d/base", resolvBaseText);

                        // Extend the primary partition and file system to fill
                        // the virtual drive.

                        xenSshProxy.Status = FormatVmStatus(vmName, "resize primary partition");

                        // $hack(jeff.lill):
                        //
                        // I've seen a transient error here but can't reproduce it.  I'm going
                        // to assume for now that the file system might not be quite ready for
                        // this operation directly after the VM has been rebooted, so we're going
                        // to delay for a few seconds before performing the operations.

                        Thread.Sleep(TimeSpan.FromSeconds(5));
                        nodeProxy.SudoCommand("growpart /dev/xvda 1");
                        nodeProxy.SudoCommand("resize2fs /dev/xvda1");

                        // Reboot to pick up the changes.

                        xenSshProxy.Status = FormatVmStatus(vmName, "reboot");
                        nodeProxy.Reboot(wait: false);
                    }
                }
                finally
                {
                    // Restore the node's IP address.

                    node.PrivateAddress = savedNodeAddress;
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Deploys RabbitMQ to a cluster node as a container and waits for the
        /// RabbitMQ node to report that it is healthy.  Nodes that are not labeled
        /// for HiveMQ are skipped.
        /// </summary>
        /// <remarks>
        /// The node is faulted when RabbitMQ does not pass its health check within
        /// the timeout.
        /// </remarks>
        /// <param name="node">The target hive node.</param>
        private void DeployHiveMQ(SshProxy<NodeDefinition> node)
        {
            // Deploy RabbitMQ only on the labeled nodes.

            if (!node.Metadata.Labels.HiveMQ)
            {
                return;
            }

            // Build a comma separated list of fully qualified RabbitMQ hostnames so we
            // can pass them as the CLUSTER environment variable.

            var rabbitNodes = hive.Definition.SortedNodes.Where(n => n.Labels.HiveMQ).ToList();
            var sbCluster   = new StringBuilder();

            foreach (var rabbitNode in rabbitNodes)
            {
                sbCluster.AppendWithSeparator($"{rabbitNode.Name}@{rabbitNode.Name}.{hive.Definition.Hostnames.HiveMQ}", ",");
            }

            var hipeCompileArgs = new List<string>();

            if (hive.Definition.HiveMQ.Precompile)
            {
                hipeCompileArgs.Add("--env");
                hipeCompileArgs.Add("RABBITMQ_HIPE_COMPILE=1");
            }

            // The management plugin option belongs in its own argument list.  Both
            // lists are passed to the command next to each other, so the generated
            // command line is unchanged.

            var managementPluginArgs = new List<string>();

            if (node.Metadata.Labels.HiveMQManager)
            {
                managementPluginArgs.Add("--env");
                managementPluginArgs.Add("MANAGEMENT_PLUGIN=true");
            }

            // $todo(jeff.lill):
            //
            // I was unable to get TLS working correctly for RabbitMQ.  I'll come back
            // and revisit this later:
            //
            //      https://github.com/jefflill/NeonForge/issues/319

            // NOTE(security): The default RabbitMQ credentials below are hard-coded
            // (sysadmin/password).  They should come from the hive definition or a
            // secret store rather than from source.

            ServiceHelper.StartContainer(node, "neon-hivemq", hive.Definition.Image.HiveMQ, RunOptions.FaultOnError,
                                         new CommandBundle(
                                             "docker run",
                                             "--detach",
                                             "--name", "neon-hivemq",
                                             "--env", $"CLUSTER_NAME={hive.Definition.Name}",
                                             "--env", $"CLUSTER_NODES={sbCluster}",
                                             "--env", $"CLUSTER_PARTITION_MODE=autoheal",
                                             "--env", $"NODENAME={node.Name}@{node.Name}.{hive.Definition.Hostnames.HiveMQ}",
                                             "--env", $"RABBITMQ_USE_LONGNAME=true",
                                             "--env", $"RABBITMQ_DEFAULT_USER=sysadmin",
                                             "--env", $"RABBITMQ_DEFAULT_PASS=password",
                                             "--env", $"RABBITMQ_NODE_PORT={HiveHostPorts.HiveMQAMQP}",
                                             "--env", $"RABBITMQ_DIST_PORT={HiveHostPorts.HiveMQDIST}",
                                             "--env", $"RABBITMQ_MANAGEMENT_PORT={HiveHostPorts.HiveMQManagement}",
                                             "--env", $"RABBITMQ_ERLANG_COOKIE={hive.Definition.HiveMQ.ErlangCookie}",
                                             "--env", $"RABBITMQ_VM_MEMORY_HIGH_WATERMARK={hive.Definition.HiveMQ.RamHighWatermark}",
                                             hipeCompileArgs,
                                             managementPluginArgs,
                                             "--env", $"RABBITMQ_DISK_FREE_LIMIT={HiveDefinition.ValidateSize(hive.Definition.HiveMQ.DiskFreeLimit, typeof(HiveMQOptions), nameof(hive.Definition.HiveMQ.DiskFreeLimit))}",
                                             //"--env", $"RABBITMQ_SSL_CERTFILE=/etc/neon/certs/hive.crt",
                                             //"--env", $"RABBITMQ_SSL_KEYFILE=/etc/neon/certs/hive.key",
                                             "--env", $"ERL_EPMD_PORT={HiveHostPorts.HiveMQEPMD}",
                                             "--mount", "type=volume,source=neon-hivemq,target=/var/lib/rabbitmq",
                                             "--mount", "type=bind,source=/etc/neon/certs,target=/etc/neon/certs,readonly",
                                             "--publish", $"{HiveHostPorts.HiveMQEPMD}:{HiveHostPorts.HiveMQEPMD}",
                                             "--publish", $"{HiveHostPorts.HiveMQAMQP}:{HiveHostPorts.HiveMQAMQP}",
                                             "--publish", $"{HiveHostPorts.HiveMQDIST}:{HiveHostPorts.HiveMQDIST}",
                                             "--publish", $"{HiveHostPorts.HiveMQManagement}:{HiveHostPorts.HiveMQManagement}",
                                             "--memory", HiveDefinition.ValidateSize(hive.Definition.HiveMQ.RamLimit, typeof(HiveMQOptions), nameof(hive.Definition.HiveMQ.RamLimit)),
                                             "--restart", "always",
                                             ServiceHelper.ImagePlaceholderArg));

            // Wait for the RabbitMQ node to report that it's ready.

            var timeout  = TimeSpan.FromMinutes(4);
            var pollTime = TimeSpan.FromSeconds(2);

            node.Status = "hivemq: waiting";

            try
            {
                NeonHelper.WaitFor(
                    () =>
                    {
                        // A failing health check is expected while RabbitMQ starts, so
                        // [FaultOnError] is masked out for this command.

                        var readyResponse = node.SudoCommand($"docker exec neon-hivemq rabbitmqctl node_health_check -n {node.Name}@{node.Name}.{hive.Definition.Hostnames.HiveMQ}", node.DefaultRunOptions & ~RunOptions.FaultOnError);

                        return readyResponse.ExitCode == 0;
                    },
                    timeout: timeout,
                    pollTime: pollTime);
            }
            catch (TimeoutException)
            {
                node.Fault($"RabbitMQ not ready after waiting [{timeout}].");
                return;
            }

            node.Status = "hivemq: ready";
        }
Beispiel #10
0
        /// <summary>
        /// Verifies a master node's NTP health by confirming that at least one
        /// external time source is listed by <c>ntpq -pw</c>.  The check is retried
        /// several times and the node is faulted only when the final attempt fails.
        /// </summary>
        /// <param name="node">The master node.</param>
        /// <param name="clusterDefinition">The cluster definition (not used by this check).</param>
        private static void CheckMasterNtp(SshProxy<NodeDefinition> node, ClusterDefinition clusterDefinition)
        {
            // We're going to use [ntpq -pw] to query the configured time sources.
            // We should get something back that looks like
            //
            //           remote           refid      st t when poll reach   delay   offset  jitter
            //      ==============================================================================
            //       LOCAL(0)        .LOCL.          10 l  45m   64    0    0.000    0.000   0.000
            //      *clock.xmission. .GPS.            1 u  134  256  377   48.939   -0.549  18.357
            //      +173.44.32.10    18.26.4.105      2 u  200  256  377   96.981   -0.623   3.284
            //      +pacific.latt.ne 44.24.199.34     3 u  243  256  377   41.457   -8.929   8.497
            //
            // For master nodes, we're simply going to verify that we have at least one external
            // time source answering.

            node.Status = "check: NTP";

            const int maxTries = 6;

            var retryDelay = TimeSpan.FromSeconds(30);
            var fault      = (string)null;

            for (int tryCount = 0; tryCount < maxTries; tryCount++)
            {
                // Pause between attempts only; there is no point sleeping after
                // the final one.

                if (tryCount > 0)
                {
                    Thread.Sleep(retryDelay);
                }

                var response = node.SudoCommand("/usr/bin/ntpq -pw", RunOptions.LogOutput);

                if (response.ExitCode != 0)
                {
                    // Record the failure so that a command that never succeeds
                    // is reported instead of being silently treated as healthy.

                    fault = $"NTP: [ntpq -pw] failed with exit code [{response.ExitCode}].";
                    continue;
                }

                using (var reader = response.OpenOutputTextReader())
                {
                    // Column header and table bar lines.

                    var headerLine = reader.ReadLine();
                    var barLine    = reader.ReadLine();

                    if (string.IsNullOrWhiteSpace(headerLine) || string.IsNullOrWhiteSpace(barLine) || barLine[0] != '=')
                    {
                        fault = "NTP: Invalid [ntpq -pw] response.";
                        continue;
                    }

                    // Count the non-empty lines that don't include [.LOCL.],
                    // the local clock.

                    var sourceCount = 0;

                    for (var line = reader.ReadLine(); line != null; line = reader.ReadLine())
                    {
                        if (line.Length > 0 && !line.Contains(".LOCL."))
                        {
                            sourceCount++;
                        }
                    }

                    if (sourceCount == 0)
                    {
                        fault = "NTP: No external sources are answering.";
                        continue;
                    }
                }

                // Everything looks good.  Clear any fault recorded by an earlier
                // attempt so a transient failure doesn't fault a healthy node.

                fault = null;
                break;
            }

            if (fault != null)
            {
                node.Fault(fault);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Verifies a worker node's NTP health by confirming that every master
        /// appears in the <c>ntpq -pw</c> output.  The check is retried several times;
        /// when it still fails, the local NTP service is restarted once and the
        /// check is repeated before the node is faulted.
        /// </summary>
        /// <param name="node">The worker node.</param>
        /// <param name="clusterDefinition">The cluster definition.</param>
        private static void CheckWorkerNtp(SshProxy<NodeDefinition> node, ClusterDefinition clusterDefinition)
        {
            // We're going to use [ntpq -pw] to query the configured time sources.
            // We should get something back that looks like
            //
            //           remote           refid      st t when poll reach   delay   offset  jitter
            //           ==============================================================================
            //            LOCAL(0)        .LOCL.          10 l  45m   64    0    0.000    0.000   0.000
            //           *10.0.1.5        198.60.22.240    2 u  111  128  377    0.062    3.409   0.608
            //           +10.0.1.7        198.60.22.240    2 u  111  128  377    0.062    3.409   0.608
            //           +10.0.1.7        198.60.22.240    2 u  111  128  377    0.062    3.409   0.608
            //
            // For worker nodes, we need to verify that each of the masters are answering
            // by confirming that their IP addresses are present.

            node.Status = "check: NTP";

            const int maxTries = 6;

            var retryDelay = TimeSpan.FromSeconds(30);
            var fault      = (string)null;
            var firstTry   = true;

            while (true)
            {
                for (var tries = 0; tries < maxTries; tries++)
                {
                    // Pause between attempts only.

                    if (tries > 0)
                    {
                        Thread.Sleep(retryDelay);
                    }

                    var output = node.SudoCommand("/usr/bin/ntpq -pw", RunOptions.LogOutput).OutputText;

                    // Each attempt starts clean so an earlier failure can't leak
                    // into a later successful attempt.

                    fault = null;

                    foreach (var master in clusterDefinition.SortedMasters)
                    {
                        // We're going to check for the presence of the master's IP address
                        // or its name, the latter because [ntpq] appears to attempt a reverse
                        // IP address lookup which will resolve into one of the DNS names defined
                        // in the local [/etc/hosts] file.
                        //
                        // NOTE(review): These are plain substring matches, so an address
                        // like [10.0.1.5] also matches [10.0.1.50].

                        if (!output.Contains(master.PrivateAddress.ToString()) && !output.Contains(master.Name.ToLowerInvariant()))
                        {
                            fault = $"NTP: Manager [{master.Name}/{master.PrivateAddress}] is not answering.";
                            break;
                        }
                    }

                    if (fault == null)
                    {
                        // Every master is answering.

                        return;
                    }
                }

                if (!firstTry)
                {
                    break;
                }

                // $hack(jeff.lill):
                //
                // I've seen the NTP check fail on worker nodes, complaining
                // that the connection attempt was rejected.  I manually restarted
                // the node and then it worked.  I'm not sure if the rejected connection
                // was being made to the local NTP service or from the local service
                // to NTP running on the master.
                //
                // I'm going to assume that it was to the local NTP service and I'm
                // going to try mitigating this by restarting the local NTP service
                // and then re-running the tests.  I'm only going to do this once.

                node.SudoCommand("systemctl restart ntp", node.DefaultRunOptions & ~RunOptions.FaultOnError);

                firstTry = false;
            }

            node.Fault(fault);
        }
Beispiel #12
0
        /// <summary>
        /// Provisions the virtual machines on the XenServer.  For every cluster node
        /// hosted by the server this creates and starts the VM, waits for it to obtain
        /// a DHCP address, connects via SSH to configure the static network settings,
        /// resizes the primary drive and finally reboots the VM.
        /// </summary>
        /// <remarks>
        /// The proxy is faulted and provisioning stops when a virtual machine does not
        /// report an IP address within the timeout.
        /// </remarks>
        /// <param name="xenSshProxy">The XenServer SSH proxy.</param>
        private void ProvisionVirtualMachines(SshProxy<XenClient> xenSshProxy)
        {
            var xenHost = xenSshProxy.Metadata;

            foreach (var node in GetHostedNodes(xenHost))
            {
                var vmName      = GetVmName(node);
                var processors  = node.Metadata.GetVmProcessors(cluster.Definition);
                var memoryBytes = node.Metadata.GetVmMemory(cluster.Definition);
                var diskBytes   = node.Metadata.GetVmDisk(cluster.Definition);

                xenSshProxy.Status = FormatVmStatus(vmName, "create: virtual machine");

                // We need to create a raw drive if the node hosts a Ceph OSD.

                var extraDrives = new List<XenVirtualDrive>();

                if (node.Metadata.Labels.CephOSD)
                {
                    extraDrives.Add(
                        new XenVirtualDrive()
                        {
                            Size = node.Metadata.GetCephOSDDriveSize(cluster.Definition)
                        });
                }

                var vm = xenHost.Machine.Create(vmName, cluster.Definition.Hosting.XenServer.TemplateName,
                                                processors:                 processors,
                                                memoryBytes:                memoryBytes,
                                                diskBytes:                  diskBytes,
                                                snapshot:                   cluster.Definition.Hosting.XenServer.Snapshot,
                                                extraDrives:                extraDrives,
                                                primaryStorageRepository:   cluster.Definition.Hosting.XenServer.StorageRepository,
                                                extraStorageRespository:    cluster.Definition.Hosting.XenServer.OsdStorageRepository);

                xenSshProxy.Status = FormatVmStatus(vmName, "start: virtual machine");

                xenHost.Machine.Start(vm);

                // We need to wait for the virtual machine to start and obtain
                // an IP address via DHCP.

                var address = string.Empty;

                xenSshProxy.Status = FormatVmStatus(vmName, "discover: ip address");

                try
                {
                    // The predicate performs a single check and returns; polling and
                    // timeout enforcement are left to [WaitFor].  Looping inside the
                    // predicate would prevent the timeout from ever being honored.

                    NeonHelper.WaitFor(
                        () =>
                        {
                            var machine = xenHost.Machine.Find(vmName);

                            if (machine == null || string.IsNullOrEmpty(machine.Address))
                            {
                                return false;
                            }

                            address = machine.Address;
                            return true;
                        },
                        timeout: TimeSpan.FromMinutes(3),
                        pollTime: TimeSpan.FromSeconds(1));
                }
                catch (TimeoutException)
                {
                    // Without an address there is nothing to connect to, so stop here
                    // rather than failing later while parsing an empty address.

                    xenSshProxy.Fault("Timeout waiting for virtual machine to start and set an IP address.");
                    return;
                }

                // SSH into the VM using the DHCP address, configure the static IP
                // address and extend the primary partition and file system to fill
                // the drive and then reboot.

                var subnet = NetworkCidr.Parse(cluster.Definition.Network.PremiseSubnet);

                // We're going to temporarily set the node to the current VM address
                // so we can connect via SSH.

                var nodePrivateAddress = node.PrivateAddress;

                try
                {
                    node.PrivateAddress = IPAddress.Parse(address);

                    using (var nodeProxy = cluster.GetNode(node.Name))
                    {
                        xenSshProxy.Status = FormatVmStatus(vmName, "connect");
                        nodeProxy.WaitForBoot();

                        // Configure the node's network stack to the static IP address
                        // and upstream nameservers.

                        node.Status = $"network config [IP={nodePrivateAddress}]";

                        var primaryInterface = node.GetNetworkInterface(node.PrivateAddress);

                        node.ConfigureNetwork(
                            networkInterface:   primaryInterface,
                            address:            nodePrivateAddress,
                            gateway:            IPAddress.Parse(cluster.Definition.Network.Gateway),
                            subnet:             subnet,
                            nameservers:        cluster.Definition.Network.Nameservers.Select(ns => IPAddress.Parse(ns)));

                        // Extend the primary partition and file system to fill
                        // the virtual drive.

                        xenSshProxy.Status = FormatVmStatus(vmName, "resize: primary drive");

                        // $hack(jeff.lill):
                        //
                        // I've seen a transient error here but can't reproduce it.  I'm going
                        // to assume for now that the file system might not be quite ready for
                        // this operation directly after the VM has been rebooted, so we're going
                        // to delay for a few seconds before performing the operations.

                        Thread.Sleep(TimeSpan.FromSeconds(5));
                        nodeProxy.SudoCommand("growpart /dev/xvda 2");
                        nodeProxy.SudoCommand("resize2fs /dev/xvda2");

                        // Reboot to pick up the changes.

                        xenSshProxy.Status = FormatVmStatus(vmName, "restarting...");
                        nodeProxy.Reboot(wait: false);
                    }
                }
                finally
                {
                    // Restore the node's IP address.

                    node.PrivateAddress = nodePrivateAddress;
                }
            }
        }