/// <summary>
/// Configures the hive logging related services.
/// </summary>
/// <param name="firstManager">The first hive proxy manager.</param>
public void Configure(SshProxy<NodeDefinition> firstManager)
{
    // Logging is an optional hive feature; there's nothing to do when it's disabled.
    if (!hive.Definition.Log.Enabled)
    {
        return;
    }

    firstManager.InvokeIdempotentAction("setup/log-services", SetupLogServices);

    void SetupLogServices()
    {
        var configSteps = new ConfigStepList();

        // Elasticsearch (the log store) is configured first, then the optional
        // Kibana dashboard, and finally the collector that feeds events in.
        AddElasticsearchSteps(configSteps);

        if (hive.Definition.Dashboard.Kibana)
        {
            AddKibanaSteps(configSteps);
        }

        AddCollectorSteps(configSteps);

        hive.Configure(configSteps);

        firstManager.Status = string.Empty;
    }
}
/// <summary>
/// Adds the steps required to configure the hive log collector which aggregates log events received
/// from all hive nodes via their [neon-log-host] containers.
/// </summary>
/// <param name="steps">The configuration step list.</param>
private void AddCollectorSteps(ConfigStepList steps)
{
    // Build the [docker service create] command for the collector service.
    var createCommand = new CommandBundle(
        "docker service create",
        "--name", "neon-log-collector",
        "--detach=false",
        "--mode", "global",
        "--restart-delay", hive.Definition.Docker.RestartDelay,
        "--endpoint-mode", "vip",
        "--network", $"{HiveConst.PrivateNetwork}",
        "--constraint", $"node.role==manager",
        "--mount", "type=bind,source=/etc/neon/host-env,destination=/etc/neon/host-env,readonly=true",
        "--log-driver", "json-file",    // Ensure that we don't log to the pipeline to avoid cascading events.
        ServiceHelper.ImagePlaceholderArg);

    ServiceHelper.AddServiceStartSteps(hive, steps, "neon-log-collector", hive.Definition.Image.LogCollector, createCommand);

    // Deploy the [neon-log-collector] traffic manager rule so the [neon-log-host]
    // containers will be able to reach the collectors via the private proxy.
    steps.Add(ActionStep.Create(hive.FirstManager.Name, "setup/neon-log-collection-lbrule",
        manager =>
        {
            manager.Status = "set neon-log-collector traffic manager rule";

            var collectorRule = new TrafficTcpRule()
            {
                Name   = "neon-log-collector",
                System = true,
                Log    = false      // This is important: we don't want to SPAM the log database with its own traffic.
            };

            collectorRule.Frontends.Add(
                new TrafficTcpFrontend()
                {
                    ProxyPort = HiveHostPorts.ProxyPrivateTcpLogCollector
                });

            collectorRule.Backends.Add(
                new TrafficTcpBackend()
                {
                    Server = "neon-log-collector",
                    Port   = NetworkPorts.TDAgentForward
                });

            hive.PrivateTraffic.SetRule(collectorRule);
        }));
}
/// <summary>
/// Appends the steps required to start a neonHIVE related Docker container and upload
/// a script to the target node to make it easy to restart the container manually or
/// for hive updates.
/// </summary>
/// <param name="hive">The target hive.</param>
/// <param name="steps">The target step list.</param>
/// <param name="node">The target hive node where the container will be started and the script uploaded.</param>
/// <param name="containerName">Identifies the container.</param>
/// <param name="image">The Docker image to be used by the container.</param>
/// <param name="command">The <c>docker run ...</c> command.</param>
/// <param name="runOptions">
/// Optional run options (defaults to <see cref="RunOptions.FaultOnError"/>).
/// NOTE(review): this parameter is currently unused by the method body; retained for
/// interface compatibility — confirm whether it should be passed through to the command step.
/// </param>
/// <remarks>
/// <para>
/// This method performs the following steps:
/// </para>
/// <list type="number">
/// <item>
/// Passes <paramref name="image"/> to <see cref="Program.ResolveDockerImage(string)"/> to
/// obtain the actual image to be started (presumably inside <c>CreateStartScript</c> — that
/// helper isn't visible here; verify).
/// </item>
/// <item>
/// Generates the first few lines of the script file that sets the
/// default image as the <c>TARGET_IMAGE</c> macro and then overrides
/// this with the script parameter (if there is one). We also add
/// a Docker command that pulls the image.
/// </item>
/// <item>
/// Appends the commands to the script, replacing any text that matches
/// <see cref="ImagePlaceholderArg"/> with <c>${TARGET_IMAGE}</c> to make it easy
/// for containers to be upgraded later.
/// </item>
/// <item>
/// Starts the container.
/// </item>
/// <item>
/// Uploads the generated script to the target node at
/// [<see cref="HiveHostFolders.Scripts"/>/<paramref name="containerName"/>.sh].
/// (Unlike <c>AddServiceStartSteps</c>, this uploads only to <paramref name="node"/>,
/// not to every manager.)
/// </item>
/// </list>
/// </remarks>
public static void AddContainerStartSteps(HiveProxy hive, ConfigStepList steps, SshProxy<NodeDefinition> node, string containerName, string image, IBashCommandFormatter command, RunOptions runOptions = RunOptions.FaultOnError)
{
    Covenant.Requires<ArgumentNullException>(hive != null);
    Covenant.Requires<ArgumentNullException>(steps != null);
    Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(containerName));
    Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(image));
    Covenant.Requires<ArgumentNullException>(command != null);

    // Generate the container start script.  The [true] argument selects
    // container (as opposed to service) script generation.

    var script = CreateStartScript(containerName, image, true, command);

    // Add steps to upload the script to the managers and then call the script
    // to create the container on the target node.

    var scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, $"{containerName}.sh");

    steps.Add(hive.GetFileUploadSteps(node, scriptPath, script, permissions: "740"));
    steps.Add(CommandStep.CreateIdempotentDocker(node.Name, $"setup/{containerName}", scriptPath));
}
/// <summary>
/// Appends the steps required to start a neonHIVE related Docker service and upload
/// a script to the hive managers to make it easy to restart the service manually or
/// for hive updates.
/// </summary>
/// <param name="hive">The target hive.</param>
/// <param name="steps">The target step list.</param>
/// <param name="serviceName">Identifies the service.</param>
/// <param name="image">The Docker image to be used by the service.</param>
/// <param name="command">The <c>docker service create ...</c> command.</param>
/// <param name="runOptions">Optional run options (defaults to <see cref="RunOptions.FaultOnError"/>).</param>
/// <remarks>
/// <para>
/// The generated script resolves <paramref name="image"/> via
/// <see cref="Program.ResolveDockerImage(string)"/>, exposes it as the <c>TARGET_IMAGE</c>
/// macro (overridable via a script parameter), and substitutes <c>${TARGET_IMAGE}</c> for
/// any occurrence of <see cref="ImagePlaceholderArg"/> in the command so the service can be
/// upgraded easily later.  The script is uploaded to every hive manager at
/// [<see cref="HiveHostFolders.Scripts"/>/<paramref name="serviceName"/>.sh] and then
/// executed once on the first manager to start the service.
/// </para>
/// </remarks>
public static void AddServiceStartSteps(HiveProxy hive, ConfigStepList steps, string serviceName, string image, IBashCommandFormatter command, RunOptions runOptions = RunOptions.FaultOnError)
{
    Covenant.Requires<ArgumentNullException>(hive != null);
    Covenant.Requires<ArgumentNullException>(steps != null);
    Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(serviceName));
    Covenant.Requires<ArgumentNullException>(!string.IsNullOrWhiteSpace(image));
    Covenant.Requires<ArgumentNullException>(command != null);

    // Generate the service start script ([false] --> service, not container).

    string startScript = CreateStartScript(serviceName, image, false, command);

    // Upload the script to every manager, then run it on the first manager
    // to actually create the service.

    string scriptPath = LinuxPath.Combine(HiveHostFolders.Scripts, $"{serviceName}.sh");

    steps.Add(hive.GetFileUploadSteps(hive.Managers, scriptPath, startScript, permissions: "740"));
    steps.Add(CommandStep.CreateIdempotentDocker(hive.FirstManager.Name, $"setup/{serviceName}", scriptPath));
}
/// <summary>
/// Adds the steps required to configure the Kibana Elasticsearch/logging user interface.
/// </summary>
/// <param name="steps">The configuration step list.</param>
private void AddKibanaSteps(ConfigStepList steps)
{
    // Kibana needs no special setup beyond launching its service on the
    // hive managers, pointed at the Elasticsearch log data endpoint.

    var kibanaCommand = new CommandBundle(
        "docker service create",
        "--name", "neon-log-kibana",
        "--detach=false",
        "--mode", "global",
        "--endpoint-mode", "vip",
        "--restart-delay", hive.Definition.Docker.RestartDelay,
        "--network", HiveConst.PrivateNetwork,
        "--constraint", $"node.role==manager",
        "--publish", $"{HiveHostPorts.Kibana}:{NetworkPorts.Kibana}",
        "--mount", "type=bind,source=/etc/neon/host-env,destination=/etc/neon/host-env,readonly=true",
        "--env", $"ELASTICSEARCH_URL={hive.Definition.LogEsDataUri}",
        "--log-driver", "json-file",    // Ensure that we don't log to the pipeline to avoid cascading events.
        ServiceHelper.ImagePlaceholderArg);

    ServiceHelper.AddServiceStartSteps(hive, steps, "neon-log-kibana", hive.Definition.Image.Kibana, kibanaCommand);
}
/// <summary>
/// Adds the steps to configure the stateful Elasticsearch instances used to persist the log data.
/// </summary>
/// <param name="steps">The configuration step list.</param>
private void AddElasticsearchSteps(ConfigStepList steps)
{
    // Collect the hive nodes labeled to host Elasticsearch log data.

    var esNodes = new List<SshProxy<NodeDefinition>>();

    foreach (var nodeDefinition in hive.Definition.Nodes.Where(n => n.Labels.LogEsData))
    {
        esNodes.Add(hive.GetNode(nodeDefinition.Name));
    }

    // Determine number of manager (master-eligible) nodes and the quorum size.
    // Note that we'll deploy an odd number of managers (capped at 5) to avoid
    // split-brain ties.

    var managerCount = Math.Min(esNodes.Count, 5);   // We shouldn't ever need more than 5 managers

    if (!NeonHelper.IsOdd(managerCount))
    {
        managerCount--;
    }

    var quorumCount = (managerCount / 2) + 1;

    // Sort the nodes by name and then separate the manager and
    // worker nodes (managers will be assigned to nodes that appear
    // first in the list).

    var managerEsNodes = new List<SshProxy<NodeDefinition>>();
    var normalEsNodes = new List<SshProxy<NodeDefinition>>();

    esNodes = esNodes.OrderBy(n => n.Name).ToList();

    foreach (var esNode in esNodes)
    {
        if (managerEsNodes.Count < managerCount)
        {
            managerEsNodes.Add(esNode);
        }
        else
        {
            normalEsNodes.Add(esNode);
        }
    }

    // Figure out how much RAM to allocate to the Elasticsearch Docker containers
    // as well as Java heap within.  The guidance is to set the heap size to half
    // the container RAM up to a maximum of 31GB.
    //
    // NOTE(review): [esHeapBytes] is computed but never referenced below — the heap
    // may be sized by the image from the container memory limit instead; confirm
    // whether this should be passed as an environment variable.

    var esContainerRam = hive.Definition.Log.EsMemoryBytes;
    var esHeapBytes = Math.Min(esContainerRam / 2, 31L * NeonHelper.Giga);

    // We're going to use explicit docker commands to deploy the Elasticsearch cluster
    // log storage containers.
    //
    // We're mounting three volumes to the container:
    //
    //      /etc/neon/host-env           - Generic host specific environment variables
    //      /etc/neon/env-log-esdata     - Elasticsearch node host specific environment variables
    //      neon-log-esdata-#            - Persistent Elasticsearch data folder

    // Build the comma-separated [address:port] list of master-eligible nodes used
    // for cluster bootstrap discovery.

    var esBootstrapNodes = new StringBuilder();

    foreach (var esMasterNode in managerEsNodes)
    {
        esBootstrapNodes.AppendWithSeparator($"{esMasterNode.PrivateAddress}:{HiveHostPorts.LogEsDataTcp}", ",");
    }

    // Create a data volume for each Elasticsearch node and then start the node container.

    for (int i = 0; i < esNodes.Count; i++)
    {
        var esNode = esNodes[i];

        // NOTE(review): the comment above describes per-node [neon-log-esdata-#] volumes,
        // but [containerName] does not include the index [i], so every node uses the same
        // volume/container name.  Each container runs on a different host so this may be
        // intentional — verify against the volume-create script.
        var containerName = $"neon-log-esdata";
        var isMaster = managerEsNodes.Contains(esNode) ? "true" : "false";
        var volumeCommand = CommandStep.CreateSudo(esNode.Name, "docker-volume-create", containerName);

        steps.Add(volumeCommand);

        ServiceHelper.AddContainerStartSteps(hive, steps, esNode, containerName, hive.Definition.Image.Elasticsearch,
            new CommandBundle(
                "docker run",
                "--name", containerName,
                "--detach",
                "--restart", "always",
                "--volume", "/etc/neon/host-env:/etc/neon/host-env:ro",
                "--volume", $"{containerName}:/mnt/esdata",
                "--env", $"ELASTICSEARCH_CLUSTER={hive.Definition.Datacenter}.{hive.Definition.Name}.neon-log-esdata",
                "--env", $"ELASTICSEARCH_NODE_MASTER={isMaster}",
                "--env", $"ELASTICSEARCH_NODE_DATA=true",
                "--env", $"ELASTICSEARCH_NODE_COUNT={esNodes.Count}",
                "--env", $"ELASTICSEARCH_HTTP_PORT={HiveHostPorts.LogEsDataHttp}",
                "--env", $"ELASTICSEARCH_TCP_PORT={HiveHostPorts.LogEsDataTcp}",
                "--env", $"ELASTICSEARCH_QUORUM={quorumCount}",
                "--env", $"ELASTICSEARCH_BOOTSTRAP_NODES={esBootstrapNodes}",
                "--env", $"ES_JAVA_OPTS=-XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap",
                "--memory", $"{esContainerRam / NeonHelper.Mega}M",
                "--memory-reservation", $"{esContainerRam / NeonHelper.Mega}M",
                "--memory-swappiness", "0",
                "--network", "host",
                "--log-driver", "json-file",    // Ensure that we don't log to the pipeline to avoid cascading events.
                ServiceHelper.ImagePlaceholderArg));
    }

    // Configure a private hive proxy route to the Elasticsearch nodes.

    steps.Add(ActionStep.Create(hive.FirstManager.Name, "setup/elasticsearch-lbrule",
        node =>
        {
            var rule = new TrafficHttpRule()
            {
                Name = "neon-log-esdata",
                System = true,
                Log = false,    // This is important: we don't want to SPAM the log database with its own traffic.
                Resolver = null
            };

            rule.Frontends.Add(
                new TrafficHttpFrontend()
                {
                    ProxyPort = HiveHostPorts.ProxyPrivateHttpLogEsData
                });

            foreach (var esNode in esNodes)
            {
                rule.Backends.Add(
                    new TrafficHttpBackend()
                    {
                        Server = esNode.Metadata.PrivateAddress.ToString(),
                        Port = HiveHostPorts.LogEsDataHttp
                    });
            }

            hive.PrivateTraffic.SetRule(rule);
        }));

    // Wait for the elasticsearch cluster to become ready and then save the
    // [logstash-*] template.  We need to do this before [neon-log-collector]
    // is started so we'll be sure that no indexes will be created before
    // we have a chance to persist the pattern.
    //
    // This works because [neon-log-collector] is the main service responsible
    // for persisting events to this index.

    steps.Add(ActionStep.Create(hive.FirstManager.Name, operationName: null,
        node =>
        {
            node.Status = "wait for elasticsearch cluster";

            using (var jsonClient = new JsonClient())
            {
                var baseLogEsDataUri = hive.Definition.LogEsDataUri;
                var timeout = TimeSpan.FromMinutes(5);
                var timeoutTime = DateTime.UtcNow + timeout;
                var esNodeCount = hive.Definition.Nodes.Count(n => n.Labels.LogEsData);

                // Wait for the Elasticsearch cluster.  We disable retries because
                // the polling loop below implements its own retry/timeout handling.

                jsonClient.UnsafeRetryPolicy = NoRetryPolicy.Instance;

                while (true)
                {
                    try
                    {
                        var response = jsonClient.GetUnsafeAsync($"{baseLogEsDataUri}/_cluster/health").Result;

                        if (response.IsSuccess)
                        {
                            var clusterStatus = response.AsDynamic();
                            var status = (string)(clusterStatus.status);

                            status = status.ToUpperInvariant();

                            node.Status = $"wait for [neon-log-esdata] cluster: [status={status}] [{clusterStatus.number_of_nodes}/{esNodeCount} nodes ready])";

                            // $todo(jeff.lill):
                            //
                            // We're accepting YELLOW status here due to this issue:
                            //
                            //      https://github.com/jefflill/NeonForge/issues/257

                            if ((status == "GREEN" || status == "YELLOW") && clusterStatus.number_of_nodes == esNodeCount)
                            {
                                node.Status = "elasticsearch cluster is ready";
                                break;
                            }
                        }
                    }
                    catch
                    {
                        // NOTE(review): the timeout is only checked when the health
                        // request throws.  If the cluster responds successfully but
                        // never reaches GREEN/YELLOW with all nodes, this loop will
                        // poll forever — confirm whether that's intended.

                        if (DateTime.UtcNow >= timeoutTime)
                        {
                            node.Fault($"[neon-log-esdata] cluster not ready after waiting [{timeout}].");
                            return;
                        }
                    }

                    Thread.Sleep(TimeSpan.FromSeconds(1));
                }

                // Save the [logstash-*] template pattern so the index mapping is
                // in place before the collector creates any indexes.

                var templatePattern = ResourceFiles.Root.GetFolder("Elasticsearch").GetFile("logstash-template.json").Contents;

                jsonClient.PutAsync($"{baseLogEsDataUri}/_template/logstash-*", templatePattern).Wait();
            }
        }));
}