Beispiel #1
0
        /// <summary>
        /// Queues the steps that deploy the <b>neon-log-collector</b> service, which aggregates
        /// the log events forwarded by the [neon-log-host] containers running on the hive nodes,
        /// and then publishes the private traffic manager rule used to reach that service.
        /// </summary>
        /// <param name="steps">The configuration step list the new steps are appended to.</param>
        private void AddCollectorSteps(ConfigStepList steps)
        {
            const string collectorName = "neon-log-collector";

            // Step 1: Create the collector as a global Docker service that is
            //         constrained to the manager nodes.

            var collectorImage = hive.Definition.Image.LogCollector;
            var createCommand  = new CommandBundle(
                "docker service create",
                "--name", collectorName,
                "--detach=false",
                "--mode", "global",
                "--restart-delay", hive.Definition.Docker.RestartDelay,
                "--endpoint-mode", "vip",
                "--network", $"{HiveConst.PrivateNetwork}",
                "--constraint", "node.role==manager",
                "--mount", "type=bind,source=/etc/neon/host-env,destination=/etc/neon/host-env,readonly=true",
                "--log-driver", "json-file",    // Keep the collector's own output out of the log pipeline to avoid cascading events.
                ServiceHelper.ImagePlaceholderArg);

            ServiceHelper.AddServiceStartSteps(hive, steps, collectorName, collectorImage, createCommand);

            // Step 2: Publish a private TCP traffic manager rule (from the first manager)
            //         so the [neon-log-host] containers have a route to the collectors.

            var ruleStep = ActionStep.Create(hive.FirstManager.Name, "setup/neon-log-collection-lbrule",
                firstManager =>
                {
                    firstManager.Status = "set neon-log-collector traffic manager rule";

                    // Logging is disabled for this rule on purpose: we don't want the
                    // log database to be flooded with records of its own traffic.

                    var collectorRule = new TrafficTcpRule()
                    {
                        Name   = collectorName,
                        System = true,
                        Log    = false
                    };

                    var frontend = new TrafficTcpFrontend()
                    {
                        ProxyPort = HiveHostPorts.ProxyPrivateTcpLogCollector
                    };

                    collectorRule.Frontends.Add(frontend);

                    var backend = new TrafficTcpBackend()
                    {
                        Server = collectorName,
                        Port   = NetworkPorts.TDAgentForward
                    };

                    collectorRule.Backends.Add(backend);

                    hive.PrivateTraffic.SetRule(collectorRule);
                });

            steps.Add(ruleStep);
        }
Beispiel #2
0
        /// <summary>
        /// Adds the steps that deploy the stateful Elasticsearch containers used to persist the log
        /// data, publish a private traffic manager rule routing HTTP traffic to them, and finally
        /// wait for the cluster to become ready before saving the <b>logstash-*</b> index template.
        /// </summary>
        /// <param name="steps">The configuration step list the new steps are appended to.</param>
        /// <remarks>
        /// The wait step faults the node if the cluster isn't ready (GREEN or YELLOW status with
        /// all expected nodes joined) within 5 minutes, regardless of whether the health requests
        /// fail or succeed with a "not ready" answer.
        /// </remarks>
        private void AddElasticsearchSteps(ConfigStepList steps)
        {
            // Collect the nodes labeled to host Elasticsearch data and sort them by
            // name so the master-eligible nodes are selected deterministically.

            var esNodes = new List<SshProxy<NodeDefinition>>();

            foreach (var nodeDefinition in hive.Definition.Nodes.Where(n => n.Labels.LogEsData))
            {
                esNodes.Add(hive.GetNode(nodeDefinition.Name));
            }

            esNodes = esNodes.OrderBy(n => n.Name).ToList();

            // Determine number of manager nodes and the quorum size.
            // Note that we'll deploy an odd number of managers.

            var managerCount = Math.Min(esNodes.Count, 5);   // We shouldn't ever need more than 5 managers

            if (!NeonHelper.IsOdd(managerCount))
            {
                managerCount--;
            }

            var quorumCount = (managerCount / 2) + 1;

            // Managers are assigned to the nodes that appear first in the sorted list.
            // [Take()] yields an empty sequence for a non-positive count, so this is
            // safe even when no nodes are labeled for Elasticsearch.

            var managerEsNodes = esNodes.Take(managerCount).ToList();

            // The container RAM limit comes from the hive definition.  The Java heap is not
            // sized explicitly here: the [ES_JAVA_OPTS] passed below enable the JVM's cgroup
            // memory limit based heap sizing so the heap follows the container limit.

            var esContainerRam = hive.Definition.Log.EsMemoryBytes;

            // We're going to use explicit docker commands to deploy the Elasticsearch cluster
            // log storage containers.
            //
            // We're mounting two volumes to the container:
            //
            //      /etc/neon/host-env         - Generic host specific environment variables (read-only)
            //      neon-log-esdata            - Persistent Elasticsearch data folder (mounted at /mnt/esdata)

            var esBootstrapNodes = new StringBuilder();

            foreach (var esMasterNode in managerEsNodes)
            {
                esBootstrapNodes.AppendWithSeparator($"{esMasterNode.PrivateAddress}:{HiveHostPorts.LogEsDataTcp}", ",");
            }

            // Create a data volume for each Elasticsearch node and then start the node container.

            foreach (var esNode in esNodes)
            {
                var containerName = "neon-log-esdata";
                var isMaster      = managerEsNodes.Contains(esNode) ? "true" : "false";
                var volumeCommand = CommandStep.CreateSudo(esNode.Name, "docker-volume-create", containerName);

                steps.Add(volumeCommand);

                ServiceHelper.AddContainerStartSteps(hive, steps, esNode, containerName, hive.Definition.Image.Elasticsearch,
                                                     new CommandBundle(
                                                         "docker run",
                                                         "--name", containerName,
                                                         "--detach",
                                                         "--restart", "always",
                                                         "--volume", "/etc/neon/host-env:/etc/neon/host-env:ro",
                                                         "--volume", $"{containerName}:/mnt/esdata",
                                                         "--env", $"ELASTICSEARCH_CLUSTER={hive.Definition.Datacenter}.{hive.Definition.Name}.neon-log-esdata",
                                                         "--env", $"ELASTICSEARCH_NODE_MASTER={isMaster}",
                                                         "--env", "ELASTICSEARCH_NODE_DATA=true",
                                                         "--env", $"ELASTICSEARCH_NODE_COUNT={esNodes.Count}",
                                                         "--env", $"ELASTICSEARCH_HTTP_PORT={HiveHostPorts.LogEsDataHttp}",
                                                         "--env", $"ELASTICSEARCH_TCP_PORT={HiveHostPorts.LogEsDataTcp}",
                                                         "--env", $"ELASTICSEARCH_QUORUM={quorumCount}",
                                                         "--env", $"ELASTICSEARCH_BOOTSTRAP_NODES={esBootstrapNodes}",
                                                         "--env", "ES_JAVA_OPTS=-XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap",
                                                         "--memory", $"{esContainerRam / NeonHelper.Mega}M",
                                                         "--memory-reservation", $"{esContainerRam / NeonHelper.Mega}M",
                                                         "--memory-swappiness", "0",
                                                         "--network", "host",
                                                         "--log-driver", "json-file", // Ensure that we don't log to the pipeline to avoid cascading events.
                                                         ServiceHelper.ImagePlaceholderArg));
            }

            // Configure a private hive proxy route to the Elasticsearch nodes.

            steps.Add(ActionStep.Create(hive.FirstManager.Name, "setup/elasticsearch-lbrule",
                                        node =>
            {
                var rule = new TrafficHttpRule()
                {
                    Name     = "neon-log-esdata",
                    System   = true,
                    Log      = false,       // This is important: we don't want to SPAM the log database with its own traffic.
                    Resolver = null
                };

                rule.Frontends.Add(
                    new TrafficHttpFrontend()
                {
                    ProxyPort = HiveHostPorts.ProxyPrivateHttpLogEsData
                });

                // Every Elasticsearch node is a backend, addressed by its private IP.

                foreach (var esNode in esNodes)
                {
                    rule.Backends.Add(
                        new TrafficHttpBackend()
                    {
                        Server = esNode.Metadata.PrivateAddress.ToString(),
                        Port   = HiveHostPorts.LogEsDataHttp
                    });
                }

                hive.PrivateTraffic.SetRule(rule);
            }));

            // Wait for the elasticsearch cluster to become ready and then save the
            // [logstash-*] template.  We need to do this before [neon-log-collector]
            // is started so we'll be sure that no indexes will be created before
            // we have a chance to persist the pattern.
            //
            // This works because [neon-log-collector] is the main service responsible
            // for persisting events to this index.

            steps.Add(ActionStep.Create(hive.FirstManager.Name, operationName: null,
                                        node =>
            {
                node.Status = "wait for elasticsearch cluster";

                using (var jsonClient = new JsonClient())
                {
                    var baseLogEsDataUri = hive.Definition.LogEsDataUri;
                    var timeout          = TimeSpan.FromMinutes(5);
                    var timeoutTime      = DateTime.UtcNow + timeout;
                    var esNodeCount      = hive.Definition.Nodes.Count(n => n.Labels.LogEsData);

                    // Wait for the Elasticsearch cluster.  Retries are handled by the
                    // polling loop below, so the client's own retry policy is disabled.

                    jsonClient.UnsafeRetryPolicy = NoRetryPolicy.Instance;

                    while (true)
                    {
                        try
                        {
                            var response = jsonClient.GetUnsafeAsync($"{baseLogEsDataUri}/_cluster/health").Result;

                            if (response.IsSuccess)
                            {
                                var clusterStatus = response.AsDynamic();
                                var status        = (string)(clusterStatus.status);

                                status      = status.ToUpperInvariant();
                                node.Status = $"wait for [neon-log-esdata] cluster: [status={status}] [{clusterStatus.number_of_nodes}/{esNodeCount} nodes ready]";

                                // $todo(jeff.lill):
                                //
                                // We're accepting YELLOW status here due to this issue:
                                //
                                //      https://github.com/jefflill/NeonForge/issues/257

                                if ((status == "GREEN" || status == "YELLOW") && clusterStatus.number_of_nodes == esNodeCount)
                                {
                                    node.Status = "elasticsearch cluster is ready";
                                    break;
                                }
                            }
                        }
                        catch
                        {
                            // Intentionally ignored: the cluster may not be reachable or may
                            // return an unexpected payload while it's still starting.  We'll
                            // retry until the timeout below expires.
                        }

                        // Enforce the timeout on every iteration, not just when a request
                        // throws.  Otherwise a cluster that responds but never becomes ready
                        // (e.g. RED status or missing nodes) would keep this step waiting forever.

                        if (DateTime.UtcNow >= timeoutTime)
                        {
                            node.Fault($"[neon-log-esdata] cluster not ready after waiting [{timeout}].");
                            return;
                        }

                        Thread.Sleep(TimeSpan.FromSeconds(1));
                    }

                    // Save the [logstash-*] template pattern.

                    var templatePattern = ResourceFiles.Root.GetFolder("Elasticsearch").GetFile("logstash-template.json").Contents;

                    jsonClient.PutAsync($"{baseLogEsDataUri}/_template/logstash-*", templatePattern).Wait();
                }
            }));
        }