/// <summary>
/// Implements the service as a <see cref="Task"/>.
/// </summary>
/// <remarks>
/// Each poll gathers the hive node addresses plus the DNS targets stored in
/// Consul, renders them as a canonical <c>hosts.txt</c> document and writes
/// that document (together with its MD5) back to Consul, but only when the
/// MD5 differs from the one already persisted there.
/// </remarks>
/// <returns>The <see cref="Task"/>.</returns>
private static async Task RunAsync()
{
    var periodicTask = new AsyncPeriodicTask(
        pollInterval,
        onTaskAsync:
            async () =>
            {
                log.LogDebug(() => "Starting poll");

                // We're going to collect the [hostname --> address] mappings into
                // a specialized (semi-threadsafe) dictionary.

                var hostAddresses = new HostAddresses();

                // Retrieve the current hive definition from Consul if we don't already
                // have it or it's different from what we've cached.

                hiveDefinition = await HiveHelper.GetDefinitionAsync(hiveDefinition, terminator.CancellationToken);

                log.LogDebug(() => $"Hive has [{hiveDefinition.NodeDefinitions.Count}] nodes.");

                // Add the [NAME.HIVENAME.nhive.io] definitions for each cluster node.

                foreach (var node in hiveDefinition.Nodes)
                {
                    hostAddresses.Add($"{node.Name}.{hiveDefinition.Name}.nhive.io", IPAddress.Parse(node.PrivateAddress));
                }

                // Read the DNS entry definitions from Consul and add the appropriate
                // host/addresses based on health checks, etc.  A missing key is treated
                // as "no targets".

                var targetsResult = await consul.KV.ListOrDefault<DnsEntry>(HiveConst.ConsulDnsEntriesKey + "/", terminator.CancellationToken);
                var targets       = targetsResult == null ? new List<DnsEntry>() : targetsResult.ToList();

                log.LogDebug(() => $"Consul has [{targets.Count}] DNS targets.");

                await ResolveTargetsAsync(hostAddresses, targets);

                // Generate a canonical [hosts.txt] file by sorting host entries by
                // hostname and then by IP address.
                //
                // Unhealthy hosts will be assigned the unrouteable [0.0.0.0] address.
                // The reason for this is subtle but super important.
                //
                // If we didn't do this, the DNS host would likely be resolved by a
                // public DNS service, perhaps returning the IP address of a production
                // endpoint.
                //
                // This could cause a disaster if the whole purpose of having a local
                // DNS host defined is to redirect test traffic to a test service.  If
                // the test service endpoints didn't report as healthy and [0.0.0.0]
                // wasn't set, then test traffic could potentially hit the production
                // endpoint and do serious damage.

                var sbHosts      = new StringBuilder();
                var mappingCount = 0;

                foreach (var host in hostAddresses.OrderBy(h => h.Key))
                {
                    foreach (var address in host.Value.OrderBy(a => a.ToString()))
                    {
                        sbHosts.AppendLineLinux($"{address,-15} {host.Key}");
                        mappingCount++;
                    }
                }

                // A target is unhealthy when it ended up with no addresses at all.

                var unhealthyTargets = targets
                    .Where(t => !hostAddresses.ContainsKey(t.Hostname) || hostAddresses[t.Hostname].Count == 0)
                    .ToList();

                if (unhealthyTargets.Count > 0)
                {
                    // Every line is written via [AppendLineLinux()] so that the generated
                    // text (and therefore its MD5) does not depend on [Environment.NewLine].

                    sbHosts.AppendLineLinux(string.Empty);
                    sbHosts.AppendLineLinux($"# [{unhealthyTargets.Count}] unhealthy DNS hosts:");
                    sbHosts.AppendLineLinux(string.Empty);

                    var unhealthyAddress = "0.0.0.0";

                    // Sort by hostname.  Sorting the [DnsEntry] instances themselves
                    // requires the type to be comparable and otherwise throws as soon
                    // as there are two or more unhealthy targets.

                    foreach (var target in unhealthyTargets.OrderBy(t => t.Hostname))
                    {
                        sbHosts.AppendLineLinux($"{unhealthyAddress,-15} {target.Hostname}");
                    }
                }

                // Compute the MD5 hash and compare it to the hash persisted to
                // Consul (if any) to determine whether we need to update the
                // answers in Consul.

                var hostsTxt   = sbHosts.ToString();
                var hostsMD5   = NeonHelper.ComputeMD5(hostsTxt);
                var currentMD5 = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsMd5Key, terminator.CancellationToken) ?? string.Empty;

                if (hostsMD5 != currentMD5)
                {
                    log.LogDebug(() => "DNS answers have changed.");
                    log.LogDebug(() => $"Writing [{mappingCount}] DNS answers to Consul.");

                    // Update both Consul keys in a single transaction so the hosts
                    // text and its MD5 are written together.

                    var operations = new List<KVTxnOp>()
                    {
                        new KVTxnOp(HiveConst.ConsulDnsHostsMd5Key, KVTxnVerb.Set) { Value = Encoding.UTF8.GetBytes(hostsMD5) },
                        new KVTxnOp(HiveConst.ConsulDnsHostsKey, KVTxnVerb.Set) { Value = Encoding.UTF8.GetBytes(hostsTxt) }
                    };

                    await consul.KV.Txn(operations, terminator.CancellationToken);
                }

                log.LogDebug(() => "Finished poll");

                // NOTE(review): [false] presumably tells the periodic task to keep
                // polling; confirm against [AsyncPeriodicTask].

                return false;
            },
        onExceptionAsync:
            async e =>
            {
                log.LogError(e);
                return await Task.FromResult(false);
            },
        onTerminateAsync:
            async () =>
            {
                log.LogInfo(() => "Terminating");
                await Task.CompletedTask;
            });

    terminator.AddDisposable(periodicTask);
    await periodicTask.Run();
}
/// <summary>
/// Periodically queries Docker swarm for the hive nodes, rebuilds the hive
/// definition from what it finds and persists the definition to Consul
/// whenever its hash differs from the cached one.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task SwarmPollerAsync()
{
    var poller = new AsyncPeriodicTask(
        swarmPollInterval,
        onTaskAsync:
            async () =>
            {
                try
                {
                    log.LogDebug(() => "SWARM-POLLER: Polling");

                    // Refresh the cached hive definition from Consul when we have
                    // none yet or when the persisted one differs from our copy.

                    cachedHiveDefinition = await HiveHelper.GetDefinitionAsync(cachedHiveDefinition, terminator.CancellationToken);

                    // Ask Docker for the current swarm nodes.

                    log.LogDebug(() => $"SWARM-POLLER: Querying [{docker.Settings.Uri}]");

                    var dockerNodes = await docker.NodeListAsync();

                    // Start from a clone of the cached definition, replace its node
                    // definitions with ones parsed from the swarm node labels and
                    // later compare hashes to decide whether Consul needs updating.

                    var latestDefinition = NeonHelper.JsonClone<HiveDefinition>(cachedHiveDefinition);

                    latestDefinition.NodeDefinitions.Clear();

                    foreach (var dockerNode in dockerNodes)
                    {
                        var parsedNode = NodeDefinition.ParseFromLabels(dockerNode.Labels);

                        parsedNode.Name = dockerNode.Hostname;
                        latestDefinition.NodeDefinitions.Add(parsedNode.Name, parsedNode);
                    }

                    log.LogDebug(() => $"SWARM-POLLER: [{latestDefinition.Managers.Count()}] managers and [{latestDefinition.Workers.Count()}] workers in current hive definition.");

                    // Hive pets are not swarm members, so Docker reports nothing about
                    // them.  Their definitions are read from a global Consul key instead.
                    // A missing key is treated as "no pets" for backwards compatibility
                    // and robustness.

                    var petsKey  = $"{HiveConst.GlobalKey}/{HiveGlobals.PetsDefinition}";
                    var petsJson = await HiveHelper.Consul.KV.GetStringOrDefault(petsKey, terminator.CancellationToken);

                    if (petsJson == null)
                    {
                        log.LogDebug(() => $"SWARM-POLLER: [{petsKey}] Consul key not found. Assuming no pets.");
                    }
                    else if (string.IsNullOrWhiteSpace(petsJson))
                    {
                        log.LogDebug(() => $"SWARM-POLLER: [{petsKey}] is empty.");
                    }
                    else
                    {
                        // Merge the pet node definitions into the new hive definition.

                        var petDefinitions = NeonHelper.JsonDeserialize<Dictionary<string, NodeDefinition>>(petsJson);

                        foreach (var pet in petDefinitions)
                        {
                            latestDefinition.NodeDefinitions.Add(pet.Key, pet.Value);
                        }

                        log.LogDebug(() => $"SWARM-POLLER: [{petsKey}] defines [{petDefinitions.Count}] pets.");
                    }

                    // Attach the hive summary before hashing.

                    latestDefinition.Summary = HiveSummary.FromHive(hive, latestDefinition);

                    // Hash the new definition and compare against the cached one.

                    latestDefinition.ComputeHash();

                    var definitionChanged = latestDefinition.Hash != cachedHiveDefinition.Hash;

                    if (!definitionChanged)
                    {
                        log.LogDebug(() => "SWARM-POLLER: Hive definition is UNCHANGED.");
                    }
                    else
                    {
                        log.LogInfo(() => "SWARM-POLLER: Hive definition has CHANGED. Updating Consul.");

                        await HiveHelper.PutDefinitionAsync(latestDefinition, cancellationToken: terminator.CancellationToken);

                        cachedHiveDefinition = latestDefinition;
                    }
                }
                catch (KeyNotFoundException)
                {
                    // Seen when no hive definition has been persisted to the hive.
                    // That is a serious problem: the definition is configured during
                    // setup and should always be present in Consul.

                    log.LogError(() => $"SWARM-POLLER: No hive definition has been found at [{hiveDefinitionKey}] in Consul. This is a serious error that will have to be corrected manually.");
                }

                log.LogDebug(() => "SWARM-POLLER: Finished Poll");

                return false;
            },
        onExceptionAsync:
            async e =>
            {
                log.LogError("SWARM-POLLER", e);
                return await Task.FromResult(false);
            },
        onTerminateAsync:
            async () =>
            {
                log.LogInfo(() => "SWARM-POLLER: Terminating");
                await Task.CompletedTask;
            });

    terminator.AddDisposable(poller);
    await poller.Run();
}