/// <summary>
/// Periodically logs an error while <see cref="errorTimeUtc"/> is later than
/// <see cref="DateTime.MinValue"/>, which indicates that the service was unable
/// to update the HAProxy configuration and is still running with out-of-date
/// settings.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task ErrorPollerAsync()
{
    var poller = new AsyncPeriodicTask(
        warnInterval,
        onTaskAsync: async () =>
        {
            // [DateTime.MinValue] is the "no error recorded" value, so only
            // later timestamps are worth reporting.

            var configIsStale = errorTimeUtc > DateTime.MinValue;

            if (configIsStale)
            {
                log.LogError(() => $"HAProxy is running with an out-of-date configuration due to a previous error at [{errorTimeUtc}] UTC.");
            }

            await Task.CompletedTask;
            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError("ERROR-POLLER", e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "ERROR-POLLER: Terminating");

            await Task.CompletedTask;
        });

    // Let the terminator dispose the task and then run it to completion.

    terminator.AddDisposable(poller);
    await poller.Run();
}
public async Task ExceptionAsync()
{
    // The task callback throws on every call.  The exception callback
    // records the exception and asks for termination (by returning TRUE)
    // once the task callback has been invoked the expected number of times.

    const int expectedCalls = 5;

    int       taskCalls = 0;
    Exception exception = null;

    var periodicTask = new AsyncPeriodicTask(
        interval: TimeSpan.FromMilliseconds(500),
        onTaskAsync: async () =>
        {
            taskCalls += 1;

            await Task.CompletedTask;
            throw new TimeoutException();
        },
        onExceptionAsync: async e =>
        {
            exception = e;

            return await Task.FromResult(taskCalls == expectedCalls);
        });

    await periodicTask.Run();

    Assert.Equal(expectedCalls, taskCalls);
    Assert.IsType<TimeoutException>(exception);
}
public async Task TerminateViaTaskAsync()
{
    // The task callback requests termination right away by returning TRUE.
    // The termination callback must run and the exception callback must not.

    bool      terminated = false;
    Exception exception  = null;

    var periodicTask = new AsyncPeriodicTask(
        interval: TimeSpan.FromMilliseconds(500),
        onTaskAsync: async () =>
        {
            return await Task.FromResult(true);
        },
        onExceptionAsync: async e =>
        {
            exception = e;

            return await Task.FromResult(false);
        },
        onTerminateAsync: async () =>
        {
            await Task.CompletedTask;

            terminated = true;
        });

    await periodicTask.Run();

    Assert.True(terminated);
    Assert.Null(exception);
}
public async Task TerminateViaExceptionHandlerAsync()
{
    // The task callback throws and the exception callback answers TRUE.
    // The termination callback must run and the thrown exception must have
    // been passed to the exception callback.

    bool      terminated = false;
    Exception exception  = null;

    var periodicTask = new AsyncPeriodicTask(
        interval: TimeSpan.FromMilliseconds(500),
        onTaskAsync: async () =>
        {
            await Task.CompletedTask;
            throw new TimeoutException();
        },
        onExceptionAsync: async e =>
        {
            exception = e;

            return await Task.FromResult(true);
        },
        onTerminateAsync: async () =>
        {
            await Task.CompletedTask;

            terminated = true;
        });

    await periodicTask.Run();

    Assert.True(terminated);
    Assert.IsType<TimeoutException>(exception);
}
/// <summary>
/// Periodically publishes a <see cref="ProxyRegenerateMessage"/> to the
/// <b>neon-proxy-manager</b> service, which then regenerates the public and
/// private proxy related configurations.  This is a fail-safe ensuring that the
/// proxy configurations eventually converge, even if proxy change notifications
/// were lost somehow.  It also gives <b>neon-proxy-manager</b> an opportunity to
/// verify the traffic manager rules and to check for expired or expiring TLS
/// certificates so that warnings can be logged.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task ProxyUpdaterAsync()
{
    var updater = new AsyncPeriodicTask(
        proxyUpdateInterval,
        onTaskAsync: async () =>
        {
            log.LogInfo(() => $"PROXY-UPDATER: Publish: [{nameof(ProxyRegenerateMessage)}(\"fail-safe\") --> {proxyNotifyChannel.Name}]");

            var regenerateMessage = new ProxyRegenerateMessage();

            regenerateMessage.Reason = "[neon-hive-manager]: fail-safe";

            proxyNotifyChannel.Publish(regenerateMessage);

            await Task.CompletedTask;
            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError("PROXY-UPDATER", e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "PROXY-UPDATER: Terminating");

            await Task.CompletedTask;
        });

    // Let the terminator dispose the task and then run it to completion.

    terminator.AddDisposable(updater);
    await updater.Run();
}
/// <summary>
/// Watches for changes to the hive's manager nodes.  When managers are added or
/// removed the process is terminated so that Docker will restart the service
/// and it can begin handling the changes.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task ManagerWatcherAsync()
{
    var watcher = new AsyncPeriodicTask(
        managerTopologyInterval,
        onTaskAsync: async () =>
        {
            log.LogDebug(() => "MANAGER-WATCHER: Polling for hive manager changes.");

            var latestVaultUris = await GetVaultUrisAsync();

            // A different URI count is a change by itself.  Otherwise compare
            // the two lists position by position and stop at the first mismatch.

            var changed = vaultUris.Count != latestVaultUris.Count;
            var index   = 0;

            while (!changed && index < vaultUris.Count)
            {
                changed = vaultUris[index] != latestVaultUris[index];
                index++;
            }

            if (!changed)
            {
                log.LogDebug(() => "MANAGER-WATCHER: No manager changes detected.");
            }
            else
            {
                log.LogInfo("MANAGER-WATCHER: Detected one or more hive manager node changes.");
                log.LogInfo("MANAGER-WATCHER: Exiting the service so that Docker will restart it to pick up the manager node changes.");
                terminator.Exit();
            }

            log.LogDebug(() => "MANAGER-WATCHER: Poll finished.");

            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError("MANAGER-WATCHER", e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "MANAGER-WATCHER: Terminating");

            await Task.CompletedTask;
        });

    // Let the terminator dispose the task and then run it to completion.

    terminator.AddDisposable(watcher);
    await watcher.Run();
}
public async Task TerminateViaExternalCancellationAsync()
{
    // Cancel the periodic task from the outside via its cancellation token
    // source and verify that the termination callback runs while the
    // exception callback is never called.

    bool      terminated = false;
    Exception exception  = null;

    var periodicTask = new AsyncPeriodicTask(
        interval: TimeSpan.FromMilliseconds(500),
        onTaskAsync: async () =>
        {
            return await Task.FromResult(false);
        },
        onExceptionAsync: async e =>
        {
            exception = e;

            return await Task.FromResult(false);
        },
        onTerminateAsync: async () =>
        {
            await Task.CompletedTask;

            terminated = true;
        });

    // Start the periodic task first and then a second task that cancels
    // it after a short delay.

    var runTask    = periodicTask.Run();
    var cancelTask = Task.Run(
        async () =>
        {
            await Task.Delay(TimeSpan.FromSeconds(2));
            periodicTask.CancellationTokenSource.Cancel();
        });

    await NeonHelper.WaitAllAsync(new Task[] { runTask, cancelTask }, TimeSpan.FromSeconds(10));

    Assert.True(terminated);
    Assert.Null(exception);
}
/// <summary>
/// Applies the initial Varnish configuration from Consul and Vault settings and
/// then listens for <see cref="ProxyUpdateMessage"/> messages on the
/// <see cref="HiveMQChannels.ProxyNotify"/> channel, broadcast by
/// <b>neon-proxy-manager</b> to signal that the configuration has been updated.
/// </summary>
/// <remarks>
/// <para>
/// This method will terminate the service with an error if the configuration could
/// not be retrieved or applied on the first attempt, since this very likely
/// indicates a larger problem with the hive (e.g. Consul is down).
/// </para>
/// <para>
/// If Varnish was configured successfully on the first attempt, subsequent failures
/// will be logged as warnings but the service will continue running with the
/// out-of-date configuration to provide some resilience for running hive services.
/// </para>
/// </remarks>
/// <returns>The tracking <see cref="Task"/>.</returns>
private async static Task VarnishShim()
{
    // Configure Varnish right away so that it's started immediately.

    await ConfigureVarnish();

    // Start handling [ProxyUpdateMessage] notifications.  The handler decides
    // whether a message is meant for this service instance.

    StartNotifyHandler();

    // Restart the [ProxyUpdateMessage] listener with the new settings whenever
    // the HiveMQ bootstrap settings change.

    hive.HiveMQ.Internal.HiveMQBootstrapChanged += (sender, args) => StartNotifyHandler();

    // Idle quietly until the terminator's cancellation token source signals
    // that the service is stopping, then release the notification channel.

    var idleTask = new AsyncPeriodicTask(
        TimeSpan.FromMinutes(5),
        onTaskAsync: async () =>
        {
            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "VARNISH-SHIM: Terminating");

            var channel = proxyNotifyChannel;

            if (channel != null)
            {
                channel.Dispose();
                proxyNotifyChannel = null;
            }

            await Task.CompletedTask;
        },
        cancellationTokenSource: terminator.CancellationTokenSource);

    await idleTask.Run();
}
public async Task SimpleAsync()
{
    // Run a bare periodic task (task callback only) that stops itself by
    // returning TRUE on its fifth invocation.

    const int expectedCalls = 5;

    int taskCalls = 0;

    var periodicTask = new AsyncPeriodicTask(
        interval: TimeSpan.FromMilliseconds(500),
        onTaskAsync: async () =>
        {
            await Task.CompletedTask;

            taskCalls++;

            return taskCalls == expectedCalls;
        });

    await periodicTask.Run();

    Assert.Equal(expectedCalls, taskCalls);
}
/// <summary>
/// Periodically broadcasts fail-safe <see cref="ProxyUpdateMessage"/> messages
/// commanding the proxy and proxy bridge instances to ensure that they're running
/// with the current HAProxy and Varnish configurations.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task FailsafeBroadcasterAsync()
{
    var broadcaster = new AsyncPeriodicTask(
        failsafeInterval,
        onTaskAsync: async () =>
        {
            // The update is addressed to all instances.

            var failsafeMessage = new ProxyUpdateMessage(all: true);

            failsafeMessage.Reason = "fail-safe";

            log.LogInfo(() => $"FAILSAFE-BROADCASTER: Broadcasting [{failsafeMessage}].");

            await proxyNotifyChannel.PublishAsync(failsafeMessage);

            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError("FAILSAFE-BROADCASTER", e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "FAILSAFE-BROADCASTER: Terminating");

            await Task.CompletedTask;
        },
        cancellationTokenSource: terminator.CancellationTokenSource);

    // Let the terminator dispose the task and then run it to completion.

    terminator.AddDisposable(broadcaster);
    await broadcaster.Run();
}
/// <summary>
/// Handles purging of old <b>logstash</b> and <b>metricbeat</b> Elasticsearch indexes.
/// </summary>
/// <param name="logPurgerInterval">The interval between index scans.</param>
/// <param name="retentionDays">
/// The number of days of indexes to retain.  Indexes whose encoded date is earlier
/// than today's UTC date minus this many days are deleted.
/// </param>
/// <returns>The tracking <see cref="Task"/>.</returns>
public async Task LogPurgerAsync(TimeSpan logPurgerInterval, int retentionDays)
{
    using (var jsonClient = new JsonClient())
    {
        // Use the in-cluster service URI when running inside Kubernetes,
        // otherwise use the service port on localhost.

        jsonClient.BaseAddress = KubernetesClientConfiguration.IsInCluster()
            ? this.ServiceMap[NeonServices.Elasticsearch].Endpoints.Default.Uri
            : new Uri($"http://localhost:{this.ServiceMap[NeonServices.Elasticsearch].Endpoints.Default.Port}");

        var periodicTask = new AsyncPeriodicTask(
            logPurgerInterval,
            onTaskAsync: async () =>
            {
                // We're going to list the indexes and look for [logstash]
                // and [metricbeat] indexes that encode the index date like:
                //
                //      logstash-2018.06.06
                //      metricbeat-6.1.1-2018.06.06
                //
                // The date simply encodes the day covered by the index.

                var utcNow           = DateTime.UtcNow;
                var deleteBeforeDate = utcNow.Date - TimeSpan.FromDays(retentionDays);
                var indexList        = await jsonClient.GetAsync<JObject>("_aliases");

                foreach (var indexProperty in indexList.Properties())
                {
                    var indexName = indexProperty.Name;

                    // We're only purging [logstash] and [metricbeat] indexes.  Index
                    // names are identifiers, so compare them ordinally rather than
                    // with the current culture.

                    if (!indexName.StartsWith("logstash-", StringComparison.Ordinal) &&
                        !indexName.StartsWith("metricbeat-", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Extract the date from the index name.

                    DateTime indexDate;

                    if (!TryParseIndexDate(indexName, out indexDate))
                    {
                        Log.LogWarn(() => $"LOG-PURGER: Cannot extract date from index named [{indexName}].");
                        continue;
                    }

                    if (indexDate < deleteBeforeDate)
                    {
                        Log.LogInfo(() => $"LOG-PURGER: Deleting index [{indexName}].");
                        await jsonClient.DeleteAsync<JObject>(indexName);
                        Log.LogInfo(() => $"LOG-PURGER: [{indexName}] was deleted.");
                    }
                }

                Log.LogDebug("LOG-PURGER: Scan finished.");

                return false;
            },
            onExceptionAsync: async e =>
            {
                Log.LogError("LOG-PURGER", e);

                await Task.CompletedTask;
                return false;
            },
            onTerminateAsync: async () =>
            {
                Log.LogInfo(() => "LOG-PURGER: Terminating");

                await Task.CompletedTask;
            });

        await periodicTask.Run();
    }
}

/// <summary>
/// Attempts to parse the <b>YEAR.MONTH.DAY</b> date that follows the last dash
/// in an index name such as <c>logstash-2018.06.06</c>.
/// </summary>
/// <param name="indexName">The index name.</param>
/// <param name="indexDate">Returns as the parsed date on success.</param>
/// <returns><c>true</c> if a valid date could be extracted.</returns>
private static bool TryParseIndexDate(string indexName, out DateTime indexDate)
{
    indexDate = default(DateTime);

    // When there's no dash, [LastIndexOf()] returns -1 and the entire
    // name is treated as the date text.

    var dateText = indexName.Substring(indexName.LastIndexOf('-') + 1);
    var fields   = dateText.Split('.');

    // Only the first three fields are significant.

    if (fields.Length < 3)
    {
        return false;
    }

    var style   = System.Globalization.NumberStyles.Integer;
    var culture = System.Globalization.CultureInfo.InvariantCulture;

    int year;
    int month;
    int day;

    if (!int.TryParse(fields[0], style, culture, out year) ||
        !int.TryParse(fields[1], style, culture, out month) ||
        !int.TryParse(fields[2], style, culture, out day))
    {
        return false;
    }

    // Validate the ranges up front so the [DateTime] constructor can't throw.

    if (year < DateTime.MinValue.Year || year > DateTime.MaxValue.Year ||
        month < 1 || month > 12 ||
        day < 1 || day > DateTime.DaysInMonth(year, month))
    {
        return false;
    }

    indexDate = new DateTime(year, month, day);

    return true;
}
/// <summary>
/// Handles polling of Vault seal status and automatic unsealing if enabled.
/// </summary>
/// <param name="vaultUri">The URI for the Vault instance being managed.</param>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task VaultUnsealerAsync(string vaultUri)
{
    var lastVaultStatus = (VaultHealthStatus)null;

    // We're going to periodically log Vault status even
    // when there are no status changes.

    var statusUpdateTimeUtc  = DateTime.UtcNow;
    var statusUpdateInterval = TimeSpan.FromMinutes(30);

    log.LogInfo(() => $"VAULT-UNSEALER: Opening [{vaultUri}]");

    using (var vault = VaultClient.OpenWithToken(new Uri(vaultUri)))
    {
        var periodicTask = new AsyncPeriodicTask(
            vaultUnsealInterval,
            onTaskAsync: async () =>
            {
                if (IsSetupPending)
                {
                    log.LogInfo(() => "VAULT-UNSEALER: Delaying because hive setup is still in progress.");
                    return false;
                }

                log.LogDebug(() => $"VAULT-UNSEALER: Polling [{vaultUri}]");

                // Monitor Vault for status changes and handle unsealing if enabled.

                log.LogDebug(() => $"VAULT-UNSEALER: Querying [{vaultUri}]");

                var newVaultStatus = await vault.GetHealthAsync(terminator.CancellationToken);

                // Await the Consul query rather than blocking on [Task.Result] so that
                // no thread is tied up during the call and failures surface as the
                // original exception instead of being wrapped in an [AggregateException].

                var autoUnsealDisabled = await consul.KV.GetBoolOrDefault($"{HiveConst.GlobalKey}/{HiveGlobals.UserDisableAutoUnseal}");

                // The first poll always counts as a change.

                var changed = lastVaultStatus == null || !lastVaultStatus.Equals(newVaultStatus);

                if (changed)
                {
                    if (!newVaultStatus.IsInitialized || newVaultStatus.IsSealed)
                    {
                        log.LogError(() => $"VAULT-UNSEALER: status CHANGED [{vaultUri}]");
                    }
                    else
                    {
                        log.LogInfo(() => $"VAULT-UNSEALER: status CHANGED [{vaultUri}]");
                    }

                    statusUpdateTimeUtc = DateTime.UtcNow;  // Force logging status below
                }

                if (DateTime.UtcNow >= statusUpdateTimeUtc)
                {
                    if (!newVaultStatus.IsInitialized || newVaultStatus.IsSealed)
                    {
                        log.LogError(() => $"VAULT-UNSEALER: status={newVaultStatus} [{vaultUri}]");
                    }
                    else
                    {
                        log.LogInfo(() => $"VAULT-UNSEALER: status={newVaultStatus} [{vaultUri}]");
                    }

                    if (newVaultStatus.IsSealed && autoUnsealDisabled)
                    {
                        log.LogInfo(() => $"VAULT-UNSEALER: AUTO-UNSEAL is temporarily DISABLED because Consul [{HiveConst.GlobalKey}/{HiveGlobals.UserDisableAutoUnseal}=true].");
                    }

                    statusUpdateTimeUtc = DateTime.UtcNow + statusUpdateInterval;
                }

                lastVaultStatus = newVaultStatus;

                // Attempt to unseal the Vault if it's sealed and we have the keys.

                if (newVaultStatus.IsSealed && vaultCredentials != null)
                {
                    if (autoUnsealDisabled)
                    {
                        return false;   // Don't unseal.
                    }

                    log.LogInfo(() => $"VAULT-UNSEALER: UNSEALING [{vaultUri}]");
                    await vault.UnsealAsync(vaultCredentials, terminator.CancellationToken);
                    log.LogInfo(() => $"VAULT-UNSEALER: UNSEALED [{vaultUri}]");

                    // Make the status update due right away so that the
                    // post-unseal status is logged by the next poll.

                    statusUpdateTimeUtc = DateTime.UtcNow;
                }

                return false;
            },
            onExceptionAsync: async e =>
            {
                log.LogError("VAULT-UNSEALER", e);

                await Task.CompletedTask;
                return false;
            },
            onTerminateAsync: async () =>
            {
                log.LogInfo(() => "VAULT-UNSEALER: Terminating");

                await Task.CompletedTask;
            });

        terminator.AddDisposable(periodicTask);
        await periodicTask.Run();
    }
}
/// <summary>
/// Implements the service as a <see cref="Task"/>.  Each poll compares the DNS
/// answers MD5 stored in Consul with the MD5 of the answers last written locally
/// and, when they differ (or when the verify timer has fired), rewrites the dynamic
/// section of the PowerDNS hosts file and signals <b>neon-dns-loader</b> to reload it.
/// </summary>
/// <returns>The <see cref="Task"/>.</returns>
private static async Task RunAsync()
{
    var localMD5    = string.Empty;
    var remoteMD5   = "[unknown]";
    var verifyTimer = new PolledTimer(verifyInterval, autoReset: true);

    var periodicTask = new AsyncPeriodicTask(
        pollInterval,
        onTaskAsync: async () =>
        {
            log.LogDebug(() => "Starting poll");
            log.LogDebug(() => "Fetching DNS answers MD5 from Consul.");

            remoteMD5 = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsMd5Key, terminator.CancellationToken);

            if (remoteMD5 == null)
            {
                remoteMD5 = "[unknown]";
            }

            var verify = verifyTimer.HasFired;

            if (verify)
            {
                // Under normal circumstances, we should never see the reload signal file
                // here because the [neon-dns-loader] service should have deleted it after
                // handling the last change signal.
                //
                // This probably means that [neon-dns-loader] is not running or that this
                // service is configured with POLL_INTERVAL being so short that
                // [neon-dns-loader] hasn't had a chance to handle the previous signal.

                if (File.Exists(reloadSignalPath))
                {
                    log.LogWarn("[neon-dns-loader] service doesn't appear to be running because the reload signal file is present.");
                }
            }

            if (!verify && localMD5 == remoteMD5)
            {
                log.LogDebug(() => "DNS answers are unchanged.");
            }
            else
            {
                if (localMD5 == remoteMD5)
                {
                    log.LogDebug(() => "DNS answers have not changed but we're going to verify that we have the correct hosts anyway.");
                }
                else
                {
                    log.LogDebug(() => "DNS answers have changed.");
                }

                log.LogDebug(() => "Fetching DNS answers.");

                var hostsTxt = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsKey, terminator.CancellationToken);

                if (hostsTxt == null)
                {
                    log.LogWarn(() => "DNS answers do not exist on Consul. Is [neon-dns-mon] functioning properly?");
                }
                else
                {
                    var marker = "# -------- NEON-DNS --------";

                    // We have the host entries from Consul.  We need to add these onto the
                    // end of the PowerDNS hosts file, replacing any host entries written
                    // during a previous run.
                    //
                    // We're going to use the special [marker] line defined above to separate
                    // the built-in hosts (above the line) from the dynamic hosts we're
                    // generating here (which will be below the line).  Note that this line
                    // won't exist the first time this service runs, so we'll just add it.
                    //
                    // Note that it's possible that the PowerDNS Recursor might be reading this
                    // file while we're trying to write it.  We're going to treat these as
                    // transient errors and retry.

                    var retry = new LinearRetryPolicy(typeof(IOException), maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(1));

                    await retry.InvokeAsync(
                        async () =>
                        {
                            using (var stream = new FileStream(powerDnsHostsPath, FileMode.Open, FileAccess.ReadWrite))
                            {
                                // Read a copy of the hosts file as bytes so we can compare
                                // the old version with the new one generated below for changes.

                                var orgHostBytes = stream.ReadToEnd();

                                stream.Position = 0;

                                // Generate the new hosts file.

                                var sbHosts = new StringBuilder();

                                // Read the hosts file up to but not including the special marker
                                // line (if it's present).

                                using (var reader = new StreamReader(stream, Encoding.UTF8, true, 32 * 1024, leaveOpen: true))
                                {
                                    foreach (var line in reader.Lines())
                                    {
                                        if (line.StartsWith(marker))
                                        {
                                            break;
                                        }

                                        sbHosts.AppendLine(line);
                                    }
                                }

                                // Strip any trailing whitespace from the hosts file so we'll
                                // be able to leave a nice blank line between the end of the
                                // original file and the special marker line.

                                var text = sbHosts.ToString().TrimEnd();

                                sbHosts.Clear();
                                sbHosts.AppendLine(text);

                                // Append the marker line, followed by dynamic host
                                // entries we downloaded from Consul.

                                sbHosts.AppendLine();
                                sbHosts.AppendLine(marker);
                                sbHosts.AppendLine();
                                sbHosts.Append(hostsTxt);

                                // Generate the new host file bytes, taking care to ensure that
                                // we're using Linux style line endings and then update the
                                // hosts file if anything changed.

                                var hostsText    = NeonHelper.ToLinuxLineEndings(sbHosts.ToString());
                                var newHostBytes = Encoding.UTF8.GetBytes(hostsText);

                                if (NeonHelper.ArrayEquals(orgHostBytes, newHostBytes))
                                {
                                    log.LogDebug(() => $"[{powerDnsHostsPath}] file is up-to-date.");
                                }
                                else
                                {
                                    log.LogDebug(() => $"[{powerDnsHostsPath}] is being updated.");

                                    stream.Position = 0;
                                    stream.SetLength(0);
                                    stream.Write(newHostBytes);

                                    // Signal to the local [neon-dns-loader] systemd service that it needs
                                    // to have PowerDNS Recursor reload the hosts file.

                                    File.WriteAllText(reloadSignalPath, "reload now");
                                }
                            }

                            await Task.CompletedTask;
                        });

                    // We've successfully synchronized the local hosts file with
                    // the Consul DNS settings.

                    localMD5 = remoteMD5;
                }
            }

            // Log completion for every poll that ends without an exception so that
            // each "Starting poll" has a matching entry, not just the polls that
            // resynchronized the hosts file.

            log.LogDebug(() => "Finished poll");

            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError(e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "Terminating");

            await Task.CompletedTask;
        });

    terminator.AddDisposable(periodicTask);
    await periodicTask.Run();
}
/// <summary>
/// Periodically performs HiveMQ related maintenance, currently ensuring that
/// the [sysadmin] account has full permissions for all virtual hosts.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task HiveMQMaintainerAsync()
{
    using (var hivemqManager = hive.HiveMQ.ConnectHiveMQManager(useBootstrap: true))
    {
        var maintainer = new AsyncPeriodicTask(
            hivemqMantainInterval,
            onTaskAsync: async () =>
            {
                log.LogDebug(() => $"HIVEMQ-MAINTAINER: Checking [{HiveConst.HiveMQSysadminUser}] permissions.");

                // Collect the names of the virtual hosts where [sysadmin]
                // already holds full (configure/read/write) permissions.

                var fullAccessVHosts = new HashSet<string>();
                var allPermissions   = await hivemqManager.GetPermissionsAsync();

                foreach (var permission in allPermissions)
                {
                    if (permission.User != HiveConst.HiveMQSysadminUser ||
                        permission.Configure != ".*" ||
                        permission.Read != ".*" ||
                        permission.Write != ".*")
                    {
                        continue;
                    }

                    fullAccessVHosts.Add(permission.Vhost);
                }

                // Walk the virtual hosts and grant [sysadmin] full permissions
                // wherever they're missing, remembering which ones we changed.

                var sysadminUser  = await hivemqManager.GetUserAsync(HiveConst.HiveMQSysadminUser);
                var grantedVHosts = new List<string>();
                var allVHosts     = await hivemqManager.GetVHostsAsync();

                foreach (var vhost in allVHosts)
                {
                    if (fullAccessVHosts.Contains(vhost.Name))
                    {
                        continue;
                    }

                    grantedVHosts.Add(vhost.Name);
                    await hivemqManager.CreatePermissionAsync(new PermissionInfo(sysadminUser, vhost));
                }

                if (grantedVHosts.Count > 0)
                {
                    var sbGranted = new StringBuilder();

                    foreach (var vhostName in grantedVHosts)
                    {
                        sbGranted.AppendWithSeparator(vhostName, ", ");
                    }

                    log.LogInfo(() => $"HIVEMQ-MAINTAINER: Granted [{HiveConst.HiveMQSysadminUser}] full permissions for vhosts: {sbGranted}");
                }

                log.LogDebug(() => "HIVEMQ-MAINTAINER: Check completed.");

                return false;
            },
            onExceptionAsync: async e =>
            {
                log.LogError("HIVEMQ-MAINTAINER", e);

                await Task.CompletedTask;
                return false;
            },
            onTerminateAsync: async () =>
            {
                log.LogInfo(() => "HIVEMQ-MAINTAINER: Terminating");

                await Task.CompletedTask;
            });

        // Let the terminator dispose the task and then run it to completion.

        terminator.AddDisposable(maintainer);
        await maintainer.Run();
    }
}
/// <summary>
/// Implements the service as a <see cref="Task"/>.  Each poll resolves the hive
/// node and Consul DNS entry addresses, renders them as canonical <b>hosts.txt</b>
/// text and writes the text and its MD5 hash to Consul when the hash differs
/// from the one currently stored there.
/// </summary>
/// <returns>The <see cref="Task"/>.</returns>
private static async Task RunAsync()
{
    var periodicTask = new AsyncPeriodicTask(
        pollInterval,
        onTaskAsync: async () =>
        {
            log.LogDebug(() => "Starting poll");

            // We're going to collect the [hostname --> address] mappings into
            // a specialized (semi-threadsafe) dictionary.

            var hostAddresses = new HostAddresses();

            // Retrieve the current hive definition from Consul if we don't already
            // have it or it's different from what we've cached.

            hiveDefinition = await HiveHelper.GetDefinitionAsync(hiveDefinition, terminator.CancellationToken);

            log.LogDebug(() => $"Hive has [{hiveDefinition.NodeDefinitions.Count}] nodes.");

            // Add the [NAME.HIVENAME.nhive.io] definitions for each cluster node.

            foreach (var node in hiveDefinition.Nodes)
            {
                hostAddresses.Add($"{node.Name}.{hiveDefinition.Name}.nhive.io", IPAddress.Parse(node.PrivateAddress));
            }

            // Read the DNS entry definitions from Consul and add the appropriate
            // host/addresses based on health checks, etc.

            var targetsResult = (await consul.KV.ListOrDefault<DnsEntry>(HiveConst.ConsulDnsEntriesKey + "/", terminator.CancellationToken));

            List<DnsEntry> targets;

            if (targetsResult == null)
            {
                // The targets key wasn't found in Consul, so we're
                // going to assume that there are no targets.

                targets = new List<DnsEntry>();
            }
            else
            {
                targets = targetsResult.ToList();
            }

            log.LogDebug(() => $"Consul has [{targets.Count}] DNS targets.");

            await ResolveTargetsAsync(hostAddresses, targets);

            // Generate a canonical [hosts.txt] file by sorting host entries by
            // hostname and then by IP address.
            //
            // Unhealthy hosts will be assigned the unrouteable [0.0.0.0] address.
            // The reason for this is subtle but super important.
            //
            // If we didn't do this, the DNS host would likely be resolved by a
            // public DNS service, perhaps returning the IP address of a production
            // endpoint.
            //
            // This could cause a disaster if the whole purpose of having a local
            // DNS host defined is to redirect test traffic to a test service.  If
            // the test service endpoints didn't report as healthy and [0.0.0.0]
            // wasn't set, then test traffic could potentially hit the production
            // endpoint and do serious damage.

            var sbHosts      = new StringBuilder();
            var mappingCount = 0;

            foreach (var host in hostAddresses.OrderBy(h => h.Key))
            {
                foreach (var address in host.Value.OrderBy(a => a.ToString()))
                {
                    sbHosts.AppendLineLinux($"{address,-15} {host.Key}");
                    mappingCount++;
                }
            }

            var unhealthyTargets = targets.Where(t => !hostAddresses.ContainsKey(t.Hostname) || hostAddresses[t.Hostname].Count == 0).ToList();

            if (unhealthyTargets.Count > 0)
            {
                // Terminate every line in this section with a plain LF, like the
                // mappings above, so that the generated text (and therefore its MD5)
                // doesn't depend on the platform's [Environment.NewLine].

                sbHosts.Append('\n');
                sbHosts.AppendLineLinux($"# [{unhealthyTargets.Count}] unhealthy DNS hosts:");
                sbHosts.Append('\n');

                var unhealthyAddress = "0.0.0.0";

                // Sort by hostname.  Sorting the [DnsEntry] instances themselves
                // requires the type to be comparable, which isn't established here.

                foreach (var target in unhealthyTargets.OrderBy(t => t.Hostname))
                {
                    sbHosts.AppendLineLinux($"{unhealthyAddress,-15} {target.Hostname}");
                }
            }

            // Compute the MD5 hash and compare it to the hash persisted to
            // Consul (if any) to determine whether we need to update the
            // answers in Consul.

            var hostsTxt   = sbHosts.ToString();
            var hostsMD5   = NeonHelper.ComputeMD5(hostsTxt);
            var currentMD5 = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsMd5Key, terminator.CancellationToken);

            if (currentMD5 == null)
            {
                currentMD5 = string.Empty;
            }

            if (hostsMD5 != currentMD5)
            {
                log.LogDebug(() => "DNS answers have changed.");
                log.LogDebug(() => $"Writing [{mappingCount}] DNS answers to Consul.");

                // Update the Consul keys using a transaction.

                var operations = new List<KVTxnOp>()
                {
                    new KVTxnOp(HiveConst.ConsulDnsHostsMd5Key, KVTxnVerb.Set) { Value = Encoding.UTF8.GetBytes(hostsMD5) },
                    new KVTxnOp(HiveConst.ConsulDnsHostsKey, KVTxnVerb.Set) { Value = Encoding.UTF8.GetBytes(hostsTxt) }
                };

                await consul.KV.Txn(operations, terminator.CancellationToken);
            }

            log.LogDebug(() => "Finished poll");

            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError(e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "Terminating");

            await Task.CompletedTask;
        });

    terminator.AddDisposable(periodicTask);
    await periodicTask.Run();
}
/// <summary>
/// Polls Docker swarm for the hive nodes and updates the hive definition
/// (and its hash) in Consul whenever a change is detected.
/// </summary>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task SwarmPollerAsync()
{
    var poller = new AsyncPeriodicTask(
        swarmPollInterval,
        onTaskAsync: async () =>
        {
            try
            {
                log.LogDebug(() => "SWARM-POLLER: Polling");

                // Refresh the cached hive definition from Consul if we don't have
                // one yet or if the persisted definition differs from the cache.

                cachedHiveDefinition = await HiveHelper.GetDefinitionAsync(cachedHiveDefinition, terminator.CancellationToken);

                // Ask Docker for the current swarm nodes.

                log.LogDebug(() => $"SWARM-POLLER: Querying [{docker.Settings.Uri}]");

                var dockerNodes = await docker.NodeListAsync();

                // Clone the cached definition and replace its nodes with definitions
                // parsed from the swarm node labels.  The hashes of the cached and
                // rebuilt definitions are compared further below.

                var latestDefinition = NeonHelper.JsonClone<HiveDefinition>(cachedHiveDefinition);

                latestDefinition.NodeDefinitions.Clear();

                foreach (var dockerNode in dockerNodes)
                {
                    var parsedNode = NodeDefinition.ParseFromLabels(dockerNode.Labels);

                    parsedNode.Name = dockerNode.Hostname;
                    latestDefinition.NodeDefinitions.Add(parsedNode.Name, parsedNode);
                }

                log.LogDebug(() => $"SWARM-POLLER: [{latestDefinition.Managers.Count()}] managers and [{latestDefinition.Workers.Count()}] workers in current hive definition.");

                // Hive pets are not part of the Swarm, so Docker won't return any
                // information about them.  The pet definitions are read from Consul
                // instead.  A missing key is treated as "no pets" for backwards
                // compatibility and robustness.

                var petsKey  = $"{HiveConst.GlobalKey}/{HiveGlobals.PetsDefinition}";
                var petsText = await HiveHelper.Consul.KV.GetStringOrDefault(petsKey, terminator.CancellationToken);

                if (petsText == null)
                {
                    log.LogDebug(() => $"SWARM-POLLER: [{petsKey}] Consul key not found. Assuming no pets.");
                }
                else if (string.IsNullOrWhiteSpace(petsText))
                {
                    log.LogDebug(() => $"SWARM-POLLER: [{petsKey}] is empty.");
                }
                else
                {
                    // Parse the pet node definitions and add them to the hive definition.

                    var pets = NeonHelper.JsonDeserialize<Dictionary<string, NodeDefinition>>(petsText);

                    foreach (var pet in pets)
                    {
                        latestDefinition.NodeDefinitions.Add(pet.Key, pet.Value);
                    }

                    log.LogDebug(() => $"SWARM-POLLER: [{petsKey}] defines [{pets.Count}] pets.");
                }

                // Attach the hive summary before hashing.

                latestDefinition.Summary = HiveSummary.FromHive(hive, latestDefinition);

                // Persist the rebuilt definition only when its hash differs
                // from the cached definition's hash.

                latestDefinition.ComputeHash();

                if (latestDefinition.Hash == cachedHiveDefinition.Hash)
                {
                    log.LogDebug(() => "SWARM-POLLER: Hive definition is UNCHANGED.");
                }
                else
                {
                    log.LogInfo(() => "SWARM-POLLER: Hive definition has CHANGED. Updating Consul.");

                    await HiveHelper.PutDefinitionAsync(latestDefinition, cancellationToken: terminator.CancellationToken);

                    cachedHiveDefinition = latestDefinition;
                }
            }
            catch (KeyNotFoundException)
            {
                // We'll see this when no hive definition has been persisted to the
                // hive.  This is a serious problem: the definition is configured
                // during setup and there should always be one in Consul.

                log.LogError(() => $"SWARM-POLLER: No hive definition has been found at [{hiveDefinitionKey}] in Consul. This is a serious error that will have to be corrected manually.");
            }

            log.LogDebug(() => "SWARM-POLLER: Finished Poll");

            return false;
        },
        onExceptionAsync: async e =>
        {
            log.LogError("SWARM-POLLER", e);

            await Task.CompletedTask;
            return false;
        },
        onTerminateAsync: async () =>
        {
            log.LogInfo(() => "SWARM-POLLER: Terminating");

            await Task.CompletedTask;
        });

    // Let the terminator dispose the task and then run it to completion.

    terminator.AddDisposable(poller);
    await poller.Run();
}