/// <summary>
/// Resolves the <paramref name="targets"/> into healthy host addresses,
/// adding the results to <paramref name="hostAddresses"/>.
/// </summary>
/// <param name="hostAddresses">The host addresses.</param>
/// <param name="targets">The DNS targets.</param>
/// <returns>The tracking <see cref="Task"/>.</returns>
private static async Task ResolveTargetsAsync(HostAddresses hostAddresses, List<DnsEntry> targets)
{
    // $todo(jeff.lill):
    //
    // I'm keeping this implementation super simple for now, by performing
    // all of the health checks during the poll.  This probably won't scale
    // well when there are 100s of target endpoints.  This will also tend
    // to blast health check traffic to all of the endpoints at once.
    //
    // It would probably be better to do health checking continuously in
    // another task and have this method resolve the hosts from that data.
    // That would also allow health checks to use a target TTL as a hint
    // for how often endpoint health should be checked.

    // Implementation Note:
    // --------------------
    // We're going to create a task for each DNS host entry and then
    // each of those tasks will create a task for each endpoint that
    // requires a health check.

    var nodeGroups = hiveDefinition.GetHostGroups();
    var entryTasks = new List<Task>();
    var warnings   = new List<string>();

    // Clear the resolver ONCE at the beginning of each health check pass
    // to purge any cached state from the previous pass.  This must happen
    // before any health check task is started; clearing inside the target
    // loop would reset the resolver while tasks started for earlier targets
    // are still running.

    healthResolver.Clear();

    foreach (var target in targets)
    {
        var targetWarnings = target.Validate(hiveDefinition, nodeGroups);

        if (targetWarnings.Count > 0)
        {
            // We skip generating DNS entries for targets with warnings,
            // but we remember the warnings so they can be logged below.

            foreach (var warning in targetWarnings)
            {
                warnings.Add(warning);
            }

            continue;
        }

        // Kick off the endpoint health checks for this target.

        entryTasks.Add(Task.Run(
            async () =>
            {
                var healthTasks = new List<Task>();

                foreach (var endpoint in target.Endpoints)
                {
                    //-------------------------------------------------
                    // Handle node group endpoints.

                    var groupName = endpoint.GetGroupName();

                    if (groupName != null)
                    {
                        if (nodeGroups.TryGetValue(groupName, out var group))
                        {
                            // Each node in the group is checked in its own task.

                            foreach (var node in group)
                            {
                                healthTasks.Add(Task.Run(
                                    async () =>
                                    {
                                        var nodeAddresses = await CheckEndpointAsync(endpoint, node.PrivateAddress);

                                        // Guard against a null result, matching the
                                        // handling of normal endpoints below.

                                        if (nodeAddresses != null)
                                        {
                                            foreach (var nodeAddress in nodeAddresses)
                                            {
                                                hostAddresses.Add(target.Hostname, nodeAddress);
                                            }
                                        }
                                    }));
                            }
                        }

                        // Group endpoints never fall through to the normal
                        // endpoint handling, even when the group is unknown.

                        continue;
                    }

                    //-------------------------------------------------
                    // Handle normal endpoints.

                    var addresses = await CheckEndpointAsync(endpoint);

                    if (addresses != null)
                    {
                        foreach (var address in addresses)
                        {
                            hostAddresses.Add(target.Hostname, address);
                        }
                    }
                }

                // Wait for the node group health checks started above.

                await NeonHelper.WaitAllAsync(healthTasks);
            },
            cancellationToken: terminator.CancellationToken));
    }

    await NeonHelper.WaitAllAsync(entryTasks);

    // Log any detected configuration warnings.  Note that we're going to throttle
    // warning reports to once every 5 minutes, so we won't spam the logs.

    if (warnTimer.HasFired)
    {
        foreach (var warning in warnings)
        {
            log.LogWarn(warning);
        }
    }
}
/// <summary>
/// Implements the service as a <see cref="Task"/>.  Each poll gathers the
/// hostname to address mappings, renders them as a hosts file and writes
/// the file and its MD5 hash to Consul when the content has changed.
/// </summary>
/// <returns>The <see cref="Task"/>.</returns>
private static async Task RunAsync()
{
    var periodicTask =
        new AsyncPeriodicTask(
            pollInterval,
            onTaskAsync:
                async () =>
                {
                    log.LogDebug(() => "Starting poll");

                    // We're going to collect the [hostname --> address] mappings into
                    // a specialized (semi-threadsafe) dictionary.

                    var hostAddresses = new HostAddresses();

                    // Retrieve the current hive definition from Consul if we don't already
                    // have it or it's different from what we've cached.

                    hiveDefinition = await HiveHelper.GetDefinitionAsync(hiveDefinition, terminator.CancellationToken);

                    log.LogDebug(() => $"Hive has [{hiveDefinition.NodeDefinitions.Count}] nodes.");

                    // Add the [NAME.HIVENAME.nhive.io] definitions for each cluster node.

                    foreach (var node in hiveDefinition.Nodes)
                    {
                        hostAddresses.Add($"{node.Name}.{hiveDefinition.Name}.nhive.io", IPAddress.Parse(node.PrivateAddress));
                    }

                    // Read the DNS entry definitions from Consul and add the appropriate
                    // host/addresses based on health checks, etc.

                    var targetsResult = (await consul.KV.ListOrDefault<DnsEntry>(HiveConst.ConsulDnsEntriesKey + "/", terminator.CancellationToken));

                    List<DnsEntry> targets;

                    if (targetsResult == null)
                    {
                        // The targets key wasn't found in Consul, so we're
                        // going to assume that there are no targets.

                        targets = new List<DnsEntry>();
                    }
                    else
                    {
                        targets = targetsResult.ToList();
                    }

                    log.LogDebug(() => $"Consul has [{targets.Count}] DNS targets.");

                    await ResolveTargetsAsync(hostAddresses, targets);

                    // Generate a canonical [hosts.txt] file by sorting host entries by
                    // hostname and then by IP address.
                    //
                    // Unhealthy hosts will be assigned the unrouteable [0.0.0.0] address.
                    // The reason for this is subtle but super important.
                    //
                    // If we didn't do this, the DNS host would likely be resolved by a
                    // public DNS service, perhaps returning the IP address of a production
                    // endpoint.
                    //
                    // This could cause a disaster if the whole purpose of having a local
                    // DNS host defined is to redirect test traffic to a test service.  If
                    // the test service endpoints didn't report as healthy and [0.0.0.0]
                    // wasn't set, then test traffic could potentially hit the production
                    // endpoint and do serious damage.

                    var sbHosts      = new StringBuilder();
                    var mappingCount = 0;

                    foreach (var host in hostAddresses.OrderBy(h => h.Key))
                    {
                        foreach (var address in host.Value.OrderBy(a => a.ToString()))
                        {
                            sbHosts.AppendLineLinux($"{address,-15} {host.Key}");
                            mappingCount++;
                        }
                    }

                    // A target is unhealthy when no address was collected for its hostname.

                    var unhealthyTargets = targets.Where(t => !hostAddresses.ContainsKey(t.Hostname) || hostAddresses[t.Hostname].Count == 0).ToList();

                    if (unhealthyTargets.Count > 0)
                    {
                        // Use Linux line endings here as well so the generated text (and
                        // therefore its MD5 hash) doesn't depend on the host platform.

                        sbHosts.AppendLineLinux(string.Empty);
                        sbHosts.AppendLineLinux($"# [{unhealthyTargets.Count}] unhealthy DNS hosts:");
                        sbHosts.AppendLineLinux(string.Empty);

                        var unhealthyAddress = "0.0.0.0";

                        // Sort by hostname: ordering by the entry object itself requires
                        // the entry type to be comparable and isn't the intended order.

                        foreach (var target in unhealthyTargets.OrderBy(h => h.Hostname))
                        {
                            sbHosts.AppendLineLinux($"{unhealthyAddress,-15} {target.Hostname}");
                        }
                    }

                    // Compute the MD5 hash and compare it to the hash persisted to
                    // Consul (if any) to determine whether we need to update the
                    // answers in Consul.

                    var hostsTxt   = sbHosts.ToString();
                    var hostsMD5   = NeonHelper.ComputeMD5(hostsTxt);
                    var currentMD5 = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsMd5Key, terminator.CancellationToken);

                    if (currentMD5 == null)
                    {
                        currentMD5 = string.Empty;
                    }

                    if (hostsMD5 != currentMD5)
                    {
                        log.LogDebug(() => "DNS answers have changed.");
                        log.LogDebug(() => $"Writing [{mappingCount}] DNS answers to Consul.");

                        // Update the Consul keys using a transaction so the hash
                        // and the hosts text are written together.

                        var operations = new List<KVTxnOp>()
                        {
                            new KVTxnOp(HiveConst.ConsulDnsHostsMd5Key, KVTxnVerb.Set) { Value = Encoding.UTF8.GetBytes(hostsMD5) },
                            new KVTxnOp(HiveConst.ConsulDnsHostsKey, KVTxnVerb.Set) { Value = Encoding.UTF8.GetBytes(hostsTxt) }
                        };

                        await consul.KV.Txn(operations, terminator.CancellationToken);
                    }

                    log.LogDebug(() => "Finished poll");
                    return await Task.FromResult(false);
                },
            onExceptionAsync:
                async e =>
                {
                    // Log the failure; the handler result is [false], the same
                    // value the poll handler returns after a normal pass.

                    log.LogError(e);
                    return await Task.FromResult(false);
                },
            onTerminateAsync:
                async () =>
                {
                    log.LogInfo(() => "Terminating");
                    await Task.CompletedTask;
                });

    terminator.AddDisposable(periodicTask);
    await periodicTask.Run();
}