Ejemplo n.º 1
0
        /// <summary>
        /// Simulates the arrival of a stop signal so the service begins shutting down.
        /// This is typically used when unit testing services.
        /// </summary>
        /// <exception cref="TimeoutException">
        /// Rethrown when the service does not indicate that it is ready to exit within
        /// the configured timeout, i.e. before it would have been killed (e.g. by
        /// Kubernetes or Docker).
        /// </exception>
        public void Signal()
        {
            // The application has already reported that it has terminated.

            if (readyToExit)
            {
                return;
            }

            // Remember whether termination was already underway and then
            // unconditionally mark the service as terminating.

            var alreadyTerminating = terminating;

            terminating = true;

            if (alreadyTerminating)
            {
                return;
            }

            log?.LogInfo(() => $"Emulated stop request: [timeout={Timeout}]");

            cts.Cancel();

            // Run each registered handler on its own thread.

            lock (handlers)
            {
                foreach (var handler in handlers)
                {
                    var handlerThread = new Thread(new ThreadStart(handler));

                    handlerThread.Start();
                }
            }

            StopEvent.Set();

            // Wait for the application to indicate that it's ready to exit,
            // logging the outcome and rethrowing on timeout.

            try
            {
                NeonHelper.WaitFor(() => readyToExit, Timeout);
                log?.LogInfo(() => "Process stopped gracefully.");
            }
            catch (TimeoutException)
            {
                log?.LogWarn(() => $"Process did not stop within [{Timeout}].");
                throw;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Cleanly terminates the current process (for internal use): cancels the
        /// termination token, starts the registered handlers, waits for the application
        /// to indicate that it's ready to exit and then exits the process.
        /// </summary>
        /// <param name="exitCode">Optional process exit code (defaults to <b>0</b>).</param>
        /// <param name="explicitTermination">Optionally indicates that termination is not due to receiving an external signal.</param>
        private void ExitInternal(int exitCode = 0, bool explicitTermination = false)
        {
            // The application has already reported that it has terminated.

            if (readyToExit)
            {
                return;
            }

            // Remember whether termination was already underway and then
            // unconditionally mark the process as terminating.

            var alreadyTerminating = terminating;

            terminating = true;

            if (alreadyTerminating)
            {
                return;
            }

            if (!explicitTermination)
            {
                log?.LogInfo(() => $"SIGTERM received: Stopping process [timeout={Timeout}]");
            }
            else
            {
                log?.LogInfo(() => $"INTERNAL stop request: [timeout={Timeout}]");
            }

            cts.Cancel();

            // Run each registered handler on its own thread.

            lock (handlers)
            {
                foreach (var handler in handlers)
                {
                    var handlerThread = new Thread(new ThreadStart(handler));

                    handlerThread.Start();
                }
            }

            // Wait for the application to indicate that it's ready to exit.  A timeout
            // is only logged here because the process exits immediately afterwards.

            try
            {
                NeonHelper.WaitFor(() => readyToExit, Timeout);
                log?.LogInfo(() => "Process stopped gracefully.");
            }
            catch (TimeoutException)
            {
                log?.LogWarn(() => $"Process did not stop within [{Timeout}].");
            }

            Environment.Exit(exitCode);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Writes a <b>[transient-retry]</b> warning for an exception that is about to be
 /// retried.  Nothing is logged when no logger is present.
 /// </summary>
 /// <param name="e">The transient exception.</param>
 protected void LogTransient(Exception e) => log?.LogWarn("[transient-retry]", e);
Ejemplo n.º 4
0
        /// <summary>
        /// Constructs a query/signal method map for a workflow type by reflecting over
        /// its public and non-public instance methods and recording those tagged with
        /// <see cref="SignalMethodAttribute"/> or <see cref="QueryMethodAttribute"/>.
        /// Tagged methods with an unexpected signature are logged as warnings and ignored.
        /// </summary>
        /// <param name="workflowType">The workflow interface.</param>
        /// <returns>The <see cref="WorkflowMethodMap"/>.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="workflowType"/> is <c>null</c>.</exception>
        public static WorkflowMethodMap Create(Type workflowType)
        {
            Covenant.Requires<ArgumentNullException>(workflowType != null);

            // $todo(jeff.lill):
            //
            // The code below doesn't verify that query/signal names are unique
            // but also doesn't barf.  It will send requests to the last method
            // encountered with the same name, which is pretty reasonable.
            //
            // In a perfect world, we'd detect this and throw an exception.

            var map = new WorkflowMethodMap();

            foreach (var method in workflowType.GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance))
            {
                // Signal methods are tagged by [SignalMethod], accept a single byte array
                // parameter, and return [Task].

                var signalMethodAttribute = method.GetCustomAttribute<SignalMethodAttribute>();

                if (signalMethodAttribute != null)
                {
                    if (method.ReturnType != typeof(Task))
                    {
                        Log.LogWarn($"Workflow [{workflowType.FullName}.{method.Name}()] signal handler is invalid because it doesn't return [Task].  It will be ignored.");
                        continue;
                    }

                    var signalParameters = method.GetParameters();

                    if (signalParameters.Length != 1 || signalParameters[0].ParameterType != typeof(byte[]))
                    {
                        Log.LogWarn($"Workflow [{workflowType.FullName}.{method.Name}()] signal handler is invalid because it doesn't accept a single byte array parameter.  It will be ignored.");
                        continue;
                    }

                    // The last method registered for a given name wins.

                    map.nameToSignalMethod[signalMethodAttribute.Name] = method;
                    continue;
                }

                // Query methods are tagged by [QueryMethod], accept a single byte array
                // parameter, and return [Task<byte[]>].

                var queryMethodAttribute = method.GetCustomAttribute<QueryMethodAttribute>();

                if (queryMethodAttribute != null)
                {
                    if (method.ReturnType != typeof(Task<byte[]>))
                    {
                        Log.LogWarn($"Workflow [{workflowType.FullName}.{method.Name}()] query handler is invalid because it doesn't return [Task<byte[]>].  It will be ignored.");
                        continue;
                    }

                    var queryParameters = method.GetParameters();

                    if (queryParameters.Length != 1 || queryParameters[0].ParameterType != typeof(byte[]))
                    {
                        Log.LogWarn($"Workflow [{workflowType.FullName}.{method.Name}()] query handler is invalid because it doesn't accept a single byte array parameter.  It will be ignored.");
                        continue;
                    }

                    // The last method registered for a given name wins.

                    map.nameToQueryMethod[queryMethodAttribute.Name] = method;
                }
            }

            return map;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Implements the service as a <see cref="Task"/>.  Each poll fetches the DNS
        /// answers MD5 from Consul and, when it differs from the last MD5 synchronized
        /// locally (or when the verify timer has fired), downloads the DNS answers and
        /// rewrites the dynamic section of the local PowerDNS hosts file, signalling
        /// the <b>neon-dns-loader</b> service when the file actually changed.
        /// </summary>
        /// <returns>The <see cref="Task"/>.</returns>
        private static async Task RunAsync()
        {
            var localMD5    = string.Empty;
            var remoteMD5   = "[unknown]";
            var verifyTimer = new PolledTimer(verifyInterval, autoReset: true);

            var periodicTask =
                new AsyncPeriodicTask(
                    pollInterval,
                    onTaskAsync:
                        async () =>
                        {
                            log.LogDebug(() => "Starting poll");
                            log.LogDebug(() => "Fetching DNS answers MD5 from Consul.");

                            remoteMD5 = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsMd5Key, terminator.CancellationToken);

                            if (remoteMD5 == null)
                            {
                                remoteMD5 = "[unknown]";
                            }

                            var verify = verifyTimer.HasFired;

                            if (verify)
                            {
                                // Under normal circumstances, we should never see the reload signal file
                                // here because the [neon-dns-loader] service should have deleted it after
                                // handling the last change signal.
                                //
                                // This probably means that [neon-dns-loader] is not running or that this
                                // service is configured with POLL_INTERVAL being so short that
                                // [neon-dns-loader] hasn't had a chance to handle the previous signal.

                                if (File.Exists(reloadSignalPath))
                                {
                                    log.LogWarn("[neon-dns-loader] service doesn't appear to be running because the reload signal file is present.");
                                }
                            }

                            if (!verify && localMD5 == remoteMD5)
                            {
                                log.LogDebug(() => "DNS answers are unchanged.");
                            }
                            else
                            {
                                if (localMD5 == remoteMD5)
                                {
                                    log.LogDebug(() => "DNS answers have not changed but we're going to verify that we have the correct hosts anyway.");
                                }
                                else
                                {
                                    log.LogDebug(() => "DNS answers have changed.");
                                }

                                log.LogDebug(() => "Fetching DNS answers.");

                                var hostsTxt = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsKey, terminator.CancellationToken);

                                if (hostsTxt == null)
                                {
                                    log.LogWarn(() => "DNS answers do not exist on Consul.  Is [neon-dns-mon] functioning properly?");
                                }
                                else
                                {
                                    var marker = "# -------- NEON-DNS --------";

                                    // We have the host entries from Consul.  We need to add these onto the
                                    // end of the PowerDNS hosts file [powerDnsHostsPath], replacing any host
                                    // entries written during a previous run.
                                    //
                                    // We're going to use the special marker line:
                                    //
                                    //  # -------- NEON-DNS --------
                                    //
                                    // to separate the built-in hosts (above the line) from the dynamic hosts
                                    // we're generating here (which will be below the line).  Note that this
                                    // line won't exist the first time this service runs, so we'll just add it.
                                    //
                                    // Note that it's possible that the PowerDNS Recursor might be reading this
                                    // file while we're trying to write it.  We're going to treat these as
                                    // transient errors and retry.

                                    var retry = new LinearRetryPolicy(typeof(IOException), maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(1));

                                    await retry.InvokeAsync(
                                        async () =>
                                        {
                                            using (var stream = new FileStream(powerDnsHostsPath, FileMode.Open, FileAccess.ReadWrite))
                                            {
                                                // Read a copy of the hosts file as bytes so we can compare
                                                // the old version with the new one generated below for changes.

                                                var orgHostBytes = stream.ReadToEnd();

                                                stream.Position = 0;

                                                // Generate the new hosts file.

                                                var sbHosts = new StringBuilder();

                                                // Read the hosts file up to but not including the special marker
                                                // line (if it's present).  The marker is a fixed ASCII token so
                                                // we match it ordinally rather than with culture-sensitive rules.

                                                using (var reader = new StreamReader(stream, Encoding.UTF8, true, 32 * 1024, leaveOpen: true))
                                                {
                                                    foreach (var line in reader.Lines())
                                                    {
                                                        if (line.StartsWith(marker, StringComparison.Ordinal))
                                                        {
                                                            break;
                                                        }

                                                        sbHosts.AppendLine(line);
                                                    }
                                                }

                                                // Strip any trailing whitespace from the hosts file so we'll
                                                // be able to leave a nice blank line between the end of the
                                                // original file and the special marker line.

                                                var text = sbHosts.ToString().TrimEnd();

                                                sbHosts.Clear();
                                                sbHosts.AppendLine(text);

                                                // Append the marker line, followed by the dynamic host
                                                // entries we downloaded from Consul.

                                                sbHosts.AppendLine();
                                                sbHosts.AppendLine(marker);
                                                sbHosts.AppendLine();
                                                sbHosts.Append(hostsTxt);

                                                // Generate the new host file bytes, taking care to ensure that
                                                // we're using Linux style line endings and then update the
                                                // hosts file if anything changed.

                                                var hostsText    = NeonHelper.ToLinuxLineEndings(sbHosts.ToString());
                                                var newHostBytes = Encoding.UTF8.GetBytes(hostsText);

                                                if (NeonHelper.ArrayEquals(orgHostBytes, newHostBytes))
                                                {
                                                    log.LogDebug(() => $"[{powerDnsHostsPath}] file is up-to-date.");
                                                }
                                                else
                                                {
                                                    log.LogDebug(() => $"[{powerDnsHostsPath}] is being updated.");

                                                    // Truncate the file and then write the new contents in place.

                                                    stream.Position = 0;
                                                    stream.SetLength(0);
                                                    stream.Write(newHostBytes);

                                                    // Signal to the local [neon-dns-loader] systemd service that it needs
                                                    // to have PowerDNS Recursor reload the hosts file.

                                                    File.WriteAllText(reloadSignalPath, "reload now");
                                                }
                                            }

                                            // NOTE(review): this message is only logged on the path that
                                            // synchronizes the hosts file, not for every poll.

                                            log.LogDebug(() => "Finished poll");
                                            await Task.CompletedTask;
                                        });

                                    // We've successfully synchronized the local hosts file with
                                    // the Consul DNS settings.

                                    localMD5 = remoteMD5;
                                }
                            }

                            return await Task.FromResult(false);
                        },
                    onExceptionAsync:
                        async e =>
                        {
                            log.LogError(e);
                            return await Task.FromResult(false);
                        },
                    onTerminateAsync:
                        async () =>
                        {
                            log.LogInfo(() => "Terminating");
                            await Task.CompletedTask;
                        });

            terminator.AddDisposable(periodicTask);
            await periodicTask.Run();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Resolves the <paramref name="targets"/> into healthy host addresses,
        /// adding the results to <paramref name="hostAddresses"/>.  Targets that
        /// fail validation are skipped and their warnings are logged (throttled
        /// by the warning timer).
        /// </summary>
        /// <param name="hostAddresses">The host addresses.</param>
        /// <param name="targets">The DNS targets.</param>
        /// <returns>The tracking <see cref="Task"/>.</returns>
        private static async Task ResolveTargetsAsync(HostAddresses hostAddresses, List<DnsEntry> targets)
        {
            // $todo(jeff.lill):
            //
            // I'm keeping this implementation super simple for now, by performing
            // all of the health checks during the poll.  This probably won't scale
            // well when there are 100s of target endpoints.  This will also tend
            // to blast health check traffic to all of the endpoints at once.
            //
            // It would probably be better to do health checking continuously in
            // another task and have this method resolve the hosts from that data.
            // That would also allow health checks to use a target TTL as a hint
            // for how often endpoint health should be checked.

            // Implementation Note:
            // --------------------
            // We're going to create a task for each DNS host entry and then
            // each of those tasks will create a task for each node group endpoint
            // node that requires a health check.

            var nodeGroups = hiveDefinition.GetHostGroups();
            var entryTasks = new List<Task>();
            var warnings   = new List<string>();

            foreach (var target in targets)
            {
                var targetWarnings = target.Validate(hiveDefinition, nodeGroups);

                if (targetWarnings.Count > 0)
                {
                    // We skip generating DNS entries for targets with warnings,
                    // but we collect the warnings so they can be logged below.

                    foreach (var targetWarning in targetWarnings)
                    {
                        warnings.Add(targetWarning);
                    }

                    continue;
                }

                // Clear the resolver to purge any cached state from the previous pass.
                //
                // NOTE(review): this runs once per valid target while the tasks started
                // for earlier targets may still be running; confirm whether it should
                // instead be cleared once before the loop.

                healthResolver.Clear();

                // Kick off the endpoint health checks.

                entryTasks.Add(Task.Run(
                    async () =>
                    {
                        var healthTasks = new List<Task>();

                        foreach (var endpoint in target.Endpoints)
                        {
                            //-------------------------------------------------
                            // Handle node group endpoints.

                            var groupName = endpoint.GetGroupName();

                            if (groupName != null)
                            {
                                if (nodeGroups.TryGetValue(groupName, out var group))
                                {
                                    foreach (var node in group)
                                    {
                                        healthTasks.Add(Task.Run(
                                            async () =>
                                            {
                                                var nodeAddresses = await CheckEndpointAsync(endpoint, node.PrivateAddress);

                                                // Guard against a null result, just like the
                                                // normal endpoint path below does.

                                                if (nodeAddresses != null)
                                                {
                                                    foreach (var nodeAddress in nodeAddresses)
                                                    {
                                                        hostAddresses.Add(target.Hostname, nodeAddress);
                                                    }
                                                }
                                            }));
                                    }
                                }

                                continue;
                            }

                            //-------------------------------------------------
                            // Handle normal endpoints.

                            var addresses = await CheckEndpointAsync(endpoint);

                            if (addresses != null)
                            {
                                foreach (var address in addresses)
                                {
                                    hostAddresses.Add(target.Hostname, address);
                                }
                            }
                        }

                        await NeonHelper.WaitAllAsync(healthTasks);
                    },
                    cancellationToken: terminator.CancellationToken));
            }

            await NeonHelper.WaitAllAsync(entryTasks);

            // Log any detected configuration warnings.  Note that warning reports are
            // throttled by [warnTimer] so we won't spam the logs.

            if (warnTimer.HasFired)
            {
                foreach (var warning in warnings)
                {
                    log.LogWarn(warning);
                }
            }
        }
Ejemplo n.º 7
0
 /// <inheritdoc/>
 public void LogWarn(object message, string activityId = null)
 {
     // Forward the warning to [log] first and then append it to [capture]
     // with a [WARN] prefix.

     log.LogWarn(message, activityId);

     var capturedLine = $"[WARN]: {message}";

     capture.AppendLine(capturedLine);
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Rebuilds the host node's <b>/etc/containers/registries.conf.d/00-neon-cluster.conf</b> file
        /// from the cluster's <see cref="V1NeonContainerRegistry"/> resources, signals CRI-O to reload
        /// its configuration when the file changed and then manages the container registry logins:
        /// logging out of removed registries, logging into new ones and periodically re-logging
        /// into existing ones.
        /// </summary>
        /// <returns>The tracking <see cref="Task"/>.</returns>
        private async Task UpdateContainerRegistriesAsync()
        {
            var registries = (await k8s.ListClusterCustomObjectAsync<V1NeonContainerRegistry>()).Items;

            // NOTE: Here's the documentation for the config file we're generating:
            //
            //      https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md
            //

            var sbRegistryConfig   = new StringBuilder();
            var sbSearchRegistries = new StringBuilder();

            // Configure any unqualified search registries.

            foreach (var registry in registries
                .Where(item => item.Spec.SearchOrder >= 0)
                .OrderBy(item => item.Spec.SearchOrder))
            {
                sbSearchRegistries.AppendWithSeparator($"\"{registry.Spec.Prefix}\"", ", ");
            }

            sbRegistryConfig.Append(
                $@"unqualified-search-registries = [{sbSearchRegistries}]
");

            // Configure any container registries including the local cluster.

            foreach (var registry in registries)
            {
                sbRegistryConfig.Append(
                    $@"
[[registry]]
prefix   = ""{registry.Spec.Prefix}""
insecure = {NeonHelper.ToBoolString(registry.Spec.Insecure)}
blocked  = {NeonHelper.ToBoolString(registry.Spec.Blocked)}
");

                if (!string.IsNullOrEmpty(registry.Spec.Location))
                {
                    sbRegistryConfig.AppendLine($"location = \"{registry.Spec.Location}\"");
                }
            }

            if (NeonHelper.IsLinux)
            {
                // Convert the generated config to Linux line endings and then compare the new
                // config against what's already configured on the host node.  We'll rewrite the
                // host file and then signal CRI-O to reload its config when the files differ.

                var currentConfigText = File.ReadAllText(configMountPath);
                var newConfigText     = NeonHelper.ToLinuxLineEndings(sbRegistryConfig.ToString());

                if (currentConfigText != newConfigText)
                {
                    configUpdateCounter.Inc();

                    File.WriteAllText(configMountPath, newConfigText);
                    (await Node.ExecuteCaptureAsync("pkill", new object[] { "-HUP", "crio" })).EnsureSuccess();

                    // Wait a few seconds to give CRI-O a chance to reload its config.  This will
                    // help mitigate problems when managing logins below due to potential inconsistencies
                    // between CRI-O's currently loaded config and the new config we just saved.

                    await Task.Delay(TimeSpan.FromSeconds(15));
                }
            }

            //-----------------------------------------------------------------
            // We need to manage registry logins by logging into new registries,
            // logging out of deleted registries, relogging in with new credentials,
            // and periodically logging in with unchanged credentials to ensure that
            // we're actually logged in.  Here's how this works:
            //
            //      https://github.com/nforgeio/neonKUBE/issues/1591

            var retry = new LinearRetryPolicy(e => true, maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(5));

            // Logs into the registry described by [login] (with retries) and writes the
            // login file on success.  Failures are counted and logged, never rethrown.

            async Task LoginAndSaveAsync(LoginFile login)
            {
                try
                {
                    await retry.InvokeAsync(
                        async () =>
                        {
                            log.LogInfo($"{podmanPath} login {login.Location} --username {login.Username} --password REDACTED");

                            if (NeonHelper.IsLinux)
                            {
                                (await Node.ExecuteCaptureAsync(podmanPath, new object[] { "login", login.Location, "--username", login.Username, "--password", login.Password })).EnsureSuccess();
                            }
                        });

                    login.Write();
                }
                catch (Exception e)
                {
                    loginErrorCounter.Inc();
                    log.LogError(e);
                }
            }

            // Construct LoginFile instances for all specified upstream registries
            // that require credentials and add these to a dictionary keyed by SHA-256.
            //
            // The indexer is used rather than [Add()] so that two registry resources
            // producing the same SHA-256 don't abort the entire update with an
            // [ArgumentException].

            var shaToRequiredLogins = new Dictionary<string, LoginFile>();

            foreach (var registry in registries.Where(item => !string.IsNullOrEmpty(item.Spec.Username)))
            {
                var loginFile = LoginFile.Create(hostContainerRegistriesFolder, registry.Spec.Location, registry.Spec.Username, registry.Spec.Password);

                shaToRequiredLogins[loginFile.Sha256] = loginFile;
            }

            // Read all existing login files on the node and add them to a dictionary
            // mapping their SHA-256s to the file (again tolerating duplicate hashes).

            var shaToExistingLogins = new Dictionary<string, LoginFile>();

            foreach (var file in Directory.GetFiles(hostContainerRegistriesFolder, "*.login", SearchOption.TopDirectoryOnly))
            {
                var loginFile = LoginFile.Read(file);

                if (loginFile != null)
                {
                    shaToExistingLogins[loginFile.Sha256] = loginFile;
                }
            }

            // Look for any existing login files that are not present in the collection of
            // new logins.  These correspond to registries that have been deleted or whose
            // credentials have changed.  We're going to go ahead and log out of the related
            // registries and then delete these login files (we'll re-login with new
            // credentials below for the registries that weren't targeted for removal).

            foreach (var loginFile in shaToExistingLogins.Values
                .Where(login => !shaToRequiredLogins.ContainsKey(login.Sha256)))
            {
                try
                {
                    await retry.InvokeAsync(
                        async () =>
                        {
                            // Note that we're not ensuring success here because we may not be
                            // logged-in which is OK: we don't want to see that error.

                            log.LogInfo($"{podmanPath} logout {loginFile.Location}");

                            if (NeonHelper.IsLinux)
                            {
                                await Node.ExecuteCaptureAsync(podmanPath, new object[] { "logout", loginFile.Location });
                            }

                            loginFile.Delete();
                        });
                }
                catch (Exception e)
                {
                    loginErrorCounter.Inc();
                    log.LogError(e);
                }
            }

            // Look for any required logins that don't have an existing login file,
            // and then log into the registry and create the login file on success.

            foreach (var loginFile in shaToRequiredLogins.Values
                .Where(login => !shaToExistingLogins.ContainsKey(login.Sha256)))
            {
                await LoginAndSaveAsync(loginFile);
            }

            //-----------------------------------------------------------------
            // Finally, we need to force a re-login for any existing logins that haven't
            // been explicitly logged into for a while.  Note that we're always going to
            // log into the local Harbor registry.

            foreach (var file in Directory.GetFiles(hostContainerRegistriesFolder, "*.login", SearchOption.TopDirectoryOnly))
            {
                // Read the next existing login file.

                var loginFile = LoginFile.Read(file);

                if (loginFile == null)
                {
                    continue;
                }

                // Update the login with the password from the corresponding container registry resource.

                var registry = registries.FirstOrDefault(item => item.Spec.Location == loginFile.Location);

                if (registry == null)
                {
                    log.LogWarn($"Cannot locate [{nameof(V1NeonContainerRegistry)}] resource for [location={loginFile.Location}].");
                    continue;
                }

                loginFile.Password = registry.Spec.Password;

                // Perform the login once the scheduled re-login time (last update plus the
                // re-login interval plus a pseudo-random delay) has been reached.  The
                // local cluster registry is always logged into.

                var scheduledLoginUtc = loginFile.UpdatedUtc + reloginInterval + NeonHelper.PseudoRandomTimespan(reloginMaxRandomInterval);

                if (DateTime.UtcNow >= scheduledLoginUtc || loginFile.Location == KubeConst.LocalClusterRegistry)
                {
                    await LoginAndSaveAsync(loginFile);
                }
            }
        }