示例#1
0
        /// <summary>
        /// Checks that a policy configured with a retry interval and a timeout (but no
        /// explicit attempt limit) gives up on an always-failing action, and that a second
        /// run started several seconds later produces the same number of attempts.
        /// </summary>
        public async Task Timeout()
        {
            var attemptTimes = new List<DateTime>();
            var retryPolicy  = new LinearRetryPolicy(TransientDetector, retryInterval: TimeSpan.FromSeconds(0.5), timeout: TimeSpan.FromSeconds(1.5));

            // Invokes an action that records the time of each attempt and then always
            // fails with a [TransientException]; the exception is expected to surface
            // from the policy.

            async Task RunAlwaysFailingActionAsync()
            {
                await Assert.ThrowsAsync<TransientException>(
                    async () =>
                    {
                        await retryPolicy.InvokeAsync(
                            async () =>
                            {
                                attemptTimes.Add(DateTime.UtcNow);
                                await Task.CompletedTask;

                                throw new TransientException();
                            });
                    });
            }

            // No attempt limit was passed, so the policy reports [int.MaxValue].

            Assert.Equal(int.MaxValue, retryPolicy.MaxAttempts);
            Assert.Equal(TimeSpan.FromSeconds(0.5), retryPolicy.RetryInterval);
            Assert.Equal(TimeSpan.FromSeconds(1.5), retryPolicy.Timeout);

            // First run.

            await RunAlwaysFailingActionAsync();

            Assert.Equal(4, attemptTimes.Count);

            // Regression check for:
            //
            //      https://github.com/nforgeio/neonKUBE/issues/762
            //
            // Pause for longer than the timeout to ensure that any (incorrect) deadline
            // computed when the policy was constructed does not affect a later run.

            await Task.Delay(TimeSpan.FromSeconds(4));

            attemptTimes.Clear();

            Assert.Equal(TimeSpan.FromSeconds(0.5), retryPolicy.RetryInterval);
            Assert.Equal(TimeSpan.FromSeconds(1.5), retryPolicy.Timeout);

            // Second run: the attempt count must match the first run.

            await RunAlwaysFailingActionAsync();

            Assert.Equal(4, attemptTimes.Count);
        }
示例#2
0
        /// <summary>
        /// Verifies that a policy constructed from an array of exception types keeps retrying
        /// when the action throws an <see cref="AggregateException"/> wrapping either of those
        /// types, and that the action succeeds on the last permitted attempt.
        /// </summary>
        public async Task SuccessDelayedAggregateArray()
        {
            var policy  = new LinearRetryPolicy(new Type[] { typeof(NotReadyException), typeof(KeyNotFoundException) });
            var times   = new List<DateTime>();
            var success = false;

            await policy.InvokeAsync(
                async () =>
                {
                    times.Add(DateTime.UtcNow);
                    await Task.Delay(0);

                    if (times.Count < policy.MaxAttempts)
                    {
                        // Alternate between the two configured exception types so that
                        // both are exercised.  (A remainder by 1 is always zero, which
                        // would leave the [KeyNotFoundException] branch unreachable.)

                        if (times.Count % 2 == 0)
                        {
                            throw new AggregateException(new NotReadyException());
                        }
                        else
                        {
                            throw new AggregateException(new KeyNotFoundException());
                        }
                    }

                    success = true;
                });

            Assert.True(success);
            Assert.Equal(policy.MaxAttempts, times.Count);
            VerifyIntervals(times, policy);
        }
示例#3
0
        /// <summary>
        /// Exercises a policy built with a custom attempt limit (4) and retry interval
        /// (2 seconds): the action fails with a transient error on every attempt except
        /// the last permitted one, which succeeds.
        /// </summary>
        public async Task SuccessCustom()
        {
            var retryPolicy  = new LinearRetryPolicy(TransientDetector, maxAttempts: 4, retryInterval: TimeSpan.FromSeconds(2));
            var attemptTimes = new List<DateTime>();
            var completed    = false;

            // The constructor arguments must be reflected by the policy properties.

            Assert.Equal(4, retryPolicy.MaxAttempts);
            Assert.Equal(TimeSpan.FromSeconds(2), retryPolicy.RetryInterval);

            await retryPolicy.InvokeAsync(
                async () =>
                {
                    attemptTimes.Add(DateTime.UtcNow);
                    await Task.CompletedTask;

                    // Keep failing until the final permitted attempt.

                    var isFinalAttempt = attemptTimes.Count >= retryPolicy.MaxAttempts;

                    if (!isFinalAttempt)
                    {
                        throw new TransientException();
                    }

                    completed = true;
                });

            Assert.True(completed);
            Assert.Equal(retryPolicy.MaxAttempts, attemptTimes.Count);
            VerifyIntervals(attemptTimes, retryPolicy);
        }
示例#4
0
        /// <summary>
        /// Result-returning variant of the custom policy test: with an attempt limit of 4
        /// and a 2 second retry interval, the action fails with a transient error until the
        /// last permitted attempt, which returns a string that must reach the caller.
        /// </summary>
        public async Task SuccessCustom_Result()
        {
            var retryPolicy  = new LinearRetryPolicy(TransientDetector, maxAttempts: 4, retryInterval: TimeSpan.FromSeconds(2));
            var attemptTimes = new List<DateTime>();

            // The constructor arguments must be reflected by the policy properties.

            Assert.Equal(4, retryPolicy.MaxAttempts);
            Assert.Equal(TimeSpan.FromSeconds(2), retryPolicy.RetryInterval);

            var result = await retryPolicy.InvokeAsync(
                async () =>
                {
                    attemptTimes.Add(DateTime.UtcNow);
                    await Task.Delay(0);

                    // Keep failing until the final permitted attempt.

                    var isFinalAttempt = attemptTimes.Count >= retryPolicy.MaxAttempts;

                    if (!isFinalAttempt)
                    {
                        throw new TransientException();
                    }

                    return "WOOHOO!";
                });

            Assert.Equal("WOOHOO!", result);
            Assert.Equal(retryPolicy.MaxAttempts, attemptTimes.Count);
            VerifyIntervals(attemptTimes, retryPolicy);
        }
示例#5
0
        /// <summary>
        /// Used to start the fixture within a <see cref="ComposedFixture"/>.
        /// </summary>
        /// <param name="image">
        /// Optionally specifies the NATS container image.  When <c>null</c>, this
        /// defaults to the <b>nats:latest</b> image from the registry named by
        /// <c>KubeConst.NeonBranchRegistry</c>.
        /// </param>
        /// <param name="name">Optionally specifies the container name (defaults to <c>nats-test</c>).</param>
        /// <param name="args">Optional NATS server command line arguments.</param>
        public void StartAsComposed(
            string image  = null,
            string name   = "nats-test",
            string[] args = null)
        {
            if (image == null)
            {
                image = $"{KubeConst.NeonBranchRegistry}/nats:latest";
            }

            base.CheckWithinAction();

            if (!IsRunning)
            {
                // Run the container detached, publishing ports 4222, 8222 and 6222.

                var dockerArgs =
                    new string[]
                    {
                        "--detach",
                        "-p", "4222:4222",
                        "-p", "8222:8222",
                        "-p", "6222:6222"
                    };

                StartAsComposed(name, image, dockerArgs, args);
            }

            // Establish the client connection, retrying on any exception (the
            // detector below always returns true).

            var connectionFactory = new ConnectionFactory();
            var connectRetry      = new LinearRetryPolicy(e => true, 20, TimeSpan.FromSeconds(0.5));

            connectRetry.InvokeAsync(
                async () =>
                {
                    Connection = connectionFactory.CreateConnection();

                    await Task.CompletedTask;
                }).Wait();
        }
示例#6
0
        /// <summary>
        /// Signals the Docker orchestrator to begin scheduling service tasks on a node.
        /// </summary>
        /// <param name="nodeName">Identifies the target node.</param>
        /// <exception cref="KeyNotFoundException">Thrown if the named node does not exist.</exception>
        /// <exception cref="InvalidOperationException">Thrown if the node is not part of the swarm.</exception>
        public void ActivateNode(string nodeName)
        {
            Covenant.Requires<ArgumentNullException>(!string.IsNullOrEmpty(nodeName));

            var targetNode = hive.GetNode(nodeName);

            if (!targetNode.Metadata.InSwarm)
            {
                throw new InvalidOperationException($"Node [{nodeName}] is not part of the swarm.");
            }

            // Transient errors have been seen here, so the command is retried a few
            // times: any exception counts as retryable, for up to 5 attempts spaced
            // 5 seconds apart.

            var manager     = hive.GetReachableManager();
            var retryPolicy = new LinearRetryPolicy(typeof(Exception), maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(5));
            var command     = $"docker node update --availability active {nodeName}";

            retryPolicy.InvokeAsync(
                async () =>
                {
                    var commandResponse = manager.SudoCommand(command);

                    // A non-zero exit code is converted into an exception so that
                    // the policy will retry the command.

                    if (commandResponse.ExitCode != 0)
                    {
                        throw new Exception(commandResponse.ErrorSummary);
                    }

                    await Task.CompletedTask;
                }).Wait();
        }
示例#7
0
        /// <summary>
        /// Checks the result-returning overload when the action first fails with a
        /// transient error and then with a <see cref="NotImplementedException"/>: the
        /// second exception must surface to the caller after exactly two attempts.
        /// </summary>
        public async Task FailDelayed_Result()
        {
            var retryPolicy  = new LinearRetryPolicy(TransientDetector);
            var attemptTimes = new List<DateTime>();

            await Assert.ThrowsAsync<NotImplementedException>(
                async () =>
                {
                    await retryPolicy.InvokeAsync<string>(
                        async () =>
                        {
                            attemptTimes.Add(DateTime.UtcNow);
                            await Task.Delay(0);

                            // From the second attempt onwards, fail with the
                            // exception the test expects to escape the policy.

                            if (attemptTimes.Count >= 2)
                            {
                                throw new NotImplementedException();
                            }

                            throw new TransientException();
                        });
                });

            Assert.Equal(2, attemptTimes.Count);
            VerifyIntervals(attemptTimes, retryPolicy);
        }
示例#8
0
        /// <summary>
        /// Establishes the server connection, retrying when the attempt throws.
        /// </summary>
        private void Connect()
        {
            // The detector below returns true for every exception, so any failure
            // while creating the connection is retried by the policy.

            var connectRetry      = new LinearRetryPolicy(e => true, 20, TimeSpan.FromSeconds(0.5));
            var connectionFactory = new StanConnectionFactory();

            connectRetry.InvokeAsync(
                async () =>
                {
                    Connection = connectionFactory.CreateConnection("test-cluster", nameof(NatsStreamingFixture));

                    await Task.CompletedTask;
                }).Wait();
        }
示例#9
0
        /// <summary>
        /// Writes a file as text, retrying if the file is already open.
        /// </summary>
        /// <param name="path">The file path.</param>
        /// <param name="text">The text to be written.</param>
        /// <returns>The <paramref name="text"/> passed.</returns>
        /// <remarks>
        /// It's possible for the configuration file to be temporarily opened
        /// by another process (e.g. the neonKUBE Desktop application or a
        /// command line tool).  Rather than failing on the first
        /// <see cref="IOException"/>, the write is attempted up to 10 times
        /// with 200 milliseconds between attempts.
        /// </remarks>
        internal static string WriteFileTextWithRetry(string path, string text)
        {
            var writeRetry = new LinearRetryPolicy(
                typeof(IOException),
                maxAttempts:   10,
                retryInterval: TimeSpan.FromMilliseconds(200));

            var writeTask = writeRetry.InvokeAsync(
                async () =>
                {
                    await Task.CompletedTask;

                    File.WriteAllText(path, text);
                });

            // Block until the write succeeds or the policy gives up.

            writeTask.Wait();

            return text;
        }
示例#10
0
        /// <summary>
        /// Checks that an action which succeeds on its first call is invoked exactly once
        /// and that its return value is passed back to the caller.
        /// </summary>
        public async Task SuccessImmediate_Result()
        {
            var attemptTimes = new List<DateTime>();
            var retryPolicy  = new LinearRetryPolicy(TransientDetector);

            var result = await retryPolicy.InvokeAsync(
                async () =>
                {
                    attemptTimes.Add(DateTime.UtcNow);
                    await Task.Delay(0);

                    return "WOOHOO!";
                });

            // Exactly one attempt should have been recorded.

            Assert.Single(attemptTimes);
            Assert.Equal("WOOHOO!", result);
        }
示例#11
0
        /// <summary>
        /// Signals the Docker orchestrator to drain all service tasks from a node.
        /// </summary>
        /// <param name="nodeName">Identifies the target node.</param>
        /// <exception cref="KeyNotFoundException">Thrown if the named node does not exist.</exception>
        /// <exception cref="InvalidOperationException">Thrown if the node is not part of the swarm.</exception>
        public void DrainNode(string nodeName)
        {
            Covenant.Requires<ArgumentNullException>(!string.IsNullOrEmpty(nodeName));

            var targetNode = hive.GetNode(nodeName);

            if (!targetNode.Metadata.InSwarm)
            {
                throw new InvalidOperationException($"Node [{nodeName}] is not part of the swarm.");
            }

            // Transient errors have been seen here, so the command is retried a few
            // times: any exception counts as retryable, for up to 5 attempts spaced
            // 5 seconds apart.

            var manager     = hive.GetReachableManager();
            var retryPolicy = new LinearRetryPolicy(typeof(Exception), maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(5));
            var command     = $"docker node update --availability drain {nodeName}";

            retryPolicy.InvokeAsync(
                async () =>
                {
                    var commandResponse = manager.SudoCommand(command);

                    // A non-zero exit code is converted into an exception so that
                    // the policy will retry the command.

                    if (commandResponse.ExitCode != 0)
                    {
                        throw new Exception(commandResponse.ErrorSummary);
                    }

                    await Task.CompletedTask;
                }).Wait();

            // $todo(jeff.lill):
            //
            // Ideally, we'd wait for all of the service tasks to stop, but there
            // appears to be no easy way to check for this other than listing all
            // of the hive services and then running
            //
            //      docker service ps SERVICE
            //
            // for each until none report running on this node.
            //
            // A hacky alternative would be to list the local containers and try
            // to determine which ones look like service tasks by examining the
            // container names.
            //
            // For now, simply pause for a fixed 30 seconds.

            Thread.Sleep(TimeSpan.FromSeconds(30));
        }
示例#12
0
        /// <summary>
        /// Checks that an action which succeeds on its first call is invoked exactly once.
        /// </summary>
        public async Task SuccessImmediate()
        {
            var attemptTimes = new List<DateTime>();
            var completed    = false;
            var retryPolicy  = new LinearRetryPolicy(TransientDetector);

            await retryPolicy.InvokeAsync(
                async () =>
                {
                    attemptTimes.Add(DateTime.UtcNow);
                    await Task.Delay(0);

                    completed = true;
                });

            // Exactly one attempt should have been recorded.

            Assert.Single(attemptTimes);
            Assert.True(completed);
        }
示例#13
0
        /// <summary>
        /// Checks the result-returning overload when the action throws a
        /// <see cref="NotImplementedException"/> on its first call: the exception must
        /// surface to the caller after a single attempt.
        /// </summary>
        public async Task FailImmediate_Result()
        {
            var attemptTimes = new List<DateTime>();
            var retryPolicy  = new LinearRetryPolicy(TransientDetector);

            await Assert.ThrowsAsync<NotImplementedException>(
                async () =>
                {
                    await retryPolicy.InvokeAsync<string>(
                        async () =>
                        {
                            attemptTimes.Add(DateTime.UtcNow);
                            await Task.Delay(0);

                            throw new NotImplementedException();
                        });
                });

            // The action must not have been retried.

            Assert.Single(attemptTimes);
        }
示例#14
0
        /// <summary>
        /// Checks the result-returning overload when every attempt fails with a transient
        /// error: the action must be tried <c>MaxAttempts</c> times and the
        /// <c>TransientException</c> must then surface to the caller.
        /// </summary>
        public async Task FailAll_Result()
        {
            var attemptTimes = new List<DateTime>();
            var retryPolicy  = new LinearRetryPolicy(TransientDetector);

            await Assert.ThrowsAsync<TransientException>(
                async () =>
                {
                    await retryPolicy.InvokeAsync<string>(
                        async () =>
                        {
                            attemptTimes.Add(DateTime.UtcNow);
                            await Task.Delay(0);

                            throw new TransientException();
                        });
                });

            // Every permitted attempt should have been used.

            Assert.Equal(retryPolicy.MaxAttempts, attemptTimes.Count);
            VerifyIntervals(attemptTimes, retryPolicy);
        }
示例#15
0
        /// <summary>
        /// Restarts the NATS container to clear any previous state and returns the
        /// new client connection.
        /// </summary>
        /// <returns>The newly created connection (also assigned to <c>Connection</c>).</returns>
        public new IConnection Restart()
        {
            base.Restart();

            // Dispose any connection left over from before the restart.

            if (Connection != null)
            {
                Connection.Dispose();
                Connection = null;
            }

            // Reconnect, retrying on any exception (the detector below always
            // returns true).

            var connectRetry      = new LinearRetryPolicy(e => true, 20, TimeSpan.FromSeconds(0.5));
            var connectionFactory = new ConnectionFactory();

            connectRetry.InvokeAsync(
                async () =>
                {
                    // The server URI is rebuilt on every attempt.

                    Connection = connectionFactory.CreateConnection($"nats://{GetHostInterface(hostInterface, forConnection: true)}:4222");

                    await Task.CompletedTask;
                }).Wait();

            return Connection;
        }
示例#16
0
        /// <summary>
        /// Checks the result-returning overload when the action fails with a transient
        /// error on every attempt except the last permitted one, which returns a string
        /// that must reach the caller.
        /// </summary>
        public async Task SuccessDelayed_Result()
        {
            var attemptTimes = new List<DateTime>();
            var retryPolicy  = new LinearRetryPolicy(TransientDetector);

            var result = await retryPolicy.InvokeAsync(
                async () =>
                {
                    attemptTimes.Add(DateTime.UtcNow);
                    await Task.CompletedTask;

                    // Keep failing until the final permitted attempt.

                    var isFinalAttempt = attemptTimes.Count >= retryPolicy.MaxAttempts;

                    if (!isFinalAttempt)
                    {
                        throw new TransientException();
                    }

                    return "WOOHOO!";
                });

            Assert.Equal("WOOHOO!", result);
            Assert.Equal(retryPolicy.MaxAttempts, attemptTimes.Count);
            VerifyIntervals(attemptTimes, retryPolicy);
        }
示例#17
0
        /// <summary>
        /// Checks that a policy configured with both an attempt limit (6) and a timeout
        /// (1.5 seconds, with a 0.5 second retry interval) stops retrying an always-failing
        /// action before the attempt limit is reached.
        /// </summary>
        public async Task Timeout()
        {
            const int attemptLimit = 6;

            var retryPolicy  = new LinearRetryPolicy(TransientDetector, maxAttempts: attemptLimit, retryInterval: TimeSpan.FromSeconds(0.5), timeout: TimeSpan.FromSeconds(1.5));
            var attemptTimes = new List<DateTime>();

            // The constructor arguments must be reflected by the policy properties.

            Assert.Equal(attemptLimit, retryPolicy.MaxAttempts);
            Assert.Equal(TimeSpan.FromSeconds(0.5), retryPolicy.RetryInterval);
            Assert.Equal(TimeSpan.FromSeconds(1.5), retryPolicy.Timeout);

            await Assert.ThrowsAsync<TransientException>(
                async () =>
                {
                    await retryPolicy.InvokeAsync(
                        async () =>
                        {
                            attemptTimes.Add(DateTime.UtcNow);
                            await Task.CompletedTask;

                            throw new TransientException();
                        });
                });

            // Fewer attempts than the limit must have been made.

            Assert.True(attemptTimes.Count < attemptLimit);
        }
示例#18
0
        /// <summary>
        /// Checks a policy constructed from a single exception type: the action throws that
        /// type on every attempt except the last permitted one, which succeeds.
        /// </summary>
        public async Task SuccessDelayedByType()
        {
            var retryPolicy  = new LinearRetryPolicy(typeof(NotReadyException));
            var attemptTimes = new List<DateTime>();
            var completed    = false;

            await retryPolicy.InvokeAsync(
                async () =>
                {
                    attemptTimes.Add(DateTime.UtcNow);
                    await Task.Delay(0);

                    // Keep failing until the final permitted attempt.

                    var isFinalAttempt = attemptTimes.Count >= retryPolicy.MaxAttempts;

                    if (!isFinalAttempt)
                    {
                        throw new NotReadyException();
                    }

                    completed = true;
                });

            Assert.True(completed);
            Assert.Equal(retryPolicy.MaxAttempts, attemptTimes.Count);
            VerifyIntervals(attemptTimes, retryPolicy);
        }
示例#19
0
        /// <summary>
        /// <para>
        /// Used to temporarily modify the <b>hosts</b> file used by the DNS resolver
        /// for debugging or other purposes.
        /// </para>
        /// <note>
        /// <b>WARNING:</b> Modifying the <b>hosts</b> file will impact all processes
        /// on the system, not just the current one and this is designed to be used by
        /// a single process at a time.
        /// </note>
        /// </summary>
        /// <param name="hostEntries">A dictionary mapping the hostnames to an IP address or <c>null</c>.</param>
        /// <param name="section">
        /// <para>
        /// Optionally specifies the string to use to mark the hostnames section.  This
        /// defaults to <b>MODIFY</b> which will delimit the section with <b># NEON-BEGIN-MODIFY</b>
        /// and <b># NEON-END-MODIFY</b>.  You may pass a different string to identify a custom section.
        /// </para>
        /// <note>
        /// The string passed must be a valid DNS hostname label that must begin with a letter
        /// followed by letters, digits or dashes.  The maximum length is 63 characters.
        /// </note>
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="hostEntries"/> is non-null and <paramref name="section"/>
        /// is <c>null</c>, empty or whitespace.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="section"/> is not a valid DNS hostname label.
        /// </exception>
        /// <exception cref="NotSupportedException">
        /// Thrown for XAMARIN builds and for operating systems other than Windows, Linux and OSX.
        /// </exception>
        /// <remarks>
        /// <note>
        /// This method requires elevated administrative privileges.
        /// </note>
        /// <para>
        /// This method adds or removes a temporary section of host entry definitions
        /// delimited by special comment lines.  When <paramref name="hostEntries"/> is
        /// non-null and non-empty, the section will be added or updated.  Otherwise, the
        /// section will be removed.
        /// </para>
        /// <para>
        /// You can remove all host sections by passing both <paramref name="hostEntries"/>
        /// and <paramref name="section"/> as <c>null</c>.
        /// </para>
        /// </remarks>
        public static void ModifyLocalHosts(Dictionary<string, IPAddress> hostEntries = null, string section = "MODIFY")
        {
#if XAMARIN
            throw new NotSupportedException();
#else
            if (hostEntries != null && string.IsNullOrWhiteSpace(section))
            {
                throw new ArgumentNullException(nameof(section));
            }

            if (section != null)
            {
                // Check the length before indexing the first character so that an
                // empty string is reported as an invalid argument rather than
                // failing with an index out of range error.

                var sectionOK = section.Length > 0 && section.Length <= 63 && char.IsLetter(section[0]);

                if (sectionOK)
                {
                    foreach (var ch in section)
                    {
                        if (!char.IsLetterOrDigit(ch) && ch != '-')
                        {
                            sectionOK = false;
                            break;
                        }
                    }
                }

                if (!sectionOK)
                {
                    throw new ArgumentException("Suffix is not a valid DNS host name label.", nameof(section));
                }

                section = section.ToUpperInvariant();
            }

            string hostsPath;

            if (NeonHelper.IsWindows)
            {
                hostsPath = Path.Combine(Environment.GetEnvironmentVariable("windir"), "System32", "drivers", "etc", "hosts");
            }
            else if (NeonHelper.IsLinux || NeonHelper.IsOSX)
            {
                hostsPath = "/etc/hosts";
            }
            else
            {
                throw new NotSupportedException();
            }

            // We're seeing transient file locked errors when trying to update the [hosts] file.
            // My guess is that this is caused by the Windows DNS resolver opening the file as
            // READ/WRITE to prevent it from being modified while the resolver is reading any
            // changes.
            //
            // We're going to mitigate this by retrying a few times.
            //
            // It can take a bit of time for the Windows DNS resolver to pick up the change.
            //
            //      https://github.com/nforgeio/neonKUBE/issues/244
            //
            // We're going to mitigate this by writing a special marker host record [updateHost]
            // with a random IP address and then waiting for the DNS resolver to report the
            // correct address.
            //
            // Note that this only works on Windows and perhaps OSX.  This doesn't work on
            // Linux because there's no central DNS resolver there.  See the issue below for
            // more information:
            //
            //      https://github.com/nforgeio/neonKUBE/issues/271

            var updateHost    = section != null ? $"{section.ToLowerInvariant()}.neonforge-marker" : $"H-{Guid.NewGuid().ToString("d")}.neonforge-marker";
            var updateAddress = GetRandomAddress();
            var lines         = new List<string>();
            var existingHosts = new Dictionary<string, string>(StringComparer.InvariantCultureIgnoreCase);
            var different     = false;

            retryFile.InvokeAsync(
                async () =>
                {
                    var beginMarker = "# NEON-BEGIN-";
                    var endMarker   = "# NEON-END-";

                    if (section != null)
                    {
                        beginMarker += section;
                        endMarker   += section;
                    }

                    var inputLines = File.ReadAllLines(hostsPath);
                    var inSection  = false;

                    // Load lines of text from the current [hosts] file, without
                    // any lines for the named section.  We're going to parse those
                    // lines instead, so we can compare them against the [hostEntries]
                    // passed to determine whether we actually need to update the
                    // [hosts] file.
                    //
                    // All of the shared state is reset here because this action may
                    // run more than once when the policy retries.

                    lines.Clear();
                    existingHosts.Clear();
                    different = false;

                    foreach (var line in inputLines)
                    {
                        var trimmed = line.Trim();

                        if (trimmed == beginMarker || (section == null && trimmed.StartsWith(beginMarker)))
                        {
                            inSection = true;
                        }
                        else if (trimmed == endMarker || (section == null && trimmed.StartsWith(endMarker)))
                        {
                            inSection = false;
                        }
                        else
                        {
                            if (inSection)
                            {
                                // The line is within the named section, so we're going to parse
                                // the host entry (if any) and add it to [existingHosts].

                                if (trimmed.Length == 0 || trimmed.StartsWith("#"))
                                {
                                    // Ignore empty or comment lines (just to be safe).

                                    continue;
                                }

                                // We're going to simply assume that the address and hostname
                                // are separated by whitespace and that there's no other junk
                                // on the line (like comments added by the operator).  If there
                                // is any junk, we'll capture that too and then the entries
                                // won't match and we'll just end up rewriting the section
                                // (which is reasonable).
                                //
                                // Note that we're going to ignore the special marker entry.

                                var fields   = line.Split(new char[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
                                var address  = fields[0];
                                var hostname = fields.Length > 1 ? fields[1] : string.Empty;

                                if (!hostname.EndsWith(".neonforge-marker"))
                                {
                                    existingHosts[hostname] = address;
                                }
                            }
                            else
                            {
                                // The line is not in the named section, so we'll
                                // include it as is.

                                lines.Add(line);
                            }
                        }
                    }

                    // Compare the existing entries against the new ones and rewrite
                    // the [hosts] file only if they are different.

                    if (hostEntries != null && hostEntries.Count == existingHosts.Count)
                    {
                        var unchanged = true;

                        foreach (var item in hostEntries)
                        {
                            if (!existingHosts.TryGetValue(item.Key, out var existingAddress) ||
                                item.Value.ToString() != existingAddress)
                            {
                                unchanged = false;
                                break;
                            }
                        }

                        if (unchanged)
                        {
                            return;
                        }
                    }

                    // We're about to rewrite the file, so record that a change was made.
                    // This must also be set when the entry counts differ or when the
                    // section is being removed; otherwise the code after this action
                    // would return before flushing and polling the DNS resolver.

                    different = true;

                    // Append the section if it has any host entries.

                    if (hostEntries?.Count > 0)
                    {
                        lines.Add(beginMarker);

                        // Append the special update host with a random IP address.
                        //
                        // Addresses are padded to 16 characters for alignment.  [PadRight()]
                        // leaves longer addresses (e.g. IPv6) unchanged rather than failing
                        // on a negative padding count.

                        var address = updateAddress.ToString();

                        lines.Add($"        {address.PadRight(16)}    {updateHost}");

                        // Append the new entries.

                        foreach (var item in hostEntries)
                        {
                            address = item.Value.ToString();

                            lines.Add($"        {address.PadRight(16)}    {item.Key}");
                        }

                        lines.Add(endMarker);
                    }

                    File.WriteAllLines(hostsPath, lines.ToArray());
                    await Task.CompletedTask;
                }).Wait();

            if (!different)
            {
                // We didn't detect any changes to the section above, so the [hosts]
                // file was not rewritten and there's nothing for the resolver to reload.

                return;
            }

            if (NeonHelper.IsWindows)
            {
                // Flush the DNS cache (and I believe this reloads the [hosts] file too).

                var response = NeonHelper.ExecuteCapture("ipconfig", "/flushdns");

                if (response.ExitCode != 0)
                {
                    throw new ToolException($"ipconfig [exitcode={response.ExitCode}]: {response.ErrorText}");
                }
            }
            else if (NeonHelper.IsOSX)
            {
                // $todo(jefflill):
                //
                // We may need to clear the OSX DNS cache here.  Here's some information on
                // how to do this:
                //
                //      https://help.dreamhost.com/hc/en-us/articles/214981288-Flushing-your-DNS-cache-in-Mac-OS-X-and-Linux

                throw new NotImplementedException("$todo(jefflill): Purge the OSX DNS cache.");
            }

            if (NeonHelper.IsWindows || NeonHelper.IsOSX)
            {
                // Poll the local DNS resolver until it reports the correct address for the
                // marker host [updateHost].
                //
                // If [hostEntries] is not null and contains at least one entry, we'll lookup
                // the marker host and compare the IP address to ensure that the resolver has
                // loaded the new entries.
                //
                // If [hostEntries] is null or empty, we'll wait until there are no records
                // for the marker host to ensure that the resolver has reloaded the hosts
                // file after we removed the entries.
                //
                // Note that we're going to count the retries and after the 20th (about 2 seconds'
                // worth of 100ms polling), we're going to rewrite the [hosts] file.  I've seen
                // situations where it appears that the DNS resolver isn't re-reading [hosts]
                // after it's been updated.  I believe this is due to the file being written
                // twice, once to remove the section and then shortly thereafter to write the
                // section again.  I believe there's a chance that the resolver may miss the
                // second file change notification.  Writing the file again should trigger a
                // new notification.

                var retryCount = 0;

                retryReady.InvokeAsync(
                    async () =>
                    {
                        var addresses = await GetHostAddressesAsync(updateHost);

                        if (hostEntries?.Count > 0)
                        {
                            // Ensure that the new records have been loaded by the resolver.

                            if (addresses.Length != 1)
                            {
                                RewriteOn20thRetry(hostsPath, lines, ref retryCount);
                                throw new NotReadyException($"[{updateHost}] lookup is returning [{addresses.Length}] results.  There should be [1].");
                            }

                            if (addresses[0].ToString() != updateAddress.ToString())
                            {
                                RewriteOn20thRetry(hostsPath, lines, ref retryCount);
                                throw new NotReadyException($"DNS is [{updateHost}={addresses[0]}] rather than [{updateAddress}].");
                            }
                        }
                        else
                        {
                            // Ensure that the resolver recognizes that we removed the records.

                            if (addresses.Length != 0)
                            {
                                RewriteOn20thRetry(hostsPath, lines, ref retryCount);
                                throw new NotReadyException($"[{updateHost}] lookup is returning [{addresses.Length}] results.  There should be [0].");
                            }
                        }
                    }).Wait();
            }
#endif
        }
示例#20
0
        /// <summary>
        /// Configures the Kibana dashboard.
        /// </summary>
        /// <param name="firstManager">The first hive proxy manager.</param>
        /// <remarks>
        /// <para>
        /// Returns immediately when hive logging is disabled.  Otherwise the work is
        /// submitted to <c>firstManager.InvokeIdempotentAction()</c> under the
        /// <b>setup/log-kibana</b> action name.
        /// </para>
        /// <para>
        /// The action waits for Kibana to report a <b>green</b> state, saves the
        /// <b>logstash-*</b> index pattern, saves the default index and date display
        /// settings, and finally sets the <b>neon-log-kibana</b> private traffic
        /// manager rule.  Each Kibana API step is retried up to 30 times at 2 second
        /// intervals for errors detected by <c>TransientDetector.Http</c>.
        /// </para>
        /// </remarks>
        public void ConfigureKibana(SshProxy <NodeDefinition> firstManager)
        {
            if (!hive.Definition.Log.Enabled)
            {
                return;
            }

            firstManager.InvokeIdempotentAction("setup/log-kibana",
                                                () =>
            {
                using (var jsonClient = new JsonClient())
                {
                    var baseKibanaUri = $"http://{firstManager.PrivateAddress}:{HiveHostPorts.Kibana}";
                    var retry         = new LinearRetryPolicy(TransientDetector.Http, maxAttempts: 30, retryInterval: TimeSpan.FromSeconds(2));

                    // The Kibana API calls below require the [kbn-xsrf] header.

                    jsonClient.DefaultRequestHeaders.Add("kbn-xsrf", "true");

                    // Ensure that Kibana is ready before we submit any API requests.
                    // A non-green state is reported as a [TransientException] so the
                    // status query is attempted again by the retry policy.
                    //
                    // Note that the action delegate is synchronous, so we block on each
                    // retry task with [Wait()].

                    firstManager.Status = "wait for kibana";

                    retry.InvokeAsync(
                        async() =>
                    {
                        var response = await jsonClient.GetAsync <dynamic>($"{baseKibanaUri}/api/status");

                        if (response.status.overall.state != "green")
                        {
                            throw new TransientException($"Kibana [state={response.status.overall.state}]");
                        }
                    }).Wait();

                    // Add the index pattern to Kibana.  The [overwrite=true] query
                    // parameter is passed so a repeated POST targets the same saved object.

                    firstManager.Status = "configure kibana index pattern";

                    retry.InvokeAsync(
                        async() =>
                    {
                        dynamic indexPattern = new ExpandoObject();
                        dynamic attributes   = new ExpandoObject();

                        attributes.title         = "logstash-*";
                        attributes.timeFieldName = "@timestamp";

                        indexPattern.attributes = attributes;

                        await jsonClient.PostAsync($"{baseKibanaUri}/api/saved_objects/index-pattern/logstash-*?overwrite=true", indexPattern);
                    }).Wait();

                    // Now we need to save a Kibana config document so that [logstash-*] will be
                    // the default index and the timestamp will be displayed as UTC and have a
                    // more useful terse format.
                    //
                    // All three settings are posted within a single retry scope, so a transient
                    // failure on any one of them causes all three to be posted again.

                    firstManager.Status = "configure kibana defaults";

                    retry.InvokeAsync(
                        async() =>
                    {
                        dynamic setting = new ExpandoObject();

                        setting.value = "logstash-*";
                        await jsonClient.PostAsync($"{baseKibanaUri}/api/kibana/settings/defaultIndex", setting);

                        setting.value = "HH:mm:ss.SSS MM-DD-YYYY";
                        await jsonClient.PostAsync($"{baseKibanaUri}/api/kibana/settings/dateFormat", setting);

                        setting.value = "UTC";
                        await jsonClient.PostAsync($"{baseKibanaUri}/api/kibana/settings/dateFormat:tz", setting);
                    }).Wait();

                    // Set the Kibana traffic manager rule.

                    firstManager.Status = "kibana traffic manager rule";

                    var rule = new TrafficHttpRule()
                    {
                        Name     = "neon-log-kibana",
                        System   = true,
                        Log      = true,
                        Resolver = null
                    };

                    rule.Frontends.Add(
                        new TrafficHttpFrontend()
                    {
                        ProxyPort = HiveHostPorts.ProxyPrivateKibanaDashboard
                    });

                    rule.Backends.Add(
                        new TrafficHttpBackend()
                    {
                        Server = "neon-log-kibana",
                        Port   = NetworkPorts.Kibana
                    });

                    hive.PrivateTraffic.SetRule(rule);

                    firstManager.Status = string.Empty;
                }
            });
        }
示例#21
0
        /// <summary>
        /// Implements the service as a <see cref="Task"/>.
        /// </summary>
        /// <returns>The <see cref="Task"/>.</returns>
        /// <remarks>
        /// <para>
        /// Creates an <c>AsyncPeriodicTask</c> that polls at <c>pollInterval</c>.  Each poll
        /// fetches the DNS answers MD5 from Consul and compares it with the MD5 recorded
        /// after the last successful synchronization.  When the values differ, or when the
        /// verify timer has fired, the host entries are downloaded from Consul and written
        /// below the marker line in the PowerDNS hosts file.
        /// </para>
        /// <para>
        /// The reload signal file is written only when the hosts file contents actually
        /// changed.  <see cref="IOException"/>s raised while accessing the hosts file are
        /// retried up to 5 times at 1 second intervals.
        /// </para>
        /// </remarks>
        private static async Task RunAsync()
        {
            var localMD5    = string.Empty;
            var remoteMD5   = "[unknown]";
            var verifyTimer = new PolledTimer(verifyInterval, autoReset: true);

            var periodicTask =
                new AsyncPeriodicTask(
                    pollInterval,
                    onTaskAsync:
                    async() =>
            {
                log.LogDebug(() => "Starting poll");
                log.LogDebug(() => "Fetching DNS answers MD5 from Consul.");

                // A missing MD5 key is normalized to the same placeholder used
                // to initialize [remoteMD5].

                remoteMD5 = (await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsMd5Key, terminator.CancellationToken)) ?? "[unknown]";

                var verify = verifyTimer.HasFired;

                if (verify)
                {
                    // Under normal circumstances, we should never see the reload signal file
                    // here because the [neon-dns-loader] service should have deleted it after
                    // handling the last change signal.
                    //
                    // This probably means that [neon-dns-loader] is not running or if this service
                    // is configured with POLL_INTERVAL being so short that [neon-dns-loader]
                    // hasn't had a chance to handle the previous signal.

                    if (File.Exists(reloadSignalPath))
                    {
                        log.LogWarn("[neon-dns-loader] service doesn't appear to be running because the reload signal file is present.");
                    }
                }

                if (!verify && localMD5 == remoteMD5)
                {
                    log.LogDebug(() => "DNS answers are unchanged.");
                }
                else
                {
                    if (localMD5 == remoteMD5)
                    {
                        log.LogDebug(() => "DNS answers have not changed but we're going to verify that we have the correct hosts anyway.");
                    }
                    else
                    {
                        log.LogDebug(() => "DNS answers have changed.");
                    }

                    log.LogDebug(() => "Fetching DNS answers.");

                    var hostsTxt = await consul.KV.GetStringOrDefault(HiveConst.ConsulDnsHostsKey, terminator.CancellationToken);

                    if (hostsTxt == null)
                    {
                        log.LogWarn(() => "DNS answers do not exist on Consul.  Is [neon-dns-mon] functioning properly?");
                    }
                    else
                    {
                        var marker = "# -------- NEON-DNS --------";

                        // We have the host entries from Consul.  We need to add these onto the
                        // end of the PowerDNS hosts file [powerDnsHostsPath], replacing any host
                        // entries written during a previous run.
                        //
                        // We're going to use the special [marker] line defined above to separate
                        // the built-in hosts (above the line) from the dynamic hosts we're
                        // generating here (which will be below the line).  Note that this line
                        // won't exist the first time this service runs, so we'll just add it.
                        //
                        // Note that it's possible that the PowerDNS Recursor might be reading this
                        // file while we're trying to write it.  We're going to treat these as
                        // transient errors and retry.

                        var retry = new LinearRetryPolicy(typeof(IOException), maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(1));

                        await retry.InvokeAsync(
                            async() =>
                        {
                            using (var stream = new FileStream(powerDnsHostsPath, FileMode.Open, FileAccess.ReadWrite))
                            {
                                // Read a copy of the hosts file as bytes so we can compare
                                // the old version with the new one generated below for changes.

                                var orgHostBytes = stream.ReadToEnd();

                                stream.Position = 0;

                                // Generate the new hosts file.

                                var sbHosts = new StringBuilder();

                                // Read the hosts file up to but not including the special marker
                                // line (if it's present).  The marker is a fixed string that we
                                // wrote ourselves, so it is matched with an ordinal comparison
                                // rather than the culture-sensitive default.

                                using (var reader = new StreamReader(stream, Encoding.UTF8, true, 32 * 1024, leaveOpen: true))
                                {
                                    foreach (var line in reader.Lines())
                                    {
                                        if (line.StartsWith(marker, StringComparison.Ordinal))
                                        {
                                            break;
                                        }

                                        sbHosts.AppendLine(line);
                                    }
                                }

                                // Strip any trailing whitespace from the hosts file so we'll
                                // be able to leave a nice blank line between the end of the
                                // original file and the special marker line.

                                var text = sbHosts.ToString().TrimEnd();

                                sbHosts.Clear();
                                sbHosts.AppendLine(text);

                                // Append the marker line, followed by dynamic host
                                // entries we downloaded from Consul.

                                sbHosts.AppendLine();
                                sbHosts.AppendLine(marker);
                                sbHosts.AppendLine();
                                sbHosts.Append(hostsTxt);

                                // Generate the new host file bytes, taking care to ensure that
                                // we're using Linux style line endings and then update the
                                // hosts file if anything changed.

                                var hostsText    = NeonHelper.ToLinuxLineEndings(sbHosts.ToString());
                                var newHostBytes = Encoding.UTF8.GetBytes(hostsText);

                                if (NeonHelper.ArrayEquals(orgHostBytes, newHostBytes))
                                {
                                    log.LogDebug(() => $"[{powerDnsHostsPath}] file is up-to-date.");
                                }
                                else
                                {
                                    log.LogDebug(() => $"[{powerDnsHostsPath}] is being updated.");

                                    // Truncate before writing so nothing from a longer
                                    // previous version is left behind.

                                    stream.Position = 0;
                                    stream.SetLength(0);
                                    stream.Write(newHostBytes);

                                    // Signal to the local [neon-dns-loader] systemd service that it needs
                                    // to have PowerDNS Recursor reload the hosts file.

                                    File.WriteAllText(reloadSignalPath, "reload now");
                                }
                            }

                            await Task.CompletedTask;
                        });

                        // We've successfully synchronized the local hosts file with
                        // the Consul DNS settings.

                        localMD5 = remoteMD5;
                    }
                }

                // Log completion for every poll that gets this far (not just the ones
                // that processed the hosts file) so that each "Starting poll" entry
                // is paired with a "Finished poll" entry.

                log.LogDebug(() => "Finished poll");

                // NOTE(review): presumably [false] tells the periodic task to keep
                // polling -- confirm against [AsyncPeriodicTask].

                return false;
            },
                    onExceptionAsync:
                    async e =>
            {
                // Log the failure.
                // NOTE(review): the meaning of the [false] result is defined by
                // [AsyncPeriodicTask] -- presumably "keep running"; confirm.

                log.LogError(e);
                return(await Task.FromResult(false));
            },
                    onTerminateAsync:
                    async() =>
            {
                log.LogInfo(() => "Terminating");
                await Task.CompletedTask;
            });

            terminator.AddDisposable(periodicTask);
            await periodicTask.Run();
        }
示例#22
0
        /// <inheritdoc/>
        public void Run(ModuleContext context)
        {
            var    hive = HiveHelper.Hive;
            string hostname;

            if (!context.ValidateArguments(context.Arguments, validModuleArgs))
            {
                context.Failed = true;
                return;
            }

            // Obtain common arguments.

            context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [state]");

            if (!context.Arguments.TryGetValue <string>("state", out var state))
            {
                state = "present";
            }

            state = state.ToLowerInvariant();

            if (context.HasErrors)
            {
                return;
            }

            var manager      = hive.GetReachableManager();
            var sbErrorNodes = new StringBuilder();

            // Determine whether the registry service is already deployed and
            // also retrieve the registry credentials from Vault if present.
            // Note that the current registry hostname will be persisted to
            // Consul at [neon/service/neon-registry/hostname] when a registry
            // is deployed.

            context.WriteLine(AnsibleVerbosity.Trace, $"Inspecting the [neon-registry] service.");

            var currentService = hive.Docker.InspectService("neon-registry");

            context.WriteLine(AnsibleVerbosity.Trace, $"Getting current registry hostname from Consul.");

            var currentHostname = hive.Registry.GetLocalHostname();
            var currentSecret   = hive.Registry.GetLocalSecret();
            var currentImage    = currentService?.Spec.TaskTemplate.ContainerSpec.ImageWithoutSHA;

            var currentCredentials =        // Set blank properties for the change detection below.
                                     new RegistryCredentials()
            {
                Registry = string.Empty,
                Username = string.Empty,
                Password = string.Empty
            };

            if (!string.IsNullOrEmpty(currentHostname))
            {
                context.WriteLine(AnsibleVerbosity.Trace, $"Reading existing registry credentials for [{currentHostname}].");

                currentCredentials = hive.Registry.GetCredentials(currentHostname);

                if (currentCredentials != null)
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"Registry credentials for [{currentHostname}] exist.");
                }
                else
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"Registry credentials for [{currentHostname}] do not exist.");
                }
            }

            // Obtain the current registry TLS certificate (if any).

            var currentCertificate = hive.Certificate.Get("neon-registry");

            // Perform the operation.

            switch (state)
            {
            case "absent":

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [hostname]");

                if (!context.Arguments.TryGetValue <string>("hostname", out hostname))
                {
                    throw new ArgumentException($"[hostname] module argument is required.");
                }

                if (currentService == null)
                {
                    context.WriteLine(AnsibleVerbosity.Important, "[neon-registry] is not currently deployed.");
                }

                if (context.CheckMode)
                {
                    context.WriteLine(AnsibleVerbosity.Important, $"Local registry will be removed when CHECK-MODE is disabled.");
                    return;
                }

                if (currentService == null)
                {
                    return;     // Nothing to do
                }

                context.Changed = true;

                // Logout of the registry.

                if (currentCredentials != null)
                {
                    context.WriteLine(AnsibleVerbosity.Trace, $"Logging the hive out of the [{currentHostname}] registry.");
                    hive.Registry.Logout(currentHostname);
                }

                // Delete the [neon-registry] service and volume.  Note that
                // the volume should exist on all of the manager nodes.

                context.WriteLine(AnsibleVerbosity.Trace, $"Removing the [neon-registry] service.");
                manager.DockerCommand(RunOptions.None, "docker", "service", "rm", "neon-registry");

                context.WriteLine(AnsibleVerbosity.Trace, $"Removing the [neon-registry] volumes.");

                var volumeRemoveActions = new List <Action>();
                var volumeRetryPolicy   = new LinearRetryPolicy(typeof(TransientException), maxAttempts: 10, retryInterval: TimeSpan.FromSeconds(2));

                foreach (var node in hive.Managers)
                {
                    volumeRemoveActions.Add(
                        () =>
                    {
                        // $hack(jeff.lill):
                        //
                        // Docker service removal appears to be synchronous but the removal of the
                        // actual service task containers is not.  We're going to detect this and
                        // throw a [TransientException] and then retry.

                        using (var clonedNode = node.Clone())
                        {
                            lock (context)
                            {
                                context.WriteLine(AnsibleVerbosity.Trace, $"Removing [neon-registry] volume on [{clonedNode.Name}].");
                            }

                            volumeRetryPolicy.InvokeAsync(
                                async() =>
                            {
                                var response = clonedNode.DockerCommand(RunOptions.None, "docker", "volume", "rm", "neon-registry");

                                if (response.ExitCode != 0)
                                {
                                    var message = $"Error removing [neon-registry] volume from [{clonedNode.Name}: {response.ErrorText}";

                                    lock (syncLock)
                                    {
                                        context.WriteLine(AnsibleVerbosity.Info, message);
                                    }

                                    if (response.AllText.Contains("volume is in use"))
                                    {
                                        throw new TransientException(message);
                                    }
                                }
                                else
                                {
                                    lock (context)
                                    {
                                        context.WriteLine(AnsibleVerbosity.Trace, $"Removed [neon-registry] volume on [{clonedNode.Name}].");
                                    }
                                }

                                await Task.Delay(0);
                            }).Wait();
                        }
                    });
                }

                NeonHelper.WaitForParallel(volumeRemoveActions);

                // Remove the traffic manager rule and certificate.

                context.WriteLine(AnsibleVerbosity.Trace, $"Removing the [neon-registry] traffic manager rule.");
                hive.PublicTraffic.RemoveRule("neon-registry");
                context.WriteLine(AnsibleVerbosity.Trace, $"Removing the [neon-registry] traffic manager certificate.");
                hive.Certificate.Remove("neon-registry");

                // Remove any related Consul state.

                context.WriteLine(AnsibleVerbosity.Trace, $"Removing the [neon-registry] Consul [hostname] and [secret].");
                hive.Registry.SetLocalHostname(null);
                hive.Registry.SetLocalSecret(null);

                // Logout the hive from the registry.

                context.WriteLine(AnsibleVerbosity.Trace, $"Logging the hive out of the [{currentHostname}] registry.");
                hive.Registry.Logout(currentHostname);

                // Remove the hive DNS host entry.

                context.WriteLine(AnsibleVerbosity.Trace, $"Removing the [{currentHostname}] registry DNS hosts entry.");
                hive.Dns.Remove(hostname);
                break;

            case "present":

                if (!hive.Definition.HiveFS.Enabled)
                {
                    context.WriteErrorLine("The local registry service requires hive CephFS.");
                    return;
                }

                // Parse the [hostname], [certificate], [username] and [password] arguments.

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [hostname]");

                if (!context.Arguments.TryGetValue <string>("hostname", out hostname))
                {
                    throw new ArgumentException($"[hostname] module argument is required.");
                }

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [certificate]");

                if (!context.Arguments.TryGetValue <string>("certificate", out var certificatePem))
                {
                    throw new ArgumentException($"[certificate] module argument is required.");
                }

                if (!TlsCertificate.TryParse(certificatePem, out var certificate))
                {
                    throw new ArgumentException($"[certificate] is not a valid certificate.");
                }

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [username]");

                if (!context.Arguments.TryGetValue <string>("username", out var username))
                {
                    throw new ArgumentException($"[username] module argument is required.");
                }

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [password]");

                if (!context.Arguments.TryGetValue <string>("password", out var password))
                {
                    throw new ArgumentException($"[password] module argument is required.");
                }

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [secret]");

                if (!context.Arguments.TryGetValue <string>("secret", out var secret) || string.IsNullOrEmpty(secret))
                {
                    throw new ArgumentException($"[secret] module argument is required.");
                }

                context.WriteLine(AnsibleVerbosity.Trace, $"Parsing [image]");

                if (!context.Arguments.TryGetValue <string>("image", out var image))
                {
                    image = HiveConst.NeonProdRegistry + "/neon-registry:latest";
                }

                // Detect service changes.

                var hostnameChanged    = hostname != currentCredentials?.Registry;
                var usernameChanged    = username != currentCredentials?.Username;
                var passwordChanged    = password != currentCredentials?.Password;
                var secretChanged      = secret != currentSecret;
                var imageChanged       = image != currentImage;
                var certificateChanged = certificate?.CombinedPemNormalized != currentCertificate?.CombinedPemNormalized;
                var updateRequired     = hostnameChanged ||
                                         usernameChanged ||
                                         passwordChanged ||
                                         secretChanged ||
                                         imageChanged ||
                                         certificateChanged;

                if (hostnameChanged)
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"[hostname] changed from [{currentCredentials?.Registry}] --> [{hostname}]");
                }

                if (usernameChanged)
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"[username] changed from [{currentCredentials?.Username}] --> [{username}]");
                }

                if (usernameChanged)
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"[password] changed from [{currentCredentials?.Password}] --> [**REDACTED**]");
                }

                if (secretChanged)
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"[secret] changed from [{currentSecret}] --> [**REDACTED**]");
                }

                if (imageChanged)
                {
                    context.WriteLine(AnsibleVerbosity.Info, $"[image] changed from [{currentImage}] --> [{image}]");
                }

                if (certificateChanged)
                {
                    var currentCertRedacted = currentCertificate != null ? "**REDACTED**" : "**NONE**";

                    context.WriteLine(AnsibleVerbosity.Info, $"[certificate] changed from [{currentCertRedacted}] --> [**REDACTED**]");
                }

                // Handle CHECK-MODE.

                if (context.CheckMode)
                {
                    if (currentService == null)
                    {
                        context.WriteLine(AnsibleVerbosity.Important, $"Local registry will be deployed when CHECK-MODE is disabled.");
                        return;
                    }

                    if (updateRequired)
                    {
                        context.WriteLine(AnsibleVerbosity.Important, $"One or more of the arguments have changed so the registry will be updated when CHECK-MODE is disabled.");
                        return;
                    }

                    return;
                }

                // Create the hive DNS host entry we'll use to redirect traffic targeting the registry
                // hostname to the hive managers.  We need to do this because registry IP addresses
                // are typically public, typically targeting the external firewall or load balancer
                // interface.
                //
                // The problem is that hive nodes will generally be unable to connect to the
                // local managers through the firewall/load balancer because most network routers
                // block network traffic that originates from inside the hive, then leaves
                // to hit the external router interface with the expectation of being routed
                // back inside.  I believe this is an anti-spoofing security measure.

                var dnsRedirect = GetRegistryDnsEntry(hostname);

                // Perform the operation.

                if (currentService == null)
                {
                    context.WriteLine(AnsibleVerbosity.Important, $"[neon-registry] service needs to be created.");
                    context.Changed = true;

                    // The registry service isn't running, so we'll do a full deployment.

                    context.WriteLine(AnsibleVerbosity.Trace, $"Setting certificate.");
                    hive.Certificate.Set("neon-registry", certificate);

                    context.WriteLine(AnsibleVerbosity.Trace, $"Updating Consul settings.");
                    hive.Registry.SetLocalHostname(hostname);
                    hive.Registry.SetLocalSecret(secret);

                    context.WriteLine(AnsibleVerbosity.Trace, $"Adding hive DNS host entry for [{hostname}].");
                    hive.Dns.Set(dnsRedirect, waitUntilPropagated: true);

                    context.WriteLine(AnsibleVerbosity.Trace, $"Writing traffic manager rule.");
                    hive.PublicTraffic.SetRule(GetRegistryTrafficManagerRule(hostname));

                    context.WriteLine(AnsibleVerbosity.Trace, $"Creating the [neon-registry] service.");

                    var createResponse = manager.DockerCommand(RunOptions.None,
                                                               "docker service create",
                                                               "--name", "neon-registry",
                                                               "--mode", "global",
                                                               "--constraint", "node.role==manager",
                                                               "--env", $"USERNAME={username}",
                                                               "--env", $"PASSWORD={password}",
                                                               "--env", $"SECRET={secret}",
                                                               "--env", $"LOG_LEVEL=info",
                                                               "--env", $"READ_ONLY=false",
                                                               "--mount", "type=volume,src=neon-registry,volume-driver=neon,dst=/var/lib/neon-registry",
                                                               "--network", "neon-public",
                                                               "--restart-delay", "10s",
                                                               image);

                    if (createResponse.ExitCode != 0)
                    {
                        context.WriteErrorLine($"[neon-registry] service create failed: {createResponse.ErrorText}");
                        return;
                    }

                    context.WriteLine(AnsibleVerbosity.Trace, $"Service created.");
                    context.WriteLine(AnsibleVerbosity.Trace, $"Wait for [neon-registry] service to stabilize (30s).");
                    Thread.Sleep(TimeSpan.FromSeconds(30));
                    context.WriteLine(AnsibleVerbosity.Trace, $"Logging the hive into the [{hostname}] registry.");
                    hive.Registry.Login(hostname, username, password);
                }
                else if (updateRequired)
                {
                    context.WriteLine(AnsibleVerbosity.Important, $"[neon-registry] service update is required.");
                    context.Changed = true;

                    // Update the service and related settings as required.

                    if (certificateChanged)
                    {
                        context.WriteLine(AnsibleVerbosity.Trace, $"Updating certificate.");
                        hive.Certificate.Set("neon-registry", certificate);
                    }

                    if (hostnameChanged)
                    {
                        context.WriteLine(AnsibleVerbosity.Trace, $"Updating traffic manager rule.");
                        hive.PublicTraffic.SetRule(GetRegistryTrafficManagerRule(hostname));

                        context.WriteLine(AnsibleVerbosity.Trace, $"Updating hive DNS host entry for [{hostname}] (60 seconds).");
                        hive.Dns.Set(dnsRedirect, waitUntilPropagated: true);

                        context.WriteLine(AnsibleVerbosity.Trace, $"Updating local hive hostname [{hostname}].");
                        hive.Registry.SetLocalHostname(hostname);

                        if (!string.IsNullOrEmpty(currentHostname))
                        {
                            context.WriteLine(AnsibleVerbosity.Trace, $"Logging the hive out of the [{currentHostname}] registry.");
                            hive.Registry.Logout(currentHostname);
                        }
                    }

                    if (secretChanged)
                    {
                        context.WriteLine(AnsibleVerbosity.Trace, $"Updating local hive secret.");
                        hive.Registry.SetLocalSecret(secret);
                    }

                    context.WriteLine(AnsibleVerbosity.Trace, $"Updating service.");

                    var updateResponse = manager.DockerCommand(RunOptions.None,
                                                               "docker service update",
                                                               "--env-add", $"USERNAME={username}",
                                                               "--env-add", $"PASSWORD={password}",
                                                               "--env-add", $"SECRET={secret}",
                                                               "--env-add", $"LOG_LEVEL=info",
                                                               "--env-add", $"READ_ONLY=false",
                                                               "--image", image,
                                                               "neon-registry");

                    if (updateResponse.ExitCode != 0)
                    {
                        context.WriteErrorLine($"[neon-registry] service update failed: {updateResponse.ErrorText}");
                        return;
                    }

                    context.WriteLine(AnsibleVerbosity.Trace, $"Service updated.");

                    context.WriteLine(AnsibleVerbosity.Trace, $"Logging the hive into the [{hostname}] registry.");
                    hive.Registry.Login(hostname, username, password);
                }
                else
                {
                    context.WriteLine(AnsibleVerbosity.Important, $"[neon-registry] service update is not required but we're logging all nodes into [{hostname}] to ensure hive consistency.");
                    hive.Registry.Login(hostname, username, password);

                    context.Changed = false;
                }
                break;

            case "prune":

                if (currentService == null)
                {
                    context.WriteLine(AnsibleVerbosity.Important, "Registry service is not running.");
                    return;
                }

                if (context.CheckMode)
                {
                    context.WriteLine(AnsibleVerbosity.Important, "Registry will be pruned when CHECK-MODE is disabled.");
                    return;
                }

                context.Changed = true;     // Always set this to TRUE for prune.

                // We're going to upload a script to one of the managers that handles
                // putting the [neon-registry] service into READ-ONLY mode, running
                // the garbage collection container and then restoring [neon-registry]
                // to READ/WRITE mode.
                //
                // The nice thing about this is that the operation will continue to
                // completion on the manager node even if we lose the SSH connection.

                var updateScript =
                    $@"#!/bin/bash
# Update [neon-registry] to READ-ONLY mode:

docker service update --env-rm READ_ONLY --env-add READ_ONLY=true neon-registry

# Prune the registry:

docker run \
   --name neon-registry-prune \
   --restart-condition=none \
   --mount type=volume,src=neon-registry,volume-driver=neon,dst=/var/lib/neon-registry \
   {HiveConst.NeonProdRegistry}/neon-registry garbage-collect

# Restore [neon-registry] to READ/WRITE mode:

docker service update --env-rm READ_ONLY --env-add READ_ONLY=false neon-registry
";
                var bundle = new CommandBundle("./collect.sh");

                bundle.AddFile("collect.sh", updateScript, isExecutable: true);

                context.WriteLine(AnsibleVerbosity.Info, "Registry prune started.");

                var pruneResponse = manager.SudoCommand(bundle, RunOptions.None);

                if (pruneResponse.ExitCode != 0)
                {
                    context.WriteErrorLine($"The prune operation failed.  The registry may be running in READ-ONLY mode: {pruneResponse.ErrorText}");
                    return;
                }

                context.WriteLine(AnsibleVerbosity.Info, "Registry prune completed.");
                break;

            default:

                throw new ArgumentException($"[state={state}] is not one of the valid choices: [present], [absent], or [prune].");
            }
        }
示例#23
0
        /// <summary>
        /// Configures Kibana through its HTTP API: saves the <b>logstash-*</b> index
        /// pattern and then sets the <b>defaultIndex</b>, <b>dateFormat</b> and
        /// <b>dateFormat:tz</b> settings.  Each group of requests is run through the
        /// <c>retry</c> policy and the method blocks until the requests complete.
        /// </summary>
        public void KibanaSetup()
        {
            Log.LogInfo("Setting up Kibana index patterns.");
            using (var jsonClient = new JsonClient())
            {
                // NOTE(review): the statement below appears to have been truncated/redacted
                // in this copy of the source (the URI literal runs straight into "@timestamp").
                // The declaration of [retry] and the start of the first retried lambda that
                // builds [indexPattern]/[attributes] are not visible here, so this method is
                // not syntactically complete as shown.  Restore from the upstream file before
                // modifying the code.

                jsonClient.BaseAddress = KubernetesClientConfiguration.IsInCluster() ?
                                         this.ServiceMap[NeonServices.Kibana].Endpoints.Default.Uri : new Uri($"http://*****:*****@timestamp";

                    indexPattern.attributes = attributes;

                    // Save (overwriting any existing) [logstash-*] index pattern.

                    await jsonClient.PostAsync($"api/saved_objects/index-pattern/logstash-*?overwrite=true", indexPattern);
                }).Wait();  // Blocks synchronously until the retried operation finishes.

                // Now we need to save a Kibana config document so that [logstash-*] will be
                // the default index and the timestamp will be displayed as UTC and have a
                // more useful terse format.

                retry.InvokeAsync(
                    async() =>
                {
                    // The same dynamic object is reused for all three settings; only
                    // its [value] property changes between the POST requests.

                    dynamic setting = new ExpandoObject();

                    setting.value = "logstash-*";
                    await jsonClient.PostAsync($"api/kibana/settings/defaultIndex", setting);

                    setting.value = "HH:mm:ss.SSS MM-DD-YYYY";
                    await jsonClient.PostAsync($"api/kibana/settings/dateFormat", setting);

                    setting.value = "UTC";
                    await jsonClient.PostAsync($"api/kibana/settings/dateFormat:tz", setting);
                }).Wait();  // Blocks synchronously until the retried operation finishes.
            }
            Log.LogInfo("Kibana index patterns configured.");
        }
示例#24
0
        /// <summary>
        /// Starts the container using the instance fields: removes any leftover
        /// container with the same name, pulls the image (with retries) and then
        /// runs the container, recording its name and short ID.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the container cannot be launched.  A pull failure is thrown
        /// inside the retried operation and is observed via <see cref="Task.Wait()"/>,
        /// so it will typically surface wrapped in an <see cref="AggregateException"/>.
        /// </exception>
        private void StartContainer()
        {
            // Handle the special case where an earlier run of this container was
            // not stopped because the developer was debugging and interrupted
            // the unit tests before the fixture was disposed or a container with
            // the same name is already running for some other reason.
            //
            // We only do this when the container is actually named: an empty
            // [name=] filter would match every container on the machine.  Docker's
            // name filter also matches on substrings, so we list the names along
            // with the IDs and only remove containers whose name matches exactly.

            if (!string.IsNullOrEmpty(name))
            {
                var psArgs   = new string[] { "ps", "-a", "--filter", $"name={name}", "--format", "{{.ID}} {{.Names}}" };
                var psResult = NeonHelper.ExecuteCapture("docker", psArgs);

                if (psResult.ExitCode == 0)
                {
                    // One output line per matching container: "<id> <name>[,<name>...]".

                    foreach (var rawLine in psResult.OutputText.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries))
                    {
                        var fields = rawLine.Trim().Split(new char[] { ' ' }, 2);

                        if (fields.Length != 2)
                        {
                            continue;
                        }

                        var existingId = fields[0];
                        var exactMatch = false;

                        foreach (var existingName in fields[1].Split(','))
                        {
                            if (existingName.Trim().TrimStart('/') == name)
                            {
                                exactMatch = true;
                                break;
                            }
                        }

                        if (exactMatch)
                        {
                            NeonHelper.Execute("docker", new object[] { "rm", "--force", existingId });
                        }
                    }
                }
            }

            // Pull and then start the container.  Note that we're going to
            // retry the pull a few times to handle transient issues.

            var pullArgs  = NeonHelper.NormalizeExecArgs("pull", image);
            var pullRetry = new LinearRetryPolicy(TransientDetector.Always, maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(1));

            pullRetry.InvokeAsync(
                async () =>
                {
                    var pullResult = NeonHelper.ExecuteCapture("docker", pullArgs);

                    if (pullResult.ExitCode != 0)
                    {
                        throw new Exception($"Cannot pull container [{image}] - [exitcode={pullResult.ExitCode}]: {pullResult.ErrorText}");
                    }

                    await Task.CompletedTask;
                }).Wait();

            // Build the extra [docker run] options from the optional name and
            // environment variables.

            var extraArgs = new List<string>();

            if (!string.IsNullOrEmpty(name))
            {
                extraArgs.Add("--name");
                extraArgs.Add(name);
            }

            if (env != null)
            {
                foreach (var variable in env)
                {
                    extraArgs.Add("--env");
                    extraArgs.Add(variable);
                }
            }

            var runArgs   = NeonHelper.NormalizeExecArgs("run", dockerArgs, extraArgs.ToArray(), image, containerArgs);
            var runResult = NeonHelper.ExecuteCapture("docker", runArgs);

            if (runResult.ExitCode != 0)
            {
                throw new Exception($"Cannot launch container [{image}] - [exitcode={runResult.ExitCode}]: {runResult.ErrorText}");
            }

            // [docker run] is expected to print the full container ID; we keep the
            // conventional 12 character short form but don't fail when the output
            // is unexpectedly shorter than that.

            var fullId = runResult.OutputText.Trim();

            ContainerName = name;
            ContainerId   = fullId.Length > 12 ? fullId.Substring(0, 12) : fullId;
        }
示例#25
0
        /// <summary>
        /// Removes the local Docker registry from the hive: logs the hive out of the
        /// registry, removes the <b>neon-registry</b> service and its volume on every
        /// manager node, and then removes the related traffic manager rule, certificate,
        /// Consul settings and DNS host entry.
        /// </summary>
        /// <param name="progress">Optional action that will be called with a progress message.</param>
        /// <exception cref="HiveException">Thrown if no registry is deployed.</exception>
        /// <remarks>
        /// Volume removal is best-effort: a <b>volume is in use</b> failure is retried,
        /// any other failure is reported via <paramref name="progress"/> and the removal
        /// continues with the remaining steps.
        /// </remarks>
        public void RemoveLocalRegistry(Action<string> progress = null)
        {
            if (!HasLocalRegistry)
            {
                throw new HiveException("The [neon-registry] service is not deployed.");
            }

            var syncLock = new object();    // Serializes [progress] callbacks made from the parallel actions.
            var manager  = hive.GetReachableManager();
            var hostname = hive.Registry.GetLocalHostname();

            // Logout of the registry.

            progress?.Invoke($"Logging the hive out of the [{hostname}] registry.");
            hive.Registry.Logout(hostname);

            // Delete the [neon-registry] service and volume.  Note that
            // the volume should exist on all of the manager nodes.

            progress?.Invoke($"Removing the [neon-registry] service.");
            manager.DockerCommand(RunOptions.None, "docker", "service", "rm", "neon-registry");

            progress?.Invoke($"Removing the [neon-registry] volumes.");

            var volumeRemoveActions = new List<Action>();
            var volumeRetryPolicy   = new LinearRetryPolicy(typeof(TransientException), maxAttempts: 10, retryInterval: TimeSpan.FromSeconds(2));

            foreach (var node in hive.Managers)
            {
                volumeRemoveActions.Add(
                    () =>
                    {
                        // $hack(jeff.lill):
                        //
                        // Docker service removal appears to be synchronous but the removal of the
                        // actual service task containers is not.  We're going to detect this and
                        // throw a [TransientException] and then retry.

                        using (var clonedNode = node.Clone())
                        {
                            lock (syncLock)
                            {
                                progress?.Invoke($"Removing [neon-registry] volume on [{clonedNode.Name}].");
                            }

                            volumeRetryPolicy.InvokeAsync(
                                async () =>
                                {
                                    var response = clonedNode.DockerCommand(RunOptions.None, "docker", "volume", "rm", "neon-registry");

                                    if (response.ExitCode == 0)
                                    {
                                        lock (syncLock)
                                        {
                                            progress?.Invoke($"Removed [neon-registry] volume on [{clonedNode.Name}].");
                                        }
                                    }
                                    else if (response.AllText.Contains("volume is in use"))
                                    {
                                        // The service task container hasn't been removed yet: retry.

                                        throw new TransientException($"Error removing [neon-registry] volume from [{clonedNode.Name}]: {response.ErrorText}");
                                    }
                                    else
                                    {
                                        // Any other failure is not retried and doesn't abort the
                                        // overall removal, but we report it rather than dropping
                                        // it silently.

                                        lock (syncLock)
                                        {
                                            progress?.Invoke($"Unable to remove [neon-registry] volume on [{clonedNode.Name}]: {response.ErrorText}");
                                        }
                                    }

                                    await Task.CompletedTask;
                                }).Wait();
                        }
                    });
            }

            NeonHelper.WaitForParallel(volumeRemoveActions);

            // Remove the traffic manager rule and certificate.

            progress?.Invoke($"Removing the [neon-registry] traffic manager rule.");
            hive.PublicTraffic.RemoveRule("neon-registry");
            progress?.Invoke($"Removing the [neon-registry] traffic manager certificate.");
            hive.Certificate.Remove("neon-registry");

            // Remove any related Consul state.

            progress?.Invoke($"Removing the [neon-registry] Consul [hostname] and [secret].");
            hive.Registry.SetLocalHostname(null);
            hive.Registry.SetLocalSecret(null);

            // Logout the hive from the registry.
            //
            // NOTE(review): this repeats the logout performed at the top of the method;
            // it's kept as-is because it's unclear whether the second pass is intentional.

            progress?.Invoke($"Logging the hive out of the [{hostname}] registry.");
            hive.Registry.Logout(hostname);

            // Remove the hive DNS host entry.

            progress?.Invoke($"Removing the [{hostname}] registry DNS hosts entry.");
            hive.Dns.Remove(hostname);
        }
示例#26
0
        /// <summary>
        /// Removes a specific fixture section from the <b>hosts</b> file or all
        /// fixture sections if <paramref name="fixtureId"/> is <c>null</c>, and then
        /// waits until the section hostnames no longer resolve.
        /// </summary>
        /// <param name="fixtureId">
        /// Identifies the fixture section to be removed or <c>null</c> to
        /// remove all fixture sections.
        /// </param>
        private static void RemoveSection(string fixtureId = null)
        {
            var sb           = new StringBuilder();
            var changed      = false;
            var sectionGuids = new HashSet<string>();

            // Update the [hosts] file.

            retryFile.InvokeAsync(
                async () =>
                {
                    // This lambda may run more than once when the policy retries, so
                    // start each attempt from a clean slate.  Otherwise the lines read
                    // by a failed attempt would be appended again and written back
                    // to the file as duplicates.

                    sb.Clear();
                    sectionGuids.Clear();
                    changed = false;

                    if (File.Exists(HostsPath))
                    {
                        using (var reader = new StreamReader(new FileStream(HostsPath, FileMode.Open, FileAccess.ReadWrite)))
                        {
                            // With a NULL [fixtureId] the markers below are just the common
                            // prefixes, so they match the sections of every fixture.

                            var guid        = fixtureId ?? string.Empty;
                            var startMarker = $"# START-NEON-HOSTS-FIXTURE-{guid}";
                            var endMarker   = $"# END-NEON-HOSTS-FIXTURE-{guid}";
                            var inSection   = false;

                            foreach (var line in reader.Lines())
                            {
                                if (inSection)
                                {
                                    // Drop everything inside the section, including the end marker.

                                    if (line.StartsWith(endMarker, StringComparison.Ordinal))
                                    {
                                        inSection = false;
                                        changed   = true;
                                    }
                                }
                                else if (line.StartsWith(startMarker, StringComparison.Ordinal))
                                {
                                    // Extract the section GUID from the marker because we'll need
                                    // these below when we verify that the resolver has picked up
                                    // the changes.  The GUID is taken to be the text following
                                    // the last dash on the marker line.

                                    sectionGuids.Add(line.Substring(line.LastIndexOf('-') + 1));

                                    inSection = true;
                                    changed   = true;
                                }
                                else
                                {
                                    // Lines outside of a removed section are preserved.

                                    sb.AppendLine(line);
                                }
                            }
                        }
                    }

                    if (changed)
                    {
                        File.WriteAllText(HostsPath, sb.ToString());
                    }

                    await Task.CompletedTask;
                }).Wait();

            if (changed)
            {
                // We need to verify that the local DNS resolver has picked up the change
                // by verifying that none of the removed section hostnames resolve.

                retryReady.InvokeAsync(
                    async () =>
                    {
                        foreach (var sectionGuid in sectionGuids)
                        {
                            var hostname  = GetSectionHostname(sectionGuid);
                            var addresses = await GetHostAddressesAsync(hostname);

                            if (addresses.Length > 0)
                            {
                                throw new NotReadyException($"Waiting for [{hostname}] to be removed by the local DNS resolver.");
                            }
                        }
                    }).Wait();
            }
        }
示例#27
0
        /// <summary>
        /// Rebuilds the host node's <b>/etc/containers/registries.conf.d/00-neon-cluster.conf</b> file
        /// from the cluster's <see cref="V1NeonContainerRegistry"/> resources, signals CRI-O to reload
        /// any changes and also manages container registry logins.
        /// </summary>
        /// <returns>The tracking <see cref="Task"/>.</returns>
        /// <remarks>
        /// Login and logout failures are counted and logged but do not abort the method,
        /// so one misbehaving registry doesn't prevent the others from being processed.
        /// </remarks>
        private async Task UpdateContainerRegistriesAsync()
        {
            var registries = (await k8s.ListClusterCustomObjectAsync<V1NeonContainerRegistry>()).Items;

            // NOTE: Here's the documentation for the config file we're generating:
            //
            //      https://github.com/containers/image/blob/main/docs/containers-registries.conf.5.md
            //

            var sbRegistryConfig   = new StringBuilder();
            var sbSearchRegistries = new StringBuilder();

            // Configure any unqualified search registries.

            foreach (var registry in registries
                     .Where(r => r.Spec.SearchOrder >= 0)
                     .OrderBy(r => r.Spec.SearchOrder))
            {
                sbSearchRegistries.AppendWithSeparator($"\"{registry.Spec.Prefix}\"", ", ");
            }

            sbRegistryConfig.Append(
                $@"unqualified-search-registries = [{sbSearchRegistries}]
");

            // Configure any container registries including the local cluster.

            foreach (var registry in registries)
            {
                sbRegistryConfig.Append(
                    $@"
[[registry]]
prefix   = ""{registry.Spec.Prefix}""
insecure = {NeonHelper.ToBoolString(registry.Spec.Insecure)}
blocked  = {NeonHelper.ToBoolString(registry.Spec.Blocked)}
");

                if (!string.IsNullOrEmpty(registry.Spec.Location))
                {
                    sbRegistryConfig.AppendLine($"location = \"{registry.Spec.Location}\"");
                }
            }

            if (NeonHelper.IsLinux)
            {
                // Convert the generated config to Linux line endings and then compare the new
                // config against what's already configured on the host node.  We'll rewrite the
                // host file and then signal CRI-O to reload its config when the files differ.

                var currentConfigText = File.ReadAllText(configMountPath);
                var newConfigText     = NeonHelper.ToLinuxLineEndings(sbRegistryConfig.ToString());

                if (currentConfigText != newConfigText)
                {
                    configUpdateCounter.Inc();

                    File.WriteAllText(configMountPath, newConfigText);
                    (await Node.ExecuteCaptureAsync("pkill", new object[] { "-HUP", "crio" })).EnsureSuccess();

                    // Wait a few seconds to give CRI-O a chance to reload its config.  This will
                    // help mitigate problems when managing logins below due to potential inconsistencies
                    // between CRI-O's currently loaded config and the new config we just saved.

                    await Task.Delay(TimeSpan.FromSeconds(15));
                }
            }

            //-----------------------------------------------------------------
            // We need to manage registry logins by logging into new registries,
            // logging out of deleted registries, relogging in with new credentials,
            // and periodically logging in with unchanged credentials to ensure that
            // we're actually logged in.  Here's how this works:
            //
            //      https://github.com/nforgeio/neonKUBE/issues/1591

            var retry = new LinearRetryPolicy(e => true, maxAttempts: 5, retryInterval: TimeSpan.FromSeconds(5));

            // Construct LoginFile instances for all specified upstream registries
            // that require credentials and add these to a dictionary keyed by SHA-256.
            //
            // We assign via the indexer rather than calling [Add()] so that two registry
            // resources that produce the same hash don't abort the entire update with
            // an [ArgumentException].

            var shaToRequiredLogins = new Dictionary<string, LoginFile>();

            foreach (var registry in registries.Where(r => !string.IsNullOrEmpty(r.Spec.Username)))
            {
                var loginFile = LoginFile.Create(hostContainerRegistriesFolder, registry.Spec.Location, registry.Spec.Username, registry.Spec.Password);

                shaToRequiredLogins[loginFile.Sha256] = loginFile;
            }

            // Read all existing login files on the node and add them to a dictionary
            // mapping their SHA-256s to the file.

            var shaToExistingLogins = new Dictionary<string, LoginFile>();

            foreach (var file in Directory.GetFiles(hostContainerRegistriesFolder, "*.login", SearchOption.TopDirectoryOnly))
            {
                var loginFile = LoginFile.Read(file);

                if (loginFile != null)
                {
                    shaToExistingLogins[loginFile.Sha256] = loginFile;
                }
            }

            // Look for any existing login files that are not present in the collection of
            // new logins.  These correspond to registries that have been deleted or whose
            // credentials have changed.  We're going to go ahead and log out of the related
            // registries and then delete these login files (we'll re-login with new
            // credentials below for the registries that weren't targeted for removal).

            foreach (var loginFile in shaToExistingLogins.Values
                     .Where(login => !shaToRequiredLogins.ContainsKey(login.Sha256)))
            {
                try
                {
                    await retry.InvokeAsync(
                        async () =>
                        {
                            // Note that we're not ensuring success here because we may not be
                            // logged-in which is OK: we don't want to see that error.

                            log.LogInfo($"{podmanPath} logout {loginFile.Location}");

                            if (NeonHelper.IsLinux)
                            {
                                await Node.ExecuteCaptureAsync(podmanPath, new object[] { "logout", loginFile.Location });
                            }

                            loginFile.Delete();
                        });
                }
                catch (Exception e)
                {
                    loginErrorCounter.Inc();
                    log.LogError(e);
                }
            }

            // Look for any required logins that don't have an existing login file,
            // and then login the registry and then create the login file on success.

            foreach (var loginFile in shaToRequiredLogins.Values
                     .Where(login => !shaToExistingLogins.ContainsKey(login.Sha256)))
            {
                try
                {
                    await retry.InvokeAsync(
                        async () =>
                        {
                            log.LogInfo($"{podmanPath} login {loginFile.Location} --username {loginFile.Username} --password REDACTED");

                            if (NeonHelper.IsLinux)
                            {
                                (await Node.ExecuteCaptureAsync(podmanPath, new object[] { "login", loginFile.Location, "--username", loginFile.Username, "--password", loginFile.Password })).EnsureSuccess();
                            }
                        });

                    loginFile.Write();
                }
                catch (Exception e)
                {
                    loginErrorCounter.Inc();
                    log.LogError(e);
                }
            }

            //-----------------------------------------------------------------
            // Finally, we need to force a re-login for any existing logins that haven't
            // been explicitly logged into for a while.  Note that we're always going to
            // log into the local Harbor registry.

            foreach (var file in Directory.GetFiles(hostContainerRegistriesFolder, "*.login", SearchOption.TopDirectoryOnly))
            {
                // Read the next existing login file.

                var loginFile = LoginFile.Read(file);

                if (loginFile == null)
                {
                    continue;
                }

                // Update the login with the password from the corresponding container registry resource.

                var loginRegistry = registries.FirstOrDefault(r => r.Spec.Location == loginFile.Location);

                if (loginRegistry == null)
                {
                    log.LogWarn($"Cannot locate [{nameof(V1NeonContainerRegistry)}] resource for [location={loginFile.Location}].");
                    continue;
                }

                loginFile.Password = loginRegistry.Spec.Password;

                // Perform the login when the login is due to be refreshed, that is: when
                // the current time has reached the scheduled time computed from the file's
                // last update plus the relogin interval and a bit of random jitter.

                var scheduledLoginUtc = loginFile.UpdatedUtc + reloginInterval + NeonHelper.PseudoRandomTimespan(reloginMaxRandomInterval);

                if (DateTime.UtcNow >= scheduledLoginUtc || loginFile.Location == KubeConst.LocalClusterRegistry)
                {
                    try
                    {
                        await retry.InvokeAsync(
                            async () =>
                            {
                                log.LogInfo($"{podmanPath} login {loginFile.Location} --username {loginFile.Username} --password REDACTED");

                                if (NeonHelper.IsLinux)
                                {
                                    (await Node.ExecuteCaptureAsync(podmanPath, new object[] { "login", loginFile.Location, "--username", loginFile.Username, "--password", loginFile.Password })).EnsureSuccess();
                                }
                            });

                        loginFile.Write();
                    }
                    catch (Exception e)
                    {
                        loginErrorCounter.Inc();
                        log.LogError(e);
                    }
                }
            }
        }