/// <summary>
/// Checks that the per-node metrics count and the App.Metrics context of a node's gauge are populated
/// while the host is part of the cluster, become empty after node 2 is decommissioned and stopped,
/// and are populated again once node 2 is started and queries have been executed.
/// </summary>
public void Should_RemoveNodeMetricsAndDisposeMetricsContext_When_HostIsRemoved()
{
    // Argument used every time node 2 is started again after having been decommissioned.
    const string restartArgs = "--jvm_arg=\"-Dcassandra.override_decommission=true\"";

    var appMetrics = new App.Metrics.MetricsBuilder().Build();
    try
    {
        var testedCluster = GetNewCluster(b => b.WithMetrics(appMetrics.CreateDriverMetricsProvider()));
        var session = testedCluster.Connect();
        var driverMetrics = session.GetMetrics();
        Assert.AreEqual(3, testedCluster.Metadata.Hosts.Count);
        Assert.AreEqual(3, driverMetrics.NodeMetrics.Count);

        // Resolve the host that this test removes from the cluster.
        var removedIp = IPAddress.Parse(TestCluster.ClusterIpPrefix + "2");
        var removedHost = testedCluster.Metadata.Hosts.First(h => h.Address.Address.Equals(removedIp));

        // The driver gauge and the App.Metrics snapshot must agree on a positive value.
        var openConnections = driverMetrics.GetNodeGauge(removedHost, NodeMetric.Gauges.OpenConnections);
        var snapshotValue = appMetrics.Snapshot.GetGaugeValue(openConnections.Context, openConnections.Name);
        Assert.Greater(openConnections.GetValue().Value, 0);
        Assert.AreEqual(openConnections.GetValue().Value, snapshotValue);

        // The node's context in the App.Metrics registry holds its gauges.
        var nodeContext = appMetrics.Snapshot.GetForContext(openConnections.Context);
        Assert.True(nodeContext.IsNotEmpty());
        Assert.AreEqual(2, nodeContext.Gauges.Count());

        // Take the host out of the cluster.
        TestCluster.DecommissionNode(2);
        TestCluster.Stop(2);
        try
        {
            TestHelper.RetryAssert(
                () => Assert.AreEqual(2, testedCluster.Metadata.Hosts.Count, "metadata hosts count failed"),
                200,
                50);
            TestHelper.RetryAssert(
                () => Assert.AreEqual(2, driverMetrics.NodeMetrics.Count, "Node metrics count failed"),
                10,
                500);
        }
        catch
        {
            // Bring the node back before propagating the failure.
            TestCluster.Start(2, restartArgs);
            throw;
        }

        // The node's metrics must be gone from the App.Metrics registry.
        nodeContext = appMetrics.Snapshot.GetForContext(openConnections.Context);
        Assert.False(nodeContext.IsNotEmpty());
        Assert.AreEqual(0, nodeContext.Gauges.Count());

        TestCluster.Start(2, restartArgs);

        TestHelper.RetryAssert(
            () => Assert.AreEqual(3, testedCluster.Metadata.Hosts.Count, "metadata hosts count after bootstrap failed"),
            200,
            50);

        // When the new host is chosen by the load balancing policy, its connection pool is created.
        for (var attempt = 0; attempt < 5; attempt++)
        {
            session.Execute("SELECT * FROM system.local");
        }

        TestHelper.RetryAssert(
            () => Assert.AreEqual(3, driverMetrics.NodeMetrics.Count, "Node metrics count after bootstrap failed"),
            10,
            500);

        // The node's metrics must have been registered again.
        nodeContext = appMetrics.Snapshot.GetForContext(openConnections.Context);
        Assert.True(nodeContext.IsNotEmpty());
        Assert.AreEqual(2, nodeContext.Gauges.Count());
    }
    finally
    {
        appMetrics.Manage.Disable();
    }
}
/// <summary>
/// Checks that queries keep succeeding on a session (configured with default idempotence enabled)
/// while node 1 is stopped and then started again.
/// </summary>
/// <returns>
/// A task that completes when both the restart task and the querying task have finished; any
/// failure in either of them is propagated through this task.
/// </returns>
public async Task Should_ContinueQuerying_When_ANodeGoesDown()
{
    var session = await CreateSessionAsync(act: builder => builder
        .WithPoolingOptions(new PoolingOptions().SetHeartBeatInterval(50))
        .WithReconnectionPolicy(new ConstantReconnectionPolicy(40))
        .WithQueryOptions(new QueryOptions().SetDefaultIdempotence(true))).ConfigureAwait(false);

    Assert.IsTrue(session.Cluster.AllHosts().All(h => h.IsUp));

    // Must start as false: the querying task exits as soon as it observes true, so starting with
    // true would end the test without running a single query while the node is down.
    var restarted = false;

    var restartTask = Task.Run(async () =>
    {
        TestCluster.Stop(1);
        await Task.Delay(2000).ConfigureAwait(false);
        TestCluster.Start(1, "--jvm_arg \"-Ddse.product_type=DATASTAX_APOLLO\"");
        await Task.Delay(500).ConfigureAwait(false);
        try
        {
            // NOTE(review): these assertions read the fixture-level Session rather than the local
            // session created above - confirm that this is intended.
            TestHelper.RetryAssert(
                () =>
                {
                    var dict = Session.Cluster.Metadata.TokenToReplicasMap.GetByKeyspace("system_distributed");
                    Assert.AreEqual(3, dict.First().Value.Count);
                    Assert.AreEqual(3, Session.Cluster.AllHosts().Count);
                    Assert.IsTrue(Session.Cluster.AllHosts().All(h => h.IsUp));
                },
                20,
                500);
        }
        finally
        {
            // Always release the querying task, even when the assertions above fail.
            Volatile.Write(ref restarted, true);
        }
    });

    var queryTask = Task.Run(async () =>
    {
        while (true)
        {
            if (Volatile.Read(ref restarted))
            {
                return;
            }

            // Each round runs 32 concurrent workers that share one counter and stop once it
            // exceeds 1000.
            var tasks = new List<Task>();
            long counter = 0;
            foreach (var _ in Enumerable.Range(0, 32))
            {
                tasks.Add(Task.Run(async () =>
                {
                    while (true)
                    {
                        var c = Interlocked.Increment(ref counter);
                        if (c > 1000)
                        {
                            return;
                        }

                        await session.ExecuteAsync(new SimpleStatement("SELECT key FROM system.local")).ConfigureAwait(false);
                    }
                }));
            }

            await Task.WhenAll(tasks).ConfigureAwait(false);
        }
    });

    await Task.WhenAll(restartTask, queryTask).ConfigureAwait(false);
}
/// <summary>
/// Checks node and session metric values after running 1000 queries on a cluster created while
/// node 2 is stopped: the stopped node must report zeroed gauges, timer statistics and meters,
/// while every other node must report positive values. Node 2 is started again in all cases.
/// </summary>
public void Should_AllMetricsHaveValidValues_When_NodeIsDown()
{
    _metricsRoot = new MetricsBuilder().Build();
    TestCluster.Stop(2);
    try
    {
        var cluster = GetNewTemporaryCluster(b => b.WithMetrics(
            _metricsRoot.CreateDriverMetricsProvider(),
            new DriverMetricsOptions()
                .SetEnabledSessionMetrics(SessionMetric.AllSessionMetrics)
                .SetEnabledNodeMetrics(NodeMetric.AllNodeMetrics)));
        var session = cluster.Connect();
        foreach (var _ in Enumerable.Range(0, 1000))
        {
            session.Execute("SELECT * FROM system.local");
        }

        var metrics = session.GetMetrics();
        var downNode = session.Cluster.AllHosts().Single(h => h.Address.Address.ToString() == (TestCluster.ClusterIpPrefix + "2"));

        // The per-host gauge checks below cover exactly two gauges (InFlight and OpenConnections).
        Assert.AreEqual(2, MetricsTests.Gauges.Length);

        foreach (var h in cluster.AllHosts())
        {
            var isDownNode = h.Address.Equals(downNode.Address);

            foreach (var c in MetricsTests.Counters)
            {
                if (isDownNode)
                {
                    Assert.GreaterOrEqual(metrics.GetNodeCounter(h, c).GetValue(), 0);
                }
                else
                {
                    Assert.AreEqual(0, metrics.GetNodeCounter(h, c).GetValue());
                }
            }

            Assert.AreEqual(0, metrics.GetNodeGauge(h, NodeMetric.Gauges.InFlight).GetValue());

            if (isDownNode)
            {
                Assert.AreEqual(0, metrics.GetNodeGauge(h, NodeMetric.Gauges.OpenConnections).GetValue());

                // Read the timer once so that every statistic comes from the same value.
                var histogram = metrics.GetNodeTimer(h, NodeMetric.Timers.CqlMessages).GetValue().Histogram;
                Assert.AreEqual(0, histogram.Max);
                Assert.AreEqual(0, histogram.Median);
                Assert.AreEqual(0, histogram.Min);
                Assert.AreEqual(0, histogram.Mean);
                Assert.AreEqual(0, histogram.StdDev);
                Assert.AreEqual(0, histogram.Sum);
                // The sample size must be zero like every other statistic of the stopped node;
                // a positive sample size would contradict the zero Max/Min/Sum asserted above.
                Assert.AreEqual(0, histogram.SampleSize);
                Assert.AreEqual(0, histogram.LastValue);
                Assert.AreEqual(0, histogram.Percentile999);
                Assert.AreEqual(0, histogram.Percentile75);
                Assert.AreEqual(0, histogram.Percentile95);
                Assert.AreEqual(0, histogram.Percentile98);
                Assert.AreEqual(0, histogram.Percentile99);

                Assert.AreEqual(0, metrics.GetNodeMeter(h, NodeMetric.Meters.BytesSent).GetValue().Count);
                Assert.AreEqual(0, metrics.GetNodeMeter(h, NodeMetric.Meters.BytesReceived).GetValue().Count);
            }
            else
            {
                Assert.Greater(metrics.GetNodeGauge(h, NodeMetric.Gauges.OpenConnections).GetValue(), 0);

                // Read the timer once so that every statistic comes from the same value.
                var histogram = metrics.GetNodeTimer(h, NodeMetric.Timers.CqlMessages).GetValue().Histogram;
                Assert.Greater(histogram.Max, 0);
                Assert.Greater(histogram.Median, 0);
                Assert.Greater(histogram.Min, 0);
                Assert.Greater(histogram.Mean, 0);
                Assert.Greater(histogram.StdDev, 0);
                Assert.Greater(histogram.Sum, 0);
                Assert.Greater(histogram.SampleSize, 0);
                Assert.Greater(histogram.LastValue, 0);
                Assert.Greater(histogram.Percentile999, 0);
                Assert.Greater(histogram.Percentile75, 0);
                Assert.Greater(histogram.Percentile95, 0);
                Assert.Greater(histogram.Percentile98, 0);
                Assert.Greater(histogram.Percentile99, 0);

                Assert.Greater(metrics.GetNodeMeter(h, NodeMetric.Meters.BytesSent).GetValue().Count, 0);
                Assert.Greater(metrics.GetNodeMeter(h, NodeMetric.Meters.BytesReceived).GetValue().Count, 0);
            }
        }

        Assert.AreEqual(0, metrics.GetSessionCounter(SessionMetric.Counters.CqlClientTimeouts).GetValue());
        Assert.Greater(metrics.GetSessionTimer(SessionMetric.Timers.CqlRequests).GetValue().Histogram.Max, 0);
        Assert.Greater(metrics.GetSessionMeter(SessionMetric.Meters.BytesSent).GetValue().Count, 0);
        Assert.Greater(metrics.GetSessionMeter(SessionMetric.Meters.BytesReceived).GetValue().Count, 0);
        Assert.AreEqual(2, metrics.GetSessionGauge(SessionMetric.Gauges.ConnectedNodes).GetValue());
    }
    finally
    {
        // Bring node 2 back regardless of the outcome.
        TestCluster.Start(2);
    }
}