/// <summary>
/// Repeatedly asks <paramref name="broker"/> for metadata of the topics that currently have failed
/// partitions and, for every failed partition the broker reports as healthy, verifies that the
/// reported leader answers a metadata request before broadcasting the healed partitions.
/// </summary>
/// <remarks>
/// The loop runs until cancellation is requested. The pauses use <c>Task.Delay</c> with the
/// cancellation token, so a cancellation that arrives during a pause ends this method with an
/// <see cref="OperationCanceledException"/> and the exit logging at the bottom is not reached.
/// </remarks>
/// <param name="broker">Broker which is queried for the state of failed partitions.</param>
/// <returns>Task which completes when the loop observes the cancellation request.</returns>
private async Task RecoveryLoop(BrokerMeta broker)
{
    _log.Debug("{0} Starting recovery loop on broker: {1}", this, broker);
    EtwTrace.Log.RecoveryMonitor_RecoveryLoopStarted(_id, broker.Host, broker.Port, broker.NodeId);

    while (!_cancel.IsCancellationRequested)
    {
        //_log.Debug("RecoveryLoop iterating {0}", this);

        //
        // Check whether there is any job for the given broker
        //
        if (_failedList.Count == 0)
        {
            // TODO: await for the list to receive 1st item instead of looping
            await Task.Delay(1000, _cancel);
            continue;
        }

        //
        // Query metadata from given broker for any failed topics.
        //
        MetadataResponse response;
        try
        {
            EtwTrace.Log.RecoveryMonitor_SendingPing(_id, broker.Host, broker.Port);
            response = await _protocol.MetadataRequest(new TopicRequest { Topics = _failedList.Keys.Select(t => t.Item1).Distinct().ToArray() }, broker, noTransportErrors: true);
            EtwTrace.Log.RecoveryMonitor_PingResponse(_id, broker.Host, broker.Port);
        }
        catch (Exception ex)
        {
            _log.Debug("PartitionRecoveryMonitor error. Broker: {0}, error: {1}", broker, ex.Message);
            EtwTrace.Log.RecoveryMonitor_PingFailed(_id, broker.Host, broker.Port, ex.Message);
            response = null;
        }

        // The broker did not answer: retry after a short pause.
        if (response == null)
        {
            await Task.Delay(1000, _cancel);
            continue;
        }

        //
        // Join failed partitions with successful responses to find out recovered ones.
        // Tuple layout: (topic name, partition id, leader node id).
        //
        Tuple<string, int, int>[] maybeHealedPartitions = (
            from responseTopic in response.Topics
            from responsePart in responseTopic.Partitions
            let key = new Tuple<string, int>(responseTopic.TopicName, responsePart.Id)
            where
                responseTopic.ErrorCode.IsSuccess()
                && responsePart.ErrorCode.IsSuccess()
                && _failedList.ContainsKey(key)
            select Tuple.Create(responseTopic.TopicName, responsePart.Id, responsePart.Leader)
        ).ToArray();

        // The diagnostic strings are expensive to build, so build them only when debug logging is on.
        if (_log.IsDebugEnabled)
        {
            if (maybeHealedPartitions.Length == 0)
            {
                _log.Debug("Out of {0} partitions returned from broker {2}, none of the {3} errored partitions are healed. Current partition states for errored partitions: [{1}]",
                    response.Topics.SelectMany(t => t.Partitions).Count(),
                    string.Join(",", response.Topics
                        .SelectMany(t => t.Partitions.Select(p => new { t.TopicName, TopicErrorCode = t.ErrorCode, PartitionId = p.Id, PartitionErrorCode = p.ErrorCode }))
                        .Where(p => _failedList.ContainsKey(new Tuple<string, int>(p.TopicName, p.PartitionId)))
                        .Select(p => string.Format("{0}:{1}:{2}:{3}", p.TopicName, p.TopicErrorCode, p.PartitionId, p.PartitionErrorCode))),
                    broker,
                    _failedList.Count
                );
            }
            else
            {
                var str = new StringBuilder();
                foreach (var leader in maybeHealedPartitions.GroupBy(p => p.Item3, (i, tuples) => new { Leader = i, Topics = tuples.GroupBy(t => t.Item1) }))
                {
                    str.AppendFormat(" Leader: {0}\n", leader.Leader);
                    foreach (var topic1 in leader.Topics)
                    {
                        str.AppendFormat(" Topic: {0} ", topic1.Key);
                        str.AppendFormat("[{0}]\n", string.Join(",", topic1.Select(t => t.Item2)));
                    }
                }
                _log.Debug("Healed partitions found by broker {0} (will check broker availability):\n{1}", broker, str.ToString());
            }
        }

        if (EtwTrace.Log.IsEnabled())
        {
            if (maybeHealedPartitions.Length != 0)
            {
                EtwTrace.Log.RecoveryMonitor_PossiblyHealedPartitions(_id, maybeHealedPartitions.Length);
            }
            else
            {
                EtwTrace.Log.RecoveryMonitor_NoHealedPartitions(_id);
            }
        }

        //
        // Make sure that brokers for healed partitions are accessible, because it is possible that
        // broker B1 said that partition belongs to B2 and B2 cannot be reached.
        // It is checked only that said broker responds to metadata request without exceptions.
        //
        // The checks are started without being awaited, so the pause below begins immediately.
        // The helper returns a Task and handles all of its own exceptions, so a failed check
        // cannot surface as an unobserved exception.
        //
        foreach (var brokerGrp in maybeHealedPartitions.GroupBy(p => p.Item3))
        {
            var ignored = VerifyAndBroadcastHealedPartitions(brokerGrp, response);
        }

        await Task.Delay(3000, _cancel);
    }

    _log.Debug("RecoveryLoop exiting. Setting completion");
    EtwTrace.Log.RecoveryMonitor_RecoveryLoopStop(_id);
}

/// <summary>
/// Checks that the broker reported as leader of possibly healed partitions answers a metadata
/// request and, if it does, broadcasts the partitions which that broker confirms it leads.
/// Never throws: every failure is logged.
/// </summary>
/// <param name="brokerGrp">Possibly healed partitions (topic, partition id, leader id) grouped by leader node id.</param>
/// <param name="response">Metadata response in which the possibly healed partitions were found.</param>
/// <returns>Task which completes when the check has finished, successfully or not.</returns>
private async Task VerifyAndBroadcastHealedPartitions(IGrouping<int, Tuple<string, int, int>> brokerGrp, MetadataResponse response)
{
    try
    {
        BrokerMeta newBroker;
        _brokers.TryGetValue(brokerGrp.Key, out newBroker);
        if (newBroker == null)
        {
            newBroker = response.Brokers.SingleOrDefault(b => b.NodeId == brokerGrp.Key);

            // If Cluster started when one of the brokers was down, and later it comes alive,
            // it will be missing from our list of brokers. See issue #14.
            _log.Debug("received MetadataResponse for broker that is not yet in our list: {0}", newBroker);

            if (newBroker == null)
            {
                _log.Error("Got metadata response with partition refering to a broker which is not part of the response: {0}", response.ToString());
                return;
            }

            // Broadcast only newly discovered broker and strip everything else, because this is the only
            // confirmed data.
            var filteredMeta = new MetadataResponse { Brokers = new[] { newBroker }, Topics = new TopicMeta[] { } };
            _newMetadataEvent.OnNext(filteredMeta);
        }

        try
        {
            EtwTrace.Log.RecoveryMonitor_CheckingBrokerAccessibility(_id, newBroker.Host, newBroker.Port, newBroker.NodeId);
            MetadataResponse response2 = await _protocol.MetadataRequest(new TopicRequest { Topics = brokerGrp.Select(g => g.Item1).Distinct().ToArray() }, newBroker, noTransportErrors: true);
            EtwTrace.Log.RecoveryMonitor_BrokerIsAccessible(_id, newBroker.Host, newBroker.Port, newBroker.NodeId);

            // success!
            // raise new metadata event
            _log.Info("Alive brokers detected: {0} which responded with: {1}", newBroker, response2);

            // Join maybe healed partitions with partitions which belong to alive broker
            var confirmedHealedTopics =
                from maybeHealedPartition in brokerGrp
                from healedTopic in response2.Topics
                where healedTopic.TopicName == maybeHealedPartition.Item1
                from healedPart in healedTopic.Partitions
                where healedPart.Id == maybeHealedPartition.Item2 && healedPart.Leader == brokerGrp.Key
                group healedPart by new { healedTopic.TopicName, healedTopic.ErrorCode } into healedTopicGrp
                select healedTopicGrp;

            // broadcast only truly healed partitions which belong to alive broker
            var filteredResponse = new MetadataResponse
            {
                // we may broadcast more than 1 broker, but it should be ok because discovery of
                // new broker metadata does not cause any actions
                Brokers = response2.Brokers,
                Topics = confirmedHealedTopics.
                    Where(t => t.Any()). // broadcast only topics which have healed partitions
                    Select(t => new TopicMeta
                    {
                        ErrorCode = t.Key.ErrorCode,
                        TopicName = t.Key.TopicName,
                        Partitions = t.ToArray()
                    }).ToArray()
            };

            _log.Debug("Broadcasting filtered response {0}", filteredResponse);
            if (EtwTrace.Log.IsEnabled())
            {
                foreach (var topic in filteredResponse.Topics)
                {
                    EtwTrace.Log.RecoveryMonitor_HealedPartitions(_id, newBroker.Host, newBroker.Port, newBroker.NodeId, topic.TopicName, string.Join(",", topic.Partitions.Select(p => p.Id)));
                }
            }
            _newMetadataEvent.OnNext(filteredResponse);
        }
        catch (Exception e)
        {
            _log.Warn("Metadata points to broker but it is not accessible. Error: {0}", e.Message);
        }
    }
    catch (Exception e)
    {
        // Failures outside of the accessibility check itself (broker lookup, broadcasting the
        // newly discovered broker) end up here, because nobody awaits this task.
        _log.Error("Failed to verify healed partitions for broker id {0}. Error: {1}", brokerGrp.Key, e.Message);
    }
}
/// <summary>
/// Recomputes the topic-to-partitions and partition-to-leader-broker lookup maps.
/// </summary>
/// <param name="clusterMeta">
/// Metadata to build the maps from. When null, the currently stored metadata is used.
/// </param>
private void RebuildBrokerIndexes(MetadataResponse clusterMeta = null)
{
    // By default refresh current metadata
    var source = clusterMeta ?? _metadata;

    _topicPartitionMap = source.Topics.ToDictionary(topic => topic.TopicName, topic => topic.Partitions);

    // Group all partitions by leader id and inner-join the groups with the known brokers:
    // partitions whose leader is not among the brokers do not get an entry.
    var partitionsByLeader = source.Topics
        .SelectMany(topic => topic.Partitions)
        .GroupBy(part => part.Leader);

    _partitionBrokerMap = partitionsByLeader
        .Join(
            source.Brokers,
            leaderGrp => leaderGrp.Key,
            candidate => candidate.NodeId,
            (leaderGrp, candidate) => new { leaderGrp, candidate })
        // flatten broker->partition[] into partition->broker
        .SelectMany(pair => pair.leaderGrp, (pair, part) => new { partition = part, broker = pair.candidate })
        .ToDictionary(entry => entry.partition, entry => entry.broker);
}
/// <summary>
/// Merges freshly received metadata into the stored cluster metadata: appends unknown topics,
/// copies changed partition attributes onto the known partitions, replaces resolved seed brokers,
/// connects newly discovered brokers, rebuilds the lookup indexes and publishes the resulting
/// broker and partition state notifications.
/// </summary>
/// <param name="topicMeta">Metadata to merge into the stored metadata.</param>
private void MergeTopicMeta(MetadataResponse topicMeta)
{
    // append new topics
    var addedTopics = topicMeta.Topics.Except(_metadata.Topics, TopicMeta.NameComparer).ToArray();
    _metadata.Topics = _metadata.Topics.Concat(addedTopics).ToArray();
    if (EtwTrace.Log.IsEnabled() && addedTopics.Length > 0)
    {
        foreach (var added in addedTopics)
        {
            EtwTrace.Log.MetadataNewTopic(_id, added.TopicName);
        }
    }

    // update existing topics
    foreach (var updatedTopic in topicMeta.Topics)
    {
        if (!_metadata.Topics.Any(t => t.TopicName == updatedTopic.TopicName))
        {
            continue;
        }

        // assume no new partition can happen (kafka does not allow re-partitioning)
        var knownTopic = _metadata.Topics.Single(t => t.TopicName == updatedTopic.TopicName);
        foreach (var oldPart in knownTopic.Partitions)
        {
            foreach (var updatedPart in updatedTopic.Partitions)
            {
                if (updatedPart.Id != oldPart.Id)
                {
                    continue;
                }

                var topicName = updatedTopic.TopicName;

                if (oldPart.ErrorCode.IsDifferent(updatedPart.ErrorCode))
                {
                    EtwTrace.Log.MetadataPartitionErrorChange(_id, topicName, oldPart.Id, (int)oldPart.ErrorCode, (int)updatedPart.ErrorCode);
                    oldPart.ErrorCode = updatedPart.ErrorCode;
                }

                if (!oldPart.Isr.SequenceEqual(updatedPart.Isr))
                {
                    EtwTrace.Log.MetadataPartitionIsrChange(_id, topicName, oldPart.Id, string.Join(",", oldPart.Isr), string.Join(",", updatedPart.Isr));
                    oldPart.Isr = updatedPart.Isr;
                }

                if (oldPart.Leader != updatedPart.Leader)
                {
                    _log.Info("Partition changed leader {0}->{1}", oldPart, updatedPart);
                    EtwTrace.Log.MetadataPartitionLeaderChange(_id, topicName, oldPart.Id, oldPart.Leader, updatedPart.Leader);
                    oldPart.Leader = updatedPart.Leader;
                }

                if (!oldPart.Replicas.SequenceEqual(updatedPart.Replicas))
                {
                    EtwTrace.Log.MetadataPartitionReplicasChange(_id, topicName, oldPart.Id, string.Join(",", oldPart.Replicas), string.Join(",", updatedPart.Replicas));
                    oldPart.Replicas = updatedPart.Replicas;
                }
            }
        }
    }

    // add new brokers
    var addedBrokers = topicMeta.Brokers.Except(_metadata.Brokers, BrokerMeta.NodeIdComparer).ToArray();

    // Brokers which were created from seed have NodeId == -99.
    // Once we learn their true Id, update the NodeId
    var resolvedSeeds = (
        from seed in _metadata.Brokers
        where seed.NodeId == -99
        from resolved in topicMeta.Brokers
        where resolved.NodeId != -99 &&
            seed.Port == resolved.Port &&
            string.Compare(resolved.Host, seed.Host, true, CultureInfo.InvariantCulture) == 0
        select new { seed, resolved }
    ).ToArray();

    // remove old seeds which have been resolved
    _metadata.Brokers = _metadata.Brokers.Except(resolvedSeeds.Select(pair => pair.seed)).ToArray();

    // Every new broker gets its own connection; the error callback is bound to that broker.
    foreach (var fresh in addedBrokers)
    {
        var owner = fresh;
        owner.Conn = new Connection(owner.Host, owner.Port, e => HandleTransportError(e, owner));
    }
    _metadata.Brokers = _metadata.Brokers.Concat(addedBrokers).ToArray();

    // Close old seed connection and make sure nobody can use it anymore
    foreach (var pair in resolvedSeeds)
    {
        _log.Debug("Closing seed connection because found brokerId {0} NodeId: {1}", pair.seed.Conn, pair.resolved.NodeId);
        pair.seed.Conn.ShutdownAsync();
    }

    RebuildBrokerIndexes(_metadata);

    // broadcast any new brokers: trace all of them first, then notify subscribers
    foreach (var fresh in addedBrokers)
    {
        EtwTrace.Log.MetadataNewBroker(_id, fresh.Host, fresh.Port, fresh.NodeId);
    }
    foreach (var fresh in addedBrokers.Where(b => b.NodeId != -99))
    {
        _newBrokerSubject.OnNext(fresh);
    }

    // broadcast the current partition state for all partitions.
    var stateChanges = topicMeta.Topics
        .SelectMany(t => t.Partitions.Select(part => new PartitionStateChangeEvent(t.TopicName, part.Id, part.ErrorCode)));
    foreach (var stateChange in stateChanges)
    {
        _partitionStateChangesSubject.OnNext(stateChange);
    }
}
/// <summary>
/// Connect to the cluster. Registers all seed addresses as initial brokers and starts the
/// partition recovery monitor. Does nothing if the cluster is not in the disconnected state.
/// </summary>
/// <returns>
/// Task which completes once the connect routine submitted to the scheduler has finished.
/// It fails with a <c>BrokerException</c> if the cluster has been closed already.
/// </returns>
public async Task ConnectAsync()
{
    // The scheduler hands back the task produced by the routine below; that task is awaited as well.
    var connectRoutine = await Scheduler.Ask(async () =>
    {
        // we cannot reconnect if we have closed already.
        if (_state == ClusterState.Closed)
        {
            throw new BrokerException("Cluster is already closed. Cannot reconnect. Please create a new Cluster.");
        }

        // Only a disconnected cluster can be connected
        if (_state != ClusterState.Disconnected)
        {
            return;
        }

        _log.Debug("Connecting");

        // Seed brokers get the placeholder node id -99
        var seedBrokers = Connection.ParseAddress(_seedBrokers)
            .Select(address => new BrokerMeta { Host = address.Item1, Port = address.Item2, NodeId = -99 })
            .ToArray();

        EtwTrace.Log.ClusterStarting(_id);

        var seedMeta = new MetadataResponse { Topics = new TopicMeta[0], Brokers = seedBrokers };
        MergeTopicMeta(seedMeta);

        _state = ClusterState.Connected;

        // start up a recovery monitor to watch for recovered partitions
        _partitionRecoveryMonitor = new PartitionRecoveryMonitor(this, _protocol, _cancel.Token);

        // Merge metadata that recovery monitor discovers
        _partitionRecoveryMonitor.NewMetadataEvents.Subscribe(MergeTopicMeta, ex => _log.Error(ex, "Error thrown by RecoveryMonitor.NewMetadataEvents!"));

        _log.Debug("Connected");
        EtwTrace.Log.ClusterStarted(_id);
    }).ConfigureAwait(false);

    await connectRoutine;
}