public void AddProducers(ProducerConfiguration config)
{
    var configuredBrokers = config.Brokers.Select(x => new Broker(x.BrokerId, x.Host, x.Port));
    if (configuredBrokers.Any())
    {
        configuredBrokers.ForEach(this.AddProducer);
    }
    else if (this.zkClient != null)
    {
        Logger.DebugFormat("Connecting to {0} for creating sync producers for all brokers in the cluster", config.ZooKeeper.ZkConnect);
        var brokers = ZkUtils.GetAllBrokersInCluster(this.zkClient);
        brokers.ForEach(this.AddProducer);
    }
    else
    {
        throw new IllegalStateException("No producers found from configuration and zk not setup.");
    }
}
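A hedged sketch of the two configuration paths this method covers: if the ProducerConfiguration lists brokers explicitly, a producer is created for each of them; otherwise the method falls back to discovering every broker registered in ZooKeeper via ZkUtils.GetAllBrokersInCluster. The object-initializer shapes below (BrokerConfiguration, the ZooKeeper property and its constructor) are assumptions for illustration, not the library's confirmed API.

// Sketch only: type and constructor shapes are assumed, not confirmed library API.
// Path 1: brokers listed explicitly - AddProducers creates a producer per configured broker.
var explicitConfig = new ProducerConfiguration
{
    Brokers = new List<BrokerConfiguration>
    {
        new BrokerConfiguration { BrokerId = 0, Host = "broker1", Port = 9092 } // hypothetical entry
    }
};

// Path 2: no brokers configured - AddProducers asks ZooKeeper for all brokers in the cluster
// (ZkUtils.GetAllBrokersInCluster) and creates a producer per discovered broker.
var discoveredConfig = new ProducerConfiguration
{
    Brokers = new List<BrokerConfiguration>(),                  // empty on purpose
    ZooKeeper = new ZooKeeperConfiguration("zk1:2181,zk2:2181") // hypothetical constructor
};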
public override void DoWork()
{
    var leaderForPartitionsMap = new Dictionary<TopicAndPartition, Broker>();
    this.parent.@lock.Lock();
    try
    {
        while (this.parent.NoLeaderPartitionSet.Count == 0)
        {
            Logger.Debug("No partition for leader election.");
            this.parent.cond.Await();
        }

        Logger.DebugFormat("Partitions without leader {0}", string.Join(",", this.parent.NoLeaderPartitionSet));
        var brokers = ZkUtils.GetAllBrokersInCluster(this.parent.zkClient);
        var topicsMetadata = ClientUtils.FetchTopicMetadata(
            new HashSet<string>(this.parent.NoLeaderPartitionSet.Select(m => m.Topic)),
            brokers,
            this.parent.config.ClientId,
            this.parent.config.SocketTimeoutMs,
            this.parent.correlationId.GetAndIncrement()).TopicsMetadata;

        if (Logger.IsDebugEnabled)
        {
            foreach (var topicMetadata in topicsMetadata)
            {
                Logger.Debug(topicMetadata);
            }
        }

        foreach (var tmd in topicsMetadata)
        {
            var topic = tmd.Topic;
            foreach (var pmd in tmd.PartitionsMetadata)
            {
                var topicAndPartition = new TopicAndPartition(topic, pmd.PartitionId);
                if (pmd.Leader != null && this.parent.NoLeaderPartitionSet.Contains(topicAndPartition))
                {
                    var leaderBroker = pmd.Leader;
                    leaderForPartitionsMap[topicAndPartition] = leaderBroker;
                    this.parent.NoLeaderPartitionSet.Remove(topicAndPartition);
                }
            }
        }
    }
    catch (Exception e)
    {
        if (!isRunning.Get())
        {
            throw; /* If this thread is stopped, propagate this exception to kill the thread. */
        }
        else
        {
            Logger.Warn("Failed to find leader for " + string.Join(",", this.parent.NoLeaderPartitionSet), e);
        }
    }
    finally
    {
        this.parent.@lock.Unlock();
    }

    try
    {
        this.parent.AddFetcherForPartitions(
            leaderForPartitionsMap.ToDictionary(
                kvp => kvp.Key,
                kvp => new BrokerAndInitialOffset(kvp.Value, this.parent.partitionMap.Get(kvp.Key).GetFetchOffset())));
    }
    catch (Exception e)
    {
        if (!isRunning.Get())
        {
            throw; /* If this thread is stopped, propagate this exception to kill the thread. */
        }
        else
        {
            Logger.Warn(string.Format("Failed to add leader for partitions {0}; will retry", string.Join(",", leaderForPartitionsMap.Keys)), e);
            this.parent.@lock.Lock();
            foreach (var leader in leaderForPartitionsMap.Keys)
            {
                this.parent.NoLeaderPartitionSet.Add(leader);
            }

            this.parent.@lock.Unlock();
        }
    }

    this.parent.ShutdownIdleFetcherThreads();
    Thread.Sleep(this.parent.config.RefreshLeaderBackoffMs);
}
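The @lock/cond pair above implements a simple wait/notify handshake: the leader-finder thread blocks while NoLeaderPartitionSet is empty, and the fetcher manager signals it after adding partitions that lost their leader. A minimal sketch of that pattern using System.Threading.Monitor; the real code uses the project's own lock and condition types, so the class and member names below are illustrative only.

using System.Collections.Generic;
using System.Threading;

// Illustrative stand-in for the manager/finder handshake; not the library's actual classes.
class LeaderlessPartitionQueue
{
    private readonly object gate = new object();
    private readonly HashSet<string> noLeaderPartitions = new HashSet<string>();

    // Called by the fetcher manager when partitions lose their leader.
    public void AddPartitionsNeedingLeader(IEnumerable<string> partitions)
    {
        lock (gate)
        {
            foreach (var p in partitions)
            {
                noLeaderPartitions.Add(p);
            }
            Monitor.PulseAll(gate); // wake the leader-finder thread (the cond signal in the real code)
        }
    }

    // Called by the leader-finder thread at the top of each DoWork iteration.
    public List<string> TakePartitionsNeedingLeader()
    {
        lock (gate)
        {
            while (noLeaderPartitions.Count == 0)
            {
                Monitor.Wait(gate); // corresponds to cond.Await() above
            }
            return new List<string>(noLeaderPartitions);
        }
    }
}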
private bool Rebalance(Cluster cluster)
{
    var myTopicThreadIdsMap = TopicCount.ConstructTopicCount(group, consumerIdString, parent.zkClient)
        .GetConsumerThreadIdsPerTopic();
    var consumersPerTopicMap = ZkUtils.GetConsumersPerTopic(parent.zkClient, group);
    var brokers = ZkUtils.GetAllBrokersInCluster(parent.zkClient);
    if (brokers.Count == 0)
    {
        // This can happen in a rare case when there are no brokers available in the cluster when the consumer is started.
        // We log a warning and register for child changes on brokers/id so that rebalance can be triggered when the brokers
        // are up.
        Logger.Warn("No brokers found when trying to rebalance.");
        parent.zkClient.SubscribeChildChanges(ZkUtils.BrokerIdsPath, parent.loadBalancerListener);
        return true;
    }
    else
    {
        var partitionsAssignmentPerTopicMap = ZkUtils.GetPartitionAssignmentForTopics(
            parent.zkClient, myTopicThreadIdsMap.Keys.ToList());
        var partitionsPerTopicMap = partitionsAssignmentPerTopicMap.ToDictionary(
            p => p.Key, p => p.Value.Keys.OrderBy(x => x).ToList());

        /**
         * Fetchers must be stopped to avoid data duplication, since if the current
         * rebalancing attempt fails, the partitions that are released could be owned by another consumer.
         * But if we don't stop the fetchers first, this consumer would continue returning data for released
         * partitions in parallel. So, not stopping the fetchers leads to duplicate data.
         */
        this.CloseFetchers(cluster, (IDictionary<string, IList<KafkaStream<TKey, TValue>>>)KafkaMessageAndMetadataStreams, myTopicThreadIdsMap);
        this.ReleasePartitionOwnership(parent.topicRegistry);

        var partitionOwnershipDecision = new Dictionary<Tuple<string, int>, string>();
        var currentTopicRegistry = new Pool<string, Pool<int, PartitionTopicInfo>>();

        foreach (var topicAndConsumerThreadIdSet in myTopicThreadIdsMap)
        {
            var topic = topicAndConsumerThreadIdSet.Key;
            var consumerThreadIdSet = topicAndConsumerThreadIdSet.Value;

            currentTopicRegistry[topic] = new Pool<int, PartitionTopicInfo>();

            var topicDirs = new ZKGroupTopicDirs(group, topic);
            var curConsumers = consumersPerTopicMap.Get(topic);
            var curPartitions = partitionsPerTopicMap.Get(topic);

            var nPartsPerConsumer = curPartitions.Count / curConsumers.Count;
            var nConsumersWithExtraPart = curPartitions.Count % curConsumers.Count;

            Logger.InfoFormat(
                "Consumer {0} rebalancing the following partitions: {1} for topic {2} with consumers: {3}",
                consumerIdString,
                string.Join(",", curPartitions),
                topic,
                string.Join(",", curConsumers));

            foreach (var consumerThreadId in consumerThreadIdSet)
            {
                var myConsumerPosition = curConsumers.IndexOf(consumerThreadId);
                Contract.Assert(myConsumerPosition >= 0);

                /**
                 * Range-partition the sorted partitions to consumers for better locality.
                 * The first few consumers pick up an extra partition, if any.
                 */
                var startPart = (nPartsPerConsumer * myConsumerPosition) + Math.Min(nConsumersWithExtraPart, myConsumerPosition);
                var nParts = nPartsPerConsumer + (myConsumerPosition + 1 > nConsumersWithExtraPart ? 0 : 1);

                if (nParts <= 0)
                {
                    Logger.WarnFormat(
                        "No broker partitions consumed by consumer thread {0} for topic {1}",
                        consumerThreadId,
                        topic);
                }
                else
                {
                    for (var i = startPart; i < startPart + nParts; i++)
                    {
                        var partition = curPartitions[i];
                        Logger.InfoFormat("{0} attempting to claim partition {1}", consumerThreadId, partition);
                        this.AddPartitionTopicInfo(currentTopicRegistry, topicDirs, partition, topic, consumerThreadId);

                        // record the partition ownership decision
                        partitionOwnershipDecision[Tuple.Create(topic, partition)] = consumerThreadId;
                    }
                }
            }
        }

        /**
         * Move the partition ownership here, since that can be used to indicate a truly successful rebalancing attempt.
         * A rebalancing attempt is completed successfully only after the fetchers have been started correctly.
         */
        if (this.ReflectPartitionOwnershipDecision(partitionOwnershipDecision))
        {
            Logger.Info("Updating the cache");
            Logger.Debug("Partitions per topic cache " + JObject.FromObject(partitionsPerTopicMap).ToString(Formatting.None));
            Logger.Debug("Consumers per topic cache " + JObject.FromObject(consumersPerTopicMap).ToString(Formatting.None));
            parent.topicRegistry = currentTopicRegistry;
            this.UpdateFetcher(cluster);
            return true;
        }
        else
        {
            return false;
        }
    }
}
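The startPart/nParts arithmetic above ("range-partition the sorted partitions; the first few consumers pick up an extra partition") is easiest to see with concrete numbers. A standalone check with 10 partitions and 3 consumer threads, values chosen purely for illustration:

using System;

// Worked example of the startPart / nParts formula used in the Rebalance methods above.
int partitionCount = 10;
int consumerCount = 3;
int nPartsPerConsumer = partitionCount / consumerCount;        // 3
int nConsumersWithExtraPart = partitionCount % consumerCount;  // 1

for (int myConsumerPosition = 0; myConsumerPosition < consumerCount; myConsumerPosition++)
{
    int startPart = (nPartsPerConsumer * myConsumerPosition) + Math.Min(nConsumersWithExtraPart, myConsumerPosition);
    int nParts = nPartsPerConsumer + (myConsumerPosition + 1 > nConsumersWithExtraPart ? 0 : 1);
    Console.WriteLine("consumer {0}: partitions {1}..{2}", myConsumerPosition, startPart, startPart + nParts - 1);
}
// Output: consumer 0 gets 0..3 (the extra partition), consumer 1 gets 4..6, consumer 2 gets 7..9.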
private bool Rebalance(Cluster.Cluster cluster, CancellationTokenSource cancellationTokenSource)
{
    var topicCount = GetTopicCount(consumerIdString);
    var topicThreadIdsMap = topicCount.GetConsumerThreadIdsPerTopic();
    if (!topicThreadIdsMap.Any())
    {
        Logger.ErrorFormat("Consumer ID is not registered to any topics in ZK. Exiting rebalance");
        return false;
    }

    var consumersPerTopicMap = GetConsumersPerTopic(config.GroupId);
    var brokers = ZkUtils.GetAllBrokersInCluster(zkClient);
    if (!brokers.Any())
    {
        Logger.Warn("No brokers found when trying to rebalance.");
        zkClient.Subscribe(ZooKeeperClient.DefaultBrokerIdsPath, this);
        zkConsumerConnector.subscribedChildCollection.Add(
            new Tuple<string, IZooKeeperChildListener>(ZooKeeperClient.DefaultBrokerIdsPath, this));
        Logger.ErrorFormat(
            "Subscribe count: subscribedChildCollection:{0} , subscribedZookeeperStateCollection:{1} subscribedZookeeperDataCollection:{2}",
            zkConsumerConnector.subscribedChildCollection.Count,
            zkConsumerConnector.subscribedZookeeperStateCollection.Count,
            zkConsumerConnector.subscribedZookeeperDataCollection.Count);
        return false;
    }

    var partitionsPerTopicMap = ZkUtils.GetPartitionsForTopics(zkClient, topicThreadIdsMap.Keys);

    // Check if we've been canceled externally before we dive into the rebalance
    if (cancellationTokenSource.IsCancellationRequested)
    {
        Logger.ErrorFormat("Rebalance operation has been canceled externally by a future rebalance event. Exiting immediately");
        return false;
    }

    CloseFetchers(cluster, topicThreadIdsMap, zkConsumerConnector);
    ReleasePartitionOwnership(topicThreadIdsMap);

    try
    {
        foreach (var item in topicThreadIdsMap)
        {
            var topic = item.Key;
            var consumerThreadIdSet = item.Value;

            topicRegistry.Add(topic, new ConcurrentDictionary<int, PartitionTopicInfo>());

            var topicDirs = new ZKGroupTopicDirs(config.GroupId, topic);
            var curConsumers = new List<string>(consumersPerTopicMap[topic]);
            curConsumers.Sort();
            var curPartitions = partitionsPerTopicMap[topic].OrderBy(p => int.Parse(p)).ToList();

            Logger.InfoFormat(
                "{4} Partitions. {5} ConsumerClients. Consumer {0} rebalancing the following partitions: {1} for topic {2} with consumers: {3}",
                consumerIdString,
                string.Join(",", curPartitions),
                topic,
                string.Join(",", curConsumers),
                curPartitions.Count,
                curConsumers.Count);

            var numberOfPartsPerConsumer = curPartitions.Count / curConsumers.Count;
            Logger.Info("Number of partitions per consumer is: " + numberOfPartsPerConsumer);
            var numberOfConsumersWithExtraPart = curPartitions.Count % curConsumers.Count;
            Logger.Info("Number of consumers with an extra partition is: " + numberOfConsumersWithExtraPart);

            foreach (var consumerThreadId in consumerThreadIdSet)
            {
                var myConsumerPosition = curConsumers.IndexOf(consumerThreadId);
                Logger.Info("Consumer position for consumer " + consumerThreadId + " is: " + myConsumerPosition);
                if (myConsumerPosition < 0)
                {
                    continue;
                }

                var startPart = numberOfPartsPerConsumer * myConsumerPosition +
                                Math.Min(myConsumerPosition, numberOfConsumersWithExtraPart);
                Logger.Info("Starting partition is: " + startPart);
                var numberOfParts = numberOfPartsPerConsumer +
                                    (myConsumerPosition + 1 > numberOfConsumersWithExtraPart ? 0 : 1);
                Logger.Info("Number of partitions to work on is: " + numberOfParts);

                if (numberOfParts <= 0)
                {
                    Logger.InfoFormat(
                        "No broker partitions consumed by consumer thread {0} for topic {1}",
                        consumerThreadId,
                        item.Key);
                }
                else
                {
                    for (var i = startPart; i < startPart + numberOfParts; i++)
                    {
                        var partition = curPartitions[i];
                        Logger.InfoFormat("{0} attempting to claim partition {1}", consumerThreadId, partition);
                        var ownPartition = ProcessPartition(
                            topicDirs,
                            partition,
                            topic,
                            consumerThreadId,
                            curConsumers,
                            curPartitions,
                            cancellationTokenSource);
                        if (!ownPartition)
                        {
                            Logger.InfoFormat(
                                "{0} failed to claim partition {1} for topic {2}. Exiting rebalance",
                                consumerThreadId,
                                partition,
                                topic);
                            return false;
                        }
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        Logger.ErrorFormat("Error when rebalancing: {0}", ex.FormatException());
        return false;
    }

    // If we get here, we know that we have owned all partitions successfully,
    // therefore it is safe to update fetcher threads and begin dequeuing.
    Logger.Info("All partitions were successfully owned. Updating fetchers");
    UpdateFetcher(cluster);
    return true;
}
public IEnumerable<Broker> GetAllBrokers()
{
    return ZkUtils.GetAllBrokersInCluster(_client);
}
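For completeness, a minimal hedged sketch of calling this wrapper to enumerate the discovered brokers. The instance name brokerPartitionInfo is hypothetical, and the Broker properties (Id, Host, Port) are assumed from the constructor usage in the first example above rather than taken from documented API.

// Sketch: list every broker currently registered in ZooKeeper.
// brokerPartitionInfo is a hypothetical instance of the class defining GetAllBrokers().
foreach (var broker in brokerPartitionInfo.GetAllBrokers())
{
    Console.WriteLine("Broker {0} at {1}:{2}", broker.Id, broker.Host, broker.Port);
}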