/// <summary>
/// Creates one <c>KafkaStream</c> per requested consumer thread for each topic,
/// registers this consumer in ZooKeeper and triggers the initial rebalance.
/// </summary>
/// <param name="topicCountMap">Map of topic name to the number of consumer threads (streams) wanted for it.</param>
/// <param name="keyDecoder">Decoder used to deserialize message keys.</param>
/// <param name="valueDecoder">Decoder used to deserialize message payloads.</param>
/// <returns>Map of topic name to the list of streams created for that topic.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="topicCountMap"/> is null.</exception>
private IDictionary<string, IList<KafkaStream<TKey, TValue>>> Consume<TKey, TValue>(
    IDictionary<string, int> topicCountMap, IDecoder<TKey> keyDecoder, IDecoder<TValue> valueDecoder)
{
    Logger.Debug("entering consume");

    if (topicCountMap == null)
    {
        // nameof keeps the reported parameter name correct across renames
        // (matches the style of the other Consume overloads in this codebase).
        throw new ArgumentNullException(nameof(topicCountMap));
    }

    var topicCount = TopicCount.ConstructTopicCount(consumerIdString, topicCountMap);
    var topicThreadIds = topicCount.GetConsumerThreadIdsPerTopic();

    // make a list of (queue, stream) pairs, one pair for each threadId
    var queuesAndStreams = topicThreadIds.Values.SelectMany(threadIdSet =>
        threadIdSet.Select(_ =>
        {
            var queue = new BlockingCollection<FetchedDataChunk>(this.Config.QueuedMaxMessages);
            var stream = new KafkaStream<TKey, TValue>(
                queue,
                this.Config.ConsumerTimeoutMs,
                keyDecoder,
                valueDecoder,
                this.Config.ClientId);
            return Tuple.Create(queue, stream);
        })).ToList();

    var dirs = new ZKGroupDirs(this.Config.GroupId);
    this.RegisterConsumerInZK(dirs, consumerIdString, topicCount);
    ReinitializeConsumer(topicCount, queuesAndStreams);

    // ReinitializeConsumer populates the listener's topic -> streams map; expose it to the caller.
    return (IDictionary<string, IList<KafkaStream<TKey, TValue>>>)loadBalancerListener.KafkaMessageAndMetadataStreams;
}
/// <summary>
/// Builds a listener that restores this consumer's ZooKeeper registration and
/// rebalances after a session expires.
/// </summary>
/// <param name="dirs">Group-level ZooKeeper directory paths.</param>
/// <param name="consumerIdString">Unique id of this consumer within its group.</param>
/// <param name="topicCount">Topic subscription description to re-register with.</param>
/// <param name="loadBalancerListener">Listener used to trigger rebalancing.</param>
/// <param name="zkConsumerConnector">Owning connector instance.</param>
public ZKSessionExpireListener(
    ZKGroupDirs dirs,
    string consumerIdString,
    TopicCount topicCount,
    ZKRebalancerListener loadBalancerListener,
    ZookeeperConsumerConnector zkConsumerConnector)
{
    this.dirs = dirs;
    this.consumerIdString = consumerIdString;
    this.topicCount = topicCount;
    this.loadBalancerListener = loadBalancerListener;
    this.zkConsumerConnector = zkConsumerConnector;
}
/// <summary>
/// Sets up wildcard (topic-filter based) consumption: creates the shared queue/stream
/// pairs, registers the consumer in ZooKeeper, and installs a topic event watcher so
/// that newly created matching topics trigger rebalances.
/// </summary>
/// <param name="parent">Owning connector; supplies config, ZK client and consumer id.</param>
/// <param name="topicFilter">Filter deciding which topics are consumed.</param>
/// <param name="numStreams">Number of streams (consumer threads) to create.</param>
/// <param name="keyDecoder">Decoder for message keys.</param>
/// <param name="valueDecoder">Decoder for message payloads.</param>
/// <exception cref="Exception">Thrown when streams were already created on this connector.</exception>
internal WildcardStreamsHandler(
    ZookeeperConsumerConnector parent,
    TopicFilter topicFilter,
    int numStreams,
    IDecoder<TKey> keyDecoder,
    IDecoder<TValue> valueDecoder)
{
    this.parent = parent;
    this.topicFilter = topicFilter;
    this.numStreams = numStreams;
    this.keyDecoder = keyDecoder;
    this.valueDecoder = valueDecoder;

    // Atomically flip the "streams created" flag; a second call on the same
    // connector is a programming error.
    if (parent.messageStreamCreated.GetAndSet(true))
    {
        throw new Exception("Each consumer connector can create message streams by filter at most once.");
    }

    // One (queue, stream) pair per requested stream; wildcard streams later share
    // these queues across all matched topics.
    this.wildcardQueuesAndStreams = Enumerable.Range(1, numStreams).Select(e =>
    {
        var queue = new BlockingCollection<FetchedDataChunk>(this.parent.Config.QueuedMaxMessages);
        var stream = new KafkaStream<TKey, TValue>(
            queue,
            this.parent.Config.ConsumerTimeoutMs,
            keyDecoder,
            valueDecoder,
            this.parent.Config.ClientId);
        return (Tuple.Create(queue, stream));
    }).ToList();

    // Snapshot of currently existing topics that pass the filter
    // (the brokers/topics path may not exist yet on a fresh cluster).
    this.wildcardTopics = ZkUtils.GetChildrenParentMayNotExist(this.parent.zkClient, ZkUtils.BrokerTopicsPath)
        .Where(topicFilter.IsTopicAllowed)
        .ToList();
    this.wildcardTopicCount = TopicCount.ConstructTopicCount(
        this.parent.consumerIdString, topicFilter, numStreams, this.parent.zkClient);

    this.dirs = new ZKGroupDirs(this.parent.Config.GroupId);
    this.parent.RegisterConsumerInZK(dirs, this.parent.consumerIdString, this.wildcardTopicCount);
    this.parent.ReinitializeConsumer(this.wildcardTopicCount, this.wildcardQueuesAndStreams);

    // Topic events will trigger subsequent synced rebalances.
    Logger.InfoFormat("Creating topic event watcher for topics {0}", topicFilter);
    this.parent.wildcardTopicWatcher = new ZookeeperTopicEventWatcher(this.parent.zkClient, this);
}
/// <summary>
/// Writes this consumer's ephemeral registration node (JSON payload) under the
/// group's consumer registry directory in ZooKeeper.
/// </summary>
/// <param name="dirs">Group-level ZooKeeper directory paths.</param>
/// <param name="consumerIdString">Unique id of this consumer within its group.</param>
/// <param name="topicCount">Subscription description serialized into the node.</param>
private void RegisterConsumerInZK(ZKGroupDirs dirs, string consumerIdString, TopicCount topicCount)
{
    Logger.InfoFormat("begin registering consumer {0} in ZK", consumerIdString);

    var registrationPath = dirs.ConsumerRegistryDir + "/" + consumerIdString;
    var timestamp = DateTimeHelper.CurrentTimeMilis();

    // NOTE: the anonymous object's property names define the JSON wire format
    // read by other Kafka clients — do not rename them.
    var consumerRegistrationInfo = JsonConvert.SerializeObject(new
    {
        version = 1,
        subscription = topicCount.TopicCountMap,
        pattern = topicCount.Pattern,
        timestamp
    });

    ZkUtils.CreateEphemeralPathExpectConflictHandleZKBug(
        zkClient,
        registrationPath,
        consumerRegistrationInfo,
        null,
        (existingData, expectedData) => true, // treats any existing node as a match — TODO confirm callback contract
        this.Config.ZooKeeper.ZkSessionTimeoutMs);

    Logger.InfoFormat("end registering consumer {0} in ZK", consumerIdString);
}
/// <summary>
/// Registers this consumer as an ephemeral node under the group's registry path.
/// </summary>
/// <param name="dirs">Group-level ZooKeeper directory paths.</param>
/// <param name="consumerIdString">Unique id of this consumer within its group.</param>
/// <param name="topicCount">Subscription description stored as the node's JSON content.</param>
internal void RegisterConsumerInZk(ZKGroupDirs dirs, string consumerIdString, TopicCount topicCount)
{
    this.EnsuresNotDisposed();
    Logger.InfoFormat(CultureInfo.CurrentCulture, "begin registering consumer {0} in ZK", consumerIdString);

    string registrationPath = dirs.ConsumerRegistryDir + "/" + consumerIdString;
    ZkUtils.CreateEphemeralPathExpectConflict(this.zkClient, registrationPath, topicCount.ToJsonString());

    Logger.InfoFormat(CultureInfo.CurrentCulture, "end registering consumer {0} in ZK", consumerIdString);
}
/// <summary>
/// Creates one <c>KafkaMessageStream</c> per requested consumer thread for each topic,
/// registers this consumer in ZooKeeper, wires rebalance/session listeners and performs
/// the initial synchronized rebalance.
/// </summary>
/// <param name="topicCountDict">Map of topic name to the number of consumer threads wanted for it.</param>
/// <returns>Map of topic name to the list of message streams created for that topic.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="topicCountDict"/> is null.</exception>
private IDictionary<string, IList<KafkaMessageStream>> Consume(IDictionary<string, int> topicCountDict)
{
    Logger.Debug("entering consume");

    if (topicCountDict == null)
    {
        // Name the offending parameter so callers get an actionable message
        // (the original threw a bare ArgumentNullException with no ParamName).
        throw new ArgumentNullException(nameof(topicCountDict));
    }

    var dirs = new ZKGroupDirs(this.config.GroupId);
    var result = new Dictionary<string, IList<KafkaMessageStream>>();

    // Globally unique consumer id: host name + ticks + random fragment.
    var guid = Guid.NewGuid().ToString().Replace("-", string.Empty).Substring(0, 8);
    string consumerUuid = string.Format("{0}-{1}-{2}", Dns.GetHostName(), DateTime.Now.Ticks, guid);
    string consumerIdString = this.config.GroupId + "_" + consumerUuid;
    var topicCount = new TopicCount(consumerIdString, topicCountDict);

    // listener to consumer and partition changes
    var loadBalancerListener = new ZKRebalancerListener(
        this.config,
        consumerIdString,
        this.topicRegistry,
        this.zkClient,
        this,
        queues,
        this.fetcher,
        this.syncLock,
        result);
    this.RegisterConsumerInZk(dirs, consumerIdString, topicCount);
    this.zkClient.Subscribe(dirs.ConsumerRegistryDir, loadBalancerListener);

    //// create a queue per topic per consumer thread
    var consumerThreadIdsPerTopicMap = topicCount.GetConsumerThreadIdsPerTopic();
    foreach (var topic in consumerThreadIdsPerTopicMap.Keys)
    {
        var streamList = new List<KafkaMessageStream>();
        foreach (string threadId in consumerThreadIdsPerTopicMap[topic])
        {
            // Bounded queue: the fetcher blocks once MaxQueuedChunks chunks are buffered.
            var stream = new BlockingCollection<FetchedDataChunk>(
                new ConcurrentQueue<FetchedDataChunk>(), config.MaxQueuedChunks);
            this.queues.Add(new Tuple<string, string>(topic, threadId), stream);
            streamList.Add(new KafkaMessageStream(stream, this.config.Timeout));
        }

        result.Add(topic, streamList);
        Logger.DebugFormat(CultureInfo.CurrentCulture, "adding topic {0} and stream to map...", topic);

        // register on broker partition path changes
        string partitionPath = ZooKeeperClient.DefaultBrokerTopicsPath + "/" + topic;
        this.zkClient.MakeSurePersistentPathExists(partitionPath);
        this.zkClient.Subscribe(partitionPath, loadBalancerListener);
    }

    //// register listener for session expired event
    this.zkClient.Subscribe(new ZKSessionExpireListener(dirs, consumerIdString, topicCount, loadBalancerListener, this));

    //// explicitly trigger load balancing for this consumer
    lock (this.syncLock)
    {
        loadBalancerListener.SyncedRebalance();
    }

    return (result);
}
/// <summary>
/// Creates one <c>KafkaMessageStream</c> per requested consumer thread for each topic,
/// registers this consumer in ZooKeeper, wires rebalance/session listeners and performs
/// the initial synchronized rebalance.
/// </summary>
/// <param name="topicCountDict">Map of topic name to the number of consumer threads wanted for it.</param>
/// <returns>Map of topic name to the list of message streams created for that topic.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="topicCountDict"/> is null.</exception>
private IDictionary<string, IList<KafkaMessageStream>> Consume(IDictionary<string, int> topicCountDict)
{
    Logger.Debug("entering consume");

    if (topicCountDict == null)
    {
        // Name the offending parameter so callers get an actionable message.
        throw new ArgumentNullException(nameof(topicCountDict));
    }

    var dirs = new ZKGroupDirs(this.config.GroupId);
    var result = new Dictionary<string, IList<KafkaMessageStream>>();

    // FIX: the previous id (MachineName + DateTime.Now.Millisecond) only had 1000
    // distinct suffixes per host, so two consumers started in the same
    // millisecond-of-second collided in ZooKeeper. Use ticks plus a random
    // fragment, consistent with the other Consume overload.
    string consumerUuid = string.Format(
        "{0}-{1}-{2}",
        Environment.MachineName,
        DateTime.Now.Ticks,
        Guid.NewGuid().ToString("N").Substring(0, 8));
    string consumerIdString = this.config.GroupId + "_" + consumerUuid;
    var topicCount = new TopicCount(consumerIdString, topicCountDict);

    // listener to consumer and partition changes
    var loadBalancerListener = new ZKRebalancerListener(
        this.config,
        consumerIdString,
        this.topicRegistry,
        this.zkClient,
        this,
        queues,
        this.fetcher,
        this.syncLock);
    this.RegisterConsumerInZk(dirs, consumerIdString, topicCount);
    this.zkClient.Subscribe(dirs.ConsumerRegistryDir, loadBalancerListener);

    //// create a queue per topic per consumer thread
    var consumerThreadIdsPerTopicMap = topicCount.GetConsumerThreadIdsPerTopic();
    foreach (var topic in consumerThreadIdsPerTopicMap.Keys)
    {
        var streamList = new List<KafkaMessageStream>();
        foreach (string threadId in consumerThreadIdsPerTopicMap[topic])
        {
            var stream = new BlockingCollection<FetchedDataChunk>(new ConcurrentQueue<FetchedDataChunk>());
            this.queues.Add(new Tuple<string, string>(topic, threadId), stream);
            streamList.Add(new KafkaMessageStream(stream, this.config.Timeout));
        }

        result.Add(topic, streamList);
        Logger.DebugFormat(CultureInfo.CurrentCulture, "adding topic {0} and stream to map...", topic);

        // register on broker partition path changes
        string partitionPath = ZooKeeperClient.DefaultBrokerTopicsPath + "/" + topic;
        this.zkClient.MakeSurePersistentPathExists(partitionPath);
        this.zkClient.Subscribe(partitionPath, loadBalancerListener);
    }

    //// register listener for session expired event
    this.zkClient.Subscribe(new ZKSessionExpireListener(dirs, consumerIdString, topicCount, loadBalancerListener, this));

    //// explicitly trigger load balancing for this consumer
    lock (this.syncLock)
    {
        loadBalancerListener.SyncedRebalance();
    }

    return result;
}
/// <summary>
/// (Re)creates the rebalance, session-expiration and partition-change listeners if needed,
/// zips the (queue, stream) pairs onto the consumer thread ids, subscribes to the relevant
/// ZooKeeper paths and triggers a synchronized rebalance.
/// </summary>
/// <param name="topicCount">Static or wildcard subscription description.</param>
/// <param name="queuesAndStreams">(queue, stream) pairs created by the caller, one per thread id
/// for static subscriptions; shared across topics for wildcard subscriptions.</param>
private void ReinitializeConsumer<TKey, TValue>(
    TopicCount topicCount,
    IList<Tuple<BlockingCollection<FetchedDataChunk>, KafkaStream<TKey, TValue>>> queuesAndStreams)
{
    var dirs = new ZKGroupDirs(this.Config.GroupId);

    // listener to consumer and partition changes
    if (loadBalancerListener == null)
    {
        var topicStreamsMaps = new Dictionary<string, IList<KafkaStream<TKey, TValue>>>();
        loadBalancerListener = new ZKRebalancerListener<TKey, TValue>(
            this, this.Config.GroupId, consumerIdString, topicStreamsMaps);
    }

    // create listener for session expired event if not exist yet
    if (sessionExpirationListener == null)
    {
        sessionExpirationListener = new ZKSessionExpireListener(
            this, dirs, consumerIdString, topicCount, loadBalancerListener);
    }

    // create listener for topic partition change event if not exist yet
    if (topicPartitionChangeListener == null)
    {
        topicPartitionChangeListener = new ZKTopicPartitionChangeListener(this, loadBalancerListener);
    }

    var topicStreamsMap = (IDictionary<string, IList<KafkaStream<TKey, TValue>>>)loadBalancerListener.KafkaMessageAndMetadataStreams;

    // map of {topic -> Set(thread-1, thread-2, ...)}
    var consumerThreadIdsPerTopic = topicCount.GetConsumerThreadIdsPerTopic();

    IList<Tuple<BlockingCollection<FetchedDataChunk>, KafkaStream<TKey, TValue>>> allQueuesAndStreams = null;
    if (topicCount is WildcardTopicCount)
    {
        // Wild-card consumption streams share the same queues, so we need to
        // duplicate the list for the subsequent zip operation.
        allQueuesAndStreams = Enumerable.Range(1, consumerThreadIdsPerTopic.Keys.Count)
            .SelectMany(_ => queuesAndStreams)
            .ToList();
    }
    else if (topicCount is StaticTopicCount)
    {
        allQueuesAndStreams = queuesAndStreams;
    }

    // Flatten to (topic, threadId) pairs so they can be zipped with the queues/streams.
    var topicThreadIds = consumerThreadIdsPerTopic.SelectMany(topicAndThreadIds =>
    {
        var topic = topicAndThreadIds.Key;
        var threadIds = topicAndThreadIds.Value;
        return threadIds.Select(id => Tuple.Create(topic, id));
    }).ToList();

    // (Typo fix: message previously read "Mismatch betwen ... adn ...".)
    Contract.Assert(
        topicThreadIds.Count == allQueuesAndStreams.Count,
        string.Format(
            "Mismatch between thread ID count ({0}) and queue count ({1})",
            topicThreadIds.Count,
            allQueuesAndStreams.Count));

    var threadQueueStreamPairs = topicThreadIds.Zip(allQueuesAndStreams, Tuple.Create).ToList();

    foreach (var e in threadQueueStreamPairs)
    {
        var topicThreadId = e.Item1;
        var q = e.Item2.Item1;
        topicThreadIdAndQueues[topicThreadId] = q;
        Logger.DebugFormat(
            "Adding topicThreadId {0} and queue {1} to topicThreadIdAndQueues Data structure",
            topicThreadId,
            string.Join(",", q));
        MetersFactory.NewGauge(
            this.Config.ClientId + "-" + this.Config.GroupId + "-" + topicThreadId.Item1 + "-" + topicThreadId.Item2 + "-FetchQueueSize",
            () => q.Count);
    }

    // Group the streams back by topic and publish them through the listener's map.
    var groupedByTopic = threadQueueStreamPairs.GroupBy(x => x.Item1.Item1).ToList();
    foreach (var e in groupedByTopic)
    {
        var topic = e.Key;
        var streams = e.Select(x => x.Item2.Item2).ToList();
        topicStreamsMap[topic] = streams;
        Logger.DebugFormat("adding topic {0} and {1} stream to map", topic, streams.Count);
    }

    // listener to consumer and partition changes
    zkClient.SubscribeStateChanges(sessionExpirationListener);
    zkClient.SubscribeChildChanges(dirs.ConsumerRegistryDir, loadBalancerListener);

    foreach (var topicAndStreams in topicStreamsMap)
    {
        // register on broker partition path changes
        var topicPath = ZkUtils.BrokerTopicsPath + "/" + topicAndStreams.Key;
        zkClient.SubscribeDataChanges(topicPath, topicPartitionChangeListener);
    }

    // explicitly trigger load balancing for this consumer
    loadBalancerListener.SyncedRebalance();
}
/// <summary>
/// Performs one rebalance attempt: stops fetchers, releases owned partitions, range-assigns
/// the sorted partition list across the group's consumer threads, and tries to claim
/// ownership of this consumer's share in ZooKeeper.
/// </summary>
/// <param name="cluster">Current cluster metadata used to restart fetchers.</param>
/// <returns>
/// true when the attempt succeeded (or there were no brokers yet and a retry was scheduled);
/// false when partition ownership could not be claimed and the caller should retry.
/// </returns>
private bool Rebalance(Cluster cluster)
{
    var myTopicThreadIdsMap = TopicCount.ConstructTopicCount(group, consumerIdString, parent.zkClient)
        .GetConsumerThreadIdsPerTopic();
    var consumersPerTopicMap = ZkUtils.GetConsumersPerTopic(parent.zkClient, group);
    var brokers = ZkUtils.GetAllBrokersInCluster(parent.zkClient);
    if (brokers.Count == 0)
    {
        // This can happen in a rare case when there are no brokers available in the cluster
        // when the consumer is started. We log a warning and register for child changes on
        // brokers/ids so that a rebalance can be triggered when the brokers come up.
        Logger.Warn("no brokers found when trying to rebalance.");
        parent.zkClient.SubscribeChildChanges(ZkUtils.BrokerIdsPath, parent.loadBalancerListener);
        return (true);
    }
    else
    {
        var partitionsAssignmentPerTopicMap = ZkUtils.GetPartitionAssignmentForTopics(
            parent.zkClient, myTopicThreadIdsMap.Keys.ToList());
        // Sort partition ids per topic so the range assignment below is deterministic
        // across all consumers computing it independently.
        var partitionsPerTopicMap = partitionsAssignmentPerTopicMap.ToDictionary(
            p => p.Key, p => p.Value.Keys.OrderBy(x => x).ToList());

        /*
         * fetchers must be stopped to avoid Data duplication, since if the current
         * rebalancing attempt fails, the partitions that are released could be owned by another consumer.
         * But if we don't stop the fetchers first, this consumer would continue returning Data for released
         * partitions in parallel. So, not stopping the fetchers leads to duplicate Data.
         */
        this.CloseFetchers(cluster, (IDictionary<string, IList<KafkaStream<TKey, TValue>>>)KafkaMessageAndMetadataStreams, myTopicThreadIdsMap);
        this.ReleasePartitionOwnership(parent.topicRegistry);

        // (topic, partition) -> consumerThreadId decisions, committed to ZK at the end.
        var partitionOwnershipDecision = new Dictionary<Tuple<string, int>, string>();
        // Registry built up during this attempt; only installed on success.
        var currentTopicRegistry = new Pool<string, Pool<int, PartitionTopicInfo>>();

        foreach (var topicAndConsumerThreadIsSet in myTopicThreadIdsMap)
        {
            var topic = topicAndConsumerThreadIsSet.Key;
            var consumerThreadIdSet = topicAndConsumerThreadIsSet.Value;
            currentTopicRegistry[topic] = new Pool<int, PartitionTopicInfo>();

            var topicDirs = new ZKGroupTopicDirs(group, topic);
            var curConsumers = consumersPerTopicMap.Get(topic);
            var curPartitions = partitionsPerTopicMap.Get(topic);

            // Even share per consumer thread, plus one extra partition for the first
            // (curPartitions % curConsumers) threads.
            var nPartsPerConsumer = curPartitions.Count / curConsumers.Count;
            var nConsumersWithExtraPart = curPartitions.Count % curConsumers.Count;

            Logger.InfoFormat(
                "Consumer {0} rebalancing the following partitions: {1} for topic {2} with consumers: {3}",
                consumerIdString,
                string.Join(",", curPartitions),
                topic,
                string.Join(",", curConsumers));

            foreach (var consumerThreadId in consumerThreadIdSet)
            {
                var myConsumerPosition = curConsumers.IndexOf(consumerThreadId);
                Contract.Assert(myConsumerPosition >= 0);
                var startPart = (nPartsPerConsumer * myConsumerPosition) + Math.Min(nConsumersWithExtraPart, myConsumerPosition);
                var nParts = nPartsPerConsumer + (myConsumerPosition + 1 > nConsumersWithExtraPart ? 0 : 1);

                /*
                 * Range-partition the sorted partitions to consumers for better locality.
                 * The first few consumers pick up an extra partition, if any.
                 */
                if (nParts <= 0)
                {
                    // More consumer threads than partitions: this thread gets nothing.
                    Logger.WarnFormat(
                        "No broker partitions consumed by consumer thread {0} for topic {1}", consumerThreadId, topic);
                }
                else
                {
                    for (var i = startPart; i < startPart + nParts; i++)
                    {
                        var partition = curPartitions[i];
                        Logger.InfoFormat("{0} attempting to claim partition {1}", consumerThreadId, partition);
                        this.AddPartitionTopicInfo(currentTopicRegistry, topicDirs, partition, topic, consumerThreadId);
                        // record the partition ownership decision
                        partitionOwnershipDecision[Tuple.Create(topic, partition)] = consumerThreadId;
                    }
                }
            }
        }

        /*
         * move the partition ownership here, since that can be used to indicate a truly successful rebalancing attempt
         * A rebalancing attempt is completed successfully only after the fetchers have been started correctly
         */
        if (this.ReflectPartitionOwnershipDecision(partitionOwnershipDecision))
        {
            Logger.Info("Updating the cache");
            Logger.Debug("Partitions per topic cache " + JObject.FromObject(partitionsPerTopicMap).ToString(Formatting.None));
            Logger.Debug("Consumers per topic cache " + JObject.FromObject(consumersPerTopicMap).ToString(Formatting.None));
            parent.topicRegistry = currentTopicRegistry;
            this.UpdateFetcher(cluster);
            return (true);
        }
        else
        {
            // Some partition was claimed by another consumer first; caller retries.
            return (false);
        }
    }
}
/// <summary>
/// Builds a listener that re-registers the consumer and forces a rebalance once
/// an expired ZooKeeper session has been re-established.
/// </summary>
/// <param name="parent">Owning connector instance.</param>
/// <param name="dirs">Group-level ZooKeeper directory paths.</param>
/// <param name="consumerIdString">Unique id of this consumer within its group.</param>
/// <param name="topicCount">Subscription description to re-register with.</param>
/// <param name="loadbalancerListener">Listener used to trigger rebalancing.</param>
public ZKSessionExpireListener(
    ZookeeperConsumerConnector parent,
    ZKGroupDirs dirs,
    string consumerIdString,
    TopicCount topicCount,
    IZKRebalancerListener loadbalancerListener)
{
    this.parent = parent;
    this.LoadbalancerListener = loadbalancerListener;
    this.TopicCount = topicCount;
    this.ConsumerIdString = consumerIdString;
    this.Dirs = dirs;
}
/// <summary>
/// Registers this consumer as an ephemeral node under the group's registry path,
/// serialized by the ZK client's writer lock. Failures are logged, not rethrown.
/// </summary>
/// <param name="dirs">Group-level ZooKeeper directory paths.</param>
/// <param name="consumerIdString">Unique id of this consumer within its group.</param>
/// <param name="topicCount">Subscription description stored as the node's JSON content.</param>
internal void RegisterConsumerInZk(ZKGroupDirs dirs, string consumerIdString, TopicCount topicCount)
{
    this.EnsuresNotDisposed();
    Logger.InfoFormat("begin registering consumer {0} in ZK", consumerIdString);

    // FIX: acquire the lock BEFORE the try block. Previously EnterWriteLock sat inside
    // the try; if acquisition threw, the finally still called ExitWriteLock on a lock
    // that was never entered, raising SynchronizationLockException and masking the
    // original error.
    this.GetZkClient().SlimLock.EnterWriteLock();
    try
    {
        ZkUtils.CreateEphemeralPathExpectConflict(
            this.GetZkClient(), dirs.ConsumerRegistryDir + "/" + consumerIdString, topicCount.ToJsonString());
        Logger.InfoFormat("successfully registering consumer {0} in ZK", consumerIdString);
    }
    catch (Exception ex)
    {
        // Best-effort registration: log and continue (matches existing behavior).
        Logger.ErrorFormat("error in RegisterConsumerInZk CreateEphemeralPathExpectConflict : {0}", ex.FormatException());
    }
    finally
    {
        GetZkClient().SlimLock.ExitWriteLock();
    }
}
/// <summary>
/// Creates one decoded <c>KafkaMessageStream&lt;TData&gt;</c> per requested consumer thread for
/// each topic, registers the consumer in ZooKeeper, subscribes rebalance / session /
/// partition-leader listeners, and kicks off the initial asynchronous rebalance.
/// </summary>
/// <param name="topicCountDict">Map of topic name to the number of consumer threads wanted for it.</param>
/// <param name="decoder">Decoder used to deserialize message payloads.</param>
/// <returns>Map of topic name to the list of decoded message streams created for that topic.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="topicCountDict"/> is null.</exception>
private IDictionary<string, IList<KafkaMessageStream<TData>>> Consume<TData>(IDictionary<string, int> topicCountDict, IDecoder<TData> decoder)
{
    Logger.Debug("entering consume");

    if (topicCountDict == null)
    {
        throw new ArgumentNullException(nameof(topicCountDict));
    }

    var dirs = new ZKGroupDirs(this.config.GroupId);
    var result = new Dictionary<string, IList<KafkaMessageStream<TData>>>();
    string consumerIdString = GetConsumerIdString();
    var topicCount = new TopicCount(consumerIdString, topicCountDict);

    //// create a queue per topic per consumer thread
    var consumerThreadIdsPerTopicMap = topicCount.GetConsumerThreadIdsPerTopic();
    foreach (var topic in consumerThreadIdsPerTopicMap.Keys)
    {
        var streamList = new List<KafkaMessageStream<TData>>();
        foreach (string threadId in consumerThreadIdsPerTopicMap[topic])
        {
            // Unbounded queue backed by a concurrent FIFO; the stream drains it.
            var stream = new BlockingCollection<FetchedDataChunk>(new ConcurrentQueue<FetchedDataChunk>());
            this.queues.Add(new Tuple<string, string>(topic, threadId), stream);
            streamList.Add(new KafkaMessageStream<TData>(topic, stream, this.config.Timeout, decoder));
        }

        result.Add(topic, streamList);
        Logger.InfoFormat("adding topic {0} and stream to map...", topic);
    }

    // listener to consumer and partition changes
    var loadBalancerListener = new ZKRebalancerListener<TData>(
        this.config,
        consumerIdString,
        this.topicRegistry,
        this.GetZkClient(),
        this,
        queues,
        this.fetcher,
        result,
        topicCount);
    if (this.consumerRebalanceHandler != null)
    {
        loadBalancerListener.ConsumerRebalance += this.consumerRebalanceHandler;
    }

    // Remember how to cancel an in-flight async rebalance at shutdown.
    stopAsyncRebalancing.Add(loadBalancerListener.StopRebalance);
    this.RegisterConsumerInZk(dirs, consumerIdString, topicCount);

    //// register listener for session expired event
    var zkSessionExpireListener = new ZKSessionExpireListener<TData>(dirs, consumerIdString, topicCount, loadBalancerListener, this);
    if (this.zkSessionDisconnectedHandler != null)
    {
        zkSessionExpireListener.ZKSessionDisconnected += this.zkSessionDisconnectedHandler;
    }

    if (this.zkSessionExpiredHandler != null)
    {
        zkSessionExpireListener.ZKSessionExpired += this.zkSessionExpiredHandler;
    }

    // Track every subscription so it can be unsubscribed later (presumably on dispose —
    // TODO confirm against the disposal path, which is outside this view).
    this.GetZkClient().Subscribe(zkSessionExpireListener);
    this.subscribedZookeeperStateCollection.Add(zkSessionExpireListener);
    this.GetZkClient().Subscribe(dirs.ConsumerRegistryDir, loadBalancerListener);
    this.subscribedChildCollection.Add(new Tuple<string, IZooKeeperChildListener>(dirs.ConsumerRegistryDir, loadBalancerListener));

    result.ForEach(topicAndStreams =>
    {
        // register on broker partition path changes
        string partitionPath = ZooKeeperClient.DefaultBrokerTopicsPath + "/" + topicAndStreams.Key;
        if (this.GetZkClient().Exists(partitionPath))
        {
            this.GetZkClient().Subscribe(partitionPath, loadBalancerListener);
            this.subscribedChildCollection.Add(new Tuple<string, IZooKeeperChildListener>(partitionPath, loadBalancerListener));

            // Create a mapping of all topic partitions and their current leaders
            var topicsAndPartitions = ZkUtils.GetPartitionsForTopics(this.GetZkClient(), new[] { topicAndStreams.Key });
            Dictionary<string, int> partitionLeaderMap = new Dictionary<string, int>();
            foreach (var partitionId in topicsAndPartitions[topicAndStreams.Key])
            {
                // Find/parse current partition leader for this partition and add it
                // to the mapping object (-1 when no leader is currently known).
                var partitionStatePath = partitionPath + "/partitions/" + partitionId + "/state";
                this.GetZkClient().MakeSurePersistentPathExists(partitionStatePath);
                int? partitionLeader = ZkUtils.GetLeaderForPartition(this.GetZkClient(), topicAndStreams.Key, int.Parse(partitionId));
                partitionLeaderMap.Add(partitionStatePath, partitionLeader.GetValueOrDefault(-1));
            }

            // listen for changes on the state nodes for the partitions
            // this will indicate when a leader switches, or the in sync replicas change
            var leaderListener = new ZkPartitionLeaderListener<TData>(loadBalancerListener, partitionLeaderMap);
            foreach (var partitionId in topicsAndPartitions[topicAndStreams.Key])
            {
                var partitionStatePath = partitionPath + "/partitions/" + partitionId + "/state";
                this.GetZkClient().Subscribe(partitionStatePath, leaderListener);
                this.subscribedZookeeperDataCollection.Add(new Tuple<string, IZooKeeperDataListener>(partitionStatePath, leaderListener));
            }
        }
        else
        {
            Logger.WarnFormat("The topic path at {0}, does not exist.", partitionPath);
        }
    });

    //// explicitly trigger load balancing for this consumer
    Logger.Info("Performing rebalancing. A new consumer has been added to consumer group: " + dirs.ConsumerRegistryDir + ", consumer: " + consumerIdString);
    Logger.InfoFormat("Subscribe count: subscribedChildCollection:{0} , subscribedZookeeperStateCollection:{1} subscribedZookeeperDataCollection:{2} "
        , subscribedChildCollection.Count, subscribedZookeeperStateCollection.Count, subscribedZookeeperDataCollection.Count);

    //// When a new consumer joins, wait for the rebalance to finish so the Fetcher thread is started.
    loadBalancerListener.AsyncRebalance(DefaultWaitTimeForInitialRebalanceInSeconds * 1000);

    return (result);
}