public static ConstructTopicCount(string consumerIdString, string json)

| Parameter        | Type   | Description                                        |
| consumerIdString | string | (description missing in source)                    |
| json             | string | (description missing in source)                    |
| return           |        | (return type missing in source)                    |
/// <summary>
/// Creates a KafkaStream per requested consumer thread for each topic, registers this
/// consumer in ZooKeeper, and triggers the initial rebalance via ReinitializeConsumer.
/// </summary>
/// <param name="topicCountMap">Map of topic name to the number of streams (threads) to create for it.</param>
/// <param name="keyDecoder">Decoder applied to message keys.</param>
/// <param name="valueDecoder">Decoder applied to message values.</param>
/// <returns>Map of topic name to the list of streams created for that topic.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="topicCountMap"/> is null.</exception>
private IDictionary<string, IList<KafkaStream<TKey, TValue>>> Consume<TKey, TValue>(
    IDictionary<string, int> topicCountMap,
    IDecoder<TKey> keyDecoder,
    IDecoder<TValue> valueDecoder)
{
    Logger.Debug("entering consume");

    if (topicCountMap == null)
    {
        throw new ArgumentNullException(nameof(topicCountMap));
    }

    var topicCount = TopicCount.ConstructTopicCount(consumerIdString, topicCountMap);
    var topicThreadIds = topicCount.GetConsumerThreadIdsPerTopic();

    // Make a list of (queue, stream) pairs, one pair for each threadId.
    var queuesAndStreams = topicThreadIds.Values.SelectMany(threadIdSet =>
        threadIdSet.Select(_ =>
        {
            var queue = new BlockingCollection<FetchedDataChunk>(this.Config.QueuedMaxMessages);
            var stream = new KafkaStream<TKey, TValue>(
                queue,
                this.Config.ConsumerTimeoutMs,
                keyDecoder,
                valueDecoder,
                this.Config.ClientId);
            return Tuple.Create(queue, stream);
        })).ToList();

    var dirs = new ZKGroupDirs(this.Config.GroupId);
    this.RegisterConsumerInZK(dirs, consumerIdString, topicCount);

    // ReinitializeConsumer populates loadBalancerListener's stream map, read below.
    ReinitializeConsumer(topicCount, queuesAndStreams);

    return (IDictionary<string, IList<KafkaStream<TKey, TValue>>>)loadBalancerListener.KafkaMessageAndMetadataStreams;
}
/// <summary>
/// Wires up wildcard (topic-filter based) consumption: creates numStreams shared
/// (queue, stream) pairs, resolves the topics currently matching the filter from
/// ZooKeeper, registers the consumer, triggers the initial rebalance, and installs
/// a topic-event watcher so later topic changes cause synced rebalances.
/// </summary>
/// <param name="parent">Owning connector; its config, zkClient and consumer id are used throughout.</param>
/// <param name="topicFilter">Filter deciding which topics this handler consumes.</param>
/// <param name="numStreams">Number of (queue, stream) pairs to create.</param>
/// <param name="keyDecoder">Decoder applied to message keys.</param>
/// <param name="valueDecoder">Decoder applied to message values.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the parent connector has already created message streams.
/// </exception>
internal WildcardStreamsHandler(
    ZookeeperConsumerConnector parent,
    TopicFilter topicFilter,
    int numStreams,
    IDecoder<TKey> keyDecoder,
    IDecoder<TValue> valueDecoder)
{
    this.parent = parent;
    this.topicFilter = topicFilter;
    this.numStreams = numStreams;
    this.keyDecoder = keyDecoder;
    this.valueDecoder = valueDecoder;

    // GetAndSet makes this a one-shot guard across the whole connector.
    // NOTE: was `throw new Exception(...)`; bare Exception is a reserved type (CA2201),
    // InvalidOperationException derives from it so existing catch blocks still match.
    if (parent.messageStreamCreated.GetAndSet(true))
    {
        throw new InvalidOperationException("Each consumer connector can create message streams by filter at most once.");
    }

    this.wildcardQueuesAndStreams = Enumerable.Range(1, numStreams).Select(e =>
    {
        var queue = new BlockingCollection<FetchedDataChunk>(this.parent.Config.QueuedMaxMessages);
        var stream = new KafkaStream<TKey, TValue>(
            queue,
            this.parent.Config.ConsumerTimeoutMs,
            keyDecoder,
            valueDecoder,
            this.parent.Config.ClientId);
        return Tuple.Create(queue, stream);
    }).ToList();

    // The broker topics path may not exist yet; only keep topics the filter allows.
    this.wildcardTopics = ZkUtils.GetChildrenParentMayNotExist(this.parent.zkClient, ZkUtils.BrokerTopicsPath)
        .Where(topicFilter.IsTopicAllowed)
        .ToList();

    this.wildcardTopicCount = TopicCount.ConstructTopicCount(
        this.parent.consumerIdString, topicFilter, numStreams, this.parent.zkClient);

    this.dirs = new ZKGroupDirs(this.parent.Config.GroupId);
    this.parent.RegisterConsumerInZK(dirs, this.parent.consumerIdString, this.wildcardTopicCount);
    this.parent.ReinitializeConsumer(this.wildcardTopicCount, this.wildcardQueuesAndStreams);

    // Topic events will trigger subsequent synced rebalances.
    Logger.InfoFormat("Creating topic event watcher for topics {0}", topicFilter);
    this.parent.wildcardTopicWatcher = new ZookeeperTopicEventWatcher(this.parent.zkClient, this);
}
/// <summary>
/// Runs one rebalance attempt for this consumer: recomputes partition assignment for
/// all topics this consumer's threads subscribe to, claims ownership in ZooKeeper,
/// and restarts fetchers on success.
/// </summary>
/// <param name="cluster">Current view of the broker cluster, passed to fetcher shutdown/restart.</param>
/// <returns>
/// true if the rebalance completed (including the no-brokers case, which defers to a
/// later child-change event); false if partition ownership could not be claimed and
/// the attempt must be retried.
/// </returns>
private bool Rebalance(Cluster cluster)
{
    // Thread ids per topic for THIS consumer, re-read from ZooKeeper each attempt.
    var myTopicThreadIdsMap = TopicCount.ConstructTopicCount(group, consumerIdString, parent.zkClient)
        .GetConsumerThreadIdsPerTopic();
    // All consumers in the group per topic — assignment must agree across consumers.
    var consumersPerTopicMap = ZkUtils.GetConsumersPerTopic(parent.zkClient, group);
    var brokers = ZkUtils.GetAllBrokersInCluster(parent.zkClient);
    if (brokers.Count == 0)
    {
        // This can happen in a rare case when there are no brokers available in the cluster
        // when the consumer is started. We log a warning and register for child changes on
        // brokers/id so that a rebalance can be triggered when the brokers come up.
        Logger.Warn("no brokers found when trying to rebalance.");
        parent.zkClient.SubscribeChildChanges(ZkUtils.BrokerIdsPath, parent.loadBalancerListener);
        return(true);
    }
    else
    {
        var partitionsAssignmentPerTopicMap = ZkUtils.GetPartitionAssignmentForTopics(
            parent.zkClient, myTopicThreadIdsMap.Keys.ToList());
        // Sorted partition ids per topic — sorting makes the range assignment below deterministic.
        var partitionsPerTopicMap = partitionsAssignmentPerTopicMap.ToDictionary(
            p => p.Key, p => p.Value.Keys.OrderBy(x => x).ToList());

        /**
         * Fetchers must be stopped to avoid data duplication, since if the current
         * rebalancing attempt fails, the partitions that are released could be owned by
         * another consumer. But if we don't stop the fetchers first, this consumer would
         * continue returning data for released partitions in parallel. So, not stopping
         * the fetchers leads to duplicate data.
         */
        this.CloseFetchers(cluster, (IDictionary <string, IList <KafkaStream <TKey, TValue> > >)KafkaMessageAndMetadataStreams, myTopicThreadIdsMap);

        this.ReleasePartitionOwnership(parent.topicRegistry);

        // (topic, partition) -> consumerThreadId decided by this attempt; written to ZK below.
        var partitionOwnershipDecision = new Dictionary <Tuple <string, int>, string>();
        // Built fresh each attempt; only swapped into parent.topicRegistry on success.
        var currentTopicRegistry = new Pool <string, Pool <int, PartitionTopicInfo> >();

        foreach (var topicAndConsumerThreadIsSet in myTopicThreadIdsMap)
        {
            var topic = topicAndConsumerThreadIsSet.Key;
            var consumerThreadIdSet = topicAndConsumerThreadIsSet.Value;

            currentTopicRegistry[topic] = new Pool <int, PartitionTopicInfo>();

            var topicDirs = new ZKGroupTopicDirs(group, topic);
            var curConsumers = consumersPerTopicMap.Get(topic);
            var curPartitions = partitionsPerTopicMap.Get(topic);

            // Each consumer thread gets nPartsPerConsumer partitions; the first
            // nConsumersWithExtraPart threads (by sorted position) get one extra.
            var nPartsPerConsumer = curPartitions.Count / curConsumers.Count;
            var nConsumersWithExtraPart = curPartitions.Count % curConsumers.Count;

            Logger.InfoFormat("Consumer {0} rebalancing the following partitions: {1} for topic {2} with consumers: {3}", consumerIdString, string.Join(",", curPartitions), topic, string.Join(",", curConsumers));

            foreach (var consumerThreadId in consumerThreadIdSet)
            {
                // Position of this thread in the group-wide sorted consumer list;
                // must be >= 0 since we registered ourselves in ZK earlier.
                var myConsumerPosition = curConsumers.IndexOf(consumerThreadId);
                Contract.Assert(myConsumerPosition >= 0);
                var startPart = (nPartsPerConsumer * myConsumerPosition) + Math.Min(nConsumersWithExtraPart, myConsumerPosition);
                var nParts = nPartsPerConsumer + (myConsumerPosition + 1 > nConsumersWithExtraPart ? 0 : 1);

                /**
                 * Range-partition the sorted partitions to consumers for better locality.
                 * The first few consumers pick up an extra partition, if any.
                 */
                if (nParts <= 0)
                {
                    // More consumer threads than partitions: this thread gets nothing.
                    Logger.WarnFormat(
                        "No broker partitions consumed by consumer thread {0} for topic {1}", consumerThreadId, topic);
                }
                else
                {
                    for (var i = startPart; i < startPart + nParts; i++)
                    {
                        var partition = curPartitions[i];
                        Logger.InfoFormat("{0} attempting to claim partition {1}", consumerThreadId, partition);
                        this.AddPartitionTopicInfo(currentTopicRegistry, topicDirs, partition, topic, consumerThreadId);
                        // Record the partition ownership decision.
                        partitionOwnershipDecision[Tuple.Create(topic, partition)] = consumerThreadId;
                    }
                }
            }
        }

        /**
         * Move the partition ownership here, since that can be used to indicate a truly
         * successful rebalancing attempt. A rebalancing attempt is completed successfully
         * only after the fetchers have been started correctly.
         */
        if (this.ReflectPartitionOwnershipDecision(partitionOwnershipDecision))
        {
            Logger.Info("Updating the cache");
            Logger.Debug("Partitions per topic cache " + JObject.FromObject(partitionsPerTopicMap).ToString(Formatting.None));
            Logger.Debug("Consumers per topic cache " + JObject.FromObject(consumersPerTopicMap).ToString(Formatting.None));
            parent.topicRegistry = currentTopicRegistry;
            this.UpdateFetcher(cluster);
            return(true);
        }
        else
        {
            // Ownership claim failed (another consumer won a partition); caller retries.
            return(false);
        }
    }
}