public void AddPartitions(IDictionary<TopicAndPartition, long> partitionAndOffsets)
{
    this.partitionMapLock.LockInterruptibly();
    try
    {
        foreach (var topicAndOffset in partitionAndOffsets)
        {
            var topicAndPartition = topicAndOffset.Key;
            var offset = topicAndOffset.Value;

            // If the partitionMap already has the topic/partition, then do not update the map with the old offset
            if (!this.partitionMap.ContainsKey(topicAndPartition))
            {
                this.partitionMap[topicAndPartition] = PartitionTopicInfo.IsOffsetInvalid(offset)
                    ? this.HandleOffsetOutOfRange(topicAndPartition)
                    : offset;
            }

            this.partitionMapCond.SignalAll();
        }
    }
    finally
    {
        this.partitionMapLock.Unlock();
    }
}
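// Usage sketch (illustrative only): the caller assembles the partitions to fetch together with their
// start offsets and hands them to AddPartitions in one call. Offsets that fail
// PartitionTopicInfo.IsOffsetInvalid (e.g. a sentinel such as -1) are resolved through
// HandleOffsetOutOfRange above. The topic name, offsets and the `fetcher` instance below are
// assumptions for the example, not part of the surrounding code.
//
//     var partitionAndOffsets = new Dictionary<TopicAndPartition, long>
//     {
//         { new TopicAndPartition("test-topic", 0), 42L },  // resume from a known offset
//         { new TopicAndPartition("test-topic", 1), -1L }   // invalid offset -> HandleOffsetOutOfRange decides
//     };
//     fetcher.AddPartitions(partitionAndOffsets);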
private void AddPartitionTopicInfo(ZKGroupTopicDirs topicDirs, string partition, string topic, string consumerThreadId)
{
    var partitionId = int.Parse(partition);
    var partTopicInfoMap = topicRegistry[topic];

    // Find the leader for this partition
    var leaderOpt = ZkUtils.GetLeaderForPartition(zkClient, topic, partitionId);
    if (!leaderOpt.HasValue)
    {
        throw new NoBrokersForPartitionException(
            string.Format("No leader available for partition {0} on topic {1}", partition, topic));
    }

    Logger.InfoFormat("Leader for partition {0} for topic {1} is {2}", partition, topic, leaderOpt.Value);
    var leader = leaderOpt.Value;

    var znode = topicDirs.ConsumerOffsetDir + "/" + partition;
    var offsetCommitedString = zkClient.ReadData<string>(znode, true);

    // If this is the first time the consumer is started, set the initial offset based on the config
    long offset = -1;
    long offsetCommited = -1;
    if (offsetCommitedString != null)
    {
        offsetCommited = long.Parse(offsetCommitedString);
        offset = offsetCommited + 1;
    }

    Logger.InfoFormat("Final offset {0} for topic {1} partition {2} OffsetCommited {3}",
        offset, topic, partition, offsetCommited);

    var queue = queues[new Tuple<string, string>(topic, consumerThreadId)];
    var partTopicInfo = new PartitionTopicInfo(
        topic,
        leader,
        partitionId,
        queue,
        offsetCommited,
        offset,
        config.FetchSize,
        offsetCommited);
    partTopicInfoMap[partitionId] = partTopicInfo;
    Logger.InfoFormat("{0} selected new offset {1}", partTopicInfo, offset);
}
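// Note on the znode path used above: ZKGroupTopicDirs.ConsumerOffsetDir resolves to the standard
// consumer offset directory in ZooKeeper, /consumers/<group>/offsets/<topic>, so the committed
// offset for a partition is read from /consumers/<group>/offsets/<topic>/<partition>.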
private void AddPartitionTopicInfo(ZKGroupTopicDirs topicDirs, string partitionString, string topic, string consumerThreadId)
{
    var partition = Partition.ParseFrom(partitionString);
    var partTopicInfoMap = this.topicRegistry[topic];

    var znode = topicDirs.ConsumerOffsetDir + "/" + partition.Name;
    var offsetString = this.zkClient.ReadData<string>(znode, true);
    long offset = string.IsNullOrEmpty(offsetString)
        ? 0
        : long.Parse(offsetString, CultureInfo.InvariantCulture);

    var queue = this.queues[new Tuple<string, string>(topic, consumerThreadId)];
    var partTopicInfo = new PartitionTopicInfo(
        topic,
        partition.BrokerId,
        partition,
        queue,
        offset,
        offset,
        this.config.FetchSize);
    partTopicInfoMap.Add(partition, partTopicInfo);

    if (Logger.IsDebugEnabled)
    {
        Logger.DebugFormat(CultureInfo.CurrentCulture, "{0} selected new offset {1}", partTopicInfo, offset);
    }
}
internal void Consume()
{
    // Connects to ZooKeeper
    using (ZookeeperConsumerConnector connector = new ZookeeperConsumerConnector(configSettings, true))
    {
        if (this.ThreadID == 0)
        {
            ConsumerGroupHelper.initialOffset = connector.GetOffset(cgOptions.Topic);

            //Logger.InfoFormat("======Original offset \r\n{0}", ConsumerGroupHelper.initialOffset == null ? "(NULL)" : ConsumeGroupMonitorHelper.GetComsumerGroupOffsetsAsLog(ConsumerGroupHelper.initialOffset));
        }

        // Defines the collection of topics and the number of threads to consume each with.
        // ===============NOTE============================
        // For example, if there are 80 partitions for one topic:
        //
        // Normally start more than 96 (= 80 * 120%) clients with the same GroupId (the extra 20% are a buffer for autopilot IMP), and set FetchThreadCountPerConsumer to 1.
        // Then 80 clients can each lock a partition (set MACHINENAME_ProcessID as the ConsumerId) and the other 16 stay idle. This approach is strongly recommended.
        //
        // If you start 40 clients and set FetchThreadCountPerConsumer to 1, every client locks at least 2 partitions, and if some client becomes unavailable for an
        // autopilot IMP reason, some clients may lock 3 partitions.
        //
        // If you start 40 clients and set FetchThreadCountPerConsumer to 2, you get two IEnumerator<Message> instances: topicData[0].GetEnumerator() and topicData[1].GetEnumerator(),
        // and you need to start TWO threads to process them independently.
        // If the client gets 2 partitions, each thread handles 1 partition;
        // if the client gets 3 partitions, one thread handles 2 partitions and the other handles 1. This makes the situation more complex and the partition consumption unbalanced.
        // ==================NOTE=============================
        IDictionary<string, int> topicMap = new Dictionary<string, int> { { cgOptions.Topic, cgOptions.FetchThreadCountPerConsumer } };

        // Get references to the topic streams.
        IDictionary<string, IList<KafkaMessageStream<Message>>> streams = connector.CreateMessageStreams(topicMap, new DefaultDecoder());
        IList<KafkaMessageStream<Message>> topicData = streams[cgOptions.Topic];
        long latestTotalCount = 0;

        bool hitEndAndCommited = false;
        if (cgOptions.CancellationTimeoutMs == 5000)
        {
            // Get the message enumerator.
            IEnumerator<Message> messageEnumerator = topicData[0].GetEnumerator();
            // TODO: the enumerator count equals FetchThreadCountPerConsumer. For example, if that value is 5, there should be 5 enumerators here.
            // If there are 100 partitions and only 20 consumers, set this value to 5 and start 5 threads, one handling each enumerator.

            // Read messages until the maximum receive count is reached or no new messages arrive within the consumer's configured timeout.
            while (true)
            {
                bool noMoreMessage = false;
                try
                {
                    messageEnumerator.MoveNext();
                    Message m = messageEnumerator.Current;
                    latestTotalCount = Interlocked.Increment(ref ConsumerGroupHelper.totalCount);
                    Logger.InfoFormat("Message {0} from Partition:{1}, Offset:{2}, key:{3}, value:{4}",
                        latestTotalCount, m.PartitionId, m.Offset,
                        m.Key == null ? "(null)" : Encoding.UTF8.GetString(m.Key),
                        m.Payload == null ? "(null)" : Encoding.UTF8.GetString(m.Payload));
                    if (latestTotalCount == 1)
                    {
                        Logger.WarnFormat("Read FIRST message, its offset: {0} PartitionID:{1}", m.Offset, ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo.PartitionId);
                    }

                    hitEndAndCommited = false;
                    if (latestTotalCount % cgOptions.CommitBatchSize == 0)
                    {
                        // NOTE======
                        // Normally, just call .CommitOffsets() directly.
                        // CommitOffset(string topic, int partition, long offset) is only used when the consumer has a strong requirement to reprocess as few messages as possible.
                        // Tune the frequency of calling .CommitOffsets(): it directly increases ZooKeeper load and impacts your overall performance.
                        if (cgOptions.CommitOffsetWithPartitionIDOffset)
                        {
                            connector.CommitOffset(cgOptions.Topic, m.PartitionId.Value, m.Offset);
                        }
                        else
                        {
                            connector.CommitOffsets();
                        }

                        Console.WriteLine("\tRead some and commit once, LATEST message offset: {0}. PartitionID:{1} -- {2} Totally read {3} will commit offset. {4} FetchOffset:{5} ConsumeOffset:{6} CommitedOffset:{7}",
                            m.Offset, m.PartitionId.Value,
                            ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo.PartitionId,
                            latestTotalCount, DateTime.Now,
                            ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo.FetchOffset,
                            ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo.ConsumeOffset,
                            ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo.CommitedOffset);
                    }

                    if (cgOptions.Count > 0 && latestTotalCount >= cgOptions.Count)
                    {
                        Logger.WarnFormat("Read LAST message, its offset: {0}. PartitionID:{1} Totally read {2} want {3} will exit.",
                            m.Offset,
                            ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo.PartitionId,
                            latestTotalCount, cgOptions.Count);
                        break;
                    }
                }
                catch (ConsumerTimeoutException)
                {
                    if (!hitEndAndCommited)
                    {
                        Logger.WarnFormat("Totally Read {0} will commit offset. {1}", latestTotalCount, DateTime.Now);
                        connector.CommitOffsets();
                        hitEndAndCommited = true;
                    }

                    // Thrown if no new messages were read within the consumer's configured timeout.
                    noMoreMessage = true;
                }

                if (noMoreMessage)
                {
                    Logger.InfoFormat("No more messages, hit end, will Sleep(1), {0}", DateTime.Now);
                    if (cgOptions.SleepTypeWhileAlwaysRead == 0)
                    {
                        Thread.Sleep(0);
                    }
                    else if (cgOptions.SleepTypeWhileAlwaysRead == 1)
                    {
                        Thread.Sleep(1); // Thread.Sleep(1) is the best choice; the other options still drive the CPU to 100%.
                    }
                    else if (cgOptions.SleepTypeWhileAlwaysRead == 2)
                    {
                        Thread.Yield();
                    }
                    else
                    {
                    }
                }
            }
        }
        else
        {
            // Siphon scenario: repeatedly take a batch of messages and process them. If there are not enough messages, stop the current batch after the timeout.
            while (true)
            {
#if NET45
                bool noMoreMessage = false;
                Message lastMessage = null;
                int count = 0;
                KafkaMessageStream<Message> messagesStream = null;
                ConsumerIterator<Message> iterator = null;
                using (CancellationTokenSource cancellationTokenSource = new CancellationTokenSource(cgOptions.CancellationTimeoutMs))
                {
                    lastMessage = null;
                    IEnumerable<Message> messages = topicData[0].GetCancellable(cancellationTokenSource.Token);
                    messagesStream = (KafkaMessageStream<Message>)messages;
                    iterator = (ConsumerIterator<Message>)messagesStream.iterator;
                    foreach (Message message in messages)
                    {
                        latestTotalCount = Interlocked.Increment(ref ConsumerGroupHelper.totalCount);
                        lastMessage = message;
                        if (latestTotalCount == 1)
                        {
                            PartitionTopicInfo p = iterator.currentTopicInfo;
                            Logger.InfoFormat("Read FIRST message, its offset: {0} PartitionID:{1}", lastMessage.Offset, p == null ? "null" : p.PartitionId.ToString());
                        }

                        hitEndAndCommited = false;
                        if (++count >= cgOptions.CommitBatchSize)
                        {
                            cancellationTokenSource.Cancel();
                        }
                    }
                }

                if (count > 0)
                {
                    connector.CommitOffsets();
                    consumedTotalCount += count;
                    PartitionTopicInfo p = iterator.currentTopicInfo;
                    Console.WriteLine("\tRead some and commit once, Thread: {8} consumedTotalCount:{9} Target:{10} LATEST message offset: {0}. PartitionID:{1} -- {2} Totally read {3} will commit offset. {4} FetchOffset:{5} ConsumeOffset:{6} CommitedOffset:{7}",
                        lastMessage.Offset, lastMessage.PartitionId.Value,
                        p == null ? "null" : p.PartitionId.ToString(),
                        latestTotalCount, DateTime.Now,
                        p == null ? "null" : p.FetchOffset.ToString(),
                        p == null ? "null" : p.ConsumeOffset.ToString(),
                        p == null ? "null" : p.CommitedOffset.ToString(),
                        this.ThreadID,
                        this.consumedTotalCount,
                        this.Count);
                }
                else
                {
                    noMoreMessage = true;
                }

                if (this.Count > 0 && consumedTotalCount >= this.Count)
                {
                    Logger.InfoFormat("Current thread read LAST message, Totally read {0} want {1} will exit current thread.", consumedTotalCount, this.Count);
                    break;
                }

                if (noMoreMessage)
                {
                    Logger.InfoFormat("No more messages, hit end, will Sleep(2000), {0}", DateTime.Now);
                    if (cgOptions.SleepTypeWhileAlwaysRead == 0)
                    {
                        Thread.Sleep(0);
                    }
                    else if (cgOptions.SleepTypeWhileAlwaysRead == 1)
                    {
                        Thread.Sleep(2000); // Thread.Sleep(1) is normally the best choice; the other options still drive the CPU to 100%.
                    }
                    else if (cgOptions.SleepTypeWhileAlwaysRead == 2)
                    {
                        Thread.Yield();
                    }
                    else
                    {
                    }
                }
#endif
#if NET4
                throw new NotSupportedException("Please use .NET 4.5 to compile.");
#endif
            }
        }

        Logger.InfoFormat("Read {0} will commit offset. {1}", latestTotalCount, DateTime.Now);
        connector.CommitOffsets();

        latestTotalCount = Interlocked.Read(ref ConsumerGroupHelper.totalCount);

        Logger.InfoFormat("Totally read {0} want {1}.", latestTotalCount, cgOptions.Count);

        if (this.ThreadID == 0)
        {
            ConsumerGroupHelper.newOffset = connector.GetOffset(cgOptions.Topic);
        }
    }

    this.resetEvent.Set();
}
private void AddPartitionTopicInfo(ZKGroupTopicDirs topicDirs, string partition, string topic, string consumerThreadId)
{
    var partitionId = int.Parse(partition);
    var partTopicInfoMap = this.topicRegistry[topic];

    // Find the leader for this partition
    var leaderOpt = ZkUtils.GetLeaderForPartition(this.zkClient, topic, partitionId);
    if (!leaderOpt.HasValue)
    {
        throw new NoBrokersForPartitionException(string.Format("No leader available for partition {0} on topic {1}", partition, topic));
    }
    else
    {
        Logger.InfoFormat("Leader for partition {0} for topic {1} is {2}", partition, topic, leaderOpt.Value);
    }

    var leader = leaderOpt.Value;

    var znode = topicDirs.ConsumerOffsetDir + "/" + partition;
    var offsetString = this.zkClient.ReadData<string>(znode, true);

    // If this is the first time the consumer is started, set the initial offset based on the config
    long offset = 0;
    long offsetCommited = 0;
    if (offsetString == null)
    {
        switch (config.AutoOffsetReset)
        {
            case OffsetRequest.SmallestTime:
                offset = this.EarliestOrLatestOffset(topic, leader, partitionId, OffsetRequest.EarliestTime);
                break;
            case OffsetRequest.LargestTime:
                offset = this.EarliestOrLatestOffset(topic, leader, partitionId, OffsetRequest.LatestTime);
                break;
            default:
                throw new ConfigurationErrorsException("Wrong value for autoOffsetReset in ConsumerConfig");
        }
    }
    else
    {
        offsetCommited = long.Parse(offsetString);
        long latestOffset = this.EarliestOrLatestOffset(topic, leader, partitionId, OffsetRequest.LatestTime);
        offset = Math.Min(offsetCommited + 1, latestOffset);
        Logger.InfoFormat("Final offset {0} for topic {1} partition {2} OffsetCommited {3} latestOffset {4}",
            offset, topic, partition, offsetCommited, latestOffset);
    }

    var queue = this.queues[new Tuple<string, string>(topic, consumerThreadId)];
    var partTopicInfo = new PartitionTopicInfo(
        topic,
        leader,
        partitionId,
        queue,
        offsetCommited,
        offset,
        offset,
        this.config.FetchSize,
        offsetCommited);
    partTopicInfoMap[partitionId] = partTopicInfo;
    Logger.InfoFormat("{0} selected new offset {1}", partTopicInfo, offset);
}
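// Worked example of the offset selection above (illustrative numbers only):
//   committed offset in ZooKeeper = 99,  latest broker offset = 150 -> resume from Math.Min(99 + 1, 150) = 100 (the next unread message)
//   committed offset in ZooKeeper = 200, latest broker offset = 150 -> resume from Math.Min(200 + 1, 150) = 150 (clamped to what the broker actually has)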