예제 #1
0
        private void CloseFetchersForQueues(Cluster.Cluster cluster,
                                            IEnumerable <BlockingCollection <FetchedDataChunk> > queuesToBeCleared,
                                            IDictionary <string, IList <KafkaMessageStream <TData> > > kafkaMessageStreams,
                                            ZookeeperConsumerConnector zkConsumerConnector)
        {
            if (fetcher != null)
            {
                var allPartitionInfos = new List <PartitionTopicInfo>();
                foreach (var item in topicRegistry.Values)
                {
                    foreach (var partitionTopicInfo in item.Values)
                    {
                        allPartitionInfos.Add(partitionTopicInfo);
                    }
                }
                fetcher.Shutdown();
                fetcher.ClearFetcherQueues(allPartitionInfos, cluster, queuesToBeCleared, kafkaMessageStreams);
                Logger.Info("Committing all offsets after clearing the fetcher queues");

                if (config.AutoCommit)
                {
                    zkConsumerConnector.CommitOffsets();
                }
            }
        }
예제 #2
0
        static void BalancedConsumer(string consumerGroupId, string uniqueConsumerId, string topic, int threads, string zookeeperServer, Action <Message> ProcessMessage)
        {
            // Here we create a balanced consumer on one consumer machine for consumerGroupId. All machines consuming for this group will get balanced together
            ConsumerConfiguration config = new ConsumerConfiguration
            {
                AutoCommit = false,
                GroupId    = consumerGroupId,
                ConsumerId = uniqueConsumerId,
                ZooKeeper  = new ZooKeeperConfiguration(zookeeperServer, 30000, 30000, 2000)
            };
            var balancedConsumer = new ZookeeperConsumerConnector(config, true);

            // grab streams for desired topics
            var topicMap = new Dictionary <string, int>()
            {
                { topic, threads }
            };
            var streams            = balancedConsumer.CreateMessageStreams(topicMap, new DefaultDecoder());
            var KafkaMessageStream = streams[topic][0];

            // start consuming stream
            foreach (Message message in KafkaMessageStream.GetCancellable(new CancellationToken()))
            {
                ProcessMessage(message);
                balancedConsumer.CommitOffsets();
            }
        }
예제 #3
0
        internal void Consume()
        {
            // connects to zookeeper
            using (ZookeeperConsumerConnector connector = new ZookeeperConsumerConnector(configSettings, true))
            {
                if (this.ThreadID == 0)
                {
                    ConsumerGroupHelper.initialOffset = connector.GetOffset(cgOptions.Topic);

                    //Logger.InfoFormat("======Original offset \r\n{0}", ConsumerGroupHelper.initialOffset == null ? "(NULL)" : ConsumeGroupMonitorHelper.GetComsumerGroupOffsetsAsLog(ConsumerGroupHelper.initialOffset));
                }

                // defines collection of topics and number of threads to consume it with
                // ===============NOTE============================
                // For example , if there is 80 partitions for one topic.
                //
                // Normally start more than 96 = 80*120% clients with same GroupId.  ( the extra 20% are buffer for autopilot IMP).  And set  FetchThreadCountPerConsumer as 1.
                // Then 80 clients can lock partitions,  can set MACHINENAME_ProcessID as ConsumerId, other 16 are idle.  Strongly recomand take this method.
                //
                // If start 40 clients,  and  set  FetchThreadCountPerConsumer as 1. then every client can lock 2 partitions at least.   And if some client not available for autopilot
                // IMP reason, then some of the client maybe lock 3 partitions.
                //
                //  If start 40 clients, and set  FetchThreadCountPerConsumer as 2,  you will get two IEnumerator<Message>:topicData[0].GetEnumerator(),topicData[1].GetEnumerator()
                //  you need start TWO threads to process them in dependently.
                //  If the client get 2 partitions, each thread will handle 1 partition,
                //  If the client get 3 partitions, then one thread get 2 partitions, the other one get 1 partition.  It will make the situaiton complex and the consume of partition not balance.
                //==================NOTE=============================
                IDictionary <string, int> topicMap = new Dictionary <string, int> {
                    { cgOptions.Topic, cgOptions.FetchThreadCountPerConsumer }
                };

                // get references to topic streams.
                IDictionary <string, IList <KafkaMessageStream <Message> > > streams = connector.CreateMessageStreams(topicMap, new DefaultDecoder());
                IList <KafkaMessageStream <Message> > topicData = streams[cgOptions.Topic];
                long latestTotalCount = 0;

                bool hitEndAndCommited = false;
                if (cgOptions.CancellationTimeoutMs == 5000)
                {
                    // Get the message enumerator.
                    IEnumerator <Message> messageEnumerator = topicData[0].GetEnumerator();
                    //TODO:  the enumerator count equal with FetchThreadCountPerConsumer . For example,  if that value is 5, then here should get 5 enumerator.
                    //IF have 100 partitions, and only 20 consumers, need set this value to 5.  and start 5 thread handle each one.

                    // Add tuples until maximum receive message count is reached or no new messages read after consumer configured timeout.
                    while (true)
                    {
                        bool noMoreMessage = false;
                        try
                        {
                            messageEnumerator.MoveNext();
                            Message m = messageEnumerator.Current;
                            latestTotalCount = Interlocked.Increment(ref ConsumerGroupHelper.totalCount);
                            Logger.InfoFormat("Message {0} from Partition:{1}, Offset:{2}, key:{3}, value:{4}", latestTotalCount, m.PartitionId, m.Offset, m.Key == null ? "(null)" : Encoding.UTF8.GetString(m.Key), m.Payload == null ? "(null)" : Encoding.UTF8.GetString(m.Payload));
                            if (latestTotalCount == 1)
                            {
                                Logger.WarnFormat("Read FIRST message, it's offset: {0}  PartitionID:{1}", m.Offset, ((ConsumerIterator <Message>)messageEnumerator).currentTopicInfo.PartitionId);
                            }

                            hitEndAndCommited = false;
                            if (latestTotalCount % cgOptions.CommitBatchSize == 0)
                            {
                                //NOTE======
                                //Normally, just directly call .CommitOffsets()
                                //    CommitOffset(string topic, int partition, long offset)  only used when customer has strong requirement for reprocess messages as few as possible.
                                //Need tune the frequecy of calling  .CommitOffsets(), it will directly increate zookeeper load and impact your overall performance
                                if (cgOptions.CommitOffsetWithPartitionIDOffset)
                                {
                                    connector.CommitOffset(cgOptions.Topic, m.PartitionId.Value, m.Offset);
                                }
                                else
                                {
                                    connector.CommitOffsets();
                                }
                                Console.WriteLine("\tRead some and commit once,  LATEST message offset: {0}. PartitionID:{1} -- {2}  Totally read  {3}  will commit offset. {4} FetchOffset:{5}  ConsumeOffset:{6} CommitedOffset:{7}"
                                                  , m.Offset, m.PartitionId.Value, ((ConsumerIterator <Message>)messageEnumerator).currentTopicInfo.PartitionId, latestTotalCount, DateTime.Now
                                                  , ((ConsumerIterator <Message>)messageEnumerator).currentTopicInfo.FetchOffset
                                                  , ((ConsumerIterator <Message>)messageEnumerator).currentTopicInfo.ConsumeOffset
                                                  , ((ConsumerIterator <Message>)messageEnumerator).currentTopicInfo.CommitedOffset);
                            }

                            if (cgOptions.Count > 0 && latestTotalCount >= cgOptions.Count)
                            {
                                Logger.WarnFormat("Read LAST message, it's offset: {0}. PartitionID:{1}   Totally read {2}  want {3} will exit.", m.Offset, ((ConsumerIterator <Message>)messageEnumerator).currentTopicInfo.PartitionId, latestTotalCount, cgOptions.Count);
                                break;
                            }
                        }
                        catch (ConsumerTimeoutException)
                        {
                            if (!hitEndAndCommited)
                            {
                                Logger.WarnFormat("Totally Read {0}  will commit offset. {1}", latestTotalCount, DateTime.Now);
                                connector.CommitOffsets();
                                hitEndAndCommited = true;
                            }
                            // Thrown if no new messages read after consumer configured timeout.
                            noMoreMessage = true;
                        }

                        if (noMoreMessage)
                        {
                            Logger.InfoFormat("No more message , hit end ,will Sleep(1), {0}", DateTime.Now);
                            if (cgOptions.SleepTypeWhileAlwaysRead == 0)
                            {
                                Thread.Sleep(0);
                            }
                            else if (cgOptions.SleepTypeWhileAlwaysRead == 1)
                            {
                                Thread.Sleep(1);        //Best choice is Thread.Sleep(1).  Other 3 choice still make the CPU 100%
                            }
                            else if (cgOptions.SleepTypeWhileAlwaysRead == 2)
                            {
                                Thread.Yield();
                            }
                            else
                            {
                            }
                        }
                    }
                }
                else
                {
                    //Siphon scenario, repeatly take some messages and process. if no enough messages, will stop current batch after timeout.
                    while (true)
                    {
#if NET45
                        bool    noMoreMessage = false;
                        Message lastMessage   = null;
                        int     count         = 0;
                        KafkaMessageStream <Message> messagesStream = null;
                        ConsumerIterator <Message>   iterator       = null;
                        using (CancellationTokenSource cancellationTokenSource = new CancellationTokenSource(cgOptions.CancellationTimeoutMs))
                        {
                            lastMessage = null;
                            IEnumerable <Message> messages = topicData[0].GetCancellable(cancellationTokenSource.Token);
                            messagesStream = (KafkaMessageStream <Message>)messages;
                            iterator       = (ConsumerIterator <Message>)messagesStream.iterator;
                            foreach (Message message in messages)
                            {
                                latestTotalCount = Interlocked.Increment(ref ConsumerGroupHelper.totalCount);
                                lastMessage      = message;
                                if (latestTotalCount == 1)
                                {
                                    PartitionTopicInfo p = iterator.currentTopicInfo;
                                    Logger.InfoFormat("Read FIRST message, it's offset: {0}  PartitionID:{1}", lastMessage.Offset, p == null ? "null" : p.PartitionId.ToString());
                                }
                                hitEndAndCommited = false;
                                if (++count >= cgOptions.CommitBatchSize)
                                {
                                    cancellationTokenSource.Cancel();
                                }
                            }
                        }
                        if (count > 0)
                        {
                            connector.CommitOffsets();
                            consumedTotalCount += count;
                            PartitionTopicInfo p = iterator.currentTopicInfo;
                            Console.WriteLine("\tRead some and commit once, Thread: {8}  consumedTotalCount:{9} Target:{10} LATEST message offset: {0}. PartitionID:{1} -- {2}  Totally read  {3}  will commit offset. {4} FetchOffset:{5}  ConsumeOffset:{6} CommitedOffset:{7}"
                                              , lastMessage.Offset, lastMessage.PartitionId.Value, p == null ? "null" : p.PartitionId.ToString(), latestTotalCount, DateTime.Now
                                              , p == null ? "null" : p.FetchOffset.ToString()
                                              , p == null ? "null" : p.ConsumeOffset.ToString()
                                              , p == null ? "null" : p.CommitedOffset.ToString()
                                              , this.ThreadID
                                              , this.consumedTotalCount
                                              , this.Count);
                        }
                        else
                        {
                            noMoreMessage = true;
                        }

                        if (this.Count > 0 && consumedTotalCount >= this.Count)
                        {
                            Logger.InfoFormat("Current thrad Read LAST message, Totally read {0}  want {1} will exit current thread.", consumedTotalCount, this.Count);
                            break;
                        }

                        if (noMoreMessage)
                        {
                            Logger.InfoFormat("No more message , hit end ,will Sleep(2000), {0}", DateTime.Now);
                            if (cgOptions.SleepTypeWhileAlwaysRead == 0)
                            {
                                Thread.Sleep(0);
                            }
                            else if (cgOptions.SleepTypeWhileAlwaysRead == 1)
                            {
                                Thread.Sleep(2000);        //Best choice is Thread.Sleep(1).  Other 3 choice still make the CPU 100%
                            }
                            else if (cgOptions.SleepTypeWhileAlwaysRead == 2)
                            {
                                Thread.Yield();
                            }
                            else
                            {
                            }
                        }
#endif
#if NET4
                        throw new NotSupportedException("Please use .net45 to compile .");
#endif
                    }
                }

                Logger.InfoFormat("Read {0}  will commit offset. {1}", latestTotalCount, DateTime.Now);
                connector.CommitOffsets();

                latestTotalCount = Interlocked.Read(ref ConsumerGroupHelper.totalCount);

                Logger.InfoFormat("Totally read {0}  want {1} . ", latestTotalCount, cgOptions.Count);
                if (this.ThreadID == 0)
                {
                    ConsumerGroupHelper.newOffset = connector.GetOffset(cgOptions.Topic);
                }
            }

            this.resetEvent.Set();
        }
        public void TestBasic()
        {
            // test consumer timeout logic
            var consumerConfig0      = TestUtils.CreateConsumerProperties(ZkConnect, Group, Consumer0, 200);
            var zkConsumerConnector0 = new ZookeeperConsumerConnector(consumerConfig0);
            var topicMessageSterams0 =
                zkConsumerConnector0.CreateMessageStreams(
                    new Dictionary <string, int> {
                { Topic, 1 }
            }, new StringDecoder(), new StringDecoder());

            // no messages to consume, we should hit timeout;
            // also the iterator should support re-entrant, so loop it twice
            for (var i = 0; i < 2; i++)
            {
                Assert.Throws <ConsumerTimeoutException>(
                    () => this.GetMessages(nMessages * 2, topicMessageSterams0));
            }

            zkConsumerConnector0.Shutdown();

            // send some messages to each broker
            var sentMessages1 =
                this.SendMessagesToBrokerPartition(Configs.First(), Topic, 0, nMessages)
                .Union(this.SendMessagesToBrokerPartition(Configs.Last(), Topic, 0, nMessages)).ToList();

            // wait to make sure the topic and partition have a leader for the successful case
            TestUtils.WaitUntilLeaderIsElectedOrChanged(this.ZkClient, Topic, 0, 500);
            TestUtils.WaitUntilLeaderIsElectedOrChanged(this.ZkClient, Topic, 1, 500);

            TestUtils.WaitUntilMetadataIsPropagated(this.Servers, Topic, 0, 1000);
            TestUtils.WaitUntilMetadataIsPropagated(this.Servers, Topic, 1, 1000);

            // create a consuemr
            var consumerConfig1      = TestUtils.CreateConsumerProperties(ZkConnect, Group, Consumer1);
            var zkConsumerConnector1 = new ZookeeperConsumerConnector(consumerConfig1);
            var topicMessageStreams1 =
                zkConsumerConnector1.CreateMessageStreams(
                    new Dictionary <string, int> {
                { Topic, 1 }
            }, new StringDecoder(), new StringDecoder());

            var receivedMessages1 = this.GetMessages(nMessages * 2, topicMessageStreams1);

            Assert.Equal(sentMessages1.OrderBy(x => x).ToArray(), receivedMessages1.OrderBy(x => x).ToArray());

            // also check partition ownership
            var actual_1   = this.GetZKChildrenValues(this.dirs.ConsumerOwnerDir);
            var expected_1 = new List <Tuple <string, string> >
            {
                Tuple.Create("0", "group1_consumer1-0"),
                Tuple.Create("1", "group1_consumer1-0")
            };

            Assert.Equal(expected_1, actual_1);

            // commit consumer offsets
            zkConsumerConnector1.CommitOffsets();

            // create a consumer
            var consumerConfig2 = TestUtils.CreateConsumerProperties(ZkConnect, Group, Consumer2);

            consumerConfig2.RebalanceBackoffMs = RebalanceBackoutMs;

            var zkConsumerConnector2 = new ZookeeperConsumerConnector(consumerConfig2);
            var topicMessageStreams2 =
                zkConsumerConnector2.CreateMessageStreams(
                    new Dictionary <string, int> {
                { Topic, 1 }
            }, new StringDecoder(), new StringDecoder());

            // send some messages to each broker
            var sentMessages2 =
                this.SendMessagesToBrokerPartition(Configs.First(), Topic, 0, nMessages)
                .Union(this.SendMessagesToBrokerPartition(Configs.Last(), Topic, 1, nMessages)).ToList();

            // wait to make sure the topic and partition have a leader for the successful case
            TestUtils.WaitUntilLeaderIsElectedOrChanged(this.ZkClient, Topic, 0, 500);
            TestUtils.WaitUntilLeaderIsElectedOrChanged(this.ZkClient, Topic, 1, 500);

            var receivedMessages2 =
                this.GetMessages(nMessages, topicMessageStreams1)
                .Union(this.GetMessages(nMessages, topicMessageStreams2))
                .ToList();

            Assert.Equal(sentMessages2.OrderBy(x => x).ToList(), receivedMessages2.OrderBy(x => x).ToList());

            // also check partition ownership
            var actual_2   = this.GetZKChildrenValues(this.dirs.ConsumerOwnerDir);
            var expected_2 = new List <Tuple <string, string> >
            {
                Tuple.Create("0", "group1_consumer1-0"),
                Tuple.Create("1", "group1_consumer2-0")
            };

            Assert.Equal(expected_2, actual_2);

            // create a consumer with empty map
            var consumerConfig3      = TestUtils.CreateConsumerProperties(ZkConnect, Group, Consumer3);
            var zkConsumerConnector3 = new ZookeeperConsumerConnector(consumerConfig3);

            zkConsumerConnector3.CreateMessageStreams(new Dictionary <string, int>());

            // send some messages to each broker
            var sentMessages3 =
                this.SendMessagesToBrokerPartition(Configs.First(), Topic, 0, nMessages)
                .Union(this.SendMessagesToBrokerPartition(Configs.Last(), Topic, 1, nMessages))
                .ToList();

            // wait to make sure the topic and partition have a leader for the successful case
            TestUtils.WaitUntilLeaderIsElectedOrChanged(this.ZkClient, Topic, 0, 500);
            TestUtils.WaitUntilLeaderIsElectedOrChanged(this.ZkClient, Topic, 1, 500);

            var receivedMessages3 =
                this.GetMessages(nMessages, topicMessageStreams1)
                .Union(this.GetMessages(nMessages, topicMessageStreams2))
                .ToList();

            Assert.Equal(sentMessages3.OrderBy(x => x).ToList(), receivedMessages3.OrderBy(x => x).ToList());

            // also check partition ownership
            var actual_3 = this.GetZKChildrenValues(this.dirs.ConsumerOwnerDir);

            Assert.Equal(expected_2, actual_3);

            zkConsumerConnector1.Shutdown();
            zkConsumerConnector2.Shutdown();
            zkConsumerConnector3.Shutdown();

            Logger.Info("all consumer connectors stopped");
        }
예제 #5
0
        public IEnumerable <Message> ConsumeRecords(string topic, int timeToExecuteInMilliSeconds)
        {
            XmlConfigurator.Configure();
            CancellationToken token = _tokenSource.Token;

            _timer = new Timer(TimerElapsed, null, timeToExecuteInMilliSeconds, timeToExecuteInMilliSeconds);
            _watch = Stopwatch.StartNew();
            ConsumerConfiguration config = new ConsumerConfiguration
            {
                AutoCommit           = false,
                GroupId              = _consumerGroupId,
                ConsumerId           = _consumerUniqueId + Thread.CurrentThread.ManagedThreadId,
                MaxFetchBufferLength = 20000,
                //FetchSize = fetchSize,
                AutoOffsetReset = OffsetRequest.SmallestTime,
                NumberOfTries   = 20,
                ZooKeeper       = new ZooKeeperConfiguration(_zooKeeperString, 30000, 30000, 8000),
                Verbose         = true
            };
            var balancedConsumer = new ZookeeperConsumerConnector(config, true);
            // grab streams for desired topics
            Dictionary <string, int> topicMap = new Dictionary <string, int>();

            topicMap.Add(topic, 1);
            var streams            = balancedConsumer.CreateMessageStreams(topicMap, new DefaultDecoder());
            var kafkaMessageStream = streams[topic][0];

            List <Message> consumedMessages = new List <Message>();

            try
            {
                foreach (Message message in kafkaMessageStream.GetCancellable(token))
                {
                    var insertQuery = CqlQuery.InsertInto(ConfigurationManager.AppSettings["CassandraTable"])
                                      .SetValue("kafkaId", new Random().Next())
                                      .SetValue("NGMData", Encoding.UTF8.GetString(message.Payload))
                                      .SetValue("timeConsumed", timeToExecuteInMilliSeconds);
                    var task = _dbContext.ExecuteNonQueryAsync(insertQuery);
                    if (_enablePerFetchOffsetCommit)
                    {
                        balancedConsumer.CommitOffset(topic, Convert.ToInt32(message.PartitionId), message.Offset);
                    }
                    if (!_isCassandraAsync)
                    {
                        task.Wait();
                    }
                    consumedMessages.Add(message);
                    if (_watch.ElapsedMilliseconds >= timeToExecuteInMilliSeconds)
                    {
                        break;
                    }

                    if (_enableBatchOffsetCommit && (consumedMessages.Count() ^ _offsetCommitBatchCount) == 0)
                    {
                        balancedConsumer.CommitOffsets();
                    }
                }
            }
            catch (OperationCanceledException operationCancelledException) { }
            return(consumedMessages);
        }