/// <summary>
/// Parses the command line, resolves the requested verb and dispatches to the matching helper.
/// </summary>
/// <param name="taskIndexInStorm">
/// When non-negative, the index is appended to the consumer id and prepended to the output file
/// name for the "consumegroup" / "dumpdataasconsumergroup" verbs. Other verbs ignore it.
/// </param>
/// <param name="args">Raw command-line arguments; parsed first for the verb, then for the verb's own options.</param>
/// <remarks>
/// A parse failure logs the error, prints usage and calls <c>Environment.Exit(-1)</c>.
/// An unknown verb is only logged; the method then returns normally.
/// </remarks>
public static void MainInternal(int taskIndexInStorm, string[] args)
{
    // Call GenerateAssistFile once per primary verb, in the same fixed order as before.
    string[] assistVerbs =
    {
        "producesimple",
        "produceperftest",
        "producemonitor",
        "eventserverperftest",
        "consumesimple",
        "consumegroup",
        "consumegroupmonitor",
        "topic"
    };
    foreach (string assistVerb in assistVerbs)
    {
        GenerateAssistFile(assistVerb);
    }

    ServicePointManager.DefaultConnectionLimit = 5000;
    ServicePointManager.UseNagleAlgorithm = false;

    var log4netSection = ConfigurationManager.GetSection("log4net");
    if (log4netSection != null)
    {
        // The XmlConfigurator.Configure() call that belongs here is commented out in the
        // original code, so this branch currently does nothing.
    }

    // Pass 1: identify the verb.
    KafkaNETExampleCommandVerb verbParser = new KafkaNETExampleCommandVerb();
    try
    {
        verbParser.Parse(args);
    }
    catch (Exception verbParseError)
    {
        Logger.ErrorFormat("{0}", verbParseError.FormatException());
        Console.WriteLine(KafkaNETExampleCommandVerb.GetUsage());
        Environment.Exit(-1);
    }

    // Pass 2: let the verb-specific option object parse the same arguments.
    KafkaNETExampleSubArguments realOption = KafkaNETExampleCommandVerb.ActiveSubOption;
    try
    {
        realOption.Parse(args);
    }
    catch (Exception optionParseError)
    {
        Logger.ErrorFormat("{0}", optionParseError.FormatException());
        Console.WriteLine(realOption.GetUsage(false));
        Environment.Exit(-1);
    }

    Logger.InfoFormat("All arguments of {0}: \r\n{1}", KafkaNETExampleCommandVerb.AssemblyName, realOption.GetArgDict());

    switch (KafkaNETExampleCommandVerb.Verb)
    {
        case "producesimple":
        case "produceroundrobin":
            ProduceSimpleHelper.Run((ProduceSimpleHelperOption)realOption);
            break;

        case "produceperftest":
        case "producewrapper":
            // Cast before constructing the helper so a wrong option type fails first.
            ProducePerfTestHelperOption perfTestOptions = (ProducePerfTestHelperOption)realOption;
            new ProducePerfTestHelper().Run(perfTestOptions);
            break;

        case "producem":
        case "producemonitor":
            ProduceMonitorHelper.Run((ProduceMonitorHelperOptions)realOption);
            break;

        case "eventserverperftest":
            // Cast before constructing the helper so a wrong option type fails first.
            JavaEventServerPerfTestHelperOptions eventServerOptions = (JavaEventServerPerfTestHelperOptions)realOption;
            new JavaEventServerPerfTestHelper().Run(eventServerOptions);
            break;

        case "consumesimple":
        case "dumpdata":
            ConsumeSimpleHelper.ConsumeDataSimple((ConsumeDataHelperArguments)realOption);
            break;

        case "consumegroup":
        case "dumpdataasconsumergroup":
            ConsumeGroupHelperOptions groupOptions = (ConsumeGroupHelperOptions)realOption;
            if (taskIndexInStorm >= 0)
            {
                string taskSuffix = taskIndexInStorm.ToString();
                groupOptions.ConsumerId = groupOptions.ConsumerId + taskSuffix;

                // NOTE(review): ConsumerId already ends with the task index at this point, so the
                // index appears twice in the file-name prefix - confirm this is intended.
                groupOptions.File = groupOptions.ConsumerId + taskSuffix + groupOptions.File;
            }

            ConsumerGroupHelper.DumpMessageAsConsumerGroup(groupOptions);
            break;

        case "latestoffsetofconsumergroup":
        case "consumegroupmonitor":
        case "consumegroupm":
        case "consumem":
            ConsumeGroupMonitorHelper.DumpConsumerGroupOffsets((ConsumeGroupMonitorHelperOptions)realOption);
            break;

        case "topic":
            TopicHelper.DumpTopicMetadataAndOffset((TopicHelperArguments)realOption);
            break;

        case "test":
            TestHelper.Run((TestHelperOptions)realOption);
            break;

        default:
            string invalidVerbMessage = string.Format("Invalid verb={0}", KafkaNETExampleCommandVerb.Verb);
            Logger.Error(invalidVerbMessage);
            return;
    }
}
/// <summary>
/// Consumes messages of <c>cgOptions.Topic</c> through a <c>ZookeeperConsumerConnector</c>,
/// committing offsets along the way, and signals <c>resetEvent</c> when it finishes.
/// </summary>
/// <remarks>
/// The mode is chosen by <c>cgOptions.CancellationTimeoutMs</c>:
/// <list type="bullet">
/// <item>
/// Equal to <c>KafkaNETExampleConstants.DefaultCancellationTimeoutMs</c>: messages are read one at a
/// time from the first stream's enumerator. Offsets are committed every
/// <c>cgOptions.CommitBatchSize</c> messages and once after a <c>ConsumerTimeoutException</c>.
/// The loop ends only when <c>cgOptions.Count</c> is positive and the shared total reaches it.
/// </item>
/// <item>
/// Any other value: (compiled only with NET45) messages are read in batches through a cancellable
/// enumeration that is cancelled after <c>CommitBatchSize</c> messages or by the timeout. The loop
/// ends when <c>this.Count</c> is positive and this thread has consumed at least that many messages.
/// With NET4 this mode throws <see cref="NotSupportedException"/>.
/// </item>
/// </list>
/// The thread with <c>ThreadID == 0</c> also records <c>ConsumerGroupHelper.initialOffset</c> before
/// and <c>ConsumerGroupHelper.newOffset</c> after consuming.
/// </remarks>
internal void Consume()
{
    try
    {
        // Connects to ZooKeeper.
        using (ZookeeperConsumerConnector connector = new ZookeeperConsumerConnector(configSettings, true))
        {
            if (this.ThreadID == 0)
            {
                ConsumerGroupHelper.initialOffset = connector.GetOffset(cgOptions.Topic);
                Logger.InfoFormat(
                    "======Original offset \r\n{0}",
                    ConsumerGroupHelper.initialOffset == null
                        ? "(NULL)"
                        : ConsumeGroupMonitorHelper.GetComsumerGroupOffsetsAsLog(ConsumerGroupHelper.initialOffset));
            }

            // Defines the collection of topics and the number of threads to consume each with.
            // =============== NOTE ===============
            // Example: a topic with 80 partitions.
            //
            // Normally start more than 96 (= 80 * 120%) clients with the same GroupId (the extra 20% is a
            // buffer for autopilot IMP) and set FetchThreadCountPerConsumer to 1. Then 80 clients can lock
            // a partition each (MACHINENAME_ProcessID can be used as ConsumerId) and the other 16 stay idle.
            // This approach is strongly recommended.
            //
            // If 40 clients are started with FetchThreadCountPerConsumer = 1, every client locks at least
            // 2 partitions, and if some clients are unavailable (autopilot IMP), some clients may lock 3.
            //
            // If 40 clients are started with FetchThreadCountPerConsumer = 2, two enumerators are returned
            // (topicData[0].GetEnumerator() and topicData[1].GetEnumerator()) and TWO threads are needed to
            // process them independently. With 2 partitions each thread handles 1; with 3 partitions one
            // thread gets 2 and the other 1, which complicates things and unbalances consumption.
            // =============== NOTE ===============
            IDictionary<string, int> topicMap = new Dictionary<string, int>
            {
                { cgOptions.Topic, cgOptions.FetchThreadCountPerConsumer }
            };

            // Get references to the topic streams.
            IDictionary<string, IList<KafkaMessageStream<Message>>> streams = connector.CreateMessageStreams(topicMap, new DefaultDecoder());
            IList<KafkaMessageStream<Message>> topicData = streams[cgOptions.Topic];
            long latestTotalCount = 0;
            bool hitEndAndCommited = false;

            if (cgOptions.CancellationTimeoutMs == KafkaNETExampleConstants.DefaultCancellationTimeoutMs)
            {
                // Get the message enumerator.
                // TODO: the number of enumerators equals FetchThreadCountPerConsumer. For example, with
                // 100 partitions and only 20 consumers that value should be 5, and 5 threads should be
                // started to handle one enumerator each. Only the first stream is consumed here.
                IEnumerator<Message> messageEnumerator = topicData[0].GetEnumerator();

                // Read until the maximum message count is reached; a consumer timeout only pauses the loop.
                while (true)
                {
                    bool noMoreMessage = false;
                    try
                    {
                        // NOTE(review): the result of MoveNext() is ignored, as in the original code;
                        // presumably the iterator blocks or throws instead of returning false - confirm.
                        messageEnumerator.MoveNext();
                        Message m = messageEnumerator.Current;
                        latestTotalCount = Interlocked.Increment(ref ConsumerGroupHelper.totalCount);
                        Logger.InfoFormat(
                            "Message {0} from Partition:{1}, Offset:{2}, key:{3}, value:{4}",
                            latestTotalCount,
                            m.PartitionId,
                            m.Offset,
                            m.Key == null ? "(null)" : Encoding.UTF8.GetString(m.Key),
                            m.Payload == null ? "(null)" : Encoding.UTF8.GetString(m.Payload));

                        if (latestTotalCount == 1)
                        {
                            // Null-guarded the same way as in the batch mode below.
                            PartitionTopicInfo topicInfo = ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo;
                            Logger.InfoFormat(
                                "Read FIRST message, it's offset: {0} PartitionID:{1}",
                                m.Offset,
                                topicInfo == null ? "null" : topicInfo.PartitionId.ToString());
                        }

                        hitEndAndCommited = false;
                        if (latestTotalCount % cgOptions.CommitBatchSize == 0)
                        {
                            // NOTE:
                            // Normally just call CommitOffsets() directly.
                            // CommitOffset(string topic, int partition, long offset) is only for callers with a
                            // strong requirement to reprocess as few messages as possible.
                            // Tune how often offsets are committed: every commit adds ZooKeeper load and
                            // affects overall performance.
                            if (cgOptions.CommitOffsetWithPartitionIDOffset)
                            {
                                connector.CommitOffset(cgOptions.Topic, m.PartitionId.Value, m.Offset);
                            }
                            else
                            {
                                connector.CommitOffsets();
                            }

                            PartitionTopicInfo topicInfo = ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo;
                            Console.WriteLine(
                                "\tRead some and commit once, LATEST message offset: {0}. PartitionID:{1} -- {2} Totally read {3} will commit offset. {4} FetchOffset:{5} ConsumeOffset:{6} CommitedOffset:{7}",
                                m.Offset,
                                m.PartitionId.Value,
                                topicInfo == null ? "null" : topicInfo.PartitionId.ToString(),
                                latestTotalCount,
                                DateTime.Now,
                                topicInfo == null ? "null" : topicInfo.FetchOffset.ToString(),
                                topicInfo == null ? "null" : topicInfo.ConsumeOffset.ToString(),
                                topicInfo == null ? "null" : topicInfo.CommitedOffset.ToString());
                        }

                        if (cgOptions.Count > 0 && latestTotalCount >= cgOptions.Count)
                        {
                            PartitionTopicInfo topicInfo = ((ConsumerIterator<Message>)messageEnumerator).currentTopicInfo;
                            Logger.InfoFormat(
                                "Read LAST message, it's offset: {0}. PartitionID:{1} Totally read {2} want {3} will exit.",
                                m.Offset,
                                topicInfo == null ? "null" : topicInfo.PartitionId.ToString(),
                                latestTotalCount,
                                cgOptions.Count);
                            break;
                        }
                    }
                    catch (ConsumerTimeoutException)
                    {
                        // Thrown when no new message arrives within the consumer's configured timeout.
                        // Commit once per idle period rather than on every timeout.
                        if (!hitEndAndCommited)
                        {
                            Logger.InfoFormat("Totally Read {0} will commit offset. {1}", latestTotalCount, DateTime.Now);
                            connector.CommitOffsets();
                            hitEndAndCommited = true;
                        }

                        noMoreMessage = true;
                    }

                    if (noMoreMessage)
                    {
                        Logger.InfoFormat("No more message , hit end ,will Sleep(1), {0}", DateTime.Now);
                        this.PauseWhenIdle(1);
                    }
                }
            }
            else
            {
                // Siphon scenario: repeatedly take some messages and process them. If there are not enough
                // messages, the current batch stops after the cancellation timeout.
                while (true)
                {
#if NET45
                    bool noMoreMessage = false;
                    Message lastMessage = null;
                    int count = 0;
                    KafkaMessageStream<Message> messagesStream = null;
                    ConsumerIterator<Message> iterator = null;
                    using (CancellationTokenSource cancellationTokenSource = new CancellationTokenSource(cgOptions.CancellationTimeoutMs))
                    {
                        IEnumerable<Message> messages = topicData[0].GetCancellable(cancellationTokenSource.Token);
                        messagesStream = (KafkaMessageStream<Message>)messages;
                        iterator = (ConsumerIterator<Message>)messagesStream.iterator;
                        foreach (Message message in messages)
                        {
                            latestTotalCount = Interlocked.Increment(ref ConsumerGroupHelper.totalCount);
                            lastMessage = message;
                            if (latestTotalCount == 1)
                            {
                                PartitionTopicInfo p = iterator.currentTopicInfo;
                                Logger.InfoFormat("Read FIRST message, it's offset: {0} PartitionID:{1}", lastMessage.Offset, p == null ? "null" : p.PartitionId.ToString());
                            }

                            hitEndAndCommited = false;

                            // A full batch ends the enumeration early by cancelling the token.
                            if (++count >= cgOptions.CommitBatchSize)
                            {
                                cancellationTokenSource.Cancel();
                            }
                        }
                    }

                    if (count > 0)
                    {
                        connector.CommitOffsets();
                        consumedTotalCount += count;
                        PartitionTopicInfo p = iterator.currentTopicInfo;
                        Console.WriteLine(
                            "\tRead some and commit once, Thread: {8} consumedTotalCount:{9} Target:{10} LATEST message offset: {0}. PartitionID:{1} -- {2} Totally read {3} will commit offset. {4} FetchOffset:{5} ConsumeOffset:{6} CommitedOffset:{7}",
                            lastMessage.Offset,
                            lastMessage.PartitionId.Value,
                            p == null ? "null" : p.PartitionId.ToString(),
                            latestTotalCount,
                            DateTime.Now,
                            p == null ? "null" : p.FetchOffset.ToString(),
                            p == null ? "null" : p.ConsumeOffset.ToString(),
                            p == null ? "null" : p.CommitedOffset.ToString(),
                            this.ThreadID,
                            this.consumedTotalCount,
                            this.Count);
                    }
                    else
                    {
                        noMoreMessage = true;
                    }

                    if (this.Count > 0 && consumedTotalCount >= this.Count)
                    {
                        Logger.InfoFormat("Current thrad Read LAST message, Totally read {0} want {1} will exit current thread.", consumedTotalCount, this.Count);
                        break;
                    }

                    if (noMoreMessage)
                    {
                        Logger.InfoFormat("No more message , hit end ,will Sleep(2000), {0}", DateTime.Now);
                        this.PauseWhenIdle(2000);
                    }
#endif
#if NET4
                    throw new NotSupportedException("Please use .net45 to compile .");
#endif
                }
            }

            Logger.InfoFormat("Read {0} will commit offset. {1}", latestTotalCount, DateTime.Now);
            connector.CommitOffsets();

            latestTotalCount = Interlocked.Read(ref ConsumerGroupHelper.totalCount);
            Logger.InfoFormat("Totally read {0} want {1} . ", latestTotalCount, cgOptions.Count);
            if (this.ThreadID == 0)
            {
                ConsumerGroupHelper.newOffset = connector.GetOffset(cgOptions.Topic);
            }
        }
    }
    finally
    {
        // Signal even when consuming fails, so the event is never left unset by an exception.
        // NOTE(review): presumably the code that started this consumer waits on resetEvent - confirm.
        this.resetEvent.Set();
    }
}

/// <summary>
/// Pauses the calling thread after an empty read, using the strategy selected by
/// <c>cgOptions.SleepTypeWhileAlwaysRead</c>: 0 = <c>Thread.Sleep(0)</c>,
/// 1 = <c>Thread.Sleep(sleepMilliseconds)</c>, 2 = <c>Thread.Yield()</c>, any other value = no pause.
/// </summary>
/// <param name="sleepMilliseconds">Sleep duration used only by strategy 1.</param>
private void PauseWhenIdle(int sleepMilliseconds)
{
    if (cgOptions.SleepTypeWhileAlwaysRead == 0)
    {
        Thread.Sleep(0);
    }
    else if (cgOptions.SleepTypeWhileAlwaysRead == 1)
    {
        // Original author's note: the best choice is Thread.Sleep(1); the other three choices
        // still drive the CPU to 100%.
        Thread.Sleep(sleepMilliseconds);
    }
    else if (cgOptions.SleepTypeWhileAlwaysRead == 2)
    {
        Thread.Yield();
    }
}