/// <summary>
/// Prepares the test driver for the given topology builder: overrides the configuration
/// (one stream thread, at-least-once guarantee, resolved client id), derives a separate
/// topic configuration, creates the stream thread on top of a <see cref="MockKafkaSupplier"/>
/// and finally calls <c>RunDriver</c>.
/// </summary>
/// <param name="builder">internal builder of the topology under test</param>
/// <param name="config">stream configuration; it is modified in place by this constructor</param>
private TopologyTestDriver(InternalTopologyBuilder builder, IStreamConfig config)
{
    topologyBuilder = builder;
    configuration = config;

    // ONLY 1 thread for test driver
    configuration.NumStreamThreads = 1;
    configuration.Guarantee = ProcessingGuarantee.AT_LEAST_ONCE;

    // The clone is taken after the overrides above and before ClientId is written below.
    topicConfiguration = config.Clone();
    topicConfiguration.ApplicationId = $"test-driver-{configuration.ApplicationId}";

    // Keep a client id supplied by the caller, otherwise generate "<application id>-<guid>".
    // NOTE(review): ToLower() is culture-sensitive; confirm whether ToLowerInvariant() is wanted here.
    string clientId = configuration.ClientId;
    if (string.IsNullOrEmpty(clientId))
    {
        clientId = $"{configuration.ApplicationId.ToLower()}-{Guid.NewGuid()}";
    }
    configuration.ClientId = clientId;

    kafkaSupplier = new MockKafkaSupplier();
    pipeBuilder = new PipeBuilder(kafkaSupplier);
    processorTopology = topologyBuilder.BuildTopology();

    string threadId = $"{configuration.ApplicationId.ToLower()}-stream-thread-0";
    var adminClient = kafkaSupplier.GetAdmin(configuration.ToAdminConfig($"{clientId}-admin"));
    threadTopology = StreamThread.Create(threadId, clientId, builder, config, kafkaSupplier, adminClient, 0);

    RunDriver();
}
/// <summary>
/// Creates a stream task bound to a single topic partition.
/// </summary>
/// <param name="threadId">identifier of the owning stream thread</param>
/// <param name="id">task identifier</param>
/// <param name="partition">topic partition handled by this task</param>
/// <param name="processorTopology">topology used to look up the source processor</param>
/// <param name="consumer">consumer forwarded to the base task</param>
/// <param name="configuration">stream configuration</param>
/// <param name="kafkaSupplier">Kafka clients supplier</param>
/// <param name="producer">producer to use; when null the task creates its own EOS producer</param>
public StreamTask(string threadId, TaskId id, TopicPartition partition, ProcessorTopology processorTopology, IConsumer<byte[], byte[]> consumer, IStreamConfig configuration, IKafkaSupplier kafkaSupplier, IProducer<byte[], byte[]> producer)
    : base(id, partition, processorTopology, consumer, configuration)
{
    this.threadId = threadId;
    this.kafkaSupplier = kafkaSupplier;
    consumedOffsets = new Dictionary<TopicPartition, long>();

    if (producer != null)
    {
        this.producer = producer;
    }
    else
    {
        // eos enabled: no producer was supplied, so the task owns a transactional one
        this.producer = CreateEOSProducer();
        InitializeTransaction();
        eosEnabled = true;
    }

    collector = new RecordCollector(logPrefix);
    collector.Init(ref this.producer);

    // NOTE(review): the 'as' cast yields null when the processor is not an ISourceProcessor,
    // in which case reading Extractor throws a NullReferenceException.
    var sourceProcessor = processorTopology.GetSourceProcessor(id.Topic) as ISourceProcessor;
    var sourceTimestampExtractor = sourceProcessor.Extractor;

    Context = new ProcessorContext(configuration, stateMgr).UseRecordCollector(collector);
    processor = processorTopology.GetSourceProcessor(partition.Topic);

    // Fall back to the configured default extractor when the source does not define one.
    queue = new RecordQueue<ConsumeResult<byte[], byte[]>>(
        100,
        logPrefix,
        $"record-queue-{id.Topic}-{id.Partition}",
        sourceTimestampExtractor ?? configuration.DefaultTimestampExtractor);
}
/// <summary>
/// Create a <see cref="KafkaStream"/> instance with your own <see cref="IKafkaSupplier" />.
/// Please DO NOT FORGET to call Close to avoid resources leak !
/// </summary>
/// <param name="topology">the topology specifying the computational logic</param>
/// <param name="configuration">configuration about this stream</param>
/// <param name="kafkaSupplier">the Kafka clients supplier which provides underlying producer and consumer clients for the new <see cref="KafkaStream"/> instance</param>
public KafkaStream(Topology topology, IStreamConfig configuration, IKafkaSupplier kafkaSupplier)
{
    this.topology = topology;
    this.configuration = configuration;
    this.kafkaSupplier = kafkaSupplier;

    // Use the configured client id when present, otherwise "<application id>-<guid>".
    string configuredClientId = configuration.ClientId;
    if (string.IsNullOrEmpty(configuredClientId))
    {
        clientId = $"{configuration.ApplicationId.ToLower()}-{Guid.NewGuid()}";
    }
    else
    {
        clientId = configuredClientId;
    }
    logPrefix = $"stream-application[{configuration.ApplicationId}] ";

    // re-write the physical topology according to the config
    topology.Builder.RewriteTopology(configuration);

    // sanity check
    processorTopology = topology.Builder.BuildTopology();

    threads = new IThread[configuration.NumStreamThreads];
    var statesByThreadId = new Dictionary<long, Processors.ThreadState>();
    var storeProviders = new List<StreamThreadStateStoreProvider>();

    for (int index = 0; index < configuration.NumStreamThreads; ++index)
    {
        string threadId = $"{configuration.ApplicationId.ToLower()}-stream-thread-{index}";

        // A new admin client is requested from the supplier on every iteration.
        var adminClientId = StreamThread.GetSharedAdminClientId(clientId);
        adminClient = kafkaSupplier.GetAdmin(configuration.ToAdminConfig(adminClientId));

        IThread streamThread = StreamThread.Create(
            threadId,
            clientId,
            topology.Builder,
            configuration,
            kafkaSupplier,
            adminClient,
            index);
        threads[index] = streamThread;

        statesByThreadId.Add(streamThread.Id, streamThread.State);
        storeProviders.Add(new StreamThreadStateStoreProvider(streamThread, topology.Builder));
    }

    // The manager receives the state changes of every stream thread.
    var manager = new StreamStateManager(this, statesByThreadId);
    foreach (var thread in threads)
    {
        thread.StateChanged += manager.OnChange;
    }

    queryableStoreProvider = new QueryableStoreProvider(storeProviders);
    StreamState = State.CREATED;
}
/// <summary>
/// Stores the dependencies supplied by the caller in the factory's fields.
/// </summary>
/// <param name="topology">processor topology kept by the factory</param>
/// <param name="threadClientId">client id kept for the thread</param>
/// <param name="globalConsumer">consumer kept by the factory</param>
/// <param name="configuration">stream configuration</param>
/// <param name="adminClient">admin client kept by the factory</param>
public GlobalStreamThreadFactory(
    ProcessorTopology topology,
    string threadClientId,
    IConsumer<byte[], byte[]> globalConsumer,
    IStreamConfig configuration,
    IAdminClient adminClient)
{
    this.topology = topology;
    this.threadClientId = threadClientId;
    this.globalConsumer = globalConsumer;
    this.configuration = configuration;
    this.adminClient = adminClient;
}
public void SetUp()
{
    // Consumer obtained from the synchronous supplier and handed to the state manager below.
    mockKafkaSupplier = new SyncKafkaSupplier();
    var consumerConfig = new ConsumerConfig { GroupId = "global-consulmer" };
    var globalConsumer = mockKafkaSupplier.GetConsumer(consumerConfig, null);

    // Stream configuration mock: a fresh state directory name per call and a fixed application id.
    string stateDir = $"./{Guid.NewGuid().ToString()}";
    streamConfigMock = new Mock<IStreamConfig>();
    streamConfigMock.Setup(c => c.StateDir).Returns(stateDir);
    streamConfigMock.Setup(c => c.ApplicationId).Returns("app");

    kvStoreMock = CreateMockStore<IKeyValueStore<object, object>>(kvStoreName);
    otherStoreMock = CreateMockStore<IKeyValueStore<object, object>>(otherStoreName);
    var kvStore = kvStoreMock.Object;
    var otherStore = otherStoreMock.Object;

    // Both mocked stores are registered as global stores, each mapped to its own topic.
    var globalStateStores = new Dictionary<string, IStateStore>();
    globalStateStores.Add(kvStore.Name, kvStore);
    globalStateStores.Add(otherStore.Name, otherStore);

    var storesToTopics = new Dictionary<string, string>();
    storesToTopics.Add(kvStore.Name, kvStoreTopic);
    storesToTopics.Add(otherStore.Name, otherStoreTopic);

    topology = new ProcessorTopology(
        null,
        new Dictionary<string, IProcessor>(),
        new Dictionary<string, IProcessor>(),
        new Dictionary<string, IProcessor>(),
        new Dictionary<string, IStateStore>(),
        globalStateStores,
        storesToTopics,
        new List<string>());

    adminClientMock = new Mock<IAdminClient>();
    RegisterPartitionInAdminClient(kvStoreTopic);
    RegisterPartitionInAdminClient(otherStoreTopic);

    stateManager = new GlobalStateManager(
        globalConsumer,
        topology,
        adminClientMock.Object,
        streamConfigMock.Object);

    context = new GlobalProcessorContext(streamConfigMock.Object, stateManager, new StreamMetricsRegistry());
    stateManager.SetGlobalProcessorContext(context);
}
/// <summary>
/// Initializes the state shared by tasks that handle a single topic partition:
/// logger, log prefix, identity, topology, consumer, configuration and state manager.
/// </summary>
/// <param name="id">task identifier</param>
/// <param name="partition">topic partition handled by the task</param>
/// <param name="topology">processor topology of the task</param>
/// <param name="consumer">consumer kept by the task</param>
/// <param name="config">stream configuration</param>
protected AbstractTask(
    TaskId id,
    TopicPartition partition,
    ProcessorTopology topology,
    IConsumer<byte[], byte[]> consumer,
    IStreamConfig config)
{
    log = Logger.GetLogger(GetType());
    logPrefix = $"stream-task[{id.Topic}|{id.Partition}] ";

    Id = id;
    Partition = partition;
    Topology = topology;

    configuration = config;
    this.consumer = consumer;

    stateMgr = new ProcessorStateManager(id, partition);
}
/// <summary>
/// Builds the synchronous topology driver: stores the configurations, creates the metrics
/// registry and the producer, groups partition 0 of every source topic by task id and,
/// when the builder exposes a global state topology, creates and initializes the global task.
/// </summary>
/// <param name="clientId">client id; also written to <paramref name="configuration"/></param>
/// <param name="topologyBuilder">internal builder of the topology to drive</param>
/// <param name="configuration">stream configuration (its ClientId is overwritten)</param>
/// <param name="topicConfiguration">configuration kept for topic handling</param>
/// <param name="supplier">Kafka clients supplier; a <see cref="SyncKafkaSupplier"/> is created when null</param>
/// <param name="token">cancellation token kept by the driver</param>
public TaskSynchronousTopologyDriver(string clientId, InternalTopologyBuilder topologyBuilder, IStreamConfig configuration, IStreamConfig topicConfiguration, IKafkaSupplier supplier, CancellationToken token)
{
    this.configuration = configuration;
    this.configuration.ClientId = clientId;
    this.topicConfiguration = topicConfiguration;
    metricsRegistry = new StreamMetricsRegistry(clientId, MetricsRecordingLevel.DEBUG);

    this.token = token;
    builder = topologyBuilder;
    this.supplier = supplier ?? new SyncKafkaSupplier();
    this.supplier.MetricsRegistry = metricsRegistry;

    // NOTE(review): 'as' leaves producer null when the supplier returns a producer
    // that is not a SyncProducer.
    producer = this.supplier.GetProducer(configuration.ToProducerConfig()) as SyncProducer;

    // Only partition 0 of each source topic is registered, grouped by owning task.
    foreach (var sourceTopic in builder.GetSourceTopics())
    {
        var part = new TopicPartition(sourceTopic, 0);
        var taskId = builder.GetTaskIdFromPartition(part);

        // Single lookup instead of ContainsKey followed by the indexer.
        if (partitionsByTaskId.TryGetValue(taskId, out var taskPartitions))
        {
            taskPartitions.Add(part);
        }
        else
        {
            partitionsByTaskId.Add(taskId, new List<TopicPartition> { part });
        }
    }

    ProcessorTopology globalTaskTopology = topologyBuilder.BuildGlobalStateTopology();
    hasGlobalTopology = globalTaskTopology != null;
    if (hasGlobalTopology)
    {
        var globalConsumer = this.supplier.GetGlobalConsumer(configuration.ToGlobalConsumerConfig($"{clientId}-global-consumer"));
        var adminClient = this.supplier.GetAdmin(configuration.ToAdminConfig($"{clientId}-admin"));
        var stateManager = new GlobalStateManager(globalConsumer, globalTaskTopology, adminClient, configuration);

        // The context needs the state manager and the state manager needs the context,
        // so the context is attached right after it has been created.
        globalProcessorContext = new GlobalProcessorContext(configuration, stateManager, metricsRegistry);
        stateManager.SetGlobalProcessorContext(globalProcessorContext);

        globalTask = new GlobalStateUpdateTask(stateManager, globalTaskTopology, globalProcessorContext);
        globalTask.Initialize();
    }
}
/// <summary>
/// Initializes the state shared by tasks that handle several topic partitions.
/// The incoming sequence is copied into a list once; that list is used both for
/// <c>Partition</c> and for the state manager.
/// </summary>
/// <param name="id">task identifier</param>
/// <param name="partition">topic partitions handled by the task</param>
/// <param name="topology">processor topology of the task</param>
/// <param name="consumer">consumer kept by the task</param>
/// <param name="config">stream configuration</param>
protected AbstractTask(
    TaskId id,
    IEnumerable<TopicPartition> partition,
    ProcessorTopology topology,
    IConsumer<byte[], byte[]> consumer,
    IStreamConfig config)
{
    log = Logger.GetLogger(GetType());
    logPrefix = $"stream-task[{id.Id}|{id.Partition}] ";

    // Materialize once so the property and the state manager share the same list.
    List<TopicPartition> assignedPartitions = partition.ToList();
    Partition = assignedPartitions;

    Id = id;
    Topology = topology;
    this.consumer = consumer;
    configuration = config;

    stateMgr = new ProcessorStateManager(id, assignedPartitions);
}
/// <summary>
/// Create a <see cref="KafkaStream"/> instance.
/// Please DO NOT FORGET to call Close to avoid resources leak !
/// </summary>
/// <param name="topology">the topology specifying the computational logic</param>
/// <param name="configuration">configuration about this stream</param>
public KafkaStream(Topology topology, IStreamConfig configuration)
{
    this.topology = topology;
    this.configuration = configuration;
    kafkaSupplier = new DefaultKafkaClientSupplier(new KafkaLoggerAdapter(configuration));

    // Use the configured client id when present, otherwise "<application id>-<guid>".
    string configuredClientId = configuration.ClientId;
    if (string.IsNullOrEmpty(configuredClientId))
    {
        clientId = $"{configuration.ApplicationId.ToLower()}-{Guid.NewGuid()}";
    }
    else
    {
        clientId = configuredClientId;
    }
    logPrefix = $"stream-application[{configuration.ApplicationId}] ";

    // sanity check
    processorTopology = topology.Builder.BuildTopology();

    threads = new IThread[configuration.NumStreamThreads];
    var statesByThreadId = new Dictionary<long, Processors.ThreadState>();

    for (int index = 0; index < configuration.NumStreamThreads; ++index)
    {
        string threadId = $"{configuration.ApplicationId.ToLower()}-stream-thread-{index}";

        // A new admin client is requested from the supplier on every iteration.
        var adminClientId = StreamThread.GetSharedAdminClientId(clientId);
        adminClient = kafkaSupplier.GetAdmin(configuration.ToAdminConfig(adminClientId));

        IThread streamThread = StreamThread.Create(
            threadId,
            clientId,
            topology.Builder,
            configuration,
            kafkaSupplier,
            adminClient,
            index);
        threads[index] = streamThread;

        statesByThreadId.Add(streamThread.Id, streamThread.State);
    }

    // The manager receives the state changes of every stream thread.
    var manager = new StreamStateManager(this, statesByThreadId);
    foreach (var thread in threads)
    {
        thread.StateChanged += manager.OnChange;
    }

    StreamState = State.CREATED;
}
public void Setup()
{
    // Mocks for the collaborators of the task under test.
    contextMock = new Mock<GlobalProcessorContext>(null, null, new StreamMetricsRegistry());
    stateManagerMock = new Mock<IGlobalStateManager>();
    processorMock = new Mock<IProcessor>();
    sourceProcessorMock = new Mock<ISourceProcessor>();
    otherSourceProcessorMock = new Mock<ISourceProcessor>();

    // Each source processor reports its own topic name.
    sourceProcessorMock.Setup(x => x.TopicName).Returns("topic1");
    otherSourceProcessorMock.Setup(x => x.TopicName).Returns("topic2");

    // The state manager reports both stores when initialized.
    var initializedStores = new HashSet<string> { "store1", "store2" };
    stateManagerMock.Setup(x => x.Initialize()).Returns(initializedStores);

    var sourceProcessors = new Dictionary<string, IProcessor>();
    sourceProcessors.Add("source1", sourceProcessorMock.Object);
    sourceProcessors.Add("source2", otherSourceProcessorMock.Object);

    var processors = new Dictionary<string, IProcessor>();
    processors.Add("processor", processorMock.Object);

    var storesToTopics = new Dictionary<string, string>();
    storesToTopics.Add("store1", "topic1");
    storesToTopics.Add("store2", "topic2");

    var topology = new ProcessorTopology(
        null,
        sourceProcessors,
        null,
        processors,
        null,
        null,
        storesToTopics,
        null);

    globalStateUpdateTask = new GlobalStateUpdateTask(stateManagerMock.Object, topology, contextMock.Object);
}
public void SetUp()
{
    // Configuration mock with a metadata request timeout of 1.
    streamConfigMock = new Mock<IStreamConfig>();
    streamConfigMock.Setup(x => x.MetadataRequestTimeoutMs).Returns(1);

    kvStoreMock = CreateMockStore<IKeyValueStore<object, object>>(kvStoreName);
    otherStoreMock = CreateMockStore<IKeyValueStore<object, object>>(otherStoreName);
    var kvStore = kvStoreMock.Object;
    var otherStore = otherStoreMock.Object;

    // Both mocked stores are registered as global stores, each mapped to its own topic.
    var globalStateStores = new Dictionary<string, IStateStore>();
    globalStateStores.Add(kvStore.Name, kvStore);
    globalStateStores.Add(otherStore.Name, otherStore);

    var storesToTopics = new Dictionary<string, string>();
    storesToTopics.Add(kvStore.Name, kvStoreTopic);
    storesToTopics.Add(otherStore.Name, otherStoreTopic);

    topology = new ProcessorTopology(
        null,
        new Dictionary<string, IProcessor>(),
        new Dictionary<string, IProcessor>(),
        new Dictionary<string, IProcessor>(),
        new Dictionary<string, IStateStore>(),
        globalStateStores,
        storesToTopics);

    adminClientMock = new Mock<IAdminClient>();
    RegisterPartitionInAdminClient(kvStoreTopic);
    RegisterPartitionInAdminClient(otherStoreTopic);

    stateManager = new GlobalStateManager(topology, adminClientMock.Object, streamConfigMock.Object);
}
/// <summary>
/// Initializes the state shared by tasks that handle several topic partitions and
/// creates the state manager with an offset checkpoint manager and a changelog register.
/// </summary>
/// <param name="id">task identifier</param>
/// <param name="partition">topic partitions handled by the task</param>
/// <param name="topology">processor topology of the task; its store-to-topic mapping is given to the state manager</param>
/// <param name="consumer">consumer kept by the task</param>
/// <param name="config">stream configuration</param>
/// <param name="changelogRegister">changelog register handed to the state manager</param>
protected AbstractTask(
    TaskId id,
    IEnumerable<TopicPartition> partition,
    ProcessorTopology topology,
    IConsumer<byte[], byte[]> consumer,
    IStreamConfig config,
    IChangelogRegister changelogRegister)
{
    log = Logger.GetLogger(GetType());
    logPrefix = $"stream-task[{id.Id}|{id.Partition}] ";

    Id = id;
    Partition = partition;
    Topology = topology;
    this.consumer = consumer;
    configuration = config;

    // Prefer the checkpoint manager from the configuration; otherwise use a file-based one
    // located at <StateDir>/<ApplicationId>/<task id>-<task partition>.
    var checkpointManager = config.OffsetCheckpointManager;
    if (checkpointManager == null)
    {
        string checkpointPath = Path.Combine(config.StateDir, config.ApplicationId, $"{id.Id}-{id.Partition}");
        checkpointManager = new OffsetCheckpointFile(checkpointPath);
    }
    checkpointManager.Configure(config, id);

    stateMgr = new ProcessorStateManager(
        id,
        partition,
        topology.StoresToTopics,
        changelogRegister,
        checkpointManager);
}
/// <summary>
/// Forwards every argument to the base task, passing no changelog register and a new
/// <see cref="StreamMetricsRegistry"/>, and keeps the configuration in <c>config</c>.
/// </summary>
UnassignedStreamTask(
    string threadId,
    TaskId id,
    IEnumerable<TopicPartition> partitions,
    ProcessorTopology processorTopology,
    IConsumer<byte[], byte[]> consumer,
    IStreamConfig configuration,
    IKafkaSupplier kafkaSupplier,
    IProducer<byte[], byte[]> producer)
    : base(
        threadId,
        id,
        partitions,
        processorTopology,
        consumer,
        configuration,
        kafkaSupplier,
        producer,
        null,
        new StreamMetricsRegistry())
{
    this.config = configuration;
}
/// <summary>
/// Create a <see cref="KafkaStream"/> instance with your own <see cref="IKafkaSupplier" />.
/// Please DO NOT FORGET to call Close to avoid resources leak !
/// </summary>
/// <param name="topology">the topology specifying the computational logic</param>
/// <param name="configuration">configuration about this stream</param>
/// <param name="kafkaSupplier">the Kafka clients supplier which provides underlying producer and consumer clients for the new <see cref="KafkaStream"/> instance</param>
/// <exception cref="StreamConfigException">ApplicationId or BootstrapServers is null or empty</exception>
/// <exception cref="TopologyException">the topology needs neither stream threads nor a global thread</exception>
public KafkaStream(Topology topology, IStreamConfig configuration, IKafkaSupplier kafkaSupplier)
{
    this.topology = topology;
    this.kafkaSupplier = kafkaSupplier;

    // check if ApplicationId & BootstrapServers has been set
    bool hasMinimalConfiguration =
        !string.IsNullOrEmpty(configuration.ApplicationId) &&
        !string.IsNullOrEmpty(configuration.BootstrapServers);
    if (!hasMinimalConfiguration)
    {
        throw new StreamConfigException("Stream configuration is not correct. Please set ApplicationId and BootstrapServers as minimal.");
    }

    // Use the configured client id when present, otherwise "<application id>-<guid>".
    string configuredClientId = configuration.ClientId;
    if (string.IsNullOrEmpty(configuredClientId))
    {
        clientId = $"{configuration.ApplicationId.ToLower()}-{Guid.NewGuid()}";
    }
    else
    {
        clientId = configuredClientId;
    }
    logPrefix = $"stream-application[{configuration.ApplicationId}] ";

    logger.Info($"{logPrefix} Start creation of the stream application with this configuration: {configuration}");

    // re-write the physical topology according to the config
    topology.Builder.RewriteTopology(configuration);

    // sanity check: the result is not needed, only the build itself
    topology.Builder.BuildTopology();

    // No stream thread is created when the builder reports that only a global topology exists.
    int numStreamThreads;
    if (topology.Builder.HasNoNonGlobalTopology)
    {
        numStreamThreads = 0;
    }
    else
    {
        numStreamThreads = configuration.NumStreamThreads;
    }

    threads = new IThread[numStreamThreads];
    var statesByThreadId = new Dictionary<long, Processors.ThreadState>();

    ProcessorTopology globalTaskTopology = topology.Builder.BuildGlobalStateTopology();
    bool hasGlobalTopology = globalTaskTopology != null;

    if (!hasGlobalTopology && numStreamThreads == 0)
    {
        throw new TopologyException("Topology has no stream threads and no global threads, must subscribe to at least one source topic or global table.");
    }

    GlobalThreadState globalThreadState = null;
    if (hasGlobalTopology)
    {
        string globalThreadId = $"{clientId}-GlobalStreamThread";
        var globalConsumer = kafkaSupplier.GetGlobalConsumer(configuration.ToGlobalConsumerConfig(globalThreadId));
        var globalAdminClient = kafkaSupplier.GetAdmin(configuration.ToAdminConfig(clientId));

        var globalThreadFactory = new GlobalStreamThreadFactory(
            globalTaskTopology,
            globalThreadId,
            globalConsumer,
            configuration,
            globalAdminClient);

        globalStreamThread = globalThreadFactory.GetGlobalStreamThread();
        globalThreadState = globalStreamThread.State;
    }

    var storeProviders = new List<StreamThreadStateStoreProvider>();
    for (int index = 0; index < numStreamThreads; ++index)
    {
        string threadId = $"{configuration.ApplicationId.ToLower()}-stream-thread-{index}";

        // A new admin client is requested from the supplier for every stream thread.
        var adminClientId = StreamThread.GetSharedAdminClientId(clientId);
        var adminClient = kafkaSupplier.GetAdmin(configuration.ToAdminConfig(adminClientId));

        IThread streamThread = StreamThread.Create(
            threadId,
            clientId,
            topology.Builder,
            configuration,
            kafkaSupplier,
            adminClient,
            index);
        threads[index] = streamThread;

        statesByThreadId.Add(streamThread.Id, streamThread.State);
        storeProviders.Add(new StreamThreadStateStoreProvider(streamThread, topology.Builder));
    }

    // The manager receives the state changes of the global thread (if any) and of every stream thread.
    var manager = new StreamStateManager(this, statesByThreadId, globalThreadState);
    if (hasGlobalTopology)
    {
        globalStreamThread.StateChanged += manager.OnGlobalThreadStateChange;
    }
    foreach (var thread in threads)
    {
        thread.StateChanged += manager.OnChange;
    }

    var globalStoreProvider = new GlobalStateStoreProvider(topology.Builder.GlobalStateStores);
    queryableStoreProvider = new QueryableStoreProvider(storeProviders, globalStoreProvider);

    StreamState = State.CREATED;
}
/// <summary>
/// Stores the topology, admin client and configuration supplied by the caller.
/// </summary>
/// <param name="topology">processor topology kept by the manager</param>
/// <param name="adminClient">admin client kept by the manager</param>
/// <param name="config">stream configuration</param>
public GlobalStateManager(
    ProcessorTopology topology,
    IAdminClient adminClient,
    IStreamConfig config)
{
    this.config = config;
    this.adminClient = adminClient;
    this.topology = topology;
}
/// <summary>
/// Stores the state manager, topology and processor context supplied by the caller.
/// </summary>
/// <param name="globalStateManager">global state manager kept by the task</param>
/// <param name="topology">processor topology kept by the task</param>
/// <param name="context">processor context kept by the task</param>
public GlobalStateUpdateTask(
    IGlobalStateManager globalStateManager,
    ProcessorTopology topology,
    ProcessorContext context)
{
    this.context = context;
    this.topology = topology;
    this.globalStateManager = globalStateManager;
}
/// <summary>
/// Creates a new <see cref="ProcessorTopology"/> from the root, the source, sink and
/// processor operators, and the state stores currently held by this builder.
/// </summary>
/// <returns>the newly created topology</returns>
public ProcessorTopology BuildTopology()
    => new ProcessorTopology(root, sourceOperators, sinkOperators, processorOperators, stateStores);
/// <summary>
/// Creates a stream task handling one or more topic partitions. One record queue is
/// created per partition and all queues are handed to a partition grouper.
/// </summary>
/// <param name="threadId">identifier of the owning stream thread</param>
/// <param name="id">task identifier</param>
/// <param name="partitions">topic partitions handled by this task</param>
/// <param name="processorTopology">topology used to look up the source processors</param>
/// <param name="consumer">consumer forwarded to the base task</param>
/// <param name="configuration">stream configuration</param>
/// <param name="kafkaSupplier">Kafka clients supplier</param>
/// <param name="producer">producer to use; when null the task creates its own EOS producer</param>
public StreamTask(string threadId, TaskId id, IEnumerable<TopicPartition> partitions, ProcessorTopology processorTopology, IConsumer<byte[], byte[]> consumer, IStreamConfig configuration, IKafkaSupplier kafkaSupplier, IProducer<byte[], byte[]> producer)
    : base(id, partitions, processorTopology, consumer, configuration)
{
    this.threadId = threadId;
    this.kafkaSupplier = kafkaSupplier;

    consumedOffsets = new Dictionary<TopicPartition, long>();
    maxTaskIdleMs = configuration.MaxTaskIdleMs;
    maxBufferedSize = configuration.BufferedRecordsPerPartition;
    idleStartTime = -1;

    if (producer != null)
    {
        this.producer = producer;
    }
    else
    {
        // eos enabled: no producer was supplied, so the task owns a transactional one
        this.producer = CreateEOSProducer();
        InitializeTransaction();
        eosEnabled = true;
    }

    collector = new RecordCollector(logPrefix);
    collector.Init(ref this.producer);

    Context = new ProcessorContext(this, configuration, stateMgr).UseRecordCollector(collector);

    // One queue per partition, fed by the source processor registered for the partition's topic.
    var queuesByPartition = new Dictionary<TopicPartition, RecordQueue>();
    foreach (var topicPartition in partitions)
    {
        var source = processorTopology.GetSourceProcessor(topicPartition.Topic);

        // Fall back to the configured default extractor when the source does not define one.
        var timestampExtractor = source.Extractor ?? configuration.DefaultTimestampExtractor;
        string queueName = $"record-queue-{topicPartition.Topic}-{id.Id}-{id.Partition}";

        queuesByPartition.Add(
            topicPartition,
            new RecordQueue(logPrefix, queueName, timestampExtractor, source));
        processors.Add(source);
    }

    partitionGrouper = new PartitionGrouper(queuesByPartition);
}
/// <summary>
/// Creates a stream task handling one or more topic partitions, with metrics support.
/// One record queue is created per partition and all queues are handed to a partition
/// grouper; the task-level and thread-level sensors are created last.
/// </summary>
/// <param name="threadId">identifier of the owning stream thread</param>
/// <param name="id">task identifier</param>
/// <param name="partitions">topic partitions handled by this task</param>
/// <param name="processorTopology">topology used to look up the source processors</param>
/// <param name="consumer">consumer forwarded to the base task</param>
/// <param name="configuration">stream configuration</param>
/// <param name="kafkaSupplier">Kafka clients supplier</param>
/// <param name="producer">producer to use; when null the task creates its own EOS producer</param>
/// <param name="changelogRegister">changelog register forwarded to the base task</param>
/// <param name="streamMetricsRegistry">registry used to create the sensors of this task</param>
public StreamTask(string threadId, TaskId id, IEnumerable<TopicPartition> partitions, ProcessorTopology processorTopology, IConsumer<byte[], byte[]> consumer, IStreamConfig configuration, IKafkaSupplier kafkaSupplier, IProducer<byte[], byte[]> producer, IChangelogRegister changelogRegister, StreamMetricsRegistry streamMetricsRegistry)
    : base(id, partitions, processorTopology, consumer, configuration, changelogRegister)
{
    this.threadId = threadId;
    this.kafkaSupplier = kafkaSupplier;
    this.streamMetricsRegistry = streamMetricsRegistry;

    consumedOffsets = new Dictionary<TopicPartition, long>();
    maxTaskIdleMs = configuration.MaxTaskIdleMs;
    maxBufferedSize = configuration.BufferedRecordsPerPartition;
    followMetadata = configuration.FollowMetadata;
    idleStartTime = -1;

    if (producer != null)
    {
        this.producer = producer;
    }
    else
    {
        // eos enabled: no producer was supplied, so the task owns a transactional one
        this.producer = CreateEOSProducer();
        InitializeTransaction();
        eosEnabled = true;
    }

    // The same dropped-records sensor is shared by the collector and every record queue.
    var droppedRecordsSensor = TaskMetrics.DroppedRecordsSensor(this.threadId, Id, this.streamMetricsRegistry);

    collector = new RecordCollector(logPrefix, configuration, id, droppedRecordsSensor);
    collector.Init(ref this.producer);

    Context = new ProcessorContext(this, configuration, stateMgr, streamMetricsRegistry)
        .UseRecordCollector(collector);
    Context.FollowMetadata = followMetadata;

    // One queue per partition, fed by the source processor registered for the partition's topic.
    var queuesByPartition = new Dictionary<TopicPartition, RecordQueue>();
    foreach (var topicPartition in partitions)
    {
        var source = processorTopology.GetSourceProcessor(topicPartition.Topic);
        source.SetTaskId(id);

        // Fall back to the configured default extractor when the source does not define one.
        var timestampExtractor = source.Extractor ?? configuration.DefaultTimestampExtractor;
        string queueName = $"record-queue-{topicPartition.Topic}-{id.Id}-{id.Partition}";

        var recordQueue = new RecordQueue(
            logPrefix,
            queueName,
            timestampExtractor,
            topicPartition,
            source,
            droppedRecordsSensor);

        queuesByPartition.Add(topicPartition, recordQueue);
        processors.Add(source);
    }

    partitionGrouper = new PartitionGrouper(queuesByPartition);

    // Sensors are created in the same order as before.
    closeTaskSensor = ThreadMetrics.ClosedTaskSensor(this.threadId, streamMetricsRegistry);
    activeBufferedRecordSensor = TaskMetrics.ActiveBufferedRecordsSensor(this.threadId, Id, streamMetricsRegistry);
    processSensor = TaskMetrics.ProcessSensor(this.threadId, Id, streamMetricsRegistry);
    processLatencySensor = TaskMetrics.ProcessLatencySensor(this.threadId, Id, streamMetricsRegistry);
    enforcedProcessingSensor = TaskMetrics.EnforcedProcessingSensor(this.threadId, Id, streamMetricsRegistry);
    commitSensor = TaskMetrics.CommitSensor(this.threadId, Id, streamMetricsRegistry);
    activeRestorationSensor = TaskMetrics.ActiveRestorationSensor(this.threadId, Id, streamMetricsRegistry);
    restorationRecordsSendsor = TaskMetrics.RestorationRecordsSensor(this.threadId, Id, streamMetricsRegistry);
}