/// <summary>
/// Returns the cached <see cref="StreamTask"/> for <paramref name="topicName"/>,
/// creating, initializing and caching a new one (keyed by its task id) on first use.
/// </summary>
/// <param name="topicName">Source topic; partition 0 is used to resolve the task id.</param>
/// <returns>The existing or newly created task.</returns>
internal StreamTask GetTask(string topicName)
{
    var id = builder.GetTaskIdFromPartition(new Confluent.Kafka.TopicPartition(topicName, 0));

    // Single dictionary lookup instead of ContainsKey + indexer (double lookup).
    if (tasks.TryGetValue(id, out StreamTask task))
    {
        return task;
    }

    task = new StreamTask(
        "thread-0",
        id,
        partitionsByTaskId[id],
        builder.BuildTopology(id),
        supplier.GetConsumer(configuration.ToConsumerConfig(), null),
        configuration,
        supplier,
        producer);
    task.InitializeStateStores();
    task.InitializeTopology();
    tasks.Add(id, task);
    return task;
}
/// <summary>
/// Returns the cached <see cref="StreamTask"/> for <paramref name="topicName"/>,
/// creating, initializing and caching a new one (keyed by topic name) on first use.
/// New tasks get a fresh sequential id via the <c>id</c> counter field.
/// </summary>
/// <param name="topicName">Source topic the task consumes (partition 0).</param>
/// <returns>The existing or newly created task.</returns>
internal StreamTask GetTask(string topicName)
{
    // Single dictionary lookup instead of ContainsKey + indexer (double lookup).
    if (tasks.TryGetValue(topicName, out StreamTask task))
    {
        return task;
    }

    task = new StreamTask(
        "thread-0",
        new TaskId { Id = id++, Partition = 0, Topic = topicName },
        new Confluent.Kafka.TopicPartition(topicName, 0),
        builder.BuildTopology(topicName),
        supplier.GetConsumer(configuration.ToConsumerConfig(), null),
        configuration,
        supplier,
        producer);
    task.InitializeStateStores();
    task.InitializeTopology();
    tasks.Add(topicName, task);
    return task;
}
/// <summary>
/// Verifies that <c>StreamTask.DecompressStream</c> yields the original file bytes
/// for each supported decompression method, and that the source file is closed afterwards.
/// </summary>
public void DecompressStream_Should_Extract_Data_Correctly()
{
    // Map each fixture file to the decompression method required to read it.
    var testData = new Dictionary<string, DecompressionMethods>
    {
        { FileName, DecompressionMethods.None },
        { FileName + ".deflate", DecompressionMethods.Deflate },
        { FileName + ".gzip", DecompressionMethods.GZip }
    };
    var expected = File.ReadAllBytes(FileName);

    foreach (var data in testData)
    {
        using (var stream = StreamTask.DecompressStream(data.Key, data.Value))
        using (var memStream = new MemoryStream())
        {
            stream.CopyTo(memStream);
            // BUG FIX: the failure message was missing the '$' interpolation prefix,
            // so it printed the literal text "{data.Value}" instead of the method name.
            Assert.IsTrue(
                expected.SequenceEqual(memStream.ToArray()),
                $"DecompressStream failed for {data.Value}");
        }
        CheckFileIsClosed(data.Key);
    }
}
/// <summary>
/// Returns the cached <see cref="StreamTask"/> for <paramref name="topicName"/>,
/// creating, initializing and restoring a new one on first use.
/// Returns null for global topics, which have no stream task.
/// </summary>
/// <param name="topicName">Source topic; partition 0 is used to resolve the task id.</param>
/// <returns>The existing or newly created task, or null when the topic is global.</returns>
internal StreamTask GetTask(string topicName)
{
    var id = builder.GetTaskIdFromPartition(new Confluent.Kafka.TopicPartition(topicName, 0));

    // Single dictionary lookup instead of ContainsKey + indexer (double lookup).
    if (tasks.TryGetValue(id, out StreamTask task))
    {
        return task;
    }

    // Global topics are handled by the global thread, not a stream task.
    if (builder.GetGlobalTopics().Contains(topicName))
    {
        return null;
    }

    task = new StreamTask(
        "thread-0",
        id,
        partitionsByTaskId[id],
        builder.BuildTopology(id),
        supplier.GetConsumer(configuration.ToConsumerConfig(), null),
        configuration,
        supplier,
        producer,
        new MockChangelogRegister(),
        metricsRegistry);
    task.InitializeStateStores();
    task.InitializeTopology();
    task.RestorationIfNeeded();
    task.CompleteRestoration();
    tasks.Add(id, task);
    return task;
}
// Test setup: builds a count + forward topology, creates and fully initializes a
// StreamTask over a synchronous in-memory Kafka supplier, and asserts the
// ACTIVE_RESTORATION sensor flips 1 -> 0 around CompleteRestoration().
public void Initialize()
{
    streamMetricsRegistry = new StreamMetricsRegistry(Guid.NewGuid().ToString(), MetricsRecordingLevel.DEBUG);

    config.ApplicationId = "test-stream-thread";
    // Fresh state dir per run so tests don't share RocksDB/checkpoint files.
    config.StateDir = Guid.NewGuid().ToString();
    config.Guarantee = ProcessingGuarantee.AT_LEAST_ONCE;
    config.PollMs = 10;
    config.CommitIntervalMs = 1;

    // Topology: stream "topic" -> count by key (stateful) and forward to "topic2".
    var builder = new StreamBuilder();
    var stream = builder.Stream<string, string>("topic");
    stream.GroupByKey().Count();
    stream.To("topic2");

    var topo = builder.Build();

    id = new TaskId { Id = 0, Partition = 0 };
    var processorTopology = builder.Build().Builder.BuildTopology(id);

    syncKafkaSupplier = new SyncKafkaSupplier();
    var producer = syncKafkaSupplier.GetProducer(config.ToProducerConfig());
    var consumer = syncKafkaSupplier.GetConsumer(config.ToConsumerConfig(), null);

    topicPartition = new TopicPartition("topic", 0);
    task = new StreamTask(
        threadId,
        id,
        new List<TopicPartition> { topicPartition },
        processorTopology,
        consumer,
        config,
        syncKafkaSupplier,
        null,
        new MockChangelogRegister(),
        streamMetricsRegistry);
    task.GroupMetadata = consumer as SyncConsumer;
    task.InitializeStateStores();
    task.InitializeTopology();
    task.RestorationIfNeeded();

    // While restoring, the active-restoration gauge reports 1.
    var activeRestorationSensor = streamMetricsRegistry.GetSensors()
        .FirstOrDefault(s => s.Name.Equals(GetSensorName(TaskMetrics.ACTIVE_RESTORATION)));
    Assert.AreEqual(1, activeRestorationSensor.Metrics[MetricName.NameAndGroup(
        TaskMetrics.ACTIVE_RESTORATION, StreamMetricsRegistry.TASK_LEVEL_GROUP)].Value);

    task.CompleteRestoration();

    // After restoration completes, the gauge drops back to 0.
    Assert.AreEqual(0, activeRestorationSensor.Metrics[MetricName.NameAndGroup(
        TaskMetrics.ACTIVE_RESTORATION, StreamMetricsRegistry.TASK_LEVEL_GROUP)].Value);
}
// Verifies that when Materialized is null, the table-groupBy-count topology still
// creates its two state stores (source table store + count store) with generated names.
public void WithNullMaterialize()
{
    // CERTIFIED THAT SAME IF Materialize is null, a state store exist for count processor with a generated namestore
    var config = new StreamConfig<StringSerDes, StringSerDes>();
    var serdes = new StringSerDes();
    config.ApplicationId = "test-count";

    var builder = new StreamBuilder();
    // Explicit null Materialized: store names must be auto-generated.
    Materialized<string, long, IKeyValueStore<Bytes, byte[]>> m = null;

    builder
        .Table<string, string>("topic")
        .GroupBy((k, v) => KeyValuePair.Create(k.ToUpper(), v))
        .Count(m);

    var topology = builder.Build();

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        null);
    task.GroupMetadata = consumer as SyncConsumer;
    task.InitializeStateStores();
    task.InitializeTopology();

    // Two stores expected: the source table's store and the count aggregation's store.
    Assert.AreEqual(2, task.Context.States.StateStoreNames.Count());
    var nameStore1 = task.Context.States.StateStoreNames.ElementAt(0);
    var nameStore2 = task.Context.States.StateStoreNames.ElementAt(1);
    Assert.IsNotNull(nameStore1);
    Assert.IsNotNull(nameStore2);
    Assert.AreNotEqual(string.Empty, nameStore1);
    Assert.AreNotEqual(string.Empty, nameStore2);

    var store1 = task.GetStore(nameStore1);
    var store2 = task.GetStore(nameStore2);
    Assert.IsInstanceOf<TimestampedKeyValueStore<string, string>>(store1);
    Assert.IsInstanceOf<TimestampedKeyValueStore<string, long>>(store2);

    // No records processed, so both stores are empty.
    Assert.AreEqual(0, (store1 as TimestampedKeyValueStore<string, string>).ApproximateNumEntries());
    Assert.AreEqual(0, (store2 as TimestampedKeyValueStore<string, long>).ApproximateNumEntries());
}
// Verifies that when Materialized is null, the windowed count topology still creates
// a window state store with a generated name, and that the store starts empty.
public void WithNullMaterialize()
{
    // CERTIFIED THAT SAME IF Materialize is null, a state store exist for count processor with a generated namestore
    var config = new StreamConfig<StringSerDes, StringSerDes>();
    var serdes = new StringSerDes();
    config.ApplicationId = "test-window-count";

    var builder = new StreamBuilder();
    // Explicit null Materialized: the window store name must be auto-generated.
    Materialized<string, long, IWindowStore<Bytes, byte[]>> m = null;

    builder
        .Stream<string, string>("topic")
        .GroupByKey()
        .WindowedBy(TumblingWindowOptions.Of(2000))
        .Count(m);

    var topology = builder.Build();

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        null,
        new MockChangelogRegister(),
        new StreamMetricsRegistry());
    task.GroupMetadata = consumer as SyncConsumer;
    task.InitializeStateStores();
    task.InitializeTopology();
    task.RestorationIfNeeded();
    task.CompleteRestoration();

    // Exactly one store: the windowed count store, with a non-empty generated name.
    Assert.AreEqual(1, task.Context.States.StateStoreNames.Count());
    var nameStore = task.Context.States.StateStoreNames.ElementAt(0);
    Assert.IsNotNull(nameStore);
    Assert.AreNotEqual(string.Empty, nameStore);

    var store = task.GetStore(nameStore);
    Assert.IsInstanceOf<ITimestampedWindowStore<string, long>>(store);
    // No records processed, so the store contains no windows.
    Assert.AreEqual(0, (store as ITimestampedWindowStore<string, long>).All().ToList().Count);
}
/// <summary>
/// Builds a minimal <see cref="StreamTask"/> for tests: no partitions, an all-null
/// processor topology, and a synchronous producer.
/// </summary>
/// <param name="taskId">Identifier assigned to the task.</param>
private StreamTask GetTask(TaskId taskId)
{
    var config = new StreamConfig
    {
        ClientId = "test",
        ApplicationId = "test-app"
    };

    // Empty topology: every component is null.
    var emptyTopology = new Stream.Internal.ProcessorTopology(null, null, null, null, null, null, null, null);

    return new StreamTask(
        "thread",
        taskId,
        new List<TopicPartition>(),
        emptyTopology,
        null,
        config,
        null,
        new SyncProducer(config.ToProducerConfig()),
        new MockChangelogRegister(),
        new StreamMetricsRegistry());
}
// Verifies that a windowed Aggregate with a null Materialized (and no serdes it can
// infer for the int aggregate) fails at state-store initialization with StreamsException.
public void WithNullMaterialize()
{
    // CERTIFIED THAT SAME IF Materialize is null, a state store exist for count processor with a generated namestore
    var config = new StreamConfig<StringSerDes, StringSerDes>();
    var serdes = new StringSerDes();
    config.ApplicationId = "test-window-count";

    var builder = new StreamBuilder();
    // Explicit null Materialized passed to Aggregate below.
    Materialized<string, int, IWindowStore<Bytes, byte[]>> m = null;

    builder
        .Stream<string, string>("topic")
        .GroupByKey()
        .WindowedBy(TumblingWindowOptions.Of(2000))
        .Aggregate(
            () => 0,
            (k, v, agg) => Math.Max(v.Length, agg),
            m);

    var topology = builder.Build();

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        null);
    task.GroupMetadata = consumer as SyncConsumer;

    // Store initialization must fail: the aggregate store cannot be materialized.
    Assert.Throws<StreamsException>(() => task.InitializeStateStores());
}
// Verifies that StreamTask.ReadEncodedText decodes an ISO-8859-1 file into the expected
// German text (umlauts intact) and closes the file afterwards.
public void ReadEncodedText_Should_Convert_Text_Using_Specified_Encoding()
{
    // Expected content of the fixture file; non-ASCII characters (ä, ö, ü, ß) only
    // round-trip correctly when the file is read with the ISO-8859-1 encoding.
    var expected =
        "Deutschland ist ein Bundesstaat in Mitteleuropa. Gemäß seiner Verfassung ist Deutschland eine föderal organisierte Republik, die aus den 16 deutschen Ländern gebildet wird. " +
        "Die Bundesrepublik Deutschland ist ein freiheitlich-demokratischer und sozialer Rechtsstaat und stellt die jüngste Ausprägung des deutschen Nationalstaates dar. Bundeshauptstadt ist Berlin." +
        "Neun europäische Nachbarstaaten grenzen an die Bundesrepublik, naturräumlich zudem im Norden die Gewässer der Nord- und Ostsee und im Süden das Bergland der Alpen. " +
        "Sie liegt in der gemäßigten Klimazone und zählt mit rund 82 Millionen Einwohnern zu den dicht besiedelten Flächenländern." +
        "Deutschland ist Gründungsmitglied der Europäischen Union sowie deren bevölkerungsreichstes Land und bildet mit 16 anderen EU-Mitgliedstaaten eine Währungsunion, die Eurozone. " +
        "Es ist Mitglied der Vereinten Nationen, der OECD, der NATO, der G8 und der G20." +
        "Gemessen am nominalen Bruttoinlandsprodukt ist Deutschland die größte Volkswirtschaft Europas und viertgrößte der Welt. " +
        "Im Jahr 2011 war es die drittgrößte Export- und Importnation.[10] Der Index für menschliche Entwicklung zählt Deutschland zu den sehr hoch entwickelten Staaten";

    var actual = StreamTask.ReadEncodedText(EncodedFileName, "ISO-8859-1");

    Assert.AreEqual(expected, actual);
    // The method must not leave the file handle open.
    CheckFileIsClosed(EncodedFileName);
}
/// <summary>
/// Verifies that <c>StreamTask.ReadPlanetInfoFromXlsx</c> parses the Excel fixture into
/// the expected planets (in worksheet order) and closes the file afterwards.
/// </summary>
public void ReadPlanetInfoFromXlsx_Should_Parse_Excel_File()
{
    // Planets ordered by descending mean radius, matching the worksheet rows.
    var expected = new List<PlanetInfo>
    {
        new PlanetInfo { Name = "Jupiter", MeanRadius = 69911.00 },
        new PlanetInfo { Name = "Saturn", MeanRadius = 58232.00 },
        new PlanetInfo { Name = "Uranus", MeanRadius = 25362.00 },
        new PlanetInfo { Name = "Neptune", MeanRadius = 24622.00 },
        new PlanetInfo { Name = "Earth", MeanRadius = 6371.00 },
        new PlanetInfo { Name = "Venus", MeanRadius = 6051.80 },
        new PlanetInfo { Name = "Mars", MeanRadius = 3390.00 },
        new PlanetInfo { Name = "Mercury", MeanRadius = 2439.70 },
    };

    var actual = StreamTask.ReadPlanetInfoFromXlsx(FileName);

    Assert.IsTrue(expected.SequenceEqual(actual));
    // The parser must not leave the workbook file open.
    CheckFileIsClosed(FileName);
}
// Verifies that a windowed Reduce with no Materialized (untyped StreamConfig, so no
// default serdes) fails at state-store initialization with StreamsException.
public void WithNullMaterialize()
{
    // CERTIFIED THAT SAME IF Materialize is null, a state store exist for count processor with a generated namestore
    var config = new StreamConfig();
    var serdes = new StringSerDes();
    config.ApplicationId = "test-window-reduce";

    var builder = new StreamBuilder();

    builder
        .Stream<string, string>("topic")
        .GroupByKey()
        .WindowedBy(TumblingWindowOptions.Of(2000))
        .Reduce((v1, v2) => v1.Length > v2.Length ? v1 : v2);

    var topology = builder.Build();

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        null,
        new MockChangelogRegister(),
        new StreamMetricsRegistry());
    task.GroupMetadata = consumer as SyncConsumer;

    // Store initialization must fail: the reduce store cannot be materialized.
    Assert.Throws<StreamsException>(() => task.InitializeStateStores());
}
/// <summary>
/// Builds synchronous pipes bound to <paramref name="task"/>, producing records
/// through <paramref name="mockProducer"/>.
/// </summary>
public SyncPipeBuilder(StreamTask task, SyncProducer mockProducer)
{
    this.task = task;
    this.mockProducer = mockProducer;
}
/// <summary>
/// Builds synchronous pipes bound to <paramref name="task"/> (no mock producer or supplier).
/// </summary>
public SyncPipeBuilder(StreamTask task) { this.task = task; }
/// <summary>
/// Builds synchronous pipes that feed records to <paramref name="task"/> via a
/// <see cref="StreamTaskPublisher"/> wrapper.
/// </summary>
public SyncPipeBuilder(StreamTask task) { publisher = new StreamTaskPublisher(task); }
/// <summary>
/// Publishes records directly into the given <paramref name="task"/>.
/// </summary>
public StreamTaskPublisher(StreamTask task) { this.task = task; }
/// <summary>
/// Lifecycle hook called with the containing task, its configuration and the operator's
/// output. This base implementation is intentionally a no-op; overriding types may use
/// it to capture the supplied context.
/// </summary>
public virtual void Setup <TOut, TOperator>(StreamTask <TOut, TOperator> containingTask, StreamConfig config, IOutput <StreamRecord <TOutput> > output) where TOperator : IStreamOperator <TOut> { }
// Verifies checkpoint writing for a logged RocksDB table store:
// - MayWriteCheckpoint(true) forces a checkpoint after the first small batch;
// - MayWriteCheckpoint(false) only writes once the offset delta exceeds
//   OFFSET_DELTA_THRESHOLD_FOR_CHECKPOINT;
// - the .checkpoint file ends with the changelog topic, partition and offset.
public void StreamTaskWrittingCheckpoint()
{
    var config = new StreamConfig<StringSerDes, StringSerDes>();
    config.ApplicationId = "test-app";
    // Unique state dir so the checkpoint file is isolated per run (deleted at the end).
    config.StateDir = Path.Combine(".", Guid.NewGuid().ToString());

    var serdes = new StringSerDes();
    var builder = new StreamBuilder();

    var table = builder.Table("topic", RocksDb<string, string>.As("store").WithLoggingEnabled());

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var topology = builder.Build();
    topology.Builder.RewriteTopology(config);

    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        producer,
        new MockChangelogRegister()
        , new StreamMetricsRegistry());
    task.GroupMetadata = consumer as SyncConsumer;
    task.InitializeStateStores();
    task.InitializeTopology();
    task.RestorationIfNeeded();
    task.CompleteRestoration();

    // First batch: 5 records with increasing offsets.
    List<ConsumeResult<byte[], byte[]>> messages = new List<ConsumeResult<byte[], byte[]>>();
    int offset = 0;
    for (int i = 0; i < 5; ++i)
    {
        messages.Add(
            new ConsumeResult<byte[], byte[]>
            {
                Message = new Message<byte[], byte[]>
                {
                    Key = serdes.Serialize($"key{i + 1}", new SerializationContext()),
                    Value = serdes.Serialize($"value{i + 1}", new SerializationContext())
                },
                TopicPartitionOffset = new TopicPartitionOffset(part, offset++)
            });
    }

    task.AddRecords(messages);

    Assert.IsTrue(task.CanProcess(DateTime.Now.GetMilliseconds()));

    while (task.CanProcess(DateTime.Now.GetMilliseconds()))
    {
        Assert.IsTrue(task.Process());
        Assert.IsTrue(task.CommitNeeded);
        task.Commit();
    }

    // Forced checkpoint regardless of how few offsets were processed.
    task.MayWriteCheckpoint(true);

    // Second batch: enough records to cross the checkpoint offset-delta threshold.
    messages = new List<ConsumeResult<byte[], byte[]>>();
    for (int i = 0; i < StateManagerTools.OFFSET_DELTA_THRESHOLD_FOR_CHECKPOINT + 10; ++i)
    {
        messages.Add(
            new ConsumeResult<byte[], byte[]>
            {
                Message = new Message<byte[], byte[]>
                {
                    Key = serdes.Serialize($"key{i + 1}", new SerializationContext()),
                    Value = serdes.Serialize($"value{i + 1}", new SerializationContext())
                },
                TopicPartitionOffset = new TopicPartitionOffset(part, offset++)
            });
    }

    task.AddRecords(messages);

    while (task.CanProcess(DateTime.Now.GetMilliseconds()))
    {
        Assert.IsTrue(task.Process());
    }

    // Unforced: writes because the processed-offset delta now exceeds the threshold.
    task.MayWriteCheckpoint(false);

    // Checkpoint file lives under <stateDir>/<appId>/<taskId>; last line holds
    // "<changelog-topic> <partition> <offset>".
    var lines = File.ReadAllLines(Path.Combine(config.StateDir, config.ApplicationId, "0-0", ".checkpoint"));
    Assert.AreEqual(3, lines.Length);
    Assert.AreEqual("test-app-store-changelog 0 10014", lines[2]);

    task.Suspend();
    task.Close();

    Directory.Delete(config.StateDir, true);
}
/// <summary>
/// Builds synchronous pipes bound to <paramref name="task"/>, using
/// <paramref name="kafkaSupplier"/> for clients.
/// </summary>
public SyncPipeBuilder(StreamTask task, IKafkaSupplier kafkaSupplier)
{
    this.task = task;
    this.kafkaSupplier = kafkaSupplier;
}
/// <summary>
/// Input pipe that pushes records for <paramref name="topic"/> into <paramref name="task"/>.
/// </summary>
public SyncPipeInput(StreamTask task, string topic)
{
    this.topic = topic;
    this.task = task;
}
/// <summary>
/// Input pipe bound to <paramref name="task"/> (no specific topic).
/// </summary>
public SyncPipeInput(StreamTask task) { this.task = task; }
/// <summary>
/// Builds synchronous pipes bound to <paramref name="task"/>, using
/// <paramref name="kafkaSupplier"/> for clients and <paramref name="mockProducer"/>
/// for produced records.
/// </summary>
public SyncPipeBuilder(StreamTask task, IKafkaSupplier kafkaSupplier, SyncProducer mockProducer)
{
    this.task = task;
    this.kafkaSupplier = kafkaSupplier;
    this.mockProducer = mockProducer;
}
// Test teardown: suspend then close the task (order matters for a clean shutdown),
// and release the reference.
public void Dispose() { task.Suspend(); task.Close(); task = null; }
// Verifies end-to-end processing under EXACTLY_ONCE: 5 records pushed through a
// map-to-uppercase topology must appear exactly once, transformed, in "topic2".
public void StreamTaskWithEXACTLY_ONCE()
{
    var config = new StreamConfig<StringSerDes, StringSerDes>();
    config.ApplicationId = "test-app";
    config.Guarantee = ProcessingGuarantee.EXACTLY_ONCE;

    var serdes = new StringSerDes();
    var builder = new StreamBuilder();

    // Topology: uppercase both key and value, then forward to "topic2".
    builder.Stream<string, string>("topic")
        .Map((k, v) => KeyValuePair.Create(k.ToUpper(), v.ToUpper()))
        .To("topic2");

    var topology = builder.Build();

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        null);
    task.GroupMetadata = consumer as SyncConsumer;
    task.InitializeStateStores();
    task.InitializeTopology();

    // Feed 5 records key1..key5 / value1..value5 at consecutive offsets.
    List<ConsumeResult<byte[], byte[]>> messages = new List<ConsumeResult<byte[], byte[]>>();
    int offset = 0;
    for (int i = 0; i < 5; ++i)
    {
        messages.Add(
            new ConsumeResult<byte[], byte[]>
            {
                Message = new Message<byte[], byte[]>
                {
                    Key = serdes.Serialize($"key{i + 1}", new SerializationContext()),
                    Value = serdes.Serialize($"value{i + 1}", new SerializationContext())
                },
                TopicPartitionOffset = new TopicPartitionOffset(part, offset++)
            });
    }

    task.AddRecords(messages);

    Assert.IsTrue(task.CanProcess(DateTime.Now.GetMilliseconds()));

    while (task.CanProcess(DateTime.Now.GetMilliseconds()))
    {
        Assert.IsTrue(task.Process());
        Assert.IsTrue(task.CommitNeeded);
        task.Commit();
    }

    // CHECK IN TOPIC topic2
    consumer.Subscribe("topic2");
    List<ConsumeResult<byte[], byte[]>> results = new List<ConsumeResult<byte[], byte[]>>();
    ConsumeResult<byte[], byte[]> result = null;
    do
    {
        // Drain until the sync consumer returns null (no more records).
        result = consumer.Consume(100);
        if (result != null)
        {
            results.Add(result);
            consumer.Commit(result);
        }
    } while (result != null);

    // Exactly 5 outputs, uppercased, in input order — no duplicates, no losses.
    Assert.AreEqual(5, results.Count);
    for (int i = 0; i < 5; ++i)
    {
        Assert.AreEqual($"KEY{i + 1}", serdes.Deserialize(results[i].Message.Key, new SerializationContext()));
        Assert.AreEqual($"VALUE{i + 1}", serdes.Deserialize(results[i].Message.Value, new SerializationContext()));
    }

    task.Close();
}
// Verifies suspend/resume semantics: the in-memory store is unavailable while the task
// is suspended, comes back after Resume + RestorationIfNeeded, and reprocessing the
// same 5 records yields 10 total outputs in "topic2".
public void StreamTaskSuspendResume()
{
    var config = new StreamConfig<StringSerDes, StringSerDes>();
    config.ApplicationId = "test-app";

    var serdes = new StringSerDes();
    var builder = new StreamBuilder();

    // Topology: table with in-memory store "store" -> uppercase values -> "topic2".
    builder.Table<string, string>("topic", InMemory<string, string>.As("store").WithLoggingDisabled())
        .MapValues((k, v) => v.ToUpper())
        .ToStream()
        .To("topic2");

    TaskId id = new TaskId { Id = 0, Partition = 0 };
    var topology = builder.Build();

    var processorTopology = topology.Builder.BuildTopology(id);

    var supplier = new SyncKafkaSupplier();
    var producer = supplier.GetProducer(config.ToProducerConfig());
    var consumer = supplier.GetConsumer(config.ToConsumerConfig(), null);

    var part = new TopicPartition("topic", 0);
    StreamTask task = new StreamTask(
        "thread-0",
        id,
        new List<TopicPartition> { part },
        processorTopology,
        consumer,
        config,
        supplier,
        producer,
        new MockChangelogRegister()
        , new StreamMetricsRegistry());
    task.GroupMetadata = consumer as SyncConsumer;
    task.InitializeStateStores();
    task.InitializeTopology();
    task.RestorationIfNeeded();
    task.CompleteRestoration();

    // Feed 5 records key1..key5 / value1..value5 at consecutive offsets.
    List<ConsumeResult<byte[], byte[]>> messages = new List<ConsumeResult<byte[], byte[]>>();
    int offset = 0;
    for (int i = 0; i < 5; ++i)
    {
        messages.Add(
            new ConsumeResult<byte[], byte[]>
            {
                Message = new Message<byte[], byte[]>
                {
                    Key = serdes.Serialize($"key{i + 1}", new SerializationContext()),
                    Value = serdes.Serialize($"value{i + 1}", new SerializationContext())
                },
                TopicPartitionOffset = new TopicPartitionOffset(part, offset++)
            });
    }

    task.AddRecords(messages);

    Assert.IsTrue(task.CanProcess(DateTime.Now.GetMilliseconds()));

    while (task.CanProcess(DateTime.Now.GetMilliseconds()))
    {
        Assert.IsTrue(task.Process());
        Assert.IsTrue(task.CommitNeeded);
        task.Commit();
    }

    // Store is reachable before suspension, gone while suspended, back after resume.
    Assert.IsNotNull(task.GetStore("store"));
    task.Suspend();
    Assert.IsNull(task.GetStore("store"));
    task.Resume();
    task.RestorationIfNeeded();
    Assert.IsNotNull(task.GetStore("store"));

    // Replay the same batch after resume.
    task.AddRecords(messages);

    Assert.IsTrue(task.CanProcess(DateTime.Now.GetMilliseconds()));

    while (task.CanProcess(DateTime.Now.GetMilliseconds()))
    {
        Assert.IsTrue(task.Process());
        Assert.IsTrue(task.CommitNeeded);
        task.Commit();
    }

    // CHECK IN TOPIC topic2
    consumer.Subscribe("topic2");
    List<ConsumeResult<byte[], byte[]>> results = new List<ConsumeResult<byte[], byte[]>>();
    ConsumeResult<byte[], byte[]> result = null;
    do
    {
        // Drain until the sync consumer returns null (no more records).
        result = consumer.Consume(100);
        if (result != null)
        {
            results.Add(result);
            consumer.Commit(result);
        }
    } while (result != null);

    // 5 records processed twice (before and after suspend/resume) = 10 outputs.
    Assert.AreEqual(10, results.Count);

    task.Close();
}