Exemplo n.º 1
0
        /// <summary>
        /// Sample job: reads user-behavior records from a CSV file, counts clicks per item
        /// over sliding windows, and prints the three most-clicked items of each window.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Create the execution environment.
            var env = StreamExecutionEnvironment.GetExecutionEnvironment()
                      // Tell the system to process records by event time.
                      .SetStreamTimeCharacteristic(TimeCharacteristic.EventTime)
                      // Use a global parallelism of 1 so console output is not interleaved;
                      // changing the parallelism does not affect the correctness of the result.
                      .SetParallelism(1);

            // Location of the sample data, relative to the current working directory.
            var file = Path.Combine(Directory.GetCurrentDirectory(), "Resources", "UserBehavior.csv");

            // NOTE(review): pojoType is not consumed anywhere in this method. It is kept because
            // the side effects of TypeExtractor.CreateTypeInfo are not visible from here — confirm
            // and remove if the call is pure.
            var pojoType = TypeExtractor.CreateTypeInfo <UserBehavior>() as PojoTypeInfo <UserBehavior>;

            // The data source previously received an empty path, leaving the computed file unused.
            var stream = env.ReadCsvFile <UserBehavior>(file)                                          // create the source: a DataStream of UserBehavior
                         .AssignTimestampsAndWatermarks(new UserBehaviorAscendingTimestampExtractor()) // extract timestamps and generate watermarks
                         .Filter(new UserBehaviorFilter())                                             // keep click records only
                         .KeyBy("itemId")                                                              // partition by item
                         .TimeWindow(TimeSpan.FromMinutes(60), TimeSpan.FromMinutes(5))                // one-hour window sliding every 5 minutes
                         .Aggregate(new CountAggregator(), new WindowResultFunction())                 // stream of click counts per window
                         .KeyBy("windowEnd")                                                           // regroup by window to rank items within each window
                         .Process(new TopNHotProducts(3));                                             // top 3 items by click count

            // Print the result to the console.
            stream.Print();

            env.Execute("Hot Products Job");
        }
 /// <summary>
 /// Builds a connected stream from a regular input and a broadcast input.
 /// Every argument is passed through <c>Preconditions.CheckNotNull</c> before it is stored.
 /// </summary>
 /// <param name="env">The execution environment, stored in <c>ExecutionEnvironment</c>.</param>
 /// <param name="input1">The non-broadcast input, stored in <c>FirstInput</c>.</param>
 /// <param name="input2">The broadcast input, stored in <c>SecondInput</c>.</param>
 /// <param name="broadcastStateDescriptors">Descriptors stored in <c>BroadcastStateDescriptors</c>.</param>
 public BroadcastConnectedStream(StreamExecutionEnvironment env,
                                 DataStream<TInput1> input1,
                                 BroadcastStream<TInput2> input2,
                                 List<MapStateDescriptor<dynamic, dynamic>> broadcastStateDescriptors)
 {
     // Validation runs in parameter order; each value is stored as soon as it is checked.
     this.ExecutionEnvironment = Preconditions.CheckNotNull(env);
     this.FirstInput = Preconditions.CheckNotNull(input1);
     this.SecondInput = Preconditions.CheckNotNull(input2);
     this.BroadcastStateDescriptors = Preconditions.CheckNotNull(broadcastStateDescriptors);
 }
 /// <summary>
 /// Creates a source stream backed by a <c>SourceTransformation</c> built from the given
 /// operator, output type and the environment's current parallelism.
 /// </summary>
 /// <param name="env">The execution environment; its <c>Parallelism</c> seeds the transformation.</param>
 /// <param name="outTypeInfo">Type information of the produced elements.</param>
 /// <param name="operator">The source operator wrapped by the transformation.</param>
 /// <param name="isParallel">Whether the source may run in parallel; remembered in <c>_isParallel</c>.</param>
 /// <param name="sourceName">Name given to the source transformation.</param>
 public DataStreamSource(
     StreamExecutionEnvironment env,
     TypeInformation<T> outTypeInfo,
     StreamSource<T, ISourceFunction<T>> @operator,
     bool isParallel,
     string sourceName)
     : base(env, new SourceTransformation<T>(sourceName, @operator, outTypeInfo, env.Parallelism))
 {
     _isParallel = isParallel;

     if (isParallel)
     {
         return;
     }

     // A non-parallel source is pinned to a parallelism of 1.
     SetParallelism(1);
 }
        /// <summary>
        /// Builds and executes a job that keys the source by field 0, reduces each time window
        /// with <c>SummingReducer</c>, and sends the result to an <c>EmptySink</c>.
        /// </summary>
        public static void Run()
        {
            var env = StreamExecutionEnvironment.GetExecutionEnvironment()
                      .SetParallelism(4);

            var keyedSource = env.AddSource(new DataSource()).KeyBy(0);

            // Window arguments are 2500 ms and 500 ms (presumably size and slide — confirm against TimeWindow).
            var windows = keyedSource.TimeWindow(TimeSpan.FromMilliseconds(2500), TimeSpan.FromMilliseconds(500));

            var sums = windows.Reduce(new SummingReducer());

            // The sink handle is not needed afterwards, so it is not kept.
            sums.AddSink(new EmptySink());

            env.Execute();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Builds and executes the "WindowWordCount" job: text lines are split by
        /// <c>Splitter</c>, keyed by field 0, grouped with <c>CountWindow(10, 5)</c>,
        /// and field 1 is summed; the result is printed.
        /// </summary>
        public static void Run()
        {
            var env = StreamExecutionEnvironment.GetExecutionEnvironment();

            // NOTE(review): the input path is an empty string in this sample.
            var lines = env.ReadTextFile("");

            var wordCounts = lines
                .FlatMap(new Splitter())
                .KeyBy(0)
                .CountWindow(10, 5)
                // Sum up tuple field "Count".
                .Sum(1);

            wordCounts.Print();

            env.Execute("WindowWordCount");
        }
Exemplo n.º 6
0
        /// <summary>
        /// Assembles (but does not execute) a processing-time pipeline that maps raw order
        /// messages to <c>SubOrderDetail</c>, aggregates them per site over one-day tumbling
        /// windows that fire every second, and keeps only changed aggregates.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var env = StreamExecutionEnvironment.GetExecutionEnvironment()
                      .SetStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime)
                      .EnableCheckpointing(60 * 1000, CheckpointingMode.ExactlyOnce);

            env.CheckpointConfig.CheckpointTimeout = 30 * 1000;

            var partitionCount    = 2;
            var orderExtTopicName = "orders";

            // The same label serves as both the operator name and its UID.
            var kafkaSourceLabel = "source_kafka_" + orderExtTopicName;

            // 1. Read from Kafka to create the source data.
            // NOTE(review): the source function passed here is null.
            var rawOrders = env.AddSource <string>(null)
                            .SetParallelism(partitionCount)
                            .SetName(kafkaSourceLabel)
                            .SetUId(kafkaSourceLabel);

            // 2. Create the SubOrderDetail stream.
            var orderDetails = rawOrders.Map(new SubOrderDetailMapper())
                               .SetName("map_sub_order_detail")
                               .SetUId("map_sub_order_detail");

            // 3. Aggregate per day, emitting results continuously every second.
            //    The window is one day wide with a -8 hour offset (presumably a time-zone adjustment — confirm).
            var dailyWindowsBySite = orderDetails
                                     .KeyBy("siteId")
                                     .Window(TumblingProcessingTimeWindowAssigner <SubOrderDetail> .Of(TimeSpan.FromDays(1), TimeSpan.FromHours(-8)))
                                     .Trigger(ContinuousProcessingTimeTrigger <SubOrderDetail, TimeWindow> .Of(TimeSpan.FromSeconds(1)));

            // 5. Compute the aggregated metrics per site.
            var siteAggregates = dailyWindowsBySite
                                 .Aggregate(new OrderAndGmvAggregateFunc())
                                 .SetName("aggregate_site_order_gmv")
                                 .SetUId("aggregate_site_order_gmv");

            // 6. Emit only the aggregates that changed.
            var changedSiteAggregates = siteAggregates
                                        .KeyBy(0)
                                        .Process(new OutputOrderGmvProcessFunc(), TypeInformation.Of <(long, string)>())
                                        .SetName("process_site_gmv_changed")
                                        .SetUId("process_site_gmv_changed");

            // todo:
            // 6. Sink to Redis, using a sorted set to perform the TopN ranking.
            // The display side queries the result directly from Redis.
            // NOTE(review): env.Execute is never called in this method.

            Console.WriteLine("Hello World!");
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds and executes the "SessionWindowing" job: the source is keyed by field 0,
        /// grouped into event-time session windows with a 3 ms gap, and field 2 is summed
        /// and printed.
        /// </summary>
        public static void Run()
        {
            var env = StreamExecutionEnvironment.GetExecutionEnvironment()
                      .SetStreamTimeCharacteristic(TimeCharacteristic.EventTime)
                      .SetParallelism(1);

            var sessionTotals = env.AddSource(new DataSourceFunctor())
                .KeyBy(0)
                .Window(EventTimeSessionWindowAssigner<SessionElement>.WithGap(TimeSpan.FromMilliseconds(3)))
                .Sum(2);

            sessionTotals.Print();

            env.Execute("SessionWindowing");
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds and executes the "CarTopSpeedWindowingExample" job: text lines are mapped to
        /// <c>CarDatum</c>, timestamped, keyed by "Id", and placed in global windows governed by a
        /// 10-second time evictor and a delta trigger; <c>MaxBy(1)</c> is applied and printed.
        /// </summary>
        public static void Run()
        {
            var env = StreamExecutionEnvironment.GetExecutionEnvironment()
                      .SetStreamTimeCharacteristic(TimeCharacteristic.EventTime)
                      .SetParallelism(1);

            // NOTE(review): the input path is an empty string in this sample.
            var carData = env.ReadTextFile("").Map(new CarDatumMapper());

            var carsById = carData
                .AssignTimestampsAndWatermarks(new CarTimestampExtractor())
                .KeyBy("Id");

            var windowedCars = carsById
                .Window(GlobalWindowAssigner<CarDatum>.Create())
                .Evictor(TimeWindowEvictor.Of<CarDatum, GlobalWindow>(TimeSpan.FromSeconds(10)))
                .Trigger(DeltaWindowTrigger.Of<CarDatum, GlobalWindow>(50, new CarDeltaFunctor(), null));

            var topSpeeds = windowedCars.MaxBy(1);

            topSpeeds.Print();

            env.Execute("CarTopSpeedWindowingExample");
        }
Exemplo n.º 9
0
        /// <summary>
        /// Builds and executes the "State machine job": events are keyed by source address and
        /// fed through <c>StateMachineMapper</c>; the resulting alerts are printed.
        /// </summary>
        public static void Run()
        {
            // Create the environment used to build streams and configure execution.
            var env = StreamExecutionEnvironment.GetExecutionEnvironment();
            env.EnableCheckpointing(2000);

            // NOTE(review): the source function is the default (null) value in this sample.
            var events = env.AddSource(default(ISourceFunction<Event>));

            // Partition on the address so equal addresses end up in the same state machine
            // flatMap function.
            var eventsByAddress = events.KeyBy(nameof(Event.SourceAddress));

            // The function that evaluates the state machine over the sequence of events.
            var alerts = eventsByAddress.FlatMap(new StateMachineMapper());

            alerts.Print();

            env.Execute("State machine job");
        }
Exemplo n.º 10
0
        /// <summary>
        /// Sketches the "User Actions Evaluation" job: user actions keyed by user id are
        /// connected with a broadcast stream of patterns and processed by <c>PatternEvaluator</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var env = StreamExecutionEnvironment.GetExecutionEnvironment();

            // NOTE(review): both input streams are null placeholders and are dereferenced below;
            // real sources need to be wired in before this can run.
            DataStream<UserAction> actions = null;
            DataStream<ActionPattern> patterns = null;

            var keyedActions = actions.KeyBy(a => a.UserId);

            // State descriptor under which the broadcast patterns are stored.
            var patternStateDescriptor = new MapStateDescriptor<string, ActionPattern>(
                "patterns",
                Types.String,
                Types.Poco<ActionPattern>());

            var broadcastPatterns = patterns.Broadcast(patternStateDescriptor);

            keyedActions
                .Connect(broadcastPatterns)
                .Process(new PatternEvaluator())
                .Print();

            env.Execute("User Actions Evaluation");
        }
Exemplo n.º 11
0
        /// <summary>
        /// Builds and executes the "Socket Window WordCount" job: lines read from a socket on
        /// localhost:5000 are split into words, keyed by "word", windowed, reduced, and printed.
        /// </summary>
        public static void Run()
        {
            Init();

            // Get the execution environment.
            var env = StreamExecutionEnvironment.GetExecutionEnvironment();

            // Source: get input data by connecting to the socket.
            var lines = env.SocketTextStream("localhost", 5000, "\n");

            // Transformation: the FlatMap operator converts the data, then the stream is
            // partitioned by the given key so records with the same key flow to the same
            // SubTask instance.
            var wordsByKey = lines
                .FlatMap(new Splitter())
                .KeyBy("word");

            var windowedCounts = wordsByKey
                .TimeWindow(TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(1))
                .Reduce(new Reducer());

            // Sink: write the data to external storage (console standard output).
            var consoleSink = windowedCounts.Print();
            consoleSink.SetParallelism(1);

            env.Execute("Socket Window WordCount");
        }
Exemplo n.º 12
0
 /// <summary>
 /// Gets the default trigger associated with this WindowAssigner.
 /// </summary>
 /// <param name="env">The stream execution environment supplied by the caller.</param>
 /// <returns>The default <c>WindowTrigger</c> for elements of <c>TElement</c> and windows of <c>TWindow</c>.</returns>
 public abstract WindowTrigger <TElement, TWindow> GetDefaultTrigger(StreamExecutionEnvironment env);
Exemplo n.º 13
0
 /// <summary>
 /// Initializes the operator by forwarding both arguments to the base constructor;
 /// no additional state is set here.
 /// </summary>
 /// <param name="environment">The stream execution environment passed to the base class.</param>
 /// <param name="transformation">The transformation passed to the base class.</param>
 protected SingleOutputStreamOperator(StreamExecutionEnvironment environment, Transformation <T> transformation)
     : base(environment, transformation)
 {
 }
Exemplo n.º 14
0
 /// <summary>
 /// Creates a pair of connected streams, storing the arguments as given (no validation is performed).
 /// </summary>
 /// <param name="environment">The execution environment, stored in <c>Environment</c>.</param>
 /// <param name="inputStream1">The first input, stored in <c>InputStream1</c>.</param>
 /// <param name="inputStream2">The second input, stored in <c>InputStream2</c>.</param>
 public ConnectedStreams(
     StreamExecutionEnvironment environment,
     DataStream<TInput1> inputStream1,
     DataStream<TInput2> inputStream2)
 {
     (Environment, InputStream1, InputStream2) = (environment, inputStream1, inputStream2);
 }
Exemplo n.º 15
0
 /// <summary>
 /// Returns a trigger obtained from <c>EventTimeWindowTrigger&lt;TElement&gt;.Create()</c>.
 /// </summary>
 /// <param name="env">The stream execution environment; not used by this override.</param>
 /// <returns>The trigger produced by <c>EventTimeWindowTrigger&lt;TElement&gt;.Create()</c>.</returns>
 public override WindowTrigger<TElement, TimeWindow> GetDefaultTrigger(StreamExecutionEnvironment env)
 {
     return EventTimeWindowTrigger<TElement>.Create();
 }
Exemplo n.º 16
0
 /// <summary>
 /// Not implemented for this assigner.
 /// </summary>
 /// <param name="env">The stream execution environment; not used by this override.</param>
 /// <exception cref="System.NotImplementedException">Always thrown.</exception>
 public override WindowTrigger<TElement, TimeWindow> GetDefaultTrigger(StreamExecutionEnvironment env) =>
     throw new System.NotImplementedException();
Exemplo n.º 17
0
 /// <summary>
 /// Creates a broadcast stream over the given input, storing the input and environment
 /// as given and a list copy of the supplied state descriptors.
 /// </summary>
 /// <param name="input">The stream to broadcast.</param>
 /// <param name="environment">The stream execution environment.</param>
 /// <param name="broadcastStateDescriptors">State descriptors; copied into a new list via <c>ToList()</c>.</param>
 protected BroadcastStream(
     DataStream<T> input,
     StreamExecutionEnvironment environment,
     params MapStateDescriptor<object, object>[] broadcastStateDescriptors)
 {
     this._inputStream = input;
     this._environment = environment;

     // The params array is copied into a separate list rather than stored directly.
     this._broadcastStateDescriptors = broadcastStateDescriptors.ToList();
 }
 /// <summary>
 /// Initializes the operator by forwarding both arguments to the base constructor;
 /// no additional state is set here.
 /// </summary>
 /// <param name="environment">The stream execution environment passed to the base class.</param>
 /// <param name="transformation">The transformation passed to the base class.</param>
 public SingleOutputStreamOperator(StreamExecutionEnvironment environment, Transformation <TElement> transformation)
     : base(environment, transformation)
 {
 }
Exemplo n.º 19
0
 /// <summary>
 /// Creates a new <see cref="DataStream{TElement}"/> bound to the given execution environment and transformation.
 /// Both arguments are passed through <c>Preconditions.CheckNotNull</c> with a descriptive message before being stored.
 /// </summary>
 /// <param name="environment">The StreamExecutionEnvironment, stored in <c>ExecutionEnvironment</c>.</param>
 /// <param name="transformation">The stream transformation, stored in <c>Transformation</c>.</param>
 public DataStream(
     StreamExecutionEnvironment environment,
     Transformation<TElement> transformation)
 {
     // The environment is checked and stored first, then the transformation.
     this.ExecutionEnvironment = Preconditions.CheckNotNull(
         environment,
         "Execution Environment must not be null.");

     this.Transformation = Preconditions.CheckNotNull(
         transformation,
         "Stream Transformation must not be null.");
 }
Exemplo n.º 20
0
 /// <summary>
 /// Returns a new <c>NeverTrigger</c> as the default trigger.
 /// </summary>
 /// <param name="env">The stream execution environment; not used by this override.</param>
 /// <returns>A freshly constructed <c>NeverTrigger</c>.</returns>
 public override WindowTrigger<TElement, GlobalWindow> GetDefaultTrigger(StreamExecutionEnvironment env)
 {
     return new NeverTrigger();
 }
Exemplo n.º 21
0
 /// <summary>
 /// Initializes the keyed stream by forwarding both arguments to the base constructor;
 /// no additional state is set here.
 /// </summary>
 /// <param name="environment">The stream execution environment passed to the base class.</param>
 /// <param name="transformation">The transformation passed to the base class.</param>
 public KeyedStream(StreamExecutionEnvironment environment, Transformation <T> transformation) : base(environment, transformation)
 {
 }
 /// <summary>
 /// Initializes the iterative stream by forwarding both arguments to the base constructor;
 /// no additional state is set here.
 /// </summary>
 /// <param name="environment">The stream execution environment passed to the base class.</param>
 /// <param name="transformation">The transformation passed to the base class.</param>
 protected IterativeStream(StreamExecutionEnvironment environment, Transformation <T> transformation) : base(environment, transformation)
 {
 }