Пример #1
0
        /// <summary>
        /// Exercises the C# pluggable input DStream: checks that
        /// <c>CSharpInputDStreamUtils.CreateStream</c> yields a non-null stream tied to the
        /// supplied streaming context, then invokes <c>Execute</c> on both helper classes.
        /// The helper results are not inspected; those calls only have to complete.
        /// </summary>
        public void TestCSharpInputDStream()
        {
            const int numPartitions = 5;
            const long batchInterval = 1000L;
            const double batchTime = 0.0;

            // Generator shared by every step below: one formatted record per (time, partition) pair.
            Func<double, int, IEnumerable<string>> generator =
                (time, pid) => new List<string>
                {
                    string.Format("PluggableInputDStream-{0}-{1}", pid, time)
                }.AsEnumerable();

            // Step 1: create the input DStream and verify it is wired to our context.
            var streamingContext = new StreamingContext(new SparkContext("", ""), batchInterval);
            var stream = CSharpInputDStreamUtils.CreateStream<string>(streamingContext, numPartitions, generator);

            Assert.IsNotNull(stream);
            Assert.AreEqual(streamingContext, stream.streamingContext);

            // Step 2: run the map-partition-with-index helper against a zero-filled input.
            int firstPartition = 0;
            IEnumerable<int> partitionInput = new int[numPartitions];
            var mapPartitionHelper = new CSharpInputDStreamMapPartitionWithIndexHelper<string>(batchTime, generator);
            mapPartitionHelper.Execute(firstPartition, partitionInput);

            // Step 3: run the RDD-generation helper for the same batch time.
            var generateRddHelper = new CSharpInputDStreamGenerateRDDHelper<string>(numPartitions, generator);
            generateRddHelper.Execute(batchTime);
        }
Пример #2
0
        /// <summary>
        /// Sample showing a C#-defined input DStream. Each batch, every one of the
        /// <c>numPartitions</c> partitions contributes a single formatted string; the batch is
        /// collected and written to the console together with its record and partition counts.
        /// Starts the streaming context and then waits in <c>AwaitTermination</c>.
        /// </summary>
        internal static void DStreamCSharpInputSample()
        {
            const int numPartitions = 5;
            const string separator = "-------------------------------------------";

            // The second constructor argument is the batch interval, in milliseconds.
            var streamingContext = new StreamingContext(SparkCLRSamples.SparkContext, 2000L);

            var stream = CSharpInputDStreamUtils.CreateStream<string>(
                streamingContext,
                numPartitions,
                (double batchTime, int partitionId) => new List<string>
                {
                    string.Format("PluggableInputDStream-{0}-{1}", partitionId, batchTime)
                }.AsEnumerable());

            stream.ForeachRDD((batchTime, batchRdd) =>
            {
                // Query the RDD first so nothing is printed if either call fails.
                var records = batchRdd.Collect();
                int partitionCount = batchRdd.GetNumPartitions();

                // Batch header.
                Console.WriteLine(separator);
                Console.WriteLine("Time: {0}", batchTime);
                Console.WriteLine(separator);
                Console.WriteLine("Count: " + records.Length);
                Console.WriteLine("Partitions: " + partitionCount);

                // One line per collected record.
                for (int i = 0; i < records.Length; i++)
                {
                    object record = records[i];
                    Console.WriteLine(record);
                }
            });

            streamingContext.Start();
            streamingContext.AwaitTermination();
        }