Example #1
        /// <summary>
        /// Calculate Pi using the Monte Carlo method: sample random points in the
        /// 2x2 square centered on the origin; the fraction that lands inside the
        /// unit circle approaches Pi/4.
        /// Reference: https://github.com/apache/spark/blob/branch-1.5/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala
        /// </summary>
        private static void Pi()
        {
            const int slices = 3;
            var n = (int)Math.Min(100000L * slices, int.MaxValue);
            var values = new List<int>(n);

            for (var i = 0; i < n; i++)
            {
                values.Add(i);
            }

            //
            // Anonymous method approach
            //
            var count = SparkContext.Parallelize(values, slices)
                .Map(i =>
                {
                    var random = new Random();
                    var x = random.NextDouble() * 2 - 1;
                    var y = random.NextDouble() * 2 - 1;

                    return (x * x + y * y) < 1 ? 1 : 0;
                })
                .Reduce((x, y) => x + y);

            Logger.InfoFormat("(anonymous method approach) Pi is roughly {0}.", 4.0 * count / n);

            //
            // Serialized class approach, an alternative to the anonymous method approach above
            //
            var countComputedUsingAnotherApproach = SparkContext.Parallelize(values, slices).Map(new PiHelper().Execute).Reduce((x, y) => x + y);
            var approximatePiValue = 4.0 * countComputedUsingAnotherApproach / n;

            Logger.InfoFormat("(serialized class approach) Pi is roughly {0}.", approximatePiValue);
        }
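The serialized class approach above maps over `new PiHelper().Execute`, but the listing does not include the class itself. A minimal sketch of what such a serializable helper might look like (the body below is an assumption, not the project's actual source):

        // Hypothetical sketch of PiHelper; the real class is defined elsewhere in
        // the sample project. [Serializable] lets Mobius serialize the instance
        // (and its Execute method) and ship it to the worker processes.
        [Serializable]
        private class PiHelper
        {
            private readonly Random random = new Random();

            // Returns 1 if a random point in the 2x2 square centered on the origin
            // falls inside the unit circle, 0 otherwise.
            public int Execute(int input)
            {
                var x = random.NextDouble() * 2 - 1;
                var y = random.NextDouble() * 2 - 1;

                return (x * x + y * y) < 1 ? 1 : 0;
            }
        }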
Example #2
 public void TestParallelize()
 {
     {
         RDD<int> rdd = _sc.Parallelize(Enumerable.Range(0, 5));
         Assert.Equal(new[] { 0, 1, 2, 3, 4 }, rdd.Collect());
     }
     {
         var strs = new string[] { "hello", "spark", "for", "dotnet" };
         RDD<string> rdd = _sc.Parallelize(strs);
         Assert.Equal(strs, rdd.Collect());
     }
 }
Example #3
File: Program.cs Project: outifaout/Mobius
        public static void Main(string[] args)
        {
            LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); // this is optional - DefaultLoggerService will be used if not set
            Logger = LoggerServiceFactory.GetLogger(typeof(PiExample));

            var sparkContext = new SparkContext(new SparkConf());

            try
            {
                const int slices = 3;
                var numberOfItems = (int)Math.Min(100000L * slices, int.MaxValue);
                var values = new List<int>(numberOfItems);
                for (var i = 0; i < numberOfItems; i++)
                {
                    values.Add(i);
                }

                var rdd = sparkContext.Parallelize(values, slices);

                CalculatePiUsingAnonymousMethod(numberOfItems, rdd);

                CalculatePiUsingSerializedClassApproach(numberOfItems, rdd);

                Logger.LogInfo("Completed calculating the value of Pi");
            }
            catch (Exception ex)
            {
                Logger.LogError("Error calculating Pi");
                Logger.LogException(ex);
            }

            sparkContext.Stop();
        }
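`CalculatePiUsingAnonymousMethod` and `CalculatePiUsingSerializedClassApproach` are not shown in this listing. A plausible sketch of both, mirroring the logic of Example #1 (the method bodies are assumptions, not the project's actual source):

        // Hypothetical sketches of the two helpers called above; the actual
        // implementations live elsewhere in the PiExample source.
        private static void CalculatePiUsingAnonymousMethod(int numberOfItems, RDD<int> rdd)
        {
            var count = rdd.Map(i =>
            {
                var random = new Random();
                var x = random.NextDouble() * 2 - 1;
                var y = random.NextDouble() * 2 - 1;

                return (x * x + y * y) < 1 ? 1 : 0;
            }).Reduce((x, y) => x + y);

            Logger.LogInfo(string.Format("(anonymous method approach) Pi is roughly {0}", 4.0 * count / numberOfItems));
        }

        private static void CalculatePiUsingSerializedClassApproach(int numberOfItems, RDD<int> rdd)
        {
            var count = rdd.Map(new PiHelper().Execute).Reduce((x, y) => x + y);

            Logger.LogInfo(string.Format("(serialized class approach) Pi is roughly {0}", 4.0 * count / numberOfItems));
        }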
Example #4
        /// <summary>
        /// Calculate the value of Pi
        /// </summary>
        private static void Pi()
        {
            var sparkConf = new SparkConf();

            sparkConf.SetAppName("MobiusSimpleSamplePI");
            sparkConf.SetMaster("yarn");
            sparkContext = new SparkContext(sparkConf);
            try
            {
                const int slices = 3;
                var numberOfItems = (int)Math.Min(100000L * slices, int.MaxValue);
                var values = new List<int>(numberOfItems);
                for (var i = 0; i < numberOfItems; i++)
                {
                    values.Add(i);
                }

                var rdd = sparkContext.Parallelize(values, slices);

                logger.LogInfo("Started Calculating Pi");

                CalculatePiUsingAnonymousMethod(numberOfItems, rdd);

                CalculatePiUsingSerializedClassApproach(numberOfItems, rdd);

                logger.LogInfo("Completed calculating the value of Pi");
                logger.LogInfo("Executed Successfully.................");
            }
            catch (Exception ex)
            {
                logger.LogError("Error calculating Pi");
                logger.LogException(ex);
            }
        }
Example #5
        public void TestSparkContextProxy()
        {
            // Smoke test: exercise the SparkContext API surface against the proxy
            // with placeholder arguments.
            var sparkContext = new SparkContext("masterUrl", "appName");

            sparkContext.AddFile(null);
            sparkContext.BinaryFiles(null, null);
            sparkContext.CancelAllJobs();
            sparkContext.CancelJobGroup(null);
            sparkContext.EmptyRDD<string>();
            sparkContext.GetLocalProperty(null);
            sparkContext.HadoopFile(null, null, null, null);
            sparkContext.HadoopRDD(null, null, null);
            sparkContext.NewAPIHadoopFile(null, null, null, null);
            sparkContext.NewAPIHadoopRDD(null, null, null);
            sparkContext.Parallelize<int>(new int[] { 1, 2, 3, 4, 5 });
            sparkContext.SequenceFile(null, null, null, null, null, null);
            sparkContext.SetCheckpointDir(null);
            sparkContext.SetJobGroup(null, null);
            sparkContext.SetLocalProperty(null, null);
            sparkContext.SetLogLevel(null);
            sparkContext.TextFile(null);
            sparkContext.WholeTextFiles(null);
            sparkContext.Stop();
            sparkContext.Union<string>(null);
        }
Example #6
        public void TestCollect()
        {
            RDD<Tuple<string, int>> rdd = _sc.Parallelize(new[] {
                new Tuple<string, int>("a", 1),
                new Tuple<string, int>("b", 2)
            });

            // Validate CollectAsMap().
            {
                var expected = new Dictionary<string, int>
                {
                    ["a"] = 1,
                    ["b"] = 2
                };

                Assert.Equal(expected, rdd.CollectAsMap());
            }

            // Validate Keys().
            {
                Assert.Equal(new[] { "a", "b" }, rdd.Keys().Collect());
            }

            // Validate Values().
            {
                Assert.Equal(new[] { 1, 2 }, rdd.Values().Collect());
            }
        }
Example #7
        public void TestConstantInputDStream()
        {
            var sc = new SparkContext("", "");
            var rdd = sc.Parallelize(Enumerable.Range(0, 10), 1);
            var ssc = new StreamingContext(sc, 1000L); // batch interval is in milliseconds

            // test when rdd is null
            Assert.Throws<ArgumentNullException>(() => new ConstantInputDStream<int>(null, ssc));

            var constantInputDStream = new ConstantInputDStream<int>(rdd, ssc);
            Assert.IsNotNull(constantInputDStream);
            Assert.AreEqual(ssc, constantInputDStream.streamingContext);
        }
Example #8
        static void Main(string[] args)
        {
            Console.WriteLine("sizeof(int) = " + sizeof(int) + ", sizeof(long) = " + sizeof(long)
                              + ", Is64BitOperatingSystem = " + Environment.Is64BitOperatingSystem + ", Is64BitProcess = " + Environment.Is64BitProcess
                              + ", OSVersion = " + Environment.OSVersion + ", MachineName = " + Environment.MachineName);

            var exe = Path.GetFileName(System.Reflection.Assembly.GetExecutingAssembly().CodeBase);

            if (args.Length < 1 || args[0] == "-h" || args[0] == "--help")
            {
                Console.WriteLine("Usage    : {0}  input-arguments", exe);
                Console.WriteLine("Example-1: {0}  any-thing that you-want-to-write=input", exe);
                var mapCurrentDir = new Dictionary<PlatformID, string> {
                    { PlatformID.Win32NT, "%CD%" }, { PlatformID.Win32S, "%CD%" }, { PlatformID.Win32Windows, "%CD%" }, { PlatformID.WinCE, "%CD%" },
                    { PlatformID.Unix, "$PWD" }
                };

                string currentDirectory;
                if (mapCurrentDir.TryGetValue(Environment.OSVersion.Platform, out currentDirectory))
                {
                    Console.WriteLine(@"Example-2: {0}  {1}  arg2@*#:,+.-\/~  Pi* d:\tmp {2}", exe, currentDirectory, "\"jdbc:mysql://localhost:3306/lzdb?user=guest&password=abc123\"");
                }

                return;
            }

            var idx = 0;

            Log("args.Length = " + args.Length + Environment.NewLine
                + string.Join(Environment.NewLine, args.Select(arg => string.Format("args[{0}] = {1}", idx++, arg)))
                );

            // Pair each argument with a count of 1, word-count style.
            var wordPairs = new List<KeyValuePair<string, int>>(
                args.Select(arg => new KeyValuePair<string, int>(arg, 1))
                );

            idx = 0;
            wordPairs.ForEach(kv => Log(string.Format("src-pair[{0}] : {1} = {2}", idx++, kv.Key, kv.Value)));

            var sparkContext = new SparkContext(new SparkConf());
            var rdd = sparkContext.Parallelize(wordPairs);

            Log(string.Format("Main() rdd = {0}", rdd));

            // Sum the counts for any arguments that appear more than once.
            var reduced = rdd.ReduceByKey((v1, v2) => v1 + v2);

            Log("reduced.count = " + reduced.Count());
            sparkContext.Stop();
        }
Example #9
        public void TestRunJob()
        {
            // Arrange
            Mock<ISparkContextProxy> sparkContextProxy = new Mock<ISparkContextProxy>();
            SparkContext sc = new SparkContext(sparkContextProxy.Object, null);
            RDD<int> rdd = sc.Parallelize(new int[] { 0, 1, 2, 3, 4, 5 }, 2);

            sparkContextProxy.Setup(m => m.RunJob(It.IsAny<IRDDProxy>(), It.IsAny<IEnumerable<int>>()));

            // Act
            int[] partitions = new int[] { 0, 1 };
            rdd.SparkContext.RunJob(rdd, partitions);

            // Assert
            sparkContextProxy.Verify(m => m.RunJob(rdd.RddProxy, partitions), Times.Once);
        }
Example #10
        public void TestParallelize()
        {
            // Arrange
            Mock<IRDDProxy> rddProxy = new Mock<IRDDProxy>();
            Mock<ISparkContextProxy> sparkContextProxy = new Mock<ISparkContextProxy>();

            sparkContextProxy.Setup(m => m.Parallelize(It.IsAny<IEnumerable<byte[]>>(), It.IsAny<int>())).Returns(rddProxy.Object);
            SparkContext sc = new SparkContext(sparkContextProxy.Object, null);

            // Act
            var nums = new[] { 0, 2, 3, 4, 6 };
            RDD<int> rdd = sc.Parallelize(nums, -2);

            // Assert
            Assert.IsNotNull(rdd);
            Assert.AreEqual(rddProxy.Object, rdd.RddProxy);
            Assert.AreEqual(sc, rdd.sparkContext);
        }
Example #11
        internal static void DStreamTextFileSample()
        {
            count = 0;

            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            SparkContext sc = SparkCLRSamples.SparkContext;
            var b = sc.Broadcast<int>(0);

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath, () =>
            {
                StreamingContext context = new StreamingContext(sc, 2000L);     // batch interval is in milliseconds
                context.Checkpoint(checkpointPath);

                var lines = context.TextFileStream(Path.Combine(directory, "test"));
                lines = context.Union(lines, lines);
                var words = lines.FlatMap(l => l.Split(' '));
                var pairs = words.Map(w => new Tuple<string, int>(w, 1));

                // since operations like ReduceByKey, Join and UpdateStateByKey are
                // separate dstream transformations defined in CSharpDStream.scala,
                // an extra CSharpRDD is introduced in between these operations
                var wordCounts = pairs.ReduceByKey((x, y) => x + y);
                var join = wordCounts.Window(2, 2).Join(wordCounts, 2);
                var initialStateRdd = sc.Parallelize(new[] { new Tuple<string, int>("AAA", 88), new Tuple<string, int>("BBB", 88) });
                var state = join.UpdateStateByKey(new UpdateStateHelper(b).Execute, initialStateRdd);

                state.ForeachRDD((time, rdd) =>
                {
                    // there's a chance rdd.Take conflicts with ssc.Stop
                    if (stopFileServer)
                    {
                        return;
                    }

                    object[] taken = rdd.Take(10);
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    foreach (object record in taken)
                    {
                        Console.WriteLine(record);

                        var countByWord = (Tuple<string, int>)record;
                        Assert.AreEqual(countByWord.Item2, countByWord.Item1 == "The" || countByWord.Item1 == "lazy" || countByWord.Item1 == "dog" ? 92 : 88);
                    }
                    Console.WriteLine();

                    stopFileServer = true;
                });

                return context;
            });

            StartFileServer(ssc, directory, "words.txt");

            ssc.Start();

            ssc.AwaitTermination();
        }
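`UpdateStateByKey` above takes `new UpdateStateHelper(b).Execute`, a serializable helper that is not part of this listing. A sketch of what it plausibly looks like (the body is an assumption): it folds each batch's counts for a key into the running state, offset by the broadcast value.

        // Hypothetical sketch of UpdateStateHelper; the real class is defined
        // elsewhere in the samples. It carries the Broadcast<int> captured above
        // and adds the new counts for a key onto the previous state.
        [Serializable]
        internal class UpdateStateHelper
        {
            private readonly Broadcast<int> b;

            internal UpdateStateHelper(Broadcast<int> b)
            {
                this.b = b;
            }

            // newValues: this batch's counts for the key; prevState: running total.
            internal int Execute(IEnumerable<int> newValues, int prevState)
            {
                return newValues.Sum() + prevState + b.Value;
            }
        }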
Example #12
        internal static void DStreamMapWithStateSample()
        {
            string directory = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath, () =>
            {
                SparkContext sc = SparkCLRSamples.SparkContext;
                StreamingContext context = new StreamingContext(sc, 10000L);     // batch interval is in milliseconds
                context.Checkpoint(checkpointPath);

                var lines = context.TextFileStream(Path.Combine(directory, "test1"));
                lines = context.Union(lines, lines);
                var words = lines.FlatMap(l => l.Split(' '));
                var pairs = words.Map(w => new KeyValuePair<string, int>(w, 1));

                var wordCounts = pairs.ReduceByKey((x, y) => x + y);
                var initialState = sc.Parallelize(new[] { new KeyValuePair<string, int>("NOT_A_WORD", 1024), new KeyValuePair<string, int>("dog", 10000) }, 1);
                var stateSpec = new StateSpec<string, int, int, KeyValuePair<string, int>>((word, count, state) =>
                {
                    if (state.IsTimingOut())
                    {
                        Console.WriteLine("Found timing out word: {0}", word);
                        return new KeyValuePair<string, int>(word, state.Get());
                    }

                    var sum = 0;
                    if (state.Exists())
                    {
                        sum = state.Get();
                    }
                    state.Update(sum + count);
                    Console.WriteLine("word: {0}, count: {1}", word, sum + count);
                    return new KeyValuePair<string, int>(word, sum + count);
                }).NumPartitions(1).InitialState(initialState).Timeout(TimeSpan.FromSeconds(30));

                var snapshots = wordCounts.MapWithState(stateSpec).StateSnapshots();
                snapshots.ForeachRDD((double time, RDD<dynamic> rdd) =>
                {
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Snapshots @ Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");

                    foreach (KeyValuePair<string, int> record in rdd.Collect())
                    {
                        Console.WriteLine("[{0}, {1}]", record.Key, record.Value);
                    }
                    Console.WriteLine();
                });

                return context;
            });

            ssc.Start();

            StartFileServer(directory, "words.txt", 100);

            ssc.AwaitTermination();
            ssc.Stop();
        }