Ejemplo n.º 1
0
        public void TestSparkConf()
        {
            var conf = new SparkConf(false);

            // Populate the configuration through both the dedicated setters and the generic Set.
            conf.SetMaster("master");
            conf.SetAppName("test");
            conf.SetSparkHome("test home");
            conf.Set("key_string", "value");
            conf.Set("key_int", "100");

            // Keys the configuration is expected to report for the values set above.
            var expected = new Dictionary<string, string>
            {
                ["spark.master"] = "master",
                ["spark.app.name"] = "test",
                ["spark.home"] = "test home",
                ["key_string"] = "value",
                ["key_int"] = "100"
            };

            // Each expected entry must be readable individually via Get.
            foreach (var pair in expected)
            {
                Assert.Equal(pair.Value, conf.Get(pair.Key, string.Empty));
            }

            // The numeric entry must also be readable through the typed accessor.
            Assert.Equal(100, conf.GetInt("key_int", 0));

            // GetAll() must return exactly the expected set of entries.
            var actual = conf.GetAll().ToDictionary(entry => entry.Key, entry => entry.Value);

            Assert.Equal(expected, actual);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads the configured SQL table through the JDBC data frame reader, counts its rows
        /// and logs the result.
        /// </summary>
        /// <remarks>
        /// While the connection string or the table name is null or empty, a prompt is logged
        /// and <c>GetValues()</c> is called before the inputs are checked again (presumably it
        /// refreshes them; confirm against its definition). The retry is a loop rather than a
        /// recursive call so that repeated invalid input cannot grow the call stack.
        /// </remarks>
        private static void JdbcDataFrame()
        {
            // Keep asking until both inputs are present.
            while (string.IsNullOrEmpty(connectionString) || string.IsNullOrEmpty(tableName))
            {
                logger.LogInfo("****Please provide correct connectionstring and table name****");
                GetValues();
            }

            var sparkConf = new SparkConf();
            sparkConf.SetAppName("SqlConnectionFromMobius");
            sparkConf.SetMaster("yarn");
            sparkConf.Set("spark.sql.warehouse.dir", "/user/hive/warehouse");

            // The context is stored in the shared field, as the original code did.
            sparkContext = new SparkContext(sparkConf);
            var sqlContext = new SqlContext(sparkContext);

            // No extra JDBC connection properties are supplied.
            var df = sqlContext
                     .Read()
                     .Jdbc(connectionString, tableName, new Dictionary<string, string>());
            var rowCount = df.Count();

            logger.LogInfo("****Row count is " + rowCount + "****");
            logger.LogInfo("Executed Successfully.................");
        }
Ejemplo n.º 3
0
        // Builds the Spark configuration from the command-line arguments and stores the
        // resulting SparkContext and SqlContext in the corresponding static members.
        private static void InitializeSparkContext(string[] args)
        {
            var conf = new SparkConf();

            // The first argument supplies the value for spark.local.dir.
            string localDir = args[0];
            conf.Set("spark.local.dir", localDir);
            conf.SetAppName("SparkCLR perf suite - C#");

            SparkContext = new SparkContext(conf);
            SqlContext = new SqlContext(PerfBenchmark.SparkContext);
        }
Ejemplo n.º 4
0
        // Builds a SparkContext, applying Configuration.SparkLocalDirectoryOverride
        // as spark.local.dir when an override is set.
        private static SparkContext CreateSparkContext()
        {
            var sparkConf = new SparkConf();
            var localDirOverride = Configuration.SparkLocalDirectoryOverride;

            if (localDirOverride != null)
            {
                sparkConf.Set("spark.local.dir", localDirOverride);
            }

            return new SparkContext(sparkConf);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Sample that builds a direct Kafka DStream with a per-topic partition count setting,
        /// prints the record count and partition count of every batch, and asserts on both.
        /// Starts the streaming context and then blocks in AwaitTermination().
        /// </summary>
        internal static void DStreamDirectKafkaWithRepartitionSample()
        {
            // Reset the shared batch counter used by the ForeachRDD callback below.
            count = 0;

            string directory      = SparkCLRSamples.Configuration.SampleDataLocation;
            string checkpointPath = Path.Combine(directory, "checkpoint");

            // The context is obtained through GetOrCreate, passing the checkpoint path and a
            // factory lambda that builds a fresh context.
            StreamingContext ssc = StreamingContext.GetOrCreate(checkpointPath,
                                                                () =>
            {
                var conf                 = new SparkConf();
                SparkContext sc          = new SparkContext(conf);
                // NOTE(review): 2000L is presumably the batch interval in milliseconds - confirm.
                StreamingContext context = new StreamingContext(sc, 2000L);
                context.Checkpoint(checkpointPath);

                var kafkaParams = new List <Tuple <string, string> > {
                    new Tuple <string, string>("metadata.broker.list", brokers),
                    new Tuple <string, string>("auto.offset.reset", "smallest")
                };

                // NOTE(review): this setting is applied to conf after the SparkContext was
                // already constructed from it; whether CreateDirectStream observes it depends
                // on how the context holds the configuration - confirm before reordering.
                conf.Set("spark.mobius.streaming.kafka.numPartitions." + topic, partitions.ToString());
                // Direct stream over the single topic, with an empty list of (string, long) tuples
                // as the last argument (presumably explicit starting offsets - confirm).
                var dstream = KafkaUtils.CreateDirectStream(context, new List <string> {
                    topic
                }, kafkaParams, Enumerable.Empty <Tuple <string, long> >());

                dstream.ForeachRDD((time, rdd) =>
                {
                    long batchCount   = rdd.Count();
                    int numPartitions = rdd.GetNumPartitions();

                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Time: {0}", time);
                    Console.WriteLine("-------------------------------------------");
                    Console.WriteLine("Count: " + batchCount);
                    Console.WriteLine("Partitions: " + numPartitions);

                    // only first batch has data and is repartitioned into 10 partitions
                    if (count++ == 0)
                    {
                        // First batch: all messages are expected, spread over at least
                        // the requested number of partitions.
                        Assert.AreEqual(messages, batchCount);
                        Assert.IsTrue(numPartitions >= partitions);
                    }
                    else
                    {
                        // Every later batch is expected to be empty with zero partitions.
                        Assert.AreEqual(0, batchCount);
                        Assert.IsTrue(numPartitions == 0);
                    }
                });

                return(context);
            });

            ssc.Start();
            ssc.AwaitTermination();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a <see cref="SparkConf"/> from the options collected by this builder and
        /// returns the <see cref="SparkSession"/> of the SqlContext obtained for it, reusing
        /// an existing context when there is one.
        /// </summary>
        /// <returns>The session exposed by the obtained SqlContext.</returns>
        public SparkSession GetOrCreate()
        {
            var conf = new SparkConf();

            // Copy every builder option into the Spark configuration.
            foreach (var entry in options)
            {
                conf.Set(entry.Key, entry.Value);
            }

            return SqlContext.GetOrCreate(SparkContext.GetOrCreate(conf)).SparkSession;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Applies the options collected by this builder to the underlying JVM builder object
        /// and returns the <see cref="SparkSession"/> produced by its <c>getOrCreate</c> call.
        /// </summary>
        /// <returns>A session wrapping the JVM object reference returned by <c>getOrCreate</c>.</returns>
        public SparkSession GetOrCreate()
        {
            var conf = new SparkConf();

            // Copy every builder option into the Spark configuration.
            foreach (KeyValuePair<string, string> entry in _options)
            {
                conf.Set(entry.Key, entry.Value);
            }

            // Hand the assembled configuration to the JVM-side builder.
            _jvmObject.Invoke("config", conf);

            var sessionReference = (JvmObjectReference)_jvmObject.Invoke("getOrCreate");
            return new SparkSession(sessionReference);
        }
Ejemplo n.º 8
0
        // Builds a SparkContext that targets Env.SPARK_MASTER_URL, applying
        // Configuration.SparkLocalDirectoryOverride as spark.local.dir when an override is set.
        private static SparkContext CreateSparkContext()
        {
            var sparkConf = new SparkConf();
            sparkConf.SetMaster(Env.SPARK_MASTER_URL);

            var localDirOverride = Configuration.SparkLocalDirectoryOverride;
            if (localDirOverride != null)
            {
                sparkConf.Set("spark.local.dir", localDirOverride);
            }

            return new SparkContext(sparkConf);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Builds a SparkContext whose spark.local.dir comes from the "spark.local.dir"
        /// environment variable, falling back to the system temp path when that variable
        /// is null or empty.
        /// </summary>
        /// <returns>The newly constructed SparkContext.</returns>
        private static SparkContext CreateSparkContext()
        {
            var sparkConf = new SparkConf();

            // Resolve the local directory: environment variable first, temp path otherwise.
            string configuredDir = Environment.GetEnvironmentVariable("spark.local.dir");
            string localDir = string.IsNullOrEmpty(configuredDir)
                ? Path.GetTempPath()
                : configuredDir;

            sparkConf.Set("spark.local.dir", localDir);
            Logger.DebugFormat("spark.local.dir is set to {0}", localDir);

            return new SparkContext(sparkConf);
        }