Beispiel #1
0
        /// <summary>
        /// Sample showing Hive integration: loads a JSON file into a DataFrame, stores it as a
        /// table inside a database that is created on demand, then reads the table back,
        /// logs its count and prints every row.
        /// </summary>
        /// <remarks>
        /// Uses members declared outside this method: <c>session</c> (assigned here),
        /// <c>logger</c>, <c>jsonFilePath</c>, <c>dbName</c> and <c>tableName</c>.
        /// The table is written with <c>SaveMode.Overwrite</c>.
        /// </remarks>
        private static void HiveDataFrame()
        {
            // Hive-enabled session; master, application name and warehouse directory are
            // hard-coded for this sample.
            session = SparkSession.Builder()
                .EnableHiveSupport()
                .Config("spark.master", "yarn")
                .Config("spark.app.name", "HiveDataFrame")
                .Config("spark.sql.warehouse.dir", "/user/hive/warehouse")
                .GetOrCreate();

            var peopleDataFrame = session.Read().Json(jsonFilePath);

            // Make sure the target database exists, then make it the current one.
            // NOTE(review): dbName is placed into the SQL text as-is; assumes it is a trusted identifier.
            logger.LogInfo("****Create database if not exists****");
            session.Sql(string.Format("CREATE DATABASE IF NOT EXISTS {0}", dbName));
            logger.LogInfo("****Database Created****");
            session.Sql(string.Format("USE {0}", dbName));

            // Persist the JSON data as a table.
            logger.LogInfo("****Create Table operation started****");
            peopleDataFrame.Write().Mode(SaveMode.Overwrite).SaveAsTable(tableName);
            logger.LogInfo("****Table Created successfully****");

            // Read the table back to confirm the write and display its content.
            var tablesDataFrame = session.Table(tableName);

            // The number logged is Count() of the DataFrame backing tableName.
            logger.LogInfo(string.Format("****Table count in database {0}: {1}****", dbName, tablesDataFrame.Count()));
            var rowCollections = tablesDataFrame.Collect();

            // Rows go to the console rather than the logger; the logger only frames the dump.
            logger.LogInfo("**********************************************");
            foreach (var row in rowCollections)
            {
                Console.WriteLine("{0}", row);
            }
            logger.LogInfo("*********************************************");
            logger.LogInfo("Executed Successfully.................");
        }
Beispiel #2
0
        /// <summary>
        /// Calls the SparkSession members listed below on the shared <c>_spark</c> session and
        /// asserts, via <c>Assert.IsType</c>, the type of object each one returns.
        /// </summary>
        public void TestSignaturesV2_3_X()
        {
            // Context and builder access.
            var sparkContext = _spark.SparkContext;
            Assert.IsType<SparkContext>(sparkContext);

            var sessionBuilder = SparkSession.Builder();
            Assert.IsType<Builder>(sessionBuilder);

            // Reset the default session, register _spark, then read it back.
            SparkSession.ClearDefaultSession();
            SparkSession.SetDefaultSession(_spark);
            var defaultSession = SparkSession.GetDefaultSession();
            Assert.IsType<SparkSession>(defaultSession);

            // Configuration, child session and batch reader.
            var runtimeConfig = _spark.Conf();
            Assert.IsType<RuntimeConfig>(runtimeConfig);

            var childSession = _spark.NewSession();
            Assert.IsType<SparkSession>(childSession);

            var batchReader = _spark.Read();
            Assert.IsType<DataFrameReader>(batchReader);

            // Every Range overload is exercised: one, two, three and four arguments.
            Assert.IsType<DataFrame>(_spark.Range(10));
            Assert.IsType<DataFrame>(_spark.Range(10, 100));
            Assert.IsType<DataFrame>(_spark.Range(10, 100, 10));
            Assert.IsType<DataFrame>(_spark.Range(10, 100, 10, 5));

            // The temp view has to be registered before Table("testView") is called.
            var viewSource = _spark.Range(10);
            viewSource.CreateOrReplaceTempView("testView");
            var viewFrame = _spark.Table("testView");
            Assert.IsType<DataFrame>(viewFrame);

            // Streaming reader, UDF registration and catalog access.
            var streamReader = _spark.ReadStream();
            Assert.IsType<DataStreamReader>(streamReader);

            var udfRegistration = _spark.Udf();
            Assert.IsType<UdfRegistration>(udfRegistration);

            var catalog = _spark.Catalog();
            Assert.IsType<Catalog>(catalog);
        }