Example #1
0
        /// <summary>
        /// Reads the people JSON sample into a DataFrame, prints its row count, sorts it on
        /// the "name" and "age" columns, shows the sorted result and, when validation is
        /// enabled, asserts the "id" value of the first four sorted rows.
        /// </summary>
        /// <param name="createNewSession">
        /// When <c>true</c>, the data is read through the session returned by
        /// <c>sparkSession.NewSession()</c>; otherwise through the session returned by
        /// <c>GetSparkSession()</c>.
        /// </param>
        private static void RunDataFrameSample(bool createNewSession)
        {
            // GetSparkSession() is always called first, even when a new session is requested.
            SparkSession defaultSession = GetSparkSession();
            SparkSession session = createNewSession ? sparkSession.NewSession() : defaultSession;

            var reader = session.Read();
            var people = reader.Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson));

            Console.WriteLine("Count of items in DataFrame {0}", people.Count());

            // One flag per sort column, in the same order as the column names.
            var sortColumns = new[] { "name", "age" };
            var sortFlags = new[] { true, false };
            var sorted = people.Sort(sortColumns, sortFlags);

            sorted.Show();

            if (!SparkCLRSamples.Configuration.IsValidationEnabled)
            {
                return;
            }

            // Expected "id" values of the first four rows after sorting.
            string[] expectedIds = { "789", "123", "531", "456" };
            var rows = sorted.Collect().ToArray();
            for (int i = 0; i < expectedIds.Length; i++)
            {
                Assert.AreEqual(expectedIds[i], rows[i].GetAs<string>("id"));
            }
        }
Example #2
0
        /// <summary>
        /// Calls a series of <c>SparkSession</c> members on the shared <c>_spark</c> instance
        /// and asserts the exact runtime type of each returned object.
        /// </summary>
        public void TestSignaturesV2_3_X()
        {
            var session = _spark;
            const string viewName = "testView";

            // Context and builder.
            Assert.IsType<SparkContext>(session.SparkContext);
            Assert.IsType<Builder>(SparkSession.Builder());

            // Default-session handling: clear it, set it again, then read it back.
            SparkSession.ClearDefaultSession();
            SparkSession.SetDefaultSession(session);
            Assert.IsType<SparkSession>(SparkSession.GetDefaultSession());

            // Configuration, child session and batch reader.
            Assert.IsType<RuntimeConfig>(session.Conf());
            Assert.IsType<SparkSession>(session.NewSession());
            Assert.IsType<DataFrameReader>(session.Read());

            // Every Range overload.
            Assert.IsType<DataFrame>(session.Range(10));
            Assert.IsType<DataFrame>(session.Range(10, 100));
            Assert.IsType<DataFrame>(session.Range(10, 100, 10));
            Assert.IsType<DataFrame>(session.Range(10, 100, 10, 5));

            // Register a temp view so that Table() has something to resolve.
            session.Range(10).CreateOrReplaceTempView(viewName);
            Assert.IsType<DataFrame>(session.Table(viewName));

            // Streaming reader, UDF registration and catalog.
            Assert.IsType<DataStreamReader>(session.ReadStream());
            Assert.IsType<UdfRegistration>(session.Udf());
            Assert.IsType<Catalog>(session.Catalog());
        }
        /// <summary>
        /// Walks through basic DataFrame operations on the people JSON sample: showing the
        /// data and its schema, selecting and filtering columns, grouping, and querying
        /// through a temporary view and a global temporary view.
        /// </summary>
        /// <param name="spark">The session used to read the JSON file and run the SQL queries.</param>
        static void BasicDfExample(SparkSession spark)
        {
            // NOTE(review): machine-specific absolute path; adjust it to the local Spark installation.
            const string peopleJsonPath = "/Users/ed/spark-2.4.6-bin-without-hadoop/examples/src/main/resources/people.json";

            var dataFrame = spark.Read().Json(peopleJsonPath);

            dataFrame.Show();

            dataFrame.PrintSchema();

            dataFrame.Select("name").Show();

            // The same projection twice: once via the + operator, once via the Plus method.
            dataFrame.Select(dataFrame["name"], dataFrame["age"] + 1).Show();
            dataFrame.Select(dataFrame["name"], dataFrame["age"].Plus(1)).Show();

            // The same filter twice: once via the > operator, once via the Gt method.
            dataFrame.Filter(dataFrame["age"] > 21).Show();
            dataFrame.Filter(dataFrame["age"].Gt(21)).Show();

            dataFrame.GroupBy(dataFrame["age"]).Count().Show();

            dataFrame.CreateOrReplaceTempView("people");
            var sqlDataFrame = spark.Sql("SELECT * FROM people");

            // Previously the query result was assigned but never used; display it like every other step.
            sqlDataFrame.Show();

            // NOTE(review): per the Spark docs, creating a global temp view whose name already exists
            // fails, so this step presumably cannot be repeated within one Spark application - confirm.
            dataFrame.CreateGlobalTempView("people");
            spark.Sql("SELECT * FROM global_temp.people").Show();
            spark.NewSession().Sql("SELECT * FROM global_temp.people").Show();
        }