private static void RunDataFrameSample(bool createNewSession)
{
    SparkSession ss = GetSparkSession();
    if (createNewSession)
    {
        // Create an isolated session that shares the underlying SparkContext.
        ss = ss.NewSession();
    }

    var peopleDataFrame = ss.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson));

    var count = peopleDataFrame.Count();
    Console.WriteLine("Count of items in DataFrame {0}", count);

    // Sort by "name" ascending, then "age" descending.
    var sortedDataFrame = peopleDataFrame.Sort(new string[] { "name", "age" }, new bool[] { true, false });
    sortedDataFrame.Show();

    if (SparkCLRSamples.Configuration.IsValidationEnabled)
    {
        var sortedDF = sortedDataFrame.Collect().ToArray();
        Assert.AreEqual("789", sortedDF[0].GetAs<string>("id"));
        Assert.AreEqual("123", sortedDF[1].GetAs<string>("id"));
        Assert.AreEqual("531", sortedDF[2].GetAs<string>("id"));
        Assert.AreEqual("456", sortedDF[3].GetAs<string>("id"));
    }
}
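The sample above uses the older Mobius (Microsoft.Spark.CSharp) API, where per-column sort order is passed as a parallel bool[] of ascending flags. As a point of comparison, here is a minimal sketch of the same sort expressed with the newer Microsoft.Spark (.NET for Apache Spark) API used in the later listings, where order is expressed with Asc()/Desc() on Column; the file path, app name, and column names are assumptions for illustration only.

using Microsoft.Spark.Sql;

class SortSketch
{
    static void Main()
    {
        SparkSession spark = SparkSession
            .Builder()
            .AppName("sort-sketch")          // assumed app name
            .GetOrCreate();

        DataFrame people = spark.Read().Json("people.json"); // assumed path

        // Ascending on "name", descending on "age" -- the equivalent of
        // Sort(new[] { "name", "age" }, new[] { true, false }) above.
        people.Sort(people["name"].Asc(), people["age"].Desc()).Show();

        spark.Stop();
    }
}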
public void TestSignaturesV2_3_X()
{
    Assert.IsType<SparkContext>(_spark.SparkContext);

    Assert.IsType<Builder>(SparkSession.Builder());

    SparkSession.ClearDefaultSession();
    SparkSession.SetDefaultSession(_spark);
    Assert.IsType<SparkSession>(SparkSession.GetDefaultSession());

    Assert.IsType<RuntimeConfig>(_spark.Conf());

    Assert.IsType<SparkSession>(_spark.NewSession());

    Assert.IsType<DataFrameReader>(_spark.Read());

    Assert.IsType<DataFrame>(_spark.Range(10));
    Assert.IsType<DataFrame>(_spark.Range(10, 100));
    Assert.IsType<DataFrame>(_spark.Range(10, 100, 10));
    Assert.IsType<DataFrame>(_spark.Range(10, 100, 10, 5));

    _spark.Range(10).CreateOrReplaceTempView("testView");
    Assert.IsType<DataFrame>(_spark.Table("testView"));

    Assert.IsType<DataStreamReader>(_spark.ReadStream());

    Assert.IsType<UdfRegistration>(_spark.Udf());

    Assert.IsType<Catalog>(_spark.Catalog());
}
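The test relies on a _spark field supplied by its test class. Below is a minimal sketch of the kind of xunit fixture that could provide it; the class, property, and test names here are assumptions for illustration, not the actual Microsoft.Spark test infrastructure, and running it still requires a Spark backend launched via spark-submit.

using System;
using Microsoft.Spark.Sql;
using Xunit;

// Builds one SparkSession for the whole test class and stops it at the end.
public class SparkSessionFixture : IDisposable
{
    public SparkSession Spark { get; }

    public SparkSessionFixture()
    {
        Spark = SparkSession
            .Builder()
            .AppName("signature-tests")   // assumed app name
            .GetOrCreate();
    }

    public void Dispose() => Spark.Stop();
}

public class SparkSessionTests : IClassFixture<SparkSessionFixture>
{
    private readonly SparkSession _spark;

    public SparkSessionTests(SparkSessionFixture fixture) => _spark = fixture.Spark;

    [Fact]
    public void CanCreateTempView()
    {
        _spark.Range(5).CreateOrReplaceTempView("sketchView");
        Assert.IsType<DataFrame>(_spark.Table("sketchView"));
    }
}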
static void BasicDfExample(SparkSession spark)
{
    var dataFrame = spark.Read().Json("/Users/ed/spark-2.4.6-bin-without-hadoop/examples/src/main/resources/people.json");
    dataFrame.Show();
    dataFrame.PrintSchema();

    dataFrame.Select("name").Show();

    // Column arithmetic: the overloaded operator and the Plus() method are equivalent.
    dataFrame.Select(dataFrame["name"], dataFrame["age"] + 1).Show();
    dataFrame.Select(dataFrame["name"], dataFrame["age"].Plus(1)).Show();

    // Filtering: the overloaded operator and the Gt() method are equivalent.
    dataFrame.Filter(dataFrame["age"] > 21).Show();
    dataFrame.Filter(dataFrame["age"].Gt(21)).Show();

    dataFrame.GroupBy(dataFrame["age"]).Count().Show();

    // Session-scoped temp view, queried with Spark SQL.
    dataFrame.CreateOrReplaceTempView("people");
    var sqlDataFrame = spark.Sql("SELECT * FROM people");

    // Global temp view, visible to other sessions under the global_temp database.
    dataFrame.CreateGlobalTempView("people");
    spark.Sql("SELECT * FROM global_temp.people").Show();
    spark.NewSession().Sql("SELECT * FROM global_temp.people").Show();
}
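BasicDfExample takes an already-built SparkSession, so it needs a console entry point to drive it. A minimal sketch of such an entry point is shown below; the app name is an assumption, and the DataFrame logic is left as a stub standing in for the method above. The compiled app is then launched through spark-submit with the DotnetRunner class provided by the Microsoft.Spark worker package.

using Microsoft.Spark.Sql;

class Program
{
    static void Main(string[] args)
    {
        SparkSession spark = SparkSession
            .Builder()
            .AppName("basic-dataframe-example")   // assumed app name
            .GetOrCreate();

        BasicDfExample(spark);

        spark.Stop();
    }

    static void BasicDfExample(SparkSession spark)
    {
        // Body as shown in the listing above.
    }
}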