/// <summary>
/// Verifies that a broadcast variable can be read inside a UDF, and that running the
/// same UDF after <c>Destroy()</c> has been called on the broadcast raises an exception.
/// </summary>
public void TestDestroy()
{
    var obj1 = new TestBroadcastVariable(5, "destroy");
    Broadcast<TestBroadcastVariable> bc1 = _spark.SparkContext.Broadcast(obj1);

    Func<Column, Column> udf = Udf<string, string>(
        str => $"{str} {bc1.Value().StringValue}, {bc1.Value().IntValue}");

    var expected = new string[] { "hello destroy, 5", "world destroy, 5" };

    string[] actual = ToStringArray(_df.Select(udf(_df["_1"])));
    Assert.Equal(expected, actual);

    bc1.Destroy();

    // Using the broadcast after Destroy() throws the following exception:
    // ERROR Utils: Exception encountered
    // org.apache.spark.SparkException: Attempted to use Broadcast(0) after it was destroyed(destroy at NativeMethodAccessorImpl.java:0)
    //  at org.apache.spark.broadcast.Broadcast.assertValid(Broadcast.scala:144)
    //  at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$writeObject$1.apply$mcV$sp(TorrentBroadcast.scala:203)
    //  at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$writeObject$1.apply(TorrentBroadcast.scala:202)
    //  at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$writeObject$1.apply(TorrentBroadcast.scala:202)
    //  at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1326)
    //  at org.apache.spark.broadcast.TorrentBroadcast.writeObject(TorrentBroadcast.scala:202)
    //  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    //
    // Assert.ThrowsAny fails the test when nothing is thrown. The previous
    // try { ...; Assert.True(false); } catch (Exception) { } pattern caught its own
    // assertion failure, so the test passed regardless of the outcome.
    Assert.ThrowsAny<Exception>(() =>
    {
        _df.Select(udf(_df["_1"])).Collect().ToArray();
    });
}
/// <summary>
/// Verifies that a single UDF can read two distinct broadcast variables and that
/// both values appear in the output produced for every input row.
/// </summary>
public void TestMultipleBroadcastWithoutEncryption()
{
    // Broadcast two independent payloads through the shared Spark context.
    Broadcast<TestBroadcastVariable> firstBroadcast =
        _spark.SparkContext.Broadcast(new TestBroadcastVariable(1, "first"));
    Broadcast<TestBroadcastVariable> secondBroadcast =
        _spark.SparkContext.Broadcast(new TestBroadcastVariable(2, "second"));

    // The UDF appends the string value of each broadcast to the input column value.
    Func<Column, Column> appendBoth = Udf<string, string>(
        input => $"{input} {firstBroadcast.Value().StringValue} and {secondBroadcast.Value().StringValue}");

    var projected = _df.Select(appendBoth(_df["_1"]));
    string[] actual = ToStringArray(projected);

    string[] expected = { "hello first and second", "world first and second" };
    Assert.Equal(expected, actual);
}
/// <summary>
/// Verifies that a broadcast variable remains usable from a UDF after
/// <c>Unpersist()</c> has been called on it: the same query is run before and
/// after the call and must yield the same rows.
/// </summary>
public void TestUnpersist()
{
    Broadcast<TestBroadcastVariable> broadcast =
        _spark.SparkContext.Broadcast(new TestBroadcastVariable(1, "unpersist"));

    Func<Column, Column> appendBroadcast = Udf<string, string>(
        input => $"{input} {broadcast.Value().StringValue}, {broadcast.Value().IntValue}");

    string[] expected = { "hello unpersist, 1", "world unpersist, 1" };

    // First run: the broadcast has not been touched yet.
    string[] beforeUnpersist = ToStringArray(_df.Select(appendBroadcast(_df["_1"])));
    Assert.Equal(expected, beforeUnpersist);

    // Unpersist removes the copies of the broadcast held on the executors. Running the
    // UDF again checks that the value is re-sent to the executors and still readable.
    broadcast.Unpersist();

    string[] afterUnpersist = ToStringArray(_df.Select(appendBroadcast(_df["_1"])));
    Assert.Equal(expected, afterUnpersist);
}