예제 #1
0
        /// <summary>
        /// Uses a broadcast variable inside a UDF, checks the produced strings, then calls
        /// <c>Destroy()</c> on the broadcast and verifies that running the same UDF again
        /// raises an exception.
        /// </summary>
        public void TestDestroy()
        {
            var obj1 = new TestBroadcastVariable(5, "destroy");
            Broadcast<TestBroadcastVariable> bc1 = _spark.SparkContext.Broadcast(obj1);

            Func<Column, Column> udf = Udf<string, string>(
                str => $"{str} {bc1.Value().StringValue}, {bc1.Value().IntValue}");

            var expected = new string[] { "hello destroy, 5", "world destroy, 5" };

            string[] actual = ToStringArray(_df.Select(udf(_df["_1"])));
            Assert.Equal(expected, actual);

            bc1.Destroy();

            // Using the broadcast after Destroy() is expected to fail. On the JVM side this
            // surfaces as (stack trace condensed):
            // ERROR Utils: Exception encountered
            //  org.apache.spark.SparkException: Attempted to use Broadcast(0) after it was destroyed(destroy at NativeMethodAccessorImpl.java:0)
            //  at org.apache.spark.broadcast.Broadcast.assertValid(Broadcast.scala:144)
            //  at org.apache.spark.broadcast.TorrentBroadcast.writeObject(TorrentBroadcast.scala:202)
            //
            // Assert.ThrowsAny is used instead of try { ...; Assert.True(false); } catch (Exception)
            // because a catch-all handler also swallows the assertion failure itself, which
            // would let the test pass even when nothing was thrown by the query.
            Assert.ThrowsAny<Exception>(() =>
            {
                _df.Select(udf(_df["_1"])).Collect().ToArray();
            });
        }
예제 #2
0
        /// <summary>
        /// Broadcasts two separate objects and checks that a single UDF can read the
        /// string value of both when applied to column "_1" of the test DataFrame.
        /// </summary>
        public void TestMultipleBroadcastWithoutEncryption()
        {
            // Create both payloads first, then broadcast them in the same order.
            var firstPayload = new TestBroadcastVariable(1, "first");
            var secondPayload = new TestBroadcastVariable(2, "second");
            var firstBroadcast = _spark.SparkContext.Broadcast(firstPayload);
            var secondBroadcast = _spark.SparkContext.Broadcast(secondPayload);

            // The UDF appends the string value of each broadcast to the input string.
            Func<Column, Column> appendBoth = Udf<string, string>(
                input =>
                    $"{input} {firstBroadcast.Value().StringValue} and {secondBroadcast.Value().StringValue}");

            string[] expected =
            {
                "hello first and second",
                "world first and second"
            };

            Column source = _df["_1"];
            var projected = _df.Select(appendBoth(source));
            string[] actual = ToStringArray(projected);

            Assert.Equal(expected, actual);
        }
예제 #3
0
        /// <summary>
        /// Runs a UDF that reads a broadcast variable, calls <c>Unpersist()</c> on the
        /// broadcast, and then runs the same UDF again, expecting identical output both times.
        /// </summary>
        public void TestUnpersist()
        {
            var payload = new TestBroadcastVariable(1, "unpersist");
            var broadcast = _spark.SparkContext.Broadcast(payload);

            Func<Column, Column> describe = Udf<string, string>(
                input => $"{input} {broadcast.Value().StringValue}, {broadcast.Value().IntValue}");

            // Builds a fresh Select over column "_1" on every call and collects it as strings.
            Func<string[]> runQuery = () => ToStringArray(_df.Select(describe(_df["_1"])));

            string[] expected =
            {
                "hello unpersist, 1",
                "world unpersist, 1"
            };

            string[] beforeUnpersist = runQuery();
            Assert.Equal(expected, beforeUnpersist);

            // Unpersist removes the executor-side copies of the broadcast. Running the UDF
            // again validates that the value is re-sent to the executors and still usable.
            broadcast.Unpersist();

            string[] afterUnpersist = runQuery();
            Assert.Equal(expected, afterUnpersist);
        }