/// <summary>
/// Constructor used by SparkContext when it creates the very first RDD in a
/// lineage, e.g. via
/// <see cref="SparkContext.Parallelize{T}(IEnumerable{T}, int?)"/>.
/// </summary>
/// <param name="jvmObject">Reference to the backing RDD object on the JVM</param>
/// <param name="sparkContext">The owning SparkContext</param>
/// <param name="serializedMode">Serialization mode used for this RDD's data</param>
internal RDD(
    JvmObjectReference jvmObject,
    SparkContext sparkContext,
    SerializedMode serializedMode)
{
    // Plain field capture; the three assignments are independent of each other.
    _serializedMode = serializedMode;
    _sparkContext = sparkContext;
    _jvmObject = jvmObject;
}
/// <summary>
/// Constructor for a pipelined RDD that chains <paramref name="func"/> onto the
/// previous RDD referenced by <paramref name="prevRddJvmObjRef"/>. The current
/// RDD's serialization mode is fixed to <see cref="SerializedMode.Byte"/>.
/// </summary>
/// <param name="func">Worker function to apply; must not be null</param>
/// <param name="preservesPartitioning">
/// True if the function preserves the previous RDD's partitioning
/// </param>
/// <param name="prevRddJvmObjRef">JVM object reference of the previous RDD</param>
/// <param name="sparkContext">The owning SparkContext</param>
/// <param name="prevSerializedMode">Serialization mode of the previous RDD</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="func"/> is null</exception>
internal PipelinedRDD(
    RDD.WorkerFunction func,
    bool preservesPartitioning,
    JvmObjectReference prevRddJvmObjRef,
    SparkContext sparkContext,
    SerializedMode prevSerializedMode)
    : base(prevRddJvmObjRef, sparkContext, SerializedMode.Byte, prevSerializedMode)
{
    // The single-string ArgumentNullException overload treats its argument as
    // the *parameter name*, not a message; use the (paramName, message)
    // overload so the exception reports both correctly.
    _func = func ?? throw new ArgumentNullException(nameof(func), "UDF cannot be null.");
    _preservesPartitioning = preservesPartitioning;
}
/// <summary>
/// Constructor mainly called by <see cref="PipelinedRDD{T}"/>.
/// </summary>
/// <param name="prevRddJvmObjRef">
/// The reference to the RDD JVM object from which pipeline is created
/// </param>
/// <param name="sparkContext">SparkContext object</param>
/// <param name="serializedMode">Serialization mode for the current RDD</param>
/// <param name="prevSerializedMode">Serialization mode for the previous RDD</param>
internal RDD(
    JvmObjectReference prevRddJvmObjRef,
    SparkContext sparkContext,
    SerializedMode serializedMode,
    SerializedMode prevSerializedMode)
{
    // Invoked from the PipelinedRDD constructor, at which point no _jvmObject
    // exists yet for the current RDD — only the previous RDD's reference is kept.
    _prevSerializedMode = prevSerializedMode;
    _serializedMode = serializedMode;
    _sparkContext = sparkContext;
    _prevRddJvmObjRef = prevRddJvmObjRef;
}
/// <summary>
/// Function to create the Broadcast variable (org.apache.spark.broadcast.Broadcast)
/// </summary>
/// <param name="sc">SparkContext object of type <see cref="SparkContext"/></param>
/// <param name="value">Broadcast value of type object</param>
/// <returns>Returns broadcast variable of type <see cref="JvmObjectReference"/></returns>
/// <exception cref="NotSupportedException">
/// Thrown for Spark versions other than 2.4.x and 3.x
/// </exception>
private JvmObjectReference CreateBroadcast(SparkContext sc, T value)
{
    // Wrap the Scala SparkContext in a JavaSparkContext so the Java broadcast
    // API can be used from the JVM bridge.
    var javaSparkContext = (JvmObjectReference)sc.Reference.Jvm.CallStaticJavaMethod(
        "org.apache.spark.api.java.JavaSparkContext",
        "fromSparkContext",
        sc);
    Version version = SparkEnvironment.SparkVersion;
    // Spark 2.4.x and every 3.x minor share the same broadcast-creation path.
    // (Original was missing the method's closing brace — restored here.)
    return (version.Major, version.Minor) switch
    {
        (2, 4) => CreateBroadcast_V2_4_X(javaSparkContext, sc, value),
        (3, _) => CreateBroadcast_V2_4_X(javaSparkContext, sc, value),
        _ => throw new NotSupportedException($"Spark {version} not supported.")
    };
}
/// <summary>
/// Function to create the Broadcast variable (org.apache.spark.broadcast.Broadcast)
/// </summary>
/// <param name="sc">SparkContext object of type <see cref="SparkContext"/></param>
/// <param name="value">Broadcast value of type object</param>
/// <returns>Returns broadcast variable of type <see cref="JvmObjectReference"/></returns>
/// <exception cref="NotSupportedException">
/// Thrown for Spark versions other than 2.3.x and 2.4.x
/// </exception>
private JvmObjectReference CreateBroadcast(SparkContext sc, T value)
{
    IJvmBridge jvm = ((IJvmObjectReferenceProvider)sc).Reference.Jvm;
    // Wrap the Scala SparkContext in a JavaSparkContext so the Java broadcast
    // API can be used from the JVM bridge.
    var javaSparkContext = (JvmObjectReference)jvm.CallStaticJavaMethod(
        "org.apache.spark.api.java.JavaSparkContext",
        "fromSparkContext",
        sc);
    Version version = SparkEnvironment.SparkVersion;
    // Dispatch on (major, minor[, build]): 2.3.0/2.3.1 take one helper path,
    // 2.3.2+ and 2.4.x take another — presumably a behavior change in Spark
    // 2.3.2's broadcast internals; confirm against the helper implementations.
    // (Original had "(2, 3)when" with no space and was missing the method's
    // closing brace — both restored here.)
    return (version.Major, version.Minor) switch
    {
        (2, 3) when version.Build == 0 || version.Build == 1 =>
            CreateBroadcast_V2_3_1_AndBelow(javaSparkContext, value),
        (2, 3) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
        (2, 4) => CreateBroadcast_V2_3_2_AndAbove(javaSparkContext, sc, value),
        _ => throw new NotSupportedException($"Spark {version} not supported.")
    };
}
/// <summary>
/// Constructs a broadcast variable: reserves a temp-file path from the Spark
/// configuration, creates the backing JVM Broadcast object, and caches its id.
/// </summary>
/// <param name="sc">The SparkContext used to create the broadcast</param>
/// <param name="value">The value to broadcast</param>
internal Broadcast(SparkContext sc, T value)
{
    string tempFilePath = CreateTempFilePath(sc.GetConf());
    JvmObjectReference jvmBroadcast = CreateBroadcast(sc, value);

    _path = tempFilePath;
    _jvmObject = jvmBroadcast;
    // Broadcast id as assigned by the JVM side.
    _bid = (long)_jvmObject.Invoke("id");
}