/// <summary>
/// Combines two worker functions into a single <see cref="WorkerFunction"/>.
/// The delegates of both functions are handed to a
/// <see cref="WorkerFuncChainHelper"/>, and that helper's <c>Execute</c> method
/// becomes the delegate of the returned function.
/// </summary>
/// <param name="innerFunction">
/// Function whose delegate is passed as the helper's first argument
/// (by its name, presumably the one applied first - confirm against the helper).
/// </param>
/// <param name="outerFunction">
/// Function whose delegate is passed as the helper's second argument.
/// </param>
/// <returns>A new worker function backed by the chain helper.</returns>
internal static WorkerFunction Chain(
    WorkerFunction innerFunction,
    WorkerFunction outerFunction)
{
    var chainHelper = new WorkerFuncChainHelper(innerFunction.Func, outerFunction.Func);

    return new WorkerFunction(chainHelper.Execute);
}
/// <summary>
/// Initializes a pipelined RDD that keeps the given worker function together with
/// the reference to the previous RDD. The base constructor is always given
/// <c>SerializedMode.Byte</c> as this RDD's own serialization mode.
/// </summary>
/// <param name="func">Worker function stored by this RDD; must not be null.</param>
/// <param name="preservesPartitioning">
/// Flag stored on this RDD indicating whether partitioning is preserved.
/// </param>
/// <param name="prevRddJvmObjRef">JVM reference of the previous RDD, forwarded to the base class.</param>
/// <param name="sparkContext">Spark context, forwarded to the base class.</param>
/// <param name="prevSerializedMode">
/// Serialization mode of the previous RDD, forwarded to the base class.
/// </param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="func"/> is null.
/// </exception>
internal PipelinedRDD(
    RDD.WorkerFunction func,
    bool preservesPartitioning,
    JvmObjectReference prevRddJvmObjRef,
    SparkContext sparkContext,
    SerializedMode prevSerializedMode)
    : base(prevRddJvmObjRef, sparkContext, SerializedMode.Byte, prevSerializedMode)
{
    // The single-string ArgumentNullException constructor treats its argument as the
    // parameter name, so the parameter name and the message are passed separately.
    _func = func ?? throw new ArgumentNullException(nameof(func), "UDF cannot be null.");
    _preservesPartitioning = preservesPartitioning;
}
/// <summary>
/// Return a new RDD by applying a function to each partition of this RDD,
/// while tracking the index of the original partition.
/// When this RDD is pipelinable, the new function is chained onto the function
/// already held by this RDD and a single pipelined RDD over the same previous RDD
/// is returned; otherwise the call is delegated to the base implementation.
/// </summary>
/// <typeparam name="U">The element type of new RDD</typeparam>
/// <param name="newFunc">The function to be applied to each partition</param>
/// <param name="preservesPartitioning">
/// Indicates if it preserves partition parameters. The chained RDD is marked as
/// preserving partitioning only if both this RDD and the new function preserve it.
/// </param>
/// <returns>A new RDD</returns>
internal override RDD<U> MapPartitionsWithIndexInternal<U>(
    RDD.WorkerFunction.ExecuteDelegate newFunc,
    bool preservesPartitioning = false)
{
    if (!IsPipelinable())
    {
        return base.MapPartitionsWithIndexInternal<U>(newFunc, preservesPartitioning);
    }

    RDD.WorkerFunction newWorkerFunc = RDD.WorkerFunction.Chain(
        new RDD.WorkerFunction(_func.Func),
        new RDD.WorkerFunction(newFunc));

    // The chained RDD still reads from the same previous RDD (_prevRddJvmObjRef), so
    // it must be given that RDD's serialization mode. This RDD's own mode is always
    // SerializedMode.Byte and says nothing about how the previous RDD is serialized.
    // NOTE(review): assumes the base class keeps the constructor's prevSerializedMode
    // in a field named _prevSerializedMode - confirm against the base RDD class.
    return new PipelinedRDD<U>(
        newWorkerFunc,
        preservesPartitioning && _preservesPartitioning,
        _prevRddJvmObjRef,
        _sparkContext,
        _prevSerializedMode);
}