/// <summary>
/// Builds a <see cref="PipelinedRDD{U}"/> that runs the given delegate, wrapped
/// in a worker function, on top of this RDD's JVM object.
/// </summary>
/// <typeparam name="U">Element type of the resulting RDD</typeparam>
/// <param name="func">Delegate to wrap in a worker function</param>
/// <param name="preservesPartitioning">
/// Flag handed to the new RDD unchanged; defaults to false
/// </param>
/// <returns>The newly constructed pipelined RDD</returns>
internal virtual RDD<U> MapPartitionsWithIndexInternal<U>(
    RDD.WorkerFunction.ExecuteDelegate func,
    bool preservesPartitioning = false)
{
    // Wrap the raw delegate first so the constructor call below stays readable.
    var workerFunction = new RDD.WorkerFunction(func);

    return new PipelinedRDD<U>(
        workerFunction,
        preservesPartitioning,
        _jvmObject,
        _sparkContext,
        _serializedMode);
}
/// <summary>
/// Produces a new RDD by applying a function to each partition of this RDD.
/// When this RDD is pipelinable, the existing worker function and the new one
/// are chained into a single worker function; otherwise the call is forwarded
/// to the base implementation.
/// </summary>
/// <typeparam name="U">The element type of the new RDD</typeparam>
/// <param name="newFunc">The function to be applied to each partition</param>
/// <param name="preservesPartitioning">
/// Requested partitioning flag. On the pipelined path the resulting flag is
/// true only if both this argument and this RDD's own flag are true.
/// </param>
/// <returns>A new RDD</returns>
internal override RDD<U> MapPartitionsWithIndexInternal<U>(
    RDD.WorkerFunction.ExecuteDelegate newFunc,
    bool preservesPartitioning = false)
{
    // Not pipelinable: let the base class build the RDD from this one.
    if (!IsPipelinable())
    {
        return base.MapPartitionsWithIndexInternal<U>(newFunc, preservesPartitioning);
    }

    // Chain the current function with the new one, in that order.
    RDD.WorkerFunction chained = RDD.WorkerFunction.Chain(
        new RDD.WorkerFunction(_func.Func),
        new RDD.WorkerFunction(newFunc));

    bool keepsPartitioning = preservesPartitioning && _preservesPartitioning;

    // The chained RDD is built on the previous RDD's JVM object reference.
    return new PipelinedRDD<U>(
        chained,
        keepsPartitioning,
        _prevRddJvmObjRef,
        _sparkContext,
        _serializedMode);
}
/// <summary>
/// Runs the command's worker function over the data read from the input
/// stream and writes every produced element to the output stream.
/// </summary>
/// <param name="inputStream">Stream the input elements are read from</param>
/// <param name="outputStream">Stream the results are written to</param>
/// <param name="splitIndex">Split index passed to the worker function</param>
/// <param name="command">
/// Supplies the worker function and the serializer/deserializer modes
/// </param>
/// <returns>
/// Statistics for this run; the entry count is incremented once per element written
/// </returns>
internal CommandExecutorStat Execute(
    Stream inputStream,
    Stream outputStream,
    int splitIndex,
    RDDCommand command)
{
    var executorStat = new CommandExecutorStat();

    CommandSerDe.SerializedMode outputMode = command.SerializerMode;
    CommandSerDe.SerializedMode inputMode = command.DeserializerMode;
    RDD.WorkerFunction.ExecuteDelegate workerFunc = command.WorkerFunction.Func;

    // Input is read using the deserializer mode; results are written using the serializer mode.
    var inputIterator = GetInputIterator(inputStream, inputMode);
    var results = workerFunc(splitIndex, inputIterator);

    foreach (object result in results)
    {
        WriteOutput(outputStream, outputMode, result);
        executorStat.NumEntriesProcessed++;
    }

    return executorStat;
}