Пример #1
0
 /// <summary>
 /// Builds a <c>PipelinedRDD</c> on top of this RDD from a raw worker delegate.
 /// </summary>
 /// <typeparam name="U">Element type of the resulting RDD</typeparam>
 /// <param name="func">Delegate that is wrapped in a new <c>RDD.WorkerFunction</c></param>
 /// <param name="preservesPartitioning">
 /// Passed through unchanged to the new <c>PipelinedRDD</c>
 /// </param>
 /// <returns>
 /// A new <c>PipelinedRDD</c> constructed with this RDD's <c>_jvmObject</c>,
 /// <c>_sparkContext</c> and <c>_serializedMode</c>
 /// </returns>
 internal virtual RDD<U> MapPartitionsWithIndexInternal<U>(
     RDD.WorkerFunction.ExecuteDelegate func,
     bool preservesPartitioning = false)
 {
     var workerFunction = new RDD.WorkerFunction(func);

     return new PipelinedRDD<U>(
         workerFunction,
         preservesPartitioning,
         _jvmObject,
         _sparkContext,
         _serializedMode);
 }
Пример #2
0
        /// <summary>
        /// Applies <paramref name="newFunc"/> on top of this pipelined RDD. When
        /// <c>IsPipelinable()</c> reports true, the existing worker function and
        /// <paramref name="newFunc"/> are chained into a single worker function;
        /// otherwise the call is forwarded to the base implementation.
        /// </summary>
        /// <typeparam name="U">Element type of the resulting RDD</typeparam>
        /// <param name="newFunc">Delegate to run after the current worker function</param>
        /// <param name="preservesPartitioning">
        /// Requested partitioning flag. On the chained path the resulting RDD gets
        /// this value AND-ed with this RDD's own <c>_preservesPartitioning</c>.
        /// </param>
        /// <returns>A new RDD of <typeparamref name="U"/></returns>
        internal override RDD<U> MapPartitionsWithIndexInternal<U>(
            RDD.WorkerFunction.ExecuteDelegate newFunc,
            bool preservesPartitioning = false)
        {
            // Not pipelinable: let the base class build a fresh PipelinedRDD.
            if (!IsPipelinable())
            {
                return base.MapPartitionsWithIndexInternal<U>(newFunc, preservesPartitioning);
            }

            // Chain the current function with the new one so both run as one worker function.
            var currentStage = new RDD.WorkerFunction(_func.Func);
            var nextStage = new RDD.WorkerFunction(newFunc);
            RDD.WorkerFunction chained = RDD.WorkerFunction.Chain(currentStage, nextStage);

            bool keepPartitioning = preservesPartitioning && _preservesPartitioning;

            // The chained RDD is built on _prevRddJvmObjRef, the same reference this RDD holds.
            return new PipelinedRDD<U>(
                chained,
                keepPartitioning,
                _prevRddJvmObjRef,
                _sparkContext,
                _serializedMode);
        }
Пример #3
0
        /// <summary>
        /// Runs the command's worker function over the input read from
        /// <paramref name="inputStream"/> and writes every produced item to
        /// <paramref name="outputStream"/>.
        /// </summary>
        /// <param name="inputStream">Stream the input iterator reads from</param>
        /// <param name="outputStream">Stream each output item is written to</param>
        /// <param name="splitIndex">Split index handed to the worker function</param>
        /// <param name="command">
        /// Supplies the worker function together with the serializer and
        /// deserializer modes
        /// </param>
        /// <returns>
        /// Statistics whose <c>NumEntriesProcessed</c> is incremented once per
        /// output item written
        /// </returns>
        internal CommandExecutorStat Execute(
            Stream inputStream,
            Stream outputStream,
            int splitIndex,
            RDDCommand command)
        {
            CommandExecutorStat result = new CommandExecutorStat();

            CommandSerDe.SerializedMode outputMode = command.SerializerMode;
            CommandSerDe.SerializedMode inputMode = command.DeserializerMode;

            RDD.WorkerFunction.ExecuteDelegate worker = command.WorkerFunction.Func;

            // Input is decoded with the deserializer mode; results are encoded
            // with the serializer mode.
            var input = GetInputIterator(inputStream, inputMode);

            foreach (object item in worker(splitIndex, input))
            {
                WriteOutput(outputStream, outputMode, item);
                result.NumEntriesProcessed++;
            }

            return result;
        }