Exemple #1
0
        /// <summary>
        /// Chains a pipelined stage onto <paramref name="self"/> whose worker function is
        /// <c>DynamicTypingWrapper&lt;K, V, W1, W2, W3&gt;.Execute</c>.
        /// Per the original author's note, the explicit conversion step is needed because
        /// KeyValuePair&lt;K, V&gt; and KeyValuePair&lt;K, dynamic&gt; are incompatible types,
        /// even though V itself converts to dynamic.
        /// </summary>
        /// <typeparam name="K">Key type of the pairs held by the RDD.</typeparam>
        /// <typeparam name="V">Forwarded to the wrapper; the statically typed value type per the note above.</typeparam>
        /// <typeparam name="W1">Forwarded unchanged to the wrapper; its meaning is not visible here.</typeparam>
        /// <typeparam name="W2">Forwarded unchanged to the wrapper; its meaning is not visible here.</typeparam>
        /// <typeparam name="W3">Forwarded unchanged to the wrapper; its meaning is not visible here.</typeparam>
        /// <param name="self">
        /// RDD the new stage is attached to. Its proxy, serialized mode, Spark context and
        /// partitioner are carried over to the result.
        /// </param>
        /// <returns>
        /// A new PipelinedRDD with no proxy of its own, <c>SerializedMode.Byte</c> output and
        /// <c>preservesPartitioning</c> set to true.
        /// </returns>
        private static RDD<KeyValuePair<K, dynamic>> MapPartitionsWithIndex<K, V, W1, W2, W3>(this RDD<KeyValuePair<K, dynamic>> self)
        {
            // Build the worker function first, exactly as before, so it is created
            // ahead of the RDD object that will own it.
            var typingWrapper = new DynamicTypingWrapper<K, V, W1, W2, W3>();
            var wrappedWorkerFunc = new CSharpWorkerFunc(typingWrapper.Execute);

            return new PipelinedRDD<KeyValuePair<K, dynamic>>
            {
                // the stage itself
                workerFunc = wrappedWorkerFunc,
                preservesPartitioning = true,

                // link back to the RDD this stage is chained onto
                previousRddProxy = self.rddProxy,
                prevSerializedMode = self.serializedMode,

                // state of the new RDD
                sparkContext = self.sparkContext,
                rddProxy = null,
                serializedMode = SerializedMode.Byte,
                partitioner = self.partitioner
            };
        }
Exemple #2
0
        /// <summary>
        /// Serializes a worker function and its (de)serializer modes into a single
        /// binary command payload.
        /// </summary>
        /// <remarks>
        /// Payload layout, in order:
        /// 1. 12 zero bytes reserved for RddId, stageId and partitionId (filled in by CSharpRDD.scala);
        /// 2. length-prefixed UTF-8 name of <paramref name="deserializerMode"/>;
        /// 3. length-prefixed UTF-8 name of <paramref name="serializerMode"/>;
        /// 4. length-prefixed BinaryFormatter image of <paramref name="workerFunc"/>.
        /// Every length prefix is a 4-byte Int32, most significant byte first.
        /// </remarks>
        /// <param name="workerFunc">Worker function to embed; serialized with BinaryFormatter.</param>
        /// <param name="deserializerMode">Mode whose name is written as the first string field.</param>
        /// <param name="serializerMode">Mode whose name is written as the second string field.</param>
        /// <returns>The concatenated command bytes.</returns>
        internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
        {
            // Writes one field: a 4-byte big-endian length followed by the payload.
            Action<MemoryStream, byte[]> writeLengthPrefixed = (target, payload) =>
            {
                var prefix = BitConverter.GetBytes(payload.Length);

                // BitConverter uses the host byte order, so only flip on little-endian
                // hosts; an unconditional reverse would emit little-endian lengths on
                // big-endian machines.
                if (BitConverter.IsLittleEndian)
                {
                    Array.Reverse(prefix);
                }

                target.Write(prefix, 0, prefix.Length);
                target.Write(payload, 0, payload.Length);
            };

            // Serialize the function up front (same order as before) and release the
            // scratch stream as soon as its bytes are copied out.
            byte[] funcBytes;
            using (var funcStream = new MemoryStream())
            {
                new BinaryFormatter().Serialize(funcStream, workerFunc);
                funcBytes = funcStream.ToArray();
            }

            using (var command = new MemoryStream())
            {
                // reserve 12 bytes for RddId, stageId and partitionId, this info will be filled in CSharpRDD.scala
                // (a freshly allocated array is already all zeros)
                var rddInfo = new byte[12];
                command.Write(rddInfo, 0, rddInfo.Length);

                // add deserializer mode
                writeLengthPrefixed(command, Encoding.UTF8.GetBytes(deserializerMode.ToString()));

                // add serializer mode
                writeLengthPrefixed(command, Encoding.UTF8.GetBytes(serializerMode.ToString()));

                // add func
                writeLengthPrefixed(command, funcBytes);

                return command.ToArray();
            }
        }
Exemple #3
0
        //TODO - give generic types a better id
        /// <summary>
        /// Applies <paramref name="newFunc"/> to every partition. When this RDD is still
        /// pipelinable, the new function is fused with the existing worker function into a
        /// single PipelinedRDD stage; otherwise the call is delegated to the base
        /// implementation.
        /// </summary>
        /// <typeparam name="U1">Element type produced by <paramref name="newFunc"/>.</typeparam>
        /// <param name="newFunc">Function receiving the partition index and the partition's elements.</param>
        /// <param name="preservesPartitioningParam">
        /// Whether <paramref name="newFunc"/> keeps the existing partitioning intact.
        /// </param>
        /// <returns>The resulting RDD of <typeparamref name="U1"/>.</returns>
        public override RDD<U1> MapPartitionsWithIndex<U1>(Func<int, IEnumerable<U>, IEnumerable<U1>> newFunc, bool preservesPartitioningParam = false)
        {
            if (!IsPipelinable())
            {
                return base.MapPartitionsWithIndex(newFunc, preservesPartitioningParam);
            }

            // Fuse the new function with the current one, keeping the stack trace
            // recorded on the existing worker function.
            var fusedWorkerFunc = new CSharpWorkerFunc(
                new MapPartitionsWithIndexHelper<U, U1>(newFunc, workerFunc.Func).Execute, workerFunc.StackTrace);

            // Partitioning survives only if BOTH the existing stage and the new function
            // preserve it.
            bool keepsPartitioning = preservesPartitioning && preservesPartitioningParam;

            return new PipelinedRDD<U1>
            {
                workerFunc = fusedWorkerFunc,
                preservesPartitioning = keepsPartitioning,
                previousRddProxy = this.previousRddProxy,
                prevSerializedMode = this.prevSerializedMode,
                sparkContext = this.sparkContext,
                rddProxy = null,
                serializedMode = SerializedMode.Byte,
                // Previously gated on this RDD's flag alone, so the partitioner leaked
                // through even when the caller said newFunc does not preserve
                // partitioning. Gate on the combined flag so the two properties agree.
                partitioner = keepsPartitioning ? partitioner : null
            };
        }
Exemple #4
0
        /// <summary>
        /// Serializes a worker function, its (de)serializer modes and the current run
        /// mode into a single binary command payload.
        /// </summary>
        /// <remarks>
        /// Payload layout, in order:
        /// 1. 12 zero bytes reserved for RddId, stageId and partitionId (filled in by CSharpRDD.scala);
        /// 2. length-prefixed UTF-8 name of <paramref name="deserializerMode"/>;
        /// 3. length-prefixed UTF-8 name of <paramref name="serializerMode"/>;
        /// 4. length-prefixed run mode read from SPARKCLR_RUN_MODE ("N" when unset);
        /// 5. only when the run mode is "R" (case-insensitive): length-prefixed value of
        ///    SPARKCLR_SCRIPT_COMPILATION_DIR ("." when unset);
        /// 6. length-prefixed BinaryFormatter image of <paramref name="workerFunc"/>.
        /// Every length prefix is a 4-byte Int32, most significant byte first.
        /// </remarks>
        /// <param name="workerFunc">Worker function to embed; serialized with BinaryFormatter.</param>
        /// <param name="deserializerMode">Mode whose name is written as the first string field.</param>
        /// <param name="serializerMode">Mode whose name is written as the second string field.</param>
        /// <returns>The concatenated command bytes.</returns>
        internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
        {
            // Writes one field: a 4-byte big-endian length followed by the payload.
            Action<MemoryStream, byte[]> writeLengthPrefixed = (target, payload) =>
            {
                var prefix = BitConverter.GetBytes(payload.Length);

                // BitConverter uses the host byte order, so only flip on little-endian
                // hosts; an unconditional reverse would emit little-endian lengths on
                // big-endian machines.
                if (BitConverter.IsLittleEndian)
                {
                    Array.Reverse(prefix);
                }

                target.Write(prefix, 0, prefix.Length);
                target.Write(payload, 0, payload.Length);
            };

            // Serialize the function up front (same order as before) and release the
            // scratch stream as soon as its bytes are copied out.
            byte[] funcBytes;
            using (var funcStream = new MemoryStream())
            {
                new BinaryFormatter().Serialize(funcStream, workerFunc);
                funcBytes = funcStream.ToArray();
            }

            using (var command = new MemoryStream())
            {
                // reserve 12 bytes for RddId, stageId and partitionId, this info will be filled in CSharpRDD.scala
                // (a freshly allocated array is already all zeros)
                var rddInfo = new byte[12];
                command.Write(rddInfo, 0, rddInfo.Length);

                // add deserializer mode
                writeLengthPrefixed(command, Encoding.UTF8.GetBytes(deserializerMode.ToString()));

                // add serializer mode
                writeLengthPrefixed(command, Encoding.UTF8.GetBytes(serializerMode.ToString()));

                // add run mode
                // N - normal
                // R - repl
                var runMode = Environment.GetEnvironmentVariable("SPARKCLR_RUN_MODE") ?? "N";
                writeLengthPrefixed(command, Encoding.UTF8.GetBytes(runMode));

                // Comparison kind is deliberately left as it was so that the decision
                // to emit the extra field below is unchanged for every input.
                if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
                {
                    // add compilation dump directory
                    var compilationDumpDir = Environment.GetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR") ?? ".";
                    writeLengthPrefixed(command, Encoding.UTF8.GetBytes(compilationDumpDir));
                }

                // add func
                writeLengthPrefixed(command, funcBytes);

                return command.ToArray();
            }
        }
 /// <summary>
 /// Combines two worker functions into one by handing both underlying delegates to a
 /// CSharpWrokerFuncChainHelper and wrapping the helper's Execute method in a new
 /// CSharpWorkerFunc.
 /// </summary>
 /// <param name="innerCSharpWorkerFunc">First delegate given to the chain helper (presumably applied first - confirm against the helper).</param>
 /// <param name="outCSharpWorkerFunc">Second delegate given to the chain helper (presumably applied to the inner result - confirm against the helper).</param>
 /// <returns>A new CSharpWorkerFunc built from the helper; only the delegates of the inputs are used.</returns>
 public static CSharpWorkerFunc Chain(CSharpWorkerFunc innerCSharpWorkerFunc, CSharpWorkerFunc outCSharpWorkerFunc)
 {
     var chainHelper = new CSharpWrokerFuncChainHelper(innerCSharpWorkerFunc.Func, outCSharpWorkerFunc.Func);

     return new CSharpWorkerFunc(chainHelper.Execute);
 }