/// <summary>
/// Explicitly converts KeyValuePair&lt;K, V&gt; to KeyValuePair&lt;K, dynamic&gt;, since those
/// two types are incompatible (unlike V and dynamic). The conversion is expressed as a new
/// pipelined RDD whose worker function is
/// <c>DynamicTypingWrapper&lt;K, V, W1, W2, W3&gt;.Execute</c>.
/// </summary>
/// <typeparam name="K">Key type of the pairs.</typeparam>
/// <typeparam name="V">Type argument forwarded to the typing wrapper.</typeparam>
/// <typeparam name="W1">Type argument forwarded to the typing wrapper.</typeparam>
/// <typeparam name="W2">Type argument forwarded to the typing wrapper.</typeparam>
/// <typeparam name="W3">Type argument forwarded to the typing wrapper.</typeparam>
/// <param name="self">The RDD the new pipelined RDD is chained onto.</param>
/// <returns>
/// A new pipelined RDD that preserves partitioning, carries over the proxy, serialized mode,
/// Spark context and partitioner of <paramref name="self"/>, and uses byte serialization.
/// </returns>
private static RDD<KeyValuePair<K, dynamic>> MapPartitionsWithIndex<K, V, W1, W2, W3>(this RDD<KeyValuePair<K, dynamic>> self)
{
    var typingWrapper = new DynamicTypingWrapper<K, V, W1, W2, W3>();
    var wrapperFunc = new CSharpWorkerFunc(typingWrapper.Execute);

    var wrappedRdd = new PipelinedRDD<KeyValuePair<K, dynamic>>();
    wrappedRdd.workerFunc = wrapperFunc;
    wrappedRdd.preservesPartitioning = true;

    // The new RDD continues from the state of the RDD it wraps.
    wrappedRdd.previousRddProxy = self.rddProxy;
    wrappedRdd.prevSerializedMode = self.serializedMode;
    wrappedRdd.sparkContext = self.sparkContext;

    // No proxy is assigned here; the field is explicitly reset to null.
    wrappedRdd.rddProxy = null;
    wrappedRdd.serializedMode = SerializedMode.Byte;
    wrappedRdd.partitioner = self.partitioner;

    return wrappedRdd;
}
/// <summary>
/// Builds the binary command payload for a worker function.
/// </summary>
/// <remarks>
/// Payload layout, in order:
/// 12 zero bytes reserved for RddId, stageId and partitionId (filled in by CSharpRDD.scala);
/// the deserializer mode name; the serializer mode name; the serialized worker function.
/// Every part after the reserved header is preceded by its byte length as a 4-byte integer,
/// most significant byte first. Mode names are UTF-8 encoded.
/// </remarks>
/// <param name="workerFunc">Worker function to serialize into the payload.</param>
/// <param name="deserializerMode">Mode whose name is written as the deserializer mode.</param>
/// <param name="serializerMode">Mode whose name is written as the serializer mode.</param>
/// <returns>The concatenated payload bytes.</returns>
internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
{
    // NOTE(review): BinaryFormatter is obsolete in current .NET; it is kept because the
    // payload format depends on it and the consumer is not visible from here.
    byte[] funcBytes;
    using (var stream = new MemoryStream())
    {
        new BinaryFormatter().Serialize(stream, workerFunc);
        funcBytes = stream.ToArray();
    }

    var commandPayloadBytesList = new List<byte[]>();

    // Appends a 4-byte length prefix followed by the data itself.
    // BitConverter emits bytes in host order, so the swap is only applied on little-endian
    // hosts; an unconditional reverse would flip the wire order on big-endian machines.
    Action<byte[]> appendWithLength = data =>
    {
        var lengthPrefix = BitConverter.GetBytes(data.Length);
        if (BitConverter.IsLittleEndian)
        {
            Array.Reverse(lengthPrefix);
        }

        commandPayloadBytesList.Add(lengthPrefix);
        commandPayloadBytesList.Add(data);
    };

    // reserve 12 bytes for RddId, stageId and partitionId, this info will be filled in CSharpRDD.scala
    // (a freshly allocated array is already zeroed, so no explicit clearing is needed)
    commandPayloadBytesList.Add(new byte[12]);

    // add deserializer mode
    appendWithLength(Encoding.UTF8.GetBytes(deserializerMode.ToString()));

    // add serializer mode
    appendWithLength(Encoding.UTF8.GetBytes(serializerMode.ToString()));

    // add func
    appendWithLength(funcBytes);

    return commandPayloadBytesList.SelectMany(byteArray => byteArray).ToArray();
}
//TODO - give generic types a better id
/// <summary>
/// Applies <paramref name="newFunc"/> to each partition. When this RDD is pipelinable the
/// new function is combined with the existing worker function into a single pipelined RDD;
/// otherwise the call is delegated to the base implementation.
/// </summary>
/// <typeparam name="U1">Element type of the resulting RDD.</typeparam>
/// <param name="newFunc">Function taking an int and the partition's elements, returning the new elements.</param>
/// <param name="preservesPartitioningParam">
/// Combined (logical AND) with this RDD's own preservesPartitioning flag for the new RDD.
/// </param>
/// <returns>The resulting RDD.</returns>
public override RDD<U1> MapPartitionsWithIndex<U1>(Func<int, IEnumerable<U>, IEnumerable<U1>> newFunc, bool preservesPartitioningParam = false)
{
    if (!IsPipelinable())
    {
        return base.MapPartitionsWithIndex(newFunc, preservesPartitioningParam);
    }

    // Combine the new function with the current worker function; the existing stack
    // trace is passed along to the new worker function.
    var combinedHelper = new MapPartitionsWithIndexHelper<U, U1>(newFunc, workerFunc.Func);
    var combinedWorkerFunc = new CSharpWorkerFunc(combinedHelper.Execute, workerFunc.StackTrace);

    // NOTE(review): partitioner is gated on this RDD's preservesPartitioning flag, not on
    // the combined flag assigned to the new RDD - confirm that this is intended.
    return new PipelinedRDD<U1>
    {
        workerFunc = combinedWorkerFunc,
        preservesPartitioning = this.preservesPartitioning && preservesPartitioningParam,
        previousRddProxy = this.previousRddProxy,
        prevSerializedMode = this.prevSerializedMode,
        sparkContext = this.sparkContext,
        rddProxy = null,
        serializedMode = SerializedMode.Byte,
        partitioner = this.preservesPartitioning ? this.partitioner : null
    };
}
/// <summary>
/// Builds the binary command payload for a worker function, including the run mode.
/// </summary>
/// <remarks>
/// Payload layout, in order:
/// 12 zero bytes reserved for RddId, stageId and partitionId (filled in by CSharpRDD.scala);
/// the deserializer mode name; the serializer mode name; the run mode; the compilation dump
/// directory (only when the run mode is "R"); the serialized worker function.
/// Every part after the reserved header is preceded by its byte length as a 4-byte integer,
/// most significant byte first. Strings are UTF-8 encoded.
/// The run mode is read from the SPARKCLR_RUN_MODE environment variable (default "N") and
/// the dump directory from SPARKCLR_SCRIPT_COMPILATION_DIR (default ".").
/// </remarks>
/// <param name="workerFunc">Worker function to serialize into the payload.</param>
/// <param name="deserializerMode">Mode whose name is written as the deserializer mode.</param>
/// <param name="serializerMode">Mode whose name is written as the serializer mode.</param>
/// <returns>The concatenated payload bytes.</returns>
internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
{
    // NOTE(review): BinaryFormatter is obsolete in current .NET; it is kept because the
    // payload format depends on it and the consumer is not visible from here.
    byte[] funcBytes;
    using (var stream = new MemoryStream())
    {
        new BinaryFormatter().Serialize(stream, workerFunc);
        funcBytes = stream.ToArray();
    }

    var commandPayloadBytesList = new List<byte[]>();

    // Appends a 4-byte length prefix followed by the data itself.
    // BitConverter emits bytes in host order, so the swap is only applied on little-endian
    // hosts; an unconditional reverse would flip the wire order on big-endian machines.
    Action<byte[]> appendWithLength = data =>
    {
        var lengthPrefix = BitConverter.GetBytes(data.Length);
        if (BitConverter.IsLittleEndian)
        {
            Array.Reverse(lengthPrefix);
        }

        commandPayloadBytesList.Add(lengthPrefix);
        commandPayloadBytesList.Add(data);
    };

    // reserve 12 bytes for RddId, stageId and partitionId, this info will be filled in CSharpRDD.scala
    // (a freshly allocated array is already zeroed, so no explicit clearing is needed)
    commandPayloadBytesList.Add(new byte[12]);

    // add deserializer mode
    appendWithLength(Encoding.UTF8.GetBytes(deserializerMode.ToString()));

    // add serializer mode
    appendWithLength(Encoding.UTF8.GetBytes(serializerMode.ToString()));

    // add run mode
    // N - normal
    // R - repl
    var runMode = Environment.GetEnvironmentVariable("SPARKCLR_RUN_MODE") ?? "N";
    appendWithLength(Encoding.UTF8.GetBytes(runMode));

    // The comparison kind is left unchanged on purpose: whoever parses this payload has to
    // take the same branch, and that code is not visible from here.
    if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
    {
        // add compilation dump directory
        var compilationDumpDir = Environment.GetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR") ?? ".";
        appendWithLength(Encoding.UTF8.GetBytes(compilationDumpDir));
    }

    // add func
    appendWithLength(funcBytes);

    return commandPayloadBytesList.SelectMany(byteArray => byteArray).ToArray();
}
/// <summary>
/// Used to chain functions: builds a new worker function whose delegate is the Execute
/// method of a chain helper constructed from the two given worker functions' delegates.
/// </summary>
/// <param name="innerCSharpWorkerFunc">Worker function whose delegate is passed first to the chain helper.</param>
/// <param name="outCSharpWorkerFunc">Worker function whose delegate is passed second to the chain helper.</param>
/// <returns>A new worker function wrapping the chain helper's Execute method.</returns>
public static CSharpWorkerFunc Chain(CSharpWorkerFunc innerCSharpWorkerFunc, CSharpWorkerFunc outCSharpWorkerFunc)
{
    var chainHelper = new CSharpWrokerFuncChainHelper(innerCSharpWorkerFunc.Func, outCSharpWorkerFunc.Func);
    return new CSharpWorkerFunc(chainHelper.Execute);
}