/// <summary>
/// Reads, in order, the serializer mode, the deserializer mode, the run mode and a
/// serialized payload from <paramref name="stream"/>, then deserializes the payload
/// into a <see cref="UdfWrapperData"/>.
/// </summary>
/// <param name="stream">Stream to read the command from</param>
/// <param name="serializerMode">Receives the parsed serializer mode</param>
/// <param name="deserializerMode">Receives the parsed deserializer mode</param>
/// <param name="runMode">Receives the run mode string as read from the stream</param>
/// <returns>The deserialized UDF wrapper data</returns>
/// <exception cref="InvalidDataException">
/// Thrown when either mode string cannot be parsed as a <see cref="SerializedMode"/>.
/// </exception>
private static UdfWrapperData GetUdfWrapperDataFromStream(
    Stream stream,
    out SerializedMode serializerMode,
    out SerializedMode deserializerMode,
    out string runMode)
{
    string serializerModeText = SerDe.ReadString(stream);
    bool serializerModeParsed = Enum.TryParse(serializerModeText, out serializerMode);
    if (!serializerModeParsed)
    {
        throw new InvalidDataException("Serializer mode is not valid.");
    }

    string deserializerModeText = SerDe.ReadString(stream);
    bool deserializerModeParsed = Enum.TryParse(deserializerModeText, out deserializerMode);
    if (!deserializerModeParsed)
    {
        throw new InvalidDataException("Deserializer mode is not valid.");
    }

    runMode = SerDe.ReadString(stream);

    // The payload is wrapped in a non-writable MemoryStream before deserialization.
    byte[] payload = SerDe.ReadBytes(stream);
    var payloadStream = new MemoryStream(payload, false);
    object wrapperData = new BinaryFormatter().Deserialize(payloadStream);
    return (UdfWrapperData)wrapperData;
}
/// <summary>
/// Builds the command payload for <paramref name="func"/>: the deserializer mode name,
/// the serializer mode name and the BinaryFormatter-serialized function, each preceded
/// by its 4-byte length (the bytes from <see cref="BitConverter.GetBytes(int)"/>, reversed).
/// </summary>
/// <param name="func">Object to serialize with <see cref="BinaryFormatter"/></param>
/// <param name="deserializerMode">Mode whose name is written first</param>
/// <param name="serializerMode">Mode whose name is written second</param>
/// <returns>The concatenated payload bytes</returns>
internal static byte[] BuildCommand(object func, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
{
    // Serialize the function up front, before any payload bytes are assembled.
    var funcStream = new MemoryStream();
    new BinaryFormatter().Serialize(funcStream, func);

    var payload = new List<byte>();

    // Appends one section preceded by its reversed 4-byte length.
    Action<byte[]> appendLengthPrefixed = section =>
    {
        byte[] sizePrefix = BitConverter.GetBytes(section.Length);
        Array.Reverse(sizePrefix);
        payload.AddRange(sizePrefix);
        payload.AddRange(section);
    };

    appendLengthPrefixed(Encoding.UTF8.GetBytes(deserializerMode.ToString()));
    appendLengthPrefixed(Encoding.UTF8.GetBytes(serializerMode.ToString()));
    appendLengthPrefixed(funcStream.ToArray());

    return payload.ToArray();
}
/// <summary>
/// Reads the serializer mode, deserializer mode, run mode and serialized
/// <see cref="UdfWrapperData"/> from <paramref name="stream"/>, then rebuilds the
/// delegate of type <typeparamref name="T"/> from that data.
/// </summary>
/// <typeparam name="T">Delegate type the deserialized UDF is cast to</typeparam>
/// <param name="stream">Stream to read the command from</param>
/// <param name="serializerMode">Receives the parsed serializer mode</param>
/// <param name="deserializerMode">Receives the parsed deserializer mode</param>
/// <param name="runMode">Receives the run mode string as read from the stream</param>
/// <returns>The deserialized delegate</returns>
/// <exception cref="InvalidDataException">
/// Thrown when either mode string cannot be parsed as a <see cref="SerializedMode"/>.
/// </exception>
internal static T Deserialize<T>(
    Stream stream,
    out SerializedMode serializerMode,
    out SerializedMode deserializerMode,
    out string runMode)
    where T : Delegate
{
    string serializerModeText = SerDe.ReadString(stream);
    if (!Enum.TryParse(serializerModeText, out serializerMode))
    {
        throw new InvalidDataException("Serializer mode is not valid.");
    }

    string deserializerModeText = SerDe.ReadString(stream);
    if (!Enum.TryParse(deserializerModeText, out deserializerMode))
    {
        throw new InvalidDataException("Deserializer mode is not valid.");
    }

    runMode = SerDe.ReadString(stream);

    byte[] payload = SerDe.ReadBytes(stream);
    var payloadStream = new MemoryStream(payload, false);
    var wrapperData = (UdfWrapperData)new BinaryFormatter().Deserialize(payloadStream);

    // Both cursors are advanced by DeserializeUdfs as it walks the wrapper data.
    int nodesConsumed = 0;
    int udfsConsumed = 0;
    var result = (T)DeserializeUdfs<T>(wrapperData, ref nodesConsumed, ref udfsConsumed);

    // Every node and every UDF entry must have been consumed.
    Debug.Assert(nodesConsumed == wrapperData.UdfWrapperNodes.Length);
    Debug.Assert(udfsConsumed == wrapperData.Udfs.Length);

    return result;
}
/// <summary>
/// Creates a DStream over the given proxy, bound to the given streaming context.
/// The stream starts out neither cached nor checkpointed.
/// </summary>
/// <param name="dstreamProxy">Proxy stored as the backing DStream proxy</param>
/// <param name="streamingContext">Streaming context this DStream belongs to</param>
/// <param name="serializedMode">Serialization mode stored for this DStream</param>
internal DStream(
    IDStreamProxy dstreamProxy,
    StreamingContext streamingContext,
    SerializedMode serializedMode = SerializedMode.Byte)
{
    this.streamingContext = streamingContext;
    this.dstreamProxy = dstreamProxy;
    this.serializedMode = serializedMode;

    // Explicitly reset the caching/checkpointing flags.
    this.isCached = false;
    this.isCheckpointed = false;
}
/// <summary>
/// Creates an RDD that wraps an already existing JVM RDD object. Mainly called by
/// SparkContext when creating the first RDD, e.g. via
/// <see cref="SparkContext.Parallelize{T}(IEnumerable{T}, int?)"/>.
/// </summary>
/// <param name="jvmObject">Reference to the RDD JVM object</param>
/// <param name="sparkContext">SparkContext object</param>
/// <param name="serializedMode">Serialization mode for the current RDD</param>
internal RDD(
    JvmObjectReference jvmObject,
    SparkContext sparkContext,
    SerializedMode serializedMode)
{
    // Plain field capture; nothing else is done at construction time.
    this._jvmObject = jvmObject;
    this._sparkContext = sparkContext;
    this._serializedMode = serializedMode;
}
/// <summary>
/// Creates a pipelined RDD on top of a previous RDD. The base constructor is invoked
/// with <see cref="SerializedMode.Byte"/> as the current serialization mode and
/// <paramref name="prevSerializedMode"/> as the previous one.
/// </summary>
/// <param name="func">Worker function to apply; must not be null</param>
/// <param name="preservesPartitioning">Stored flag for partitioning preservation</param>
/// <param name="prevRddJvmObjRef">Reference to the JVM object of the previous RDD</param>
/// <param name="sparkContext">SparkContext object</param>
/// <param name="prevSerializedMode">Serialization mode of the previous RDD</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="func"/> is null.
/// </exception>
internal PipelinedRDD(
    RDD.WorkerFunction func,
    bool preservesPartitioning,
    JvmObjectReference prevRddJvmObjRef,
    SparkContext sparkContext,
    SerializedMode prevSerializedMode)
    : base(prevRddJvmObjRef, sparkContext, SerializedMode.Byte, prevSerializedMode)
{
    // The single-string ArgumentNullException constructor interprets its argument as
    // the parameter name, so pass the name and the message separately.
    _func = func ?? throw new ArgumentNullException(nameof(func), "UDF cannot be null.");
    _preservesPartitioning = preservesPartitioning;
}
/// <summary>
/// Reads UDF wrapper data from <paramref name="stream"/> and deserializes it into one
/// of four delegate kinds, chosen by the type named in the first wrapper node:
/// DataFrame grouped-map, DataFrame, Arrow grouped-map, or (as the fallback) Arrow.
/// </summary>
/// <param name="stream">Stream to read the command from</param>
/// <param name="serializerMode">Receives the parsed serializer mode</param>
/// <param name="deserializerMode">Receives the parsed deserializer mode</param>
/// <param name="runMode">Receives the run mode string as read from the stream</param>
/// <returns>The deserialized delegate, typed as <see cref="object"/></returns>
/// <exception cref="InvalidDataException">
/// Thrown when the type name stored in the first wrapper node cannot be resolved.
/// </exception>
internal static object DeserializeArrowOrDataFrameUdf(
    Stream stream,
    out SerializedMode serializerMode,
    out SerializedMode deserializerMode,
    out string runMode)
{
    UdfWrapperData udfWrapperData = GetUdfWrapperDataFromStream(
        stream,
        out serializerMode,
        out deserializerMode,
        out runMode);

    int nodeIndex = 0;
    int udfIndex = 0;

    // The first node's type decides which delegate type to deserialize to.
    UdfWrapperNode node = udfWrapperData.UdfWrapperNodes[nodeIndex];
    Type nodeType = Type.GetType(node.TypeName);
    if (nodeType == null)
    {
        // Type.GetType returns null for an unresolvable name; without this guard the
        // IsSubclassOf call below would fail with an opaque NullReferenceException.
        throw new InvalidDataException(
            $"Unable to resolve UDF wrapper type '{node.TypeName}'.");
    }

    Delegate udf;
    if (nodeType == typeof(DataFrameGroupedMapUdfWrapper))
    {
        udf = (DataFrameGroupedMapWorkerFunction.ExecuteDelegate)
            DeserializeUdfs<DataFrameGroupedMapWorkerFunction.ExecuteDelegate>(
                udfWrapperData,
                ref nodeIndex,
                ref udfIndex);
    }
    else if (nodeType == typeof(DataFrameWorkerFunction)
        || nodeType.IsSubclassOf(typeof(DataFrameUdfWrapper)))
    {
        udf = (DataFrameWorkerFunction.ExecuteDelegate)
            DeserializeUdfs<DataFrameWorkerFunction.ExecuteDelegate>(
                udfWrapperData,
                ref nodeIndex,
                ref udfIndex);
    }
    else if (nodeType == typeof(ArrowGroupedMapUdfWrapper))
    {
        udf = (ArrowGroupedMapWorkerFunction.ExecuteDelegate)
            DeserializeUdfs<ArrowGroupedMapWorkerFunction.ExecuteDelegate>(
                udfWrapperData,
                ref nodeIndex,
                ref udfIndex);
    }
    else
    {
        // Any other resolvable type is treated as an Arrow worker function.
        udf = (ArrowWorkerFunction.ExecuteDelegate)
            DeserializeUdfs<ArrowWorkerFunction.ExecuteDelegate>(
                udfWrapperData,
                ref nodeIndex,
                ref udfIndex);
    }

    // Check all the data is consumed.
    Debug.Assert(nodeIndex == udfWrapperData.UdfWrapperNodes.Length);
    Debug.Assert(udfIndex == udfWrapperData.Udfs.Length);

    return udf;
}
/// <summary>
/// Lazily reads length-prefixed elements from the given stream and deserializes each
/// one with the deserializer selected for <paramref name="serializedMode"/>.
/// Enumeration ends when no length can be read or the length is not positive.
/// </summary>
/// <param name="stream">Stream object to read from</param>
/// <param name="serializedMode">Serialized mode for each element</param>
/// <returns>Collection of deserialized objects</returns>
public IEnumerable<object> Collect(Stream stream, SerializedMode serializedMode)
{
    IDeserializer elementReader = GetDeserializer(serializedMode);

    while (true)
    {
        int? elementLength = SerDe.ReadBytesLength(stream);
        if (elementLength == null || elementLength.GetValueOrDefault() <= 0)
        {
            yield break;
        }

        yield return elementReader.Deserialize(stream, elementLength.GetValueOrDefault());
    }
}
/// <summary>
/// Maps a serialization mode to a new deserializer instance.
/// <see cref="SerializedMode.Byte"/> yields a <see cref="BinaryDeserializer"/> and
/// <see cref="SerializedMode.String"/> yields a <see cref="StringDeserializer"/>.
/// </summary>
/// <param name="mode">Serialization mode</param>
/// <returns>A deserializer object</returns>
/// <exception cref="ArgumentException">Thrown for any other mode.</exception>
internal static IDeserializer GetDeserializer(SerializedMode mode)
{
    if (mode == SerializedMode.Byte)
    {
        return new BinaryDeserializer();
    }

    if (mode == SerializedMode.String)
    {
        return new StringDeserializer();
    }

    throw new ArgumentException($"Unsupported mode found {mode}");
}
/// <summary>
/// Creates an RDD that is pipelined from a previous RDD. Mainly called by
/// <see cref="PipelinedRDD{T}"/>.
/// </summary>
/// <param name="prevRddJvmObjRef">
/// Reference to the JVM object of the RDD the pipeline is created from
/// </param>
/// <param name="sparkContext">SparkContext object</param>
/// <param name="serializedMode">Serialization mode for the current RDD</param>
/// <param name="prevSerializedMode">Serialization mode for the previous RDD</param>
internal RDD(
    JvmObjectReference prevRddJvmObjRef,
    SparkContext sparkContext,
    SerializedMode serializedMode,
    SerializedMode prevSerializedMode)
{
    // _jvmObject is intentionally left unset here: this constructor runs from the
    // PipelinedRDD constructor, before the JVM object for this RDD has been created.
    this._prevRddJvmObjRef = prevRddJvmObjRef;
    this._sparkContext = sparkContext;
    this._serializedMode = serializedMode;
    this._prevSerializedMode = prevSerializedMode;
}
/// <summary>
/// Connects to the loopback port given by <paramref name="info"/>, optionally sends
/// the secret and logs the reply, then lazily reads length-prefixed buffers and
/// converts each according to <paramref name="serializedMode"/>.
/// Reading stops at the first null or empty buffer.
/// </summary>
/// <param name="info">Port and optional secret for the connection</param>
/// <param name="serializedMode">How each buffer is converted to an object</param>
/// <param name="type">
/// For <see cref="SerializedMode.Pair"/>, the type whose first constructor is invoked
/// with the two deserialized values
/// </param>
/// <returns>The lazily produced objects</returns>
public IEnumerable<dynamic> Collect(SocketInfo info, SerializedMode serializedMode, Type type)
{
    IFormatter formatter = new BinaryFormatter();
    var sock = SocketFactory.CreateSocket();
    sock.Connect(IPAddress.Loopback, info.Port, null);

    using (var s = sock.GetStream())
    {
        if (info.Secret != null)
        {
            SerDe.Write(s, info.Secret);
            var reply = SerDe.ReadString(s);
            Logger.LogDebug("Connect back to JVM: " + reply);
        }

        while (true)
        {
            byte[] buffer = SerDe.ReadBytes(s);
            if (buffer == null || buffer.Length <= 0)
            {
                break;
            }

            switch (serializedMode)
            {
                case SerializedMode.Byte:
                {
                    yield return formatter.Deserialize(new MemoryStream(buffer));
                    break;
                }

                case SerializedMode.String:
                {
                    yield return Encoding.UTF8.GetString(buffer);
                    break;
                }

                case SerializedMode.Pair:
                {
                    // A pair occupies two consecutive buffers: the one already read
                    // and the next one on the stream.
                    MemoryStream firstStream = new MemoryStream(buffer);
                    MemoryStream secondStream = new MemoryStream(SerDe.ReadBytes(s));
                    ConstructorInfo pairConstructor = type.GetConstructors()[0];
                    yield return pairConstructor.Invoke(new object[]
                    {
                        formatter.Deserialize(firstStream),
                        formatter.Deserialize(secondStream)
                    });
                    break;
                }

                case SerializedMode.Row:
                {
                    foreach (var item in PythonSerDe.GetUnpickledObjects(buffer))
                    {
                        yield return (item as RowConstructor).GetRow();
                    }

                    break;
                }
            }
        }
    }
}
/// <summary>
/// Reads UDF wrapper data from <paramref name="stream"/> via
/// <c>GetUdfWrapperDataFromStream</c> and rebuilds the delegate of type
/// <typeparamref name="T"/> from it.
/// </summary>
/// <typeparam name="T">Delegate type the deserialized UDF is cast to</typeparam>
/// <param name="stream">Stream to read the command from</param>
/// <param name="serializerMode">Receives the serializer mode read from the stream</param>
/// <param name="deserializerMode">Receives the deserializer mode read from the stream</param>
/// <param name="runMode">Receives the run mode read from the stream</param>
/// <returns>The deserialized delegate</returns>
internal static T Deserialize<T>(
    Stream stream,
    out SerializedMode serializerMode,
    out SerializedMode deserializerMode,
    out string runMode)
    where T : Delegate
{
    UdfWrapperData wrapperData = GetUdfWrapperDataFromStream(
        stream,
        out serializerMode,
        out deserializerMode,
        out runMode);

    // Both cursors are advanced by DeserializeUdfs as it walks the wrapper data.
    int nodesConsumed = 0;
    int udfsConsumed = 0;
    object deserialized = DeserializeUdfs<T>(wrapperData, ref nodesConsumed, ref udfsConsumed);
    T result = (T)deserialized;

    // Every node and every UDF entry must have been consumed.
    Debug.Assert(nodesConsumed == wrapperData.UdfWrapperNodes.Length);
    Debug.Assert(udfsConsumed == wrapperData.Udfs.Length);

    return result;
}
/// <summary>
/// Builds the command payload for <paramref name="workerFunc"/>: 12 reserved zero
/// bytes, then the deserializer mode name, the serializer mode name and the
/// BinaryFormatter-serialized function, each preceded by its 4-byte length (the bytes
/// from <see cref="BitConverter.GetBytes(int)"/>, reversed).
/// </summary>
/// <param name="workerFunc">Worker function to serialize</param>
/// <param name="deserializerMode">Mode whose name is written first</param>
/// <param name="serializerMode">Mode whose name is written second</param>
/// <returns>The concatenated payload bytes</returns>
internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
{
    // Serialize the function up front, before any payload bytes are assembled.
    var funcStream = new MemoryStream();
    new BinaryFormatter().Serialize(funcStream, workerFunc);

    var payload = new List<byte>();

    // reserve 12 bytes for RddId, stageId and partitionId, this info will be filled in CSharpRDD.scala
    // (a freshly allocated byte array is already all zeros)
    payload.AddRange(new byte[12]);

    // Appends one section preceded by its reversed 4-byte length.
    Action<byte[]> appendLengthPrefixed = section =>
    {
        byte[] sizePrefix = BitConverter.GetBytes(section.Length);
        Array.Reverse(sizePrefix);
        payload.AddRange(sizePrefix);
        payload.AddRange(section);
    };

    appendLengthPrefixed(Encoding.UTF8.GetBytes(deserializerMode.ToString()));
    appendLengthPrefixed(Encoding.UTF8.GetBytes(serializerMode.ToString()));
    appendLengthPrefixed(funcStream.ToArray());

    return payload.ToArray();
}
/// <summary>
/// Connects to the given loopback port, then lazily reads length-prefixed buffers and
/// converts each according to <paramref name="serializedMode"/>. Reading stops at the
/// first null or empty buffer. The socket and its stream are released when enumeration
/// completes or the enumerator is disposed.
/// </summary>
/// <param name="port">Loopback TCP port to connect to</param>
/// <param name="serializedMode">How each buffer is converted to an object</param>
/// <param name="type">
/// For <see cref="SerializedMode.Pair"/>, the type whose first constructor is invoked
/// with the two deserialized values
/// </param>
/// <returns>The lazily produced objects</returns>
public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
{
    IFormatter formatter = new BinaryFormatter();

    // NetworkStream(Socket) does not take ownership of the socket, so disposing the
    // stream alone would leave the socket open; dispose the socket explicitly.
    using (Socket sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
    {
        sock.Connect(IPAddress.Loopback, port);

        using (NetworkStream s = new NetworkStream(sock))
        {
            byte[] buffer;
            while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0)
            {
                if (serializedMode == SerializedMode.Byte)
                {
                    MemoryStream ms = new MemoryStream(buffer);
                    yield return formatter.Deserialize(ms);
                }
                else if (serializedMode == SerializedMode.String)
                {
                    yield return Encoding.UTF8.GetString(buffer);
                }
                else if (serializedMode == SerializedMode.Pair)
                {
                    // A pair occupies two consecutive buffers: the one already read
                    // and the next one on the stream.
                    MemoryStream ms = new MemoryStream(buffer);
                    MemoryStream ms2 = new MemoryStream(SerDe.ReadBytes(s));
                    ConstructorInfo ci = type.GetConstructors()[0];
                    yield return ci.Invoke(new object[] { formatter.Deserialize(ms), formatter.Deserialize(ms2) });
                }
                else if (serializedMode == SerializedMode.Row)
                {
                    var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer);
                    foreach (var item in unpickledObjects)
                    {
                        yield return (item as RowConstructor).GetRow();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Connects a socket from <c>SocketFactory</c> to the given loopback port, then lazily
/// reads length-prefixed buffers and converts each according to
/// <paramref name="serializedMode"/>. Reading stops at the first null or empty buffer.
/// </summary>
/// <param name="port">Loopback port to connect to</param>
/// <param name="serializedMode">How each buffer is converted to an object</param>
/// <param name="type">
/// For <see cref="SerializedMode.Pair"/>, the type whose first constructor is invoked
/// with the two deserialized values
/// </param>
/// <returns>The lazily produced objects</returns>
public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
{
    IFormatter formatter = new BinaryFormatter();
    var sock = SocketFactory.CreateSocket();
    sock.Connect(IPAddress.Loopback, port);

    using (var s = sock.GetStream())
    {
        while (true)
        {
            byte[] buffer = SerDe.ReadBytes(s);
            if (buffer == null || buffer.Length <= 0)
            {
                break;
            }

            switch (serializedMode)
            {
                case SerializedMode.Byte:
                {
                    yield return formatter.Deserialize(new MemoryStream(buffer));
                    break;
                }

                case SerializedMode.String:
                {
                    yield return Encoding.UTF8.GetString(buffer);
                    break;
                }

                case SerializedMode.Pair:
                {
                    // A pair occupies two consecutive buffers: the one already read
                    // and the next one on the stream.
                    MemoryStream firstStream = new MemoryStream(buffer);
                    MemoryStream secondStream = new MemoryStream(SerDe.ReadBytes(s));
                    ConstructorInfo pairConstructor = type.GetConstructors()[0];
                    yield return pairConstructor.Invoke(new object[]
                    {
                        formatter.Deserialize(firstStream),
                        formatter.Deserialize(secondStream)
                    });
                    break;
                }

                case SerializedMode.Row:
                {
                    foreach (var item in PythonSerDe.GetUnpickledObjects(buffer))
                    {
                        yield return (item as RowConstructor).GetRow();
                    }

                    break;
                }
            }
        }
    }
}
/// <summary>
/// Creates a DStream over the given proxy, bound to the given streaming context.
/// </summary>
/// <param name="dstreamProxy">Proxy stored as the backing DStream proxy</param>
/// <param name="streamingContext">Streaming context this DStream belongs to</param>
/// <param name="serializedMode">Serialization mode stored for this DStream</param>
internal DStream(
    IDStreamProxy dstreamProxy,
    StreamingContext streamingContext,
    SerializedMode serializedMode = SerializedMode.Byte)
{
    // Plain member capture; nothing else is done at construction time.
    this.streamingContext = streamingContext;
    this.dstreamProxy = dstreamProxy;
    this.serializedMode = serializedMode;
}
/// <summary>
/// Wraps the result of <c>SparkContextProxy.CheckpointFile</c> for
/// <paramref name="filePath"/> in a new <see cref="RDD{T}"/> bound to this context.
/// </summary>
/// <typeparam name="T">Element type of the resulting RDD</typeparam>
/// <param name="filePath">Path passed through to the proxy</param>
/// <param name="serializedMode">Serialization mode for the resulting RDD</param>
/// <returns>The newly created RDD</returns>
internal RDD<T> CheckpointFile<T>(string filePath, SerializedMode serializedMode)
{
    var checkpointedRdd = SparkContextProxy.CheckpointFile(filePath);
    return new RDD<T>(checkpointedRdd, this, serializedMode);
}
/// <summary>
/// Serializes <paramref name="func"/> into a command payload. The payload holds, in
/// order: the serializer mode name, the deserializer mode name, the run mode, the
/// compilation dump directory (only when the run mode is "R"), and the
/// BinaryFormatter-serialized <see cref="UdfWrapperData"/>. Each section is preceded by
/// its 4-byte length (the bytes from <see cref="BitConverter.GetBytes(int)"/>, reversed).
/// </summary>
/// <param name="func">Delegate whose UDFs are serialized</param>
/// <param name="deserializerMode">Mode whose name is written second</param>
/// <param name="serializerMode">Mode whose name is written first</param>
/// <returns>The concatenated payload bytes</returns>
internal static byte[] Serialize(
    Delegate func,
    SerializedMode deserializerMode = SerializedMode.Byte,
    SerializedMode serializerMode = SerializedMode.Byte)
{
    // TODO: consider writing directly to a MemoryStream instead of a byte list.
    var payload = new List<byte>();

    // Appends one section preceded by its reversed 4-byte length.
    Action<byte[]> appendLengthPrefixed = section =>
    {
        byte[] sizePrefix = BitConverter.GetBytes(section.Length);
        Array.Reverse(sizePrefix);
        payload.AddRange(sizePrefix);
        payload.AddRange(section);
    };

    // Serializer mode first, then deserializer mode.
    appendLengthPrefixed(Encoding.UTF8.GetBytes(serializerMode.ToString()));
    appendLengthPrefixed(Encoding.UTF8.GetBytes(deserializerMode.ToString()));

    // Run mode:
    // N - normal
    // R - repl
    string runMode = Environment.GetEnvironmentVariable("SPARK_NET_RUN_MODE") ?? "N";
    appendLengthPrefixed(Encoding.UTF8.GetBytes(runMode));

    bool isReplMode = "R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase);
    if (isReplMode)
    {
        // Compilation dump directory, defaulting to the current directory.
        string compilationDumpDir =
            Environment.GetEnvironmentVariable("SPARK_NET_SCRIPT_COMPILATION_DIR") ?? ".";
        appendLengthPrefixed(Encoding.UTF8.GetBytes(compilationDumpDir));
    }

    // Collect the wrapper nodes and UDF data for the delegate.
    var wrapperNodes = new List<UdfWrapperNode>();
    var udfDataList = new List<UdfSerDe.UdfData>();
    SerializeUdfs(func, null, wrapperNodes, udfDataList);

    var wrapperData = new UdfWrapperData()
    {
        UdfWrapperNodes = wrapperNodes.ToArray(),
        Udfs = udfDataList.ToArray()
    };

    using (var wrapperStream = new MemoryStream())
    {
        new BinaryFormatter().Serialize(wrapperStream, wrapperData);
        appendLengthPrefixed(wrapperStream.ToArray());
    }

    return payload.ToArray();
}
/// <summary>
/// Builds the command payload for <paramref name="workerFunc"/>: 12 reserved zero
/// bytes, then the deserializer mode name, the serializer mode name, the run mode, the
/// compilation dump directory (only when the run mode is "R"), and the
/// BinaryFormatter-serialized function. Each section after the reserved bytes is
/// preceded by its 4-byte length (the bytes from
/// <see cref="BitConverter.GetBytes(int)"/>, reversed).
/// </summary>
/// <param name="workerFunc">Worker function to serialize</param>
/// <param name="deserializerMode">Mode whose name is written first</param>
/// <param name="serializerMode">Mode whose name is written second</param>
/// <returns>The concatenated payload bytes</returns>
internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
{
    // Serialize the function up front, before any payload bytes are assembled.
    var funcStream = new MemoryStream();
    new BinaryFormatter().Serialize(funcStream, workerFunc);

    var payload = new List<byte>();

    // reserve 12 bytes for RddId, stageId and partitionId, this info will be filled in CSharpRDD.scala
    // (a freshly allocated byte array is already all zeros)
    payload.AddRange(new byte[12]);

    // Appends one section preceded by its reversed 4-byte length.
    Action<byte[]> appendLengthPrefixed = section =>
    {
        byte[] sizePrefix = BitConverter.GetBytes(section.Length);
        Array.Reverse(sizePrefix);
        payload.AddRange(sizePrefix);
        payload.AddRange(section);
    };

    appendLengthPrefixed(Encoding.UTF8.GetBytes(deserializerMode.ToString()));
    appendLengthPrefixed(Encoding.UTF8.GetBytes(serializerMode.ToString()));

    // Run mode:
    // N - normal
    // R - repl
    string runMode = Environment.GetEnvironmentVariable("SPARKCLR_RUN_MODE") ?? "N";
    appendLengthPrefixed(Encoding.UTF8.GetBytes(runMode));

    bool isReplMode = "R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase);
    if (isReplMode)
    {
        // Compilation dump directory, defaulting to the current directory.
        string compilationDumpDir =
            Environment.GetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR") ?? ".";
        appendLengthPrefixed(Encoding.UTF8.GetBytes(compilationDumpDir));
    }

    // The serialized function goes last.
    appendLengthPrefixed(funcStream.ToArray());

    return payload.ToArray();
}
/// <summary>
/// Not implemented: this overload always throws and never reads from the port.
/// </summary>
/// <param name="port">Unused</param>
/// <param name="serializedMode">Unused</param>
/// <param name="type">Unused</param>
/// <returns>Never returns normally</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IEnumerable <dynamic> Collect(int port, SerializedMode serializedMode, Type type) { throw new NotImplementedException(); }
/// <summary>
/// Not implemented: this overload always throws and never reads from the port.
/// </summary>
/// <param name="port">Unused</param>
/// <param name="serializedMode">Unused</param>
/// <param name="type">Unused</param>
/// <returns>Never returns normally</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type) { throw new NotImplementedException(); }