Beispiel #1
0
        /// <summary>
        /// Reads the command header from <paramref name="stream"/> (serializer mode,
        /// deserializer mode, run mode, in that order) and then deserializes the byte
        /// payload that follows into a <see cref="UdfWrapperData"/>.
        /// </summary>
        /// <param name="stream">Stream to read the command from</param>
        /// <param name="serializerMode">Mode parsed from the first string read</param>
        /// <param name="deserializerMode">Mode parsed from the second string read</param>
        /// <param name="runMode">Third string read, returned as-is</param>
        /// <returns>The deserialized UDF wrapper data</returns>
        /// <exception cref="InvalidDataException">
        /// Either mode string is not a valid <see cref="SerializedMode"/>.
        /// </exception>
        private static UdfWrapperData GetUdfWrapperDataFromStream(
            Stream stream,
            out SerializedMode serializerMode,
            out SerializedMode deserializerMode,
            out string runMode)
        {
            string serializerModeName = SerDe.ReadString(stream);
            if (!Enum.TryParse(serializerModeName, out serializerMode))
            {
                throw new InvalidDataException("Serializer mode is not valid.");
            }

            string deserializerModeName = SerDe.ReadString(stream);
            if (!Enum.TryParse(deserializerModeName, out deserializerMode))
            {
                throw new InvalidDataException("Deserializer mode is not valid.");
            }

            runMode = SerDe.ReadString(stream);

            // NOTE(review): BinaryFormatter is unsafe on untrusted input and has been removed
            // from recent .NET versions; the stream contents are trusted as-is here.
            var payload = new MemoryStream(SerDe.ReadBytes(stream), false);
            object wrapperData = new BinaryFormatter().Deserialize(payload);

            return (UdfWrapperData)wrapperData;
        }
Beispiel #2
0
        /// <summary>
        /// Builds a command payload made of three sections, in order: the deserializer mode
        /// name, the serializer mode name, and <paramref name="func"/> serialized with
        /// BinaryFormatter. Each section is preceded by its 4-byte length.
        /// </summary>
        /// <param name="func">Object to serialize as the final section</param>
        /// <param name="deserializerMode">Mode whose name is written first</param>
        /// <param name="serializerMode">Mode whose name is written second</param>
        /// <returns>The concatenated payload bytes</returns>
        internal static byte[] BuildCommand(object func, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
        {
            // Serialize the function first so a serialization failure surfaces before
            // any payload is assembled.
            var serializedFunc = new MemoryStream();
            new BinaryFormatter().Serialize(serializedFunc, func);

            var payload = new MemoryStream();

            // Writes <length><bytes>. The length bytes come from BitConverter and are then
            // reversed, which yields big-endian order on a little-endian host.
            Action<byte[]> appendSection = section =>
            {
                byte[] lengthPrefix = BitConverter.GetBytes(section.Length);
                Array.Reverse(lengthPrefix);
                payload.Write(lengthPrefix, 0, lengthPrefix.Length);
                payload.Write(section, 0, section.Length);
            };

            appendSection(Encoding.UTF8.GetBytes(deserializerMode.ToString()));
            appendSection(Encoding.UTF8.GetBytes(serializerMode.ToString()));
            appendSection(serializedFunc.ToArray());

            return payload.ToArray();
        }
Beispiel #3
0
        /// <summary>
        /// Reads a serialized command from <paramref name="stream"/> and rebuilds the UDF
        /// delegate of type <typeparamref name="T"/> it describes.
        /// </summary>
        /// <typeparam name="T">Delegate type of the UDF to rebuild</typeparam>
        /// <param name="stream">Stream to read the command from</param>
        /// <param name="serializerMode">Mode parsed from the first string read</param>
        /// <param name="deserializerMode">Mode parsed from the second string read</param>
        /// <param name="runMode">Third string read, returned as-is</param>
        /// <returns>The deserialized UDF</returns>
        /// <exception cref="InvalidDataException">
        /// Either mode string is not a valid <see cref="SerializedMode"/>.
        /// </exception>
        internal static T Deserialize <T>(
            Stream stream,
            out SerializedMode serializerMode,
            out SerializedMode deserializerMode,
            out string runMode) where T : Delegate
        {
            string serializerModeName = SerDe.ReadString(stream);
            if (!Enum.TryParse(serializerModeName, out serializerMode))
            {
                throw new InvalidDataException("Serializer mode is not valid.");
            }

            string deserializerModeName = SerDe.ReadString(stream);
            if (!Enum.TryParse(deserializerModeName, out deserializerMode))
            {
                throw new InvalidDataException("Deserializer mode is not valid.");
            }

            runMode = SerDe.ReadString(stream);

            // NOTE(review): BinaryFormatter is unsafe on untrusted input; the payload is
            // trusted as-is here.
            var payload = new MemoryStream(SerDe.ReadBytes(stream), false);
            var wrapperData = (UdfWrapperData)new BinaryFormatter().Deserialize(payload);

            int nodesConsumed = 0;
            int udfsConsumed = 0;
            var result = (T)DeserializeUdfs <T>(wrapperData, ref nodesConsumed, ref udfsConsumed);

            // Both cursors must land exactly at the end of their arrays; anything else
            // means part of the wrapper data was left unread.
            Debug.Assert(nodesConsumed == wrapperData.UdfWrapperNodes.Length);
            Debug.Assert(udfsConsumed == wrapperData.Udfs.Length);

            return result;
        }
Beispiel #4
0
 /// <summary>
 /// Creates a DStream over the given proxy and streaming context, with both the cached
 /// and checkpointed flags cleared.
 /// </summary>
 /// <param name="dstreamProxy">Proxy stored for this stream</param>
 /// <param name="streamingContext">Streaming context stored for this stream</param>
 /// <param name="serializedMode">Serialization mode stored for this stream</param>
 internal DStream(IDStreamProxy dstreamProxy, StreamingContext streamingContext, SerializedMode serializedMode = SerializedMode.Byte)
 {
     // A newly constructed stream starts out neither cached nor checkpointed.
     isCheckpointed = false;
     isCached = false;

     this.serializedMode = serializedMode;
     this.dstreamProxy = dstreamProxy;
     this.streamingContext = streamingContext;
 }
Beispiel #5
0
 /// <summary>
 /// Creates an RDD directly from an existing JVM object. Mainly used by SparkContext
 /// when it creates the first RDD, for example through
 /// <see cref="SparkContext.Parallelize{T}(IEnumerable{T}, int?)"/>.
 /// </summary>
 /// <param name="jvmObject">Reference to the JVM object of the RDD</param>
 /// <param name="sparkContext">Owning SparkContext</param>
 /// <param name="serializedMode">Serialization mode of this RDD</param>
 internal RDD(
     JvmObjectReference jvmObject,
     SparkContext sparkContext,
     SerializedMode serializedMode)
 {
     _serializedMode = serializedMode;
     _sparkContext = sparkContext;
     _jvmObject = jvmObject;
 }
Beispiel #6
0
 /// <summary>
 /// Creates a pipelined RDD that applies <paramref name="func"/> on top of a previous RDD.
 /// The base RDD is initialized with <see cref="SerializedMode.Byte"/> as its own
 /// serialization mode.
 /// </summary>
 /// <param name="func">Worker function to run; must not be null</param>
 /// <param name="preservesPartitioning">Stored as-is for later use by this RDD</param>
 /// <param name="prevRddJvmObjRef">Reference to the JVM object of the previous RDD</param>
 /// <param name="sparkContext">Owning SparkContext</param>
 /// <param name="prevSerializedMode">Serialization mode of the previous RDD</param>
 /// <exception cref="ArgumentNullException"><paramref name="func"/> is null</exception>
 internal PipelinedRDD(
     RDD.WorkerFunction func,
     bool preservesPartitioning,
     JvmObjectReference prevRddJvmObjRef,
     SparkContext sparkContext,
     SerializedMode prevSerializedMode)
     : base(prevRddJvmObjRef, sparkContext, SerializedMode.Byte, prevSerializedMode)
 {
     // The single-string ArgumentNullException constructor interprets its argument as the
     // parameter name, so the name and the message are passed separately.
     _func = func ?? throw new ArgumentNullException(nameof(func), "UDF cannot be null.");
     _preservesPartitioning = preservesPartitioning;
 }
Beispiel #7
0
        /// <summary>
        /// Deserializes an Arrow or DataFrame UDF from <paramref name="stream"/>. The delegate
        /// type is chosen from the type name recorded on the first UDF wrapper node.
        /// </summary>
        /// <param name="stream">Stream to read the command from</param>
        /// <param name="serializerMode">Serializer mode read from the stream</param>
        /// <param name="deserializerMode">Deserializer mode read from the stream</param>
        /// <param name="runMode">Run mode read from the stream</param>
        /// <returns>The deserialized UDF delegate, typed as <see cref="object"/></returns>
        /// <exception cref="InvalidDataException">
        /// A mode string in the stream is invalid, or the first wrapper node's type name
        /// cannot be resolved to a type.
        /// </exception>
        internal static object DeserializeArrowOrDataFrameUdf(
            Stream stream,
            out SerializedMode serializerMode,
            out SerializedMode deserializerMode,
            out string runMode)
        {
            UdfWrapperData udfWrapperData = GetUdfWrapperDataFromStream(
                stream,
                out serializerMode,
                out deserializerMode,
                out runMode);

            int            nodeIndex = 0;
            int            udfIndex  = 0;
            UdfWrapperNode node      = udfWrapperData.UdfWrapperNodes[nodeIndex];
            Type           nodeType  = Type.GetType(node.TypeName);

            // Type.GetType returns null when the name cannot be resolved. Fail with a
            // descriptive error instead of a NullReferenceException in the checks below.
            if (nodeType == null)
            {
                throw new InvalidDataException(
                    $"Unable to resolve UDF wrapper type '{node.TypeName}'.");
            }

            Delegate udf;

            if (nodeType == typeof(DataFrameGroupedMapUdfWrapper))
            {
                udf = (DataFrameGroupedMapWorkerFunction.ExecuteDelegate)DeserializeUdfs <DataFrameGroupedMapWorkerFunction.ExecuteDelegate>(
                    udfWrapperData,
                    ref nodeIndex,
                    ref udfIndex);
            }
            else if (nodeType == typeof(DataFrameWorkerFunction) || nodeType.IsSubclassOf(typeof(DataFrameUdfWrapper)))
            {
                udf = (DataFrameWorkerFunction.ExecuteDelegate)DeserializeUdfs <DataFrameWorkerFunction.ExecuteDelegate>(
                    udfWrapperData,
                    ref nodeIndex,
                    ref udfIndex);
            }
            else if (nodeType == typeof(ArrowGroupedMapUdfWrapper))
            {
                udf = (ArrowGroupedMapWorkerFunction.ExecuteDelegate)DeserializeUdfs <ArrowGroupedMapWorkerFunction.ExecuteDelegate>(
                    udfWrapperData,
                    ref nodeIndex,
                    ref udfIndex);
            }
            else
            {
                // Every other resolved type is treated as a plain Arrow UDF.
                udf = (ArrowWorkerFunction.ExecuteDelegate)
                      DeserializeUdfs <ArrowWorkerFunction.ExecuteDelegate>(
                    udfWrapperData,
                    ref nodeIndex,
                    ref udfIndex);
            }

            // Check all the data is consumed.
            Debug.Assert(nodeIndex == udfWrapperData.UdfWrapperNodes.Length);
            Debug.Assert(udfIndex == udfWrapperData.Udfs.Length);

            return udf;
        }
Beispiel #8
0
        /// <summary>
        /// Lazily reads elements from <paramref name="stream"/>: each iteration reads a
        /// length, stops if it is null or not positive, and otherwise hands the stream and
        /// that length to the deserializer selected by <paramref name="serializedMode"/>.
        /// </summary>
        /// <param name="stream">Stream object to read from</param>
        /// <param name="serializedMode">Serialized mode for each element</param>
        /// <returns>Sequence of deserialized objects</returns>
        public IEnumerable <object> Collect(Stream stream, SerializedMode serializedMode)
        {
            IDeserializer elementReader = GetDeserializer(serializedMode);

            while (true)
            {
                int? byteCount = SerDe.ReadBytesLength(stream);

                // A missing or non-positive length ends the sequence.
                if (byteCount == null || byteCount.GetValueOrDefault() <= 0)
                {
                    yield break;
                }

                yield return elementReader.Deserialize(stream, byteCount.GetValueOrDefault());
            }
        }
Beispiel #9
0
        /// <summary>
        /// Creates the deserializer that matches the given serialization mode.
        /// </summary>
        /// <param name="mode">Serialization mode; only Byte and String are supported</param>
        /// <returns>A new deserializer for <paramref name="mode"/></returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="mode"/> is neither Byte nor String.
        /// </exception>
        internal static IDeserializer GetDeserializer(SerializedMode mode)
        {
            if (mode == SerializedMode.Byte)
            {
                return new BinaryDeserializer();
            }

            if (mode == SerializedMode.String)
            {
                return new StringDeserializer();
            }

            throw new ArgumentException($"Unsupported mode found {mode}");
        }
Beispiel #10
0
        /// <summary>
        /// Creates an RDD that is pipelined on top of a previous RDD. Mainly called by
        /// <see cref="PipelinedRDD{T}"/>.
        /// </summary>
        /// <param name="prevRddJvmObjRef">
        /// Reference to the JVM object of the RDD the pipeline starts from
        /// </param>
        /// <param name="sparkContext">Owning SparkContext</param>
        /// <param name="serializedMode">Serialization mode of this RDD</param>
        /// <param name="prevSerializedMode">Serialization mode of the previous RDD</param>
        internal RDD(
            JvmObjectReference prevRddJvmObjRef,
            SparkContext sparkContext,
            SerializedMode serializedMode,
            SerializedMode prevSerializedMode)
        {
            // _jvmObject is deliberately left unset: when the PipelinedRDD constructor
            // calls this one, the JVM object has not been created yet.
            _prevSerializedMode = prevSerializedMode;
            _serializedMode = serializedMode;
            _sparkContext = sparkContext;
            _prevRddJvmObjRef = prevRddJvmObjRef;
        }
Beispiel #11
0
        /// <summary>
        /// Connects to the loopback port given by <paramref name="info"/> and lazily yields
        /// the elements read from the socket stream, decoded according to
        /// <paramref name="serializedMode"/>. Reading stops at the first null or empty record.
        /// </summary>
        /// <param name="info">Port to connect to, plus an optional secret to send first</param>
        /// <param name="serializedMode">How each record is decoded</param>
        /// <param name="type">
        /// Type whose first constructor is invoked with the two deserialized halves in Pair mode
        /// </param>
        /// <returns>Sequence of decoded elements</returns>
        public IEnumerable <dynamic> Collect(SocketInfo info, SerializedMode serializedMode, Type type)
        {
            IFormatter binaryFormatter = new BinaryFormatter();

            var socket = SocketFactory.CreateSocket();
            socket.Connect(IPAddress.Loopback, info.Port, null);

            using (var socketStream = socket.GetStream())
            {
                if (info.Secret != null)
                {
                    // Send the secret and log whatever the other side replies.
                    SerDe.Write(socketStream, info.Secret);
                    var reply = SerDe.ReadString(socketStream);
                    Logger.LogDebug("Connect back to JVM: " + reply);
                }

                while (true)
                {
                    byte[] record = SerDe.ReadBytes(socketStream);
                    if (record == null || record.Length == 0)
                    {
                        yield break;
                    }

                    switch (serializedMode)
                    {
                        case SerializedMode.Byte:
                        {
                            yield return binaryFormatter.Deserialize(new MemoryStream(record));
                            break;
                        }

                        case SerializedMode.String:
                        {
                            yield return Encoding.UTF8.GetString(record);
                            break;
                        }

                        case SerializedMode.Pair:
                        {
                            // A pair spans two records: the one already read and the next one.
                            var firstHalf = new MemoryStream(record);
                            var secondHalf = new MemoryStream(SerDe.ReadBytes(socketStream));

                            ConstructorInfo pairConstructor = type.GetConstructors()[0];
                            object first = binaryFormatter.Deserialize(firstHalf);
                            object second = binaryFormatter.Deserialize(secondHalf);
                            yield return pairConstructor.Invoke(new object[] { first, second });
                            break;
                        }

                        case SerializedMode.Row:
                        {
                            foreach (var unpickled in PythonSerDe.GetUnpickledObjects(record))
                            {
                                yield return (unpickled as RowConstructor).GetRow();
                            }
                            break;
                        }
                    }
                }
            }
        }
Beispiel #12
0
        /// <summary>
        /// Reads a serialized command from <paramref name="stream"/> and rebuilds the UDF
        /// delegate of type <typeparamref name="T"/> it describes.
        /// </summary>
        /// <typeparam name="T">Delegate type of the UDF to rebuild</typeparam>
        /// <param name="stream">Stream to read the command from</param>
        /// <param name="serializerMode">Serializer mode read from the stream</param>
        /// <param name="deserializerMode">Deserializer mode read from the stream</param>
        /// <param name="runMode">Run mode read from the stream</param>
        /// <returns>The deserialized UDF</returns>
        internal static T Deserialize <T>(
            Stream stream,
            out SerializedMode serializerMode,
            out SerializedMode deserializerMode,
            out string runMode) where T : Delegate
        {
            UdfWrapperData wrapperData = GetUdfWrapperDataFromStream(
                stream,
                out serializerMode,
                out deserializerMode,
                out runMode);

            int nodesConsumed = 0;
            int udfsConsumed = 0;
            T result = (T)DeserializeUdfs <T>(wrapperData, ref nodesConsumed, ref udfsConsumed);

            // Both cursors must land exactly at the end of their arrays; anything else
            // means part of the wrapper data was left unread.
            Debug.Assert(nodesConsumed == wrapperData.UdfWrapperNodes.Length);
            Debug.Assert(udfsConsumed == wrapperData.Udfs.Length);

            return result;
        }
Beispiel #13
0
        /// <summary>
        /// Builds the command payload for a worker function: 12 reserved zero bytes, then
        /// the deserializer mode name, the serializer mode name, and the
        /// BinaryFormatter-serialized <paramref name="workerFunc"/>. Each of those three
        /// sections is preceded by its 4-byte length.
        /// </summary>
        /// <param name="workerFunc">Worker function to serialize as the final section</param>
        /// <param name="deserializerMode">Mode whose name is written first</param>
        /// <param name="serializerMode">Mode whose name is written second</param>
        /// <returns>The concatenated payload bytes</returns>
        internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
        {
            // Serialize the function first so a serialization failure surfaces before
            // any payload is assembled.
            var serializedFunc = new MemoryStream();
            new BinaryFormatter().Serialize(serializedFunc, workerFunc);

            var payload = new MemoryStream();

            // Reserve 12 bytes for RddId, stageId and partitionId; this info is filled in
            // by CSharpRDD.scala. A new array is already all zeros.
            byte[] reservedRddInfo = new byte[12];
            payload.Write(reservedRddInfo, 0, reservedRddInfo.Length);

            // Writes <length><bytes>. The length bytes come from BitConverter and are then
            // reversed, which yields big-endian order on a little-endian host.
            Action<byte[]> appendSection = section =>
            {
                byte[] lengthPrefix = BitConverter.GetBytes(section.Length);
                Array.Reverse(lengthPrefix);
                payload.Write(lengthPrefix, 0, lengthPrefix.Length);
                payload.Write(section, 0, section.Length);
            };

            appendSection(Encoding.UTF8.GetBytes(deserializerMode.ToString()));
            appendSection(Encoding.UTF8.GetBytes(serializerMode.ToString()));
            appendSection(serializedFunc.ToArray());

            return payload.ToArray();
        }
Beispiel #14
0
        /// <summary>
        /// Connects to <paramref name="port"/> on the loopback interface and lazily yields
        /// the elements read from the socket, decoded according to
        /// <paramref name="serializedMode"/>. Reading stops at the first null or empty
        /// record. The socket is closed when enumeration finishes or the enumerator is disposed.
        /// </summary>
        /// <param name="port">Loopback TCP port to connect to</param>
        /// <param name="serializedMode">How each record is decoded</param>
        /// <param name="type">
        /// Type whose first constructor is invoked with the two deserialized halves in Pair mode
        /// </param>
        /// <returns>Sequence of decoded elements</returns>
        public IEnumerable <dynamic> Collect(int port, SerializedMode serializedMode, Type type)
        {
            IFormatter formatter = new BinaryFormatter();

            // NetworkStream(Socket) does not take ownership of the socket, so the socket
            // needs its own using block; otherwise it is never closed.
            using (Socket sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
            {
                sock.Connect(IPAddress.Loopback, port);

                using (NetworkStream s = new NetworkStream(sock))
                {
                    byte[] buffer;
                    while ((buffer = SerDe.ReadBytes(s)) != null && buffer.Length > 0)
                    {
                        if (serializedMode == SerializedMode.Byte)
                        {
                            MemoryStream ms = new MemoryStream(buffer);
                            yield return formatter.Deserialize(ms);
                        }
                        else if (serializedMode == SerializedMode.String)
                        {
                            yield return Encoding.UTF8.GetString(buffer);
                        }
                        else if (serializedMode == SerializedMode.Pair)
                        {
                            // A pair spans two records: the one already read and the next one.
                            MemoryStream ms  = new MemoryStream(buffer);
                            MemoryStream ms2 = new MemoryStream(SerDe.ReadBytes(s));

                            ConstructorInfo ci = type.GetConstructors()[0];
                            yield return ci.Invoke(new object[] { formatter.Deserialize(ms), formatter.Deserialize(ms2) });
                        }
                        else if (serializedMode == SerializedMode.Row)
                        {
                            var unpickledObjects = PythonSerDe.GetUnpickledObjects(buffer);
                            foreach (var item in unpickledObjects)
                            {
                                // A direct cast reports an unexpected item type as an
                                // InvalidCastException, where "as" would surface it as a
                                // NullReferenceException on the GetRow call.
                                yield return ((RowConstructor)item).GetRow();
                            }
                        }
                    }
                }
            }
        }
Beispiel #15
0
        /// <summary>
        /// Connects to <paramref name="port"/> on the loopback interface and lazily yields
        /// the elements read from the socket stream, decoded according to
        /// <paramref name="serializedMode"/>. Reading stops at the first null or empty record.
        /// </summary>
        /// <param name="port">Loopback port to connect to</param>
        /// <param name="serializedMode">How each record is decoded</param>
        /// <param name="type">
        /// Type whose first constructor is invoked with the two deserialized halves in Pair mode
        /// </param>
        /// <returns>Sequence of decoded elements</returns>
        public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
        {
            IFormatter binaryFormatter = new BinaryFormatter();

            var socket = SocketFactory.CreateSocket();
            socket.Connect(IPAddress.Loopback, port);

            using (var socketStream = socket.GetStream())
            {
                while (true)
                {
                    byte[] record = SerDe.ReadBytes(socketStream);
                    if (record == null || record.Length == 0)
                    {
                        yield break;
                    }

                    switch (serializedMode)
                    {
                        case SerializedMode.Byte:
                        {
                            yield return binaryFormatter.Deserialize(new MemoryStream(record));
                            break;
                        }

                        case SerializedMode.String:
                        {
                            yield return Encoding.UTF8.GetString(record);
                            break;
                        }

                        case SerializedMode.Pair:
                        {
                            // A pair spans two records: the one already read and the next one.
                            var firstHalf = new MemoryStream(record);
                            var secondHalf = new MemoryStream(SerDe.ReadBytes(socketStream));

                            ConstructorInfo pairConstructor = type.GetConstructors()[0];
                            object first = binaryFormatter.Deserialize(firstHalf);
                            object second = binaryFormatter.Deserialize(secondHalf);
                            yield return pairConstructor.Invoke(new object[] { first, second });
                            break;
                        }

                        case SerializedMode.Row:
                        {
                            foreach (var unpickled in PythonSerDe.GetUnpickledObjects(record))
                            {
                                yield return (unpickled as RowConstructor).GetRow();
                            }
                            break;
                        }
                    }
                }
            }
        }
Beispiel #16
0
 /// <summary>
 /// Creates a DStream over the given proxy and streaming context.
 /// </summary>
 /// <param name="dstreamProxy">Proxy stored for this stream</param>
 /// <param name="streamingContext">Streaming context stored for this stream</param>
 /// <param name="serializedMode">Serialization mode stored for this stream</param>
 internal DStream(IDStreamProxy dstreamProxy, StreamingContext streamingContext, SerializedMode serializedMode = SerializedMode.Byte)
 {
     this.serializedMode = serializedMode;
     this.dstreamProxy = dstreamProxy;
     this.streamingContext = streamingContext;
 }
Beispiel #17
0
 /// <summary>
 /// Wraps the result of the proxy's CheckpointFile call for <paramref name="filePath"/>
 /// in a new RDD bound to this context.
 /// </summary>
 /// <typeparam name="T">Element type of the resulting RDD</typeparam>
 /// <param name="filePath">Path passed through to the proxy's CheckpointFile</param>
 /// <param name="serializedMode">Serialization mode of the resulting RDD</param>
 /// <returns>A new RDD over the proxy's result</returns>
 internal RDD <T> CheckpointFile <T>(string filePath, SerializedMode serializedMode)
 {
     var checkpointed = SparkContextProxy.CheckpointFile(filePath);

     return new RDD <T>(checkpointed, this, serializedMode);
 }
Beispiel #18
0
        /// <summary>
        /// Serializes a UDF delegate into a command payload. The payload is a sequence of
        /// sections, each preceded by its 4-byte length: serializer mode name, deserializer
        /// mode name, run mode, the compilation dump directory (only in run mode "R"), and
        /// finally the BinaryFormatter-serialized UDF wrapper data.
        /// </summary>
        /// <param name="func">UDF delegate to serialize</param>
        /// <param name="deserializerMode">Mode whose name is written second</param>
        /// <param name="serializerMode">Mode whose name is written first</param>
        /// <returns>The concatenated payload bytes</returns>
        internal static byte[] Serialize(
            Delegate func,
            SerializedMode deserializerMode = SerializedMode.Byte,
            SerializedMode serializerMode   = SerializedMode.Byte)
        {
            using (var payload = new MemoryStream())
            {
                // Writes <length><bytes>. The length bytes come from BitConverter and are
                // then reversed, which yields big-endian order on a little-endian host.
                Action<byte[]> appendSection = section =>
                {
                    byte[] lengthPrefix = BitConverter.GetBytes(section.Length);
                    Array.Reverse(lengthPrefix);
                    payload.Write(lengthPrefix, 0, lengthPrefix.Length);
                    payload.Write(section, 0, section.Length);
                };

                appendSection(Encoding.UTF8.GetBytes(serializerMode.ToString()));
                appendSection(Encoding.UTF8.GetBytes(deserializerMode.ToString()));

                // Run mode: "N" (normal) unless overridden by the environment; "R" is repl.
                string runMode = Environment.GetEnvironmentVariable("SPARK_NET_RUN_MODE") ?? "N";
                appendSection(Encoding.UTF8.GetBytes(runMode));

                if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
                {
                    // Repl mode also carries the compilation dump directory.
                    string compilationDumpDir =
                        Environment.GetEnvironmentVariable("SPARK_NET_SCRIPT_COMPILATION_DIR") ?? ".";
                    appendSection(Encoding.UTF8.GetBytes(compilationDumpDir));
                }

                // Collect the wrapper nodes and UDF data for the delegate.
                var wrapperNodes = new List <UdfWrapperNode>();
                var udfDataList  = new List <UdfSerDe.UdfData>();
                SerializeUdfs(func, null, wrapperNodes, udfDataList);

                var wrapperData = new UdfWrapperData()
                {
                    UdfWrapperNodes = wrapperNodes.ToArray(),
                    Udfs            = udfDataList.ToArray()
                };

                using (var serializedWrapper = new MemoryStream())
                {
                    new BinaryFormatter().Serialize(serializedWrapper, wrapperData);
                    appendSection(serializedWrapper.ToArray());
                }

                return payload.ToArray();
            }
        }
Beispiel #19
0
        /// <summary>
        /// Builds the command payload for a worker function: 12 reserved zero bytes, then
        /// these sections, each preceded by its 4-byte length: deserializer mode name,
        /// serializer mode name, run mode, the compilation dump directory (only in run
        /// mode "R"), and the BinaryFormatter-serialized <paramref name="workerFunc"/>.
        /// </summary>
        /// <param name="workerFunc">Worker function to serialize as the final section</param>
        /// <param name="deserializerMode">Mode whose name is written first</param>
        /// <param name="serializerMode">Mode whose name is written second</param>
        /// <returns>The concatenated payload bytes</returns>
        internal static byte[] BuildCommand(CSharpWorkerFunc workerFunc, SerializedMode deserializerMode = SerializedMode.Byte, SerializedMode serializerMode = SerializedMode.Byte)
        {
            // Serialize the function first so a serialization failure surfaces before
            // any payload is assembled.
            var serializedFunc = new MemoryStream();
            new BinaryFormatter().Serialize(serializedFunc, workerFunc);

            var payload = new MemoryStream();

            // Reserve 12 bytes for RddId, stageId and partitionId; this info is filled in
            // by CSharpRDD.scala. A new array is already all zeros.
            byte[] reservedRddInfo = new byte[12];
            payload.Write(reservedRddInfo, 0, reservedRddInfo.Length);

            // Writes <length><bytes>. The length bytes come from BitConverter and are then
            // reversed, which yields big-endian order on a little-endian host.
            Action<byte[]> appendSection = section =>
            {
                byte[] lengthPrefix = BitConverter.GetBytes(section.Length);
                Array.Reverse(lengthPrefix);
                payload.Write(lengthPrefix, 0, lengthPrefix.Length);
                payload.Write(section, 0, section.Length);
            };

            appendSection(Encoding.UTF8.GetBytes(deserializerMode.ToString()));
            appendSection(Encoding.UTF8.GetBytes(serializerMode.ToString()));

            // Run mode: "N" (normal) unless overridden by the environment; "R" is repl.
            string runMode = Environment.GetEnvironmentVariable("SPARKCLR_RUN_MODE") ?? "N";
            appendSection(Encoding.UTF8.GetBytes(runMode));

            if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
            {
                // Repl mode also carries the compilation dump directory.
                string compilationDumpDir =
                    Environment.GetEnvironmentVariable("SPARKCLR_SCRIPT_COMPILATION_DIR") ?? ".";
                appendSection(Encoding.UTF8.GetBytes(compilationDumpDir));
            }

            appendSection(serializedFunc.ToArray());

            return payload.ToArray();
        }
Beispiel #20
0
 /// <summary>
 /// Not implemented; every call throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="port">Unused</param>
 /// <param name="serializedMode">Unused</param>
 /// <param name="type">Unused</param>
 /// <exception cref="NotImplementedException">Always thrown</exception>
 public IEnumerable <dynamic> Collect(int port, SerializedMode serializedMode, Type type)
     => throw new NotImplementedException();
Beispiel #21
0
 /// <summary>
 /// Not implemented; every call throws <see cref="NotImplementedException"/>.
 /// </summary>
 /// <param name="port">Unused</param>
 /// <param name="serializedMode">Unused</param>
 /// <param name="type">Unused</param>
 /// <exception cref="NotImplementedException">Always thrown</exception>
 public IEnumerable<dynamic> Collect(int port, SerializedMode serializedMode, Type type)
     => throw new NotImplementedException();