Пример #1
0
        /// <summary>
        /// Rebuilds the delegate for the wrapper node at <paramref name="nodeIndex"/> and,
        /// recursively, for all of its children.
        /// </summary>
        /// <typeparam name="T">Type argument forwarded to CreateUdfWrapperDelegate.</typeparam>
        /// <param name="data">Holds the wrapper nodes and the serialized UDF entries.</param>
        /// <param name="nodeIndex">
        /// Position of the next unread entry in <c>data.UdfWrapperNodes</c>; advanced by one
        /// for every node consumed.
        /// </param>
        /// <param name="udfIndex">
        /// Position of the next unread entry in <c>data.Udfs</c>; advanced by one for every
        /// UDF consumed.
        /// </param>
        /// <returns>The delegate produced by CreateUdfWrapperDelegate for this node.</returns>
        private static Delegate DeserializeUdfs<T>(
            UdfWrapperData data,
            ref int nodeIndex,
            ref int udfIndex)
        {
            UdfWrapperNode current = data.UdfWrapperNodes[nodeIndex++];
            Type wrapperType = Type.GetType(current.TypeName);

            // Either branch fills exactly NumChildren slots; only the source of the
            // children differs.
            var children = new object[current.NumChildren];

            if (current.HasUdf)
            {
                // This node's children are UDF entries read directly from data.Udfs.
                for (int slot = 0; slot < children.Length; slot++)
                {
                    children[slot] = UdfSerDe.Deserialize(data.Udfs[udfIndex++]);
                }
            }
            else
            {
                // This node's children are themselves wrapper nodes; recurse for each one.
                for (int slot = 0; slot < children.Length; slot++)
                {
                    children[slot] = DeserializeUdfs<T>(data, ref nodeIndex, ref udfIndex);
                }
            }

            return CreateUdfWrapperDelegate<T>(wrapperType, children);
        }
Пример #2
0
        /// <summary>
        /// Reads UDF wrapper data from <paramref name="stream"/> and rebuilds the worker
        /// delegate, choosing the delegate type from the type of the first wrapper node.
        /// </summary>
        /// <param name="stream">Stream passed to GetUdfWrapperDataFromStream.</param>
        /// <param name="serializerMode">Serializer mode reported by GetUdfWrapperDataFromStream.</param>
        /// <param name="deserializerMode">Deserializer mode reported by GetUdfWrapperDataFromStream.</param>
        /// <param name="runMode">Run mode reported by GetUdfWrapperDataFromStream.</param>
        /// <returns>
        /// One of the DataFrame grouped-map, DataFrame, Arrow grouped-map or Arrow
        /// <c>ExecuteDelegate</c> instances, returned as <see cref="object"/>.
        /// </returns>
        internal static object DeserializeArrowOrDataFrameUdf(
            Stream stream,
            out SerializedMode serializerMode,
            out SerializedMode deserializerMode,
            out string runMode)
        {
            UdfWrapperData wrapperData = GetUdfWrapperDataFromStream(
                stream,
                out serializerMode,
                out deserializerMode,
                out runMode);

            int nodeCursor = 0;
            int udfCursor = 0;

            // Peek at the first node (without consuming it) to decide which delegate
            // type the tree should be materialized as.
            UdfWrapperNode rootNode = wrapperData.UdfWrapperNodes[nodeCursor];
            Type rootType = Type.GetType(rootNode.TypeName);

            Delegate result;

            if (rootType == typeof(DataFrameGroupedMapUdfWrapper))
            {
                result = (DataFrameGroupedMapWorkerFunction.ExecuteDelegate)
                    DeserializeUdfs<DataFrameGroupedMapWorkerFunction.ExecuteDelegate>(
                        wrapperData, ref nodeCursor, ref udfCursor);
            }
            else if (rootType == typeof(DataFrameWorkerFunction) || rootType.IsSubclassOf(typeof(DataFrameUdfWrapper)))
            {
                result = (DataFrameWorkerFunction.ExecuteDelegate)
                    DeserializeUdfs<DataFrameWorkerFunction.ExecuteDelegate>(
                        wrapperData, ref nodeCursor, ref udfCursor);
            }
            else if (rootType == typeof(ArrowGroupedMapUdfWrapper))
            {
                result = (ArrowGroupedMapWorkerFunction.ExecuteDelegate)
                    DeserializeUdfs<ArrowGroupedMapWorkerFunction.ExecuteDelegate>(
                        wrapperData, ref nodeCursor, ref udfCursor);
            }
            else
            {
                // Any other root type is handled as an Arrow worker function.
                result = (ArrowWorkerFunction.ExecuteDelegate)
                    DeserializeUdfs<ArrowWorkerFunction.ExecuteDelegate>(
                        wrapperData, ref nodeCursor, ref udfCursor);
            }

            // Both cursors must have reached the end, i.e. every node and UDF was read.
            Debug.Assert(nodeCursor == wrapperData.UdfWrapperNodes.Length);
            Debug.Assert(udfCursor == wrapperData.Udfs.Length);

            return result;
        }
Пример #3
0
        /// <summary>
        /// Reads UDF wrapper data from <paramref name="stream"/> and rebuilds it as a
        /// delegate of type <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Delegate type the result is cast to.</typeparam>
        /// <param name="stream">Stream passed to GetUdfWrapperDataFromStream.</param>
        /// <param name="serializerMode">Serializer mode reported by GetUdfWrapperDataFromStream.</param>
        /// <param name="deserializerMode">Deserializer mode reported by GetUdfWrapperDataFromStream.</param>
        /// <param name="runMode">Run mode reported by GetUdfWrapperDataFromStream.</param>
        /// <returns>The rebuilt delegate, cast to <typeparamref name="T"/>.</returns>
        internal static T Deserialize<T>(
            Stream stream,
            out SerializedMode serializerMode,
            out SerializedMode deserializerMode,
            out string runMode) where T : Delegate
        {
            UdfWrapperData wrapperData = GetUdfWrapperDataFromStream(
                stream,
                out serializerMode,
                out deserializerMode,
                out runMode);

            int nodeCursor = 0;
            int udfCursor = 0;
            var result = (T)DeserializeUdfs<T>(wrapperData, ref nodeCursor, ref udfCursor);

            // Both cursors must have reached the end, i.e. every node and UDF was read.
            Debug.Assert(nodeCursor == wrapperData.UdfWrapperNodes.Length);
            Debug.Assert(udfCursor == wrapperData.Udfs.Length);

            return result;
        }
Пример #4
0
        /// <summary>
        /// Serializes <paramref name="func"/> into a command payload.
        /// </summary>
        /// <remarks>
        /// The payload is a sequence of length-prefixed fields, each prefix being a
        /// 4-byte big-endian integer:
        /// <list type="number">
        /// <item><description>serializer mode name (UTF-8)</description></item>
        /// <item><description>deserializer mode name (UTF-8)</description></item>
        /// <item><description>run mode (UTF-8), taken from the SPARK_NET_RUN_MODE
        /// environment variable, "N" when unset</description></item>
        /// <item><description>only when the run mode is "R": compilation dump directory
        /// (UTF-8), taken from SPARK_NET_SCRIPT_COMPILATION_DIR, "." when unset</description></item>
        /// <item><description>the BinaryFormatter-serialized UdfWrapperData</description></item>
        /// </list>
        /// </remarks>
        /// <param name="func">Delegate handed to SerializeUdfs.</param>
        /// <param name="deserializerMode">Deserializer mode written into the payload.</param>
        /// <param name="serializerMode">Serializer mode written into the payload.</param>
        /// <returns>The assembled payload bytes.</returns>
        internal static byte[] Serialize(
            Delegate func,
            SerializedMode deserializerMode = SerializedMode.Byte,
            SerializedMode serializerMode = SerializedMode.Byte)
        {
            // Appends a 4-byte big-endian length followed by the bytes themselves.
            void WriteLengthPrefixed(MemoryStream target, byte[] bytes)
            {
                byte[] lengthBytes = BitConverter.GetBytes(bytes.Length);

                // BitConverter uses the host byte order, so only swap on
                // little-endian hosts; an unconditional reverse would emit
                // little-endian prefixes on a big-endian machine.
                if (BitConverter.IsLittleEndian)
                {
                    Array.Reverse(lengthBytes);
                }

                target.Write(lengthBytes, 0, lengthBytes.Length);
                target.Write(bytes, 0, bytes.Length);
            }

            using (var payload = new MemoryStream())
            {
                // Serializer mode, then deserializer mode.
                WriteLengthPrefixed(payload, Encoding.UTF8.GetBytes(serializerMode.ToString()));
                WriteLengthPrefixed(payload, Encoding.UTF8.GetBytes(deserializerMode.ToString()));

                // Run mode:
                // N - normal
                // R - repl
                string runMode = Environment.GetEnvironmentVariable("SPARK_NET_RUN_MODE") ?? "N";
                WriteLengthPrefixed(payload, Encoding.UTF8.GetBytes(runMode));

                if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
                {
                    // REPL mode additionally carries the compilation dump directory.
                    string compilationDumpDir =
                        Environment.GetEnvironmentVariable("SPARK_NET_SCRIPT_COMPILATION_DIR") ?? ".";
                    WriteLengthPrefixed(payload, Encoding.UTF8.GetBytes(compilationDumpDir));
                }

                // Collect the wrapper nodes and UDF entries for the delegate.
                var udfWrapperNodes = new List<UdfWrapperNode>();
                var udfs = new List<UdfSerDe.UdfData>();
                SerializeUdfs(func, null, udfWrapperNodes, udfs);

                var udfWrapperData = new UdfWrapperData()
                {
                    UdfWrapperNodes = udfWrapperNodes.ToArray(),
                    Udfs = udfs.ToArray()
                };

                // NOTE(review): BinaryFormatter is obsolete and unsafe on untrusted
                // input. It is kept here because the reading side must use the same
                // format; replacing it requires changing both ends together.
                var formatter = new BinaryFormatter();

                using (var udfStream = new MemoryStream())
                {
                    formatter.Serialize(udfStream, udfWrapperData);
                    WriteLengthPrefixed(payload, udfStream.ToArray());
                }

                return payload.ToArray();
            }
        }