Example #1
0
        /// <summary>
        /// Runs the worker function over the iterator built from <paramref name="inputStream"/>,
        /// writes every non-null result to <paramref name="outputStream"/>, then writes
        /// diagnostics info and logs timing statistics.
        /// </summary>
        /// <param name="inputStream">Stream handed to GetIterator to produce the function's input.</param>
        /// <param name="outputStream">Stream that results and diagnostics info are written to.</param>
        /// <param name="splitIndex">Value passed as the first argument of the worker function.</param>
        /// <param name="bootTime">Forwarded to WriteDiagnosticsInfo.</param>
        /// <param name="deserializerMode">Forwarded to GetIterator.</param>
        /// <param name="workerFunc">Wrapper whose <c>Func</c> delegate is executed.</param>
        /// <param name="serializerMode">Forwarded to WriteOutput.</param>
        /// <param name="formatter">Forwarded to WriteOutput.</param>
        /// <param name="commandProcessWatch">Stopwatch owned by the caller; it is stopped here and its elapsed time is logged.</param>
        /// <param name="stageId">Stage id, used only in the statistics log line.</param>
        /// <param name="isSqlUdf">Forwarded to GetIterator.</param>
        private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime,
                                           string deserializerMode, CSharpWorkerFunc workerFunc, string serializerMode,
                                           IFormatter formatter, Stopwatch commandProcessWatch, int stageId, int isSqlUdf)
        {
            int count            = 0;
            int nullMessageCount = 0;

            logger.LogDebug("Beginning to execute func");
            var func = workerFunc.Func;

            // funcProcessWatch is meant to run only while the worker function is producing the next
            // element; it is paused for the duration of each loop body (output serialization).
            var      funcProcessWatch = Stopwatch.StartNew();
            DateTime initTime         = DateTime.UtcNow;

            foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf)))
            {
                funcProcessWatch.Stop();

                if (object.ReferenceEquals(null, message))
                {
                    nullMessageCount++;
                    // Resume timing before asking for the next element; otherwise the time needed
                    // to produce the element following a null message would go unmeasured.
                    funcProcessWatch.Start();
                    continue;
                }

                try
                {
                    WriteOutput(outputStream, serializerMode, message, formatter);
                }
                catch (Exception)
                {
                    logger.LogError("WriteOutput() failed at iteration {0}", count);
                    throw;
                }

                count++;
                funcProcessWatch.Start();
            }

            // The watch is still running after the final (terminating) iterator step; stop it so
            // that logging and diagnostics below are not attributed to the worker function.
            funcProcessWatch.Stop();

            logger.LogInfo("Output entries count: " + count);
            logger.LogDebug("Null messages count: " + nullMessageCount);

            //if profiler:
            //    profiler.profile(process)
            //else:
            //    process()

            WriteDiagnosticsInfo(outputStream, bootTime, initTime);

            commandProcessWatch.Stop();

            // log statistics
            logger.LogInfo(string.Format("func process time: {0}", funcProcessWatch.ElapsedMilliseconds));
            logger.LogInfo(string.Format("stage {0}, command process time: {1}", stageId,
                                         commandProcessWatch.ElapsedMilliseconds));
        }
Example #2
0
        // Verifies that CSharpWorkerFunc.Chain feeds the output of the first function into the second.
        public void ChainTest()
        {
            // The producer ignores its arguments and always yields 1, 2, 3.
            var producer = new CSharpWorkerFunc((id, iter) => new List <dynamic> { 1, 2, 3 });
            var consumer = new CSharpWorkerFunc(Multiplier);

            // producer runs first on the input; its result becomes the input of consumer
            var chained = CSharpWorkerFunc.Chain(producer, consumer);

            IEnumerable <dynamic> emptyInput = new List <dynamic>();
            int[] actual = chained.Func(1, emptyInput).Cast <int>().ToArray();

            Assert.AreEqual(10, actual[0]);
            Assert.AreEqual(20, actual[1]);
            Assert.AreEqual(30, actual[2]);
        }
Example #3
0
        /// <summary>
        /// Reads one command from <paramref name="networkStream"/>: diagnostics info (stage id),
        /// deserializer mode, serializer mode, run mode and finally the serialized worker function.
        /// </summary>
        /// <param name="networkStream">Stream the command is read from; fields are consumed in order.</param>
        /// <param name="formatter">Formatter used to deserialize the worker function bytes.</param>
        /// <param name="stageId">Receives the value returned by ReadDiagnosticsInfo.</param>
        /// <param name="deserializerMode">Receives the first string read after the diagnostics info.</param>
        /// <param name="serializerMode">Receives the second string read after the diagnostics info.</param>
        /// <param name="workerFunc">Receives the deserialized worker function.</param>
        private static void ReadCommand(Stream networkStream, IFormatter formatter, out int stageId,
                                        out string deserializerMode,
                                        out string serializerMode, out CSharpWorkerFunc workerFunc)
        {
            stageId = ReadDiagnosticsInfo(networkStream);

            deserializerMode = SerDe.ReadString(networkStream);
            logger.LogDebug("Deserializer mode: " + deserializerMode);
            serializerMode = SerDe.ReadString(networkStream);
            logger.LogDebug("Serializer mode: " + serializerMode);

            string runMode = SerDe.ReadString(networkStream);

            // Run mode "R" carries an extra directory string; assemblies matching
            // "ReplCompilation.*" in that directory are loaded before deserializing the function.
            if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
            {
                var compilationDumpDir = SerDe.ReadString(networkStream);
                if (Directory.Exists(compilationDumpDir))
                {
                    assemblyHandler.LoadAssemblies(Directory.GetFiles(compilationDumpDir, "ReplCompilation.*",
                                                                      SearchOption.TopDirectoryOnly));
                }
                else
                {
                    // Best effort: a missing directory is logged and deserialization is still attempted.
                    logger.LogError("Directory " + compilationDumpDir + " does not exist.");
                }
            }

            byte[] command = SerDe.ReadBytes(networkStream);
            logger.LogDebug("command bytes read: " + command.Length);

            // Dispose the temporary stream as soon as the function has been deserialized.
            using (var stream = new MemoryStream(command))
            {
                workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream);
            }

            if (!logger.IsDebugEnabled)
            {
                return;
            }

            // Building the stack-trace dump is skipped entirely unless debug logging is enabled.
            var sb = new StringBuilder(Environment.NewLine);

            sb.AppendLine(
                "------------------------ Printing stack trace of workerFunc for ** debugging ** ------------------------------");
            sb.AppendLine(workerFunc.StackTrace);
            sb.AppendLine(
                "--------------------------------------------------------------------------------------------------------------");
            logger.LogDebug(sb.ToString());
        }
Example #4
0
        /// <summary>
        /// Mock implementation: deserializes the worker function embedded in <paramref name="command"/>,
        /// applies it to the upstream mock RDD's result (or to built-in sample sentences when no
        /// result is available) and wraps the output in a new <c>MockRddProxy</c>.
        /// </summary>
        /// <param name="prefvJavaRddReference">Upstream proxy; its <c>result</c> is used as input when it is a <c>MockRddProxy</c>.</param>
        /// <param name="command">Serialized command: three ints, two strings, run mode (plus one more string for run mode "R"), then the worker function bytes.</param>
        /// <param name="environmentVariables">Not used by this mock.</param>
        /// <param name="cSharpIncludes">Not used by this mock.</param>
        /// <param name="preservePartitioning">Not used by this mock.</param>
        /// <param name="broadcastVariables">Not used by this mock.</param>
        /// <param name="accumulator">Not used by this mock.</param>
        /// <returns>A <c>MockRddProxy</c> wrapping the worker function's output.</returns>
        public IRDDProxy CreateCSharpRdd(IRDDProxy prefvJavaRddReference, byte[] command, Dictionary <string, string> environmentVariables, List <string> cSharpIncludes, bool preservePartitioning, List <Broadcast> broadcastVariables, List <byte[]> accumulator)
        {
            // Fall back to the sample data when the upstream proxy is missing, is not a
            // MockRddProxy, or has no result, instead of failing with a NullReferenceException.
            IEnumerable <dynamic> input = null;
            var upstream = prefvJavaRddReference as MockRddProxy;
            if (upstream != null)
            {
                input = upstream.result;
            }

            if (input == null)
            {
                input = (new string[] {
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The dog lazy"
                }).AsEnumerable().Cast <dynamic>();
            }

            using (MemoryStream s = new MemoryStream(command))
            {
                // The header fields are not needed by the mock, but they must still be read
                // to advance the stream to the serialized worker function.
                SerDe.ReadInt(s);    // rddId
                SerDe.ReadInt(s);    // stageId
                SerDe.ReadInt(s);    // partitionId

                SerDe.ReadString(s); // deserializerMode
                SerDe.ReadString(s); // serializerMode

                string runMode = SerDe.ReadString(s);
                if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
                {
                    SerDe.ReadString(s); // compilationDumpDir
                }

                CSharpWorkerFunc workerFunc;
                using (var funcStream = new MemoryStream(SerDe.ReadBytes(s)))
                {
                    workerFunc = (CSharpWorkerFunc)formatter.Deserialize(funcStream);
                }

                var func = workerFunc.Func;
                IEnumerable <dynamic> output = func(default(int), input);

                // number 8 indicates shuffling scenario's leading 8-byte hash code of each data row which should be filtered
                // Inspect the first element once rather than enumerating the output twice.
                var head = output.FirstOrDefault() as byte[];
                if (head != null && head.Length == 8)
                {
                    output = output.Where(e => (e as byte[]).Length != 8).Select(e => formatter.Deserialize(new MemoryStream(e as byte[])));
                }

                return(new MockRddProxy(output));
            }
        }
Example #5
0
        // Per-test setup: resets the shared `result` field, creates the Moq proxies, installs the
        // mocked ISparkCLRProxy into SparkCLREnvironment and builds the StreamingContext under test.
        public void TestInitialize()
        {
            // Clear any data left in the shared field by a previous run of this setup's mocks.
            result = null;

            // Create Mock object to mock implementation of T by new Mock<T>();
            _mockSparkCLRProxy         = new Mock <ISparkCLRProxy>();
            _mockSparkContextProxy     = new Mock <ISparkContextProxy>();
            _mockStreamingContextProxy = new Mock <IStreamingContextProxy>();
            _mockRddProxy = new Mock <IRDDProxy>();

            SparkCLREnvironment.SparkCLRProxy = _mockSparkCLRProxy.Object;

            // Mock method of T by Mock<T>.Setup(). For method with parameters, you can mock different method implementation for different method parameters.
            // e.g., if you want to mock a method regardless of what values the method parameters are, you can use It.IsAny<T>() for each parameter; if you want
            // to mock the method for certain criteria, use It.Is<T>(Func<T, bool>) can. You can mock the same method multiple times for different criteria of
            // method parameters.

            // If the method to mock has return value and you want to mock the return value only, Use Returns(TReturnValue); if you want to add logics and return,
            // use Returns<T1, T2, ...>(Func<T1, T2, ..., TReturnValue>). If method is void, use CallBack<T1, T2, ...>(Action<T1, T2, ...>)

            // for more info please visit https://github.com/Moq/moq4/wiki/Quickstart
            _mockSparkCLRProxy.Setup(m => m.CreateSparkConf(It.IsAny <bool>())).Returns(new MockSparkConfProxy()); // some of mocks which rarely change can be kept

            _mockSparkCLRProxy.Setup(m => m.CreateSparkContext(It.IsAny <ISparkConfProxy>())).Returns(_mockSparkContextProxy.Object);
            _mockSparkCLRProxy.Setup(m => m.CreateStreamingContext(It.IsAny <SparkContext>(), It.IsAny <long>())).Returns(_mockStreamingContextProxy.Object);

            // CollectAndServe mock: opens a listening socket, serves the current `result` items on a
            // background task and returns the port the listener is bound to.
            _mockRddProxy.Setup(m => m.CollectAndServe()).Returns(() =>
            {
                var listener = SocketFactory.CreateSocket();
                listener.Listen();

                // Runs in the background so the port can be returned before a client connects.
                Task.Run(() =>
                {
                    using (var socket = listener.Accept())
                        using (var ns = socket.GetStream())
                        {
                            // Each item is written as its serialized length followed by the serialized bytes.
                            foreach (var item in result)
                            {
                                var ms = new MemoryStream();
                                new BinaryFormatter().Serialize(ms, item);
                                byte[] buffer = ms.ToArray();
                                SerDe.Write(ns, buffer.Length);
                                SerDe.Write(ns, buffer);
                            }
                            ns.Flush();
                        }
                });
                return((listener.LocalEndPoint as IPEndPoint).Port);
            });
            _mockRddProxy.Setup(m => m.RDDCollector).Returns(new RDDCollector());

            // CreateCSharpRdd mock: deserializes the worker function from `command`, applies it to the
            // current `result` (or to the sample sentences below when `result` is null) and stores the
            // output back into `result`. The same mocked RDD proxy is always returned.
            _mockSparkContextProxy.Setup(m => m.CreateCSharpRdd(It.IsAny <IRDDProxy>(), It.IsAny <byte[]>(), It.IsAny <Dictionary <string, string> >(),
                                                                It.IsAny <List <string> >(), It.IsAny <bool>(), It.IsAny <List <Broadcast> >(), It.IsAny <List <byte[]> >()))
            .Returns <IRDDProxy, byte[], Dictionary <string, string>, List <string>, bool, List <Broadcast>, List <byte[]> >(
                (prefvJavaRddReference, command, environmentVariables, cSharpIncludes, preservePartitioning, broadcastVariables, accumulator) =>
            {
                IEnumerable <dynamic> input = result ?? (new[] {
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The quick brown fox jumps over the lazy dog",
                    "The dog lazy"
                }).AsEnumerable();

                var formatter = new BinaryFormatter();
                using (MemoryStream s = new MemoryStream(command))
                {
                    // The three ints and two strings are read only to advance the stream; their values are unused here.
                    int rddId       = SerDe.ReadInt(s);
                    int stageId     = SerDe.ReadInt(s);
                    int partitionId = SerDe.ReadInt(s);

                    SerDe.ReadString(s);
                    SerDe.ReadString(s);

                    // Run mode "R" is followed by one extra string that must be consumed before the function bytes.
                    string runMode = SerDe.ReadString(s);
                    if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
                    {
                        string compilationDumpDir = SerDe.ReadString(s);
                    }

                    CSharpWorkerFunc workerFunc = (CSharpWorkerFunc)formatter.Deserialize(new MemoryStream(SerDe.ReadBytes(s)));
                    var func = workerFunc.Func;
                    result   = func(default(int), input);
                }

                // When the first element is an 8-byte array, all 8-byte arrays are dropped and the
                // remaining byte arrays are deserialized back into objects.
                // NOTE(review): FirstOrDefault() and First() each enumerate `result`; if the worker
                // function returns a lazy sequence it is evaluated more than once - confirm this is acceptable.
                if (result.FirstOrDefault() is byte[] && (result.First() as byte[]).Length == 8)
                {
                    result = result.Where(e => (e as byte[]).Length != 8).Select(e => formatter.Deserialize(new MemoryStream(e as byte[])));
                }

                return(_mockRddProxy.Object);
            });

            _streamingContext = new StreamingContext(new SparkContext("", ""), 1000L);
        }
Example #6
0
 /// <summary>
 /// Bundles a worker function with its argument count and argument offsets.
 /// </summary>
 /// <param name="func">Worker function stored in <c>CharpWorkerFunc</c>.</param>
 /// <param name="argsCount">Value stored in <c>ArgsCount</c>.</param>
 /// <param name="argOffsets">List stored in <c>ArgOffsets</c> (the reference is kept, not copied).</param>
 internal WorkerFunc(CSharpWorkerFunc func, int argsCount, List <int> argOffsets)
 {
     this.CharpWorkerFunc = func;
     this.ArgsCount = argsCount;
     this.ArgOffsets = argOffsets;
 }
Example #7
0
        /// <summary>
        /// Reads a command description from <paramref name="inputStream"/> and executes it.
        /// The first int read selects the path: 0 means a single non-UDF command, any other
        /// value means a SQL UDF made of one or more chained functions.
        /// </summary>
        /// <param name="inputStream">Stream the command and, later, the function input are read from.</param>
        /// <param name="outputStream">Stream handed to ExecuteCommand for results.</param>
        /// <param name="splitIndex">Forwarded to ExecuteCommand.</param>
        /// <param name="bootTime">Forwarded to ExecuteCommand.</param>
        /// <returns>The <see cref="BinaryFormatter"/> instance created for this command.</returns>
        /// <exception cref="NotSupportedException">The stream announces a UDF count other than 1.</exception>
        private static IFormatter ProcessCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime)
        {
            int isSqlUdf = SerDe.ReadInt(inputStream);

            logger.LogDebug("Is func Sql UDF = {0}", isSqlUdf);

            IFormatter formatter = new BinaryFormatter();

            if (isSqlUdf == 0)
            {
                logger.LogDebug("Processing non-UDF command");
                int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
                logger.LogDebug("Command length: " + lengthOfCommandByteArray);

                if (lengthOfCommandByteArray > 0)
                {
                    // Covers both reading and executing the command; ExecuteCommand stops it.
                    var commandProcessWatch = Stopwatch.StartNew();

                    int              stageId;
                    string           deserializerMode;
                    string           serializerMode;
                    CSharpWorkerFunc workerFunc;
                    ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
                                out workerFunc);

                    ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, workerFunc, serializerMode,
                                   formatter, commandProcessWatch, stageId, isSqlUdf);
                }
                else
                {
                    logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-(");
                }
            }
            else
            {
                logger.LogDebug("Processing UDF command");
                var udfCount = SerDe.ReadInt(inputStream);
                logger.LogDebug("Count of UDFs = {0}", udfCount);

                if (udfCount == 1)
                {
                    CSharpWorkerFunc func = null;
                    var argCount          = SerDe.ReadInt(inputStream);
                    logger.LogDebug("Count of args = {0}", argCount);

                    // The offsets are read (and logged) to advance the stream; this path does not use them further.
                    var argOffsets = new List <int>();

                    for (int argIndex = 0; argIndex < argCount; argIndex++)
                    {
                        var offset = SerDe.ReadInt(inputStream);
                        logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argIndex, offset);
                        argOffsets.Add(offset);
                    }
                    var chainedFuncCount = SerDe.ReadInt(inputStream);
                    logger.LogDebug("Count of chained func = {0}", chainedFuncCount);

                    // Start the watch here, mirroring the non-UDF path. Previously it was created but
                    // never started, so ExecuteCommand always logged a command process time of 0.
                    var    commandProcessWatch = Stopwatch.StartNew();
                    int    stageId             = -1;
                    string deserializerMode    = null;
                    string serializerMode      = null;

                    for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++)
                    {
                        int lengthOfCommandByteArray = SerDe.ReadInt(inputStream);
                        logger.LogDebug("UDF command length: " + lengthOfCommandByteArray);

                        if (lengthOfCommandByteArray > 0)
                        {
                            // stageId and the two modes are overwritten by every command read;
                            // the values of the last command in the chain are the ones used below.
                            CSharpWorkerFunc workerFunc;
                            ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
                                        out workerFunc);

                            // Compose in read order: earlier functions run first.
                            func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc);
                        }
                        else
                        {
                            logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-(");
                        }
                    }

                    Debug.Assert(stageId != -1);
                    Debug.Assert(deserializerMode != null);
                    Debug.Assert(serializerMode != null);
                    Debug.Assert(func != null);
                    ExecuteCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode, func, serializerMode, formatter,
                                   commandProcessWatch, stageId, isSqlUdf);
                }
                else
                {
                    throw new NotSupportedException(); //TODO - add support for multiple UDFs
                }
            }

            return(formatter);
        }
Example #8
0
        /// <summary>
        /// Reads the description of one or more UDFs from <paramref name="inputStream"/> and packages
        /// them, together with the execution parameters, into a <c>UDFCommand</c>.
        /// For every UDF the stream supplies: argument count, one offset per argument, the number of
        /// chained functions, and then each function as a length-prefixed command.
        /// </summary>
        /// <returns>A <c>UDFCommand</c> holding one <c>WorkerFunc</c> per UDF read.</returns>
        private static UDFCommand ProcessUdfCommand(Stream inputStream, Stream outputStream, int splitIndex,
                                                    DateTime bootTime, IFormatter formatter, int isSqlUdf)
        {
            logger.LogDebug("Processing UDF command");
            int totalUdfs = SerDe.ReadInt(inputStream);
            logger.LogDebug("Count of UDFs = {0}", totalUdfs);

            // Overwritten by every ReadCommand call; the values of the last command read are
            // the ones passed to the UDFCommand.
            int    stageId          = -1;
            string deserializerMode = null;
            string serializerMode   = null;

            // NOTE(review): the stopwatch is handed over without being started here - confirm
            // that UDFCommand starts it before reading the elapsed time.
            var commandProcessWatch = new Stopwatch();
            var collectedFuncs      = new List <WorkerFunc>();

            for (int udfNumber = 0; udfNumber < totalUdfs; udfNumber++)
            {
                int argCount = SerDe.ReadInt(inputStream);
                logger.LogDebug("Count of args = {0}", argCount);

                var offsets = new List <int>();
                for (int argNumber = 0; argNumber < argCount; argNumber++)
                {
                    int offset = SerDe.ReadInt(inputStream);
                    logger.LogDebug("UDF argIndex = {0}, Offset = {1}", argNumber, offset);
                    offsets.Add(offset);
                }

                int chainLength = SerDe.ReadInt(inputStream);
                logger.LogDebug("Count of chained func = {0}", chainLength);

                // Functions are composed in read order: earlier ones run first.
                CSharpWorkerFunc composed = null;
                for (int link = 0; link < chainLength; link++)
                {
                    int commandLength = SerDe.ReadInt(inputStream);
                    logger.LogDebug("UDF command length: " + commandLength);

                    if (commandLength <= 0)
                    {
                        logger.LogWarn("UDF lengthOfCommandByteArray = 0. Nothing to execute :-(");
                        continue;
                    }

                    CSharpWorkerFunc next;
                    ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode,
                                out next);

                    if (composed == null)
                    {
                        composed = next;
                    }
                    else
                    {
                        composed = CSharpWorkerFunc.Chain(composed, next);
                    }
                }

                Debug.Assert(stageId != -1);
                Debug.Assert(deserializerMode != null);
                Debug.Assert(serializerMode != null);
                Debug.Assert(composed != null);

                collectedFuncs.Add(new WorkerFunc(composed, argCount, offsets));
            }

            return new UDFCommand(inputStream, outputStream, splitIndex, bootTime, deserializerMode,
                                  serializerMode, formatter, commandProcessWatch, isSqlUdf, collectedFuncs, stageId);
        }
Example #9
0
        /// <summary>
        /// Reads one command from <paramref name="networkStream"/>: diagnostics info (stage id),
        /// deserializer mode, serializer mode, run mode and finally the serialized worker function.
        /// </summary>
        /// <param name="networkStream">Stream the command is read from; fields are consumed in order.</param>
        /// <param name="formatter">Formatter used to deserialize the worker function bytes.</param>
        /// <param name="stageId">Receives the value returned by ReadDiagnosticsInfo.</param>
        /// <param name="deserializerMode">Receives the first string read after the diagnostics info.</param>
        /// <param name="serializerMode">Receives the second string read after the diagnostics info.</param>
        /// <param name="workerFunc">Receives the deserialized worker function.</param>
        private static void ReadCommand(Stream networkStream, IFormatter formatter, out int stageId,
            out string deserializerMode,
            out string serializerMode, out CSharpWorkerFunc workerFunc)
        {
            stageId = ReadDiagnosticsInfo(networkStream);

            deserializerMode = SerDe.ReadString(networkStream);
            logger.LogDebug("Deserializer mode: " + deserializerMode);
            serializerMode = SerDe.ReadString(networkStream);
            logger.LogDebug("Serializer mode: " + serializerMode);

            // Run mode "R" carries an extra directory string; assemblies matching
            // "ReplCompilation.*" in that directory are loaded before deserializing the function.
            string runMode = SerDe.ReadString(networkStream);
            if ("R".Equals(runMode, StringComparison.InvariantCultureIgnoreCase))
            {
                var compilationDumpDir = SerDe.ReadString(networkStream);
                if (Directory.Exists(compilationDumpDir))
                {
                    assemblyHandler.LoadAssemblies(Directory.GetFiles(compilationDumpDir, "ReplCompilation.*",
                        SearchOption.TopDirectoryOnly));
                }
                else
                {
                    // Best effort: a missing directory is logged and deserialization is still attempted.
                    logger.LogError("Directory " + compilationDumpDir + " does not exist.");
                }
            }

            byte[] command = SerDe.ReadBytes(networkStream);

            logger.LogDebug("command bytes read: " + command.Length);

            // Dispose the temporary stream as soon as the function has been deserialized.
            using (var stream = new MemoryStream(command))
            {
                workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream);
            }

            // Building the stack-trace dump is skipped entirely unless debug logging is enabled.
            if (!logger.IsDebugEnabled)
            {
                return;
            }

            var sb = new StringBuilder(Environment.NewLine);
            sb.AppendLine(
                "------------------------ Printing stack trace of workerFunc for ** debugging ** ------------------------------");
            sb.AppendLine(workerFunc.StackTrace);
            sb.AppendLine(
                "--------------------------------------------------------------------------------------------------------------");
            logger.LogDebug(sb.ToString());
        }
Example #10
0
        /// <summary>
        /// Runs the worker function over the iterator built from <paramref name="inputStream"/>,
        /// writes every non-null result to <paramref name="outputStream"/>, then writes
        /// diagnostics info and logs timing statistics.
        /// </summary>
        /// <param name="inputStream">Stream handed to GetIterator to produce the function's input.</param>
        /// <param name="outputStream">Stream that results and diagnostics info are written to.</param>
        /// <param name="splitIndex">Value passed as the first argument of the worker function.</param>
        /// <param name="bootTime">Forwarded to WriteDiagnosticsInfo.</param>
        /// <param name="deserializerMode">Forwarded to GetIterator.</param>
        /// <param name="workerFunc">Wrapper whose <c>Func</c> delegate is executed.</param>
        /// <param name="serializerMode">Forwarded to WriteOutput.</param>
        /// <param name="formatter">Forwarded to WriteOutput.</param>
        /// <param name="commandProcessWatch">Stopwatch owned by the caller; it is stopped here and its elapsed time is logged.</param>
        /// <param name="stageId">Stage id, used only in the statistics log line.</param>
        /// <param name="isSqlUdf">Forwarded to GetIterator.</param>
        private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime,
                     string deserializerMode, CSharpWorkerFunc workerFunc, string serializerMode,
                     IFormatter formatter, Stopwatch commandProcessWatch, int stageId, int isSqlUdf)
        {
            int count = 0;
            int nullMessageCount = 0;
            logger.LogDebug("Beginning to execute func");
            var func = workerFunc.Func;

            // funcProcessWatch is meant to run only while the worker function is producing the next
            // element; it is paused for the duration of each loop body (output serialization).
            var funcProcessWatch = Stopwatch.StartNew();
            DateTime initTime = DateTime.UtcNow;
            foreach (var message in func(splitIndex, GetIterator(inputStream, deserializerMode, isSqlUdf)))
            {
                funcProcessWatch.Stop();

                if (object.ReferenceEquals(null, message))
                {
                    nullMessageCount++;
                    // Resume timing before asking for the next element; otherwise the time needed
                    // to produce the element following a null message would go unmeasured.
                    funcProcessWatch.Start();
                    continue;
                }

                try
                {
                    WriteOutput(outputStream, serializerMode, message, formatter);
                }
                catch (Exception)
                {
                    logger.LogError("WriteOutput() failed at iteration {0}", count);
                    throw;
                }

                count++;
                funcProcessWatch.Start();
            }

            // The watch is still running after the final (terminating) iterator step; stop it so
            // that logging and diagnostics below are not attributed to the worker function.
            funcProcessWatch.Stop();

            logger.LogInfo("Output entries count: " + count);
            logger.LogDebug("Null messages count: " + nullMessageCount);

            //if profiler:
            //    profiler.profile(process)
            //else:
            //    process()

            WriteDiagnosticsInfo(outputStream, bootTime, initTime);

            commandProcessWatch.Stop();

            // log statistics
            logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds);
            logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds);
        }