public static int Run(Socket sock = null) { // if there exists exe.config file, then use log4net if (File.Exists(AppDomain.CurrentDomain.SetupInformation.ConfigurationFile)) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); } logger = LoggerServiceFactory.GetLogger(typeof(Worker)); if (sock == null) { try { PrintFiles(); int javaPort = int.Parse(Console.ReadLine()); logger.LogDebug("java_port: " + javaPort); sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); sock.Connect(IPAddress.Loopback, javaPort); } catch (Exception e) { logger.LogError("CSharpWorker failed with exception:"); logger.LogException(e); Environment.Exit(-1); } } using (NetworkStream s = new NetworkStream(sock)) { try { DateTime bootTime = DateTime.UtcNow; int splitIndex = SerDe.ReadInt(s); logger.LogDebug("split_index: " + splitIndex); if (splitIndex == -1) { Environment.Exit(-1); } string ver = SerDe.ReadString(s); logger.LogDebug("ver: " + ver); //// initialize global state //shuffle.MemoryBytesSpilled = 0 //shuffle.DiskBytesSpilled = 0 // fetch name of workdir string sparkFilesDir = SerDe.ReadString(s); logger.LogDebug("spark_files_dir: " + sparkFilesDir); //SparkFiles._root_directory = sparkFilesDir //SparkFiles._is_running_on_worker = True // fetch names of includes - not used //TODO - complete the impl int numberOfIncludesItems = SerDe.ReadInt(s); logger.LogDebug("num_includes: " + numberOfIncludesItems); if (numberOfIncludesItems > 0) { for (int i = 0; i < numberOfIncludesItems; i++) { string filename = SerDe.ReadString(s); } } // fetch names and values of broadcast variables int numBroadcastVariables = SerDe.ReadInt(s); logger.LogDebug("num_broadcast_variables: " + numBroadcastVariables); if (numBroadcastVariables > 0) { for (int i = 0; i < numBroadcastVariables; i++) { long bid = SerDe.ReadLong(s); if (bid >= 0) { string path = SerDe.ReadString(s); Broadcast.broadcastRegistry[bid] = new Broadcast(path); } else { bid = -bid - 1; Broadcast.broadcastRegistry.Remove(bid); } } } Accumulator.accumulatorRegistry.Clear(); int lengthOfCommandByteArray = SerDe.ReadInt(s); logger.LogDebug("command length: " + lengthOfCommandByteArray); IFormatter formatter = new BinaryFormatter(); if (lengthOfCommandByteArray > 0) { Stopwatch commandProcessWatch = new Stopwatch(); Stopwatch funcProcessWatch = new Stopwatch(); commandProcessWatch.Start(); int rddId = SerDe.ReadInt(s); int stageId = SerDe.ReadInt(s); int partitionId = SerDe.ReadInt(s); logger.LogInfo(string.Format("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId)); string deserializerMode = SerDe.ReadString(s); logger.LogDebug("Deserializer mode: " + deserializerMode); string serializerMode = SerDe.ReadString(s); logger.LogDebug("Serializer mode: " + serializerMode); byte[] command = SerDe.ReadBytes(s); logger.LogDebug("command bytes read: " + command.Length); var stream = new MemoryStream(command); var workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream); var func = workerFunc.Func; //logger.LogDebug("------------------------ Printing stack trace of workerFunc for ** debugging ** ------------------------------"); //logger.LogDebug(workerFunc.StackTrace); //logger.LogDebug("--------------------------------------------------------------------------------------------------------------"); DateTime initTime = DateTime.UtcNow; int count = 0; // here we use low level API because we need to get perf metrics WorkerInputEnumerator inputEnumerator = new WorkerInputEnumerator(s, deserializerMode); IEnumerable <dynamic> inputEnumerable = Enumerable.Cast <dynamic>(inputEnumerator); funcProcessWatch.Start(); IEnumerable <dynamic> outputEnumerable = func(splitIndex, inputEnumerable); var outputEnumerator = outputEnumerable.GetEnumerator(); funcProcessWatch.Stop(); while (true) { funcProcessWatch.Start(); bool hasNext = outputEnumerator.MoveNext(); funcProcessWatch.Stop(); if (!hasNext) { break; } funcProcessWatch.Start(); var message = outputEnumerator.Current; funcProcessWatch.Stop(); if (object.ReferenceEquals(null, message)) { continue; } byte[] buffer; switch ((SerializedMode)Enum.Parse(typeof(SerializedMode), serializerMode)) { case SerializedMode.None: buffer = message as byte[]; break; case SerializedMode.String: buffer = SerDe.ToBytes(message as string); break; case SerializedMode.Row: Pickler pickler = new Pickler(); buffer = pickler.dumps(new ArrayList { message }); break; default: try { var ms = new MemoryStream(); formatter.Serialize(ms, message); buffer = ms.ToArray(); } catch (Exception) { logger.LogError("Exception serializing output"); logger.LogError("{0} : {1}", message.GetType().Name, message.GetType().FullName); throw; } break; } count++; SerDe.Write(s, buffer.Length); SerDe.Write(s, buffer); } //TODO - complete the impl logger.LogDebug("Output entries count: " + count); //if profiler: // profiler.profile(process) //else: // process() DateTime finish_time = DateTime.UtcNow; const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; logger.LogDebug(string.Format("bootTime: {0}, initTime: {1}, finish_time: {2}", bootTime.ToString(format), initTime.ToString(format), finish_time.ToString(format))); SerDe.Write(s, (int)SpecialLengths.TIMING_DATA); SerDe.Write(s, ToUnixTime(bootTime)); SerDe.Write(s, ToUnixTime(initTime)); SerDe.Write(s, ToUnixTime(finish_time)); SerDe.Write(s, 0L); //shuffle.MemoryBytesSpilled SerDe.Write(s, 0L); //shuffle.DiskBytesSpilled commandProcessWatch.Stop(); // log statistics inputEnumerator.LogStatistic(); logger.LogInfo(string.Format("func process time: {0}", funcProcessWatch.ElapsedMilliseconds)); logger.LogInfo(string.Format("command process time: {0}", commandProcessWatch.ElapsedMilliseconds)); } else { logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-("); } // Mark the beginning of the accumulators section of the output SerDe.Write(s, (int)SpecialLengths.END_OF_DATA_SECTION); SerDe.Write(s, Accumulator.accumulatorRegistry.Count); foreach (var item in Accumulator.accumulatorRegistry) { var ms = new MemoryStream(); var value = item.Value.GetType().GetField("value", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(item.Value); logger.LogDebug(string.Format("({0}, {1})", item.Key, value)); formatter.Serialize(ms, new KeyValuePair <int, dynamic>(item.Key, value)); byte[] buffer = ms.ToArray(); SerDe.Write(s, buffer.Length); SerDe.Write(s, buffer); } int end = SerDe.ReadInt(s); // check end of stream if (end == (int)SpecialLengths.END_OF_STREAM) { SerDe.Write(s, (int)SpecialLengths.END_OF_STREAM); logger.LogDebug("END_OF_STREAM: " + (int)SpecialLengths.END_OF_STREAM); } else { // write a different value to tell JVM to not reuse this worker SerDe.Write(s, (int)SpecialLengths.END_OF_DATA_SECTION); Environment.Exit(-1); } s.Flush(); // log bytes read and write logger.LogDebug(string.Format("total read bytes: {0}", SerDe.totalReadNum)); logger.LogDebug(string.Format("total write bytes: {0}", SerDe.totalWriteNum)); } catch (Exception e) { logger.LogError(e.ToString()); try { SerDe.Write(s, e.ToString()); } catch (IOException) { // JVM close the socket } catch (Exception ex) { logger.LogError("CSharpWorker failed with exception:"); logger.LogException(ex); } Environment.Exit(-1); return(-1); } return(0); } }
private static IFormatter ProcessCommand(Stream networkStream, int splitIndex, DateTime bootTime) { int lengthOfCommandByteArray = SerDe.ReadInt(networkStream); logger.LogDebug("command length: " + lengthOfCommandByteArray); IFormatter formatter = new BinaryFormatter(); if (lengthOfCommandByteArray > 0) { var commandProcessWatch = new Stopwatch(); var funcProcessWatch = new Stopwatch(); commandProcessWatch.Start(); int stageId = ReadDiagnosticsInfo(networkStream); string deserializerMode = SerDe.ReadString(networkStream); logger.LogDebug("Deserializer mode: " + deserializerMode); string serializerMode = SerDe.ReadString(networkStream); logger.LogDebug("Serializer mode: " + serializerMode); byte[] command = SerDe.ReadBytes(networkStream); logger.LogDebug("command bytes read: " + command.Length); var stream = new MemoryStream(command); var workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream); var func = workerFunc.Func; logger.LogDebug( "------------------------ Printing stack trace of workerFunc for ** debugging ** ------------------------------"); logger.LogDebug(workerFunc.StackTrace); logger.LogDebug( "--------------------------------------------------------------------------------------------------------------"); DateTime initTime = DateTime.UtcNow; // here we use low level API because we need to get perf metrics var inputEnumerator = new WorkerInputEnumerator(networkStream, deserializerMode); IEnumerable <dynamic> inputEnumerable = inputEnumerator.Cast <dynamic>(); funcProcessWatch.Start(); IEnumerable <dynamic> outputEnumerable = func(splitIndex, inputEnumerable); var outputEnumerator = outputEnumerable.GetEnumerator(); funcProcessWatch.Stop(); int count = 0; int nullMessageCount = 0; while (true) { funcProcessWatch.Start(); bool hasNext = outputEnumerator.MoveNext(); funcProcessWatch.Stop(); if (!hasNext) { break; } funcProcessWatch.Start(); var message = outputEnumerator.Current; funcProcessWatch.Stop(); if (object.ReferenceEquals(null, message)) { nullMessageCount++; continue; } WriteOutput(networkStream, serializerMode, message, formatter); count++; } logger.LogDebug("Output entries count: " + count); logger.LogDebug("Null messages count: " + nullMessageCount); //if profiler: // profiler.profile(process) //else: // process() WriteDiagnosticsInfo(networkStream, bootTime, initTime); commandProcessWatch.Stop(); // log statistics inputEnumerator.LogStatistic(); logger.LogInfo(string.Format("func process time: {0}", funcProcessWatch.ElapsedMilliseconds)); logger.LogInfo(string.Format("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds)); } else { logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-("); } return(formatter); }
private static IFormatter ProcessCommand(Stream networkStream, int splitIndex, DateTime bootTime) { int lengthOfCommandByteArray = SerDe.ReadInt(networkStream); logger.LogDebug("command length: " + lengthOfCommandByteArray); IFormatter formatter = new BinaryFormatter(); if (lengthOfCommandByteArray > 0) { var commandProcessWatch = new Stopwatch(); var funcProcessWatch = new Stopwatch(); commandProcessWatch.Start(); int stageId = ReadDiagnosticsInfo(networkStream); string deserializerMode = SerDe.ReadString(networkStream); logger.LogDebug("Deserializer mode: " + deserializerMode); string serializerMode = SerDe.ReadString(networkStream); logger.LogDebug("Serializer mode: " + serializerMode); byte[] command = SerDe.ReadBytes(networkStream); logger.LogDebug("command bytes read: " + command.Length); var stream = new MemoryStream(command); var workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream); var func = workerFunc.Func; logger.LogDebug( "------------------------ Printing stack trace of workerFunc for ** debugging ** ------------------------------"); logger.LogDebug(workerFunc.StackTrace); logger.LogDebug( "--------------------------------------------------------------------------------------------------------------"); DateTime initTime = DateTime.UtcNow; // here we use low level API because we need to get perf metrics var inputEnumerator = new WorkerInputEnumerator(networkStream, deserializerMode); IEnumerable<dynamic> inputEnumerable = inputEnumerator.Cast<dynamic>(); funcProcessWatch.Start(); IEnumerable<dynamic> outputEnumerable = func(splitIndex, inputEnumerable); var outputEnumerator = outputEnumerable.GetEnumerator(); funcProcessWatch.Stop(); int count = 0; int nullMessageCount = 0; while (true) { funcProcessWatch.Start(); bool hasNext = outputEnumerator.MoveNext(); funcProcessWatch.Stop(); if (!hasNext) { break; } funcProcessWatch.Start(); var message = outputEnumerator.Current; funcProcessWatch.Stop(); if (object.ReferenceEquals(null, message)) { nullMessageCount++; continue; } WriteOutput(networkStream, serializerMode, message, formatter); count++; } logger.LogDebug("Output entries count: " + count); logger.LogDebug("Null messages count: " + nullMessageCount); //if profiler: // profiler.profile(process) //else: // process() WriteDiagnosticsInfo(networkStream, bootTime, initTime); commandProcessWatch.Stop(); // log statistics inputEnumerator.LogStatistic(); logger.LogInfo(string.Format("func process time: {0}", funcProcessWatch.ElapsedMilliseconds)); logger.LogInfo(string.Format("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds)); } else { logger.LogWarn("lengthOfCommandByteArray = 0. Nothing to execute :-("); } return formatter; }
static void Main(string[] args) { // if there exists exe.config file, then use log4net if (File.Exists(AppDomain.CurrentDomain.SetupInformation.ConfigurationFile)) { LoggerServiceFactory.SetLoggerService(Log4NetLoggerService.Instance); } logger = LoggerServiceFactory.GetLogger(typeof(Worker)); Socket sock = null; try { PrintFiles(); int javaPort = int.Parse(Console.ReadLine()); logger.LogInfo("java_port: " + javaPort); sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); sock.Connect(IPAddress.Loopback, javaPort); } catch (Exception e) { logger.LogError("CSharpWorker failed with exception:"); logger.LogException(e); Environment.Exit(-1); } using (NetworkStream s = new NetworkStream(sock)) { try { DateTime bootTime = DateTime.UtcNow; int splitIndex = SerDe.ReadInt(s); logger.LogInfo("split_index: " + splitIndex); if (splitIndex == -1) Environment.Exit(-1); string ver = SerDe.ReadString(s); logger.LogInfo("ver: " + ver); //// initialize global state //shuffle.MemoryBytesSpilled = 0 //shuffle.DiskBytesSpilled = 0 // fetch name of workdir string sparkFilesDir = SerDe.ReadString(s); logger.LogInfo("spark_files_dir: " + sparkFilesDir); //SparkFiles._root_directory = sparkFilesDir //SparkFiles._is_running_on_worker = True // fetch names of includes - not used //TODO - complete the impl int numberOfIncludesItems = SerDe.ReadInt(s); logger.LogInfo("num_includes: " + numberOfIncludesItems); if (numberOfIncludesItems > 0) { for (int i = 0; i < numberOfIncludesItems; i++) { string filename = SerDe.ReadString(s); } } // fetch names and values of broadcast variables int numBroadcastVariables = SerDe.ReadInt(s); logger.LogInfo("num_broadcast_variables: " + numBroadcastVariables); if (numBroadcastVariables > 0) { for (int i = 0; i < numBroadcastVariables; i++) { long bid = SerDe.ReadLong(s); if (bid >= 0) { string path = SerDe.ReadString(s); Broadcast.broadcastRegistry[bid] = new Broadcast(path); } else { bid = -bid - 1; Broadcast.broadcastRegistry.Remove(bid); } } } Accumulator.accumulatorRegistry.Clear(); int lengthOCommandByteArray = SerDe.ReadInt(s); logger.LogInfo("command_len: " + lengthOCommandByteArray); IFormatter formatter = new BinaryFormatter(); if (lengthOCommandByteArray > 0) { Stopwatch commandProcessWatch = new Stopwatch(); Stopwatch funcProcessWatch = new Stopwatch(); commandProcessWatch.Start(); int rddId = SerDe.ReadInt(s); int stageId = SerDe.ReadInt(s); int partitionId = SerDe.ReadInt(s); logger.LogInfo(string.Format("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId)); string deserializerMode = SerDe.ReadString(s); logger.LogInfo("Deserializer mode: " + deserializerMode); string serializerMode = SerDe.ReadString(s); logger.LogInfo("Serializer mode: " + serializerMode); byte[] command = SerDe.ReadBytes(s); logger.LogInfo("command bytes read: " + command.Length); var stream = new MemoryStream(command); var workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream); var func = workerFunc.Func; logger.LogInfo(string.Format("stack trace of workerFunc (dont't panic, this is just for debug):\n{0}", workerFunc.StackTrace)); DateTime initTime = DateTime.UtcNow; int count = 0; // here we use low level API because we need to get perf metrics WorkerInputEnumerator inputEnumerator = new WorkerInputEnumerator(s, deserializerMode); IEnumerable<dynamic> inputEnumerable = Enumerable.Cast<dynamic>(inputEnumerator); funcProcessWatch.Start(); IEnumerable<dynamic> outputEnumerable = func(splitIndex, inputEnumerable); var outputEnumerator = outputEnumerable.GetEnumerator(); funcProcessWatch.Stop(); while (true) { funcProcessWatch.Start(); bool hasNext = outputEnumerator.MoveNext(); funcProcessWatch.Stop(); if (!hasNext) { break; } funcProcessWatch.Start(); var message = outputEnumerator.Current; funcProcessWatch.Stop(); if (object.ReferenceEquals(null, message)) { continue; } byte[] buffer; if (serializerMode == "None") { buffer = message as byte[]; } else if (serializerMode == "String") { buffer = SerDe.ToBytes(message as string); } else if (serializerMode == "Row") { Pickler pickler = new Pickler(); buffer = pickler.dumps(new ArrayList { message }); } else { try { var ms = new MemoryStream(); formatter.Serialize(ms, message); buffer = ms.ToArray(); } catch (Exception) { logger.LogError(string.Format("{0} : {1}", message.GetType().Name, message.GetType().FullName)); throw; } } count++; SerDe.Write(s, buffer.Length); SerDe.Write(s, buffer); } //TODO - complete the impl logger.LogInfo("Count: " + count); //if profiler: // profiler.profile(process) //else: // process() DateTime finish_time = DateTime.UtcNow; const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; logger.LogInfo(string.Format("bootTime: {0}, initTime: {1}, finish_time: {2}", bootTime.ToString(format), initTime.ToString(format), finish_time.ToString(format))); SerDe.Write(s, (int)SpecialLengths.TIMING_DATA); SerDe.Write(s, ToUnixTime(bootTime)); SerDe.Write(s, ToUnixTime(initTime)); SerDe.Write(s, ToUnixTime(finish_time)); SerDe.Write(s, 0L); //shuffle.MemoryBytesSpilled SerDe.Write(s, 0L); //shuffle.DiskBytesSpilled commandProcessWatch.Stop(); // log statistics inputEnumerator.LogStatistic(); logger.LogInfo(string.Format("func process time: {0}", funcProcessWatch.ElapsedMilliseconds)); logger.LogInfo(string.Format("command process time: {0}", commandProcessWatch.ElapsedMilliseconds)); } else { logger.LogWarn("Nothing to execute :-("); } // Mark the beginning of the accumulators section of the output SerDe.Write(s, (int)SpecialLengths.END_OF_DATA_SECTION); SerDe.Write(s, Accumulator.accumulatorRegistry.Count); foreach (var item in Accumulator.accumulatorRegistry) { var ms = new MemoryStream(); var value = item.Value.GetType().GetField("value", BindingFlags.NonPublic | BindingFlags.Instance).GetValue(item.Value); logger.LogInfo(string.Format("({0}, {1})", item.Key, value)); formatter.Serialize(ms, new KeyValuePair<int, dynamic>(item.Key, value)); byte[] buffer = ms.ToArray(); SerDe.Write(s, buffer.Length); SerDe.Write(s, buffer); } int end = SerDe.ReadInt(s); // check end of stream if (end == (int)SpecialLengths.END_OF_STREAM) { SerDe.Write(s, (int)SpecialLengths.END_OF_STREAM); logger.LogInfo("END_OF_STREAM: " + (int)SpecialLengths.END_OF_STREAM); } else { // write a different value to tell JVM to not reuse this worker SerDe.Write(s, (int)SpecialLengths.END_OF_DATA_SECTION); Environment.Exit(-1); } s.Flush(); // log bytes read and write logger.LogInfo(string.Format("total read bytes: {0}", SerDe.totalReadNum)); logger.LogInfo(string.Format("total write bytes: {0}", SerDe.totalWriteNum)); // wait for server to complete, otherwise server gets 'connection reset' exception // Use SerDe.ReadBytes() to detect java side has closed socket properly // ReadBytes() will block until the socket is closed SerDe.ReadBytes(s); } catch (Exception e) { logger.LogError(e.ToString()); try { SerDe.Write(s, e.ToString()); } catch (IOException) { // JVM close the socket } catch (Exception ex) { logger.LogError("CSharpWorker failed with exception:"); logger.LogException(ex); } Environment.Exit(-1); } } sock.Close(); }