// The Vertex Host native layer will use this bridge method to invoke the vertex
// entry point instead of invoking it directly through the CLR host.
// This has the advantage of doing all the assembly load and invoke work for the
// generated vertex assembly to happen in a managed context, so that any type or
// assembly load exceptions can be caught and reported in full detail.
//
// logFileName:      file the DryadLinqLog is initialized with.
// vertexBridgeArgs: exactly four comma-separated fields:
//                   "<vertex dll>,<class name>,<method name>,<native channel string>".
//
// Every failure (bad arguments, assembly/type/method resolution, or an exception
// thrown by the vertex itself) is passed to ReportVertexError, and is rethrown
// when ErrorCode is 0.
private static void VertexBridge(string logFileName, string vertexBridgeArgs)
{
    DryadLinqLog.Initialize(Constants.LoggingInfoLevel, logFileName);
    DryadLinqLog.AddInfo(".NET runtime version = v{0}.{1}.{2}",
                         Environment.Version.Major,
                         Environment.Version.Minor,
                         Environment.Version.Build);
    DryadLinqLog.AddInfo(".NET runtime GC = {0}({1})",
                         (GCSettings.IsServerGC) ? "ServerGC" : "WorkstationGC",
                         GCSettings.LatencyMode);

    try
    {
        // Validate inside the try block so that a bad argument is reported through
        // ReportVertexError like every other failure.
        if (vertexBridgeArgs == null)
        {
            throw new ArgumentNullException("vertexBridgeArgs");
        }

        string[] splitArgs = vertexBridgeArgs.Split(',');
        if (splitArgs.Length != 4)
        {
            throw new ArgumentException(string.Format(SR.VertexBridgeBadArgs, vertexBridgeArgs),
                                        "vertexBridgeArgs");
        }

        // We assume that the vertex DLL is in the job dir (currently always one level up from the WD).
        string moduleName = Path.Combine("..", splitArgs[0]);
        string className = splitArgs[1];
        string methodName = splitArgs[2];
        string nativeChannelString = splitArgs[3];

        Assembly vertexAssembly = Assembly.LoadFrom(moduleName);
        DryadLinqLog.AddInfo("Vertex Bridge loaded assembly {0}", vertexAssembly.Location);

        // throwOnError: true makes a missing class surface as a descriptive
        // TypeLoadException instead of a NullReferenceException on the next line.
        Type vertexType = vertexAssembly.GetType(className, true);

        MethodInfo vertexMethod = vertexType.GetMethod(methodName,
                                                       BindingFlags.Static | BindingFlags.Public);
        if (vertexMethod == null)
        {
            // Name the missing entry point explicitly rather than failing with a
            // NullReferenceException on Invoke.
            throw new MissingMethodException(className, methodName);
        }

        vertexMethod.Invoke(null, new object[] { nativeChannelString });
    }
    catch (Exception e)
    {
        // Any exception that happens in the vertex code will come wrapped in a
        // TargetInvocationException since we're using Invoke(). We only want to
        // report the inner exception in this case. If the exception is of another
        // type (most likely one coming from the Assembly.LoadFrom() call), then
        // we will report it as is.
        if (e is TargetInvocationException && e.InnerException != null)
        {
            ReportVertexError(e.InnerException);
            if (ErrorCode == 0)
            {
                throw e.InnerException;
            }
        }
        else
        {
            ReportVertexError(e);
            if (ErrorCode == 0)
            {
                throw;
            }
        }
    }
}
/// <summary>
/// Closes this channel once: flushes it, closes the native channel identified by
/// m_vertexInfo / m_portNum, and logs the closure. Repeated calls skip that work.
/// Finalization is suppressed on every call that completes.
/// </summary>
internal override void Close()
{
    bool firstClose = !this.m_isClosed;
    if (firstClose)
    {
        // Mark as closed before doing the work, exactly as callers have always seen it.
        this.m_isClosed = true;
        this.Flush();
        DryadLinqNative.Close(this.m_vertexInfo, this.m_portNum);

        string direction;
        if (this.m_isInput)
        {
            direction = "Input";
        }
        else
        {
            direction = "Output";
        }
        DryadLinqLog.AddInfo(direction + " channel {0} was closed.", this.m_portNum);
    }

    GC.SuppressFinalize(this);
}
/// <summary>
/// Logs how many records were read from this reader, together with the first and
/// last read timestamps. When LastReadTime still equals FirstReadTime it is first
/// set to the current local time.
/// </summary>
internal void AddLogEntry()
{
    const string timestampFormat = "MM/dd/yyyy HH:mm:ss.fff";

    if (this.LastReadTime == this.FirstReadTime)
    {
        this.LastReadTime = DateTime.Now;
    }

    // Evaluate the log arguments in the same left-to-right order as a direct call would.
    var recordCount = this.RecordsRead;
    string readerName = this.ToString();
    string firstStamp = this.FirstReadTime.ToString(timestampFormat);
    string lastStamp = this.LastReadTime.ToString(timestampFormat);

    DryadLinqLog.AddInfo("Read {0} records from {1} from {2} to {3} ",
                         recordCount,
                         readerName,
                         firstStamp,
                         lastStamp);
}
// Called by DryadLinqVertexWrite.WriteItemSequence, DataProvider.IngressDirectlyToDsc etc.
//
// Writes one record. Records are written synchronously through WriteRecord until
// exactly InitRecords of them have been written; at that point the method decides
// whether to switch to a background writer thread (WriteBuffer) fed through two
// swap buffers. Once the worker exists, records are only appended to m_buffer1 here.
// Note: InitRecords is reportedly 100 by default -- confirm against its declaration.
public void WriteRecordAsync(T rec)
{
    if (this.m_worker == null)
    {
        // Synchronous phase: no worker thread has been started.
        this.WriteRecord(rec);
        this.m_numRecordsWritten++;
        if (this.m_numRecordsWritten == InitRecords)
        {
            // Decide if we want to use async and the buffer size
            // NOTE(review): presumably Length is the number of bytes written so far, making
            // Length / InitRecords an average record size -- confirm. Integer division;
            // a Length of 0 would divide by zero here.
            Int32 bsize = (this.BufferSizeHint / (4 * (Int32)this.Length)) * InitRecords;
            if (this.BufferSizeHint > (64 * BufferMaxSize) && bsize > 1)
            {
                bsize = Math.Min(bsize, BufferMaxSize);
                this.m_buffer1 = new T[bsize];
                this.m_buffer2 = new T[bsize];
                this.m_index1 = 0;
                // -1 marks m_buffer2 as holding nothing to write; WriteBuffer waits while
                // m_count2 == -1 and resets it to -1 after draining.
                this.m_count2 = -1;
                this.m_isClosed = false;
                this.m_worker = new Thread(this.WriteBuffer);
                this.m_worker.Start();
                DryadLinqLog.AddInfo("Async writer with buffer size {0}", bsize);
            }
        }
    }
    else
    {
        // Asynchronous phase: fill m_buffer1; hand it to the worker when full.
        if (this.m_index1 == this.m_buffer1.Length)
        {
            lock (this.m_lockObj)
            {
                // Wait until the worker has finished with the previous batch.
                while (this.m_count2 != -1)
                {
                    Monitor.Wait(this.m_lockObj);
                }
                // Swap the buffers so the full one becomes m_buffer2, publish its
                // record count in m_count2, and wake the worker.
                T[] temp = this.m_buffer1;
                this.m_buffer1 = this.m_buffer2;
                this.m_buffer2 = temp;
                this.m_count2 = this.m_index1;
                this.m_index1 = 0;
                Monitor.Pulse(this.m_lockObj);
            }
        }
        this.m_buffer1[this.m_index1++] = rec;
    }
}
/// <summary>
/// Honors a debugging request made through the DRYADLINQ_DEBUGVERTEX environment
/// variable. Does nothing when the variable is not set. The value "LAUNCH"
/// (case-insensitive) launches a debugger; any other value makes the method poll
/// once per second until a debugger is attached, then break into it.
/// </summary>
public static void CheckVertexDebugRequest()
{
    string request = Environment.GetEnvironmentVariable("DRYADLINQ_DEBUGVERTEX");
    if (request == null)
    {
        return;
    }

    bool launchRequested = string.Equals(request, "LAUNCH", StringComparison.OrdinalIgnoreCase);
    if (launchRequested)
    {
        System.Diagnostics.Debugger.Launch();
        return;
    }

    DryadLinqLog.AddInfo("Waiting for debugger to attach...");
    while (true)
    {
        if (System.Diagnostics.Debugger.IsAttached)
        {
            break;
        }
        System.Threading.Thread.Sleep(1000);
    }
    System.Diagnostics.Debugger.Break();
}
// Body of the reader worker thread: repeatedly fills m_buffer2 with records obtained
// from ReadRecord and publishes the number of records read in m_count2.
//
// The whole loop runs while holding m_bufLck; the lock is only released inside
// Monitor.Wait. The method returns when m_count2 is -2, when ReadRecord returns false
// before the buffer is full, or when an exception occurs (which is stored in
// m_workerException rather than thrown).
private void FillBuffer()
{
    DryadLinqLog.AddInfo("DryadLinqRecordReader reader thread started. ThreadId=" +
                         Thread.CurrentThread.ManagedThreadId);
    lock (this.m_bufLck)
    {
        while (true)
        {
            try
            {
                // Wait while the previously published batch is still pending.
                // NOTE(review): presumably the consuming side resets m_count2 and pulses
                // m_bufLck once it has taken the batch -- confirm against the consumer.
                while (this.m_count2 > 0)
                {
                    Monitor.Wait(this.m_bufLck);
                }
                // -2 is a sentinel that stops this thread.
                // NOTE(review): presumably set when the reader is closed -- confirm.
                if (this.m_count2 == -2)
                {
                    return;
                }
                this.m_count2 = 0;
                // Read until the buffer is full or ReadRecord reports no more records.
                while (this.m_count2 < this.m_buffer2.Length &&
                       this.ReadRecord(ref this.m_buffer2[this.m_count2]))
                {
                    this.m_count2++;
                }
                Monitor.Pulse(this.m_bufLck);
                // A partially filled buffer means ReadRecord returned false: this was the
                // last batch, so the thread ends.
                if (this.m_count2 < this.m_buffer2.Length)
                {
                    return;
                }
            }
            catch (Exception e)
            {
                // Record the failure and wake any waiter on m_bufLck instead of throwing
                // on this thread.
                this.m_workerException = e;
                Monitor.Pulse(this.m_bufLck);
                return;
            }
        }
    }
}
// Body of the async writer worker thread (started by WriteRecordAsync): waits for a
// batch to be published in m_buffer2 / m_count2, writes it with WriteRecord, then marks
// the buffer as free again by setting m_count2 back to -1. The loop ends after a batch
// has been written while m_isClosed is true. Any exception is logged and rethrown.
private void WriteBuffer()
{
    try
    {
        while (true)
        {
            lock (this.m_lockObj)
            {
                // -1 means there is no batch to write yet.
                while (this.m_count2 == -1)
                {
                    Monitor.Wait(this.m_lockObj);
                }
            }

            // Write the records
            // NOTE(review): m_buffer2, m_count2 and m_numRecordsWritten are accessed here
            // without holding m_lockObj; WriteRecordAsync only swaps the buffers while
            // m_count2 == -1, which appears to be what makes this safe -- confirm.
            for (int i = 0; i < this.m_count2; i++)
            {
                this.WriteRecord(this.m_buffer2[i]);
            }
            this.m_numRecordsWritten += this.m_count2;

            lock (this.m_lockObj)
            {
                // Hand the buffer back and wake a thread waiting on m_lockObj.
                this.m_count2 = -1;
                Monitor.Pulse(this.m_lockObj);
                if (this.m_isClosed)
                {
                    break;
                }
            }
        }
    }
    catch (Exception e)
    {
        // Log before rethrowing so the failure is in the log; the rethrow leaves the
        // exception unhandled on this worker thread.
        DryadLinqLog.AddInfo(e.ToString());
        throw;
    }
}
/// <summary>
/// Reports a vertex failure. Called by the generated vertex code as well as by
/// VertexBridge. The exception text is added to the DryadLinq log and written to
/// the file named by VERTEX_EXCEPTION_FILENAME; afterwards the exception is thrown
/// when ErrorCode is 0.
/// </summary>
/// <remarks>
/// Reporting the same exception object twice in a row is a no-op. This is the
/// common case: the generated vertex code catches an exception, reports it and
/// rethrows, and VertexBridge then receives it wrapped in a
/// TargetInvocationException and reports the inner exception again. The second
/// report is still needed because some exceptions (notably TypeLoadException from
/// static constructors) occur in the vertex DLL before the vertex entry point's
/// own try/catch is entered.
/// </remarks>
/// <param name="e">The exception that caused this method to be called.</param>
public static void ReportVertexError(Exception e)
{
    // Same object as the last report: already handled, nothing more to do.
    bool alreadyReported = object.ReferenceEquals(s_lastReportedException, e);
    if (alreadyReported)
    {
        return;
    }
    s_lastReportedException = e;

    // Record the failure in the DryadLinq log.
    DryadLinqLog.AddInfo("Vertex failed with the following exception:");
    DryadLinqLog.AddInfo("{0}", e.ToString());

    // Also dump it to the standalone vertex exception file.
    using (StreamWriter writer = new StreamWriter(VERTEX_EXCEPTION_FILENAME))
    {
        writer.WriteLine(e.ToString());
    }

    if (ErrorCode != 0)
    {
        return;
    }
    throw e;
}
/// <summary>
/// Closes the writer: calls Flush(true), then CloseInternal(), and finally logs
/// the number of records written together with this writer's string form.
/// </summary>
public void Close()
{
    Flush(true);
    CloseInternal();

    // Read the counter only after the flush and close have completed.
    var writtenCount = m_numRecordsWritten;
    string writerName = ToString();
    DryadLinqLog.AddInfo("Wrote {0} records to {1}", writtenCount, writerName);
}
/// <summary>
/// Lazily produces a sample of the keys of <paramref name="source"/>. Each record
/// is selected with probability SAMPLE_RATE. If fewer than 10 samples were
/// selected by the time the source is exhausted, the keys of all records are
/// emitted instead.
/// </summary>
/// <typeparam name="T">The record type of the source.</typeparam>
/// <typeparam name="K">The key type.</typeparam>
/// <param name="source">The records to sample.</param>
/// <param name="keySelector">Extracts the key from a record.</param>
/// <param name="denv">The vertex environment; its native handle supplies the vertex id
/// that seeds the random number generator.</param>
/// <returns>The sampled keys (or all keys when sampling produced fewer than 10).</returns>
public static IEnumerable<K> Phase1Sampling<T, K>(IEnumerable<T> source,
                                                  Func<T, K> keySelector,
                                                  VertexEnv denv)
{
    // Number of samples that must be collected before sampling is trusted.
    const int minSamples = 10;

    // note: vertexID is constant for each repetition of a specific vertex (eg in fail-and-retry scenarios)
    // this is very good as it ensure the sampling is idempotent w.r.t. retries.
    long vertexID = DryadLinqNative.GetVertexId(denv.NativeHandle);
    int seed = unchecked((int)(vertexID));
    long nEmitted = 0;
    Random rdm = new Random(seed);

    List<K> allSoFar = new List<K>();
    List<K> samples = new List<K>();

    // The enumerator is disposed when the iteration completes and also when the
    // consumer abandons this iterator early (the iterator's Dispose runs the using).
    using (IEnumerator<T> sourceEnumerator = source.GetEnumerator())
    {
        // try to collect 10 samples, but keep all the records just in case
        while (sourceEnumerator.MoveNext())
        {
            T elem = sourceEnumerator.Current;
            K key = keySelector(elem);
            allSoFar.Add(key);
            if (rdm.NextDouble() < SAMPLE_RATE)
            {
                samples.Add(key);
                if (samples.Count >= minSamples)
                {
                    break;
                }
            }
        }

        if (samples.Count >= minSamples)
        {
            // we have lots of samples.. emit them and continue sampling
            allSoFar = null; // not needed.
            foreach (K key in samples)
            {
                yield return key;
                nEmitted++;
            }
            while (sourceEnumerator.MoveNext())
            {
                T elem = sourceEnumerator.Current;
                if (rdm.NextDouble() < SAMPLE_RATE)
                {
                    yield return keySelector(elem);
                    nEmitted++;
                }
            }
        }
        else
        {
            // sampling didn't produce much, so emit all the records instead.
            DryadLinqLog.AddInfo("Sampling produced only {0} records. Emitting all records instead.",
                                 samples.Count);
            // This branch is only reached after MoveNext returned false above.
            Debug.Assert(sourceEnumerator.MoveNext() == false, "The source enumerator wasn't finished");
            samples = null; // the samples list is not needed.
            foreach (K key in allSoFar)
            {
                yield return key;
                nEmitted++;
            }
        }
    }

    DryadLinqLog.AddInfo("Stage1 sampling: num keys emitted = {0}", nEmitted);
}