//
// Reports a vertex failure. Invoked by the generated vertex code and by VertexBridge.
// The exception text goes to the log and to the standalone file named by
// VERTEX_EXCEPTION_FILENAME ("VertexException.txt" in the working directory).
//
public static void ReportVertexError(Exception e)
{
    // The same exception object is normally reported twice in a row:
    //   1) the generated vertex code catches it, calls ReportVertexError and rethrows;
    //   2) VertexBridge then receives it wrapped in a TargetInvocationException,
    //      unwraps it and calls ReportVertexError again.
    // The second report is still required because some failures (notably a
    // TypeLoadException raised by a static constructor) occur in the vertex DLL
    // before its try/catch is entered, so step 1 never sees them.
    // Hence: remember the last reported object and ignore an immediate repeat.
    bool alreadyReported = (s_lastReportedException == e);
    if (alreadyReported)
    {
        return;
    }
    s_lastReportedException = e;

    // Record the failure in the log.
    DryadLinqLog.Add("Vertex failed with the following exception:");
    DryadLinqLog.Add("{0}", e.ToString());

    // Dump the same text to the standalone vertex exception file.
    using (StreamWriter dumpWriter = new StreamWriter(VERTEX_EXCEPTION_FILENAME))
    {
        dumpWriter.WriteLine(e.ToString());
    }

    // A non-zero ErrorCode means the exception is not rethrown from here.
    if (ErrorCode != 0)
    {
        return;
    }
    throw e;
}
/// <summary>
/// Endless loop that removes cache entries whose reference count is zero whenever
/// the system reports less than 4 GB of available physical memory.
/// </summary>
/// <param name="stateInfo">Not used by this method.</param>
private unsafe static void DoEviction(object stateInfo)
{
    // NOTE(review): there is no pause between iterations, so this loop polls the
    // memory status continuously -- confirm that this is intended.
    while (true)
    {
        try
        {
            MEMORYSTATUSEX memStatus = new MEMORYSTATUSEX();
            memStatus.dwLength = (UInt32)sizeof(MEMORYSTATUSEX);

            // Query the memory status once; the result of the call decides whether
            // memStatus can be trusted.
            bool queried = HpcLinqNative.GlobalMemoryStatusEx(ref memStatus);
            if (queried && memStatus.ullAvailPhys < 4 * 1024 * 1024 * 1024UL)
            {
                // Perform eviction only when feeling memory pressure
                lock (s_cache)
                {
                    // Snapshot the keys first: removing entries while the lazy
                    // Where() query is still enumerating s_cache would invalidate
                    // the enumerator and abort the eviction pass.
                    var evictableKeys = s_cache.Where(x => x.Value.RefCount == 0)
                                               .Select(x => x.Key)
                                               .ToList();
                    foreach (var key in evictableKeys)
                    {
                        s_cache.Remove(key);
                    }
                }
            }
        }
        catch (Exception e)
        {
            // Best effort: log and keep the eviction loop alive.
            DryadLinqLog.Add("Exception occurred when performing cache eviction: {0}.", e.Message);
        }
    }
}
// The Vertex Host native layer will use this bridge method to invoke the vertex
// entry point instead of invoking it directly through the CLR host.
// This has the advantage of doing all the assembly load and invoke work for the
// generated vertex assembly in a managed context, so that any type or
// assembly load exceptions can be caught and reported in full detail.
//
// logFileName:      file passed to DryadLinqLog.Initialize.
// vertexBridgeArgs: exactly four comma-separated fields:
//                   module name, class name, method name, native channel string.
//
// Every failure is passed to ReportVertexError; it is rethrown only when ErrorCode is 0.
private static void VertexBridge(string logFileName, string vertexBridgeArgs)
{
    DryadLinqLog.Initialize(Constants.LoggingInfoLevel, logFileName);
    DryadLinqLog.AddInfo(".NET runtime version = v{0}.{1}.{2}",
                         Environment.Version.Major,
                         Environment.Version.Minor,
                         Environment.Version.Build);
    DryadLinqLog.AddInfo(".NET runtime GC = {0}({1})",
                         (GCSettings.IsServerGC) ? "ServerGC" : "WorkstationGC",
                         GCSettings.LatencyMode);

    try
    {
        string[] splitArgs = vertexBridgeArgs.Split(',');
        if (splitArgs.Length != 4)
        {
            throw new ArgumentException(string.Format(SR.VertexBridgeBadArgs, vertexBridgeArgs),
                                        "vertexBridgeArgs");
        }

        // We assume that the vertex DLL is in the job dir (currently always one level up from the WD).
        string moduleName = Path.Combine("..", splitArgs[0]);
        string className = splitArgs[1];
        string methodName = splitArgs[2];
        string nativeChannelString = splitArgs[3];

        Assembly vertexAssembly = Assembly.LoadFrom(moduleName);
        DryadLinqLog.AddInfo("Vertex Bridge loaded assembly {0}", vertexAssembly.Location);

        // throwOnError: true makes a missing class fail with a descriptive
        // exception instead of a NullReferenceException on the next line.
        Type vertexType = vertexAssembly.GetType(className, true);

        MethodInfo vertexMethod = vertexType.GetMethod(methodName,
                                                       BindingFlags.Static | BindingFlags.Public);
        if (vertexMethod == null)
        {
            // Name the missing entry point rather than failing with a bare
            // NullReferenceException inside Invoke.
            throw new MissingMethodException(className, methodName);
        }

        vertexMethod.Invoke(null, new object[] { nativeChannelString });
    }
    catch (Exception e)
    {
        // Any exception that happens in the vertex code will come wrapped in a
        // TargetInvocationException since we're using Invoke(). We only want to
        // report the inner exception in this case. If the exception is of another
        // type (most likely one coming from the Assembly.LoadFrom() call), then
        // we will report it as is.
        if (e is TargetInvocationException && e.InnerException != null)
        {
            ReportVertexError(e.InnerException);
            if (ErrorCode == 0)
            {
                throw e.InnerException;
            }
        }
        else
        {
            ReportVertexError(e);
            if (ErrorCode == 0)
            {
                throw;
            }
        }
    }
}
/// <summary>
/// Logs how many records were read from this object and the time span of the reads.
/// When LastReadTime still equals FirstReadTime it is first set to the current local time.
/// </summary>
internal void AddLogEntry()
{
    const string TimestampFormat = "MM/dd/yyyy HH:mm:ss.fff";

    bool lastReadNotRecorded = (this.LastReadTime == this.FirstReadTime);
    if (lastReadNotRecorded)
    {
        this.LastReadTime = DateTime.Now;
    }

    DryadLinqLog.Add(
        "Read {0} records from {1} from {2} to {3} ",
        this.RecordsRead,
        this.ToString(),
        this.FirstReadTime.ToString(TimestampFormat),
        this.LastReadTime.ToString(TimestampFormat));
}
/// <summary>
/// Closes the channel once: flushes it, closes the native port and logs the event.
/// Later calls skip that work. Finalization is suppressed on every call that
/// reaches the end of the method.
/// </summary>
internal override void Close()
{
    bool firstClose = !this.m_isClosed;
    if (firstClose)
    {
        // Mark as closed before doing the work so a repeated call does nothing.
        this.m_isClosed = true;

        this.Flush();
        DryadLinqNative.Close(this.m_vertexInfo, this.m_portNum);

        string direction = this.m_isInput ? "Input" : "Output";
        string messageFormat = direction + " channel {0} was closed.";
        DryadLinqLog.AddInfo(messageFormat, this.m_portNum);
    }

    GC.SuppressFinalize(this);
}
// Called by DryadLinqVertexWrite.WriteItemSequence, DataProvider.IngressDirectlyToDsc etc.
//
// Records are written synchronously until InitRecords of them have been written
// (the original note gives a default of 100). At that point the method decides whether
// to switch to a background writer thread using two swap buffers. Once the worker
// exists, records are only appended to the front buffer and handed over when it is full.
public void WriteRecordAsync(T rec)
{
    if (this.m_worker != null)
    {
        // Asynchronous mode: the front buffer is full, so hand it to the worker.
        if (this.m_index1 == this.m_buffer1.Length)
        {
            lock (this.m_lockObj)
            {
                // Wait until the worker has marked the back buffer as free (-1).
                while (this.m_count2 != -1)
                {
                    Monitor.Wait(this.m_lockObj);
                }

                // Swap the buffers and publish how many records the worker must write.
                T[] filled = this.m_buffer1;
                this.m_buffer1 = this.m_buffer2;
                this.m_buffer2 = filled;
                this.m_count2 = this.m_index1;
                this.m_index1 = 0;
                Monitor.Pulse(this.m_lockObj);
            }
        }

        this.m_buffer1[this.m_index1++] = rec;
        return;
    }

    // Synchronous mode.
    this.WriteRecord(rec);
    this.m_numRecordsWritten++;
    if (this.m_numRecordsWritten != InitRecords)
    {
        return;
    }

    // Decide if we want to use async and the buffer size
    Int32 capacity = (this.BufferSizeHint / (4 * (Int32)this.Length)) * InitRecords;
    if (this.BufferSizeHint <= (64 * BufferMaxSize) || capacity <= 1)
    {
        return;
    }

    capacity = Math.Min(capacity, BufferMaxSize);
    this.m_buffer1 = new T[capacity];
    this.m_buffer2 = new T[capacity];
    this.m_index1 = 0;
    this.m_count2 = -1;
    this.m_isClosed = false;
    this.m_worker = new Thread(this.WriteBuffer);
    this.m_worker.Start();
    DryadLinqLog.AddInfo("Async writer with buffer size {0}", capacity);
}
/// <summary>
/// Decrements the reference count of the cache entry stored under <paramref name="key"/>.
/// A missing entry, or an entry whose count is not positive, is only logged.
/// </summary>
/// <param name="key">Key of the cache entry to release.</param>
public static void DecRefCount(string key)
{
    lock (s_cache)
    {
        CacheRecord entry;
        if (!s_cache.TryGetValue(key, out entry))
        {
            DryadLinqLog.Add("Can't find the cache entry with key {0}.", key);
            return;
        }

        if (entry.RefCount <= 0)
        {
            DryadLinqLog.Add("The reference count of the cache entry {0} is already 0.", key);
            return;
        }

        entry.RefCount--;
    }
}
/// <summary>
/// Honors the DRYADLINQ_DEBUGVERTEX environment variable. If it is unset, nothing happens.
/// The value "LAUNCH" (any casing) launches a debugger. Any other value makes the
/// method poll once per second until a debugger is attached and then break into it.
/// </summary>
public static void CheckVertexDebugRequest()
{
    string request = Environment.GetEnvironmentVariable("DRYADLINQ_DEBUGVERTEX");
    if (request == null)
    {
        return;
    }

    bool launchRequested = string.Equals(request, "LAUNCH", StringComparison.OrdinalIgnoreCase);
    if (launchRequested)
    {
        System.Diagnostics.Debugger.Launch();
        return;
    }

    DryadLinqLog.AddInfo("Waiting for debugger to attach...");
    while (!System.Diagnostics.Debugger.IsAttached)
    {
        System.Threading.Thread.Sleep(1000);
    }
    System.Diagnostics.Debugger.Break();
}
/// <summary>
/// Body of the reader thread. While holding m_bufLck it repeatedly fills m_buffer2
/// via ReadRecord, publishing the number of records read in m_count2.
/// </summary>
/// <remarks>
/// The loop ends when m_count2 is -2, when the buffer could not be filled completely,
/// or when an exception occurs. In the last case the exception is stored in
/// m_workerException and the lock is pulsed before returning.
/// </remarks>
private void FillBuffer()
{
    int threadId = Thread.CurrentThread.ManagedThreadId;
    DryadLinqLog.AddInfo("DryadLinqRecordReader reader thread started. ThreadId=" + threadId);

    lock (this.m_bufLck)
    {
        for (;;)
        {
            try
            {
                // Block while the previous batch is still pending
                // (presumably until the consumer resets m_count2).
                while (this.m_count2 > 0)
                {
                    Monitor.Wait(this.m_bufLck);
                }

                // -2 tells this thread to stop.
                if (this.m_count2 == -2)
                {
                    return;
                }

                // Read records until the buffer is full or ReadRecord reports no more.
                this.m_count2 = 0;
                while (this.m_count2 < this.m_buffer2.Length &&
                       this.ReadRecord(ref this.m_buffer2[this.m_count2]))
                {
                    this.m_count2++;
                }
                Monitor.Pulse(this.m_bufLck);

                // A partially filled buffer means ReadRecord returned false: stop reading.
                bool readStopped = this.m_count2 < this.m_buffer2.Length;
                if (readStopped)
                {
                    return;
                }
            }
            catch (Exception failure)
            {
                // Store the failure for whoever inspects m_workerException, then wake waiters.
                this.m_workerException = failure;
                Monitor.Pulse(this.m_bufLck);
                return;
            }
        }
    }
}
/// <summary>
/// Body of the writer thread. Waits until m_count2 differs from -1, writes that many
/// records from m_buffer2, then resets m_count2 to -1 and pulses the lock.
/// The loop stops after a batch if m_isClosed is set. Any exception is logged and rethrown.
/// </summary>
private void WriteBuffer()
{
    try
    {
        bool finished = false;
        while (!finished)
        {
            // Wait for a batch to be published (m_count2 != -1).
            lock (this.m_lockObj)
            {
                while (this.m_count2 == -1)
                {
                    Monitor.Wait(this.m_lockObj);
                }
            }

            // Write the records
            for (int pos = 0; pos < this.m_count2; pos++)
            {
                this.WriteRecord(this.m_buffer2[pos]);
            }
            this.m_numRecordsWritten += this.m_count2;

            // Mark the buffer as free again and wake any thread waiting on the lock.
            lock (this.m_lockObj)
            {
                this.m_count2 = -1;
                Monitor.Pulse(this.m_lockObj);
                finished = this.m_isClosed;
            }
        }
    }
    catch (Exception failure)
    {
        DryadLinqLog.AddInfo(failure.ToString());
        throw;
    }
}
/// <summary>
/// Flushes the writer (via Flush(true)), closes it via CloseInternal, and then logs
/// the number of records written together with this writer's string form.
/// </summary>
public void Close()
{
    this.Flush(true);
    this.CloseInternal();

    // Both values are read only after the close has completed.
    var recordsWritten = this.m_numRecordsWritten;
    string description = this.ToString();
    DryadLinqLog.AddInfo("Wrote {0} records to {1}", recordsWritten, description);
}
/// <summary>
/// First-phase sampler: lazily yields keys selected from <paramref name="source"/> with
/// probability SAMPLE_RATE. If fewer than ten samples were drawn by the time the source
/// is exhausted, every key seen is yielded instead.
/// </summary>
/// <typeparam name="T">Type of the source records.</typeparam>
/// <typeparam name="K">Type of the keys.</typeparam>
/// <param name="source">Records to sample.</param>
/// <param name="keySelector">Extracts the key from a record.</param>
/// <param name="denv">Vertex environment; its native handle supplies the vertex id used as seed.</param>
/// <returns>The sampled keys, or all keys when sampling produced fewer than ten.</returns>
public static IEnumerable<K> Phase1Sampling<T, K>(IEnumerable<T> source,
                                                  Func<T, K> keySelector,
                                                  VertexEnv denv)
{
    // Number of samples that must be collected before the "keep everything" list is dropped.
    const int minSampleCount = 10;

    // note: vertexID is constant for each repetition of a specific vertex (eg in fail-and-retry scenarios)
    // this is very good as it ensure the sampling is idempotent w.r.t. retries.
    long vertexID = DryadLinqNative.GetVertexId(denv.NativeHandle);
    int seed = unchecked((int)(vertexID));

    long nEmitted = 0;
    Random rdm = new Random(seed);

    List<K> allSoFar = new List<K>();
    List<K> samples = new List<K>();

    // The enumerator is disposable; 'using' releases it when the iteration completes
    // or when the caller disposes this iterator early.
    using (IEnumerator<T> sourceEnumerator = source.GetEnumerator())
    {
        // try to collect enough samples, but keep all the records just in case
        while (sourceEnumerator.MoveNext())
        {
            T elem = sourceEnumerator.Current;
            K key = keySelector(elem);
            allSoFar.Add(key);
            if (rdm.NextDouble() < SAMPLE_RATE)
            {
                samples.Add(key);
                if (samples.Count >= minSampleCount)
                {
                    break;
                }
            }
        }

        if (samples.Count >= minSampleCount)
        {
            // we have lots of samples.. emit them and continue sampling
            allSoFar = null; // not needed.
            foreach (K key in samples)
            {
                yield return key;
                nEmitted++;
            }

            while (sourceEnumerator.MoveNext())
            {
                T elem = sourceEnumerator.Current;
                if (rdm.NextDouble() < SAMPLE_RATE)
                {
                    yield return keySelector(elem);
                    nEmitted++;
                }
            }
        }
        else
        {
            // sampling didn't produce much, so emit all the records instead.
            DryadLinqLog.AddInfo("Sampling produced only {0} records. Emitting all records instead.",
                                 samples.Count);
            Debug.Assert(sourceEnumerator.MoveNext() == false, "The source enumerator wasn't finished");
            samples = null; // the samples list is not needed.
            foreach (K key in allSoFar)
            {
                yield return key;
                nEmitted++;
            }
        }
    }

    DryadLinqLog.AddInfo("Stage1 sampling: num keys emitted = {0}", nEmitted);
}
RangeSamplerCore<K>(IEnumerable<K> firstPhaseSamples, IComparer<K> comparer, bool isDescending, int pcount)
{
    // Second-phase sampler. Reduces the first-phase samples to at most
    // MAX_SECOND_PHASE_SAMPLES keys with reservoir sampling, sorts them with
    // 'comparer', and yields pcount-1 separator keys (ascending, or descending
    // when isDescending is true). Nothing is yielded when there are no samples.
    // With fewer samples than pcount, every sample is yielded and the sequence
    // is padded with the final separator up to pcount-1 keys.

    //Reservoir sampling to produce at most MAX_SECOND_PHASE_SAMPLES records.
    K[] samples = new K[MAX_SECOND_PHASE_SAMPLES];
    int inputCount = 0;
    int reservoirCount = 0;

    // fixed-seed is ok here as second-phase-sampler is a singleton vertex. Idempotency is important.
    Random r = new Random(314159);

    foreach (K key in firstPhaseSamples) // this completely enumerates each source in turn.
    {
        if (inputCount < MAX_SECOND_PHASE_SAMPLES)
        {
            // The reservoir is not full yet: keep every key.
            samples[reservoirCount] = key;
            reservoirCount++;
        }
        else
        {
            // 'inputCount' keys were seen before this one, so this is key number
            // inputCount+1 and must be kept with probability
            // MAX_SECOND_PHASE_SAMPLES / (inputCount + 1). Random.Next(n) returns a
            // value in 0..n-1, so the bound has to be inputCount + 1; with a bound of
            // inputCount the first key past the reservoir size would always replace a slot.
            int idx = r.Next(inputCount + 1);
            if (idx < MAX_SECOND_PHASE_SAMPLES)
            {
                samples[idx] = key;
            }
        }
        inputCount++;
    }

    // Sort and Emit the keys
    Array.Sort(samples, 0, reservoirCount, comparer);

    DryadLinqLog.AddVerbose("Range-partition separator keys: ");
    DryadLinqLog.AddVerbose("samples: {0}", reservoirCount);
    DryadLinqLog.AddVerbose("pCount: {0}", pcount);

    if (reservoirCount == 0)
    {
        DryadLinqLog.AddVerbose(" case: cnt==0. No separators produced.");
        yield break;
    }

    if (reservoirCount < pcount)
    {
        // Fewer samples than partitions: emit every sample, then pad.
        if (isDescending)
        {
            for (int i = reservoirCount - 1; i >= 0; i--)
            {
                yield return samples[i];
            }

            // Pad with the smallest key, which is the last separator in descending order.
            K first = samples[0];
            for (int i = reservoirCount; i < pcount - 1; i++)
            {
                yield return first;
            }
        }
        else
        {
            for (int i = 0; i < reservoirCount; i++)
            {
                yield return samples[i];
            }

            // Pad with the largest key, which is the last separator in ascending order.
            K last = samples[reservoirCount - 1];
            for (int i = reservoirCount; i < pcount - 1; i++)
            {
                yield return last;
            }
        }
    }
    else
    {
        // At least pcount samples: pick pcount-1 keys at a stride of reservoirCount / pcount.
        int intv = reservoirCount / pcount;
        if (isDescending)
        {
            int idx = reservoirCount - intv;
            for (int i = 0; i < pcount - 1; i++)
            {
                yield return samples[idx];
                idx -= intv;
            }
        }
        else
        {
            int idx = intv;
            for (int i = 0; i < pcount - 1; i++)
            {
                yield return samples[idx];
                idx += intv;
            }
        }
    }
}