/// <summary>
/// Computes the size, in bytes, of the buffer to use when writing an output.
/// </summary>
/// <remarks>
/// The preferred size is 256 MB when <c>UseLargeBuffer</c> is set and 8 MB otherwise.
/// It is capped by a memory budget: a quarter of the available physical memory
/// (512 MB when the memory query fails), limited to 1 GB when the remote
/// architecture is "i386", and divided by the number of outputs when there are any.
/// The result is never smaller than 16 KB.
/// </remarks>
/// <returns>The write buffer size in bytes.</returns>
internal unsafe Int32 GetWriteBuffSize()
{
    const UInt64 OneKB = 1024UL;
    const UInt64 OneMB = 1024 * 1024UL;

    MEMORYSTATUSEX status = new MEMORYSTATUSEX();
    status.dwLength = (UInt32)sizeof(MEMORYSTATUSEX);

    // Budget: a quarter of the available physical memory, or 512 MB if the query fails.
    UInt64 budget = 512 * OneMB;
    bool haveStatus = DryadLinqNative.GlobalMemoryStatusEx(ref status);
    if (haveStatus)
    {
        budget = status.ullAvailPhys / 4;
    }

    if (this.m_vertexParams.RemoteArch == "i386")
    {
        budget = Math.Min(budget, 1024 * OneMB);
    }

    // The budget is shared among all outputs.
    if (this.NumberOfOutputs > 0)
    {
        budget /= this.NumberOfOutputs;
    }

    UInt64 preferred = this.UseLargeBuffer ? (256 * OneMB) : (8 * OneMB);

    // Clamp: at most the budget, but at least 16 KB.
    UInt64 size = Math.Max(Math.Min(preferred, budget), 16 * OneKB);
    return (Int32)size;
}
/// <summary>
/// Initializes an instance of VertexEnv. This is called in auto-generated vertex code.
/// </summary>
/// <param name="args">A '|'-separated argument string; the first field is the
/// native handle written as a hexadecimal number.</param>
/// <param name="vertexParams">The parameters of the vertex.</param>
public VertexEnv(string args, DryadLinqVertexParams vertexParams)
{
    this.m_vertexParams = vertexParams;
    this.m_useLargeBuffer = vertexParams.UseLargeBuffer;

    // The first argument carries the native handle in hex.
    this.m_argList = args.Split('|');
    Int64 handleValue = Int64.Parse(this.m_argList[0], NumberStyles.HexNumber);
    this.m_nativeHandle = new IntPtr(handleValue);

    this.m_numberOfInputs = DryadLinqNative.GetNumOfInputs(this.m_nativeHandle);
    this.m_numberOfOutputs = DryadLinqNative.GetNumOfOutputs(this.m_nativeHandle);

    this.m_nextInput = 0;
    this.m_nextInputPort = 0;
    this.m_nextOutputPort = 0;

    if (this.m_numberOfOutputs > 0)
    {
        this.SetInitialWriteSizeHint();
    }

    // Set the thread count for DryadLINQ vertex runtime. It defaults to the
    // processor count and can be overridden by DRYAD_THREADS_PER_WORKER.
    string requestedThreads = Environment.GetEnvironmentVariable("DRYAD_THREADS_PER_WORKER");
    DryadLinqVertex.ThreadCount = Environment.ProcessorCount;
    if (String.IsNullOrEmpty(requestedThreads))
    {
        return;
    }

    bool parsed = Int32.TryParse(requestedThreads, out DryadLinqVertex.ThreadCount);
    if (!parsed)
    {
        throw new DryadLinqException("The env variable DRYAD_THREADS_PER_WORKER was set to " + requestedThreads);
    }

    // A non-positive value falls back to the processor count.
    if (DryadLinqVertex.ThreadCount < 1)
    {
        DryadLinqVertex.ThreadCount = Environment.ProcessorCount;
    }
}
/// <summary>
/// Hands a data block back to the native layer. A zero handle is ignored.
/// </summary>
/// <param name="itemHandle">The handle of the data block to release.</param>
internal override unsafe void ReleaseDataBlock(IntPtr itemHandle)
{
    if (itemHandle == IntPtr.Zero)
    {
        // Nothing to release.
        return;
    }

    DryadLinqNative.ReleaseDataBlock(this.m_vertexInfo, itemHandle);
    // DryadLinqLog.AddInfo("Released data block {0}.", itemHandle);
}
/// <summary>
/// Gives every output an initial size hint derived from the total input size.
/// </summary>
/// <remarks>
/// When the input size is unknown (<c>GetInputSize</c> returns -1), a total of
/// 5 GB is assumed. The total is split evenly across the outputs. When there
/// are no outputs the method does nothing; previously the division below would
/// have thrown a <see cref="DivideByZeroException"/> in that case.
/// </remarks>
internal void SetInitialWriteSizeHint()
{
    // Guard the division below, as GetWriteBuffSize does for the same quantity.
    if (this.NumberOfOutputs == 0)
    {
        return;
    }

    Int64 inputSize = this.GetInputSize();
    UInt64 hsize = (inputSize == -1) ? (5 * 1024 * 1024 * 1024UL) : (UInt64)inputSize;
    hsize /= this.NumberOfOutputs;

    for (UInt32 i = 0; i < this.NumberOfOutputs; i++)
    {
        DryadLinqNative.SetInitialSizeHint(this.m_nativeHandle, i, hsize);
    }
}
/// <summary>
/// Gets the expected length of this channel as reported by the native layer.
/// </summary>
/// <returns>The value returned by the native expected-length query for this port.</returns>
/// <exception cref="NotImplementedException">The channel is not an input channel.</exception>
internal override unsafe Int64 GetTotalLength()
{
    // Only input channels support this query.
    if (!this.m_isInput)
    {
        throw new NotImplementedException();
    }

    return DryadLinqNative.GetExpectedLength(this.m_vertexInfo, this.m_portNum);
}
/// <summary>
/// Writes a block of bytes to the underlying file, compressing it when a
/// compression scheme is configured.
/// </summary>
/// <param name="itemHandle">Pointer to the first byte of the data to write.</param>
/// <param name="numBytesToWrite">The number of bytes to write.</param>
/// <returns>Always true; failures are reported by exception.</returns>
/// <exception cref="DryadLinqException">The native write failed, or the
/// compression scheme is not recognized.</exception>
internal override unsafe bool WriteDataBlock(IntPtr itemHandle, Int32 numBytesToWrite)
{
    byte* source = (byte*)itemHandle;

    if (this.m_compressionScheme != CompressionScheme.None)
    {
        // Compressed path: lazily create the compression stream on first use.
        if (this.m_compressStream == null)
        {
            if (this.m_compressionScheme != CompressionScheme.Gzip)
            {
                throw new DryadLinqException(DryadLinqErrorCode.UnknownCompressionScheme,
                                             SR.UnknownCompressionScheme);
            }
            this.m_compressStream = new GZipStream(this.m_fstream, CompressionMode.Compress);
        }

        // YY: Made an extra copy here. Could do better.
        byte[] managedCopy = new byte[numBytesToWrite];
        fixed (byte* pManagedCopy = managedCopy)
        {
            DryadLinqUtil.memcpy(source, pManagedCopy, numBytesToWrite);
        }
        this.m_compressStream.Write(managedCopy, 0, numBytesToWrite);
        return true;
    }

    // Uncompressed path: keep calling WriteFile until every byte has been written.
    Int32 written = 0;
    Int32* pWritten = &written;
    for (Int32 bytesLeft = numBytesToWrite; bytesLeft > 0; bytesLeft -= written)
    {
        bool ok = DryadLinqNative.WriteFile(this.m_fhandle,
                                            source,
                                            (UInt32)bytesLeft,
                                            (IntPtr)pWritten,
                                            null);
        if (!ok)
        {
            throw new DryadLinqException(DryadLinqErrorCode.WriteFileError,
                                         String.Format(SR.WriteFileError,
                                                       Marshal.GetLastWin32Error()));
        }

        // Advance past the bytes accepted by this call.
        source += written;
    }
    return true;
}
/// <summary>
/// Closes the channel. The first call flushes the channel, closes the native
/// port and logs the event; later calls skip those steps. Finalization of this
/// object is suppressed on every call.
/// </summary>
internal override void Close()
{
    bool alreadyClosed = this.m_isClosed;
    if (!alreadyClosed)
    {
        // Mark as closed before flushing, as the flag is checked on re-entry.
        this.m_isClosed = true;
        this.Flush();
        DryadLinqNative.Close(this.m_vertexInfo, this.m_portNum);

        string direction;
        if (this.m_isInput)
        {
            direction = "Input";
        }
        else
        {
            direction = "Output";
        }
        DryadLinqLog.AddInfo(direction + " channel {0} was closed.", this.m_portNum);
    }

    GC.SuppressFinalize(this);
}
/// <summary>
/// Gets the size of the underlying file, as reported by the native
/// <c>GetFileSizeEx</c> call.
/// </summary>
/// <returns>The file size reported for the file handle.</returns>
/// <exception cref="DryadLinqException">The native size query failed.</exception>
internal override unsafe Int64 GetTotalLength()
{
    Int64 fileSize;
    if (DryadLinqNative.GetFileSizeEx(this.m_fhandle, out fileSize))
    {
        return fileSize;
    }

    // The query failed: surface the Win32 error code in the exception message.
    throw new DryadLinqException(DryadLinqErrorCode.GetFileSizeError,
                                 String.Format(SR.GetFileSizeError,
                                               Marshal.GetLastWin32Error()));
}
/// <summary>
/// Gets the URI of this channel from the native layer, using the input or
/// output query depending on the channel direction.
/// </summary>
/// <returns>The URI, marshalled from the native ANSI string.</returns>
internal override unsafe string GetURI()
{
    IntPtr nativeUri = this.m_isInput
        ? DryadLinqNative.GetInputChannelURI(this.m_vertexInfo, this.m_portNum)
        : DryadLinqNative.GetOutputChannelURI(this.m_vertexInfo, this.m_portNum);

    return Marshal.PtrToStringAnsi(nativeUri);
}
/// <summary>
/// Asks the native layer to allocate a data block of the requested size.
/// </summary>
/// <param name="size">The requested block size.</param>
/// <returns>A descriptor holding the native item handle, the data pointer
/// filled in by the native call, and the requested size.</returns>
/// <exception cref="DryadLinqException">The native layer returned a zero handle.</exception>
internal override unsafe DataBlockInfo AllocateDataBlock(Int32 size)
{
    DataBlockInfo block;
    block.ItemHandle = DryadLinqNative.AllocateDataBlock(this.m_vertexInfo, size, &block.DataBlock);

    // A zero handle signals that the allocation failed.
    if (block.ItemHandle == IntPtr.Zero)
    {
        throw new DryadLinqException(DryadLinqErrorCode.FailedToAllocateNewNativeBuffer,
                                     String.Format(SR.FailedToAllocateNewNativeBuffer, size));
    }

    block.BlockSize = size;
    // DryadLinqLog.AddInfo("Allocated data block {0} of {1} bytes.", blockInfo.itemHandle, size);
    return block;
}
/// <summary>
/// Sums the expected lengths of all input channels.
/// </summary>
/// <returns>The total of the expected lengths, or -1 as soon as any channel
/// reports a length of -1.</returns>
internal Int64 GetInputSize()
{
    Int64 sum = 0;
    UInt32 channel = 0;
    while (channel < this.m_numberOfInputs)
    {
        Int64 expected = DryadLinqNative.GetExpectedLength(this.NativeHandle, channel);
        if (expected == -1)
        {
            // One channel reporting -1 makes the whole total -1.
            return -1;
        }

        sum += expected;
        channel++;
    }
    return sum;
}
/// <summary>
/// Reads the next block of up to <c>DefaultBuffSize</c> bytes from the
/// underlying file into a newly allocated unmanaged buffer, decompressing
/// the data when a compression scheme is configured.
/// </summary>
/// <returns>A descriptor whose data pointer and item handle both refer to the
/// unmanaged buffer, and whose block size is the number of bytes read.</returns>
/// <exception cref="DryadLinqException">The native read failed, or the
/// compression scheme is not recognized.</exception>
/// <remarks>
/// The buffer is allocated with <see cref="Marshal.AllocHGlobal(int)"/>. If
/// reading fails, the buffer is freed before the exception propagates, since
/// no handle reaches the caller in that case.
/// </remarks>
internal override unsafe DataBlockInfo ReadDataBlock()
{
    DataBlockInfo blockInfo;
    blockInfo.DataBlock = (byte *)Marshal.AllocHGlobal(DefaultBuffSize);
    blockInfo.ItemHandle = (IntPtr)blockInfo.DataBlock;

    try
    {
        if (this.m_compressionScheme == CompressionScheme.None)
        {
            // ReadFile stores the number of bytes read directly into BlockSize.
            Int32 *pBlockSize = &blockInfo.BlockSize;
            bool success = DryadLinqNative.ReadFile(this.m_fhandle,
                                                    blockInfo.DataBlock,
                                                    DefaultBuffSize,
                                                    (IntPtr)pBlockSize,
                                                    null);
            if (!success)
            {
                throw new DryadLinqException(DryadLinqErrorCode.ReadFileError,
                                             String.Format(SR.ReadFileError,
                                                           Marshal.GetLastWin32Error()));
            }
        }
        else
        {
            // Lazily create the decompression stream on first use.
            if (this.m_compressStream == null)
            {
                if (this.m_compressionScheme == CompressionScheme.Gzip)
                {
                    this.m_compressStream = new GZipStream(this.m_fstream,
                                                           CompressionMode.Decompress);
                }
                else
                {
                    throw new DryadLinqException(DryadLinqErrorCode.UnknownCompressionScheme,
                                                 SR.UnknownCompressionScheme);
                }
            }

            // YY: Made an extra copy here. Could do better.
            byte[] buffer = new byte[DefaultBuffSize];
            blockInfo.BlockSize = this.m_compressStream.Read(buffer, 0, DefaultBuffSize);
            fixed (byte *pBuffer = buffer)
            {
                DryadLinqUtil.memcpy(pBuffer, blockInfo.DataBlock, blockInfo.BlockSize);
            }
        }
        return blockInfo;
    }
    catch
    {
        // The caller never receives the handle on failure, so release the
        // unmanaged buffer here to avoid leaking it.
        Marshal.FreeHGlobal(blockInfo.ItemHandle);
        throw;
    }
}
/// <summary>
/// Reads the next data block for this port from the native layer.
/// </summary>
/// <returns>A descriptor whose item handle, data pointer and block size are
/// filled in by the native call.</returns>
/// <exception cref="DryadLinqException">The native call reported a non-zero
/// error code; the code is also stored in <c>VertexEnv.ErrorCode</c>.</exception>
internal override unsafe DataBlockInfo ReadDataBlock()
{
    DataBlockInfo block;
    Int32 nativeError = 0;
    block.ItemHandle = DryadLinqNative.ReadDataBlock(this.m_vertexInfo,
                                                     this.m_portNum,
                                                     &block.DataBlock,
                                                     &block.BlockSize,
                                                     &nativeError);
    if (nativeError == 0)
    {
        return block;
    }

    // Record the error code before reporting the failure.
    VertexEnv.ErrorCode = nativeError;
    throw new DryadLinqException(DryadLinqErrorCode.FailedToReadFromInputChannel,
                                 String.Format(SR.FailedToReadFromInputChannel,
                                               this.m_portNum, nativeError));
}
/// <summary>
/// Writes a data block to this port through the native layer. Nothing is
/// written when <paramref name="numBytesToWrite"/> is not positive.
/// </summary>
/// <param name="itemHandle">The handle of the data block to write.</param>
/// <param name="numBytesToWrite">The number of bytes to write.</param>
/// <returns>True; a failed native write is reported by exception.</returns>
/// <exception cref="DryadLinqException">The native write reported failure.</exception>
internal override unsafe bool WriteDataBlock(IntPtr itemHandle, Int32 numBytesToWrite)
{
    if (numBytesToWrite <= 0)
    {
        // Nothing to write counts as success.
        return true;
    }

    bool written = DryadLinqNative.WriteDataBlock(this.m_vertexInfo,
                                                  this.m_portNum,
                                                  itemHandle,
                                                  numBytesToWrite);
    if (!written)
    {
        throw new DryadLinqException(DryadLinqErrorCode.FailedToWriteToOutputChannel,
                                     String.Format(SR.FailedToWriteToOutputChannel,
                                                   this.m_portNum));
    }
    return written;
}
/// <summary>
/// Initializes an instance of VertexEnv. This is called in auto-generated code.
/// </summary>
/// <param name="args">A '|'-separated argument string; the first field is the
/// native handle written as a hexadecimal number.</param>
/// <param name="vertexParams">The parameters of the vertex.</param>
public VertexEnv(string args, DryadLinqVertexParams vertexParams)
{
    // Settings taken directly from the vertex parameters.
    this.m_vertexParams = vertexParams;
    this.m_useLargeBuffer = vertexParams.UseLargeBuffer;
    this.m_multiThreading = vertexParams.MultiThreading;

    // The first argument carries the native handle in hex.
    this.m_argList = args.Split('|');
    Int64 handleValue = Int64.Parse(this.m_argList[0], NumberStyles.HexNumber);
    this.m_nativeHandle = new IntPtr(handleValue);

    this.m_numberOfInputs = DryadLinqNative.GetNumOfInputs(this.m_nativeHandle);
    this.m_numberOfOutputs = DryadLinqNative.GetNumOfOutputs(this.m_nativeHandle);

    this.m_nextInput = 0;
    this.m_nextInputPort = 0;
    this.m_nextOutputPort = 0;

    if (this.m_numberOfOutputs > 0)
    {
        this.SetInitialWriteSizeHint();
    }

    // The declared arities must not exceed what the native vertex exposes.
    Debug.Assert(vertexParams.InputArity <= this.m_numberOfInputs);
    Debug.Assert(vertexParams.OutputArity <= this.m_numberOfOutputs);
}
/// <summary>
/// Lazily produces a sample of the keys of <paramref name="source"/>.
/// </summary>
/// <remarks>
/// Each record is selected with probability <c>SAMPLE_RATE</c>. Until 10 samples
/// have been collected, every key is also remembered; if the source ends before
/// 10 samples are found, all remembered keys are emitted instead of the samples.
/// The random generator is seeded from the vertex id, so the same vertex draws
/// the same random sequence each time it runs.
/// The source enumerator is disposed when iteration finishes or the returned
/// sequence's enumerator is disposed early.
/// </remarks>
/// <typeparam name="T">The record type of the source.</typeparam>
/// <typeparam name="K">The key type.</typeparam>
/// <param name="source">The records to sample.</param>
/// <param name="keySelector">Extracts the key from a record.</param>
/// <param name="denv">The vertex environment; its native handle is used to obtain the vertex id.</param>
/// <returns>The sampled keys, or all keys when fewer than 10 samples were drawn.</returns>
public static IEnumerable<K> Phase1Sampling<T, K>(IEnumerable<T> source,
                                                  Func<T, K> keySelector,
                                                  VertexEnv denv)
{
    // note: vertexID is constant for each repetition of a specific vertex (eg in fail-and-retry scenarios)
    // this is very good as it ensure the sampling is idempotent w.r.t. retries.
    long vertexID = DryadLinqNative.GetVertexId(denv.NativeHandle);
    int seed = unchecked((int)(vertexID));
    long nEmitted = 0;

    Random rdm = new Random(seed);

    List<K> allSoFar = new List<K>();
    List<K> samples = new List<K>();

    // The enumerator is IDisposable; the using block releases it even when the
    // consumer stops iterating early.
    using (IEnumerator<T> sourceEnumerator = source.GetEnumerator())
    {
        // try to collect 10 samples, but keep all the records just in case
        while (sourceEnumerator.MoveNext())
        {
            T elem = sourceEnumerator.Current;
            K key = keySelector(elem);
            allSoFar.Add(key);
            if (rdm.NextDouble() < SAMPLE_RATE)
            {
                samples.Add(key);
                if (samples.Count >= 10)
                {
                    break;
                }
            }
        }

        if (samples.Count >= 10)
        {
            // we have lots of samples.. emit them and continue sampling
            allSoFar = null; // not needed.
            foreach (K key in samples)
            {
                yield return key;
                nEmitted++;
            }
            while (sourceEnumerator.MoveNext())
            {
                T elem = sourceEnumerator.Current;
                if (rdm.NextDouble() < SAMPLE_RATE)
                {
                    yield return keySelector(elem);
                    nEmitted++;
                }
            }
        }
        else
        {
            // sampling didn't produce much, so emit all the records instead.
            DryadLinqLog.AddInfo("Sampling produced only {0} records. Emitting all records instead.",
                                 samples.Count);
            // NOTE: Debug.Assert is compiled out of non-DEBUG builds, so this
            // extra MoveNext call only happens in DEBUG builds.
            Debug.Assert(sourceEnumerator.MoveNext() == false, "The source enumerator wasn't finished");
            samples = null; // the samples list is not needed.
            foreach (K key in allSoFar)
            {
                yield return key;
                nEmitted++;
            }
        }
    }

    DryadLinqLog.AddInfo("Stage1 sampling: num keys emitted = {0}", nEmitted);
}
/// <summary>
/// Forwards a flush request for this channel's port to the native layer.
/// </summary>
internal override void Flush()
{
    DryadLinqNative.Flush(
        this.m_vertexInfo,
        this.m_portNum);
}