/// <summary>
/// Receives from every socket in <paramref name="readableSockets"/> using Linux AIO.
/// One PReadv request per socket is prepared and submitted as a batch; requests that
/// are not done after a round are resubmitted until every socket has a result.
/// </summary>
/// <param name="readableSockets">
/// Sockets to receive from. Each socket gets <c>OnReceiveFromSocket</c> called with its
/// result, and the list is cleared before the method returns.
/// </param>
/// <exception cref="NotSupportedException">
/// IoSubmit/IoGetEvents did not return the number of submitted requests, or every
/// outstanding request reported EAGAIN for <c>TransportConstants.MaxEAgainCount</c> rounds in a row.
/// </exception>
private unsafe void AioReceive(List<TSocket> readableSockets)
{
    // Per-request progress is kept in AioCb.Data:
    //   bits 32..63 = received, bits 8..31 = advanced, bits 0..7 = iovLength.
    // 'advanced' is widened to long before shifting so large values cannot overflow
    // a 32-bit shift and spill into the 'received' bits.
    long PackReceiveState(int received, int advanced, int iovLength) => ((long)received << 32) + ((long)advanced << 8) + iovLength;
    (int received, int advanced, int iovLength) UnpackReceiveState(long data) => ((int)(data >> 32), (int)((data >> 8) & 0xffffff), (int)(data & 0xff));

    int readableSocketCount = readableSockets.Count;
    AioCb* aioCbs = AioCbs;
    AioCb* aioCb = aioCbs;
    IOVector* ioVectors = IoVectorTable;
    PosixResult* receiveResults = stackalloc PosixResult[readableSocketCount];
    // socketIndices[slot] is the index in readableSockets of the request stored at aioCbs[slot].
    // It starts as the identity mapping and is compacted together with the requests.
    int* socketIndices = stackalloc int[readableSocketCount];
    bool checkAvailable = _transportOptions.CheckAvailable;
    Span<MemoryHandle> receiveMemoryHandles = MemoryHandles;
    int receiveMemoryHandleCount = 0;

    // Prepare one PReadv request per socket.
    for (int i = 0; i < readableSocketCount; i++)
    {
        TSocket socket = readableSockets[i];
        int availableBytes = !checkAvailable ? 0 : socket.GetAvailableBytes();
        int ioVectorLength = socket.CalcIOVectorLengthForReceive(availableBytes, IoVectorsPerAioSocket);
        int advanced = socket.FillReceiveIOVector(availableBytes, ioVectors, receiveMemoryHandles, ref ioVectorLength);

        aioCb->Fd = socket.Fd;
        aioCb->Data = PackReceiveState(0, advanced, ioVectorLength);
        aioCb->OpCode = AioOpCode.PReadv;
        aioCb->Buffer = ioVectors;
        aioCb->Length = ioVectorLength;
        aioCb++;

        socketIndices[i] = i;

        // Each socket consumes ioVectorLength io vectors and memory handles.
        ioVectors += ioVectorLength;
        receiveMemoryHandleCount += ioVectorLength;
        receiveMemoryHandles = receiveMemoryHandles.Slice(ioVectorLength);
    }

    int eAgainCount = 0;
    while (readableSocketCount > 0)
    {
        IntPtr ctxp = _aioContext;
        PosixResult res = AioInterop.IoSubmit(ctxp, readableSocketCount, AioCbsTable);
        if (res != readableSocketCount)
        {
            throw new NotSupportedException("Unexpected IoSubmit retval " + res);
        }

        AioEvent* aioEvents = AioEvents;
        res = AioInterop.IoGetEvents(ctxp, readableSocketCount, aioEvents);
        if (res != readableSocketCount)
        {
            throw new NotSupportedException("Unexpected IoGetEvents retval " + res);
        }

        int socketsRemaining = readableSocketCount;
        bool allEAgain = true;
        AioEvent* aioEvent = aioEvents;
        for (int i = 0; i < readableSocketCount; i++)
        {
            PosixResult result = aioEvent->Result;
            // Find the socket through the slot the completed request occupies. Using the
            // event position would be wrong once finished requests have been compacted away.
            int slot = (int)(aioEvent->AioCb - aioCbs);
            int socketIndex = socketIndices[slot];
            TSocket socket = readableSockets[socketIndex];
            (int received, int advanced, int iovLength) = UnpackReceiveState(aioEvent->Data);
            (bool done, PosixResult retval) = socket.InterpretReceiveResult(result, ref received, advanced, (IOVector*)aioEvent->AioCb->Buffer, iovLength);
            if (done)
            {
                receiveResults[socketIndex] = retval;
                socketsRemaining--;
                // Mark the request as finished so it is dropped before the next submit.
                aioEvent->AioCb->OpCode = AioOpCode.Noop;
                allEAgain = false;
            }
            else if (retval != PosixResult.EAGAIN)
            {
                // Progress was made: remember it for the next round.
                aioEvent->AioCb->Data = PackReceiveState(received, advanced, iovLength);
                allEAgain = false;
            }
            aioEvent++;
        }

        if (socketsRemaining > 0)
        {
            if (allEAgain)
            {
                eAgainCount++;
                if (eAgainCount == TransportConstants.MaxEAgainCount)
                {
                    throw new NotSupportedException("Too many EAGAIN, unable to receive available bytes.");
                }
            }
            else
            {
                // The kernel doesn't handle Noop, we need to remove them from the aioCbs.
                // Requests and their socket indices are moved together so the mapping stays valid.
                int writeSlot = 0;
                for (int readSlot = 0; readSlot < readableSocketCount; readSlot++)
                {
                    if (aioCbs[readSlot].OpCode != AioOpCode.Noop)
                    {
                        if (writeSlot != readSlot)
                        {
                            aioCbs[writeSlot] = aioCbs[readSlot];
                            socketIndices[writeSlot] = socketIndices[readSlot];
                        }
                        writeSlot++;
                    }
                }
                readableSocketCount = socketsRemaining;
                eAgainCount = 0;
            }
        }
        else
        {
            readableSocketCount = 0;
        }
    }

    // Report the results in the original socket order.
    for (int i = 0; i < readableSockets.Count; i++)
    {
        readableSockets[i].OnReceiveFromSocket(receiveResults[i]);
    }
    readableSockets.Clear();

    // Dispose the memory handles that were consumed while filling the io vectors.
    receiveMemoryHandles = MemoryHandles;
    for (int i = 0; i < receiveMemoryHandleCount; i++)
    {
        receiveMemoryHandles[i].Dispose();
    }
}
/// <summary>
/// Body of the transport thread. Calls <c>Start()</c> and reports the Started state, then
/// loops on epoll: collects ready sockets, completes zero-copy sends, handles accepts,
/// writes and reads, re-registers sockets for events, processes pipe commands and runs
/// scheduled work, until a <c>PipeStopThread</c> command is read.
/// </summary>
/// <remarks>
/// A failure in <c>Start()</c> is reported through <c>CompleteStateChange</c> and the method returns.
/// Any exception thrown by the event loop is written to the console and terminates the
/// process via <see cref="Environment.FailFast(string, Exception)"/>.
/// </remarks>
public unsafe void Run()
{
    try
    {
        Start();
        CompleteStateChange(TransportThreadState.Started);
    }
    catch (Exception e)
    {
        CompleteStateChange(TransportThreadState.Stopped, e);
        return;
    }

    try
    {
        // An event occupies 3 ints when packed and 4 ints (one padding int) otherwise.
        int notPacked = !EPoll.PackedEvents ? 1 : 0;
        var buffer = stackalloc int[EventBufferLength * (3 + notPacked)];
        int statReadEvents = 0;
        int statWriteEvents = 0;
        int statAcceptEvents = 0;
        int statAccepts = 0;
        int statZeroCopySuccess = 0;
        int statZeroCopyCopied = 0;
        var acceptableSockets = new List<TSocket>(1);
        var readableSockets = new List<TSocket>(EventBufferLength);
        var writableSockets = new List<TSocket>(EventBufferLength);
        var reregisterEventSockets = new List<TSocket>(EventBufferLength);
        var zeroCopyCompletions = new List<TSocket>(EventBufferLength);
        bool pipeReadable = false;
        bool running = true;
        do
        {
            int numEvents = EPollInterop.EPollWait(_epollFd, buffer, EventBufferLength, timeout: EPoll.TimeoutInfinite).Value;

            // actions can be scheduled without unblocking epoll
            SetEpollNotBlocked();

            // check events
            // we don't handle them immediately:
            // - this ensures we don't mismatch a closed socket with a new socket that have the same fd
            //     ~ To have the same fd, the previous fd must be closed, which means it is removed from the epoll
            //     ~ and won't show up in our next call to epoll.Wait.
            //     ~ The old fd may be present in the buffer still, but lookup won't give a match, since it is removed
            //     ~ from the dictionary before it is closed. If we were accepting already, a new socket could match.
            // - this also improves cache/cpu locality of the lookup
            int* ptr = buffer;
            lock (_sockets)
            {
                for (int i = 0; i < numEvents; i++)
                {
                    //   Packed             Non-Packed
                    //   ------             ------
                    // 0:Events       ==    Events
                    // 1:Int1 = Key         [Padding]
                    // 2:Int2 = Key   ==    Int1 = Key
                    // 3:~~~~~~~~~~         Int2 = Key
                    //                      ~~~~~~~~~~
                    EPollEvents events = (EPollEvents)ptr[0];
                    int key = ptr[2];
                    ptr += 3 + notPacked;
                    if (_sockets.TryGetValue(key, out TSocket tsocket))
                    {
                        var type = tsocket.Type;
                        if (type == SocketFlags.TypeClient)
                        {
                            lock (tsocket.Gate)
                            {
                                var pendingEventState = tsocket.PendingEventState;

                                // zero copy
                                if ((pendingEventState & EPollEvents.Error & events) != EPollEvents.None)
                                {
                                    var copyResult = SocketInterop.CompleteZeroCopy(tsocket.Fd);
                                    if (copyResult != PosixResult.EAGAIN)
                                    {
                                        events &= ~EPollEvents.Error;
                                        pendingEventState &= ~EPollEvents.Error;
                                        zeroCopyCompletions.Add(tsocket);
                                        if (copyResult == SocketInterop.ZeroCopyCopied)
                                        {
                                            // The data was copied: stop using zero copy for this socket.
                                            tsocket.ZeroCopyThreshold = LinuxTransportOptions.NoZeroCopy;
                                            statZeroCopyCopied++;
                                        }
                                        else if (copyResult == SocketInterop.ZeroCopySuccess)
                                        {
                                            statZeroCopySuccess++;
                                        }
                                        else
                                        {
                                            Environment.FailFast($"Error occurred while trying to complete zero copy: {copyResult}");
                                        }
                                    }
                                }

                                // treat Error as Readable, Writable
                                if ((events & EPollEvents.Error) != EPollEvents.None)
                                {
                                    events |= EPollEvents.Readable | EPollEvents.Writable;
                                }

                                // Only keep the events the socket is actually waiting for.
                                events &= pendingEventState & (EPollEvents.Readable | EPollEvents.Writable);

                                // readable
                                if ((events & EPollEvents.Readable) != EPollEvents.None)
                                {
                                    readableSockets.Add(tsocket);
                                    pendingEventState &= ~EPollEvents.Readable;
                                }

                                // writable
                                if ((events & EPollEvents.Writable) != EPollEvents.None)
                                {
                                    writableSockets.Add(tsocket);
                                    pendingEventState &= ~EPollEvents.Writable;
                                }

                                // reregister
                                tsocket.PendingEventState = pendingEventState;
                                if ((pendingEventState & (EPollEvents.Readable | EPollEvents.Writable)) != EPollEvents.None)
                                {
                                    tsocket.PendingEventState |= TSocket.EventControlPending;
                                    reregisterEventSockets.Add(tsocket);
                                }
                            }
                        }
                        else
                        {
                            // Accept events are counted once, when the accepts are handled below,
                            // in the same way as read and write events.
                            acceptableSockets.Add(tsocket);
                        }
                    }
                    else if (key == PipeKey)
                    {
                        pipeReadable = true;
                    }
                }
            }

            // zero copy
            for (int i = 0; i < zeroCopyCompletions.Count; i++)
            {
                zeroCopyCompletions[i].OnZeroCopyCompleted();
            }
            zeroCopyCompletions.Clear();

            // handle accepts
            statAcceptEvents += acceptableSockets.Count;
            for (int i = 0; i < acceptableSockets.Count; i++)
            {
                statAccepts += HandleAccept(acceptableSockets[i]);
            }
            acceptableSockets.Clear();

            // handle writes
            statWriteEvents += writableSockets.Count;
            for (int i = 0; i < writableSockets.Count; i++)
            {
                writableSockets[i].OnWritable(stopped: false);
            }
            writableSockets.Clear();

            // handle reads
            statReadEvents += readableSockets.Count;
            if (!_transportOptions.AioReceive)
            {
                bool checkAvailable = _transportOptions.CheckAvailable;
                Span<MemoryHandle> receiveMemoryHandles = MemoryHandles;
                for (int i = 0; i < readableSockets.Count; i++)
                {
                    TSocket socket = readableSockets[i];
                    int availableBytes = !checkAvailable ? 0 : socket.GetAvailableBytes();
                    var receiveResult = socket.Receive(availableBytes, receiveMemoryHandles);
                    socket.OnReceiveFromSocket(receiveResult);
                }
                readableSockets.Clear();
            }
            else if (readableSockets.Count > 0)
            {
                // AioReceive clears readableSockets itself.
                AioReceive(readableSockets);
            }

            // reregister for events
            for (int i = 0; i < reregisterEventSockets.Count; i++)
            {
                var tsocket = reregisterEventSockets[i];
                lock (tsocket.Gate)
                {
                    var pendingEventState = tsocket.PendingEventState & ~TSocket.EventControlPending;
                    tsocket.PendingEventState = pendingEventState;
                    UpdateEPollControl(tsocket, pendingEventState, registered: true);
                }
            }
            reregisterEventSockets.Clear();

            // handle pipe
            if (pipeReadable)
            {
                // Drain the pipe: keep reading command bytes while the read succeeds.
                PosixResult result;
                do
                {
                    result = _pipeEnds.ReadEnd.TryReadByte();
                    if (result.Value == PipeStopSockets)
                    {
                        StopSockets();
                    }
                    else if (result.Value == PipeStopThread)
                    {
                        running = false;
                    }
                    else if (result.Value == PipeCloseAccept)
                    {
                        CloseAccept();
                    }
                } while (result);
                pipeReadable = false;
            }

            // scheduled work
            // note: this may write a byte to the pipe
            DoScheduledWork(_transportOptions.AioSend);
        } while (running);

        _logger.LogDebug($"Stats A/AE:{statAccepts}/{statAcceptEvents} RE:{statReadEvents} WE:{statWriteEvents} ZCS/ZCC:{statZeroCopySuccess}/{statZeroCopyCopied}");

        CompleteStateChange(TransportThreadState.Stopped);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
        Environment.FailFast("TransportThread", ex);
    }
}