/// <summary>
/// Starts tracking <paramref name="tsocket"/> as an accept socket and adds it to
/// the epoll instance for readable events.
/// </summary>
/// <param name="tsocket">The socket to accept on.</param>
/// <remarks>
/// When any step throws, the socket is closed, the bookkeeping entries are
/// removed again and the exception is rethrown.
/// </remarks>
private void AcceptOn(TSocket tsocket)
{
    try
    {
        // Track the socket before arming epoll so an event can always be resolved.
        _acceptSockets.Add(tsocket);
        lock (_sockets)
        {
            _sockets.Add(tsocket.Fd, tsocket);
        }

        EPollInterop.EPollControl(
            _epollFd,
            EPollOperation.Add,
            tsocket.Fd,
            EPollEvents.Readable,
            EPollData(tsocket.Fd));
    }
    catch
    {
        // Undo everything that may have been done above, then propagate.
        tsocket.Close();
        _acceptSockets.Remove(tsocket);
        lock (_sockets)
        {
            _sockets.Remove(tsocket.Fd);
        }

        throw;
    }
}
/// <summary>
/// Registers the read end of the pipe with epoll, then creates the accept socket
/// for this thread and starts accepting on it.
/// </summary>
private void Start()
{
    // register pipe
    int pipeReadFd = _pipeEnds.ReadEnd.DangerousGetHandle().ToInt32();
    EPollInterop.EPollControl(_epollFd, EPollOperation.Add, pipeReadFd, EPollEvents.Readable, EPollData(PipeKey));

    // create accept socket
    SocketFlags socketFlags = _transportOptions.DeferSend ? SocketFlags.DeferSend : SocketFlags.None;
    var acceptThread = _transportThread.AcceptThread;

    TSocket listener;
    if (acceptThread == null)
    {
        // No accept thread: this thread creates its own accept socket.
        socketFlags |= SocketFlags.TypeAccept;
        listener = CreateAcceptSocket(_transportThread.EndPoint, socketFlags);
    }
    else
    {
        // An accept thread exists: use a receive socket created by it.
        socketFlags |= SocketFlags.TypePassFd;
        int receiveFd = acceptThread.CreateReceiveSocket();
        listener = new TSocket(this, receiveFd, socketFlags)
        {
            ZeroCopyThreshold = LinuxTransportOptions.NoZeroCopy
        };
    }

    // accept connections
    AcceptOn(listener);
}
/// <summary>
/// Thread entry point of the accept thread: waits on an epoll instance that
/// watches the accept socket and the pipe, and hands each accepted connection
/// to the handlers in round-robin order.
/// </summary>
/// <param name="state">Unused thread-start argument.</param>
/// <remarks>
/// The loop ends on the first event whose key is not the accept key (i.e. the pipe).
/// The outcome is reported through <c>_stoppedTcs</c>; <c>Cleanup()</c> always runs.
/// </remarks>
private unsafe void AcceptThreadStart(object state)
{
    try
    {
        var socket = _socket;
        using (socket)
        {
            using (EPoll epoll = EPoll.Create())
            {
                int epollFd = epoll.DangerousGetHandle().ToInt32();
                const int acceptKey = 0;
                const int pipeKey = 1;

                // accept socket
                epoll.Control(EPollOperation.Add, _socket, EPollEvents.Readable, new EPollData { Int1 = acceptKey, Int2 = acceptKey });

                // add pipe
                epoll.Control(EPollOperation.Add, _pipeEnds.ReadEnd, EPollEvents.Readable, new EPollData { Int1 = pipeKey, Int2 = pipeKey });

                const int EventBufferLength = 1;
                int notPacked = !EPoll.PackedEvents ? 1 : 0;
                var buffer = stackalloc int[EventBufferLength * (3 + notPacked)];

                // Both Int1 and Int2 carry the key, so index 2 holds the key in
                // the packed layout (Int2) as well as the non-packed one (Int1).
                int* key = &buffer[2];

                bool running = true;
                int nextHandler = 0;
                var handlers = _handlers;
                do
                {
                    int numEvents = EPollInterop.EPollWait(epollFd, buffer, EventBufferLength, timeout: EPoll.TimeoutInfinite).Value;
                    if (numEvents == 1)
                    {
                        if (*key == acceptKey)
                        {
                            // Round-robin over the handlers.
                            var handler = handlers[nextHandler];
                            nextHandler = (nextHandler + 1) % handlers.Length;
                            socket.TryAcceptAndSendHandleTo(handler);
                        }
                        else
                        {
                            // Anything other than the accept socket stops the thread.
                            running = false;
                        }
                    }
                } while (running);
            }
        }

        _stoppedTcs.TrySetResult(null);
    }
    catch (Exception e)
    {
        // Try* variant: SetException would itself throw if the task had already
        // been completed, and that exception would escape the thread entry point.
        _stoppedTcs.TrySetException(e);
    }
    finally
    {
        Cleanup();
    }
}
/// <summary>
/// Creates a new <see cref="EPoll"/> via <c>EPollInterop.EPollCreate</c>.
/// </summary>
/// <returns>The created epoll handle.</returns>
/// <remarks>A failed creation is surfaced through <c>ThrowOnError()</c>.</remarks>
public static EPoll Create()
{
    EPoll handle;
    var createResult = EPollInterop.EPollCreate(out handle);
    createResult.ThrowOnError();
    return handle;
}
/// <summary>
/// Adds or modifies the one-shot epoll registration of <paramref name="tsocket"/>.
/// Must be called under <c>tsocket.Gate</c>.
/// </summary>
/// <param name="tsocket">The socket whose registration is updated.</param>
/// <param name="flags">Requested events; only Readable, Writable and Error are kept.</param>
/// <param name="registered">
/// <c>true</c> to modify an existing registration, <c>false</c> to add a new one.
/// </param>
public void UpdateEPollControl(TSocket tsocket, EPollEvents flags, bool registered)
{
    EPollOperation operation = registered ? EPollOperation.Modify : EPollOperation.Add;

    // Drop every bit other than Readable/Writable/Error, then arm as one-shot.
    EPollEvents armedEvents =
        (flags & (EPollEvents.Readable | EPollEvents.Writable | EPollEvents.Error)) | EPollEvents.OneShot;

    EPollInterop.EPollControl(_epollFd, operation, tsocket.Fd, armedEvents, EPollData(tsocket.Fd));
}
/// <summary>
/// Arms a one-shot readable notification for <paramref name="tsocket"/> on the
/// epoll instance of its thread context.
/// </summary>
/// <param name="tsocket">The socket to watch for readability.</param>
private static void RegisterForReadable(TSocket tsocket)
{
    // SetRegistered() returning true selects Add; false selects Modify.
    EPollOperation operation = tsocket.SetRegistered()
        ? EPollOperation.Add
        : EPollOperation.Modify;

    EPollInterop.EPollControl(
        tsocket.ThreadContext.EPollFd,
        operation,
        tsocket.Fd,
        EPollEvents.Readable | EPollEvents.OneShot,
        EPollData(tsocket.Fd));
}
/// <summary>
/// Determines whether the native epoll event uses the packed or the non-packed
/// layout by comparing its size with the two managed structs.
/// </summary>
/// <exception cref="NotSupportedException">
/// The native event size matches neither managed layout.
/// </exception>
static EPoll()
{
    var nativeEventSize = EPollInterop.SizeOfEPollEvent();

    if (nativeEventSize == Marshal.SizeOf<EPollEventPacked>())
    {
        s_packedEvents = true;
        return;
    }

    if (nativeEventSize == Marshal.SizeOf<EPollEvent>())
    {
        s_packedEvents = false;
        return;
    }

    throw new NotSupportedException();
}
/// <summary>
/// Arms a one-shot writable notification for <paramref name="tsocket"/>.
/// </summary>
/// <param name="tsocket">The socket to watch for writability.</param>
/// <remarks>
/// To avoid having to synchronize the event mask with the readable registration,
/// the socket is duplicated and the duplicate is registered instead. The highest
/// bit of the epoll data (<c>DupKeyMask</c>) marks the event as the writable poll.
/// </remarks>
private static void RegisterForWritable(TSocket tsocket)
{
    EPollOperation operation = EPollOperation.Modify;
    if (tsocket.DupSocket == null)
    {
        // First writable registration: create the duplicate and add it to epoll.
        tsocket.DupSocket = tsocket.Socket.Duplicate();
        operation = EPollOperation.Add;
    }

    int epollFd = tsocket.ThreadContext.EPollFd;
    int dupFd = tsocket.DupSocket.DangerousGetHandle().ToInt32();

    EPollInterop.EPollControl(
        epollFd,
        operation,
        dupFd,
        EPollEvents.Writable | EPollEvents.OneShot,
        EPollData(tsocket.Fd | DupKeyMask));
}
/// <summary>
/// Thread entry point of a poll thread: creates the thread context, registers
/// the pipe and the accept socket with epoll and then runs the event loop until
/// a stop-thread byte is read from the pipe.
/// </summary>
/// <param name="obj">Unused thread-start argument.</param>
/// <remarks>
/// Each iteration first classifies all returned events under the sockets lock and
/// only then handles them, in the order accepts, writes, reads, pipe, scheduled work.
/// Any exception is captured and passed to <c>CompleteStateChange(State.Stopped, error)</c>.
/// </remarks>
private unsafe void PollThread(object obj)
{
    ThreadContext threadContext = null;
    Exception error = null;
    try
    {
        // .NET doesn't support setting thread affinity on Start
        // We could change it before starting the thread
        // so it gets inherited, but we don't know how many threads
        // the runtime may start.
        if (_cpuId != -1)
        {
            Scheduler.SetCurrentThreadAffinity(_cpuId);
        }

        // objects are allocated on the PollThread heap
        int pipeKey;
        threadContext = new ThreadContext(this, _transportOptions, _connectionHandler, CreateLogger());
        threadContext.Initialize();
        {
            // register pipe
            pipeKey = threadContext.PipeEnds.ReadEnd.DangerousGetHandle().ToInt32();
            EPollInterop.EPollControl(threadContext.EPollFd,
                                      EPollOperation.Add,
                                      threadContext.PipeEnds.ReadEnd.DangerousGetHandle().ToInt32(),
                                      EPollEvents.Readable,
                                      EPollData(pipeKey));

            // accept connections
            AcceptOn(_endPoint, _cpuId, _transportOptions, threadContext);

            _threadContext = threadContext;
        }

        int epollFd = threadContext.EPollFd;
        var readEnd = threadContext.PipeEnds.ReadEnd;
        int notPacked = !EPoll.PackedEvents ? 1 : 0;
        var buffer = stackalloc int[EventBufferLength * (3 + notPacked)];
        int statReadEvents = 0;
        int statWriteEvents = 0;
        int statAcceptEvents = 0;
        int statAccepts = 0;
        var sockets = threadContext.Sockets;

        var acceptableSockets = new List<TSocket>(1);
        var readableSockets = new List<TSocket>(EventBufferLength);
        var writableSockets = new List<TSocket>(EventBufferLength);
        bool pipeReadable = false;

        CompleteStateChange(State.Started);
        bool running = true;
        do
        {
            int numEvents = EPollInterop.EPollWait(epollFd, buffer, EventBufferLength, timeout: EPoll.TimeoutInfinite).Value;

            // actions can be scheduled without unblocking epoll
            threadContext.SetEpollNotBlocked();

            // check events
            // we don't handle them immediately:
            // - this ensures we don't mismatch a closed socket with a new socket that have the same fd
            //     ~ To have the same fd, the previous fd must be closed, which means it is removed from the epoll
            //     ~ and won't show up in our next call to epoll.Wait.
            //     ~ The old fd may be present in the buffer still, but lookup won't give a match, since it is removed
            //     ~ from the dictionary before it is closed. If we were accepting already, a new socket could match.
            // - this also improves cache/cpu locality of the lookup
            int* ptr = buffer;
            lock (sockets)
            {
                for (int i = 0; i < numEvents; i++)
                {
                    //   Packed             Non-Packed
                    //   ------             ------
                    // 0:Events       ==    Events
                    // 1:Int1 = Key         [Padding]
                    // 2:Int2 = Key   ==    Int1 = Key
                    // 3:~~~~~~~~~~         Int2 = Key
                    //                      ~~~~~~~~~~
                    int key = ptr[2];
                    ptr += 3 + notPacked;
                    TSocket tsocket;
                    if (sockets.TryGetValue(key & ~DupKeyMask, out tsocket))
                    {
                        var type = tsocket.Flags & SocketFlags.TypeMask;
                        if (type == SocketFlags.TypeClient)
                        {
                            // The DupKeyMask bit distinguishes the writable poll from the readable one.
                            bool read = (key & DupKeyMask) == 0;
                            if (read)
                            {
                                readableSockets.Add(tsocket);
                            }
                            else
                            {
                                writableSockets.Add(tsocket);
                            }
                        }
                        else
                        {
                            // Accept events are counted once, when the batch is handled below.
                            acceptableSockets.Add(tsocket);
                        }
                    }
                    else if (key == pipeKey)
                    {
                        pipeReadable = true;
                    }
                }
            }

            // handle accepts
            statAcceptEvents += acceptableSockets.Count;
            for (int i = 0; i < acceptableSockets.Count; i++)
            {
                statAccepts += HandleAccept(acceptableSockets[i], threadContext);
            }
            acceptableSockets.Clear();

            // handle writes
            statWriteEvents += writableSockets.Count;
            for (int i = 0; i < writableSockets.Count; i++)
            {
                writableSockets[i].CompleteWritable();
            }
            writableSockets.Clear();

            // handle reads
            statReadEvents += readableSockets.Count;
            for (int i = 0; i < readableSockets.Count; i++)
            {
                readableSockets[i].CompleteReadable();
            }
            readableSockets.Clear();

            // handle pipe
            if (pipeReadable)
            {
                PosixResult result;
                do
                {
                    result = readEnd.TryReadByte();
                    if (result.Value == PipeStopSockets)
                    {
                        StopSockets(threadContext.Sockets);
                    }
                    else if (result.Value == PipeStopThread)
                    {
                        running = false;
                    }
                } while (result);
                pipeReadable = false;
            }

            // scheduled work
            threadContext.DoScheduledWork();
        } while (running);

        threadContext.Logger.LogInformation($"Thread {_threadId}: Stats A/AE:{statAccepts}/{statAcceptEvents} RE:{statReadEvents} WE:{statWriteEvents}");
    }
    catch (Exception ex)
    {
        error = ex;
    }
    finally
    {
        // We are not using SafeHandles for epoll to increase performance.
        // running == false when there are no more Sockets
        // so we are sure there are no more epoll users.
        threadContext?.Dispose();

        CompleteStateChange(State.Stopped, error);
    }
}
/// <summary>
/// Creates a non-blocking TCP listen socket for <paramref name="endPoint"/>,
/// configures it, and registers it with the sockets and epoll instance of
/// <paramref name="threadContext"/>.
/// </summary>
/// <param name="endPoint">
/// The end point to bind to. Its <c>Port</c> is written back on success; when it
/// was 0, it receives the port of the bound socket.
/// </param>
/// <param name="cpuId">
/// CPU used for the IncomingCpu socket option; -1 skips setting that option.
/// </param>
/// <param name="transportOptions">Options controlling ReceiveOnIncomingCpu and DeferAccept.</param>
/// <param name="threadContext">The context that will own the accept socket.</param>
/// <remarks>
/// On failure the socket is disposed, any bookkeeping entries are removed and the
/// exception is rethrown.
/// </remarks>
private static void AcceptOn(IPEndPoint endPoint, int cpuId, LinuxTransportOptions transportOptions, ThreadContext threadContext)
{
    Socket acceptSocket = null;
    int fd = 0;
    int port = endPoint.Port;
    SocketFlags flags = SocketFlags.TypeAccept;
    try
    {
        bool ipv4 = endPoint.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork;
        acceptSocket = Socket.Create(ipv4 ? AddressFamily.InterNetwork : AddressFamily.InterNetworkV6, SocketType.Stream, ProtocolType.Tcp, blocking: false);
        fd = acceptSocket.DangerousGetHandle().ToInt32();
        if (!ipv4)
        {
            // Don't do mapped ipv4
            acceptSocket.SetSocketOption(SocketOptionLevel.IPv6, SocketOptionName.IPv6Only, 1);
        }
        if (transportOptions.ReceiveOnIncomingCpu && cpuId != -1)
        {
            if (!acceptSocket.TrySetSocketOption(SocketOptionLevel.Socket, SocketOptionName.IncomingCpu, cpuId))
            {
                // nameof(...) must be inside the interpolation hole; otherwise the
                // literal text "nameof" ends up in the log message.
                threadContext.Logger.LogWarning($"Cannot enable {nameof(SocketOptionName.IncomingCpu)} for {endPoint}");
            }
        }
        // Linux: allow bind during linger time
        acceptSocket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, 1);
        // Linux: allow concurrent binds and let the kernel do load-balancing
        acceptSocket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReusePort, 1);
        if (transportOptions.DeferAccept)
        {
            // Linux: wait up to 1 sec for data to arrive before accepting socket
            acceptSocket.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.DeferAccept, 1);
            flags |= SocketFlags.DeferAccept;
        }

        acceptSocket.Bind(endPoint);
        if (port == 0)
        {
            // When testing we want the OS to select a free port
            port = acceptSocket.GetLocalIPAddress().Port;
        }

        acceptSocket.Listen(ListenBacklog);
    }
    catch
    {
        acceptSocket?.Dispose();
        throw;
    }

    TSocket tsocket = null;
    var sockets = threadContext.Sockets;
    try
    {
        tsocket = new TSocket(threadContext)
        {
            Flags = flags,
            Fd = fd,
            Socket = acceptSocket
        };
        threadContext.AcceptSockets.Add(tsocket);
        lock (sockets)
        {
            sockets.Add(tsocket.Fd, tsocket);
        }

        EPollInterop.EPollControl(threadContext.EPollFd,
                                  EPollOperation.Add,
                                  fd,
                                  EPollEvents.Readable,
                                  EPollData(fd));
    }
    catch
    {
        // Roll back the registration before propagating the failure.
        acceptSocket.Dispose();
        threadContext.AcceptSockets.Remove(tsocket);
        lock (sockets)
        {
            sockets.Remove(fd);
        }
        throw;
    }

    // Only publish the (possibly OS-assigned) port once everything succeeded.
    endPoint.Port = port;
}
/// <summary>
/// Forwards a control operation for <paramref name="fd"/> on this epoll instance
/// to <c>EPollInterop.EPollControl</c>.
/// </summary>
/// <param name="operation">The control operation to perform.</param>
/// <param name="fd">The handle the operation applies to.</param>
/// <param name="events">The events to register.</param>
/// <param name="data">User data for the registration; its <c>Long</c> value is passed on.</param>
/// <returns>The result returned by the interop call.</returns>
public PosixResult TryControl(EPollOperation operation, SafeHandle fd, EPollEvents events, EPollData data)
    => EPollInterop.EPollControl(this, operation, fd, events, data.Long);
/// <summary>
/// Forwards a wait on this epoll instance to <c>EPollInterop.EPollWait</c>.
/// </summary>
/// <param name="events">Buffer that receives the events.</param>
/// <param name="maxEvents">Maximum number of events to return.</param>
/// <param name="timeout">Timeout passed through to the interop call.</param>
/// <returns>The result returned by the interop call.</returns>
public unsafe PosixResult TryWait(void* events, int maxEvents, int timeout)
    => EPollInterop.EPollWait(this, events, maxEvents, timeout);
/// <summary>
/// Runs the transport thread: performs <c>Start()</c>, then loops on epoll until a
/// stop-thread byte is read from the pipe.
/// </summary>
/// <remarks>
/// A failure in <c>Start()</c> is reported via
/// <c>CompleteStateChange(TransportThreadState.Stopped, e)</c>. Any exception in
/// the event loop is written to the console and terminates the process through
/// <c>Environment.FailFast</c>.
/// Each iteration first classifies all returned events under the sockets lock and
/// then handles them in the order: zero-copy completions, accepts, writes, reads,
/// event re-registration, pipe, scheduled work.
/// </remarks>
public unsafe void Run()
{
    try
    {
        Start();
        CompleteStateChange(TransportThreadState.Started);
    }
    catch (Exception e)
    {
        CompleteStateChange(TransportThreadState.Stopped, e);
        return;
    }

    try
    {
        int notPacked = !EPoll.PackedEvents ? 1 : 0;
        var buffer = stackalloc int[EventBufferLength * (3 + notPacked)];
        int statReadEvents = 0;
        int statWriteEvents = 0;
        int statAcceptEvents = 0;
        int statAccepts = 0;
        int statZeroCopySuccess = 0;
        int statZeroCopyCopied = 0;

        var acceptableSockets = new List<TSocket>(1);
        var readableSockets = new List<TSocket>(EventBufferLength);
        var writableSockets = new List<TSocket>(EventBufferLength);
        var reregisterEventSockets = new List<TSocket>(EventBufferLength);
        var zeroCopyCompletions = new List<TSocket>(EventBufferLength);
        bool pipeReadable = false;

        bool running = true;
        do
        {
            int numEvents = EPollInterop.EPollWait(_epollFd, buffer, EventBufferLength, timeout: EPoll.TimeoutInfinite).Value;

            // actions can be scheduled without unblocking epoll
            SetEpollNotBlocked();

            // check events
            // we don't handle them immediately:
            // - this ensures we don't mismatch a closed socket with a new socket that have the same fd
            //     ~ To have the same fd, the previous fd must be closed, which means it is removed from the epoll
            //     ~ and won't show up in our next call to epoll.Wait.
            //     ~ The old fd may be present in the buffer still, but lookup won't give a match, since it is removed
            //     ~ from the dictionary before it is closed. If we were accepting already, a new socket could match.
            // - this also improves cache/cpu locality of the lookup
            int* ptr = buffer;
            lock (_sockets)
            {
                for (int i = 0; i < numEvents; i++)
                {
                    //   Packed             Non-Packed
                    //   ------             ------
                    // 0:Events       ==    Events
                    // 1:Int1 = Key         [Padding]
                    // 2:Int2 = Key   ==    Int1 = Key
                    // 3:~~~~~~~~~~         Int2 = Key
                    //                      ~~~~~~~~~~
                    EPollEvents events = (EPollEvents)ptr[0];
                    int key = ptr[2];
                    ptr += 3 + notPacked;
                    TSocket tsocket;
                    if (_sockets.TryGetValue(key, out tsocket))
                    {
                        var type = tsocket.Type;
                        if (type == SocketFlags.TypeClient)
                        {
                            lock (tsocket.Gate)
                            {
                                var pendingEventState = tsocket.PendingEventState;

                                // zero copy
                                if ((pendingEventState & EPollEvents.Error & events) != EPollEvents.None)
                                {
                                    var copyResult = SocketInterop.CompleteZeroCopy(tsocket.Fd);
                                    if (copyResult != PosixResult.EAGAIN)
                                    {
                                        events &= ~EPollEvents.Error;
                                        pendingEventState &= ~EPollEvents.Error;
                                        zeroCopyCompletions.Add(tsocket);
                                        if (copyResult == SocketInterop.ZeroCopyCopied)
                                        {
                                            tsocket.ZeroCopyThreshold = LinuxTransportOptions.NoZeroCopy;
                                            statZeroCopyCopied++;
                                        }
                                        else if (copyResult == SocketInterop.ZeroCopySuccess)
                                        {
                                            statZeroCopySuccess++;
                                        }
                                        else
                                        {
                                            Environment.FailFast($"Error occurred while trying to complete zero copy: {copyResult}");
                                        }
                                    }
                                }

                                // treat Error as Readable, Writable
                                if ((events & EPollEvents.Error) != EPollEvents.None)
                                {
                                    events |= EPollEvents.Readable | EPollEvents.Writable;
                                }

                                // Only keep events the socket is actually waiting for.
                                events &= pendingEventState & (EPollEvents.Readable | EPollEvents.Writable);

                                // readable
                                if ((events & EPollEvents.Readable) != EPollEvents.None)
                                {
                                    readableSockets.Add(tsocket);
                                    pendingEventState &= ~EPollEvents.Readable;
                                }

                                // writable
                                if ((events & EPollEvents.Writable) != EPollEvents.None)
                                {
                                    writableSockets.Add(tsocket);
                                    pendingEventState &= ~EPollEvents.Writable;
                                }

                                // reregister
                                tsocket.PendingEventState = pendingEventState;
                                if ((pendingEventState & (EPollEvents.Readable | EPollEvents.Writable)) != EPollEvents.None)
                                {
                                    tsocket.PendingEventState |= TSocket.EventControlPending;
                                    reregisterEventSockets.Add(tsocket);
                                }
                            }
                        }
                        else
                        {
                            // Accept events are counted once, when the batch is handled below.
                            acceptableSockets.Add(tsocket);
                        }
                    }
                    else if (key == PipeKey)
                    {
                        pipeReadable = true;
                    }
                }
            }

            // zero copy
            for (int i = 0; i < zeroCopyCompletions.Count; i++)
            {
                zeroCopyCompletions[i].OnZeroCopyCompleted();
            }
            zeroCopyCompletions.Clear();

            // handle accepts
            statAcceptEvents += acceptableSockets.Count;
            for (int i = 0; i < acceptableSockets.Count; i++)
            {
                statAccepts += HandleAccept(acceptableSockets[i]);
            }
            acceptableSockets.Clear();

            // handle writes
            statWriteEvents += writableSockets.Count;
            for (int i = 0; i < writableSockets.Count; i++)
            {
                writableSockets[i].OnWritable(stopped: false);
            }
            writableSockets.Clear();

            // handle reads
            statReadEvents += readableSockets.Count;
            if (!_transportOptions.AioReceive)
            {
                bool checkAvailable = _transportOptions.CheckAvailable;
                Span<MemoryHandle> receiveMemoryHandles = MemoryHandles;
                for (int i = 0; i < readableSockets.Count; i++)
                {
                    TSocket socket = readableSockets[i];
                    int availableBytes = !checkAvailable ? 0 : socket.GetAvailableBytes();
                    var receiveResult = socket.Receive(availableBytes, receiveMemoryHandles);
                    socket.OnReceiveFromSocket(receiveResult);
                }
                readableSockets.Clear();
            }
            else if (readableSockets.Count > 0)
            {
                AioReceive(readableSockets);
            }

            // reregister for events
            for (int i = 0; i < reregisterEventSockets.Count; i++)
            {
                var tsocket = reregisterEventSockets[i];
                lock (tsocket.Gate)
                {
                    var pendingEventState = tsocket.PendingEventState & ~TSocket.EventControlPending;
                    tsocket.PendingEventState = pendingEventState;
                    UpdateEPollControl(tsocket, pendingEventState, registered: true);
                }
            }
            reregisterEventSockets.Clear();

            // handle pipe
            if (pipeReadable)
            {
                PosixResult result;
                do
                {
                    result = _pipeEnds.ReadEnd.TryReadByte();
                    if (result.Value == PipeStopSockets)
                    {
                        StopSockets();
                    }
                    else if (result.Value == PipeStopThread)
                    {
                        running = false;
                    }
                    else if (result.Value == PipeCloseAccept)
                    {
                        CloseAccept();
                    }
                } while (result);
                pipeReadable = false;
            }

            // scheduled work
            // note: this may write a byte to the pipe
            DoScheduledWork(_transportOptions.AioSend);
        } while (running);

        _logger.LogDebug($"Stats A/AE:{statAccepts}/{statAcceptEvents} RE:{statReadEvents} WE:{statWriteEvents} ZCS/ZCC:{statZeroCopySuccess}/{statZeroCopyCopied}");

        CompleteStateChange(TransportThreadState.Stopped);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
        Console.WriteLine(ex.StackTrace);
        Environment.FailFast("TransportThread", ex);
    }
}