/// <summary>
/// Prepares the transmit side of the device (datasheet section 4.6.8): turns on CRC offload and
/// small-packet padding, assigns the TX packet buffer, creates one descriptor ring per entry of
/// <c>TxQueues</c>, and finally enables TX DMA.
/// </summary>
private void InitTx()
{
    //Let the hardware append the CRC and pad small packets
    SetFlags(IxgbeDefs.HLREG0, IxgbeDefs.HLREG0_TXCRCEN | IxgbeDefs.HLREG0_TXPADEN);

    //Default buffer size allocations (section 4.6.11.3.4): buffer 0 gets 40KB, buffers 1..7 get none
    SetReg(IxgbeDefs.TXPBSIZE(0), IxgbeDefs.TXPBSIZE_40KB);
    for (uint bufferIndex = 1; bufferIndex < 8; bufferIndex++)
    {
        SetReg(IxgbeDefs.TXPBSIZE(bufferIndex), 0);
    }

    //Required when not using DCB/VTd
    SetReg(IxgbeDefs.DTXMXSZRQ, 0xFFFF);
    ClearFlags(IxgbeDefs.RTTDCS, IxgbeDefs.RTTDCS_ARBDIS);

    //TXDCTL carries three 6-bit fields at bit offsets 0, 8 and 16.
    //The replacement values (36, 8, 4) are taken from DPDK; see sec. 7.2.3.4.1 and 7.2.3.5.
    const uint thresholdFieldsMask = 0x3Fu | (0x3Fu << 8) | (0x3Fu << 16);
    const uint thresholdFieldsValue = 36u | (8u << 8) | (4u << 16);

    //Per queue config for all queues
    for (uint queueIndex = 0; queueIndex < TxQueues.Length; queueIndex++)
    {
        Log.Notice("Initializing TX queue {0}", queueIndex);

        //Section 7.1.9 - Setup descriptor ring
        uint ringSizeBytes = NumTxQueueEntries * TxDescriptorSize;
        var ring = MemoryHelper.AllocateDmaC(ringSizeBytes, true);
        //TODO : The C version sets the allocated memory to -1 here

        //Hand the ring's physical base address (split into low/high 32 bits) and length to the NIC
        SetReg(IxgbeDefs.TDBAL(queueIndex), (uint)(ring.PhysicalAddress & 0xFFFFFFFFL));
        SetReg(IxgbeDefs.TDBAH(queueIndex), (uint)(ring.PhysicalAddress >> 32));
        SetReg(IxgbeDefs.TDLEN(queueIndex), (uint)ringSizeBytes);
        Log.Notice("TX ring {0} physical addr: {1}", queueIndex, ring.PhysicalAddress);
        Log.Notice("TX ring {0} virtual addr: {1}", queueIndex, ring.VirtualAddress);

        //Descriptor writeback magic values, important to get good performance and low PCIe overhead
        uint txdctl = GetReg(IxgbeDefs.TXDCTL(queueIndex));
        txdctl = (txdctl & ~thresholdFieldsMask) | thresholdFieldsValue;
        SetReg(IxgbeDefs.TXDCTL(queueIndex), txdctl);

        //Software-side bookkeeping for this ring; it starts at index 0
        TxQueues[queueIndex] = new IxgbeTxQueue(NumTxQueueEntries)
        {
            Index = 0,
            DescriptorsAddr = ring.VirtualAddress
        };
    }

    //Enable DMA
    SetReg(IxgbeDefs.DMATXCTL, IxgbeDefs.DMATXCTL_TE);
}
/// <summary>
/// Starts a previously initialized TX queue: validates its size, resets the head and tail
/// registers so the ring starts out empty, then sets the queue's enable flag and waits for it.
/// </summary>
/// <param name="queueId">Index into <c>TxQueues</c> of the queue to start.</param>
/// <remarks>
/// Terminates the process via <see cref="Environment.Exit(int)"/> if the number of queue entries
/// is not a positive power of two.
/// </remarks>
private void StartTxQueue(int queueId)
{
    Log.Notice("Starting TX queue {0}", queueId);
    var queue = (IxgbeTxQueue)TxQueues[queueId];
    uint queueIndex = (uint)queueId;

    //n & (n - 1) is also 0 for n == 0, so a non-positive count has to be rejected explicitly
    if (queue.EntriesCount <= 0 || (queue.EntriesCount & (queue.EntriesCount - 1)) != 0)
    {
        Log.Error("FATAL: number of queue entries must be a power of 2");
        Environment.Exit(1);
    }

    //TX queue starts out empty
    SetReg(IxgbeDefs.TDH(queueIndex), 0);
    SetReg(IxgbeDefs.TDT(queueIndex), 0);

    //Enable queue and wait if necessary
    SetFlags(IxgbeDefs.TXDCTL(queueIndex), IxgbeDefs.TXDCTL_ENABLE);
    WaitSetReg(IxgbeDefs.TXDCTL(queueIndex), IxgbeDefs.TXDCTL_ENABLE);
}
/// <summary>
/// Starts a previously initialized RX queue: validates its size, allocates the queue's mempool,
/// attaches a fresh packet buffer to every descriptor, enables the queue and finally hands all
/// descriptors to the hardware by moving the tail to the last entry.
/// </summary>
/// <param name="queueId">Index into <c>RxQueues</c> of the queue to start.</param>
/// <remarks>
/// Terminates the process via <see cref="Environment.Exit(int)"/> if the number of queue entries
/// is not a positive power of two or if a packet buffer cannot be allocated.
/// </remarks>
private void StartRxQueue(int queueId)
{
    Log.Notice("Starting RX queue {0}", queueId);
    var queue = (IxgbeRxQueue)RxQueues[queueId];
    uint queueIndex = (uint)queueId;

    //Validate before allocating anything. n & (n - 1) is also 0 for n == 0, and an empty ring
    //would later write (uint)(0 - 1) to RDT, so non-positive counts are rejected explicitly.
    if (queue.EntriesCount <= 0 || (queue.EntriesCount & (queue.EntriesCount - 1)) != 0)
    {
        Log.Error("FATAL: number of queue entries must be a power of 2");
        Environment.Exit(1);
    }

    //Mempool should be >= number of rx and tx descriptors (and never smaller than 4096 buffers)
    uint mempoolSize = NumRxQueueEntries + NumTxQueueEntries;
    queue.Mempool = MemoryHelper.AllocateMempool(mempoolSize < 4096 ? 4096 : mempoolSize, 2048);

    for (ushort ei = 0; ei < queue.EntriesCount; ei++)
    {
        var descrAddr = queue.GetDescriptorAddress(ei);
        //NOTE(review): this logs once per descriptor, which is noisy for large rings
        Log.Notice("Setting up descriptor at index #{0}", ei);

        //Allocate packet buffer
        var packetBuffer = queue.Mempool.GetPacketBufferFast();
        if (packetBuffer.IsNull)
        {
            Log.Error("Fatal: Could not allocate packet buffer");
            Environment.Exit(1);
        }

        //The NIC gets the physical address of the buffer's data area; no header buffer is used
        queue.WriteBufferAddress(descrAddr, packetBuffer.PhysicalAddress + PacketBuffer.DataOffset);
        queue.WriteHeaderBufferAddress(descrAddr, 0);

        //Remember the virtual address so the buffer can be found again for this descriptor
        queue.VirtualAddresses[ei] = packetBuffer.VirtualAddress;
    }

    //Enable queue and wait if necessary
    SetFlags(IxgbeDefs.RXDCTL(queueIndex), IxgbeDefs.RXDCTL_ENABLE);
    WaitSetReg(IxgbeDefs.RXDCTL(queueIndex), IxgbeDefs.RXDCTL_ENABLE);

    //Rx queue starts out full
    SetReg(IxgbeDefs.RDH(queueIndex), 0);
    //Was set to 0 before in the init function
    SetReg(IxgbeDefs.RDT(queueIndex), (uint)(queue.EntriesCount - 1));
}
/// <summary>
/// Prepares the receive side of the device: disables RX while reconfiguring, assigns the RX packet
/// buffer, enables CRC stripping and broadcast reception, creates one descriptor ring per entry of
/// <c>RxQueues</c>, applies the section 4.6.7 fix-ups and re-enables RX.
/// </summary>
private void InitRx()
{
    //Disable RX while re-configuring
    ClearFlags(IxgbeDefs.RXCTRL, IxgbeDefs.RXCTRL_RXEN);

    //No DCB or VT, just a single 128kb packet buffer; buffers 1..7 get none
    SetReg(IxgbeDefs.RXPBSIZE(0), IxgbeDefs.RXPBSIZE_128KB);
    for (uint bufferIndex = 1; bufferIndex < 8; bufferIndex++)
    {
        SetReg(IxgbeDefs.RXPBSIZE(bufferIndex), 0);
    }

    //Always enable CRC offloading
    SetFlags(IxgbeDefs.HLREG0, IxgbeDefs.HLREG0_RXCRCSTRP);
    SetFlags(IxgbeDefs.RDRXCTL, IxgbeDefs.RDRXCTL_CRCSTRIP);

    //Accept broadcast packets
    SetFlags(IxgbeDefs.FCTRL, IxgbeDefs.FCTRL_BAM);

    //Per queue config
    for (uint queueIndex = 0; queueIndex < RxQueues.Length; queueIndex++)
    {
        Log.Notice("Initializing rx queue {0}", queueIndex);

        //Enable advanced rx descriptors: replace the descriptor type field, keep all other bits
        var srrctl = (GetReg(IxgbeDefs.SRRCTL(queueIndex)) & ~IxgbeDefs.SRRCTL_DESCTYPE_MASK)
                     | IxgbeDefs.SRRCTL_DESCTYPE_ADV_ONEBUF;
        SetReg(IxgbeDefs.SRRCTL(queueIndex), srrctl);

        //DROP_EN causes the NIC to drop packets if no descriptors are available instead of buffering them
        //A single overflowing queue can fill up the whole buffer and impact operations if not setting this
        SetFlags(IxgbeDefs.SRRCTL(queueIndex), IxgbeDefs.SRRCTL_DROP_EN);

        //Sec 7.1.9 - Set up descriptor ring
        int ringSizeBytes = NumRxQueueEntries * RxDescriptorSize;
        var ring = MemoryHelper.AllocateDmaC((uint)ringSizeBytes, true);
        //TODO : The C version sets the allocated memory to -1 here

        //Hand the ring's physical base address (split into low/high 32 bits) and length to the NIC
        SetReg(IxgbeDefs.RDBAL(queueIndex), (uint)(ring.PhysicalAddress & 0xFFFFFFFFL));
        SetReg(IxgbeDefs.RDBAH(queueIndex), (uint)(ring.PhysicalAddress >> 32));
        SetReg(IxgbeDefs.RDLEN(queueIndex), (uint)ringSizeBytes);
        Log.Notice("RX ring {0} physical address: {1}", queueIndex, ring.PhysicalAddress);
        Log.Notice("RX ring {0} virtual address: {1}", queueIndex, ring.VirtualAddress);

        //Set ring to empty
        SetReg(IxgbeDefs.RDH(queueIndex), 0);
        SetReg(IxgbeDefs.RDT(queueIndex), 0);

        //Software-side bookkeeping for this ring; it starts at index 0
        RxQueues[queueIndex] = new IxgbeRxQueue(NumRxQueueEntries)
        {
            Index = 0,
            DescriptorsAddr = ring.VirtualAddress
        };
    }

    //Section 4.6.7 - set some magic bits
    SetFlags(IxgbeDefs.CTRL_EXT, IxgbeDefs.CTRL_EXT_NS_DIS);

    //This flag probably refers to a broken feature: It's reserved and initialized as '1' but it
    //must be '0'
    for (uint queueIndex = 0; queueIndex < RxQueues.Length; queueIndex++)
    {
        ClearFlags(IxgbeDefs.DCA_RXCTRL(queueIndex), 1 << 12);
    }

    //Start RX
    SetFlags(IxgbeDefs.RXCTRL, IxgbeDefs.RXCTRL_RXEN);
}
/// <summary>
/// Queues as many of the given packet buffers as possible for transmission on a TX queue.
/// Before queueing, buffers of already transmitted descriptors are returned to their mempool
/// in batches of <c>TxCleanBatch</c>.
/// </summary>
/// <param name="queueId">Index into <c>TxQueues</c> of the queue to transmit on.</param>
/// <param name="buffers">Packets to send; all are expected to belong to the same mempool.</param>
/// <returns>
/// The number of buffers from the start of <paramref name="buffers"/> that were placed on the ring.
/// This is less than <c>buffers.Length</c> when the ring is full.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="queueId"/> is not a valid TX queue index.</exception>
/// <exception cref="NullReferenceException">The mempool referenced by a cleaned buffer could not be found.</exception>
public override int TxBatch(int queueId, Span<PacketBuffer> buffers)
{
    //Validate against the TX queue array (not the RX one) and report the offending parameter name
    if (queueId < 0 || queueId >= TxQueues.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(queueId), "Queue id out of bounds");
    }

    //Direct cast, same as StartTxQueue: a wrong queue type fails here instead of as a later null dereference
    var queue = (IxgbeTxQueue)TxQueues[queueId];
    ushort cleanIndex = queue.CleanIndex;
    ushort currentIndex = (ushort)queue.Index;
    var cmdTypeFlags = IxgbeDefs.ADVTXD_DCMD_EOP | IxgbeDefs.ADVTXD_DCMD_RS | IxgbeDefs.ADVTXD_DCMD_IFCS
                       | IxgbeDefs.ADVTXD_DCMD_DEXT | IxgbeDefs.ADVTXD_DTYP_DATA;

    //All packet buffers that will be handled here will belong to the same mempool
    Mempool pool = null;

    //Step 1: Clean up descriptors that were sent out by the hardware and return them to the mempool
    //Start by reading step 2 which is done first for each packet
    //Cleaning up must be done in batches for performance reasons, so this is unfortunately somewhat complicated
    while (true)
    {
        //currentIndex is always ahead of clean (invariant of our queue)
        int cleanable = currentIndex - cleanIndex;
        if (cleanable < 0)
        {
            //currentIndex has wrapped around the ring while cleanIndex has not
            cleanable = queue.EntriesCount + cleanable;
        }
        if (cleanable < TxCleanBatch)
        {
            break;
        }

        //Calculate the index of the last descriptor in the clean batch
        //We can't check all descriptors for performance reasons
        int cleanupTo = cleanIndex + TxCleanBatch - 1;
        if (cleanupTo >= queue.EntriesCount)
        {
            cleanupTo -= queue.EntriesCount;
        }

        var batchEndDescAddr = queue.GetDescriptorAddress((ushort)cleanupTo);
        uint status = queue.ReadWbStatus(batchEndDescAddr);

        //Clean the whole batch or nothing. This will leave some packets in the queue forever
        //if you stop transmitting but that's not a real concern
        if ((status & IxgbeDefs.ADVTXD_STAT_DD) == 0)
        {
            break;
        }

        //Hardware sets this flag as soon as it's sent out, we can give back all bufs in the batch back to the mempool
        int i = cleanIndex;
        while (true)
        {
            var packetBuffer = new PacketBuffer(queue.VirtualAddresses[i]);
            if (pool == null)
            {
                pool = Mempool.FindPool(packetBuffer.MempoolId);
                if (pool == null)
                {
                    //NOTE(review): exception type kept for caller compatibility
                    throw new NullReferenceException("Could not find mempool with id specified by PacketBuffer");
                }
            }
            pool.FreeBufferFast(packetBuffer);
            if (i == cleanupTo)
            {
                break;
            }
            i = WrapRing(i, queue.EntriesCount);
        }

        //Next descriptor to be cleaned up is one after the one we just cleaned
        cleanIndex = (ushort)WrapRing(cleanupTo, queue.EntriesCount);
    }
    queue.CleanIndex = cleanIndex;

    //Step 2: Send out as many of our packets as possible
    int sent;
    for (sent = 0; sent < buffers.Length; sent++)
    {
        var descAddr = queue.GetDescriptorAddress(currentIndex);
        ushort nextIndex = WrapRing(currentIndex, (ushort)queue.EntriesCount);

        //We are full if the next index is the one we are trying to reclaim
        if (cleanIndex == nextIndex)
        {
            break;
        }

        var buffer = buffers[sent];

        //Remember virtual address to clean it up later
        queue.VirtualAddresses[currentIndex] = buffer.VirtualAddress;
        queue.Index = WrapRing(queue.Index, queue.EntriesCount);

        //NIC reads from here
        queue.WriteBufferAddress(descAddr, buffer.PhysicalAddress + PacketBuffer.DataOffset);

        //Always the same flags: One buffer (EOP), advanced data descriptor, CRC offload, data length
        var bufSize = buffer.Size;
        queue.WriteCmdTypeLength(descAddr, cmdTypeFlags | bufSize);

        //No fancy offloading - only the total payload length
        //implement offloading flags here:
        // * ip checksum offloading is trivial: just set the offset
        // * tcp/udp checksum offloading is more annoying, you have to precalculate the pseudo-header checksum
        queue.WriteOlInfoStatus(descAddr, bufSize << (int)IxgbeDefs.ADVTXD_PAYLEN_SHIFT);

        currentIndex = nextIndex;
    }

    //Send out by advancing tail, i.e. pass control of the bus to the NIC
    SetReg(IxgbeDefs.TDT((uint)queueId), (uint)queue.Index);
    return sent;
}
//Section 1.8.2 and 7.1
//Section 7.1.9 explains RX ring structure
//We control the tail of the queue, hardware controls the head
/// <summary>
/// Receives up to <c>buffers.Length</c> packets from an RX queue without blocking. Every received
/// packet's descriptor is immediately re-armed with a fresh buffer from the queue's mempool.
/// </summary>
/// <param name="queueId">Index into <c>RxQueues</c> of the queue to receive from.</param>
/// <param name="buffers">Destination for the received packet buffers, filled from index 0.</param>
/// <returns>The number of packets written to <paramref name="buffers"/>; 0 if none were available.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="queueId"/> is not a valid RX queue index.</exception>
/// <exception cref="InvalidOperationException">A packet spanning more than one descriptor was received.</exception>
/// <exception cref="OutOfMemoryException">No replacement buffer could be taken from the mempool.</exception>
public override int RxBatch(int queueId, Span<PacketBuffer> buffers)
{
    //The single-string constructor takes a parameter name, so pass name and message separately
    if (queueId < 0 || queueId >= RxQueues.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(queueId), "Queue id out of bounds");
    }

    //Direct cast, same as StartRxQueue: a wrong queue type fails here instead of as a later null dereference
    var queue = (IxgbeRxQueue)RxQueues[queueId];
    ushort rxIndex = (ushort)queue.Index;
    ushort lastRxIndex = rxIndex;

    int bufInd;
    for (bufInd = 0; bufInd < buffers.Length; bufInd++)
    {
        var descAddr = queue.GetDescriptorAddress(rxIndex);
        var status = queue.ReadWbStatusError(descAddr);

        //Status DONE not set: the hardware has not filled this descriptor yet, nothing more to read
        if ((status & IxgbeDefs.RXDADV_STAT_DD) == 0)
        {
            break;
        }

        //Status END OF PACKET
        if ((status & IxgbeDefs.RXDADV_STAT_EOP) == 0)
        {
            throw new InvalidOperationException("Multi segment packets are not supported - increase buffer size or decrease MTU");
        }

        //We got a packet - read and copy the whole descriptor
        var packetBuffer = new PacketBuffer(queue.VirtualAddresses[rxIndex]);
        packetBuffer.Size = queue.ReadWbLength(descAddr);

        //This would be the place to implement RX offloading by translating the device-specific
        //flags to an independent representation in that buffer (similar to how DPDK works)
        var newBuf = queue.Mempool.GetPacketBufferFast();
        if (newBuf.IsNull)
        {
            Log.Error("Cannot allocate RX buffer - Out of memory! Either there is a memory leak, or the mempool is too small");
            throw new OutOfMemoryException("Failed to allocate new buffer for rx - you are either leaking memory or your mempool is too small");
        }

        //Re-arm the descriptor with the replacement buffer
        queue.WriteBufferAddress(descAddr, newBuf.PhysicalAddress + PacketBuffer.DataOffset);
        queue.WriteHeaderBufferAddress(descAddr, 0); //This resets the flags
        queue.VirtualAddresses[rxIndex] = newBuf.VirtualAddress;
        buffers[bufInd] = packetBuffer;

        //Want to read the next one in the next iteration but we still need the current one to update RDT later
        lastRxIndex = rxIndex;
        rxIndex = WrapRing(rxIndex, (ushort)queue.EntriesCount);
    }

    if (rxIndex != lastRxIndex)
    {
        //Tell hardware that we are done. This is intentionally off by one, otherwise we'd set
        //RDT=RDH if we are receiving faster than packets are coming in, which would mean queue is full
        SetReg(IxgbeDefs.RDT((uint)queueId), lastRxIndex);
        queue.Index = rxIndex;
    }

    return bufInd;
}