/// <summary> /// Processes a command buffer. /// </summary> /// <param name="baseGpuVa">Base GPU virtual address of the command buffer</param> /// <param name="commandBuffer">Command buffer</param> public void Process(ulong baseGpuVa, ReadOnlySpan <int> commandBuffer) { for (int index = 0; index < commandBuffer.Length; index++) { int command = commandBuffer[index]; ulong gpuVa = baseGpuVa + (ulong)index * 4; if (_state.MethodCount != 0) { if (TryFastI2mBufferUpdate(commandBuffer, ref index)) { continue; } Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1); if (!_state.NonIncrementing) { _state.Method++; } if (_state.IncrementOnce) { _state.NonIncrementing = true; } _state.MethodCount--; } else { CompressedMethod meth = Unsafe.As <int, CompressedMethod>(ref command); if (TryFastUniformBufferUpdate(meth, commandBuffer, index)) { index += meth.MethodCount; continue; } switch (meth.SecOp) { case SecOp.IncMethod: case SecOp.NonIncMethod: case SecOp.OneInc: _state.Method = meth.MethodAddress; _state.SubChannel = meth.MethodSubchannel; _state.MethodCount = meth.MethodCount; _state.IncrementOnce = meth.SecOp == SecOp.OneInc; _state.NonIncrementing = meth.SecOp == SecOp.NonIncMethod; break; case SecOp.ImmdDataMethod: Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true); break; } } } _3dClass.FlushUboDirty(); }
/// <summary> /// Performs a buffer to buffer, or buffer to texture copy. /// </summary> /// <param name="argument">The LaunchDma call argument</param> private void DmaCopy(int argument) { var memoryManager = _channel.MemoryManager; CopyFlags copyFlags = (CopyFlags)argument; bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear); bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear); bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable); bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable); uint size = _state.State.LineLengthIn; if (size == 0) { return; } ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower; ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower; int xCount = (int)_state.State.LineLengthIn; int yCount = (int)_state.State.LineCount; _3dEngine.FlushUboDirty(); if (copy2D) { // Buffer to texture copy. int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1; int srcBpp = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1; int dstBpp = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1; var dst = Unsafe.As <uint, DmaTexture>(ref _state.State.SetDstBlockSize); var src = Unsafe.As <uint, DmaTexture>(ref _state.State.SetSrcBlockSize); int srcStride = (int)_state.State.PitchIn; int dstStride = (int)_state.State.PitchOut; var srcCalculator = new OffsetCalculator( src.Width, src.Height, srcStride, srcLinear, src.MemoryLayout.UnpackGobBlocksInY(), src.MemoryLayout.UnpackGobBlocksInZ(), srcBpp); var dstCalculator = new OffsetCalculator( dst.Width, dst.Height, dstStride, dstLinear, dst.MemoryLayout.UnpackGobBlocksInY(), dst.MemoryLayout.UnpackGobBlocksInZ(), dstBpp); (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount); (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount); if (srcLinear && srcStride < 0) { srcBaseOffset += srcStride * (yCount - 1); } if (dstLinear && dstStride < 0) { dstBaseOffset += dstStride * (yCount - 1); } ReadOnlySpan <byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true); Span <byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray(); bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount); bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount); if (completeSource && completeDest) { var target = memoryManager.Physical.TextureCache.FindTexture( memoryManager, dst, dstGpuVa, dstBpp, dstStride, xCount, yCount, dstLinear); if (target != null) { ReadOnlySpan <byte> data; if (srcLinear) { data = LayoutConverter.ConvertLinearStridedToLinear( target.Info.Width, target.Info.Height, 1, 1, xCount * srcBpp, srcStride, target.Info.FormatInfo.BytesPerPixel, srcSpan); } else { data = LayoutConverter.ConvertBlockLinearToLinear( src.Width, src.Height, src.Depth, 1, 1, 1, 1, 1, srcBpp, src.MemoryLayout.UnpackGobBlocksInY(), src.MemoryLayout.UnpackGobBlocksInZ(), 1, new SizeInfo((int)target.Size), srcSpan); } target.SynchronizeMemory(); target.SetData(data); target.SignalModified(); return; } else if (srcCalculator.LayoutMatches(dstCalculator)) { srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely. memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan); return; } } unsafe bool Convert <T>(Span <byte> dstSpan, ReadOnlySpan <byte> srcSpan) where T : unmanaged { fixed(byte *dstPtr = dstSpan, srcPtr = srcSpan) { byte *dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset. byte *srcBase = srcPtr - srcBaseOffset; for (int y = 0; y < yCount; y++) { srcCalculator.SetY(src.RegionY + y); dstCalculator.SetY(dst.RegionY + y); for (int x = 0; x < xCount; x++) { int srcOffset = srcCalculator.GetOffset(src.RegionX + x); int dstOffset = dstCalculator.GetOffset(dst.RegionX + x); *(T *)(dstBase + dstOffset) = *(T *)(srcBase + srcOffset); } } } return(true); } bool _ = srcBpp switch { 1 => Convert <byte>(dstSpan, srcSpan), 2 => Convert <ushort>(dstSpan, srcSpan), 4 => Convert <uint>(dstSpan, srcSpan), 8 => Convert <ulong>(dstSpan, srcSpan), 12 => Convert <Bpp12Pixel>(dstSpan, srcSpan), 16 => Convert <Vector128 <byte> >(dstSpan, srcSpan), _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.") }; memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan); } else { if (remap && _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA && _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA && _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA && _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA && _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One && _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One && _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four) { // Fast path for clears when remap is enabled. memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA); } else { // TODO: Implement remap functionality. // Buffer to buffer copy. memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size); } } }
/// <summary> /// Performs the compute dispatch operation. /// </summary> /// <param name="argument">Method call argument</param> private void SendSignalingPcasB(int argument) { var memoryManager = _channel.MemoryManager; _3dEngine.FlushUboDirty(); uint qmdAddress = _state.State.SendPcasA; var qmd = _channel.MemoryManager.Read <ComputeQmd>((ulong)qmdAddress << 8); ulong shaderGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB; shaderGpuVa += (uint)qmd.ProgramOffset; int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize; int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize); for (int index = 0; index < Constants.TotalCpUniformBuffers; index++) { if (!qmd.ConstantBufferValid(index)) { continue; } ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32; ulong size = (ulong)qmd.ConstantBufferSize(index); _channel.BufferManager.SetComputeUniformBuffer(index, gpuVa, size); } ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB; ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB; GpuChannelPoolState poolState = new GpuChannelPoolState( texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex, _state.State.SetBindlessTextureConstantBufferSlotSelect); GpuChannelComputeState computeState = new GpuChannelComputeState( qmd.CtaThreadDimension0, qmd.CtaThreadDimension1, qmd.CtaThreadDimension2, localMemorySize, sharedMemorySize); CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa); _context.Renderer.Pipeline.SetProgram(cs.HostProgram); _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex); _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex); _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect); ShaderProgramInfo info = cs.Shaders[0].Info; for (int index = 0; index < info.CBuffers.Count; index++) { BufferDescriptor cb = info.CBuffers[index]; // NVN uses the "hardware" constant buffer for anything that is less than 8, // and those are already bound above. // Anything greater than or equal to 8 uses the emulated constant buffers. // They are emulated using global memory loads. if (cb.Slot < 8) { continue; } ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0); int cbDescOffset = 0x260 + (cb.Slot - 8) * 0x10; cbDescAddress += (ulong)cbDescOffset; SbDescriptor cbDescriptor = _channel.MemoryManager.Physical.Read <SbDescriptor>(cbDescAddress); _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size); } for (int index = 0; index < info.SBuffers.Count; index++) { BufferDescriptor sb = info.SBuffers[index]; ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0); int sbDescOffset = 0x310 + sb.Slot * 0x10; sbDescAddress += (ulong)sbDescOffset; SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read <SbDescriptor>(sbDescAddress); _channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags); } _channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers); _channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers); int maxTextureBinding = -1; int maxImageBinding = -1; TextureBindingInfo[] textureBindings = _channel.TextureManager.RentComputeTextureBindings(info.Textures.Count); for (int index = 0; index < info.Textures.Count; index++) { var descriptor = info.Textures[index]; Target target = ShaderTexture.GetTarget(descriptor.Type); textureBindings[index] = new TextureBindingInfo( target, descriptor.Binding, descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Flags); if (descriptor.Binding > maxTextureBinding) { maxTextureBinding = descriptor.Binding; } } TextureBindingInfo[] imageBindings = _channel.TextureManager.RentComputeImageBindings(info.Images.Count); for (int index = 0; index < info.Images.Count; index++) { var descriptor = info.Images[index]; Target target = ShaderTexture.GetTarget(descriptor.Type); Format format = ShaderTexture.GetFormat(descriptor.Format); imageBindings[index] = new TextureBindingInfo( target, format, descriptor.Binding, descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Flags); if (descriptor.Binding > maxImageBinding) { maxImageBinding = descriptor.Binding; } } _channel.TextureManager.SetComputeMaxBindings(maxTextureBinding, maxImageBinding); // Should never return false for mismatching spec state, since the shader was fetched above. _channel.TextureManager.CommitComputeBindings(cs.SpecializationState); _channel.BufferManager.CommitComputeBindings(); _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth); _3dEngine.ForceShaderUpdate(); }