/// <summary>
/// Initializes a GPU accessor state from the supplied channel states.
/// A fresh <see cref="ResourceCounts"/> instance is created for every accessor state.
/// </summary>
/// <param name="poolState">GPU texture pool state</param>
/// <param name="computeState">GPU compute state, used by compute shaders</param>
/// <param name="graphicsState">GPU graphics state, used by vertex, tessellation, geometry and fragment shaders</param>
/// <param name="specializationState">Shader specialization state, shared by all stages</param>
/// <param name="transformFeedbackDescriptors">Transform feedback information when the shader uses transform feedback, null otherwise</param>
public GpuAccessorState(
    GpuChannelPoolState poolState,
    GpuChannelComputeState computeState,
    GpuChannelGraphicsState graphicsState,
    ShaderSpecializationState specializationState,
    TransformFeedbackDescriptor[] transformFeedbackDescriptors = null)
{
    // State created here rather than received from the caller.
    ResourceCounts = new ResourceCounts();

    // State handed over by the caller, stored in parameter order.
    PoolState = poolState;
    ComputeState = computeState;
    GraphicsState = graphicsState;
    SpecializationState = specializationState;
    TransformFeedbackDescriptors = transformFeedbackDescriptors;
}
/// <summary>
/// Runs a compute dispatch: reads the QMD referenced by the current state, binds the compute
/// shader together with its buffers, textures and images, and then issues the dispatch.
/// </summary>
/// <param name="argument">Method call argument (not read by this handler)</param>
private void SendSignalingPcasB(int argument)
{
    var memoryManager = _channel.MemoryManager;

    _3dEngine.FlushUboDirty();

    // The value stored in the state is shifted left by 8 bits to form the QMD read address.
    uint qmdAddress = _state.State.SendPcasA;
    var qmd = _channel.MemoryManager.Read<ComputeQmd>((ulong)qmdAddress << 8);

    // Shader address is the 64-bit program region base plus the program offset from the QMD.
    ulong programRegionGpuVa = ((ulong)_state.State.SetProgramRegionAAddressUpper << 32) | _state.State.SetProgramRegionB;
    ulong shaderGpuVa = programRegionGpuVa + (uint)qmd.ProgramOffset;

    int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;

    // Shared memory is clamped to what the host reports as its maximum.
    int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);

    // Bind every constant buffer that the QMD flags as valid.
    for (int slot = 0; slot < Constants.TotalCpUniformBuffers; slot++)
    {
        if (qmd.ConstantBufferValid(slot))
        {
            ulong addressLow = (uint)qmd.ConstantBufferAddrLower(slot);
            ulong addressHigh = (ulong)qmd.ConstantBufferAddrUpper(slot) << 32;
            ulong bufferSize = (ulong)qmd.ConstantBufferSize(slot);

            _channel.BufferManager.SetComputeUniformBuffer(slot, addressLow | addressHigh, bufferSize);
        }
    }

    ulong samplerPoolGpuVa = ((ulong)_state.State.SetTexSamplerPoolAOffsetUpper << 32) | _state.State.SetTexSamplerPoolB;
    ulong texturePoolGpuVa = ((ulong)_state.State.SetTexHeaderPoolAOffsetUpper << 32) | _state.State.SetTexHeaderPoolB;

    var poolState = new GpuChannelPoolState(
        texturePoolGpuVa,
        _state.State.SetTexHeaderPoolCMaximumIndex,
        _state.State.SetBindlessTextureConstantBufferSlotSelect);

    var computeState = new GpuChannelComputeState(
        qmd.CtaThreadDimension0,
        qmd.CtaThreadDimension1,
        qmd.CtaThreadDimension2,
        localMemorySize,
        sharedMemorySize);

    CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);

    _context.Renderer.Pipeline.SetProgram(cs.HostProgram);

    _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, _state.State.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
    _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, _state.State.SetTexHeaderPoolCMaximumIndex);
    _channel.TextureManager.SetComputeTextureBufferIndex(_state.State.SetBindlessTextureConstantBufferSlotSelect);

    ShaderProgramInfo info = cs.Shaders[0].Info;

    for (int i = 0; i < info.CBuffers.Count; i++)
    {
        BufferDescriptor uniformBuffer = info.CBuffers[i];

        // NVN uses the "hardware" constant buffer for anything that is less than 8,
        // and those are already bound above.
        // Anything greater than or equal to 8 uses the emulated constant buffers.
        // They are emulated using global memory loads.
        if (uniformBuffer.Slot >= 8)
        {
            // The descriptor is read from constant buffer 0, 0x10 bytes per slot starting at 0x260.
            int descriptorOffset = 0x260 + (uniformBuffer.Slot - 8) * 0x10;
            ulong descriptorAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0) + (ulong)descriptorOffset;

            SbDescriptor uniformDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(descriptorAddress);

            _channel.BufferManager.SetComputeUniformBuffer(uniformBuffer.Slot, uniformDescriptor.PackAddress(), (uint)uniformDescriptor.Size);
        }
    }

    for (int i = 0; i < info.SBuffers.Count; i++)
    {
        BufferDescriptor storageBuffer = info.SBuffers[i];

        // The descriptor is read from constant buffer 0, 0x10 bytes per slot starting at 0x310.
        int descriptorOffset = 0x310 + storageBuffer.Slot * 0x10;
        ulong descriptorAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0) + (ulong)descriptorOffset;

        SbDescriptor storageDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(descriptorAddress);

        _channel.BufferManager.SetComputeStorageBuffer(storageBuffer.Slot, storageDescriptor.PackAddress(), (uint)storageDescriptor.Size, storageBuffer.Flags);
    }

    _channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
    _channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);

    // Highest binding index seen for each kind; stays at -1 when there are none.
    int maxTextureBinding = -1;
    int maxImageBinding = -1;

    TextureBindingInfo[] textureBindings = _channel.TextureManager.RentComputeTextureBindings(info.Textures.Count);

    for (int i = 0; i < info.Textures.Count; i++)
    {
        var texture = info.Textures[i];
        Target target = ShaderTexture.GetTarget(texture.Type);

        textureBindings[i] = new TextureBindingInfo(
            target,
            texture.Binding,
            texture.CbufSlot,
            texture.HandleIndex,
            texture.Flags);

        maxTextureBinding = Math.Max(maxTextureBinding, texture.Binding);
    }

    TextureBindingInfo[] imageBindings = _channel.TextureManager.RentComputeImageBindings(info.Images.Count);

    for (int i = 0; i < info.Images.Count; i++)
    {
        var image = info.Images[i];
        Target target = ShaderTexture.GetTarget(image.Type);
        Format format = ShaderTexture.GetFormat(image.Format);

        imageBindings[i] = new TextureBindingInfo(
            target,
            format,
            image.Binding,
            image.CbufSlot,
            image.HandleIndex,
            image.Flags);

        maxImageBinding = Math.Max(maxImageBinding, image.Binding);
    }

    _channel.TextureManager.SetComputeMaxBindings(maxTextureBinding, maxImageBinding);

    // Should never return false for mismatching spec state, since the shader was fetched above.
    _channel.TextureManager.CommitComputeBindings(cs.SpecializationState);

    _channel.BufferManager.CommitComputeBindings();

    _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);

    _3dEngine.ForceShaderUpdate();
}
/// <summary>
/// Migrates from the old cache format to the new one.
/// </summary>
/// <remarks>
/// Nothing is migrated when the shader cache is disabled, when no title ID is set,
/// or when the legacy cache directory does not exist.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="hostStorage">Disk cache host storage (used to create the new shader files)</param>
/// <returns>Number of shader programs that were migrated (skipped entries are not counted)</returns>
public static int MigrateFromLegacyCache(GpuContext context, DiskCacheHostStorage hostStorage)
{
    // Validate the configuration before using the title ID to build the legacy cache path.
    if (!GraphicsConfig.EnableShaderCache || GraphicsConfig.TitleId == null)
    {
        return 0;
    }

    string baseCacheDirectory = CacheHelper.GetBaseCacheDirectory(GraphicsConfig.TitleId);
    string cacheDirectory = CacheHelper.GenerateCachePath(baseCacheDirectory, CacheGraphicsApi.Guest, "", "program");

    // If the directory does not exist, we have no old cache.
    // Exit early as the CacheManager constructor will create the directories.
    if (!Directory.Exists(cacheDirectory))
    {
        return 0;
    }

    CacheManager cacheManager = new CacheManager(CacheGraphicsApi.OpenGL, CacheHashType.XxHash128, "glsl", GraphicsConfig.TitleId, ShaderCodeGenVersion);

    if (cacheManager.IsReadOnly)
    {
        Logger.Warning?.Print(LogClass.Gpu, "Loading shader cache in read-only mode (cache in use by another program!)");
    }

    ReadOnlySpan<Hash128> guestProgramList = cacheManager.GetGuestProgramList();

    int migratedCount = 0;

    for (int programIndex = 0; programIndex < guestProgramList.Length; programIndex++)
    {
        Hash128 key = guestProgramList[programIndex];

        byte[] guestProgram = cacheManager.GetGuestProgramByHash(ref key);

        if (guestProgram == null)
        {
            Logger.Error?.Print(LogClass.Gpu, $"Ignoring orphan shader hash {key} in cache (is the cache incomplete?)");

            continue;
        }

        ReadOnlySpan<byte> guestProgramReadOnlySpan = guestProgram;

        ReadOnlySpan<GuestShaderCacheEntry> cachedShaderEntries = GuestShaderCacheEntry.Parse(ref guestProgramReadOnlySpan, out GuestShaderCacheHeader fileHeader);

        // The first entry decides whether this is a compute or a graphics program,
        // so a program without any entries cannot be migrated.
        if (cachedShaderEntries.Length == 0)
        {
            Logger.Error?.Print(LogClass.Gpu, $"Ignoring shader hash {key} in cache, it has no shader entries");

            continue;
        }

        CachedShaderProgram program;

        if (cachedShaderEntries[0].Header.Stage == ShaderStage.Compute)
        {
            Debug.Assert(cachedShaderEntries.Length == 1);

            program = CreateProgramFromLegacyCompute(cachedShaderEntries[0]);
        }
        else
        {
            Debug.Assert(cachedShaderEntries.Length == Constants.ShaderStages);

            TransformFeedbackDescriptorOld[] tfd = CacheHelper.ReadTransformFeedbackInformation(ref guestProgramReadOnlySpan, fileHeader);

            program = CreateProgramFromLegacyGraphics(cachedShaderEntries.ToArray(), tfd);
        }

        hostStorage.AddShader(context, program, ReadOnlySpan<byte>.Empty);

        migratedCount++;
    }

    return migratedCount;
}

/// <summary>
/// Builds a cached shader program from a legacy compute shader cache entry.
/// </summary>
/// <param name="entry">Legacy compute shader entry</param>
/// <returns>Cached shader program with no host program attached</returns>
private static CachedShaderProgram CreateProgramFromLegacyCompute(GuestShaderCacheEntry entry)
{
    byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
    byte[] cb1Data = ExtractLegacyCb1Data(entry);

    ShaderProgramInfo info = CreateEmptyStageInfo(ShaderStage.Compute);

    GpuChannelComputeState computeState = new GpuChannelComputeState(
        entry.Header.GpuAccessorHeader.ComputeLocalSizeX,
        entry.Header.GpuAccessorHeader.ComputeLocalSizeY,
        entry.Header.GpuAccessorHeader.ComputeLocalSizeZ,
        entry.Header.GpuAccessorHeader.ComputeLocalMemorySize,
        entry.Header.GpuAccessorHeader.ComputeSharedMemorySize);

    ShaderSpecializationState specState = new ShaderSpecializationState(computeState);

    RegisterLegacyTextures(specState, 0, entry);

    CachedShaderStage shader = new CachedShaderStage(info, code, cb1Data);

    return new CachedShaderProgram(null, specState, shader);
}

/// <summary>
/// Builds a cached shader program from the legacy cache entries of a graphics program.
/// </summary>
/// <param name="entries">Legacy entries, one per stage; the first must not be null, other stages may be null</param>
/// <param name="tfd">Legacy transform feedback descriptors, or null if there are none</param>
/// <returns>Cached shader program with no host program attached</returns>
private static CachedShaderProgram CreateProgramFromLegacyGraphics(GuestShaderCacheEntry[] entries, TransformFeedbackDescriptorOld[] tfd)
{
    // Index 0 is reserved for the second vertex code blob, stage i is stored at index i + 1.
    CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];

    GuestGpuAccessorHeader accessorHeader = entries[0].Header.GpuAccessorHeader;

    // Repack the legacy tessellation mode (patch type in bits 0-1, spacing in bits 2-3,
    // clockwise flag in bit 4) into the layout used here (spacing at bit 4, clockwise at bit 8).
    TessMode tessMode = new TessMode();

    int tessPatchType = accessorHeader.TessellationModePacked & 3;
    int tessSpacing = (accessorHeader.TessellationModePacked >> 2) & 3;
    bool tessCw = (accessorHeader.TessellationModePacked & 0x10) != 0;

    tessMode.Packed = (uint)tessPatchType;
    tessMode.Packed |= (uint)(tessSpacing << 4);

    if (tessCw)
    {
        tessMode.Packed |= 0x100;
    }

    PrimitiveTopology topology = accessorHeader.PrimitiveTopology switch
    {
        InputTopology.Lines => PrimitiveTopology.Lines,
        InputTopology.LinesAdjacency => PrimitiveTopology.LinesAdjacency,
        InputTopology.Triangles => PrimitiveTopology.Triangles,
        InputTopology.TrianglesAdjacency => PrimitiveTopology.TrianglesAdjacency,
        _ => PrimitiveTopology.Points
    };

    GpuChannelGraphicsState graphicsState = new GpuChannelGraphicsState(
        accessorHeader.StateFlags.HasFlag(GuestGpuStateFlags.EarlyZForce),
        topology,
        tessMode);

    TransformFeedbackDescriptor[] tfdNew = ConvertLegacyTransformFeedback(tfd);

    ShaderSpecializationState specState = new ShaderSpecializationState(graphicsState, tfdNew);

    for (int i = 0; i < entries.Length; i++)
    {
        GuestShaderCacheEntry entry = entries[i];

        if (entry == null)
        {
            continue;
        }

        ShaderProgramInfo info = CreateEmptyStageInfo((ShaderStage)(i + 1));

        // NOTE: Vertex B comes first in the shader cache.
        byte[] code = entry.Code.AsSpan(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
        byte[] code2 = entry.Header.SizeA != 0 ? entry.Code.AsSpan(entry.Header.Size, entry.Header.SizeA).ToArray() : null;
        byte[] cb1Data = ExtractLegacyCb1Data(entry);

        shaders[i + 1] = new CachedShaderStage(info, code, cb1Data);

        if (code2 != null)
        {
            shaders[0] = new CachedShaderStage(null, code2, cb1Data);
        }

        RegisterLegacyTextures(specState, i, entry);
    }

    return new CachedShaderProgram(null, specState, shaders);
}

/// <summary>
/// Converts legacy transform feedback descriptors to the new descriptor type.
/// </summary>
/// <param name="tfd">Legacy descriptors, or null</param>
/// <returns>Converted descriptors, or null if <paramref name="tfd"/> is null</returns>
private static TransformFeedbackDescriptor[] ConvertLegacyTransformFeedback(TransformFeedbackDescriptorOld[] tfd)
{
    if (tfd == null)
    {
        return null;
    }

    TransformFeedbackDescriptor[] tfdNew = new TransformFeedbackDescriptor[tfd.Length];

    for (int tfIndex = 0; tfIndex < tfd.Length; tfIndex++)
    {
        // Copy the legacy varying location bytes into the start of the fixed size storage.
        Array32<uint> varyingLocations = new Array32<uint>();
        Span<byte> varyingLocationsSpan = MemoryMarshal.Cast<uint, byte>(varyingLocations.ToSpan());
        tfd[tfIndex].VaryingLocations.CopyTo(varyingLocationsSpan.Slice(0, tfd[tfIndex].VaryingLocations.Length));

        tfdNew[tfIndex] = new TransformFeedbackDescriptor(
            tfd[tfIndex].BufferIndex,
            tfd[tfIndex].Stride,
            tfd[tfIndex].VaryingLocations.Length,
            ref varyingLocations);
    }

    return tfdNew;
}

/// <summary>
/// Creates shader program information with no buffer, texture or image descriptors for a stage.
/// </summary>
/// <param name="stage">Shader stage</param>
/// <returns>Shader program information with empty descriptor lists</returns>
private static ShaderProgramInfo CreateEmptyStageInfo(ShaderStage stage)
{
    return new ShaderProgramInfo(
        Array.Empty<BufferDescriptor>(),
        Array.Empty<BufferDescriptor>(),
        Array.Empty<TextureDescriptor>(),
        Array.Empty<TextureDescriptor>(),
        stage,
        false,
        false,
        0,
        0);
}

/// <summary>
/// Copies the constant buffer 1 data, stored in the last Cb1DataSize bytes of the entry code.
/// </summary>
/// <param name="entry">Legacy shader entry</param>
/// <returns>Copy of the constant buffer 1 data</returns>
private static byte[] ExtractLegacyCb1Data(GuestShaderCacheEntry entry)
{
    Span<byte> codeSpan = entry.Code;

    return codeSpan.Slice(codeSpan.Length - entry.Header.Cb1DataSize).ToArray();
}

/// <summary>
/// Registers all texture descriptors of a legacy entry on the specialization state.
/// </summary>
/// <param name="specState">Specialization state that receives the texture information</param>
/// <param name="stageIndex">Stage index passed along with every registered texture</param>
/// <param name="entry">Legacy shader entry that owns the texture descriptors</param>
private static void RegisterLegacyTextures(ShaderSpecializationState specState, int stageIndex, GuestShaderCacheEntry entry)
{
    foreach (var td in entry.TextureDescriptors)
    {
        var handle = td.Key;
        var data = td.Value;

        specState.RegisterTexture(
            stageIndex,
            handle,
            -1,
            data.UnpackFormat(),
            data.UnpackSrgb(),
            data.UnpackTextureTarget(),
            data.UnpackTextureCoordNormalized());
    }
}
}