Exemple #1
0
        /// <summary>
        /// Decodes the two shaders at <paramref name="addressA"/> and <paramref name="addressB"/>,
        /// combines their operations and translates the combined result.
        /// </summary>
        /// <param name="addressA">Address of the first shader; decoded with <see cref="TranslationFlags.VertexA"/> added to the flags</param>
        /// <param name="addressB">Address of the second shader; its configuration is the one used for translation</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags applied to both shaders</param>
        /// <returns>The translated shader program</returns>
        public static ShaderProgram Translate(ulong addressA, ulong addressB, IGpuAccessor gpuAccessor, TranslationFlags flags)
        {
            TranslationFlags vertexAFlags = flags | TranslationFlags.VertexA;

            // The configuration decoded for the first shader is discarded, only its size is kept.
            Operation[] vertexAOps = DecodeShader(addressA, gpuAccessor, vertexAFlags, out _, out int sizeA);
            Operation[] vertexBOps = DecodeShader(addressB, gpuAccessor, flags, out ShaderConfig config, out int sizeB);

            var combinedOps = Combine(vertexAOps, vertexBOps);

            return Translate(combinedOps, config, sizeB, sizeA);
        }
Exemple #2
0
 /// <summary>
 /// Creates a shader configuration whose stage is <see cref="ShaderStage.Compute"/>,
 /// with empty used texture and used image tables.
 /// </summary>
 /// <param name="gpuAccessor">Accessor stored on the configuration</param>
 /// <param name="options">Translation options stored on the configuration</param>
 public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
 {
     Stage = ShaderStage.Compute;
     GpuAccessor = gpuAccessor;
     Options = options;

     // Both tables start out empty.
     _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
     _usedImages = new Dictionary<TextureInfo, TextureMeta>();
 }
Exemple #3
0
        /// <summary>
        /// Decodes the shader at <paramref name="address"/> and translates it.
        /// </summary>
        /// <param name="address">Address of the shader to decode</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags</param>
        /// <returns>The translated shader program</returns>
        public static ShaderProgram Translate(ulong address, IGpuAccessor gpuAccessor, TranslationFlags flags)
        {
            ShaderConfig config;
            int size;
            FeatureFlags usedFeatures;

            Operation[] operations = DecodeShader(address, gpuAccessor, flags, out config, out size, out usedFeatures);

            // The feature flags reported by the decoder are recorded on the configuration before translating.
            config.UsedFeatures = usedFeatures;

            return Translate(operations, config, size);
        }
Exemple #4
0
        /// <summary>
        /// Decodes the shader at <paramref name="address"/> into blocks grouped by function
        /// and creates the shader configuration that goes with it.
        /// </summary>
        /// <param name="address">Address of the shader; for non-compute shaders this is the address of the header</param>
        /// <param name="gpuAccessor">Accessor handed to the header reader and the decoder</param>
        /// <param name="options">Translation options; the flags select between the compute and the header based path</param>
        /// <param name="counts">Translation counts passed to the configuration</param>
        /// <param name="config">Receives the configuration created for the shader</param>
        /// <returns>The decoded blocks, one array per function</returns>
        private static Block[][] DecodeShader(
            ulong address,
            IGpuAccessor gpuAccessor,
            TranslationOptions options,
            TranslationCounts counts,
            out ShaderConfig config)
        {
            ulong codeAddress;

            if ((options.Flags & TranslationFlags.Compute) == 0)
            {
                // A header precedes the code, it is read first and the code is decoded from right after it.
                config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options, counts);
                codeAddress = address + HeaderSize;
            }
            else
            {
                config = new ShaderConfig(gpuAccessor, options, counts);
                codeAddress = address;
            }

            Block[][] functions = Decoder.Decode(gpuAccessor, codeAddress, out bool usesBindless);

            if (usesBindless)
            {
                config.SetUsedFeature(FeatureFlags.Bindless);
            }

            ulong highestEndAddress = 0;

            foreach (Block[] functionBlocks in functions)
            {
                foreach (Block block in functionBlocks)
                {
                    if (block.EndAddress > highestEndAddress)
                    {
                        highestEndAddress = block.EndAddress;
                    }

                    // Texture handles are only collected when no bindless access was found.
                    if (usesBindless)
                    {
                        continue;
                    }

                    foreach (var opCode in block.OpCodes)
                    {
                        if (opCode is OpCodeTextureBase texture)
                        {
                            config.TextureHandlesForCache.Add(texture.HandleOffset);
                        }
                    }
                }
            }

            // The size covers the code up to the highest block end, plus the header when there is one.
            config.SizeAdd(
                (int)highestEndAddress +
                (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));

            return functions;
        }
Exemple #5
0
        /// <summary>
        /// Decodes the two shaders at <paramref name="addressA"/> and <paramref name="addressB"/>,
        /// combines their operations and translates the combined result.
        /// </summary>
        /// <param name="addressA">Address of the first shader; decoded with <see cref="TranslationFlags.VertexA"/> added to the flags</param>
        /// <param name="addressB">Address of the second shader; its configuration is the one used for translation</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags applied to both shaders</param>
        /// <returns>The translated shader program</returns>
        public static ShaderProgram Translate(ulong addressA, ulong addressB, IGpuAccessor gpuAccessor, TranslationFlags flags)
        {
            TranslationFlags vertexAFlags = flags | TranslationFlags.VertexA;

            Operation[] vertexAOps = DecodeShader(addressA, gpuAccessor, vertexAFlags, out ShaderConfig configA);
            Operation[] vertexBOps = DecodeShader(addressB, gpuAccessor, flags, out ShaderConfig config);

            // Features used by the first shader are also marked as used on the configuration of the second one.
            config.SetUsedFeature(configA.UsedFeatures);

            var combinedOps = Combine(vertexAOps, vertexBOps);

            return Translate(combinedOps, config, configA.Size);
        }
Exemple #6
0
        /// <summary>
        /// Decodes the two shaders at <paramref name="addressA"/> and <paramref name="addressB"/>,
        /// combines their functions and translates the combined result.
        /// </summary>
        /// <param name="addressA">Address of the first shader; decoded with <see cref="TranslationFlags.VertexA"/> added to the flags</param>
        /// <param name="addressB">Address of the second shader; its configuration is the one used for translation</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags applied to both shaders</param>
        /// <returns>The translated shader program</returns>
        public static ShaderProgram Translate(ulong addressA, ulong addressB, IGpuAccessor gpuAccessor, TranslationFlags flags)
        {
            TranslationFlags vertexAFlags = flags | TranslationFlags.VertexA;

            FunctionCode[] vertexAFuncs = DecodeShader(addressA, gpuAccessor, vertexAFlags, out ShaderConfig configA);
            FunctionCode[] vertexBFuncs = DecodeShader(addressB, gpuAccessor, flags, out ShaderConfig config);

            // Features used by the first shader are also marked as used on the configuration of the second one.
            config.SetUsedFeature(configA.UsedFeatures);

            var combinedFuncs = Combine(vertexAFuncs, vertexBFuncs);

            return Translate(combinedFuncs, config, configA.Size);
        }
Exemple #7
0
        /// <summary>
        /// Decodes instructions into <paramref name="block"/>, starting at the block address, until an
        /// instruction with the <see cref="InstProps.Bra"/> property is decoded or
        /// <paramref name="limitAddress"/> is reached, then stores the end address on the block.
        /// </summary>
        /// <param name="config">Shader configuration; supplies the gpu accessor and receives the used feature and attribute information</param>
        /// <param name="block">Block to fill</param>
        /// <param name="limitAddress">Address at which decoding must stop</param>
        /// <param name="startAddress">Base address added to the block relative address when fetching code</param>
        private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
        {
            IGpuAccessor accessor = config.GpuAccessor;

            ReadOnlySpan<ulong> code = ReadOnlySpan<ulong>.Empty;
            int codeIndex = 0;
            ulong currentAddress = block.Address;
            InstOp lastOp = default;

            do
            {
                // Stop once a whole 8 byte instruction no longer fits before the limit.
                if (currentAddress + 7 >= limitAddress)
                {
                    break;
                }

                bool isSchedulingSlot = (currentAddress & 0x1f) == 0;

                if (isSchedulingSlot)
                {
                    // Scheduling instructions, which are written every 32 bytes, are ignored.
                    // The buffer position still advances so that it stays in step with the address.
                    codeIndex++;
                }
                else
                {
                    if (codeIndex >= code.Length)
                    {
                        code = accessor.GetCode(startAddress + currentAddress, 8);
                        codeIndex = 0;
                    }

                    ulong rawOpCode = code[codeIndex++];

                    lastOp = InstTable.GetOp(currentAddress, rawOpCode);

                    if (lastOp.Props.HasFlag(InstProps.TexB))
                    {
                        config.SetUsedFeature(FeatureFlags.Bindless);
                    }

                    bool isAttributeOp =
                        lastOp.Name == InstName.Ald ||
                        lastOp.Name == InstName.Ast ||
                        lastOp.Name == InstName.Ipa;

                    if (isAttributeOp)
                    {
                        SetUserAttributeUses(config, lastOp.Name, rawOpCode);
                    }
                    else if (lastOp.Name == InstName.Ssy || lastOp.Name == InstName.Pbk)
                    {
                        block.AddPushOp(lastOp);
                    }

                    block.OpCodes.Add(lastOp);
                }

                currentAddress += 8;
            }
            while (!lastOp.Props.HasFlag(InstProps.Bra));

            block.EndAddress = currentAddress;
        }
 /// <summary>
 /// Creates a shader configuration whose stage is <see cref="ShaderStage.Compute"/>,
 /// with empty texture handle, used texture and used image collections.
 /// </summary>
 /// <param name="gpuAccessor">Accessor stored on the configuration</param>
 /// <param name="options">Translation options stored on the configuration</param>
 /// <param name="counts">Translation counts stored on the configuration</param>
 public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts)
 {
     Stage = ShaderStage.Compute;
     GpuAccessor = gpuAccessor;
     Options = options;
     _counts = counts;

     // All collections start out empty.
     TextureHandlesForCache = new HashSet<int>();
     _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
     _usedImages = new Dictionary<TextureInfo, TextureMeta>();
 }
Exemple #9
0
        /// <summary>
        /// Creates a translator context by decoding the shader at <paramref name="address"/>.
        /// </summary>
        /// <param name="address">Address of the shader to decode</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="options">Translation options</param>
        /// <param name="counts">Translation counts; a new instance is created when null</param>
        /// <returns>The translator context produced by the decoder</returns>
        public static TranslatorContext CreateContext(
            ulong address,
            IGpuAccessor gpuAccessor,
            TranslationOptions options,
            TranslationCounts counts = null)
        {
            if (counts is null)
            {
                counts = new TranslationCounts();
            }

            TranslatorContext context = DecodeShader(address, gpuAccessor, options, counts);

            return context;
        }
Exemple #10
0
        /// <summary>
        /// Decodes the shader at <paramref name="address"/> and translates it.
        /// </summary>
        /// <param name="address">Address of the shader to decode</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags</param>
        /// <param name="counts">Translation counts; a new instance is created when null</param>
        /// <returns>The translated shader program</returns>
        public static ShaderProgram Translate(
            ulong address,
            IGpuAccessor gpuAccessor,
            TranslationFlags flags,
            TranslationCounts counts = null)
        {
            if (counts is null)
            {
                counts = new TranslationCounts();
            }

            // Decoding runs first and produces the configuration that is then passed to the translation.
            var decoded = DecodeShader(address, gpuAccessor, flags, counts, out ShaderConfig config);

            return Translate(decoded, config);
        }
Exemple #11
0
        /// <summary>
        /// Queries the gpu accessor and converts the answers into gpu state flags.
        /// </summary>
        /// <param name="gpuAccessor">The gpu accessor to query</param>
        /// <returns>
        /// <see cref="GuestGpuStateFlags.EarlyZForce"/> when the accessor reports early Z force, no flags otherwise
        /// </returns>
        private static GuestGpuStateFlags GetGpuStateFlags(IGpuAccessor gpuAccessor)
        {
            bool earlyZForce = gpuAccessor.QueryEarlyZForce();

            return earlyZForce ? GuestGpuStateFlags.EarlyZForce : (GuestGpuStateFlags)0;
        }
Exemple #12
0
        /// <summary>
        /// Decodes instructions into <paramref name="block"/>, starting at the block address, until the last
        /// decoded instruction is a branch or <paramref name="limitAddress"/> is reached. The end address is
        /// then stored on the block and its push operations are updated.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to read the instructions</param>
        /// <param name="block">Block to fill</param>
        /// <param name="limitAddress">Address at which decoding must stop</param>
        /// <param name="startAddress">Base address added to the block relative address when reading memory</param>
        /// <exception cref="ArgumentNullException">An emitter was found for the instruction, but no activator</exception>
        private static void FillBlock(
            IGpuAccessor gpuAccessor,
            Block block,
            ulong limitAddress,
            ulong startAddress)
        {
            ulong cursor = block.Address;

            do
            {
                // Stop once a whole 8 byte instruction no longer fits before the limit.
                if (cursor + 7 >= limitAddress)
                {
                    break;
                }

                ulong opAddress = cursor;

                cursor += 8;

                // Scheduling instructions, which are written every 32 bytes, are skipped.
                if ((opAddress & 0x1f) != 0)
                {
                    long opCode = gpuAccessor.MemoryRead<long>(startAddress + opAddress);

                    (InstEmitter emitter, OpCodeTable.OpActivator opActivator) = OpCodeTable.GetEmitter(opCode);

                    OpCode decoded;

                    if (emitter == null)
                    {
                        // TODO: Warning, illegal encoding.
                        decoded = new OpCode(null, opAddress, opCode);
                    }
                    else if (opActivator == null)
                    {
                        throw new ArgumentNullException(nameof(opActivator));
                    }
                    else
                    {
                        decoded = (OpCode)opActivator(emitter, opAddress, opCode);
                    }

                    block.OpCodes.Add(decoded);
                }
            }
            while (!IsBranch(block.GetLastOp()));

            block.EndAddress = cursor;

            block.UpdatePushOps();
        }
Exemple #13
0
 /// <summary>
 /// Builds a <see cref="GuestGpuAccessorHeader"/> filled with values queried from a gpu accessor.
 /// </summary>
 /// <param name="gpuAccessor">The gpu accessor to query</param>
 /// <returns>The newly created <see cref="GuestGpuAccessorHeader"/></returns>
 private static GuestGpuAccessorHeader CreateGuestGpuAccessorCache(IGpuAccessor gpuAccessor)
 {
     GuestGpuAccessorHeader header = new GuestGpuAccessorHeader
     {
         // Compute dispatch dimensions and memory sizes.
         ComputeLocalSizeX = gpuAccessor.QueryComputeLocalSizeX(),
         ComputeLocalSizeY = gpuAccessor.QueryComputeLocalSizeY(),
         ComputeLocalSizeZ = gpuAccessor.QueryComputeLocalSizeZ(),
         ComputeLocalMemorySize = gpuAccessor.QueryComputeLocalMemorySize(),
         ComputeSharedMemorySize = gpuAccessor.QueryComputeSharedMemorySize(),

         PrimitiveTopology = gpuAccessor.QueryPrimitiveTopology()
     };

     return header;
 }
Exemple #14
0
        /// <summary>
        /// Checks whether the 8 bytes right after <paramref name="currBlock"/> are mapped and hold
        /// something other than zero or the shader end delimiter.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to query and read memory</param>
        /// <param name="currBlock">Block whose end address is inspected</param>
        /// <param name="startAdddress">Base address added to the block end address</param>
        /// <returns>True if the value read is neither zero nor <c>ShaderEndDelimiter</c>, false otherwise or when the memory is not mapped</returns>
        private static bool HasBlockAfter(IGpuAccessor gpuAccessor, Block currBlock, ulong startAdddress)
        {
            ulong nextAddress = startAdddress + currBlock.EndAddress;

            // Both the first and the last byte of the next instruction must be mapped before reading it.
            bool isMapped = gpuAccessor.MemoryMapped(nextAddress) && gpuAccessor.MemoryMapped(nextAddress + 7);

            if (!isMapped)
            {
                return false;
            }

            ulong nextInst = gpuAccessor.MemoryRead<ulong>(nextAddress);

            if (nextInst == 0UL)
            {
                return false;
            }

            return nextInst != ShaderEndDelimiter;
        }
 /// <summary>
 /// Creates a shader configuration from a shader header. The shared initialization is delegated to
 /// the constructor taking the accessor, options and counts; header values are applied afterwards.
 /// </summary>
 /// <param name="header">Header the stage, output and map information is copied from</param>
 /// <param name="gpuAccessor">Accessor forwarded to the delegated constructor</param>
 /// <param name="options">Translation options forwarded to the delegated constructor</param>
 /// <param name="counts">Translation counts forwarded to the delegated constructor</param>
 public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts)
     : this(gpuAccessor, options, counts)
 {
     Stage = header.Stage;

     // Passthrough is only honored for the geometry stage.
     GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;

     OutputTopology = header.OutputTopology;
     MaxOutputVertices = header.MaxOutputVertexCount;

     // The local memory size is the sum of the low and high sizes from the header.
     LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;

     ImapTypes = header.ImapTypes;
     OmapTargets = header.OmapTargets;
     OmapSampleMask = header.OmapSampleMask;
     OmapDepth = header.OmapDepth;
 }
Exemple #16
0
        /// <summary>
        /// Decodes the shader at <paramref name="address"/> and wraps the result in a translator context.
        /// </summary>
        /// <param name="address">Address of the shader to decode</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags</param>
        /// <param name="counts">Translation counts; a new instance is created when null</param>
        /// <returns>A context holding the address, the decoded blocks and the configuration</returns>
        public static TranslatorContext CreateContext(
            ulong address,
            IGpuAccessor gpuAccessor,
            TranslationFlags flags,
            TranslationCounts counts = null)
        {
            if (counts is null)
            {
                counts = new TranslationCounts();
            }

            ShaderConfig config;

            Block[][] blocks = DecodeShader(address, gpuAccessor, flags, counts, out config);

            return new TranslatorContext(address, blocks, config);
        }
Exemple #17
0
 /// <summary>
 /// Creates a shader configuration whose stage is <see cref="ShaderStage.Compute"/>.
 /// The values that the header based constructor copies from a header are set to fixed defaults here.
 /// </summary>
 /// <param name="gpuAccessor">Accessor stored on the configuration</param>
 /// <param name="flags">Translation flags stored on the configuration</param>
 public ShaderConfig(IGpuAccessor gpuAccessor, TranslationFlags flags)
 {
     Stage = ShaderStage.Compute;

     // Fixed defaults for everything that has no header to come from.
     OutputTopology = OutputTopology.PointList;
     MaxOutputVertices = 0;
     LocalMemorySize = 0;
     ImapTypes = null;
     OmapTargets = null;
     OmapSampleMask = false;
     OmapDepth = false;

     GpuAccessor = gpuAccessor;
     Flags = flags;
 }
Exemple #18
0
 /// <summary>
 /// Creates a shader configuration from the values of a shader header.
 /// </summary>
 /// <param name="header">Header the stage, output and map information is copied from</param>
 /// <param name="gpuAccessor">Accessor stored on the configuration</param>
 /// <param name="flags">Translation flags stored on the configuration</param>
 public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationFlags flags)
 {
     Stage = header.Stage;
     OutputTopology = header.OutputTopology;
     MaxOutputVertices = header.MaxOutputVertexCount;

     // The local memory size is the sum of the low and high sizes from the header.
     LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;

     ImapTypes = header.ImapTypes;
     OmapTargets = header.OmapTargets;
     OmapSampleMask = header.OmapSampleMask;
     OmapDepth = header.OmapDepth;

     GpuAccessor = gpuAccessor;
     Flags = flags;
 }
Exemple #19
0
        /// <summary>
        /// Decodes the two shaders at <paramref name="addressA"/> and <paramref name="addressB"/> and wraps
        /// both results in a single translator context.
        /// </summary>
        /// <param name="addressA">Address of the first shader; decoded with <see cref="TranslationFlags.VertexA"/> added to the flags</param>
        /// <param name="addressB">Address of the second shader</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="flags">Translation flags applied to both shaders</param>
        /// <param name="counts">Translation counts shared by both decodes; a new instance is created when null</param>
        /// <returns>A context holding both addresses, both sets of decoded blocks and both configurations</returns>
        public static TranslatorContext CreateContext(
            ulong addressA,
            ulong addressB,
            IGpuAccessor gpuAccessor,
            TranslationFlags flags,
            TranslationCounts counts = null)
        {
            if (counts is null)
            {
                counts = new TranslationCounts();
            }

            TranslationFlags vertexAFlags = flags | TranslationFlags.VertexA;

            ShaderConfig configA;
            ShaderConfig configB;

            Block[][] blocksA = DecodeShader(addressA, gpuAccessor, vertexAFlags, counts, out configA);
            Block[][] blocksB = DecodeShader(addressB, gpuAccessor, flags, counts, out configB);

            return new TranslatorContext(addressA, addressB, blocksA, blocksB, configA, configB);
        }
Exemple #20
0
 /// <summary>
 /// Builds a <see cref="GuestGpuAccessorHeader"/> filled with values queried from a gpu accessor.
 /// </summary>
 /// <param name="gpuAccessor">The gpu accessor to query</param>
 /// <returns>The newly created <see cref="GuestGpuAccessorHeader"/></returns>
 public static GuestGpuAccessorHeader CreateGuestGpuAccessorCache(IGpuAccessor gpuAccessor)
 {
     GuestGpuAccessorHeader header = new GuestGpuAccessorHeader
     {
         // Compute dispatch dimensions and memory sizes.
         ComputeLocalSizeX = gpuAccessor.QueryComputeLocalSizeX(),
         ComputeLocalSizeY = gpuAccessor.QueryComputeLocalSizeY(),
         ComputeLocalSizeZ = gpuAccessor.QueryComputeLocalSizeZ(),
         ComputeLocalMemorySize = gpuAccessor.QueryComputeLocalMemorySize(),
         ComputeSharedMemorySize = gpuAccessor.QueryComputeSharedMemorySize(),

         PrimitiveTopology = gpuAccessor.QueryPrimitiveTopology(),

         // Values derived by the helper methods of this class.
         TessellationModePacked = GetTessellationModePacked(gpuAccessor),
         StateFlags = GetGpuStateFlags(gpuAccessor)
     };

     return header;
 }
Exemple #21
0
        /// <summary>
        /// Queries the tessellation parameters from the gpu accessor and packs them into one byte.
        /// </summary>
        /// <remarks>
        /// Bits 0-1 hold the patch type, bits 2-3 hold the spacing and bit 4 (0x10) is set when
        /// the accessor reports clockwise tessellation.
        /// </remarks>
        /// <param name="gpuAccessor">The gpu accessor to query</param>
        /// <returns>The packed tessellation parameters</returns>
        private static byte GetTessellationModePacked(IGpuAccessor gpuAccessor)
        {
            int patchTypeBits = (int)gpuAccessor.QueryTessPatchType() & 3;
            int spacingBits = ((int)gpuAccessor.QueryTessSpacing() & 3) << 2;
            int clockwiseBit = gpuAccessor.QueryTessCw() ? 0x10 : 0;

            return (byte)(patchTypeBits | spacingBits | clockwiseBit);
        }
Exemple #22
0
 /// <summary>
 /// Creates a shader configuration from a shader header. The shared initialization is delegated to
 /// the constructor taking the accessor, options and counts; header values are applied afterwards.
 /// </summary>
 /// <param name="header">Header the stage, output and map information is copied from</param>
 /// <param name="gpuAccessor">Accessor forwarded to the delegated constructor and queried for the transform feedback state</param>
 /// <param name="options">Translation options forwarded to the delegated constructor</param>
 /// <param name="counts">Translation counts forwarded to the delegated constructor</param>
 public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts)
     : this(gpuAccessor, options, counts)
 {
     Stage = header.Stage;

     // Passthrough is only honored for the geometry stage.
     GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;

     ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
     OutputTopology = header.OutputTopology;
     MaxOutputVertices = header.MaxOutputVertexCount;

     // The local memory size is the sum of the low and high sizes from the header.
     LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;

     ImapTypes = header.ImapTypes;
     OmapTargets = header.OmapTargets;
     OmapSampleMask = header.OmapSampleMask;
     OmapDepth = header.OmapDepth;

     TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
 }
Exemple #23
0
        /// <summary>
        /// Decodes the shader at <paramref name="address"/>, creates its configuration and wraps both
        /// in a translator context.
        /// </summary>
        /// <param name="address">Address of the shader; for non-compute shaders this is the address of the header</param>
        /// <param name="gpuAccessor">Accessor handed to the header reader and the configuration</param>
        /// <param name="options">Translation options; the flags select between the compute and the header based path</param>
        /// <param name="counts">Translation counts passed to the configuration</param>
        /// <returns>A context holding the address, the decoded program and the configuration</returns>
        private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options, TranslationCounts counts)
        {
            ShaderConfig config;
            ulong codeAddress;

            if ((options.Flags & TranslationFlags.Compute) == 0)
            {
                // A header precedes the code, it is read first and the code is decoded from right after it.
                config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options, counts);
                codeAddress = address + HeaderSize;
            }
            else
            {
                config = new ShaderConfig(gpuAccessor, options, counts);
                codeAddress = address;
            }

            DecodedProgram program = Decoder.Decode(config, codeAddress);

            ulong highestEndAddress = 0;

            foreach (DecodedFunction function in program)
            {
                foreach (Block block in function.Blocks)
                {
                    if (block.EndAddress > highestEndAddress)
                    {
                        highestEndAddress = block.EndAddress;
                    }

                    // Texture handles are only collected while the bindless feature is not marked as used.
                    if (config.UsedFeatures.HasFlag(FeatureFlags.Bindless))
                    {
                        continue;
                    }

                    foreach (InstOp op in block.OpCodes)
                    {
                        if (!op.Props.HasFlag(InstProps.Tex))
                        {
                            continue;
                        }

                        // The handle is the 13 bit field starting at bit 36 of the raw instruction.
                        int handle = (int)((op.RawOpCode >> 36) & 0x1fff);

                        config.TextureHandlesForCache.Add(handle);
                    }
                }
            }

            // The size covers the code up to the highest block end, plus the header when there is one.
            config.SizeAdd(
                (int)highestEndAddress +
                (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));

            return new TranslatorContext(address, program, config);
        }
Exemple #24
0
 /// <summary>
 /// Creates a shader configuration whose stage is <see cref="ShaderStage.Compute"/>.
 /// The values that the header based constructor copies from a header are set to fixed defaults here.
 /// </summary>
 /// <param name="gpuAccessor">Accessor stored on the configuration</param>
 /// <param name="flags">Translation flags stored on the configuration</param>
 /// <param name="counts">Translation counts stored on the configuration</param>
 public ShaderConfig(IGpuAccessor gpuAccessor, TranslationFlags flags, TranslationCounts counts)
 {
     Stage = ShaderStage.Compute;

     // Fixed defaults for everything that has no header to come from.
     OutputTopology = OutputTopology.PointList;
     MaxOutputVertices = 0;
     LocalMemorySize = 0;
     ImapTypes = null;
     OmapTargets = null;
     OmapSampleMask = false;
     OmapDepth = false;

     GpuAccessor = gpuAccessor;
     Flags = flags;

     // Nothing is accumulated yet: zero size, no used features, no texture handles.
     Size = 0;
     UsedFeatures = FeatureFlags.None;
     Counts = counts;
     TextureHandlesForCache = new HashSet<int>();
 }
Exemple #25
0
 /// <summary>
 /// Creates a shader configuration from the values of a shader header.
 /// </summary>
 /// <param name="header">Header the stage, output and map information is copied from</param>
 /// <param name="gpuAccessor">Accessor stored on the configuration</param>
 /// <param name="flags">Translation flags stored on the configuration</param>
 /// <param name="counts">Translation counts stored on the configuration</param>
 public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationFlags flags, TranslationCounts counts)
 {
     Stage = header.Stage;
     OutputTopology = header.OutputTopology;
     MaxOutputVertices = header.MaxOutputVertexCount;

     // The local memory size is the sum of the low and high sizes from the header.
     LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;

     ImapTypes = header.ImapTypes;
     OmapTargets = header.OmapTargets;
     OmapSampleMask = header.OmapSampleMask;
     OmapDepth = header.OmapDepth;

     GpuAccessor = gpuAccessor;
     Flags = flags;

     // Nothing is accumulated yet: zero size, no used features, no texture handles.
     Size = 0;
     UsedFeatures = FeatureFlags.None;
     Counts = counts;
     TextureHandlesForCache = new HashSet<int>();
 }
Exemple #26
0
        /// <summary>
        /// Reads a shader header from memory through the gpu accessor and unpacks its fields.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to read the header from memory</param>
        /// <param name="address">Address of the first byte of the header</param>
        public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
        {
            // The header starts with five 32 bit words of packed fields.
            int word0 = gpuAccessor.MemoryRead<int>(address + 0);
            int word1 = gpuAccessor.MemoryRead<int>(address + 4);
            int word2 = gpuAccessor.MemoryRead<int>(address + 8);
            int word3 = gpuAccessor.MemoryRead<int>(address + 12);
            int word4 = gpuAccessor.MemoryRead<int>(address + 16);

            // Word 0.
            SphType = word0.Extract(0, 5);
            Version = word0.Extract(5, 5);

            ShaderStage headerStage = (ShaderStage)word0.Extract(10, 4);

            // The compute stage is invalid here, the vertex stage is used in its place.
            Stage = headerStage == ShaderStage.Compute ? ShaderStage.Vertex : headerStage;

            MrtEnable = word0.Extract(14);
            KillsPixels = word0.Extract(15);
            DoesGlobalStore = word0.Extract(16);
            SassVersion = word0.Extract(17, 4);
            GpPassthrough = word0.Extract(24);
            DoesLoadOrStore = word0.Extract(26);
            DoesFp64 = word0.Extract(27);
            StreamOutMask = word0.Extract(28, 4);

            // Word 1.
            ShaderLocalMemoryLowSize = word1.Extract(0, 24);
            PerPatchAttributeCount = word1.Extract(24, 8);

            // Word 2.
            ShaderLocalMemoryHighSize = word2.Extract(0, 24);
            ThreadsPerInputPrimitive = word2.Extract(24, 8);

            // Word 3.
            ShaderLocalMemoryCrsSize = word3.Extract(0, 24);
            OutputTopology = (OutputTopology)word3.Extract(24, 4);

            // Word 4.
            MaxOutputVertexCount = word4.Extract(0, 12);
            StoreReqStart = word4.Extract(12, 8);
            StoreReqEnd = word4.Extract(24, 8);

            // 32 bytes starting at offset 0x18, each one packing four 2 bit values.
            ImapTypes = new ImapPixelType[32];

            for (int index = 0; index < 32; index++)
            {
                byte packed = gpuAccessor.MemoryRead<byte>(address + 0x18 + (ulong)index);

                PixelImap component0 = (PixelImap)(packed & 3);
                PixelImap component1 = (PixelImap)((packed >> 2) & 3);
                PixelImap component2 = (PixelImap)((packed >> 4) & 3);
                PixelImap component3 = (PixelImap)((packed >> 6) & 3);

                ImapTypes[index] = new ImapPixelType(component0, component1, component2, component3);
            }

            int omapTargetBits = gpuAccessor.MemoryRead<int>(address + 0x48);
            int omapBits = gpuAccessor.MemoryRead<int>(address + 0x4c);

            // Each of the 8 targets takes 4 consecutive bits of the target word.
            OmapTargets = new OmapTarget[8];

            for (int target = 0; target < OmapTargets.Length; target++)
            {
                int firstBit = target * 4;

                OmapTargets[target] = new OmapTarget(
                    omapTargetBits.Extract(firstBit + 0),
                    omapTargetBits.Extract(firstBit + 1),
                    omapTargetBits.Extract(firstBit + 2),
                    omapTargetBits.Extract(firstBit + 3));
            }

            OmapSampleMask = omapBits.Extract(0);
            OmapDepth = omapBits.Extract(1);
        }
Exemple #27
0
        /// <summary>
        /// Decodes the shader at <paramref name="address"/> and wraps the result in a translator context.
        /// </summary>
        /// <param name="address">Address of the shader to decode</param>
        /// <param name="gpuAccessor">Accessor handed to the decoder</param>
        /// <param name="options">Translation options</param>
        /// <returns>A context holding the address, the decoded blocks and the configuration</returns>
        public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            ShaderConfig config;

            Block[][] blocks = DecodeShader(address, gpuAccessor, options, out config);

            return new TranslatorContext(address, blocks, config);
        }
Exemple #28
0
        /// <summary>
        /// Decodes instructions into <paramref name="block"/>, starting at the block address, until the last
        /// decoded instruction changes control flow or <paramref name="limitAddress"/> is reached. The end
        /// address is then stored on the block and its push operations are updated.
        /// </summary>
        /// <param name="config">Shader configuration; supplies the gpu accessor and receives the used feature and attribute information</param>
        /// <param name="block">Block to fill</param>
        /// <param name="limitAddress">Address at which decoding must stop</param>
        /// <param name="startAddress">Base address added to the block relative address when reading memory</param>
        /// <exception cref="ArgumentNullException">An emitter was found for the instruction, but no op factory</exception>
        private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress)
        {
            IGpuAccessor accessor = config.GpuAccessor;

            ulong cursor = block.Address;

            do
            {
                // Stop once a whole 8 byte instruction no longer fits before the limit.
                if (cursor + 7 >= limitAddress)
                {
                    break;
                }

                ulong opAddress = cursor;

                cursor += 8;

                // Scheduling instructions, which are written every 32 bytes, are skipped.
                if ((opAddress & 0x1f) == 0)
                {
                    continue;
                }

                long opCode = accessor.MemoryRead<long>(startAddress + opAddress);

                (InstEmitter emitter, OpCodeTable.MakeOp makeOp) = OpCodeTable.GetEmitter(opCode);

                if (emitter == null)
                {
                    // TODO: Warning, illegal encoding.
                    block.OpCodes.Add(new OpCode(null, opAddress, opCode));

                    continue;
                }

                if (makeOp == null)
                {
                    throw new ArgumentNullException(nameof(makeOp));
                }

                OpCode op = makeOp(emitter, opAddress, opCode);

                // Any of these op code types or emitters indicates the presence of bindless access.
                bool usesBindless =
                    (op is OpCodeImage image && image.IsBindless) ||
                    (op is OpCodeTxd txd && txd.IsBindless) ||
                    (op is OpCodeTld4B) ||
                    (emitter == InstEmit.TexB) ||
                    (emitter == InstEmit.TldB) ||
                    (emitter == InstEmit.TmmlB) ||
                    (emitter == InstEmit.TxqB);

                if (usesBindless)
                {
                    config.SetUsedFeature(FeatureFlags.Bindless);
                }

                // Record which attributes the instruction uses.
                if (op is IOpCodeAttribute attributeOp)
                {
                    SetUserAttributeUses(config, attributeOp);
                }

                block.OpCodes.Add(op);
            }
            while (!IsControlFlowChange(block.GetLastOp()));

            block.EndAddress = cursor;

            block.UpdatePushOps();
        }
Exemple #29
0
 /// <summary>
 /// Creates a translator context by decoding the shader at <paramref name="address"/>.
 /// </summary>
 /// <param name="address">Address of the shader to decode</param>
 /// <param name="gpuAccessor">Accessor handed to the decoder</param>
 /// <param name="options">Translation options</param>
 /// <returns>The translator context produced by the decoder</returns>
 public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
 {
     TranslatorContext context = DecodeShader(address, gpuAccessor, options);

     return context;
 }
Exemple #30
0
        /// <summary>
        /// Decodes a shader into blocks grouped by function. Decoding starts with the function at
        /// address 0, and every function targeted by a Cal instruction is queued and decoded in turn.
        /// </summary>
        /// <param name="gpuAccessor">Accessor passed on to the block filling and memory checks</param>
        /// <param name="startAddress">Base address passed on to the block filling; block and function addresses are relative to it</param>
        /// <param name="hasBindless">Set to true when at least one filled block reported bindless access</param>
        /// <returns>One array of blocks per decoded function, in the order the functions were dequeued</returns>
        /// <exception cref="InvalidOperationException">A dequeued block has the same address as a block already on the list</exception>
        public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
        {
            hasBindless = false;

            List <Block[]> funcs = new List <Block[]>();

            Queue <ulong>   funcQueue   = new Queue <ulong>();
            HashSet <ulong> funcVisited = new HashSet <ulong>();

            // Queues a function for decoding, unless it was already queued through this helper before.
            void EnqueueFunction(ulong funcAddress)
            {
                if (funcVisited.Add(funcAddress))
                {
                    funcQueue.Enqueue(funcAddress);
                }
            }

            // The function at address 0 is always decoded first.
            // NOTE(review): this bypasses EnqueueFunction, so address 0 is never added to funcVisited;
            // a Cal targeting address 0 would queue and decode it a second time. Confirm whether that can happen.
            funcQueue.Enqueue(0);

            while (funcQueue.TryDequeue(out ulong funcAddress))
            {
                // Per function state: the address sorted block list, the blocks still to be
                // processed, and every block created so far keyed by its address.
                List <Block>              blocks    = new List <Block>();
                Queue <Block>             workQueue = new Queue <Block>();
                Dictionary <ulong, Block> visited   = new Dictionary <ulong, Block>();

                // Returns the block for the given address, creating and queueing it on first use.
                Block GetBlock(ulong blkAddress)
                {
                    if (!visited.TryGetValue(blkAddress, out Block block))
                    {
                        block = new Block(blkAddress);

                        workQueue.Enqueue(block);
                        visited.Add(blkAddress, block);
                    }

                    return(block);
                }

                GetBlock(funcAddress);

                while (workQueue.TryDequeue(out Block currBlock))
                {
                    // Check if the current block is inside another block.
                    if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
                    {
                        Block nBlock = blocks[nBlkIndex];

                        if (nBlock.Address == currBlock.Address)
                        {
                            throw new InvalidOperationException("Found duplicate block address on the list.");
                        }

                        // The existing block is split at the current block, which is inserted right after it.
                        nBlock.Split(currBlock);
                        blocks.Insert(nBlkIndex + 1, currBlock);

                        continue;
                    }

                    // If we have a block after the current one, set the limit address.
                    ulong limitAddress = ulong.MaxValue;

                    if (nBlkIndex != blocks.Count)
                    {
                        Block nBlock = blocks[nBlkIndex];

                        int nextIndex = nBlkIndex + 1;

                        if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
                        {
                            limitAddress = blocks[nextIndex].Address;
                        }
                        else if (nBlock.Address > currBlock.Address)
                        {
                            limitAddress = blocks[nBlkIndex].Address;
                        }
                    }

                    FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
                    hasBindless |= blockHasBindless;

                    if (currBlock.OpCodes.Count != 0)
                    {
                        // We should have blocks for all possible branch targets,
                        // including those from SSY/PBK instructions.
                        foreach (OpCodePush pushOp in currBlock.PushOpCodes)
                        {
                            GetBlock(pushOp.GetAbsoluteAddress());
                        }

                        // Set child blocks. "Branch" is the block the branch instruction
                        // points to (when taken), "Next" is the block at the next address,
                        // executed when the branch is not taken. For Unconditional Branches
                        // or end of program, Next is null.
                        OpCode lastOp = currBlock.GetLastOp();

                        if (lastOp is OpCodeBranch opBr)
                        {
                            // A Cal target is queued as a separate function rather than linked as a branch block.
                            if (lastOp.Emitter == InstEmit.Cal)
                            {
                                EnqueueFunction(opBr.GetAbsoluteAddress());
                            }
                            else
                            {
                                currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
                            }
                        }
                        else if (lastOp is OpCodeBranchIndir opBrIndir)
                        {
                            // An indirect branch could go anywhere, we don't know the target.
                            // Those instructions are usually used on a switch to jump table
                            // compiler optimization, and in those cases the possible targets
                            // seems to be always right after the BRX itself. We can assume
                            // that the possible targets are all the blocks in-between the
                            // instruction right after the BRX, and the common target that
                            // all the "cases" should eventually jump to, acting as the
                            // switch break.
                            Block firstTarget = GetBlock(currBlock.EndAddress);

                            firstTarget.BrIndir = opBrIndir;

                            opBrIndir.PossibleTargets.Add(firstTarget);
                        }

                        if (!IsUnconditionalBranch(lastOp))
                        {
                            currBlock.Next = GetBlock(currBlock.EndAddress);
                        }
                    }

                    // Insert the new block on the list (sorted by address).
                    if (blocks.Count != 0)
                    {
                        Block nBlock = blocks[nBlkIndex];

                        blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
                    }
                    else
                    {
                        blocks.Add(currBlock);
                    }

                    // Do we have a block after the current one?
                    if (currBlock.BrIndir != null && HasBlockAfter(gpuAccessor, currBlock, startAddress))
                    {
                        // Must be checked before GetBlock, which registers the address as visited.
                        bool targetVisited = visited.ContainsKey(currBlock.EndAddress);

                        Block possibleTarget = GetBlock(currBlock.EndAddress);

                        currBlock.BrIndir.PossibleTargets.Add(possibleTarget);

                        // Only a block created just now inherits the indirect branch of the current block.
                        if (!targetVisited)
                        {
                            possibleTarget.BrIndir = currBlock.BrIndir;
                        }
                    }
                }

                // Once every block of the function is decoded, propagate each push operation
                // starting from the block that contains it.
                foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
                {
                    for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
                    {
                        PropagatePushOp(visited, block, pushOpIndex);
                    }
                }

                funcs.Add(blocks.ToArray());
            }

            return(funcs.ToArray());
        }