예제 #1
0
        /// <summary>
        /// Writes the buffered inline data to its destination once the transfer has finished,
        /// then flags the transfer as finished and advances the context sequence.
        /// </summary>
        private void FinishTransfer()
        {
            Span<byte> payload = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (!_isLinear || _params.LineCount != 1)
            {
                // General path: every position goes through the offset calculator.
                var layout = new OffsetCalculator(
                    _params.DstWidth,
                    _params.DstHeight,
                    _params.DstStride,
                    _isLinear,
                    _params.DstMemoryLayout.UnpackGobBlocksInY(),
                    1);

                int consumed = 0;

                ulong baseAddress = _context.MemoryManager.Translate(_params.DstAddress.Pack());

                // The horizontal and vertical bounds are the same for every line.
                int firstX     = _params.DstX;
                int endX       = firstX + _params.LineLengthIn;
                int vectorEndX = firstX + BitUtils.AlignDown(_params.LineLengthIn, 16);

                int firstY = _params.DstY;
                int endY   = firstY + _params.LineCount;

                for (int row = firstY; row < endY; row++)
                {
                    int column = firstX;

                    // Bulk of the line, written 16 bytes at a time.
                    while (column < vectorEndX)
                    {
                        ulong target = baseAddress + (ulong)layout.GetOffset(column, row);

                        _context.PhysicalMemory.Write(target, payload.Slice(consumed, 16));

                        column   += 16;
                        consumed += 16;
                    }

                    // Remainder of the line, written one byte at a time.
                    while (column < endX)
                    {
                        ulong target = baseAddress + (ulong)layout.GetOffset(column, row);

                        _context.PhysicalMemory.Write(target, payload.Slice(consumed, 1));

                        column++;
                        consumed++;
                    }
                }
            }
            else
            {
                // A single linear line is written with one call.
                ulong target = _context.MemoryManager.Translate(_params.DstAddress.Pack());

                _context.PhysicalMemory.Write(target, payload);
            }

            _finished = true;

            _context.AdvanceSequence();
        }
예제 #2
0
        /// <summary>
        /// Decides from a texture's info whether it is "safe" to upscale.
        /// This is stricter than being compatible - it eliminates targets that would have detrimental effects when scaled.
        /// </summary>
        /// <param name="info">The texture info to check</param>
        /// <returns>True if safe</returns>
        private static bool UpscaleSafeMode(TextureInfo info)
        {
            // Upscaling works for every target accepted by IsUpscaleCompatible; the rejections below cover targets where
            // scaling may give undesirable results (blur textures) or simply wastes GPU resources (texture atlas).

            // Textures with more than 3 levels are likely to be game textures, rather than render textures.
            // Small textures with full mips are likely to be removed by the size check below.
            if (info.Levels > 3)
            {
                return false;
            }

            // Discount textures with small dimensions.
            if (info.Width < 8 || info.Height < 8)
            {
                return false;
            }

            int widthAlignment = (info.IsLinear ? Constants.StrideAlignment : Constants.GobAlignment) / info.FormatInfo.BytesPerPixel;

            bool depthStencilLike = info.FormatInfo.Format.IsDepthOrStencil() || info.FormatInfo.Components == 1;

            // Discount square textures that aren't depth-stencil like. (excludes game textures, cubemap faces, most 3D texture LUT, texture atlas)
            // Widths may be aligned, so both dimensions are aligned before they are compared.
            if (!depthStencilLike &&
                BitUtils.AlignUp(info.Width, widthAlignment) == BitUtils.AlignUp(info.Height, widthAlignment))
            {
                return false;
            }

            if (info.Height < 360)
            {
                int idealWidth = (int)MathF.Ceiling((info.Height / 9f) * 16f);
                int upperWidth = BitUtils.AlignUp(idealWidth, widthAlignment);
                int lowerWidth = BitUtils.AlignDown(idealWidth, widthAlignment);

                // Targets that are roughly 16:9 can only be rescaled if they're equal to or above 360p. (excludes blur and bloom textures)
                if (info.Width >= lowerWidth && info.Width <= upperWidth)
                {
                    return false;
                }
            }

            // A width equal to the height squared is possibly a "3D texture" drawn onto a 2D surface.
            // Some games do this to generate a tone mapping LUT without rendering into 3D texture slices.
            return info.Width != info.Height * info.Height;
        }
예제 #3
0
        /// <summary>
        /// Binds a storage buffer for the compute pipeline.
        /// Shaders can both read from and write to storage buffers.
        /// </summary>
        /// <param name="index">Index of the storage buffer</param>
        /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
        /// <param name="size">Size in bytes of the storage buffer</param>
        /// <param name="flags">Buffer usage flags</param>
        public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size, BufferUsageFlags flags)
        {
            var alignment = _context.Capabilities.StorageBufferOffsetAlignment;

            // Bytes between the aligned start and the requested start; the range grows by this amount.
            ulong misalignment = gpuVa & ((ulong)alignment - 1);

            ulong alignedSize = size + misalignment;
            ulong alignedVa   = BitUtils.AlignDown(gpuVa, alignment);

            ulong address = TranslateAndCreateBuffer(alignedVa, alignedSize);

            _cpStorageBuffers.SetBounds(index, address, alignedSize, flags);
        }
예제 #4
0
        /// <summary>
        /// Binds a storage buffer for a stage of the graphics pipeline.
        /// Shaders can both read from and write to storage buffers.
        /// </summary>
        /// <param name="stage">Index of the shader stage</param>
        /// <param name="index">Index of the storage buffer</param>
        /// <param name="gpuVa">Start GPU virtual address of the buffer</param>
        /// <param name="size">Size in bytes of the storage buffer</param>
        /// <param name="flags">Buffer usage flags</param>
        public void SetGraphicsStorageBuffer(int stage, int index, ulong gpuVa, ulong size, BufferUsageFlags flags)
        {
            var alignment = _context.Capabilities.StorageBufferOffsetAlignment;

            // Bytes between the aligned start and the requested start; the range grows by this amount.
            ulong misalignment = gpuVa & ((ulong)alignment - 1);

            ulong alignedSize = size + misalignment;
            ulong alignedVa   = BitUtils.AlignDown(gpuVa, alignment);

            ulong address = TranslateAndCreateBuffer(alignedVa, alignedSize);

            // The bindings are flagged dirty only when the address or the size actually changes.
            bool sameRange =
                _gpStorageBuffers[stage].Buffers[index].Address == address &&
                _gpStorageBuffers[stage].Buffers[index].Size == alignedSize;

            if (!sameRange)
            {
                _gpStorageBuffersDirty = true;
            }

            _gpStorageBuffers[stage].SetBounds(index, address, alignedSize, flags);
        }
예제 #5
0
        /// <summary>
        /// Frees a thread local storage slot, and frees its page as well once the page has no slots in use.
        /// </summary>
        /// <param name="tlsSlotAddr">Address of the slot to free</param>
        /// <returns>
        /// <see cref="KernelResult.InvalidAddress"/> when the page containing the slot is not tracked,
        /// <see cref="KernelResult.Success"/> otherwise
        /// </returns>
        public KernelResult FreeThreadLocalStorage(ulong tlsSlotAddr)
        {
            ulong pageAddress = BitUtils.AlignDown(tlsSlotAddr, KMemoryManager.PageSize);

            System.CriticalSection.Enter();

            KernelResult status = KernelResult.Success;

            KTlsPageInfo page;

            if (_fullTlsPages.TryGetValue(pageAddress, out page))
            {
                // The page was full; it is about to gain a free slot, so move it to the free pages tree.
                _fullTlsPages.Remove(pageAddress);

                _freeTlsPages.Add(pageAddress, page);
            }
            else if (!_freeTlsPages.TryGetValue(pageAddress, out page))
            {
                status = KernelResult.InvalidAddress;
            }

            bool releasePage = false;

            if (page != null)
            {
                page.FreeTlsSlot(tlsSlotAddr);

                if (page.IsEmpty())
                {
                    // The page is now empty: drop it from the tree here and free
                    // its memory after the critical section has been left.
                    _freeTlsPages.Remove(pageAddress);

                    releasePage = true;
                }
            }

            System.CriticalSection.Leave();

            if (releasePage)
            {
                FreeTlsPage(page);

                return KernelResult.Success;
            }

            return status;
        }
예제 #6
0
            /// <summary>
            /// Stores the block shifts, aligns the managed range and initializes the bitmap over it.
            /// </summary>
            /// <param name="address">Start address of the range</param>
            /// <param name="size">Size of the range</param>
            /// <param name="blockShift">Shift giving the size of a block</param>
            /// <param name="nextBlockShift">Shift of the next block size, or 0 when there is none</param>
            /// <param name="bitStorage">Storage handed to the bitmap</param>
            /// <returns>The value returned by the bitmap initialization</returns>
            public ArraySegment<ulong> Initialize(ulong address, ulong size, int blockShift, int nextBlockShift, ArraySegment<ulong> bitStorage)
            {
                Shift     = blockShift;
                NextShift = nextBlockShift;

                // The range is aligned to the next block size when one exists, to this block size otherwise.
                int   alignShift = nextBlockShift != 0 ? nextBlockShift : blockShift;
                ulong alignment  = 1UL << alignShift;

                ulong alignedStart = BitUtils.AlignDown(address, alignment);
                ulong alignedEnd   = BitUtils.AlignUp(address + size, alignment);

                _heapAddress = alignedStart;
                _endOffset   = (alignedEnd - alignedStart) >> blockShift;

                return _bitmap.Initialize(bitStorage, _endOffset);
            }
예제 #7
0
            /// <summary>
            /// Sets the bit of the block at the given address and, when a next block size exists,
            /// tries to clear the whole aligned group of bits containing it.
            /// </summary>
            /// <param name="address">Address of the block</param>
            /// <returns>The address of the group when its range was cleared, 0 otherwise</returns>
            public ulong PushBlock(ulong address)
            {
                ulong blockIndex = (address - _heapAddress) >> Shift;

                _bitmap.SetBit(blockIndex);

                if (NextShift == 0)
                {
                    return 0;
                }

                // Number of blocks of this size per block of the next size.
                int groupSize = 1 << (NextShift - Shift);

                ulong groupStart = BitUtils.AlignDown(blockIndex, groupSize);

                return _bitmap.ClearRange(groupStart, groupSize)
                    ? _heapAddress + (groupStart << Shift)
                    : 0;
            }
예제 #8
0
        /// <summary>
        /// Converts texture data from a block linear layout into a linear layout, for every level and layer.
        /// </summary>
        /// <param name="width">Width of the first level</param>
        /// <param name="height">Height of the first level</param>
        /// <param name="depth">Depth of the first level</param>
        /// <param name="levels">Number of levels to convert</param>
        /// <param name="layers">Number of layers to convert</param>
        /// <param name="blockWidth">Divisor applied (rounding up) to each level width</param>
        /// <param name="blockHeight">Divisor applied (rounding up) to each level height</param>
        /// <param name="bytesPerPixel">Number of bytes copied per element</param>
        /// <param name="gobBlocksInY">Initial GOB blocks in Y; halved for levels that are short enough</param>
        /// <param name="gobBlocksInZ">Initial GOB blocks in Z; halved for levels that are shallow enough</param>
        /// <param name="gobBlocksInTileX">Multiplier used to compute the wide width alignment</param>
        /// <param name="sizeInfo">Provides the layer size and the per-level offsets of the input data</param>
        /// <param name="data">Block linear input data</param>
        /// <returns>A newly allocated buffer holding the linear data</returns>
        public static Span<byte> ConvertBlockLinearToLinear(
            int width,
            int height,
            int depth,
            int levels,
            int layers,
            int blockWidth,
            int blockHeight,
            int bytesPerPixel,
            int gobBlocksInY,
            int gobBlocksInZ,
            int gobBlocksInTileX,
            SizeInfo sizeInfo,
            ReadOnlySpan<byte> data)
        {
            int linearSize = GetTextureSize(
                width,
                height,
                depth,
                levels,
                layers,
                blockWidth,
                blockHeight,
                bytesPerPixel);

            Span<byte> linear = new byte[linearSize];

            int elementsPerGobRow = GobStride / bytesPerPixel;

            int gobWidth  = elementsPerGobRow * gobBlocksInTileX;
            int gobHeight = gobBlocksInY * GobHeight;

            // These carry over from one level to the next and only ever shrink.
            int levelGobBlocksInY = gobBlocksInY;
            int levelGobBlocksInZ = gobBlocksInZ;

            int writePos = 0;

            for (int level = 0; level < levels; level++)
            {
                int w = BitUtils.DivRoundUp(Math.Max(1, width >> level), blockWidth);
                int h = BitUtils.DivRoundUp(Math.Max(1, height >> level), blockHeight);
                int d = Math.Max(1, depth >> level);

                while (levelGobBlocksInY != 1 && h <= (levelGobBlocksInY >> 1) * GobHeight)
                {
                    levelGobBlocksInY >>= 1;
                }

                while (levelGobBlocksInZ != 1 && d <= (levelGobBlocksInZ >> 1))
                {
                    levelGobBlocksInZ >>= 1;
                }

                int lineBytes = w * bytesPerPixel;

                // Part of each line that can be copied in 16-byte chunks, and the first element after it.
                int vectorBytes = BitUtils.AlignDown(lineBytes, 16);
                int tailStartX  = vectorBytes / bytesPerPixel;

                int outStride = BitUtils.AlignUp(lineBytes, HostStrideAlignment);

                bool useGobRowAlignment = d < gobBlocksInZ || w <= gobWidth || h <= gobHeight;

                int alignment = useGobRowAlignment ? elementsPerGobRow : gobWidth;

                int wAligned = BitUtils.AlignUp(w, alignment);

                BlockLinearLayout layout = new BlockLinearLayout(
                    wAligned,
                    h,
                    d,
                    levelGobBlocksInY,
                    levelGobBlocksInZ,
                    bytesPerPixel);

                for (int layer = 0; layer < layers; layer++)
                {
                    int inBase = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);

                    for (int z = 0; z < d; z++)
                    {
                        for (int y = 0; y < h; y++)
                        {
                            // 16 bytes at a time; here x is a byte position within the line.
                            for (int x = 0; x < vectorBytes; x += 16)
                            {
                                int srcOffset = inBase + layout.GetOffsetWithLineOffset(x, y, z);

                                Span<byte> dest = linear.Slice(writePos + x, 16);

                                data.Slice(srcOffset, 16).CopyTo(dest);
                            }

                            // Remaining elements one by one; here x is an element index.
                            for (int x = tailStartX; x < w; x++)
                            {
                                int srcOffset = inBase + layout.GetOffset(x, y, z);

                                Span<byte> dest = linear.Slice(writePos + x * bytesPerPixel, bytesPerPixel);

                                data.Slice(srcOffset, bytesPerPixel).CopyTo(dest);
                            }

                            writePos += outStride;
                        }
                    }
                }
            }

            return linear;
        }
예제 #9
0
        /// <summary>
        /// Frees a range of pages by splitting it into blocks: first the largest block size that fits
        /// at least once, then progressively smaller blocks for the space left before and after.
        /// </summary>
        /// <param name="address">Start address of the range</param>
        /// <param name="pagesCount">Number of pages to free; nothing is done when it is 0</param>
        public void Free(ulong address, ulong pagesCount)
        {
            if (pagesCount == 0)
            {
                return;
            }

            ulong rangeStart = address;
            ulong rangeEnd   = address + pagesCount * KPageTableBase.PageSize;

            // Until a large block is found, the leading and trailing leftovers are empty.
            ulong headEnd   = rangeStart;
            ulong tailStart = rangeEnd;

            int bigIndex;

            for (bigIndex = _blocksCount - 1; bigIndex >= 0; bigIndex--)
            {
                ulong blockSize = _blocks[bigIndex].Size;

                ulong bigStart = BitUtils.AlignUp(rangeStart, blockSize);
                ulong bigEnd   = BitUtils.AlignDown(rangeEnd, blockSize);

                if (bigStart >= bigEnd)
                {
                    // No whole block of this size fits, try the next smaller size.
                    continue;
                }

                for (ulong block = bigStart; block < bigEnd; block += blockSize)
                {
                    FreeBlock(block, bigIndex);
                }

                headEnd   = bigStart;
                tailStart = bigEnd;

                break;
            }

            // Leading leftover, freed backwards from the first large block using smaller sizes.
            for (int i = bigIndex - 1; i >= 0; i--)
            {
                ulong blockSize = _blocks[i].Size;

                while (rangeStart + blockSize <= headEnd)
                {
                    headEnd -= blockSize;
                    FreeBlock(headEnd, i);
                }
            }

            // Trailing leftover, freed forwards from the last large block using smaller sizes.
            for (int i = bigIndex - 1; i >= 0; i--)
            {
                ulong blockSize = _blocks[i].Size;

                while (tailStart + blockSize <= rangeEnd)
                {
                    FreeBlock(tailStart, i);
                    tailStart += blockSize;
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Performs a full data copy between two textures, reading and writing guest memory directly.
        /// The textures must have a matching layout, size, and bytes per pixel.
        /// </summary>
        /// <remarks>
        /// Only the source texture's stride, layout and dimensions are consulted; from the destination only the address is used.
        /// </remarks>
        /// <param name="src">The source texture</param>
        /// <param name="dst">The destination texture</param>
        /// <param name="w">Copy width</param>
        /// <param name="h">Copy height</param>
        /// <param name="bpp">Bytes per pixel</param>
        private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
        {
            // This calculator is only used to find how many bytes the copy rectangle spans.
            var srcCalculator = new OffsetCalculator(
                w,
                h,
                src.Stride,
                src.LinearLayout,
                src.MemoryLayout.UnpackGobBlocksInY(),
                src.MemoryLayout.UnpackGobBlocksInZ(),
                bpp);

            // The first tuple element is discarded; only the size is needed here.
            (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h);

            var memoryManager = _channel.MemoryManager;

            ulong srcGpuVa = src.Address.Pack();
            ulong dstGpuVa = dst.Address.Pack();

            ReadOnlySpan <byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);

            // Texture dimensions in pixels; for linear textures the width is derived from the stride.
            int width;
            int height = src.Height;

            if (src.LinearLayout)
            {
                width = src.Stride / bpp;
            }
            else
            {
                width = src.Width;
            }

            // If the copy is not equal to the width and height of the texture, we will need to copy partially.
            // It's worth noting that it has already been established that the src and dst are the same size.

            if (w == width && h == height)
            {
                // The copy covers the whole texture: write the source bytes in one go.
                memoryManager.Write(dstGpuVa, srcSpan);
            }
            else
            {
                // The writable region is disposed when this scope ends (using declaration).
                using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
                Span <byte> dstSpan = dstRegion.Memory.Span;

                if (src.LinearLayout)
                {
                    // Line by line copy; source and destination use the same offsets.
                    // NOTE(review): this loop iterates over the texture's `width`/`height`, not the copy's `w`/`h`,
                    // while both spans were sized from the w x h rectangle - confirm the slices cannot go out of range.
                    int stride   = src.Stride;
                    int offset   = 0;
                    int lineSize = width * bpp;

                    for (int y = 0; y < height; y++)
                    {
                        srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));

                        offset += stride;
                    }
                }
                else
                {
                    // Copy with the block linear layout in mind.
                    // Recreate the offset calculator with bpp 1 for the copy, so that X is expressed in bytes.

                    int stride = w * bpp;

                    srcCalculator = new OffsetCalculator(
                        stride,
                        h,
                        0,
                        false,
                        src.MemoryLayout.UnpackGobBlocksInY(),
                        src.MemoryLayout.UnpackGobBlocksInZ(),
                        1);

                    // Part of each line that can be copied as whole 16-byte vectors.
                    int strideTrunc = BitUtils.AlignDown(stride, 16);

                    ReadOnlySpan <Vector128 <byte> > srcVec = MemoryMarshal.Cast <byte, Vector128 <byte> >(srcSpan);
                    Span <Vector128 <byte> >         dstVec = MemoryMarshal.Cast <byte, Vector128 <byte> >(dstSpan);

                    for (int y = 0; y < h; y++)
                    {
                        int x = 0;

                        srcCalculator.SetY(y);

                        for (; x < strideTrunc; x += 16)
                        {
                            // The byte offset is divided by 16 to index the vector spans.
                            // NOTE(review): presumably offsets are 16-byte aligned when x is a multiple of 16 - confirm.
                            int offset = srcCalculator.GetOffset(x) >> 4;

                            dstVec[offset] = srcVec[offset];
                        }

                        // Remaining bytes of the line are copied one by one.
                        for (; x < stride; x++)
                        {
                            int offset = srcCalculator.GetOffset(x);

                            dstSpan[offset] = srcSpan[offset];
                        }
                    }
                }
            }
        }
예제 #11
0
        /// <summary>
        /// Converts linear texture data into a block linear layout, writing the result into <paramref name="dst"/>.
        /// </summary>
        /// <param name="dst">Destination receiving the block linear data</param>
        /// <param name="width">Width of the texture, in elements of <paramref name="bytesPerPixel"/> bytes</param>
        /// <param name="height">Number of lines to convert</param>
        /// <param name="stride">Distance in bytes between the start of two consecutive input lines</param>
        /// <param name="bytesPerPixel">Bytes per element; must be 1, 2, 4, 8, 12 or 16</param>
        /// <param name="gobBlocksInY">GOB blocks in Y passed to the layout converter</param>
        /// <param name="data">Linear input data</param>
        /// <exception cref="NotSupportedException">Thrown when <paramref name="bytesPerPixel"/> is not supported</exception>
        public static void ConvertLinearToBlockLinear(
            Span<byte> dst,
            int width,
            int height,
            int stride,
            int bytesPerPixel,
            int gobBlocksInY,
            ReadOnlySpan<byte> data)
        {
            int gobHeight = gobBlocksInY * GobHeight;

            // Parts of each line that can be copied in 16-byte and 64-byte chunks.
            int strideTrunc   = BitUtils.AlignDown(width * bytesPerPixel, 16);
            int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64);

            // First element that is not covered by the 16-byte chunks.
            int xStart = strideTrunc / bytesPerPixel;

            // Bytes to skip at the end of each input line to reach the next one.
            int inStrideGap = stride - width * bytesPerPixel;

            int alignment = GobStride / bytesPerPixel;

            int wAligned = BitUtils.AlignUp(width, alignment);

            BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel);

            // Performs the copy with T as the element type of the per-element tail loop.
            // It returns a value only so that it can be called from the switch expression below.
            unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> source) where T : unmanaged
            {
                fixed (byte* outputPtr = output, dataPtr = source)
                {
                    // The input is read sequentially; only the output offsets come from the layout converter.
                    byte* inPtr = dataPtr;

                    for (int y = 0; y < height; y++)
                    {
                        layoutConverter.SetY(y);

                        // 64 input bytes per iteration, stored as four 16-byte vectors
                        // at +0, +0x20, +0x100 and +0x120 from the computed output offset.
                        for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
                        {
                            byte* offset  = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x);
                            byte* offset2 = offset + 0x20;
                            byte* offset3 = offset + 0x100;
                            byte* offset4 = offset + 0x120;

                            Vector128<byte> value  = *(Vector128<byte>*)inPtr;
                            Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
                            Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
                            Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);

                            *(Vector128<byte>*)offset  = value;
                            *(Vector128<byte>*)offset2 = value2;
                            *(Vector128<byte>*)offset3 = value3;
                            *(Vector128<byte>*)offset4 = value4;
                        }

                        // Remaining whole 16-byte chunks of the line.
                        for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
                        {
                            byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x);

                            *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
                        }

                        // Remaining elements, copied one by one; here x is an element index.
                        for (int x = xStart; x < width; x++, inPtr += bytesPerPixel)
                        {
                            byte* offset = outputPtr + layoutConverter.GetOffset(x);

                            *(T*)offset = *(T*)inPtr;
                        }

                        inPtr += inStrideGap;
                    }
                }

                return true;
            }

            // The original message used "${bytesPerPixel}", which in C# prints a stray '$' before the value.
            bool _ = bytesPerPixel switch
            {
                1 => Convert<byte>(dst, data),
                2 => Convert<ushort>(dst, data),
                4 => Convert<uint>(dst, data),
                8 => Convert<ulong>(dst, data),
                12 => Convert<Bpp12Pixel>(dst, data),
                16 => Convert<Vector128<byte>>(dst, data),
                _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
            };
        }
예제 #12
0
        /// <summary>
        /// Creates the region manager: builds one block descriptor (with its mask arrays) per entry of
        /// <c>BlockOrders</c>, allocates the page reference counts and frees the whole region when it is not empty.
        /// </summary>
        /// <param name="address">Start address of the region</param>
        /// <param name="size">Size of the region in bytes</param>
        /// <param name="endAddr">End address of the region</param>
        public KMemoryRegionManager(ulong address, ulong size, ulong endAddr)
        {
            _blocks = new KMemoryRegionBlock[BlockOrders.Length];

            Address = address;
            Size    = size;
            EndAddr = endAddr;

            _blockOrdersCount = BlockOrders.Length;

            for (int blockIndex = 0; blockIndex < _blockOrdersCount; blockIndex++)
            {
                int order = BlockOrders[blockIndex];

                // The last order has no next order, which is represented by 0.
                bool isLastOrder = blockIndex == _blockOrdersCount - 1;

                int nextOrder = isLastOrder ? 0 : BlockOrders[blockIndex + 1];

                int currBlockSize = 1 << order;
                int nextBlockSize = nextOrder != 0 ? 1 << nextOrder : currBlockSize;

                ulong startAligned   = BitUtils.AlignDown(address, nextBlockSize);
                ulong endAddrAligned = BitUtils.AlignDown(endAddr, currBlockSize);
                ulong endAddrRounded = BitUtils.AlignUp(address + size, nextBlockSize);

                ulong sizeInBlocksTruncated = (endAddrAligned - startAligned) >> order;
                ulong sizeInBlocksRounded   = (endAddrRounded - startAligned) >> order;

                // One level, plus one more for each time the block count can be divided by 64 and stay non-zero.
                int maxLevel = 1;

                for (ulong remaining = sizeInBlocksRounded / 64; remaining != 0; remaining /= 64)
                {
                    maxLevel++;
                }

                // Filled from the last level up; each level has one 64-bit mask per 64 entries of the level after it.
                long[][] masks = new long[maxLevel][];

                ulong levelSize = sizeInBlocksRounded;

                for (int level = maxLevel - 1; level >= 0; level--)
                {
                    levelSize = (levelSize + 63) / 64;

                    masks[level] = new long[levelSize];
                }

                _blocks[blockIndex] = new KMemoryRegionBlock
                {
                    Order                 = order,
                    NextOrder             = nextOrder,
                    StartAligned          = startAligned,
                    SizeInBlocksTruncated = sizeInBlocksTruncated,
                    SizeInBlocksRounded   = sizeInBlocksRounded,
                    MaxLevel              = maxLevel,
                    Masks                 = masks
                };
            }

            var pagesCount = size / KPageTableBase.PageSize;

            _pageReferenceCounts = new ushort[pagesCount];

            if (size != 0)
            {
                FreePages(address, pagesCount);
            }
        }
예제 #13
0
        /// <summary>
        /// Creates a process from a set of static objects, loads every image into it and starts it.
        /// </summary>
        /// <param name="System">System the process is created on and registered with</param>
        /// <param name="MetaData">Metadata supplying the title, access control and main thread settings</param>
        /// <param name="StaticObjects">Executable images, placed back to back starting at the code base address</param>
        /// <param name="Arguments">Optional arguments; when not null, space for them is reserved in the code region</param>
        /// <returns>True when the process was created, loaded and started; false when any step failed (the failure is logged)</returns>
        public static bool LoadStaticObjects(
            Horizon System,
            Npdm MetaData,
            IExecutable[] StaticObjects,
            byte[]        Arguments = null)
        {
            int   argsSize  = 0;
            ulong codeStart = 0x8000000;
            int   codeSize  = 0;

            ulong[] nsoBase = new ulong[StaticObjects.Length];

            for (int index = 0; index < StaticObjects.Length; index++)
            {
                IExecutable staticObject = StaticObjects[index];

                int textEnd = staticObject.TextOffset + staticObject.Text.Length;
                int roEnd   = staticObject.ROOffset + staticObject.RO.Length;
                int dataEnd = staticObject.DataOffset + staticObject.Data.Length + staticObject.BssSize;

                // The image size is the furthest section end, compared as unsigned values.
                int nsoSize = textEnd;

                if ((uint)nsoSize < (uint)roEnd)
                {
                    nsoSize = roEnd;
                }

                if ((uint)nsoSize < (uint)dataEnd)
                {
                    nsoSize = dataEnd;
                }

                nsoSize = BitUtils.AlignUp(nsoSize, KMemoryManager.PageSize);

                nsoBase[index] = codeStart + (ulong)codeSize;

                codeSize += nsoSize;

                // Space for the arguments is reserved after an image, while no space has been reserved yet.
                if (Arguments != null && argsSize == 0)
                {
                    argsSize = BitUtils.AlignDown(Arguments.Length * 2 + ArgsTotalSize - 1, KMemoryManager.PageSize);

                    codeSize += argsSize;
                }
            }

            int codePagesCount = codeSize / KMemoryManager.PageSize;

            int personalMmHeapPagesCount = MetaData.PersonalMmHeapSize / KMemoryManager.PageSize;

            ProcessCreationInfo creationInfo = new ProcessCreationInfo(
                MetaData.TitleName,
                MetaData.ProcessCategory,
                MetaData.ACI0.TitleId,
                codeStart,
                codePagesCount,
                MetaData.MmuFlags,
                0,
                personalMmHeapPagesCount);

            KernelResult result;

            KResourceLimit resourceLimit = new KResourceLimit(System);

            long applicationRgSize = (long)System.MemoryRegions[(int)MemoryRegion.Application].Size;

            // The results are OR-ed together, so the combined value is only meaningful as "all succeeded or not".
            result  = resourceLimit.SetLimitValue(LimitableResource.Memory, applicationRgSize);
            result |= resourceLimit.SetLimitValue(LimitableResource.Thread, 608);
            result |= resourceLimit.SetLimitValue(LimitableResource.Event, 700);
            result |= resourceLimit.SetLimitValue(LimitableResource.TransferMemory, 128);
            result |= resourceLimit.SetLimitValue(LimitableResource.Session, 894);

            if (result != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, "Process initialization failed setting resource limit values.");

                return false;
            }

            KProcess process = new KProcess(System);

            result = process.Initialize(
                creationInfo,
                MetaData.ACI0.KernelAccessControl.Capabilities,
                resourceLimit,
                MemoryRegion.Application);

            if (result != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{result}\".");

                return false;
            }

            for (int index = 0; index < StaticObjects.Length; index++)
            {
                Logger.PrintInfo(LogClass.Loader, $"Loading image {index} at 0x{nsoBase[index]:x16}...");

                result = LoadIntoMemory(process, StaticObjects[index], nsoBase[index]);

                if (result != KernelResult.Success)
                {
                    // This failure comes from loading an image, not from process initialization.
                    Logger.PrintError(LogClass.Loader, $"Image {index} load returned error \"{result}\".");

                    return false;
                }
            }

            result = process.Start(MetaData.MainThreadPriority, (ulong)MetaData.MainThreadStackSize);

            if (result != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process start returned error \"{result}\".");

                return false;
            }

            System.Processes.Add(process.Pid, process);

            return true;
        }
예제 #14
0
        /// <summary>
        /// Creates, populates and starts a process from a set of static executables.
        /// </summary>
        /// <remarks>
        /// The images are laid out back to back starting at a fixed code base address, each
        /// one rounded up to a whole number of pages. When arguments are supplied, a
        /// page-aligned argument area is added to the code size after an image is laid out,
        /// as long as no non-zero argument area has been reserved yet.
        /// </remarks>
        /// <param name="system">System instance the process is created in and registered with.</param>
        /// <param name="metaData">Metadata supplying the title, capabilities and main thread settings.</param>
        /// <param name="staticObjects">Executables to load, in layout order.</param>
        /// <param name="arguments">Optional argument bytes; null reserves no argument area.</param>
        /// <returns>True if the process was started and registered, false if any step failed.</returns>
        public static bool LoadStaticObjects(
            Horizon system,
            Npdm metaData,
            IExecutable[] staticObjects,
            byte[]        arguments = null)
        {
            // Recorded while laying out the images; not read again inside this method.
            ulong argsStart = 0;
            int   argsSize  = 0;

            ulong codeBase  = 0x8000000;
            int   totalSize = 0;

            ulong[] imageBases = new ulong[staticObjects.Length];

            for (int i = 0; i < staticObjects.Length; i++)
            {
                IExecutable image = staticObjects[i];

                // End offset of each segment, relative to the start of the image.
                int textEnd = image.TextOffset + image.Text.Length;
                int roEnd   = image.RoOffset + image.Ro.Length;
                int dataEnd = image.DataOffset + image.Data.Length + image.BssSize;

                // The image size is the largest end offset, compared as unsigned values.
                int imageSize = textEnd;

                if ((uint)roEnd > (uint)imageSize)
                {
                    imageSize = roEnd;
                }

                if ((uint)dataEnd > (uint)imageSize)
                {
                    imageSize = dataEnd;
                }

                imageSize = BitUtils.AlignUp(imageSize, KMemoryManager.PageSize);

                imageBases[i] = codeBase + (ulong)totalSize;

                totalSize += imageSize;

                if (arguments == null || argsSize != 0)
                {
                    continue;
                }

                argsStart = (ulong)totalSize;

                argsSize = BitUtils.AlignDown(arguments.Length * 2 + ArgsTotalSize - 1, KMemoryManager.PageSize);

                totalSize += argsSize;
            }

            int codePages = totalSize / KMemoryManager.PageSize;

            int personalHeapPages = metaData.PersonalMmHeapSize / KMemoryManager.PageSize;

            ProcessCreationInfo creationInfo = new ProcessCreationInfo(
                metaData.TitleName,
                metaData.ProcessCategory,
                metaData.Aci0.TitleId,
                codeBase,
                codePages,
                metaData.MmuFlags,
                0,
                personalHeapPages);

            KResourceLimit limits = new KResourceLimit(system);

            long applicationRegionSize = (long)system.MemoryRegions[(int)MemoryRegion.Application].Size;

            // The individual results are OR-ed together, so any failure leaves a non-success value.
            KernelResult status = limits.SetLimitValue(LimitableResource.Memory, applicationRegionSize);

            status |= limits.SetLimitValue(LimitableResource.Thread, 608);
            status |= limits.SetLimitValue(LimitableResource.Event, 700);
            status |= limits.SetLimitValue(LimitableResource.TransferMemory, 128);
            status |= limits.SetLimitValue(LimitableResource.Session, 894);

            if (status != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process initialization failed setting resource limit values.");

                return false;
            }

            KProcess newProcess = new KProcess(system);

            status = newProcess.Initialize(
                creationInfo,
                metaData.Aci0.KernelAccessControl.Capabilities,
                limits,
                MemoryRegion.Application);

            if (status != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{status}\".");

                return false;
            }

            // Map every image at the base address computed during layout.
            for (int i = 0; i < staticObjects.Length; i++)
            {
                Logger.PrintInfo(LogClass.Loader, $"Loading image {i} at 0x{imageBases[i]:x16}...");

                status = LoadIntoMemory(newProcess, staticObjects[i], imageBases[i]);

                if (status != KernelResult.Success)
                {
                    Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{status}\".");

                    return false;
                }
            }

            status = newProcess.Start(metaData.MainThreadPriority, (ulong)metaData.MainThreadStackSize);

            if (status != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process start returned error \"{status}\".");

                return false;
            }

            system.Processes.Add(newProcess.Pid, newProcess);

            return true;
        }
예제 #15
0
        /// <summary>
        /// Marks the pages in [Address, Address + PagesCount * PageSize) as free in the
        /// per-order block bitmaps, using the largest block order that fits for the
        /// aligned middle of the range and smaller orders for the unaligned head and tail.
        /// </summary>
        /// <param name="Address">Start address of the range to free.</param>
        /// <param name="PagesCount">Number of pages in the range.</param>
        private void FreePages(ulong Address, ulong PagesCount)
        {
            ulong EndAddr = Address + PagesCount * KMemoryManager.PageSize;

            int BlockIndex = BlockOrdersCount - 1;

            ulong AddressRounded   = 0;
            ulong EndAddrTruncated = 0;

            // Search from the last block order downwards for the first order whose block
            // size leaves a non-empty range after aligning the start up and the end down.
            // If none qualifies, BlockIndex ends at -1 and none of the loops below run.
            for (; BlockIndex >= 0; BlockIndex--)
            {
                KMemoryRegionBlock AllocInfo = Blocks[BlockIndex];

                int BlockSize = 1 << AllocInfo.Order;

                AddressRounded   = BitUtils.AlignUp(Address, BlockSize);
                EndAddrTruncated = BitUtils.AlignDown(EndAddr, BlockSize);

                if (AddressRounded < EndAddrTruncated)
                {
                    break;
                }
            }

            // Frees the block at CurrAddress for the order currently selected by BlockIndex.
            // BlockIndex is captured, so each call starts at whatever order the calling loop
            // has reached. The walk continues to higher indices only while TryCoalesce succeeds.
            void FreeRegion(ulong CurrAddress)
            {
                for (int CurrBlockIndex = BlockIndex;
                     CurrBlockIndex < BlockOrdersCount && CurrAddress != 0;
                     CurrBlockIndex++)
                {
                    KMemoryRegionBlock Block = Blocks[CurrBlockIndex];

                    Block.FreeCount++;

                    // Index of the freed block, counted in blocks of this order from StartAligned.
                    ulong FreedBlocks = (CurrAddress - Block.StartAligned) >> Block.Order;

                    int Index = (int)FreedBlocks;

                    // Set the block's bit at the last mask level, then repeat one level up for
                    // the word that was just written (Index /= 64). Stop at the first word that
                    // already had a bit set before this update.
                    for (int Level = Block.MaxLevel - 1; Level >= 0; Level--, Index /= 64)
                    {
                        long Mask = Block.Masks[Level][Index / 64];

                        Block.Masks[Level][Index / 64] = Mask | (1L << (Index & 63));

                        if (Mask != 0)
                        {
                            break;
                        }
                    }

                    // Ratio between the next order's block size and this order's block size.
                    int BlockSizeDelta = 1 << (Block.NextOrder - Block.Order);

                    int FreedBlocksTruncated = BitUtils.AlignDown((int)FreedBlocks, BlockSizeDelta);

                    // NOTE(review): TryCoalesce presumably checks whether the whole aligned group
                    // of BlockSizeDelta blocks is free and merges it - confirm against its definition.
                    if (!Block.TryCoalesce(FreedBlocksTruncated, BlockSizeDelta))
                    {
                        break;
                    }

                    // Continue at the next order with the start address of the aligned group.
                    CurrAddress = Block.StartAligned + ((ulong)FreedBlocksTruncated << Block.Order);
                }
            }

            // Free the aligned middle region, one block of the selected order at a time.
            ulong BaseAddress = AddressRounded;

            while (BaseAddress < EndAddrTruncated)
            {
                ulong BlockSize = 1UL << Blocks[BlockIndex].Order;

                FreeRegion(BaseAddress);

                BaseAddress += BlockSize;
            }

            int NextBlockIndex = BlockIndex - 1;

            // Free the region between Address and the start of the aligned region,
            // walking backwards and dropping to lower block indices as needed.
            BaseAddress = AddressRounded;

            for (BlockIndex = NextBlockIndex; BlockIndex >= 0; BlockIndex--)
            {
                ulong BlockSize = 1UL << Blocks[BlockIndex].Order;

                while (BaseAddress - BlockSize >= Address)
                {
                    BaseAddress -= BlockSize;

                    FreeRegion(BaseAddress);
                }
            }

            // Free the region between the end of the aligned region and EndAddr,
            // walking forwards and dropping to lower block indices as needed.
            BaseAddress = EndAddrTruncated;

            for (BlockIndex = NextBlockIndex; BlockIndex >= 0; BlockIndex--)
            {
                ulong BlockSize = 1UL << Blocks[BlockIndex].Order;

                while (BaseAddress + BlockSize <= EndAddr)
                {
                    FreeRegion(BaseAddress);

                    BaseAddress += BlockSize;
                }
            }
        }
예제 #16
0
        /// <summary>
        /// Builds the region manager for a memory range: one block descriptor with its
        /// multi-level mask arrays per entry of BlockOrders, after which the whole range
        /// is released through FreePages when its size is non-zero.
        /// </summary>
        /// <param name="Address">Start address of the managed range.</param>
        /// <param name="Size">Size of the managed range in bytes.</param>
        /// <param name="EndAddr">End address of the managed range.</param>
        public KMemoryRegionManager(ulong Address, ulong Size, ulong EndAddr)
        {
            this.Address = Address;
            this.Size    = Size;
            this.EndAddr = EndAddr;

            Blocks = new KMemoryRegionBlock[BlockOrders.Length];

            BlockOrdersCount = BlockOrders.Length;

            for (int orderIndex = 0; orderIndex < BlockOrdersCount; orderIndex++)
            {
                int order = BlockOrders[orderIndex];

                // The last order has no successor; this is recorded as a next order of zero.
                bool isLastOrder = orderIndex == BlockOrdersCount - 1;

                int nextOrder = isLastOrder ? 0 : BlockOrders[orderIndex + 1];

                int blockSize     = 1 << order;
                int nextBlockSize = nextOrder != 0 ? 1 << nextOrder : blockSize;

                // The start is aligned to the next order's block size, the truncated end
                // to this order's block size.
                ulong rangeStart        = BitUtils.AlignDown(Address, nextBlockSize);
                ulong rangeEndTruncated = BitUtils.AlignDown(EndAddr, blockSize);
                ulong rangeEndRounded   = BitUtils.AlignUp(Address + Size, nextBlockSize);

                ulong blocksTruncated = (rangeEndTruncated - rangeStart) >> order;
                ulong blocksRounded   = (rangeEndRounded - rangeStart) >> order;

                // One mask level per factor of 64 in the rounded block count, at least one.
                int levelCount = 1;

                for (ulong remaining = blocksRounded / 64; remaining != 0; remaining /= 64)
                {
                    levelCount++;
                }

                long[][] masks = new long[levelCount][];

                // The last level holds one bit per block; each level above it holds
                // one bit per 64-bit word of the level below.
                ulong wordCount = blocksRounded;

                for (int level = levelCount - 1; level >= 0; level--)
                {
                    wordCount = (wordCount + 63) / 64;

                    masks[level] = new long[wordCount];
                }

                KMemoryRegionBlock descriptor = new KMemoryRegionBlock();

                descriptor.Order                 = BlockOrders[orderIndex];
                descriptor.NextOrder             = nextOrder;
                descriptor.StartAligned          = rangeStart;
                descriptor.SizeInBlocksTruncated = blocksTruncated;
                descriptor.SizeInBlocksRounded   = blocksRounded;
                descriptor.MaxLevel              = levelCount;
                descriptor.Masks                 = masks;

                Blocks[orderIndex] = descriptor;
            }

            if (Size == 0)
            {
                return;
            }

            FreePages(Address, Size / KMemoryManager.PageSize);
        }
예제 #17
0
        /// <summary>
        /// Copies the buffered inline data to its destination once the transfer is complete.
        /// </summary>
        /// <remarks>
        /// A single linear line is written with one tracked write of the line length.
        /// Every other case is copied line by line through an offset calculator: single
        /// bytes up to the first 16-byte aligned X coordinate, 16-byte vectors across the
        /// aligned middle, and single bytes for the remainder of the line.
        /// </remarks>
        private void FinishTransfer()
        {
            var gpuMemory = _channel.MemoryManager;

            var source = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (!_isLinear || _lineCount != 1)
            {
                var layout = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int readPos = 0;

                for (int line = _dstY; line < _dstY + _lineCount; line++)
                {
                    int lineStart    = _dstX;
                    int lineEnd      = _dstX + _lineLengthIn;
                    int alignedStart = BitUtils.AlignUp(_dstX, 16);
                    int alignedEnd   = BitUtils.AlignDown(lineEnd, 16);

                    int column = lineStart;

                    // Leading bytes, up to the first 16-byte aligned column (if the line reaches it).
                    if (alignedStart <= lineEnd)
                    {
                        while (column < alignedStart)
                        {
                            int offset = layout.GetOffset(column, line);

                            ulong target = _dstGpuVa + (uint)offset;

                            gpuMemory.Write(target, source[readPos]);

                            column++;
                            readPos++;
                        }
                    }

                    // Aligned middle, 16 bytes per write.
                    while (column < alignedEnd)
                    {
                        int offset = layout.GetOffset(column, line);

                        ulong target = _dstGpuVa + (uint)offset;

                        gpuMemory.Write(target, MemoryMarshal.Cast<byte, Vector128<byte>>(source.Slice(readPos, 16))[0]);

                        column  += 16;
                        readPos += 16;
                    }

                    // Trailing bytes.
                    while (column < lineEnd)
                    {
                        int offset = layout.GetOffset(column, line);

                        ulong target = _dstGpuVa + (uint)offset;

                        gpuMemory.Write(target, source[readPos]);

                        column++;
                        readPos++;
                    }

                    // The data is pushed one word at a time, so every source line is padded to
                    // a multiple of 4 bytes; skip that padding (0 to 3 bytes) before the next line.
                    readPos += (4 - (_lineLengthIn & 3)) & 3;
                }

                _context.AdvanceSequence();
            }
            else
            {
                gpuMemory.WriteTrackedResource(_dstGpuVa, source.Slice(0, _lineLengthIn));
                _context.AdvanceSequence();
            }

            _finished = true;
        }
예제 #18
0
        /// <summary>
        /// Creates, populates and starts a process from a set of executables.
        /// </summary>
        /// <remarks>
        /// The images are laid out back to back from a code base address that depends on
        /// whether the metadata describes a 64-bit process, each image rounded up to a whole
        /// number of pages. When arguments are supplied, a page-aligned argument area is
        /// added to the code size after an image is laid out, as long as no non-zero
        /// argument area has been reserved yet. The memory region of the process is taken
        /// from the ACID flags.
        /// </remarks>
        /// <param name="context">Kernel context the process is created in and registered with.</param>
        /// <param name="metaData">Metadata supplying the title, capabilities and main thread settings.</param>
        /// <param name="arguments">Optional argument bytes; null reserves no argument area.</param>
        /// <param name="executables">Executables to load, in layout order.</param>
        /// <returns>True if the process was started, false if any step failed.</returns>
        public static bool LoadNsos(
            KernelContext context,
            Npdm metaData,
            byte[]        arguments = null,
            params IExecutable[] executables)
        {
            // Recorded while laying out the images; not read again inside this method.
            ulong argsStart = 0;
            int   argsSize  = 0;

            ulong codeBase;

            if (metaData.Is64Bit)
            {
                codeBase = 0x8000000UL;
            }
            else
            {
                codeBase = 0x200000UL;
            }

            int totalSize = 0;

            ulong[] imageBases = new ulong[executables.Length];

            for (int i = 0; i < executables.Length; i++)
            {
                IExecutable image = executables[i];

                // End offset of each segment, relative to the start of the image.
                int textEnd = image.TextOffset + image.Text.Length;
                int roEnd   = image.RoOffset + image.Ro.Length;
                int dataEnd = image.DataOffset + image.Data.Length + image.BssSize;

                // The image size is the largest end offset, compared as unsigned values.
                int imageSize = textEnd;

                if ((uint)roEnd > (uint)imageSize)
                {
                    imageSize = roEnd;
                }

                if ((uint)dataEnd > (uint)imageSize)
                {
                    imageSize = dataEnd;
                }

                imageSize = BitUtils.AlignUp(imageSize, KMemoryManager.PageSize);

                imageBases[i] = codeBase + (ulong)totalSize;

                totalSize += imageSize;

                if (arguments == null || argsSize != 0)
                {
                    continue;
                }

                argsStart = (ulong)totalSize;

                argsSize = BitUtils.AlignDown(arguments.Length * 2 + ArgsTotalSize - 1, KMemoryManager.PageSize);

                totalSize += argsSize;
            }

            // Publish the static code range to the profiler.
            PtcProfiler.StaticCodeStart = codeBase;
            PtcProfiler.StaticCodeSize  = totalSize;

            int codePages = totalSize / KMemoryManager.PageSize;

            int personalHeapPages = metaData.PersonalMmHeapSize / KMemoryManager.PageSize;

            ProcessCreationInfo creationInfo = new ProcessCreationInfo(
                metaData.TitleName,
                metaData.Version,
                metaData.Aci0.TitleId,
                codeBase,
                codePages,
                metaData.MmuFlags,
                0,
                personalHeapPages);

            KResourceLimit limits = new KResourceLimit(context);

            long applicationRegionSize = (long)context.MemoryRegions[(int)MemoryRegion.Application].Size;

            // The individual results are OR-ed together, so any failure leaves a non-success value.
            KernelResult status = limits.SetLimitValue(LimitableResource.Memory, applicationRegionSize);

            status |= limits.SetLimitValue(LimitableResource.Thread, 608);
            status |= limits.SetLimitValue(LimitableResource.Event, 700);
            status |= limits.SetLimitValue(LimitableResource.TransferMemory, 128);
            status |= limits.SetLimitValue(LimitableResource.Session, 894);

            if (status != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process initialization failed setting resource limit values.");

                return false;
            }

            KProcess newProcess = new KProcess(context);

            // The memory region is encoded in bits 2..5 of the ACID flags.
            MemoryRegion region = (MemoryRegion)((metaData.Acid.Flags >> 2) & 0xf);

            if (region > MemoryRegion.NvServices)
            {
                Logger.PrintError(LogClass.Loader, $"Process initialization failed due to invalid ACID flags.");

                return false;
            }

            status = newProcess.Initialize(
                creationInfo,
                metaData.Aci0.KernelAccessControl.Capabilities,
                limits,
                region);

            if (status != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{status}\".");

                return false;
            }

            // Map every image at the base address computed during layout.
            for (int i = 0; i < executables.Length; i++)
            {
                Logger.PrintInfo(LogClass.Loader, $"Loading image {i} at 0x{imageBases[i]:x16}...");

                status = LoadIntoMemory(newProcess, executables[i], imageBases[i]);

                if (status != KernelResult.Success)
                {
                    Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{status}\".");

                    return false;
                }
            }

            newProcess.DefaultCpuCore = metaData.DefaultCpuId;

            status = newProcess.Start(metaData.MainThreadPriority, (ulong)metaData.MainThreadStackSize);

            if (status != KernelResult.Success)
            {
                Logger.PrintError(LogClass.Loader, $"Process start returned error \"{status}\".");

                return false;
            }

            context.Processes.TryAdd(newProcess.Pid, newProcess);

            return true;
        }
예제 #19
0
        /// <summary>
        /// Copies the buffered inline data to its destination once the transfer is complete.
        /// </summary>
        /// <remarks>
        /// A single linear line is written with one tracked write of the whole buffer.
        /// Every other case is copied line by line through an offset calculator: single
        /// bytes up to the first 16-byte aligned X coordinate, 16-byte vectors across the
        /// aligned middle, and single bytes for the remainder of the line.
        /// </remarks>
        private void FinishTransfer()
        {
            var gpuMemory = _channel.MemoryManager;

            var source = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (!_isLinear || _lineCount != 1)
            {
                var layout = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int readPos = 0;

                for (int line = _dstY; line < _dstY + _lineCount; line++)
                {
                    int lineStart    = _dstX;
                    int lineEnd      = _dstX + _lineLengthIn;
                    int alignedStart = BitUtils.AlignUp(_dstX, 16);
                    int alignedEnd   = BitUtils.AlignDown(lineEnd, 16);

                    int column = lineStart;

                    // Leading bytes, up to the first 16-byte aligned column (if the line reaches it).
                    if (alignedStart <= lineEnd)
                    {
                        while (column < alignedStart)
                        {
                            int offset = layout.GetOffset(column, line);

                            ulong target = _dstGpuVa + (uint)offset;

                            gpuMemory.Write(target, source[readPos]);

                            column++;
                            readPos++;
                        }
                    }

                    // Aligned middle, 16 bytes per write.
                    while (column < alignedEnd)
                    {
                        int offset = layout.GetOffset(column, line);

                        ulong target = _dstGpuVa + (uint)offset;

                        gpuMemory.Write(target, MemoryMarshal.Cast<byte, Vector128<byte>>(source.Slice(readPos, 16))[0]);

                        column  += 16;
                        readPos += 16;
                    }

                    // Trailing bytes.
                    while (column < lineEnd)
                    {
                        int offset = layout.GetOffset(column, line);

                        ulong target = _dstGpuVa + (uint)offset;

                        gpuMemory.Write(target, source[readPos]);

                        column++;
                        readPos++;
                    }
                }

                _context.AdvanceSequence();
            }
            else
            {
                gpuMemory.WriteTrackedResource(_dstGpuVa, source);
                _context.AdvanceSequence();
            }

            _finished = true;
        }
예제 #20
0
        /// <summary>
        /// Marks the pages in [address, address + pagesCount * PageSize) as free in the
        /// per-order block bitmaps, using the largest block order that fits for the
        /// aligned middle of the range and smaller orders for the unaligned head and tail.
        /// The whole operation runs while holding a lock on the block array.
        /// </summary>
        /// <param name="address">Start address of the range to free.</param>
        /// <param name="pagesCount">Number of pages in the range.</param>
        private void FreePages(ulong address, ulong pagesCount)
        {
            lock (_blocks)
            {
                ulong endAddr = address + pagesCount * KPageTableBase.PageSize;

                int blockIndex = _blockOrdersCount - 1;

                ulong addressRounded   = 0;
                ulong endAddrTruncated = 0;

                // Search from the last block order downwards for the first order whose block
                // size leaves a non-empty range after aligning the start up and the end down.
                // If none qualifies, blockIndex ends at -1 and none of the loops below run.
                for (; blockIndex >= 0; blockIndex--)
                {
                    KMemoryRegionBlock allocInfo = _blocks[blockIndex];

                    int blockSize = 1 << allocInfo.Order;

                    addressRounded   = BitUtils.AlignUp(address, blockSize);
                    endAddrTruncated = BitUtils.AlignDown(endAddr, blockSize);

                    if (addressRounded < endAddrTruncated)
                    {
                        break;
                    }
                }

                // Frees the block at currAddress for the order currently selected by blockIndex.
                // blockIndex is captured, so each call starts at whatever order the calling loop
                // has reached. The walk continues to higher indices only while TryCoalesce succeeds.
                void FreeRegion(ulong currAddress)
                {
                    for (int currBlockIndex = blockIndex;
                         currBlockIndex < _blockOrdersCount && currAddress != 0;
                         currBlockIndex++)
                    {
                        KMemoryRegionBlock block = _blocks[currBlockIndex];

                        block.FreeCount++;

                        // Index of the freed block, counted in blocks of this order from StartAligned.
                        ulong freedBlocks = (currAddress - block.StartAligned) >> block.Order;

                        int index = (int)freedBlocks;

                        // Set the block's bit at the last mask level, then repeat one level up for
                        // the word that was just written (index /= 64). Stop at the first word that
                        // already had a bit set before this update.
                        for (int level = block.MaxLevel - 1; level >= 0; level--, index /= 64)
                        {
                            long mask = block.Masks[level][index / 64];

                            block.Masks[level][index / 64] = mask | (1L << (index & 63));

                            if (mask != 0)
                            {
                                break;
                            }
                        }

                        // Ratio between the next order's block size and this order's block size.
                        int blockSizeDelta = 1 << (block.NextOrder - block.Order);

                        int freedBlocksTruncated = BitUtils.AlignDown((int)freedBlocks, blockSizeDelta);

                        // NOTE(review): TryCoalesce presumably checks whether the whole aligned group
                        // of blockSizeDelta blocks is free and merges it - confirm against its definition.
                        if (!block.TryCoalesce(freedBlocksTruncated, blockSizeDelta))
                        {
                            break;
                        }

                        // Continue at the next order with the start address of the aligned group.
                        currAddress = block.StartAligned + ((ulong)freedBlocksTruncated << block.Order);
                    }
                }

                // Free the aligned middle region, one block of the selected order at a time.
                ulong baseAddress = addressRounded;

                while (baseAddress < endAddrTruncated)
                {
                    ulong blockSize = 1UL << _blocks[blockIndex].Order;

                    FreeRegion(baseAddress);

                    baseAddress += blockSize;
                }

                int nextBlockIndex = blockIndex - 1;

                // Free the region between address and the start of the aligned region,
                // walking backwards and dropping to lower block indices as needed.
                baseAddress = addressRounded;

                for (blockIndex = nextBlockIndex; blockIndex >= 0; blockIndex--)
                {
                    ulong blockSize = 1UL << _blocks[blockIndex].Order;

                    while (baseAddress - blockSize >= address)
                    {
                        baseAddress -= blockSize;

                        FreeRegion(baseAddress);
                    }
                }

                // Free the region between the end of the aligned region and endAddr,
                // walking forwards and dropping to lower block indices as needed.
                baseAddress = endAddrTruncated;

                for (blockIndex = nextBlockIndex; blockIndex >= 0; blockIndex--)
                {
                    ulong blockSize = 1UL << _blocks[blockIndex].Order;

                    while (baseAddress + blockSize <= endAddr)
                    {
                        FreeRegion(baseAddress);

                        baseAddress += blockSize;
                    }
                }
            }
        }
예제 #21
0
        /// <summary>
        /// Performs actual copy of the inline data after the transfer is finished.
        /// </summary>
        /// <remarks>
        /// Three paths exist:
        /// a single linear line is written with one tracked write;
        /// a non-linear transfer whose destination matches a texture found in the texture
        /// cache is uploaded directly into that texture;
        /// everything else is copied line by line through an offset calculator.
        /// Every path marks the transfer as finished.
        /// </remarks>
        private void FinishTransfer()
        {
            var memoryManager = _channel.MemoryManager;

            var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (_isLinear && _lineCount == 1)
            {
                memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
                _context.AdvanceSequence();
            }
            else
            {
                // TODO: Verify if the destination X/Y and width/height are taken into account
                // for linear texture transfers. If not, we can use the fast path for that as well.
                // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
                if (!_isLinear)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        _dstGpuVa,
                        1,
                        _dstStride,
                        _dstHeight,
                        _lineLengthIn,
                        _lineCount,
                        _isLinear,
                        _dstGobBlocksInY,
                        _dstGobBlocksInZ);

                    if (target != null)
                    {
                        target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));

                        // The transfer is complete on this path too; without this the early
                        // return would leave the finished flag unset, unlike every other path.
                        // NOTE(review): AdvanceSequence is not called here, as in the original,
                        // since this path performs no memory manager write - confirm that is intended.
                        _finished = true;

                        return;
                    }
                }

                var dstCalculator = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int srcOffset = 0;

                for (int y = _dstY; y < _dstY + _lineCount; y++)
                {
                    int x1      = _dstX;
                    int x2      = _dstX + _lineLengthIn;
                    int x1Round = BitUtils.AlignUp(_dstX, 16);
                    int x2Trunc = BitUtils.AlignDown(x2, 16);

                    int x = x1;

                    // Leading bytes, up to the first 16-byte aligned column (if the line reaches it).
                    if (x1Round <= x2)
                    {
                        for (; x < x1Round; x++, srcOffset++)
                        {
                            int dstOffset = dstCalculator.GetOffset(x, y);

                            ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                            memoryManager.Write(dstAddress, data[srcOffset]);
                        }
                    }

                    // Aligned middle, 16 bytes per write.
                    for (; x < x2Trunc; x += 16, srcOffset += 16)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                    }

                    // Trailing bytes.
                    for (; x < x2; x++, srcOffset++)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, data[srcOffset]);
                    }

                    // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
                    // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
                    int misalignment = _lineLengthIn & 3;

                    if (misalignment != 0)
                    {
                        srcOffset += 4 - misalignment;
                    }
                }

                _context.AdvanceSequence();
            }

            _finished = true;
        }
예제 #22
0
        /// <summary>
        /// Converts texture data from block linear layout into a newly allocated linear
        /// buffer, covering every mip level, layer and depth slice.
        /// </summary>
        /// <param name="width">Width of the base level.</param>
        /// <param name="height">Height of the base level.</param>
        /// <param name="depth">Depth of the base level.</param>
        /// <param name="levels">Number of mip levels to convert.</param>
        /// <param name="layers">Number of layers to convert.</param>
        /// <param name="blockWidth">Divisor applied (rounding up) to each level's width.</param>
        /// <param name="blockHeight">Divisor applied (rounding up) to each level's height.</param>
        /// <param name="bytesPerPixel">Size of one element in bytes; must be 1, 2, 4, 8, 12 or 16.</param>
        /// <param name="gobBlocksInY">GOB blocks in Y for the base level; halved for smaller levels.</param>
        /// <param name="gobBlocksInZ">GOB blocks in Z for the base level; halved for smaller levels.</param>
        /// <param name="gobBlocksInTileX">GOB blocks in a tile along X, used for width alignment.</param>
        /// <param name="sizeInfo">Supplies the layer size and per-level offsets of the input data.</param>
        /// <param name="data">Block linear input data.</param>
        /// <returns>The converted data, with each line padded to the host stride alignment.</returns>
        /// <exception cref="NotSupportedException">Thrown when <paramref name="bytesPerPixel"/> is not supported.</exception>
        public static Span<byte> ConvertBlockLinearToLinear(
            int width,
            int height,
            int depth,
            int levels,
            int layers,
            int blockWidth,
            int blockHeight,
            int bytesPerPixel,
            int gobBlocksInY,
            int gobBlocksInZ,
            int gobBlocksInTileX,
            SizeInfo sizeInfo,
            ReadOnlySpan<byte> data)
        {
            int outSize = GetTextureSize(
                width,
                height,
                depth,
                levels,
                layers,
                blockWidth,
                blockHeight,
                bytesPerPixel);

            Span<byte> output = new byte[outSize];

            // Running write position in the output, advanced after each level.
            int outOffs = 0;

            int mipGobBlocksInY = gobBlocksInY;
            int mipGobBlocksInZ = gobBlocksInZ;

            int gobWidth  = (GobStride / bytesPerPixel) * gobBlocksInTileX;
            int gobHeight = gobBlocksInY * GobHeight;

            for (int level = 0; level < levels; level++)
            {
                int w = Math.Max(1, width >> level);
                int h = Math.Max(1, height >> level);
                int d = Math.Max(1, depth >> level);

                w = BitUtils.DivRoundUp(w, blockWidth);
                h = BitUtils.DivRoundUp(h, blockHeight);

                // Shrink the block dimensions while the level still fits in half of them.
                while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1)
                {
                    mipGobBlocksInY >>= 1;
                }

                while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1)
                {
                    mipGobBlocksInZ >>= 1;
                }

                // Portions of each line (in bytes) that can be copied 16 and 64 bytes at a time.
                int strideTrunc   = BitUtils.AlignDown(w * bytesPerPixel, 16);
                int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);

                // First element that is not covered by the vectorized copies.
                int xStart = strideTrunc / bytesPerPixel;

                int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

                // Padding bytes at the end of each output line.
                int outStrideGap = stride - w * bytesPerPixel;

                int alignment = gobWidth;

                if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
                {
                    alignment = GobStride / bytesPerPixel;
                }

                int wAligned = BitUtils.AlignUp(w, alignment);

                BlockLinearLayout layoutConverter = new BlockLinearLayout(
                    wAligned,
                    h,
                    mipGobBlocksInY,
                    mipGobBlocksInZ,
                    bytesPerPixel);

                // Copies the current level for all layers, using T as the element type for
                // the per-element tail copy. The bool return value only exists so that the
                // calls can be used as arms of the switch expression below.
                unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
                {
                    fixed (byte* outputPtr = output, dataPtr = data)
                    {
                        byte* outPtr = outputPtr + outOffs;

                        for (int layer = 0; layer < layers; layer++)
                        {
                            byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));

                            for (int z = 0; z < d; z++)
                            {
                                layoutConverter.SetZ(z);
                                for (int y = 0; y < h; y++)
                                {
                                    layoutConverter.SetY(y);

                                    // 64 bytes per iteration, gathered from four 16-byte pieces at
                                    // fixed distances from the computed input offset.
                                    for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
                                    {
                                        byte* offset  = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
                                        byte* offset2 = offset + 0x20;
                                        byte* offset3 = offset + 0x100;
                                        byte* offset4 = offset + 0x120;

                                        Vector128<byte> value  = *(Vector128<byte>*)offset;
                                        Vector128<byte> value2 = *(Vector128<byte>*)offset2;
                                        Vector128<byte> value3 = *(Vector128<byte>*)offset3;
                                        Vector128<byte> value4 = *(Vector128<byte>*)offset4;

                                        *(Vector128<byte>*)outPtr        = value;
                                        *(Vector128<byte>*)(outPtr + 16) = value2;
                                        *(Vector128<byte>*)(outPtr + 32) = value3;
                                        *(Vector128<byte>*)(outPtr + 48) = value4;
                                    }

                                    // 16 bytes per iteration for what is left of the 16-byte aligned part.
                                    for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
                                    {
                                        byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);

                                        *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
                                    }

                                    // Remaining elements, one at a time.
                                    for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
                                    {
                                        byte* offset = inBaseOffset + layoutConverter.GetOffset(x);

                                        *(T*)outPtr = *(T*)offset;
                                    }

                                    outPtr += outStrideGap;
                                }
                            }
                        }
                        outOffs += stride * h * d * layers;
                    }

                    return true;
                }

                bool _ = bytesPerPixel switch
                {
                    1 => Convert<byte>(output, data),
                    2 => Convert<ushort>(output, data),
                    4 => Convert<uint>(output, data),
                    8 => Convert<ulong>(output, data),
                    12 => Convert<Bpp12Pixel>(output, data),
                    16 => Convert<Vector128<byte>>(output, data),
                    // Fixed: the message previously used "${...}", which printed a stray '$'.
                    _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
                };
            }
            return output;
        }
예제 #23
0
        /// <summary>
        /// Interleaves separate U and V planes into a single destination surface, writing each
        /// U/V byte pair at the offset that <see cref="OffsetCalculator"/> reports for its position.
        /// </summary>
        /// <param name="dst">Destination buffer that receives the interleaved U/V byte pairs</param>
        /// <param name="srcU">Source plane holding the U bytes</param>
        /// <param name="srcV">Source plane holding the V bytes</param>
        /// <param name="srcStride">Distance in bytes between the start of consecutive source rows</param>
        /// <param name="width">Number of U/V pairs per row</param>
        /// <param name="height">Number of rows to write</param>
        private unsafe static void WriteChroma(
            Span <byte> dst,
            ReadOnlySpan <byte> srcU,
            ReadOnlySpan <byte> srcV,
            int srcStride,
            int width,
            int height)
        {
            OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);

            if (!Sse2.IsSupported)
            {
                // Scalar fallback: one U/V pair at a time, indexing the source rows directly.
                for (int row = 0; row < height; row++)
                {
                    int srcRowStart = row * srcStride;

                    calc.SetY(row);

                    for (int col = 0; col < width; col++)
                    {
                        int pairOffset = calc.GetOffset(col);

                        dst[pairOffset + 0] = srcU[srcRowStart + col];
                        dst[pairOffset + 1] = srcV[srcRowStart + col];
                    }
                }

                return;
            }

            // A destination row is width * 2 bytes wide; the vector loop covers it in 64-byte steps.
            int vectorRowBytes = BitUtils.AlignDown(width * 2, 64);

            // Index of the first U/V pair that the vector loop leaves for the scalar tail.
            int tailStart = vectorRowBytes / 2;

            // Source bytes to skip at the end of a row once `width` bytes have been consumed.
            int srcRowPadding = srcStride - width;

            fixed (byte* dstBase = dst, uBase = srcU, vBase = srcV)
            {
                byte* uCursor = uBase;
                byte* vCursor = vBase;

                for (int row = 0; row < height; row++)
                {
                    calc.SetY(row);

                    for (int byteX = 0; byteX < vectorRowBytes; byteX += 64)
                    {
                        // The four 16-byte pieces of a 64-byte run are not contiguous in the destination.
                        byte* piece0 = dstBase + calc.GetOffsetWithLineOffset64(byteX);
                        byte* piece1 = piece0 + 0x20;
                        byte* piece2 = piece0 + 0x100;
                        byte* piece3 = piece0 + 0x120;

                        Vector128<byte> uFirst  = Sse2.LoadVector128(uCursor);
                        Vector128<byte> vFirst  = Sse2.LoadVector128(vCursor);
                        Vector128<byte> uSecond = Sse2.LoadVector128(uCursor + 16);
                        Vector128<byte> vSecond = Sse2.LoadVector128(vCursor + 16);

                        // Byte-wise unpack turns 16 U + 16 V bytes into 16 interleaved U/V pairs.
                        Vector128<byte> pairs0 = Sse2.UnpackLow(uFirst, vFirst);
                        Vector128<byte> pairs1 = Sse2.UnpackHigh(uFirst, vFirst);
                        Vector128<byte> pairs2 = Sse2.UnpackLow(uSecond, vSecond);
                        Vector128<byte> pairs3 = Sse2.UnpackHigh(uSecond, vSecond);

                        Sse2.Store(piece0, pairs0);
                        Sse2.Store(piece1, pairs1);
                        Sse2.Store(piece2, pairs2);
                        Sse2.Store(piece3, pairs3);

                        uCursor += 32;
                        vCursor += 32;
                    }

                    // Remaining pairs that did not fill a whole 64-byte run.
                    for (int col = tailStart; col < width; col++)
                    {
                        byte* pair = dstBase + calc.GetOffset(col);

                        pair[0] = *uCursor;
                        pair[1] = *vCursor;

                        uCursor++;
                        vCursor++;
                    }

                    uCursor += srcRowPadding;
                    vCursor += srcRowPadding;
                }
            }
        }
예제 #24
0
        /// <summary>
        /// Walks the buffer descriptors and, for each one whose client range does not start or end
        /// on a page boundary, copies the partial pages at the start and end of that range through
        /// <c>CopyDataFromCurrentProcess</c>.
        /// </summary>
        /// <param name="memoryManager">Memory manager used to perform the copies</param>
        /// <param name="list">Buffer descriptors to process</param>
        /// <returns>
        /// <see cref="KernelResult.Success"/> when every copy succeeded,
        /// <see cref="KernelResult.InvalidCombination"/> when a descriptor is not in one of the
        /// handled IPC buffer states, otherwise the result of the first copy that failed
        /// </returns>
        private KernelResult CopyToClient(KMemoryManager memoryManager, List<KBufferDescriptor> list)
        {
            foreach (KBufferDescriptor desc in list)
            {
                MemoryState     stateMask;
                MemoryAttribute attributeMask = MemoryAttribute.Borrowed | MemoryAttribute.Uncached;

                switch (desc.State)
                {
                    case MemoryState.IpcBuffer0:
                        stateMask = MemoryState.IpcSendAllowedType0;

                        // Only this buffer type additionally includes the device mapped attribute in the mask.
                        attributeMask |= MemoryAttribute.DeviceMapped;
                        break;

                    case MemoryState.IpcBuffer1:
                        stateMask = MemoryState.IpcSendAllowedType1;
                        break;

                    case MemoryState.IpcBuffer3:
                        stateMask = MemoryState.IpcSendAllowedType3;
                        break;

                    default:
                        return KernelResult.InvalidCombination;
                }

                ulong startPageFloor = BitUtils.AlignDown(desc.ClientAddress, KMemoryManager.PageSize);
                ulong startPageCeil  = BitUtils.AlignUp  (desc.ClientAddress, KMemoryManager.PageSize);

                bool startIsAligned = startPageFloor == startPageCeil;

                // An unaligned start means the leading partial page needs a copy of its own.
                if (!startIsAligned)
                {
                    ulong bytesToPageEnd = startPageCeil - desc.ClientAddress;

                    // Never copy past the end of the buffer when it ends inside the first page.
                    ulong headSize = bytesToPageEnd > desc.Size ? desc.Size : bytesToPageEnd;

                    KernelResult headResult = memoryManager.CopyDataFromCurrentProcess(
                        desc.ClientAddress,
                        headSize,
                        stateMask,
                        stateMask,
                        KMemoryPermission.ReadAndWrite,
                        attributeMask,
                        MemoryAttribute.None,
                        desc.ServerAddress);

                    if (headResult != KernelResult.Success)
                    {
                        return headResult;
                    }
                }

                ulong clientEnd = desc.ClientAddress + desc.Size;
                ulong serverEnd = desc.ServerAddress + desc.Size;

                ulong clientEndFloor = BitUtils.AlignDown(clientEnd, KMemoryManager.PageSize);
                ulong clientEndCeil  = BitUtils.AlignUp  (clientEnd, KMemoryManager.PageSize);
                ulong serverEndFloor = BitUtils.AlignDown(serverEnd, KMemoryManager.PageSize);

                bool endIsUnaligned  = clientEndFloor < clientEndCeil;
                bool endsOnLaterPage = startPageFloor < clientEndFloor;

                // The trailing partial page is copied unless the buffer starts unaligned on that
                // very same page, in which case the copy above already covered it.
                if (endIsUnaligned && (startIsAligned || endsOnLaterPage))
                {
                    KernelResult tailResult = memoryManager.CopyDataFromCurrentProcess(
                        clientEndFloor,
                        clientEnd - clientEndFloor,
                        stateMask,
                        stateMask,
                        KMemoryPermission.ReadAndWrite,
                        attributeMask,
                        MemoryAttribute.None,
                        serverEndFloor);

                    if (tailResult != KernelResult.Success)
                    {
                        return tailResult;
                    }
                }
            }

            return KernelResult.Success;
        }
예제 #25
0
        /// <summary>
        /// Splits an interleaved U/V source surface into separate U and V planes, reading each
        /// U/V byte pair from the offset that <see cref="OffsetCalculator"/> reports for its position.
        /// </summary>
        /// <param name="dstU">Destination plane that receives the U bytes</param>
        /// <param name="dstV">Destination plane that receives the V bytes</param>
        /// <param name="src">Source buffer holding the interleaved U/V byte pairs</param>
        /// <param name="dstStride">Distance in bytes between the start of consecutive destination rows</param>
        /// <param name="width">Number of U/V pairs per row</param>
        /// <param name="height">Number of rows to read</param>
        private unsafe static void ReadChroma(
            Span <byte> dstU,
            Span <byte> dstV,
            ReadOnlySpan <byte> src,
            int dstStride,
            int width,
            int height)
        {
            OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);

            if (!Sse2.IsSupported)
            {
                // Scalar fallback: one U/V pair at a time, indexing the destination rows directly.
                for (int row = 0; row < height; row++)
                {
                    int dstRowStart = row * dstStride;

                    calc.SetY(row);

                    for (int col = 0; col < width; col++)
                    {
                        int pairOffset = calc.GetOffset(col);

                        dstU[dstRowStart + col] = src[pairOffset];
                        dstV[dstRowStart + col] = src[pairOffset + 1];
                    }
                }

                return;
            }

            // A source row is width * 2 bytes wide; the vector loop covers it in 64-byte steps.
            int vectorRowBytes = BitUtils.AlignDown(width * 2, 64);

            // Index of the first U/V pair that the vector loop leaves for the scalar tail.
            int tailStart = vectorRowBytes / 2;

            // Destination bytes to skip at the end of a row once `width` bytes have been written.
            int dstRowPadding = dstStride - width;

            fixed (byte* uBase = dstU, vBase = dstV, srcBase = src)
            {
                byte* uCursor = uBase;
                byte* vCursor = vBase;

                for (int row = 0; row < height; row++)
                {
                    calc.SetY(row);

                    for (int byteX = 0; byteX < vectorRowBytes; byteX += 64)
                    {
                        // The four 16-byte pieces of a 64-byte run are not contiguous in the source.
                        byte* piece0 = srcBase + calc.GetOffsetWithLineOffset64(byteX);
                        byte* piece1 = piece0 + 0x20;
                        byte* piece2 = piece0 + 0x100;
                        byte* piece3 = piece0 + 0x120;

                        Vector128<byte> pairs0 = Sse2.LoadVector128(piece0);
                        Vector128<byte> pairs1 = Sse2.LoadVector128(piece1);
                        Vector128<byte> pairs2 = Sse2.LoadVector128(piece2);
                        Vector128<byte> pairs3 = Sse2.LoadVector128(piece3);

                        // Four byte-interleave passes over each 32-byte group end up separating the
                        // even bytes (U) from the odd bytes (V): the "low" result of the last pass
                        // holds 16 U bytes and the "high" result holds the matching 16 V bytes.
                        Vector128<byte> pass1LowA  = Sse2.UnpackLow(pairs0, pairs1);
                        Vector128<byte> pass1HighA = Sse2.UnpackHigh(pairs0, pairs1);
                        Vector128<byte> pass1LowB  = Sse2.UnpackLow(pairs2, pairs3);
                        Vector128<byte> pass1HighB = Sse2.UnpackHigh(pairs2, pairs3);

                        Vector128<byte> pass2LowA  = Sse2.UnpackLow(pass1LowA, pass1HighA);
                        Vector128<byte> pass2HighA = Sse2.UnpackHigh(pass1LowA, pass1HighA);
                        Vector128<byte> pass2LowB  = Sse2.UnpackLow(pass1LowB, pass1HighB);
                        Vector128<byte> pass2HighB = Sse2.UnpackHigh(pass1LowB, pass1HighB);

                        Vector128<byte> pass3LowA  = Sse2.UnpackLow(pass2LowA, pass2HighA);
                        Vector128<byte> pass3HighA = Sse2.UnpackHigh(pass2LowA, pass2HighA);
                        Vector128<byte> pass3LowB  = Sse2.UnpackLow(pass2LowB, pass2HighB);
                        Vector128<byte> pass3HighB = Sse2.UnpackHigh(pass2LowB, pass2HighB);

                        Vector128<byte> uFirst  = Sse2.UnpackLow(pass3LowA, pass3HighA);
                        Vector128<byte> vFirst  = Sse2.UnpackHigh(pass3LowA, pass3HighA);
                        Vector128<byte> uSecond = Sse2.UnpackLow(pass3LowB, pass3HighB);
                        Vector128<byte> vSecond = Sse2.UnpackHigh(pass3LowB, pass3HighB);

                        Sse2.Store(uCursor, uFirst);
                        Sse2.Store(uCursor + 16, uSecond);
                        Sse2.Store(vCursor, vFirst);
                        Sse2.Store(vCursor + 16, vSecond);

                        uCursor += 32;
                        vCursor += 32;
                    }

                    // Remaining pairs that did not fill a whole 64-byte run.
                    for (int col = tailStart; col < width; col++)
                    {
                        byte* pair = srcBase + calc.GetOffset(col);

                        *uCursor = pair[0];
                        *vCursor = pair[1];

                        uCursor++;
                        vCursor++;
                    }

                    uCursor += dstRowPadding;
                    vCursor += dstRowPadding;
                }
            }
        }