/// <summary>
/// Performs actual copy of the inline data after the transfer is finished.
/// </summary>
private void FinishTransfer()
{
    // View the accumulated 32-bit words as bytes, trimmed to the transfer size.
    Span<byte> data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

    if (_isLinear && _params.LineCount == 1)
    {
        // Fast path: a single linear line is one contiguous physical write.
        ulong address = _context.MemoryManager.Translate(_params.DstAddress.Pack());

        _context.PhysicalMemory.Write(address, data);
    }
    else
    {
        // Slow path: resolve each destination byte offset through the layout
        // calculator (handles both strided linear and block linear layouts).
        // bpp is forced to 1 so GetOffset works on raw byte coordinates.
        var dstCalculator = new OffsetCalculator(
            _params.DstWidth,
            _params.DstHeight,
            _params.DstStride,
            _isLinear,
            _params.DstMemoryLayout.UnpackGobBlocksInY(),
            1);

        int srcOffset = 0;

        ulong dstBaseAddress = _context.MemoryManager.Translate(_params.DstAddress.Pack());

        for (int y = _params.DstY; y < _params.DstY + _params.LineCount; y++)
        {
            int x1 = _params.DstX;
            int x2 = _params.DstX + _params.LineLengthIn;
            // Portion of the line that can be copied in 16-byte chunks.
            int x2Trunc = _params.DstX + BitUtils.AlignDown(_params.LineLengthIn, 16);

            int x;

            // Bulk copy: 16 bytes at a time.
            // NOTE(review): this assumes offsets stay 16-byte contiguous within a
            // chunk for the block linear case — presumably guaranteed by the GOB
            // layout; confirm against OffsetCalculator.
            for (x = x1; x < x2Trunc; x += 16, srcOffset += 16)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);

                ulong dstAddress = dstBaseAddress + (ulong)dstOffset;

                Span<byte> pixel = data.Slice(srcOffset, 16);

                _context.PhysicalMemory.Write(dstAddress, pixel);
            }

            // Tail: remaining bytes copied one at a time.
            for (; x < x2; x++, srcOffset++)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);

                ulong dstAddress = dstBaseAddress + (ulong)dstOffset;

                Span<byte> pixel = data.Slice(srcOffset, 1);

                _context.PhysicalMemory.Write(dstAddress, pixel);
            }
        }
    }

    _finished = true;

    // Invalidate any cached GPU state derived from the written memory.
    _context.AdvanceSequence();
}
/// <summary>
/// Determines if a given texture is "safe" for upscaling from its info.
/// Note that this is different from being compatible - this eliminates targets that would have detrimental effects when scaled.
/// </summary>
/// <param name="info">The texture info to check</param>
/// <returns>True if safe</returns>
private static bool UpscaleSafeMode(TextureInfo info)
{
    // While upscaling works for all targets defined by IsUpscaleCompatible, we additionally blacklist targets here that
    // may have undesirable results (upscaling blur textures) or simply waste GPU resources (upscaling texture atlas).

    if (info.Levels > 3)
    {
        // Textures with more than 3 levels are likely to be game textures, rather than render textures.
        // Small textures with full mips are likely to be removed by the next check.
        return (false);
    }

    if (info.Width < 8 || info.Height < 8)
    {
        // Discount textures with small dimensions.
        return (false);
    }

    // Width alignment granularity in pixels, derived from the byte alignment of the layout.
    int widthAlignment = (info.IsLinear ? Constants.StrideAlignment : Constants.GobAlignment) / info.FormatInfo.BytesPerPixel;

    if (!(info.FormatInfo.Format.IsDepthOrStencil() || info.FormatInfo.Components == 1))
    {
        // Discount square textures that aren't depth-stencil like. (excludes game textures, cubemap faces, most 3D texture LUT, texture atlas)
        // Detect if the texture is possibly square. Widths may be aligned, so to remove the uncertainty we align both the width and height.
        bool possiblySquare = BitUtils.AlignUp(info.Width, widthAlignment) == BitUtils.AlignUp(info.Height, widthAlignment);

        if (possiblySquare)
        {
            return (false);
        }
    }

    if (info.Height < 360)
    {
        // Compute the width a 16:9 target of this height would have, with alignment slack in both directions.
        int aspectWidth = (int)MathF.Ceiling((info.Height / 9f) * 16f);
        int aspectMaxWidth = BitUtils.AlignUp(aspectWidth, widthAlignment);
        int aspectMinWidth = BitUtils.AlignDown(aspectWidth, widthAlignment);

        // NOTE(review): the trailing "info.Height < 360" term is redundant — it is already
        // guaranteed by the enclosing if.
        if (info.Width >= aspectMinWidth && info.Width <= aspectMaxWidth && info.Height < 360)
        {
            // Targets that are roughly 16:9 can only be rescaled if they're equal to or above 360p.
            // (excludes blur and bloom textures)
            return (false);
        }
    }

    if (info.Width == info.Height * info.Height)
    {
        // Possibly used for a "3D texture" drawn onto a 2D surface.
        // Some games do this to generate a tone mapping LUT without rendering into 3D texture slices.
        return (false);
    }

    return (true);
}
/// <summary>
/// Sets a storage buffer on the compute pipeline.
/// Storage buffers can be read and written to on shaders.
/// </summary>
/// <param name="index">Index of the storage buffer</param>
/// <param name="gpuVa">Start GPU virtual address of the buffer</param>
/// <param name="size">Size in bytes of the storage buffer</param>
/// <param name="flags">Buffer usage flags</param>
public void SetComputeStorageBuffer(int index, ulong gpuVa, ulong size, BufferUsageFlags flags)
{
    ulong alignMask = (ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1;

    // Snap the start address down to the required alignment, growing the size
    // by the bytes that were cut off the front so the same range is covered.
    ulong alignedSize = size + (gpuVa & alignMask);
    ulong alignedGpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);

    ulong address = TranslateAndCreateBuffer(alignedGpuVa, alignedSize);

    _cpStorageBuffers.SetBounds(index, address, alignedSize, flags);
}
/// <summary>
/// Sets a storage buffer on the graphics pipeline.
/// Storage buffers can be read and written to on shaders.
/// </summary>
/// <param name="stage">Index of the shader stage</param>
/// <param name="index">Index of the storage buffer</param>
/// <param name="gpuVa">Start GPU virtual address of the buffer</param>
/// <param name="size">Size in bytes of the storage buffer</param>
/// <param name="flags">Buffer usage flags</param>
public void SetGraphicsStorageBuffer(int stage, int index, ulong gpuVa, ulong size, BufferUsageFlags flags)
{
    ulong alignMask = (ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1;

    // Snap the start address down to the required alignment, growing the size
    // by the bytes that were cut off the front so the same range is covered.
    ulong alignedSize = size + (gpuVa & alignMask);
    ulong alignedGpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);

    ulong address = TranslateAndCreateBuffer(alignedGpuVa, alignedSize);

    // Flag the stage's bindings as dirty when the effective range changed.
    var current = _gpStorageBuffers[stage].Buffers[index];

    if (current.Address != address || current.Size != alignedSize)
    {
        _gpStorageBuffersDirty = true;
    }

    _gpStorageBuffers[stage].SetBounds(index, address, alignedSize, flags);
}
/// <summary>
/// Frees a single TLS slot, migrating its page between the full/free trees
/// and releasing the page entirely once its last slot is freed.
/// </summary>
/// <param name="tlsSlotAddr">Address of the TLS slot to free</param>
/// <returns><see cref="KernelResult.Success"/>, or <see cref="KernelResult.InvalidAddress"/> if the page is unknown</returns>
public KernelResult FreeThreadLocalStorage(ulong tlsSlotAddr)
{
    ulong pageAddress = BitUtils.AlignDown(tlsSlotAddr, KMemoryManager.PageSize);

    System.CriticalSection.Enter();

    KTlsPageInfo page;
    bool found = true;

    if (_fullTlsPages.TryGetValue(pageAddress, out page))
    {
        // The page was full; it is about to gain a free slot, so move it to the free tree.
        _fullTlsPages.Remove(pageAddress);
        _freeTlsPages.Add(pageAddress, page);
    }
    else
    {
        found = _freeTlsPages.TryGetValue(pageAddress, out page);
    }

    if (!found)
    {
        // The slot does not belong to any page we track.
        System.CriticalSection.Leave();

        return KernelResult.InvalidAddress;
    }

    page.FreeTlsSlot(tlsSlotAddr);

    if (page.IsEmpty())
    {
        // Last slot freed: drop the page from the trees and release its memory.
        // Leave the critical section before freeing, matching the original ordering.
        _freeTlsPages.Remove(pageAddress);

        System.CriticalSection.Leave();

        FreeTlsPage(page);

        return KernelResult.Success;
    }

    System.CriticalSection.Leave();

    return KernelResult.Success;
}
/// <summary>
/// Initializes this block level over the given heap range, delegating bit
/// storage setup to the backing bitmap.
/// </summary>
/// <param name="address">Heap base address</param>
/// <param name="size">Heap size in bytes</param>
/// <param name="blockShift">Log2 of this level's block size</param>
/// <param name="nextBlockShift">Log2 of the next (larger) block size, or 0 if none</param>
/// <param name="bitStorage">Storage to carve the bitmap from</param>
/// <returns>The remaining unused portion of <paramref name="bitStorage"/></returns>
public ArraySegment<ulong> Initialize(ulong address, ulong size, int blockShift, int nextBlockShift, ArraySegment<ulong> bitStorage)
{
    Shift = blockShift;
    NextShift = nextBlockShift;

    // Align the managed range to the parent block granularity (or our own when
    // this is the largest level).
    int alignShift = nextBlockShift != 0 ? nextBlockShift : blockShift;
    ulong align = 1UL << alignShift;

    ulong alignedStart = BitUtils.AlignDown(address, align);
    ulong alignedEnd = BitUtils.AlignUp(address + size, align);

    _heapAddress = alignedStart;
    _endOffset = (alignedEnd - alignedStart) >> blockShift;

    return _bitmap.Initialize(bitStorage, _endOffset);
}
/// <summary>
/// Marks the block at <paramref name="address"/> as free and, when all of its
/// siblings are also free, coalesces them into one next-size block.
/// </summary>
/// <param name="address">Address of the block being freed</param>
/// <returns>The address of the merged larger block, or 0 when no merge happened</returns>
public ulong PushBlock(ulong address)
{
    ulong offset = (address - _heapAddress) >> Shift;

    _bitmap.SetBit(offset);

    if (NextShift == 0)
    {
        // Largest block size: nothing to coalesce into.
        return 0;
    }

    // Number of blocks at this level that make up one block of the next level.
    int blocksPerNext = 1 << (NextShift - Shift);

    ulong alignedOffset = BitUtils.AlignDown(offset, blocksPerNext);

    // ClearRange only succeeds when the whole sibling group is free; in that
    // case the group is consumed here and reported to the caller as one merged block.
    return _bitmap.ClearRange(alignedOffset, blocksPerNext)
        ? _heapAddress + (alignedOffset << Shift)
        : 0;
}
/// <summary>
/// Converts texture data from the Tegra block linear layout into a tightly
/// packed (host stride aligned) linear layout, covering all mip levels and layers.
/// </summary>
/// <param name="width">Texture width in pixels</param>
/// <param name="height">Texture height in pixels</param>
/// <param name="depth">Texture depth in slices</param>
/// <param name="levels">Number of mipmap levels</param>
/// <param name="layers">Number of array layers</param>
/// <param name="blockWidth">Compression block width (1 for uncompressed)</param>
/// <param name="blockHeight">Compression block height (1 for uncompressed)</param>
/// <param name="bytesPerPixel">Bytes per pixel (or per compression block)</param>
/// <param name="gobBlocksInY">GOB blocks in Y for the base level</param>
/// <param name="gobBlocksInZ">GOB blocks in Z for the base level</param>
/// <param name="gobBlocksInTileX">GOB blocks per tile in X</param>
/// <param name="sizeInfo">Layer/mip offset information for the source data</param>
/// <param name="data">Source data in block linear layout</param>
/// <returns>A newly allocated span with the data in linear layout</returns>
public static Span<byte> ConvertBlockLinearToLinear(
    int width,
    int height,
    int depth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX,
    SizeInfo sizeInfo,
    ReadOnlySpan<byte> data)
{
    int outSize = GetTextureSize(
        width,
        height,
        depth,
        levels,
        layers,
        blockWidth,
        blockHeight,
        bytesPerPixel);

    Span<byte> output = new byte[outSize];

    int outOffs = 0;

    // GOB block counts shrink as mip dimensions shrink; track them per level.
    int mipGobBlocksInY = gobBlocksInY;
    int mipGobBlocksInZ = gobBlocksInZ;

    int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
    int gobHeight = gobBlocksInY * GobHeight;

    for (int level = 0; level < levels; level++)
    {
        int w = Math.Max(1, width >> level);
        int h = Math.Max(1, height >> level);
        int d = Math.Max(1, depth >> level);

        // Convert pixel dimensions to compression-block dimensions.
        w = BitUtils.DivRoundUp(w, blockWidth);
        h = BitUtils.DivRoundUp(h, blockHeight);

        // Halve GOB block counts while the mip still fits in the smaller layout.
        while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1)
        {
            mipGobBlocksInY >>= 1;
        }

        while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1)
        {
            mipGobBlocksInZ >>= 1;
        }

        // Portion of each row that can be copied in 16-byte chunks, and where
        // the per-pixel tail copy starts.
        int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
        int xStart = strideTrunc / bytesPerPixel;

        int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

        int alignment = gobWidth;

        if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
        {
            alignment = GobStride / bytesPerPixel;
        }

        int wAligned = BitUtils.AlignUp(w, alignment);

        BlockLinearLayout layoutConverter = new BlockLinearLayout(
            wAligned,
            h,
            d,
            mipGobBlocksInY,
            mipGobBlocksInZ,
            bytesPerPixel);

        for (int layer = 0; layer < layers; layer++)
        {
            int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level);

            for (int z = 0; z < d; z++)
            {
                for (int y = 0; y < h; y++)
                {
                    // Bulk path: 16 bytes at a time, using byte-granular offsets.
                    for (int x = 0; x < strideTrunc; x += 16)
                    {
                        int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x, y, z);

                        Span<byte> dest = output.Slice(outOffs + x, 16);

                        data.Slice(offset, 16).CopyTo(dest);
                    }

                    // Tail path: remaining pixels one at a time.
                    for (int x = xStart; x < w; x++)
                    {
                        int offset = inBaseOffset + layoutConverter.GetOffset(x, y, z);

                        Span<byte> dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel);

                        data.Slice(offset, bytesPerPixel).CopyTo(dest);
                    }

                    outOffs += stride;
                }
            }
        }
    }
    return (output);
}
/// <summary>
/// Frees a range of pages back to the heap, using the largest block size that
/// fits the aligned middle of the range and progressively smaller blocks for
/// the unaligned head and tail.
/// </summary>
/// <param name="address">Start address of the range to free</param>
/// <param name="pagesCount">Number of pages to free</param>
public void Free(ulong address, ulong pagesCount)
{
    if (pagesCount == 0)
    {
        return;
    }

    int bigIndex = _blocksCount - 1;

    ulong start = address;
    ulong end = address + pagesCount * KPageTableBase.PageSize;
    // Head region [beforeStart, beforeEnd) and tail region [afterStart, afterEnd)
    // left over after the big-block middle is carved out.
    ulong beforeStart = start;
    ulong beforeEnd = start;
    ulong afterStart = end;
    ulong afterEnd = end;

    // Find the largest block size that fits at least one whole block inside the range.
    while (bigIndex >= 0)
    {
        ulong blockSize = _blocks[bigIndex].Size;

        ulong bigStart = BitUtils.AlignUp(start, blockSize);
        ulong bigEnd = BitUtils.AlignDown(end, blockSize);

        if (bigStart < bigEnd)
        {
            // Free the aligned middle with big blocks.
            for (ulong block = bigStart; block < bigEnd; block += blockSize)
            {
                FreeBlock(block, bigIndex);
            }

            beforeEnd = bigStart;
            afterStart = bigEnd;

            break;
        }

        bigIndex--;
    }

    // Free the head (between range start and the aligned middle), walking
    // backwards with progressively smaller block sizes.
    for (int i = bigIndex - 1; i >= 0; i--)
    {
        ulong blockSize = _blocks[i].Size;

        while (beforeStart + blockSize <= beforeEnd)
        {
            beforeEnd -= blockSize;

            FreeBlock(beforeEnd, i);
        }
    }

    // Free the tail (between the aligned middle and range end), walking
    // forwards with progressively smaller block sizes.
    for (int i = bigIndex - 1; i >= 0; i--)
    {
        ulong blockSize = _blocks[i].Size;

        while (afterStart + blockSize <= afterEnd)
        {
            FreeBlock(afterStart, i);

            afterStart += blockSize;
        }
    }
}
/// <summary>
/// Performs a full data copy between two textures, reading and writing guest memory directly.
/// The textures must have a matching layout, size, and bytes per pixel.
/// </summary>
/// <param name="src">The source texture</param>
/// <param name="dst">The destination texture</param>
/// <param name="w">Copy width</param>
/// <param name="h">Copy height</param>
/// <param name="bpp">Bytes per pixel</param>
private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
{
    var srcCalculator = new OffsetCalculator(
        w,
        h,
        src.Stride,
        src.LinearLayout,
        src.MemoryLayout.UnpackGobBlocksInY(),
        src.MemoryLayout.UnpackGobBlocksInZ(),
        bpp);

    // Total byte size covered by the full copy rectangle.
    (int _, int srcSize) = srcCalculator.GetRectangleRange(0, 0, w, h);

    var memoryManager = _channel.MemoryManager;

    ulong srcGpuVa = src.Address.Pack();
    ulong dstGpuVa = dst.Address.Pack();

    ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa, srcSize, true);

    // Effective texture width: for linear layouts the stride defines the row width.
    int width;
    int height = src.Height;
    if (src.LinearLayout)
    {
        width = src.Stride / bpp;
    }
    else
    {
        width = src.Width;
    }

    // If the copy is not equal to the width and height of the texture, we will need to copy partially.
    // It's worth noting that it has already been established that the src and dst are the same size.
    if (w == width && h == height)
    {
        memoryManager.Write(dstGpuVa, srcSpan);
    }
    else
    {
        using WritableRegion dstRegion = memoryManager.GetWritableRegion(dstGpuVa, srcSize, true);
        Span<byte> dstSpan = dstRegion.Memory.Span;

        if (src.LinearLayout)
        {
            // Linear: copy row by row at matching offsets, skipping stride padding.
            int stride = src.Stride;
            int offset = 0;
            int lineSize = width * bpp;

            for (int y = 0; y < height; y++)
            {
                srcSpan.Slice(offset, lineSize).CopyTo(dstSpan.Slice(offset));

                offset += stride;
            }
        }
        else
        {
            // Copy with the block linear layout in mind.
            // Recreate the offset calculator with bpp 1 for copy.
            int stride = w * bpp;

            srcCalculator = new OffsetCalculator(
                stride,
                h,
                0,
                false,
                src.MemoryLayout.UnpackGobBlocksInY(),
                src.MemoryLayout.UnpackGobBlocksInZ(),
                1);

            // Portion of each row copyable as whole 16-byte vectors.
            int strideTrunc = BitUtils.AlignDown(stride, 16);

            ReadOnlySpan<Vector128<byte>> srcVec = MemoryMarshal.Cast<byte, Vector128<byte>>(srcSpan);
            Span<Vector128<byte>> dstVec = MemoryMarshal.Cast<byte, Vector128<byte>>(dstSpan);

            for (int y = 0; y < h; y++)
            {
                int x = 0;

                srcCalculator.SetY(y);

                // Vector path: offsets are divided by 16 to index the vector spans.
                for (; x < strideTrunc; x += 16)
                {
                    int offset = srcCalculator.GetOffset(x) >> 4;

                    dstVec[offset] = srcVec[offset];
                }

                // Byte tail for rows not a multiple of 16 bytes.
                for (; x < stride; x++)
                {
                    int offset = srcCalculator.GetOffset(x);

                    dstSpan[offset] = srcSpan[offset];
                }
            }
        }
    }
}
/// <summary>
/// Converts a single 2D level of texture data from a strided linear layout
/// into the Tegra block linear layout, writing the result into <paramref name="dst"/>.
/// </summary>
/// <param name="dst">Destination buffer, laid out block linear</param>
/// <param name="width">Width in pixels</param>
/// <param name="height">Height in pixels</param>
/// <param name="stride">Source row stride in bytes</param>
/// <param name="bytesPerPixel">Bytes per pixel; must be 1, 2, 4, 8, 12 or 16</param>
/// <param name="gobBlocksInY">GOB blocks in Y of the destination layout</param>
/// <param name="data">Source data in linear layout</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="bytesPerPixel"/> is not a supported size</exception>
public static void ConvertLinearToBlockLinear(
    Span<byte> dst,
    int width,
    int height,
    int stride,
    int bytesPerPixel,
    int gobBlocksInY,
    ReadOnlySpan<byte> data)
{
    int gobHeight = gobBlocksInY * GobHeight;

    // Row prefixes copyable in 16-byte and 64-byte chunks respectively.
    int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16);
    int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64);

    // First pixel handled by the per-pixel tail loop.
    int xStart = strideTrunc / bytesPerPixel;

    // Source padding bytes to skip at the end of each row.
    int inStrideGap = stride - width * bytesPerPixel;

    int alignment = GobStride / bytesPerPixel;

    int wAligned = BitUtils.AlignUp(width, alignment);

    BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel);

    // Local generic worker: T selects the store width for the per-pixel tail.
    unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
    {
        fixed (byte* outputPtr = output, dataPtr = data)
        {
            byte* inPtr = dataPtr;

            for (int y = 0; y < height; y++)
            {
                layoutConverter.SetY(y);

                // 64-byte path: one 64-byte linear run maps to four 16-byte
                // stores at fixed offsets within the GOB.
                for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64)
                {
                    byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x);
                    byte* offset2 = offset + 0x20;
                    byte* offset3 = offset + 0x100;
                    byte* offset4 = offset + 0x120;

                    Vector128<byte> value = *(Vector128<byte>*)inPtr;
                    Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16);
                    Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32);
                    Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48);

                    *(Vector128<byte>*)offset = value;
                    *(Vector128<byte>*)offset2 = value2;
                    *(Vector128<byte>*)offset3 = value3;
                    *(Vector128<byte>*)offset4 = value4;
                }

                // 16-byte path for the remainder of the vectorizable prefix.
                for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16)
                {
                    byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x);

                    *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr;
                }

                // Per-pixel tail for rows not a multiple of 16 bytes.
                for (int x = xStart; x < width; x++, inPtr += bytesPerPixel)
                {
                    byte* offset = outputPtr + layoutConverter.GetOffset(x);

                    *(T*)offset = *(T*)inPtr;
                }

                inPtr += inStrideGap;
            }
        }

        return true;
    }

    bool _ = bytesPerPixel switch
    {
        1 => Convert<byte>(dst, data),
        2 => Convert<ushort>(dst, data),
        4 => Convert<uint>(dst, data),
        8 => Convert<ulong>(dst, data),
        12 => Convert<Bpp12Pixel>(dst, data),
        16 => Convert<Vector128<byte>>(dst, data),
        // Fix: the original message used "${bytesPerPixel}", which in a C# interpolated
        // string emits a literal '$' before the value (e.g. "convert $4 bpp").
        _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.")
    };
}
/// <summary>
/// Creates a buddy-style region manager over [address, address + size), setting up
/// one block level per entry of BlockOrders with hierarchical free-bit masks,
/// then freeing every page of the region into it.
/// </summary>
/// <param name="address">Region base address</param>
/// <param name="size">Region size in bytes</param>
/// <param name="endAddr">Region end address</param>
public KMemoryRegionManager(ulong address, ulong size, ulong endAddr)
{
    _blocks = new KMemoryRegionBlock[BlockOrders.Length];

    Address = address;
    Size = size;
    EndAddr = endAddr;

    _blockOrdersCount = BlockOrders.Length;

    for (int blockIndex = 0; blockIndex < _blockOrdersCount; blockIndex++)
    {
        _blocks[blockIndex] = new KMemoryRegionBlock();

        _blocks[blockIndex].Order = BlockOrders[blockIndex];

        // The last level has no larger order to coalesce into (0 = none).
        int nextOrder = blockIndex == _blockOrdersCount - 1 ? 0 : BlockOrders[blockIndex + 1];

        _blocks[blockIndex].NextOrder = nextOrder;

        int currBlockSize = 1 << BlockOrders[blockIndex];
        int nextBlockSize = currBlockSize;

        if (nextOrder != 0)
        {
            nextBlockSize = 1 << nextOrder;
        }

        // Align the level's start to the next-larger block size so coalescing
        // groups line up; block counts are derived from the aligned bounds.
        ulong startAligned = BitUtils.AlignDown(address, nextBlockSize);
        ulong endAddrAligned = BitUtils.AlignDown(endAddr, currBlockSize);

        ulong sizeInBlocksTruncated = (endAddrAligned - startAligned) >> BlockOrders[blockIndex];

        ulong endAddrRounded = BitUtils.AlignUp(address + size, nextBlockSize);

        ulong sizeInBlocksRounded = (endAddrRounded - startAligned) >> BlockOrders[blockIndex];

        _blocks[blockIndex].StartAligned = startAligned;
        _blocks[blockIndex].SizeInBlocksTruncated = sizeInBlocksTruncated;
        _blocks[blockIndex].SizeInBlocksRounded = sizeInBlocksRounded;

        ulong currSizeInBlocks = sizeInBlocksRounded;

        // Depth of the 64-ary bitmap tree needed to cover all blocks.
        int maxLevel = 0;

        do
        {
            maxLevel++;
        }
        while ((currSizeInBlocks /= 64) != 0);

        _blocks[blockIndex].MaxLevel = maxLevel;

        _blocks[blockIndex].Masks = new long[maxLevel][];

        currSizeInBlocks = sizeInBlocksRounded;

        // Allocate mask arrays from the leaf level upwards, each level 64x smaller.
        for (int level = maxLevel - 1; level >= 0; level--)
        {
            currSizeInBlocks = (currSizeInBlocks + 63) / 64;

            _blocks[blockIndex].Masks[level] = new long[currSizeInBlocks];
        }
    }

    _pageReferenceCounts = new ushort[size / KPageTableBase.PageSize];

    if (size != 0)
    {
        // Seed the allocator: the whole region starts out free.
        FreePages(address, size / KPageTableBase.PageSize);
    }
}
/// <summary>
/// Lays out the given executables (plus an optional argument region) in the code
/// address space, creates and initializes a KProcess with application resource
/// limits, loads each image into memory and starts the process.
/// </summary>
/// <param name="System">Kernel/OS state the process is created under</param>
/// <param name="MetaData">NPDM metadata describing the process</param>
/// <param name="StaticObjects">Executable images to load, in order</param>
/// <param name="Arguments">Optional raw argument data appended after the first image</param>
/// <returns>True on success, false if any kernel call failed</returns>
public static bool LoadStaticObjects(
    Horizon System,
    Npdm MetaData,
    IExecutable[] StaticObjects,
    byte[] Arguments = null)
{
    ulong ArgsStart = 0;
    int ArgsSize = 0;
    ulong CodeStart = 0x8000000;
    int CodeSize = 0;

    ulong[] NsoBase = new ulong[StaticObjects.Length];

    // First pass: compute each image's page-aligned size and base address.
    for (int Index = 0; Index < StaticObjects.Length; Index++)
    {
        IExecutable StaticObject = StaticObjects[Index];

        int TextEnd = StaticObject.TextOffset + StaticObject.Text.Length;
        int ROEnd = StaticObject.ROOffset + StaticObject.RO.Length;
        int DataEnd = StaticObject.DataOffset + StaticObject.Data.Length + StaticObject.BssSize;

        // Image size is the furthest end of the three segments (unsigned compare).
        int NsoSize = TextEnd;

        if ((uint)NsoSize < (uint)ROEnd)
        {
            NsoSize = ROEnd;
        }

        if ((uint)NsoSize < (uint)DataEnd)
        {
            NsoSize = DataEnd;
        }

        NsoSize = BitUtils.AlignUp(NsoSize, KMemoryManager.PageSize);

        NsoBase[Index] = CodeStart + (ulong)CodeSize;

        CodeSize += NsoSize;

        if (Arguments != null && ArgsSize == 0)
        {
            // Reserve the argument region right after the first image.
            ArgsStart = (ulong)CodeSize;

            // NOTE(review): AlignDown of (len * 2 + ArgsTotalSize - 1) reads like an
            // intended round-up of the argument region; confirm against ArgsTotalSize.
            ArgsSize = BitUtils.AlignDown(Arguments.Length * 2 + ArgsTotalSize - 1, KMemoryManager.PageSize);

            CodeSize += ArgsSize;
        }
    }

    int CodePagesCount = CodeSize / KMemoryManager.PageSize;

    int PersonalMmHeapPagesCount = MetaData.PersonalMmHeapSize / KMemoryManager.PageSize;

    ProcessCreationInfo CreationInfo = new ProcessCreationInfo(
        MetaData.TitleName,
        MetaData.ProcessCategory,
        MetaData.ACI0.TitleId,
        CodeStart,
        CodePagesCount,
        MetaData.MmuFlags,
        0,
        PersonalMmHeapPagesCount);

    KernelResult Result;

    // Default application resource limits (thread/event/session counts etc.).
    KResourceLimit ResourceLimit = new KResourceLimit(System);

    long ApplicationRgSize = (long)System.MemoryRegions[(int)MemoryRegion.Application].Size;

    Result = ResourceLimit.SetLimitValue(LimitableResource.Memory, ApplicationRgSize);
    Result |= ResourceLimit.SetLimitValue(LimitableResource.Thread, 608);
    Result |= ResourceLimit.SetLimitValue(LimitableResource.Event, 700);
    Result |= ResourceLimit.SetLimitValue(LimitableResource.TransferMemory, 128);
    Result |= ResourceLimit.SetLimitValue(LimitableResource.Session, 894);

    if (Result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization failed setting resource limit values.");

        return (false);
    }

    KProcess Process = new KProcess(System);

    Result = Process.Initialize(
        CreationInfo,
        MetaData.ACI0.KernelAccessControl.Capabilities,
        ResourceLimit,
        MemoryRegion.Application);

    if (Result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{Result}\".");

        return (false);
    }

    // Second pass: map each image at its computed base address.
    for (int Index = 0; Index < StaticObjects.Length; Index++)
    {
        Logger.PrintInfo(LogClass.Loader, $"Loading image {Index} at 0x{NsoBase[Index]:x16}...");

        Result = LoadIntoMemory(Process, StaticObjects[Index], NsoBase[Index]);

        if (Result != KernelResult.Success)
        {
            Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{Result}\".");

            return (false);
        }
    }

    Result = Process.Start(MetaData.MainThreadPriority, (ulong)MetaData.MainThreadStackSize);

    if (Result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process start returned error \"{Result}\".");

        return (false);
    }

    System.Processes.Add(Process.Pid, Process);

    return (true);
}
/// <summary>
/// Lays out the given executables (plus an optional argument region) in the code
/// address space, creates and initializes a KProcess with application resource
/// limits, loads each image into memory and starts the process.
/// </summary>
/// <param name="system">Kernel/OS state the process is created under</param>
/// <param name="metaData">NPDM metadata describing the process</param>
/// <param name="staticObjects">Executable images to load, in order</param>
/// <param name="arguments">Optional raw argument data appended after the first image</param>
/// <returns>True on success, false if any kernel call failed</returns>
public static bool LoadStaticObjects(
    Horizon system,
    Npdm metaData,
    IExecutable[] staticObjects,
    byte[] arguments = null)
{
    ulong argsStart = 0;
    int argsSize = 0;
    ulong codeStart = 0x8000000;
    int codeSize = 0;

    ulong[] nsoBase = new ulong[staticObjects.Length];

    // First pass: compute each image's page-aligned size and base address.
    for (int index = 0; index < staticObjects.Length; index++)
    {
        IExecutable staticObject = staticObjects[index];

        int textEnd = staticObject.TextOffset + staticObject.Text.Length;
        int roEnd = staticObject.RoOffset + staticObject.Ro.Length;
        int dataEnd = staticObject.DataOffset + staticObject.Data.Length + staticObject.BssSize;

        // Image size is the furthest end of the three segments (unsigned compare).
        int nsoSize = textEnd;

        if ((uint)nsoSize < (uint)roEnd)
        {
            nsoSize = roEnd;
        }

        if ((uint)nsoSize < (uint)dataEnd)
        {
            nsoSize = dataEnd;
        }

        nsoSize = BitUtils.AlignUp(nsoSize, KMemoryManager.PageSize);

        nsoBase[index] = codeStart + (ulong)codeSize;

        codeSize += nsoSize;

        if (arguments != null && argsSize == 0)
        {
            // Reserve the argument region right after the first image.
            argsStart = (ulong)codeSize;

            // NOTE(review): AlignDown of (len * 2 + ArgsTotalSize - 1) reads like an
            // intended round-up of the argument region; confirm against ArgsTotalSize.
            argsSize = BitUtils.AlignDown(arguments.Length * 2 + ArgsTotalSize - 1, KMemoryManager.PageSize);

            codeSize += argsSize;
        }
    }

    int codePagesCount = codeSize / KMemoryManager.PageSize;

    int personalMmHeapPagesCount = metaData.PersonalMmHeapSize / KMemoryManager.PageSize;

    ProcessCreationInfo creationInfo = new ProcessCreationInfo(
        metaData.TitleName,
        metaData.ProcessCategory,
        metaData.Aci0.TitleId,
        codeStart,
        codePagesCount,
        metaData.MmuFlags,
        0,
        personalMmHeapPagesCount);

    KernelResult result;

    // Default application resource limits (thread/event/session counts etc.).
    KResourceLimit resourceLimit = new KResourceLimit(system);

    long applicationRgSize = (long)system.MemoryRegions[(int)MemoryRegion.Application].Size;

    result = resourceLimit.SetLimitValue(LimitableResource.Memory, applicationRgSize);
    result |= resourceLimit.SetLimitValue(LimitableResource.Thread, 608);
    result |= resourceLimit.SetLimitValue(LimitableResource.Event, 700);
    result |= resourceLimit.SetLimitValue(LimitableResource.TransferMemory, 128);
    result |= resourceLimit.SetLimitValue(LimitableResource.Session, 894);

    if (result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization failed setting resource limit values.");

        return (false);
    }

    KProcess process = new KProcess(system);

    result = process.Initialize(
        creationInfo,
        metaData.Aci0.KernelAccessControl.Capabilities,
        resourceLimit,
        MemoryRegion.Application);

    if (result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{result}\".");

        return (false);
    }

    // Second pass: map each image at its computed base address.
    for (int index = 0; index < staticObjects.Length; index++)
    {
        Logger.PrintInfo(LogClass.Loader, $"Loading image {index} at 0x{nsoBase[index]:x16}...");

        result = LoadIntoMemory(process, staticObjects[index], nsoBase[index]);

        if (result != KernelResult.Success)
        {
            Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{result}\".");

            return (false);
        }
    }

    result = process.Start(metaData.MainThreadPriority, (ulong)metaData.MainThreadStackSize);

    if (result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process start returned error \"{result}\".");

        return (false);
    }

    system.Processes.Add(process.Pid, process);

    return (true);
}
/// <summary>
/// Frees a range of pages into the buddy allocator: the largest-aligned middle
/// of the range is freed with big blocks, then the unaligned head and tail are
/// freed with progressively smaller block sizes. Each freed block is pushed up
/// the mask hierarchy and coalesced where possible.
/// </summary>
/// <param name="Address">Start address of the range</param>
/// <param name="PagesCount">Number of pages to free</param>
private void FreePages(ulong Address, ulong PagesCount)
{
    ulong EndAddr = Address + PagesCount * KMemoryManager.PageSize;

    int BlockIndex = BlockOrdersCount - 1;

    ulong AddressRounded = 0;
    ulong EndAddrTruncated = 0;

    // Find the largest order that fits at least one aligned block inside the range.
    for (; BlockIndex >= 0; BlockIndex--)
    {
        KMemoryRegionBlock AllocInfo = Blocks[BlockIndex];

        int BlockSize = 1 << AllocInfo.Order;

        AddressRounded = BitUtils.AlignUp(Address, BlockSize);
        EndAddrTruncated = BitUtils.AlignDown(EndAddr, BlockSize);

        if (AddressRounded < EndAddrTruncated)
        {
            break;
        }
    }

    // Frees one block at BlockIndex and coalesces upwards through the levels
    // while complete sibling groups become free.
    void FreeRegion(ulong CurrAddress)
    {
        for (int CurrBlockIndex = BlockIndex; CurrBlockIndex < BlockOrdersCount && CurrAddress != 0; CurrBlockIndex++)
        {
            KMemoryRegionBlock Block = Blocks[CurrBlockIndex];

            Block.FreeCount++;

            ulong FreedBlocks = (CurrAddress - Block.StartAligned) >> Block.Order;

            int Index = (int)FreedBlocks;

            // Set the free bit, propagating up the 64-ary mask tree until a
            // parent word was already non-zero.
            for (int Level = Block.MaxLevel - 1; Level >= 0; Level--, Index /= 64)
            {
                long Mask = Block.Masks[Level][Index / 64];

                Block.Masks[Level][Index / 64] = Mask | (1L << (Index & 63));

                if (Mask != 0)
                {
                    break;
                }
            }

            // Try to merge the whole sibling group into one next-order block.
            int BlockSizeDelta = 1 << (Block.NextOrder - Block.Order);

            int FreedBlocksTruncated = BitUtils.AlignDown((int)FreedBlocks, BlockSizeDelta);

            if (!Block.TryCoalesce(FreedBlocksTruncated, BlockSizeDelta))
            {
                break;
            }

            CurrAddress = Block.StartAligned + ((ulong)FreedBlocksTruncated << Block.Order);
        }
    }

    //Free inside aligned region.
    ulong BaseAddress = AddressRounded;

    while (BaseAddress < EndAddrTruncated)
    {
        ulong BlockSize = 1UL << Blocks[BlockIndex].Order;

        FreeRegion(BaseAddress);

        BaseAddress += BlockSize;
    }

    int NextBlockIndex = BlockIndex - 1;

    //Free region between Address and aligned region start.
    BaseAddress = AddressRounded;

    for (BlockIndex = NextBlockIndex; BlockIndex >= 0; BlockIndex--)
    {
        ulong BlockSize = 1UL << Blocks[BlockIndex].Order;

        while (BaseAddress - BlockSize >= Address)
        {
            BaseAddress -= BlockSize;

            FreeRegion(BaseAddress);
        }
    }

    //Free region between aligned region end and End Address.
    BaseAddress = EndAddrTruncated;

    for (BlockIndex = NextBlockIndex; BlockIndex >= 0; BlockIndex--)
    {
        ulong BlockSize = 1UL << Blocks[BlockIndex].Order;

        while (BaseAddress + BlockSize <= EndAddr)
        {
            FreeRegion(BaseAddress);

            BaseAddress += BlockSize;
        }
    }
}
/// <summary>
/// Creates a buddy-style region manager over [Address, Address + Size), setting up
/// one block level per entry of BlockOrders with hierarchical free-bit masks,
/// then freeing every page of the region into it.
/// </summary>
/// <param name="Address">Region base address</param>
/// <param name="Size">Region size in bytes</param>
/// <param name="EndAddr">Region end address</param>
public KMemoryRegionManager(ulong Address, ulong Size, ulong EndAddr)
{
    Blocks = new KMemoryRegionBlock[BlockOrders.Length];

    this.Address = Address;
    this.Size = Size;
    this.EndAddr = EndAddr;

    BlockOrdersCount = BlockOrders.Length;

    for (int BlockIndex = 0; BlockIndex < BlockOrdersCount; BlockIndex++)
    {
        Blocks[BlockIndex] = new KMemoryRegionBlock();

        Blocks[BlockIndex].Order = BlockOrders[BlockIndex];

        // The last level has no larger order to coalesce into (0 = none).
        int NextOrder = BlockIndex == BlockOrdersCount - 1 ? 0 : BlockOrders[BlockIndex + 1];

        Blocks[BlockIndex].NextOrder = NextOrder;

        int CurrBlockSize = 1 << BlockOrders[BlockIndex];
        int NextBlockSize = CurrBlockSize;

        if (NextOrder != 0)
        {
            NextBlockSize = 1 << NextOrder;
        }

        // Align the level's start to the next-larger block size so coalescing
        // groups line up; block counts are derived from the aligned bounds.
        ulong StartAligned = BitUtils.AlignDown(Address, NextBlockSize);
        ulong EndAddrAligned = BitUtils.AlignDown(EndAddr, CurrBlockSize);

        ulong SizeInBlocksTruncated = (EndAddrAligned - StartAligned) >> BlockOrders[BlockIndex];

        ulong EndAddrRounded = BitUtils.AlignUp(Address + Size, NextBlockSize);

        ulong SizeInBlocksRounded = (EndAddrRounded - StartAligned) >> BlockOrders[BlockIndex];

        Blocks[BlockIndex].StartAligned = StartAligned;
        Blocks[BlockIndex].SizeInBlocksTruncated = SizeInBlocksTruncated;
        Blocks[BlockIndex].SizeInBlocksRounded = SizeInBlocksRounded;

        ulong CurrSizeInBlocks = SizeInBlocksRounded;

        // Depth of the 64-ary bitmap tree needed to cover all blocks.
        int MaxLevel = 0;

        do
        {
            MaxLevel++;
        }
        while ((CurrSizeInBlocks /= 64) != 0);

        Blocks[BlockIndex].MaxLevel = MaxLevel;

        Blocks[BlockIndex].Masks = new long[MaxLevel][];

        CurrSizeInBlocks = SizeInBlocksRounded;

        // Allocate mask arrays from the leaf level upwards, each level 64x smaller.
        for (int Level = MaxLevel - 1; Level >= 0; Level--)
        {
            CurrSizeInBlocks = (CurrSizeInBlocks + 63) / 64;

            Blocks[BlockIndex].Masks[Level] = new long[CurrSizeInBlocks];
        }
    }

    if (Size != 0)
    {
        // Seed the allocator: the whole region starts out free.
        FreePages(Address, Size / KMemoryManager.PageSize);
    }
}
/// <summary>
/// Performs actual copy of the inline data after the transfer is finished.
/// </summary>
private void FinishTransfer()
{
    var memoryManager = _channel.MemoryManager;

    // View the accumulated 32-bit words as bytes, trimmed to the transfer size.
    var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

    if (_isLinear && _lineCount == 1)
    {
        // Fast path: one linear line is a single contiguous write. Only the line
        // length is written; _size may include word-alignment padding past it.
        memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
        _context.AdvanceSequence();
    }
    else
    {
        // Slow path: resolve per-byte destination offsets via the layout
        // calculator (bpp forced to 1 so coordinates are raw bytes).
        var dstCalculator = new OffsetCalculator(
            _dstWidth,
            _dstHeight,
            _dstStride,
            _isLinear,
            _dstGobBlocksInY,
            1);

        int srcOffset = 0;

        for (int y = _dstY; y < _dstY + _lineCount; y++)
        {
            int x1 = _dstX;
            int x2 = _dstX + _lineLengthIn;
            // 16-byte aligned sub-range [x1Round, x2Trunc) copied with vectors;
            // the head and tail are copied byte by byte.
            int x1Round = BitUtils.AlignUp(_dstX, 16);
            int x2Trunc = BitUtils.AlignDown(x2, 16);

            int x = x1;

            if (x1Round <= x2)
            {
                // Head: bytes before the first 16-byte boundary.
                for (; x < x1Round; x++, srcOffset++)
                {
                    int dstOffset = dstCalculator.GetOffset(x, y);

                    ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                    memoryManager.Write(dstAddress, data[srcOffset]);
                }
            }

            // Bulk: 16 bytes at a time.
            for (; x < x2Trunc; x += 16, srcOffset += 16)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);

                ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
            }

            // Tail: remaining bytes.
            for (; x < x2; x++, srcOffset++)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);

                ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                memoryManager.Write(dstAddress, data[srcOffset]);
            }

            // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
            // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
            int misalignment = _lineLengthIn & 3;

            if (misalignment != 0)
            {
                srcOffset += 4 - misalignment;
            }
        }

        _context.AdvanceSequence();
    }

    _finished = true;
}
/// <summary>
/// Lays out the given NSO executables (plus an optional argument region) in the
/// code address space, creates and initializes a KProcess with application
/// resource limits and the memory region selected by the ACID flags, loads each
/// image into memory and starts the process.
/// </summary>
/// <param name="context">Kernel context the process is created under</param>
/// <param name="metaData">NPDM metadata describing the process</param>
/// <param name="arguments">Optional raw argument data appended after the first image</param>
/// <param name="executables">Executable images to load, in order</param>
/// <returns>True on success, false if metadata is invalid or any kernel call failed</returns>
public static bool LoadNsos(
    KernelContext context,
    Npdm metaData,
    byte[] arguments = null,
    params IExecutable[] executables)
{
    ulong argsStart = 0;
    int argsSize = 0;
    // 64-bit processes start code at a higher base address.
    ulong codeStart = metaData.Is64Bit ? 0x8000000UL : 0x200000UL;
    int codeSize = 0;

    ulong[] nsoBase = new ulong[executables.Length];

    // First pass: compute each image's page-aligned size and base address.
    for (int index = 0; index < executables.Length; index++)
    {
        IExecutable staticObject = executables[index];

        int textEnd = staticObject.TextOffset + staticObject.Text.Length;
        int roEnd = staticObject.RoOffset + staticObject.Ro.Length;
        int dataEnd = staticObject.DataOffset + staticObject.Data.Length + staticObject.BssSize;

        // Image size is the furthest end of the three segments (unsigned compare).
        int nsoSize = textEnd;

        if ((uint)nsoSize < (uint)roEnd)
        {
            nsoSize = roEnd;
        }

        if ((uint)nsoSize < (uint)dataEnd)
        {
            nsoSize = dataEnd;
        }

        nsoSize = BitUtils.AlignUp(nsoSize, KMemoryManager.PageSize);

        nsoBase[index] = codeStart + (ulong)codeSize;

        codeSize += nsoSize;

        if (arguments != null && argsSize == 0)
        {
            // Reserve the argument region right after the first image.
            argsStart = (ulong)codeSize;

            // NOTE(review): AlignDown of (len * 2 + ArgsTotalSize - 1) reads like an
            // intended round-up of the argument region; confirm against ArgsTotalSize.
            argsSize = BitUtils.AlignDown(arguments.Length * 2 + ArgsTotalSize - 1, KMemoryManager.PageSize);

            codeSize += argsSize;
        }
    }

    // Record the static code range for the PTC profiler.
    PtcProfiler.StaticCodeStart = codeStart;
    PtcProfiler.StaticCodeSize = codeSize;

    int codePagesCount = codeSize / KMemoryManager.PageSize;

    int personalMmHeapPagesCount = metaData.PersonalMmHeapSize / KMemoryManager.PageSize;

    ProcessCreationInfo creationInfo = new ProcessCreationInfo(
        metaData.TitleName,
        metaData.Version,
        metaData.Aci0.TitleId,
        codeStart,
        codePagesCount,
        metaData.MmuFlags,
        0,
        personalMmHeapPagesCount);

    KernelResult result;

    // Default application resource limits (thread/event/session counts etc.).
    KResourceLimit resourceLimit = new KResourceLimit(context);

    long applicationRgSize = (long)context.MemoryRegions[(int)MemoryRegion.Application].Size;

    result = resourceLimit.SetLimitValue(LimitableResource.Memory, applicationRgSize);
    result |= resourceLimit.SetLimitValue(LimitableResource.Thread, 608);
    result |= resourceLimit.SetLimitValue(LimitableResource.Event, 700);
    result |= resourceLimit.SetLimitValue(LimitableResource.TransferMemory, 128);
    result |= resourceLimit.SetLimitValue(LimitableResource.Session, 894);

    if (result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization failed setting resource limit values.");

        return (false);
    }

    KProcess process = new KProcess(context);

    // Memory region is encoded in bits 2-5 of the ACID flags.
    MemoryRegion memoryRegion = (MemoryRegion)((metaData.Acid.Flags >> 2) & 0xf);

    if (memoryRegion > MemoryRegion.NvServices)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization failed due to invalid ACID flags.");

        return (false);
    }

    result = process.Initialize(
        creationInfo,
        metaData.Aci0.KernelAccessControl.Capabilities,
        resourceLimit,
        memoryRegion);

    if (result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{result}\".");

        return (false);
    }

    // Second pass: map each image at its computed base address.
    for (int index = 0; index < executables.Length; index++)
    {
        Logger.PrintInfo(LogClass.Loader, $"Loading image {index} at 0x{nsoBase[index]:x16}...");

        result = LoadIntoMemory(process, executables[index], nsoBase[index]);

        if (result != KernelResult.Success)
        {
            Logger.PrintError(LogClass.Loader, $"Process initialization returned error \"{result}\".");

            return (false);
        }
    }

    process.DefaultCpuCore = metaData.DefaultCpuId;

    result = process.Start(metaData.MainThreadPriority, (ulong)metaData.MainThreadStackSize);

    if (result != KernelResult.Success)
    {
        Logger.PrintError(LogClass.Loader, $"Process start returned error \"{result}\".");

        return (false);
    }

    context.Processes.TryAdd(process.Pid, process);

    return (true);
}
/// <summary>
/// Performs actual copy of the inline data after the transfer is finished.
/// </summary>
private void FinishTransfer()
{
    var memoryManager = _channel.MemoryManager;

    var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

    if (_isLinear && _lineCount == 1)
    {
        // Fast path: a single linear line can be written in one go.
        // Only _lineLengthIn bytes were requested; _size is rounded up to whole
        // words, so the tail of "data" is push padding and must not be written.
        memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
        _context.AdvanceSequence();
    }
    else
    {
        // Slow path: write pixel by pixel (with a 16-byte vectorized middle
        // section) through the offset calculator, which handles both linear
        // strides and block-linear swizzling.
        var dstCalculator = new OffsetCalculator(
            _dstWidth,
            _dstHeight,
            _dstStride,
            _isLinear,
            _dstGobBlocksInY,
            1);

        int srcOffset = 0;

        for (int y = _dstY; y < _dstY + _lineCount; y++)
        {
            int x1 = _dstX;
            int x2 = _dstX + _lineLengthIn;
            int x1Round = BitUtils.AlignUp(_dstX, 16);
            int x2Trunc = BitUtils.AlignDown(x2, 16);

            int x = x1;

            // Head: unaligned bytes before the first 16-byte boundary.
            if (x1Round <= x2)
            {
                for (; x < x1Round; x++, srcOffset++)
                {
                    int dstOffset = dstCalculator.GetOffset(x, y);
                    ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                    memoryManager.Write(dstAddress, data[srcOffset]);
                }
            }

            // Middle: aligned 16-byte chunks.
            for (; x < x2Trunc; x += 16, srcOffset += 16)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);
                ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
            }

            // Tail: remaining unaligned bytes.
            for (; x < x2; x++, srcOffset++)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);
                ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                memoryManager.Write(dstAddress, data[srcOffset]);
            }

            // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
            // If our copy length is not a multiple of 4, then we need to skip the padding bytes here,
            // otherwise srcOffset drifts and every subsequent line reads shifted data.
            int misalignment = _lineLengthIn & 3;

            if (misalignment != 0)
            {
                srcOffset += 4 - misalignment;
            }
        }

        _context.AdvanceSequence();
    }

    _finished = true;
}
/// <summary>
/// Returns a range of pages to the buddy allocator, updating the free bitmaps
/// and coalescing freed blocks into larger orders where possible.
/// </summary>
/// <param name="address">Base address of the range being freed</param>
/// <param name="pagesCount">Number of pages to free</param>
private void FreePages(ulong address, ulong pagesCount)
{
    lock (_blocks)
    {
        ulong endAddr = address + pagesCount * KPageTableBase.PageSize;

        int blockIndex = _blockOrdersCount - 1;

        ulong addressRounded = 0;
        ulong endAddrTruncated = 0;

        // Find the largest block order that fits at least one whole block
        // inside [address, endAddr). The range [addressRounded, endAddrTruncated)
        // is the part aligned to that order.
        for (; blockIndex >= 0; blockIndex--)
        {
            KMemoryRegionBlock allocInfo = _blocks[blockIndex];

            int blockSize = 1 << allocInfo.Order;

            addressRounded = BitUtils.AlignUp(address, blockSize);
            endAddrTruncated = BitUtils.AlignDown(endAddr, blockSize);

            if (addressRounded < endAddrTruncated)
            {
                break;
            }
        }

        // Marks one block (at the order selected by the outer blockIndex) as free,
        // then repeatedly tries to coalesce it with its buddies into the next order.
        void FreeRegion(ulong currAddress)
        {
            for (int currBlockIndex = blockIndex; currBlockIndex < _blockOrdersCount && currAddress != 0; currBlockIndex++)
            {
                KMemoryRegionBlock block = _blocks[currBlockIndex];

                block.FreeCount++;

                // Index of the freed block within this order's bitmap.
                ulong freedBlocks = (currAddress - block.StartAligned) >> block.Order;

                int index = (int)freedBlocks;

                // Set the free bit at each bitmap level; stop propagating upward
                // once a level already had another bit set (parent already marked).
                for (int level = block.MaxLevel - 1; level >= 0; level--, index /= 64)
                {
                    long mask = block.Masks[level][index / 64];

                    block.Masks[level][index / 64] = mask | (1L << (index & 63));

                    if (mask != 0)
                    {
                        break;
                    }
                }

                // Number of blocks of this order that make up one block of the next order.
                int blockSizeDelta = 1 << (block.NextOrder - block.Order);

                int freedBlocksTruncated = BitUtils.AlignDown((int)freedBlocks, blockSizeDelta);

                // If not all buddies are free, coalescing stops here.
                if (!block.TryCoalesce(freedBlocksTruncated, blockSizeDelta))
                {
                    break;
                }

                // Continue with the merged block at the next order up.
                currAddress = block.StartAligned + ((ulong)freedBlocksTruncated << block.Order);
            }
        }

        // Free inside aligned region.
        ulong baseAddress = addressRounded;

        while (baseAddress < endAddrTruncated)
        {
            ulong blockSize = 1UL << _blocks[blockIndex].Order;

            FreeRegion(baseAddress);

            baseAddress += blockSize;
        }

        int nextBlockIndex = blockIndex - 1;

        // Free region between Address and aligned region start.
        // Walks down through smaller orders, freeing backwards from the aligned start.
        baseAddress = addressRounded;

        for (blockIndex = nextBlockIndex; blockIndex >= 0; blockIndex--)
        {
            ulong blockSize = 1UL << _blocks[blockIndex].Order;

            while (baseAddress - blockSize >= address)
            {
                baseAddress -= blockSize;

                FreeRegion(baseAddress);
            }
        }

        // Free region between aligned region end and End Address.
        // Same idea, walking forwards from the aligned end.
        baseAddress = endAddrTruncated;

        for (blockIndex = nextBlockIndex; blockIndex >= 0; blockIndex--)
        {
            ulong blockSize = 1UL << _blocks[blockIndex].Order;

            while (baseAddress + blockSize <= endAddr)
            {
                FreeRegion(baseAddress);

                baseAddress += blockSize;
            }
        }
    }
}
/// <summary>
/// Performs actual copy of the inline data after the transfer is finished.
/// </summary>
private void FinishTransfer()
{
    var memoryManager = _channel.MemoryManager;

    var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

    if (_isLinear && _lineCount == 1)
    {
        // Fast path: a single linear line can be written in one go.
        // Only the requested line length is written; the rest of "data" is word padding.
        memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
        _context.AdvanceSequence();
    }
    else
    {
        // TODO: Verify if the destination X/Y and width/height are taken into account
        // for linear texture transfers. If not, we can use the fast path for that aswell.
        // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
        if (!_isLinear)
        {
            // Block-linear fast path: if the destination is a cached texture,
            // upload the data directly to the host texture instead of writing
            // swizzled guest memory.
            var target = memoryManager.Physical.TextureCache.FindTexture(
                memoryManager,
                _dstGpuVa,
                1,
                _dstStride,
                _dstHeight,
                _lineLengthIn,
                _lineCount,
                _isLinear,
                _dstGobBlocksInY,
                _dstGobBlocksInZ);

            if (target != null)
            {
                target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));

                // The transfer is complete on this path too; leaving _finished
                // unset here would make the class treat subsequently pushed
                // words as part of an unfinished transfer.
                _finished = true;

                return;
            }
        }

        // Slow path: write pixel by pixel (with a 16-byte vectorized middle
        // section) through the offset calculator.
        var dstCalculator = new OffsetCalculator(
            _dstWidth,
            _dstHeight,
            _dstStride,
            _isLinear,
            _dstGobBlocksInY,
            1);

        int srcOffset = 0;

        for (int y = _dstY; y < _dstY + _lineCount; y++)
        {
            int x1 = _dstX;
            int x2 = _dstX + _lineLengthIn;
            int x1Round = BitUtils.AlignUp(_dstX, 16);
            int x2Trunc = BitUtils.AlignDown(x2, 16);

            int x = x1;

            // Head: unaligned bytes before the first 16-byte boundary.
            if (x1Round <= x2)
            {
                for (; x < x1Round; x++, srcOffset++)
                {
                    int dstOffset = dstCalculator.GetOffset(x, y);
                    ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                    memoryManager.Write(dstAddress, data[srcOffset]);
                }
            }

            // Middle: aligned 16-byte chunks.
            for (; x < x2Trunc; x += 16, srcOffset += 16)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);
                ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
            }

            // Tail: remaining unaligned bytes.
            for (; x < x2; x++, srcOffset++)
            {
                int dstOffset = dstCalculator.GetOffset(x, y);
                ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                memoryManager.Write(dstAddress, data[srcOffset]);
            }

            // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
            // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
            int misalignment = _lineLengthIn & 3;

            if (misalignment != 0)
            {
                srcOffset += 4 - misalignment;
            }
        }

        _context.AdvanceSequence();
    }

    _finished = true;
}
/// <summary>
/// Converts a block-linear (GOB swizzled) texture to linear (row-major) layout,
/// processing all mip levels and layers into a single contiguous output buffer.
/// </summary>
/// <param name="width">Texture width in pixels</param>
/// <param name="height">Texture height in pixels</param>
/// <param name="depth">Texture depth in pixels (1 for 2D)</param>
/// <param name="levels">Number of mip levels</param>
/// <param name="layers">Number of array layers</param>
/// <param name="blockWidth">Compression block width (1 for uncompressed)</param>
/// <param name="blockHeight">Compression block height (1 for uncompressed)</param>
/// <param name="bytesPerPixel">Bytes per pixel (or per compression block)</param>
/// <param name="gobBlocksInY">GOB blocks in Y for the base level</param>
/// <param name="gobBlocksInZ">GOB blocks in Z for the base level</param>
/// <param name="gobBlocksInTileX">GOB blocks per tile in X</param>
/// <param name="sizeInfo">Size info describing layer size and mip offsets of the input</param>
/// <param name="data">Block-linear input data</param>
/// <returns>Newly allocated buffer with the texture in linear layout</returns>
public static Span<byte> ConvertBlockLinearToLinear(
    int width,
    int height,
    int depth,
    int levels,
    int layers,
    int blockWidth,
    int blockHeight,
    int bytesPerPixel,
    int gobBlocksInY,
    int gobBlocksInZ,
    int gobBlocksInTileX,
    SizeInfo sizeInfo,
    ReadOnlySpan<byte> data)
{
    int outSize = GetTextureSize(
        width,
        height,
        depth,
        levels,
        layers,
        blockWidth,
        blockHeight,
        bytesPerPixel);

    Span<byte> output = new byte[outSize];

    int outOffs = 0;

    int mipGobBlocksInY = gobBlocksInY;
    int mipGobBlocksInZ = gobBlocksInZ;

    int gobWidth = (GobStride / bytesPerPixel) * gobBlocksInTileX;
    int gobHeight = gobBlocksInY * GobHeight;

    for (int level = 0; level < levels; level++)
    {
        int w = Math.Max(1, width >> level);
        int h = Math.Max(1, height >> level);
        int d = Math.Max(1, depth >> level);

        // Convert pixel dimensions to compression-block dimensions.
        w = BitUtils.DivRoundUp(w, blockWidth);
        h = BitUtils.DivRoundUp(h, blockHeight);

        // Smaller mips use smaller GOB block configurations; halve until they fit.
        while (h <= (mipGobBlocksInY >> 1) * GobHeight && mipGobBlocksInY != 1)
        {
            mipGobBlocksInY >>= 1;
        }

        while (d <= (mipGobBlocksInZ >> 1) && mipGobBlocksInZ != 1)
        {
            mipGobBlocksInZ >>= 1;
        }

        // Row length truncated to 16/64-byte multiples for the vectorized copy loops.
        int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16);
        int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64);

        // First pixel X handled by the scalar tail loop.
        int xStart = strideTrunc / bytesPerPixel;

        int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment);

        // Padding bytes between the real row data and the aligned host stride.
        int outStrideGap = stride - w * bytesPerPixel;

        int alignment = gobWidth;

        if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight)
        {
            alignment = GobStride / bytesPerPixel;
        }

        int wAligned = BitUtils.AlignUp(w, alignment);

        BlockLinearLayout layoutConverter = new BlockLinearLayout(
            wAligned,
            h,
            mipGobBlocksInY,
            mipGobBlocksInZ,
            bytesPerPixel);

        // Copies one level for all layers. T is a blittable type matching the
        // pixel size, used only by the scalar tail loop. Captures and advances
        // the enclosing outOffs. The output/data parameters intentionally
        // shadow the method parameters so the fixed statement pins them.
        unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged
        {
            fixed (byte* outputPtr = output, dataPtr = data)
            {
                byte* outPtr = outputPtr + outOffs;
                for (int layer = 0; layer < layers; layer++)
                {
                    byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level));

                    for (int z = 0; z < d; z++)
                    {
                        layoutConverter.SetZ(z);
                        for (int y = 0; y < h; y++)
                        {
                            layoutConverter.SetY(y);

                            // 64-byte chunks: each iteration gathers 4 vectors from the
                            // GOB interior (offsets 0x00, 0x20, 0x100, 0x120 relative to
                            // the computed base) and stores them contiguously.
                            for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64)
                            {
                                byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x);
                                byte* offset2 = offset + 0x20;
                                byte* offset3 = offset + 0x100;
                                byte* offset4 = offset + 0x120;

                                Vector128<byte> value = *(Vector128<byte>*)offset;
                                Vector128<byte> value2 = *(Vector128<byte>*)offset2;
                                Vector128<byte> value3 = *(Vector128<byte>*)offset3;
                                Vector128<byte> value4 = *(Vector128<byte>*)offset4;

                                *(Vector128<byte>*)outPtr = value;
                                *(Vector128<byte>*)(outPtr + 16) = value2;
                                *(Vector128<byte>*)(outPtr + 32) = value3;
                                *(Vector128<byte>*)(outPtr + 48) = value4;
                            }

                            // Remaining 16-byte chunks.
                            for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16)
                            {
                                byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x);

                                *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset;
                            }

                            // Scalar tail, one pixel (T) at a time.
                            for (int x = xStart; x < w; x++, outPtr += bytesPerPixel)
                            {
                                byte* offset = inBaseOffset + layoutConverter.GetOffset(x);

                                *(T*)outPtr = *(T*)offset;
                            }

                            outPtr += outStrideGap;
                        }
                    }
                }
                outOffs += stride * h * d * layers;
            }
            return (true);
        }

        // Dispatch on pixel size; the discard forces evaluation of the switch.
        // NOTE(review): the '$' inside the exception message is rendered
        // literally (message reads "$<n> bpp") — likely unintended, left as-is.
        bool _ = bytesPerPixel switch
        {
            1 => Convert<byte>(output, data),
            2 => Convert<ushort>(output, data),
            4 => Convert<uint>(output, data),
            8 => Convert<ulong>(output, data),
            12 => Convert<Bpp12Pixel>(output, data),
            16 => Convert<Vector128<byte>>(output, data),
            _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.")
        };
    }
    return (output);
}
/// <summary>
/// Interleaves separate U and V chroma planes into a single block-linear UV
/// destination surface (semi-planar layout, 2 bytes per chroma sample pair).
/// Uses an SSE2 fast path when available, falling back to a scalar copy.
/// </summary>
/// <param name="dst">Destination block-linear UV plane</param>
/// <param name="srcU">Source linear U plane</param>
/// <param name="srcV">Source linear V plane</param>
/// <param name="srcStride">Stride in bytes of the source planes</param>
/// <param name="width">Chroma plane width in samples</param>
/// <param name="height">Chroma plane height in samples</param>
private unsafe static void WriteChroma(
    Span<byte> dst,
    ReadOnlySpan<byte> srcU,
    ReadOnlySpan<byte> srcV,
    int srcStride,
    int width,
    int height)
{
    // 2 bytes per element (U+V pair); GOB config fixed at 2/2 for NVDEC surfaces.
    OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);

    if (Sse2.IsSupported)
    {
        // Output row length (width * 2 bytes) truncated to 64-byte chunks.
        int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);

        int inStrideGap = srcStride - width;

        fixed (byte* outputPtr = dst, srcUPtr = srcU, srcVPtr = srcV)
        {
            byte* inUPtr = srcUPtr;
            byte* inVPtr = srcVPtr;

            for (int y = 0; y < height; y++)
            {
                calc.SetY(y);

                // Vector loop: x counts OUTPUT bytes; each iteration consumes
                // 32 U and 32 V samples and writes 64 interleaved bytes into the
                // GOB interior (offsets 0x00, 0x20, 0x100, 0x120 from the base).
                for (int x = 0; x < strideTrunc64; x += 64, inUPtr += 32, inVPtr += 32)
                {
                    byte* offset = outputPtr + calc.GetOffsetWithLineOffset64(x);
                    byte* offset2 = offset + 0x20;
                    byte* offset3 = offset + 0x100;
                    byte* offset4 = offset + 0x120;

                    Vector128<byte> value = *(Vector128<byte>*)inUPtr;
                    Vector128<byte> value2 = *(Vector128<byte>*)inVPtr;
                    Vector128<byte> value3 = *(Vector128<byte>*)(inUPtr + 16);
                    Vector128<byte> value4 = *(Vector128<byte>*)(inVPtr + 16);

                    // UnpackLow/High interleave U and V bytes pairwise.
                    Vector128<byte> uv0 = Sse2.UnpackLow(value, value2);
                    Vector128<byte> uv1 = Sse2.UnpackHigh(value, value2);
                    Vector128<byte> uv2 = Sse2.UnpackLow(value3, value4);
                    Vector128<byte> uv3 = Sse2.UnpackHigh(value3, value4);

                    *(Vector128<byte>*)offset = uv0;
                    *(Vector128<byte>*)offset2 = uv1;
                    *(Vector128<byte>*)offset3 = uv2;
                    *(Vector128<byte>*)offset4 = uv3;
                }

                // Scalar tail: x now counts SAMPLES (2 output bytes each), so the
                // vectorized byte count is divided by 2 to get the resume position.
                for (int x = strideTrunc64 / 2; x < width; x++, inUPtr++, inVPtr++)
                {
                    byte* offset = outputPtr + calc.GetOffset(x);

                    *offset = *inUPtr;
                    *(offset + 1) = *inVPtr;
                }

                // Skip source padding between rows.
                inUPtr += inStrideGap;
                inVPtr += inStrideGap;
            }
        }
    }
    else
    {
        // Scalar fallback.
        for (int y = 0; y < height; y++)
        {
            int srcBaseOffset = y * srcStride;

            calc.SetY(y);

            for (int x = 0; x < width; x++)
            {
                int dstOffset = calc.GetOffset(x);

                dst[dstOffset + 0] = srcU[srcBaseOffset + x];
                dst[dstOffset + 1] = srcV[srcBaseOffset + x];
            }
        }
    }
}
/// <summary>
/// Copies the unaligned edges of IPC buffers from the server back to the client
/// process. Only the partial pages at the start and end are copied here; the
/// page-aligned middle of each buffer is shared by mapping, not copying.
/// </summary>
/// <param name="memoryManager">Memory manager of the client process</param>
/// <param name="list">Buffer descriptors to copy back</param>
/// <returns><see cref="KernelResult.Success"/> or the first failing result</returns>
private KernelResult CopyToClient(KMemoryManager memoryManager, List<KBufferDescriptor> list)
{
    foreach (KBufferDescriptor desc in list)
    {
        // Each IPC buffer type has its own set of allowed memory states.
        MemoryState stateMask;

        switch (desc.State)
        {
            case MemoryState.IpcBuffer0: stateMask = MemoryState.IpcSendAllowedType0; break;
            case MemoryState.IpcBuffer1: stateMask = MemoryState.IpcSendAllowedType1; break;
            case MemoryState.IpcBuffer3: stateMask = MemoryState.IpcSendAllowedType3; break;

            default: return KernelResult.InvalidCombination;
        }

        MemoryAttribute attributeMask = MemoryAttribute.Borrowed | MemoryAttribute.Uncached;

        if (desc.State == MemoryState.IpcBuffer0)
        {
            attributeMask |= MemoryAttribute.DeviceMapped;
        }

        ulong clientAddrTruncated = BitUtils.AlignDown(desc.ClientAddress, KMemoryManager.PageSize);
        ulong clientAddrRounded = BitUtils.AlignUp(desc.ClientAddress, KMemoryManager.PageSize);

        // Check if address is not aligned, in this case we need to perform 2 copies.
        if (clientAddrTruncated != clientAddrRounded)
        {
            // Head: bytes from the client address up to the next page boundary
            // (clamped to the buffer size for buffers smaller than one page).
            ulong copySize = clientAddrRounded - desc.ClientAddress;

            if (copySize > desc.Size)
            {
                copySize = desc.Size;
            }

            KernelResult result = memoryManager.CopyDataFromCurrentProcess(
                desc.ClientAddress,
                copySize,
                stateMask,
                stateMask,
                KMemoryPermission.ReadAndWrite,
                attributeMask,
                MemoryAttribute.None,
                desc.ServerAddress);

            if (result != KernelResult.Success)
            {
                return result;
            }
        }

        ulong clientEndAddr = desc.ClientAddress + desc.Size;
        ulong serverEndAddr = desc.ServerAddress + desc.Size;

        ulong clientEndAddrTruncated = BitUtils.AlignDown(clientEndAddr, KMemoryManager.PageSize);
        ulong clientEndAddrRounded = BitUtils.AlignUp(clientEndAddr, KMemoryManager.PageSize);
        ulong serverEndAddrTruncated = BitUtils.AlignDown(serverEndAddr, KMemoryManager.PageSize);

        // Tail: copy the partial page at the end, unless it was already fully
        // covered by the head copy above (tiny buffer entirely within one page).
        if (clientEndAddrTruncated < clientEndAddrRounded &&
            (clientAddrTruncated == clientAddrRounded || clientAddrTruncated < clientEndAddrTruncated))
        {
            KernelResult result = memoryManager.CopyDataFromCurrentProcess(
                clientEndAddrTruncated,
                clientEndAddr - clientEndAddrTruncated,
                stateMask,
                stateMask,
                KMemoryPermission.ReadAndWrite,
                attributeMask,
                MemoryAttribute.None,
                serverEndAddrTruncated);

            if (result != KernelResult.Success)
            {
                return result;
            }
        }
    }

    return KernelResult.Success;
}
/// <summary>
/// De-interleaves a block-linear semi-planar UV source surface into separate
/// linear U and V planes (inverse of WriteChroma). Uses an SSE2 fast path when
/// available, falling back to a scalar copy.
/// </summary>
/// <param name="dstU">Destination linear U plane</param>
/// <param name="dstV">Destination linear V plane</param>
/// <param name="src">Source block-linear UV plane</param>
/// <param name="dstStride">Stride in bytes of the destination planes</param>
/// <param name="width">Chroma plane width in samples</param>
/// <param name="height">Chroma plane height in samples</param>
private unsafe static void ReadChroma(
    Span<byte> dstU,
    Span<byte> dstV,
    ReadOnlySpan<byte> src,
    int dstStride,
    int width,
    int height)
{
    // 2 bytes per element (U+V pair); GOB config fixed at 2/2 for NVDEC surfaces.
    OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);

    if (Sse2.IsSupported)
    {
        // Source row length (width * 2 bytes) truncated to 64-byte chunks.
        int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);

        int outStrideGap = dstStride - width;

        fixed (byte* dstUPtr = dstU, dstVPtr = dstV, dataPtr = src)
        {
            byte* uPtr = dstUPtr;
            byte* vPtr = dstVPtr;

            for (int y = 0; y < height; y++)
            {
                calc.SetY(y);

                // Vector loop: x counts SOURCE bytes; each iteration reads 64
                // interleaved bytes from the GOB interior (offsets 0x00, 0x20,
                // 0x100, 0x120) and produces 32 U and 32 V samples.
                for (int x = 0; x < strideTrunc64; x += 64, uPtr += 32, vPtr += 32)
                {
                    byte* offset = dataPtr + calc.GetOffsetWithLineOffset64(x);
                    byte* offset2 = offset + 0x20;
                    byte* offset3 = offset + 0x100;
                    byte* offset4 = offset + 0x120;

                    Vector128<byte> value = *(Vector128<byte>*)offset;
                    Vector128<byte> value2 = *(Vector128<byte>*)offset2;
                    Vector128<byte> value3 = *(Vector128<byte>*)offset3;
                    Vector128<byte> value4 = *(Vector128<byte>*)offset4;

                    // Three rounds of unpack act as a byte-level transpose,
                    // separating the even (U) and odd (V) bytes of each vector.
                    Vector128<byte> u00 = Sse2.UnpackLow(value, value2);
                    Vector128<byte> v00 = Sse2.UnpackHigh(value, value2);
                    Vector128<byte> u01 = Sse2.UnpackLow(value3, value4);
                    Vector128<byte> v01 = Sse2.UnpackHigh(value3, value4);

                    Vector128<byte> u10 = Sse2.UnpackLow(u00, v00);
                    Vector128<byte> v10 = Sse2.UnpackHigh(u00, v00);
                    Vector128<byte> u11 = Sse2.UnpackLow(u01, v01);
                    Vector128<byte> v11 = Sse2.UnpackHigh(u01, v01);

                    Vector128<byte> u20 = Sse2.UnpackLow(u10, v10);
                    Vector128<byte> v20 = Sse2.UnpackHigh(u10, v10);
                    Vector128<byte> u21 = Sse2.UnpackLow(u11, v11);
                    Vector128<byte> v21 = Sse2.UnpackHigh(u11, v11);

                    Vector128<byte> u30 = Sse2.UnpackLow(u20, v20);
                    Vector128<byte> v30 = Sse2.UnpackHigh(u20, v20);
                    Vector128<byte> u31 = Sse2.UnpackLow(u21, v21);
                    Vector128<byte> v31 = Sse2.UnpackHigh(u21, v21);

                    *(Vector128<byte>*)uPtr = u30;
                    *(Vector128<byte>*)(uPtr + 16) = u31;
                    *(Vector128<byte>*)vPtr = v30;
                    *(Vector128<byte>*)(vPtr + 16) = v31;
                }

                // Scalar tail: x now counts SAMPLES (2 source bytes each), so the
                // vectorized byte count is divided by 2 to get the resume position.
                for (int x = strideTrunc64 / 2; x < width; x++, uPtr++, vPtr++)
                {
                    byte* offset = dataPtr + calc.GetOffset(x);

                    *uPtr = *offset;
                    *vPtr = *(offset + 1);
                }

                // Skip destination padding between rows.
                uPtr += outStrideGap;
                vPtr += outStrideGap;
            }
        }
    }
    else
    {
        // Scalar fallback.
        for (int y = 0; y < height; y++)
        {
            int dstBaseOffset = y * dstStride;

            calc.SetY(y);

            for (int x = 0; x < width; x++)
            {
                int srcOffset = calc.GetOffset(x);

                dstU[dstBaseOffset + x] = src[srcOffset];
                dstV[dstBaseOffset + x] = src[srcOffset + 1];
            }
        }
    }
}