Example #1
0
        /// <summary>
        /// Writes the buffered inline data to its destination once the transfer has completed.
        /// </summary>
        /// <remarks>
        /// A single linear line is written with one call. Every other case goes through an
        /// <see cref="OffsetCalculator"/> and writes each line in 16 byte chunks followed by single bytes.
        /// </remarks>
        private void FinishTransfer()
        {
            const int ChunkSize = 16;

            Span<byte> payload = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            bool isSingleLinearLine = _isLinear && _params.LineCount == 1;

            if (isSingleLinearLine)
            {
                // The whole payload is contiguous at the destination, so one write is enough.
                ulong linearAddress = _context.MemoryManager.Translate(_params.DstAddress.Pack());

                _context.PhysicalMemory.Write(linearAddress, payload);
            }
            else
            {
                var layout = new OffsetCalculator(
                    _params.DstWidth,
                    _params.DstHeight,
                    _params.DstStride,
                    _isLinear,
                    _params.DstMemoryLayout.UnpackGobBlocksInY(),
                    1);

                // Number of payload bytes already written.
                int consumed = 0;

                ulong baseAddress = _context.MemoryManager.Translate(_params.DstAddress.Pack());

                for (int line = _params.DstY; line < _params.DstY + _params.LineCount; line++)
                {
                    int column     = _params.DstX;
                    int lineEnd    = _params.DstX + _params.LineLengthIn;
                    int chunkedEnd = _params.DstX + BitUtils.AlignDown(_params.LineLengthIn, ChunkSize);

                    // Bulk of the line, one 16 byte chunk per write.
                    while (column < chunkedEnd)
                    {
                        ulong chunkAddress = baseAddress + (ulong)layout.GetOffset(column, line);

                        _context.PhysicalMemory.Write(chunkAddress, payload.Slice(consumed, ChunkSize));

                        column   += ChunkSize;
                        consumed += ChunkSize;
                    }

                    // Remainder of the line that does not fill a whole chunk, one byte per write.
                    while (column < lineEnd)
                    {
                        ulong byteAddress = baseAddress + (ulong)layout.GetOffset(column, line);

                        _context.PhysicalMemory.Write(byteAddress, payload.Slice(consumed, 1));

                        column++;
                        consumed++;
                    }
                }
            }

            _finished = true;

            _context.AdvanceSequence();
        }
Example #2
0
        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy.
        /// </summary>
        /// <remarks>
        /// Nothing is copied when the line length is zero. When <see cref="CopyFlags.MultiLineEnable"/> is set,
        /// a rectangle of <c>LineLengthIn</c> by <c>LineCount</c> elements is copied between the source and
        /// destination layouts. Otherwise the request is forwarded to the buffer cache, either as a clear
        /// (remap enabled with every destination component set to constant A) or as a plain copy.
        /// </remarks>
        /// <param name="argument">The LaunchDma call argument</param>
        /// <exception cref="NotSupportedException">
        /// Thrown by the per-element conversion path when the source element size is not 1, 2, 4, 8, 12 or 16 bytes.
        /// </exception>
        private void DmaCopy(int argument)
        {
            var memoryManager = _channel.MemoryManager;

            CopyFlags copyFlags = (CopyFlags)argument;

            bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
            bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
            bool copy2D    = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
            bool remap     = copyFlags.HasFlag(CopyFlags.RemapEnable);

            uint size = _state.State.LineLengthIn;

            if (size == 0)
            {
                return;
            }

            ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
            ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;

            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            _3dEngine.FlushUboDirty();

            if (copy2D)
            {
                // Buffer to texture copy.
                // The raw remap register values are incremented by one before use; without remap, elements are single bytes.
                int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
                int srcBpp        = remap ? ((int)_state.State.SetRemapComponentsNumSrcComponents + 1) * componentSize : 1;
                int dstBpp        = remap ? ((int)_state.State.SetRemapComponentsNumDstComponents + 1) * componentSize : 1;

                var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
                var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);

                int srcStride = (int)_state.State.PitchIn;
                int dstStride = (int)_state.State.PitchOut;

                var srcCalculator = new OffsetCalculator(
                    src.Width,
                    src.Height,
                    srcStride,
                    srcLinear,
                    src.MemoryLayout.UnpackGobBlocksInY(),
                    src.MemoryLayout.UnpackGobBlocksInZ(),
                    srcBpp);

                var dstCalculator = new OffsetCalculator(
                    dst.Width,
                    dst.Height,
                    dstStride,
                    dstLinear,
                    dst.MemoryLayout.UnpackGobBlocksInY(),
                    dst.MemoryLayout.UnpackGobBlocksInZ(),
                    dstBpp);

                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, xCount, yCount);
                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, xCount, yCount);

                // With a negative stride on a linear layout, the base offset is shifted by stride * (yCount - 1).
                if (srcLinear && srcStride < 0)
                {
                    srcBaseOffset += srcStride * (yCount - 1);
                }

                if (dstLinear && dstStride < 0)
                {
                    dstBaseOffset += dstStride * (yCount - 1);
                }

                // The destination is modified on a copy (ToArray) and written back with memoryManager.Write below.
                ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);
                Span<byte>         dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();

                bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
                bool completeDest   = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);

                if (completeSource && completeDest)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        dst,
                        dstGpuVa,
                        dstBpp,
                        dstStride,
                        xCount,
                        yCount,
                        dstLinear);

                    if (target != null)
                    {
                        // A matching texture was found: convert the source to linear data and set it on the texture directly.
                        ReadOnlySpan<byte> data;
                        if (srcLinear)
                        {
                            data = LayoutConverter.ConvertLinearStridedToLinear(
                                target.Info.Width,
                                target.Info.Height,
                                1,
                                1,
                                xCount * srcBpp,
                                srcStride,
                                target.Info.FormatInfo.BytesPerPixel,
                                srcSpan);
                        }
                        else
                        {
                            data = LayoutConverter.ConvertBlockLinearToLinear(
                                src.Width,
                                src.Height,
                                src.Depth,
                                1,
                                1,
                                1,
                                1,
                                1,
                                srcBpp,
                                src.MemoryLayout.UnpackGobBlocksInY(),
                                src.MemoryLayout.UnpackGobBlocksInZ(),
                                1,
                                new SizeInfo((int)target.Size),
                                srcSpan);
                        }

                        target.SynchronizeMemory();
                        target.SetData(data);
                        target.SignalModified();

                        return;
                    }
                    else if (srcCalculator.LayoutMatches(dstCalculator))
                    {
                        srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.

                        memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);

                        return;
                    }
                }

                // Element by element copy between the two layouts. T only selects how many bytes move per element.
                // The return value exists so the calls can be used as arms of the switch expression below.
                unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
                {
                    fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
                    {
                        byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                        byte* srcBase = srcPtr - srcBaseOffset;

                        for (int y = 0; y < yCount; y++)
                        {
                            srcCalculator.SetY(src.RegionY + y);
                            dstCalculator.SetY(dst.RegionY + y);

                            for (int x = 0; x < xCount; x++)
                            {
                                int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
                                int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);

                                *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                            }
                        }
                    }

                    return true;
                }

                bool _ = srcBpp switch
                {
                    1 => Convert<byte>(dstSpan, srcSpan),
                    2 => Convert<ushort>(dstSpan, srcSpan),
                    4 => Convert<uint>(dstSpan, srcSpan),
                    8 => Convert<ulong>(dstSpan, srcSpan),
                    12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
                    16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
                    _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
                };

                memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
            }
            else
            {
                if (remap &&
                    _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
                    _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
                    _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
                {
                    // Fast path for clears when remap is enabled.
                    memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
                }
                else
                {
                    // TODO: Implement remap functionality.
                    // Buffer to buffer copy.
                    memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
                }
            }
        }
Example #3
0
        /// <summary>
        /// Handles a copy request, which is either a plain buffer to buffer copy or a 2D region copy.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="argument">Method call argument</param>
        private void CopyBuffer(GpuState state, int argument)
        {
            // Bits of the method argument that select the copy mode.
            const int SrcLinearMask = 1 << 7;
            const int DstLinearMask = 1 << 8;
            const int Copy2DMask    = 1 << 9;

            var copyParams = state.Get<CopyBufferParams>(MethodOffset.CopyBufferParams);

            var swizzle = state.Get<CopyBufferSwizzle>(MethodOffset.CopyBufferSwizzle);

            int lineLength = copyParams.XCount;

            if (lineLength == 0)
            {
                return;
            }

            if ((argument & Copy2DMask) == 0)
            {
                // Buffer to buffer copy.
                BufferManager.CopyBuffer(copyParams.SrcAddress, copyParams.DstAddress, (uint)lineLength);

                return;
            }

            // Buffer to texture copy.
            bool isSrcLinear = (argument & SrcLinearMask) != 0;
            bool isDstLinear = (argument & DstLinearMask) != 0;

            var dstTexture = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferDstTexture);
            var srcTexture = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferSrcTexture);

            var srcLayout = new OffsetCalculator(
                srcTexture.Width,
                srcTexture.Height,
                copyParams.SrcStride,
                isSrcLinear,
                srcTexture.MemoryLayout.UnpackGobBlocksInY(),
                srcTexture.MemoryLayout.UnpackGobBlocksInZ(),
                1);

            var dstLayout = new OffsetCalculator(
                dstTexture.Width,
                dstTexture.Height,
                copyParams.DstStride,
                isDstLinear,
                dstTexture.MemoryLayout.UnpackGobBlocksInY(),
                dstTexture.MemoryLayout.UnpackGobBlocksInZ(),
                1);

            ulong srcAddress = _context.MemoryManager.Translate(copyParams.SrcAddress.Pack());
            ulong dstAddress = _context.MemoryManager.Translate(copyParams.DstAddress.Pack());

            (int srcRangeStart, int srcRangeSize) = srcLayout.GetRectangleRange(
                srcTexture.RegionX,
                srcTexture.RegionY,
                copyParams.XCount,
                copyParams.YCount);

            (int dstRangeStart, int dstRangeSize) = dstLayout.GetRectangleRange(
                dstTexture.RegionX,
                dstTexture.RegionY,
                copyParams.XCount,
                copyParams.YCount);

            ulong dstWriteAddress = dstAddress + (ulong)dstRangeStart;

            // The destination bytes are copied into an array, modified there, and written back afterwards.
            ReadOnlySpan<byte> srcBytes = _context.PhysicalMemory.GetSpan(srcAddress + (ulong)srcRangeStart, srcRangeSize, true);
            Span<byte>         dstBytes = _context.PhysicalMemory.GetSpan(dstWriteAddress, dstRangeSize).ToArray();

            bool srcIsComplete = IsTextureCopyComplete(copyParams, srcTexture, isSrcLinear, copyParams.SrcStride);
            bool dstIsComplete = IsTextureCopyComplete(copyParams, dstTexture, isDstLinear, copyParams.DstStride);

            if (srcIsComplete && dstIsComplete)
            {
                Image.Texture target = TextureManager.FindTexture(dstTexture, copyParams, swizzle, isDstLinear);

                if (target != null)
                {
                    // A matching texture exists: hand it linear data instead of writing to memory.
                    ReadOnlySpan<byte> linearData;

                    if (!isSrcLinear)
                    {
                        linearData = LayoutConverter.ConvertBlockLinearToLinear(
                            srcTexture.Width,
                            srcTexture.Height,
                            1,
                            target.Info.Levels,
                            1,
                            1,
                            1,
                            1,
                            srcTexture.MemoryLayout.UnpackGobBlocksInY(),
                            srcTexture.MemoryLayout.UnpackGobBlocksInZ(),
                            1,
                            new SizeInfo((int)target.Size),
                            srcBytes);
                    }
                    else
                    {
                        linearData = LayoutConverter.ConvertLinearStridedToLinear(
                            target.Info.Width,
                            target.Info.Height,
                            1,
                            1,
                            copyParams.SrcStride,
                            target.Info.FormatInfo.BytesPerPixel,
                            srcBytes);
                    }

                    target.SetData(linearData);
                    target.SignalModified();

                    return;
                }

                if (srcLayout.LayoutMatches(dstLayout))
                {
                    // Both sides share a layout, so the bytes can be copied as they are.
                    srcBytes.CopyTo(dstBytes);

                    _context.PhysicalMemory.Write(dstWriteAddress, dstBytes);

                    return;
                }
            }

            CopyBytes(dstBytes, srcBytes);

            _context.PhysicalMemory.Write(dstWriteAddress, dstBytes);

            // Copies the region one byte at a time, translating each coordinate through both layouts.
            unsafe void CopyBytes(Span<byte> destination, ReadOnlySpan<byte> source)
            {
                fixed (byte* dstPtr = destination, srcPtr = source)
                {
                    // Layout offsets are relative to the resource base, while the spans start at the range start.
                    byte* dstOrigin = dstPtr - dstRangeStart;
                    byte* srcOrigin = srcPtr - srcRangeStart;

                    for (int row = 0; row < copyParams.YCount; row++)
                    {
                        srcLayout.SetY(srcTexture.RegionY + row);
                        dstLayout.SetY(dstTexture.RegionY + row);

                        for (int column = 0; column < copyParams.XCount; column++)
                        {
                            int from = srcLayout.GetOffset(srcTexture.RegionX + column);
                            int to   = dstLayout.GetOffset(dstTexture.RegionX + column);

                            dstOrigin[to] = srcOrigin[from];
                        }
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy.
        /// </summary>
        /// <remarks>
        /// Nothing is copied when the X count is zero. When <see cref="CopyFlags.MultiLineEnable"/> is set,
        /// a rectangle of <c>XCount</c> by <c>YCount</c> elements is copied between the source and destination
        /// layouts. Otherwise the request is forwarded to the buffer manager, either as a clear (remap enabled
        /// with every destination component set to constant A) or as a plain copy.
        /// </remarks>
        /// <param name="state">Current GPU state</param>
        /// <param name="argument">Method call argument</param>
        /// <exception cref="NotSupportedException">
        /// Thrown by the per-element conversion path when the source element size is not 1, 2, 4, 8, 12 or 16 bytes.
        /// </exception>
        private void CopyBuffer(GpuState state, int argument)
        {
            var cbp = state.Get<CopyBufferParams>(MethodOffset.CopyBufferParams);

            var swizzle = state.Get<CopyBufferSwizzle>(MethodOffset.CopyBufferSwizzle);

            CopyFlags copyFlags = (CopyFlags)argument;

            bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
            bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
            bool copy2D    = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
            bool remap     = copyFlags.HasFlag(CopyFlags.RemapEnable);

            int size = cbp.XCount;

            if (size == 0)
            {
                return;
            }

            if (copy2D)
            {
                // Buffer to texture copy.
                // Without remap, elements are single bytes; with remap, the size is component count times component size.
                int srcBpp = remap ? swizzle.UnpackSrcComponentsCount() * swizzle.UnpackComponentSize() : 1;
                int dstBpp = remap ? swizzle.UnpackDstComponentsCount() * swizzle.UnpackComponentSize() : 1;

                var dst = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferDstTexture);
                var src = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferSrcTexture);

                var srcCalculator = new OffsetCalculator(
                    src.Width,
                    src.Height,
                    cbp.SrcStride,
                    srcLinear,
                    src.MemoryLayout.UnpackGobBlocksInY(),
                    src.MemoryLayout.UnpackGobBlocksInZ(),
                    srcBpp);

                var dstCalculator = new OffsetCalculator(
                    dst.Width,
                    dst.Height,
                    cbp.DstStride,
                    dstLinear,
                    dst.MemoryLayout.UnpackGobBlocksInY(),
                    dst.MemoryLayout.UnpackGobBlocksInZ(),
                    dstBpp);

                ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
                ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());

                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);

                // The destination is modified on a copy (ToArray) and written back with PhysicalMemory.Write below.
                ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize, true);
                Span<byte>         dstSpan = _context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();

                bool completeSource = IsTextureCopyComplete(cbp, src, srcLinear, srcBpp, cbp.SrcStride);
                bool completeDest   = IsTextureCopyComplete(cbp, dst, dstLinear, dstBpp, cbp.DstStride);

                if (completeSource && completeDest)
                {
                    Image.Texture target = TextureManager.FindTexture(dst, cbp, swizzle, dstLinear);
                    if (target != null)
                    {
                        // A matching texture was found: convert the source to linear data and set it on the texture directly.
                        ReadOnlySpan<byte> data;
                        if (srcLinear)
                        {
                            data = LayoutConverter.ConvertLinearStridedToLinear(
                                target.Info.Width,
                                target.Info.Height,
                                1,
                                1,
                                cbp.SrcStride,
                                target.Info.FormatInfo.BytesPerPixel,
                                srcSpan);
                        }
                        else
                        {
                            data = LayoutConverter.ConvertBlockLinearToLinear(
                                src.Width,
                                src.Height,
                                1,
                                target.Info.Levels,
                                1,
                                1,
                                1,
                                srcBpp,
                                src.MemoryLayout.UnpackGobBlocksInY(),
                                src.MemoryLayout.UnpackGobBlocksInZ(),
                                1,
                                new SizeInfo((int)target.Size),
                                srcSpan);
                        }

                        target.SetData(data);
                        target.SignalModified();

                        return;
                    }
                    else if (srcCalculator.LayoutMatches(dstCalculator))
                    {
                        srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.

                        _context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);

                        return;
                    }
                }

                // Element by element copy between the two layouts. T only selects how many bytes move per element.
                // The return value exists so the calls can be used as arms of the switch expression below.
                unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
                {
                    fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
                    {
                        byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                        byte* srcBase = srcPtr - srcBaseOffset;

                        for (int y = 0; y < cbp.YCount; y++)
                        {
                            srcCalculator.SetY(src.RegionY + y);
                            dstCalculator.SetY(dst.RegionY + y);

                            for (int x = 0; x < cbp.XCount; x++)
                            {
                                int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
                                int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);

                                *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                            }
                        }
                    }

                    return true;
                }

                bool _ = srcBpp switch
                {
                    1 => Convert<byte>(dstSpan, srcSpan),
                    2 => Convert<ushort>(dstSpan, srcSpan),
                    4 => Convert<uint>(dstSpan, srcSpan),
                    8 => Convert<ulong>(dstSpan, srcSpan),
                    12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
                    16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
                    _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
                };

                _context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
            }
            else
            {
                if (remap &&
                    swizzle.UnpackDstX() == BufferSwizzleComponent.ConstA &&
                    swizzle.UnpackDstY() == BufferSwizzleComponent.ConstA &&
                    swizzle.UnpackDstZ() == BufferSwizzleComponent.ConstA &&
                    swizzle.UnpackDstW() == BufferSwizzleComponent.ConstA &&
                    swizzle.UnpackSrcComponentsCount() == 1 &&
                    swizzle.UnpackDstComponentsCount() == 1 &&
                    swizzle.UnpackComponentSize() == 4)
                {
                    // Fast path for clears when remap is enabled.
                    BufferManager.ClearBuffer(cbp.DstAddress, (uint)size * 4, state.Get<uint>(MethodOffset.CopyBufferConstA));
                }
                else
                {
                    // TODO: Implement remap functionality.
                    // Buffer to buffer copy.
                    BufferManager.CopyBuffer(cbp.SrcAddress, cbp.DstAddress, (uint)size);
                }
            }
        }
Example #5
0
        /// <summary>
        /// Interleaves separate U and V planes into <paramref name="dst"/>, writing one U byte followed by
        /// one V byte at the offset the <see cref="OffsetCalculator"/> gives for each pixel.
        /// </summary>
        /// <param name="dst">Destination that receives the interleaved U/V pairs</param>
        /// <param name="srcU">Source U plane</param>
        /// <param name="srcV">Source V plane</param>
        /// <param name="srcStride">Distance in bytes between the start of consecutive rows of each source plane</param>
        /// <param name="width">Number of U/V pairs per row</param>
        /// <param name="height">Number of rows</param>
        private unsafe static void WriteChroma(
            Span <byte> dst,
            ReadOnlySpan <byte> srcU,
            ReadOnlySpan <byte> srcV,
            int srcStride,
            int width,
            int height)
        {
            var layout = new OffsetCalculator(width, height, 0, false, 2, 2);

            if (!Sse2.IsSupported)
            {
                // Scalar fallback: one U/V pair at a time.
                for (int row = 0; row < height; row++)
                {
                    int rowBase = row * srcStride;

                    layout.SetY(row);

                    for (int column = 0; column < width; column++)
                    {
                        int pairOffset = layout.GetOffset(column);
                        int srcIndex   = rowBase + column;

                        dst[pairOffset]     = srcU[srcIndex];
                        dst[pairOffset + 1] = srcV[srcIndex];
                    }
                }

                return;
            }

            // Part of each destination row (two bytes per pair) that can be produced in whole 64 byte steps.
            int vectorizedBytes = BitUtils.AlignDown(width * 2, 64);

            // Source bytes to skip at the end of each row to reach the next one.
            int rowPadding = srcStride - width;

            fixed (byte* dstBase = dst, uBase = srcU, vBase = srcV)
            {
                byte* u = uBase;
                byte* v = vBase;

                for (int row = 0; row < height; row++)
                {
                    layout.SetY(row);

                    // Each step consumes 32 bytes from each plane and produces 64 interleaved bytes.
                    for (int byteX = 0; byteX < vectorizedBytes; byteX += 64)
                    {
                        byte* block = dstBase + layout.GetOffsetWithLineOffset64(byteX);

                        Vector128<byte> uLow  = Sse2.LoadVector128(u);
                        Vector128<byte> vLow  = Sse2.LoadVector128(v);
                        Vector128<byte> uHigh = Sse2.LoadVector128(u + 16);
                        Vector128<byte> vHigh = Sse2.LoadVector128(v + 16);

                        Vector128<byte> pairs0 = Sse2.UnpackLow(uLow, vLow);
                        Vector128<byte> pairs1 = Sse2.UnpackHigh(uLow, vLow);
                        Vector128<byte> pairs2 = Sse2.UnpackLow(uHigh, vHigh);
                        Vector128<byte> pairs3 = Sse2.UnpackHigh(uHigh, vHigh);

                        // The four 16 byte results go to fixed offsets relative to the block start.
                        Sse2.Store(block, pairs0);
                        Sse2.Store(block + 0x20, pairs1);
                        Sse2.Store(block + 0x100, pairs2);
                        Sse2.Store(block + 0x120, pairs3);

                        u += 32;
                        v += 32;
                    }

                    // Remaining pairs of the row, starting at the first pair the vector loop did not cover.
                    for (int column = vectorizedBytes / 2; column < width; column++)
                    {
                        byte* pair = dstBase + layout.GetOffset(column);

                        pair[0] = *u++;
                        pair[1] = *v++;
                    }

                    u += rowPadding;
                    v += rowPadding;
                }
            }
        }
Example #6
0
        /// <summary>
        /// Performs actual copy of the inline data after the transfer is finished.
        /// </summary>
        /// <remarks>
        /// Three paths exist: a single linear line is written with one tracked write; a non-linear transfer
        /// that matches a cached texture sets the data on that texture; anything else is written through an
        /// <see cref="OffsetCalculator"/>. Every path marks the transfer as finished.
        /// </remarks>
        private void FinishTransfer()
        {
            var memoryManager = _channel.MemoryManager;

            var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (_isLinear && _lineCount == 1)
            {
                memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
                _context.AdvanceSequence();
            }
            else
            {
                // TODO: Verify if the destination X/Y and width/height are taken into account
                // for linear texture transfers. If not, we can use the fast path for that as well.
                // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
                if (!_isLinear)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        _dstGpuVa,
                        1,
                        _dstStride,
                        _dstHeight,
                        _lineLengthIn,
                        _lineCount,
                        _isLinear,
                        _dstGobBlocksInY,
                        _dstGobBlocksInZ);

                    if (target != null)
                    {
                        target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));

                        // This path returns early, so the flag has to be set here too;
                        // otherwise the transfer would still be reported as in progress.
                        _finished = true;

                        return;
                    }
                }

                var dstCalculator = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int srcOffset = 0;

                for (int y = _dstY; y < _dstY + _lineCount; y++)
                {
                    int x1      = _dstX;
                    int x2      = _dstX + _lineLengthIn;
                    int x1Round = BitUtils.AlignUp(_dstX, 16);
                    int x2Trunc = BitUtils.AlignDown(x2, 16);

                    int x = x1;

                    // Leading bytes up to the first 16 byte aligned X, only when the line reaches that far.
                    if (x1Round <= x2)
                    {
                        for (; x < x1Round; x++, srcOffset++)
                        {
                            int dstOffset = dstCalculator.GetOffset(x, y);

                            ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                            memoryManager.Write(dstAddress, data[srcOffset]);
                        }
                    }

                    // Aligned middle of the line, 16 bytes per write.
                    for (; x < x2Trunc; x += 16, srcOffset += 16)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                    }

                    // Trailing bytes that do not fill a whole 16 byte write.
                    for (; x < x2; x++, srcOffset++)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, data[srcOffset]);
                    }

                    // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
                    // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
                    int misalignment = _lineLengthIn & 3;

                    if (misalignment != 0)
                    {
                        srcOffset += 4 - misalignment;
                    }
                }

                _context.AdvanceSequence();
            }

            _finished = true;
        }
Example #7
0
        /// <summary>
        /// Writes the buffered inline data to its destination in GPU memory once the transfer has completed.
        /// </summary>
        private void FinishTransfer()
        {
            var gpuMemory = _channel.MemoryManager;
            var payload   = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            // A single linear line is contiguous at the destination, so it is written with one call.
            if (_isLinear && _lineCount == 1)
            {
                gpuMemory.WriteTrackedResource(_dstGpuVa, payload);
                _context.AdvanceSequence();
                _finished = true;

                return;
            }

            // The payload is addressed one byte at a time, hence the bytes per pixel value of 1.
            var layout = new OffsetCalculator(
                _dstWidth,
                _dstHeight,
                _dstStride,
                _isLinear,
                _dstGobBlocksInY,
                1);

            // Translates a destination coordinate into a GPU virtual address.
            ulong AddressOf(int px, int py)
            {
                return _dstGpuVa + (uint)layout.GetOffset(px, py);
            }

            // Horizontal bounds are the same for every line of the transfer.
            int left        = _dstX;
            int right       = left + _lineLengthIn;
            int vectorStart = BitUtils.AlignUp(left, 16);
            int vectorEnd   = BitUtils.AlignDown(right, 16);

            int cursor = 0;

            for (int row = _dstY; row < _dstY + _lineCount; row++)
            {
                int column = left;

                // Leading bytes up to the first 16 byte boundary, unless the line ends before reaching it.
                if (vectorStart <= right)
                {
                    while (column < vectorStart)
                    {
                        gpuMemory.Write(AddressOf(column, row), payload[cursor]);

                        column++;
                        cursor++;
                    }
                }

                // Aligned middle section, moved 16 bytes at a time.
                while (column < vectorEnd)
                {
                    ulong target = AddressOf(column, row);

                    Vector128<byte> chunk = MemoryMarshal.Cast<byte, Vector128<byte>>(payload.Slice(cursor, 16))[0];

                    gpuMemory.Write(target, chunk);

                    column += 16;
                    cursor += 16;
                }

                // Whatever is left of the line, byte by byte.
                while (column < right)
                {
                    gpuMemory.Write(AddressOf(column, row), payload[cursor]);

                    column++;
                    cursor++;
                }
            }

            _context.AdvanceSequence();
            _finished = true;
        }
Example #8
0
        /// <summary>
        /// Executes a copy engine transfer: either a plain buffer to buffer copy,
        /// or a 2D copy that walks the source and destination layouts one pixel at a time.
        /// </summary>
        /// <param name="state">Current GPU state</param>
        /// <param name="argument">Method call argument</param>
        private void CopyBuffer(GpuState state, int argument)
        {
            var copyParams = state.Get<CopyBufferParams>(MethodOffset.CopyBufferParams);
            var swizzle    = state.Get<CopyBufferSwizzle>(MethodOffset.CopyBufferSwizzle);

            // Bits 7, 8 and 9 of the argument select the source layout, destination layout and 2D mode.
            bool linearSource = (argument & 0x080) != 0;
            bool linearTarget = (argument & 0x100) != 0;
            bool is2D         = (argument & 0x200) != 0;

            int columns = copyParams.XCount;

            // Nothing to transfer.
            if (columns == 0)
            {
                return;
            }

            if (!is2D)
            {
                // Buffer to buffer copy.
                BufferManager.CopyBuffer(copyParams.SrcAddress, copyParams.DstAddress, (uint)columns);

                return;
            }

            // Buffer to texture copy.
            int srcBpp = swizzle.UnpackSrcComponentsCount() * swizzle.UnpackComponentSize();
            int dstBpp = swizzle.UnpackDstComponentsCount() * swizzle.UnpackComponentSize();

            var target = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferDstTexture);
            var source = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferSrcTexture);

            var sourceLayout = new OffsetCalculator(
                source.Width,
                source.Height,
                copyParams.SrcStride,
                linearSource,
                source.MemoryLayout.UnpackGobBlocksInY(),
                srcBpp);

            var targetLayout = new OffsetCalculator(
                target.Width,
                target.Height,
                copyParams.DstStride,
                linearTarget,
                target.MemoryLayout.UnpackGobBlocksInY(),
                dstBpp);

            ulong sourceBase = _context.MemoryManager.Translate(copyParams.SrcAddress.Pack());
            ulong targetBase = _context.MemoryManager.Translate(copyParams.DstAddress.Pack());

            var physical = _context.PhysicalMemory;

            for (int row = 0; row < copyParams.YCount; row++)
            {
                for (int column = 0; column < columns; column++)
                {
                    // Resolve both layouts first, then move one source pixel worth of bytes.
                    ulong from = sourceBase + (ulong)sourceLayout.GetOffset(source.RegionX + column, source.RegionY + row);
                    ulong to   = targetBase + (ulong)targetLayout.GetOffset(target.RegionX + column, target.RegionY + row);

                    ReadOnlySpan<byte> pixel = physical.GetSpan(from, (ulong)srcBpp);

                    physical.Write(to, pixel);
                }
            }
        }
Example #9
0
        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy.
        /// </summary>
        /// <remarks>
        /// For 2D copies the destination range is read into a temporary array, updated there,
        /// and written back to physical memory in a single call at the end.
        /// </remarks>
        /// <param name="state">Current GPU state</param>
        /// <param name="argument">Method call argument (bit 7: linear source, bit 8: linear destination, bit 9: 2D copy)</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when a 2D copy needs a layout conversion and the source bytes per pixel is not 1, 2, 4, 8, 12 or 16.
        /// </exception>
        private void CopyBuffer(GpuState state, int argument)
        {
            var cbp = state.Get<CopyBufferParams>(MethodOffset.CopyBufferParams);

            var swizzle = state.Get<CopyBufferSwizzle>(MethodOffset.CopyBufferSwizzle);

            bool srcLinear = (argument & (1 << 7)) != 0;
            bool dstLinear = (argument & (1 << 8)) != 0;
            bool copy2D    = (argument & (1 << 9)) != 0;

            int size = cbp.XCount;

            // Nothing to copy.
            if (size == 0)
            {
                return;
            }

            if (copy2D)
            {
                // Buffer to texture copy.
                int srcBpp = swizzle.UnpackSrcComponentsCount() * swizzle.UnpackComponentSize();
                int dstBpp = swizzle.UnpackDstComponentsCount() * swizzle.UnpackComponentSize();

                var dst = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferDstTexture);
                var src = state.Get<CopyBufferTexture>(MethodOffset.CopyBufferSrcTexture);

                var srcCalculator = new OffsetCalculator(
                    src.Width,
                    src.Height,
                    cbp.SrcStride,
                    srcLinear,
                    src.MemoryLayout.UnpackGobBlocksInY(),
                    src.MemoryLayout.UnpackGobBlocksInZ(),
                    srcBpp);

                var dstCalculator = new OffsetCalculator(
                    dst.Width,
                    dst.Height,
                    cbp.DstStride,
                    dstLinear,
                    dst.MemoryLayout.UnpackGobBlocksInY(),
                    dst.MemoryLayout.UnpackGobBlocksInZ(),
                    dstBpp);

                ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
                ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());

                // Byte range (offset from the base address, and length) touched by the copy rectangle on each side.
                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);

                // The destination is a private copy (ToArray), so bytes outside the rectangle keep their current value on write back.
                ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize);
                Span<byte>         dstSpan = _context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();

                bool completeSource = src.RegionX == 0 && src.RegionY == 0 && src.Width == cbp.XCount && src.Height == cbp.YCount;
                bool completeDest   = dst.RegionX == 0 && dst.RegionY == 0 && dst.Width == cbp.XCount && dst.Height == cbp.YCount;

                if (completeSource && completeDest && srcCalculator.LayoutMatches(dstCalculator))
                {
                    srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
                }
                else
                {
                    // Copies the rectangle one pixel at a time, moving each pixel as a single value of type T.
                    // Always returns true; the return value only exists so the call can be used in the switch expression below.
                    unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
                    {
                        fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
                        {
                            byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                            byte* srcBase = srcPtr - srcBaseOffset;

                            for (int y = 0; y < cbp.YCount; y++)
                            {
                                srcCalculator.SetY(src.RegionY + y);
                                dstCalculator.SetY(dst.RegionY + y);

                                for (int x = 0; x < cbp.XCount; x++)
                                {
                                    int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
                                    int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);

                                    *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                                }
                            }
                        }

                        return true;
                    }

                    // Pick the element type whose size matches the source bytes per pixel.
                    bool _ = srcBpp switch
                    {
                        1 => Convert<byte>(dstSpan, srcSpan),
                        2 => Convert<ushort>(dstSpan, srcSpan),
                        4 => Convert<uint>(dstSpan, srcSpan),
                        8 => Convert<ulong>(dstSpan, srcSpan),
                        12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
                        16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
                        _ => throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.")
                    };
                }

                _context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
            }
            else
            {
                // Buffer to buffer copy.
                BufferManager.CopyBuffer(cbp.SrcAddress, cbp.DstAddress, (uint)size);
            }
        }
Example #10
0
        /// <summary>
        /// Commits the accumulated inline data to GPU memory after the last word of the transfer was received.
        /// </summary>
        private void FinishTransfer()
        {
            var memory = _channel.MemoryManager;
            var bytes  = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            bool singleLinearLine = _isLinear && _lineCount == 1;

            if (singleLinearLine)
            {
                // One contiguous line: write exactly the line length, leaving out any trailing padding.
                memory.WriteTrackedResource(_dstGpuVa, bytes.Slice(0, _lineLengthIn));
            }
            else
            {
                // Offsets are computed per byte, so the layout uses 1 byte per pixel.
                var layout = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int rowStart     = _dstX;
                int rowEnd       = rowStart + _lineLengthIn;
                int alignedStart = BitUtils.AlignUp(rowStart, 16);
                int alignedEnd   = BitUtils.AlignDown(rowEnd, 16);

                // Data is pushed one word at a time, so every line in the buffer is padded to 4 bytes.
                // This is the amount of padding to skip after each line (0 when already a multiple of 4).
                int rowPadding = (4 - (_lineLengthIn & 3)) & 3;

                int srcPos = 0;

                for (int row = _dstY; row < _dstY + _lineCount; row++)
                {
                    int col = rowStart;

                    // Head: single bytes until the destination X is 16 byte aligned,
                    // skipped when the line ends before the first aligned position.
                    if (alignedStart <= rowEnd)
                    {
                        for (; col < alignedStart; col++, srcPos++)
                        {
                            ulong target = _dstGpuVa + (uint)layout.GetOffset(col, row);

                            memory.Write(target, bytes[srcPos]);
                        }
                    }

                    // Body: 16 bytes per write.
                    for (; col < alignedEnd; col += 16, srcPos += 16)
                    {
                        ulong target = _dstGpuVa + (uint)layout.GetOffset(col, row);

                        Vector128<byte> block = MemoryMarshal.Cast<byte, Vector128<byte>>(bytes.Slice(srcPos, 16))[0];

                        memory.Write(target, block);
                    }

                    // Tail: remaining single bytes.
                    for (; col < rowEnd; col++, srcPos++)
                    {
                        ulong target = _dstGpuVa + (uint)layout.GetOffset(col, row);

                        memory.Write(target, bytes[srcPos]);
                    }

                    srcPos += rowPadding;
                }
            }

            _context.AdvanceSequence();

            _finished = true;
        }
Example #11
0
        /// <summary>
        /// Copies texture data from one texture to another without scaling, operating directly on guest memory.
        /// Both textures are expected to share the same layout, size, and bytes per pixel.
        /// </summary>
        /// <param name="src">The source texture</param>
        /// <param name="dst">The destination texture</param>
        /// <param name="w">Copy width</param>
        /// <param name="h">Copy height</param>
        /// <param name="bpp">Bytes per pixel</param>
        private void UnscaledFullCopy(TwodTexture src, TwodTexture dst, int w, int h, int bpp)
        {
            bool linear = src.LinearLayout;

            var gobBlocksInY = src.MemoryLayout.UnpackGobBlocksInY();
            var gobBlocksInZ = src.MemoryLayout.UnpackGobBlocksInZ();

            var regionLayout = new OffsetCalculator(
                w,
                h,
                src.Stride,
                linear,
                gobBlocksInY,
                gobBlocksInZ,
                bpp);

            // Only the size of the copied rectangle is needed, the start offset is discarded.
            (int _, int byteCount) = regionLayout.GetRectangleRange(0, 0, w, h);

            var gpuMemory = _channel.MemoryManager;

            ulong sourceVa = src.Address.Pack();
            ulong targetVa = dst.Address.Pack();

            ReadOnlySpan<byte> source = gpuMemory.GetSpan(sourceVa, byteCount, true);

            // For linear textures the effective width comes from the stride.
            int textureWidth  = linear ? src.Stride / bpp : src.Width;
            int textureHeight = src.Height;

            // When the copy covers the whole texture, the data can be written as-is.
            // The source and destination are already known to be the same size at this point.
            if (w == textureWidth && h == textureHeight)
            {
                gpuMemory.Write(targetVa, source);

                return;
            }

            // Otherwise only part of the destination is replaced, so it is updated in place.
            using WritableRegion targetRegion = gpuMemory.GetWritableRegion(targetVa, byteCount, true);
            Span<byte> target = targetRegion.Memory.Span;

            if (linear)
            {
                int pitch     = src.Stride;
                int lineBytes = textureWidth * bpp;

                for (int row = 0, offset = 0; row < textureHeight; row++, offset += pitch)
                {
                    source.Slice(offset, lineBytes).CopyTo(target.Slice(offset));
                }

                return;
            }

            // Non-linear layout: build a second calculator that works in bytes (bpp 1),
            // so each line can be walked in 16 byte steps followed by a byte-wise tail.
            int rowBytes = w * bpp;

            var byteLayout = new OffsetCalculator(
                rowBytes,
                h,
                0,
                false,
                gobBlocksInY,
                gobBlocksInZ,
                1);

            int vectorBytes = BitUtils.AlignDown(rowBytes, 16);

            ReadOnlySpan<Vector128<byte>> sourceVectors = MemoryMarshal.Cast<byte, Vector128<byte>>(source);
            Span<Vector128<byte>>         targetVectors = MemoryMarshal.Cast<byte, Vector128<byte>>(target);

            for (int row = 0; row < h; row++)
            {
                byteLayout.SetY(row);

                int column = 0;

                while (column < vectorBytes)
                {
                    // Byte offset converted to a 16 byte vector index.
                    int vectorIndex = byteLayout.GetOffset(column) >> 4;

                    targetVectors[vectorIndex] = sourceVectors[vectorIndex];

                    column += 16;
                }

                while (column < rowBytes)
                {
                    int byteIndex = byteLayout.GetOffset(column);

                    target[byteIndex] = source[byteIndex];

                    column++;
                }
            }
        }
Example #12
0
        /// <summary>
        /// Splits a source plane holding two interleaved bytes per pixel into two separate output planes.
        /// The first byte of every pixel goes to <paramref name="dstU"/>, the second to <paramref name="dstV"/>.
        /// Source positions are resolved through an <see cref="OffsetCalculator"/>; a SSE2 path is used when available.
        /// </summary>
        /// <param name="dstU">Output plane receiving the first byte of each source pixel</param>
        /// <param name="dstV">Output plane receiving the second byte of each source pixel</param>
        /// <param name="src">Source data with both components interleaved</param>
        /// <param name="dstStride">Distance, in bytes, between the start of consecutive rows in each output plane</param>
        /// <param name="width">Number of pixels per row</param>
        /// <param name="height">Number of rows</param>
        private unsafe static void ReadChroma(
            Span <byte> dstU,
            Span <byte> dstV,
            ReadOnlySpan <byte> src,
            int dstStride,
            int width,
            int height)
        {
            // NOTE(review): by analogy with the other 6 argument call sites, the trailing arguments are presumably
            // stride = 0, isLinear = false, gobBlocksInY = 2 and bytesPerPixel = 2 - confirm against OffsetCalculator.
            OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);

            if (Sse2.IsSupported)
            {
                // Row length in source bytes (2 per pixel), rounded down to the 64 bytes consumed per vector iteration.
                int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);

                // The output pointers advance by "width" bytes per row; this is what remains to reach the next row.
                int outStrideGap = dstStride - width;

                fixed(byte *dstUPtr = dstU, dstVPtr = dstV, dataPtr = src)
                {
                    byte *uPtr = dstUPtr;
                    byte *vPtr = dstVPtr;

                    for (int y = 0; y < height; y++)
                    {
                        calc.SetY(y);

                        // Here x counts source bytes: each iteration reads 64 of them and emits 32 bytes to each plane.
                        for (int x = 0; x < strideTrunc64; x += 64, uPtr += 32, vPtr += 32)
                        {
                            // The four 16 byte pieces are not contiguous in the source.
                            // NOTE(review): the 0x20 / 0x100 / 0x120 distances presumably follow from the
                            // block layout assumed by GetOffsetWithLineOffset64 - confirm against OffsetCalculator.
                            byte *offset  = dataPtr + calc.GetOffsetWithLineOffset64(x);
                            byte *offset2 = offset + 0x20;
                            byte *offset3 = offset + 0x100;
                            byte *offset4 = offset + 0x120;

                            Vector128 <byte> value  = *(Vector128 <byte> *)offset;
                            Vector128 <byte> value2 = *(Vector128 <byte> *)offset2;
                            Vector128 <byte> value3 = *(Vector128 <byte> *)offset3;
                            Vector128 <byte> value4 = *(Vector128 <byte> *)offset4;

                            // Four rounds of byte interleaving on each pair of vectors act as a de-interleave:
                            // after the last round, the "u" vectors hold the even bytes of the pair (first vector's,
                            // then second vector's) and the "v" vectors hold the odd bytes in the same order.
                            Vector128 <byte> u00 = Sse2.UnpackLow(value, value2);
                            Vector128 <byte> v00 = Sse2.UnpackHigh(value, value2);
                            Vector128 <byte> u01 = Sse2.UnpackLow(value3, value4);
                            Vector128 <byte> v01 = Sse2.UnpackHigh(value3, value4);

                            Vector128 <byte> u10 = Sse2.UnpackLow(u00, v00);
                            Vector128 <byte> v10 = Sse2.UnpackHigh(u00, v00);
                            Vector128 <byte> u11 = Sse2.UnpackLow(u01, v01);
                            Vector128 <byte> v11 = Sse2.UnpackHigh(u01, v01);

                            Vector128 <byte> u20 = Sse2.UnpackLow(u10, v10);
                            Vector128 <byte> v20 = Sse2.UnpackHigh(u10, v10);
                            Vector128 <byte> u21 = Sse2.UnpackLow(u11, v11);
                            Vector128 <byte> v21 = Sse2.UnpackHigh(u11, v11);

                            Vector128 <byte> u30 = Sse2.UnpackLow(u20, v20);
                            Vector128 <byte> v30 = Sse2.UnpackHigh(u20, v20);
                            Vector128 <byte> u31 = Sse2.UnpackLow(u21, v21);
                            Vector128 <byte> v31 = Sse2.UnpackHigh(u21, v21);

                            // 32 bytes per plane: the results for value/value2 first, then those for value3/value4.
                            *(Vector128 <byte> *)uPtr        = u30;
                            *(Vector128 <byte> *)(uPtr + 16) = u31;
                            *(Vector128 <byte> *)vPtr        = v30;
                            *(Vector128 <byte> *)(vPtr + 16) = v31;
                        }

                        // Remaining pixels of the row, one at a time. Here x is a pixel index,
                        // so the byte count handled above is halved (2 source bytes per pixel).
                        for (int x = strideTrunc64 / 2; x < width; x++, uPtr++, vPtr++)
                        {
                            byte *offset = dataPtr + calc.GetOffset(x);

                            *uPtr = *offset;
                            *vPtr = *(offset + 1);
                        }

                        // Skip the unused part of the output row.
                        uPtr += outStrideGap;
                        vPtr += outStrideGap;
                    }
                }
            }
            else
            {
                // Scalar fallback: same result, one pixel at a time using span indexing.
                for (int y = 0; y < height; y++)
                {
                    int dstBaseOffset = y * dstStride;

                    calc.SetY(y);

                    for (int x = 0; x < width; x++)
                    {
                        int srcOffset = calc.GetOffset(x);

                        // First byte of the pixel goes to the U plane, second byte to the V plane.
                        dstU[dstBaseOffset + x] = src[srcOffset];
                        dstV[dstBaseOffset + x] = src[srcOffset + 1];
                    }
                }
            }
        }