/// <summary>
/// Consumes one pushed data word: unpacks up to four bytes of <c>methCall.Argument</c>
/// (lowest byte first) into the staging buffer and, when the call is flagged as the last
/// one, writes the staged bytes through <paramref name="vmm"/> and drops the buffer.
/// </summary>
/// <param name="vmm">Memory interface the staged bytes are written through.</param>
/// <param name="methCall">Method call carrying the data word and the last-call flag.</param>
private void PushData(NvGpuVmm vmm, GpuMethodCall methCall)
{
    // Without a staging buffer there is nowhere to put the data; ignore the call.
    if (_buffer == null)
    {
        return;
    }

    Profile.Begin(Profiles.GPU.EngineP2mf.PushData);

    try
    {
        // Split the 32-bit argument into bytes, stopping early once the buffer is full.
        for (int shift = 0; shift < 32 && _copyOffset < _copySize; shift += 8, _copyOffset++)
        {
            _buffer[_copyOffset] = (byte)(methCall.Argument >> shift);
        }

        if (methCall.IsLastCall)
        {
            if (_copyLinear)
            {
                vmm.WriteBytes(_copyAddress, _buffer);
            }
            else
            {
                BlockLinearSwizzle swizzle = new BlockLinearSwizzle(
                    _copyWidth,
                    _copyHeight, 1,
                    _copyGobBlockHeight, 1, 1);

                int srcOffset = 0;

                // Scatter the staged bytes one by one to their swizzled destination offsets.
                for (int y = _copyStartY; y < _copyHeight && srcOffset < _copySize; y++)
                {
                    for (int x = _copyStartX; x < _copyWidth && srcOffset < _copySize; x++)
                    {
                        int dstOffset = swizzle.GetSwizzleOffset(x, y, 0);

                        vmm.WriteByte(_copyAddress + dstOffset, _buffer[srcOffset++]);
                    }
                }
            }

            _buffer = null;
        }
    }
    finally
    {
        // Always close the profiling scope, even when a memory write throws,
        // so Begin/End stay balanced.
        Profile.End(Profiles.GPU.EngineP2mf.PushData);
    }
}
/// <summary>
/// Stores the bytes of one pushed data word into <c>Buffer</c> (lowest byte first) and,
/// on the last call, writes the buffered bytes through <paramref name="Vmm"/> and clears
/// the buffer reference.
/// </summary>
/// <param name="Vmm">Memory interface the buffered bytes are written through.</param>
/// <param name="MethCall">Method call carrying the data word and the last-call flag.</param>
private void PushData(NvGpuVmm Vmm, GpuMethodCall MethCall)
{
    if (Buffer == null)
    {
        return;
    }

    // Unpack at most four bytes from the argument, bounded by the remaining copy size.
    int bitPos = 0;

    while (bitPos < 32 && CopyOffset < CopySize)
    {
        Buffer[CopyOffset] = (byte)(MethCall.Argument >> bitPos);

        bitPos += 8;
        CopyOffset++;
    }

    // More data words are still expected; keep accumulating.
    if (!MethCall.IsLastCall)
    {
        return;
    }

    if (!CopyLinear)
    {
        BlockLinearSwizzle layout = new BlockLinearSwizzle(CopyWidth, 1, CopyGobBlockHeight);

        int readIndex = 0;

        // Write each buffered byte to the offset the swizzle reports for its (column, row).
        for (int row = CopyStartY; row < CopyHeight && readIndex < CopySize; row++)
        {
            for (int col = CopyStartX; col < CopyWidth && readIndex < CopySize; col++)
            {
                int target = layout.GetSwizzleOffset(col, row);

                Vmm.WriteByte(CopyAddress + target, Buffer[readIndex++]);
            }
        }
    }
    else
    {
        Vmm.WriteBytes(CopyAddress, Buffer);
    }

    Buffer = null;
}
/// <summary>
/// Executes a memory-to-memory copy configured by the engine registers and by the control
/// word in <c>methCall.Argument</c> (bit 7: source is linear, bit 8: destination is linear,
/// bit 9: 2D copy).
/// </summary>
/// <param name="vmm">Memory interface used to translate both addresses and perform the copies.</param>
/// <param name="methCall">Method call whose argument is the control word.</param>
private void Execute(NvGpuVmm vmm, GpuMethodCall methCall)
{
    Profile.Begin(Profiles.GPU.EngineM2mf.Execute);

    try
    {
        // TODO: Some registers and copy modes are still not implemented.
        int control = methCall.Argument;

        bool srcLinear = ((control >> 7) & 1) != 0;
        bool dstLinear = ((control >> 8) & 1) != 0;
        bool copy2D    = ((control >> 9) & 1) != 0;

        long srcAddress = MakeInt64From2xInt32(NvGpuEngineM2mfReg.SrcAddress);
        long dstAddress = MakeInt64From2xInt32(NvGpuEngineM2mfReg.DstAddress);

        int srcPitch = ReadRegister(NvGpuEngineM2mfReg.SrcPitch);
        int dstPitch = ReadRegister(NvGpuEngineM2mfReg.DstPitch);

        int xCount = ReadRegister(NvGpuEngineM2mfReg.XCount);
        int yCount = ReadRegister(NvGpuEngineM2mfReg.YCount);

        int swizzle = ReadRegister(NvGpuEngineM2mfReg.Swizzle);

        int dstBlkDim = ReadRegister(NvGpuEngineM2mfReg.DstBlkDim);
        int dstSizeX  = ReadRegister(NvGpuEngineM2mfReg.DstSizeX);
        int dstSizeY  = ReadRegister(NvGpuEngineM2mfReg.DstSizeY);
        int dstSizeZ  = ReadRegister(NvGpuEngineM2mfReg.DstSizeZ);
        int dstPosXY  = ReadRegister(NvGpuEngineM2mfReg.DstPosXY);
        int dstPosZ   = ReadRegister(NvGpuEngineM2mfReg.DstPosZ);

        int srcBlkDim = ReadRegister(NvGpuEngineM2mfReg.SrcBlkDim);
        int srcSizeX  = ReadRegister(NvGpuEngineM2mfReg.SrcSizeX);
        int srcSizeY  = ReadRegister(NvGpuEngineM2mfReg.SrcSizeY);
        int srcSizeZ  = ReadRegister(NvGpuEngineM2mfReg.SrcSizeZ);
        int srcPosXY  = ReadRegister(NvGpuEngineM2mfReg.SrcPosXY);
        int srcPosZ   = ReadRegister(NvGpuEngineM2mfReg.SrcPosZ);

        // Component sizes are stored as (value - 1) in 3-bit fields of the swizzle register.
        int srcCpp = ((swizzle >> 20) & 7) + 1;
        int dstCpp = ((swizzle >> 24) & 7) + 1;

        // X and Y positions are packed as two 16-bit halves of a single register.
        int dstPosX = (dstPosXY >>  0) & 0xffff;
        int dstPosY = (dstPosXY >> 16) & 0xffff;

        int srcPosX = (srcPosXY >>  0) & 0xffff;
        int srcPosY = (srcPosXY >> 16) & 0xffff;

        // Block height is stored as a power-of-two exponent in bits 4..7.
        int srcBlockHeight = 1 << ((srcBlkDim >> 4) & 0xf);
        int dstBlockHeight = 1 << ((dstBlkDim >> 4) & 0xf);

        long srcPa = vmm.GetPhysicalAddress(srcAddress);
        long dstPa = vmm.GetPhysicalAddress(dstAddress);

        if (copy2D)
        {
            // Positions are reset to the origin for whichever side is linear.
            if (srcLinear)
            {
                srcPosX = srcPosY = srcPosZ = 0;
            }

            if (dstLinear)
            {
                dstPosX = dstPosY = dstPosZ = 0;
            }

            if (srcLinear && dstLinear)
            {
                // Both sides are pitch-linear: copy one whole row per iteration.
                for (int y = 0; y < yCount; y++)
                {
                    int srcOffset = (srcPosY + y) * srcPitch + srcPosX * srcCpp;
                    int dstOffset = (dstPosY + y) * dstPitch + dstPosX * dstCpp;

                    long src = srcPa + (uint)srcOffset;
                    long dst = dstPa + (uint)dstOffset;

                    vmm.Memory.CopyBytes(src, dst, xCount * srcCpp);
                }
            }
            else
            {
                ISwizzle srcSwizzle;

                if (srcLinear)
                {
                    srcSwizzle = new LinearSwizzle(srcPitch, srcCpp, srcSizeX, srcSizeY);
                }
                else
                {
                    srcSwizzle = new BlockLinearSwizzle(
                        srcSizeX,
                        srcSizeY, 1,
                        srcBlockHeight, 1,
                        srcCpp);
                }

                ISwizzle dstSwizzle;

                if (dstLinear)
                {
                    // NOTE(review): the linear destination layout is sized with srcSizeX/srcSizeY
                    // rather than dstSizeX/dstSizeY; kept as-is — confirm this is intended.
                    dstSwizzle = new LinearSwizzle(dstPitch, dstCpp, srcSizeX, srcSizeY);
                }
                else
                {
                    dstSwizzle = new BlockLinearSwizzle(
                        dstSizeX,
                        dstSizeY, 1,
                        dstBlockHeight, 1,
                        dstCpp);
                }

                // Copying all the bits at the same time corrupts the texture, unknown why but
                // probably because the texture isn't linear. To avoid this we simply loop more
                // times to cover all the bits, which allows us to recalculate the memory
                // locations for each iteration around the loop.
                // A zero count means there is nothing to copy; skip the scaling so the
                // division below cannot throw DivideByZeroException.
                if (xCount != 0)
                {
                    // Source pitch divided by the per-row count.
                    int bpp = srcPitch / xCount;

                    xCount *= bpp / srcCpp;
                }

                for (int y = 0; y < yCount; y++)
                {
                    for (int x = 0; x < xCount; x++)
                    {
                        int srcOffset = srcSwizzle.GetSwizzleOffset(srcPosX + x, srcPosY + y, 0);
                        int dstOffset = dstSwizzle.GetSwizzleOffset(dstPosX + x, dstPosY + y, 0);

                        long src = srcPa + (uint)srcOffset;
                        long dst = dstPa + (uint)dstOffset;

                        vmm.Memory.CopyBytes(src, dst, srcCpp);
                    }
                }
            }
        }
        else
        {
            // 1D copy: xCount is passed directly as the copy size.
            vmm.Memory.CopyBytes(srcPa, dstPa, xCount);
        }
    }
    finally
    {
        // Always close the profiling scope, even when address translation or a copy throws.
        Profile.End(Profiles.GPU.EngineM2mf.Execute);
    }
}
/// <summary>
/// Performs the memory-to-memory copy described by the engine registers. The control word
/// in <c>methCall.Argument</c> selects linear source (bit 7), linear destination (bit 8)
/// and 2D mode (bit 9).
/// </summary>
/// <param name="vmm">Memory interface used to translate both addresses and perform the copies.</param>
/// <param name="methCall">Method call whose argument is the control word.</param>
private void Execute(NvGpuVmm vmm, GpuMethodCall methCall)
{
    // TODO: Some registers and copy modes are still not implemented.
    int ctrl = methCall.Argument;

    bool srcIsLinear = (ctrl & (1 << 7)) != 0;
    bool dstIsLinear = (ctrl & (1 << 8)) != 0;
    bool is2D        = (ctrl & (1 << 9)) != 0;

    long srcVa = MakeInt64From2xInt32(NvGpuEngineM2mfReg.SrcAddress);
    long dstVa = MakeInt64From2xInt32(NvGpuEngineM2mfReg.DstAddress);

    int srcStride = ReadRegister(NvGpuEngineM2mfReg.SrcPitch);
    int dstStride = ReadRegister(NvGpuEngineM2mfReg.DstPitch);

    int countX = ReadRegister(NvGpuEngineM2mfReg.XCount);
    int countY = ReadRegister(NvGpuEngineM2mfReg.YCount);

    int swizzleReg = ReadRegister(NvGpuEngineM2mfReg.Swizzle);

    int dstBlockDims = ReadRegister(NvGpuEngineM2mfReg.DstBlkDim);
    int dstWidth     = ReadRegister(NvGpuEngineM2mfReg.DstSizeX);
    int dstHeight    = ReadRegister(NvGpuEngineM2mfReg.DstSizeY);
    int dstDepth     = ReadRegister(NvGpuEngineM2mfReg.DstSizeZ);
    int dstXY        = ReadRegister(NvGpuEngineM2mfReg.DstPosXY);
    int dstZ         = ReadRegister(NvGpuEngineM2mfReg.DstPosZ);

    int srcBlockDims = ReadRegister(NvGpuEngineM2mfReg.SrcBlkDim);
    int srcWidth     = ReadRegister(NvGpuEngineM2mfReg.SrcSizeX);
    int srcHeight    = ReadRegister(NvGpuEngineM2mfReg.SrcSizeY);
    int srcDepth     = ReadRegister(NvGpuEngineM2mfReg.SrcSizeZ);
    int srcXY        = ReadRegister(NvGpuEngineM2mfReg.SrcPosXY);
    int srcZ         = ReadRegister(NvGpuEngineM2mfReg.SrcPosZ);

    // 3-bit fields holding (size - 1) for source and destination.
    int srcBpp = ((swizzleReg >> 20) & 7) + 1;
    int dstBpp = ((swizzleReg >> 24) & 7) + 1;

    // Low half is X, high half is Y.
    int dstX = dstXY & 0xffff;
    int dstY = (dstXY >> 16) & 0xffff;

    int srcX = srcXY & 0xffff;
    int srcY = (srcXY >> 16) & 0xffff;

    int srcBlkHeight = 1 << ((srcBlockDims >> 4) & 0xf);
    int dstBlkHeight = 1 << ((dstBlockDims >> 4) & 0xf);

    long srcPhys = vmm.GetPhysicalAddress(srcVa);
    long dstPhys = vmm.GetPhysicalAddress(dstVa);

    // 1D mode: a single flat copy with countX as the size.
    if (!is2D)
    {
        vmm.Memory.CopyBytes(srcPhys, dstPhys, countX);

        return;
    }

    // A linear side always starts at the origin.
    if (srcIsLinear)
    {
        srcX = 0;
        srcY = 0;
        srcZ = 0;
    }

    if (dstIsLinear)
    {
        dstX = 0;
        dstY = 0;
        dstZ = 0;
    }

    // Linear to linear: one copy per row.
    if (srcIsLinear && dstIsLinear)
    {
        for (int row = 0; row < countY; row++)
        {
            int srcRowOffset = (srcY + row) * srcStride + srcX * srcBpp;
            int dstRowOffset = (dstY + row) * dstStride + dstX * dstBpp;

            long from = srcPhys + (uint)srcRowOffset;
            long to   = dstPhys + (uint)dstRowOffset;

            vmm.Memory.CopyBytes(from, to, countX * srcBpp);
        }

        return;
    }

    // At least one side is block-linear: pick a layout per side and copy element by element.
    ISwizzle srcLayout = srcIsLinear
        ? (ISwizzle)new LinearSwizzle(srcStride, srcBpp, srcWidth, srcHeight)
        : new BlockLinearSwizzle(srcWidth, srcHeight, 1, srcBlkHeight, 1, srcBpp);

    // The linear destination layout takes the source width/height, not the destination's.
    ISwizzle dstLayout = dstIsLinear
        ? (ISwizzle)new LinearSwizzle(dstStride, dstBpp, srcWidth, srcHeight)
        : new BlockLinearSwizzle(dstWidth, dstHeight, 1, dstBlkHeight, 1, dstBpp);

    for (int row = 0; row < countY; row++)
    {
        for (int col = 0; col < countX; col++)
        {
            int srcElemOffset = srcLayout.GetSwizzleOffset(srcX + col, srcY + row, 0);
            int dstElemOffset = dstLayout.GetSwizzleOffset(dstX + col, dstY + row, 0);

            long from = srcPhys + (uint)srcElemOffset;
            long to   = dstPhys + (uint)dstElemOffset;

            vmm.Memory.CopyBytes(from, to, srcBpp);
        }
    }
}
/// <summary>
/// Performs the memory-to-memory copy described by the engine registers. The control word
/// is the first argument of <paramref name="PBEntry"/>: bit 7 marks a linear source,
/// bit 8 a linear destination and bit 9 a 2D copy.
/// </summary>
/// <param name="Vmm">Memory interface used to translate both addresses and perform the copies.</param>
/// <param name="PBEntry">Entry whose first argument is the control word.</param>
private void Execute(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    // TODO: Some registers and copy modes are still not implemented.
    int control = PBEntry.Arguments[0];

    bool srcLinear = (control & (1 << 7)) != 0;
    bool dstLinear = (control & (1 << 8)) != 0;
    bool copy2d    = (control & (1 << 9)) != 0;

    long srcAddress = MakeInt64From2xInt32(NvGpuEngineM2mfReg.SrcAddress);
    long dstAddress = MakeInt64From2xInt32(NvGpuEngineM2mfReg.DstAddress);

    int srcPitch = ReadRegister(NvGpuEngineM2mfReg.SrcPitch);
    int dstPitch = ReadRegister(NvGpuEngineM2mfReg.DstPitch);

    int xCount = ReadRegister(NvGpuEngineM2mfReg.XCount);
    int yCount = ReadRegister(NvGpuEngineM2mfReg.YCount);

    int swizzle = ReadRegister(NvGpuEngineM2mfReg.Swizzle);

    int dstBlkDim = ReadRegister(NvGpuEngineM2mfReg.DstBlkDim);
    int dstSizeX  = ReadRegister(NvGpuEngineM2mfReg.DstSizeX);
    int dstSizeY  = ReadRegister(NvGpuEngineM2mfReg.DstSizeY);
    int dstSizeZ  = ReadRegister(NvGpuEngineM2mfReg.DstSizeZ);
    int dstPosXY  = ReadRegister(NvGpuEngineM2mfReg.DstPosXY);
    int dstPosZ   = ReadRegister(NvGpuEngineM2mfReg.DstPosZ);

    int srcBlkDim = ReadRegister(NvGpuEngineM2mfReg.SrcBlkDim);
    int srcSizeX  = ReadRegister(NvGpuEngineM2mfReg.SrcSizeX);
    int srcSizeY  = ReadRegister(NvGpuEngineM2mfReg.SrcSizeY);
    int srcSizeZ  = ReadRegister(NvGpuEngineM2mfReg.SrcSizeZ);
    int srcPosXY  = ReadRegister(NvGpuEngineM2mfReg.SrcPosXY);
    int srcPosZ   = ReadRegister(NvGpuEngineM2mfReg.SrcPosZ);

    // 3-bit fields holding (size - 1) for source and destination.
    int srcCpp = ((swizzle >> 20) & 7) + 1;
    int dstCpp = ((swizzle >> 24) & 7) + 1;

    // X lives in the low 16 bits, Y in the high 16 bits.
    int dstPosX = dstPosXY & 0xffff;
    int dstPosY = (dstPosXY >> 16) & 0xffff;

    int srcPosX = srcPosXY & 0xffff;
    int srcPosY = (srcPosXY >> 16) & 0xffff;

    int srcBlockHeight = 1 << ((srcBlkDim >> 4) & 0xf);
    int dstBlockHeight = 1 << ((dstBlkDim >> 4) & 0xf);

    long srcPa = Vmm.GetPhysicalAddress(srcAddress);
    long dstPa = Vmm.GetPhysicalAddress(dstAddress);

    // 1D mode: a single flat copy with xCount as the size.
    if (!copy2d)
    {
        Vmm.Memory.CopyBytes(srcPa, dstPa, xCount);

        return;
    }

    // A linear side always starts at the origin.
    if (srcLinear)
    {
        srcPosX = 0;
        srcPosY = 0;
        srcPosZ = 0;
    }

    if (dstLinear)
    {
        dstPosX = 0;
        dstPosY = 0;
        dstPosZ = 0;
    }

    if (srcLinear && dstLinear)
    {
        // Both sides linear: copy a full row at a time.
        for (int line = 0; line < yCount; line++)
        {
            int srcLineOffset = (srcPosY + line) * srcPitch + srcPosX * srcCpp;
            int dstLineOffset = (dstPosY + line) * dstPitch + dstPosX * dstCpp;

            long src = srcPa + (uint)srcLineOffset;
            long dst = dstPa + (uint)dstLineOffset;

            Vmm.Memory.CopyBytes(src, dst, xCount * srcCpp);
        }

        return;
    }

    // Mixed layouts: resolve every element through the matching swizzle.
    ISwizzle srcSwizzle = srcLinear
        ? (ISwizzle)new LinearSwizzle(srcPitch, srcCpp)
        : new BlockLinearSwizzle(srcSizeX, srcCpp, srcBlockHeight);

    ISwizzle dstSwizzle = dstLinear
        ? (ISwizzle)new LinearSwizzle(dstPitch, dstCpp)
        : new BlockLinearSwizzle(dstSizeX, dstCpp, dstBlockHeight);

    for (int line = 0; line < yCount; line++)
    {
        for (int elem = 0; elem < xCount; elem++)
        {
            int srcElemOffset = srcSwizzle.GetSwizzleOffset(srcPosX + elem, srcPosY + line);
            int dstElemOffset = dstSwizzle.GetSwizzleOffset(dstPosX + elem, dstPosY + line);

            long src = srcPa + (uint)srcElemOffset;
            long dst = dstPa + (uint)dstElemOffset;

            Vmm.Memory.CopyBytes(src, dst, srcCpp);
        }
    }
}
/// <summary>
/// Copies a <c>DstSizeX</c> by <c>DstSizeY</c> area one byte at a time from the source
/// address to the destination address, resolving each coordinate through the source and
/// destination layouts. The first argument of <paramref name="PBEntry"/> is the control
/// word: bit 7 marks a linear source and bit 8 a linear destination.
/// </summary>
/// <param name="Vmm">Memory interface used for the byte reads and writes.</param>
/// <param name="PBEntry">Entry whose first argument is the control word.</param>
private void Execute(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    int control = PBEntry.Arguments[0];

    bool srcLinear = (control & (1 << 7)) != 0;
    bool dstLinear = (control & (1 << 8)) != 0;

    long srcBase = MakeInt64From2xInt32(NvGpuEngineDmaReg.SrcAddress);
    long dstBase = MakeInt64From2xInt32(NvGpuEngineDmaReg.DstAddress);

    int srcPitch = ReadRegister(NvGpuEngineDmaReg.SrcPitch);
    int dstPitch = ReadRegister(NvGpuEngineDmaReg.DstPitch);

    int dstBlkDim = ReadRegister(NvGpuEngineDmaReg.DstBlkDim);
    int dstSizeX  = ReadRegister(NvGpuEngineDmaReg.DstSizeX);
    int dstSizeY  = ReadRegister(NvGpuEngineDmaReg.DstSizeY);
    int dstSizeZ  = ReadRegister(NvGpuEngineDmaReg.DstSizeZ);
    int dstPosXY  = ReadRegister(NvGpuEngineDmaReg.DstPosXY);
    int dstPosZ   = ReadRegister(NvGpuEngineDmaReg.DstPosZ);

    int srcBlkDim = ReadRegister(NvGpuEngineDmaReg.SrcBlkDim);
    int srcSizeX  = ReadRegister(NvGpuEngineDmaReg.SrcSizeX);
    int srcSizeY  = ReadRegister(NvGpuEngineDmaReg.SrcSizeY);
    int srcSizeZ  = ReadRegister(NvGpuEngineDmaReg.SrcSizeZ);
    int srcPosXY  = ReadRegister(NvGpuEngineDmaReg.SrcPosXY);
    int srcPosZ   = ReadRegister(NvGpuEngineDmaReg.SrcPosZ);

    // Positions are decoded here but the copy loop below does not use them.
    int dstPosX = dstPosXY & 0xffff;
    int dstPosY = (dstPosXY >> 16) & 0xffff;

    int srcPosX = srcPosXY & 0xffff;
    int srcPosY = (srcPosXY >> 16) & 0xffff;

    int srcBlockHeight = 1 << ((srcBlkDim >> 4) & 0xf);
    int dstBlockHeight = 1 << ((dstBlkDim >> 4) & 0xf);

    // Both layouts are built with a component size of 1, matching the per-byte copy below.
    ISwizzle srcLayout = srcLinear
        ? (ISwizzle)new LinearSwizzle(srcPitch, 1)
        : new BlockLinearSwizzle(srcSizeX, 1, srcBlockHeight);

    ISwizzle dstLayout = dstLinear
        ? (ISwizzle)new LinearSwizzle(dstPitch, 1)
        : new BlockLinearSwizzle(dstSizeX, 1, dstBlockHeight);

    for (int row = 0; row < dstSizeY; row++)
    {
        for (int col = 0; col < dstSizeX; col++)
        {
            long readAddress  = srcBase + (uint)srcLayout.GetSwizzleOffset(col, row);
            long writeAddress = dstBase + (uint)dstLayout.GetSwizzleOffset(col, row);

            Vmm.WriteByte(writeAddress, Vmm.ReadByte(readAddress));
        }
    }
}