/// <summary>
/// Emits the store operation(s) for the global memory store instruction currently being translated.
/// </summary>
/// <remarks>
/// One word is stored per iteration, at consecutive 4-byte offsets from the low part of the address.
/// For sizes below <c>IntegerSize.B32</c>, the current word is loaded first and combined with the
/// source value through <c>InsertSmallInt</c> before being stored back.
/// </remarks>
/// <param name="context">Emitter context; its <c>CurrOp</c> must be an <c>OpCodeMemory</c>.</param>
private static void EmitStoreGlobal(EmitterContext context)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    bool isSmallInt = op.Size < IntegerSize.B32;

    // Number of words to store for this access size (as reported by GetVectorCount).
    int count = GetVectorCount(op.Size);

    (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, op.Ra, op.Extended, op.Offset);

    Operand bitOffset = GetBitOffset(context, addrLow);

    // When the source register is RZ, every word of the store must come from RZ.
    // Previously the loop stopped after the first word in that case, leaving the
    // remaining words of a multi-word store unwritten.
    bool isRz = op.Rd.IsRZ;

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);

        Operand value = Register(rd);

        if (isSmallInt)
        {
            // Sub-word store: read the existing word and insert the new bits into it.
            Operand word = context.LoadGlobal(addrLow, addrHigh);

            value = InsertSmallInt(context, op.Size, bitOffset, word, value);
        }

        context.StoreGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh, value);

        // A register sequence that started below RZ and has now reached it stops here,
        // so the register index is never advanced past RZ.
        if (!isRz && rd.IsRZ)
        {
            break;
        }
    }
}
/// <summary>
/// Emits the store operation(s) for the local or shared memory store instruction currently being translated.
/// </summary>
/// <remarks>
/// The byte offset (source A plus the instruction offset) is converted to a word offset, and one word
/// is stored per iteration. For sizes below <c>IntegerSize.B32</c>, the current word is loaded first and
/// combined with the source value through <c>InsertSmallInt</c> before being stored back.
/// Sizes above <c>IntegerSize.B128</c> are logged as invalid and nothing is emitted for them.
/// </remarks>
/// <param name="context">Emitter context; its <c>CurrOp</c> must be an <c>OpCodeMemory</c>.</param>
/// <param name="region">Memory region to store to; only <c>Local</c> and <c>Shared</c> emit anything.</param>
private static void EmitStore(EmitterContext context, MemoryRegion region)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    if (op.Size > IntegerSize.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid store size: {op.Size}.");

        // Do not emit a store for a size that was just reported as invalid.
        return;
    }

    bool isSmallInt = op.Size < IntegerSize.B32;

    int count = 1;

    switch (op.Size)
    {
        case IntegerSize.B64:
            count = 2;
            break;
        case IntegerSize.B128:
            count = 4;
            break;
    }

    Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

    // Word offset = byte offset / 4.
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

    Operand bitOffset = GetBitOffset(context, baseOffset);

    // Loop-invariant: when the source is RZ, every word is stored from RZ itself
    // instead of from consecutive registers.
    bool isRz = op.Rd.IsRZ;

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(isRz ? op.Rd.Index : op.Rd.Index + index, RegisterType.Gpr);

        Operand value = Register(rd);

        Operand offset = context.IAdd(wordOffset, Const(index));

        if (isSmallInt)
        {
            // Sub-word store: read the existing word and insert the new bits into it.
            Operand word = null;

            switch (region)
            {
                case MemoryRegion.Local:
                    word = context.LoadLocal(offset);
                    break;
                case MemoryRegion.Shared:
                    word = context.LoadShared(offset);
                    break;
            }

            value = InsertSmallInt(context, op.Size, bitOffset, word, value);
        }

        switch (region)
        {
            case MemoryRegion.Local:
                context.StoreLocal(offset, value);
                break;
            case MemoryRegion.Shared:
                context.StoreShared(offset, value);
                break;
        }
    }
}
/// <summary>
/// Emits the load operation(s) for the local or shared memory load instruction currently being translated.
/// </summary>
/// <remarks>
/// The byte offset (source A plus the instruction offset) is converted to a word offset, and one word
/// is loaded per iteration into consecutive destination registers, stopping once the destination
/// register is RZ. For sizes below <c>IntegerSize.B32</c>, the loaded word is passed through
/// <c>ExtractSmallInt</c> before being copied to the destination.
/// Sizes above <c>IntegerSize.B128</c> are logged as invalid and nothing is emitted for them.
/// </remarks>
/// <param name="context">Emitter context; its <c>CurrOp</c> must be an <c>OpCodeMemory</c>.</param>
/// <param name="region">Memory region to load from; only <c>Local</c> and <c>Shared</c> produce a value.</param>
private static void EmitLoad(EmitterContext context, MemoryRegion region)
{
    OpCodeMemory op = (OpCodeMemory)context.CurrOp;

    if (op.Size > IntegerSize.B128)
    {
        context.Config.GpuAccessor.Log($"Invalid load size: {op.Size}.");

        // Do not emit a load for a size that was just reported as invalid.
        return;
    }

    bool isSmallInt = op.Size < IntegerSize.B32;

    int count = 1;

    switch (op.Size)
    {
        case IntegerSize.B64:
            count = 2;
            break;
        case IntegerSize.B128:
            count = 4;
            break;
    }

    Operand baseOffset = context.IAdd(GetSrcA(context), Const(op.Offset));

    // Word offset = byte offset / 4 (one word = 4 bytes).
    Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));

    Operand bitOffset = GetBitOffset(context, baseOffset);

    for (int index = 0; index < count; index++)
    {
        Register rd = new Register(op.Rd.Index + index, RegisterType.Gpr);

        // Stop as soon as the destination register is RZ; no load is emitted for it.
        if (rd.IsRZ)
        {
            break;
        }

        Operand offset = context.IAdd(wordOffset, Const(index));

        Operand value = null;

        switch (region)
        {
            case MemoryRegion.Local:
                value = context.LoadLocal(offset);
                break;
            case MemoryRegion.Shared:
                value = context.LoadShared(offset);
                break;
        }

        if (isSmallInt)
        {
            // Sub-word load: pull the requested bits out of the loaded word.
            value = ExtractSmallInt(context, op.Size, bitOffset, value);
        }

        context.Copy(Register(rd), value);
    }
}