public static void Sha256h2_V(ArmEmitterContext context)
{
    // SHA256H2: SHA-256 hash update (part 2), implemented via the managed fallback.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand hash = GetVec(op.Rd);
    Operand abcd = GetVec(op.Rn);
    Operand wk = GetVec(op.Rm);

    Operand result = context.Call(
        typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)),
        hash,
        abcd,
        wk);

    context.Copy(GetVec(op.Rd), result);
}
// Emits an SSE/SSE2 floating-point compare (CMPSS/CMPPS/CMPSD/CMPPD) with the
// given immediate condition code. 'scalar' selects the scalar instruction
// forms and zeroes the unused upper lanes of the destination.
private static void EmitCmpSseOrSse2OpF(ArmEmitterContext context, CmpCondition cond, bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand n = GetVec(op.Rn);
    // Unary (compare-against-zero) forms have no Rm; use a zero vector.
    Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero();

    int sizeF = op.Size & 1;

    if (sizeF == 0)
    {
        // Single precision.
        Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;

        Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));

        if (scalar)
        {
            res = context.VectorZeroUpper96(res);
        }
        else if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else /* if (sizeF == 1) */
    {
        // Double precision.
        Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;

        Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));

        if (scalar)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
}
// REV64 (vector): reverses the order of the elements within each 64-bit
// doubleword. Uses an SSSE3 PSHUFB byte shuffle when available, otherwise
// falls back to the generic element-reverse helper.
public static void Rev64_V(ArmEmitterContext context)
{
    if (Optimizations.UseSsse3)
    {
        OpCodeSimd op = (OpCodeSimd)context.CurrOp;

        Operand n = GetVec(op.Rn);
        Operand mask;

        // Each constant packs the PSHUFB source-byte indices for one 64-bit
        // half of the shuffle control mask (maskE0 = low half, maskE1 = high).
        if (op.Size == 0)
        {
            // 8-bit elements: fully reverse the bytes of each doubleword.
            const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0;
            const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0;

            mask = X86GetScalar(context, maskE0);
            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
        }
        else if (op.Size == 1)
        {
            // 16-bit elements: reverse halfwords, preserving byte order inside each.
            const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L << 24 | 04L << 16 | 07L << 8 | 06L << 0;
            const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0;

            mask = X86GetScalar(context, maskE0);
            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
        }
        else /* if (op.Size == 2) */
        {
            // 32-bit elements: swap the two words of each doubleword.
            const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0;
            const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0;

            mask = X86GetScalar(context, maskE0);
            mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3);
        }

        Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask);

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            res = context.VectorZeroUpper64(res);
        }

        context.Copy(GetVec(op.Rd), res);
    }
    else
    {
        EmitRev_V(context, containerSize: 3);
    }
}
// RSHRN (vector): rounding shift right narrow. Each source element is
// shifted right by an immediate with rounding, then narrowed to half width;
// the results land in the lower or upper half of Vd depending on the form.
public static void Rshrn_V(ArmEmitterContext context)
{
    if (Optimizations.UseSsse3)
    {
        OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

        int shift = GetImmShr(op);
        // Rounding: add half of the amount shifted out before shifting.
        long roundConst = 1L << (shift - 1);

        Operand d = GetVec(op.Rd);
        Operand n = GetVec(op.Rn);

        // Preserve the current low half of Vd (needed by the "2" variant,
        // which writes only the upper half).
        Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

        Operand mask = null;

        // Broadcast the rounding constant at the SOURCE element size (op.Size + 1).
        switch (op.Size + 1)
        {
            case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;
            case 2: mask = X86GetAllElements(context, (int)roundConst); break;
            case 3: mask = X86GetAllElements(context, roundConst); break;
        }

        Intrinsic addInst = X86PaddInstruction[op.Size + 1];

        Operand res = context.AddIntrinsic(addInst, n, mask);

        Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

        res = context.AddIntrinsic(srlInst, res, Const(shift));

        // Narrow by shuffling the low bytes of each wide element together.
        Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);

        res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);

        // Simd128 form writes the upper half of Vd; Simd64 writes the lower half.
        Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;

        res = context.AddIntrinsic(movInst, dLow, res);

        context.Copy(d, res);
    }
    else
    {
        EmitVectorShrImmNarrowOpZx(context, round: true);
    }
}
public static void Sha1m_V(ArmEmitterContext context)
{
    // SHA1M: SHA-1 hash update (majority), implemented via the managed fallback.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand hash = GetVec(op.Rd);
    // Only element 0 of Vn is consumed by the hash step.
    Operand e = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
    Operand wk = GetVec(op.Rm);

    Operand result = context.Call(new _V128_V128_U32_V128(SoftFallback.HashMajority), hash, e, wk);

    context.Copy(GetVec(op.Rd), result);
}
public static void Sha1p_V(ArmEmitterContext context)
{
    // SHA1P: SHA-1 hash update (parity), implemented via the managed fallback.
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    Operand hash = GetVec(op.Rd);
    // Only element 0 of Vn is consumed by the hash step.
    Operand e = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0);
    Operand wk = GetVec(op.Rm);

    Operand result = context.Call(
        typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)),
        hash,
        e,
        wk);

    context.Copy(GetVec(op.Rd), result);
}
// Emits a store (optionally ordered/exclusive, optionally pairwise).
// For exclusive stores, Rs receives the success status (0 = success).
private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair)
{
    OpCodeMemEx op = (OpCodeMemEx)context.CurrOp;

    bool ordered = (accType & AccessType.Ordered) != 0;
    bool exclusive = (accType & AccessType.Exclusive) != 0;

    // Release semantics: barrier before the store.
    if (ordered)
    {
        EmitBarrier(context);
    }

    Operand address = context.Copy(GetIntOrSP(context, op.Rn));

    Operand t = GetIntOrZR(context, op.Rt);

    Operand s = null;

    if (pair)
    {
        Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store.");

        Operand t2 = GetIntOrZR(context, op.Rt2);

        Operand value;

        if (op.Size == 2)
        {
            // Pack two 32-bit values into a single 64-bit store.
            value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32)));
        }
        else /* if (op.Size == 3) */
        {
            // Pack two 64-bit values into a single 128-bit store.
            value = context.VectorInsert(context.VectorZero(), t, 0);
            value = context.VectorInsert(value, t2, 1);
        }

        // op.Size + 1: the combined value is twice the element size.
        s = EmitStoreExclusive(context, address, value, exclusive, op.Size + 1);
    }
    else
    {
        s = EmitStoreExclusive(context, address, t, exclusive, op.Size);
    }

    if (s != null)
    {
        // This is only needed for exclusive stores. The function returns 0
        // when the store is successful, and 1 otherwise.
        SetIntOrZR(context, op.Rs, s);
    }
}
public static void Bl(ArmEmitterContext context)
{
    // BL: branch with link. The link register receives the address of the
    // instruction following this one.
    OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp;

    ulong returnAddress = op.Address + 4;

    Operand returnOp = context.HasTtc
        ? Const(returnAddress, new Symbol(SymbolType.DynFunc, context.GetOffset(returnAddress)))
        : Const(returnAddress);

    context.Copy(GetIntOrZR(context, RegisterAlias.Lr), returnOp);

    EmitCall(context, (ulong)op.Immediate);
}
public static void Vmov_G2(ArmEmitterContext context)
{
    // VMOV: transfer between two GP registers and a pair of consecutive
    // single-precision registers.
    OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp;

    int lowIdx = op.Vm;
    int highIdx = op.Vm + 1;

    Operand lowVec = GetVecA32(lowIdx >> 2);

    // The S-register pair may straddle two underlying Q registers.
    bool samePhysicalVec = (lowIdx >> 2) == (highIdx >> 2);
    Operand highVec = samePhysicalVec ? lowVec : GetVecA32(highIdx >> 2);

    if (op.Op == 1)
    {
        // To general purpose.
        SetIntA32(context, op.Rt, context.VectorExtract(OperandType.I32, lowVec, lowIdx & 3));
        SetIntA32(context, op.Rt2, context.VectorExtract(OperandType.I32, highVec, highIdx & 3));
    }
    else
    {
        // From general purpose.
        Operand updated = context.VectorInsert(lowVec, GetIntA32(context, op.Rt), lowIdx & 3);
        Operand high = GetIntA32(context, op.Rt2);

        if (samePhysicalVec)
        {
            // Both lanes live in the same Q register; fold into one copy.
            context.Copy(lowVec, context.VectorInsert(updated, high, highIdx & 3));
        }
        else
        {
            context.Copy(lowVec, updated);
            context.Copy(highVec, context.VectorInsert(highVec, high, highIdx & 3));
        }
    }
}
// Emits a shift-right-by-immediate over vector or scalar elements.
// Flags select signedness, rounding (add 1 << (shift - 1) before shifting),
// and accumulation (add the shifted result to the existing Vd element).
private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    Operand res = context.VectorZero();

    bool scalar = (flags & ShrImmFlags.Scalar) != 0;
    bool signed = (flags & ShrImmFlags.Signed) != 0;
    bool round = (flags & ShrImmFlags.Round) != 0;
    bool accumulate = (flags & ShrImmFlags.Accumulate) != 0;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed);

        if (op.Size <= 2)
        {
            // Elements up to 32 bits: the extracted value fits in 64 bits,
            // so the rounding addition cannot overflow.
            if (round)
            {
                e = context.Add(e, Const(roundConst));
            }

            e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift));
        }
        else /* if (op.Size == 3) */
        {
            // 64-bit elements need the dedicated helper to round correctly.
            e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift);
        }

        if (accumulate)
        {
            Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed);

            e = context.Add(e, de);
        }

        res = EmitVectorInsert(context, res, e, index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
public static void Scvtf_Gp(ArmEmitterContext context)
{
    // SCVTF (general): signed integer in a GP register to floating point.
    OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;

    Operand value = GetIntOrZR(context, op.Rn);

    // 32-bit sources are sign-extended so the conversion sees the full value.
    if (op.RegisterSize == RegisterSize.Int32)
    {
        value = context.SignExtend32(OperandType.I64, value);
    }

    Operand fp = EmitFPConvert(context, value, op.Size, signed: true);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), fp, 0));
}
// Rotate right by one through the carry flag: result = (m >> 1) | (C << 31).
private static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry)
{
    // Capture the incoming carry before it is (optionally) overwritten below.
    Operand carryIn = context.Copy(GetFlag(PState.CFlag));

    if (setCarry)
    {
        // Carry out is the bit rotated off the bottom.
        SetCarryMLsb(context, m);
    }

    Operand shifted = context.ShiftRightUI(m, Const(1));

    return context.BitwiseOr(shifted, context.ShiftLeft(carryIn, Const(31)));
}
// FCVTZ* (round toward zero): converts each FP element to a saturated
// signed or unsigned integer via the soft-float fallbacks, scaling by
// 2^fBits first for the fixed-point forms.
private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    Operand n = GetVec(op.Rn);

    int sizeF = op.Size & 1;
    // Integer element size index (2 = 32-bit, 3 = 64-bit).
    int sizeI = sizeF + 2;

    OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

    int fBits = GetFBits(context);

    int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = context.VectorExtract(type, n, index);

        // Fixed-point scaling (no-op when fBits == 0).
        Operand e = EmitF2iFBitsMul(context, ne, fBits);

        if (sizeF == 0)
        {
            Delegate dlg = signed
                ? (Delegate)new _S32_F32(SoftFallback.SatF32ToS32)
                : (Delegate)new _U32_F32(SoftFallback.SatF32ToU32);

            e = context.Call(dlg, e);

            e = context.ZeroExtend32(OperandType.I64, e);
        }
        else /* if (sizeF == 1) */
        {
            Delegate dlg = signed
                ? (Delegate)new _S64_F64(SoftFallback.SatF64ToS64)
                : (Delegate)new _U64_F64(SoftFallback.SatF64ToU64);

            e = context.Call(dlg, e);
        }

        res = EmitVectorInsert(context, res, e, index, sizeI);
    }

    context.Copy(GetVec(op.Rd), res);
}
// Computes the effective address for a memory opcode, plus an optional
// extra addend. Handles immediate (with pre/post-indexing) and register
// (optionally shifted/extended) addressing forms.
private static Operand GetAddress(ArmEmitterContext context, long addend = 0)
{
    Operand address = default;

    switch (context.CurrOp)
    {
        case OpCodeMemImm op:
        {
            address = context.Copy(GetIntOrSP(context, op.Rn));

            // Pre-indexing.
            if (!op.PostIdx)
            {
                address = context.Add(address, Const(op.Immediate + addend));
            }
            else if (addend != 0)
            {
                // Post-indexing applies op.Immediate later (at write-back),
                // so only the caller-supplied addend is added here.
                address = context.Add(address, Const(addend));
            }

            break;
        }

        case OpCodeMemReg op:
        {
            Operand n = GetIntOrSP(context, op.Rn);

            Operand m = GetExtendedM(context, op.Rm, op.IntType);

            // Optional scaling of the index by the access size.
            if (op.Shift)
            {
                m = context.ShiftLeft(m, Const(op.Size));
            }

            address = context.Add(n, m);

            if (addend != 0)
            {
                address = context.Add(address, Const(addend));
            }

            break;
        }
    }

    return (address);
}
public static void Fmov_Vi(ArmEmitterContext context)
{
    // FMOV (vector, immediate): broadcast the immediate to every element.
    OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp;

    Operand imm = Const(op.Immediate);
    Operand res = context.VectorZero();

    int totalWords = op.RegisterSize == RegisterSize.Simd128 ? 4 : 2;
    int count = totalWords >> op.Size;

    for (int elem = 0; elem < count; elem++)
    {
        // Insert at element size 4 << op.Size bytes (op.Size + 2 as a log2).
        res = EmitVectorInsert(context, res, imm, elem, op.Size + 2);
    }

    context.Copy(GetVec(op.Rd), res);
}
// Integer
public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
{
    // Applies 'emit' to each integer element of Qm, writing results into Qd.
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand result = GetVecA32(op.Qd);

    int count = op.GetBytesCount() >> op.Size;

    for (int i = 0; i < count; i++)
    {
        Operand source = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);

        result = EmitVectorInsert(context, result, emit(source), op.Id + i, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Emits a vector element load with a fast path for addresses that pass the
// alignment/page-table checks and a software-fallback slow path otherwise.
// size 0-3 loads an 8/16/32/64-bit element into index 'elem'; size 4 loads
// the whole 128-bit vector.
private static void EmitReadVector(
    ArmEmitterContext context,
    Operand address,
    Operand vector,
    int rt,
    int elem,
    int size)
{
    Operand isUnalignedAddr = EmitAddressCheck(context, address, size);

    Operand lblFastPath = Label();
    Operand lblSlowPath = Label();
    Operand lblEnd = Label();

    context.BranchIfFalse(lblFastPath, isUnalignedAddr);

    context.MarkLabel(lblSlowPath);

    EmitReadVectorFallback(context, address, vector, rt, elem, size);

    context.Branch(lblEnd);

    context.MarkLabel(lblFastPath);

    // May branch back to the slow path if the page table lookup fails.
    Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath);

    Operand value = null;

    switch (size)
    {
        case 0: value = context.VectorInsert8(vector, context.Load8(physAddr), elem); break;
        case 1: value = context.VectorInsert16(vector, context.Load16(physAddr), elem); break;
        case 2: value = context.VectorInsert(vector, context.Load(OperandType.I32, physAddr), elem); break;
        case 3: value = context.VectorInsert(vector, context.Load(OperandType.I64, physAddr), elem); break;
        case 4: value = context.Load(OperandType.V128, physAddr); break;
    }

    context.Copy(GetVec(rt), value);

    context.MarkLabel(lblEnd);
}
// Narrow
public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    // op.Size is the narrowed (destination) element size; each source
    // element is one size wider.
    int count = 8 >> op.Size;

    // Select the destination doubleword within the Q register.
    int baseIdx = (op.Vd & 1) << (3 - op.Size);

    Operand result = GetVecA32(op.Qd);

    for (int i = 0; i < count; i++)
    {
        Operand wide = EmitVectorExtract32(context, op.Qm, i, op.Size + 1, signed);

        result = EmitVectorInsert(context, result, emit(wide), baseIdx + i, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
    OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

    bool doubleSize = (op.Size & 1) != 0;

    // A Q register holds 2 double-precision or 4 single-precision values.
    int shift = doubleSize ? 1 : 2;

    Operand source = GetVecA32(op.Vm >> shift);
    Operand dest = GetVecA32(op.Vd >> shift);

    // Move the selected scalar into element 0 before applying the operation.
    source = EmitSwapScalar(context, source, op.Vm, doubleSize);

    Operand computed = scalarFunc(source);

    // Insert scalar into vector.
    context.Copy(dest, EmitScalarInsert(context, dest, computed, op.Vd, doubleSize));
}
private static void EmitSimdMemWBack(ArmEmitterContext context, long offset)
{
    // Write-back: Rn += Rm, or Rn += immediate offset when Rm is ZR.
    OpCodeMemReg op = (OpCodeMemReg)context.CurrOp;

    Operand baseAddr = GetIntOrSP(context, op.Rn);

    Operand increment = op.Rm != RegisterAlias.Zr
        ? GetIntOrZR(context, op.Rm)
        : Const(offset);

    context.Copy(baseAddr, context.Add(baseAddr, increment));
}
// UCVTF via SSE2: unsigned 32-bit integer to single precision.
// CVTDQ2PS is signed-only, so the input is split into its high and low
// 16-bit halves, each converted exactly, then recombined as hi * 65536 + lo.
private static void EmitSse2Ucvtf(ArmEmitterContext context, bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand n = GetVec(op.Rn);

    // High halves: value >> 16, convert, then scale back up by 65536.
    Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
    res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);

    Operand mask = X86GetAllElements(context, 0x47800000); // 65536.0f (1 << 16)
    res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);

    // Low halves: isolate the bottom 16 bits and convert.
    Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
    res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
    res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);

    res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);

    // Fixed-point form: divide by 2^fBits by multiplying with its exact
    // power-of-two reciprocal.
    if (op is OpCodeSimdShImm fixedOp)
    {
        int fBits = GetImmShr(fixedOp);

        // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
        int fpScaled = 0x3F800000 - fBits * 0x800000;

        Operand scale = X86GetAllElements(context, fpScaled);

        res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
    }

    if (scalar)
    {
        res = context.VectorZeroUpper96(res);
    }
    else if (op.RegisterSize == RegisterSize.Simd64)
    {
        res = context.VectorZeroUpper64(res);
    }

    context.Copy(GetVec(op.Rd), res);
}
// Emits a floating-point compare over vector or scalar elements using the
// given soft-float comparison callbacks (f32/f64). Unary forms (no Rm)
// compare against zero; 'absolute' compares magnitudes (|n| vs |m|).
private static void EmitCmpOpF(
    ArmEmitterContext context,
    _F32_F32_F32 f32,
    _F64_F64_F64 f64,
    bool scalar,
    bool absolute = false)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand res = context.VectorZero();

    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

    int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1;

    for (int index = 0; index < elems; index++)
    {
        Operand ne = context.VectorExtract(type, GetVec(op.Rn), index);
        Operand me;

        if (op is OpCodeSimdReg binOp)
        {
            me = context.VectorExtract(type, GetVec(binOp.Rm), index);
        }
        else
        {
            // Compare-against-zero form.
            me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
        }

        if (absolute)
        {
            ne = EmitUnaryMathCall(context, MathF.Abs, Math.Abs, ne);
            me = EmitUnaryMathCall(context, MathF.Abs, Math.Abs, me);
        }

        Operand e = EmitSoftFloatCall(context, f32, f64, ne, me);

        res = context.VectorInsert(res, e, index);
    }

    context.Copy(GetVec(op.Rd), res);
}
// Emits a SIMD multiple-structures load/store (LD1-LD4/ST1-ST4, multiple
// registers form). Walks repetitions, elements, and structure elements,
// advancing a linear byte offset from the base register; handles write-back.
private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad)
{
    OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp;

    Operand n = GetIntOrSP(context, op.Rn);

    long offset = 0;

    for (int rep = 0; rep < op.Reps; rep++)
    {
        for (int elem = 0; elem < op.Elems; elem++)
        {
            for (int sElem = 0; sElem < op.SElems; sElem++)
            {
                // Target register index wraps around after V31.
                int rtt = (op.Rt + rep + sElem) & 0x1f;

                Operand tt = GetVec(rtt);

                Operand address = context.Add(n, Const(offset));

                if (isLoad)
                {
                    EmitLoadSimd(context, address, tt, rtt, elem, op.Size);

                    // 64-bit forms must clear the upper half of the register
                    // once the last element has been written.
                    if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1)
                    {
                        context.Copy(tt, context.VectorZeroUpper64(tt));
                    }
                }
                else
                {
                    EmitStoreSimd(context, address, rtt, elem, op.Size);
                }

                offset += 1 << op.Size;
            }
        }
    }

    if (op.WBack)
    {
        EmitSimdMemWBack(context, offset);
    }
}
public static void Rbit_V(ArmEmitterContext context)
{
    // RBIT (vector): reverse the bits within each byte element.
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    int byteCount = op.RegisterSize == RegisterSize.Simd128 ? 16 : 8;

    for (int i = 0; i < byteCount; i++)
    {
        Operand b = EmitVectorExtractZx(context, op.Rn, i, 0);

        result = EmitVectorInsert(context, result, EmitReverseBits8Op(context, b), i, 0);
    }

    context.Copy(GetVec(op.Rd), result);
}
private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed)
{
    // Widening shift-immediate: narrow source elements produce double-width results.
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand result = context.VectorZero();

    int count = 8 >> op.Size;

    // The "2" (Simd128) variants read from the upper half of the source register.
    int sourceBase = op.RegisterSize == RegisterSize.Simd128 ? count : 0;

    for (int i = 0; i < count; i++)
    {
        Operand element = EmitVectorExtract(context, op.Rn, sourceBase + i, op.Size, signed);

        result = EmitVectorInsert(context, result, emit(element, Const(imm)), i, op.Size + 1);
    }

    context.Copy(GetVec(op.Rd), result);
}
public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    // Vector-by-scalar: every Vn element is combined with one element of Vm.
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    Operand scalar = ExtractElement(context, op.Vm, op.Size, signed);

    Operand result = GetVecA32(op.Qd);

    int count = op.GetBytesCount() >> op.Size;

    for (int i = 0; i < count; i++)
    {
        Operand element = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);

        result = EmitVectorInsert(context, result, emit(element, scalar), op.Id + i, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
public static void Ucvtf_S(ArmEmitterContext context)
{
    // UCVTF (scalar): unsigned integer to floating point.
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    int sizeF = op.Size & 1;

    // The SSE2 fast path covers single precision only.
    if (Optimizations.UseSse2 && sizeF == 0)
    {
        EmitSse2Ucvtf(context, scalar: true);
        return;
    }

    Operand intVal = EmitVectorLongExtract(context, op.Rn, 0, sizeF + 2);
    Operand fpVal = EmitFPConvert(context, intVal, sizeF, signed: false);

    context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), fpVal, 0));
}
public static void Aesimc_V(ArmEmitterContext context)
{
    // AESIMC: AES inverse mix columns.
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand state = GetVec(op.Rn);

    // Prefer the hardware AESIMC instruction when AES-NI is available.
    Operand result = Optimizations.UseAesni
        ? context.AddIntrinsic(Intrinsic.X86Aesimc, state)
        : context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), state);

    context.Copy(GetVec(op.Rd), result);
}
public static void Vmov_GS(ArmEmitterContext context)
{
    // VMOV: transfer between a GP register and a single-precision register.
    OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp;

    // Four single-precision registers share one underlying Q register.
    Operand vec = GetVecA32(op.Vn >> 2);
    int lane = op.Vn & 0x3;

    if (op.Op == 1)
    {
        // To general purpose.
        SetIntA32(context, op.Rt, context.VectorExtract(OperandType.I32, vec, lane));
    }
    else
    {
        // From general purpose.
        context.Copy(vec, context.VectorInsert(vec, GetIntA32(context, op.Rt), lane));
    }
}
public static void Aesimc_V(ArmEmitterContext context)
{
    // AESIMC (A32): AES inverse mix columns.
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand state = GetVecA32(op.Qm);

    // Prefer the hardware AESIMC instruction when AES-NI is available.
    Operand result = Optimizations.UseAesni
        ? context.AddIntrinsic(Intrinsic.X86Aesimc, state)
        : context.Call(new _V128_V128(SoftFallback.InverseMixColumns), state);

    context.Copy(GetVecA32(op.Qd), result);
}