// Emits the A32 AESMC operation: reads the vector register Qm, applies the AES
// Mix Columns step, and writes the result to Qd.
// When AES-NI is available the step is built from two x86 AES instructions;
// otherwise SoftFallback.MixColumns is called.
public static void Aesmc_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand source = GetVecA32(op.Qm);
    Operand mixed;

    if (!Optimizations.UseAesni)
    {
        // No AES-NI: defer to the managed implementation.
        mixed = context.Call(new _V128_V128(SoftFallback.MixColumns), source);
    }
    else
    {
        // An all-zero round key makes the xor at the end of each AES-NI step a no-op.
        Operand zeroKey = context.VectorZero();

        // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
        Operand undone = context.AddIntrinsic(Intrinsic.X86Aesdeclast, source, zeroKey);

        // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
        // The first two steps cancel the instruction above, leaving only Mix Columns.
        mixed = context.AddIntrinsic(Intrinsic.X86Aesenc, undone, zeroKey);
    }

    context.Copy(GetVecA32(op.Qd), mixed);
}
// Emits the A32 vector VNEG operation.
// Floating-point form (op.F set):
//   - with SSE2, every element's sign bit is flipped by xor-ing with a -0.0 mask;
//     bit 0 of op.Size selects the double (Xorpd) or single (Xorps) variant;
//   - otherwise each element is negated through context.Negate.
// Integer form: each sign-extended element is negated through context.Negate.
public static void Vneg_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (op.F)
    {
        if (Optimizations.UseSse2)
        {
            EmitVectorUnaryOpSimd32(context, (m) =>
            {
                // This is a vector operation, so the sign-bit mask has to be present in
                // every element. A scalar-only mask would leave all but one element
                // un-negated, hence X86GetAllElements rather than X86GetScalar.
                if ((op.Size & 1) == 0)
                {
                    Operand mask = X86GetAllElements(context, -0f);

                    return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m);
                }
                else
                {
                    Operand mask = X86GetAllElements(context, -0d);

                    return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m);
                }
            });
        }
        else
        {
            EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
        }
    }
    else
    {
        EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1));
    }
}
// Emits the A32 vector VCVT between floating-point and integer elements.
// Bit 0 of op.Opc selects the unsigned variant, bit 1 selects float -> integer.
// Float -> integer goes through EmitSaturateFloatToInt; integer -> float goes
// through EmitFPConvert on zero- or sign-extended elements.
public static void Vcvt_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    bool unsigned = (op.Opc & 1) != 0;
    bool toInteger = (op.Opc & 2) != 0;

    if (toInteger)
    {
        EmitVectorUnaryOpF32(context, (value) => EmitSaturateFloatToInt(context, value, unsigned));

        return;
    }

    // Only the integer -> float direction needs the target float type.
    OperandType floatSize = op.Size == 2 ? OperandType.FP32 : OperandType.FP64;

    if (unsigned)
    {
        EmitVectorUnaryOpZx32(context, (value) => EmitFPConvert(context, value, floatSize, false));
    }
    else
    {
        EmitVectorUnaryOpSx32(context, (value) => EmitFPConvert(context, value, floatSize, true));
    }
}
// Emits the A32 AESMC operation using the managed fallback: reads Qm, runs
// SoftFallback.MixColumns on it, and writes the result to Qd.
public static void Aesmc_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand n = GetVecA32(op.Qm);

    Operand res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);

    // Both the source and the destination are A32 Q registers, so the destination is
    // obtained through GetVecA32 as well, matching how the source is read.
    context.Copy(GetVecA32(op.Qd), res);
}
// Emits a whole-vector unary floating-point operation using a single intrinsic.
// inst64 is used when bit 0 of op.Size is set, inst32 otherwise; the chosen
// intrinsic is applied through EmitVectorUnaryOpSimd32.
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    bool useInst64 = (op.Size & 1) != 0;

    Intrinsic selected = useInst64 ? inst64 : inst32;

    EmitVectorUnaryOpSimd32(context, (vector) => context.AddIntrinsic(selected, vector));
}
// Emits the A32 AESE operation using the managed fallback: passes Qd and Qm to
// SoftFallback.Encrypt and stores the returned vector back into Qd.
public static void Aese_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand dest = GetVecA32(op.Qd);
    Operand operand = GetVecA32(op.Qm);

    Operand encrypted = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), dest, operand);

    context.Copy(dest, encrypted);
}
// Emits the A32 SHA256SU0 operation: feeds Qd and Qm to
// InstEmitSimdHashHelper.EmitSha256su0 and writes the result to Qd.
public static void Sha256su0_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand current = GetVecA32(op.Qd);
    Operand source = GetVecA32(op.Qm);

    Operand updated = InstEmitSimdHashHelper.EmitSha256su0(context, current, source);

    context.Copy(GetVecA32(op.Qd), updated);
}
// Emits the A32 VCLE (compare less-than-or-equal) "_Z" form.
// Floating-point elements (op.F set) use the SoftFloat FPCompareLEFpscr routines;
// integer elements use the ICompareLessOrEqual / ICompareLessOrEqualUI emitters.
// NOTE(review): the trailing boolean arguments are passed straight to the shared
// helpers; presumably they select the compare-with-zero and signed variants - confirm
// against EmitCmpOpF32 / EmitCmpOpI32.
public static void Vcle_Z(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true);

        return;
    }

    EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
}
// Emits the A32 VCGT (compare greater-than) "_Z" form.
// Floating-point elements (op.F set) use the SoftFloat FPCompareGTFpscr routines;
// integer elements use the ICompareGreater / ICompareGreaterUI emitters.
// NOTE(review): the trailing boolean arguments are passed straight to the shared
// helpers; presumably they select the compare-with-zero and signed variants - confirm
// against EmitCmpOpF32 / EmitCmpOpI32.
public static void Vcgt_Z(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true);

        return;
    }

    EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
}
// Emits the A32 vector VABS operation.
// Floating-point elements (op.F set) call MathF.Abs / Math.Abs through
// EmitUnaryMathCall; integer elements are sign-extended and passed to EmitAbs.
public static void Vabs_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitVectorUnaryOpSx32(context, (element) => EmitAbs(context, element));

        return;
    }

    EmitVectorUnaryOpF32(context, (element) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, element));
}
// Emits the A32 VREV family. Each zero-extended element is rearranged according to
// op.Opc and op.Size:
//   Opc 0 - byte swap (size 1 via EmitReverseBytes16_32Op, sizes 2 and 3 via ByteSwap);
//   Opc 1 - 16-bit chunks are exchanged (size 2: the two halves of a 32-bit value,
//           size 3: the four 16-bit chunks of a 64-bit value are reversed);
//   Opc 2 - the upper and lower 32-bit halves of a 64-bit value are swapped.
// Any other combination returns the element unchanged.
public static void Vrev(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    EmitVectorUnaryOpZx32(context, (element) =>
    {
        if (op.Opc == 0)
        {
            // Swap bytes.
            if (op.Size == 1)
            {
                return InstEmitAluHelper.EmitReverseBytes16_32Op(context, element);
            }

            if (op.Size == 2 || op.Size == 3)
            {
                return context.ByteSwap(element);
            }

            return element;
        }

        if (op.Opc == 1)
        {
            if (op.Size == 2)
            {
                Operand upperDown = context.ShiftRightUI(context.BitwiseAnd(element, Const(0xffff0000)), Const(16));
                Operand lowerUp = context.ShiftLeft(context.BitwiseAnd(element, Const(0x0000ffff)), Const(16));

                return context.BitwiseOr(upperDown, lowerUp);
            }

            if (op.Size == 3)
            {
                // Outermost pair of 16-bit chunks trade places (distance 48 bits).
                Operand topDown = context.ShiftRightUI(context.BitwiseAnd(element, Const(0xffff000000000000ul)), Const(48));
                Operand bottomUp = context.ShiftLeft(context.BitwiseAnd(element, Const(0x000000000000fffful)), Const(48));
                Operand outerPair = context.BitwiseOr(topDown, bottomUp);

                // Inner pair of 16-bit chunks trade places (distance 16 bits).
                Operand innerDown = context.ShiftRightUI(context.BitwiseAnd(element, Const(0x0000ffff00000000ul)), Const(16));
                Operand innerUp = context.ShiftLeft(context.BitwiseAnd(element, Const(0x00000000ffff0000ul)), Const(16));
                Operand innerPair = context.BitwiseOr(innerDown, innerUp);

                return context.BitwiseOr(outerPair, innerPair);
            }

            return element;
        }

        if (op.Opc == 2)
        {
            // Swap upper and lower halves.
            Operand highDown = context.ShiftRightUI(context.BitwiseAnd(element, Const(0xffffffff00000000ul)), Const(32));
            Operand lowUp = context.ShiftLeft(context.BitwiseAnd(element, Const(0x00000000fffffffful)), Const(32));

            return context.BitwiseOr(highDown, lowUp);
        }

        return element;
    });
}
// Integer

// Applies emit to each integer element of the source vector and inserts the result
// into the destination vector.
// The element count is op.GetBytesCount() >> op.Size; source elements are read
// starting at op.Im and written starting at op.Id. signed is forwarded to
// EmitVectorExtract32.
public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    // Start from the current destination contents; elements are replaced one by one.
    Operand result = GetVecA32(op.Qd);

    int count = op.GetBytesCount() >> op.Size;

    for (int i = 0; i < count; i++)
    {
        Operand element = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);
        Operand mapped = emit(element);

        result = EmitVectorInsert(context, result, mapped, op.Id + i, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Narrow

// Applies emit to each source element of size op.Size + 1 and inserts the result as
// an element of size op.Size into one doubleword of the destination vector.
// signed is forwarded to EmitVectorExtract32.
public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    // Size contains the target element size. (for when it becomes a doubleword)
    int count = 8 >> op.Size;

    Operand result = GetVecA32(op.Qd);

    // Target doubleword base.
    int destBase = (op.Vd & 1) << (3 - op.Size);

    for (int i = 0; i < count; i++)
    {
        Operand wide = EmitVectorExtract32(context, op.Qm, i, op.Size + 1, signed);
        Operand narrowed = emit(wide);

        result = EmitVectorInsert(context, result, narrowed, destBase + i, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Emits the A32 AESIMC operation: reads Qm and writes the transformed vector to Qd.
// Uses the x86 AESIMC intrinsic when AES-NI is available, otherwise calls
// SoftFallback.InverseMixColumns.
public static void Aesimc_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand source = GetVecA32(op.Qm);

    Operand transformed = Optimizations.UseAesni
        ? context.AddIntrinsic(Intrinsic.X86Aesimc, source)
        : context.Call(new _V128_V128(SoftFallback.InverseMixColumns), source);

    context.Copy(GetVecA32(op.Qd), transformed);
}
// Emits the A32 VCLT (compare less-than) "_Z" form.
//   - integer elements: ICompareLess / ICompareLessUI through EmitCmpOpI32;
//   - floating-point with FastFP and SSE2: EmitSse2CmpOpF32 with CmpCondition.LessThan;
//   - floating-point otherwise: the SoftFloat FPCompareLTFpscr routines.
public static void Vclt_Z(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true);
    }
    else if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitSse2CmpOpF32(context, CmpCondition.LessThan, true);
    }
    else
    {
        EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
    }
}
// Emits the A32 VCEQ (compare equal) "_Z" form.
//   - integer elements: ICompareEqual is supplied for both comparer slots of EmitCmpOpI32;
//   - floating-point with FastFP and SSE2: EmitSse2OrAvxCmpOpF32 with CmpCondition.Equal;
//   - floating-point otherwise: the soft-float routine named FPCompareEQFpscr.
public static void Vceq_Z(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false);
    }
    else if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true);
    }
    else
    {
        EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), true);
    }
}
// Emits the A32 AESE operation on Qd and Qm, storing the result in Qd.
// With AES-NI: Qd is xor-ed with Qm, then AESENCLAST is applied with a zero round key.
// Without AES-NI: SoftFallback.Encrypt is called with both vectors.
public static void Aese_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand dest = GetVecA32(op.Qd);
    Operand operand = GetVecA32(op.Qm);

    Operand encrypted;

    if (!Optimizations.UseAesni)
    {
        encrypted = context.Call(new _V128_V128_V128(SoftFallback.Encrypt), dest, operand);
    }
    else
    {
        // The xor is performed up front, so the intrinsic's own round key is zero.
        Operand mixedIn = context.AddIntrinsic(Intrinsic.X86Xorpd, dest, operand);
        Operand zeroKey = context.VectorZero();

        encrypted = context.AddIntrinsic(Intrinsic.X86Aesenclast, mixedIn, zeroKey);
    }

    context.Copy(dest, encrypted);
}
// Emits the A32 AESD operation on Qd and Qm, storing the result in Qd.
// With AES-NI: Qd is xor-ed with Qm, then AESDECLAST is applied with a zero round key.
// Without AES-NI: SoftFallback.Decrypt is called with both vectors.
public static void Aesd_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Operand dest = GetVecA32(op.Qd);
    Operand operand = GetVecA32(op.Qm);

    Operand decrypted;

    if (!Optimizations.UseAesni)
    {
        decrypted = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), dest, operand);
    }
    else
    {
        // The xor is performed up front, so the intrinsic's own round key is zero.
        Operand mixedIn = context.AddIntrinsic(Intrinsic.X86Xorpd, dest, operand);
        Operand zeroKey = context.VectorZero();

        decrypted = context.AddIntrinsic(Intrinsic.X86Aesdeclast, mixedIn, zeroKey);
    }

    context.Copy(dest, decrypted);
}
// Emits the A32 VCGE (compare greater-than-or-equal) "_Z" form.
//   - integer elements: ICompareGreaterOrEqual / ICompareGreaterOrEqualUI through EmitCmpOpI32;
//   - floating-point with FastFP and AVX: EmitSse2OrAvxCmpOpF32 with
//     CmpCondition.GreaterThanOrEqual;
//   - floating-point otherwise: the soft-float routine named FPCompareGEFpscr.
// NOTE(review): the fast path is gated on UseAvx rather than UseSse2, presumably because
// this compare predicate needs the AVX encoding - confirm.
public static void Vcge_Z(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true);
    }
    else if (Optimizations.FastFP && Optimizations.UseAvx)
    {
        EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
    }
    else
    {
        EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), true);
    }
}
// Vector Operand Templates

// Applies vectorFunc to the whole source vector register and stores the result in the
// destination vector register.
// When op.Q is clear (doubleword operation), the source doubleword is first moved to
// the side the destination occupies, and afterwards only the destination doubleword
// is inserted back into the existing register contents.
public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    bool doubleword = !op.Q;

    Operand source = GetVecA32(op.Qm);
    Operand dest = GetVecA32(op.Qd);

    if (doubleword)
    {
        // Register swap: move relevant doubleword to destination side.
        source = EmitMoveDoubleWordToSide(context, source, op.Vm, op.Vd);
    }

    Operand result = vectorFunc(source);

    if (doubleword)
    {
        // Register insert.
        result = EmitDoubleWordInsert(context, dest, result, op.Vd);
    }

    context.Copy(dest, result);
}
// Applies emit to each floating-point element of the source vector and inserts the
// result into the destination vector.
// Bit 0 of op.Size selects FP64 (set) or FP32 (clear) elements. Source elements are
// read starting at op.Fm and written starting at op.Fd.
public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    int sizeF = op.Size & 1;

    OperandType elementType = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

    // Element size is 4 << sizeF bytes, so the count is the byte count shifted by sizeF + 2.
    int count = op.GetBytesCount() >> (sizeF + 2);

    Operand result = GetVecA32(op.Qd);

    for (int i = 0; i < count; i++)
    {
        Operand element = context.VectorExtract(elementType, GetVecA32(op.Qm), op.Fm + i);
        Operand mapped = emit(element);

        result = context.VectorInsert(result, mapped, op.Fd + i);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// Emits a packed floating-point compare using CMPPS (bit 0 of op.Size clear) or
// CMPPD (bit 0 set) with the given condition as the immediate.
// When zero is true the single source vector is compared against an all-zero vector;
// otherwise the two source vectors are compared with each other.
private static void EmitSse2OrAvxCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    Intrinsic compare = (op.Size & 1) != 0 ? Intrinsic.X86Cmppd : Intrinsic.X86Cmpps;

    if (!zero)
    {
        EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(compare, n, m, Const((int)cond)));

        return;
    }

    EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(compare, m, context.VectorZero(), Const((int)cond)));
}
// Emits the A32 vector VABS operation.
//   - integer elements: sign-extended and passed to EmitAbs;
//   - floating-point with FastFP and SSE2: whole-vector EmitFloatAbs;
//   - floating-point otherwise: MathF.Abs / Math.Abs per element via EmitUnaryMathCall.
public static void Vabs_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    if (!op.F)
    {
        EmitVectorUnaryOpSx32(context, (element) => EmitAbs(context, element));
    }
    else if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        // The third argument is true when bit 0 of op.Size is clear.
        EmitVectorUnaryOpSimd32(context, (vector) => EmitFloatAbs(context, vector, (op.Size & 1) == 0, true));
    }
    else
    {
        EmitVectorUnaryOpF32(context, (element) => EmitUnaryMathCall(context, MathF.Abs, Math.Abs, element));
    }
}
// Emits the A32 vector VCVT between floating-point and integer elements.
// Bit 0 of op.Opc selects the unsigned variant, bit 1 selects float -> integer.
//   - float -> integer: EmitSse41ConvertVector32 (round towards zero) with SSE4.1,
//     otherwise EmitSaturateFloatToInt per element;
//   - integer -> float: packed SSE2 conversion when available, otherwise
//     EmitFPConvert on zero- or sign-extended elements.
public static void Vcvt_V(ArmEmitterContext context)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    bool unsigned = (op.Opc & 1) != 0;
    bool toInteger = (op.Opc & 2) != 0;
    OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64;

    if (toInteger)
    {
        if (Optimizations.UseSse41)
        {
            EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
        }
        else
        {
            EmitVectorUnaryOpF32(context, (element) => EmitSaturateFloatToInt(context, element, unsigned));
        }

        return;
    }

    if (!Optimizations.UseSse2)
    {
        if (unsigned)
        {
            EmitVectorUnaryOpZx32(context, (element) => EmitFPConvert(context, element, floatSize, false));
        }
        else
        {
            EmitVectorUnaryOpSx32(context, (element) => EmitFPConvert(context, element, floatSize, true));
        }

        return;
    }

    EmitVectorUnaryOpSimd32(context, (n) =>
    {
        if (!unsigned)
        {
            return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
        }

        // CVTDQ2PS treats its input as signed, so each unsigned 32-bit element is split
        // into 16-bit halves that are converted separately and recombined.
        Operand scale = X86GetAllElements(context, 0x47800000); // 65536.0f

        // Upper 16 bits: convert, then scale back up by 65536.
        Operand upper = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
        upper = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, upper);
        upper = context.AddIntrinsic(Intrinsic.X86Mulps, upper, scale);

        // Lower 16 bits: isolate with a shift left/right pair, then convert.
        Operand lower = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
        lower = context.AddIntrinsic(Intrinsic.X86Psrld, lower, Const(16));
        lower = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, lower);

        return context.AddIntrinsic(Intrinsic.X86Addps, upper, lower);
    });
}
// Emits a packed float -> integer conversion for the current A32 SIMD instruction.
// Bit 0 of op.Size selects the 32-bit (clear) or 64-bit (set) element path. Both paths
// follow the same sequence:
//   1. zero any element that is unordered with itself (NaN);
//   2. round with the requested rounding mode;
//   3. for unsigned conversions, zero every element that is not greater than zero;
//   4. convert, and for unsigned conversions also convert the value minus 2^31 / 2^63
//      so the upper half of the unsigned range can be added back in;
//   5. xor the converted value with an "input >= 2^31 / 2^63" mask (saturation step).
private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
{
    OpCode32Simd op = (OpCode32Simd)context.CurrOp;

    EmitVectorUnaryOpSimd32(context, (n) => (op.Size & 1) == 0 ? ConvertSingle(n) : ConvertDouble(n));

    // 32-bit element path.
    Operand ConvertSingle(Operand n)
    {
        Operand value = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
        value = context.AddIntrinsic(Intrinsic.X86Pand, value, n);

        value = context.AddIntrinsic(Intrinsic.X86Roundps, value, Const(X86GetRoundControl(roundMode)));

        Operand zero = context.VectorZero();

        if (!signed)
        {
            Operand positive = context.AddIntrinsic(Intrinsic.X86Cmpps, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            value = context.AddIntrinsic(Intrinsic.X86Pand, value, positive);
        }

        Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

        Operand converted = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, value);
        Operand convertedExcess = default;

        if (!signed)
        {
            value = context.AddIntrinsic(Intrinsic.X86Subps, value, fpMaxValMask);

            Operand positive = context.AddIntrinsic(Intrinsic.X86Cmpps, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            value = context.AddIntrinsic(Intrinsic.X86Pand, value, positive);

            convertedExcess = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, value);
        }

        Operand overflow = context.AddIntrinsic(Intrinsic.X86Cmpps, value, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

        if (signed)
        {
            return context.AddIntrinsic(Intrinsic.X86Pxor, converted, overflow);
        }

        Operand excess = context.AddIntrinsic(Intrinsic.X86Pxor, convertedExcess, overflow);

        return context.AddIntrinsic(Intrinsic.X86Paddd, excess, converted);
    }

    // 64-bit element path.
    Operand ConvertDouble(Operand n)
    {
        Operand value = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
        value = context.AddIntrinsic(Intrinsic.X86Pand, value, n);

        value = context.AddIntrinsic(Intrinsic.X86Roundpd, value, Const(X86GetRoundControl(roundMode)));

        Operand zero = context.VectorZero();

        if (!signed)
        {
            Operand positive = context.AddIntrinsic(Intrinsic.X86Cmppd, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            value = context.AddIntrinsic(Intrinsic.X86Pand, value, positive);
        }

        Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

        Operand converted = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, value, false);
        Operand convertedExcess = default;

        if (!signed)
        {
            value = context.AddIntrinsic(Intrinsic.X86Subpd, value, fpMaxValMask);

            Operand positive = context.AddIntrinsic(Intrinsic.X86Cmppd, value, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            value = context.AddIntrinsic(Intrinsic.X86Pand, value, positive);

            convertedExcess = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, value, false);
        }

        Operand overflow = context.AddIntrinsic(Intrinsic.X86Cmppd, value, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

        if (signed)
        {
            return context.AddIntrinsic(Intrinsic.X86Pxor, converted, overflow);
        }

        Operand excess = context.AddIntrinsic(Intrinsic.X86Pxor, convertedExcess, overflow);

        return context.AddIntrinsic(Intrinsic.X86Paddq, excess, converted);
    }
}