/// <summary>
/// Emits the integer vector minimum for the current <see cref="OpCode32SimdReg"/>.
/// The comparison is unsigned when <c>op.U</c> is set and signed otherwise.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vmin_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    if (Optimizations.UseSse2)
    {
        // Whole-vector path: choose the unsigned or signed intrinsic table,
        // then index it by element size when the operation is emitted.
        Intrinsic[] minTable = op.U ? X86PminuInstruction : X86PminsInstruction;

        EmitVectorBinaryOpSimd32(context, (lhs, rhs) => context.AddIntrinsic(minTable[op.Size], lhs, rhs));

        return;
    }

    // Element-wise fallback: keep the left operand when it compares lower, else the right one.
    if (op.U)
    {
        EmitVectorBinaryOpZx32(context, (lhs, rhs) =>
        {
            Operand lhsIsLower = context.ICompareLessUI(lhs, rhs);

            return context.ConditionalSelect(lhsIsLower, lhs, rhs);
        });
    }
    else
    {
        EmitVectorBinaryOpSx32(context, (lhs, rhs) =>
        {
            Operand lhsIsLower = context.ICompareLess(lhs, rhs);

            return context.ConditionalSelect(lhsIsLower, lhs, rhs);
        });
    }
}
/// <summary>
/// Emits the bitwise insert <c>d ^ (mask &amp; (d ^ n))</c>, where <c>mask</c> is <c>m</c>,
/// or the complement of <c>m</c> when <paramref name="notRm"/> is true.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="notRm">When true, the <c>m</c> operand is inverted before it is used as the mask.</param>
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    if (!Optimizations.UseSse2)
    {
        // Element-wise fallback built from generic bitwise operations.
        EmitVectorTernaryOpZx32(context, (d, n, m) =>
        {
            Operand mask = notRm ? context.BitwiseNot(m) : m;
            Operand diff = context.BitwiseExclusiveOr(d, n);

            return context.BitwiseExclusiveOr(context.BitwiseAnd(mask, diff), d);
        });

        return;
    }

    // Whole-vector path: X86Pandn replaces X86Pand when the mask must be inverted,
    // so no separate NOT is emitted.
    Intrinsic maskInst = notRm ? Intrinsic.X86Pandn : Intrinsic.X86Pand;

    EmitVectorTernaryOpSimd32(context, (d, n, m) =>
    {
        Operand diff = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
        Operand masked = context.AddIntrinsic(maskInst, m, diff);

        return context.AddIntrinsic(Intrinsic.X86Pxor, d, masked);
    });
}
/// <summary>
/// Applies <paramref name="vectorFunc"/> to the <c>Qn</c> and <c>Qm</c> vectors of the current
/// <see cref="OpCode32SimdReg"/> and copies the result into <c>Qd</c>.
/// When <c>op.Q</c> is false, the source doublewords are first moved to a working side and the
/// result doubleword is inserted back into the destination at <c>op.Vd</c>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="vectorFunc">Produces the result vector from the two (possibly rearranged) sources.</param>
/// <param name="side">Working side for the doubleword case; -1 selects <c>op.Vd</c>.</param>
public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand srcN = GetVecA32(op.Qn);
    Operand srcM = GetVecA32(op.Qm);
    Operand dest = GetVecA32(op.Qd);

    int workSide = side == -1 ? op.Vd : side;

    if (op.Q)
    {
        // Full-width operation: no doubleword shuffling is required.
        context.Copy(dest, vectorFunc(srcN, srcM));

        return;
    }

    // Register swap: move relevant doubleword to the working side.
    srcN = EmitMoveDoubleWordToSide(context, srcN, op.Vn, workSide);
    srcM = EmitMoveDoubleWordToSide(context, srcM, op.Vm, workSide);

    Operand result = vectorFunc(srcN, srcM);

    // Register insert: bring the result to the destination side if needed, then merge it into d.
    if (workSide != op.Vd)
    {
        result = EmitMoveDoubleWordToSide(context, result, workSide, op.Vd);
    }

    result = EmitDoubleWordInsert(context, dest, result, op.Vd);

    context.Copy(dest, result);
}
/// <summary>
/// For each adjacent pair of <c>Qm</c> elements, calls <paramref name="emit"/> with the two pair
/// members and the matching destination element (extracted at size <c>op.Size + 1</c>), then
/// inserts the result back into the destination vector at that wider size.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="emit">Called as <c>emit(first, second, destElement)</c> for every pair.</param>
/// <param name="signed">Passed to the element extraction and selects sign- versus zero-extension.</param>
public static void EmitVectorPairwiseTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    int elemCount = op.GetBytesCount() >> op.Size;
    int pairCount = elemCount >> 1;
    int wideSize = op.Size + 1;

    Operand acc = GetVecA32(op.Qd);

    for (int pair = 0; pair < pairCount; pair++)
    {
        int srcIndex = op.Im + pair * 2;
        int dstIndex = op.Id + pair;

        Operand first = EmitVectorExtract32(context, op.Qm, srcIndex, op.Size, signed);
        Operand second = EmitVectorExtract32(context, op.Qm, srcIndex + 1, op.Size, signed);

        if (op.Size == 2)
        {
            // Size 2 sources are explicitly extended from 32 bits to I64 before the operation.
            if (signed)
            {
                first = context.SignExtend32(OperandType.I64, first);
                second = context.SignExtend32(OperandType.I64, second);
            }
            else
            {
                first = context.ZeroExtend32(OperandType.I64, first);
                second = context.ZeroExtend32(OperandType.I64, second);
            }
        }

        Operand destElem = EmitVectorExtract32(context, op.Qd, dstIndex, wideSize, signed);

        acc = EmitVectorInsert(context, acc, emit(first, second, destElem), dstIndex, wideSize);
    }

    context.Copy(GetVecA32(op.Qd), acc);
}
/// <summary>
/// For each source element, calls <paramref name="emit"/> with the destination element
/// (extracted at size <c>op.Size + 1</c>) and the <c>Qn</c>/<c>Qm</c> elements, and inserts the
/// result at size <c>op.Size + 1</c> into a vector that starts out zeroed. The finished vector is
/// then copied to <c>Qd</c>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="emit">Called as <c>emit(destElement, nElement, mElement)</c> for every element.</param>
/// <param name="signed">Passed to the element extraction and selects sign- versus zero-extension.</param>
public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand acc = context.VectorZero();

    int elemCount = op.GetBytesCount() >> op.Size;
    int wideSize = op.Size + 1;

    for (int i = 0; i < elemCount; i++)
    {
        Operand destElem = EmitVectorExtract32(context, op.Qd, op.Id + i, wideSize, signed);
        Operand nElem = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);
        Operand mElem = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);

        if (op.Size == 2)
        {
            // Size 2 sources are explicitly extended from 32 bits to I64 before the operation.
            if (signed)
            {
                nElem = context.SignExtend32(OperandType.I64, nElem);
                mElem = context.SignExtend32(OperandType.I64, mElem);
            }
            else
            {
                nElem = context.ZeroExtend32(OperandType.I64, nElem);
                mElem = context.ZeroExtend32(OperandType.I64, mElem);
            }
        }

        // Results are placed from element 0 of the zeroed vector, not offset by op.Id.
        acc = EmitVectorInsert(context, acc, emit(destElem, nElem, mElem), i, wideSize);
    }

    context.Copy(GetVecA32(op.Qd), acc);
}
// Pairwise

/// <summary>
/// Floating-point pairwise operation: for every adjacent pair in <c>Qn</c> and in <c>Qm</c>,
/// calls <paramref name="emit"/> and stores the <c>Qn</c> results in the first <c>pairs</c>
/// destination slots and the <c>Qm</c> results in the following <c>pairs</c> slots.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="emit">Combines the two members of a pair into one result element.</param>
public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Bit 0 of the size field selects FP64 over FP32.
    int sizeF = op.Size & 1;

    OperandType elemType = OperandType.FP32;

    if (sizeF != 0)
    {
        elemType = OperandType.FP64;
    }

    int elemCount = op.GetBytesCount() >> (sizeF + 2);
    int pairCount = elemCount >> 1;

    Operand acc = GetVecA32(op.Qd);
    Operand srcM = GetVecA32(op.Qm);
    Operand srcN = GetVecA32(op.Qn);

    for (int pair = 0; pair < pairCount; pair++)
    {
        int first = pair * 2;
        int second = first + 1;

        Operand nLo = context.VectorExtract(elemType, srcN, op.Fn + first);
        Operand nHi = context.VectorExtract(elemType, srcN, op.Fn + second);

        acc = context.VectorInsert(acc, emit(nLo, nHi), op.Fd + pair);

        Operand mLo = context.VectorExtract(elemType, srcM, op.Fm + first);
        Operand mHi = context.VectorExtract(elemType, srcM, op.Fm + second);

        acc = context.VectorInsert(acc, emit(mLo, mHi), op.Fd + pair + pairCount);
    }

    context.Copy(GetVecA32(op.Qd), acc);
}
/// <summary>
/// Emits a whole-vector floating-point binary operation using a single intrinsic:
/// <paramref name="inst64"/> when bit 0 of <c>op.Size</c> is set, otherwise <paramref name="inst32"/>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="inst32">Intrinsic used when bit 0 of the size field is clear.</param>
/// <param name="inst64">Intrinsic used when bit 0 of the size field is set.</param>
public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Intrinsic selected = inst32;

    if ((op.Size & 1) != 0)
    {
        selected = inst64;
    }

    EmitVectorBinaryOpSimd32(context, (lhs, rhs) => context.AddIntrinsic(selected, lhs, rhs));
}
/// <summary>
/// Emits a whole-vector ternary operation as one three-operand intrinsic applied to
/// <c>(d, n, m)</c>. Asserts in debug builds that bit 0 of <c>op.Size</c> is clear.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="inst32">Intrinsic to emit.</param>
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Only one intrinsic is supplied, so the size bit that would select an alternative must be clear.
    Debug.Assert((op.Size & 1) == 0);

    EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst32, d, n, m));
}
/// <summary>
/// Emits the integer vector add for the current instruction, using the
/// <c>X86PaddInstruction</c> table when <c>Optimizations.UseSse2</c> is set and an
/// element-wise add otherwise.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vadd_I(ArmEmitterContext context)
{
    if (!Optimizations.UseSse2)
    {
        EmitVectorBinaryOpZx32(context, (lhs, rhs) => context.Add(lhs, rhs));

        return;
    }

    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // The intrinsic table is indexed by element size.
    EmitVectorBinaryOpSimd32(context, (lhs, rhs) =>
    {
        return context.AddIntrinsic(X86PaddInstruction[op.Size], lhs, rhs);
    });
}
/// <summary>
/// Passes the <c>Qd</c>, <c>Qn</c> and <c>Qm</c> vectors to
/// <c>InstEmitSimdHashHelper.EmitSha256su1</c> and copies the result into <c>Qd</c>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Sha256su1_V(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand vecD = GetVecA32(op.Qd);
    Operand vecN = GetVecA32(op.Qn);
    Operand vecM = GetVecA32(op.Qm);

    Operand updated = InstEmitSimdHashHelper.EmitSha256su1(context, vecD, vecN, vecM);

    context.Copy(GetVecA32(op.Qd), updated);
}
/// <summary>
/// Emits the integer pairwise add, using the SSSE3 pairwise helper with the
/// <c>X86PaddInstruction</c> table when available and the element-wise helper otherwise.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vpadd_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    if (!Optimizations.UseSsse3)
    {
        // Elements are extracted as signed unless op.U is set.
        bool signed = !op.U;

        EmitVectorPairwiseOpI32(context, (lhs, rhs) => context.Add(lhs, rhs), signed);

        return;
    }

    EmitSsse3VectorPairwiseOp32(context, X86PaddInstruction);
}
/// <summary>
/// Emits the integer vector multiply. When <c>op.U</c> is set, a polynomial multiply is emitted
/// per element; otherwise a regular multiply on sign-extended elements.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vmul_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // This instruction is always signed, U indicates polynomial mode.
    bool polynomial = op.U;

    if (!polynomial)
    {
        EmitVectorBinaryOpSx32(context, (lhs, rhs) => context.Multiply(lhs, rhs));

        return;
    }

    EmitVectorBinaryOpZx32(context, (lhs, rhs) =>
    {
        return EmitPolynomialMultiply(context, lhs, rhs, 8 << op.Size);
    });
}
/// <summary>
/// Emits a whole-vector ternary operation in two steps: the first intrinsic combines
/// <c>n</c> and <c>m</c>, and the second combines <c>d</c> with that intermediate result.
/// The 64-bit variants are chosen when bit 0 of <c>op.Size</c> is set.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="inst32pt1">First-step intrinsic when bit 0 of the size field is clear.</param>
/// <param name="inst64pt1">First-step intrinsic when bit 0 of the size field is set.</param>
/// <param name="inst32pt2">Second-step intrinsic when bit 0 of the size field is clear.</param>
/// <param name="inst64pt2">Second-step intrinsic when bit 0 of the size field is set.</param>
public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    bool use64 = (op.Size & 1) != 0;

    Intrinsic firstStep = use64 ? inst64pt1 : inst32pt1;
    Intrinsic secondStep = use64 ? inst64pt2 : inst32pt2;

    EmitVectorTernaryOpSimd32(context, (d, n, m) =>
    {
        Operand partial = context.AddIntrinsic(firstStep, n, m);

        return context.AddIntrinsic(secondStep, d, partial);
    });
}
/// <summary>
/// Emits the register-controlled vector shift by calling <c>EmitShlRegOp</c> per element,
/// with zero-extended elements and the unsigned flag when <c>op.U</c> is set, and
/// sign-extended elements otherwise.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vshl_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Note the operand order: the second element operand is handed to EmitShlRegOp first.
    if (!op.U)
    {
        EmitVectorBinaryOpSx32(context, (first, second) =>
        {
            return EmitShlRegOp(context, second, first, op.Size, false);
        });

        return;
    }

    EmitVectorBinaryOpZx32(context, (first, second) =>
    {
        return EmitShlRegOp(context, second, first, op.Size, true);
    });
}
/// <summary>
/// Emits the integer vector minimum element by element: each result is the first operand when
/// it compares lower than the second, otherwise the second. The comparison is unsigned when
/// <c>op.U</c> is set and signed otherwise.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vmin_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    if (!op.U)
    {
        EmitVectorBinaryOpSx32(context, (lhs, rhs) =>
        {
            Operand lhsIsLower = context.ICompareLess(lhs, rhs);

            return context.ConditionalSelect(lhsIsLower, lhs, rhs);
        });

        return;
    }

    EmitVectorBinaryOpZx32(context, (lhs, rhs) =>
    {
        Operand lhsIsLower = context.ICompareLessUI(lhs, rhs);

        return context.ConditionalSelect(lhsIsLower, lhs, rhs);
    });
}
// Pairwise

/// <summary>
/// Emits a floating-point pairwise operation: the <c>n</c> and <c>m</c> vectors are combined with
/// <c>X86Unpcklps</c>, a second operand is formed from that result with <c>X86Movhlps</c>, and
/// <paramref name="inst32"/> is applied to the two. The work is done on side 0.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="inst32">Intrinsic applied to the two rearranged operands.</param>
public static void EmitSse2VectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
{
    EmitVectorBinaryOpSimd32(context, (n, m) =>
    {
        Operand unpacked = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
        Operand upperHalf = context.AddIntrinsic(Intrinsic.X86Movhlps, unpacked, unpacked);

        return context.AddIntrinsic(inst32, unpacked, upperHalf);
    }, 0);
}
/// <summary>
/// Emits the bitwise insert <c>d ^ (mask &amp; (d ^ n))</c> element by element, where
/// <c>mask</c> is <c>m</c>, or the complement of <c>m</c> when <paramref name="notRm"/> is true.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="notRm">When true, the <c>m</c> operand is inverted before it is used as the mask.</param>
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    EmitVectorTernaryOpZx32(context, (d, n, m) =>
    {
        Operand mask = notRm ? context.BitwiseNot(m) : m;

        // Bits where d and n differ; only those selected by the mask get flipped in d.
        Operand diff = context.BitwiseExclusiveOr(d, n);
        Operand flips = context.BitwiseAnd(mask, diff);

        return context.BitwiseExclusiveOr(flips, d);
    });
}
/// <summary>
/// Emits the integer pairwise minimum, using the SSSE3 pairwise helper with the unsigned or
/// signed intrinsic table when available, and a compare-and-select per pair otherwise.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vpmin_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    if (!Optimizations.UseSsse3)
    {
        EmitVectorPairwiseOpI32(context, (lhs, rhs) =>
        {
            Operand lhsIsLower;

            if (op.U)
            {
                lhsIsLower = context.ICompareLessUI(lhs, rhs);
            }
            else
            {
                lhsIsLower = context.ICompareLess(lhs, rhs);
            }

            // Keep the left operand when it is the lower one, else the right one.
            return context.ConditionalSelect(lhsIsLower, lhs, rhs);
        }, !op.U);

        return;
    }

    Intrinsic[] minTable = op.U ? X86PminuInstruction : X86PminsInstruction;

    EmitSsse3VectorPairwiseOp32(context, minTable);
}
/// <summary>
/// Element-wise integer binary operation: extracts matching elements from <c>Qn</c> and
/// <c>Qm</c>, calls <paramref name="emit"/> on them and inserts each result into the
/// destination vector, which is finally copied to <c>Qd</c>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="emit">Combines one <c>n</c> element and one <c>m</c> element into a result element.</param>
/// <param name="signed">Passed to the element extraction.</param>
public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand acc = GetVecA32(op.Qd);

    int elemCount = op.GetBytesCount() >> op.Size;

    for (int i = 0; i < elemCount; i++)
    {
        Operand nElem = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);
        Operand mElem = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);

        Operand combined = emit(nElem, mElem);

        acc = EmitVectorInsert(context, acc, combined, op.Id + i, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), acc);
}
/// <summary>
/// Element-wise floating-point binary operation: extracts matching elements from <c>Qn</c> and
/// <c>Qm</c>, calls <paramref name="emit"/> on them and inserts each result into the
/// destination vector, which is finally copied to <c>Qd</c>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="emit">Combines one <c>n</c> element and one <c>m</c> element into a result element.</param>
public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Bit 0 of the size field selects FP64 over FP32.
    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

    int elems = op.GetBytesCount() >> (sizeF + 2);

    Operand res = GetVecA32(op.Qd);

    // The source vectors do not change across iterations, so fetch them once up front.
    Operand nvec = GetVecA32(op.Qn);
    Operand mvec = GetVecA32(op.Qm);

    for (int index = 0; index < elems; index++)
    {
        Operand ne = context.VectorExtract(type, nvec, op.Fn + index);
        Operand me = context.VectorExtract(type, mvec, op.Fm + index);

        res = context.VectorInsert(res, emit(ne, me), op.Fd + index);
    }

    context.Copy(GetVecA32(op.Qd), res);
}
/// <summary>
/// Emits an integer pairwise operation on whole vectors: the inputs are rearranged into a
/// "left" and a "right" operand and an intrinsic from <paramref name="inst"/> is applied to them.
/// The work is done on side 0.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="inst">Intrinsic table indexed by element size.</param>
public static void EmitSsse3VectorPairwiseOp32(ArmEmitterContext context, Intrinsic[] inst)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    EmitVectorBinaryOpSimd32(context, (n, m) =>
    {
        Operand left;
        Operand right;
        int instIndex;

        if (op.RegisterSize == RegisterSize.Simd64)
        {
            Operand evenSelector = X86GetElements(context, ZeroMask, EvenMasks[op.Size]);
            Operand oddSelector = X86GetElements(context, ZeroMask, OddMasks[op.Size]);

            Operand joined = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n

            left = context.AddIntrinsic(Intrinsic.X86Pshufb, joined, evenSelector); // 0:even from m:n
            right = context.AddIntrinsic(Intrinsic.X86Pshufb, joined, oddSelector); // 0:odd from m:n

            instIndex = op.Size;
        }
        else if (op.Size < 3)
        {
            Operand selector = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]);

            Operand shuffledN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, selector); // odd:even from n
            Operand shuffledM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, selector); // odd:even from m

            left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, shuffledN, shuffledM);
            right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, shuffledN, shuffledM);

            instIndex = op.Size;
        }
        else
        {
            // No byte shuffle is used here: the low and high halves are paired directly.
            left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m);
            right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m);

            instIndex = 3;
        }

        return context.AddIntrinsic(inst[instIndex], left, right);
    }, 0);
}
/// <summary>
/// Applies <paramref name="vectorFunc"/> to the <c>Qd</c>, <c>Qn</c> and <c>Qm</c> vectors of the
/// current <see cref="OpCode32SimdReg"/> and copies the result into <c>Qd</c>.
/// When <c>op.Q</c> is false, the <c>n</c> and <c>m</c> doublewords are first moved to the
/// destination side and the result doubleword is inserted into the destination at <c>op.Vd</c>.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="vectorFunc">Called as <c>vectorFunc(d, n, m)</c> to produce the result vector.</param>
public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand srcN = GetVecA32(op.Qn);
    Operand srcM = GetVecA32(op.Qm);
    Operand dest = GetVecA32(op.Qd);

    if (op.Q)
    {
        // Full-width operation: no doubleword shuffling is required.
        context.Copy(dest, vectorFunc(dest, srcN, srcM));

        return;
    }

    // Register swap: move relevant doubleword to destination side.
    srcN = EmitMoveDoubleWordToSide(context, srcN, op.Vn, op.Vd);
    srcM = EmitMoveDoubleWordToSide(context, srcM, op.Vm, op.Vd);

    Operand result = vectorFunc(dest, srcN, srcM);

    // Register insert: merge the result doubleword into the destination vector.
    result = EmitDoubleWordInsert(context, dest, result, op.Vd);

    context.Copy(dest, result);
}
/// <summary>
/// Integer pairwise operation: for every adjacent pair in <c>Qn</c> and in <c>Qm</c>, calls
/// <paramref name="emit"/> and stores the <c>Qn</c> results in the first <c>pairs</c> destination
/// slots and the <c>Qm</c> results in the following <c>pairs</c> slots.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
/// <param name="emit">Combines the two members of a pair into one result element.</param>
/// <param name="signed">Passed to the element extraction.</param>
public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    int elemCount = op.GetBytesCount() >> op.Size;
    int pairCount = elemCount >> 1;

    Operand acc = GetVecA32(op.Qd);

    for (int pair = 0; pair < pairCount; pair++)
    {
        int first = pair * 2;
        int second = first + 1;

        Operand nLo = EmitVectorExtract32(context, op.Qn, op.In + first, op.Size, signed);
        Operand nHi = EmitVectorExtract32(context, op.Qn, op.In + second, op.Size, signed);

        Operand mLo = EmitVectorExtract32(context, op.Qm, op.Im + first, op.Size, signed);
        Operand mHi = EmitVectorExtract32(context, op.Qm, op.Im + second, op.Size, signed);

        Operand nResult = emit(nLo, nHi);

        acc = EmitVectorInsert(context, acc, nResult, op.Id + pair, op.Size);

        Operand mResult = emit(mLo, mHi);

        acc = EmitVectorInsert(context, acc, mResult, op.Id + pair + pairCount, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), acc);
}
/// <summary>
/// Emits the long multiply-subtract: each destination element becomes
/// <c>d - (n * m)</c>, computed through the ternary long helper.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vmlsl_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Elements are treated as signed unless op.U is set.
    bool signed = !op.U;

    EmitVectorTernaryLongOpI32(context, (acc, lhs, rhs) =>
    {
        Operand product = context.Multiply(lhs, rhs);

        return context.Subtract(acc, product);
    }, signed);
}
/// <summary>
/// Emits the integer vector "greater than or equal" comparison through <c>EmitCmpOpI32</c>,
/// supplying both the signed and the unsigned comparison and selecting signed mode unless
/// <c>op.U</c> is set.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vcge_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    bool signed = !op.U;

    EmitCmpOpI32(
        context,
        context.ICompareGreaterOrEqual,
        context.ICompareGreaterOrEqualUI,
        false,
        signed);
}
/// <summary>
/// Emits the integer pairwise add through the element-wise pairwise helper.
/// </summary>
/// <param name="context">Emitter context whose <c>CurrOp</c> is the instruction being translated.</param>
public static void Vpadd_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    // Elements are extracted as signed unless op.U is set.
    bool signed = !op.U;

    EmitVectorPairwiseOpI32(context, (lhs, rhs) =>
    {
        return context.Add(lhs, rhs);
    }, signed);
}