// Emits code for the vector bitwise NOT of register Rn, storing the result in Rd.
//
// context: emitter context; the instruction being translated is read from context.CurrOp.
//
// With SSE2 available the inversion is done with a single PANDN against a constant built by
// X86GetAllElements(context, -1L); otherwise the generic per-element helper is used with
// context.BitwiseNot.
public static void Not_V(ArmEmitterContext context)
{
    if (!Optimizations.UseSse2)
    {
        // Software fallback: invert each element through the generic unary-op emitter.
        EmitVectorUnaryOpZx(context, (element) => context.BitwiseNot(element));

        return;
    }

    OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

    Operand source = GetVec(simdOp.Rn);

    // Presumably a vector with every bit set (each element is -1).
    Operand allOnes = X86GetAllElements(context, -1L);

    // x86 PANDN computes (~first) & second, so with an all-ones second operand this is ~source.
    Operand inverted = context.AddIntrinsic(Intrinsic.X86Pandn, source, allOnes);

    bool is64BitForm = simdOp.RegisterSize == RegisterSize.Simd64;

    if (is64BitForm)
    {
        // 64-bit register form: only the low half carries data.
        inverted = context.VectorZeroUpper64(inverted);
    }

    context.Copy(GetVec(simdOp.Rd), inverted);
}
// Emits code for the vector "compare less than" instruction, writing the result to Rd.
//
// context: emitter context; the instruction being translated is read from context.CurrOp.
//
// With SSE4.2 available, the PCMPGT variant matching the element size is emitted with a zero
// vector as first operand and Rn as second (i.e. "0 > n" per lane). Otherwise the generic
// compare emitter is used with context.ICompareLess in vector (non-scalar) mode.
public static void Cmlt_V(ArmEmitterContext context)
{
    if (!Optimizations.UseSse42)
    {
        // Software fallback.
        EmitCmpOp(context, (lhs, rhs) => context.ICompareLess(lhs, rhs), scalar: false);

        return;
    }

    OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

    Operand source = GetVec(simdOp.Rn);

    // The table is indexed by the element size encoded in the opcode.
    Intrinsic greaterThan = X86PcmpgtInstruction[simdOp.Size];

    Operand zero = context.VectorZero();

    // 0 > source  <=>  source < 0.
    Operand lessThanZero = context.AddIntrinsic(greaterThan, zero, source);

    bool is64BitForm = simdOp.RegisterSize == RegisterSize.Simd64;

    if (is64BitForm)
    {
        // 64-bit register form: only the low half carries data.
        lessThanZero = context.VectorZeroUpper64(lessThanZero);
    }

    context.Copy(GetVec(simdOp.Rd), lessThanZero);
}
// Emits an SSE4.1 sequence that converts the floating-point elements of Rn to integers of the
// same width (32-bit when sizeF == 0, 64-bit otherwise) and copies the result to Rd. Judging by
// the name and the two-step conversion around 2^31 / 2^63, the result is meant to be unsigned.
//
// context:   emitter context; the instruction being translated is read from context.CurrOp.
// roundMode: rounding mode, translated by X86GetRoundControl into the ROUNDPS/ROUNDPD immediate.
// scalar:    when true only the lowest element is converted and the rest of the result is
//            passed through VectorZeroUpper96 / VectorZeroUpper64.
//
// When the opcode is an OpCodeSimdShImm (fixed-point form) the input is first multiplied by
// 2^fBits, with fBits obtained from GetImmShr.
//
// The signed x86 conversion instructions are used twice: once on the (non-positive-masked)
// rounded value and once on that value minus 2^31 (or 2^63); the two integer results are added.
// NOTE(review): the final XOR/ADD presumably relies on the x86 converts returning
// 0x80..0 on overflow so that out-of-range inputs saturate - confirm against the Intel SDM.
private static void EmitSse41Fcvtu(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
{
    OpCodeSimd op = (OpCodeSimd)context.CurrOp;

    Operand source = GetVec(op.Rn);

    // sizeF == ((OpCodeSimdShImm)op).Size - 2
    int sizeF = op.Size & 1;

    if (sizeF == 0)
    {
        // Self-comparison with an ordered predicate yields a mask used to AND the input,
        // so lanes that do not compare ordered with themselves (NaN) become zero.
        Operand orderedMask = context.AddIntrinsic(Intrinsic.X86Cmpps, source, source, Const((int)CmpCondition.OrderedQ));

        Operand value = context.AddIntrinsic(Intrinsic.X86Pand, orderedMask, source);

        if (op is OpCodeSimdShImm fixedOp)
        {
            int fBits = GetImmShr(fixedOp);

            // BitConverter.Int32BitsToSingle(scaleBits) == MathF.Pow(2f, fBits)
            int scaleBits = 0x3F800000 + fBits * 0x800000;

            Operand scale = X86GetAllElements(context, scaleBits);

            value = context.AddIntrinsic(Intrinsic.X86Mulps, value, scale);
        }

        Operand rounded = context.AddIntrinsic(Intrinsic.X86Roundps, value, Const(X86GetRoundControl(roundMode)));

        // Keep only the lanes that are not <= 0; the others are forced to zero.
        Operand positiveMask = context.AddIntrinsic(Intrinsic.X86Cmpps, rounded, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));

        Operand positive = context.AddIntrinsic(Intrinsic.X86Pand, rounded, positiveMask);

        // First signed conversion: the value itself.
        Operand firstInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, positive);

        Operand twoPow31 = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)

        // Second signed conversion: the part of the value above 2^31 (again masked to > 0).
        Operand excess = context.AddIntrinsic(Intrinsic.X86Subps, positive, twoPow31);

        Operand excessMask = context.AddIntrinsic(Intrinsic.X86Cmpps, excess, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));

        Operand excessPositive = context.AddIntrinsic(Intrinsic.X86Pand, excess, excessMask);

        Operand result = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, excessPositive);

        // Lanes whose excess is itself not below 2^31 get their converted bits flipped.
        Operand overflowMask = context.AddIntrinsic(Intrinsic.X86Cmpps, excessPositive, twoPow31, Const((int)CmpCondition.NotLessThan));

        result = context.AddIntrinsic(Intrinsic.X86Pxor, result, overflowMask);
        result = context.AddIntrinsic(Intrinsic.X86Paddd, result, firstInt);

        if (scalar)
        {
            result = context.VectorZeroUpper96(result);
        }
        else if (op.RegisterSize == RegisterSize.Simd64)
        {
            result = context.VectorZeroUpper64(result);
        }

        context.Copy(GetVec(op.Rd), result);
    }
    else /* if (sizeF == 1) */
    {
        // Same scheme as the single-precision path, using the packed-double instructions.
        Operand orderedMask = context.AddIntrinsic(Intrinsic.X86Cmppd, source, source, Const((int)CmpCondition.OrderedQ));

        Operand value = context.AddIntrinsic(Intrinsic.X86Pand, orderedMask, source);

        if (op is OpCodeSimdShImm fixedOp)
        {
            int fBits = GetImmShr(fixedOp);

            // BitConverter.Int64BitsToDouble(scaleBits) == Math.Pow(2d, fBits)
            long scaleBits = 0x3FF0000000000000L + fBits * 0x10000000000000L;

            Operand scale = X86GetAllElements(context, scaleBits);

            value = context.AddIntrinsic(Intrinsic.X86Mulpd, value, scale);
        }

        Operand rounded = context.AddIntrinsic(Intrinsic.X86Roundpd, value, Const(X86GetRoundControl(roundMode)));

        // Keep only the lanes that are not <= 0; the others are forced to zero.
        Operand positiveMask = context.AddIntrinsic(Intrinsic.X86Cmppd, rounded, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));

        Operand positive = context.AddIntrinsic(Intrinsic.X86Pand, rounded, positiveMask);

        // The double -> int64 conversion is done one lane at a time with CVTSD2SI;
        // the upper lane is only converted for the vector form.
        Operand upperLane;

        if (scalar)
        {
            upperLane = Const(0L);
        }
        else
        {
            upperLane = context.AddIntrinsic(Intrinsic.X86Unpckhpd, positive, positive);
            upperLane = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, upperLane);
        }

        Operand lowerLane = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, positive);

        // First signed conversion: the value itself.
        Operand firstInt = EmitVectorLongCreate(context, lowerLane, upperLane);

        Operand twoPow63 = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)

        // Second signed conversion: the part of the value above 2^63 (again masked to > 0).
        Operand excess = context.AddIntrinsic(Intrinsic.X86Subpd, positive, twoPow63);

        Operand excessMask = context.AddIntrinsic(Intrinsic.X86Cmppd, excess, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));

        Operand excessPositive = context.AddIntrinsic(Intrinsic.X86Pand, excess, excessMask);

        // In the scalar case upperLane keeps the constant zero assigned above.
        if (!scalar)
        {
            upperLane = context.AddIntrinsic(Intrinsic.X86Unpckhpd, excessPositive, excessPositive);
            upperLane = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, upperLane);
        }

        lowerLane = context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, excessPositive);

        Operand result = EmitVectorLongCreate(context, lowerLane, upperLane);

        // Lanes whose excess is itself not below 2^63 get their converted bits flipped.
        Operand overflowMask = context.AddIntrinsic(Intrinsic.X86Cmppd, excessPositive, twoPow63, Const((int)CmpCondition.NotLessThan));

        result = context.AddIntrinsic(Intrinsic.X86Pxor, result, overflowMask);
        result = context.AddIntrinsic(Intrinsic.X86Paddq, result, firstInt);

        if (scalar)
        {
            result = context.VectorZeroUpper64(result);
        }

        context.Copy(GetVec(op.Rd), result);
    }
}
// Emits code for the scalar floating-point precision conversion (FCVT), selected by the
// opcode's Size (source format) and Opc (destination format) fields:
//
//   Size 0, Opc 1: single -> double  (CVTSS2SD with SSE2, otherwise ConvertToFP)
//   Size 1, Opc 0: double -> single  (CVTSD2SS with SSE2, otherwise ConvertToFP)
//   Size 0, Opc 3: single -> half    (call to SoftFloat32_16.FPConvert)
//   Size 3, Opc 0: half   -> single  (call to SoftFloat16_32.FPConvert)
//   Size 1, Opc 3: double -> half    (throws NotImplementedException)
//   Size 3, Opc 1: half   -> double  (throws NotImplementedException)
//
// Any other combination only trips a Debug.Assert and emits nothing.
//
// context: emitter context; the instruction being translated is read from context.CurrOp.
public static void Fcvt_S(ArmEmitterContext context)
{
    OpCodeSimd simdOp = (OpCodeSimd)context.CurrOp;

    // Single -> Double.
    if (simdOp.Size == 0 && simdOp.Opc == 1)
    {
        if (Optimizations.UseSse2)
        {
            Operand source = GetVec(simdOp.Rn);

            Operand widened = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), source);

            context.Copy(GetVec(simdOp.Rd), widened);
        }
        else
        {
            Operand single = context.VectorExtract(OperandType.FP32, GetVec(simdOp.Rn), 0);

            Operand widened = context.ConvertToFP(OperandType.FP64, single);

            // Place the converted value in element 0 of an otherwise zero vector.
            Operand destination = GetVec(simdOp.Rd);
            Operand packed = context.VectorInsert(context.VectorZero(), widened, 0);

            context.Copy(destination, packed);
        }

        return;
    }

    // Double -> Single.
    if (simdOp.Size == 1 && simdOp.Opc == 0)
    {
        if (Optimizations.UseSse2)
        {
            Operand source = GetVec(simdOp.Rn);

            Operand narrowed = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), source);

            context.Copy(GetVec(simdOp.Rd), narrowed);
        }
        else
        {
            Operand dbl = context.VectorExtract(OperandType.FP64, GetVec(simdOp.Rn), 0);

            Operand narrowed = context.ConvertToFP(OperandType.FP32, dbl);

            // Place the converted value in element 0 of an otherwise zero vector.
            Operand destination = GetVec(simdOp.Rd);
            Operand packed = context.VectorInsert(context.VectorZero(), narrowed, 0);

            context.Copy(destination, packed);
        }

        return;
    }

    // Single -> Half.
    if (simdOp.Size == 0 && simdOp.Opc == 3)
    {
        Operand single = context.VectorExtract(OperandType.FP32, GetVec(simdOp.Rn), 0);

        Delegate softConvert = new _U16_F32(SoftFloat32_16.FPConvert);

        Operand halfBits = context.Call(softConvert, single);

        // The call result is zero-extended from 16 bits to a 64-bit integer before insertion.
        halfBits = context.ZeroExtend16(OperandType.I64, halfBits);

        Operand destination = GetVec(simdOp.Rd);
        Operand packed = EmitVectorInsert(context, context.VectorZero(), halfBits, 0, 1);

        context.Copy(destination, packed);

        return;
    }

    // Half -> Single.
    if (simdOp.Size == 3 && simdOp.Opc == 0)
    {
        Operand halfBits = EmitVectorExtractZx(context, simdOp.Rn, 0, 1);

        Delegate softConvert = new _F32_U16(SoftFloat16_32.FPConvert);

        Operand single = context.Call(softConvert, halfBits);

        Operand destination = GetVec(simdOp.Rd);
        Operand packed = context.VectorInsert(context.VectorZero(), single, 0);

        context.Copy(destination, packed);

        return;
    }

    // Double -> Half.
    if (simdOp.Size == 1 && simdOp.Opc == 3)
    {
        throw new NotImplementedException("Double-precision to half-precision.");
    }

    // Half -> Double.
    if (simdOp.Size == 3 && simdOp.Opc == 1)
    {
        throw new NotImplementedException("Half-precision to double-precision.");
    }

    // Invalid encoding.
    Debug.Assert(false, $"type == {simdOp.Size} && opc == {simdOp.Opc}");
}