private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
{
    // A port of the similar round function in InstEmitSimdCvt.
    OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;

    bool doubleSize = (op.Size & 1) != 0;
    int shift = doubleSize ? 1 : 2;
    Operand n = GetVecA32(op.Vm >> shift);
    n = EmitSwapScalar(context, n, op.Vm, doubleSize);

    if (!doubleSize)
    {
        Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

        nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));

        Operand zero = context.VectorZero();

        Operand nCmp;
        Operand nIntOrLong2 = default;

        if (!signed)
        {
            nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
        }

        int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)

        Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

        Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);

        if (!signed)
        {
            nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);

            nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

            nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
        }

        nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

        Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);

        Operand dRes;
        if (signed)
        {
            dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
        }
        else
        {
            dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
            dRes = context.Add(dRes, nIntOrLong);
        }

        InsertScalar(context, op.Vd, dRes);
    }
    else
    {
        Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
        nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);

        nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));

        Operand zero = context.VectorZero();

        Operand nCmp;
        Operand nIntOrLong2 = default;

        if (!signed)
        {
            nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
        }

        long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)

        Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);

        Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);

        if (!signed)
        {
            nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);

            nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
            nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);

            nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
        }

        nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));

        Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
        nLong = context.ConvertI64ToI32(nLong);

        Operand dRes;
        if (signed)
        {
            dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
        }
        else
        {
            dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
            dRes = context.Add(dRes, nIntOrLong);
        }

        InsertScalar(context, op.Vd, dRes);
    }
}
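// A minimal managed sketch of the saturation trick the emitter above relies on (not part
// of the emitter; all *Model names are hypothetical, and rounding is fixed to nearest-even
// here, whereas the real code uses ROUNDSS/ROUNDSD with the selected mode). The key facts:
// CVTSS2SI returns int.MinValue on NaN or overflow, and the CMPSS NotLessThan mask is -1
// exactly on positive overflow, so XOR saturates and ADD recombines the unsigned halves.
private static int Cvtss2siModel(float f) =>
    (float.IsNaN(f) || f >= 2147483648f || f < -2147483648f) ? int.MinValue : (int)MathF.Round(f);

private static int SaturatingF32ToI32Model(float value)
{
    float n = float.IsNaN(value) ? 0f : value; // PAND with the OrderedQ self-compare mask
    int res = Cvtss2siModel(n);                // int.MinValue on positive overflow
    int sat = n >= 2147483648f ? -1 : 0;       // CMPSS NotLessThan 2^31 mask, moved to an int
    return res ^ sat;                          // flips int.MinValue to int.MaxValue when needed
}

private static uint SaturatingF32ToU32Model(float value)
{
    float n = float.IsNaN(value) || value <= 0f ? 0f : value; // NaN and negatives collapse to 0
    int lo = Cvtss2siModel(n);                                // int.MinValue once n >= 2^31
    float shifted = n - 2147483648f;                          // SUBSS by 2^31
    shifted = shifted <= 0f ? 0f : shifted;                   // clamp again after the shift
    int hi = Cvtss2siModel(shifted);
    int sat = shifted >= 2147483648f ? -1 : 0;                // original value was >= 2^32
    return unchecked((uint)((hi ^ sat) + lo));                // XOR saturates, ADD restores bit 31
}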
public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli)
{
    Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger());
    Debug.Assert(size >= 0 && size < 4);
    Debug.Assert((size < 3) || (value.Type == OperandType.I64));

    if (castagnoli && Optimizations.UseSse42)
    {
        // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers.
        value = (value.Kind == OperandKind.Constant) ? context.Copy(value) : value;
        crc = (crc.Kind == OperandKind.Constant) ? context.Copy(crc) : crc;

        Intrinsic op = size switch
        {
            0 => Intrinsic.X86Crc32_8,
            1 => Intrinsic.X86Crc32_16,
            _ => Intrinsic.X86Crc32,
        };

        return (size == 3) ? context.ConvertI64ToI32(context.AddIntrinsicLong(op, crc, value)) : context.AddIntrinsicInt(op, crc, value);
    }
    else if (Optimizations.UsePclmulqdq)
    {
        return size switch
        {
            3 => EmitCrc32Optimized64(context, crc, value, castagnoli),
            _ => EmitCrc32Optimized(context, crc, value, castagnoli, size),
        };
    }
    else
    {
        // Software fallback, used when neither SSE4.2 nor PCLMULQDQ is available.
        string name = (size, castagnoli) switch
        {
            (0, false) => nameof(SoftFallback.Crc32b),
            (1, false) => nameof(SoftFallback.Crc32h),
            (2, false) => nameof(SoftFallback.Crc32w),
            (3, false) => nameof(SoftFallback.Crc32x),
            (0, true) => nameof(SoftFallback.Crc32cb),
            (1, true) => nameof(SoftFallback.Crc32ch),
            (2, true) => nameof(SoftFallback.Crc32cw),
            (3, true) => nameof(SoftFallback.Crc32cx),
            _ => throw new ArgumentOutOfRangeException(nameof(size)),
        };

        return context.Call(typeof(SoftFallback).GetMethod(name), crc, value);
    }
}
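// For reference, what the SSE4.2 path above computes, expressed with the managed
// System.Runtime.Intrinsics.X86 API (a sketch; Crc32CModel is not part of the emitter,
// and the file needs `using System.Runtime.Intrinsics.X86;`). Note that the hardware
// CRC32 instruction hard-wires the Castagnoli polynomial, which is why the fast path is
// gated on `castagnoli`: the plain ARM CRC32 variant (polynomial 0x04C11DB7) cannot use it.
private static uint Crc32CModel(uint crc, ulong value, int size)
{
    return size switch
    {
        0 => Sse42.Crc32(crc, (byte)value),
        1 => Sse42.Crc32(crc, (ushort)value),
        2 => Sse42.Crc32(crc, (uint)value),
        _ => (uint)Sse42.X64.Crc32(crc, value), // 64-bit form; truncated like ConvertI64ToI32
    };
}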
private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs)
{
    OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

    bool cmpWithZero = (op.Opc & 2) != 0;
    int sizeF = op.Size & 1;

    if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
    {
        CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;

        bool doubleSize = sizeF != 0;
        int shift = doubleSize ? 1 : 2;
        Operand m = GetVecA32(op.Vm >> shift);
        Operand n = GetVecA32(op.Vd >> shift);

        n = EmitSwapScalar(context, n, op.Vd, doubleSize);
        m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);

        Operand lblNaN = Label();
        Operand lblEnd = Label();

        if (!doubleSize)
        {
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));

            Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);

            SetFpFlag(context, FPState.VFlag, Const(0));
            SetFpFlag(context, FPState.CFlag, cf);
            SetFpFlag(context, FPState.ZFlag, zf);
            SetFpFlag(context, FPState.NFlag, nf);
        }
        else
        {
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));

            Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);

            SetFpFlag(context, FPState.VFlag, Const(0));
            SetFpFlag(context, FPState.CFlag, cf);
            SetFpFlag(context, FPState.ZFlag, zf);
            SetFpFlag(context, FPState.NFlag, nf);
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblNaN);

        SetFpFlag(context, FPState.VFlag, Const(1));
        SetFpFlag(context, FPState.CFlag, Const(1));
        SetFpFlag(context, FPState.ZFlag, Const(0));
        SetFpFlag(context, FPState.NFlag, Const(0));

        context.MarkLabel(lblEnd);
    }
    else
    {
        OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

        Operand ne = ExtractScalar(context, type, op.Vd);
        Operand me;

        if (cmpWithZero)
        {
            me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
        }
        else
        {
            me = ExtractScalar(context, type, op.Vm);
        }

        MethodInfo info = sizeF != 0
            ? typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompare))
            : typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompare));

        Operand nzcv = context.Call(info, ne, me, Const(signalNaNs));

        EmitSetFpscrNzcv(context, nzcv);
    }
}
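// The NZCV encoding that both the intrinsic path above and SoftFloat*.FPCompare produce
// (a reference sketch of ARM's FP compare result, not emitter code): unordered -> 0011,
// equal -> 0110, less -> 1000, greater -> 0010. The COMISS-based flags match directly:
// C = (n >= m), Z = (n == m), N = (n < m), with V only set on the unordered (NaN) path.
private static (int N, int Z, int C, int V) FpCompareModel(double n, double m)
{
    if (double.IsNaN(n) || double.IsNaN(m))
    {
        return (0, 0, 1, 1); // unordered: at least one operand is NaN
    }
    if (n == m)
    {
        return (0, 1, 1, 0);
    }
    return n < m ? (1, 0, 0, 0) : (0, 0, 1, 0);
}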
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;

    if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
    {
        Operand n = GetVec(op.Rn);
        Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);

        CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;

        Operand lblNaN = Label();
        Operand lblEnd = Label();

        if (op.Size == 0)
        {
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));

            Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand nCopy = context.Copy(n);
            Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, nCopy, mCopy);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, nCopy, mCopy);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, nCopy, mCopy);

            SetFlag(context, PState.VFlag, Const(0));
            SetFlag(context, PState.CFlag, cf);
            SetFlag(context, PState.ZFlag, zf);
            SetFlag(context, PState.NFlag, nf);
        }
        else /* if (op.Size == 1) */
        {
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));

            Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand nCopy = context.Copy(n);
            Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, nCopy, mCopy);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, nCopy, mCopy);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, nCopy, mCopy);

            SetFlag(context, PState.VFlag, Const(0));
            SetFlag(context, PState.CFlag, cf);
            SetFlag(context, PState.ZFlag, zf);
            SetFlag(context, PState.NFlag, nf);
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblNaN);

        SetFlag(context, PState.VFlag, Const(1));
        SetFlag(context, PState.CFlag, Const(1));
        SetFlag(context, PState.ZFlag, Const(0));
        SetFlag(context, PState.NFlag, Const(0));

        context.MarkLabel(lblEnd);
    }
    else
    {
        OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;

        Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
        Operand me;

        if (cmpWithZero)
        {
            me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
        }
        else
        {
            me = context.VectorExtract(type, GetVec(op.Rm), 0);
        }

        Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs));

        EmitSetNzcv(context, nzcv);
    }
}
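// The CMPSS/CMPSD predicate values assumed by CmpCondition in the methods above (these
// are the standard x86 _CMP_* immediates; the actual enum lives elsewhere in the repo,
// so this is a reference sketch). The signaling OrderedS form only exists in the extended
// VEX-encoded range, which is why the signalNaNs path is gated on Optimizations.UseAvx.
enum CmpConditionModel
{
    NotLessThan = 5,        // _CMP_NLT_US
    NotLessThanOrEqual = 6, // _CMP_NLE_US
    OrderedQ = 7,           // _CMP_ORD_Q (quiet: no exception on quiet NaN)
    OrderedS = 23,          // _CMP_ORD_S (signaling variant, VEX encoding only)
}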
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    const int cmpOrdered = 7;

    bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;

    if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        Operand n = GetVec(op.Rn);
        Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);

        Operand lblNaN = Label();
        Operand lblEnd = Label();

        if (op.Size == 0)
        {
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered));

            Operand isOrdered = context.VectorExtract16(ordMask, 0);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);

            SetFlag(context, PState.VFlag, Const(0));
            SetFlag(context, PState.CFlag, cf);
            SetFlag(context, PState.ZFlag, zf);
            SetFlag(context, PState.NFlag, nf);
        }
        else /* if (op.Size == 1) */
        {
            Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered));

            Operand isOrdered = context.VectorExtract16(ordMask, 0);

            context.BranchIfFalse(lblNaN, isOrdered);

            Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
            Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
            Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);

            SetFlag(context, PState.VFlag, Const(0));
            SetFlag(context, PState.CFlag, cf);
            SetFlag(context, PState.ZFlag, zf);
            SetFlag(context, PState.NFlag, nf);
        }

        context.Branch(lblEnd);

        context.MarkLabel(lblNaN);

        SetFlag(context, PState.VFlag, Const(1));
        SetFlag(context, PState.CFlag, Const(1));
        SetFlag(context, PState.ZFlag, Const(0));
        SetFlag(context, PState.NFlag, Const(0));

        context.MarkLabel(lblEnd);
    }
    else
    {
        OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32;

        Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
        Operand me;

        if (cmpWithZero)
        {
            me = op.Size == 0 ? ConstF(0f) : ConstF(0d);
        }
        else
        {
            me = context.VectorExtract(type, GetVec(op.Rm), 0);
        }

        Delegate dlg = op.Size != 0
            ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare)
            : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare);

        Operand nzcv = context.Call(dlg, ne, me, Const(signalNaNs));

        EmitSetNzcv(context, nzcv);
    }
}