/// <summary>
/// Emits IL that sums each adjacent pair of Rn elements into one element of size
/// <c>Op.Size + 1</c>, optionally adds the element of Rd at the same result index,
/// and stores the assembled temporary vector into Rd.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Signed">Forwarded to <c>EmitVectorExtract</c> for every element read.</param>
/// <param name="Accumulate">When true, the existing Rd element is added to each pair sum.</param>
private static void EmitAddLongPairwise(AILEmitterCtx Context, bool Signed, bool Accumulate)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int narrowSize = simdOp.Size;
    int wideSize = narrowSize + 1;

    //Result count: (register bits / 16) >> element size.
    int pairCount = (simdOp.GetBitsCount() >> 4) >> narrowSize;

    int pair = 0;

    while (pair < pairCount)
    {
        int firstElem = pair << 1;

        EmitVectorExtract(Context, simdOp.Rn, firstElem, narrowSize, Signed);
        EmitVectorExtract(Context, simdOp.Rn, firstElem + 1, narrowSize, Signed);

        Context.Emit(OpCodes.Add);

        if (Accumulate)
        {
            EmitVectorExtract(Context, simdOp.Rd, pair, wideSize, Signed);

            Context.Emit(OpCodes.Add);
        }

        EmitVectorInsertTmp(Context, pair, wideSize);

        pair++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    bool is64Bit = simdOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL for a floating-point pairwise add: the first half of the result is built from
/// adjacent pairs of Rn, the second half from adjacent pairs of Rm.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
public static void Faddp_V(AILEmitterCtx Context)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int sizeF = regOp.Size & 1;

    //Element count: bytes >> (sizeF + 2).
    int elemCount = (regOp.GetBitsCount() >> 3) >> (sizeF + 2);
    int halfCount = elemCount >> 1;

    for (int dst = 0; dst < elemCount; dst++)
    {
        var srcReg = dst < halfCount ? regOp.Rn : regOp.Rm;

        //Index of the first element of the pair inside the chosen source register.
        int firstElem = (dst & (halfCount - 1)) << 1;

        EmitVectorExtractF(Context, srcReg, firstElem, sizeF);
        EmitVectorExtractF(Context, srcReg, firstElem + 1, sizeF);

        Context.Emit(OpCodes.Add);

        EmitVectorInsertTmpF(Context, dst, sizeF);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL that interleaves elements of Rn and Rm into Rd: even result positions come from
/// Rn, odd positions from Rm, reading source elements starting at <c>Part * (Elems / 2)</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Part">Multiplier selecting which half of the sources is read.</param>
private static void EmitVectorZip(AILEmitterCtx Context, int Part)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int size = regOp.Size;
    int elemCount = (Context.CurrOp.GetBitsCount() >> 3) >> size;
    int firstSrc = Part * (elemCount >> 1);

    for (int dst = 0; dst < elemCount; dst++)
    {
        bool fromRn = (dst & 1) == 0;
        int src = firstSrc + (dst >> 1);

        EmitVectorExtractZx(Context, fromRn ? regOp.Rn : regOp.Rm, src, size);
        EmitVectorInsertTmp(Context, dst, size);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL that widens floating-point elements of Rn into Rd. With <c>SizeF == 0</c> each
/// source is read as a size-1 integer and converted through
/// <c>ASoftFloat16_32.FPConvert</c>; otherwise a size-0 float is read and converted with
/// <c>Conv_R8</c>. For a 128-bit opcode the sources start at element <c>Elems</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
public static void Fcvtl_V(AILEmitterCtx Context)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int sizeF = simdOp.Size & 1;
    int elemCount = 4 >> sizeF;

    int srcBase = 0;

    if (simdOp.RegisterSize == ARegisterSize.SIMD128)
    {
        srcBase = elemCount;
    }

    if (sizeF == 0)
    {
        for (int i = 0; i < elemCount; i++)
        {
            EmitVectorExtractZx(Context, simdOp.Rn, srcBase + i, 1);
            Context.Emit(OpCodes.Conv_U2);

            //The conversion helper also receives the state argument.
            Context.EmitLdarg(ATranslatedSub.StateArgIdx);
            Context.EmitCall(typeof(ASoftFloat16_32), nameof(ASoftFloat16_32.FPConvert));

            EmitVectorInsertTmpF(Context, i, sizeF);
        }
    }
    else /* if (sizeF == 1) */
    {
        for (int i = 0; i < elemCount; i++)
        {
            EmitVectorExtractF(Context, simdOp.Rn, srcBase + i, 0);
            Context.Emit(OpCodes.Conv_R8);

            EmitVectorInsertTmpF(Context, i, sizeF);
        }
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);
}
/// <summary>
/// Emits IL for a signed saturating unary operation: every sign-extended element of Rn is
/// passed through <paramref name="Emit"/> and then <c>EmitUnarySignedSatQAbsOrNeg</c>
/// before being inserted into the temporary vector that is stored to Rd.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the per-element operation.</param>
/// <param name="Flags">Only the <c>Scalar</c> flag is inspected here.</param>
public static void EmitSaturatingUnaryOpSx(AILEmitterCtx Context, Action Emit, SaturatingFlags Flags)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    bool isScalar = (Flags & SaturatingFlags.Scalar) != 0;

    int size = simdOp.Size;
    int byteCount = simdOp.GetBitsCount() >> 3;
    int elemCount = isScalar ? 1 : byteCount >> size;

    if (isScalar)
    {
        EmitVectorZeroLowerTmp(Context);
    }

    int lane = 0;

    while (lane < elemCount)
    {
        EmitVectorExtractSx(Context, simdOp.Rn, lane, size);

        Emit();

        EmitUnarySignedSatQAbsOrNeg(Context, size);

        EmitVectorInsertTmp(Context, lane, size);

        lane++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    if (isScalar || simdOp.RegisterSize == ARegisterSize.SIMD64)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL for a floating-point vector operation whose second operand is one fixed element
/// of Rm: for every lane, pushes (optionally Rd[lane]), Rn[lane] and Rm[Elem], invokes
/// <paramref name="Emit"/>, and inserts the result into the temporary vector.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the per-lane operation.</param>
/// <param name="Elem">Index of the Rm element used for every lane.</param>
/// <param name="Ternary">When true, Rd[lane] is pushed first as an extra operand.</param>
public static void EmitVectorOpByElemF(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int sizeF = regOp.Size & 1;
    int laneCount = (regOp.GetBitsCount() >> 3) >> (sizeF + 2);

    int lane = 0;

    while (lane < laneCount)
    {
        if (Ternary)
        {
            EmitVectorExtractF(Context, regOp.Rd, lane, sizeF);
        }

        EmitVectorExtractF(Context, regOp.Rn, lane, sizeF);
        EmitVectorExtractF(Context, regOp.Rm, Elem, sizeF);

        Emit();

        EmitVectorInsertTmpF(Context, lane, sizeF);

        lane++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL for an integer vector operation whose second operand is one fixed element of
/// Rm: for every lane, pushes (optionally Rd[lane]), Rn[lane] and Rm[Elem], invokes
/// <paramref name="Emit"/>, and inserts the result into the temporary vector.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the per-lane operation.</param>
/// <param name="Elem">Index of the Rm element used for every lane.</param>
/// <param name="Ternary">When true, Rd[lane] is pushed first as an extra operand.</param>
/// <param name="Signed">Forwarded to <c>EmitVectorExtract</c> for every element read.</param>
public static void EmitVectorOpByElem(AILEmitterCtx Context, Action Emit, int Elem, bool Ternary, bool Signed)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int size = regOp.Size;
    int laneCount = (Context.CurrOp.GetBitsCount() >> 3) >> size;

    int lane = 0;

    while (lane < laneCount)
    {
        if (Ternary)
        {
            EmitVectorExtract(Context, regOp.Rd, lane, size, Signed);
        }

        EmitVectorExtract(Context, regOp.Rn, lane, size, Signed);
        EmitVectorExtract(Context, regOp.Rm, Elem, size, Signed);

        Emit();

        EmitVectorInsertTmp(Context, lane, size);

        lane++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL that reverses the order of elements inside each container of
/// <c>1 &lt;&lt; (ContainerSize - Op.Size)</c> elements of Rn and stores the result to Rd.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="ContainerSize">Container size, on the same scale as <c>Op.Size</c>.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the element size is not smaller than <paramref name="ContainerSize"/>.
/// </exception>
private static void EmitRev_V(AILEmitterCtx Context, int ContainerSize)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int size = simdOp.Size;

    if (ContainerSize <= size)
    {
        throw new InvalidOperationException();
    }

    int elemCount = (simdOp.GetBitsCount() >> 3) >> size;

    //XOR with this mask mirrors an index inside its container.
    int mirrorMask = (1 << (ContainerSize - size)) - 1;

    for (int dst = 0; dst < elemCount; dst++)
    {
        EmitVectorExtractZx(Context, simdOp.Rn, dst ^ mirrorMask, size);
        EmitVectorInsertTmp(Context, dst, size);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    bool is64Bit = simdOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL for a widening binary (or ternary) operation: elements of Rn and Rm of size
/// <c>Op.Size</c> are combined by <paramref name="Emit"/> and inserted as elements of size
/// <c>Op.Size + 1</c>. For a 128-bit opcode the sources start at element <c>Elems</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the per-element operation.</param>
/// <param name="Ternary">When true, the wide Rd element is pushed first as an extra operand.</param>
/// <param name="Signed">Forwarded to <c>EmitVectorExtract</c> for every element read.</param>
public static void EmitVectorWidenRnRmOp(AILEmitterCtx Context, Action Emit, bool Ternary, bool Signed)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    //The temporary vector starts out as a copy of Rd.
    Context.EmitLdvec(regOp.Rd);
    Context.EmitStvectmp();

    int narrowSize = regOp.Size;
    int wideSize = narrowSize + 1;
    int elemCount = 8 >> narrowSize;

    int srcBase = 0;

    if (regOp.RegisterSize == ARegisterSize.SIMD128)
    {
        srcBase = elemCount;
    }

    for (int i = 0; i < elemCount; i++)
    {
        int src = srcBase + i;

        if (Ternary)
        {
            EmitVectorExtract(Context, regOp.Rd, i, wideSize, Signed);
        }

        EmitVectorExtract(Context, regOp.Rn, src, narrowSize, Signed);
        EmitVectorExtract(Context, regOp.Rm, src, narrowSize, Signed);

        Emit();

        EmitVectorInsertTmp(Context, i, wideSize);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);
}
/// <summary>
/// Emits IL that interleaves elements of Rn and Rm into Rd, one source pair per iteration:
/// Rn[Base + i] lands at result index 2i and Rm[Base + i] at 2i + 1.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Part">Zero reads from element 0; any other value starts at element <c>Pairs</c>.</param>
private static void EmitVectorZip(AILEmitterCtx Context, int Part)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int size = regOp.Size;
    int pairCount = (regOp.GetBitsCount() >> 4) >> size;

    int srcBase = 0;

    if (Part != 0)
    {
        srcBase = pairCount;
    }

    int pair = 0;

    while (pair < pairCount)
    {
        int evenDst = pair << 1;
        int src = srcBase + pair;

        EmitVectorExtractZx(Context, regOp.Rn, src, size);
        EmitVectorExtractZx(Context, regOp.Rm, src, size);

        //The Rm value was pushed last, so it is inserted first (odd slot), then Rn (even slot).
        EmitVectorInsertTmp(Context, evenDst + 1, size);
        EmitVectorInsertTmp(Context, evenDst, size);

        pair++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL for an integer pairwise operation: the first half of the result is built from
/// adjacent pairs of Rn, the second half from adjacent pairs of Rm, each pair being
/// combined by <paramref name="Emit"/>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the operation applied to each pair.</param>
/// <param name="Signed">Forwarded to <c>EmitVectorExtract</c> for every element read.</param>
private static void EmitVectorPairwiseOp(AILEmitterCtx Context, Action Emit, bool Signed)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int size = regOp.Size;
    int elemCount = (Context.CurrOp.GetBitsCount() >> 3) >> size;
    int halfCount = elemCount >> 1;

    for (int dst = 0; dst < elemCount; dst++)
    {
        var srcReg = dst < halfCount ? regOp.Rn : regOp.Rm;

        //Index of the first element of the pair inside the chosen source register.
        int firstElem = (dst & (halfCount - 1)) << 1;

        EmitVectorExtract(Context, srcReg, firstElem, size, Signed);
        EmitVectorExtract(Context, srcReg, firstElem + 1, size, Signed);

        Emit();

        EmitVectorInsertTmp(Context, dst, size);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL that narrows elements of Rn (size <c>Op.Size + 1</c>) into elements of size
/// <c>Op.Size</c>. For a 128-bit opcode the results are inserted starting at element
/// <c>Elems</c> on top of a copy of Rd; otherwise they start at element 0 and the upper
/// part of Rd is zeroed.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
public static void Xtn_V(AILEmitterCtx Context)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int narrowSize = simdOp.Size;
    int elemCount = 8 >> narrowSize;

    int dstBase = 0;

    if (simdOp.RegisterSize == ARegisterSize.SIMD128)
    {
        dstBase = elemCount;
    }

    if (dstBase != 0)
    {
        //Start from the current contents of Rd.
        Context.EmitLdvec(simdOp.Rd);
        Context.EmitStvectmp();
    }

    int i = 0;

    while (i < elemCount)
    {
        EmitVectorExtractZx(Context, simdOp.Rn, i, narrowSize + 1);
        EmitVectorInsertTmp(Context, dstBase + i, narrowSize);

        i++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    if (dstBase == 0)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL for a floating-point pairwise operation. Each iteration combines one adjacent
/// pair of Rn and one adjacent pair of Rm with <paramref name="Emit"/>; the Rn result goes
/// to result index <c>i</c> and the Rm result to index <c>Pairs + i</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the operation applied to each pair.</param>
public static void EmitVectorPairwiseOpF(AILEmitterCtx Context, Action Emit)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int sizeF = regOp.Size & 1;
    int pairCount = (regOp.GetBitsCount() >> 4) >> (sizeF + 2);

    int pair = 0;

    while (pair < pairCount)
    {
        int firstElem = pair << 1;

        EmitVectorExtractF(Context, regOp.Rn, firstElem, sizeF);
        EmitVectorExtractF(Context, regOp.Rn, firstElem + 1, sizeF);

        Emit();

        EmitVectorExtractF(Context, regOp.Rm, firstElem, sizeF);
        EmitVectorExtractF(Context, regOp.Rm, firstElem + 1, sizeF);

        Emit();

        //The Rm result was produced last, so it is inserted first.
        EmitVectorInsertTmpF(Context, pairCount + pair, sizeF);
        EmitVectorInsertTmpF(Context, pair, sizeF);

        pair++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL that builds Rd byte by byte: result byte <c>i</c> is read from Rn while
/// <c>Imm4 + i</c> is below the byte count and from Rm afterwards. The source byte index
/// starts at <c>Imm4</c> and is reset to 0 when it reaches the byte count.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
public static void Ext_V(AILEmitterCtx Context)
{
    AOpCodeSimdExt extOp = (AOpCodeSimdExt)Context.CurrOp;

    //The temporary vector starts out as a copy of Rd.
    Context.EmitLdvec(extOp.Rd);
    Context.EmitStvectmp();

    int byteCount = Context.CurrOp.GetBitsCount() >> 3;
    int srcByte = extOp.Imm4;

    int dstByte = 0;

    while (dstByte < byteCount)
    {
        bool stillInRn = extOp.Imm4 + dstByte < byteCount;
        int srcReg = stillInRn ? extOp.Rn : extOp.Rm;

        //Wrap only on exact equality, as the index is advanced by one each iteration.
        if (srcByte == byteCount)
        {
            srcByte = 0;
        }

        EmitVectorExtractZx(Context, srcReg, srcByte, 0);
        srcByte++;

        EmitVectorInsertTmp(Context, dstByte, 0);

        dstByte++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(extOp.Rd);

    bool is64Bit = extOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, extOp.Rd);
    }
}
/// <summary>
/// Emits IL for a shift-right-by-immediate over the elements of Rn, with optional rounding
/// and optional accumulation into Rd, controlled by <paramref name="Flags"/>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Flags">
/// <c>Scalar</c> limits the work to one element; <c>Signed</c> selects signed extraction and
/// an arithmetic shift; <c>Round</c> adds <c>1 &lt;&lt; (Shift - 1)</c> before shifting;
/// <c>Accumulate</c> adds the existing Rd element to the shifted value.
/// </param>
private static void EmitShrImmOp(AILEmitterCtx Context, ShrImmFlags Flags)
{
    AOpCodeSimdShImm shOp = (AOpCodeSimdShImm)Context.CurrOp;

    bool isScalar = (Flags & ShrImmFlags.Scalar) != 0;
    bool isSigned = (Flags & ShrImmFlags.Signed) != 0;
    bool doRound = (Flags & ShrImmFlags.Round) != 0;
    bool doAccumulate = (Flags & ShrImmFlags.Accumulate) != 0;

    int shift = GetImmShr(shOp);
    long roundConst = 1L << (shift - 1);

    int size = shOp.Size;
    int byteCount = shOp.GetBitsCount() >> 3;
    int elemCount = isScalar ? 1 : byteCount >> size;

    int lane = 0;

    while (lane < elemCount)
    {
        EmitVectorExtract(Context, shOp.Rn, lane, size, isSigned);

        if (size > 2)
        {
            //Size 3 elements are delegated to the dedicated 64-bit helper.
            EmitShrImm_64(Context, isSigned, doRound ? roundConst : 0L, shift);
        }
        else
        {
            if (doRound)
            {
                Context.EmitLdc_I8(roundConst);

                Context.Emit(OpCodes.Add);
            }

            Context.EmitLdc_I4(shift);

            Context.Emit(isSigned ? OpCodes.Shr : OpCodes.Shr_Un);
        }

        if (doAccumulate)
        {
            EmitVectorExtract(Context, shOp.Rd, lane, size, isSigned);

            Context.Emit(OpCodes.Add);
        }

        EmitVectorInsertTmp(Context, lane, size);

        lane++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(shOp.Rd);

    if (isScalar || shOp.RegisterSize == ARegisterSize.SIMD64)
    {
        EmitVectorZeroUpper(Context, shOp.Rd);
    }
}
/// <summary>
/// Emits IL that calls <c>AVectorHelper.VectorInsertInt</c> with the temporary vector,
/// <paramref name="Index"/> and <paramref name="Size"/>, then stores the returned vector
/// back into the temporary. The value to insert is presumably already on the IL stack.
/// </summary>
/// <param name="Context">Emitter context that receives the IL.</param>
/// <param name="Index">Element index, validated by <c>ThrowIfInvalid</c>.</param>
/// <param name="Size">Element size, validated by <c>ThrowIfInvalid</c>.</param>
public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size)
{
    //Reject bad arguments before any IL is emitted.
    ThrowIfInvalid(Index, Size);

    const string insertHelper = nameof(AVectorHelper.VectorInsertInt);

    //Helper arguments: temporary vector, element index, element size.
    Context.EmitLdvectmp();
    Context.EmitLdc_I4(Index);
    Context.EmitLdc_I4(Size);

    AVectorHelper.EmitCall(Context, insertHelper);

    Context.EmitStvectmp();
}
/// <summary>
/// Emits IL that calls <c>ASoftFallback.VectorInsertInt</c> with the temporary vector,
/// <paramref name="Index"/> and <paramref name="Size"/>, then stores the returned vector
/// back into the temporary. The value to insert is presumably already on the IL stack.
/// </summary>
/// <param name="Context">Emitter context that receives the IL.</param>
/// <param name="Index">Element index passed to the helper (not validated here).</param>
/// <param name="Size">Element size; must be in the range 0..3.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="Size"/> is outside 0..3.
/// </exception>
public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size)
{
    bool sizeIsValid = Size >= 0 && Size <= 3;

    if (!sizeIsValid)
    {
        throw new ArgumentOutOfRangeException(nameof(Size));
    }

    const string insertHelper = nameof(ASoftFallback.VectorInsertInt);

    //Helper arguments: temporary vector, element index, element size.
    Context.EmitLdvectmp();
    Context.EmitLdc_I4(Index);
    Context.EmitLdc_I4(Size);

    ASoftFallback.EmitCall(Context, insertHelper);

    Context.EmitStvectmp();
}
/// <summary>
/// Emits IL that converts floating-point elements of Rn to integers: each element is
/// rounded via <c>EmitRoundMathCall</c> with <c>MidpointRounding.ToEven</c>, passed to the
/// matching <c>AVectorHelper.SatF*To*</c> helper, and inserted as an integer element of
/// size <c>SizeF + 2</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Signed">Selects the signed (<c>S</c>) or unsigned (<c>U</c>) helper.</param>
/// <param name="Scalar">When true, only element 0 is converted.</param>
private static void EmitFcvtn(AILEmitterCtx Context, bool Signed, bool Scalar)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int sizeF = simdOp.Size & 1;
    int sizeI = sizeF + 2;

    bool isSingle = sizeF == 0;

    int byteCount = simdOp.GetBitsCount() >> 3;
    int elemCount = Scalar ? 1 : byteCount >> sizeI;

    //The helper depends only on precision and signedness, so pick it once.
    string satHelper;

    if (isSingle)
    {
        satHelper = Signed
            ? nameof(AVectorHelper.SatF32ToS32)
            : nameof(AVectorHelper.SatF32ToU32);
    }
    else /* if (sizeF == 1) */
    {
        satHelper = Signed
            ? nameof(AVectorHelper.SatF64ToS64)
            : nameof(AVectorHelper.SatF64ToU64);
    }

    if (Scalar && isSingle)
    {
        EmitVectorZeroLowerTmp(Context);
    }

    for (int lane = 0; lane < elemCount; lane++)
    {
        EmitVectorExtractF(Context, simdOp.Rn, lane, sizeF);

        EmitRoundMathCall(Context, MidpointRounding.ToEven);

        AVectorHelper.EmitCall(Context, satHelper);

        if (isSingle)
        {
            //The single-precision path widens the helper result with Conv_U8 before inserting.
            Context.Emit(OpCodes.Conv_U8);
        }

        EmitVectorInsertTmp(Context, lane, sizeI);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    if (Scalar || simdOp.RegisterSize == ARegisterSize.SIMD64)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL for a signed rounding shift right with accumulate. When
/// <c>AOptimizations.UseSse2</c> is set and the element size is 1 or 2, the work is done
/// with <c>Sse2</c> shift/add calls; otherwise it is delegated to
/// <c>EmitVectorShrImmOpSx</c> with the <c>Round</c> and <c>Accumulate</c> flags.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
public static void Srsra_V(AILEmitterCtx Context)
{
    AOpCodeSimdShImm shOp = (AOpCodeSimdShImm)Context.CurrOp;

    bool useSse2Path = AOptimizations.UseSse2 && shOp.Size > 0 && shOp.Size < 3;

    if (!useSse2Path)
    {
        EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

        return;
    }

    Type[] shiftSignature = new Type[] { VectorIntTypesPerSizeLog2[shOp.Size], typeof(byte) };
    Type[] addSignature = new Type[] { VectorIntTypesPerSizeLog2[shOp.Size], VectorIntTypesPerSizeLog2[shOp.Size] };

    int shift = GetImmShr(shOp);
    int elemBits = 8 << shOp.Size;

    //Stack: Rd, Rn. A copy of Rn is kept in the temporary vector.
    EmitLdvecWithSignedCast(Context, shOp.Rd, shOp.Size);
    EmitLdvecWithSignedCast(Context, shOp.Rn, shOp.Size);

    Context.Emit(OpCodes.Dup);
    Context.EmitStvectmp();

    //Shifting left by (bits - shift) and then logically right by (bits - 1)
    //leaves bit (shift - 1) of every element in bit 0: the rounding increment.
    Context.EmitLdc_I4(elemBits - shift);
    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), shiftSignature));

    Context.EmitLdc_I4(elemBits - 1);
    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), shiftSignature));

    //Arithmetic shift of the saved Rn copy.
    Context.EmitLdvectmp();

    Context.EmitLdc_I4(shift);
    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), shiftSignature));

    //First add: rounding increment + shifted value. Second add: + Rd.
    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));
    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), addSignature));

    EmitStvecWithSignedCast(Context, shOp.Rd, shOp.Size);

    bool is64Bit = shOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit)
    {
        EmitVectorZeroUpper(Context, shOp.Rd);
    }
}
/// <summary>
/// Emits IL that combines wide elements (size <c>Op.Size + 1</c>) of Rn and Rm with
/// <paramref name="Emit"/>, optionally adds a rounding constant, shifts the result right
/// by the narrow element width and inserts it as an element of size <c>Op.Size</c>.
/// For a 128-bit opcode the results are placed starting at element <c>Elems</c> on top of
/// a copy of Rd; otherwise the upper part of Rd is zeroed.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the operation combining the two wide elements.</param>
/// <param name="Round">When true, <c>1 &lt;&lt; (ESize - 1)</c> is added before the shift.</param>
private static void EmitHighNarrow(AILEmitterCtx Context, Action Emit, bool Round)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    int narrowSize = regOp.Size;
    int wideSize = narrowSize + 1;

    int elemCount = 8 >> narrowSize;
    int elemBits = 8 << narrowSize;

    int dstBase = 0;

    if (regOp.RegisterSize == ARegisterSize.SIMD128)
    {
        dstBase = elemCount;
    }

    long roundConst = 1L << (elemBits - 1);

    if (dstBase != 0)
    {
        //Start from the current contents of Rd.
        Context.EmitLdvec(regOp.Rd);
        Context.EmitStvectmp();
    }

    int i = 0;

    while (i < elemCount)
    {
        EmitVectorExtractZx(Context, regOp.Rn, i, wideSize);
        EmitVectorExtractZx(Context, regOp.Rm, i, wideSize);

        Emit();

        if (Round)
        {
            Context.EmitLdc_I8(roundConst);

            Context.Emit(OpCodes.Add);
        }

        Context.EmitLsr(elemBits);

        EmitVectorInsertTmp(Context, dstBase + i, narrowSize);

        i++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(regOp.Rd);

    if (dstBase == 0)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}
/// <summary>
/// Emits IL that shifts zero-extended wide elements (size <c>Op.Size + 1</c>) of Rn right
/// by the immediate, optionally rounding first, and inserts them as elements of size
/// <c>Op.Size</c>. For a 128-bit opcode the results are placed starting at element
/// <c>Elems</c> on top of a copy of Rd; otherwise the upper part of Rd is zeroed.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Round">When true, <c>1 &lt;&lt; (Shift - 1)</c> is added before the shift.</param>
private static void EmitVectorShrImmNarrowOpZx(AILEmitterCtx Context, bool Round)
{
    AOpCodeSimdShImm shOp = (AOpCodeSimdShImm)Context.CurrOp;

    int shift = GetImmShr(shOp);
    long roundConst = 1L << (shift - 1);

    int narrowSize = shOp.Size;
    int elemCount = 8 >> narrowSize;

    int dstBase = 0;

    if (shOp.RegisterSize == ARegisterSize.SIMD128)
    {
        dstBase = elemCount;
    }

    if (dstBase != 0)
    {
        //Start from the current contents of Rd.
        Context.EmitLdvec(shOp.Rd);
        Context.EmitStvectmp();
    }

    int i = 0;

    while (i < elemCount)
    {
        EmitVectorExtractZx(Context, shOp.Rn, i, narrowSize + 1);

        if (Round)
        {
            Context.EmitLdc_I8(roundConst);

            Context.Emit(OpCodes.Add);
        }

        Context.EmitLdc_I4(shift);

        Context.Emit(OpCodes.Shr_Un);

        EmitVectorInsertTmp(Context, dstBase + i, narrowSize);

        i++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(shOp.Rd);

    if (dstBase == 0)
    {
        EmitVectorZeroUpper(Context, shOp.Rd);
    }
}
/// <summary>
/// Emits IL for a saturating narrow: wide elements (size <c>Op.Size + 1</c>) of Rn go
/// through <paramref name="Emit"/> and <c>EmitSatQ</c>, then are inserted as elements of
/// size <c>Op.Size</c>. For a non-scalar 128-bit opcode the results start at element
/// <c>Elems</c> on top of a copy of Rd; otherwise the upper part of Rd is zeroed.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the per-element operation.</param>
/// <param name="Flags">
/// <c>Scalar</c> limits the work to one element; <c>SignedSrc</c> and <c>SignedDst</c>
/// are forwarded to the extraction and to <c>EmitSatQ</c>.
/// </param>
public static void EmitSaturatingNarrowOp(AILEmitterCtx Context, Action Emit, SaturatingNarrowFlags Flags)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    bool isScalar = (Flags & SaturatingNarrowFlags.Scalar) != 0;
    bool srcSigned = (Flags & SaturatingNarrowFlags.SignedSrc) != 0;
    bool dstSigned = (Flags & SaturatingNarrowFlags.SignedDst) != 0;

    int narrowSize = simdOp.Size;
    int elemCount = isScalar ? 1 : 8 >> narrowSize;

    int dstBase = 0;

    if (!isScalar && simdOp.RegisterSize == ARegisterSize.SIMD128)
    {
        dstBase = elemCount;
    }

    if (isScalar)
    {
        EmitVectorZeroLowerTmp(Context);
    }

    if (dstBase != 0)
    {
        //Start from the current contents of Rd.
        Context.EmitLdvec(simdOp.Rd);
        Context.EmitStvectmp();
    }

    int i = 0;

    while (i < elemCount)
    {
        EmitVectorExtract(Context, simdOp.Rn, i, narrowSize + 1, srcSigned);

        Emit();

        EmitSatQ(Context, narrowSize, srcSigned, dstSigned);

        EmitVectorInsertTmp(Context, dstBase + i, narrowSize);

        i++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    if (dstBase == 0)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL that calls <c>ASoftFallback.VectorInsertSingle</c> (Size 0) or
/// <c>ASoftFallback.VectorInsertDouble</c> (Size 1) with the temporary vector and
/// <paramref name="Index"/>, then stores the returned vector back into the temporary.
/// The value to insert is presumably already on the IL stack.
/// </summary>
/// <param name="Context">Emitter context that receives the IL.</param>
/// <param name="Index">Element index passed to the helper.</param>
/// <param name="Size">0 for single precision, 1 for double precision.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="Size"/> is neither 0 nor 1. The vector load and index
/// constant have already been emitted at that point.
/// </exception>
public static void EmitVectorInsertTmpF(AILEmitterCtx Context, int Index, int Size)
{
    Context.EmitLdvectmp();
    Context.EmitLdc_I4(Index);

    switch (Size)
    {
        case 0:
            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
            break;

        case 1:
            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(Size));
    }

    Context.EmitStvectmp();
}
/// <summary>
/// Emits IL for a widening operation with an immediate: each element of Rn (size
/// <c>Op.Size</c>) and the constant <paramref name="Imm"/> are combined by
/// <paramref name="Emit"/> and inserted as an element of size <c>Op.Size + 1</c>.
/// For a 128-bit opcode the sources start at element <c>Elems</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the operation on (element, immediate).</param>
/// <param name="Imm">Immediate pushed as an Int32 constant after each element.</param>
/// <param name="Signed">Forwarded to <c>EmitVectorExtract</c> for every element read.</param>
private static void EmitVectorShImmWidenBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int narrowSize = simdOp.Size;
    int elemCount = 8 >> narrowSize;

    int srcBase = 0;

    if (simdOp.RegisterSize == ARegisterSize.SIMD128)
    {
        srcBase = elemCount;
    }

    int i = 0;

    while (i < elemCount)
    {
        EmitVectorExtract(Context, simdOp.Rn, srcBase + i, narrowSize, Signed);

        Context.EmitLdc_I4(Imm);

        Emit();

        EmitVectorInsertTmp(Context, i, narrowSize + 1);

        i++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);
}
/// <summary>
/// Emits IL that narrows elements of Rn (size <c>Op.Size + 1</c>) into elements of size
/// <c>Op.Size</c>. When <c>AOptimizations.UseSse41</c> is set and the narrow size is 0 or 1,
/// the narrowing is done with a mask followed by <c>PackUnsignedSaturate</c>; otherwise
/// elements are moved one at a time through the temporary vector.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
public static void Xtn_V(AILEmitterCtx Context)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    int narrowSize = simdOp.Size;
    int wideSize = narrowSize + 1;

    int elemCount = 8 >> narrowSize;

    int dstBase = simdOp.RegisterSize == ARegisterSize.SIMD128 ? elemCount : 0;

    if (!AOptimizations.UseSse41 || narrowSize >= 2)
    {
        //Element-by-element path.
        if (dstBase != 0)
        {
            Context.EmitLdvec(simdOp.Rd);
            Context.EmitStvectmp();
        }

        for (int i = 0; i < elemCount; i++)
        {
            EmitVectorExtractZx(Context, simdOp.Rn, i, wideSize);
            EmitVectorInsertTmp(Context, dstBase + i, narrowSize);
        }

        Context.EmitLdvectmp();
        Context.EmitStvec(simdOp.Rd);

        if (dstBase == 0)
        {
            EmitVectorZeroUpper(Context, simdOp.Rd);
        }

        return;
    }

    //Pushes an all-zero vector of the wide element type.
    void PushZeroVector()
    {
        if (narrowSize == 0)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16Zero));
        }
        else if (narrowSize == 1)
        {
            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32Zero));
        }
    }

    //Pack operands: (source, zero) for XTN, (zero, source) for XTN2.
    if (dstBase != 0)
    {
        PushZeroVector();
    }

    EmitLdvecWithSignedCast(Context, simdOp.Rn, wideSize);

    //Mask that keeps only the lower half of each wide element.
    if (narrowSize == 0)
    {
        Context.EmitLdc_I4(0x00ff);
    }
    else if (narrowSize == 1)
    {
        Context.EmitLdc_I4(0x0000ffff);
    }

    Type wideScalarType = IntTypesPerSizeLog2[wideSize];

    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideScalarType }));

    Type wideVectorType = VectorIntTypesPerSizeLog2[wideSize];

    Type[] wideVectorPair = new Type[] { wideVectorType, wideVectorType };

    Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideVectorPair));

    if (dstBase == 0)
    {
        PushZeroVector();
    }

    //The upper bits were masked off above, so the saturating pack is not
    //expected to clamp anything. The 32 -> 16 bit variant lives in Sse41.
    Type packOwner = narrowSize == 0 ? typeof(Sse2) : typeof(Sse41);

    Context.EmitCall(packOwner.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideVectorPair));

    if (dstBase != 0)
    {
        //XTN2: clear the upper part of Rd, then OR the packed result into it.
        EmitVectorZeroUpper(Context, simdOp.Rd);

        EmitLdvecWithUnsignedCast(Context, simdOp.Rd, narrowSize);

        Type narrowVectorType = VectorUIntTypesPerSizeLog2[narrowSize];

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowVectorType, narrowVectorType }));
    }

    EmitStvecWithUnsignedCast(Context, simdOp.Rd, narrowSize);
}
/// <summary>
/// Emits IL for a saturating narrow: wide elements (size <c>Op.Size + 1</c>) of Rn go
/// through <paramref name="Emit"/>, are clamped to the range of the narrow destination
/// element, and are inserted as elements of size <c>Op.Size</c>. Whenever a value is
/// clamped, the constant <c>0x8000000</c> is stored in the emitter temporary; after the
/// loop that temporary is ORed into <c>AThreadState.Fpsr</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback that emits the per-element operation.</param>
/// <param name="SignedSrc">Selects signed extraction and signed comparisons.</param>
/// <param name="SignedDst">Selects the signed or unsigned destination range.</param>
/// <param name="Scalar">When true, only element 0 is processed.</param>
public static void EmitSaturatingNarrowOp(
    AILEmitterCtx Context,
    Action        Emit,
    bool          SignedSrc,
    bool          SignedDst,
    bool          Scalar)
{
    AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;

    int Elems = !Scalar ? 8 >> Op.Size : 1;

    int ESize = 8 << Op.Size;

    int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;

    //Compute the bounds in 64-bit arithmetic. With 32-bit operands, ESize == 32 makes
    //"1 << 31" equal int.MinValue, so the "- 1" and the negation only produced the right
    //value through unchecked wrap-around (and would throw in a checked build).
    long TMaxValue = SignedDst ? (1L << (ESize - 1)) - 1L : (1L << ESize) - 1L;
    long TMinValue = SignedDst ? -(1L << (ESize - 1)) : 0L;

    //Saturation marker starts cleared.
    Context.EmitLdc_I8(0L);
    Context.EmitSttmp();

    if (Part != 0)
    {
        //Start from the current contents of Rd.
        Context.EmitLdvec(Op.Rd);
        Context.EmitStvectmp();
    }

    for (int Index = 0; Index < Elems; Index++)
    {
        AILLabel LblLe    = new AILLabel();
        AILLabel LblGeEnd = new AILLabel();

        EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc);

        Emit();

        //value <= max ? check the lower bound : replace with max and mark saturation.
        Context.Emit(OpCodes.Dup);

        Context.EmitLdc_I8(TMaxValue);

        Context.Emit(SignedSrc ? OpCodes.Ble_S : OpCodes.Ble_Un_S, LblLe);

        Context.Emit(OpCodes.Pop);

        Context.EmitLdc_I8(TMaxValue);
        Context.EmitLdc_I8(0x8000000L);
        Context.EmitSttmp();

        Context.Emit(OpCodes.Br_S, LblGeEnd);

        Context.MarkLabel(LblLe);

        //value >= min ? keep it : replace with min and mark saturation.
        Context.Emit(OpCodes.Dup);

        Context.EmitLdc_I8(TMinValue);

        Context.Emit(SignedSrc ? OpCodes.Bge_S : OpCodes.Bge_Un_S, LblGeEnd);

        Context.Emit(OpCodes.Pop);

        Context.EmitLdc_I8(TMinValue);
        Context.EmitLdc_I8(0x8000000L);
        Context.EmitSttmp();

        Context.MarkLabel(LblGeEnd);

        if (Scalar)
        {
            //NOTE(review): this clears Rd rather than the temporary vector that is
            //stored to Rd below; confirm the temporary holds no stale data here.
            EmitVectorZeroLower(Context, Op.Rd);
        }

        EmitVectorInsertTmp(Context, Part + Index, Op.Size);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(Op.Rd);

    if (Part == 0)
    {
        EmitVectorZeroUpper(Context, Op.Rd);
    }

    //Fpsr |= (int)saturation marker. The state argument is loaded twice:
    //once for the property getter and once for the setter.
    Context.EmitLdarg(ATranslatedSub.StateArgIdx);
    Context.EmitLdarg(ATranslatedSub.StateArgIdx);

    Context.EmitCallPropGet(typeof(AThreadState), nameof(AThreadState.Fpsr));

    Context.EmitLdtmp();
    Context.Emit(OpCodes.Conv_I4);

    Context.Emit(OpCodes.Or);

    Context.EmitCallPropSet(typeof(AThreadState), nameof(AThreadState.Fpsr));
}
/// <summary>
/// Emits IL for a shift-right-by-immediate followed by a saturating narrow: wide elements
/// (size <c>Op.Size + 1</c>) of Rn are shifted (optionally with rounding), passed to
/// <c>EmitSatQ</c> and inserted as elements of size <c>Op.Size</c>. For a non-scalar
/// 128-bit opcode the results start at element <c>Elems</c> on top of a copy of Rd;
/// otherwise the upper part of Rd is zeroed.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Flags">
/// <c>Scalar</c> limits the work to one element; <c>SignedSrc</c>/<c>SignedDst</c> select
/// signedness; <c>Round</c> adds <c>1 &lt;&lt; (Shift - 1)</c> before the shift.
/// </param>
private static void EmitShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags)
{
    AOpCodeSimdShImm shOp = (AOpCodeSimdShImm)Context.CurrOp;

    bool isScalar = (Flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
    bool srcSigned = (Flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
    bool dstSigned = (Flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
    bool doRound = (Flags & ShrImmSaturatingNarrowFlags.Round) != 0;

    int shift = GetImmShr(shOp);
    long roundConst = 1L << (shift - 1);

    int narrowSize = shOp.Size;
    int elemCount = isScalar ? 1 : 8 >> narrowSize;

    int dstBase = 0;

    if (!isScalar && shOp.RegisterSize == ARegisterSize.SIMD128)
    {
        dstBase = elemCount;
    }

    //Rounding with a narrow size above 1 is delegated to the 64-bit helper.
    bool use64BitHelper = doRound && narrowSize > 1;

    if (isScalar)
    {
        EmitVectorZeroLowerTmp(Context);
    }

    if (dstBase != 0)
    {
        //Start from the current contents of Rd.
        Context.EmitLdvec(shOp.Rd);
        Context.EmitStvectmp();
    }

    int i = 0;

    while (i < elemCount)
    {
        EmitVectorExtract(Context, shOp.Rn, i, narrowSize + 1, srcSigned);

        if (use64BitHelper)
        {
            EmitShrImm_64(Context, srcSigned, roundConst, shift); // Shift <= 32
        }
        else
        {
            if (doRound)
            {
                Context.EmitLdc_I8(roundConst);

                Context.Emit(OpCodes.Add);
            }

            Context.EmitLdc_I4(shift);

            Context.Emit(srcSigned ? OpCodes.Shr : OpCodes.Shr_Un);
        }

        EmitSatQ(Context, narrowSize, srcSigned, dstSigned);

        EmitVectorInsertTmp(Context, dstBase + i, narrowSize);

        i++;
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(shOp.Rd);

    if (dstBase == 0)
    {
        EmitVectorZeroUpper(Context, shOp.Rd);
    }
}
/// <summary>
/// Emits IL for a saturating binary operation over the elements of the current opcode.
/// Depending on <paramref name="Flags"/> each element is produced by a saturating add or
/// subtract of Rn and Rm, by a saturating accumulate of Rn into Rd, or by the caller's
/// <paramref name="Emit"/> callback followed by <c>EmitSatQ</c>.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Emit">Callback used only when none of Add, Sub or Accumulate is set.</param>
/// <param name="Flags">
/// <c>Scalar</c> limits the work to one element; <c>Signed</c> selects signedness;
/// <c>Add</c>/<c>Sub</c>/<c>Accumulate</c> select the operation (checked in that order).
/// </param>
public static void EmitSaturatingBinaryOp(AILEmitterCtx Context, Action Emit, SaturatingFlags Flags)
{
    AOpCodeSimd simdOp = (AOpCodeSimd)Context.CurrOp;

    bool isScalar = (Flags & SaturatingFlags.Scalar) != 0;
    bool isSigned = (Flags & SaturatingFlags.Signed) != 0;

    bool isAdd = (Flags & SaturatingFlags.Add) != 0;
    bool isSub = (Flags & SaturatingFlags.Sub) != 0;

    bool isAccumulate = (Flags & SaturatingFlags.Accumulate) != 0;

    int size = simdOp.Size;
    int byteCount = simdOp.GetBitsCount() >> 3;
    int elemCount = isScalar ? 1 : byteCount >> size;

    bool narrowerThan64 = size <= 2;

    if (isScalar)
    {
        EmitVectorZeroLowerTmp(Context);
    }

    for (int lane = 0; lane < elemCount; lane++)
    {
        if (isAdd || isSub)
        {
            EmitVectorExtract(Context, simdOp.Rn, lane, size, isSigned);
            EmitVectorExtract(Context, ((AOpCodeSimdReg)simdOp).Rm, lane, size, isSigned);

            if (narrowerThan64)
            {
                Context.Emit(isAdd ? OpCodes.Add : OpCodes.Sub);

                EmitSatQ(Context, size, true, isSigned);
            }
            else if (isAdd) /* size == 3 */
            {
                EmitBinarySatQAdd(Context, isSigned);
            }
            else /* if (isSub) */
            {
                EmitBinarySatQSub(Context, isSigned);
            }
        }
        else if (isAccumulate)
        {
            //Rn is read with the opposite signedness of Rd.
            EmitVectorExtract(Context, simdOp.Rn, lane, size, !isSigned);
            EmitVectorExtract(Context, simdOp.Rd, lane, size, isSigned);

            if (narrowerThan64)
            {
                Context.Emit(OpCodes.Add);

                EmitSatQ(Context, size, true, isSigned);
            }
            else /* size == 3 */
            {
                EmitBinarySatQAccumulate(Context, isSigned);
            }
        }
        else
        {
            EmitVectorExtract(Context, simdOp.Rn, lane, size, isSigned);
            EmitVectorExtract(Context, ((AOpCodeSimdReg)simdOp).Rm, lane, size, isSigned);

            Emit();

            EmitSatQ(Context, size, true, isSigned);
        }

        EmitVectorInsertTmp(Context, lane, size);
    }

    Context.EmitLdvectmp();
    Context.EmitStvec(simdOp.Rd);

    if (isScalar || simdOp.RegisterSize == ARegisterSize.SIMD64)
    {
        EmitVectorZeroUpper(Context, simdOp.Rd);
    }
}
/// <summary>
/// Emits IL that interleaves elements of Rn and Rm into Rd. When
/// <c>AOptimizations.UseSse2</c> is set this uses <c>Sse2.UnpackLow</c>/<c>UnpackHigh</c>
/// (plus an 8-byte lane shift for the second part of a 64-bit opcode); otherwise elements
/// are moved pair by pair through the temporary vector.
/// </summary>
/// <param name="Context">Emitter context; the current opcode is read from it.</param>
/// <param name="Part">Zero selects the first half of the sources; any other value the second.</param>
private static void EmitVectorZip(AILEmitterCtx Context, int Part)
{
    AOpCodeSimdReg regOp = (AOpCodeSimdReg)Context.CurrOp;

    if (!AOptimizations.UseSse2)
    {
        int pairCount = (regOp.GetBitsCount() >> 4) >> regOp.Size;

        int srcBase = Part != 0 ? pairCount : 0;

        int pair = 0;

        while (pair < pairCount)
        {
            int evenDst = pair << 1;

            EmitVectorExtractZx(Context, regOp.Rn, srcBase + pair, regOp.Size);
            EmitVectorExtractZx(Context, regOp.Rm, srcBase + pair, regOp.Size);

            //The Rm value was pushed last, so it is inserted first (odd slot), then Rn (even slot).
            EmitVectorInsertTmp(Context, evenDst + 1, regOp.Size);
            EmitVectorInsertTmp(Context, evenDst, regOp.Size);

            pair++;
        }

        Context.EmitLdvectmp();
        Context.EmitStvec(regOp.Rd);

        if (regOp.RegisterSize == ARegisterSize.SIMD64)
        {
            EmitVectorZeroUpper(Context, regOp.Rd);
        }

        return;
    }

    EmitLdvecWithUnsignedCast(Context, regOp.Rn, regOp.Size);
    EmitLdvecWithUnsignedCast(Context, regOp.Rm, regOp.Size);

    Type[] unpackSignature = new Type[]
    {
        VectorUIntTypesPerSizeLog2[regOp.Size],
        VectorUIntTypesPerSizeLog2[regOp.Size]
    };

    //64-bit opcodes always unpack the low half; their second part is taken
    //from the upper 8 bytes of that result by the lane shift below.
    string unpackName;

    if (Part == 0 || regOp.RegisterSize == ARegisterSize.SIMD64)
    {
        unpackName = nameof(Sse2.UnpackLow);
    }
    else
    {
        unpackName = nameof(Sse2.UnpackHigh);
    }

    Context.EmitCall(typeof(Sse2).GetMethod(unpackName, unpackSignature));

    bool is64Bit = regOp.RegisterSize == ARegisterSize.SIMD64;

    if (is64Bit && Part != 0)
    {
        Context.EmitLdc_I4(8);

        Type[] shiftSignature = new Type[] { VectorUIntTypesPerSizeLog2[regOp.Size], typeof(byte) };

        Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shiftSignature));
    }

    EmitStvecWithUnsignedCast(Context, regOp.Rd, regOp.Size);

    if (is64Bit && Part == 0)
    {
        EmitVectorZeroUpper(Context, regOp.Rd);
    }
}