public static void Ext_V(ILEmitterCtx context) { OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp; context.EmitLdvec(op.Rd); context.EmitStvectmp(); int bytes = op.GetBitsCount() >> 3; int position = op.Imm4; for (int index = 0; index < bytes; index++) { int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; if (position == bytes) { position = 0; } EmitVectorExtractZx(context, reg, position++, 0); EmitVectorInsertTmp(context, index, 0); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } }
private static void EmitVectorUnzip(ILEmitterCtx context, int part) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; int words = op.GetBitsCount() >> 4; int pairs = words >> op.Size; for (int index = 0; index < pairs; index++) { int idx = index << 1; EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); EmitVectorInsertTmp(context, pairs + index, op.Size); EmitVectorInsertTmp(context, index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } }
public static void EmitVectorOpByElemF(ILEmitterCtx context, Action emit, int elem, bool ternary) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; int sizeF = op.Size & 1; int bytes = op.GetBitsCount() >> 3; int elems = bytes >> sizeF + 2; for (int index = 0; index < elems; index++) { if (ternary) { EmitVectorExtractF(context, op.Rd, index, sizeF); } EmitVectorExtractF(context, op.Rn, index, sizeF); EmitVectorExtractF(context, op.Rm, elem, sizeF); emit(); EmitVectorInsertTmpF(context, index, sizeF); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } }
public static void EmitVectorWidenOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; int elems = 8 >> op.Size; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; EmitVectorExtract(context, op.Rm, elem, op.Size, signed); context.EmitSttmp(); for (int index = 0; index < elems; index++) { if (ternary) { EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); } EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); context.EmitLdtmp(); emit(); EmitVectorInsertTmp(context, index, op.Size + 1); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); }
public static void EmitVectorPairwiseOpF(ILEmitterCtx context, Action emit) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; int sizeF = op.Size & 1; int words = op.GetBitsCount() >> 4; int pairs = words >> sizeF + 2; for (int index = 0; index < pairs; index++) { int idx = index << 1; EmitVectorExtractF(context, op.Rn, idx, sizeF); EmitVectorExtractF(context, op.Rn, idx + 1, sizeF); emit(); EmitVectorExtractF(context, op.Rm, idx, sizeF); EmitVectorExtractF(context, op.Rm, idx + 1, sizeF); emit(); EmitVectorInsertTmpF(context, pairs + index, sizeF); EmitVectorInsertTmpF(context, index, sizeF); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } }
private static void EmitRev_V(ILEmitterCtx context, int containerSize) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; int bytes = op.GetBitsCount() >> 3; int elems = bytes >> op.Size; int containerMask = (1 << (containerSize - op.Size)) - 1; for (int index = 0; index < elems; index++) { int revIndex = index ^ containerMask; EmitVectorExtractZx(context, op.Rn, revIndex, op.Size); EmitVectorInsertTmp(context, index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } }
public static void Fcvtl_V(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; int sizeF = op.Size & 1; int elems = 4 >> sizeF; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; for (int index = 0; index < elems; index++) { if (sizeF == 0) { EmitVectorExtractZx(context, op.Rn, part + index, 1); context.Emit(OpCodes.Conv_U2); context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); } else /* if (sizeF == 1) */ { EmitVectorExtractF(context, op.Rn, part + index, 0); context.Emit(OpCodes.Conv_R8); } EmitVectorInsertTmpF(context, index, sizeF); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); }
public static void Xtn_V(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; if (Optimizations.UseSsse3) { Type[] typesSve = new Type[] { typeof(long), typeof(long) }; string nameMov = op.RegisterSize == RegisterSize.Simd128 ? nameof(Sse.MoveLowToHigh) : nameof(Sse.MoveHighToLow); context.EmitLdvec(op.Rd); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); context.EmitLdvec(op.Rn); // value context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // mask context.Emit(OpCodes.Dup); // mask context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); context.EmitCall(typeof(Sse).GetMethod(nameMov)); context.EmitStvec(op.Rd); } else { int elems = 8 >> op.Size; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; if (part != 0) { context.EmitLdvec(op.Rd); context.EmitStvectmp(); } for (int index = 0; index < elems; index++) { EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); EmitVectorInsertTmp(context, part + index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (part == 0) { EmitVectorZeroUpper(context, op.Rd); } } }
private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags) { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; bool scalar = (flags & ShrImmFlags.Scalar) != 0; bool signed = (flags & ShrImmFlags.Signed) != 0; bool round = (flags & ShrImmFlags.Round) != 0; bool accumulate = (flags & ShrImmFlags.Accumulate) != 0; int shift = GetImmShr(op); long roundConst = 1L << (shift - 1); int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> op.Size : 1; for (int index = 0; index < elems; index++) { EmitVectorExtract(context, op.Rn, index, op.Size, signed); if (op.Size <= 2) { if (round) { context.EmitLdc_I8(roundConst); context.Emit(OpCodes.Add); } context.EmitLdc_I4(shift); context.Emit(signed ? OpCodes.Shr : OpCodes.Shr_Un); } else /* if (op.Size == 3) */ { EmitShrImm64(context, signed, round ? roundConst : 0L, shift); } if (accumulate) { EmitVectorExtract(context, op.Rd, index, op.Size, signed); context.Emit(OpCodes.Add); } EmitVectorInsertTmp(context, index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if ((op.RegisterSize == RegisterSize.Simd64) || scalar) { EmitVectorZeroUpper(context, op.Rd); } }
public static void Fcvtl_V(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; int sizeF = op.Size & 1; if (Optimizations.UseSse2 && sizeF == 1) { Type[] typesMov = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) }; Type[] typesCvt = new Type[] { typeof(Vector128 <float>) }; string nameMov = op.RegisterSize == RegisterSize.Simd128 ? nameof(Sse.MoveHighToLow) : nameof(Sse.MoveLowToHigh); context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt)); EmitStvecWithCastFromDouble(context, op.Rd); } else { int elems = 4 >> sizeF; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; for (int index = 0; index < elems; index++) { if (sizeF == 0) { EmitVectorExtractZx(context, op.Rn, part + index, 1); context.Emit(OpCodes.Conv_U2); context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); } else /* if (sizeF == 1) */ { EmitVectorExtractF(context, op.Rn, part + index, 0); context.Emit(OpCodes.Conv_R8); } EmitVectorInsertTmpF(context, index, sizeF); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); } }
private static void EmitVectorZip(ILEmitterCtx context, int part) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; if (Optimizations.UseSse2) { string nameUpk = part == 0 ? nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); if (op.RegisterSize == RegisterSize.Simd128) { context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size))); } else { context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); } context.EmitStvec(op.Rd); } else { int words = op.GetBitsCount() >> 4; int pairs = words >> op.Size; int Base = part != 0 ? pairs : 0; for (int index = 0; index < pairs; index++) { int idx = index << 1; EmitVectorExtractZx(context, op.Rn, Base + index, op.Size); EmitVectorExtractZx(context, op.Rm, Base + index, op.Size); EmitVectorInsertTmp(context, idx + 1, op.Size); EmitVectorInsertTmp(context, idx, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } }
public static void EmitVectorInsertTmp(ILEmitterCtx context, int index, int size) { ThrowIfInvalid(index, size); context.EmitLdvectmp(); context.EmitLdc_I4(index); context.EmitLdc_I4(size); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt)); context.EmitStvectmp(); }
public static void EmitVectorZeroLowerTmp(ILEmitterCtx context) { if (Optimizations.UseSse) { context.EmitLdvectmp(); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow))); context.EmitStvectmp(); } else { EmitVectorInsertTmp(context, 0, 3, 0); } }
private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; int sizeF = op.Size & 1; int sizeI = sizeF + 2; int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> sizeI : 1; if (scalar && (sizeF == 0)) { EmitVectorZeroLowerTmp(context); } for (int index = 0; index < elems; index++) { EmitVectorExtractF(context, op.Rn, index, sizeF); EmitRoundMathCall(context, MidpointRounding.ToEven); if (sizeF == 0) { VectorHelper.EmitCall(context, signed ? nameof(VectorHelper.SatF32ToS32) : nameof(VectorHelper.SatF32ToU32)); context.Emit(OpCodes.Conv_U8); } else /* if (sizeF == 1) */ { VectorHelper.EmitCall(context, signed ? nameof(VectorHelper.SatF64ToS64) : nameof(VectorHelper.SatF64ToU64)); } EmitVectorInsertTmp(context, index, sizeI); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if ((op.RegisterSize == RegisterSize.Simd64) || scalar) { EmitVectorZeroUpper(context, op.Rd); } }
public static void Srsra_V(ILEmitterCtx context) { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) { Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; int shift = GetImmShr(op); int eSize = 8 << op.Size; EmitLdvecWithSignedCast(context, op.Rd, op.Size); EmitLdvecWithSignedCast(context, op.Rn, op.Size); context.Emit(OpCodes.Dup); context.EmitStvectmp(); context.EmitLdc_I4(eSize - shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); context.EmitLdc_I4(eSize - 1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); context.EmitLdvectmp(); context.EmitLdc_I4(shift); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); EmitStvecWithSignedCast(context, op.Rd, op.Size); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } else { EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); } }
private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round) { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; int shift = GetImmShr(op); long roundConst = 1L << (shift - 1); int elems = 8 >> op.Size; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; if (part != 0) { context.EmitLdvec(op.Rd); context.EmitStvectmp(); } for (int index = 0; index < elems; index++) { EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); if (round) { context.EmitLdc_I8(roundConst); context.Emit(OpCodes.Add); } context.EmitLdc_I4(shift); context.Emit(OpCodes.Shr_Un); EmitVectorInsertTmp(context, part + index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (part == 0) { EmitVectorZeroUpper(context, op.Rd); } }
public static void EmitSaturatingNarrowOp(ILEmitterCtx context, SaturatingNarrowFlags flags) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; int elems = !scalar ? 8 >> op.Size : 1; int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; if (scalar) { EmitVectorZeroLowerTmp(context); } if (part != 0) { context.EmitLdvec(op.Rd); context.EmitStvectmp(); } for (int index = 0; index < elems; index++) { EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); EmitSatQ(context, op.Size, signedSrc, signedDst); EmitVectorInsertTmp(context, part + index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (part == 0) { EmitVectorZeroUpper(context, op.Rd); } }
public static void EmitSaturatingUnaryOpSx(ILEmitterCtx context, Action emit, SaturatingFlags flags) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; bool scalar = (flags & SaturatingFlags.Scalar) != 0; int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> op.Size : 1; if (scalar) { EmitVectorZeroLowerTmp(context); } for (int index = 0; index < elems; index++) { EmitVectorExtractSx(context, op.Rn, index, op.Size); emit(); if (op.Size <= 2) { EmitSatQ(context, op.Size, true, true); } else /* if (Op.Size == 3) */ { EmitUnarySignedSatQAbsOrNeg(context); } EmitVectorInsertTmp(context, index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if ((op.RegisterSize == RegisterSize.Simd64) || scalar) { EmitVectorZeroUpper(context, op.Rd); } }
public static void EmitVectorInsertTmpF(ILEmitterCtx context, int index, int size) { ThrowIfInvalidF(index, size); context.EmitLdvectmp(); context.EmitLdc_I4(index); if (size == 0) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertSingle)); } else if (size == 1) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertDouble)); } else { throw new ArgumentOutOfRangeException(nameof(size)); } context.EmitStvectmp(); }
private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; int elems = 8 >> op.Size; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; for (int index = 0; index < elems; index++) { EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); context.EmitLdc_I4(imm); emit(); EmitVectorInsertTmp(context, index, op.Size + 1); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); }
public static void Fcvtn_V(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; int sizeF = op.Size & 1; if (Optimizations.UseSse2 && sizeF == 1) { Type[] typesCvt = new Type[] { typeof(Vector128 <double>) }; string nameMov = op.RegisterSize == RegisterSize.Simd128 ? nameof(Sse.MoveLowToHigh) : nameof(Sse.MoveHighToLow); context.EmitLdvec(op.Rd); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); EmitLdvecWithCastToDouble(context, op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); context.Emit(OpCodes.Dup); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); context.EmitCall(typeof(Sse).GetMethod(nameMov)); context.EmitStvec(op.Rd); } else { int elems = 4 >> sizeF; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; if (part != 0) { context.EmitLdvec(op.Rd); context.EmitStvectmp(); } for (int index = 0; index < elems; index++) { EmitVectorExtractF(context, op.Rn, index, sizeF); if (sizeF == 0) { context.EmitLdarg(TranslatedSub.StateArgIdx); context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert)); context.Emit(OpCodes.Conv_U8); EmitVectorInsertTmp(context, part + index, 1); } else /* if (sizeF == 1) */ { context.Emit(OpCodes.Conv_R4); EmitVectorInsertTmpF(context, part + index, 0); } } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (part == 0) { EmitVectorZeroUpper(context, op.Rd); } } }
private static void EmitVectorUnzip(ILEmitterCtx context, int part) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; if (Optimizations.UseSsse3) { Type[] typesSve = new Type[] { typeof(long), typeof(long) }; string nameUpk = part == 0 ? nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); if (op.RegisterSize == RegisterSize.Simd128) { context.EmitLdvec(op.Rn); // value if (op.Size < 3) { context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1 context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0 context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); } context.EmitLdvec(op.Rm); // value if (op.Size < 3) { context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1 context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0 context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); } context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); context.EmitStvec(op.Rd); } else { context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value if (op.Size < 2) { context.EmitLdc_I8(_masksE1_Uzp[op.Size]); // maskE1 context.EmitLdc_I8(_masksE0_Uzp[op.Size]); // maskE0 context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); } VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); context.EmitStvec(op.Rd); } } else { int words = op.GetBitsCount() >> 4; int pairs = words >> op.Size; for (int index = 0; index < pairs; index++) { int idx = index << 1; EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); EmitVectorInsertTmp(context, pairs + index, op.Size); EmitVectorInsertTmp(context, index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } }
public static void Xtn_V(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; if (Optimizations.UseSsse3) { long[] masks = new long[] { 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 }; Type[] typesMov = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) }; Type[] typesSfl = new Type[] { typeof(Vector128 <sbyte>), typeof(Vector128 <sbyte>) }; Type[] typesSve = new Type[] { typeof(long), typeof(long) }; string nameMov = op.RegisterSize == RegisterSize.Simd128 ? nameof(Sse.MoveLowToHigh) : nameof(Sse.MoveHighToLow); context.EmitLdvec(op.Rd); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); EmitLdvecWithSignedCast(context, op.Rn, 0); context.EmitLdc_I8(masks[op.Size]); context.Emit(OpCodes.Dup); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); context.EmitStvec(op.Rd); } else { int elems = 8 >> op.Size; int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; if (part != 0) { context.EmitLdvec(op.Rd); context.EmitStvectmp(); } for (int index = 0; index < elems; index++) { EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); EmitVectorInsertTmp(context, part + index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (part == 0) { EmitVectorZeroUpper(context, op.Rd); } } }
public static void Tbl_V(ILEmitterCtx context) { OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp; if (Optimizations.UseSsse3) { Type[] typesCmpSflSub = new Type[] { typeof(Vector128 <sbyte>), typeof(Vector128 <sbyte>) }; Type[] typesOr = new Type[] { typeof(Vector128 <long>), typeof(Vector128 <long>) }; Type[] typesSav = new Type[] { typeof(long) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitLdc_I8(0x0F0F0F0F0F0F0F0FL); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitStvectmp2(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub)); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub)); for (int index = 1; index < op.Size; index++) { context.EmitLdvec((op.Rn + index) & 0x1F); context.EmitLdvec(op.Rm); context.EmitLdc_I8(0x1010101010101010L * index); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSflSub)); context.EmitStvectmp(); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub)); context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); } context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } else { context.EmitLdvec(op.Rm); for (int index = 0; index < op.Size; index++) { context.EmitLdvec((op.Rn + index) & 0x1F); } switch (op.Size) { case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.Tbl1_V64), nameof(VectorHelper.Tbl1_V128)); break; case 2: VectorHelper.EmitCall(context, nameof(VectorHelper.Tbl2_V64), nameof(VectorHelper.Tbl2_V128)); break; case 3: VectorHelper.EmitCall(context, nameof(VectorHelper.Tbl3_V64), nameof(VectorHelper.Tbl3_V128)); break; case 4: VectorHelper.EmitCall(context, nameof(VectorHelper.Tbl4_V64), nameof(VectorHelper.Tbl4_V128)); break; default: throw new InvalidOperationException(); } context.EmitStvec(op.Rd); } }
public static void Ext_V(ILEmitterCtx context) { OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp; if (Optimizations.UseSse2) { Type[] typesShs = new Type[] { typeof(Vector128 <byte>), typeof(byte) }; Type[] typesOr = new Type[] { typeof(Vector128 <byte>), typeof(Vector128 <byte>) }; context.EmitLdvec(op.Rn); if (op.RegisterSize == RegisterSize.Simd64) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); } context.EmitLdc_I4(op.Imm4); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs)); context.EmitLdvec(op.Rm); context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs)); if (op.RegisterSize == RegisterSize.Simd64) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); } context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); context.EmitStvec(op.Rd); } else { int bytes = op.GetBitsCount() >> 3; int position = op.Imm4; for (int index = 0; index < bytes; index++) { int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; if (position == bytes) { position = 0; } EmitVectorExtractZx(context, reg, position++, 0); EmitVectorInsertTmp(context, index, 0); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } }
private static void EmitSse41Fcvt_Signed(ILEmitterCtx context, RoundMode roundMode, bool scalar) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; // sizeF == ((OpCodeSimdShImm64)op).Size - 2 int sizeF = op.Size & 1; if (sizeF == 0) { Type[] types = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) }; Type[] typesRndCvt = new Type[] { typeof(Vector128 <float>) }; Type[] typesSav = new Type[] { typeof(int) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrdered), types)); context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.And), types)); if (op is OpCodeSimdShImm64 fixedOp) { int fBits = GetImmShr(fixedOp); // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) int fpScaled = 0x3F800000 + fBits * 0x800000; context.EmitLdc_I4(fpScaled); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), types)); } context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt)); context.EmitStvectmp(); context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Int32), typesRndCvt)); context.EmitLdvectmp(); context.EmitLdc_I4(0x4F000000); // 2.14748365E9f (2147483648) context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqual), types)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), types)); context.EmitStvec(op.Rd); if (scalar) { EmitVectorZero32_128(context, op.Rd); } else if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } else /* if (sizeF == 1) */ { Type[] types = new Type[] { typeof(Vector128 <double>), typeof(Vector128 <double>) }; Type[] typesRndCvt = new Type[] { typeof(Vector128 <double>) }; Type[] typesSv = new Type[] { typeof(long), typeof(long) }; Type[] typesSav = new Type[] { typeof(long) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrdered), types)); context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types)); if (op is OpCodeSimdShImm64 fixedOp) { int fBits = GetImmShr(fixedOp); // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; context.EmitLdc_I8(fpScaled); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), types)); } context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt)); context.EmitStvectmp(); if (!scalar) { context.EmitLdvectmp(); context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt)); } else { context.EmitLdc_I8(0L); } context.EmitLdvectmp(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSv)); context.EmitLdvectmp(); context.EmitLdc_I8(0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqual), types)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types)); context.EmitStvec(op.Rd); if (scalar) { EmitVectorZeroUpper(context, op.Rd); } } }
private static void EmitFcmpOrFcmpe(ILEmitterCtx context, bool signalNaNs) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; bool cmpWithZero = !(op is OpCodeSimdFcond64) ? op.Bit3 : false; if (Optimizations.FastFP && Optimizations.UseSse2) { if (op.Size == 0) { Type[] typesCmp = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) }; ILLabel lblNaN = new ILLabel(); ILLabel lblEnd = new ILLabel(); context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); if (cmpWithZero) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); } else { context.EmitLdvec(op.Rm); } context.Emit(OpCodes.Dup); context.EmitStvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); context.Emit(OpCodes.Brtrue_S, lblNaN); context.EmitLdc_I4(0); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp)); context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.ZBit); context.EmitStflg((int)PState.CBit); context.EmitStflg((int)PState.VBit); context.Emit(OpCodes.Br_S, lblEnd); context.MarkLabel(lblNaN); context.EmitLdc_I4(1); context.Emit(OpCodes.Dup); context.EmitLdc_I4(0); context.Emit(OpCodes.Dup); context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.ZBit); context.EmitStflg((int)PState.CBit); context.EmitStflg((int)PState.VBit); context.MarkLabel(lblEnd); } else /* if (op.Size == 1) */ { Type[] typesCmp = new Type[] { typeof(Vector128 <double>), typeof(Vector128 <double>) }; ILLabel lblNaN = new ILLabel(); ILLabel lblEnd = new ILLabel(); context.EmitLdvec(op.Rn); context.Emit(OpCodes.Dup); context.EmitStvectmp(); if (cmpWithZero) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); } else { context.EmitLdvec(op.Rm); } context.Emit(OpCodes.Dup); context.EmitStvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); context.Emit(OpCodes.Brtrue_S, lblNaN); context.EmitLdc_I4(0); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); context.EmitLdvectmp(); context.EmitLdvectmp2(); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp)); context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.ZBit); context.EmitStflg((int)PState.CBit); context.EmitStflg((int)PState.VBit); context.Emit(OpCodes.Br_S, lblEnd); context.MarkLabel(lblNaN); context.EmitLdc_I4(1); context.Emit(OpCodes.Dup); context.EmitLdc_I4(0); context.Emit(OpCodes.Dup); context.EmitStflg((int)PState.NBit); context.EmitStflg((int)PState.ZBit); context.EmitStflg((int)PState.CBit); context.EmitStflg((int)PState.VBit); context.MarkLabel(lblEnd); } } else { EmitVectorExtractF(context, op.Rn, 0, op.Size); if (cmpWithZero) { if (op.Size == 0) { context.EmitLdc_R4(0f); } else /* if (op.Size == 1) */ { context.EmitLdc_R8(0d); } } else { EmitVectorExtractF(context, op.Rm, 0, op.Size); } context.EmitLdc_I4(!signalNaNs ? 0 : 1); EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare)); EmitSetNzcv(context); } }
private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags) { OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp; bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; int shift = GetImmShr(op); long roundConst = 1L << (shift - 1); int elems = !scalar ? 8 >> op.Size : 1; int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; if (scalar) { EmitVectorZeroLowerTmp(context); } if (part != 0) { context.EmitLdvec(op.Rd); context.EmitStvectmp(); } for (int index = 0; index < elems; index++) { EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); if (op.Size <= 1 || !round) { if (round) { context.EmitLdc_I8(roundConst); context.Emit(OpCodes.Add); } context.EmitLdc_I4(shift); context.Emit(signedSrc ? OpCodes.Shr : OpCodes.Shr_Un); } else /* if (op.Size == 2 && round) */ { EmitShrImm64(context, signedSrc, roundConst, shift); // shift <= 32 } EmitSatQ(context, op.Size, signedSrc, signedDst); EmitVectorInsertTmp(context, part + index, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (part == 0) { EmitVectorZeroUpper(context, op.Rd); } }
public static void EmitVectorPairwiseSseOrSse2OpF(ILEmitterCtx context, string name) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; int sizeF = op.Size & 1; if (sizeF == 0) { if (op.RegisterSize == RegisterSize.Simd64) { Type[] types = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), types)); context.EmitStvectmp(); context.EmitLdvectmp(); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), types)); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitLdvectmp(); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow), types)); context.EmitCall(typeof(Sse).GetMethod(name, types)); context.EmitStvec(op.Rd); } else /* if (op.RegisterSize == RegisterSize.Simd128) */ { Type[] typesSfl = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>), typeof(byte) }; Type[] types = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); context.EmitCall(typeof(Sse).GetMethod(name, types)); context.EmitStvec(op.Rd); } } else /* if (sizeF == 1) */ { Type[] types = new Type[] { typeof(Vector128 <double>), typeof(Vector128 <double>) }; context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types)); context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types)); context.EmitCall(typeof(Sse2).GetMethod(name, types)); context.EmitStvec(op.Rd); } }
private static void EmitVectorZip(ILEmitterCtx context, int part) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; if (Optimizations.UseSse2) { EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); Type[] types = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; string name = part == 0 || (part != 0 && op.RegisterSize == RegisterSize.Simd64) ? nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); context.EmitCall(typeof(Sse2).GetMethod(name, types)); if (op.RegisterSize == RegisterSize.Simd64 && part != 0) { context.EmitLdc_I4(8); Type[] shTypes = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes)); } EmitStvecWithUnsignedCast(context, op.Rd, op.Size); if (op.RegisterSize == RegisterSize.Simd64 && part == 0) { EmitVectorZeroUpper(context, op.Rd); } } else { int words = op.GetBitsCount() >> 4; int pairs = words >> op.Size; int Base = part != 0 ? pairs : 0; for (int index = 0; index < pairs; index++) { int idx = index << 1; EmitVectorExtractZx(context, op.Rn, Base + index, op.Size); EmitVectorExtractZx(context, op.Rm, Base + index, op.Size); EmitVectorInsertTmp(context, idx + 1, op.Size); EmitVectorInsertTmp(context, idx, op.Size); } context.EmitLdvectmp(); context.EmitStvec(op.Rd); if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } } }