/// <summary>
/// Emits code that assembles the destination vector one byte at a time from Rn and Rm.
/// Reading starts at byte <c>Imm4</c>; a destination byte is taken from Rn while
/// <c>Imm4 + index</c> is below the vector byte count, and from Rm otherwise.
/// </summary>
/// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdExt64</c>.</param>
public static void Ext_V(ILEmitterCtx context)
        {
            OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp;

            // Seed the temporary vector with the current contents of Rd.
            context.EmitLdvec(op.Rd);
            context.EmitStvectmp();

            int byteCount = op.GetBitsCount() >> 3;
            int srcByte   = op.Imm4;

            for (int dstByte = 0; dstByte < byteCount; dstByte++)
            {
                int srcReg = op.Imm4 + dstByte >= byteCount ? op.Rm : op.Rn;

                // Wrap the source position back to byte 0 once it reaches the end of the vector.
                if (srcByte == byteCount)
                {
                    srcByte = 0;
                }

                EmitVectorExtractZx(context, srcReg, srcByte, 0);
                EmitVectorInsertTmp(context, dstByte, 0);

                srcByte++;
            }

            // Publish the assembled temporary vector as Rd.
            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

            if (is64Bit)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #2
0
        /// <summary>
        /// Emits code that picks every second element (offset by <paramref name="part"/>) from
        /// Rn and Rm; the Rn picks fill the lower half of the result, the Rm picks the upper half.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdReg64</c>.</param>
        /// <param name="part">Offset added to the even source index (callers presumably pass 0 or 1).</param>
        private static void EmitVectorUnzip(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int halfElems = (op.GetBitsCount() >> 4) >> op.Size;

            int dst = 0;

            while (dst < halfElems)
            {
                int src = (dst << 1) + part;

                EmitVectorExtractZx(context, op.Rn, src, op.Size);
                EmitVectorExtractZx(context, op.Rm, src, op.Size);

                // The Rm element was extracted last, so it is inserted first (into the upper half).
                EmitVectorInsertTmp(context, halfElems + dst, op.Size);
                EmitVectorInsertTmp(context, dst, op.Size);

                dst++;
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

            if (is64Bit)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #3
0
        /// <summary>
        /// Emits a floating-point vector operation in which every element of Rn is combined with
        /// one fixed element of Rm.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdReg64</c>.</param>
        /// <param name="emit">Callback invoked once per element after the operands have been extracted.</param>
        /// <param name="elem">Index of the Rm element used in every iteration.</param>
        /// <param name="ternary">When true, the matching Rd element is extracted first as an additional operand.</param>
        public static void EmitVectorOpByElemF(ILEmitterCtx context, Action emit, int elem, bool ternary)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int sizeF = op.Size & 1;

            // Element count = byte count >> (sizeF + 2).
            int byteCount = op.GetBitsCount() >> 3;
            int elemCount = byteCount >> (sizeF + 2);

            for (int i = 0; i < elemCount; i++)
            {
                if (ternary)
                {
                    EmitVectorExtractF(context, op.Rd, i, sizeF);
                }

                EmitVectorExtractF(context, op.Rn, i,    sizeF);
                EmitVectorExtractF(context, op.Rm, elem, sizeF);

                emit();

                EmitVectorInsertTmpF(context, i, sizeF);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

            if (is64Bit)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #4
0
        /// <summary>
        /// Emits a widening vector operation in which each Rn element (size <c>op.Size</c>) is
        /// combined with one fixed Rm element and the result is inserted at size <c>op.Size + 1</c>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdReg64</c>.</param>
        /// <param name="emit">Callback invoked once per element after the operands have been loaded.</param>
        /// <param name="elem">Index of the Rm element, extracted once and kept in the scalar temporary.</param>
        /// <param name="ternary">When true, the matching widened Rd element is extracted first as an additional operand.</param>
        /// <param name="signed">Forwarded to <c>EmitVectorExtract</c> for every extraction.</param>
        public static void EmitVectorWidenOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int narrowSize = op.Size;
            int wideSize   = narrowSize + 1;

            int elemCount = 8 >> narrowSize;

            // For 128-bit operations the source elements start at index elemCount.
            int srcBase = 0;

            if (op.RegisterSize == RegisterSize.Simd128)
            {
                srcBase = elemCount;
            }

            // Extract the shared Rm element once and stash it in the scalar temporary.
            EmitVectorExtract(context, op.Rm, elem, narrowSize, signed);
            context.EmitSttmp();

            for (int i = 0; i < elemCount; i++)
            {
                if (ternary)
                {
                    EmitVectorExtract(context, op.Rd, i, wideSize, signed);
                }

                EmitVectorExtract(context, op.Rn, srcBase + i, narrowSize, signed);
                context.EmitLdtmp();

                emit();

                EmitVectorInsertTmp(context, i, wideSize);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);
        }
예제 #5
0
        /// <summary>
        /// Emits a floating-point pairwise operation: adjacent element pairs of Rn produce the
        /// lower half of the result and adjacent pairs of Rm produce the upper half.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdReg64</c>.</param>
        /// <param name="emit">Callback invoked after each pair has been extracted (twice per iteration).</param>
        public static void EmitVectorPairwiseOpF(ILEmitterCtx context, Action emit)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            int sizeF = op.Size & 1;

            int pairCount = (op.GetBitsCount() >> 4) >> (sizeF + 2);

            for (int pair = 0; pair < pairCount; pair++)
            {
                int first  = pair << 1;
                int second = first + 1;

                EmitVectorExtractF(context, op.Rn, first,  sizeF);
                EmitVectorExtractF(context, op.Rn, second, sizeF);

                emit();

                EmitVectorExtractF(context, op.Rm, first,  sizeF);
                EmitVectorExtractF(context, op.Rm, second, sizeF);

                emit();

                // The Rm result was produced last, so it is inserted first (into the upper half).
                EmitVectorInsertTmpF(context, pairCount + pair, sizeF);
                EmitVectorInsertTmpF(context, pair, sizeF);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

            if (is64Bit)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #6
0
        /// <summary>
        /// Emits code that reorders the elements of Rn: destination element <c>i</c> is read from
        /// source element <c>i ^ mask</c>, where <c>mask = (1 &lt;&lt; (containerSize - op.Size)) - 1</c>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        /// <param name="containerSize">Log2 size used, together with <c>op.Size</c>, to build the index mask.</param>
        private static void EmitRev_V(ILEmitterCtx context, int containerSize)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int byteCount = op.GetBitsCount() >> 3;
            int elemCount = byteCount >> op.Size;

            int indexMask = (1 << (containerSize - op.Size)) - 1;

            int dst = 0;

            while (dst < elemCount)
            {
                EmitVectorExtractZx(context, op.Rn, dst ^ indexMask, op.Size);

                EmitVectorInsertTmp(context, dst, op.Size);

                dst++;
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

            if (is64Bit)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #7
0
        /// <summary>
        /// Emits a per-element floating-point widening conversion from Rn into Rd.
        /// When <c>sizeF</c> is 0 each 16-bit source element goes through
        /// <c>SoftFloat16_32.FPConvert</c>; when it is 1 each single is converted with <c>Conv_R8</c>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        public static void Fcvtl_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int sizeF = op.Size & 1;

            int elemCount = 4 >> sizeF;

            // For 128-bit operations the source elements start at index elemCount.
            int srcBase = 0;

            if (op.RegisterSize == RegisterSize.Simd128)
            {
                srcBase = elemCount;
            }

            for (int i = 0; i < elemCount; i++)
            {
                int src = srcBase + i;

                if (sizeF != 0)
                {
                    // sizeF == 1: single -> double.
                    EmitVectorExtractF(context, op.Rn, src, 0);

                    context.Emit(OpCodes.Conv_R8);
                }
                else
                {
                    // sizeF == 0: 16-bit source element converted by the soft-float helper.
                    EmitVectorExtractZx(context, op.Rn, src, 1);
                    context.Emit(OpCodes.Conv_U2);

                    context.EmitLdarg(TranslatedSub.StateArgIdx);

                    context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert));
                }

                EmitVectorInsertTmpF(context, i, sizeF);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);
        }
예제 #8
0
        /// <summary>
        /// Emits code that copies each Rn element, read at size <c>op.Size + 1</c>, into an Rd
        /// element of size <c>op.Size</c>. Uses an SSSE3 byte shuffle when available and a
        /// per-element loop otherwise.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        public static void Xtn_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            bool is128Bit = op.RegisterSize == RegisterSize.Simd128;

            if (!Optimizations.UseSsse3)
            {
                int elemCount = 8 >> op.Size;

                // 128-bit form writes the upper half and keeps the existing lower half of Rd.
                int dstBase = is128Bit ? elemCount : 0;

                if (dstBase != 0)
                {
                    context.EmitLdvec(op.Rd);
                    context.EmitStvectmp();
                }

                for (int i = 0; i < elemCount; i++)
                {
                    EmitVectorExtractZx(context, op.Rn, i, op.Size + 1);

                    EmitVectorInsertTmp(context, dstBase + i, op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (dstBase == 0)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }

                return;
            }

            Type[] typesSetVec = new Type[] { typeof(long), typeof(long) };

            string finalMove = is128Bit
                ? nameof(Sse.MoveLowToHigh)
                : nameof(Sse.MoveHighToLow);

            // First operand of the final move: MoveLowToHigh(Rd, zero).
            context.EmitLdvec(op.Rd);
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));

            // Second operand: Rn shuffled with the same 64-bit mask in both halves.
            context.EmitLdvec(op.Rn);

            context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]);
            context.Emit(OpCodes.Dup);

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSetVec));

            context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));

            context.EmitCall(typeof(Sse).GetMethod(finalMove));

            context.EmitStvec(op.Rd);
        }
예제 #9
0
        /// <summary>
        /// Emits a shift-right-by-immediate over the elements of Rn, with optional rounding and
        /// optional accumulation into Rd, as selected by <paramref name="flags"/>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdShImm64</c>.</param>
        /// <param name="flags">Combination of Scalar, Signed, Round and Accumulate.</param>
        private static void EmitShrImmOp(ILEmitterCtx context, ShrImmFlags flags)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            bool isScalar  = (flags & ShrImmFlags.Scalar)     != 0;
            bool isSigned  = (flags & ShrImmFlags.Signed)     != 0;
            bool doRound   = (flags & ShrImmFlags.Round)      != 0;
            bool doAccum   = (flags & ShrImmFlags.Accumulate) != 0;

            int shift = GetImmShr(op);

            // Rounding adds half of the shifted-out range before shifting.
            long roundConst = 1L << (shift - 1);

            int byteCount = op.GetBitsCount() >> 3;
            int elemCount = isScalar ? 1 : byteCount >> op.Size;

            for (int i = 0; i < elemCount; i++)
            {
                EmitVectorExtract(context, op.Rn, i, op.Size, isSigned);

                if (op.Size > 2)
                {
                    // op.Size == 3: delegated to the dedicated 64-bit helper.
                    EmitShrImm64(context, isSigned, doRound ? roundConst : 0L, shift);
                }
                else
                {
                    if (doRound)
                    {
                        context.EmitLdc_I8(roundConst);

                        context.Emit(OpCodes.Add);
                    }

                    context.EmitLdc_I4(shift);

                    if (isSigned)
                    {
                        context.Emit(OpCodes.Shr);
                    }
                    else
                    {
                        context.Emit(OpCodes.Shr_Un);
                    }
                }

                if (doAccum)
                {
                    EmitVectorExtract(context, op.Rd, i, op.Size, isSigned);

                    context.Emit(OpCodes.Add);
                }

                EmitVectorInsertTmp(context, i, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (isScalar || (op.RegisterSize == RegisterSize.Simd64))
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #10
0
        /// <summary>
        /// Emits a floating-point widening conversion from Rn into Rd. Uses
        /// <c>Sse2.ConvertToVector128Double</c> when SSE2 is enabled and <c>sizeF</c> is 1;
        /// otherwise converts element by element.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        public static void Fcvtl_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int sizeF = op.Size & 1;

            bool is128Bit = op.RegisterSize == RegisterSize.Simd128;

            if (!Optimizations.UseSse2 || sizeF != 1)
            {
                int elemCount = 4 >> sizeF;

                // For 128-bit operations the source elements start at index elemCount.
                int srcBase = is128Bit ? elemCount : 0;

                for (int i = 0; i < elemCount; i++)
                {
                    int src = srcBase + i;

                    if (sizeF != 0)
                    {
                        // sizeF == 1: single -> double.
                        EmitVectorExtractF(context, op.Rn, src, 0);

                        context.Emit(OpCodes.Conv_R8);
                    }
                    else
                    {
                        // sizeF == 0: 16-bit source element converted by the soft-float helper.
                        EmitVectorExtractZx(context, op.Rn, src, 1);
                        context.Emit(OpCodes.Conv_U2);

                        context.EmitLdarg(TranslatedSub.StateArgIdx);

                        context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert));
                    }

                    EmitVectorInsertTmpF(context, i, sizeF);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                return;
            }

            Type[] typesMove    = { typeof(Vector128<float>), typeof(Vector128<float>) };
            Type[] typesConvert = { typeof(Vector128<float>) };

            string moveName = is128Bit
                ? nameof(Sse.MoveHighToLow)
                : nameof(Sse.MoveLowToHigh);

            // Rn is used as both operands of the move that precedes the conversion.
            context.EmitLdvec(op.Rn);
            context.Emit(OpCodes.Dup);

            context.EmitCall(typeof(Sse).GetMethod(moveName, typesMove));

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesConvert));

            EmitStvecWithCastFromDouble(context, op.Rd);
        }
예제 #11
0
        /// <summary>
        /// Emits code that interleaves elements of Rn and Rm into Rd. With
        /// <paramref name="part"/> equal to 0 the low source elements are used, otherwise the
        /// high ones. Uses SSE2 unpack instructions when available.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdReg64</c>.</param>
        /// <param name="part">0 selects the low half of the sources; any other value selects the high half.</param>
        private static void EmitVectorZip(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            if (!Optimizations.UseSse2)
            {
                int pairCount = (op.GetBitsCount() >> 4) >> op.Size;

                int srcBase = part == 0 ? 0 : pairCount;

                for (int pair = 0; pair < pairCount; pair++)
                {
                    int dst = pair << 1;

                    EmitVectorExtractZx(context, op.Rn, srcBase + pair, op.Size);
                    EmitVectorExtractZx(context, op.Rm, srcBase + pair, op.Size);

                    // The Rm element was extracted last, so it is inserted first (odd slot).
                    EmitVectorInsertTmp(context, dst + 1, op.Size);
                    EmitVectorInsertTmp(context, dst, op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }

                return;
            }

            string unpackName = part != 0
                ? nameof(Sse2.UnpackHigh)
                : nameof(Sse2.UnpackLow);

            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);

            if (op.RegisterSize != RegisterSize.Simd128)
            {
                // Interleave the low halves, then select one 64-bit half against a zero vector.
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size)));
                VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

                context.EmitCall(typeof(Sse2).GetMethod(unpackName, GetTypesSflUpk(3)));
            }
            else
            {
                context.EmitCall(typeof(Sse2).GetMethod(unpackName, GetTypesSflUpk(op.Size)));
            }

            context.EmitStvec(op.Rd);
        }
예제 #12
0
        /// <summary>
        /// Emits a call to <c>VectorHelper.VectorInsertInt</c> on the temporary vector and stores
        /// the result back into the temporary vector. The value to insert is presumably already
        /// on the evaluation stack when this is called (it is not pushed here).
        /// </summary>
        /// <param name="context">Emitter context that receives the instructions.</param>
        /// <param name="index">Element index, validated by <c>ThrowIfInvalid</c>.</param>
        /// <param name="size">Element size argument, validated by <c>ThrowIfInvalid</c>.</param>
        public static void EmitVectorInsertTmp(ILEmitterCtx context, int index, int size)
        {
            ThrowIfInvalid(index, size);

            const string InsertHelper = nameof(VectorHelper.VectorInsertInt);

            // Helper arguments pushed here: temporary vector, element index, element size.
            context.EmitLdvectmp();
            context.EmitLdc_I4(index);
            context.EmitLdc_I4(size);

            VectorHelper.EmitCall(context, InsertHelper);

            // Keep the updated vector in the temporary slot.
            context.EmitStvectmp();
        }
예제 #13
0
        /// <summary>
        /// Emits code that updates the temporary vector: with SSE it is replaced by
        /// <c>Sse.MoveHighToLow(tmp, zero)</c>; without SSE the four-argument
        /// <c>EmitVectorInsertTmp</c> overload is called with (0, 3, 0).
        /// </summary>
        /// <param name="context">Emitter context that receives the instructions.</param>
        public static void EmitVectorZeroLowerTmp(ILEmitterCtx context)
        {
            if (!Optimizations.UseSse)
            {
                // NOTE(review): this overload is not visible here; the last argument is
                // presumably the value to insert at index 0, size 3 — confirm.
                EmitVectorInsertTmp(context, 0, 3, 0);

                return;
            }

            context.EmitLdvectmp();
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow)));

            context.EmitStvectmp();
        }
예제 #14
0
        /// <summary>
        /// Emits a floating-point to integer conversion: each Rn element is rounded with
        /// <c>MidpointRounding.ToEven</c> and passed to a saturating <c>VectorHelper</c>
        /// conversion chosen by element size and signedness.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        /// <param name="signed">Selects the signed (<c>SatF*ToS*</c>) or unsigned (<c>SatF*ToU*</c>) helper.</param>
        /// <param name="scalar">When true only element 0 is converted and the upper half of Rd is zeroed.</param>
        private static void EmitFcvtn(ILEmitterCtx context, bool signed, bool scalar)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int sizeF = op.Size & 1;
            int sizeI = sizeF + 2;

            bool isSingle = sizeF == 0;

            int byteCount = op.GetBitsCount() >> 3;
            int elemCount = scalar ? 1 : byteCount >> sizeI;

            // The saturating helper depends only on the element size and signedness.
            string satHelper;

            if (isSingle)
            {
                satHelper = signed
                    ? nameof(VectorHelper.SatF32ToS32)
                    : nameof(VectorHelper.SatF32ToU32);
            }
            else
            {
                satHelper = signed
                    ? nameof(VectorHelper.SatF64ToS64)
                    : nameof(VectorHelper.SatF64ToU64);
            }

            if (scalar && isSingle)
            {
                EmitVectorZeroLowerTmp(context);
            }

            for (int i = 0; i < elemCount; i++)
            {
                EmitVectorExtractF(context, op.Rn, i, sizeF);

                EmitRoundMathCall(context, MidpointRounding.ToEven);

                VectorHelper.EmitCall(context, satHelper);

                if (isSingle)
                {
                    // The single-precision path adds a Conv_U8 before the insert.
                    context.Emit(OpCodes.Conv_U8);
                }

                EmitVectorInsertTmp(context, i, sizeI);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (scalar || (op.RegisterSize == RegisterSize.Simd64))
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #15
0
        /// <summary>
        /// Emits a signed rounding shift-right of Rn that is accumulated into Rd. Uses SSE2 when
        /// it is enabled and <c>op.Size</c> is 1 or 2; otherwise falls back to
        /// <c>EmitVectorShrImmOpSx</c> with the Round and Accumulate flags.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdShImm64</c>.</param>
        public static void Srsra_V(ILEmitterCtx context)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            bool useSse2 = Optimizations.UseSse2 && op.Size > 0 && op.Size < 3;

            if (!useSse2)
            {
                EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate);

                return;
            }

            Type vectorType = VectorIntTypesPerSizeLog2[op.Size];

            Type[] typesShift = { vectorType, typeof(byte) };
            Type[] typesAdd   = { vectorType, vectorType };

            int shift    = GetImmShr(op);
            int elemBits = 8 << op.Size;

            EmitLdvecWithSignedCast(context, op.Rd, op.Size);
            EmitLdvecWithSignedCast(context, op.Rn, op.Size);

            // Keep a copy of Rn in the temporary vector for the arithmetic shift below.
            context.Emit(OpCodes.Dup);
            context.EmitStvectmp();

            // Shift left by (elemBits - shift), then logically right by (elemBits - 1):
            // this leaves bit (shift - 1) of each element in bit 0, i.e. the rounding increment.
            context.EmitLdc_I4(elemBits - shift);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShift));

            context.EmitLdc_I4(elemBits - 1);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShift));

            context.EmitLdvectmp();

            context.EmitLdc_I4(shift);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShift));

            // First add: rounding increment + shifted value. Second add: accumulate into Rd.
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));

            EmitStvecWithSignedCast(context, op.Rd, op.Size);

            bool is64Bit = op.RegisterSize == RegisterSize.Simd64;

            if (is64Bit)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #16
0
        /// <summary>
        /// Emits an unsigned shift-right-by-immediate that reads each Rn element at size
        /// <c>op.Size + 1</c> and inserts the shifted result at size <c>op.Size</c>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdShImm64</c>.</param>
        /// <param name="round">When true, <c>1 &lt;&lt; (shift - 1)</c> is added before shifting.</param>
        private static void EmitVectorShrImmNarrowOpZx(ILEmitterCtx context, bool round)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            int shift = GetImmShr(op);

            long roundConst = 1L << (shift - 1);

            int wideSize  = op.Size + 1;
            int elemCount = 8 >> op.Size;

            // 128-bit form writes the upper half and keeps the existing lower half of Rd.
            bool writesUpperHalf = op.RegisterSize == RegisterSize.Simd128;

            int dstBase = writesUpperHalf ? elemCount : 0;

            if (dstBase != 0)
            {
                context.EmitLdvec(op.Rd);
                context.EmitStvectmp();
            }

            for (int i = 0; i < elemCount; i++)
            {
                EmitVectorExtractZx(context, op.Rn, i, wideSize);

                if (round)
                {
                    context.EmitLdc_I8(roundConst);

                    context.Emit(OpCodes.Add);
                }

                context.EmitLdc_I4(shift);

                context.Emit(OpCodes.Shr_Un);

                EmitVectorInsertTmp(context, dstBase + i, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (dstBase == 0)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #17
0
        /// <summary>
        /// Emits a saturating narrowing operation: each Rn element is read at size
        /// <c>op.Size + 1</c>, passed through <c>EmitSatQ</c>, and inserted at size <c>op.Size</c>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        /// <param name="flags">Combination of Scalar, SignedSrc and SignedDst.</param>
        public static void EmitSaturatingNarrowOp(ILEmitterCtx context, SaturatingNarrowFlags flags)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            bool isScalar    = (flags & SaturatingNarrowFlags.Scalar)    != 0;
            bool srcIsSigned = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
            bool dstIsSigned = (flags & SaturatingNarrowFlags.SignedDst) != 0;

            int elemCount = isScalar ? 1 : 8 >> op.Size;

            // Only the non-scalar 128-bit form writes the upper half of Rd.
            int dstBase = 0;

            if (!isScalar && (op.RegisterSize == RegisterSize.Simd128))
            {
                dstBase = elemCount;
            }

            if (isScalar)
            {
                EmitVectorZeroLowerTmp(context);
            }

            if (dstBase != 0)
            {
                // Preserve the existing lower half of Rd.
                context.EmitLdvec(op.Rd);
                context.EmitStvectmp();
            }

            for (int i = 0; i < elemCount; i++)
            {
                EmitVectorExtract(context, op.Rn, i, op.Size + 1, srcIsSigned);

                EmitSatQ(context, op.Size, srcIsSigned, dstIsSigned);

                EmitVectorInsertTmp(context, dstBase + i, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (dstBase == 0)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #18
0
        /// <summary>
        /// Emits a signed saturating unary operation: each sign-extended Rn element is passed to
        /// <paramref name="emit"/> and then saturated before being inserted into the result.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        /// <param name="emit">Callback that emits the unary operation for the extracted element.</param>
        /// <param name="flags">Only the Scalar flag is inspected here.</param>
        public static void EmitSaturatingUnaryOpSx(ILEmitterCtx context, Action emit, SaturatingFlags flags)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            bool isScalar = (flags & SaturatingFlags.Scalar) != 0;

            int byteCount = op.GetBitsCount() >> 3;
            int elemCount = isScalar ? 1 : byteCount >> op.Size;

            if (isScalar)
            {
                EmitVectorZeroLowerTmp(context);
            }

            for (int i = 0; i < elemCount; i++)
            {
                EmitVectorExtractSx(context, op.Rn, i, op.Size);

                emit();

                if (op.Size > 2)
                {
                    // op.Size == 3: 64-bit elements use the dedicated abs/neg saturation helper.
                    EmitUnarySignedSatQAbsOrNeg(context);
                }
                else
                {
                    EmitSatQ(context, op.Size, true, true);
                }

                EmitVectorInsertTmp(context, i, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (isScalar || (op.RegisterSize == RegisterSize.Simd64))
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #19
0
        /// <summary>
        /// Emits a call to <c>VectorHelper.VectorInsertSingle</c> (size 0) or
        /// <c>VectorHelper.VectorInsertDouble</c> (size 1) on the temporary vector and stores the
        /// result back into the temporary vector.
        /// </summary>
        /// <param name="context">Emitter context that receives the instructions.</param>
        /// <param name="index">Element index, validated by <c>ThrowIfInvalidF</c>.</param>
        /// <param name="size">0 for single precision, 1 for double precision.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is neither 0 nor 1.</exception>
        public static void EmitVectorInsertTmpF(ILEmitterCtx context, int index, int size)
        {
            ThrowIfInvalidF(index, size);

            context.EmitLdvectmp();
            context.EmitLdc_I4(index);

            switch (size)
            {
                case 0:
                    VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertSingle));
                    break;

                case 1:
                    VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertDouble));
                    break;

                default:
                    throw new ArgumentOutOfRangeException(nameof(size));
            }

            context.EmitStvectmp();
        }
예제 #20
0
        /// <summary>
        /// Emits a widening binary operation between each Rn element (size <c>op.Size</c>) and
        /// the constant <paramref name="imm"/>; results are inserted at size <c>op.Size + 1</c>.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        /// <param name="emit">Callback that emits the binary operation once both operands are loaded.</param>
        /// <param name="imm">32-bit constant pushed as the second operand.</param>
        /// <param name="signed">Forwarded to <c>EmitVectorExtract</c>.</param>
        private static void EmitVectorShImmWidenBinaryOp(ILEmitterCtx context, Action emit, int imm, bool signed)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int elemCount = 8 >> op.Size;

            // For 128-bit operations the source elements start at index elemCount.
            int srcBase = 0;

            if (op.RegisterSize == RegisterSize.Simd128)
            {
                srcBase = elemCount;
            }

            int i = 0;

            while (i < elemCount)
            {
                EmitVectorExtract(context, op.Rn, srcBase + i, op.Size, signed);

                context.EmitLdc_I4(imm);

                emit();

                EmitVectorInsertTmp(context, i, op.Size + 1);

                i++;
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);
        }
예제 #21
0
        /// <summary>
        /// Emits a floating-point narrowing conversion from Rn into Rd. Uses
        /// <c>Sse2.ConvertToVector128Single</c> when SSE2 is enabled and <c>sizeF</c> is 1;
        /// otherwise converts element by element.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        public static void Fcvtn_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            int sizeF = op.Size & 1;

            bool is128Bit = op.RegisterSize == RegisterSize.Simd128;

            if (!Optimizations.UseSse2 || sizeF != 1)
            {
                int elemCount = 4 >> sizeF;

                // 128-bit form writes the upper half and keeps the existing lower half of Rd.
                int dstBase = is128Bit ? elemCount : 0;

                if (dstBase != 0)
                {
                    context.EmitLdvec(op.Rd);
                    context.EmitStvectmp();
                }

                for (int i = 0; i < elemCount; i++)
                {
                    EmitVectorExtractF(context, op.Rn, i, sizeF);

                    if (sizeF != 0)
                    {
                        // sizeF == 1: double -> single.
                        context.Emit(OpCodes.Conv_R4);

                        EmitVectorInsertTmpF(context, dstBase + i, 0);
                    }
                    else
                    {
                        // sizeF == 0: single converted by the soft-float helper, inserted as an integer of size 1.
                        context.EmitLdarg(TranslatedSub.StateArgIdx);

                        context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert));

                        context.Emit(OpCodes.Conv_U8);
                        EmitVectorInsertTmp(context, dstBase + i, 1);
                    }
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (dstBase == 0)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }

                return;
            }

            Type[] typesConvert = { typeof(Vector128<double>) };

            string finalMove = is128Bit
                ? nameof(Sse.MoveLowToHigh)
                : nameof(Sse.MoveHighToLow);

            // First operand of the final move: MoveLowToHigh(Rd, zero).
            context.EmitLdvec(op.Rd);
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));

            // Second operand: the converted singles, combined with themselves via MoveLowToHigh.
            EmitLdvecWithCastToDouble(context, op.Rn);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesConvert));
            context.Emit(OpCodes.Dup);

            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));

            context.EmitCall(typeof(Sse).GetMethod(finalMove));

            context.EmitStvec(op.Rd);
        }
예제 #22
0
        /// <summary>
        /// Emits code that picks every second element (offset by <paramref name="part"/>) from
        /// Rn and Rm; the Rn picks fill the lower half of the result, the Rm picks the upper half.
        /// Uses SSSE3 byte shuffles plus SSE2 unpacks when available.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimdReg64</c>.</param>
        /// <param name="part">0 selects <c>UnpackLow</c> on the SSSE3 path; in the fallback it is the offset added to the even source index.</param>
        private static void EmitVectorUnzip(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            if (!Optimizations.UseSsse3)
            {
                int halfElems = (op.GetBitsCount() >> 4) >> op.Size;

                for (int dst = 0; dst < halfElems; dst++)
                {
                    int src = (dst << 1) + part;

                    EmitVectorExtractZx(context, op.Rn, src, op.Size);
                    EmitVectorExtractZx(context, op.Rm, src, op.Size);

                    // The Rm element was extracted last, so it is inserted first (into the upper half).
                    EmitVectorInsertTmp(context, halfElems + dst, op.Size);
                    EmitVectorInsertTmp(context, dst, op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }

                return;
            }

            Type[] typesSetVec = { typeof(long), typeof(long) };

            string unpackName = part != 0
                ? nameof(Sse2.UnpackHigh)
                : nameof(Sse2.UnpackLow);

            if (op.RegisterSize != RegisterSize.Simd128)
            {
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rm);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value

                if (op.Size < 2)
                {
                    context.EmitLdc_I8(_masksE1_Uzp[op.Size]); // maskE1
                    context.EmitLdc_I8(_masksE0_Uzp[op.Size]); // maskE0

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSetVec));

                    context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
                }

                VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

                context.EmitCall(typeof(Sse2).GetMethod(unpackName, GetTypesSflUpk(3)));

                context.EmitStvec(op.Rd);

                return;
            }

            // 128-bit form: Rn and then Rm go through the same load + optional shuffle sequence.
            foreach (int reg in new int[] { op.Rn, op.Rm })
            {
                context.EmitLdvec(reg); // value

                if (op.Size < 3)
                {
                    context.EmitLdc_I8(_masksE1_TrnUzp   [op.Size]); // maskE1
                    context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSetVec));

                    context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0)));
                }
            }

            context.EmitCall(typeof(Sse2).GetMethod(unpackName, GetTypesSflUpk(3)));

            context.EmitStvec(op.Rd);
        }
예제 #23
0
        /// <summary>
        /// Emits code that copies each Rn element, read at size <c>op.Size + 1</c>, into an Rd
        /// element of size <c>op.Size</c>. Uses an SSSE3 byte shuffle with a per-size mask when
        /// available and a per-element loop otherwise.
        /// </summary>
        /// <param name="context">Emitter context; its current opcode is cast to <c>OpCodeSimd64</c>.</param>
        public static void Xtn_V(ILEmitterCtx context)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            bool is128Bit = op.RegisterSize == RegisterSize.Simd128;

            if (!Optimizations.UseSsse3)
            {
                int elemCount = 8 >> op.Size;

                // 128-bit form writes the upper half and keeps the existing lower half of Rd.
                int dstBase = is128Bit ? elemCount : 0;

                if (dstBase != 0)
                {
                    context.EmitLdvec(op.Rd);
                    context.EmitStvectmp();
                }

                for (int i = 0; i < elemCount; i++)
                {
                    EmitVectorExtractZx(context, op.Rn, i, op.Size + 1);

                    EmitVectorInsertTmp(context, dstBase + i, op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (dstBase == 0)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }

                return;
            }

            // Shuffle masks indexed by op.Size; each byte is a source byte index.
            long[] shuffleMasks =
            {
                0x0E0C0A0806040200L, // bytes 14, 12, 10, 8, 6, 4, 2, 0
                0x0D0C090805040100L, // bytes 13, 12,  9, 8, 5, 4, 1, 0
                0x0B0A090803020100L  // bytes 11, 10,  9, 8, 3, 2, 1, 0
            };

            Type[] typesMove    = { typeof(Vector128<float>), typeof(Vector128<float>) };
            Type[] typesShuffle = { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
            Type[] typesSetVec  = { typeof(long), typeof(long) };

            string finalMove = is128Bit
                ? nameof(Sse.MoveLowToHigh)
                : nameof(Sse.MoveHighToLow);

            // First operand of the final move: MoveLowToHigh(Rd, zero).
            context.EmitLdvec(op.Rd);
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

            context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMove));

            // Second operand: Rn shuffled with the same 64-bit mask in both halves.
            EmitLdvecWithSignedCast(context, op.Rn, 0);

            context.EmitLdc_I8(shuffleMasks[op.Size]);
            context.Emit(OpCodes.Dup);

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSetVec));

            context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesShuffle));

            context.EmitCall(typeof(Sse).GetMethod(finalMove, typesMove));

            context.EmitStvec(op.Rd);
        }
예제 #24
0
        /// <summary>
        /// Emits IL for the vector table lookup described by the current <see cref="OpCodeSimdTbl64"/>.
        /// Uses SSSE3 byte shuffles when <c>Optimizations.UseSsse3</c> is set, otherwise calls the
        /// matching <c>VectorHelper.TblN_V64/V128</c> helper.
        /// </summary>
        /// <param name="context">Emitter context whose <c>CurrOp</c> is the table lookup opcode.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown on the fallback path when <c>op.Size</c> (the table register count) is not 1..4.
        /// </exception>
        public static void Tbl_V(ILEmitterCtx context)
        {
            OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp;

            if (!Optimizations.UseSsse3)
            {
                // Fallback: push the index vector followed by every table register,
                // then let the helper matching the table count do the lookup.
                context.EmitLdvec(op.Rm);

                for (int table = 0; table < op.Size; table++)
                {
                    context.EmitLdvec((op.Rn + table) & 0x1F);
                }

                string helper64;
                string helper128;

                switch (op.Size)
                {
                    case 1:
                        helper64  = nameof(VectorHelper.Tbl1_V64);
                        helper128 = nameof(VectorHelper.Tbl1_V128);
                        break;

                    case 2:
                        helper64  = nameof(VectorHelper.Tbl2_V64);
                        helper128 = nameof(VectorHelper.Tbl2_V128);
                        break;

                    case 3:
                        helper64  = nameof(VectorHelper.Tbl3_V64);
                        helper128 = nameof(VectorHelper.Tbl3_V128);
                        break;

                    case 4:
                        helper64  = nameof(VectorHelper.Tbl4_V64);
                        helper128 = nameof(VectorHelper.Tbl4_V128);
                        break;

                    default:
                        throw new InvalidOperationException();
                }

                VectorHelper.EmitCall(context, helper64, helper128);

                context.EmitStvec(op.Rd);

                return;
            }

            Type[] sbytePair = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
            Type[] longPair  = new Type[] { typeof(Vector128<long>), typeof(Vector128<long>) };
            Type[] longArg   = new Type[] { typeof(long) };

            var broadcast   = typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), longArg);
            var greaterThan = typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), sbytePair);
            var subtract    = typeof(Sse2).GetMethod(nameof(Sse2.Subtract), sbytePair);
            var or          = typeof(Sse2).GetMethod(nameof(Sse2.Or), longPair);
            var shuffle     = typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), sbytePair);

            // First table register: Shuffle(Rn, Rm | (Rm > 0x0F)).
            // The 0x0F broadcast is kept in vector temp 2 for reuse by the loop below.
            // NOTE(review): relies on PSHUFB zeroing a lane whose index byte has bit 7 set.
            context.EmitLdvec(op.Rn);
            context.EmitLdvec(op.Rm);

            context.EmitLdc_I8(0x0F0F0F0F0F0F0F0FL);
            context.EmitCall(broadcast);

            context.EmitStvectmp2();
            context.EmitLdvectmp2();

            context.EmitCall(greaterThan);

            context.EmitLdvec(op.Rm);

            context.EmitCall(or);
            context.EmitCall(shuffle);

            // Remaining table registers: rebase the indices by 16 per table, apply the
            // same out-of-range masking, shuffle, and OR into the running result.
            for (int table = 1; table < op.Size; table++)
            {
                context.EmitLdvec((op.Rn + table) & 0x1F);
                context.EmitLdvec(op.Rm);

                context.EmitLdc_I8(0x1010101010101010L * table);
                context.EmitCall(broadcast);

                context.EmitCall(subtract);

                // Rebased indices go to vector temp 1 because they are needed twice.
                context.EmitStvectmp();
                context.EmitLdvectmp();

                context.EmitLdvectmp2();

                context.EmitCall(greaterThan);

                context.EmitLdvectmp();

                context.EmitCall(or);
                context.EmitCall(shuffle);

                context.EmitCall(or);
            }

            context.EmitStvec(op.Rd);

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #25
0
        /// <summary>
        /// Emits IL for the byte extract described by the current <see cref="OpCodeSimdExt64"/>:
        /// the destination is filled with bytes of Rn starting at byte <c>Imm4</c>, continued
        /// with bytes taken from Rm.
        /// </summary>
        /// <param name="context">Emitter context whose <c>CurrOp</c> is the extract opcode.</param>
        public static void Ext_V(ILEmitterCtx context)
        {
            OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp;

            bool is64 = op.RegisterSize == RegisterSize.Simd64;

            if (!Optimizations.UseSse2)
            {
                // Fallback: copy one byte at a time through the vector temp.
                int byteCount = op.GetBitsCount() >> 3;

                int srcByte = op.Imm4;

                for (int dstByte = 0; dstByte < byteCount; dstByte++, srcByte++)
                {
                    // Wrap back to byte 0 once the source position reaches the vector width.
                    if (srcByte == byteCount)
                    {
                        srcByte = 0;
                    }

                    int srcReg = op.Imm4 + dstByte < byteCount ? op.Rn : op.Rm;

                    EmitVectorExtractZx(context, srcReg, srcByte, 0);
                    EmitVectorInsertTmp(context, dstByte, 0);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (is64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }

                return;
            }

            Type[] shiftArgs = new Type[] { typeof(Vector128<byte>), typeof(byte) };
            Type[] orArgs    = new Type[] { typeof(Vector128<byte>), typeof(Vector128<byte>) };

            // Combines the value on the stack with a zero vector through MoveLowToHigh.
            // NOTE(review): per MOVLHPS this should leave the low half intact and clear the upper half.
            void EmitClearUpperHalf()
            {
                VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
            }

            // Low part of the result: Rn shifted right by Imm4 bytes.
            context.EmitLdvec(op.Rn);

            if (is64)
            {
                EmitClearUpperHalf();
            }

            context.EmitLdc_I4(op.Imm4);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shiftArgs));

            // High part of the result: Rm shifted left by (vector byte width - Imm4) bytes.
            context.EmitLdvec(op.Rm);

            context.EmitLdc_I4((is64 ? 8 : 16) - op.Imm4);
            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), shiftArgs));

            if (is64)
            {
                EmitClearUpperHalf();
            }

            context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), orArgs));

            context.EmitStvec(op.Rd);
        }
예제 #26
0
        /// <summary>
        /// Emits IL that converts the floating-point elements of Rn to signed integers of the same
        /// width and stores the result in Rd, using SSE/SSE2/SSE4.1 intrinsics.
        /// </summary>
        /// <param name="context">Emitter context; <c>context.CurrOp</c> must be an <see cref="OpCodeSimd64"/>.</param>
        /// <param name="roundMode">Rounding mode; selects the Sse41 method via <c>GetVectorSse41NameRnd</c>.</param>
        /// <param name="scalar">
        /// When true, the lanes above the first element are cleared in Rd
        /// (<c>EmitVectorZero32_128</c> for single precision, <c>EmitVectorZeroUpper</c> for double precision).
        /// </param>
        /// <remarks>
        /// If the opcode is an <see cref="OpCodeSimdShImm64"/> (fixed-point form), the input is first
        /// multiplied by 2^fBits, where fBits comes from <c>GetImmShr</c>.
        /// </remarks>
        private static void EmitSse41Fcvt_Signed(ILEmitterCtx context, RoundMode roundMode, bool scalar)
        {
            OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

            // sizeF == ((OpCodeSimdShImm64)op).Size - 2
            int sizeF = op.Size & 1;

            if (sizeF == 0)
            {
                // Single-precision path.
                Type[] types       = new Type[] { typeof(Vector128 <float>), typeof(Vector128 <float>) };
                Type[] typesRndCvt = new Type[] { typeof(Vector128 <float>) };
                Type[] typesSav    = new Type[] { typeof(int) };

                // Rn & CompareOrdered(Rn, Rn).
                // NOTE(review): per CMPORDPS this should zero the NaN lanes and keep the others — confirm.
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rn);

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrdered), types));

                context.EmitLdvec(op.Rn);

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.And), types));

                if (op is OpCodeSimdShImm64 fixedOp)
                {
                    int fBits = GetImmShr(fixedOp);

                    // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
                    // (built by adding fBits to the exponent field of the bit pattern of 1.0f)
                    int fpScaled = 0x3F800000 + fBits * 0x800000;

                    context.EmitLdc_I4(fpScaled);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), types));
                }

                // Apply the Sse41 method selected for roundMode.
                context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt));

                // Keep the rounded value in the vector temp: it is needed both for the
                // conversion and for the overflow comparison below.
                context.EmitStvectmp();
                context.EmitLdvectmp();

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Int32), typesRndCvt));

                context.EmitLdvectmp();

                context.EmitLdc_I4(0x4F000000); // 2.14748365E9f (2147483648)
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                // Mask of lanes whose rounded value is >= 2^31, XORed into the converted result.
                // NOTE(review): presumably turns the 0x80000000 overflow result of the conversion
                // into 0x7FFFFFFF (positive saturation) — confirm against CVTPS2DQ behaviour.
                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqual), types));

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), types));

                context.EmitStvec(op.Rd);

                if (scalar)
                {
                    EmitVectorZero32_128(context, op.Rd);
                }
                else if (op.RegisterSize == RegisterSize.Simd64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else /* if (sizeF == 1) */
            {
                // Double-precision path.
                Type[] types       = new Type[] { typeof(Vector128 <double>), typeof(Vector128 <double>) };
                Type[] typesRndCvt = new Type[] { typeof(Vector128 <double>) };
                Type[] typesSv     = new Type[] { typeof(long), typeof(long) };
                Type[] typesSav    = new Type[] { typeof(long) };

                // Rn & CompareOrdered(Rn, Rn), same NaN handling as the single-precision path.
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rn);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrdered), types));

                context.EmitLdvec(op.Rn);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), types));

                if (op is OpCodeSimdShImm64 fixedOp)
                {
                    int fBits = GetImmShr(fixedOp);

                    // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
                    // (built by adding fBits to the exponent field of the bit pattern of 1.0d)
                    long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;

                    context.EmitLdc_I8(fpScaled);
                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), types));
                }

                // Apply the Sse41 method selected for roundMode.
                context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt));

                // The rounded value lives in the vector temp from here on.
                context.EmitStvectmp();

                // First SetVector128 argument: the converted upper element for the vector
                // form (UnpackHigh(tmp, tmp) then ConvertToInt64), or a constant 0 for the scalar form.
                if (!scalar)
                {
                    context.EmitLdvectmp();
                    context.EmitLdvectmp();

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));
                }
                else
                {
                    context.EmitLdc_I8(0L);
                }

                // Second SetVector128 argument: ConvertToInt64 applied to the temp vector.
                context.EmitLdvectmp();

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToInt64), typesRndCvt));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSv));

                context.EmitLdvectmp();

                context.EmitLdc_I8(0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));

                // Mask of lanes whose rounded value is >= 2^63, XORed into the converted result
                // (same overflow fix-up as the single-precision path).
                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqual), types));

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), types));

                context.EmitStvec(op.Rd);

                if (scalar)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
        }
예제 #27
0
        /// <summary>
        /// Emits IL for a scalar floating-point compare that sets the N, Z, C and V flags.
        /// Uses SSE/SSE2 scalar comparisons when both <c>Optimizations.FastFP</c> and
        /// <c>Optimizations.UseSse2</c> are set, otherwise calls the soft-float <c>FPCompare</c>.
        /// </summary>
        /// <param name="context">Emitter context; <c>context.CurrOp</c> must be an <see cref="OpCodeSimdReg64"/>.</param>
        /// <param name="signalNaNs">
        /// Passed to the soft-float compare as 1/0. It is not consulted on the fast SSE path.
        /// </param>
        private static void EmitFcmpOrFcmpe(ILEmitterCtx context, bool signalNaNs)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            // Bit3 selects the compare-with-zero form; it is ignored for OpCodeSimdFcond64.
            bool cmpWithZero = !(op is OpCodeSimdFcond64) && op.Bit3;

            if (Optimizations.FastFP && Optimizations.UseSse2)
            {
                if (op.Size == 0)
                {
                    EmitFcmpOrFcmpeFastFP(
                        context,
                        op,
                        cmpWithZero,
                        typeof(Sse),
                        typeof(Vector128<float>),
                        nameof(VectorHelper.VectorSingleZero));
                }
                else /* if (op.Size == 1) */
                {
                    EmitFcmpOrFcmpeFastFP(
                        context,
                        op,
                        cmpWithZero,
                        typeof(Sse2),
                        typeof(Vector128<double>),
                        nameof(VectorHelper.VectorDoubleZero));
                }
            }
            else
            {
                EmitVectorExtractF(context, op.Rn, 0, op.Size);

                if (cmpWithZero)
                {
                    if (op.Size == 0)
                    {
                        context.EmitLdc_R4(0f);
                    }
                    else /* if (op.Size == 1) */
                    {
                        context.EmitLdc_R8(0d);
                    }
                }
                else
                {
                    EmitVectorExtractF(context, op.Rm, 0, op.Size);
                }

                context.EmitLdc_I4(!signalNaNs ? 0 : 1);

                EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare));

                EmitSetNzcv(context);
            }
        }

        /// <summary>
        /// Emits the SSE (single) or SSE2 (double) fast path of <see cref="EmitFcmpOrFcmpe"/>.
        /// </summary>
        /// <param name="context">Emitter context.</param>
        /// <param name="op">The compare opcode being translated.</param>
        /// <param name="cmpWithZero">True to compare Rn against a zero vector instead of Rm.</param>
        /// <param name="intrinsics"><c>typeof(Sse)</c> or <c>typeof(Sse2)</c>; the class the comparison methods are looked up on.</param>
        /// <param name="vectorType">The <c>Vector128&lt;T&gt;</c> type used for both comparison operands.</param>
        /// <param name="zeroHelperName">Name of the <c>VectorHelper</c> member that loads a zero vector of the matching type.</param>
        private static void EmitFcmpOrFcmpeFastFP(
            ILEmitterCtx    context,
            OpCodeSimdReg64 op,
            bool            cmpWithZero,
            Type            intrinsics,
            Type            vectorType,
            string          zeroHelperName)
        {
            Type[] typesCmp = new Type[] { vectorType, vectorType };

            ILLabel lblNaN = new ILLabel();
            ILLabel lblEnd = new ILLabel();

            // First operand: Rn, with a copy kept in vector temp 1.
            context.EmitLdvec(op.Rn);

            context.Emit(OpCodes.Dup);
            context.EmitStvectmp();

            // Second operand: zero or Rm, with a copy kept in vector temp 2.
            if (cmpWithZero)
            {
                VectorHelper.EmitCall(context, zeroHelperName);
            }
            else
            {
                context.EmitLdvec(op.Rm);
            }

            context.Emit(OpCodes.Dup);
            context.EmitStvectmp2();

            // Take the NaN branch when the ordered-compare result equals zero.
            // NOTE(review): presumably that means at least one operand is NaN — confirm.
            context.EmitCall(intrinsics.GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp));
            VectorHelper.EmitCall(context, zeroHelperName);

            context.EmitCall(intrinsics.GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp));

            context.Emit(OpCodes.Brtrue_S, lblNaN);

            // Ordered case. Values are pushed in V, C, Z, N order because the
            // EmitStflg calls below run in N, Z, C, V order.
            // NOTE(review): assumes each EmitStflg pops the top of the stack — confirm.
            context.EmitLdc_I4(0); // V

            context.EmitLdvectmp();
            context.EmitLdvectmp2();
            context.EmitCall(intrinsics.GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); // C

            context.EmitLdvectmp();
            context.EmitLdvectmp2();
            context.EmitCall(intrinsics.GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); // Z

            context.EmitLdvectmp();
            context.EmitLdvectmp2();
            context.EmitCall(intrinsics.GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp)); // N

            context.EmitStflg((int)PState.NBit);
            context.EmitStflg((int)PState.ZBit);
            context.EmitStflg((int)PState.CBit);
            context.EmitStflg((int)PState.VBit);

            context.Emit(OpCodes.Br_S, lblEnd);

            context.MarkLabel(lblNaN);

            // NaN case: pushes 1, 1, 0, 0, so under the same assumption N = 0, Z = 0, C = 1, V = 1.
            context.EmitLdc_I4(1);
            context.Emit(OpCodes.Dup);
            context.EmitLdc_I4(0);
            context.Emit(OpCodes.Dup);

            context.EmitStflg((int)PState.NBit);
            context.EmitStflg((int)PState.ZBit);
            context.EmitStflg((int)PState.CBit);
            context.EmitStflg((int)PState.VBit);

            context.MarkLabel(lblEnd);
        }
예제 #28
0
        /// <summary>
        /// Emits IL for a shift-right-by-immediate followed by a saturating narrow: every source
        /// element of Rn (read at size <c>op.Size + 1</c>) is optionally rounded, shifted right,
        /// saturated through <c>EmitSatQ</c>, and inserted into Rd at size <c>op.Size</c>.
        /// </summary>
        /// <param name="context">Emitter context; <c>context.CurrOp</c> must be an <see cref="OpCodeSimdShImm64"/>.</param>
        /// <param name="flags">Selects the scalar form, source/destination signedness, and rounding.</param>
        private static void EmitShrImmSaturatingNarrowOp(ILEmitterCtx context, ShrImmSaturatingNarrowFlags flags)
        {
            OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;

            bool isScalar    = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
            bool srcIsSigned = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
            bool dstIsSigned = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
            bool doRound     = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;

            int shiftAmount = GetImmShr(op);

            // Half of the weight of the lowest surviving bit; added before shifting when rounding.
            long roundingBias = 1L << (shiftAmount - 1);

            int elemCount = isScalar ? 1 : 8 >> op.Size;

            // For 128-bit vector forms the results are written after the first elemCount
            // destination elements; otherwise they start at element 0.
            int dstBase = !isScalar && (op.RegisterSize == RegisterSize.Simd128) ? elemCount : 0;

            if (isScalar)
            {
                EmitVectorZeroLowerTmp(context);
            }
            else if (dstBase != 0)
            {
                // Seed the vector temp with the current Rd value before inserting.
                context.EmitLdvec(op.Rd);
                context.EmitStvectmp();
            }

            // Rounding 64-bit source elements goes through EmitShrImm64.
            bool useShrImm64 = doRound && op.Size > 1;

            var shiftOp = srcIsSigned ? OpCodes.Shr : OpCodes.Shr_Un;

            for (int elem = 0; elem < elemCount; elem++)
            {
                EmitVectorExtract(context, op.Rn, elem, op.Size + 1, srcIsSigned);

                if (useShrImm64)
                {
                    EmitShrImm64(context, srcIsSigned, roundingBias, shiftAmount); // shift <= 32
                }
                else
                {
                    if (doRound)
                    {
                        context.EmitLdc_I8(roundingBias);

                        context.Emit(OpCodes.Add);
                    }

                    context.EmitLdc_I4(shiftAmount);

                    context.Emit(shiftOp);
                }

                EmitSatQ(context, op.Size, srcIsSigned, dstIsSigned);

                EmitVectorInsertTmp(context, dstBase + elem, op.Size);
            }

            context.EmitLdvectmp();
            context.EmitStvec(op.Rd);

            if (dstBase == 0)
            {
                EmitVectorZeroUpper(context, op.Rd);
            }
        }
예제 #29
0
        /// <summary>
        /// Emits IL for a floating-point pairwise vector operation: the elements of Rn and Rm are
        /// rearranged into two operand vectors, which are then combined with the SSE (single) or
        /// SSE2 (double) method called <paramref name="name"/>; the result is stored in Rd.
        /// </summary>
        /// <param name="context">Emitter context; <c>context.CurrOp</c> must be an <see cref="OpCodeSimdReg64"/>.</param>
        /// <param name="name">
        /// Name of the two-operand method to look up on <c>Sse</c> (single precision)
        /// or <c>Sse2</c> (double precision).
        /// </param>
        public static void EmitVectorPairwiseSseOrSse2OpF(ILEmitterCtx context, string name)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            bool isDouble = (op.Size & 1) != 0;

            if (isDouble)
            {
                Type[] doublePair = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };

                // Left operand: UnpackLow(Rn, Rm).
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rm);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), doublePair));

                // Right operand: UnpackHigh(Rn, Rm).
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rm);

                context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), doublePair));

                context.EmitCall(typeof(Sse2).GetMethod(name, doublePair));

                context.EmitStvec(op.Rd);

                return;
            }

            Type[] floatPair = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };

            if (op.RegisterSize == RegisterSize.Simd64)
            {
                // UnpackLow(Rn, Rm) is kept in the vector temp because both operands derive from it.
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rm);

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), floatPair));

                context.EmitStvectmp();

                // Left operand: MoveLowToHigh(tmp, zero).
                context.EmitLdvectmp();

                VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), floatPair));

                // Right operand: MoveHighToLow(zero, tmp).
                VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

                context.EmitLdvectmp();

                context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow), floatPair));
            }
            else /* if (op.RegisterSize == RegisterSize.Simd128) */
            {
                Type[] shuffleArgs = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };

                var shuffle = typeof(Sse).GetMethod(nameof(Sse.Shuffle), shuffleArgs);

                // Left operand: Shuffle(Rn, Rm) selecting elements 0 and 2 of each source.
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rm);

                context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
                context.EmitCall(shuffle);

                // Right operand: Shuffle(Rn, Rm) selecting elements 1 and 3 of each source.
                context.EmitLdvec(op.Rn);
                context.EmitLdvec(op.Rm);

                context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
                context.EmitCall(shuffle);
            }

            context.EmitCall(typeof(Sse).GetMethod(name, floatPair));

            context.EmitStvec(op.Rd);
        }
예제 #30
0
        /// <summary>
        /// Emits IL that interleaves elements of Rn and Rm into Rd (Rn elements at even
        /// destination indices, Rm elements at odd ones).
        /// </summary>
        /// <param name="context">Emitter context; <c>context.CurrOp</c> must be an <see cref="OpCodeSimdReg64"/>.</param>
        /// <param name="part">
        /// 0 to interleave the first half of the source elements, non-zero to interleave the second half.
        /// </param>
        private static void EmitVectorZip(ILEmitterCtx context, int part)
        {
            OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;

            bool is64 = op.RegisterSize == RegisterSize.Simd64;

            if (Optimizations.UseSse2)
            {
                EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
                EmitLdvecWithUnsignedCast(context, op.Rm, op.Size);

                Type vectorType = VectorUIntTypesPerSizeLog2[op.Size];

                Type[] types = new Type[] { vectorType, vectorType };

                // 64-bit vectors always use UnpackLow; the second-half form is
                // obtained from that result by the byte shift below.
                string name = part == 0 || is64
                    ? nameof(Sse2.UnpackLow)
                    : nameof(Sse2.UnpackHigh);

                context.EmitCall(typeof(Sse2).GetMethod(name, types));

                if (is64 && part != 0)
                {
                    // Shift the unpacked result right by 8 bytes.
                    context.EmitLdc_I4(8);

                    Type[] shTypes = new Type[] { vectorType, typeof(byte) };

                    context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes));
                }

                EmitStvecWithUnsignedCast(context, op.Rd, op.Size);

                // No explicit clear is emitted for the shifted (part != 0) case.
                if (is64 && part == 0)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
            else
            {
                int words = op.GetBitsCount() >> 4;
                int pairs = words >> op.Size;

                // Index of the first source element to read.
                int baseIndex = part != 0 ? pairs : 0;

                for (int index = 0; index < pairs; index++)
                {
                    int idx = index << 1;

                    EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
                    EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);

                    // Inserted in reverse order of extraction: the Rm element was
                    // extracted last and goes to the odd slot, the Rn element to the even slot.
                    EmitVectorInsertTmp(context, idx + 1, op.Size);
                    EmitVectorInsertTmp(context, idx, op.Size);
                }

                context.EmitLdvectmp();
                context.EmitStvec(op.Rd);

                if (is64)
                {
                    EmitVectorZeroUpper(context, op.Rd);
                }
            }
        }