예제 #1
0
        public static void Orn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand mask = X86GetAllElements(context, -1L);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask);

                res = context.AddIntrinsic(Intrinsic.X86Por, res, n);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorBinaryOpZx(context, (op1, op2) =>
                {
                    return(context.BitwiseOr(op1, context.BitwiseNot(op2)));
                });
            }
        }
예제 #2
0
        public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth)
        {
            Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128);

            // Insert from index 0 in value to index in target.
            int index = reg & (doubleWidth ? 1 : 3);

            if (doubleWidth)
            {
                if (index == 1)
                {
                    return(context.AddIntrinsic(Intrinsic.X86Movlhps, target, value)); // Low to high.
                }
                else
                {
                    return(context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2))); // Low to low, keep high from original.
                }
            }
            else
            {
                if (Optimizations.UseSse41)
                {
                    return(context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4)));
                }
                else
                {
                    target = EmitSwapScalar(context, target, index, doubleWidth);     // Swap value to replace into element 0.
                    target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector.
                    return(EmitSwapScalar(context, target, index, doubleWidth));      // Swap new value back to the correct index.
                }
            }
        }
예제 #3
0
        public static void Ushll_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            int shift = GetImmShl(op);

            if (Optimizations.UseSse41)
            {
                Operand n = GetVec(op.Rn);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8));
                }

                Intrinsic movzxInst = X86PmovzxInstruction[op.Size];

                Operand res = context.AddIntrinsic(movzxInst, n);

                if (shift != 0)
                {
                    Intrinsic sllInst = X86PsllInstruction[op.Size + 1];

                    res = context.AddIntrinsic(sllInst, res, Const(shift));
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift);
            }
        }
예제 #4
0
        public static void Bsl_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse2)
            {
                OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m);

                res = context.AddIntrinsic(Intrinsic.X86Pand, res, d);
                res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitVectorTernaryOpZx(context, (op1, op2, op3) =>
                {
                    return(context.BitwiseExclusiveOr(
                               context.BitwiseAnd(op1,
                                                  context.BitwiseExclusiveOr(op2, op3)), op3));
                });
            }
        }
예제 #5
0
        public static void Vrev(ArmEmitterContext context)
        {
            OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp;

            if (Optimizations.UseSsse3)
            {
                EmitVectorUnaryOpSimd32(context, (op1) =>
                {
                    Operand mask;
                    switch (op.Size)
                    {
                    case 3:
                        // Rev64
                        switch (op.Opc)
                        {
                        case 0:
                            mask = X86GetElements(context, 0x08090a0b0c0d0e0fL, 0x0001020304050607L);
                            return(context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask));

                        case 1:
                            mask = X86GetElements(context, 0x09080b0a0d0c0f0eL, 0x0100030205040706L);
                            return(context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask));

                        case 2:
                            return(context.AddIntrinsic(Intrinsic.X86Shufps, op1, op1, Const(1 | (0 << 2) | (3 << 4) | (2 << 6))));
                        }
                        break;

                    case 2:
                        // Rev32
                        switch (op.Opc)
                        {
                        case 0:
                            mask = X86GetElements(context, 0x0c0d0e0f_08090a0bL, 0x04050607_00010203L);
                            return(context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask));
예제 #6
0
        public static void Usra_V(ArmEmitterContext context)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                int shift = GetImmShr(op);

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                Intrinsic srlInst = X86PsrlInstruction[op.Size];

                Operand res = context.AddIntrinsic(srlInst, n, Const(shift));

                Intrinsic addInst = X86PaddInstruction[op.Size];

                res = context.AddIntrinsic(addInst, res, d);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
            else
            {
                EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);
            }
        }
예제 #7
0
        public static void Vmin_I(ArmEmitterContext context)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (op.U)
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminuInstruction[op.Size], op1, op2));
                }
                else
                {
                    EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2));
                }
            }
            else
            {
                if (Optimizations.UseSse2)
                {
                    EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminsInstruction[op.Size], op1, op2));
                }
                else
                {
                    EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2));
                }
            }
        }
예제 #8
0
        public static void Vneg_V(ArmEmitterContext context)
        {
            OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp;

            if (op.F)
            {
                if (Optimizations.FastFP && Optimizations.UseSse2)
                {
                    EmitVectorUnaryOpSimd32(context, (m) =>
                    {
                        if ((op.Size & 1) == 0)
                        {
                            Operand mask = X86GetAllElements(context, -0f);
                            return(context.AddIntrinsic(Intrinsic.X86Xorps, mask, m));
                        }
                        else
                        {
                            Operand mask = X86GetAllElements(context, -0d);
                            return(context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m));
                        }
                    });
                }
                else
                {
                    EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1));
                }
            }
            else
            {
                EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1));
            }
        }
예제 #9
0
        public static void Vnmul_S(ArmEmitterContext context)
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                EmitScalarBinaryOpSimd32(context, (n, m) =>
                {
                    if ((op.Size & 1) == 0)
                    {
                        Operand res  = context.AddIntrinsic(Intrinsic.X86Mulss, n, m);
                        Operand mask = X86GetScalar(context, -0f);
                        return(context.AddIntrinsic(Intrinsic.X86Xorps, mask, res));
                    }
                    else
                    {
                        Operand res  = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m);
                        Operand mask = X86GetScalar(context, -0d);
                        return(context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res));
                    }
                });
            }
            else
            {
                EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2)));
            }
        }
예제 #10
0
        public static void EmitScalarTernaryOpF32(
            ArmEmitterContext context,
            Intrinsic inst32pt1,
            Intrinsic inst64pt1,
            Intrinsic inst32pt2,
            Intrinsic inst64pt2,
            bool isNegD = false)
        {
            OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;

            bool doubleSize = (op.Size & 1) != 0;

            Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
            Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;

            EmitScalarTernaryOpSimd32(context, (d, n, m) =>
            {
                Operand res = context.AddIntrinsic(inst1, n, m);

                if (isNegD)
                {
                    Operand mask = doubleSize
                        ? X86GetScalar(context, -0d)
                        : X86GetScalar(context, -0f);

                    d = doubleSize
                        ? context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d)
                        : context.AddIntrinsic(Intrinsic.X86Xorps, mask, d);
                }

                return(context.AddIntrinsic(inst2, d, res));
            });
        }
예제 #11
0
        public static void Shrn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSsse3)
            {
                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

                int shift = GetImmShr(op);

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

                Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

                Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));

                Operand mask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask);

                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
                    ? Intrinsic.X86Movlhps
                    : Intrinsic.X86Movhlps;

                res = context.AddIntrinsic(movInst, dLow, res);

                context.Copy(d, res);
            }
            else
            {
                EmitVectorShrImmNarrowOpZx(context, round: false);
            }
        }
예제 #12
0
        public static void Cmhs_V(ArmEmitterContext context)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse41 && op.Size < 3)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Intrinsic maxInst = X86PmaxuInstruction[op.Size];

                Operand res = context.AddIntrinsic(maxInst, n, m);

                Intrinsic cmpInst = X86PcmpeqInstruction[op.Size];

                res = context.AddIntrinsic(cmpInst, res, n);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false);
            }
        }
예제 #13
0
        public static void Aesmc_V(ArmEmitterContext context)
        {
            OpCode32Simd op = (OpCode32Simd)context.CurrOp;

            Operand n = GetVecA32(op.Qm);

            Operand res;

            if (Optimizations.UseAesni)
            {
                Operand roundKey = context.VectorZero();

                // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens.
                res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey);

                // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens.
                res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey);
            }
            else
            {
                res = context.Call(new _V128_V128(SoftFallback.MixColumns), n);
            }

            context.Copy(GetVecA32(op.Qd), res);
        }
예제 #14
0
        private static void EmitBifBit(ArmEmitterContext context, bool notRm)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                EmitVectorTernaryOpSimd32(context, (d, n, m) =>
                {
                    Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);
                    res         = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res);
                    return(context.AddIntrinsic(Intrinsic.X86Pxor, d, res));
                });
            }
            else
            {
                EmitVectorTernaryOpZx32(context, (d, n, m) =>
                {
                    if (notRm)
                    {
                        m = context.BitwiseNot(m);
                    }
                    return(context.BitwiseExclusiveOr(
                               context.BitwiseAnd(m,
                                                  context.BitwiseExclusiveOr(d, n)), d));
                });
            }
        }
예제 #15
0
        public static void Cmle_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSse42)
            {
                OpCodeSimd op = (OpCodeSimd)context.CurrOp;

                Operand n = GetVec(op.Rn);

                Intrinsic cmpInst = X86PcmpgtInstruction[op.Size];

                Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero());

                Operand mask = X86GetAllElements(context, -1L);

                res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false);
            }
        }
예제 #16
0
        public static void Vneg_S(ArmEmitterContext context)
        {
            OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                EmitScalarUnaryOpSimd32(context, (m) =>
                {
                    if ((op.Size & 1) == 0)
                    {
                        Operand mask = X86GetScalar(context, -0f);
                        return(context.AddIntrinsic(Intrinsic.X86Xorps, mask, m));
                    }
                    else
                    {
                        Operand mask = X86GetScalar(context, -0d);
                        return(context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m));
                    }
                });
            }
            else
            {
                EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1));
            }
        }
예제 #17
0
        public static void Fcvtn_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.UseSse2 && sizeF == 1)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                Operand res = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

                Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, n);

                nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);

                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
                    ? Intrinsic.X86Movlhps
                    : Intrinsic.X86Movhlps;

                res = context.AddIntrinsic(movInst, res, nInt);

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;

                int elems = 4 >> sizeF;

                int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

                Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

                for (int index = 0; index < elems; index++)
                {
                    Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);

                    if (sizeF == 0)
                    {
                        Delegate dlg = new _U16_F32(SoftFloat32_16.FPConvert);

                        Operand e = context.Call(dlg, ne);

                        e = context.ZeroExtend16(OperandType.I64, e);

                        res = EmitVectorInsert(context, res, e, part + index, 1);
                    }
                    else /* if (sizeF == 1) */
                    {
                        Operand e = context.ConvertToFP(OperandType.FP32, ne);

                        res = context.VectorInsert(res, e, part + index);
                    }
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #18
0
        private static void EmitSse2Scvtf(ArmEmitterContext context, bool scalar)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            Operand n = GetVec(op.Rn);

            Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);

            if (op is OpCodeSimdShImm fixedOp)
            {
                int fBits = GetImmShr(fixedOp);

                // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
                int fpScaled = 0x3F800000 - fBits * 0x800000;

                Operand scale = X86GetAllElements(context, fpScaled);

                res = context.AddIntrinsic(Intrinsic.X86Mulps, res, scale);
            }

            if (scalar)
            {
                res = context.VectorZeroUpper96(res);
            }
            else if (op.RegisterSize == RegisterSize.Simd64)
            {
                res = context.VectorZeroUpper64(res);
            }

            context.Copy(GetVec(op.Rd), res);
        }
예제 #19
0
        private static void EmitBifBit(ArmEmitterContext context, bool notRm)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d);

                if (notRm)
                {
                    res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res);
                }
                else
                {
                    res = context.AddIntrinsic(Intrinsic.X86Pand, m, res);
                }

                res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1;

                for (int index = 0; index < elems; index++)
                {
                    Operand d = EmitVectorExtractZx(context, op.Rd, index, 3);
                    Operand n = EmitVectorExtractZx(context, op.Rn, index, 3);
                    Operand m = EmitVectorExtractZx(context, op.Rm, index, 3);

                    if (notRm)
                    {
                        m = context.BitwiseNot(m);
                    }

                    Operand e = context.BitwiseExclusiveOr(d, n);

                    e = context.BitwiseAnd(e, m);
                    e = context.BitwiseExclusiveOr(e, d);

                    res = EmitVectorInsert(context, res, e, index, 3);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #20
0
        public static void Fcvtl_V(ArmEmitterContext context)
        {
            OpCodeSimd op = (OpCodeSimd)context.CurrOp;

            int sizeF = op.Size & 1;

            if (Optimizations.UseSse2 && sizeF == 1)
            {
                Operand n = GetVec(op.Rn);
                Operand res;

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    res = context.AddIntrinsic(Intrinsic.X86Movhlps, n, n);
                }
                else
                {
                    res = n;
                }

                res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int elems = 4 >> sizeF;

                int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;

                for (int index = 0; index < elems; index++)
                {
                    if (sizeF == 0)
                    {
                        Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);

                        Delegate dlg = new _F32_U16(SoftFloat16_32.FPConvert);

                        Operand e = context.Call(dlg, ne);

                        res = context.VectorInsert(res, e, index);
                    }
                    else /* if (sizeF == 1) */
                    {
                        Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);

                        Operand e = context.ConvertToFP(OperandType.FP64, ne);

                        res = context.VectorInsert(res, e, index);
                    }
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #21
0
        public static void Vext(ArmEmitterContext context)
        {
            OpCode32SimdExt op      = (OpCode32SimdExt)context.CurrOp;
            int             elems   = op.GetBytesCount();
            int             byteOff = op.Immediate;

            if (Optimizations.UseSsse3)
            {
                EmitVectorBinaryOpSimd32(context, (n, m) =>
                {
                    // Writing low to high of d: start <imm> into n, overlap into m.
                    // Then rotate n down by <imm>, m up by (elems)-imm.
                    // Then OR them together for the result.

                    (long nMaskHigh, long nMaskLow) = MaskHelperByteSequence(0, elems - byteOff, byteOff);
                    (long mMaskHigh, long mMaskLow) = MaskHelperByteSequence(elems - byteOff, byteOff, 0);
                    Operand nMask, mMask;
                    if (!op.Q)
                    {
                        // Do the same operation to the bytes in the top doubleword too, as our target could be in either.
                        nMaskHigh = nMaskLow + 0x0808080808080808L;
                        mMaskHigh = mMaskLow + 0x0808080808080808L;
                    }
                    nMask         = X86GetElements(context, nMaskHigh, nMaskLow);
                    mMask         = X86GetElements(context, mMaskHigh, mMaskLow);
                    Operand nPart = context.AddIntrinsic(Intrinsic.X86Pshufb, n, nMask);
                    Operand mPart = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mMask);

                    return(context.AddIntrinsic(Intrinsic.X86Por, nPart, mPart));
                });
            }
            else
            {
                Operand res = GetVecA32(op.Qd);

                for (int index = 0; index < elems; index++)
                {
                    Operand extract;

                    if (byteOff >= elems)
                    {
                        extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size);
                    }
                    else
                    {
                        extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size);
                    }
                    byteOff++;

                    res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size);
                }

                context.Copy(GetVecA32(op.Qd), res);
            }
        }
예제 #22
0
        private static void EmitSri(ArmEmitterContext context, bool scalar)
        {
            OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

            int shift = GetImmShr(op);
            int eSize = 8 << op.Size;

            ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize));

            if (Optimizations.UseSse2 && op.Size > 0)
            {
                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                Intrinsic srlInst = X86PsrlInstruction[op.Size];

                Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift));

                Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]);

                Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask);

                Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked);

                if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
                {
                    res = context.VectorZeroUpper64(res);
                }

                context.Copy(d, res);
            }
            else
            {
                Operand res = context.VectorZero();

                int elems = !scalar?op.GetBytesCount() >> op.Size : 1;

                for (int index = 0; index < elems; index++)
                {
                    Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);

                    Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL);

                    Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size);

                    Operand deMasked = context.BitwiseAnd(de, Const(mask));

                    Operand e = context.BitwiseOr(neShifted, deMasked);

                    res = EmitVectorInsert(context, res, e, index, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #23
0
        private static void EmitVectorZip(ArmEmitterContext context, int part)
        {
            OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand n = GetVec(op.Rn);
                Operand m = GetVec(op.Rm);

                if (op.RegisterSize == RegisterSize.Simd128)
                {
                    Intrinsic punpckInst = part == 0
                        ? X86PunpcklInstruction[op.Size]
                        : X86PunpckhInstruction[op.Size];

                    Operand res = context.AddIntrinsic(punpckInst, n, m);

                    context.Copy(GetVec(op.Rd), res);
                }
                else
                {
                    Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m);

                    Intrinsic punpckInst = part == 0
                        ? Intrinsic.X86Punpcklqdq
                        : Intrinsic.X86Punpckhqdq;

                    res = context.AddIntrinsic(punpckInst, res, context.VectorZero());

                    context.Copy(GetVec(op.Rd), res);
                }
            }
            else
            {
                Operand res = context.VectorZero();

                int pairs = op.GetPairsCount() >> op.Size;

                int baseIndex = part != 0 ? pairs : 0;

                for (int index = 0; index < pairs; index++)
                {
                    int pairIndex = index << 1;

                    Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size);
                    Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size);

                    res = EmitVectorInsert(context, res, ne, pairIndex, op.Size);
                    res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #24
0
        public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
        {
            OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

            Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1;
            Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2;

            EmitVectorsByScalarOpSimd32(context, (d, n, m) =>
            {
                Operand res = context.AddIntrinsic(inst1, n, m);
                return(res = context.AddIntrinsic(inst2, d, res));
            });
        }
예제 #25
0
        public static void Dup_Gp(ArmEmitterContext context)
        {
            OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp;

            Operand n = GetIntOrZR(context, op.Rn);

            if (Optimizations.UseSse2)
            {
                switch (op.Size)
                {
                case 0: n = context.ZeroExtend8(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break;

                case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break;

                case 2: n = context.ZeroExtend32(n.Type, n); break;
                }

                Operand res = context.VectorInsert(context.VectorZero(), n, 0);

                if (op.Size < 3)
                {
                    if (op.RegisterSize == RegisterSize.Simd64)
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0));
                    }
                    else
                    {
                        res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0));
                    }
                }
                else
                {
                    res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res);
                }

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int elems = op.GetBytesCount() >> op.Size;

                for (int index = 0; index < elems; index++)
                {
                    res = EmitVectorInsert(context, res, n, index, op.Size);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #26
0
        public static void Rshrn_V(ArmEmitterContext context)
        {
            if (Optimizations.UseSsse3)
            {
                OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

                int shift = GetImmShr(op);

                long roundConst = 1L << (shift - 1);

                Operand d = GetVec(op.Rd);
                Operand n = GetVec(op.Rn);

                Operand dLow = context.AddIntrinsic(Intrinsic.X86Movlhps, d, context.VectorZero());

                Operand mask = null;

                switch (op.Size + 1)
                {
                case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break;

                case 2: mask = X86GetAllElements(context, (int)roundConst); break;

                case 3: mask = X86GetAllElements(context, roundConst); break;
                }

                Intrinsic addInst = X86PaddInstruction[op.Size + 1];

                Operand res = context.AddIntrinsic(addInst, n, mask);

                Intrinsic srlInst = X86PsrlInstruction[op.Size + 1];

                res = context.AddIntrinsic(srlInst, res, Const(shift));

                Operand mask2 = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);

                res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2);

                Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128
                    ? Intrinsic.X86Movlhps
                    : Intrinsic.X86Movhlps;

                res = context.AddIntrinsic(movInst, dLow, res);

                context.Copy(d, res);
            }
            else
            {
                EmitVectorShrImmNarrowOpZx(context, round: true);
            }
        }
예제 #27
0
        public static void Ext_V(ArmEmitterContext context)
        {
            OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp;

            if (Optimizations.UseSse2)
            {
                Operand nShifted = GetVec(op.Rn);

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    nShifted = context.VectorZeroUpper64(nShifted);
                }

                nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4));

                Operand mShifted = GetVec(op.Rm);

                mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4));

                if (op.RegisterSize == RegisterSize.Simd64)
                {
                    mShifted = context.VectorZeroUpper64(mShifted);
                }

                Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted);

                context.Copy(GetVec(op.Rd), res);
            }
            else
            {
                Operand res = context.VectorZero();

                int bytes = op.GetBytesCount();

                int position = op.Imm4 & (bytes - 1);

                for (int index = 0; index < bytes; index++)
                {
                    int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm;

                    Operand e = EmitVectorExtractZx(context, reg, position, 0);

                    position = (position + 1) & (bytes - 1);

                    res = EmitVectorInsert(context, res, e, index, 0);
                }

                context.Copy(GetVec(op.Rd), res);
            }
        }
예제 #28
0
        private static void EmitCrc32Optimized64(ArmEmitterContext context, bool castagnoli)
        {
            OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp;

            long mu         = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))'
            long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1

            Operand crc  = GetIntOrZR(context, op.Rn);
            Operand data = GetIntOrZR(context, op.Rm);

            crc  = context.VectorInsert(context.VectorZero(), crc, 0);
            data = context.VectorInsert(context.VectorZero(), data, 0);

            Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data);
            Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4));

            tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0));
            tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));

            tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res);
            tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32));

            tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1));
            tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0));

            SetIntOrZR(context, op.Rd, context.VectorExtract(OperandType.I32, tmp, 2));
        }
예제 #29
0
        public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
        {
            if (Optimizations.UseSha && Optimizations.UseSsse3)
            {
                Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
                Operand tmp  = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x);

                Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z);

                return(res);
            }

            return(context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z));
        }
예제 #30
0
        // Pairwise

        public static void EmitSse2VectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32)
        {
            OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

            EmitVectorBinaryOpSimd32(context, (n, m) =>
            {
                Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);

                Operand part0 = unpck;
                Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, unpck, unpck);

                return(context.AddIntrinsic(inst32, part0, part1));
            }, 0);
        }