/// <summary>
/// Emits the vector shift-left-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// When SSE2 is enabled and <c>op.Size</c> is greater than zero, a single intrinsic taken from
/// <c>X86PsllInstruction</c> performs the shift; every other case is delegated to
/// <c>EmitVectorUnaryOpZx</c> with a per-element <c>ShiftLeft</c>.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Shl_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShl(op);

    bool useIntrinsic = Optimizations.UseSse2 && op.Size > 0;

    if (!useIntrinsic)
    {
        // Generic path: apply the shift element by element.
        EmitVectorUnaryOpZx(context, (element) => context.ShiftLeft(element, Const(shiftAmount)));

        return;
    }

    Operand source = GetVec(op.Rn);

    Operand shifted = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(shiftAmount));

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        // 64-bit form: clear the upper half of the result before it is stored.
        shifted = context.VectorZeroUpper64(shifted);
    }

    context.Copy(GetVec(op.Rd), shifted);
}
/// <summary>
/// Emits the vector "shift right by immediate and accumulate" operation for the current opcode.
/// </summary>
/// <remarks>
/// Fast path (SSE2 enabled and <c>op.Size</c> greater than zero): Rn is shifted with the intrinsic
/// from <c>X86PsrlInstruction</c> and the result is added to Rd with the intrinsic from
/// <c>X86PaddInstruction</c>. Any other case is forwarded to <c>EmitVectorShrImmOpZx</c> with
/// <c>ShrImmFlags.Accumulate</c>.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Usra_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (!Optimizations.UseSse2 || op.Size <= 0)
    {
        EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate);

        return;
    }

    int shiftAmount = GetImmShr(op);

    Operand dest = GetVec(op.Rd);
    Operand source = GetVec(op.Rn);

    // Shift first, then accumulate into the destination value.
    Operand sum = context.AddIntrinsic(X86PsrlInstruction[op.Size], source, Const(shiftAmount));

    sum = context.AddIntrinsic(X86PaddInstruction[op.Size], sum, dest);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        sum = context.VectorZeroUpper64(sum);
    }

    context.Copy(dest, sum);
}
/// <summary>
/// Element-by-element fallback for the narrowing shift-right-by-immediate operations.
/// </summary>
/// <remarks>
/// Each source element is extracted from Rn at size <c>op.Size + 1</c>, optionally biased by
/// <c>1 &lt;&lt; (shift - 1)</c>, shifted right (unsigned) and inserted into the result at size
/// <c>op.Size</c>. For <c>RegisterSize.Simd128</c> the result starts as a copy of Rd and the new
/// elements are written starting at index <c>8 &gt;&gt; op.Size</c>; otherwise the result starts
/// from a zero vector and elements are written from index 0.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
/// <param name="round">When true, the rounding constant is added before shifting.</param>
private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShr(op);

    long roundingBias = 1L << (shiftAmount - 1);

    int elemCount = 8 >> op.Size;

    int dstOffset = 0;

    if (op.RegisterSize == RegisterSize.Simd128)
    {
        dstOffset = elemCount;
    }

    Operand result;

    if (dstOffset == 0)
    {
        result = context.VectorZero();
    }
    else
    {
        // Existing destination contents are kept; only elements from dstOffset on are replaced.
        result = context.Copy(GetVec(op.Rd));
    }

    for (int i = 0; i < elemCount; i++)
    {
        Operand wide = EmitVectorExtractZx(context, op.Rn, i, op.Size + 1);

        if (round)
        {
            wide = context.Add(wide, Const(roundingBias));
        }

        wide = context.ShiftRightUI(wide, Const(shiftAmount));

        result = EmitVectorInsert(context, result, wide, dstOffset + i, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits the vector signed shift-right-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// The intrinsic from <c>X86PsraInstruction</c> is used only when SSE2 is enabled and
/// <c>op.Size</c> is 1 or 2; all other cases go through <c>EmitShrImmOp</c> with
/// <c>ShrImmFlags.VectorSx</c>.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Sshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (!Optimizations.UseSse2 || op.Size <= 0 || op.Size >= 3)
    {
        EmitShrImmOp(context, ShrImmFlags.VectorSx);

        return;
    }

    int shiftAmount = GetImmShr(op);

    Operand source = GetVec(op.Rn);

    Operand shifted = context.AddIntrinsic(X86PsraInstruction[op.Size], source, Const(shiftAmount));

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        shifted = context.VectorZeroUpper64(shifted);
    }

    context.Copy(GetVec(op.Rd), shifted);
}
/// <summary>
/// Emits the vector unsigned widening shift-left-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// With SSE4.1 enabled: for <c>RegisterSize.Simd128</c> the source is first passed through
/// <c>X86Psrldq</c> with a constant of 8, then widened with the intrinsic from
/// <c>X86PmovzxInstruction</c>, and finally shifted (only when the shift is non-zero) with the
/// intrinsic from <c>X86PsllInstruction</c> for the next larger element size. Without SSE4.1 the
/// work is delegated to <c>EmitVectorShImmWidenBinaryZx</c>.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Ushll_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShl(op);

    if (!Optimizations.UseSse41)
    {
        EmitVectorShImmWidenBinaryZx(context, (lhs, rhs) => context.ShiftLeft(lhs, rhs), shiftAmount);

        return;
    }

    Operand source = GetVec(op.Rn);

    if (op.RegisterSize == RegisterSize.Simd128)
    {
        source = context.AddIntrinsic(Intrinsic.X86Psrldq, source, Const(8));
    }

    Operand widened = context.AddIntrinsic(X86PmovzxInstruction[op.Size], source);

    if (shiftAmount != 0)
    {
        // The shift operates on the widened elements, hence the "+ 1" on the size index.
        widened = context.AddIntrinsic(X86PsllInstruction[op.Size + 1], widened, Const(shiftAmount));
    }

    context.Copy(GetVec(op.Rd), widened);
}
/// <summary>
/// Emits the vector narrowing shift-right-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// With SSSE3 enabled, the source is shifted with the intrinsic from <c>X86PsrlInstruction</c>
/// (indexed by <c>op.Size + 1</c>), rearranged with <c>X86Pshufb</c> using the mask from
/// <c>_masks_RshrnShrn</c>, and then combined with the low part of Rd through
/// <c>X86Movlhps</c> (Simd128) or <c>X86Movhlps</c> (otherwise). Without SSSE3 the work is
/// delegated to <c>EmitVectorShrImmNarrowOpZx</c> with rounding disabled.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Shrn_V(ArmEmitterContext context)
{
    if (!Optimizations.UseSsse3)
    {
        EmitVectorShrImmNarrowOpZx(context, round: false);

        return;
    }

    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShr(op);

    Operand dest = GetVec(op.Rd);
    Operand source = GetVec(op.Rn);

    Operand destLow = context.AddIntrinsic(Intrinsic.X86Movlhps, dest, context.VectorZero());

    Operand shifted = context.AddIntrinsic(X86PsrlInstruction[op.Size + 1], source, Const(shiftAmount));

    Operand shuffleMask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);

    Operand narrowed = context.AddIntrinsic(Intrinsic.X86Pshufb, shifted, shuffleMask);

    Intrinsic combineInst;

    if (op.RegisterSize == RegisterSize.Simd128)
    {
        combineInst = Intrinsic.X86Movlhps;
    }
    else
    {
        combineInst = Intrinsic.X86Movhlps;
    }

    narrowed = context.AddIntrinsic(combineInst, destLow, narrowed);

    context.Copy(dest, narrowed);
}
/// <summary>
/// Emits the vector "shift left and insert" operation for the current opcode, element by element.
/// </summary>
/// <remarks>
/// For each element, Rn is shifted left by the immediate and OR-ed with the low <c>shift</c> bits
/// of the corresponding Rd element (the mask is zero when the shift is zero). The result is
/// built up from a zero vector and copied to Rd at the end.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Sli_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    Operand result = context.VectorZero();

    int elemCount = op.GetBytesCount() >> op.Size;

    int shiftAmount = GetImmShl(op);

    // Bits of the destination that survive the insert: the lowest "shiftAmount" bits.
    ulong keepMask = 0;

    if (shiftAmount != 0)
    {
        keepMask = ulong.MaxValue >> (64 - shiftAmount);
    }

    for (int i = 0; i < elemCount; i++)
    {
        Operand srcElem = EmitVectorExtractZx(context, op.Rn, i, op.Size);

        Operand srcShifted = context.ShiftLeft(srcElem, Const(shiftAmount));

        Operand dstElem = EmitVectorExtractZx(context, op.Rd, i, op.Size);

        Operand dstKept = context.BitwiseAnd(dstElem, Const(keepMask));

        Operand merged = context.BitwiseOr(srcShifted, dstKept);

        result = EmitVectorInsert(context, result, merged, i, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits the scalar shift-left-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// The shift amount comes from <c>GetImmShl</c>; the operation itself is delegated to
/// <c>EmitScalarUnaryOpZx</c> with a <c>ShiftLeft</c> callback.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Shl_S(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShl(op);

    EmitScalarUnaryOpZx(context, (element) =>
    {
        return context.ShiftLeft(element, Const(shiftAmount));
    });
}
/// <summary>
/// Emits the "shift right and insert" operation for the current opcode.
/// </summary>
/// <remarks>
/// The destination bits that are kept are the top <c>shift</c> bits of each element
/// (<c>mask</c>); the remaining bits come from Rn shifted right by the immediate.
/// With SSE2 enabled and <c>op.Size</c> greater than zero this is done with
/// <c>X86PsrlInstruction</c>, <c>X86Pand</c> and <c>X86Por</c>; otherwise each element is
/// processed individually.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
/// <param name="scalar">
/// When true only one element is processed (fallback path) or the upper 64 bits of the result
/// are zeroed (intrinsic path).
/// </param>
private static void EmitSri(ArmEmitterContext context, bool scalar)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShr(op);
    int elemBits = 8 << op.Size;

    ulong keepMask = (ulong.MaxValue << (elemBits - shiftAmount)) & (ulong.MaxValue >> (64 - elemBits));

    if (!Optimizations.UseSse2 || op.Size <= 0)
    {
        Operand result = context.VectorZero();

        int elemCount = scalar ? 1 : op.GetBytesCount() >> op.Size;

        for (int i = 0; i < elemCount; i++)
        {
            Operand srcElem = EmitVectorExtractZx(context, op.Rn, i, op.Size);

            Operand srcShifted;

            if (shiftAmount != 64)
            {
                srcShifted = context.ShiftRightUI(srcElem, Const(shiftAmount));
            }
            else
            {
                // A shift by the full 64 bits is replaced by a constant zero.
                srcShifted = Const(0UL);
            }

            Operand dstElem = EmitVectorExtractZx(context, op.Rd, i, op.Size);

            Operand dstKept = context.BitwiseAnd(dstElem, Const(keepMask));

            Operand merged = context.BitwiseOr(srcShifted, dstKept);

            result = EmitVectorInsert(context, result, merged, i, op.Size);
        }

        context.Copy(GetVec(op.Rd), result);

        return;
    }

    Operand dest = GetVec(op.Rd);
    Operand source = GetVec(op.Rn);

    Operand sourceShifted = context.AddIntrinsic(X86PsrlInstruction[op.Size], source, Const(shiftAmount));

    // The per-element mask is multiplied by the _masks_SliSri entry for this size before being
    // handed to X86GetAllElements.
    Operand destMask = X86GetAllElements(context, (long)keepMask * _masks_SliSri[op.Size]);

    Operand destKept = context.AddIntrinsic(Intrinsic.X86Pand, dest, destMask);

    Operand combined = context.AddIntrinsic(Intrinsic.X86Por, sourceShifted, destKept);

    if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
    {
        combined = context.VectorZeroUpper64(combined);
    }

    context.Copy(dest, combined);
}
/// <summary>
/// Emits the vector rounding narrowing shift-right-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// With SSSE3 enabled, a vector holding the rounding constant <c>1 &lt;&lt; (shift - 1)</c> is
/// added to Rn, the sum is shifted right, rearranged with <c>X86Pshufb</c> using the mask from
/// <c>_masks_RshrnShrn</c>, and combined with the low part of Rd through <c>X86Movlhps</c>
/// (Simd128) or <c>X86Movhlps</c> (otherwise). Without SSSE3 the work is delegated to
/// <c>EmitVectorShrImmNarrowOpZx</c> with rounding enabled.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Rshrn_V(ArmEmitterContext context)
{
    if (!Optimizations.UseSsse3)
    {
        EmitVectorShrImmNarrowOpZx(context, round: true);

        return;
    }

    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    int shiftAmount = GetImmShr(op);

    long roundingBias = 1L << (shiftAmount - 1);

    Operand dest = GetVec(op.Rd);
    Operand source = GetVec(op.Rn);

    Operand destLow = context.AddIntrinsic(Intrinsic.X86Movlhps, dest, context.VectorZero());

    // Size index of the (wider) source elements.
    int srcSize = op.Size + 1;

    Operand biasVector = null;

    if (srcSize == 1)
    {
        // The constant is replicated into both 16-bit halves of a 32-bit value.
        biasVector = X86GetAllElements(context, (int)roundingBias * 0x00010001);
    }
    else if (srcSize == 2)
    {
        biasVector = X86GetAllElements(context, (int)roundingBias);
    }
    else if (srcSize == 3)
    {
        biasVector = X86GetAllElements(context, roundingBias);
    }

    Operand narrowed = context.AddIntrinsic(X86PaddInstruction[op.Size + 1], source, biasVector);

    narrowed = context.AddIntrinsic(X86PsrlInstruction[op.Size + 1], narrowed, Const(shiftAmount));

    Operand shuffleMask = X86GetAllElements(context, _masks_RshrnShrn[op.Size]);

    narrowed = context.AddIntrinsic(Intrinsic.X86Pshufb, narrowed, shuffleMask);

    Intrinsic combineInst;

    if (op.RegisterSize == RegisterSize.Simd128)
    {
        combineInst = Intrinsic.X86Movlhps;
    }
    else
    {
        combineInst = Intrinsic.X86Movhlps;
    }

    narrowed = context.AddIntrinsic(combineInst, destLow, narrowed);

    context.Copy(dest, narrowed);
}
/// <summary>
/// Element-by-element emitter for the shift-right-by-immediate family, configured by
/// <paramref name="flags"/>.
/// </summary>
/// <remarks>
/// Flags read here: <c>Scalar</c> (process a single element), <c>Signed</c> (signed extract and
/// arithmetic shift), <c>Round</c> (add <c>1 &lt;&lt; (shift - 1)</c> before shifting) and
/// <c>Accumulate</c> (add the matching Rd element to the shifted value). Elements with
/// <c>op.Size</c> of 2 or less are shifted directly; larger elements go through
/// <c>EmitShrImm64</c>. The result is built from a zero vector and copied to Rd.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
/// <param name="flags">Combination of <c>ShrImmFlags</c> selecting the variant to emit.</param>
private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    Operand result = context.VectorZero();

    bool isScalar = (flags & ShrImmFlags.Scalar) != 0;
    bool isSigned = (flags & ShrImmFlags.Signed) != 0;
    bool isRounding = (flags & ShrImmFlags.Round) != 0;
    bool isAccumulating = (flags & ShrImmFlags.Accumulate) != 0;

    int shiftAmount = GetImmShr(op);

    long roundingBias = 1L << (shiftAmount - 1);

    int elemCount = isScalar ? 1 : op.GetBytesCount() >> op.Size;

    for (int i = 0; i < elemCount; i++)
    {
        Operand elem = EmitVectorExtract(context, op.Rn, i, op.Size, isSigned);

        if (op.Size > 2)
        {
            // op.Size == 3: the 64-bit helper receives the rounding constant (or zero).
            long bias64 = isRounding ? roundingBias : 0L;

            elem = EmitShrImm64(context, elem, isSigned, bias64, shiftAmount);
        }
        else
        {
            if (isRounding)
            {
                elem = context.Add(elem, Const(roundingBias));
            }

            if (isSigned)
            {
                elem = context.ShiftRightSI(elem, Const(shiftAmount));
            }
            else
            {
                elem = context.ShiftRightUI(elem, Const(shiftAmount));
            }
        }

        if (isAccumulating)
        {
            Operand destElem = EmitVectorExtract(context, op.Rd, i, op.Size, isSigned);

            elem = context.Add(elem, destElem);
        }

        result = EmitVectorInsert(context, result, elem, i, op.Size);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Element-by-element emitter for the saturating narrowing shift-right-by-immediate family,
/// configured by <paramref name="flags"/>.
/// </summary>
/// <remarks>
/// Flags read here: <c>Scalar</c> (process a single element), <c>SignedSrc</c> (signed extract
/// and arithmetic shift), <c>SignedDst</c> (passed to <c>EmitSatQ</c>) and <c>Round</c> (apply
/// the rounding constant <c>1 &lt;&lt; (shift - 1)</c>). Source elements are read from Rn at
/// size <c>op.Size + 1</c>, shifted, saturated through <c>EmitSatQ</c> and inserted at size
/// <c>op.Size</c>. For a non-scalar <c>RegisterSize.Simd128</c> operation the result starts as
/// a copy of Rd and the new elements are written starting at index <c>8 &gt;&gt; op.Size</c>;
/// otherwise it starts from a zero vector.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
/// <param name="flags">Combination of <c>ShrImmSaturatingNarrowFlags</c> selecting the variant.</param>
private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
    bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
    bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
    bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;

    int shift = GetImmShr(op);

    long roundConst = 1L << (shift - 1);

    int elems = !scalar ? 8 >> op.Size : 1;

    int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;

    Operand res = part == 0 ? context.VectorZero() : context.Copy(GetVec(op.Rd));

    for (int index = 0; index < elems; index++)
    {
        Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);

        if (op.Size <= 1 || !round)
        {
            if (round)
            {
                e = context.Add(e, Const(roundConst));
            }

            e = signedSrc
                ? context.ShiftRightSI(e, Const(shift))
                : context.ShiftRightUI(e, Const(shift));
        }
        else /* if (op.Size == 2 && round) */
        {
            /* Here shift <= 32. Kept as a delimited comment so it can never swallow following code. */
            e = EmitShrImm64(context, e, signedSrc, roundConst, shift);
        }

        e = EmitSatQ(context, e, op.Size, signedSrc, signedDst);

        res = EmitVectorInsert(context, res, e, part + index, op.Size);
    }

    context.Copy(GetVec(op.Rd), res);
}
/// <summary>
/// Emits the vector signed rounding shift-right-by-immediate operation for the current opcode.
/// </summary>
/// <remarks>
/// Fast path (SSE2 enabled and <c>op.Size</c> is 1 or 2): the rounding bit is isolated by
/// shifting Rn left by <c>eSize - shift</c> and then right by <c>eSize - 1</c>, and is added to
/// Rn shifted with the intrinsic from <c>X86PsraInstruction</c>. All other cases are forwarded
/// to <c>EmitVectorShrImmOpSx</c> with <c>ShrImmFlags.Round</c>.
/// </remarks>
/// <param name="context">Emitter context whose <c>CurrOp</c> is an <c>OpCodeSimdShImm</c>.</param>
public static void Srshr_V(ArmEmitterContext context)
{
    OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp;

    if (!Optimizations.UseSse2 || op.Size <= 0 || op.Size >= 3)
    {
        EmitVectorShrImmOpSx(context, ShrImmFlags.Round);

        return;
    }

    int shiftAmount = GetImmShr(op);
    int elemBits = 8 << op.Size;

    Operand source = GetVec(op.Rn);

    // Move bit (shiftAmount - 1) of every element to the top, then down to bit 0.
    Operand roundBit = context.AddIntrinsic(X86PsllInstruction[op.Size], source, Const(elemBits - shiftAmount));

    roundBit = context.AddIntrinsic(X86PsrlInstruction[op.Size], roundBit, Const(elemBits - 1));

    Operand sourceShifted = context.AddIntrinsic(X86PsraInstruction[op.Size], source, Const(shiftAmount));

    Operand rounded = context.AddIntrinsic(X86PaddInstruction[op.Size], roundBit, sourceShifted);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        rounded = context.VectorZeroUpper64(rounded);
    }

    context.Copy(GetVec(op.Rd), rounded);
}