/// <summary>
/// Emits the IR that interleaves elements of the Rn and Rm vector registers of the
/// current opcode and stores the result in Rd.
/// </summary>
/// <param name="context">Emitter context; its current opcode is cast to <see cref="OpCodeSimdReg"/>.</param>
/// <param name="part">Zero interleaves the lower half of the source elements; any other value interleaves the upper half.</param>
private static void EmitVectorZip(ArmEmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    bool lowerHalf = part == 0;

    if (!Optimizations.UseSse2)
    {
        // Generic path: build the result element by element, starting from a zero vector.
        Operand acc = context.VectorZero();

        int count = op.GetPairsCount() >> op.Size;

        // First source element to read: 0 for the lower half, "count" for the upper half.
        int srcIndex = lowerHalf ? 0 : count;

        for (int dstIndex = 0; dstIndex < count * 2; dstIndex += 2, srcIndex++)
        {
            Operand fromN = EmitVectorExtractZx(context, op.Rn, srcIndex, op.Size);
            Operand fromM = EmitVectorExtractZx(context, op.Rm, srcIndex, op.Size);

            // The Rn element lands on the even slot, the Rm element on the odd slot right after it.
            acc = EmitVectorInsert(context, acc, fromN, dstIndex, op.Size);
            acc = EmitVectorInsert(context, acc, fromM, dstIndex + 1, op.Size);
        }

        context.Copy(GetVec(op.Rd), acc);

        return;
    }

    Operand lhs = GetVec(op.Rn);
    Operand rhs = GetVec(op.Rm);

    Operand result;

    if (op.RegisterSize != RegisterSize.Simd128)
    {
        // Non-128-bit case: always interleave with the "low" unpack for the element size,
        // then pick the low or high quadword of that result by unpacking it against a zero vector.
        Operand interleaved = context.AddIntrinsic(X86PunpcklInstruction[op.Size], lhs, rhs);

        Intrinsic selectHalf;

        if (lowerHalf)
        {
            selectHalf = Intrinsic.X86Punpcklqdq;
        }
        else
        {
            selectHalf = Intrinsic.X86Punpckhqdq;
        }

        result = context.AddIntrinsic(selectHalf, interleaved, context.VectorZero());
    }
    else
    {
        // 128-bit case: a single low/high unpack selected by element size does the whole job.
        Intrinsic interleave;

        if (lowerHalf)
        {
            interleave = X86PunpcklInstruction[op.Size];
        }
        else
        {
            interleave = X86PunpckhInstruction[op.Size];
        }

        result = context.AddIntrinsic(interleave, lhs, rhs);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits the IR that de-interleaves the Rn and Rm vector registers of the current opcode:
/// every second element of Rn fills the lower half of Rd and every second element of Rm
/// fills the upper half.
/// </summary>
/// <param name="context">Emitter context; its current opcode is cast to <see cref="OpCodeSimdReg"/>.</param>
/// <param name="part">
/// Offset added to each even source index in the generic path (0 picks elements 0, 2, 4, ...;
/// 1 picks elements 1, 3, 5, ...). The SSSE3 path only distinguishes zero from non-zero.
/// </param>
private static void EmitVectorUnzip(ArmEmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (!Optimizations.UseSsse3)
    {
        // Generic path: build the result element by element, starting from a zero vector.
        Operand acc = context.VectorZero();

        int count = op.GetPairsCount() >> op.Size;

        int srcIndex = part;

        for (int dstIndex = 0; dstIndex < count; dstIndex++, srcIndex += 2)
        {
            Operand fromN = EmitVectorExtractZx(context, op.Rn, srcIndex, op.Size);
            Operand fromM = EmitVectorExtractZx(context, op.Rm, srcIndex, op.Size);

            // Rn elements fill the first "count" slots, Rm elements the following "count" slots.
            acc = EmitVectorInsert(context, acc, fromN, dstIndex, op.Size);
            acc = EmitVectorInsert(context, acc, fromM, count + dstIndex, op.Size);
        }

        context.Copy(GetVec(op.Rd), acc);

        return;
    }

    Intrinsic selectHalf;

    if (part == 0)
    {
        selectHalf = Intrinsic.X86Punpcklqdq;
    }
    else
    {
        selectHalf = Intrinsic.X86Punpckhqdq;
    }

    Operand result;

    if (op.RegisterSize != RegisterSize.Simd128)
    {
        Operand lhs = GetVec(op.Rn);
        Operand rhs = GetVec(op.Rm);

        Intrinsic interleave = X86PunpcklInstruction[op.Size];

        result = context.AddIntrinsic(interleave, lhs, rhs);

        if (op.Size < 2)
        {
            // NOTE(review): the mask tables are declared outside this view; presumably they
            // regroup the interleaved elements so each quadword holds one "part" - confirm.
            long lowQword = _masksE0_Uzp[op.Size];
            long highQword = _masksE1_Uzp[op.Size];

            Operand shuffleMask = X86GetScalar(context, lowQword);

            shuffleMask = EmitVectorInsert(context, shuffleMask, Const(highQword), 1, 3);

            result = context.AddIntrinsic(Intrinsic.X86Pshufb, result, shuffleMask);
        }

        // Keep only the selected quadword; the other half comes from a zero vector.
        result = context.AddIntrinsic(selectHalf, result, context.VectorZero());
    }
    else
    {
        Operand lhs;
        Operand rhs;

        if (op.Size < 3)
        {
            // Build a 128-bit shuffle mask from two 64-bit constants and apply it to both sources.
            // NOTE(review): the mask tables are declared outside this view; presumably they
            // gather even-indexed elements into one quadword and odd-indexed into the other - confirm.
            long lowQword = _masksE0_TrnUzpXtn[op.Size];
            long highQword = _masksE1_TrnUzp[op.Size];

            Operand shuffleMask = X86GetScalar(context, lowQword);

            shuffleMask = EmitVectorInsert(context, shuffleMask, Const(highQword), 1, 3);

            lhs = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), shuffleMask);
            rhs = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rm), shuffleMask);
        }
        else
        {
            // Size 3: the sources are used without shuffling.
            lhs = GetVec(op.Rn);
            rhs = GetVec(op.Rm);
        }

        result = context.AddIntrinsic(selectHalf, lhs, rhs);
    }

    context.Copy(GetVec(op.Rd), result);
}
/// <summary>
/// Emits the IR that transposes element pairs of the Rn and Rm vector registers of the
/// current opcode: for each pair of adjacent elements, the selected element of Rn goes to
/// the even slot of Rd and the selected element of Rm goes to the odd slot.
/// </summary>
/// <param name="context">Emitter context; its current opcode is cast to <see cref="OpCodeSimdReg"/>.</param>
/// <param name="part">
/// Offset added to each even source index in the generic path (0 picks the even element of
/// every pair, 1 the odd one). The SSSE3 path only distinguishes zero from non-zero.
/// </param>
private static void EmitVectorTranspose(ArmEmitterContext context, int part)
{
    OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

    if (!Optimizations.UseSsse3)
    {
        // Generic path: build the result element by element, starting from a zero vector.
        Operand acc = context.VectorZero();

        int count = op.GetPairsCount() >> op.Size;

        for (int dstIndex = 0; dstIndex < count * 2; dstIndex += 2)
        {
            int srcIndex = dstIndex + part;

            Operand fromN = EmitVectorExtractZx(context, op.Rn, srcIndex, op.Size);
            Operand fromM = EmitVectorExtractZx(context, op.Rm, srcIndex, op.Size);

            acc = EmitVectorInsert(context, acc, fromN, dstIndex, op.Size);
            acc = EmitVectorInsert(context, acc, fromM, dstIndex + 1, op.Size);
        }

        context.Copy(GetVec(op.Rd), acc);

        return;
    }

    Operand lhs;
    Operand rhs;

    if (op.Size < 3)
    {
        // Build a 128-bit shuffle mask from two 64-bit constants and apply it to both sources.
        // NOTE(review): the mask tables are declared outside this view; presumably they
        // gather even-indexed elements into one quadword and odd-indexed into the other - confirm.
        long lowQword = EvenMasks[op.Size];
        long highQword = OddMasks[op.Size];

        Operand shuffleMask = X86GetScalar(context, lowQword);

        shuffleMask = EmitVectorInsert(context, shuffleMask, Const(highQword), 1, 3);

        lhs = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), shuffleMask);
        rhs = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rm), shuffleMask);
    }
    else
    {
        // Size 3: the sources are used without shuffling.
        lhs = GetVec(op.Rn);
        rhs = GetVec(op.Rm);
    }

    Intrinsic interleave;

    if (part == 0)
    {
        interleave = X86PunpcklInstruction[op.Size];
    }
    else
    {
        interleave = X86PunpckhInstruction[op.Size];
    }

    Operand result = context.AddIntrinsic(interleave, lhs, rhs);

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        // 64-bit operation: clear the upper 64 bits of the result before writing it back.
        result = context.VectorZeroUpper64(result);
    }

    context.Copy(GetVec(op.Rd), result);
}