// Emits a widening (long) vector-by-scalar integer operation: every element of
// the Vn vector is combined with one scalar element taken from Vm, and each
// result is written to the destination with elements one size larger.
public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    Operand scalar = ExtractElement(context, op.Vm, op.Size, signed);

    // 32-bit elements (Size == 2) must be widened to 64-bit operands so the
    // operation is performed at the destination width.
    if (op.Size == 2)
    {
        scalar = signed
            ? context.SignExtend32(OperandType.I64, scalar)
            : context.ZeroExtend32(OperandType.I64, scalar);
    }

    Operand result = context.VectorZero();

    int elems = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < elems; lane++)
    {
        Operand element = EmitVectorExtract32(context, op.Qn, op.In + lane, op.Size, signed);

        if (op.Size == 2)
        {
            element = signed
                ? context.SignExtend32(OperandType.I64, element)
                : context.ZeroExtend32(OperandType.I64, element);
        }

        // Destination elements are double-width, hence op.Size + 1.
        result = EmitVectorInsert(context, result, emit(element, scalar), lane, op.Size + 1);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// SSE path for three-operand vectors-by-scalar ops (d = f(d, n, broadcast(m))).
// The selected 32-bit lane of the scalar register is broadcast to all four
// lanes with a shuffle, then the caller-provided vector function is applied.
public static void EmitVectorsByScalarOpSimd32(ArmEmitterContext context, Func3I vectorFunc)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    Operand vecN = GetVecA32(op.Qn);
    Operand vecD = GetVecA32(op.Qd);
    Operand origD = vecD;

    // Vm addresses a 32-bit lane: the low two bits pick the lane inside the
    // 128-bit register, the remaining bits pick the register itself.
    int lane = op.Vm & 3;
    int broadcastMask = (lane << 6) | (lane << 4) | (lane << 2) | lane;

    Operand scalarVec = GetVecA32(op.Vm >> 2);
    scalarVec = context.AddIntrinsic(Intrinsic.X86Shufps, scalarVec, scalarVec, Const(broadcastMask));

    if (!op.Q) // Register swap: move relevant doubleword to destination side.
    {
        vecN = EmitMoveDoubleWordToSide(context, vecN, op.Vn, op.Vd);
    }

    Operand result = vectorFunc(vecD, vecN, scalarVec);

    if (!op.Q) // Register insert.
    {
        result = EmitDoubleWordInsert(context, origD, result, op.Vd);
    }

    context.Copy(origD, result);
}
// Floating-point vector-by-scalar helper: picks the single- or double-precision
// X86 intrinsic from the instruction's size field and forwards to the SSE path.
public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    bool doublePrecision = (op.Size & 1) != 0;
    Intrinsic inst = doublePrecision ? inst64 : inst32;

    EmitVectorByScalarOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m));
}
// Two-step floating-point vectors-by-scalar helper (e.g. multiply-accumulate):
// applies inst1 to (n, m), then inst2 to (d, intermediate). The precision of
// both intrinsics is selected from the instruction's size field.
//
// Fix: the lambda previously used `return (res = ...)` — an assignment to a
// local that is immediately discarded. The result is now returned directly.
public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    bool doublePrecision = (op.Size & 1) != 0;
    Intrinsic inst1 = doublePrecision ? inst64pt1 : inst32pt1;
    Intrinsic inst2 = doublePrecision ? inst64pt2 : inst32pt2;

    EmitVectorsByScalarOpSimd32(context, (d, n, m) =>
    {
        Operand res = context.AddIntrinsic(inst1, n, m);

        return context.AddIntrinsic(inst2, d, res);
    });
}
// Integer vector-by-scalar operation at the element width given by op.Size:
// each lane of Vn is combined with a single extracted Vm element and written
// back into the destination vector at the same width.
public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    Operand scalar = ExtractElement(context, op.Vm, op.Size, signed);
    Operand result = GetVecA32(op.Qd);

    int elems = op.GetBytesCount() >> op.Size;

    for (int lane = 0; lane < elems; lane++)
    {
        Operand element = EmitVectorExtract32(context, op.Qn, op.In + lane, op.Size, signed);

        result = EmitVectorInsert(context, result, emit(element, scalar), op.Id + lane, op.Size);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// VMLS (by scalar): d = d - n * m[lane].
// Integer form subtracts directly; the FP form uses fast host arithmetic when
// FastFP is enabled, otherwise the FPSCR-aware soft float multiply-subtract.
public static void Vmls_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
    }
    else if (Optimizations.FastFP)
    {
        EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
    }
    else
    {
        EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3));
    }
}
// Vector by scalar

// Floating-point vector-by-scalar operation on the IR level (no intrinsics):
// extracts the scalar once, then applies `emit` to every FP lane of Vn and
// inserts the results into the corresponding lanes of Vd.
public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    int sizeF = op.Size & 1;

    OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;

    // Element size is 4 << sizeF bytes, so the lane count is bytes >> (sizeF + 2).
    int elems = op.GetBytesCount() >> (sizeF + 2);

    Operand scalar = ExtractScalar(context, type, op.Vm);
    Operand result = GetVecA32(op.Qd);

    for (int lane = 0; lane < elems; lane++)
    {
        Operand element = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + lane);

        result = context.VectorInsert(result, emit(element, scalar), op.Fd + lane);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
// VMUL (by scalar): d = n * m[lane].
// Integer form multiplies directly; the FP form prefers the SSE packed
// multiply, then fast host arithmetic, then the FPSCR-aware soft float call.
//
// Fix: the soft-float fallback previously passed `nameof(SoftFloat32.FPMulFpscr)`
// (a name string, 32-bit only), unlike the sibling Vmla_1/Vmls_1 handlers which
// pass the 32- and 64-bit implementations as a delegate pair. It now passes
// SoftFloat32.FPMulFpscr / SoftFloat64.FPMulFpscr so double precision resolves
// to the correct implementation and the three handlers stay consistent.
public static void Vmul_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (op.F)
    {
        if (Optimizations.FastFP && Optimizations.UseSse2)
        {
            EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
        }
        else if (Optimizations.FastFP)
        {
            EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
        }
        else
        {
            EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2));
        }
    }
    else
    {
        EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
    }
}
// VMLA (by scalar): d = d + n * m[lane].
// Integer form adds directly; the FP form prefers the SSE mul/add intrinsic
// pair, then fast host arithmetic, then the FPSCR-aware soft float
// multiply-add.
public static void Vmla_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
    }
    else if (Optimizations.FastFP && Optimizations.UseSse2)
    {
        EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
    }
    else if (Optimizations.FastFP)
    {
        EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
    }
    else
    {
        EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3));
    }
}
// VMULL (by scalar): widening multiply, signedness taken from the U bit
// (U == 0 means signed operands).
public static void Vmull_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    bool signed = !op.U;

    EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), signed);
}