/// <summary>
/// Loads 16 bytes from <paramref name="p"/>, subtracts <c>subtmp</c> from each byte, and
/// combines the lanes selected by <c>mask0</c>/<c>mask1</c> into a weighted sum using the
/// multiplier tables indexed by <paramref name="cnt"/>.
/// </summary>
/// <param name="p">Pointer to the input bytes; a full 16 bytes are read.</param>
/// <param name="cnt">Index into <c>mul0Array</c>, <c>mul1Array</c>, <c>cmp0Array</c> and <c>cmp1Array</c>.</param>
/// <param name="n">Receives the horizontal sum of all weighted lanes.</param>
/// <returns>
/// The horizontal sum of the comparison masks: zero when no checked lane is greater than
/// <c>_9</c>, negative otherwise (each failing lane contributes -1).
/// </returns>
public int TryParseSIMDUseCount(byte *p, int cnt, out int n)
{
    var raw = Sse2.LoadVector128(p);
    var adjusted = Sse2.Subtract(Sse.StaticCast<byte, sbyte>(raw), subtmp);

    // Split the input into two groups of lanes; each group has its own weight/compare table.
    var data0 = Ssse3.Shuffle(adjusted, mask0);
    var data1 = Ssse3.Shuffle(adjusted, mask1);
    var lanes0 = Sse.StaticCast<sbyte, int>(data0);
    var lanes1 = Sse.StaticCast<sbyte, int>(data1);

    // Weighted value: after two horizontal adds every lane holds the total.
    var x = Sse2.Add(Sse41.MultiplyLow(lanes0, mul0Array[cnt]), Sse41.MultiplyLow(lanes1, mul1Array[cnt]));
    x = Ssse3.HorizontalAdd(x, x);
    x = Ssse3.HorizontalAdd(x, x);
    n = Sse41.Extract(x, 3);

    // Range check. The second comparison must inspect data1 (the original re-checked data0,
    // which left the second group of lanes unvalidated).
    var com0 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(lanes0, cmp0Array[cnt]), _9);
    var com1 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(lanes1, cmp1Array[cnt]), _9);
    var xx = Sse2.Add(com0, com1);
    xx = Ssse3.HorizontalAdd(xx, xx);
    xx = Ssse3.HorizontalAdd(xx, xx);
    return Sse41.Extract(xx, 3);
}
/// <summary>
/// Computes <c>a * b + c</c> lane-wise for 256-bit vectors of <c>int</c>, <c>uint</c>,
/// <c>float</c> or <c>double</c>.
/// </summary>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
/// <param name="c">Addend.</param>
/// <returns>The lane-wise result, reinterpreted as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException">Thrown for any other element type.</exception>
internal static Vector256<T> MultiplyAddVector256(Vector256<T> a, Vector256<T> b, Vector256<T> c)
{
    if (typeof(T) == typeof(int))
    {
        // Avx2.MultiplyLow already multiplies all eight lanes; no separate pass over the
        // upper half is needed.
        Vector256<int> product = Avx2.MultiplyLow(a.As<T, int>(), b.As<T, int>());
        return Avx2.Add(product, c.As<T, int>()).As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        Vector256<uint> product = Avx2.MultiplyLow(a.As<T, uint>(), b.As<T, uint>());
        return Avx2.Add(product, c.As<T, uint>()).As<uint, T>();
    }

    if (typeof(T) == typeof(float))
    {
        return Fma.MultiplyAdd(a.As<T, float>(), b.As<T, float>(), c.As<T, float>()).As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        return Fma.MultiplyAdd(a.As<T, double>(), b.As<T, double>(), c.As<T, double>()).As<double, T>();
    }

    throw new NotSupportedException();
}
// Multiplies this instance's two vector fields, stores the product in the output
// buffer and validates it against the same fields.
public void RunFldScenario()
{
    var product = Sse41.MultiplyLow(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
// Multiplies this instance's _fld1 and _fld2, then writes the product to the output
// buffer owned by testClass and lets testClass validate it.
public void RunStructFldScenario(SimpleBinaryOpTest__MultiplyLowUInt32 testClass)
{
    var product = Sse41.MultiplyLow(_fld1, _fld2);

    Unsafe.Write(testClass._dataTable.outArrayPtr, product);
    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
}
// Creates a fresh test object as a local, multiplies its two vector fields and
// validates the product written to this instance's output buffer.
public void RunLclFldScenario()
{
    var local = new SimpleBinaryOpTest__MultiplyLowInt32();
    var product = Sse41.MultiplyLow(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Builds a TestStruct local via TestStruct.Create(), multiplies its two vector
// fields and validates the product written to the output buffer.
public void RunStructLclFldScenario()
{
    var local = TestStruct.Create();
    var product = Sse41.MultiplyLow(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Reads both operands from the input buffers with Unsafe.Read into locals,
// multiplies them and validates the product written to the output buffer.
public void RunLclVarScenario_UnsafeRead()
{
    Vector128<Int32> firstOperand = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr);
    Vector128<Int32> secondOperand = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr);
    Vector128<Int32> product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
}
// Loads both operands from the input buffers with aligned loads into locals,
// multiplies them and validates the product written to the output buffer.
public void RunLclVarScenario_LoadAligned()
{
    Vector128<Int32> firstOperand = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr));
    Vector128<Int32> secondOperand = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr));
    Vector128<Int32> product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
}
// Announces the scenario, multiplies this instance's two vector fields and
// validates the product written to the output buffer.
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    var product = Sse41.MultiplyLow(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
// Multiplies _clsVar1 and _clsVar2 and validates the product written to the
// output buffer.
public void RunClsVarScenario()
{
    var product = Sse41.MultiplyLow(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
// Loads both operands from the input buffers with aligned loads, multiplies them
// and validates the product against the raw input buffers.
public void RunBasicScenario_LoadAligned()
{
    var firstOperand = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr));
    var secondOperand = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr));
    var product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
// Reads both operands from the input buffers with Unsafe.Read, multiplies them
// and validates the product against the raw input buffers.
public void RunBasicScenario_UnsafeRead()
{
    var firstOperand = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr);
    var secondOperand = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr);
    var product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
// Announces the scenario, creates a fresh test object as a local, multiplies its
// two vector fields and validates the product written to the output buffer.
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var local = new SimpleBinaryOpTest__MultiplyLowUInt32();
    var product = Sse41.MultiplyLow(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Announces the scenario, builds a TestStruct local, multiplies its two vector
// fields and validates the product written to the output buffer.
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var local = TestStruct.Create();
    var product = Sse41.MultiplyLow(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Announces the scenario, loads both operands from the input buffers into locals,
// multiplies them and validates the product written to the output buffer.
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    Vector128<UInt32> firstOperand = Sse2.LoadVector128((UInt32*)(_dataTable.inArray1Ptr));
    Vector128<UInt32> secondOperand = Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr));
    Vector128<UInt32> product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
}
/// <summary>
/// Computes a weighted sum of the bytes in <paramref name="buf"/>, where byte <c>i</c> is
/// multiplied by <c>kMultFactors[len - i - 1]</c>, and masks the result with <c>kBase - 1</c>.
/// Four bytes are processed per iteration with SSE; the remainder is handled by a scalar loop.
/// </summary>
/// <param name="buf">Pointer to the input bytes; exactly <paramref name="len"/> bytes are read.</param>
/// <param name="len">Number of bytes to hash.</param>
/// <returns>The accumulated hash masked with <c>kBase - 1</c>.</returns>
private unsafe ulong HashSse(byte *buf, int len)
{
    ulong h = 0;
    Vector128<int> v_ps = Vector128<int>.Zero;
    bool useSse4 = Sse41.IsSupported;
    int i = 0;

    for (int j = len - i - 1; len - i >= 4; i += 4, j = len - i - 1)
    {
        // Factors for the four bytes of this step, rearranged with SO123
        // (presumably reversing lane order so lane k pairs with byte i + k; verify against SO123).
        Vector128<int> c_v = Sse2.LoadVector128(&kMultFactorsPtr[j - 3]);
        c_v = Sse2.Shuffle(c_v, SO123);

        // Only the low four bytes are consumed below, so load exactly four bytes.
        // A full 16-byte load could read up to 12 bytes past the end of the buffer.
        Vector128<byte> q_v = Sse2.LoadScalarVector128((int*)(buf + i)).AsByte();

        if (useSse4)
        {
            // Zero-extend the four bytes to 32-bit lanes and multiply lane-wise.
            Vector128<int> s_v = Sse41.ConvertToVector128Int32(q_v);
            v_ps = Sse2.Add(v_ps, Sse41.MultiplyLow(c_v, s_v));
        }
        else
        {
            // SSE2 widening: duplicate bytes up to 32 bits, then shift the copy down.
            q_v = Sse2.UnpackLow(q_v, q_v);
            Vector128<int> s_v = Sse2.ShiftRightLogical(Sse2.UnpackLow(q_v.AsUInt16(), q_v.AsUInt16()).AsInt32(), 24);

            // SSE2 32-bit multiply: even lanes and odd lanes separately, then interleave the low halves.
            Vector128<ulong> v_tmp1 = Sse2.Multiply(c_v.AsUInt32(), s_v.AsUInt32());
            Vector128<ulong> v_tmp2 = Sse2.Multiply(
                Sse2.ShiftRightLogical128BitLane(c_v.AsByte(), 4).AsUInt32(),
                Sse2.ShiftRightLogical128BitLane(s_v.AsByte(), 4).AsUInt32());
            v_ps = Sse2.Add(v_ps, Sse2.UnpackLow(Sse2.Shuffle(v_tmp1.AsInt32(), SOO2O), Sse2.Shuffle(v_tmp2.AsInt32(), SOO2O)));
        }
    }

    // Horizontal reduction of the four partial sums into lane 0.
    v_ps = Sse2.Add(v_ps, Sse2.Shuffle(v_ps, S23O1));
    v_ps = Sse2.Add(v_ps, Sse2.Shuffle(v_ps, S1O32));
    h += Sse2.ConvertToUInt32(v_ps.AsUInt32());

    // Scalar tail for the last (len % 4) bytes.
    for (; i < len; i++)
    {
        int index = len - i - 1;
        ulong c = (uint)kMultFactors[index];
        h += c * buf[i];
    }

    return h & (kBase - 1);
}
// Announces the scenario, loads both operands with aligned loads into locals,
// multiplies them and validates the product written to the output buffer.
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    Vector128<UInt32> firstOperand = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray1Ptr));
    Vector128<UInt32> secondOperand = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr));
    Vector128<UInt32> product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
}
// Announces the scenario, reads both operands with Unsafe.Read into locals,
// multiplies them and validates the product written to the output buffer.
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    Vector128<UInt32> firstOperand = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray1Ptr);
    Vector128<UInt32> secondOperand = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr);
    Vector128<UInt32> product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
}
// Announces the scenario, loads both operands from the input buffers, multiplies
// them and validates the product against the raw input buffers.
public void RunBasicScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

    var firstOperand = Sse2.LoadVector128((Int32*)(_dataTable.inArray1Ptr));
    var secondOperand = Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr));
    var product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
// Announces the scenario, reads both operands with Unsafe.Read, multiplies them
// and validates the product against the raw input buffers.
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    var firstOperand = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr);
    var secondOperand = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr);
    var product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
// Announces the scenario, builds a TestStruct local, loads its two vector fields
// through their addresses, multiplies them and validates the stored product.
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var local = TestStruct.Create();
    var firstOperand = Sse2.LoadVector128((UInt32*)(&local._fld1));
    var secondOperand = Sse2.LoadVector128((UInt32*)(&local._fld2));
    var product = Sse41.MultiplyLow(firstOperand, secondOperand);

    Unsafe.Write(_dataTable.outArrayPtr, product);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Multiplies <paramref name="left"/> by <paramref name="right"/> lane-wise, using
/// <c>Sse41.MultiplyLow</c> when SSE4.1 is available and <c>Multiply_Software</c> otherwise.
/// </summary>
public static Vector4Int32 Multiply(Vector4Int32Param1_3 left, Vector4Int32Param1_3 right)
{
    if (!Sse41.IsSupported)
    {
        // TODO: try to accelerate on hardware below SSE4.1, e.g. starting from
        // Sse2.Multiply(left.AsUInt32(), right.AsUInt32()) for elements 2 and 0.
        return Multiply_Software(left, right);
    }

    return Sse41.MultiplyLow(left, right);
}
// Pins this instance's _fld1 and _fld2, loads them through the pinned pointers,
// multiplies them, then writes the product to testClass's output buffer and lets
// testClass validate it.
public void RunStructFldScenario_Load(SimpleBinaryOpTest__MultiplyLowUInt32 testClass)
{
    fixed (Vector128<UInt32>* pFld1 = &_fld1)
    {
        fixed (Vector128<UInt32>* pFld2 = &_fld2)
        {
            var firstOperand = Sse2.LoadVector128((UInt32*)(pFld1));
            var secondOperand = Sse2.LoadVector128((UInt32*)(pFld2));
            var product = Sse41.MultiplyLow(firstOperand, secondOperand);

            Unsafe.Write(testClass._dataTable.outArrayPtr, product);
            testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
        }
    }
}
// Announces the scenario, pins this instance's _fld1 and _fld2, loads them through
// the pinned pointers, multiplies them and validates the stored product.
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<UInt32>* pFld1 = &_fld1)
    {
        fixed (Vector128<UInt32>* pFld2 = &_fld2)
        {
            var firstOperand = Sse2.LoadVector128((UInt32*)(pFld1));
            var secondOperand = Sse2.LoadVector128((UInt32*)(pFld2));
            var product = Sse41.MultiplyLow(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, product);
            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
    }
}
// Announces the scenario, pins _clsVar1 and _clsVar2, loads them through the
// pinned pointers, multiplies them and validates the stored product.
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Int32>* pClsVar1 = &_clsVar1)
    {
        fixed (Vector128<Int32>* pClsVar2 = &_clsVar2)
        {
            var firstOperand = Sse2.LoadVector128((Int32*)(pClsVar1));
            var secondOperand = Sse2.LoadVector128((Int32*)(pClsVar2));
            var product = Sse41.MultiplyLow(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, product);
            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
    }
}
/// <summary>
/// Computes <c>a * (A * x) + B * y</c> lane-wise, four columns at a time.
/// </summary>
/// <param name="A">First input matrix; its row count determines the number of result rows.</param>
/// <param name="B">Second input matrix; its column count determines the row stride and result width.</param>
/// <param name="a">Scalar applied to every lane of the <c>A * x</c> term.</param>
/// <param name="x">Vector multiplied lane-wise with each group of four values from <paramref name="A"/>.</param>
/// <param name="y">Vector multiplied lane-wise with each group of four values from <paramref name="B"/>.</param>
/// <returns>
/// A matrix of vectors with one row per input row and <c>columns / 4</c> vectors per row.
/// Trailing columns that do not fill a whole vector are ignored.
/// </returns>
public static Vector128<int>[,] Procedure(int[,] A, int[,] B, int a, Vector128<int> x, Vector128<int> y)
{
    unsafe
    {
        int rows = A.GetLength(0);
        int cols = B.GetLength(1);
        int vectorsPerRow = cols / 4;

        Vector128<int>[,] Matrix = new Vector128<int>[rows, vectorsPerRow];

        // Broadcast the scalar once instead of on every iteration.
        Vector128<int> scale = Vector128.Create(a);

        fixed (int* ptr = A)
        {
            fixed (int* ptr2 = B)
            {
                for (int i = 0; i < rows; i++)
                {
                    // Index by row stride so rows stay aligned when the column count is
                    // not a multiple of four (a running offset would drift into the next row).
                    int rowStart = i * cols;
                    for (int j = 0; j < vectorsPerRow; j++)
                    {
                        int offset = rowStart + (j * 4);
                        var v = Sse41.LoadVector128(ptr + offset);
                        var v2 = Sse41.LoadVector128(ptr2 + offset);

                        var scaledA = Sse41.MultiplyLow(Sse41.MultiplyLow(v, x), scale);
                        var scaledB = Sse41.MultiplyLow(v2, y);
                        Matrix[i, j] = Sse41.Add(scaledA, scaledB);
                    }
                }
            }
        }

        return Matrix;
    }
}
// Announces the scenario, creates a fresh test object as a local, pins its two
// vector fields, loads them through the pinned pointers, multiplies them and
// validates the stored product.
public void RunClassLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

    var local = new SimpleBinaryOpTest__MultiplyLowInt32();

    fixed (Vector128<Int32>* pFld1 = &local._fld1)
    {
        fixed (Vector128<Int32>* pFld2 = &local._fld2)
        {
            var firstOperand = Sse2.LoadVector128((Int32*)(pFld1));
            var secondOperand = Sse2.LoadVector128((Int32*)(pFld2));
            var product = Sse41.MultiplyLow(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, product);
            ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
        }
    }
}
/// <summary>
/// Multiplies <paramref name="a"/> and <paramref name="b"/> element by element (not a
/// matrix product), four columns at a time.
/// </summary>
/// <param name="a">First input matrix; its dimensions determine the result shape and row stride.</param>
/// <param name="b">Second input matrix, read at the same offsets as <paramref name="a"/>.</param>
/// <returns>
/// A matrix of vectors with one row per input row and <c>columns / 4</c> vectors per row.
/// Trailing columns that do not fill a whole vector are ignored.
/// </returns>
public static Vector128<int>[,] MultiplyMatrixSSE(int[,] a, int[,] b)
{
    unsafe
    {
        int rows = a.GetLength(0);
        int cols = a.GetLength(1);
        int vectorsPerRow = cols / 4;

        Vector128<int>[,] A = new Vector128<int>[rows, vectorsPerRow];

        fixed (int* ptr = a)
        {
            fixed (int* ptr2 = b)
            {
                for (int i = 0; i < rows; i++)
                {
                    // Index by row stride so rows stay aligned when the column count is
                    // not a multiple of four (a running offset would drift into the next row).
                    int rowStart = i * cols;
                    for (int j = 0; j < vectorsPerRow; j++)
                    {
                        int offset = rowStart + (j * 4);
                        var v = Sse41.LoadVector128(ptr + offset);
                        var v2 = Sse41.LoadVector128(ptr2 + offset);
                        A[i, j] = Sse41.MultiplyLow(v, v2);
                    }
                }
            }
        }

        return A;
    }
}
/// <summary>
/// Divides each of the eight signed 16-bit lanes of <paramref name="dividend"/> by 10.
/// </summary>
/// <remarks>
/// Each lane is sign-extended to 32 bits, multiplied by 26215, shifted right by 18, and
/// corrected by subtracting the lane's sign (<c>x &gt;&gt; 15</c>, i.e. 0 or -1).
/// </remarks>
/// <param name="dividend">The values to divide.</param>
/// <returns>The per-lane quotients.</returns>
public static Vector128<short> DivideBy10(this Vector128<short> dividend)
{
    // Widen: the upper 16 bits of every 32-bit lane, and the sign-extended lower 16 bits.
    Vector128<int> wide = dividend.AsInt32();
    Vector128<int> upperHalves = Sse2.ShiftRightArithmetic(wide, 16);
    Vector128<int> lowerHalves = Sse2.ShiftRightArithmetic(Sse2.ShiftLeftLogical(wide, 16), 16);

    Vector128<int> quotientUpper;
    Vector128<int> quotientLower;

    if (Avx2.IsSupported)
    {
        // Process both halves at once in a single 256-bit register.
        Vector256<int> all = Vector256.Create(lowerHalves, upperHalves);
        Vector256<int> sign = Avx2.ShiftRightArithmetic(all, 15);
        Vector256<int> scaled = Avx2.MultiplyLow(all, Vector256.Create(26215));
        Vector256<int> quotient = Avx2.Subtract(Avx2.ShiftRightArithmetic(scaled, 18), sign);

        quotientUpper = quotient.GetUpper();
        quotientLower = quotient.GetLower();
    }
    else
    {
        Vector128<int> magic = Vector128.Create(26215);

        Vector128<int> signUpper = Sse2.ShiftRightArithmetic(upperHalves, 15);
        Vector128<int> signLower = Sse2.ShiftRightArithmetic(lowerHalves, 15);

        Vector128<int> scaledUpper = Sse2.ShiftRightArithmetic(Sse41.MultiplyLow(upperHalves, magic), 18);
        Vector128<int> scaledLower = Sse2.ShiftRightArithmetic(Sse41.MultiplyLow(lowerHalves, magic), 18);

        quotientUpper = Sse2.Subtract(scaledUpper, signUpper);
        quotientLower = Sse2.Subtract(scaledLower, signLower);
    }

    // Move the upper-half quotients up by two bytes (a whole-register byte shift is used
    // rather than a per-lane shift by 16), then take the odd 16-bit lanes from them.
    quotientUpper = Sse2.ShiftLeftLogical128BitLane(quotientUpper, 2);
    return Sse41.Blend(quotientLower.AsInt16(), quotientUpper.AsInt16(), 0xAA);
}
/// <summary>
/// Multiplies <paramref name="lhs"/> by <paramref name="rhs"/> lane-wise, keeping the low
/// 32 bits of each product. Uses <c>Sse41.MultiplyLow</c> when available and an SSE2
/// sequence otherwise.
/// </summary>
public static i32 Mul(i32 lhs, i32 rhs)
{
    if (Sse41.IsSupported)
    {
        return Sse41.MultiplyLow(lhs, rhs);
    }

    // SSE2 fallback: Sse2.Multiply only multiplies lanes 0 and 2, so lanes 1 and 3 are
    // shifted down by four bytes and multiplied in a second pass.
    var evenProducts = Sse2.Multiply(lhs.AsUInt32(), rhs.AsUInt32()); // mul 2,0
    var lhsOdd = Sse2.ShiftRightLogical128BitLane(lhs, 4).AsUInt32();
    var rhsOdd = Sse2.ShiftRightLogical128BitLane(rhs, 4).AsUInt32();
    var oddProducts = Sse2.Multiply(lhsOdd, rhsOdd); // mul 3,1

    const byte control = 8; // _MM_SHUFFLE(0,0,2,0)

    // Shuffle the low halves of each product to [63..0] and interleave them.
    var evenLow = Sse2.Shuffle(evenProducts.AsInt32(), control);
    var oddLow = Sse2.Shuffle(oddProducts.AsInt32(), control);
    return Sse2.UnpackLow(evenLow, oddLow);
}