/// <summary>
/// Converts the bytes at <paramref name="p"/> to an integer with SSE instructions and reports
/// how many of the inspected lanes failed the range check.
/// </summary>
/// <param name="p">Start of the input; a full 16 bytes are loaded from here regardless of <paramref name="cnt"/>.</param>
/// <param name="cnt">Index into the per-length tables (mul0Array, mul1Array, cmp0Array, cmp1Array).</param>
/// <param name="n">Receives the sum of all lane products.</param>
/// <returns>
/// The sum of the comparison masks: 0 when no checked lane exceeds <c>_9</c>, otherwise minus
/// the number of lanes that do (each failing lane contributes an all-ones mask, i.e. -1).
/// </returns>
public int TryParseSIMDUseCount(byte *p, int cnt, out int n)
{
    var tmp = Sse2.LoadVector128(p);
    var tmp1 = Sse.StaticCast<byte, sbyte>(tmp);

    // NOTE(review): presumably subtmp holds '0' in every lane so that ASCII digits become 0..9 - confirm.
    tmp1 = Sse2.Subtract(tmp1, subtmp);

    // Split the bytes into two groups of 32-bit lanes; both groups feed the value and the check.
    var data0 = Ssse3.Shuffle(tmp1, mask0);
    var data1 = Ssse3.Shuffle(tmp1, mask1);
    var lanes0 = Sse.StaticCast<sbyte, int>(data0);
    var lanes1 = Sse.StaticCast<sbyte, int>(data1);

    // Weight every lane by its table entry, then fold all lanes into one total.
    var mul0 = Sse41.MultiplyLow(lanes0, mul0Array[cnt]);
    var mul1 = Sse41.MultiplyLow(lanes1, mul1Array[cnt]);
    var x = Sse2.Add(mul0, mul1);
    x = Ssse3.HorizontalAdd(x, x);
    x = Ssse3.HorizontalAdd(x, x);

    // After two horizontal adds every lane holds the same total, so any lane can be extracted.
    n = Sse41.Extract(x, 3);

    // Range check. The second comparison must inspect data1 (the lanes picked by mask1);
    // it previously re-checked data0, which left the second group unvalidated.
    var com0 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(lanes0, cmp0Array[cnt]), _9);
    var com1 = Sse2.CompareGreaterThan(Sse41.MultiplyLow(lanes1, cmp1Array[cnt]), _9);
    var xx = Sse2.Add(com0, com1);
    xx = Ssse3.HorizontalAdd(xx, xx);
    xx = Ssse3.HorizontalAdd(xx, xx);
    return Sse41.Extract(xx, 3);
}
/// <summary>
/// Applies <c>Sse2.MultiplyAddAdjacent</c> to each of the four source vectors and
/// <paramref name="vfilter"/>, reduces every product vector to one total, and packs the four
/// totals into a single vector.
/// </summary>
/// <param name="vsrc0">Source vector whose total ends up in element 0.</param>
/// <param name="vsrc1">Source vector whose total ends up in element 1.</param>
/// <param name="vsrc2">Source vector whose total ends up in element 2.</param>
/// <param name="vsrc3">Source vector whose total ends up in element 3.</param>
/// <param name="vfilter">Multiplier shared by all four sources.</param>
/// <param name="zero">Second operand of the horizontal adds; the element layout described here holds when it is all-zero.</param>
/// <returns>The four totals, in source order from element 0 to element 3.</returns>
private static Vector128<int> MultiplyAddAdjacent(
    Vector128<short> vsrc0,
    Vector128<short> vsrc1,
    Vector128<short> vsrc2,
    Vector128<short> vsrc3,
    Vector128<short> vfilter,
    Vector128<int> zero)
{
    // Per source: four pairwise sums -> two sums in the low half -> one total in element 0.
    Vector128<int> total0 = Ssse3.HorizontalAdd(
        Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc0, vfilter), zero), zero);
    Vector128<int> total1 = Ssse3.HorizontalAdd(
        Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc1, vfilter), zero), zero);
    Vector128<int> total2 = Ssse3.HorizontalAdd(
        Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc2, vfilter), zero), zero);
    Vector128<int> total3 = Ssse3.HorizontalAdd(
        Ssse3.HorizontalAdd(Sse2.MultiplyAddAdjacent(vsrc3, vfilter), zero), zero);

    // Interleave the low elements: lowerPair = { total0, total1, .. }, upperPair = { total2, total3, .. }.
    Vector128<int> lowerPair = Sse2.UnpackLow(total0, total1);
    Vector128<int> upperPair = Sse2.UnpackLow(total2, total3);

    // Low 64 bits of upperPair become the high 64 bits of the result.
    return Sse.MoveLowToHigh(lowerPair.AsSingle(), upperPair.AsSingle()).AsInt32();
}
/// <summary>
/// Adds up all elements of <paramref name="source"/>, four at a time with SSE2 and the
/// remaining tail one by one.
/// </summary>
/// <param name="source">Values to add; may be empty.</param>
/// <returns>The sum of the elements (0 for an empty span).</returns>
private unsafe int Sum_VectorizedSse2(ReadOnlySpan<int> source)
{
    int length = source.Length;
    int vectorizedLength = length - (length % 4);

    fixed (int* data = source)
    {
        Vector128<int> accumulator = Vector128<int>.Zero;
        int index;

        // Whole blocks of four ints.
        for (index = 0; index < vectorizedLength; index += 4)
        {
            accumulator = Sse2.Add(accumulator, Sse2.LoadVector128(data + index));
        }

        // Fold the four lanes so that element 0 holds their total.
        if (Ssse3.IsSupported)
        {
            accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
            accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
        }
        else
        {
            // 0x4E swaps the two 64-bit halves, 0xB1 swaps neighbouring 32-bit elements.
            accumulator = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0x4E));
            accumulator = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0xB1));
        }

        int total = accumulator.ToScalar();

        // Leftover elements that did not fill a block.
        for (; index < length; index++)
        {
            total += data[index];
        }

        return total;
    }
}
/// <summary>
/// Sums the first <paramref name="len"/> entries of <c>source.Amount</c>, two doubles at a time
/// with SSE and any odd last entry on its own.
/// </summary>
/// <param name="source">Object whose <c>Amount</c> buffer is summed.</param>
/// <param name="len">Number of entries to add; not checked against the buffer length here.</param>
/// <returns>The accumulated total.</returns>
public unsafe double SumAmount(DoDPOLines_v4 source, int len)
{
    int pairedLength = len - (len % 2);

    fixed (double* amounts = source.Amount)
    {
        Vector128<double> accumulator = Vector128<double>.Zero;
        int index;

        for (index = 0; index < pairedLength; index += 2)
        {
            accumulator = Sse2.Add(accumulator, Sse2.LoadVector128(amounts + index));
        }

        // Ssse3 derives from Sse3, so this binds to the double-precision HorizontalAdd declared there.
        accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
        double total = accumulator.ToScalar();

        // At most one entry is left when len is odd.
        for (; index < len; index++)
        {
            total += amounts[index];
        }

        return total;
    }
}
/// <summary>
/// Sums <c>_source</c> with SSE2, reading each block of four ints by reinterpreting a reference
/// to its first element as a <c>Vector128&lt;int&gt;</c> instead of going through a pointer load.
/// </summary>
/// <returns>The sum of all elements of <c>_source</c>.</returns>
public unsafe int CsSumVectorizedSse2UnsafeAs()
{
    Vector128<int> accumulator = Vector128<int>.Zero;
    int vectorizedLength = _source.Length - (_source.Length % 4);
    int index;

    for (index = 0; index < vectorizedLength; index += 4)
    {
        accumulator = Sse2.Add(accumulator, Unsafe.As<int, Vector128<int>>(ref _source[index]));
    }

    // Fold the four lanes so that element 0 holds their total.
    if (Ssse3.IsSupported)
    {
        accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
        accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
    }
    else
    {
        // 0x4E swaps the two 64-bit halves, 0xB1 swaps neighbouring 32-bit elements.
        accumulator = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0x4E));
        accumulator = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0xB1));
    }

    int total = accumulator.ToScalar();

    // Elements past the last full block.
    for (; index < _source.Length; index++)
    {
        total += _source[index];
    }

    return total;
}
/// <summary>
/// Class-field scenario: feeds this instance's <c>_fld1</c> and <c>_fld2</c> to
/// <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    // Operands are read straight from the instance fields.
    var actual = Ssse3.HorizontalAdd(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-field scenario: feeds this instance's <c>_fld1</c> and <c>_fld2</c> to
/// <c>Ssse3.HorizontalAdd</c> and hands the result to <paramref name="testClass"/> for storage
/// and validation.
/// </summary>
/// <param name="testClass">Test object that supplies the output buffer and <c>ValidateResult</c>.</param>
public void RunStructFldScenario(HorizontalBinaryOpTest__HorizontalAddInt16 testClass)
{
    // Operands are read straight from this instance's own fields.
    var actual = Ssse3.HorizontalAdd(_fld1, _fld2);

    Unsafe.Write(testClass._dataTable.outArrayPtr, actual);
    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
}
/// <summary>
/// Repeatedly executes <c>Ssse3.HorizontalAdd</c> until cancellation is requested and reports
/// how many outer iterations were completed.
/// </summary>
/// <param name="cancellationToken">Checked before every outer iteration; stops the loop once signalled.</param>
/// <returns>The number of completed outer iterations, or 0 when SSSE3 is not supported.</returns>
public override ulong Run(CancellationToken cancellationToken)
{
    if (!Ssse3.IsSupported)
    {
        return 0uL;
    }

    var addendValues = new Span<int>(new[] { randomInt, randomInt, randomInt, randomInt });
    var accumulatorValues = new Span<int>(Enumerable.Repeat(int.MaxValue / 2, 4).ToArray());
    ulong completed = 0uL;

    unsafe
    {
        fixed (int* pAccumulator = accumulatorValues)
        fixed (int* pAddend = addendValues)
        {
            var addend = Sse2.LoadVector128(pAddend);
            var accumulator = Sse2.LoadVector128(pAccumulator);

            for (; !cancellationToken.IsCancellationRequested; completed++)
            {
                // LENGTH horizontal adds per outer iteration, each feeding the next.
                var remaining = LENGTH;
                while (remaining > 0)
                {
                    accumulator = Ssse3.HorizontalAdd(accumulator, addend);
                    remaining--;
                }

                // Write the vector back to memory once per outer iteration.
                Sse2.Store(pAccumulator, accumulator);
            }
        }
    }

    return completed;
}
/// <summary>
/// Class-local-field scenario: creates a fresh test object, feeds its <c>_fld1</c> and
/// <c>_fld2</c> to <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and
/// validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    // Operands come from a newly constructed instance held in a local, not from `this`.
    var instance = new HorizontalBinaryOpTest__HorizontalAddInt16();
    var actual = Ssse3.HorizontalAdd(instance._fld1, instance._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-local-field scenario: obtains a value from <c>TestStruct.Create()</c>, feeds its
/// <c>_fld1</c> and <c>_fld2</c> to <c>Ssse3.HorizontalAdd</c>, stores the result in the output
/// buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    // Operands are fields of a value held in a local variable.
    var local = TestStruct.Create();
    var actual = Ssse3.HorizontalAdd(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using aligned loads: reads both inputs with
/// <c>Sse2.LoadAlignedVector128</c> into locals, runs <c>Ssse3.HorizontalAdd</c> on them, stores
/// the result in the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    // Both operands are materialised in locals before the operation under test.
    var first = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray1Ptr);
    var second = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray2Ptr);
    var actual = Ssse3.HorizontalAdd(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Class-field scenario: announces itself to the test framework, feeds this instance's
/// <c>_fld1</c> and <c>_fld2</c> to <c>Ssse3.HorizontalAdd</c>, stores the result in the output
/// buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    // Operands are read straight from the instance fields.
    var actual = Ssse3.HorizontalAdd(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using <c>Unsafe.Read</c>: reads both inputs into locals, runs
/// <c>Ssse3.HorizontalAdd</c> on them, stores the result in the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    // Both operands are materialised in locals before the operation under test.
    var first = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
    var second = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
    var actual = Ssse3.HorizontalAdd(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using aligned loads: passes two <c>Sse2.LoadAlignedVector128</c> results
/// directly to <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates
/// it against the raw input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    // The loads stay nested inside the call; no intermediate locals are introduced.
    var actual = Ssse3.HorizontalAdd(
        Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray1Ptr),
        Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using <c>Unsafe.Read</c>: passes two vectors read from the input buffers
/// directly to <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates
/// it against the raw input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    // The reads stay nested inside the call; no intermediate locals are introduced.
    var actual = Ssse3.HorizontalAdd(
        Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Class-local-field scenario: announces itself to the test framework, creates a fresh test
/// object, feeds its <c>_fld1</c> and <c>_fld2</c> to <c>Ssse3.HorizontalAdd</c>, stores the
/// result in the output buffer and validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    // Operands come from a newly constructed instance held in a local, not from `this`.
    var instance = new HorizontalBinaryOpTest__HorizontalAddInt32();
    var actual = Ssse3.HorizontalAdd(instance._fld1, instance._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Class-variable scenario: feeds <c>_clsVar1</c> and <c>_clsVar2</c> to
/// <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates it.
/// </summary>
public void RunClsVarScenario()
{
    // Operands are passed directly; no intermediate locals are introduced.
    var actual = Ssse3.HorizontalAdd(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-local-field scenario: announces itself to the test framework, obtains a value from
/// <c>TestStruct.Create()</c>, feeds its <c>_fld1</c> and <c>_fld2</c> to
/// <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    // Operands are fields of a value held in a local variable.
    var local = TestStruct.Create();
    var actual = Ssse3.HorizontalAdd(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using <c>Unsafe.Read</c>: announces itself to the test framework,
/// reads both inputs into locals, runs <c>Ssse3.HorizontalAdd</c> on them, stores the result in
/// the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    // Both operands are materialised in locals before the operation under test.
    var first = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
    var second = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
    var actual = Ssse3.HorizontalAdd(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Local-variable scenario using aligned loads: announces itself to the test framework, reads
/// both inputs with <c>Sse2.LoadAlignedVector128</c> into locals, runs
/// <c>Ssse3.HorizontalAdd</c> on them, stores the result in the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    // Both operands are materialised in locals before the operation under test.
    var first = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray1Ptr);
    var second = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray2Ptr);
    var actual = Ssse3.HorizontalAdd(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Horizontally adds <paramref name="left"/> and <paramref name="right"/>, using the SSSE3
/// instruction when the hardware supports it and the software implementation otherwise.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>The result of the hardware or software horizontal add.</returns>
public static Vector4Int32 HorizontalAdd(Vector4Int32Param1_3 left, Vector4Int32Param1_3 right)
{
    if (!Ssse3.IsSupported)
    {
        // TODO can Sse be used over the software fallback?
        return HorizontalAdd_Software(left, right);
    }

    return Ssse3.HorizontalAdd(left, right);
}
/// <summary>
/// Local-variable scenario using unaligned loads: announces itself to the test framework, reads
/// both inputs with <c>Sse2.LoadVector128</c> into locals, runs <c>Ssse3.HorizontalAdd</c> on
/// them, stores the result in the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    // Both operands are materialised in locals before the operation under test.
    var first = Sse2.LoadVector128((Int16*)_dataTable.inArray1Ptr);
    var second = Sse2.LoadVector128((Int16*)_dataTable.inArray2Ptr);
    var actual = Ssse3.HorizontalAdd(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using <c>Unsafe.Read</c>: announces itself to the test framework, passes two
/// vectors read from the input buffers directly to <c>Ssse3.HorizontalAdd</c>, stores the result
/// in the output buffer and validates it against the raw input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    // The reads stay nested inside the call; no intermediate locals are introduced.
    var actual = Ssse3.HorizontalAdd(
        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Basic scenario using aligned loads: announces itself to the test framework, passes two
/// <c>Sse2.LoadAlignedVector128</c> results directly to <c>Ssse3.HorizontalAdd</c>, stores the
/// result in the output buffer and validates it against the raw input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

    // The loads stay nested inside the call; no intermediate locals are introduced.
    var actual = Ssse3.HorizontalAdd(
        Sse2.LoadAlignedVector128((Int32*)_dataTable.inArray1Ptr),
        Sse2.LoadAlignedVector128((Int32*)_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-local-field scenario using loads: announces itself to the test framework, obtains a
/// value from <c>TestStruct.Create()</c>, loads its <c>_fld1</c> and <c>_fld2</c> through their
/// addresses with <c>Sse2.LoadVector128</c>, runs <c>Ssse3.HorizontalAdd</c>, stores the result
/// in the output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var local = TestStruct.Create();

    // The fields are read through their addresses; the loads stay nested inside the call.
    var actual = Ssse3.HorizontalAdd(
        Sse2.LoadVector128((Int16*)(&local._fld1)),
        Sse2.LoadVector128((Int16*)(&local._fld2)));

    Unsafe.Write(_dataTable.outArrayPtr, actual);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Struct-field scenario using loads: pins this instance's <c>_fld1</c> and <c>_fld2</c>, loads
/// them with <c>Sse2.LoadVector128</c>, runs <c>Ssse3.HorizontalAdd</c>, and hands the result to
/// <paramref name="testClass"/> for storage and validation.
/// </summary>
/// <param name="testClass">Test object that supplies the output buffer and <c>ValidateResult</c>.</param>
public void RunStructFldScenario_Load(HorizontalBinaryOpTest__HorizontalAddInt16 testClass)
{
    fixed (Vector128<Int16>* pinnedFld1 = &_fld1)
    fixed (Vector128<Int16>* pinnedFld2 = &_fld2)
    {
        // The loads stay nested inside the call; no intermediate locals are introduced.
        var actual = Ssse3.HorizontalAdd(
            Sse2.LoadVector128((Int16*)pinnedFld1),
            Sse2.LoadVector128((Int16*)pinnedFld2));

        Unsafe.Write(testClass._dataTable.outArrayPtr, actual);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-field scenario using loads: announces itself to the test framework, pins this
/// instance's <c>_fld1</c> and <c>_fld2</c>, loads them with <c>Sse2.LoadVector128</c>, runs
/// <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Int16>* pinnedFld1 = &_fld1)
    fixed (Vector128<Int16>* pinnedFld2 = &_fld2)
    {
        // The loads stay nested inside the call; no intermediate locals are introduced.
        var actual = Ssse3.HorizontalAdd(
            Sse2.LoadVector128((Int16*)pinnedFld1),
            Sse2.LoadVector128((Int16*)pinnedFld2));

        Unsafe.Write(_dataTable.outArrayPtr, actual);
        ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Class-variable scenario using loads: announces itself to the test framework, pins
/// <c>_clsVar1</c> and <c>_clsVar2</c>, loads them with <c>Sse2.LoadVector128</c>, runs
/// <c>Ssse3.HorizontalAdd</c>, stores the result in the output buffer and validates it.
/// </summary>
public void RunClsVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

    fixed (Vector128<Int32>* pinnedVar1 = &_clsVar1)
    fixed (Vector128<Int32>* pinnedVar2 = &_clsVar2)
    {
        // The loads stay nested inside the call; no intermediate locals are introduced.
        var actual = Ssse3.HorizontalAdd(
            Sse2.LoadVector128((Int32*)pinnedVar1),
            Sse2.LoadVector128((Int32*)pinnedVar2));

        Unsafe.Write(_dataTable.outArrayPtr, actual);
        ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Sums <c>_items</c> with SSE2, four ints at a time plus a scalar tail, and passes the total to
/// <c>CheckSum</c>. Does nothing when SSE2 is not supported.
/// </summary>
public unsafe void SumVectorizedHardwareSse2()
{
    if (!Sse2.IsSupported)
    {
        return;
    }

    int total;
    fixed (int* data = _items)
    {
        var accumulator = Vector128<int>.Zero;
        var vectorizedLength = _items.Length - (_items.Length % 4);
        int index;

        // Whole blocks of four ints.
        for (index = 0; index < vectorizedLength; index += 4)
        {
            accumulator = Sse2.Add(accumulator, Sse2.LoadVector128(data + index));
        }

        // Fold the four lanes so that element 0 holds their total.
        if (Ssse3.IsSupported)
        {
            accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
            accumulator = Ssse3.HorizontalAdd(accumulator, accumulator);
        }
        else
        {
            // 0x4E swaps the two 64-bit halves, 0xB1 swaps neighbouring 32-bit elements.
            accumulator = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0x4E));
            accumulator = Sse2.Add(accumulator, Sse2.Shuffle(accumulator, 0xB1));
        }

        total = accumulator.ToScalar();

        // Items past the last full block.
        for (; index < _items.Length; index++)
        {
            total += data[index];
        }
    }

    CheckSum(total);
}
/// <summary>
/// Class-local-field scenario using loads: announces itself to the test framework, creates a
/// fresh test object, pins its <c>_fld1</c> and <c>_fld2</c>, loads them with
/// <c>Sse2.LoadVector128</c>, runs <c>Ssse3.HorizontalAdd</c>, stores the result in the output
/// buffer and validates it.
/// </summary>
public void RunClassLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

    // Operands come from a newly constructed instance held in a local, not from `this`.
    var instance = new HorizontalBinaryOpTest__HorizontalAddInt32();

    fixed (Vector128<Int32>* pinnedFld1 = &instance._fld1)
    fixed (Vector128<Int32>* pinnedFld2 = &instance._fld2)
    {
        // The loads stay nested inside the call; no intermediate locals are introduced.
        var actual = Ssse3.HorizontalAdd(
            Sse2.LoadVector128((Int32*)pinnedFld1),
            Sse2.LoadVector128((Int32*)pinnedFld2));

        Unsafe.Write(_dataTable.outArrayPtr, actual);
        ValidateResult(instance._fld1, instance._fld2, _dataTable.outArrayPtr);
    }
}