/// <summary>
/// Compares this struct's <c>_fld1</c> and <c>_fld2</c> with
/// <see cref="Sse.CompareLessThan"/>, stores the outcome in the output buffer of
/// <paramref name="testClass"/>, and asks <paramref name="testClass"/> to validate it.
/// </summary>
/// <param name="testClass">Test instance that supplies the output buffer and the validation routine.</param>
public void RunStructFldScenario(SimpleBinaryOpTest__CompareLessThanSingle testClass)
{
    // Operands are passed straight from the struct fields.
    var comparison = Sse.CompareLessThan(_fld1, _fld2);

    Unsafe.Write(testClass._dataTable.outArrayPtr, comparison);

    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
}
/// <summary>
/// Entry point: when SSE is supported, runs <see cref="Sse.CompareLessThan"/> over a fixed
/// pair of 4-element float inputs and checks that every output lane is all-ones (-1 as int)
/// where <c>x &lt; y</c> and zero otherwise.
/// </summary>
/// <param name="args">Unused command-line arguments.</param>
/// <returns><c>Pass</c> on success or when SSE is unavailable; <c>Fail</c> when the check fails.</returns>
static unsafe int Main(string[] args)
{
    // Nothing to exercise without SSE; report success.
    if (!Sse.IsSupported)
    {
        return Pass;
    }

    int outcome = Pass;

    using (var table = new TestTable<float>(
        new float[4] { 1, -5, 100, 0 },
        new float[4] { 22, -1, -50, 0 },
        new float[4]))
    {
        Vector128<float> left = Unsafe.Read<Vector128<float>>(table.inArray1Ptr);
        Vector128<float> right = Unsafe.Read<Vector128<float>>(table.inArray2Ptr);
        Vector128<float> compared = Sse.CompareLessThan(left, right);
        Unsafe.Write(table.outArrayPtr, compared);

        // Each lane is inspected by its raw bit pattern: -1 means "less than", 0 means "not".
        bool lanesMatch = table.CheckResult(
            (x, y, z) => BitConverter.SingleToInt32Bits(z) == ((x < y) ? -1 : 0));

        if (!lanesMatch)
        {
            Console.WriteLine("SSE CompareLessThan failed on float:");
            foreach (var lane in table.outArray)
            {
                Console.Write(lane + ", ");
            }
            Console.WriteLine();
            outcome = Fail;
        }
    }

    return outcome;
}
/// <summary>
/// Compares the instance fields <c>_fld1</c> and <c>_fld2</c> with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the data table's
/// output buffer, and validates it.
/// </summary>
public void RunFldScenario()
{
    // Operands are passed straight from the instance fields.
    var comparison = Sse.CompareLessThan(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Creates a fresh <c>SimpleBinaryOpTest__CompareLessThanSingle</c>, compares its
/// <c>_fld1</c> and <c>_fld2</c> with <see cref="Sse.CompareLessThan"/>, writes the
/// outcome to this instance's output buffer, and validates it.
/// </summary>
public void RunLclFldScenario()
{
    var local = new SimpleBinaryOpTest__CompareLessThanSingle();

    // Operands come from the fields of the locally created instance.
    var comparison = Sse.CompareLessThan(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Obtains a <c>TestStruct</c> from <c>TestStruct.Create()</c>, compares its
/// <c>_fld1</c> and <c>_fld2</c> with <see cref="Sse.CompareLessThan"/>, writes the
/// outcome to the output buffer, and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    var localStruct = TestStruct.Create();

    // Operands come from the fields of the local struct.
    var comparison = Sse.CompareLessThan(localStruct._fld1, localStruct._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, then compares the instance fields <c>_fld1</c> and
/// <c>_fld2</c> with <see cref="Sse.CompareLessThan"/>, writes the outcome to the
/// output buffer, and validates it.
/// </summary>
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    // Operands are passed straight from the instance fields.
    var comparison = Sse.CompareLessThan(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Reads both operands from the data table's input buffers with
/// <c>Unsafe.Read</c> into locals, compares them with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    var lhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
    var rhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);

    var comparison = Sse.CompareLessThan(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Loads both operands from the data table's input buffers with
/// <see cref="Sse.LoadAlignedVector128"/> into locals, compares them with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    var lhs = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
    var rhs = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));

    var comparison = Sse.CompareLessThan(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Per-lane "less than" comparison of <paramref name="left"/> and <paramref name="right"/>.
/// Uses <see cref="Sse.CompareLessThan"/> when SSE is supported and otherwise defers to
/// <c>CompareLessThan_Software</c>.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>The comparison result produced by whichever implementation was selected.</returns>
public static Vector128<float> CompareLessThan(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // No hardware support: take the software path.
    if (!Sse.IsSupported)
    {
        return CompareLessThan_Software(left, right);
    }

    return Sse.CompareLessThan(left, right);
}
/// <summary>
/// Feeds two <see cref="Sse.LoadAlignedVector128"/> loads from the data table's
/// input buffers directly into <see cref="Sse.CompareLessThan"/>, writes the
/// outcome to the output buffer, and validates it against the input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    // The loads are deliberately kept inline as call arguments (no intermediate locals).
    var comparison = Sse.CompareLessThan(
        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Compares <c>_clsVar1</c> and <c>_clsVar2</c> with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunClsVarScenario()
{
    var comparison = Sse.CompareLessThan(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, creates a fresh
/// <c>SimpleBinaryOpTest__CompareLessThanSingle</c>, compares its <c>_fld1</c> and
/// <c>_fld2</c> with <see cref="Sse.CompareLessThan"/>, writes the outcome to this
/// instance's output buffer, and validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var local = new SimpleBinaryOpTest__CompareLessThanSingle();

    // Operands come from the fields of the locally created instance.
    var comparison = Sse.CompareLessThan(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Feeds two <c>Unsafe.Read</c> loads from the data table's input buffers
/// directly into <see cref="Sse.CompareLessThan"/>, writes the outcome to the
/// output buffer, and validates it against the input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    // The reads are deliberately kept inline as call arguments (no intermediate locals).
    var comparison = Sse.CompareLessThan(
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, obtains a <c>TestStruct</c> from
/// <c>TestStruct.Create()</c>, compares its <c>_fld1</c> and <c>_fld2</c> with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var localStruct = TestStruct.Create();

    // Operands come from the fields of the local struct.
    var comparison = Sse.CompareLessThan(localStruct._fld1, localStruct._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, reads both operands from the data table's input
/// buffers with <c>Unsafe.Read</c> into locals, compares them with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
    var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);

    var comparison = Sse.CompareLessThan(left, right);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(left, right, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, loads both operands from the data table's input
/// buffers with <see cref="Sse.LoadVector128"/> into locals, compares them with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    var lhs = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
    var rhs = Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr));

    var comparison = Sse.CompareLessThan(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, loads both operands from the data table's input
/// buffers with <see cref="Sse.LoadAlignedVector128"/> into locals, compares
/// them with <see cref="Sse.CompareLessThan"/>, writes the outcome to the
/// output buffer, and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
    var right = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));

    var comparison = Sse.CompareLessThan(left, right);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(left, right, _dataTable.outArrayPtr);
}
/// <summary>
/// Tests two 4-lane vectors for overlap by comparing the low half of each against
/// the high half of the other.
/// </summary>
/// <remarks>
/// NOTE(review): the lane layout looks like (min0, min1, max0, max1), i.e. a 2-D
/// bounding box. That reading is inferred from the original local names; confirm
/// against callers.
/// </remarks>
/// <param name="a">First vector.</param>
/// <param name="b">Second vector.</param>
/// <returns>
/// <c>true</c> when no lane satisfies <c>a.low &gt; b.high</c> or <c>a.high &lt; b.low</c>.
/// </returns>
internal static bool IsIntersectingSse(Vector128<float> a, Vector128<float> b)
{
    // Broadcast each half across the whole register: (x0, x1, x0, x1) and (x2, x3, x2, x3).
    Vector128<float> lowA = Sse.MoveLowToHigh(a, a);
    Vector128<float> highA = Sse.MoveHighToLow(a, a);
    Vector128<float> lowB = Sse.MoveLowToHigh(b, b);
    Vector128<float> highB = Sse.MoveHighToLow(b, b);

    // A lane is "separated" when a starts after b ends, or a ends before b starts.
    Vector128<float> startsAfter = Sse.CompareGreaterThan(lowA, highB);
    Vector128<float> endsBefore = Sse.CompareLessThan(highA, lowB);
    Vector128<float> separated = Sse.Or(endsBefore, startsAfter);

    // MoveMask collects the sign bit of every lane; zero means no lane was separated.
    int separatedLanes = Sse.MoveMask(separated);
    return separatedLanes == 0;
}
/// <summary>
/// Announces the scenario, obtains a <c>TestStruct</c> from
/// <c>TestStruct.Create()</c>, loads its <c>_fld1</c> and <c>_fld2</c> through
/// <see cref="Sse.LoadVector128"/> using the fields' addresses, compares them with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    // The loads take the address of each field of the local struct directly.
    var comparison = Sse.CompareLessThan(
        Sse.LoadVector128((Single*)(&localStruct._fld1)),
        Sse.LoadVector128((Single*)(&localStruct._fld2)));

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> towards positive infinity.
/// </summary>
/// <remarks>
/// Uses <see cref="Sse41.RoundToPositiveInfinity"/> when SSE4.1 is available. Otherwise
/// it truncates through a float -> int32 -> float round trip and adds 1 where the
/// truncation landed below the input. Lanes that the round trip cannot represent
/// (NaN, infinities, and any magnitude of 2^23 or more, which is already integral)
/// are returned unchanged instead of the conversion's out-of-range marker.
/// </remarks>
/// <param name="a">Input lanes.</param>
/// <returns>The per-lane ceiling of <paramref name="a"/>.</returns>
public static f32 Ceil_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToPositiveInfinity(a);
    }

    // Truncate toward zero via int32; valid only while the value fits in an int32.
    f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));

    // Truncation rounds positive non-integers down, so bump those lanes by one.
    f32 needsBump = Sse.CompareLessThan(truncated, a);
    f32 rounded = Sse.Add(truncated, Sse.And(needsBump, Vector128.Create(1f)));

    // |a| < 2^23 selects lanes where the int32 round trip is exact. The comparison is
    // false for NaN, so NaN lanes fall through to the pass-through branch as well.
    f32 magnitude = Sse.And(a, Vector128.Create(0x7FFFFFFF).AsSingle());
    f32 inRange = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

    // Select without SSE4.1 blend: (inRange & rounded) | (~inRange & a).
    return Sse.Or(Sse.And(inRange, rounded), Sse.AndNot(inRange, a));
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> towards negative infinity.
/// </summary>
/// <remarks>
/// Uses <see cref="Sse41.RoundToNegativeInfinity"/> when SSE4.1 is available. Otherwise
/// it truncates through a float -> int32 -> float round trip and subtracts 1 where the
/// truncation landed above the input. Lanes that the round trip cannot represent
/// (NaN, infinities, and any magnitude of 2^23 or more, which is already integral)
/// are returned unchanged instead of the conversion's out-of-range marker.
/// </remarks>
/// <param name="a">Input lanes.</param>
/// <returns>The per-lane floor of <paramref name="a"/>.</returns>
public static f32 Floor_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToNegativeInfinity(a);
    }

    // Truncate toward zero via int32; valid only while the value fits in an int32.
    f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));

    // Truncation rounds negative non-integers up, so pull those lanes down by one.
    f32 overshoots = Sse.CompareLessThan(a, truncated);
    f32 rounded = Sse.Subtract(truncated, Sse.And(overshoots, Vector128.Create(1f)));

    // |a| < 2^23 selects lanes where the int32 round trip is exact. The comparison is
    // false for NaN, so NaN lanes fall through to the pass-through branch as well.
    f32 magnitude = Sse.And(a, Vector128.Create(0x7FFFFFFF).AsSingle());
    f32 inRange = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

    // Select without SSE4.1 blend: (inRange & rounded) | (~inRange & a).
    return Sse.Or(Sse.And(inRange, rounded), Sse.AndNot(inRange, a));
}
/// <summary>
/// Pins this struct's <c>_fld1</c> and <c>_fld2</c>, loads them through
/// <see cref="Sse.LoadVector128"/>, compares them with
/// <see cref="Sse.CompareLessThan"/>, stores the outcome in the output buffer of
/// <paramref name="testClass"/>, and asks <paramref name="testClass"/> to validate it.
/// </summary>
/// <param name="testClass">Test instance that supplies the output buffer and the validation routine.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__CompareLessThanSingle testClass)
{
    fixed (Vector128<Single>* leftPtr = &_fld1)
    {
        fixed (Vector128<Single>* rightPtr = &_fld2)
        {
            // The loads are kept inline as call arguments.
            var comparison = Sse.CompareLessThan(
                Sse.LoadVector128((Single*)(leftPtr)),
                Sse.LoadVector128((Single*)(rightPtr)));

            Unsafe.Write(testClass._dataTable.outArrayPtr, comparison);

            testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
        }
    }
}
/// <summary>
/// Announces the scenario, pins the instance fields <c>_fld1</c> and <c>_fld2</c>,
/// loads them through <see cref="Sse.LoadVector128"/>, compares them with
/// <see cref="Sse.CompareLessThan"/>, writes the outcome to the output buffer,
/// and validates it.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Single>* leftPtr = &_fld1)
    {
        fixed (Vector128<Single>* rightPtr = &_fld2)
        {
            // The loads are kept inline as call arguments.
            var comparison = Sse.CompareLessThan(
                Sse.LoadVector128((Single*)(leftPtr)),
                Sse.LoadVector128((Single*)(rightPtr)));

            Unsafe.Write(_dataTable.outArrayPtr, comparison);

            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
        }
    }
}
/// <summary>
/// Per-lane <c>lhs &lt; rhs</c> via <see cref="Sse.CompareLessThan"/>, with the
/// result reinterpreted through <c>AsInt32()</c>.
/// </summary>
/// <param name="lhs">Left-hand operand.</param>
/// <param name="rhs">Right-hand operand.</param>
/// <returns>The comparison result reinterpreted as 32-bit integer lanes.</returns>
public static m32 LessThan(f32 lhs, f32 rhs)
{
    var floatMask = Sse.CompareLessThan(lhs, rhs);
    return floatMask.AsInt32();
}
/// <summary>
/// Per-lane <c>left &lt; right</c>, forwarded to <see cref="Sse.CompareLessThan"/>.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>The vector returned by <see cref="Sse.CompareLessThan"/>.</returns>
public static Vector128<float> op_LessThan(Vector128<float> left, Vector128<float> right)
{
    return Sse.CompareLessThan(left, right);
}
/// <summary>
/// Per-lane <c>a &lt; b</c>, forwarded to <see cref="Sse.CompareLessThan"/>.
/// </summary>
/// <param name="a">Left-hand operand.</param>
/// <param name="b">Right-hand operand.</param>
/// <returns>The result of <see cref="Sse.CompareLessThan"/>.</returns>
public static __m128 _mm_cmplt_ps(__m128 a, __m128 b)
{
    return Sse.CompareLessThan(a, b);
}
public void ResizeBicubic(FastBitmap rtnImage) { float scaleX = (float)this.width / rtnImage.width; float scaleY = (float)this.height / rtnImage.height; if (scaleX > 1 || scaleY > 1) { throw new Exception("拡大のみ対応"); } float[] tmpa = new float[rtnImage.width * 4 * this.height]; fixed(float *tmpp = tmpa) { float *tmp = tmpp; var _00mask = Vector128.Create(0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255); var _01mask = Vector128.Create(4, 255, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255, 7, 255, 255, 255); var _10mask = Vector128.Create(8, 255, 255, 255, 9, 255, 255, 255, 10, 255, 255, 255, 11, 255, 255, 255); var _11mask = Vector128.Create(12, 255, 255, 255, 13, 255, 255, 255, 14, 255, 255, 255, 15, 255, 255, 255); var _vmask = Vector128.Create(0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255); var _1012 = Vector128.Create(-1, 0, 1, 2); var _0123i = Vector128.Create(0, 1, 2, 3); var _0000 = Vector128.Create(0, 0, 0, 0); var _0000f = Vector128.Create(0f, 0, 0, 0); var _255f = Vector128.Create(255f, 255, 255, 255); var _1111 = Vector128.Create(1, 1, 1, 1); var _1111f = Vector128.Create(1f, 1, 1, 1); var _4444f = Vector128.Create(4f, 4, 4, 4); var _4444 = Vector128.Create(4, 4, 4, 4); var _5555f = Vector128.Create(5f, 5, 5, 5); var _2222f = Vector128.Create(2f, 2, 2, 2); var _8888f = Vector128.Create(8f, 8, 8, 8); var _7f = Vector128.Create(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff).AsSingle(); var _ff = Vector128.Create(-1, -1, -1, -1); var _stride = Vector128.Create(rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4); Parallel.For(0, this.height, (y) => { float py = (y * scaleY); float *tmpPos = tmp + y * rtnImage.width * 4; for (int x = 0; x < rtnImage.width; x++) { float px = (x * scaleX); int sx = (int)px; var _px = Vector128.CreateScalar(px); _px = Sse.Shuffle(_px, _px, 0); var _sx = Vector128.CreateScalar(sx); _sx = Sse2.Shuffle(_sx, 0); var _width = 
Vector128.CreateScalar(this.width); _width = Sse2.Shuffle(_width, 0); var _x2 = Sse2.Add(_sx, _1012); var _d = Sse.And(Sse.Subtract(_px, Sse2.ConvertToVector128Single(_x2)), _7f); var _d2 = Sse.Multiply(_d, _d); var _d3 = Sse.Multiply(_d2, _d); var w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2))); var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3); var wb = Sse2.CompareGreaterThan(_d, _1111f); var _w = Sse41.BlendVariable(w1, w2, wb); var _xpb = Sse2.Or(Sse2.CompareLessThan(_x2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_x2, _width), _1111).AsInt32(), _ff)); var _xpp = Sse2.And(_sx, _xpb); var _xp = Sse41.BlendVariable(_x2, _xpp, _xpb); var p = Avx2.GatherVector128((uint *)(this._ptr + this._stride * y), _xp, 4).AsByte(); var _p0 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _00mask).AsInt32()); var _p1 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _01mask).AsInt32()); var _p2 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _10mask).AsInt32()); var _p3 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _11mask).AsInt32()); var _w0 = Sse.Shuffle(_w, _w, 0); var _w1 = Sse.Shuffle(_w, _w, 0b01010101); var _w2 = Sse.Shuffle(_w, _w, 0b10101010); var _w3 = Sse.Shuffle(_w, _w, 0b11111111); var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3))); Sse2.Store(tmpPos + x * 4, rgbaf); } }); Parallel.For(0, rtnImage.height, (y) => { float py = (y * scaleY); int sy = (int)py; uint *store = stackalloc uint[4]; var _py = Vector128.CreateScalar(py); _py = Sse.Shuffle(_py, _py, 0); var _sy = Vector128.CreateScalar(sy); _sy = Sse2.Shuffle(_sy, 0); var _height = Vector128.CreateScalar(this.height); _height = Sse2.Shuffle(_height, 0); var _y2 = Sse2.Add(_sy, _1012); var _d = Sse.And(Sse.Subtract(_py, Sse2.ConvertToVector128Single(_y2)), _7f); var _d2 = Sse.Multiply(_d, _d); var _d3 = Sse.Multiply(_d2, _d); var 
w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2))); var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3); var wb = Sse2.CompareGreaterThan(_d, _1111f); var _w = Sse41.BlendVariable(w1, w2, wb); var _ypb = Sse2.Or(Sse2.CompareLessThan(_y2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_y2, _height), _1111).AsInt32(), _ff)); var _ypp = Sse2.And(_sy, _ypb); var _yp = Sse41.BlendVariable(_y2, _ypp, _ypb); var _yps = Sse41.MultiplyLow(_yp, _stride); var _yp0 = Sse2.Add(Sse2.Shuffle(_yps, 0), _0123i); var _yp1 = Sse2.Add(Sse2.Shuffle(_yps, 0b01010101), _0123i); var _yp2 = Sse2.Add(Sse2.Shuffle(_yps, 0b10101010), _0123i); var _yp3 = Sse2.Add(Sse2.Shuffle(_yps, 0b11111111), _0123i); uint *rtn = (uint *)(rtnImage._ptr + rtnImage._stride * y); for (int x = 0; x < rtnImage.width; x++) { var _p0 = Avx2.GatherVector128((float *)(tmp), _yp0, 4); var _p1 = Avx2.GatherVector128((float *)(tmp), _yp1, 4); var _p2 = Avx2.GatherVector128((float *)(tmp), _yp2, 4); var _p3 = Avx2.GatherVector128((float *)(tmp), _yp3, 4); var _w0 = Sse.Shuffle(_w, _w, 0); var _w1 = Sse.Shuffle(_w, _w, 0b01010101); var _w2 = Sse.Shuffle(_w, _w, 0b10101010); var _w3 = Sse.Shuffle(_w, _w, 0b11111111); var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3))); var _b0 = Sse.CompareLessThan(rgbaf, _0000f); rgbaf = Sse41.BlendVariable(rgbaf, _0000f, _b0); var _b1 = Sse.CompareGreaterThan(rgbaf, _255f); rgbaf = Sse41.BlendVariable(rgbaf, _255f, _b1); var rgbab = Sse2.ConvertToVector128Int32(rgbaf).AsByte(); var rgba = Ssse3.Shuffle(rgbab, _vmask).AsUInt32(); Sse2.Store(store, rgba); _yp0 = Sse2.Add(_yp0, _4444); _yp1 = Sse2.Add(_yp1, _4444); _yp2 = Sse2.Add(_yp2, _4444); _yp3 = Sse2.Add(_yp3, _4444); *rtn = *store; rtn++; } });
/// <summary>
/// Per-lane <c>left &lt; right</c>, forwarded to <see cref="Sse.CompareLessThan"/>.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>The vector returned by <see cref="Sse.CompareLessThan"/>.</returns>
public static Vector128<float> _mm_cmplt_ps(Vector128<float> left, Vector128<float> right)
    => Sse.CompareLessThan(left, right);