/// <summary>
/// Entry point: checks that <c>Sse.CompareGreaterThan</c> yields an all-ones lane (-1 when
/// reinterpreted as Int32) wherever the left float is greater than the right one, and zero elsewhere.
/// </summary>
/// <param name="args">Unused command-line arguments.</param>
/// <returns><c>Pass</c> when the check succeeds or SSE is unavailable; otherwise <c>Fail</c>.</returns>
static unsafe int Main(string[] args)
{
    // Without SSE there is nothing to exercise, so the test trivially passes.
    if (!Sse.IsSupported)
    {
        return Pass;
    }

    int outcome = Pass;

    using (TestTable <float> table = new TestTable <float>(new float[4] { 1, -5, 100, 0 }, new float[4] { 22, -1, -50, 0 }, new float[4]))
    {
        Vector128 <float> left = Unsafe.Read <Vector128 <float> >(table.inArray1Ptr);
        Vector128 <float> right = Unsafe.Read <Vector128 <float> >(table.inArray2Ptr);
        Vector128 <float> mask = Sse.CompareGreaterThan(left, right);
        Unsafe.Write(table.outArrayPtr, mask);

        // Each output lane must be -1 (all bits set) when x > y and 0 otherwise.
        bool matches = table.CheckResult((x, y, z) => BitConverter.SingleToInt32Bits(z) == ((x > y) ? -1 : 0));
        if (!matches)
        {
            Console.WriteLine("SSE CompareGreaterThan failed on float:");
            foreach (var item in table.outArray)
            {
                Console.Write(item + ", ");
            }
            Console.WriteLine();
            outcome = Fail;
        }
    }

    return outcome;
}
/// <summary>
/// Compares the instance fields <c>_fld1</c> and <c>_fld2</c> with <c>Sse.CompareGreaterThan</c>,
/// stores the result in the test class's output buffer and has the test class validate it.
/// </summary>
/// <param name="testClass">Test fixture that owns the output buffer and the validation routine.</param>
public void RunStructFldScenario(SimpleBinaryOpTest__CompareGreaterThanSingle testClass)
{
    // Operands are passed as fields directly; that operand shape is what this scenario exercises.
    var comparison = Sse.CompareGreaterThan(_fld1, _fld2);

    Unsafe.Write(testClass._dataTable.outArrayPtr, comparison);

    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
}
/// <summary>
/// Compares the instance fields <c>_fld1</c> and <c>_fld2</c> with <c>Sse.CompareGreaterThan</c>,
/// writes the result to the data table's output buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    // Fields are passed straight to the intrinsic; no intermediate locals for the operands.
    var comparison = Sse.CompareGreaterThan(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Creates a local <c>TestStruct</c>, compares its <c>_fld1</c> and <c>_fld2</c> fields with
/// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunStructLclFldScenario()
{
    var localStruct = TestStruct.Create();

    // Operands are fields of a local struct instance.
    var comparison = Sse.CompareGreaterThan(localStruct._fld1, localStruct._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Creates a fresh test-class instance, compares its <c>_fld1</c> and <c>_fld2</c> fields with
/// <c>Sse.CompareGreaterThan</c>, writes the result to this instance's output buffer and validates it.
/// </summary>
public void RunClassLclFldScenario()
{
    var localInstance = new SimpleBinaryOpTest__CompareGreaterThanSingle();

    // Operands are fields of a locally constructed class instance.
    var comparison = Sse.CompareGreaterThan(localInstance._fld1, localInstance._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localInstance._fld1, localInstance._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Performs a per-element "greater than or equal" comparison of <paramref name="left"/> against
/// <paramref name="right"/>.
/// </summary>
/// <param name="left">Left-hand operand of the comparison.</param>
/// <param name="right">Right-hand operand of the comparison.</param>
/// <returns>
/// On SSE hardware, the result of <c>Sse.CompareGreaterThanOrEqual</c> (each lane all-ones when
/// <c>left &gt;= right</c>, zero otherwise); otherwise whatever <c>GreaterThanOrEqual_Software</c>
/// returns (presumably the same mask layout; confirm against that helper).
/// </returns>
public static Vector4F GreaterThanOrEqual(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    if (Sse.IsSupported)
    {
        // Must be the "or equal" intrinsic: the strict CompareGreaterThan reported false for
        // equal lanes, disagreeing with this method's name and its software fallback.
        return Sse.CompareGreaterThanOrEqual(left, right);
    }

    return GreaterThanOrEqual_Software(left, right);
}
/// <summary>
/// Performs a per-element "greater than" comparison, using <c>Sse.CompareGreaterThan</c> when SSE
/// is available and <c>CompareGreaterThan_Software</c> otherwise.
/// </summary>
/// <param name="left">Left-hand operand of the comparison.</param>
/// <param name="right">Right-hand operand of the comparison.</param>
/// <returns>The comparison result produced by the selected implementation.</returns>
public static Vector128 <float> CompareGreaterThan(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // Handle the no-hardware case first; everything below is the accelerated path.
    if (!Sse.IsSupported)
    {
        return CompareGreaterThan_Software(left, right);
    }

    return Sse.CompareGreaterThan(left, right);
}
/// <summary>
/// Announces the scenario, then compares the instance fields <c>_fld1</c> and <c>_fld2</c> with
/// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    // Fields are passed straight to the intrinsic; no intermediate locals for the operands.
    var comparison = Sse.CompareGreaterThan(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Loads both inputs into locals with <c>Sse.LoadAlignedVector128</c>, compares them with
/// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    // Operands are materialised as locals before the compare.
    var op1 = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray1Ptr));
    var op2 = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray2Ptr));

    var comparison = Sse.CompareGreaterThan(op1, op2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(op1, op2, _dataTable.outArrayPtr);
}
/// <summary>
/// Reads both inputs into locals with <c>Unsafe.Read</c>, compares them with
/// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    // Operands are materialised as locals before the compare.
    Vector128 <Single> op1 = Unsafe.Read <Vector128 <Single> >(_dataTable.inArray1Ptr);
    Vector128 <Single> op2 = Unsafe.Read <Vector128 <Single> >(_dataTable.inArray2Ptr);

    Vector128 <Single> comparison = Sse.CompareGreaterThan(op1, op2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(op1, op2, _dataTable.outArrayPtr);
}
/// <summary>
/// Feeds two <c>Unsafe.Read</c> expressions directly into <c>Sse.CompareGreaterThan</c>, writes
/// the result to the output buffer and validates it against the raw input buffers.
/// </summary>
public void RunBasicScenario_UnsafeRead()
{
    // The reads stay nested inside the call: no operand locals in this scenario.
    Vector128 <Single> comparison = Sse.CompareGreaterThan(
        Unsafe.Read <Vector128 <Single> >(_dataTable.inArray1Ptr),
        Unsafe.Read <Vector128 <Single> >(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, creates a local <c>TestStruct</c>, compares its <c>_fld1</c> and
/// <c>_fld2</c> fields with <c>Sse.CompareGreaterThan</c>, then writes and validates the result.
/// </summary>
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var localStruct = TestStruct.Create();

    // Operands are fields of a local struct instance.
    var comparison = Sse.CompareGreaterThan(localStruct._fld1, localStruct._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Compares <c>_clsVar1</c> and <c>_clsVar2</c> with <c>Sse.CompareGreaterThan</c>, writes the
/// result to the output buffer and validates it.
/// </summary>
public void RunClsVarScenario()
{
    // Operands are passed directly, without copying them into locals first.
    var comparison = Sse.CompareGreaterThan(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
/// <summary>
/// Feeds two <c>Sse.LoadAlignedVector128</c> expressions directly into
/// <c>Sse.CompareGreaterThan</c>, writes the result to the output buffer and validates it against
/// the raw input buffers.
/// </summary>
public void RunBasicScenario_LoadAligned()
{
    // The loads stay nested inside the call: no operand locals in this scenario.
    var comparison = Sse.CompareGreaterThan(
        Sse.LoadAlignedVector128((Single *)(_dataTable.inArray1Ptr)),
        Sse.LoadAlignedVector128((Single *)(_dataTable.inArray2Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, reads both inputs into locals with <c>Unsafe.Read</c>, compares them
/// with <c>Sse.CompareGreaterThan</c>, then writes and validates the result.
/// </summary>
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    // Operands are materialised as locals before the compare.
    Vector128 <Single> left = Unsafe.Read <Vector128 <Single> >(_dataTable.inArray1Ptr);
    Vector128 <Single> right = Unsafe.Read <Vector128 <Single> >(_dataTable.inArray2Ptr);

    Vector128 <Single> comparison = Sse.CompareGreaterThan(left, right);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(left, right, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, loads both inputs into locals with <c>Sse.LoadAlignedVector128</c>,
/// compares them with <c>Sse.CompareGreaterThan</c>, then writes and validates the result.
/// </summary>
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    // Operands are materialised as locals before the compare.
    var left = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray1Ptr));
    var right = Sse.LoadAlignedVector128((Single *)(_dataTable.inArray2Ptr));

    var comparison = Sse.CompareGreaterThan(left, right);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(left, right, _dataTable.outArrayPtr);
}
/// <summary>
/// Announces the scenario, loads both inputs into locals with <c>Sse.LoadVector128</c>, compares
/// them with <c>Sse.CompareGreaterThan</c>, then writes and validates the result.
/// </summary>
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    // Operands are materialised as locals before the compare.
    var op1 = Sse.LoadVector128((Single *)(_dataTable.inArray1Ptr));
    var op2 = Sse.LoadVector128((Single *)(_dataTable.inArray2Ptr));

    var comparison = Sse.CompareGreaterThan(op1, op2);

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(op1, op2, _dataTable.outArrayPtr);
}
/// <summary>
/// Tests two packed 4-float ranges for overlap using SSE.
/// </summary>
/// <remarks>
/// The low two lanes of each vector are treated as the minimum and the high two lanes as the
/// maximum (presumably min X/Y and max X/Y of a 2D box; confirm against callers). The ranges are
/// reported as intersecting when no lane of <paramref name="a"/>'s minimum exceeds
/// <paramref name="b"/>'s maximum and no lane of <paramref name="a"/>'s maximum is below
/// <paramref name="b"/>'s minimum.
/// </remarks>
/// <param name="a">First packed range.</param>
/// <param name="b">Second packed range.</param>
/// <returns><c>true</c> when no lane is out of bounds; otherwise <c>false</c>.</returns>
internal static bool IsIntersectingSse(Vector128 <float> a, Vector128 <float> b)
{
    // Duplicate the low half into both halves: (a0, a1, a0, a1).
    Vector128 <float> lowA = Sse.MoveLowToHigh(a, a);
    // Duplicate the high half into both halves: (a2, a3, a2, a3).
    Vector128 <float> highA = Sse.MoveHighToLow(a, a);
    Vector128 <float> lowB = Sse.MoveLowToHigh(b, b);
    Vector128 <float> highB = Sse.MoveHighToLow(b, b);

    // A lane is out of bounds when a starts after b ends, or a ends before b starts.
    Vector128 <float> startsAfter = Sse.CompareGreaterThan(lowA, highB);
    Vector128 <float> endsBefore = Sse.CompareLessThan(highA, lowB);
    Vector128 <float> outOfBounds = Sse.Or(endsBefore, startsAfter);

    // MoveMask gathers the lane sign bits; zero means no lane was flagged.
    return Sse.MoveMask(outOfBounds) == 0;
}
/// <summary>
/// Announces the scenario, creates a local <c>TestStruct</c>, loads its <c>_fld1</c> and
/// <c>_fld2</c> fields through <c>Sse.LoadVector128</c> directly into
/// <c>Sse.CompareGreaterThan</c>, then writes and validates the result.
/// </summary>
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var localStruct = TestStruct.Create();

    // The loads take the address of the local struct's fields and stay nested inside the call.
    var comparison = Sse.CompareGreaterThan(
        Sse.LoadVector128((Single *)(&localStruct._fld1)),
        Sse.LoadVector128((Single *)(&localStruct._fld2)));

    Unsafe.Write(_dataTable.outArrayPtr, comparison);

    ValidateResult(localStruct._fld1, localStruct._fld2, _dataTable.outArrayPtr);
}
/// <summary>
/// Pins the instance fields <c>_fld1</c> and <c>_fld2</c>, loads them through
/// <c>Sse.LoadVector128</c> directly into <c>Sse.CompareGreaterThan</c>, stores the result in the
/// test class's output buffer and has the test class validate it.
/// </summary>
/// <param name="testClass">Test fixture that owns the output buffer and the validation routine.</param>
public void RunStructFldScenario_Load(SimpleBinaryOpTest__CompareGreaterThanSingle testClass)
{
    fixed (Vector128 <Single> *pLeft = &_fld1)
    fixed (Vector128 <Single> *pRight = &_fld2)
    {
        // The loads stay nested inside the call: no operand locals in this scenario.
        var comparison = Sse.CompareGreaterThan(
            Sse.LoadVector128((Single *)(pLeft)),
            Sse.LoadVector128((Single *)(pRight)));

        Unsafe.Write(testClass._dataTable.outArrayPtr, comparison);

        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// Announces the scenario, pins the instance fields <c>_fld1</c> and <c>_fld2</c>, loads them
/// through <c>Sse.LoadVector128</c> directly into <c>Sse.CompareGreaterThan</c>, then writes and
/// validates the result.
/// </summary>
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128 <Single> *pLeft = &_fld1)
    fixed (Vector128 <Single> *pRight = &_fld2)
    {
        // The loads stay nested inside the call: no operand locals in this scenario.
        var comparison = Sse.CompareGreaterThan(
            Sse.LoadVector128((Single *)(pLeft)),
            Sse.LoadVector128((Single *)(pRight)));

        Unsafe.Write(_dataTable.outArrayPtr, comparison);

        ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
    }
}
/// <summary>
/// Checks <c>Sse.CompareGreaterThan</c> against a fixed input that includes a NaN lane.
/// </summary>
/// <returns>
/// <c>true</c> when SSE is unavailable or the sign-bit mask of the result equals the expected
/// value; <c>false</c> (after printing a diagnostic) otherwise.
/// </returns>
static bool TestSseCompareGreaterThan()
{
    // Nothing to check without SSE.
    if (!Sse.IsSupported)
    {
        return true;
    }

    // Only lane 2 (2.0 > 1.0) is expected to compare greater; the NaN lane must compare false.
    const int expectedMask = 0b0100;

    Vector128 <float> left = Vector128.Create(float.NaN, 1.0f, 2.0f, 3.0f);
    Vector128 <float> right = Vector128.Create(0.0f, 2.0f, 1.0f, 3.0f);

    int actualMask = Sse.MoveMask(Sse.CompareGreaterThan(left, right));
    if (actualMask == expectedMask)
    {
        return true;
    }

    Console.WriteLine($"{nameof(Sse)}.{nameof(Sse.CompareGreaterThan)}({left}, {right}) returned {Convert.ToString(actualMask, 2)}; expected {Convert.ToString(expectedMask, 2)}");
    return false;
}
// Walk-through of Vector64/128/256 creation and a selection of SSE/AVX/FMA intrinsics.
// Every result is computed and then discarded; the trailing comments record the value each call
// produces for the operands in effect at that point. Comparison results are lane masks: a "true"
// lane has all bits set, which reads as NaN when viewed as a float.
public Intro() {
    var middleVector = Vector128.Create(1.0f); // middleVector = <1,1,1,1>
    middleVector = Vector128.CreateScalar(-1.0f); // middleVector = <-1,0,0,0>
    var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f)); // floatBytes = <0, 0, 128, 63, 0, 0, 128, 191>
    if (Avx.IsSupported) {
        var left = Vector256.Create(-2.5f); // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
        var right = Vector256.Create(5.0f); // <5, 5, 5, 5, 5, 5, 5, 5>
        // AddSubtract subtracts in even lanes and adds in odd lanes.
        Vector256 <float> result = Avx.AddSubtract(left, right); // result = <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5>
        left = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
        right = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);
        // Unpack operations interleave within each 128-bit half independently.
        result = Avx.UnpackHigh(left, right); // result = <-3, 3, -4, 4, -70, 70, -80, 80>
        result = Avx.UnpackLow(left, right); // result = <-1, 0, -2, 2, -50, 50, -60, 60>
        // High nibble of the control selects the lanes multiplied; low nibble selects where the sum is stored.
        result = Avx.DotProduct(left, right, 0b1111_0001); // result = <-29, 0, 0, 0, -17400, 0, 0, 0>
        // TestC/TestZ on floats look only at the sign bits of the lanes.
        bool testResult = Avx.TestC(left, right); // testResult = true
        testResult = Avx.TestC(right, left); // testResult = false
        Vector256 <float> result1 = Avx.Divide(left, right); // lane 0 is -1 / 0 = -Infinity, every other lane is -1
        var plusOne = Vector256.Create(1.0f);
        result = Avx.Compare(right, result1, FloatComparisonMode.OrderedGreaterThanNonSignaling);
        result = Avx.Compare(right, result1, FloatComparisonMode.UnorderedNotLessThanNonSignaling);
        left = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
        right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
        Vector256 <float> nanInFirstPosition = Avx.Divide(left, right); // lane 0 is 0 / 0 = NaN
        left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
        Vector256 <float> InfInFirstPosition = Avx.Divide(left, right); // lane 0 is 1.1 / 0 = +Infinity
        left = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
        right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
        Vector256 <float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <0, NaN, 0, NaN, 0, NaN, 0, NaN>
        // BlendVariable takes the lane from 'right' where the mask's sign bit is set, otherwise from 'left'.
        Vector256 <float> mixed = Avx.BlendVariable(left, right, compareResult); // mixed = <-1.1, 2, ~0.33, 2, -50, -60, -70, -80>
        //left = Vector256.Create(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
        //right = Vector256.Create(1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f);
        // Note: this also reassigns 'right' (lane 2 becomes 3.0 instead of 3.1).
        Vector256 <float> other = right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
        bool bRes = Avx.TestZ(plusOne, compareResult); // bRes = true (plusOne has no sign bits set)
        bool bRes2 = Avx.TestC(plusOne, compareResult); // bRes2 = false (compareResult has sign bits that plusOne lacks)
        // NOTE(review): despite the name, this is true when ANY lane of the mask is set, not only when all are.
        bool allTrue = !Avx.TestZ(compareResult, compareResult);
        compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.OrderedEqualNonSignaling); // compareResult = <0, 0, 0, NaN, 0, 0, 0, 0> (only lane 3: 2 == 2; the NaN lane is false)
        compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.UnorderedEqualNonSignaling); // as above, plus lane 0 is true because it is unordered (NaN)
        compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling);
        compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);
        var left128 = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
        var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
        Vector128 <float> compResult128 = Sse.CompareGreaterThan(left128, right128); // compResult128 = <0, 0, 0, 0>
        // MoveMask packs the sign bit of each lane into an int; here lanes 0, 3, 5 and 7 are set.
        int res = Avx.MoveMask(compareResult); // res = 0b1010_1001
        if (Fma.IsSupported) {
            Vector256 <float> resultFma = Fma.MultiplyAdd(left, right, other); // = left * right + other for each element
            resultFma = Fma.MultiplyAddNegated(left, right, other); // = -(left * right) + other for each element
            resultFma = Fma.MultiplySubtract(left, right, other); // = left * right - other for each element
            Fma.MultiplyAddSubtract(left, right, other); // even elements (0, 2, ...) compute left * right - other, odd elements compute left * right + other; the result is discarded here
        }
        // From here on 'left' is <-1.1, 3, 1/3, PI, -50, 60, -70, 80> and 'right' is <0, 2, 3, 2, 50, -60, 70, -80>.
        result = Avx.DotProduct(left, right, 0b1010_0001); // result = <~12.28, 0, 0, 0, -10000, 0, 0, 0>
        result = Avx.Floor(left); // result = <-2, 3, 0, 3, -50, 60, -70, 80>
        result = Avx.Add(left, right); // result = <-1.1, 5, ~3.33, ~5.14, 0, 0, 0, 0>
        result = Avx.Ceiling(left); // result = <-1, 3, 1, 4, -50, 60, -70, 80>
        result = Avx.Multiply(left, right); // result = <-0, 6, ~1, ~6.28, -2500, -3600, -4900, -6400>
        // Horizontal operations combine adjacent pairs: two results from 'left', then two from 'right', per 128-bit half.
        result = Avx.HorizontalAdd(left, right); // result = <1.9, ~3.47, 2, 5, 10, 10, -10, -10>
        result = Avx.HorizontalSubtract(left, right); // result = <-4.1, ~-2.81, -2, 1, -110, -150, 110, 150>
        double[] someDoubles = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
        double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
        double[] someResult = new double[someDoubles.Length];
        float[] someFloats = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
        float[] someOtherFloats = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };
        unsafe {
            // Load four doubles starting at index 1 and store them at the start of someResult.
            fixed(double *ptr = &someDoubles[1]) {
                fixed(double *ptr2 = &someResult[0]) {
                    Vector256 <double> res2 = Avx.LoadVector256(ptr); // res2 = <3, -2.5, 7.5, 10.8>
                    Avx.Store(ptr2, res2);
                }
            }
            fixed(float *ptr = &someFloats[0]) {
                fixed(float *ptr2 = &someOtherFloats[0]) {
                    // Control 0b0001_0001 multiplies only lane 0 of each 128-bit half: res2 = <1, 0, 0, 0, 10, 0, 0, 0>
                    Vector256 <float> res2 = Avx.DotProduct(Avx.LoadVector256(ptr), Avx.LoadVector256(ptr2), 0b0001_0001);
                    //Avx.Store(ptr2, res2);
                }
            }
        }
    }
}
public void ResizeBicubic(FastBitmap rtnImage) { float scaleX = (float)this.width / rtnImage.width; float scaleY = (float)this.height / rtnImage.height; if (scaleX > 1 || scaleY > 1) { throw new Exception("拡大のみ対応"); } float[] tmpa = new float[rtnImage.width * 4 * this.height]; fixed(float *tmpp = tmpa) { float *tmp = tmpp; var _00mask = Vector128.Create(0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255); var _01mask = Vector128.Create(4, 255, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255, 7, 255, 255, 255); var _10mask = Vector128.Create(8, 255, 255, 255, 9, 255, 255, 255, 10, 255, 255, 255, 11, 255, 255, 255); var _11mask = Vector128.Create(12, 255, 255, 255, 13, 255, 255, 255, 14, 255, 255, 255, 15, 255, 255, 255); var _vmask = Vector128.Create(0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255); var _1012 = Vector128.Create(-1, 0, 1, 2); var _0123i = Vector128.Create(0, 1, 2, 3); var _0000 = Vector128.Create(0, 0, 0, 0); var _0000f = Vector128.Create(0f, 0, 0, 0); var _255f = Vector128.Create(255f, 255, 255, 255); var _1111 = Vector128.Create(1, 1, 1, 1); var _1111f = Vector128.Create(1f, 1, 1, 1); var _4444f = Vector128.Create(4f, 4, 4, 4); var _4444 = Vector128.Create(4, 4, 4, 4); var _5555f = Vector128.Create(5f, 5, 5, 5); var _2222f = Vector128.Create(2f, 2, 2, 2); var _8888f = Vector128.Create(8f, 8, 8, 8); var _7f = Vector128.Create(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff).AsSingle(); var _ff = Vector128.Create(-1, -1, -1, -1); var _stride = Vector128.Create(rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4); Parallel.For(0, this.height, (y) => { float py = (y * scaleY); float *tmpPos = tmp + y * rtnImage.width * 4; for (int x = 0; x < rtnImage.width; x++) { float px = (x * scaleX); int sx = (int)px; var _px = Vector128.CreateScalar(px); _px = Sse.Shuffle(_px, _px, 0); var _sx = Vector128.CreateScalar(sx); _sx = Sse2.Shuffle(_sx, 0); var _width = 
Vector128.CreateScalar(this.width); _width = Sse2.Shuffle(_width, 0); var _x2 = Sse2.Add(_sx, _1012); var _d = Sse.And(Sse.Subtract(_px, Sse2.ConvertToVector128Single(_x2)), _7f); var _d2 = Sse.Multiply(_d, _d); var _d3 = Sse.Multiply(_d2, _d); var w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2))); var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3); var wb = Sse2.CompareGreaterThan(_d, _1111f); var _w = Sse41.BlendVariable(w1, w2, wb); var _xpb = Sse2.Or(Sse2.CompareLessThan(_x2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_x2, _width), _1111).AsInt32(), _ff)); var _xpp = Sse2.And(_sx, _xpb); var _xp = Sse41.BlendVariable(_x2, _xpp, _xpb); var p = Avx2.GatherVector128((uint *)(this._ptr + this._stride * y), _xp, 4).AsByte(); var _p0 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _00mask).AsInt32()); var _p1 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _01mask).AsInt32()); var _p2 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _10mask).AsInt32()); var _p3 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _11mask).AsInt32()); var _w0 = Sse.Shuffle(_w, _w, 0); var _w1 = Sse.Shuffle(_w, _w, 0b01010101); var _w2 = Sse.Shuffle(_w, _w, 0b10101010); var _w3 = Sse.Shuffle(_w, _w, 0b11111111); var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3))); Sse2.Store(tmpPos + x * 4, rgbaf); } }); Parallel.For(0, rtnImage.height, (y) => { float py = (y * scaleY); int sy = (int)py; uint *store = stackalloc uint[4]; var _py = Vector128.CreateScalar(py); _py = Sse.Shuffle(_py, _py, 0); var _sy = Vector128.CreateScalar(sy); _sy = Sse2.Shuffle(_sy, 0); var _height = Vector128.CreateScalar(this.height); _height = Sse2.Shuffle(_height, 0); var _y2 = Sse2.Add(_sy, _1012); var _d = Sse.And(Sse.Subtract(_py, Sse2.ConvertToVector128Single(_y2)), _7f); var _d2 = Sse.Multiply(_d, _d); var _d3 = Sse.Multiply(_d2, _d); var 
w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2))); var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3); var wb = Sse2.CompareGreaterThan(_d, _1111f); var _w = Sse41.BlendVariable(w1, w2, wb); var _ypb = Sse2.Or(Sse2.CompareLessThan(_y2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_y2, _height), _1111).AsInt32(), _ff)); var _ypp = Sse2.And(_sy, _ypb); var _yp = Sse41.BlendVariable(_y2, _ypp, _ypb); var _yps = Sse41.MultiplyLow(_yp, _stride); var _yp0 = Sse2.Add(Sse2.Shuffle(_yps, 0), _0123i); var _yp1 = Sse2.Add(Sse2.Shuffle(_yps, 0b01010101), _0123i); var _yp2 = Sse2.Add(Sse2.Shuffle(_yps, 0b10101010), _0123i); var _yp3 = Sse2.Add(Sse2.Shuffle(_yps, 0b11111111), _0123i); uint *rtn = (uint *)(rtnImage._ptr + rtnImage._stride * y); for (int x = 0; x < rtnImage.width; x++) { var _p0 = Avx2.GatherVector128((float *)(tmp), _yp0, 4); var _p1 = Avx2.GatherVector128((float *)(tmp), _yp1, 4); var _p2 = Avx2.GatherVector128((float *)(tmp), _yp2, 4); var _p3 = Avx2.GatherVector128((float *)(tmp), _yp3, 4); var _w0 = Sse.Shuffle(_w, _w, 0); var _w1 = Sse.Shuffle(_w, _w, 0b01010101); var _w2 = Sse.Shuffle(_w, _w, 0b10101010); var _w3 = Sse.Shuffle(_w, _w, 0b11111111); var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3))); var _b0 = Sse.CompareLessThan(rgbaf, _0000f); rgbaf = Sse41.BlendVariable(rgbaf, _0000f, _b0); var _b1 = Sse.CompareGreaterThan(rgbaf, _255f); rgbaf = Sse41.BlendVariable(rgbaf, _255f, _b1); var rgbab = Sse2.ConvertToVector128Int32(rgbaf).AsByte(); var rgba = Ssse3.Shuffle(rgbab, _vmask).AsUInt32(); Sse2.Store(store, rgba); _yp0 = Sse2.Add(_yp0, _4444); _yp1 = Sse2.Add(_yp1, _4444); _yp2 = Sse2.Add(_yp2, _4444); _yp3 = Sse2.Add(_yp3, _4444); *rtn = *store; rtn++; } });
/// <summary>
/// Compares <paramref name="lhs"/> and <paramref name="rhs"/> lane by lane with
/// <c>Sse.CompareGreaterThan</c> and returns the result reinterpreted via <c>AsInt32()</c>.
/// </summary>
/// <param name="lhs">Left-hand operand.</param>
/// <param name="rhs">Right-hand operand.</param>
/// <returns>The comparison result reinterpreted as 32-bit integer lanes.</returns>
public static m32 GreaterThan(f32 lhs, f32 rhs)
{
    var floatMask = Sse.CompareGreaterThan(lhs, rhs);

    // Reinterpret only; the bits are unchanged.
    return floatMask.AsInt32();
}
// Sharpens one line of float samples: for each element, adds amt * (y - b) to the input value,
// where y and b are read from ystart and bstart, and writes the result to ostart.
//   cstart/ystart/bstart/ostart: raw buffers, reinterpreted as float*.
//   ox: element offset applied to the cstart and ystart pointers only (bstart and ostart are read/written from their start).
//   ow: number of pixels to process; the element count is ow * channels.
//   amt: multiplier applied to the (y - b) difference.
//   thresh: when > 0, differences whose absolute value does not exceed it are ignored.
//   gamma: when true, the difference is applied to the square root of the (clamped to >= 0) input and the result is squared again.
// NOTE(review): ip, yp, bp and op all advance by the same amount per step, so this presumably
// expects channels == 1 - confirm against the declaring type.
unsafe void IConvolver.SharpenLine(byte *cstart, byte *ystart, byte *bstart, byte *ostart, int ox, int ow, float amt, float thresh, bool gamma) {
    float *ip = (float *)cstart + (uint)ox * channels, yp = (float *)ystart + (uint)ox, bp = (float *)bstart, op = (float *)ostart;
    float *ipe = ip + (uint)ow * channels;
    bool threshold = thresh > 0f;
    // Vector paths: AVX when at least one full 256-bit vector fits, otherwise SSE for one full
    // 128-bit vector. They are mutually exclusive, so whatever the AVX loop leaves over goes to
    // the scalar loop at the bottom rather than to the SSE loop.
    if (Avx.IsSupported && ip <= ipe - VectorAvx.Count) {
        var vthresh = Vector256.Create(threshold ? thresh : -1f);
        // 0x7fffffff clears the sign bit when ANDed with a float, yielding the absolute value.
        var vmsk = Vector256.Create(0x7fffffff).AsSingle();
        var vamt = Vector256.Create(amt);
        var vmin = VectorAvx.Zero;
        // Pull the end pointer back by one vector so "ip <= ipe" means a full vector still fits.
        ipe -= VectorAvx.Count;
        do {
            var vd = Avx.Subtract(Avx.LoadVector256(yp), Avx.LoadVector256(bp));
            yp += VectorAvx.Count;
            bp += VectorAvx.Count;
            if (threshold) {
                // Zero out the differences whose magnitude is not above the threshold.
                var sm = HWIntrinsics.AvxCompareGreaterThan(Avx.And(vd, vmsk), vthresh);
                vd = Avx.And(vd, sm);
            }
            vd = Avx.Multiply(vd, vamt);
            var v0 = Avx.LoadVector256(ip);
            ip += VectorAvx.Count;
            if (gamma) {
                v0 = Avx.Max(v0, vmin);
                // v0 * rsqrt(v0) approximates sqrt(v0).
                v0 = Avx.Multiply(v0, Avx.ReciprocalSqrt(v0));
                v0 = Avx.Add(v0, vd);
                v0 = Avx.Max(v0, vmin);
                v0 = Avx.Multiply(v0, v0);
            } else {
                v0 = Avx.Add(v0, vd);
            }
            Avx.Store(op, v0);
            op += VectorAvx.Count;
        } while (ip <= ipe);
        // Restore the real end pointer for the scalar tail.
        ipe += VectorAvx.Count;
    } else if (ip <= ipe - VectorSse.Count) {
        // Same algorithm as the AVX branch, on 128-bit vectors.
        var vthresh = Vector128.Create(threshold ? thresh : -1f);
        var vmsk = Vector128.Create(0x7fffffff).AsSingle();
        var vamt = Vector128.Create(amt);
        var vmin = VectorSse.Zero;
        ipe -= VectorSse.Count;
        do {
            var vd = Sse.Subtract(Sse.LoadVector128(yp), Sse.LoadVector128(bp));
            yp += VectorSse.Count;
            bp += VectorSse.Count;
            if (threshold) {
                var sm = Sse.CompareGreaterThan(Sse.And(vd, vmsk), vthresh);
                vd = Sse.And(vd, sm);
            }
            vd = Sse.Multiply(vd, vamt);
            var v0 = Sse.LoadVector128(ip);
            ip += VectorSse.Count;
            if (gamma) {
                v0 = Sse.Max(v0, vmin);
                v0 = Sse.Multiply(v0, Sse.ReciprocalSqrt(v0));
                v0 = Sse.Add(v0, vd);
                v0 = Sse.Max(v0, vmin);
                v0 = Sse.Multiply(v0, v0);
            } else {
                v0 = Sse.Add(v0, vd);
            }
            Sse.Store(op, v0);
            op += VectorSse.Count;
        } while (ip <= ipe);
        ipe += VectorSse.Count;
    }
    // Scalar tail: handles the remaining elements, or everything when no vector path ran.
    float fmin = VectorSse.Zero.ToScalar();
    while (ip < ipe) {
        float dif = *yp++ - *bp++;
        float c0 = *ip++;
        // Below-threshold differences leave the input value untouched.
        if (!threshold || Math.Abs(dif) > thresh) {
            dif *= amt;
            if (gamma) {
                c0 = MathUtil.MaxF(c0, fmin).Sqrt();
                c0 = MathUtil.MaxF(c0 + dif, fmin);
                c0 *= c0;
            } else {
                c0 += dif;
            }
        }
        *op++ = c0;
    }
}
/// <summary>
/// Thin alias for <c>Sse.CompareGreaterThan</c> under a C-intrinsic-style name.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>The per-lane comparison result returned by <c>Sse.CompareGreaterThan</c>.</returns>
public static Vector128 <float> _mm_cmpgt_ps(Vector128 <float> left, Vector128 <float> right)
    => Sse.CompareGreaterThan(left, right);
/// <summary>
/// Per-lane "greater than" comparison, forwarded to <c>Sse.CompareGreaterThan</c>.
/// </summary>
/// <param name="left">Left-hand operand.</param>
/// <param name="right">Right-hand operand.</param>
/// <returns>The per-lane comparison result returned by <c>Sse.CompareGreaterThan</c>.</returns>
public static Vector128 <float> op_GreaterThan(Vector128 <float> left, Vector128 <float> right)
{
    return Sse.CompareGreaterThan(left, right);
}