/// <summary>
/// Collects the elements of <paramref name="src"/> that are equal to
/// <paramref name="target"/> into <paramref name="dst"/>, processing four
/// 32-bit lanes per iteration and finishing the remainder with scalar code.
/// </summary>
/// <param name="src">First element of the input.</param>
/// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
/// <param name="target">Value an element must equal to be kept.</param>
/// <param name="dst">
/// Output buffer. Every vector iteration stores a full 4-lane vector at the
/// current write position regardless of how many lanes matched, so the buffer
/// must have room for that whole store.
/// </param>
/// <param name="dstCount">Receives the number of elements kept.</param>
public static unsafe void V128Filter(int *src, int srcCount, int target, int *dst, out int dstCount)
{
    // Largest multiple of 4 that does not exceed srcCount.
    var vectorEnd = srcCount & ~3;
    var needle = set1_epi32(target);
    var write = dst;
    var index = 0;

    while (index < vectorEnd)
    {
        var lanes = loadu_si128(src + index);
        var hits = cmpeq_epi32(lanes, needle);

        // NOTE(review): LeftPack4PS presumably moves the selected lanes to the
        // front of the vector - confirm against SIMDHelpers.
        storeu_si128(write, SIMDHelpers.LeftPack4PS(hits, lanes));

        // One mask bit per lane; the bit count is how far the cursor advances.
        write += popcnt_u32((uint)movemask_ps(hits));
        index += 4;
    }

    // Scalar pass over the elements that did not fill a whole vector.
    while (index < srcCount)
    {
        var value = src[index];
        if (value == target)
        {
            *write = value;
            write++;
        }

        index++;
    }

    dstCount = (int)(write - dst);
}
/// <summary>
/// Collects the elements of <paramref name="src"/> that are equal to
/// <paramref name="target"/> into <paramref name="dst"/>, processing eight
/// 32-bit lanes per iteration and finishing the remainder with scalar code.
/// </summary>
/// <param name="src">First element of the input.</param>
/// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
/// <param name="target">Value an element must equal to be kept.</param>
/// <param name="dst">
/// Output buffer. Every vector iteration stores a full 8-lane vector at the
/// current write position regardless of how many lanes matched, so the buffer
/// must have room for that whole store.
/// </param>
/// <param name="dstCount">Receives the number of elements kept.</param>
public static unsafe void V256Filter(int *src, int srcCount, int target, int *dst, out int dstCount)
{
    // Largest multiple of 8 that does not exceed srcCount.
    var vectorEnd = srcCount & ~7;
    var needle = mm256_set1_epi32(target);
    var write = dst;
    var index = 0;

    while (index < vectorEnd)
    {
        var lanes = mm256_loadu_si256(src + index);
        var hits = mm256_cmpeq_epi32(lanes, needle);

        // NOTE(review): LeftPack8PS presumably moves the selected lanes to the
        // front of the vector - confirm against SIMDHelpers.
        mm256_storeu_si256(write, SIMDHelpers.LeftPack8PS(hits, lanes));

        // One mask bit per lane; the bit count is how far the cursor advances.
        write += popcnt_u32((uint)mm256_movemask_ps(hits));
        index += 8;
    }

    // Scalar pass over the elements that did not fill a whole vector.
    while (index < srcCount)
    {
        var value = src[index];
        if (value == target)
        {
            *write = value;
            write++;
        }

        index++;
    }

    dstCount = (int)(write - dst);
}
/// <summary>
/// Collects the elements of <paramref name="src"/> that lie strictly between
/// <paramref name="greaterThan"/> and <paramref name="lessThan"/> into
/// <paramref name="dst"/>, processing four 32-bit lanes per iteration and
/// finishing the remainder with scalar code.
/// </summary>
/// <param name="src">First element of the input.</param>
/// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
/// <param name="greaterThan">Exclusive lower bound.</param>
/// <param name="lessThan">Exclusive upper bound.</param>
/// <param name="dst">
/// Output buffer. Every vector iteration stores a full 4-lane vector at the
/// current write position regardless of how many lanes matched, so the buffer
/// must have room for that whole store.
/// </param>
/// <param name="dstCount">Receives the number of elements kept.</param>
private static unsafe void V128Filter([NoAlias] int *src, int srcCount, int greaterThan, int lessThan, [NoAlias] int *dst, [NoAlias] out int dstCount)
{
    // Largest multiple of 4 that does not exceed srcCount.
    var vectorEnd = srcCount & ~3;
    var upperBound = set1_epi32(lessThan);
    var lowerBound = set1_epi32(greaterThan);
    var write = dst;
    var index = 0;

    while (index < vectorEnd)
    {
        var lanes = loadu_si128(src + index);

        // A lane is kept only when both strict comparisons hold.
        var belowUpper = cmplt_epi32(lanes, upperBound);
        var aboveLower = cmplt_epi32(lowerBound, lanes);
        var inRange = and_ps(belowUpper, aboveLower);

        // NOTE(review): LeftPack4PS presumably moves the selected lanes to the
        // front of the vector - confirm against SIMDHelpers.
        storeu_si128(write, SIMDHelpers.LeftPack4PS(inRange, lanes));

        // One mask bit per lane; the bit count is how far the cursor advances.
        write += popcnt_u32((uint)movemask_ps(inRange));
        index += 4;
    }

    // Scalar pass over the elements that did not fill a whole vector.
    for (; index < srcCount; index++)
    {
        var value = src[index];
        if (value >= lessThan || value <= greaterThan)
        {
            continue;
        }

        *write = value;
        write++;
    }

    dstCount = (int)(write - dst);
}
/// <summary>
/// Collects the elements of <paramref name="src"/> that lie strictly between
/// <paramref name="greaterThan"/> and <paramref name="lessThan"/> into
/// <paramref name="dst"/>, processing eight 32-bit lanes per iteration and
/// finishing the remainder with scalar code.
/// </summary>
/// <param name="src">First element of the input.</param>
/// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
/// <param name="greaterThan">Exclusive lower bound.</param>
/// <param name="lessThan">Exclusive upper bound.</param>
/// <param name="dst">
/// Output buffer. Every vector iteration stores a full 8-lane vector at the
/// current write position regardless of how many lanes matched, so the buffer
/// must have room for that whole store.
/// </param>
/// <param name="dstCount">Receives the number of elements kept.</param>
private static unsafe void V256Filter([NoAlias] int *src, int srcCount, int greaterThan, int lessThan, [NoAlias] int *dst, [NoAlias] out int dstCount)
{
    // Largest multiple of 8 that does not exceed srcCount.
    var vectorEnd = srcCount & ~7;
    var upperBound = mm256_set1_epi32(lessThan);
    var lowerBound = mm256_set1_epi32(greaterThan);
    var write = dst;
    var index = 0;

    while (index < vectorEnd)
    {
        var lanes = mm256_loadu_si256(src + index);

        // A lane is kept only when both strict comparisons hold.
        var belowUpper = mm256_cmpgt_epi32(upperBound, lanes);
        var aboveLower = mm256_cmpgt_epi32(lanes, lowerBound);
        var inRange = mm256_and_ps(belowUpper, aboveLower);

        // NOTE(review): LeftPack8PS presumably moves the selected lanes to the
        // front of the vector - confirm against SIMDHelpers.
        mm256_storeu_si256(write, SIMDHelpers.LeftPack8PS(inRange, lanes));

        // One mask bit per lane; the bit count is how far the cursor advances.
        write += popcnt_u32((uint)mm256_movemask_ps(inRange));
        index += 8;
    }

    // Scalar pass over the elements that did not fill a whole vector.
    for (; index < srcCount; index++)
    {
        var value = src[index];
        if (value >= lessThan || value <= greaterThan)
        {
            continue;
        }

        *write = value;
        write++;
    }

    dstCount = (int)(write - dst);
}
private Hit[] RayTraceAVXFaster(Ray ray) { Vector256 <double> dir = (Vector256 <double>)ray.Direction; Vector256 <double> vert0 = (Vector256 <double>)Vert0.Position; Vector256 <double> edge0to1 = (Vector256 <double>)Edge0to1; Vector256 <double> edge0to2 = (Vector256 <double>)Edge0to2; Vector256 <double> offset = Avx.Subtract((Vector256 <double>)ray.Origin, vert0); Vector256 <double> side1 = SIMDHelpers.Cross(offset, edge0to1); Vector256 <double> side2 = SIMDHelpers.Cross(dir, edge0to2); // Prepare all dot products Vector256 <double> uvTemp = Avx.Multiply(offset, side2); // u Vector256 <double> temp = Avx.Multiply(dir, side1); // v Vector256 <double> edge2Temp = Avx.Multiply(edge0to2, side1); Vector256 <double> distTemp = Avx.Multiply(edge0to1, side2); uvTemp = Avx.HorizontalAdd(uvTemp, temp); edge2Temp = Avx.HorizontalAdd(edge2Temp, edge2Temp); distTemp = Avx.HorizontalAdd(distTemp, distTemp); // Complete all dot products for SSE ops Vector128 <double> uvs = SIMDHelpers.Add2(uvTemp); Vector128 <double> dist = SIMDHelpers.Add2(edge2Temp); Vector128 <double> temp1 = SIMDHelpers.Add2(distTemp); Vector128 <double> temp2; // vec2 constants we'll be using later Vector128 <double> ones2 = SIMDHelpers.BroadcastScalar2(1D); Vector128 <double> zeroes2 = new Vector128 <double>(); // Reciprocal of distance along edge0to1 temp1 = Sse2.Divide(ones2, temp1); temp2 = Sse2.CompareOrdered(temp1, temp1); // Remove NaNs from the result, replaced with 0 Vector128 <double> distZeroed = Sse2.And(temp1, temp2); uvs = Sse2.Multiply(uvs, distZeroed); dist = Sse2.Multiply(dist, distZeroed); // compare uvs < 0 and > 1, dist < 0, jump out if any of those conditions are met temp1 = Sse2.CompareLessThan(uvs, zeroes2); temp2 = Mirror ? uvs : Sse3.HorizontalAdd(uvs, uvs); temp2 = Sse2.CompareGreaterThan(temp2, ones2); temp1 = Sse2.Or(temp1, temp2); temp2 = Sse2.CompareLessThan(dist, zeroes2); temp1 = Sse2.Or(temp1, temp2); if (!Avx.TestZ(temp1, temp1)) { return(default);
/// <summary>
/// Intersects <paramref name="ray"/> with the box spanned by Minimum and
/// Maximum, evaluating all slabs at once with AVX and reducing with SSE2.
/// </summary>
/// <param name="ray">Ray whose Origin and Direction are tested against the box.</param>
/// <returns>
/// The largest per-axis entry distance and the smallest per-axis exit
/// distance, or (NaN, NaN) when the entry distance exceeds the exit distance
/// or the exit distance is negative.
/// </returns>
public (double near, double far) IntersectAVX(Ray ray)
{
    Vector256<double> rayOrigin = (Vector256<double>)ray.Origin;
    Vector256<double> rayDir = (Vector256<double>)ray.Direction;
    Vector256<double> zero4 = Vector256<double>.Zero;
    Vector256<double> boxMin = (Vector256<double>)Minimum;
    Vector256<double> boxMax = (Vector256<double>)Maximum;

    // Axes with a zero direction component whose origin already lies inside
    // the slab get their bounds widened to +/- infinity, so those lanes
    // cannot feed NaN into the reductions below.
    Vector256<double> zeroDir = Avx.Compare(rayDir, zero4, FloatComparisonMode.OrderedEqualNonSignaling);
    Vector256<double> aboveMin = Avx.Compare(rayOrigin, boxMin, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
    Vector256<double> belowMax = Avx.Compare(rayOrigin, boxMax, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
    Vector256<double> openSlab = Avx.And(zeroDir, Avx.And(aboveMin, belowMax));

    boxMin = Avx.BlendVariable(boxMin, SIMDHelpers.BroadcastScalar4(double.NegativeInfinity), openSlab);
    boxMax = Avx.BlendVariable(boxMax, SIMDHelpers.BroadcastScalar4(double.PositiveInfinity), openSlab);

    // The blend selects on the sign bit, so passing the direction itself as
    // the mask swaps entry/exit planes on negative axes (likely -0 as well).
    Vector256<double> entryPlane = Avx.BlendVariable(boxMin, boxMax, rayDir);
    Vector256<double> exitPlane = Avx.BlendVariable(boxMax, boxMin, rayDir);

    Vector256<double> invDir = Avx.Divide(Vector256.Create(1D), rayDir);
    Vector256<double> entry4 = Avx.Multiply(Avx.Subtract(entryPlane, rayOrigin), invDir);
    Vector256<double> exit4 = Avx.Multiply(Avx.Subtract(exitPlane, rayOrigin), invDir);

    // Horizontal max of the entry distances: fold 4 lanes to 2, then 2 to 1.
    Vector128<double> tNear = Sse2.Max(entry4.GetLower(), entry4.GetUpper());
    tNear = Sse2.MaxScalar(tNear, SIMDHelpers.Swap(tNear));

    // Horizontal min of the exit distances, folded the same way.
    Vector128<double> tFar = Sse2.Min(exit4.GetLower(), exit4.GetUpper());
    tFar = Sse2.MinScalar(tFar, SIMDHelpers.Swap(tFar));

    bool entryPastExit = Sse2.CompareScalarOrderedGreaterThan(tNear, tFar);
    bool exitBehindOrigin = Sse2.CompareScalarOrderedLessThan(tFar, Vector128<double>.Zero);

    return (entryPastExit || exitBehindOrigin)
        ? (double.NaN, double.NaN)
        : (tNear.ToScalar(), tFar.ToScalar());
}