예제 #1
0
    /// <summary>
    /// Appends every element of <paramref name="src"/> that equals <paramref name="target"/>
    /// to <paramref name="dst"/>, handling four elements per step with 128-bit vectors and the
    /// remaining zero to three elements one at a time.
    /// </summary>
    /// <param name="src">Pointer to the first input element.</param>
    /// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
    /// <param name="target">Value an element must equal to be kept.</param>
    /// <param name="dst">Pointer to the output buffer.</param>
    /// <param name="dstCount">Receives the number of elements written to <paramref name="dst"/>.</param>
    /// <remarks>
    /// NOTE(review): each vector step stores a full four-element vector at the current write
    /// position even when fewer lanes matched, so <paramref name="dst"/> presumably needs room
    /// for <paramref name="srcCount"/> elements - confirm with callers.
    /// </remarks>
    public static unsafe void V128Filter(int *src, int srcCount, int target, int *dst, out int dstCount)
    {
        // Largest multiple of four that does not exceed srcCount.
        int vectorEnd = srcCount & ~3;
        int *writePtr = dst;
        int index     = 0;

        // The comparison operand is the same for every iteration.
        var needle = set1_epi32(target);

        while (index < vectorEnd)
        {
            var lanes   = loadu_si128(src + index);
            var matches = cmpeq_epi32(lanes, needle);

            // Presumably LeftPack4PS moves the matching lanes to the front of the vector;
            // verify against SIMDHelpers.
            storeu_si128(writePtr, SIMDHelpers.LeftPack4PS(matches, lanes));

            // One mask bit per lane; advance by the number of lanes that matched.
            int laneBits = movemask_ps(matches);
            writePtr += popcnt_u32((uint)laneBits);

            index += 4;
        }

        // Scalar pass over the elements that did not fill a whole vector.
        for (; index < srcCount; ++index)
        {
            if (src[index] != target)
            {
                continue;
            }

            *writePtr = src[index];
            writePtr++;
        }

        dstCount = (int)(writePtr - dst);
    }
예제 #2
0
    /// <summary>
    /// Appends every element of <paramref name="src"/> that equals <paramref name="target"/>
    /// to <paramref name="dst"/>, handling eight elements per step with 256-bit vectors and the
    /// remaining zero to seven elements one at a time.
    /// </summary>
    /// <param name="src">Pointer to the first input element.</param>
    /// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
    /// <param name="target">Value an element must equal to be kept.</param>
    /// <param name="dst">Pointer to the output buffer.</param>
    /// <param name="dstCount">Receives the number of elements written to <paramref name="dst"/>.</param>
    /// <remarks>
    /// NOTE(review): each vector step stores a full eight-element vector at the current write
    /// position even when fewer lanes matched, so <paramref name="dst"/> presumably needs room
    /// for <paramref name="srcCount"/> elements - confirm with callers.
    /// </remarks>
    public static unsafe void V256Filter(int *src, int srcCount, int target, int *dst, out int dstCount)
    {
        // Largest multiple of eight that does not exceed srcCount.
        int vectorEnd = srcCount & ~7;
        int *writePtr = dst;
        int index     = 0;

        // The comparison operand is the same for every iteration.
        var needle = mm256_set1_epi32(target);

        while (index < vectorEnd)
        {
            var lanes   = mm256_loadu_si256(src + index);
            var matches = mm256_cmpeq_epi32(lanes, needle);

            // Presumably LeftPack8PS moves the matching lanes to the front of the vector;
            // verify against SIMDHelpers.
            mm256_storeu_si256(writePtr, SIMDHelpers.LeftPack8PS(matches, lanes));

            // One mask bit per lane; advance by the number of lanes that matched.
            int laneBits = mm256_movemask_ps(matches);
            writePtr += popcnt_u32((uint)laneBits);

            index += 8;
        }

        // Scalar pass over the elements that did not fill a whole vector.
        for (; index < srcCount; ++index)
        {
            if (src[index] != target)
            {
                continue;
            }

            *writePtr = src[index];
            writePtr++;
        }

        dstCount = (int)(writePtr - dst);
    }
예제 #3
0
    /// <summary>
    /// Appends every element of <paramref name="src"/> that lies strictly between
    /// <paramref name="greaterThan"/> and <paramref name="lessThan"/> to <paramref name="dst"/>,
    /// handling four elements per step with 128-bit vectors and the remaining zero to three
    /// elements one at a time.
    /// </summary>
    /// <param name="src">Pointer to the first input element.</param>
    /// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
    /// <param name="greaterThan">Exclusive lower bound; kept elements are greater than this.</param>
    /// <param name="lessThan">Exclusive upper bound; kept elements are less than this.</param>
    /// <param name="dst">Pointer to the output buffer.</param>
    /// <param name="dstCount">Receives the number of elements written to <paramref name="dst"/>.</param>
    /// <remarks>
    /// NOTE(review): each vector step stores a full four-element vector at the current write
    /// position even when fewer lanes matched, so <paramref name="dst"/> presumably needs room
    /// for <paramref name="srcCount"/> elements - confirm with callers.
    /// </remarks>
    private static unsafe void V128Filter([NoAlias] int *src, int srcCount, int greaterThan, int lessThan, [NoAlias] int *dst, [NoAlias] out int dstCount)
    {
        // Largest multiple of four that does not exceed srcCount.
        int vectorEnd = srcCount & ~3;
        int *writePtr = dst;
        int index     = 0;

        // Both bounds are loop-invariant, so broadcast them once.
        var upperBound = set1_epi32(lessThan);
        var lowerBound = set1_epi32(greaterThan);

        while (index < vectorEnd)
        {
            var lanes      = loadu_si128(src + index);
            var belowUpper = cmplt_epi32(lanes, upperBound);
            var aboveLower = cmplt_epi32(lowerBound, lanes);
            var inRange    = and_ps(belowUpper, aboveLower);

            // Presumably LeftPack4PS moves the selected lanes to the front of the vector;
            // verify against SIMDHelpers.
            storeu_si128(writePtr, SIMDHelpers.LeftPack4PS(inRange, lanes));

            // One mask bit per lane; advance by the number of lanes that passed both tests.
            int laneBits = movemask_ps(inRange);
            writePtr += popcnt_u32((uint)laneBits);

            index += 4;
        }

        // Scalar pass over the elements that did not fill a whole vector.
        for (; index < srcCount; ++index)
        {
            if (src[index] >= lessThan || src[index] <= greaterThan)
            {
                continue;
            }

            *writePtr = src[index];
            writePtr++;
        }

        dstCount = (int)(writePtr - dst);
    }
예제 #4
0
    /// <summary>
    /// Appends every element of <paramref name="src"/> that lies strictly between
    /// <paramref name="greaterThan"/> and <paramref name="lessThan"/> to <paramref name="dst"/>,
    /// handling eight elements per step with 256-bit vectors and the remaining zero to seven
    /// elements one at a time.
    /// </summary>
    /// <param name="src">Pointer to the first input element.</param>
    /// <param name="srcCount">Number of elements readable from <paramref name="src"/>.</param>
    /// <param name="greaterThan">Exclusive lower bound; kept elements are greater than this.</param>
    /// <param name="lessThan">Exclusive upper bound; kept elements are less than this.</param>
    /// <param name="dst">Pointer to the output buffer.</param>
    /// <param name="dstCount">Receives the number of elements written to <paramref name="dst"/>.</param>
    /// <remarks>
    /// NOTE(review): each vector step stores a full eight-element vector at the current write
    /// position even when fewer lanes matched, so <paramref name="dst"/> presumably needs room
    /// for <paramref name="srcCount"/> elements - confirm with callers.
    /// </remarks>
    private static unsafe void V256Filter([NoAlias] int *src, int srcCount, int greaterThan, int lessThan, [NoAlias] int *dst, [NoAlias] out int dstCount)
    {
        // Largest multiple of eight that does not exceed srcCount.
        int vectorEnd = srcCount & ~7;
        int *writePtr = dst;
        int index     = 0;

        // Both bounds are loop-invariant, so broadcast them once.
        var upperBound = mm256_set1_epi32(lessThan);
        var lowerBound = mm256_set1_epi32(greaterThan);

        while (index < vectorEnd)
        {
            var lanes      = mm256_loadu_si256(src + index);
            var belowUpper = mm256_cmpgt_epi32(upperBound, lanes);
            var aboveLower = mm256_cmpgt_epi32(lanes, lowerBound);
            var inRange    = mm256_and_ps(belowUpper, aboveLower);

            // Presumably LeftPack8PS moves the selected lanes to the front of the vector;
            // verify against SIMDHelpers.
            mm256_storeu_si256(writePtr, SIMDHelpers.LeftPack8PS(inRange, lanes));

            // One mask bit per lane; advance by the number of lanes that passed both tests.
            int laneBits = mm256_movemask_ps(inRange);
            writePtr += popcnt_u32((uint)laneBits);

            index += 8;
        }

        // Scalar pass over the elements that did not fill a whole vector.
        for (; index < srcCount; ++index)
        {
            if (src[index] >= lessThan || src[index] <= greaterThan)
            {
                continue;
            }

            *writePtr = src[index];
            writePtr++;
        }

        dstCount = (int)(writePtr - dst);
    }
예제 #5
0
        private Hit[] RayTraceAVXFaster(Ray ray)
        {
            Vector256 <double> dir      = (Vector256 <double>)ray.Direction;
            Vector256 <double> vert0    = (Vector256 <double>)Vert0.Position;
            Vector256 <double> edge0to1 = (Vector256 <double>)Edge0to1;
            Vector256 <double> edge0to2 = (Vector256 <double>)Edge0to2;

            Vector256 <double> offset = Avx.Subtract((Vector256 <double>)ray.Origin, vert0);
            Vector256 <double> side1  = SIMDHelpers.Cross(offset, edge0to1);
            Vector256 <double> side2  = SIMDHelpers.Cross(dir, edge0to2);

            // Prepare all dot products
            Vector256 <double> uvTemp    = Avx.Multiply(offset, side2);         // u
            Vector256 <double> temp      = Avx.Multiply(dir, side1);            // v
            Vector256 <double> edge2Temp = Avx.Multiply(edge0to2, side1);
            Vector256 <double> distTemp  = Avx.Multiply(edge0to1, side2);

            uvTemp    = Avx.HorizontalAdd(uvTemp, temp);
            edge2Temp = Avx.HorizontalAdd(edge2Temp, edge2Temp);
            distTemp  = Avx.HorizontalAdd(distTemp, distTemp);

            // Complete all dot products for SSE ops
            Vector128 <double> uvs   = SIMDHelpers.Add2(uvTemp);
            Vector128 <double> dist  = SIMDHelpers.Add2(edge2Temp);
            Vector128 <double> temp1 = SIMDHelpers.Add2(distTemp);
            Vector128 <double> temp2;

            // vec2 constants we'll be using later
            Vector128 <double> ones2   = SIMDHelpers.BroadcastScalar2(1D);
            Vector128 <double> zeroes2 = new Vector128 <double>();

            // Reciprocal of distance along edge0to1
            temp1 = Sse2.Divide(ones2, temp1);
            temp2 = Sse2.CompareOrdered(temp1, temp1);
            // Remove NaNs from the result, replaced with 0
            Vector128 <double> distZeroed = Sse2.And(temp1, temp2);

            uvs  = Sse2.Multiply(uvs, distZeroed);
            dist = Sse2.Multiply(dist, distZeroed);

            // compare uvs < 0 and > 1, dist < 0, jump out if any of those conditions are met
            temp1 = Sse2.CompareLessThan(uvs, zeroes2);
            temp2 = Mirror ? uvs : Sse3.HorizontalAdd(uvs, uvs);
            temp2 = Sse2.CompareGreaterThan(temp2, ones2);
            temp1 = Sse2.Or(temp1, temp2);
            temp2 = Sse2.CompareLessThan(dist, zeroes2);
            temp1 = Sse2.Or(temp1, temp2);

            if (!Avx.TestZ(temp1, temp1))
            {
                return(default);
예제 #6
0
        /// <summary>
        /// Slab-style intersection of <paramref name="ray"/> against the box described by
        /// <c>Minimum</c> and <c>Maximum</c>, evaluated with AVX/SSE2 intrinsics.
        /// </summary>
        /// <param name="ray">Ray whose <c>Origin</c> and <c>Direction</c> are converted to <c>Vector256&lt;double&gt;</c>.</param>
        /// <returns>
        /// The reduced near and far values, or <c>(NaN, NaN)</c> when near compares greater
        /// than far or far compares less than zero.
        /// </returns>
        public (double near, double far) IntersectAVX(Ray ray)
        {
            Vector256 <double> rayOrigin = (Vector256 <double>)ray.Origin;
            Vector256 <double> rayDir    = (Vector256 <double>)ray.Direction;

            Vector256 <double> noDirection = default;
            Vector256 <double> slabMin     = (Vector256 <double>)Minimum;
            Vector256 <double> slabMax     = (Vector256 <double>)Maximum;

            // Lanes where the direction is zero and the origin already lies between the two
            // bounds: those slabs are widened to infinity so that NaN doesn't propagate.
            Vector256 <double> dirIsZero = Avx.Compare(rayDir, noDirection, FloatComparisonMode.OrderedEqualNonSignaling);
            Vector256 <double> atOrAbove = Avx.Compare(rayOrigin, slabMin, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
            Vector256 <double> atOrBelow = Avx.Compare(rayOrigin, slabMax, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
            Vector256 <double> widenMask = Avx.And(dirIsZero, Avx.And(atOrAbove, atOrBelow));

            slabMin = Avx.BlendVariable(slabMin, SIMDHelpers.BroadcastScalar4(double.NegativeInfinity), widenMask);
            slabMax = Avx.BlendVariable(slabMax, SIMDHelpers.BroadcastScalar4(double.PositiveInfinity), widenMask);

            // The direction itself serves as the blend mask: the blend keys on each lane's
            // most significant (sign) bit, so lanes with a negative direction (probably
            // including -0) get their two bounds exchanged.
            Vector256 <double> entryBound = Avx.BlendVariable(slabMin, slabMax, rayDir);
            Vector256 <double> exitBound  = Avx.BlendVariable(slabMax, slabMin, rayDir);

            // Per-lane distances: (bound - origin) * (1 / direction).
            Vector256 <double> invDir = Avx.Divide(Vector256.Create(1D), rayDir);
            Vector256 <double> tNear  = Avx.Multiply(Avx.Subtract(entryBound, rayOrigin), invDir);
            Vector256 <double> tFar   = Avx.Multiply(Avx.Subtract(exitBound, rayOrigin), invDir);

            // Reduce four lanes to two, then to the scalar in the lowest lane.
            // Presumably SIMDHelpers.Swap exchanges the two lanes - confirm.
            Vector128 <double> nearPair = Sse2.Max(tNear.GetLower(), tNear.GetUpper());
            Vector128 <double> nearHit  = Sse2.MaxScalar(nearPair, SIMDHelpers.Swap(nearPair));

            Vector128 <double> farPair = Sse2.Min(tFar.GetLower(), tFar.GetUpper());
            Vector128 <double> farHit  = Sse2.MinScalar(farPair, SIMDHelpers.Swap(farPair));

            Vector128 <double> zero2 = default;

            bool nearPastFar = Sse2.CompareScalarOrderedGreaterThan(nearHit, farHit);
            bool farNegative = Sse2.CompareScalarOrderedLessThan(farHit, zero2);
            bool hit         = !(nearPastFar | farNegative);

            return hit
                ? (nearHit.ToScalar(), farHit.ToScalar())
                : (double.NaN, double.NaN);
        }