예제 #1
0
            /// <summary>
            /// SIMD accelerated bulk conversion of packed source colors into <see cref="Vector4"/> values.
            /// Only valid when <paramref name="count"/> is divisible by <see cref="Vector{UInt32}.Count"/>.
            /// </summary>
            /// <param name="sourceColors">The <see cref="BufferSpan{T}"/> holding the source colors.</param>
            /// <param name="destVectors">The <see cref="BufferSpan{T}"/> receiving the destination vectors.</param>
            /// <param name="count">The number of pixels to convert.</param>
            /// <exception cref="InvalidOperationException">
            /// Thrown when <see cref="Vector.IsHardwareAccelerated"/> is false.
            /// </exception>
            /// <remarks>
            /// The integer to float conversion avoids a per-element cast: 32768.0f (bit pattern 1191182336)
            /// has a unit in the last place of 1/256, so OR-ing an 8 bit value v into that bit pattern yields
            /// the float 32768 + v / 256. Subtracting 32768 and scaling by 256 / 255 leaves v / 255.
            /// Implementation adapted from <see href="http://stackoverflow.com/a/5362789"/>.
            /// TODO: We can replace this implementation in the future using new Vector API-s,
            /// see <see href="https://github.com/dotnet/corefx/issues/15957"/>.
            /// </remarks>
            internal static unsafe void ToVector4SimdAligned(
                BufferSpan<Color> sourceColors,
                BufferSpan<Vector4> destVectors,
                int count)
            {
                if (!Vector.IsHardwareAccelerated)
                {
                    throw new InvalidOperationException(
                        "Color.BulkOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
                }

                int lanes = Vector<uint>.Count;

                DebugGuard.IsTrue(
                    count % lanes == 0,
                    nameof(count),
                    "Argument 'count' should divisible by Vector<uint>.Count!");

                // Constants for the bit trick described in the remarks.
                Vector<uint> lowByteMask = new Vector<uint>(255);
                Vector<uint> biasBits = new Vector<uint>(1191182336); // bit pattern of 32768.0f
                Vector<float> bias = new Vector<float>(32768.0f);
                Vector<float> scale = new Vector<float>(256.0f / 255.0f);

                // Four raw 32 bit values are produced per pixel.
                int rawChannelCount = count * 4;

                uint* packed = (uint*)sourceColors.PointerAtOffset;
                uint* packedEnd = packed + count;

                // Scratch buffer with one extra vector of slack beyond the unpacked data.
                using (PinnedBuffer<uint> scratch = new PinnedBuffer<uint>(rawChannelCount + lanes))
                {
                    uint[] rawInts = scratch.Array;

                    // Same memory viewed as floats so the results can be written back in place.
                    float[] rawFloats = Unsafe.As<float[]>(rawInts);

                    // NOTE(review): presumably UnpackedRGBA is four uints (one per channel) — confirm against its declaration.
                    UnpackedRGBA* unpacked = (UnpackedRGBA*)(uint*)scratch.Pointer;

                    // Step 1: expand every packed pixel into the scratch buffer.
                    while (packed < packedEnd)
                    {
                        // This call is the bottleneck now:
                        unpacked->Load(*packed);
                        packed++;
                        unpacked++;
                    }

                    // Step 2: convert the raw values to floats, one hardware vector at a time.
                    for (int offset = 0; offset < rawChannelCount; offset += lanes)
                    {
                        Vector<uint> bits = (new Vector<uint>(rawInts, offset) & lowByteMask) | biasBits;
                        Vector<float> normalized = (Vector.AsVectorSingle(bits) - bias) * scale;
                        normalized.CopyTo(rawFloats, offset);
                    }

                    // Step 3: hand the converted data over to the destination span.
                    BufferSpan.Copy<uint>(scratch, (BufferSpan<byte>)destVectors, rawChannelCount);
                }
            }
예제 #2
0
            /// <summary>
            /// SIMD optimized bulk implementation of <see cref="IPixel.PackFromVector4(Vector4)"/>
            /// that works only with `count` divisible by <see cref="Vector{UInt32}.Count"/>.
            /// </summary>
            /// <param name="sourceColors">The <see cref="Span{T}"/> to the source colors.</param>
            /// <param name="destVectors">The <see cref="Span{T}"/> to the destination vectors.</param>
            /// <param name="count">The number of pixels to convert.</param>
            /// <remarks>
            /// Implementation adapted from:
            /// <see>
            ///     <cref>http://stackoverflow.com/a/5362789</cref>
            /// </see>
            /// TODO: We can replace this implementation in the future using new Vector API-s:
            /// <see>
            ///     <cref>https://github.com/dotnet/corefx/issues/15957</cref>
            /// </see>
            /// </remarks>
            internal static void ToVector4SimdAligned(Span <Rgba32> sourceColors, Span <Vector4> destVectors, int count)
            {
                if (!Vector.IsHardwareAccelerated)
                {
                    throw new InvalidOperationException(
                              "Rgba32.PixelOperations.ToVector4SimdAligned() should not be called when Vector.IsHardwareAccelerated == false!");
                }

                DebugGuard.IsTrue(
                    count % Vector <uint> .Count == 0,
                    nameof(count),
                    "Argument 'count' should divisible by Vector<uint>.Count!");

                Vector <float> bVec       = new Vector <float>(256.0f / 255.0f);
                Vector <float> magicFloat = new Vector <float>(32768.0f);
                Vector <uint>  magicInt   = new Vector <uint>(1191182336); // reinterpreded value of 32768.0f
                Vector <uint>  mask       = new Vector <uint>(255);

                int unpackedRawCount = count * 4;

                ref uint           sourceBase         = ref Unsafe.As <Rgba32, uint>(ref sourceColors.DangerousGetPinnableReference());