コード例 #1
0
        /// <summary>
        /// Truncates <paramref name="x"/> toward zero with the SSE4.1 rounding instruction.
        /// </summary>
        /// <param name="x">The value to truncate.</param>
        /// <returns>Lane 0 of the rounded vector, i.e. <paramref name="x"/> with its fraction removed.</returns>
        /// <remarks>This helper performs no <c>Sse41.IsSupported</c> check itself.</remarks>
        private static float TruncateSse41(float x)
        {
            // Only lane 0 is meaningful: CreateScalarUnsafe leaves the upper lanes undefined.
            Vector128<float> truncated = Sse41.RoundToZero(Vector128.CreateScalarUnsafe(x));
            return truncated.ToScalar();
        }
コード例 #2
0
        /// <summary>
        /// Returns the smaller of two 32-bit integers using a vector minimum on lane 0.
        /// </summary>
        /// <param name="x">First candidate.</param>
        /// <param name="y">Second candidate.</param>
        /// <returns>The lesser of <paramref name="x"/> and <paramref name="y"/>.</returns>
        /// <remarks>
        /// NOTE(review): <c>Avx</c> itself only declares <c>Min</c> for 256-bit float/double vectors, so this
        /// call presumably binds to the 128-bit integer overload inherited from <c>Sse41</c> - confirm.
        /// No hardware-support check is performed here.
        /// </remarks>
        public static int AVXMin(int x, int y)
        {
            Vector128<int> left = Vector128.CreateScalarUnsafe(x);
            Vector128<int> right = Vector128.CreateScalarUnsafe(y);
            Vector128<int> smaller = Avx.Min(left, right);

            return smaller.ToScalar();
        }
コード例 #3
0
        /// <summary>
        /// Rounds <paramref name="x"/> to an integral value using the SSE4.1 "current direction" scalar rounding.
        /// </summary>
        /// <param name="x">The value to round.</param>
        /// <returns>Lane 0 of the rounded vector.</returns>
        /// <remarks>This helper performs no <c>Sse41.IsSupported</c> check itself.</remarks>
        private static float RoundSse41(float x)
        {
            // The rounding mode is whatever the CPU's floating-point control state currently selects.
            Vector128<float> rounded = Sse41.RoundCurrentDirectionScalar(Vector128.CreateScalarUnsafe(x));
            return rounded.ToScalar();
        }
コード例 #4
0
        /// <summary>
        /// Computes the ceiling of <paramref name="x"/> with the SSE4.1 scalar ceiling instruction.
        /// </summary>
        /// <param name="x">The value to round up.</param>
        /// <returns>Lane 0 of the result vector.</returns>
        /// <remarks>This helper performs no <c>Sse41.IsSupported</c> check itself.</remarks>
        private static float CeilingSse41(float x)
        {
            Vector128<float> roundedUp = Sse41.CeilingScalar(Vector128.CreateScalarUnsafe(x));
            return roundedUp.ToScalar();
        }
コード例 #5
0
 /// <summary>
 /// Computes <c>1 / sqrt(a)</c> with the full-precision SSE scalar square root and divide
 /// (as opposed to an approximate reciprocal-square-root instruction).
 /// </summary>
 /// <param name="a">The input value.</param>
 /// <returns>Lane 0 of the quotient vector.</returns>
 public float AccurateSse(float a)
 {
     Vector128<float> one = Vector128.CreateScalarUnsafe(1f);
     Vector128<float> root = Sse.SqrtScalar(Vector128.CreateScalarUnsafe(a));
     Vector128<float> inverseRoot = Sse.DivideScalar(one, root);

     return inverseRoot.ToScalar();
 }
コード例 #6
0
        /// <summary>
        /// Builds a 128-bit vector from <paramref name="value"/> by shuffling lane 0 of a scalar vector
        /// with the <c>Constant.Simd128x4.Broadcast0toAll</c> control.
        /// </summary>
        /// <param name="value">The scalar to broadcast.</param>
        /// <returns>The shuffled vector (presumably <paramref name="value"/> in every lane - depends on the shuffle constant).</returns>
        public unsafe static Vector128<float> BroadcastScalarToVector128(float value)
        {
            // Avx.BroadcastScalarToVector128(&value) (_mm256_broadcast_ps) would also work,
            // but vbroadcastf128 makes a memory thunk, so a register shuffle is used instead.
            Vector128<float> lane0 = Vector128.CreateScalarUnsafe(value);
            return Avx.Shuffle(lane0, lane0, Constant.Simd128x4.Broadcast0toAll);
        }
コード例 #7
0
 // Compares Fma.MultiplyAdd on single-precision operands against ReferenceMultiplyAdd(b, b, b).
 // All three operands carry the same scalar b but are built with three different Vector128
 // factories (CreateScalarUnsafe, CreateScalar, Create); only lane 0 of the result is compared.
 // NOTE(review): presumably this exercises how the JIT handles each operand form - confirm
 // before restructuring the expression. The parameter a is not used by this method.
 static void TestExplicitFmaUsage6(ref Vector128 <float> a, float b)
 {
     CompareFloats(ReferenceMultiplyAdd(b, b, b),
                   Fma.MultiplyAdd(
                       Vector128.CreateScalarUnsafe(b), // upper lanes undefined
                       Vector128.CreateScalar(b),       // upper lanes zeroed
                       Vector128.Create(b)).ToScalar()); // b in every lane
 }
コード例 #8
0
 // Compares Fma.MultiplyAdd on double-precision operands against ReferenceMultiplyAdd(-b, -b, -333.0).
 // Both multiplicands are the negated scalar -b and the addend is the constant -333.0; every operand
 // is built with CreateScalarUnsafe, and only lane 0 of the result is compared.
 // NOTE(review): presumably a JIT pattern test for negated operands - confirm before restructuring.
 // The parameter a is not used by this method.
 static void TestExplicitFmaUsage5(ref Vector128 <double> a, double b)
 {
     CompareDoubles(ReferenceMultiplyAdd(-b, -b, -333.0),
                    Fma.MultiplyAdd(
                        Vector128.CreateScalarUnsafe(-b),
                        Vector128.CreateScalarUnsafe(-b),
                        Vector128.CreateScalarUnsafe(-333.0)).ToScalar());
 }
コード例 #9
0
 // Compares Fma.MultiplyAdd on double-precision operands against ReferenceMultiplyAdd(b, b, b).
 // All three operands carry the same scalar b but are built with three different Vector128
 // factories (CreateScalarUnsafe, CreateScalar, Create); only lane 0 of the result is compared.
 // NOTE(review): presumably this exercises how the JIT handles each operand form - confirm
 // before restructuring the expression. The parameter a is not used by this method.
 static void TestExplicitFmaUsage6(ref Vector128 <double> a, double b)
 {
     CompareDoubles(ReferenceMultiplyAdd(b, b, b),
                    Fma.MultiplyAdd(
                        Vector128.CreateScalarUnsafe(b), // upper lane undefined
                        Vector128.CreateScalar(b),       // upper lane zeroed
                        Vector128.Create(b)).ToScalar()); // b in every lane
 }
コード例 #10
0
 /// <summary>
 /// Returns the larger of <paramref name="a"/> and <paramref name="b"/>, using the SSE scalar
 /// maximum when available and <c>MathF.Max</c> otherwise.
 /// </summary>
 /// <param name="a">First operand.</param>
 /// <param name="b">Second operand.</param>
 /// <returns>The maximum of the two operands.</returns>
 public static f32 Max_f32(f32 a, f32 b)
 {
     if (!Sse.IsSupported)
     {
         return MathF.Max(a, b);
     }

     var lhs = Vector128.CreateScalarUnsafe(a);
     var rhs = Vector128.CreateScalarUnsafe(b);

     return Sse.MaxScalar(lhs, rhs).ToScalar();
 }
コード例 #11
0
        /// <summary>
        /// Clamps <paramref name="value"/> to the range [<paramref name="min"/>, <paramref name="max"/>]
        /// using SSE2 scalar min/max instructions.
        /// </summary>
        /// <param name="value">The value to clamp.</param>
        /// <param name="min">Lower bound.</param>
        /// <param name="max">Upper bound.</param>
        /// <returns><c>max(min, min(value, max))</c>, read from lane 0.</returns>
        /// <remarks>
        /// Original author's note: around 2x faster than managed (benchmarked on i7-4720HQ @ 2.6Ghz).
        /// This helper performs no <c>Sse2.IsSupported</c> check itself.
        /// </remarks>
        private static double Sse2Clamp(double value, double min, double max)
        {
            Vector128<double> candidate = Vector128.CreateScalarUnsafe(value);
            Vector128<double> lower = Vector128.CreateScalarUnsafe(min);
            Vector128<double> upper = Vector128.CreateScalarUnsafe(max);

            // Apply the upper bound first, then the lower bound.
            Vector128<double> capped = Sse2.MinScalar(candidate, upper);
            Vector128<double> clamped = Sse2.MaxScalar(lower, capped);

            return clamped.ToScalar();
        }
コード例 #12
0
        /// <summary>
        /// Clamps <paramref name="value"/> to the range [<paramref name="min"/>, <paramref name="max"/>]
        /// using SSE scalar min/max instructions.
        /// </summary>
        /// <param name="value">The value to clamp.</param>
        /// <param name="min">Lower bound.</param>
        /// <param name="max">Upper bound.</param>
        /// <returns><c>max(min, min(value, max))</c>, read from lane 0.</returns>
        /// <remarks>
        /// Original author's note: around 2x faster than managed (benchmarked on i7-4720HQ @ 2.6Ghz).
        /// This helper performs no <c>Sse.IsSupported</c> check itself.
        /// </remarks>
        private static float SseClamp(float value, float min, float max)
        {
            Vector128<float> candidate = Vector128.CreateScalarUnsafe(value);
            Vector128<float> lower = Vector128.CreateScalarUnsafe(min);
            Vector128<float> upper = Vector128.CreateScalarUnsafe(max);

            // Apply the upper bound first, then the lower bound.
            Vector128<float> capped = Sse.MinScalar(candidate, upper);
            Vector128<float> clamped = Sse.MaxScalar(lower, capped);

            return clamped.ToScalar();
        }
コード例 #13
0
        /// <summary>
        /// Builds a 128-bit integer vector from <paramref name="value"/> by shuffling lane 0 of a scalar
        /// vector with the <c>Constant.Simd128x4.Broadcast0toAll</c> control.
        /// </summary>
        /// <param name="value">The scalar to broadcast.</param>
        /// <returns>The shuffled vector (presumably <paramref name="value"/> in every lane - depends on the shuffle constant).</returns>
        public static Vector128<int> BroadcastScalarToVector128(int value)
        {
            // AVX version of Avx2.BroadcastScalarToVector128(int) (_mm_broadcastd_epi32()).
            // Same code as https://github.com/dotnet/runtime/blob/master/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
            // Vector128.Create(int), minus the CPU dispatch, and signalling to the compiler that VEX can be used.

            // Reinterpret cast into lane 0 without zeroing the upper lanes.
            Vector128<int> lane0 = Vector128.CreateScalarUnsafe(value);
            return Avx.Shuffle(lane0, Constant.Simd128x4.Broadcast0toAll);
        }
コード例 #14
0
        /// <summary>
        /// Basic scenario: wraps a generated <see cref="UInt16"/> with
        /// <c>Vector128.CreateScalarUnsafe</c> and hands the vector plus the input to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

            UInt16 input = TestLibrary.Generator.GetUInt16();
            Vector128<UInt16> wrapped = Vector128.CreateScalarUnsafe(input);
            ValidateResult(wrapped, input);
        }
コード例 #15
0
        /// <summary>
        /// Rounds <paramref name="x"/> toward negative infinity, using SSE4.1 when available and
        /// <c>MathF.Round</c> with <c>MidpointRounding.ToNegativeInfinity</c> otherwise.
        /// </summary>
        /// <param name="x">The value to round down.</param>
        /// <returns>The rounded value.</returns>
        private static float RoundDown(float x)
        {
            if (!Sse41.IsSupported)
            {
                return MathF.Round(x, MidpointRounding.ToNegativeInfinity);
            }

            // Only lane 0 is meaningful: CreateScalarUnsafe leaves the upper lanes undefined.
            Vector128<float> floored = Sse41.RoundToNegativeInfinity(Vector128.CreateScalarUnsafe(x));
            return floored.ToScalar();
        }
コード例 #16
0
ファイル: MathF.cs プロジェクト: vitek-karas/runtime
 // Returns a value built from x and y via a bitwise select when SSE or AdvSimd is available,
 // and via SoftwareFallback otherwise.
 // NOTE(review): the rest of this method (its closing brace and the SoftwareFallback helper it
 // calls) is not visible in this excerpt.
 public static float CopySign(float x, float y)
 {
     if (Sse.IsSupported || AdvSimd.IsSupported)
     {
         // The select mask is -0.0f, whose only set bit is the sign bit.
         // Presumably the sign bit is therefore taken from y and all other bits from x -
         // confirm against VectorMath.ConditionalSelectBitwise.
         return(VectorMath.ConditionalSelectBitwise(Vector128.CreateScalarUnsafe(-0.0f), Vector128.CreateScalarUnsafe(y), Vector128.CreateScalarUnsafe(x)).ToScalar());
     }
     else
     {
         return(SoftwareFallback(x, y));
     }
コード例 #17
0
        /// <summary>
        /// Creates the vector <c>(a, 0, a, 0)</c>: <paramref name="a"/> zero-extended into each 64-bit lane.
        /// </summary>
        /// <param name="a">The value placed in the low 32 bits of both 64-bit lanes.</param>
        /// <returns>A vector whose elements 0 and 2 are <paramref name="a"/> and whose elements 1 and 3 are zero.</returns>
        public static Vector128<uint> CreateTwoUInt(uint a)
        {
            if (Sse2.IsSupported)
            {
                // CreateScalar (not CreateScalarUnsafe) is required: the upper 32 bits of the
                // 64-bit lane are duplicated into the result, so they must be guaranteed zero to
                // match the fallback below. CreateScalarUnsafe leaves them undefined.
                Vector128<ulong> lane = Vector128.CreateScalar(a).AsUInt64();

                // Duplicate the low 64-bit lane into both halves.
                return Sse2.UnpackLow(lane, lane).AsUInt32();
            }

            return Vector128.Create(a, 0, a, 0);
        }
コード例 #18
0
ファイル: Scalar.Exp.cs プロジェクト: storm32600/Silk.NET
            // Reinterprets the 64 bits of x as a double (no numeric conversion).
            static unsafe double asdouble(ulong x)
            {
#if SSE
                if (Sse.IsSupported)
                {
                    // ToScalar "relies" on Sse (the fallback is garbage)
                    Vector128<ulong> bits = Vector128.CreateScalarUnsafe(x);
                    return bits.AsDouble().ToScalar();
                }
#endif
                // this produces bad codegen on < net5
                double* reinterpreted = (double*)&x;
                return *reinterpreted;
            }
コード例 #19
0
        /// <summary>
        /// Takes four UTF-16 code units packed into <paramref name="value"/>, keeps the low byte of each
        /// 16-bit word, and writes the resulting four bytes starting at <paramref name="outputBuffer"/>.
        /// </summary>
        /// <param name="outputBuffer">Reference to the first of the four destination bytes.</param>
        /// <param name="value">Four packed 16-bit values; a debug assertion requires them all to be ASCII.</param>
        /// <remarks>
        /// Which hardware paths exist depends on the build: with <c>NETCOREAPP3_1</c> defined only the BMI2
        /// path is compiled, otherwise the SSE2 (x64) and AdvSimd paths are. The scalar fallback is always
        /// compiled and is the final <c>else</c> of whichever chain is active.
        /// </remarks>
        private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value)
        {
            Debug.Assert(AllCharsInUInt64AreAscii(value));

#if NETCOREAPP3_1
            if (Bmi2.X64.IsSupported)
            {
                // BMI2 will work regardless of the processor's endianness.
                // The mask selects the low byte of each 16-bit word; the extracted bits are packed into 32 bits.
                Unsafe.WriteUnaligned(ref outputBuffer, (uint)Bmi2.X64.ParallelBitExtract(value, 0x00FF00FF_00FF00FFul));
            }
#else
            if (Sse2.X64.IsSupported)
            {
                // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
                // [ b0 b1 b2 b3 b0 b1 b2 b3 ], then writes 4 bytes (32 bits) to the destination.

                Vector128 <short> vecWide   = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16();
                Vector128 <uint>  vecNarrow = Sse2.PackUnsignedSaturate(vecWide, vecWide).AsUInt32();
                Unsafe.WriteUnaligned <uint>(ref outputBuffer, Sse2.ConvertToUInt32(vecNarrow));
            }
            else if (AdvSimd.IsSupported)
            {
                // Narrows a vector of words [ w0 w1 w2 w3 ] to a vector of bytes
                // [ b0 b1 b2 b3 * * * * ], then writes 4 bytes (32 bits) to the destination.

                Vector128 <short> vecWide = Vector128.CreateScalarUnsafe(value).AsInt16();
                Vector64 <byte>   lower   = AdvSimd.ExtractNarrowingSaturateUnsignedLower(vecWide);
                Unsafe.WriteUnaligned <uint>(ref outputBuffer, lower.AsUInt32().ToScalar());
            }
#endif
            else
            {
                // Scalar fallback: peel off one 16-bit word at a time and store its low byte.
                if (BitConverter.IsLittleEndian)
                {
                    // Lowest word goes to the first output byte.
                    outputBuffer = (byte)value;
                    value      >>= 16;
                    Unsafe.Add(ref outputBuffer, 1) = (byte)value;
                    value >>= 16;
                    Unsafe.Add(ref outputBuffer, 2) = (byte)value;
                    value >>= 16;
                    Unsafe.Add(ref outputBuffer, 3) = (byte)value;
                }
                else
                {
                    // Lowest word goes to the last output byte.
                    Unsafe.Add(ref outputBuffer, 3) = (byte)value;
                    value >>= 16;
                    Unsafe.Add(ref outputBuffer, 2) = (byte)value;
                    value >>= 16;
                    Unsafe.Add(ref outputBuffer, 1) = (byte)value;
                    value      >>= 16;
                    outputBuffer = (byte)value;
                }
            }
        }
コード例 #20
0
        /// <summary>
        /// Returns an approximation of <c>1 / a</c>: the SSE scalar reciprocal instruction when
        /// available, otherwise a bit-trick estimate.
        /// </summary>
        /// <param name="a">The value to invert.</param>
        /// <returns>The approximate reciprocal of <paramref name="a"/>.</returns>
        /// <remarks>
        /// The fallback squares an estimate derived from the operand's bit pattern, so on that
        /// path the result is a square and never negative.
        /// </remarks>
        public static f32 Reciprocal_f32(f32 a)
        {
            if (!Sse.IsSupported)
            {
                // pow( pow(x,-0.5), 2 ) = pow( x, -1 ) = 1.0 / x
                int magicDifference = (int)(0xbe6eb3beU - (uint)Castf32_i32(a));
                a = Casti32_f32(magicDifference >> 1);
                return a * a;
            }

            var operand = Vector128.CreateScalarUnsafe(a);
            return Sse.ReciprocalScalar(operand).ToScalar();
        }
コード例 #21
0
        /// <summary>
        /// Reinterprets the 32 bits of <paramref name="value"/> as a <see cref="float"/> (no numeric conversion).
        /// </summary>
        /// <param name="value">The bit pattern to reinterpret.</param>
        /// <returns>The single-precision value with the same bit pattern.</returns>
        public static unsafe float Int32BitsToSingle(int value)
        {
            // Workaround for https://github.com/dotnet/runtime/issues/11413
            if (!Sse2.IsSupported)
            {
                float* reinterpreted = (float*)&value;
                return *reinterpreted;
            }

            return Vector128.CreateScalarUnsafe(value).AsSingle().ToScalar();
        }
コード例 #22
0
ファイル: Scalar.Log.cs プロジェクト: storm32600/Silk.NET
            // Reinterprets the 32 bits of x as a float (no numeric conversion).
            static unsafe float asfloat(uint x)
            {
#if SSE
                if (Sse.IsSupported)
                {
                    // ToScalar "relies" on Sse (the fallback is garbage)
                    Vector128<uint> bits = Vector128.CreateScalarUnsafe(x);
                    return bits.AsSingle().ToScalar();
                }
#endif
                // this produces bad codegen on < net5
                float* reinterpreted = (float*)&x;
                return *reinterpreted;
            }
コード例 #23
0
        /// <summary>
        /// Reinterprets the 64 bits of <paramref name="value"/> as a <see cref="double"/> (no numeric conversion).
        /// </summary>
        /// <param name="value">The bit pattern to reinterpret.</param>
        /// <returns>The double-precision value with the same bit pattern.</returns>
        public static unsafe double Int64BitsToDouble(long value)
        {
            // Workaround for https://github.com/dotnet/runtime/issues/11413
            if (!Sse2.X64.IsSupported)
            {
                double* reinterpreted = (double*)&value;
                return *reinterpreted;
            }

            return Vector128.CreateScalarUnsafe(value).AsDouble().ToScalar();
        }
コード例 #24
0
        /// <summary>
        /// Returns the raw 32-bit pattern of <paramref name="value"/> as an <see cref="int"/> (no numeric conversion).
        /// </summary>
        /// <param name="value">The single-precision value to reinterpret.</param>
        /// <returns>The integer with the same bit pattern.</returns>
        public static unsafe int SingleToInt32Bits(float value)
        {
            // Workaround for https://github.com/dotnet/runtime/issues/11413
            if (!Sse2.IsSupported)
            {
                int* reinterpreted = (int*)&value;
                return *reinterpreted;
            }

            // Move lane 0 of the reinterpreted vector back into a general-purpose register.
            return Sse2.ConvertToInt32(Vector128.CreateScalarUnsafe(value).AsInt32());
        }
コード例 #25
0
        /// <summary>
        /// Returns the raw 64-bit pattern of <paramref name="value"/> as a <see cref="long"/> (no numeric conversion).
        /// </summary>
        /// <param name="value">The double-precision value to reinterpret.</param>
        /// <returns>The integer with the same bit pattern.</returns>
        public static unsafe long DoubleToInt64Bits(double value)
        {
            // Workaround for https://github.com/dotnet/runtime/issues/11413
            if (!Sse2.X64.IsSupported)
            {
                long* reinterpreted = (long*)&value;
                return *reinterpreted;
            }

            // Move lane 0 of the reinterpreted vector back into a general-purpose register.
            return Sse2.X64.ConvertToInt64(Vector128.CreateScalarUnsafe(value).AsInt64());
        }
コード例 #26
0
ファイル: Program.cs プロジェクト: frederikja163/MathSharp
        /// <summary>
        /// Compares two floats for bitwise equality using SSE2 integer comparison.
        /// </summary>
        /// <param name="left">First value.</param>
        /// <param name="right">Second value.</param>
        /// <returns>
        /// <c>true</c> when both values have the same 32-bit pattern. Because the comparison is on
        /// bits, identical NaN patterns compare equal and <c>0f</c> differs from <c>-0f</c>.
        /// </returns>
        /// <remarks>This helper performs no <c>Sse2.IsSupported</c> check itself.</remarks>
        public static bool IntrinsicEquality(this float left, float right)
        {
            Vector128<int> leftBits = Vector128.CreateScalarUnsafe(left).AsInt32();
            Vector128<int> rightBits = Vector128.CreateScalarUnsafe(right).AsInt32();

            // Each lane becomes all-ones when equal, all-zeros otherwise.
            Vector128<int> equalLanes = Sse2.CompareEqual(leftBits, rightBits);

            // MoveMask packs the four lane sign bits into bits 0..3, so the result is 0..15
            // and can never equal -1. Only lane 0 holds defined data (CreateScalarUnsafe
            // leaves the upper lanes undefined), so test bit 0 alone.
            int mask = Sse.MoveMask(equalLanes.AsSingle());

            return (mask & 1) != 0;
        }
コード例 #27
0
        /// <summary>
        /// Hex-encodes <paramref name="bytes"/> into <paramref name="chars"/> as UTF-16, processing four
        /// input bytes (eight output chars) per SSSE3 iteration and handing any remaining bytes to
        /// <c>ToCharsBuffer</c> one at a time.
        /// </summary>
        /// <param name="bytes">Input bytes; a debug assertion requires at least four.</param>
        /// <param name="chars">Destination; two chars are written per input byte. NOTE(review): no length check is made here - presumably the caller guarantees capacity.</param>
        /// <param name="casing">Selects the upper- or lower-case hex digit table.</param>
        private static void EncodeToUtf16_Ssse3(ReadOnlySpan <byte> bytes, Span <char> chars, Casing casing)
        {
            Debug.Assert(bytes.Length >= 4);
            nint pos = 0;

            // Places source bytes 0..3 at byte positions 2, 6, 10 and 14; the 0xFF entries yield zero bytes.
            Vector128 <byte> shuffleMask = Vector128.Create(
                0xFF, 0xFF, 0, 0xFF, 0xFF, 0xFF, 1, 0xFF,
                0xFF, 0xFF, 2, 0xFF, 0xFF, 0xFF, 3, 0xFF);

            // 16-entry lookup table mapping a nibble value to its ASCII hex digit.
            Vector128 <byte> asciiTable = (casing == Casing.Upper) ?
                                          Vector128.Create((byte)'0', (byte)'1', (byte)'2', (byte)'3',
                                                           (byte)'4', (byte)'5', (byte)'6', (byte)'7',
                                                           (byte)'8', (byte)'9', (byte)'A', (byte)'B',
                                                           (byte)'C', (byte)'D', (byte)'E', (byte)'F') :
                                          Vector128.Create((byte)'0', (byte)'1', (byte)'2', (byte)'3',
                                                           (byte)'4', (byte)'5', (byte)'6', (byte)'7',
                                                           (byte)'8', (byte)'9', (byte)'a', (byte)'b',
                                                           (byte)'c', (byte)'d', (byte)'e', (byte)'f');

            // do/while: the first iteration runs unconditionally, relying on the assertion above.
            do
            {
                // Read 32bits from "bytes" span at "pos" offset
                uint block = Unsafe.ReadUnaligned <uint>(
                    ref Unsafe.Add(ref MemoryMarshal.GetReference(bytes), pos));

                // Calculate nibbles
                Vector128 <byte> lowNibbles = Ssse3.Shuffle(
                    Vector128.CreateScalarUnsafe(block).AsByte(), shuffleMask);
                // Move each source byte down two byte positions, then shift every 32-bit lane right by
                // 4 bits so the byte's high nibble lands in the low bits of byte 0 of its lane.
                Vector128 <byte> highNibbles = Sse2.ShiftRightLogical(
                    Sse2.ShiftRightLogical128BitLane(lowNibbles, 2).AsInt32(), 4).AsByte();

                // Lookup the hex values at the positions of the indices
                Vector128 <byte> indices = Sse2.And(
                    Sse2.Or(lowNibbles, highNibbles), Vector128.Create((byte)0xF));
                Vector128 <byte> hex = Ssse3.Shuffle(asciiTable, indices);

                // The high bytes (0x00) of the chars have also been converted
                // to ascii hex '0', so clear them out.
                hex = Sse2.And(hex, Vector128.Create((ushort)0xFF).AsByte());

                // Save to "chars" at pos*2 offset
                Unsafe.WriteUnaligned(
                    ref Unsafe.As <char, byte>(
                        ref Unsafe.Add(ref MemoryMarshal.GetReference(chars), pos * 2)), hex);

                pos += 4;
            } while (pos < bytes.Length - 3); // continue only while a full 4-byte block remains

            // Process trailing elements (bytes.Length % 4)
            for (; pos < bytes.Length; pos++)
            {
                ToCharsBuffer(Unsafe.Add(ref MemoryMarshal.GetReference(bytes), pos), chars, (int)pos * 2, casing);
            }
        }
コード例 #28
0
        /// <summary>
        /// Creates the vector <c>(a, 0, b, 0)</c>: each argument zero-extended into its own 64-bit lane.
        /// </summary>
        /// <param name="a">The value placed in the low 32 bits of the lower 64-bit lane.</param>
        /// <param name="b">The value placed in the low 32 bits of the upper 64-bit lane.</param>
        /// <returns>A vector whose elements are <c>a, 0, b, 0</c>.</returns>
        public static Vector128<uint> CreateTwoUInt(uint a, uint b)
        {
            if (Sse2.IsSupported)
            {
                // CreateScalar (not CreateScalarUnsafe) is required: the upper 32 bits of each
                // low 64-bit lane end up in the result, so they must be guaranteed zero to
                // match the fallback below. CreateScalarUnsafe leaves them undefined.
                Vector128<ulong> lower = Vector128.CreateScalar(a).AsUInt64();
                Vector128<ulong> upper = Vector128.CreateScalar(b).AsUInt64();

                // Interleave the low 64-bit lanes: result = [lower.lane0, upper.lane0].
                return Sse2.UnpackLow(lower, upper).AsUInt32();
            }

            return Vector128.Create(a, 0, b, 0);
        }
コード例 #29
0
ファイル: RgbaColor8.cs プロジェクト: Ryujinx/Ryujinx
 /// <summary>
 /// Converts this 8-bit-per-channel color into an <c>RgbaColor32</c>.
 /// </summary>
 /// <returns>
 /// A color built either from the four channel bytes widened to 32-bit integers in one SSE4.1
 /// instruction, or from the <c>R</c>, <c>G</c>, <c>B</c>, <c>A</c> members when SSE4.1 is unavailable.
 /// </returns>
 public RgbaColor32 GetColor32()
 {
     if (!Sse41.IsSupported)
     {
         return new RgbaColor32(R, G, B, A);
     }

     // Reinterpret the four channel bytes as one uint, load it into lane 0,
     // then widen the low four bytes to four 32-bit integers.
     uint packed = Unsafe.As<RgbaColor8, uint>(ref this);
     Vector128<byte> channels = Vector128.CreateScalarUnsafe(packed).AsByte();

     return new RgbaColor32(Sse41.ConvertToVector128Int32(channels));
 }
コード例 #30
0
        private static double RoundSse41(double x, MidpointRounding mpr)
        {
            var f = Vector128.CreateScalarUnsafe(x);

            return((mpr switch {
                MidpointRounding.ToEven => Sse41.RoundToNearestIntegerScalar(f),
                MidpointRounding.AwayFromZero => Sse41.RoundCurrentDirectionScalar(f),
                MidpointRounding.ToZero => Sse41.RoundToZeroScalar(f),
                MidpointRounding.ToNegativeInfinity => Sse41.RoundToNegativeInfinityScalar(f),
                MidpointRounding.ToPositiveInfinity => Sse41.RoundToPositiveInfinityScalar(f),
                _ => throw new ArgumentOutOfRangeException(nameof(mpr), mpr, "Midpoint Rounding must be a valid value.")
            }).ToScalar());