コード例 #1
0
ファイル: BasicMaths.cs プロジェクト: y8x/MathSharp
 /// <summary>
 /// Adds a scalar to a vector by broadcasting the scalar with
 /// <c>Vector128.Create</c> and forwarding to the vector/vector <c>Add</c> overload.
 /// </summary>
 /// <param name="vector">The left-hand vector operand.</param>
 /// <param name="scalar">The value broadcast to every element of the right-hand operand.</param>
 /// <returns>Whatever the vector/vector <c>Add</c> overload returns for the two operands.</returns>
 public static Vector4F Add(Vector4FParam1_3 vector, float scalar)
 {
     var broadcast = Vector128.Create(scalar);
     return Add(vector, broadcast);
 }
コード例 #2
0
ファイル: SSE.cs プロジェクト: badamczewski/SimpleIntrinsics
 /// <summary>
 /// Thin wrapper named after the C intrinsic; forwards to <c>Sse.X64.ConvertToInt64</c>.
 /// </summary>
 /// <param name="value">The vector passed to the conversion.</param>
 /// <returns>The 64-bit integer produced by <c>Sse.X64.ConvertToInt64</c>.</returns>
 public static long _mm_cvtss_si64(Vector128<float> value)
     => Sse.X64.ConvertToInt64(value);
コード例 #3
0
ファイル: SSE.cs プロジェクト: badamczewski/SimpleIntrinsics
 /// <summary>
 /// Thin wrapper named after the C intrinsic; forwards to <c>Sse.CompareGreaterThan</c>.
 /// </summary>
 /// <param name="left">Left-hand operand of the comparison.</param>
 /// <param name="right">Right-hand operand of the comparison.</param>
 /// <returns>The per-element comparison mask produced by <c>Sse.CompareGreaterThan</c>.</returns>
 public static Vector128<float> _mm_cmpgt_ps(Vector128<float> left, Vector128<float> right)
     => Sse.CompareGreaterThan(left, right);
コード例 #4
0
 /// <summary>
 /// SHA1 schedule update 0.
 /// Corresponds to: vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
 /// </summary>
 /// <remarks>This implementation is a stub: it unconditionally throws.</remarks>
 /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
 public static Vector128<uint> SchedulePart1(Vector128<uint> w0_3, Vector128<uint> w4_7, Vector128<uint> w8_11)
     => throw new PlatformNotSupportedException();
コード例 #5
0
ファイル: LessThanAll.Byte.cs プロジェクト: z77ma/runtime
            /// <summary>
            /// Runs <c>Vector128.LessThanAll</c> on the two fields <c>_fld1</c> and <c>_fld2</c>
            /// and hands the operands and the outcome to the test class for validation.
            /// </summary>
            /// <param name="testClass">The test instance whose <c>ValidateResult</c> checks the outcome.</param>
            public void RunStructFldScenario(VectorBooleanBinaryOpTest__LessThanAllByte testClass)
            {
                testClass.ValidateResult(_fld1, _fld2, Vector128.LessThanAll(_fld1, _fld2));
            }
コード例 #6
0
 /// <summary>
 /// Initializes both operand fields with fixed constants broadcast through <c>Vector128.Create</c>.
 /// </summary>
 public Add()
 {
     // Left operand: every element is 0x12345678.
     this.vector1 = Vector128.Create(0x12345678);

     // Right operand: every element is 0x23456789.
     this.vector2 = Vector128.Create(0x23456789);
 }
 /// <summary>
 /// Copies the operand vector into a managed <c>Double[]</c> and forwards to the
 /// array-based <c>ValidateResult</c> overload.
 /// </summary>
 /// <param name="firstOp">The operand vector to spill into an array.</param>
 /// <param name="result">The result value passed through to the array-based overload.</param>
 /// <param name="method">Name of the calling member (filled in by the compiler by default).</param>
 private void ValidateResult(Vector128<Double> firstOp, Int32 result, [CallerMemberName] string method = "")
 {
     var operandElements = new Double[Op1ElementCount];

     // View the start of the array as raw bytes and write the whole vector there.
     ref byte destination = ref Unsafe.As<Double, byte>(ref operandElements[0]);
     Unsafe.WriteUnaligned(ref destination, firstOp);

     ValidateResult(operandElements, result, method);
 }
コード例 #8
0
        /// <summary>
        /// Returns a new <c>VectorArg128</c> whose vector is <c>_rgb</c> plus <paramref name="f"/>
        /// broadcast to every element (via <c>Sse.SetAllVector128</c> and <c>Sse.Add</c>).
        /// </summary>
        /// <param name="f">The value added to each element.</param>
        /// <returns>A new <c>VectorArg128</c> wrapping the sum.</returns>
        public VectorArg128 Change(float f)
            => new VectorArg128(Sse.Add(Sse.SetAllVector128(f), _rgb));
コード例 #9
0
 /// <summary>
 /// Combines <paramref name="x"/> and <paramref name="y"/> into a single float.
 /// When SSE or AdvSimd is available, this is done with a bitwise select whose mask is the
 /// bit pattern of -0.0f (only the IEEE 754 sign bit set); otherwise it defers to
 /// <c>SoftwareFallback</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably the result is the magnitude of x with the sign of y, as the name
 /// suggests - confirm against the operand order of <c>VectorMath.ConditionalSelectBitwise</c>.
 /// NOTE(review): the closing brace of this method is not visible in this excerpt.
 /// </remarks>
 public static float CopySign(float x, float y)
 {
     if (Sse.IsSupported || AdvSimd.IsSupported)
     {
         // Arguments are (mask = -0.0f, y, x), each placed in the lowest vector element;
         // only the lowest element of the selected vector is returned.
         return(VectorMath.ConditionalSelectBitwise(Vector128.CreateScalarUnsafe(-0.0f), Vector128.CreateScalarUnsafe(y), Vector128.CreateScalarUnsafe(x)).ToScalar());
     }
     else
     {
         // No supported SIMD instruction set: use the scalar implementation.
         return(SoftwareFallback(x, y));
     }
コード例 #10
0
ファイル: TestAvx2.cs プロジェクト: KazuhiroNomura/SIMD2
 // Builds two Vector256 operands from broadcast Int16 values and feeds them to Avx2.Add.
 //
 // Every loop below has the bound `< 1`, so each body runs exactly once with its counter at 0;
 // all broadcast values are therefore 0. Each operand is assembled bottom-up:
 // Vector64 (broadcast) -> Vector128 (two Vector64 halves) -> Vector256 (two Vector128 halves).
 //
 // NOTE(review): `actual` is computed but never compared with anything - the expected-value
 // code at the end is commented out and refers to identifiers (actual_4_1 ... actual_7_1) that
 // are not declared in this method, so as written this test asserts nothing.
 public void Add2_Int16()
 {
     for (var left_0_0 = 0; left_0_0 < 1; left_0_0++)
     {
         var left_0_1  = (Int16)left_0_0;
         var left_64_0 = Vector64.Create(left_0_1);
         for (var left_1_0 = 0; left_1_0 < 1; left_1_0++)
         {
             var left_1_1   = (Int16)left_1_0;
             var left_64_1  = Vector64.Create(left_1_1);
             // First 128-bit half of the left operand.
             var left_128_0 = Vector128.Create(left_64_0, left_64_1);
             for (var left_2_0 = 0; left_2_0 < 1; left_2_0++)
             {
                 var left_2_1  = (Int16)left_2_0;
                 var left_64_2 = Vector64.Create(left_2_1);
                 for (var left_3_0 = 0; left_3_0 < 1; left_3_0++)
                 {
                     var left_3_1   = (Int16)left_3_0;
                     var left_64_3  = Vector64.Create(left_3_1);
                     // Second 128-bit half, then the complete 256-bit left operand.
                     var left_128_1 = Vector128.Create(left_64_2, left_64_3);
                     var left_256   = Vector256.Create(left_128_0, left_128_1);
                     for (var right_0_0 = 0; right_0_0 < 1; right_0_0++)
                     {
                         var right_0_1  = (Int16)right_0_0;
                         var right_64_0 = Vector64.Create(right_0_1);
                         for (var right_1_0 = 0; right_1_0 < 1; right_1_0++)
                         {
                             var right_1_1   = (Int16)right_1_0;
                             var right_64_1  = Vector64.Create(right_1_1);
                             // First 128-bit half of the right operand.
                             var right_128_0 = Vector128.Create(right_64_0, right_64_1);
                             for (var right_2_0 = 0; right_2_0 < 1; right_2_0++)
                             {
                                 var right_2_1  = (Int16)right_2_0;
                                 var right_64_2 = Vector64.Create(right_2_1);
                                 for (var right_3_0 = 0; right_3_0 < 1; right_3_0++)
                                 {
                                     var right_3_1   = (Int16)right_3_0;
                                     var right_64_3  = Vector64.Create(right_3_1);
                                     // Second 128-bit half, then the complete 256-bit right operand.
                                     var right_128_1 = Vector128.Create(right_64_2, right_64_3);
                                     var right_256   = Vector256.Create(right_128_0, right_128_1);
                                     // Operation under test; the result is currently unused.
                                     var actual      = Avx2.Add(left_256, right_256);
                                     //var expected_upper0 = (UInt64)(left_0_1+left_1_1+actual_5_1+actual_4_1);
                                     //var expected_upper1 =
                                     //    (expected_upper0<<0)|
                                     //    (expected_upper0<<16)|
                                     //    (expected_upper0<<32)|
                                     //    (expected_upper0<<48);
                                     //var expected_upper2 = Vector128.Create(expected_upper1);
                                     //var expected_lower0 = (UInt64)(actual_7_1+actual_6_1+actual_5_1+actual_4_1);
                                     //var expected_lower1 =
                                     //    (expected_lower0<<0)|
                                     //    (expected_lower0<<16)|
                                     //    (expected_lower0<<32)|
                                     //    (expected_lower0<<48);
                                     //var expected_lower2 = Vector128.Create(expected_lower1);
                                     //var expected3 = Vector256.Create(expected_lower2,expected_upper2).AsInt16();
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
 }
コード例 #11
0
 /// <summary>
 /// Creates an instance that stores the supplied vector in the <c>_rgb</c> field.
 /// </summary>
 /// <param name="_rgb">The vector to store.</param>
 public VectorArg128(Vector128<float> _rgb) => this._rgb = _rgb;
コード例 #12
0
ファイル: BasicMaths.cs プロジェクト: y8x/MathSharp
 /// <summary>
 /// Divides a vector by a scalar by broadcasting the scalar with
 /// <c>Vector128.Create</c> and forwarding to the vector/vector <c>Divide</c> overload.
 /// </summary>
 /// <remarks>
 /// The previous body forwarded to <c>Multiply</c>, which multiplied by the divisor instead of
 /// dividing by it. The sibling scalar helpers (<c>Add</c>, <c>Subtract</c>, <c>Multiply</c>)
 /// each forward to their own vector overload; this one now does the same.
 /// </remarks>
 /// <param name="dividend">The vector to be divided.</param>
 /// <param name="scalarDivisor">The value broadcast to every element of the divisor.</param>
 /// <returns>Whatever the vector/vector <c>Divide</c> overload returns for the two operands.</returns>
 public static Vector4F Divide(Vector4FParam1_3 dividend, float scalarDivisor)
 => Divide(dividend, Vector128.Create(scalarDivisor));
コード例 #13
0
ファイル: BasicMaths.cs プロジェクト: y8x/MathSharp
 /// <summary>
 /// Multiplies a vector by a scalar by broadcasting the scalar with
 /// <c>Vector128.Create</c> and forwarding to the vector/vector <c>Multiply</c> overload.
 /// </summary>
 /// <param name="vector">The left-hand vector operand.</param>
 /// <param name="scalar">The value broadcast to every element of the right-hand operand.</param>
 /// <returns>Whatever the vector/vector <c>Multiply</c> overload returns for the two operands.</returns>
 public static Vector4F Multiply(Vector4FParam1_3 vector, float scalar)
 {
     var broadcast = Vector128.Create(scalar);
     return Multiply(vector, broadcast);
 }
コード例 #14
0
ファイル: BasicMaths.cs プロジェクト: y8x/MathSharp
 /// <summary>
 /// Subtracts a scalar from a vector by broadcasting the scalar with
 /// <c>Vector128.Create</c> and forwarding to the vector/vector <c>Subtract</c> overload.
 /// </summary>
 /// <param name="vector">The left-hand vector operand.</param>
 /// <param name="scalar">The value broadcast to every element of the right-hand operand.</param>
 /// <returns>Whatever the vector/vector <c>Subtract</c> overload returns for the two operands.</returns>
 public static Vector4F Subtract(Vector4FParam1_3 vector, float scalar)
 {
     var broadcast = Vector128.Create(scalar);
     return Subtract(vector, broadcast);
 }
コード例 #15
0
 /// <summary>
 /// Copies the result vector into a managed <c>UInt64[]</c> and forwards to the
 /// array-based <c>ValidateResult</c> overload.
 /// </summary>
 /// <param name="result">The vector to spill into an array.</param>
 /// <param name="expectedLowerValue">Passed through to the array-based overload.</param>
 /// <param name="expectedUpperValue">Passed through to the array-based overload.</param>
 /// <param name="method">Name of the calling member (filled in by the compiler by default).</param>
 private void ValidateResult(Vector128<UInt64> result, UInt64 expectedLowerValue, UInt64 expectedUpperValue, [CallerMemberName] string method = "")
 {
     var spilled = new UInt64[ElementCount];

     // View the start of the array as raw bytes and write the whole vector there.
     ref byte destination = ref Unsafe.As<UInt64, byte>(ref spilled[0]);
     Unsafe.WriteUnaligned(ref destination, result);

     ValidateResult(spilled, expectedLowerValue, expectedUpperValue, method);
 }
コード例 #16
0
 /// <summary>
 /// Computes the maximum of two doubles with <c>Sse2.MaxScalar</c>: each argument is placed in
 /// the lowest element of a vector and the lowest element of the result is returned.
 /// </summary>
 /// <param name="a">First operand.</param>
 /// <param name="b">Second operand.</param>
 /// <returns>The lowest element of the <c>Sse2.MaxScalar</c> result.</returns>
 private static double MaxSse2(double a, double b)
 {
     Vector128<double> first = Vector128.CreateScalarUnsafe(a);
     Vector128<double> second = Vector128.CreateScalarUnsafe(b);
     Vector128<double> larger = Sse2.MaxScalar(first, second);
     return larger.ToScalar();
 }
コード例 #17
0
 /// <summary>
 /// Assigns <c>vector</c> a vector in which every element is 0x123456.
 /// </summary>
 public override void RunStep()
 {
     vector = Vector128.Create(0x123456);
 }
コード例 #18
0
        /// <summary>
        /// Writes an <c>sbyte[]</c> to <paramref name="writer"/>: nil for a null array, otherwise an
        /// array header followed by every element. When <c>Popcnt</c> is supported and the array has
        /// at least 32 elements, elements are processed 16 at a time with SSE instructions; all
        /// remaining elements are written one by one with <c>writer.Write</c>.
        /// </summary>
        /// <param name="writer">The writer that receives the output.</param>
        /// <param name="value">The array to serialize; may be null.</param>
        /// <param name="options">Not used by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, sbyte[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            if (inputLength == 0)
            {
                return;
            }

            fixed(sbyte *pSource = &value[0])
            {
                var inputEnd      = pSource + inputLength;
                var inputIterator = pSource;

                if (Popcnt.IsSupported)
                {
                    const int ShiftCount = 4;
                    const int Stride     = 1 << ShiftCount;
                    // Use the SIMD path only when at least two strides (32 elements) are available,
                    // so that a full stride remains after the alignment adjustment below.
                    if (inputLength < Stride << 1)
                    {
                        goto ProcessEach;
                    }

                    {
                        // Write leading elements one by one until the input pointer is aligned.
                        // NOTE(review): assumes CalculateDifferenceAlign16 returns the distance in
                        // bytes to the next 16-byte boundary - confirm against the helper.
                        var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(inputIterator);
                        inputLength -= offset;
                        var offsetEnd = inputIterator + offset;
                        while (inputIterator != offsetEnd)
                        {
                            writer.Write(*inputIterator++);
                        }
                    }

                    fixed(byte *tablePointer = &ShuffleAndMaskTable[0])
                    {
                        fixed(byte *maskTablePointer = &SingleInstructionMultipleDataPrimitiveArrayFormatterHelper.StoreMaskTable[0])
                        {
                            var vectorMinFixNegInt        = Vector128.Create((sbyte)MessagePackRange.MinFixNegativeInt);
                            var vectorMessagePackCodeInt8 = Vector128.Create(MessagePackCode.Int8);

                            // Iterate over whole strides only; the remainder is left for ProcessEach.
                            for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride)
                            {
                                var current = Sse2.LoadVector128(inputIterator);
                                // One mask bit per element, set where the element is below MinFixNegativeInt.
                                var index   = unchecked ((uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vectorMinFixNegInt, current)));

                                if (index == 0)
                                {
                                    // When all 16 input values are in the FixNum range, copy them through unchanged.
                                    var span = writer.GetSpan(Stride);
                                    Sse2.Store((sbyte *)Unsafe.AsPointer(ref span[0]), current);

                                    writer.Advance(Stride);
                                    continue;
                                }

                                unchecked
                                {
                                    // Handle the 16 elements as two halves of 8, each with its own 8-bit mask.
                                    var index0      = (byte)index;
                                    var index1      = (byte)(index >> 8);
                                    // Output size per half: 8 bytes plus one extra byte per flagged element.
                                    var count0      = (int)(Popcnt.PopCount(index0) + 8);
                                    var count1      = (int)(Popcnt.PopCount(index1) + 8);
                                    var countTotal  = count0 + count1;
                                    var destination = writer.GetSpan(countTotal);
                                    fixed(byte *pDestination = &destination[0])
                                    {
                                        var tempDestination = pDestination;
                                        // The table holds one 16-byte shuffle control per 8-bit mask value.
                                        var shuffle0        = Sse2.LoadVector128(tablePointer + (index0 << 4));
                                        var shuffled0       = Ssse3.Shuffle(current.AsByte(), shuffle0);
                                        // Where the control byte has its high bit set, take the Int8 code byte
                                        // instead of the shuffled input byte.
                                        var answer0         = Sse41.BlendVariable(shuffled0, vectorMessagePackCodeInt8, shuffle0);

                                        // Store only the bytes selected by the mask-table entry for count0.
                                        Sse2.MaskMove(answer0, Sse2.LoadVector128(maskTablePointer + (count0 << 4)), tempDestination);
                                        tempDestination += count0;

                                        var shuffle1  = Sse2.LoadVector128(tablePointer + (index1 << 4));
                                        // Bring the upper 8 input bytes down to the lower half before shuffling.
                                        var shift1    = Sse2.ShiftRightLogical128BitLane(current.AsByte(), 8);
                                        var shuffled1 = Ssse3.Shuffle(shift1, shuffle1);
                                        var answer1   = Sse41.BlendVariable(shuffled1, vectorMessagePackCodeInt8, shuffle1);

                                        Sse2.MaskMove(answer1, Sse2.LoadVector128(maskTablePointer + (count1 << 4)), tempDestination);
                                    }

                                    writer.Advance(countTotal);
                                }
                            }
                        }
                    }
                }

ProcessEach:
                // Scalar path: writes whatever the SIMD path did not consume (possibly everything).
                while (inputIterator != inputEnd)
                {
                    writer.Write(*inputIterator++);
                }
            }
        }
コード例 #19
0
 /// <summary>
 /// Initializes both operand fields with fixed constants broadcast through <c>Vector128.Create</c>.
 /// </summary>
 public Multiply()
 {
     // Left operand: every element is 0x12345678.
     this.vector1 = Vector128.Create(0x12345678);

     // Right operand: every element is 0x23456789.
     this.vector2 = Vector128.Create(0x23456789);
 }
コード例 #20
0
        /// <summary>
        /// Writes an <c>int[]</c> to <paramref name="writer"/>: nil for a null array, otherwise an
        /// array header followed by every element. When SSE4.1 is supported and the array has at
        /// least 8 elements, elements are processed 4 at a time using lookup tables; all remaining
        /// elements are written one by one with <c>writer.Write</c>.
        /// </summary>
        /// <param name="writer">The writer that receives the output.</param>
        /// <param name="value">The array to serialize; may be null.</param>
        /// <param name="options">Not used by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, int[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            if (inputLength == 0)
            {
                return;
            }

            fixed(int *pSource = &value[0])
            {
                var inputEnd      = pSource + inputLength;
                var inputIterator = pSource;

                if (Sse41.IsSupported)
                {
                    const int ShiftCount = 2;
                    const int Stride     = 1 << ShiftCount;

                    // Use the SIMD path only when at least two strides (8 elements) are available.
                    if (inputLength < Stride << 1)
                    {
                        goto ProcessEach;
                    }

                    {
                        // Make InputIterator Aligned
                        // NOTE(review): assumes CalculateDifferenceAlign16 returns a distance in
                        // bytes - confirm against the helper.
                        var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(inputIterator);
                        // Alignment is only reachable when the distance is a multiple of 4 (a whole
                        // number of ints); otherwise the pointer is left as it is.
                        if ((offset & 3) == 0)
                        {
                            // Convert the distance to a number of elements.
                            offset     >>= 2;
                            inputLength -= offset;
                            var offsetEnd = inputIterator + offset;
                            while (inputIterator != offsetEnd)
                            {
                                writer.Write(*inputIterator++);
                            }
                        }
                    }

                    fixed(byte *tablePointer = &ShuffleAndMaskTable[0])
                    {
                        // Table of output byte counts, located CountTableOffset bytes into the same array.
                        var countPointer = (int *)(tablePointer + CountTableOffset);

                        fixed(byte *maskTablePointer = &SingleInstructionMultipleDataPrimitiveArrayFormatterHelper.StoreMaskTable[0])
                        {
                            // Range boundaries used to classify each element.
                            var vectorShortMinValueM1 = Vector128.Create(short.MinValue - 1);
                            var vectorSByteMinValueM1 = Vector128.Create(sbyte.MinValue - 1);
                            var vectorMinFixNegIntM1  = Vector128.Create(MessagePackRange.MinFixNegativeInt - 1);
                            var vectorSByteMaxValue   = Vector128.Create((int)sbyte.MaxValue);
                            var vectorByteMaxValue    = Vector128.Create((int)byte.MaxValue);
                            var vectorUShortMaxValue  = Vector128.Create((int)ushort.MaxValue);
                            // Multipliers that turn per-element classes into one index per element pair.
                            var vectorM1M7            = Vector128.Create(-1, -7, -1, -7);
                            // Shuffle control selecting bytes 0, 4, 8 and 12 (the first byte of each int);
                            // 0x80 entries produce zero.
                            var vectorIn1Range        = Vector128.Create(0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);

                            // Iterate over whole strides only; the remainder is left for ProcessEach.
                            for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride)
                            {
                                var current = Sse2.LoadVector128(inputIterator);
                                var isGreaterThanMinFixNegIntM1 = Sse2.CompareGreaterThan(current, vectorMinFixNegIntM1);
                                var isGreaterThanSByteMaxValue  = Sse2.CompareGreaterThan(current, vectorSByteMaxValue);

                                // Fast path: every element is >= MinFixNegativeInt and <= sbyte.MaxValue,
                                // so each is written as a single byte.
                                if (Sse2.MoveMask(Sse2.AndNot(isGreaterThanSByteMaxValue, isGreaterThanMinFixNegIntM1).AsByte()) == 0xFFFF)
                                {
                                    var answer = Ssse3.Shuffle(current.AsByte(), vectorIn1Range).AsUInt32();
                                    var span   = writer.GetSpan(Stride);
                                    // The four selected bytes sit in the lowest 32 bits; write them at once.
                                    Unsafe.As <byte, uint>(ref span[0]) = answer.GetElement(0);
                                    writer.Advance(Stride);
                                    continue;
                                }

                                // Each comparison yields -1 where true, so after the six additions every
                                // element holds minus the number of boundaries it exceeds (0..-6).
                                var indexVector = Sse2.Add(isGreaterThanSByteMaxValue, isGreaterThanMinFixNegIntM1);
                                indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorUShortMaxValue));
                                indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorByteMaxValue));
                                indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorShortMinValueM1));
                                indexVector = Sse2.Add(indexVector, Sse2.CompareGreaterThan(current, vectorSByteMinValueM1));
                                // Multiply by (-1, -7, -1, -7) and add adjacent pairs: each pair of elements
                                // yields the table index class(first) + 7 * class(second).
                                indexVector = Sse41.MultiplyLow(indexVector, vectorM1M7);
                                indexVector = Ssse3.HorizontalAdd(indexVector, indexVector);

                                // index0 covers elements 0 and 1, index1 covers elements 2 and 3.
                                var index0 = indexVector.GetElement(0);
                                var index1 = indexVector.GetElement(1);

                                // Number of output bytes for each pair.
                                var count0     = countPointer[index0];
                                var count1     = countPointer[index1];
                                var countTotal = count0 + count1;

                                var destination = writer.GetSpan(countTotal);
                                fixed(byte *pDestination = &destination[0])
                                {
                                    var tmpDestination = pDestination;

                                    // Each table entry is 32 bytes: a 16-byte shuffle control followed by a
                                    // 16-byte constant that is OR-ed into the shuffled bytes.
                                    var item0     = tablePointer + (index0 << 5);
                                    var shuffle0  = Sse2.LoadVector128(item0);
                                    var shuffled0 = Ssse3.Shuffle(current.AsByte(), shuffle0);
                                    var constant0 = Sse2.LoadVector128(item0 + 16);
                                    var answer0   = Sse2.Or(shuffled0, constant0);

                                    // Store only the bytes selected by the mask-table entry for count0.
                                    // pDestination and tmpDestination are still equal at this point.
                                    Sse2.MaskMove(answer0, Sse2.LoadVector128(maskTablePointer + (count0 << 4)), pDestination);
                                    tmpDestination += count0;

                                    // Bring elements 2 and 3 down to the lower 8 bytes before shuffling.
                                    var shift1    = Sse2.ShiftRightLogical128BitLane(current, 8).AsByte();
                                    var item1     = tablePointer + (index1 << 5);
                                    var shuffle1  = Sse2.LoadVector128(item1);
                                    var shuffled1 = Ssse3.Shuffle(shift1, shuffle1);
                                    var constant1 = Sse2.LoadVector128(item1 + 16);
                                    var answer1   = Sse2.Or(shuffled1, constant1);

                                    Sse2.MaskMove(answer1, Sse2.LoadVector128(maskTablePointer + (count1 << 4)), tmpDestination);
                                }

                                writer.Advance(countTotal);
                            }
                        }
                    }
                }

ProcessEach:
                // Scalar path: writes whatever the SIMD path did not consume (possibly everything).
                while (inputIterator != inputEnd)
                {
                    writer.Write(*inputIterator++);
                }
            }
        }
コード例 #21
0
 /// <summary>
 /// SHA1 hash update, parity form.
 /// Corresponds to: vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
 /// </summary>
 /// <remarks>This implementation is a stub: it unconditionally throws.</remarks>
 /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
 public static Vector128<uint> HashParity(Vector128<uint> hash_abcd, uint hash_e, Vector128<uint> wk)
     => throw new PlatformNotSupportedException();
コード例 #22
0
        /// <summary>
        /// Writes a <c>float[]</c> to <paramref name="writer"/>: nil for a null array, otherwise an
        /// array header followed by 5 bytes per element (the <c>MessagePackCode.Float32</c> byte and
        /// the 4 value bytes, most significant first). When SSE4.2 is supported and the array has at
        /// least 6 elements, most elements are converted 3 at a time with a byte shuffle.
        /// </summary>
        /// <param name="writer">The writer that receives the output.</param>
        /// <param name="value">The array to serialize; may be null.</param>
        /// <param name="options">Not used by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, float[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            if (inputLength == 0)
            {
                return;
            }

            // output byte[] length can be calculated from input float[] length.
            var outputLength = inputLength * 5;
            var destination  = writer.GetSpan(outputLength);

            fixed(byte *pDestination = &destination[0])
            {
                var outputIterator = pDestination;

                fixed(float *pSource = &value[0])
                {
                    var inputEnd      = pSource + inputLength;
                    // The floats are read as raw 32-bit patterns.
                    var inputIterator = (uint *)pSource;

                    if (Sse42.IsSupported)
                    {
                        if (inputLength < 6)
                        {
                            goto ProcessEach;
                        }

                        // Process 3 floats at once.
                        // From 12 bytes to 15 bytes.
                        // Constant: the Float32 code byte at output positions 0, 5 and 10, zero elsewhere.
                        var vectorConstant   = Vector128.Create(MessagePackCode.Float32, 0, 0, 0, 0, MessagePackCode.Float32, 0, 0, 0, 0, MessagePackCode.Float32, 0, 0, 0, 0, 0);
                        // Shuffle: 0x80 yields a zero byte (later filled by the constant); the other
                        // entries reverse the 4 bytes of each of the first three floats.
                        var vectorShuffle    = Vector128.Create(0x80, 3, 2, 1, 0, 0x80, 7, 6, 5, 4, 0x80, 11, 10, 9, 8, 0x80);
                        // Stop one group of 3 early: each iteration loads and stores a full 16 bytes
                        // (4 floats in, 15 useful bytes plus 1 spare byte out), so the last group is
                        // left to the scalar loop, which also overwrites the spare byte.
                        var vectorLoopLength = ((inputLength / 3) - 1) * 3;
                        for (var vectorizedEnd = inputIterator + vectorLoopLength; inputIterator != vectorizedEnd; inputIterator += 3, outputIterator += 15)
                        {
                            // new float[] { 1.0, -2.0, 3.5, } is byte[12] { 00, 00, 80, 3f, 00, 00, 00, c0, 00, 00, 60, 40 } in binary expression;
                            var current = Sse2.LoadVector128((byte *)inputIterator);
                            // Output binary should be byte[15] { ca, 3f, 80, 00, 00, ca, c0, 00, 00, 00, ca, 40, 60, 00, 00 };
                            Sse2.Store(outputIterator, Sse2.Or(Ssse3.Shuffle(current, vectorShuffle), vectorConstant));
                        }
                    }

ProcessEach:
                    // Scalar path: converts whatever the SIMD path did not consume (possibly everything).
                    while (inputIterator != inputEnd)
                    {
                        // Encode float as Big Endian
                        *   outputIterator++ = MessagePackCode.Float32;
                        var current          = *inputIterator++;
                        *   outputIterator++ = (byte)(current >> 24);
                        *   outputIterator++ = (byte)(current >> 16);
                        *   outputIterator++ = (byte)(current >> 8);
                        *   outputIterator++ = (byte)current;
                    }
                }
            }

            // All bytes were written directly into the span; commit them in one step.
            writer.Advance(outputLength);
        }
コード例 #23
0
 /// <summary>
 /// SHA1 schedule update 1.
 /// Corresponds to: vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
 /// </summary>
 /// <remarks>This implementation is a stub: it unconditionally throws.</remarks>
 /// <exception cref="PlatformNotSupportedException">Always thrown.</exception>
 public static Vector128<uint> SchedulePart2(Vector128<uint> tw0_3, Vector128<uint> w12_15)
     => throw new PlatformNotSupportedException();
コード例 #24
0
        /// <summary>
        /// Writes a <c>double[]</c> to <paramref name="writer"/>: nil for a null array, otherwise an
        /// array header followed by 9 bytes per element (the <c>MessagePackCode.Float64</c> byte and
        /// the 8 value bytes, most significant first). Elements are byte-swapped 4 at a time when
        /// AVX2 is supported, 2 at a time when only SSSE3 is supported, and one by one otherwise.
        /// </summary>
        /// <param name="writer">The writer that receives the output.</param>
        /// <param name="value">The array to serialize; may be null.</param>
        /// <param name="options">Not used by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, double[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            if (inputLength == 0)
            {
                return;
            }

            // Output size is fixed: 1 code byte + 8 value bytes per element.
            var outputLength = inputLength * 9;
            var destination  = writer.GetSpan(outputLength);

            fixed(byte *pDestination = &destination[0])
            {
                var outputIterator = pDestination;

                fixed(double *pSource = &value[0])
                {
                    var inputEnd      = pSource + inputLength;
                    // The doubles are read as raw 64-bit patterns.
                    var inputIterator = (ulong *)pSource;

                    if (Avx2.IsSupported)
                    {
                        const int ShiftCount = 2;
                        const int Stride     = 1 << ShiftCount;

                        // Use the SIMD path only when at least two strides (8 elements) are available.
                        if (inputLength < Stride << 1)
                        {
                            goto ProcessEach;
                        }

                        // Shuffle control that reverses the bytes of each 8-byte element; the same
                        // 16-byte pattern is repeated for both 128-bit halves.
                        var vectorShuffle = Vector256.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
                        for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride)
                        {
                            // Fetch 4 doubles.
                            var current = Avx.LoadVector256((byte *)inputIterator);
                            // Reorder Little Endian bytes to Big Endian.
                            var answer = Avx2.Shuffle(current, vectorShuffle).AsUInt64();
                            // Write 4 Big-Endian doubles.
                            *outputIterator++ = MessagePackCode.Float64;
                            *(ulong *)outputIterator = answer.GetElement(0);
                            outputIterator          += 8;
                            *outputIterator++ = MessagePackCode.Float64;
                            *(ulong *)outputIterator = answer.GetElement(1);
                            outputIterator          += 8;
                            *outputIterator++ = MessagePackCode.Float64;
                            *(ulong *)outputIterator = answer.GetElement(2);
                            outputIterator          += 8;
                            *outputIterator++ = MessagePackCode.Float64;
                            *(ulong *)outputIterator = answer.GetElement(3);
                            outputIterator          += 8;
                        }
                    }
                    else if (Ssse3.IsSupported)
                    {
                        const int ShiftCount = 1;
                        const int Stride     = 1 << ShiftCount;

                        // Use the SIMD path only when at least two strides (4 elements) are available.
                        if (inputLength < Stride << 1)
                        {
                            goto ProcessEach;
                        }

                        // Shuffle control that reverses the bytes of each 8-byte element.
                        var vectorShuffle = Vector128.Create((byte)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
                        for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride)
                        {
                            // Fetch 2 doubles, reverse their bytes, and write each after its code byte.
                            var current          = Sse2.LoadVector128((byte *)inputIterator);
                            var answer           = Ssse3.Shuffle(current, vectorShuffle).AsUInt64();
                            *   outputIterator++ = MessagePackCode.Float64;
                            *(ulong *)outputIterator = answer.GetElement(0);
                            outputIterator          += 8;
                            *outputIterator++ = MessagePackCode.Float64;
                            *(ulong *)outputIterator = answer.GetElement(1);
                            outputIterator          += 8;
                        }
                    }

ProcessEach:
                    // Scalar path: converts whatever the SIMD paths did not consume (possibly
                    // everything), emitting the value bytes from most to least significant.
                    while (inputIterator != inputEnd)
                    {
                        *   outputIterator++ = MessagePackCode.Float64;
                        var current          = *inputIterator++;
                        *   outputIterator++ = (byte)(current >> 56);
                        *   outputIterator++ = (byte)(current >> 48);
                        *   outputIterator++ = (byte)(current >> 40);
                        *   outputIterator++ = (byte)(current >> 32);
                        *   outputIterator++ = (byte)(current >> 24);
                        *   outputIterator++ = (byte)(current >> 16);
                        *   outputIterator++ = (byte)(current >> 8);
                        *   outputIterator++ = (byte)current;
                    }
                }
            }

            // All bytes were written directly into the span; commit them in one step.
            writer.Advance(outputLength);
        }
コード例 #25
0
ファイル: SSE.cs プロジェクト: badamczewski/SimpleIntrinsics
 /// <summary>
 /// Thin wrapper named after the C intrinsic; forwards to <c>Sse.CompareScalarUnorderedEqual</c>.
 /// </summary>
 /// <param name="left">Left-hand operand of the comparison.</param>
 /// <param name="right">Right-hand operand of the comparison.</param>
 /// <returns>The boolean produced by <c>Sse.CompareScalarUnorderedEqual</c>.</returns>
 public static bool _mm_ucomieq_ss(Vector128<float> left, Vector128<float> right)
     => Sse.CompareScalarUnorderedEqual(left, right);
コード例 #26
0
        /// <summary>
        /// Writes a <c>bool[]</c> to <paramref name="writer"/>: nil for a null array, otherwise an
        /// array header followed by one byte per element (<c>MessagePackCode.True</c> or
        /// <c>MessagePackCode.False</c>). Elements are converted 32 at a time when AVX2 is supported,
        /// 16 at a time when only SSE2 is supported, and one by one otherwise.
        /// </summary>
        /// <remarks>
        /// The method is marked <c>unsafe</c> because it uses <c>fixed</c> and raw pointers, matching
        /// the other array <c>Serialize</c> methods.
        /// </remarks>
        /// <param name="writer">The writer that receives the output.</param>
        /// <param name="value">The array to serialize; may be null.</param>
        /// <param name="options">Not used by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, bool[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            if (inputLength == 0)
            {
                return;
            }

            // Remember the full size now: inputLength is reduced by the alignment step below.
            var outputLength = inputLength;

            fixed(bool *pSource = &value[0])
            {
                var inputEnd      = pSource + inputLength;
                var inputIterator = pSource;
                var destination   = writer.GetSpan(inputLength);

                fixed(byte *pDestination = &destination[0])
                {
                    var outputIterator = pDestination;

                    if (Avx2.IsSupported)
                    {
                        const int ShiftCount = 5;
                        const int Stride     = 1 << ShiftCount;
                        // Use the SIMD path only when at least two strides (64 elements) are available.
                        if (inputLength < Stride << 1)
                        {
                            goto ProcessEach;
                        }

                        {
                            // Convert leading elements one by one until the output pointer is aligned.
                            // NOTE(review): assumes CalculateDifferenceAlign32 returns the distance in
                            // bytes to the next 32-byte boundary - confirm against the helper.
                            var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign32(outputIterator);
                            inputLength -= offset;
                            var offsetEnd = inputIterator + offset;
                            while (inputIterator != offsetEnd)
                            {
                                *outputIterator++ = *inputIterator++ ? MessagePackCode.True : MessagePackCode.False;
                            }
                        }

                        var vectorTrue       = Vector256.Create(MessagePackCode.True).AsSByte();
                        var vectorLoopLength = (inputLength >> ShiftCount) << ShiftCount;
                        for (var vectorizedEnd = inputIterator + vectorLoopLength; inputIterator != vectorizedEnd; inputIterator += Stride, outputIterator += Stride)
                        {
                            // Load 32 bool values.
                            var current = Avx.LoadVector256((sbyte *)inputIterator);

                            // A false bool is 0 as an sbyte, so comparing with zero yields -1 (all ones)
                            // in every lane that holds false and 0 in every lane that holds true.
                            var isFalse = Avx2.CompareEqual(current, Vector256 <sbyte> .Zero);
                            // True is 0xc3 as MessagePackCode and false is 0xc2.
                            // Reinterpreted as sbyte values, they are -61 and -62, respectively.
                            // Starting from 32 True codes, adding the mask subtracts 1 in the false
                            // lanes only, turning them into False codes.
                            var answer = Avx2.Add(vectorTrue, isFalse);
                            Avx.Store((sbyte *)outputIterator, answer);
                        }
                    }
                    else if (Sse2.IsSupported)
                    {
                        // for older x86 cpu
                        const int ShiftCount = 4;
                        const int Stride     = 1 << ShiftCount;
                        // Use the SIMD path only when at least two strides (32 elements) are available.
                        if (inputLength < Stride << 1)
                        {
                            goto ProcessEach;
                        }

                        {
                            // Convert leading elements one by one until the output pointer is aligned.
                            // NOTE(review): assumes CalculateDifferenceAlign16 returns the distance in
                            // bytes to the next 16-byte boundary - confirm against the helper.
                            var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(outputIterator);
                            inputLength -= offset;
                            var offsetEnd = inputIterator + offset;
                            while (inputIterator != offsetEnd)
                            {
                                *outputIterator++ = *inputIterator++ ? MessagePackCode.True : MessagePackCode.False;
                            }
                        }

                        var vectorTrue       = Vector128.Create(MessagePackCode.True).AsSByte();
                        var vectorLoopLength = (inputLength >> ShiftCount) << ShiftCount;
                        for (var vectorizedEnd = inputIterator + vectorLoopLength; inputIterator != vectorizedEnd; inputIterator += Stride, outputIterator += Stride)
                        {
                            // Load 16 bool values.
                            var current = Sse2.LoadVector128((sbyte *)inputIterator);

                            // A false bool is 0 as an sbyte, so comparing with zero yields -1 (all ones)
                            // in every lane that holds false and 0 in every lane that holds true.
                            var isFalse = Sse2.CompareEqual(current, Vector128 <sbyte> .Zero);
                            // True is 0xc3 as MessagePackCode and false is 0xc2.
                            // Reinterpreted as sbyte values, they are -61 and -62, respectively.
                            // Starting from 16 True codes, adding the mask subtracts 1 in the false
                            // lanes only, turning them into False codes.
                            var answer = Sse2.Add(vectorTrue, isFalse);
                            Sse2.Store((sbyte *)outputIterator, answer);
                        }
                    }

ProcessEach:
                    // Scalar path: converts whatever the SIMD paths did not consume (possibly everything).
                    while (inputIterator != inputEnd)
                    {
                        *outputIterator++ = *inputIterator++ ? MessagePackCode.True : MessagePackCode.False;
                    }
                }

                // All bytes were written directly into the span; commit them in one step.
                writer.Advance(outputLength);
            }
        }
コード例 #27
0
ファイル: SSE.cs プロジェクト: badamczewski/SimpleIntrinsics
 /// <summary>
 /// C-style alias for <see cref="Sse.CompareScalarEqual"/>: forwards both operands
 /// unchanged and returns the intrinsic's result.
 /// </summary>
 /// <param name="left">First operand, passed through as the intrinsic's left argument.</param>
 /// <param name="right">Second operand, passed through as the intrinsic's right argument.</param>
 /// <returns>The vector produced by <see cref="Sse.CompareScalarEqual"/>.</returns>
 public static Vector128 <float> _mm_cmpeq_ss(Vector128 <float> left, Vector128 <float> right)
     => Sse.CompareScalarEqual(left, right);
コード例 #28
0
        /// <summary>
        /// Copies the three operand vectors and the raw result buffer into managed
        /// <see cref="Single"/> arrays, then hands them to the array-based
        /// <c>ValidateResult</c> overload.
        /// </summary>
        /// <param name="firstOp">First operand vector.</param>
        /// <param name="secondOp">Second operand vector.</param>
        /// <param name="thirdOp">Third operand vector.</param>
        /// <param name="result">Pointer to the result; one <c>Vector128&lt;Single&gt;</c> worth of bytes is read from it.</param>
        /// <param name="method">Name of the calling scenario, forwarded unchanged.</param>
        private void ValidateResult(Vector128 <Single> firstOp, Vector128 <Single> secondOp, Vector128 <Single> thirdOp, void *result, [CallerMemberName] string method = "")
        {
            // Number of bytes copied out of the unmanaged result buffer.
            uint resultByteCount = (uint)Unsafe.SizeOf <Vector128 <Single> >();

            Single[] op1Values    = new Single[Op1ElementCount];
            Single[] op2Values    = new Single[Op2ElementCount];
            Single[] op3Values    = new Single[Op3ElementCount];
            Single[] resultValues = new Single[RetElementCount];

            // Spill each operand vector into the start of its array.
            ref byte op1Bytes = ref Unsafe.As <Single, byte>(ref op1Values[0]);
            Unsafe.WriteUnaligned(ref op1Bytes, firstOp);

            ref byte op2Bytes = ref Unsafe.As <Single, byte>(ref op2Values[0]);
            Unsafe.WriteUnaligned(ref op2Bytes, secondOp);

            ref byte op3Bytes = ref Unsafe.As <Single, byte>(ref op3Values[0]);
            Unsafe.WriteUnaligned(ref op3Bytes, thirdOp);

            // Pull the computed result out of the raw buffer.
            ref byte resultBytes = ref Unsafe.As <Single, byte>(ref resultValues[0]);
            Unsafe.CopyBlockUnaligned(ref resultBytes, ref Unsafe.AsRef <byte>(result), resultByteCount);

            ValidateResult(op1Values, op2Values, op3Values, resultValues, method);
        }
コード例 #29
0
ファイル: SSE.cs プロジェクト: badamczewski/SimpleIntrinsics
 /// <summary>
 /// C-style alias for <see cref="Sse.CompareScalarUnorderedGreaterThan"/>: forwards both
 /// operands unchanged and returns the intrinsic's boolean result.
 /// </summary>
 /// <param name="left">First operand, passed through as the intrinsic's left argument.</param>
 /// <param name="right">Second operand, passed through as the intrinsic's right argument.</param>
 /// <returns>The value returned by <see cref="Sse.CompareScalarUnorderedGreaterThan"/>.</returns>
 public static bool _mm_ucomigt_ss(Vector128 <float> left, Vector128 <float> right)
     => Sse.CompareScalarUnorderedGreaterThan(left, right);
コード例 #30
0
    /// <summary>
    /// Evaluates, for every sample in <paramref name="x"/>, a weighted blend of two linearly
    /// interpolated lookup tables: <c>(1 - weightB) * lerp(A, x) + weightB * lerp(B, x)</c>.
    /// Each table is treated as uniformly sampled between its min and max abscissa.
    /// Four samples are processed per iteration; the code path uses AVX, AVX2 (gather) and
    /// SSE4.1 (integer min/max) instructions and does not itself check for hardware support.
    /// </summary>
    /// <param name="x">Sample positions to evaluate.</param>
    /// <param name="A">Values of the first table; must be non-empty.</param>
    /// <param name="minXA">Abscissa of <c>A[0]</c>.</param>
    /// <param name="maxXA">Abscissa of <c>A[A.Length - 1]</c>.</param>
    /// <param name="B">Values of the second table; must be non-empty.</param>
    /// <param name="minXB">Abscissa of <c>B[0]</c>.</param>
    /// <param name="maxXB">Abscissa of <c>B[B.Length - 1]</c>.</param>
    /// <param name="weightB">Blend weight of table B; table A receives <c>1 - weightB</c>.</param>
    /// <returns>
    /// A new array of length <c>outputVectorSize</c> whose first <c>x.Length</c> entries hold the
    /// blended results; any remaining entries are left at zero.
    /// </returns>
    /// <exception cref="System.ArgumentException">
    /// <paramref name="x"/> has more elements than the output array can hold.
    /// </exception>
    private static unsafe double[] BilinearInterpol_AVX(
        double[] x,
        double[] A,
        double minXA,
        double maxXA,
        double[] B,
        double minXB,
        double maxXB,
        double weightB)
    {
        double[] z = new double[outputVectorSize];

        // One result is written per input sample; refuse inputs that would overrun the output buffer.
        if (x.Length > z.Length)
        {
            throw new System.ArgumentException("Input has more samples than the output buffer can hold.", nameof(x));
        }

        int lanes = Vector256 <double> .Count;

        // Scratch block used to run a trailing partial group of samples through the same
        // vector kernel without reading past the end of x or writing past the end of z.
        double *scratch = stackalloc double[lanes];

        fixed(double *pX = &x[0], pA = &A[0], pB = &B[0], pZ = &z[0])
        {
            Vector256 <double> vWeightB = Vector256.Create(weightB);
            Vector256 <double> vWeightA = Vector256.Create(1 - weightB);

            Vector256 <double> vMinXA = Vector256.Create(minXA);
            Vector256 <double> vMaxXA = Vector256.Create(maxXA);
            Vector256 <double> vMinXB = Vector256.Create(minXB);
            Vector256 <double> vMaxXB = Vector256.Create(maxXB);

            // Grid spacing of each table and its reciprocal.
            double             deltaA  = (maxXA - minXA) / (double)(A.Length - 1);
            double             deltaB  = (maxXB - minXB) / (double)(B.Length - 1);
            Vector256 <double> vDeltaA = Vector256.Create(deltaA);
            Vector256 <double> vDeltaB = Vector256.Create(deltaB);

            double             invDeltaA  = 1.0 / deltaA;
            double             invDeltaB  = 1.0 / deltaB;
            Vector256 <double> vInvDeltaA = Vector256.Create(invDeltaA);
            Vector256 <double> vInvDeltaB = Vector256.Create(invDeltaB);

            Vector128 <int> ALengthMinusOne = Vector128.Create(A.Length - 1);
            Vector128 <int> BLengthMinusOne = Vector128.Create(B.Length - 1);
            Vector128 <int> One             = Vector128.Create(1);

            for (var i = 0; i < x.Length; i += lanes)
            {
                int     remaining = x.Length - i;
                bool    isPartial = remaining < lanes;
                double *src       = pX + i;
                double *dst       = pZ + i;

                if (isPartial)
                {
                    // Stage the leftover samples in the scratch block, zero-padding unused lanes.
                    // Padding lanes are computed but never copied to the output; their gather
                    // indices are clamped below like any other lane, so they stay in bounds.
                    for (int k = 0; k < lanes; k++)
                    {
                        scratch[k] = k < remaining ? pX[i + k] : 0.0;
                    }

                    src = scratch;
                    dst = scratch;
                }

                Vector256 <double> currentX = Avx.LoadVector256(src);

                // Determine the largest a, such that A[a] = f(xA) and xA <= x[i].
                // This involves casting from double to int; here we use a Vector conversion.
                Vector256 <double> aDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXA), vInvDeltaA);
                Vector128 <int>    a       = Avx.ConvertToVector128Int32WithTruncation(aDouble);
                a = Sse41.Min(Sse41.Max(a, Vector128 <int> .Zero), ALengthMinusOne);
                Vector128 <int> aPlusOne = Sse41.Min(Sse2.Add(a, One), ALengthMinusOne);

                // Now, get the reference input, xA, for our index a.
                // This involves casting from int to double.
                Vector256 <double> xA = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(a), vDeltaA), vMinXA);

                // Now, compute the lambda for our A reference point (x is clamped to the table range first).
                Vector256 <double> currentXNormA = Avx.Max(vMinXA, Avx.Min(currentX, vMaxXA));
                Vector256 <double> lambdaA       = Avx.Multiply(Avx.Subtract(currentXNormA, xA), vInvDeltaA);

                // Now, we need to load up our reference points using Vector Gather operations.
                // Scale 8 = sizeof(double).
                Vector256 <double> AVector        = Avx2.GatherVector256(pA, a, 8);
                Vector256 <double> AVectorPlusOne = Avx2.GatherVector256(pA, aPlusOne, 8);

                // Now, do all of the above for our B reference point.
                Vector256 <double> bDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXB), vInvDeltaB);
                Vector128 <int>    b       = Avx.ConvertToVector128Int32WithTruncation(bDouble);
                b = Sse41.Min(Sse41.Max(b, Vector128 <int> .Zero), BLengthMinusOne);
                Vector128 <int> bPlusOne = Sse41.Min(Sse2.Add(b, One), BLengthMinusOne);

                Vector256 <double> xB            = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(b), vDeltaB), vMinXB);
                Vector256 <double> currentXNormB = Avx.Max(vMinXB, Avx.Min(currentX, vMaxXB));
                Vector256 <double> lambdaB       = Avx.Multiply(Avx.Subtract(currentXNormB, xB), vInvDeltaB);

                Vector256 <double> BVector        = Avx2.GatherVector256(pB, b, 8);
                Vector256 <double> BVectorPlusOne = Avx2.GatherVector256(pB, bPlusOne, 8);

                // z = wA * (A[a] + lambdaA * (A[a+1] - A[a])) + wB * (B[b] + lambdaB * (B[b+1] - B[b]))
                Vector256 <double> newZ = Avx.Add(Avx.Multiply(vWeightA, Avx.Add(AVector, Avx.Multiply(lambdaA, Avx.Subtract(AVectorPlusOne, AVector)))),
                                                  Avx.Multiply(vWeightB, Avx.Add(BVector, Avx.Multiply(lambdaB, Avx.Subtract(BVectorPlusOne, BVector)))));
                Avx.Store(dst, newZ);

                if (isPartial)
                {
                    // Copy back only the lanes that correspond to real input samples.
                    for (int k = 0; k < remaining; k++)
                    {
                        pZ[i + k] = scratch[k];
                    }
                }
            }
        }

        return(z);
    }