コード例 #1
0
        /// <summary>
        /// Obtains a value from <c>TestStruct.Create()</c>, feeds its <c>_fld1</c> and <c>_fld2</c>
        /// fields to <see cref="Bmi2.ParallelBitDeposit(uint, uint)"/>, and passes the operands and
        /// the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            // Operands are deliberately read as fields of a local so the call shape stays the same.
            var localStruct = TestStruct.Create();
            var deposited   = Bmi2.ParallelBitDeposit(localStruct._fld1, localStruct._fld2);

            ValidateResult(localStruct._fld1, localStruct._fld2, deposited);
        }
コード例 #2
0
        /// <summary>
        /// Constructs a fresh <c>ScalarBinaryOpTest__ParallelBitDepositUInt32</c>, feeds its
        /// <c>_fld1</c> and <c>_fld2</c> fields to <c>Bmi2.ParallelBitDeposit</c>, and passes the
        /// operands and the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclFldScenario()
        {
            // Operands are read as fields of a local instance, not of <c>this</c>.
            var localInstance = new ScalarBinaryOpTest__ParallelBitDepositUInt32();
            var deposited     = Bmi2.ParallelBitDeposit(localInstance._fld1, localInstance._fld2);

            ValidateResult(localInstance._fld1, localInstance._fld2, deposited);
        }
コード例 #3
0
        /// <summary>
        /// Copies <c>_data1</c> and <c>_data2</c> into locals through unaligned byte-wise reads,
        /// runs <c>Bmi2.ParallelBitDeposit</c> on those locals, and passes the locals and the
        /// outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            // Reinterpret each field as a byte reference, then read a UInt32 back without
            // assuming alignment.
            var source = Unsafe.ReadUnaligned <UInt32>(ref Unsafe.As <UInt32, byte>(ref _data1));
            var mask   = Unsafe.ReadUnaligned <UInt32>(ref Unsafe.As <UInt32, byte>(ref _data2));

            var deposited = Bmi2.ParallelBitDeposit(source, mask);

            ValidateResult(source, mask, deposited);
        }
コード例 #4
0
        /// <summary>
        /// Announces the scenario to the test framework, then runs
        /// <c>Bmi2.ParallelBitDeposit</c> directly on the instance fields <c>_fld1</c> and
        /// <c>_fld2</c> and passes the operands and the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            // Fields are passed directly (no intermediate locals) to keep the operand shape.
            var deposited = Bmi2.ParallelBitDeposit(_fld1, _fld2);
            ValidateResult(_fld1, _fld2, deposited);
        }
コード例 #5
0
 /// <summary>
 /// Performs a parallel bit deposit of <paramref name="x"/> under <paramref name="mask"/>.
 /// Uses the BMI2 hardware intrinsic when <see cref="Bmi2.IsSupported"/> is true and
 /// otherwise delegates to <c>ParallelBitDepositLogic</c>.
 /// </summary>
 /// <param name="x">Source value whose low-order bits are deposited.</param>
 /// <param name="mask">Mask selecting the destination bit positions.</param>
 /// <returns>The value produced by whichever implementation was selected.</returns>
 public static uint ParallelBitDeposit(uint x, uint mask)
 {
     return Bmi2.IsSupported
         ? Bmi2.ParallelBitDeposit(x, mask)
         : ParallelBitDepositLogic(x, mask);
 }
コード例 #6
0
        /// <summary>
        /// Runs <c>Bmi2.ParallelBitDeposit</c> with both operands supplied inline as unaligned
        /// byte-wise reads of <c>_data1</c> and <c>_data2</c>, then passes the fields and the
        /// outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            // The reads stay inline as call arguments; they are intentionally not hoisted into locals.
            var deposited = Bmi2.ParallelBitDeposit(Unsafe.ReadUnaligned <UInt32>(ref Unsafe.As <UInt32, byte>(ref _data1)),
                                                    Unsafe.ReadUnaligned <UInt32>(ref Unsafe.As <UInt32, byte>(ref _data2)));

            ValidateResult(_data1, _data2, deposited);
        }
コード例 #7
0
        /// <summary>
        /// Announces the scenario to the test framework, obtains a value from
        /// <c>TestStruct.Create()</c>, feeds its <c>_fld1</c> and <c>_fld2</c> fields to
        /// <c>Bmi2.ParallelBitDeposit</c>, and passes the operands and the outcome to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            // Operands are read as fields of a local so the call shape stays the same.
            var localStruct = TestStruct.Create();
            var deposited   = Bmi2.ParallelBitDeposit(localStruct._fld1, localStruct._fld2);

            ValidateResult(localStruct._fld1, localStruct._fld2, deposited);
        }
コード例 #8
0
        /// <summary>
        /// Runs <c>Bmi2.ParallelBitDeposit</c> directly on <c>_clsVar1</c> and <c>_clsVar2</c>
        /// and passes the operands and the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunClsVarScenario()
        {
            // Variables are passed directly (no intermediate locals) to keep the operand shape.
            var deposited = Bmi2.ParallelBitDeposit(_clsVar1, _clsVar2);

            ValidateResult(_clsVar1, _clsVar2, deposited);
        }
コード例 #9
0
        /// <summary>
        /// Announces the scenario to the test framework, constructs a fresh
        /// <c>ScalarBinaryOpTest__ParallelBitDepositUInt32</c>, feeds its <c>_fld1</c> and
        /// <c>_fld2</c> fields to <c>Bmi2.ParallelBitDeposit</c>, and passes the operands and the
        /// outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            // Operands are read as fields of a local instance, not of <c>this</c>.
            var localInstance = new ScalarBinaryOpTest__ParallelBitDepositUInt32();
            var deposited     = Bmi2.ParallelBitDeposit(localInstance._fld1, localInstance._fld2);

            ValidateResult(localInstance._fld1, localInstance._fld2, deposited);
        }
コード例 #10
0
        /// <summary>
        /// Converts the low 16 bits of <paramref name="value"/> into four upper-case ASCII hex
        /// digits packed one per byte (most significant nibble in the most significant byte).
        /// </summary>
        /// <param name="value">Input of the form 0x0000WXYZ; bits above the low 16 are not deposited.</param>
        /// <returns>The four ASCII codes for W, X, Y and Z packed into a single <see cref="uint"/>.</returns>
        private static uint UInt16ToUpperHexWithBmi2(uint value)
        {
            Debug.Assert(Bmi2.IsSupported, "This code path shouldn't have gotten hit unless BMI2 was supported.");

            // Spread the four nibbles out, one per byte: 0x0000WXYZ -> 0x0W0X0Y0Z.
            uint nibbles = Bmi2.ParallelBitDeposit(value, 0x0F0F0F0Fu);

            // Per byte, 0x89 - n stays >= 0x80 for n in 0..9 (bits 4..6 clear) and drops to
            // 0x7A..0x7F for n in 10..15 (bits 4..6 set). No byte borrows from its neighbour
            // because every n is at most 0x0F.
            uint letterBits = (0x89898989u - nibbles) & 0x70707070u;

            // 0x70 >> 4 == 7, the gap between '9' + 1 (0x3A) and 'A' (0x41).
            uint letterOffset = letterBits >> 4;

            // Add the ASCII '0' base (0x30) to every byte.
            return letterOffset + nibbles + 0x30303030u;
        }
コード例 #11
0
        /// <summary>
        /// Announces the scenario to the test framework, copies <c>_data1</c> and <c>_data2</c>
        /// into locals through unaligned byte-wise reads, runs <c>Bmi2.ParallelBitDeposit</c> on
        /// those locals, and passes the locals and the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            // Reinterpret each field as a byte reference, then read a UInt32 back without
            // assuming alignment.
            var source = Unsafe.ReadUnaligned <UInt32>(ref Unsafe.As <UInt32, byte>(ref _data1));
            var mask   = Unsafe.ReadUnaligned <UInt32>(ref Unsafe.As <UInt32, byte>(ref _data2));

            var deposited = Bmi2.ParallelBitDeposit(source, mask);

            ValidateResult(source, mask, deposited);
        }
コード例 #12
0
        /// <summary>
        /// Announces the scenario to the test framework, runs <c>Bmi2.ParallelBitDeposit</c> with
        /// both operands supplied inline as unaligned 64-bit reads of <c>_data1</c> and
        /// <c>_data2</c>, then passes the fields and the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            // NOTE(review): the operands are UInt64 but the call targets Bmi2 rather than
            // Bmi2.X64 - confirm this matches the intrinsics API surface being built against.
            // The reads stay inline as call arguments; they are intentionally not hoisted into locals.
            var deposited = Bmi2.ParallelBitDeposit(Unsafe.ReadUnaligned <UInt64>(ref Unsafe.As <UInt64, byte>(ref _data1)),
                                                    Unsafe.ReadUnaligned <UInt64>(ref Unsafe.As <UInt64, byte>(ref _data2)));

            ValidateResult(_data1, _data2, deposited);
        }
コード例 #13
0
        /// <summary>
        /// Announces the scenario to the test framework, runs <c>Bmi2.ParallelBitDeposit</c>
        /// directly on <c>_clsVar1</c> and <c>_clsVar2</c>, and passes the operands and the
        /// outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunClsVarScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

            // Variables are passed directly (no intermediate locals) to keep the operand shape.
            var deposited = Bmi2.ParallelBitDeposit(_clsVar1, _clsVar2);

            ValidateResult(_clsVar1, _clsVar2, deposited);
        }
コード例 #14
0
        /// <summary>
        /// Repeatedly chains <c>Bmi2.ParallelBitDeposit</c> calls until cancellation is requested
        /// and reports how many outer passes (each of <c>LENGTH</c> calls) completed.
        /// </summary>
        /// <param name="cancellationToken">Polled once per outer pass; stops the loop when signalled.</param>
        /// <returns>The number of completed outer passes, or 0 when BMI2 is not supported.</returns>
        public override ulong Run(CancellationToken cancellationToken)
        {
            // Nothing to run without hardware support.
            if (!Bmi2.IsSupported)
            {
                return 0uL;
            }

            ulong completedPasses = 0uL;
            var chained = randomInt;

            for (; !cancellationToken.IsCancellationRequested; completedPasses++)
            {
                // Each call consumes the previous result, so the calls form a dependency chain.
                for (var step = 0; step < LENGTH; step++)
                {
                    chained = Bmi2.ParallelBitDeposit(chained, anotherRandomInt);
                }
            }

            // Adding and subtracting the chained value leaves the count unchanged while still
            // consuming the result (presumably so the deposit chain is not treated as dead code).
            return completedPasses + chained - chained;
        }
コード例 #15
0
        /// <summary>
        /// Runs <c>Bmi2.ParallelBitDeposit</c> directly on the fields <c>_fld1</c> and
        /// <c>_fld2</c> and passes the operands and the outcome to <c>ValidateResult</c>.
        /// </summary>
        public void RunFldScenario()
        {
            // Fields are passed directly (no intermediate locals) to keep the operand shape.
            var deposited = Bmi2.ParallelBitDeposit(_fld1, _fld2);
            ValidateResult(_fld1, _fld2, deposited);
        }
コード例 #16
0
        /// <summary>
        /// Widens <paramref name="count"/> bytes starting at <paramref name="input"/> into 16-bit
        /// chars at <paramref name="output"/>, validating every chunk with
        /// <c>CheckBytesInAsciiRange</c> (defined elsewhere) before it is written.
        /// </summary>
        /// <remarks>
        /// The data is consumed in progressively smaller chunks: 32-byte AVX2 vectors, 16-byte
        /// SSE2 vectors (or <c>Vector&lt;T&gt;</c> when SSE2 is unavailable but vectors are
        /// hardware accelerated), then 8/4-byte scalars, a 2-byte scalar and finally a single byte.
        /// When validation fails part-way through, chunks that were already accepted have been
        /// written to <paramref name="output"/>.
        /// </remarks>
        /// <param name="input">Pointer to the source bytes; asserted non-null in debug builds.</param>
        /// <param name="output">Pointer to the destination chars; asserted non-null in debug builds.
        /// One char is written per input byte, so the caller must provide room for
        /// <paramref name="count"/> chars.</param>
        /// <param name="count">Number of bytes to convert.</param>
        /// <returns><c>true</c> when every chunk passed <c>CheckBytesInAsciiRange</c>; otherwise <c>false</c>.</returns>
        public static unsafe bool TryGetAsciiString(byte *input, char *output, int count)
        {
            Debug.Assert(input != null);
            Debug.Assert(output != null);

            var end = input + count;

            // NOTE(review): presumably guarantees that the "end - <vector size>" pointer
            // computations below cannot wrap around below address zero - confirm.
            Debug.Assert((long)end >= Vector256 <sbyte> .Count);

            if (Sse2.IsSupported)
            {
                if (Avx2.IsSupported && input <= end - Vector256 <sbyte> .Count)
                {
                    Vector256 <sbyte> zero = Vector256 <sbyte> .Zero;

                    do
                    {
                        var vector = Avx.LoadVector256(input).AsSByte();
                        if (!CheckBytesInAsciiRange(vector, zero))
                        {
                            return(false);
                        }

                        // Interleave each byte with a zero byte to form 16-bit values. The AVX2
                        // unpack works within each 128-bit lane, hence the permute below.
                        var tmp0 = Avx2.UnpackLow(vector, zero);
                        var tmp1 = Avx2.UnpackHigh(vector, zero);

                        // Bring into the right order
                        var out0 = Avx2.Permute2x128(tmp0, tmp1, 0x20);
                        var out1 = Avx2.Permute2x128(tmp0, tmp1, 0x31);

                        Avx.Store((ushort *)output, out0.AsUInt16());
                        Avx.Store((ushort *)output + Vector256 <ushort> .Count, out1.AsUInt16());

                        // Both pointers advance by the same element count: one char per byte.
                        input  += Vector256 <sbyte> .Count;
                        output += Vector256 <sbyte> .Count;
                    } while (input <= end - Vector256 <sbyte> .Count);

                    if (input == end)
                    {
                        return(true);
                    }
                }

                if (input <= end - Vector128 <sbyte> .Count)
                {
                    Vector128 <sbyte> zero = Vector128 <sbyte> .Zero;

                    do
                    {
                        var vector = Sse2.LoadVector128(input).AsSByte();
                        if (!CheckBytesInAsciiRange(vector, zero))
                        {
                            return(false);
                        }

                        // Interleave with zero bytes to widen 16 bytes into two vectors of 8 chars.
                        var c0 = Sse2.UnpackLow(vector, zero).AsUInt16();
                        var c1 = Sse2.UnpackHigh(vector, zero).AsUInt16();

                        Sse2.Store((ushort *)output, c0);
                        Sse2.Store((ushort *)output + Vector128 <ushort> .Count, c1);

                        input  += Vector128 <sbyte> .Count;
                        output += Vector128 <sbyte> .Count;
                    } while (input <= end - Vector128 <sbyte> .Count);

                    if (input == end)
                    {
                        return(true);
                    }
                }
            }
            else if (Vector.IsHardwareAccelerated)
            {
                // Portable vector path, used only when SSE2 is not available.
                while (input <= end - Vector <sbyte> .Count)
                {
                    var vector = Unsafe.AsRef <Vector <sbyte> >(input);
                    if (!CheckBytesInAsciiRange(vector))
                    {
                        return(false);
                    }

                    // Widen writes its two output vectors straight into the destination buffer.
                    Vector.Widen(
                        vector,
                        out Unsafe.AsRef <Vector <short> >(output),
                        out Unsafe.AsRef <Vector <short> >(output + Vector <short> .Count));

                    input  += Vector <sbyte> .Count;
                    output += Vector <sbyte> .Count;
                }

                if (input == end)
                {
                    return(true);
                }
            }

            // Scalar tail: handles whatever the vector paths above left unprocessed.
            if (Environment.Is64BitProcess) // Use Intrinsic switch for branch elimination
            {
                // 64-bit: Loop longs by default
                while (input <= end - sizeof(long))
                {
                    var value = *(long *)input;
                    if (!CheckBytesInAsciiRange(value))
                    {
                        return(false);
                    }

                    if (Bmi2.X64.IsSupported)
                    {
                        // BMI2 will work regardless of the processor's endianness.
                        // The mask places each of four source bytes into the low byte of a
                        // 16-bit lane; the low and high halves of the long are done separately.
                        ((ulong *)output)[0] = Bmi2.X64.ParallelBitDeposit((ulong)value, 0x00FF00FF_00FF00FFul);
                        ((ulong *)output)[1] = Bmi2.X64.ParallelBitDeposit((ulong)(value >> 32), 0x00FF00FF_00FF00FFul);
                    }
                    else
                    {
                        output[0] = (char)input[0];
                        output[1] = (char)input[1];
                        output[2] = (char)input[2];
                        output[3] = (char)input[3];
                        output[4] = (char)input[4];
                        output[5] = (char)input[5];
                        output[6] = (char)input[6];
                        output[7] = (char)input[7];
                    }

                    // output is char*, so adding sizeof(long) advances 8 chars for 8 input bytes.
                    input  += sizeof(long);
                    output += sizeof(long);
                }

                // At most one 4-byte chunk can remain after the 8-byte loop.
                if (input <= end - sizeof(int))
                {
                    var value = *(int *)input;
                    if (!CheckBytesInAsciiRange(value))
                    {
                        return(false);
                    }

                    if (Bmi2.IsSupported)
                    {
                        // BMI2 will work regardless of the processor's endianness.
                        ((uint *)output)[0] = Bmi2.ParallelBitDeposit((uint)value, 0x00FF00FFu);
                        ((uint *)output)[1] = Bmi2.ParallelBitDeposit((uint)(value >> 16), 0x00FF00FFu);
                    }
                    else
                    {
                        output[0] = (char)input[0];
                        output[1] = (char)input[1];
                        output[2] = (char)input[2];
                        output[3] = (char)input[3];
                    }

                    input  += sizeof(int);
                    output += sizeof(int);
                }
            }
            else
            {
                // 32-bit: Loop ints by default
                while (input <= end - sizeof(int))
                {
                    var value = *(int *)input;
                    if (!CheckBytesInAsciiRange(value))
                    {
                        return(false);
                    }

                    if (Bmi2.IsSupported)
                    {
                        // BMI2 will work regardless of the processor's endianness.
                        ((uint *)output)[0] = Bmi2.ParallelBitDeposit((uint)value, 0x00FF00FFu);
                        ((uint *)output)[1] = Bmi2.ParallelBitDeposit((uint)(value >> 16), 0x00FF00FFu);
                    }
                    else
                    {
                        output[0] = (char)input[0];
                        output[1] = (char)input[1];
                        output[2] = (char)input[2];
                        output[3] = (char)input[3];
                    }

                    input  += sizeof(int);
                    output += sizeof(int);
                }
            }

            // At most three bytes remain here: an optional 2-byte chunk, then an optional single byte.
            if (input <= end - sizeof(short))
            {
                if (!CheckBytesInAsciiRange(((short *)input)[0]))
                {
                    return(false);
                }

                output[0] = (char)input[0];
                output[1] = (char)input[1];

                input  += sizeof(short);
                output += sizeof(short);
            }

            if (input < end)
            {
                if (!CheckBytesInAsciiRange(((sbyte *)input)[0]))
                {
                    return(false);
                }
                output[0] = (char)input[0];
            }

            return(true);
        }
コード例 #17
0
        /// <summary>
        /// Converts a well-formed UTF-16 surrogate pair, packed into a single <see cref="uint"/>
        /// in machine-endian order, into the four UTF-8 code units for the same scalar value,
        /// also packed in machine-endian order.
        /// </summary>
        /// <param name="value">
        /// Two UTF-16 code units as they would be read from memory with a 32-bit load (high
        /// surrogate first in memory). Must satisfy <c>IsWellFormedUtf16SurrogatePair</c>;
        /// this is only asserted in debug builds.
        /// </param>
        /// <returns>
        /// The four UTF-8 bytes <c>11110uuu 10uuzzzz 10yyyyyy 10xxxxxx</c>, laid out so that a
        /// 32-bit store writes them to memory in that order.
        /// </returns>
        internal static uint ExtractFourUtf8BytesFromSurrogatePair(uint value)
        {
            Debug.Assert(IsWellFormedUtf16SurrogatePair(value));

            if (BitConverter.IsLittleEndian)
            {
                // input = [ 110111yyyyxxxxxx 110110wwwwzzzzyy ] = scalar (000uuuuu zzzzyyyy yyxxxxxx)
                // must return [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ], where wwww = uuuuu - 1

                if (Bmi2.IsSupported)
                {
                    // Since pdep and pext have high latencies and can only be dispatched to a single execution port, we want
                    // to use them conservatively. Here, we'll build up the scalar value (this would normally be pext) via simple
                    // logical and arithmetic operations, and use only pdep for the expensive step of exploding the scalar across
                    // all four output bytes.

                    uint unmaskedScalar = (value << 10) + (value >> 16) + ((0x40u) << 10) /* uuuuu = wwww + 1 */ - 0xDC00u /* remove low surrogate marker */;

                    // Now, unmaskedScalar = [ xxxxxx11 011uuuuu zzzzyyyy yyxxxxxx ]. There's a bit of unneeded junk at the beginning
                    // that should normally be masked out via an and, but we'll just direct pdep to ignore it.

                    uint exploded = Bmi2.ParallelBitDeposit(unmaskedScalar, 0b00000111_00111111_00111111_00111111u); // = [ 00000uuu 00uuzzzz 00yyyyyy 00xxxxxx ]
                    return BinaryPrimitives.ReverseEndianness(exploded + 0xF080_8080u);                              // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ]
                }
                else
                {
                    value += 0x0000_0040u;                                                 // = [ 110111yyyyxxxxxx 11011uuuuuzzzzyy ]

                    uint tempA = BinaryPrimitives.ReverseEndianness(value & 0x003F_0700u); // = [ 00000000 00000uuu 00xxxxxx 00000000 ]
                    tempA = BitOperations.RotateLeft(tempA, 16);                           // = [ 00xxxxxx 00000000 00000000 00000uuu ]

                    uint tempB = (value & 0x00FCu) << 6;                                   // = [ 00000000 00000000 00uuzzzz 00000000 ]
                    uint tempC = (value >> 6) & 0x000F_0000u;                              // = [ 00000000 0000yyyy 00000000 00000000 ]
                    tempC |= tempB;

                    uint tempD = (value & 0x03u) << 20; // = [ 00000000 00yy0000 00000000 00000000 ]
                    tempD |= 0x8080_80F0u;

                    return tempD | tempA | tempC;   // = [ 10xxxxxx 10yyyyyy 10uuzzzz 11110uuu ]
                }
            }
            else
            {
                // input = [ 110110wwwwzzzzyy 110111yyyyxxxxxx ], where wwww = uuuuu - 1
                // must return [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ], where wwww = uuuuu - 1

                value -= 0xD800_DC00u;                    // = [ 000000wwwwzzzzyy 000000yyyyxxxxxx ]
                value += 0x0040_0000u;                    // = [ 00000uuuuuzzzzyy 000000yyyyxxxxxx ]

                uint tempA = value & 0x0700_0000u;        // = [ 00000uuu 00000000 00000000 00000000 ]
                uint tempB = (value >> 2) & 0x003F_0000u; // = [ 00000000 00uuzzzz 00000000 00000000 ]
                tempB |= tempA;

                uint tempC = (value << 2) & 0x0000_0F00u; // = [ 00000000 00000000 0000yyyy 00000000 ]

                // The two high 'y' bits sit at bits 17..16 of value and belong at bits 13..12 of
                // the result, i.e. a right shift of 4 under mask 0x3000. A shift of 6 under mask
                // 0x0003_0000 would instead copy two 'u' bits (bits 23..22) into the second byte.
                uint tempD = (value >> 4) & 0x0000_3000u; // = [ 00000000 00000000 00yy0000 00000000 ]
                tempD |= tempC;

                uint tempE = (value & 0x3Fu) + 0xF080_8080u; // = [ 11110000 10000000 10000000 10xxxxxx ]
                return tempE | tempB | tempD;                // = [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]
            }
        }
コード例 #18
0
            /// <summary>
            /// Runs <c>Bmi2.ParallelBitDeposit</c> directly on this instance's <c>_fld1</c> and
            /// <c>_fld2</c> fields and passes the operands and the outcome to
            /// <paramref name="testClass"/> for validation.
            /// </summary>
            /// <param name="testClass">Test instance whose <c>ValidateResult</c> checks the outcome.</param>
            public void RunStructFldScenario(ScalarBinaryOpTest__ParallelBitDepositUInt64 testClass)
            {
                // NOTE(review): the test class name says UInt64 but the call targets Bmi2 rather
                // than Bmi2.X64 - confirm this matches the intrinsics API surface in use.
                var deposited = Bmi2.ParallelBitDeposit(_fld1, _fld2);
                testClass.ValidateResult(_fld1, _fld2, deposited);
            }