示例#1
0
        /// <summary>
        /// Repeatedly feeds the <c>data</c> field through <see cref="Popcnt.PopCount(uint)"/>
        /// (or whichever overload matches its type) until cancellation is requested.
        /// </summary>
        /// <param name="cancellationToken">Token polled once per pass to decide when to stop.</param>
        /// <returns>
        /// The number of completed passes, each consisting of <c>LENGTH</c> PopCount calls;
        /// zero when the POPCNT instruction set is not available.
        /// </returns>
        public override ulong Run(CancellationToken cancellationToken)
        {
            ulong completedPasses = 0;

            // Without hardware support there is nothing to measure, so the counter stays at zero.
            if (Popcnt.IsSupported)
            {
                for (; !cancellationToken.IsCancellationRequested; ++completedPasses)
                {
                    // The result is written back into the field on every step.
                    for (int step = 0; step < LENGTH; ++step)
                    {
                        data = Popcnt.PopCount(data);
                    }
                }
            }

            return completedPasses;
        }
示例#2
0
        /// <summary>
        /// Returns the number of set bits in <paramref name="value"/>.
        /// </summary>
        /// <remarks>
        /// Tries the x86 POPCNT intrinsic first, then the ARM64 AdvSimd path, and finally
        /// defers to <c>SoftwareFallback</c>, which is defined outside this excerpt.
        /// </remarks>
        /// <param name="value">The 32-bit value whose set bits are counted.</param>
        /// <returns>The population count of <paramref name="value"/>.</returns>
        public static int PopCount(uint value)
        {
            if (Popcnt.IsSupported)
            {
                return((int)Popcnt.PopCount(value));
            }

            if (AdvSimd.Arm64.IsSupported)
            {
                // PopCount works on vector so convert input value to vector first.

                // Vector64.CreateScalar(uint) generates suboptimal code by storing and
                // loading the result to memory.
                // See https://github.com/dotnet/runtime/issues/35976 for details.
                // Hence use Vector64.Create(ulong) to create Vector64<ulong> and operate on that.
                Vector64 <ulong> input      = Vector64.Create((ulong)value);
                // Per-byte pop counts are summed across the vector; the total ends up in element 0.
                Vector64 <byte>  aggregated = AdvSimd.Arm64.AddAcross(AdvSimd.PopCount(input.AsByte()));
                return(AdvSimd.Extract(aggregated, 0));
            }

            // Neither intrinsic path is available on this hardware.
            return(SoftwareFallback(value));
示例#3
0
        /// <summary>
        /// Scores every action index in <c>[0, 2^(Size*Size))</c> against
        /// <paramref name="gridCode"/> and collects all indices that achieve the minimum score.
        /// </summary>
        /// <remarks>
        /// For an index <c>i</c> the score is the number of set bits in <c>i</c> plus the smaller of
        /// "set bits in <c>gridCode ^ flipActions[i]</c>" and its complement with respect to
        /// <c>Size * Size</c>.
        /// Bit counting goes through <see cref="System.Numerics.BitOperations.PopCount(uint)"/>,
        /// which uses the hardware instruction when present and a software routine otherwise, so
        /// the method no longer throws <see cref="PlatformNotSupportedException"/> on CPUs without POPCNT.
        /// </remarks>
        /// <param name="gridCode">Bit-encoded grid state that is XOR-ed with each flip action.</param>
        /// <returns>The list of minimising action indices (ascending) and the minimum score itself.</returns>
        public (List <int> codes, int score) Solve(int gridCode)
        {
            // Both values are loop-invariant; compute them once instead of on every iteration.
            var cellCount   = Size * Size;
            var actionCount = 1 << cellCount;

            var minScore      = int.MaxValue;
            var solutionCodes = new List <int>();

            for (int i = 0; i < actionCount; i++)
            {
                var numBlack = System.Numerics.BitOperations.PopCount((uint)(gridCode ^ flipActions[i]));
                var score    = System.Numerics.BitOperations.PopCount((uint)i) + Math.Min(numBlack, cellCount - numBlack);

                if (score > minScore)
                {
                    continue;
                }

                if (score < minScore)
                {
                    // Strictly better: everything collected so far is obsolete.
                    solutionCodes.Clear();
                    minScore = score;
                }

                solutionCodes.Add(i);
            }

            return (solutionCodes, minScore);
        }
        /// <summary>
        /// Adds <paramref name="key"/>/<paramref name="value"/> to this node and returns the resulting node.
        /// </summary>
        /// <remarks>
        /// The slot is selected from <paramref name="hash"/> at the given <paramref name="shift"/>.
        /// Three cases are handled: the slot already holds a child node, the slot already holds
        /// a value, or the slot is empty. Positions inside the compact storage are obtained by
        /// counting the bitmap bits below the slot bit.
        /// </remarks>
        /// <param name="key">Key to add.</param>
        /// <param name="value">Value associated with <paramref name="key"/>.</param>
        /// <param name="hash">Hash of <paramref name="key"/>.</param>
        /// <param name="shift">Number of hash bits already consumed by ancestor levels.</param>
        /// <returns>The dictionary node produced by the helper that handles the matching case.</returns>
        internal override ImmutableDictionary <TKey, TValue> Add(TKey key, TValue value, uint hash, int shift)
        {
            var slotBit   = 1U << (int)((hash >> shift) & Mask);
            var lowerMask = slotBit - 1;

            // Case 1: the slot already contains a child node.
            if ((_bitmapNodes & slotBit) != 0)
            {
                var nodeIndex = Popcnt.PopCount(_bitmapNodes & lowerMask);
                return _nodes.DuplicateWith(key, value, hash, nodeIndex, shift, _bitmapNodes, _bitmapValues, _values);
            }

            // Case 3: the slot is completely empty, so the pair is stored as a value.
            if ((_bitmapValues & slotBit) == 0)
            {
                var insertAt = (uint)Popcnt.PopCount(_bitmapValues & lowerMask);
                return _values.Add(key, value, _bitmapNodes, _nodes, _bitmapValues | slotBit, insertAt);
            }

            // Case 2: the slot currently holds a value; the slot bit moves from the value bitmap
            // to the node bitmap.
            // TODO collisions and same value
            var nodeRank  = Popcnt.PopCount(_bitmapNodes & lowerMask);
            var valueRank = Popcnt.PopCount(_bitmapValues & lowerMask);
            return _nodes.Add(key, value, hash, (uint)nodeRank, (uint)valueRank, shift, _bitmapNodes | slotBit, _bitmapValues ^ slotBit, _values);
        }
        /// <summary>
        /// Counts the set bits of <paramref name="v"/>.
        /// </summary>
        /// <remarks>
        /// Unless <c>NO_X86_INSTRINSICS</c> is defined, the 64-bit POPCNT instruction is preferred,
        /// then two 32-bit POPCNT calls on the halves; otherwise a branch-free bit-parallel sum is used.
        /// </remarks>
        /// <param name="v">The 64-bit value to inspect.</param>
        /// <returns>The number of bits set in <paramref name="v"/> (0..64).</returns>
        internal static int GetHammingDistanceCore(ulong v)
        {
#if !NO_X86_INSTRINSICS
            if (Popcnt.X64.IsSupported)
            {
                return unchecked((int)Popcnt.X64.PopCount(v));
            }

            if (Popcnt.IsSupported)
            {
                // Only the 32-bit instruction is usable: count each half separately.
                unchecked
                {
                    var lowHalf  = (uint)v;
                    var highHalf = (uint)(v >> 32);
                    return (int)(Popcnt.PopCount(lowHalf) + Popcnt.PopCount(highHalf));
                }
            }
#endif
            unchecked
            {
                const ulong PairMask   = 0x5555555555555555UL;
                const ulong NibbleMask = 0x3333333333333333UL;
                const ulong ByteMask   = 0xF0F0F0F0F0F0F0FUL;
                const ulong ByteOnes   = 0x101010101010101UL;

                // 2-bit sums, then 4-bit sums, then per-byte sums.
                var pairSums   = v - ((v >> 1) & PairMask);
                var nibbleSums = (pairSums & NibbleMask) + ((pairSums >> 2) & NibbleMask);
                var byteSums   = (nibbleSums + (nibbleSums >> 4)) & ByteMask;

                // The multiplication accumulates all byte sums into the top byte.
                return (int)((byteSums * ByteOnes) >> 56);
            }
        }
        /// <summary>
        /// Adds <paramref name="key"/>/<paramref name="value"/> below this bitmap node and returns
        /// a new node; the current node's child array is copied rather than modified.
        /// </summary>
        /// <remarks>
        /// The child array is compact: a child's position equals the number of bitmap bits set
        /// below its slot bit. The previous implementation shifted the bitmap by the one-hot
        /// mask value (<c>_bitmap &gt;&gt; (int)bit</c>) instead of ranking the lower bits, which
        /// selects the wrong child as soon as more than one slot is occupied
        /// (e.g. bitmap <c>0b11</c>, slot 1 produced index 0 instead of 1).
        /// </remarks>
        /// <param name="key">Key to add.</param>
        /// <param name="value">Value associated with <paramref name="key"/>.</param>
        /// <param name="hash">Hash of <paramref name="key"/>.</param>
        /// <param name="shift">Number of hash bits already consumed by ancestor levels.</param>
        /// <returns>A new <c>BitMapNode</c> containing the added entry.</returns>
        internal override ImmutableDictionary <TKey, TValue> Add(TKey key, TValue value, uint hash, int shift)
        {
            var bit = 1U << (int)((hash >> shift) & Mask);

            // Rank of the slot: how many occupied slots precede it in the compact child array.
            var index = (int)Popcnt.PopCount(_bitmap & (bit - 1));

            if ((_bitmap & bit) != 0)
            {
                // Slot already occupied: copy the array and replace that one child with the
                // result of adding one level deeper.
                var newNodes = new ImmutableDictionary <TKey, TValue> [_nodes.Length];
                Array.Copy(_nodes, newNodes, _nodes.Length);
                newNodes[index] = _nodes[index].Add(key, value, hash, shift + Shift);
                return(new BitMapNode <TKey, TValue>(_bitmap, newNodes));
            }
            else
            {
                // Slot empty: open a gap at the ranked position and insert a fresh leaf there.
                var newNodes = new ImmutableDictionary <TKey, TValue> [_nodes.Length + 1];
                Array.Copy(_nodes, newNodes, index);
                Array.Copy(_nodes, index, newNodes, index + 1, _nodes.Length - index);
                newNodes[index] = new KeyValueNode <TKey, TValue>(key, value, hash);
                return(new BitMapNode <TKey, TValue>(_bitmap | bit, newNodes));
            }
        }
示例#7
0
        /// <summary>
        /// Returns the number of set bits in <paramref name="value"/>.
        /// </summary>
        /// <remarks>
        /// Uses the POPCNT instruction when the hardware offers it and a bit-parallel
        /// software routine otherwise.
        /// </remarks>
        /// <param name="value">The 32-bit value to inspect.</param>
        /// <returns>The population count of <paramref name="value"/> (0..32).</returns>
        public static int PopCount(uint value)
        {
            return Popcnt.IsSupported
                ? (int)Popcnt.PopCount(value)
                : CountBitsInSoftware(value);

            int CountBitsInSoftware(uint bits)
            {
                // Fold neighbouring bits into 2-bit sums, then 4-bit sums, then per-byte sums.
                bits = bits - ((bits >> 1) & 0x55555555u);
                bits = (bits & 0x33333333u) + ((bits >> 2) & 0x33333333u);
                bits = (bits + (bits >> 4)) & 0x0F0F0F0Fu;

                // Multiplying by 0x01010101 accumulates the four byte sums in the top byte.
                return (int)((bits * 0x01010101u) >> 24);
            }
        }
示例#8
0
        /// <summary>
        /// Reads <c>n</c>, <c>k</c> and an <c>n</c>-element integer array, enumerates every
        /// subset of positions containing exactly <c>k</c> elements, and writes the minimum
        /// total increase found.
        /// </summary>
        /// <remarks>
        /// For a subset, a running maximum of the elements seen so far is kept. Whenever a selected
        /// element does not exceed that maximum, the shortfall plus one is added to the total and
        /// the running maximum is bumped by one.
        /// </remarks>
        /// <param name="io">Source of the input values and sink for the single output line.</param>
        public override void Solve(IOManager io)
        {
            var n      = io.ReadInt();
            var k      = io.ReadInt();
            var values = io.ReadIntArray(n);

            var  subsetLimit = 1 << n;
            long best        = long.MaxValue;

            for (var flag = BitSet.Zero; flag < subsetLimit; flag++)
            {
                // Only subsets with exactly k selected positions are candidates.
                if (Popcnt.PopCount(flag) != k)
                {
                    continue;
                }

                int  runningMax = 0;
                long cost       = 0;

                for (int pos = 0; pos < values.Length; pos++)
                {
                    if (flag[pos] && runningMax >= values[pos])
                    {
                        // Charge the shortfall (computed from the old maximum), then raise the maximum.
                        cost += runningMax - values[pos] + 1;
                        runningMax++;
                    }

                    runningMax.ChangeMax(values[pos]);
                }

                best.ChangeMin(cost);
            }

            io.WriteLine(best);
        }
示例#9
0
文件: Program.cs 项目: z77ma/runtime
    /// <summary>
    /// Entry point of the AOT hardware-intrinsics test. Verifies the vector acceleration state
    /// and then runs one <c>Check</c> per instruction set against the expectation selected by
    /// the compile-time define.
    /// </summary>
    /// <returns>100 when <c>s_success</c> is still true at the end, otherwise 1.</returns>
    /// <exception cref="Exception">
    /// Thrown when <c>Vector.IsHardwareAccelerated</c> or <c>Vector&lt;byte&gt;.Count</c>
    /// differs from the expected value.
    /// </exception>
    static int Main()
    {
        s_success = true;

        // We expect the AOT compiler generated HW intrinsics with the following characteristics:
        //
        // * TRUE = IsSupported assumed to be true, no runtime check
        // * NULL = IsSupported is a runtime check, code should be behind the check or bad things happen
        // * FALSE = IsSupported assumed to be false, no runtime check, PlatformNotSupportedException if used
        //
        // The test is compiled with multiple defines to test this.

        // Exactly one of the three defines must be set; otherwise compilation stops at the #error below.
#if BASELINE_INTRINSICS
        bool vectorsAccelerated = true;
        int  byteVectorLength   = 16;
        bool?Sse2AndBelow       = true;
        bool?Sse3Group          = null;
        bool?AesLzPcl           = null;
        bool?Sse4142            = null;
        bool?PopCnt             = null;
        bool?Avx12    = false;
        bool?FmaBmi12 = false;
        bool?Avxvnni  = false;
#elif NON_VEX_INTRINSICS
        bool vectorsAccelerated = true;
        int  byteVectorLength   = 16;
        bool?Sse2AndBelow       = true;
        bool?Sse3Group          = true;
        bool?AesLzPcl           = null;
        bool?Sse4142            = true;
        bool?PopCnt             = null;
        bool?Avx12    = false;
        bool?FmaBmi12 = false;
        bool?Avxvnni  = false;
#elif VEX_INTRINSICS
        bool vectorsAccelerated = true;
        int  byteVectorLength   = 32;
        bool?Sse2AndBelow       = true;
        bool?Sse3Group          = true;
        bool?AesLzPcl           = null;
        bool?Sse4142            = true;
        bool?PopCnt             = null;
        bool?Avx12    = true;
        bool?FmaBmi12 = null;
        bool?Avxvnni  = null;
#else
#error Who dis?
#endif

        // Sanity-check the generic Vector<T> configuration before looking at individual ISAs.
        if (vectorsAccelerated != Vector.IsHardwareAccelerated)
        {
            throw new Exception($"Vectors HW acceleration state unexpected - expected {vectorsAccelerated}, got {Vector.IsHardwareAccelerated}");
        }

        if (byteVectorLength != Vector <byte> .Count)
        {
            throw new Exception($"Unexpected vector length - expected {byteVectorLength}, got {Vector<byte>.Count}");
        }

        // Each Check call receives: a display name, the expected tri-state from above, a pointer to
        // a helper method, the IsSupported value as seen here, and an optional lambda exercising
        // one intrinsic (null where no lambda is supplied).
        // NOTE(review): Check itself is defined outside this excerpt; it presumably records
        // failures in s_success - confirm against its definition.
        Check("Sse", Sse2AndBelow, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Sse.X64", Sse2AndBelow, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128 <float> .Zero) == 0);

        Check("Sse2", Sse2AndBelow, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128 <ushort> .Zero, 0) == 0);
        Check("Sse2.X64", Sse2AndBelow, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128 <double> .Zero) == 0);

        Check("Sse3", Sse3Group, &Sse3IsSupported, Sse3.IsSupported, () => Sse3.MoveHighAndDuplicate(Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Sse3.X64", Sse3Group, &Sse3X64IsSupported, Sse3.X64.IsSupported, null);

        Check("Ssse3", Sse3Group, &Ssse3IsSupported, Ssse3.IsSupported, () => Ssse3.Abs(Vector128 <short> .Zero).Equals(Vector128 <ushort> .Zero));
        Check("Ssse3.X64", Sse3Group, &Ssse3X64IsSupported, Ssse3.X64.IsSupported, null);

        Check("Sse41", Sse4142, &Sse41IsSupported, Sse41.IsSupported, () => Sse41.Max(Vector128 <int> .Zero, Vector128 <int> .Zero).Equals(Vector128 <int> .Zero));
        Check("Sse41.X64", Sse4142, &Sse41X64IsSupported, Sse41.X64.IsSupported, () => Sse41.X64.Extract(Vector128 <long> .Zero, 0) == 0);

        Check("Sse42", Sse4142, &Sse42IsSupported, Sse42.IsSupported, () => Sse42.Crc32(0, 0) == 0);
        Check("Sse42.X64", Sse4142, &Sse42X64IsSupported, Sse42.X64.IsSupported, () => Sse42.X64.Crc32(0, 0) == 0);

        Check("Aes", AesLzPcl, &AesIsSupported, Aes.IsSupported, () => Aes.KeygenAssist(Vector128 <byte> .Zero, 0).Equals(Vector128.Create((byte)99)));
        Check("Aes.X64", AesLzPcl, &AesX64IsSupported, Aes.X64.IsSupported, null);

        Check("Avx", Avx12, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256 <double> .Zero, Vector256 <double> .Zero).Equals(Vector256 <double> .Zero));
        Check("Avx.X64", Avx12, &AvxX64IsSupported, Avx.X64.IsSupported, null);

        Check("Avx2", Avx12, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256 <int> .Zero).Equals(Vector256 <uint> .Zero));
        Check("Avx2.X64", Avx12, &Avx2X64IsSupported, Avx2.X64.IsSupported, null);

        Check("Bmi1", FmaBmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0);
        Check("Bmi1.X64", FmaBmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0);

        Check("Bmi2", FmaBmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0);
        Check("Bmi2.X64", FmaBmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0);

        Check("Fma", FmaBmi12, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128 <float> .Zero, Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Fma.X64", FmaBmi12, &FmaX64IsSupported, Fma.X64.IsSupported, null);

        Check("Lzcnt", AesLzPcl, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32);
        Check("Lzcnt.X64", AesLzPcl, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64);

        Check("Pclmulqdq", AesLzPcl, &PclmulqdqIsSupported, Pclmulqdq.IsSupported, () => Pclmulqdq.CarrylessMultiply(Vector128 <long> .Zero, Vector128 <long> .Zero, 0).Equals(Vector128 <long> .Zero));
        Check("Pclmulqdq.X64", AesLzPcl, &PclmulqdqX64IsSupported, Pclmulqdq.X64.IsSupported, null);

        Check("Popcnt", PopCnt, &PopcntIsSupported, Popcnt.IsSupported, () => Popcnt.PopCount(0) == 0);
        Check("Popcnt.X64", PopCnt, &PopcntX64IsSupported, Popcnt.X64.IsSupported, () => Popcnt.X64.PopCount(0) == 0);

        Check("AvxVnni", Avxvnni, &AvxVnniIsSupported, AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128 <int> .Zero, Vector128 <byte> .Zero, Vector128 <sbyte> .Zero).Equals(Vector128 <int> .Zero));
        Check("AvxVnni.X64", Avxvnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null);

        // 100 is returned on success, 1 on failure.
        return(s_success ? 100 : 1);
    }
示例#10
0
        /// <summary>
        /// Writes <paramref name="value"/> as a MessagePack array of signed 8-bit integers,
        /// using a 16-bytes-per-iteration SIMD path when <see cref="Popcnt"/> is supported and
        /// the input is at least two strides long.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="value">Array to serialize; <c>null</c> is written as nil.</param>
        /// <param name="options">Serializer options; not read by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, sbyte[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            // An empty array consists of the header only; also avoids taking &value[0] below.
            if (inputLength == 0)
            {
                return;
            }

            fixed(sbyte *pSource = &value[0])
            {
                var inputEnd      = pSource + inputLength;
                var inputIterator = pSource;

                if (Popcnt.IsSupported)
                {
                    const int ShiftCount = 4;
                    const int Stride     = 1 << ShiftCount;
                    // We enter the SIMD mode when there are more than the Stride after alignment adjustment.
                    if (inputLength < Stride << 1)
                    {
                        goto ProcessEach;
                    }

                    {
                        // Make InputIterator Aligned
                        // The elements in front of the alignment boundary are written one by one.
                        var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(inputIterator);
                        inputLength -= offset;
                        var offsetEnd = inputIterator + offset;
                        while (inputIterator != offsetEnd)
                        {
                            writer.Write(*inputIterator++);
                        }
                    }

                    fixed(byte *tablePointer = &ShuffleAndMaskTable[0])
                    {
                        fixed(byte *maskTablePointer = &SingleInstructionMultipleDataPrimitiveArrayFormatterHelper.StoreMaskTable[0])
                        {
                            var vectorMinFixNegInt        = Vector128.Create((sbyte)MessagePackRange.MinFixNegativeInt);
                            var vectorMessagePackCodeInt8 = Vector128.Create(MessagePackCode.Int8);

                            // vectorizedEnd rounds the remaining length down to a multiple of Stride;
                            // whatever is left over is handled by the scalar loop at ProcessEach.
                            for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride)
                            {
                                var current = Sse2.LoadVector128(inputIterator);
                                // One mask bit per input element that is smaller than MinFixNegativeInt.
                                var index   = unchecked ((uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vectorMinFixNegInt, current)));

                                if (index == 0)
                                {
                                    // When all 16 input values are in the FixNum range,
                                    // the vector is stored to the output unchanged.
                                    var span = writer.GetSpan(Stride);
                                    Sse2.Store((sbyte *)Unsafe.AsPointer(ref span[0]), current);

                                    writer.Advance(Stride);
                                    continue;
                                }

                                unchecked
                                {
                                    // The 16 elements are processed as two halves of 8.
                                    var index0      = (byte)index;
                                    var index1      = (byte)(index >> 8);
                                    // Output size per half: 8 bytes plus one extra byte per flagged element.
                                    var count0      = (int)(Popcnt.PopCount(index0) + 8);
                                    var count1      = (int)(Popcnt.PopCount(index1) + 8);
                                    var countTotal  = count0 + count1;
                                    var destination = writer.GetSpan(countTotal);
                                    fixed(byte *pDestination = &destination[0])
                                    {
                                        var tempDestination = pDestination;
                                        // The 8-bit mask selects a 16-byte shuffle pattern from the table;
                                        // the same pattern also drives the blend that inserts the Int8 code.
                                        var shuffle0        = Sse2.LoadVector128(tablePointer + (index0 << 4));
                                        var shuffled0       = Ssse3.Shuffle(current.AsByte(), shuffle0);
                                        var answer0         = Sse41.BlendVariable(shuffled0, vectorMessagePackCodeInt8, shuffle0);

                                        // Masked store: the store mask is looked up by count0.
                                        Sse2.MaskMove(answer0, Sse2.LoadVector128(maskTablePointer + (count0 << 4)), tempDestination);
                                        tempDestination += count0;

                                        var shuffle1  = Sse2.LoadVector128(tablePointer + (index1 << 4));
                                        // Bring the upper 8 input bytes down into the lower half of the vector.
                                        var shift1    = Sse2.ShiftRightLogical128BitLane(current.AsByte(), 8);
                                        var shuffled1 = Ssse3.Shuffle(shift1, shuffle1);
                                        var answer1   = Sse41.BlendVariable(shuffled1, vectorMessagePackCodeInt8, shuffle1);

                                        Sse2.MaskMove(answer1, Sse2.LoadVector128(maskTablePointer + (count1 << 4)), tempDestination);
                                    }

                                    writer.Advance(countTotal);
                                }
                            }
                        }
                    }
                }

                // Scalar path: short inputs, the non-SIMD case, and the tail after the vector loop.
ProcessEach:
                while (inputIterator != inputEnd)
                {
                    writer.Write(*inputIterator++);
                }
            }
        }
示例#11
0
        /// <summary>
        /// Computes how many UTF-16 code units are needed to represent the given well-formed
        /// UTF-8 data, using SSE2 and POPCNT.
        /// </summary>
        /// <param name="utf8Data">UTF-8 bytes; the method does not validate them.</param>
        /// <returns>The number of UTF-16 chars that the input transcodes to.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when SSE2 or POPCNT is not supported on the current hardware.
        /// </exception>
        public static unsafe int GetUtf16CharCountFromKnownWellFormedUtf8(ReadOnlySpan <byte> utf8Data)
        {
            // Remember: the number of resulting UTF-16 chars will never be greater than the number
            // of UTF-8 bytes given well-formed input, so we can get away with casting the final
            // result to an 'int'.

            fixed(byte *pPinnedUtf8Data = &MemoryMarshal.GetReference(utf8Data))
            {
                if (Sse2.IsSupported && Popcnt.IsSupported)
                {
                    // Optimizations via SSE2 & POPCNT are available - use them.

                    Debug.Assert(BitConverter.IsLittleEndian, "SSE2 only supported on little-endian platforms.");
                    Debug.Assert(sizeof(nint) == IntPtr.Size, "nint defined incorrectly.");
                    Debug.Assert(sizeof(nuint) == IntPtr.Size, "nuint defined incorrectly.");

                    byte *pBuffer      = pPinnedUtf8Data;
                    nuint bufferLength = (uint)utf8Data.Length;

                    // Optimization: Can we stay in the all-ASCII code paths?

                    nuint utf16CharCount = GetIndexOfFirstNonAsciiByte_Sse2(pBuffer, bufferLength);

                    if (utf16CharCount != bufferLength)
                    {
                        // Found at least one non-ASCII byte, so fall down the slower (but still vectorized) code paths.
                        // Given well-formed UTF-8 input, we can compute the number of resulting UTF-16 code units
                        // using the following formula:
                        //
                        // utf16CharCount = utf8ByteCount - numUtf8ContinuationBytes + numUtf8FourByteHeaders

                        utf16CharCount = bufferLength;

                        // Comparison constants for the signed-byte tests below.
                        Vector128 <sbyte> vecAllC0 = Vector128.Create(unchecked ((sbyte)0xC0));
                        Vector128 <sbyte> vecAll80 = Vector128.Create(unchecked ((sbyte)0x80));
                        Vector128 <sbyte> vecAll6F = Vector128.Create(unchecked ((sbyte)0x6F));

                        {
                            // Perform an aligned read of the first part of the buffer.
                            // We'll mask out any data at the start of the buffer we don't care about.
                            //
                            // For example, if (pBuffer MOD 16) = 2:
                            // [ AA BB CC DD ... ] <-- original vector
                            // [ 00 00 CC DD ... ] <-- after PANDN operation

                            // offset is zero or negative: the distance back to the previous 16-byte boundary.
                            nint offset = -((nint)pBuffer & (sizeof(Vector128 <sbyte>) - 1));
                            Vector128 <sbyte> shouldBeMaskedOut = Sse2.CompareGreaterThan(Vector128.Create((byte)((int)offset + sizeof(Vector128 <sbyte>) - 1)).AsSByte(), VectorOfElementIndices);
                            Vector128 <sbyte> thisVector        = Sse2.AndNot(shouldBeMaskedOut, Unsafe.Read <Vector128 <sbyte> >(pBuffer + offset));

                            // If there's any data at the end of the buffer we don't care about, mask it out now.
                            // If this happens the 'bufferLength' value will be a lie, but it'll cause all of the
                            // branches later in the method to be skipped, so it's not a huge problem.

                            if (bufferLength < (nuint)offset + (uint)sizeof(Vector128 <sbyte>))
                            {
                                Vector128 <sbyte> shouldBeAllowed = Sse2.CompareLessThan(VectorOfElementIndices, Vector128.Create((byte)((int)bufferLength - (int)offset)).AsSByte());
                                thisVector   = Sse2.And(shouldBeAllowed, thisVector);
                                bufferLength = (nuint)offset + (uint)sizeof(Vector128 <sbyte>);
                            }

                            // As signed bytes, values below 0xC0 (i.e. 0x80..0xBF) are the continuation bytes.
                            uint maskOfContinuationBytes  = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vecAllC0, thisVector));
                            uint countOfContinuationBytes = Popcnt.PopCount(maskOfContinuationBytes);
                            utf16CharCount -= countOfContinuationBytes;

                            // XOR with 0x80 flips the sign bit so that a signed "> 0x6F" test selects bytes >= 0xF0.
                            uint maskOfFourByteHeaders  = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(thisVector, vecAll80), vecAll6F));
                            uint countOfFourByteHeaders = Popcnt.PopCount(maskOfFourByteHeaders);
                            utf16CharCount += countOfFourByteHeaders;

                            // Account for the bytes of the first vector that belonged to the buffer.
                            bufferLength -= (nuint)offset;
                            bufferLength -= (uint)sizeof(Vector128 <sbyte>);

                            pBuffer += offset;
                            pBuffer += (uint)sizeof(Vector128 <sbyte>);
                        }

                        // At this point, pBuffer is guaranteed aligned.

                        Debug.Assert((nuint)pBuffer % (uint)sizeof(Vector128 <sbyte>) == 0, "pBuffer should have been aligned.");

                        // Main loop: full, aligned 16-byte vectors.
                        while (bufferLength >= (uint)sizeof(Vector128 <sbyte>))
                        {
                            Vector128 <sbyte> thisVector = Sse2.LoadAlignedVector128((sbyte *)pBuffer);

                            uint maskOfContinuationBytes  = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vecAllC0, thisVector));
                            uint countOfContinuationBytes = Popcnt.PopCount(maskOfContinuationBytes);
                            utf16CharCount -= countOfContinuationBytes;

                            uint maskOfFourByteHeaders  = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(thisVector, vecAll80), vecAll6F));
                            uint countOfFourByteHeaders = Popcnt.PopCount(maskOfFourByteHeaders);
                            utf16CharCount += countOfFourByteHeaders;

                            pBuffer      += sizeof(Vector128 <sbyte>);
                            bufferLength -= (uint)sizeof(Vector128 <sbyte>);
                        }

                        if ((uint)bufferLength > 0)
                        {
                            // There's still more data to be read.
                            // We need to mask out elements of the vector we don't care about.
                            // These elements will occur at the end of the vector.
                            //
                            // For example, if 14 bytes remain in the input stream:
                            // [ ... CC DD EE FF ] <-- original vector
                            // [ ... CC DD 00 00 ] <-- after PANDN operation

                            Vector128 <sbyte> shouldBeMaskedOut = Sse2.CompareGreaterThan(VectorOfElementIndices, Vector128.Create((byte)((int)bufferLength - 1)).AsSByte());
                            Vector128 <sbyte> thisVector        = Sse2.AndNot(shouldBeMaskedOut, *(Vector128 <sbyte> *)pBuffer);

                            uint maskOfContinuationBytes  = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vecAllC0, thisVector));
                            uint countOfContinuationBytes = Popcnt.PopCount(maskOfContinuationBytes);
                            utf16CharCount -= countOfContinuationBytes;

                            uint maskOfFourByteHeaders  = (uint)Sse2.MoveMask(Sse2.CompareGreaterThan(Sse2.Xor(thisVector, vecAll80), vecAll6F));
                            uint countOfFourByteHeaders = Popcnt.PopCount(maskOfFourByteHeaders);
                            utf16CharCount += countOfFourByteHeaders;
                        }
                    }

                    return((int)utf16CharCount);
                }
                else
                {
                    // Cannot use SSE2 & POPCNT. Fall back to slower code paths.
                    // NOTE(review): no fallback is implemented here; this branch throws.

                    throw new NotImplementedException();
                }
            }
        }
示例#12
0
        /// <summary>
        /// Compares two memory regions of <paramref name="size"/> bytes and returns the difference
        /// of the first pair of bytes that differ.
        /// </summary>
        /// <param name="p1">Start of the first region.</param>
        /// <param name="p2">Start of the second region.</param>
        /// <param name="size">Number of bytes to compare.</param>
        /// <returns>
        /// Zero when the regions are equal; otherwise the byte from <paramref name="p1"/> minus the
        /// byte from <paramref name="p2"/> at the first differing position.
        /// </returns>
        private static int CompareScalarAltPopCount(void *p1, void *p2, int size)
        {
            byte *bpx = (byte *)p1;
            byte *bpy = (byte *)p2;

            // PERF: This allows us to do pointer arithmetics and use relative addressing using the
            //       hardware instructions without needed an extra register.
            long offset = bpy - bpx;

            if (size < 8)
            {
                goto ProcessSmall;
            }

            // PERF: Current version of the JIT (2.0.5) will use a 4 instruction magic division
            //       instead of a simple shift even though the divisor is a power of 2.
            int l = size >> 3; // (Equivalent to size / 8)

            ulong xor;

            // Compare 8 bytes at a time; a non-zero XOR means some byte in this word differs.
            for (int i = 0; i < l; i++, bpx += 8)
            {
                // PERF: JIT will emit: ```{op} {reg}, qword ptr [rdx+rax]```
                xor = *((ulong *)bpx) ^ *(ulong *)(bpx + offset);
                if (xor != 0)
                {
                    goto Tail;
                }
            }

            // The remaining 0..7 bytes are covered by testing the 4, 2 and 1 bits of size.
ProcessSmall:

            if ((size & 4) != 0)
            {
                xor = *((uint *)bpx) ^ *((uint *)(bpx + offset));
                if (xor != 0)
                {
                    goto Tail;
                }

                bpx += 4;
            }

            if ((size & 2) != 0)
            {
                xor = (ulong)(*((ushort *)bpx) ^ *((ushort *)(bpx + offset)));
                if (xor != 0)
                {
                    goto Tail;
                }

                bpx += 2;
            }

            if ((size & 1) != 0)
            {
                return(*bpx - *(bpx + offset));
            }

            return(0);

Tail:

            // PERF: This is a bit twiddling hack. Given that bitwise xoring 2 values flag the bits difference,
            //       we can use that we know we are running on little endian hardware and the very first bit set
            //       will correspond to the first byte which is different.

            // (xor & -xor) isolates the lowest set bit; subtracting 1 turns it into a mask of the
            // bits below it, whose population count is the bit position. Shifting right by 3
            // converts that bit position into a byte offset.
            bpx += Popcnt.PopCount((ulong)((long)xor & -(long)xor) - 1) >> 3;
            return(*bpx - *(bpx + offset));
        }
示例#13
0
文件: pop.cs 项目: 0xCM/arrows
 /// <summary>
 /// Counts the set bits in the 8-bit pattern of <paramref name="src"/>.
 /// </summary>
 /// <remarks>
 /// The value is reinterpreted as <see cref="byte"/> before widening. A direct
 /// <c>(uint)src</c> conversion sign-extends negative inputs and would count 24 extra
 /// one-bits (e.g. -1 would report 32 instead of 8).
 /// </remarks>
 /// <param name="src">The signed 8-bit value to inspect.</param>
 /// <returns>The number of set bits, in the range 0..8.</returns>
 public static uint pop(sbyte src)
 => Popcnt.PopCount((uint)unchecked((byte)src));
示例#14
0
        /// <summary>
        /// Reads <c>n</c> villages (x, y, p), precomputes per-village walking costs for every subset of
        /// road positions along each axis, then searches all assignments in which each village
        /// contributes no road, an x-road or a y-road, and yields the minimum total cost for each
        /// possible number of constructed roads (0..n).
        /// </summary>
        /// <param name="inputStream">Reader supplying the village count followed by the village triples.</param>
        /// <returns>
        /// <c>n + 1</c> values; entry <c>c</c> is the minimum cost found with exactly <c>c</c> roads.
        /// </returns>
        public override IEnumerable <object> Solve(TextReader inputStream)
        {
            var n        = inputStream.ReadInt();
            var villages = new Village[n];

            for (int v = 0; v < villages.Length; v++)
            {
                var (x, y, p) = inputStream.ReadValue <int, int, int>();
                villages[v]   = new Village(x, y, p);
            }

            var subsetCount = 1 << villages.Length;
            var xCosts      = new long[subsetCount, villages.Length];
            var yCosts      = new long[subsetCount, villages.Length];

            // For every subset of villages acting as road positions, store the cheapest walk per village.
            for (var subset = BitSet.Zero; subset < subsetCount; subset++)
            {
                for (int v = 0; v < villages.Length; v++)
                {
                    // Baseline: the walk to coordinate 0.
                    var bestX = villages[v].WalkX(0);
                    var bestY = villages[v].WalkY(0);

                    for (int road = 0; road < villages.Length; road++)
                    {
                        if (!subset[road])
                        {
                            continue;
                        }

                        bestX = Math.Min(bestX, villages[v].WalkX(villages[road].X));
                        bestY = Math.Min(bestY, villages[v].WalkY(villages[road].Y));
                    }

                    xCosts[subset, v] += bestX;
                    yCosts[subset, v] += bestY;
                }
            }

            var results = new long[villages.Length + 1];
            results.AsSpan().Fill(long.MaxValue);

            Explore(0, 0, 0);

            for (int c = 0; c < results.Length; c++)
            {
                yield return results[c];
            }

            // Ternary search over villages: skip, add to the x-road set, or add to the y-road set.
            void Explore(int xFlags, int yFlags, int depth)
            {
                if (depth != villages.Length)
                {
                    var villageBit = 1 << depth;
                    Explore(xFlags, yFlags, depth + 1);
                    Explore(xFlags | villageBit, yFlags, depth + 1);
                    Explore(xFlags, yFlags | villageBit, depth + 1);
                    return;
                }

                // Leaf: every village uses the cheaper of its x-walk and y-walk.
                long total = 0;
                for (int v = 0; v < villages.Length; v++)
                {
                    total += Math.Min(xCosts[xFlags, v], yCosts[yFlags, v]);
                }

                var roadCount = Popcnt.PopCount((uint)xFlags) + Popcnt.PopCount((uint)yFlags);
                results[roadCount] = Math.Min(results[roadCount], total);
            }
        }
示例#15
0
 /// <summary>
 /// Returns the number of set bits in <paramref name="Value"/>.
 /// </summary>
 /// <remarks>
 /// Uses <see cref="System.Numerics.BitOperations.PopCount(uint)"/>, which uses the hardware
 /// instruction when available and a software routine otherwise. Calling
 /// <c>Popcnt.PopCount</c> directly throws <see cref="System.PlatformNotSupportedException"/>
 /// on CPUs without POPCNT.
 /// </remarks>
 /// <param name="Value">The 32-bit value to inspect.</param>
 /// <returns>The population count of <paramref name="Value"/> (0..32).</returns>
 public static int CountSetBits(uint Value) => System.Numerics.BitOperations.PopCount(Value);
示例#16
0
文件: pop.cs 项目: 0xCM/arrows
 /// <summary>
 /// Counts the set bits in the 16-bit pattern of <paramref name="src"/>.
 /// </summary>
 /// <remarks>
 /// The value is reinterpreted as <see cref="ushort"/> before widening. A direct
 /// <c>(uint)src</c> conversion sign-extends negative inputs and would count 16 extra
 /// one-bits (e.g. -1 would report 32 instead of 16).
 /// </remarks>
 /// <param name="src">The signed 16-bit value to inspect.</param>
 /// <returns>The number of set bits, in the range 0..16.</returns>
 public static uint pop(short src)
 => Popcnt.PopCount((uint)unchecked((ushort)src));
示例#17
0
        /// <summary>
        /// Exercises <c>Popcnt.PopCount</c> through direct calls and through reflection.
        /// When the 64-bit form is unavailable (no POPCNT or a 32-bit process) it checks
        /// that the call is rejected with <see cref="PlatformNotSupportedException"/>;
        /// when POPCNT is supported it checks every entry of the expectation tables.
        /// </summary>
        /// <param name="args">Unused command-line arguments.</param>
        /// <returns><c>Pass</c> when every check succeeds, otherwise <c>Fail</c>.</returns>
        static int Main(string[] args)
        {
            ulong sl = 0;
            long resl;
            int testResult = Pass;

            if (!Popcnt.IsSupported || !Environment.Is64BitProcess)
            {
                try
                {
                    resl = Popcnt.PopCount(sl);

                    // Reaching this point means the call did not throw, which is a failure here.
                    Console.WriteLine("Intrinsic Popcnt.PopCount is called on non-supported hardware");
                    Console.WriteLine("Popcnt.IsSupported " + Popcnt.IsSupported);
                    Console.WriteLine("Environment.Is64BitProcess " + Environment.Is64BitProcess);
                    testResult = Fail;
                }
                catch (PlatformNotSupportedException)
                {
                    // Expected outcome: the intrinsic is not available.
                }

                try
                {
                    resl = Convert.ToInt64(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { sl.GetType() }).Invoke(null, new object[] { sl }));

                    // Same check as above, but for the reflection-based invocation.
                    Console.WriteLine("Intrinsic Popcnt.PopCount is called via reflection on non-supported hardware");
                    Console.WriteLine("Popcnt.IsSupported " + Popcnt.IsSupported);
                    Console.WriteLine("Environment.Is64BitProcess " + Environment.Is64BitProcess);
                    testResult = Fail;
                }
                catch (TargetInvocationException e) when (e.InnerException is PlatformNotSupportedException)
                {
                    // Expected outcome: reflection wraps the PlatformNotSupportedException.
                }
            }

            if (Popcnt.IsSupported)
            {
                if (Environment.Is64BitProcess)
                {
                    for (int i = 0; i < longPopcntTable.Length; i++)
                    {
                        sl = longPopcntTable[i].s;

                        resl = Popcnt.PopCount(sl);
                        if (resl != longPopcntTable[i].res)
                        {
                            // Format indices must stay within the four supplied arguments (0..3);
                            // a larger index would raise FormatException instead of reporting.
                            Console.WriteLine("{0}: Inputs: 0x{1,16:x} Expected: 0x{2,16:x} actual: 0x{3,16:x}",
                                              i, sl, longPopcntTable[i].res, resl);
                            testResult = Fail;
                        }

                        resl = Convert.ToInt64(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { sl.GetType() }).Invoke(null, new object[] { sl }));
                        if (resl != longPopcntTable[i].res)
                        {
                            Console.WriteLine("{0}: Inputs: 0x{1,16:x} Expected: 0x{2,16:x} actual: 0x{3,16:x} - Reflection",
                                              i, sl, longPopcntTable[i].res, resl);
                            testResult = Fail;
                        }
                    }
                }

                uint si;
                int resi;
                for (int i = 0; i < intPopcntTable.Length; i++)
                {
                    si = intPopcntTable[i].s;

                    resi = Popcnt.PopCount(si);
                    if (resi != intPopcntTable[i].res)
                    {
                        Console.WriteLine("{0}: Inputs: 0x{1,16:x} Expected: 0x{2,16:x} actual: 0x{3,16:x}",
                                          i, si, intPopcntTable[i].res, resi);
                        testResult = Fail;
                    }

                    resi = Convert.ToInt32(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { si.GetType() }).Invoke(null, new object[] { si }));
                    if (resi != intPopcntTable[i].res)
                    {
                        Console.WriteLine("{0}: Inputs: 0x{1,16:x} Expected: 0x{2,16:x} actual: 0x{3,16:x} - Reflection",
                                          i, si, intPopcntTable[i].res, resi);
                        testResult = Fail;
                    }
                }
            }

            return testResult;
        }
示例#18
0
文件: pop.cs 项目: 0xCM/arrows
 /// <summary>
 /// Returns the number of set bits in <paramref name="src"/> as reported by
 /// <c>Popcnt.PopCount</c>.
 /// </summary>
 /// <param name="src">The 32-bit value to inspect.</param>
 public static uint pop(uint src)
 {
     return Popcnt.PopCount(src);
 }
示例#19
0
        /// <summary>
        /// Compares <paramref name="size"/> bytes at <paramref name="p1"/> with the bytes at
        /// <paramref name="p2"/>, front to back.
        /// </summary>
        /// <param name="p1">Start of the first buffer.</param>
        /// <param name="p2">Start of the second buffer.</param>
        /// <param name="size">Number of bytes to compare.</param>
        /// <returns>
        /// 0 when no difference is found; otherwise the byte of <paramref name="p1"/> minus the
        /// byte of <paramref name="p2"/> at the first position where they differ.
        /// </returns>
        private static int CompareScalarCmpAltPopCount(void *p1, void *p2, int size)
        {
            byte *bpx = (byte *)p1;

            // PERF: This allows us to do pointer arithmetics and use relative addressing using the
            //       hardware instructions without needing an extra register.
            // (bpx + offset) always addresses the byte of p2 that corresponds to bpx in p1.
            long offset = (byte *)p2 - bpx;

            // A size that is a multiple of 8 has no odd-sized head to handle.
            if ((size & 7) == 0)
            {
                goto ProcessAligned;
            }

            // We process first the "unaligned" size.
            // The low three bits of size select a 4-, 2- and/or 1-byte chunk taken from the front,
            // which leaves a multiple of 8 bytes for the main loop below.
            ulong xor;

            if ((size & 4) != 0)
            {
                xor = *((uint *)bpx) ^ *((uint *)(bpx + offset));
                if (xor != 0)
                {
                    goto Tail;
                }

                bpx += 4;
            }

            if ((size & 2) != 0)
            {
                xor = (ulong)(*((ushort *)bpx) ^ *((ushort *)(bpx + offset)));
                if (xor != 0)
                {
                    goto Tail;
                }

                bpx += 2;
            }

            if ((size & 1) != 0)
            {
                // A single byte needs no bit tricks: the difference is the result.
                int value = *bpx - *(bpx + offset);
                if (value != 0)
                {
                    return(value);
                }

                bpx += 1;
            }

ProcessAligned:

            byte *end = (byte *)p1 + size;
            byte *loopEnd = end - 16;

            // Main loop: 16 bytes per iteration, compared as two 8-byte words.
            while (bpx <= loopEnd)
            {
                // PERF: JIT will emit: ```{op} {reg}, qword ptr [rdx+rax]```
                if (*((ulong *)bpx) != *(ulong *)(bpx + offset))
                {
                    goto XorTail;
                }

                if (*((ulong *)(bpx + 8)) != *(ulong *)(bpx + 8 + offset))
                {
                    // Point bpx at the word that differs before computing the xor.
                    bpx += 8;
                    goto XorTail;
                }

                bpx += 16;
            }

            // Fewer than 16 bytes are left here; since the remainder is a multiple of 8,
            // anything left is one final 8-byte word.
            if (bpx < end)
            {
                goto XorTail;
            }

            return(0);

            XorTail : xor = *((ulong *)bpx) ^ *(ulong *)(bpx + offset);

Tail:

            // Fast-path for equals
            if (xor == 0)
            {
                return(0);
            }

            // PERF: This is a bit twiddling hack. Given that bitwise xoring 2 values flag the bits difference,
            //       we can use that we know we are running on little endian hardware and the very first bit set
            //       will correspond to the first byte which is different.

            // (xor & -xor) isolates the lowest set bit; subtracting 1 turns it into a mask of all
            // lower bits, so the population count equals the index of that bit. Shifting right by 3
            // converts the bit index into a byte index, which is added to bpx.
            bpx += Popcnt.PopCount((ulong)((long)xor & -(long)xor) - 1) >> 3;
            return(*bpx - *(bpx + offset));
        }
示例#20
0
 /// <summary>
 /// Returns the result of <c>Popcnt.PopCount</c> applied to <c>MaxValue</c>.
 /// </summary>
 public uint PopCount()
 {
     return Popcnt.PopCount(MaxValue);
 }