/// <summary>
/// Determines whether every one of the four single-precision lanes of
/// <paramref name="vector1"/> compares equal to the corresponding lane of
/// <paramref name="vector2"/>.
/// </summary>
/// <param name="vector1">The first vector to compare.</param>
/// <param name="vector2">The second vector to compare.</param>
/// <returns>
/// <c>true</c> when all four lane comparisons report equality; otherwise <c>false</c>.
/// Lane equality is whatever the underlying hardware compare instruction reports.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// Neither <c>AdvSimd.Arm64</c> nor <c>Sse</c> is supported on the current hardware.
/// </exception>
/// <remarks>
/// Modelled on the DirectX Math Library XMVector4Equal method:
/// https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathVector.inl
/// </remarks>
public static bool Equal(Vector128<float> vector1, Vector128<float> vector2)
{
    if (AdvSimd.Arm64.IsSupported)
    {
        // Per-lane compare: each 32-bit lane becomes all ones (equal) or all zeros.
        Vector128<uint> laneMask = AdvSimd.CompareEqual(vector1, vector2).AsUInt32();

        Vector64<byte> lowerBytes = laneMask.GetLower().AsByte();
        Vector64<byte> upperBytes = laneMask.GetUpper().AsByte();

        // Two rounds of interleaving (bytes, then 16-bit pairs) gather one byte from
        // each of the four lane masks into a single 32-bit element.
        Vector64<ushort> zippedLow = AdvSimd.Arm64.ZipLow(lowerBytes, upperBytes).AsUInt16();
        Vector64<ushort> zippedHigh = AdvSimd.Arm64.ZipHigh(lowerBytes, upperBytes).AsUInt16();
        Vector64<uint> gathered = AdvSimd.Arm64.ZipHigh(zippedLow, zippedHigh).AsUInt32();

        // All four gathered bytes are 0xFF only when every lane compared equal.
        return gathered.GetElement(1) == 0xFFFFFFFF;
    }

    if (Sse.IsSupported)
    {
        // One mask bit per lane that compared not-equal; zero means none differed.
        int differingLanes = Sse.MoveMask(Sse.CompareNotEqual(vector1, vector2));
        return differingLanes == 0;
    }

    // No supported SIMD instruction set is available for this comparison.
    throw new PlatformNotSupportedException();
}
/// <summary>
/// Takes four 16-bit elements packed into <paramref name="value"/>, keeps the low byte
/// of each, and stores the resulting four bytes starting at <paramref name="outputBuffer"/>.
/// </summary>
/// <param name="outputBuffer">Reference to the first of the four destination bytes.</param>
/// <param name="value">
/// Four packed 16-bit elements; a debug assertion checks them with
/// <c>AllCharsInUInt64AreAscii</c>.
/// </param>
private static void NarrowFourUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBuffer, ulong value)
{
    Debug.Assert(AllCharsInUInt64AreAscii(value));

#if NETCOREAPP3_1
    if (Bmi2.X64.IsSupported)
    {
        // Gather the low byte of every 16-bit element into the low 32 bits.
        // BMI2 will work regardless of the processor's endianness.
        uint packed = (uint)Bmi2.X64.ParallelBitExtract(value, 0x00FF00FF_00FF00FFul);
        Unsafe.WriteUnaligned<uint>(ref outputBuffer, packed);
        return;
    }
#else
    if (Sse2.X64.IsSupported)
    {
        // Pack the words [ w0 w1 w2 w3 ] down to bytes [ b0 b1 b2 b3 b0 b1 b2 b3 ]
        // and store the low 4 bytes (32 bits) to the destination.
        Vector128<short> words = Sse2.X64.ConvertScalarToVector128UInt64(value).AsInt16();
        Vector128<uint> packed = Sse2.PackUnsignedSaturate(words, words).AsUInt32();
        Unsafe.WriteUnaligned<uint>(ref outputBuffer, Sse2.ConvertToUInt32(packed));
        return;
    }

    if (AdvSimd.IsSupported)
    {
        // Narrow the words [ w0 w1 w2 w3 ] to bytes [ b0 b1 b2 b3 * * * * ]
        // and store the low 4 bytes (32 bits) to the destination.
        Vector128<short> words = Vector128.CreateScalarUnsafe(value).AsInt16();
        Vector64<byte> narrowed = AdvSimd.ExtractNarrowingSaturateUnsignedLower(words);
        Unsafe.WriteUnaligned<uint>(ref outputBuffer, narrowed.AsUInt32().ToScalar());
        return;
    }
#endif

    // Scalar fallback: take the low byte of each 16-bit element, least significant first.
    byte fromBits0 = (byte)value;
    byte fromBits16 = (byte)(value >> 16);
    byte fromBits32 = (byte)(value >> 32);
    byte fromBits48 = (byte)(value >> 48);

    if (BitConverter.IsLittleEndian)
    {
        // Least significant element goes to the first output byte.
        outputBuffer = fromBits0;
        Unsafe.Add(ref outputBuffer, 1) = fromBits16;
        Unsafe.Add(ref outputBuffer, 2) = fromBits32;
        Unsafe.Add(ref outputBuffer, 3) = fromBits48;
    }
    else
    {
        // Least significant element goes to the last output byte.
        Unsafe.Add(ref outputBuffer, 3) = fromBits0;
        Unsafe.Add(ref outputBuffer, 2) = fromBits16;
        Unsafe.Add(ref outputBuffer, 1) = fromBits32;
        outputBuffer = fromBits48;
    }
}