Example #1
        public void CollectHistogram(Span<byte> reference, Span<byte> pred, int startBlock, int endBlock)
        {
            int j;

            this.distribution.AsSpan().Clear();
            for (j = startBlock; j < endBlock; j++)
            {
                Vp8Encoding.FTransform(reference.Slice(WebpLookupTables.Vp8DspScan[j]), pred.Slice(WebpLookupTables.Vp8DspScan[j]), this.output, this.scratch);

                // Convert coefficients to bin.
#if SUPPORTS_RUNTIME_INTRINSICS
                if (Avx2.IsSupported)
                {
                    // Load.
                    ref short outputRef = ref MemoryMarshal.GetReference<short>(this.output);
                    Vector256<byte> out0 = Unsafe.As<short, Vector256<byte>>(ref outputRef);

                    // v = abs(out) >> 3
                    Vector256<ushort> abs0 = Avx2.Abs(out0.AsInt16());
                    Vector256<short> v0 = Avx2.ShiftRightArithmetic(abs0.AsInt16(), 3);

                    // bin = min(v, MAX_COEFF_THRESH)
                    Vector256<short> min0 = Avx2.Min(v0, MaxCoeffThreshVec);

                    // Store.
                    Unsafe.As<short, Vector256<short>>(ref outputRef) = min0;

                    // Convert coefficients to bin.
                    for (int k = 0; k < 16; ++k)
                    {
                        ++this.distribution[this.output[k]];
                    }
                }
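The AVX2 path above vectorizes the binning rule stated in its comments: each transform coefficient is reduced to a histogram bin via min(abs(coeff) >> 3, MAX_COEFF_THRESH). A scalar sketch of that same rule, assuming MaxCoeffThresh is the scalar counterpart of MaxCoeffThreshVec (31 in libwebp) and that output and distribution mean the same as the fields above:

        // Scalar sketch of the binning above; names other than output/distribution are assumptions.
        private static void CollectHistogramScalar(ReadOnlySpan<short> output, Span<int> distribution)
        {
            const int MaxCoeffThresh = 31; // scalar stand-in for MaxCoeffThreshVec
            for (int k = 0; k < 16; k++)
            {
                int v = Math.Abs(output[k]) >> 3;      // v = abs(out) >> 3
                int bin = Math.Min(v, MaxCoeffThresh); // bin = min(v, MAX_COEFF_THRESH)
                distribution[bin]++;                   // accumulate the histogram
            }
        }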
Example #2
        public static int QuantizeBlock(Span<short> input, Span<short> output, ref Vp8Matrix mtx)
        {
#if SUPPORTS_RUNTIME_INTRINSICS
            if (Avx2.IsSupported)
            {
                // Load all inputs.
                Vector256<short> input0 = Unsafe.As<short, Vector256<short>>(ref MemoryMarshal.GetReference(input));
                Vector256<ushort> iq0 = Unsafe.As<ushort, Vector256<ushort>>(ref mtx.IQ[0]);
                Vector256<ushort> q0 = Unsafe.As<ushort, Vector256<ushort>>(ref mtx.Q[0]);

                // coeff = abs(in)
                Vector256<ushort> coeff0 = Avx2.Abs(input0);

                // coeff = abs(in) + sharpen
                Vector256<short> sharpen0 = Unsafe.As<short, Vector256<short>>(ref mtx.Sharpen[0]);
                coeff0 = Avx2.Add(coeff0.AsInt16(), sharpen0).AsUInt16();

                // out = (coeff * iQ + B) >> QFIX
                // doing calculations with 32b precision (QFIX=17)
                // out = (coeff * iQ)
                Vector256<ushort> coeffiQ0H = Avx2.MultiplyHigh(coeff0, iq0);
                Vector256<ushort> coeffiQ0L = Avx2.MultiplyLow(coeff0, iq0);
                Vector256<ushort> out00 = Avx2.UnpackLow(coeffiQ0L, coeffiQ0H);
                Vector256<ushort> out08 = Avx2.UnpackHigh(coeffiQ0L, coeffiQ0H);

                // out = (coeff * iQ + B)
                Vector256<uint> bias00 = Unsafe.As<uint, Vector256<uint>>(ref mtx.Bias[0]);
                Vector256<uint> bias08 = Unsafe.As<uint, Vector256<uint>>(ref mtx.Bias[8]);
                out00 = Avx2.Add(out00.AsInt32(), bias00.AsInt32()).AsUInt16();
                out08 = Avx2.Add(out08.AsInt32(), bias08.AsInt32()).AsUInt16();

                // out = QUANTDIV(coeff, iQ, B, QFIX)
                out00 = Avx2.ShiftRightArithmetic(out00.AsInt32(), WebpConstants.QFix).AsUInt16();
                out08 = Avx2.ShiftRightArithmetic(out08.AsInt32(), WebpConstants.QFix).AsUInt16();

                // Pack result as 16b.
                Vector256<short> out0 = Avx2.PackSignedSaturate(out00.AsInt32(), out08.AsInt32());

                // if (coeff > 2047) coeff = 2047
                out0 = Avx2.Min(out0, MaxCoeff2047Vec256);

                // Put the sign back.
                out0 = Avx2.Sign(out0, input0);

                // in = out * Q
                input0 = Avx2.MultiplyLow(out0, q0.AsInt16());
                ref short inputRef = ref MemoryMarshal.GetReference(input);
                Unsafe.As<short, Vector256<short>>(ref inputRef) = input0;

                // zigzag the output before storing it.
                Vector256<byte> tmp256 = Avx2.Shuffle(out0.AsByte(), Cst256);
                Vector256<byte> tmp78 = Avx2.Shuffle(out0.AsByte(), Cst78);

                // Reverse the order of the 16-byte lanes.
                Vector256<byte> tmp87 = Avx2.Permute2x128(tmp78, tmp78, 1);
                Vector256<short> outZ = Avx2.Or(tmp256, tmp87).AsInt16();

                ref short outputRef = ref MemoryMarshal.GetReference(output);
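Per coefficient, the AVX2 block above computes the quantized level described by its comments: (abs(in) + sharpen) * iQ + B, shifted right by QFIX (17), clamped to 2047, with the sign restored and the dequantized value written back to the input. A scalar sketch of that per-coefficient math, assuming the Vp8Matrix fields have the element types used above (IQ/Q as ushort, Bias as uint, Sharpen as short) and leaving out the zigzag reordering of the output:

        // Scalar sketch of the per-coefficient quantization; zigzag reordering is omitted.
        public static int QuantizeBlockScalar(Span<short> input, Span<short> output, ref Vp8Matrix mtx)
        {
            const int QFix = 17;
            int nonZero = 0;
            for (int n = 0; n < 16; n++)
            {
                int sign = input[n] < 0 ? -1 : 1;
                int coeff = Math.Abs((int)input[n]) + mtx.Sharpen[n];               // coeff = abs(in) + sharpen
                int level = (int)(((uint)coeff * mtx.IQ[n] + mtx.Bias[n]) >> QFix); // (coeff * iQ + B) >> QFIX
                if (level > 2047)
                {
                    level = 2047;                                                   // if (coeff > 2047) coeff = 2047
                }

                output[n] = (short)(sign * level);                                  // put the sign back
                input[n] = (short)(output[n] * mtx.Q[n]);                           // in = out * Q
                nonZero |= level;
            }

            return nonZero != 0 ? 1 : 0; // non-zero flag, which QuantizeBlock is assumed to report
        }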
Example #3
File: Program.cs  Project: z77ma/runtime
    static int Main()
    {
        s_success = true;

        // We expect the AOT compiler generated HW intrinsics with the following characteristics:
        //
        // * TRUE = IsSupported assumed to be true, no runtime check
        // * NULL = IsSupported is a runtime check, code should be behind the check or bad things happen
        // * FALSE = IsSupported assumed to be false, no runtime check, PlatformNotSupportedException if used
        //
        // The test is compiled with multiple defines to test this.

#if BASELINE_INTRINSICS
        bool vectorsAccelerated = true;
        int byteVectorLength = 16;
        bool? Sse2AndBelow = true;
        bool? Sse3Group = null;
        bool? AesLzPcl = null;
        bool? Sse4142 = null;
        bool? PopCnt = null;
        bool? Avx12 = false;
        bool? FmaBmi12 = false;
        bool? Avxvnni = false;
#elif NON_VEX_INTRINSICS
        bool vectorsAccelerated = true;
        int byteVectorLength = 16;
        bool? Sse2AndBelow = true;
        bool? Sse3Group = true;
        bool? AesLzPcl = null;
        bool? Sse4142 = true;
        bool? PopCnt = null;
        bool? Avx12 = false;
        bool? FmaBmi12 = false;
        bool? Avxvnni = false;
#elif VEX_INTRINSICS
        bool vectorsAccelerated = true;
        int byteVectorLength = 32;
        bool? Sse2AndBelow = true;
        bool? Sse3Group = true;
        bool? AesLzPcl = null;
        bool? Sse4142 = true;
        bool? PopCnt = null;
        bool? Avx12 = true;
        bool? FmaBmi12 = null;
        bool? Avxvnni = null;
#else
#error Who dis?
#endif

        if (vectorsAccelerated != Vector.IsHardwareAccelerated)
        {
            throw new Exception($"Vectors HW acceleration state unexpected - expected {vectorsAccelerated}, got {Vector.IsHardwareAccelerated}");
        }

        if (byteVectorLength != Vector<byte>.Count)
        {
            throw new Exception($"Unexpected vector length - expected {byteVectorLength}, got {Vector<byte>.Count}");
        }

        Check("Sse", Sse2AndBelow, &SseIsSupported, Sse.IsSupported, () => Sse.Subtract(Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Sse.X64", Sse2AndBelow, &SseX64IsSupported, Sse.X64.IsSupported, () => Sse.X64.ConvertToInt64WithTruncation(Vector128 <float> .Zero) == 0);

        Check("Sse2", Sse2AndBelow, &Sse2IsSupported, Sse2.IsSupported, () => Sse2.Extract(Vector128 <ushort> .Zero, 0) == 0);
        Check("Sse2.X64", Sse2AndBelow, &Sse2X64IsSupported, Sse2.X64.IsSupported, () => Sse2.X64.ConvertToInt64(Vector128 <double> .Zero) == 0);

        Check("Sse3", Sse3Group, &Sse3IsSupported, Sse3.IsSupported, () => Sse3.MoveHighAndDuplicate(Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Sse3.X64", Sse3Group, &Sse3X64IsSupported, Sse3.X64.IsSupported, null);

        Check("Ssse3", Sse3Group, &Ssse3IsSupported, Ssse3.IsSupported, () => Ssse3.Abs(Vector128 <short> .Zero).Equals(Vector128 <ushort> .Zero));
        Check("Ssse3.X64", Sse3Group, &Ssse3X64IsSupported, Ssse3.X64.IsSupported, null);

        Check("Sse41", Sse4142, &Sse41IsSupported, Sse41.IsSupported, () => Sse41.Max(Vector128 <int> .Zero, Vector128 <int> .Zero).Equals(Vector128 <int> .Zero));
        Check("Sse41.X64", Sse4142, &Sse41X64IsSupported, Sse41.X64.IsSupported, () => Sse41.X64.Extract(Vector128 <long> .Zero, 0) == 0);

        Check("Sse42", Sse4142, &Sse42IsSupported, Sse42.IsSupported, () => Sse42.Crc32(0, 0) == 0);
        Check("Sse42.X64", Sse4142, &Sse42X64IsSupported, Sse42.X64.IsSupported, () => Sse42.X64.Crc32(0, 0) == 0);

        Check("Aes", AesLzPcl, &AesIsSupported, Aes.IsSupported, () => Aes.KeygenAssist(Vector128 <byte> .Zero, 0).Equals(Vector128.Create((byte)99)));
        Check("Aes.X64", AesLzPcl, &AesX64IsSupported, Aes.X64.IsSupported, null);

        Check("Avx", Avx12, &AvxIsSupported, Avx.IsSupported, () => Avx.Add(Vector256 <double> .Zero, Vector256 <double> .Zero).Equals(Vector256 <double> .Zero));
        Check("Avx.X64", Avx12, &AvxX64IsSupported, Avx.X64.IsSupported, null);

        Check("Avx2", Avx12, &Avx2IsSupported, Avx2.IsSupported, () => Avx2.Abs(Vector256 <int> .Zero).Equals(Vector256 <uint> .Zero));
        Check("Avx2.X64", Avx12, &Avx2X64IsSupported, Avx2.X64.IsSupported, null);

        Check("Bmi1", FmaBmi12, &Bmi1IsSupported, Bmi1.IsSupported, () => Bmi1.AndNot(0, 0) == 0);
        Check("Bmi1.X64", FmaBmi12, &Bmi1X64IsSupported, Bmi1.X64.IsSupported, () => Bmi1.X64.AndNot(0, 0) == 0);

        Check("Bmi2", FmaBmi12, &Bmi2IsSupported, Bmi2.IsSupported, () => Bmi2.MultiplyNoFlags(0, 0) == 0);
        Check("Bmi2.X64", FmaBmi12, &Bmi2X64IsSupported, Bmi2.X64.IsSupported, () => Bmi2.X64.MultiplyNoFlags(0, 0) == 0);

        Check("Fma", FmaBmi12, &FmaIsSupported, Fma.IsSupported, () => Fma.MultiplyAdd(Vector128 <float> .Zero, Vector128 <float> .Zero, Vector128 <float> .Zero).Equals(Vector128 <float> .Zero));
        Check("Fma.X64", FmaBmi12, &FmaX64IsSupported, Fma.X64.IsSupported, null);

        Check("Lzcnt", AesLzPcl, &LzcntIsSupported, Lzcnt.IsSupported, () => Lzcnt.LeadingZeroCount(0) == 32);
        Check("Lzcnt.X64", AesLzPcl, &LzcntX64IsSupported, Lzcnt.X64.IsSupported, () => Lzcnt.X64.LeadingZeroCount(0) == 64);

        Check("Pclmulqdq", AesLzPcl, &PclmulqdqIsSupported, Pclmulqdq.IsSupported, () => Pclmulqdq.CarrylessMultiply(Vector128 <long> .Zero, Vector128 <long> .Zero, 0).Equals(Vector128 <long> .Zero));
        Check("Pclmulqdq.X64", AesLzPcl, &PclmulqdqX64IsSupported, Pclmulqdq.X64.IsSupported, null);

        Check("Popcnt", PopCnt, &PopcntIsSupported, Popcnt.IsSupported, () => Popcnt.PopCount(0) == 0);
        Check("Popcnt.X64", PopCnt, &PopcntX64IsSupported, Popcnt.X64.IsSupported, () => Popcnt.X64.PopCount(0) == 0);

        Check("AvxVnni", Avxvnni, &AvxVnniIsSupported, AvxVnni.IsSupported, () => AvxVnni.MultiplyWideningAndAdd(Vector128 <int> .Zero, Vector128 <byte> .Zero, Vector128 <sbyte> .Zero).Equals(Vector128 <int> .Zero));
        Check("AvxVnni.X64", Avxvnni, &AvxVnniX64IsSupported, AvxVnni.X64.IsSupported, null);

        return s_success ? 100 : 1;
    }
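Each Check call above pairs a compile-time expectation (TRUE/NULL/FALSE, as described in the comment at the top of Main) with the runtime IsSupported value and an optional probe that exercises one intrinsic from that ISA. The helper itself is not shown in this snippet; the following is only a hypothetical sketch with a matching call shape, where the function-pointer argument is assumed to be a non-inlined IsSupported accessor and s_success is the flag set at the top of Main:

    // Hypothetical sketch of a checker matching the calls above; not the actual helper from z77ma/runtime.
    static unsafe void Check(string name, bool? expected, delegate*<bool> isSupportedAccessor, bool isSupported, Func<bool> test)
    {
        // The indirect accessor should agree with the directly inlined IsSupported value.
        if (isSupportedAccessor() != isSupported)
        {
            Console.WriteLine($"{name}: IsSupported accessor disagrees with inlined value");
            s_success = false;
        }

        // TRUE/FALSE expectations must match exactly; NULL means the result is a pure runtime check.
        if (expected.HasValue && expected.Value != isSupported)
        {
            Console.WriteLine($"{name}: expected IsSupported={expected.Value}, got {isSupported}");
            s_success = false;
        }

        // Only exercise the intrinsic when the runtime reports support.
        if (isSupported && test != null && !test())
        {
            Console.WriteLine($"{name}: intrinsic sanity check failed");
            s_success = false;
        }
    }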
Example #4
 public static i32 Abs_i32(i32 a)
 {
     return Avx2.Abs(a).AsInt32();
 }
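This snippet only compiles if i32 is a project-level alias for Vector256<int>, a convention some SIMD ports use but which is not shown here. Under that assumption, a self-contained version with a small usage example might look like the following (the alias, class name, and Main are illustrative):

 using System;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 using i32 = System.Runtime.Intrinsics.Vector256<int>;

 public static class AbsExample
 {
     // Avx2.Abs on packed 32-bit integers yields Vector256<uint>; AsInt32 reinterprets the bits back.
     public static i32 Abs_i32(i32 a) => Avx2.Abs(a).AsInt32();

     public static void Main()
     {
         if (!Avx2.IsSupported)
         {
             Console.WriteLine("AVX2 not available on this machine.");
             return;
         }

         i32 v = Vector256.Create(-3, 5, -7, 9, -11, 13, -15, 17);
         Console.WriteLine(Abs_i32(v)); // prints <3, 5, 7, 9, 11, 13, 15, 17>
     }
 }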