Exemplo n.º 1
0
        /// <summary>
        /// Passes the three instance fields directly to <c>Sse41.BlendVariable</c>,
        /// writes the blended vector to the data table's output pointer, and hands
        /// the same fields plus that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

            var blended = Sse41.BlendVariable(_fld1, _fld2, _fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads the three input buffers into locals with
        /// <c>Sse2.LoadAlignedVector128</c>, blends them with
        /// <c>Sse41.BlendVariable</c>, writes the result to the output pointer and
        /// passes the locals plus the output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            Vector128<Byte> left     = Sse2.LoadAlignedVector128((Byte*)_dataTable.inArray1Ptr);
            Vector128<Byte> right    = Sse2.LoadAlignedVector128((Byte*)_dataTable.inArray2Ptr);
            Vector128<Byte> selector = Sse2.LoadAlignedVector128((Byte*)_dataTable.inArray3Ptr);

            Vector128<Byte> blended = Sse41.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Creates a fresh <c>SimpleTernaryOpTest__BlendVariableUInt16</c> instance,
        /// blends its three fields with <c>Sse41.BlendVariable</c>, writes the result
        /// to this object's output pointer and passes the new instance's fields plus
        /// that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

            var instance = new SimpleTernaryOpTest__BlendVariableUInt16();

            var blended = Sse41.BlendVariable(instance._fld1, instance._fld2, instance._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(instance._fld1, instance._fld2, instance._fld3, _dataTable.outArrayPtr);
        }
Exemplo n.º 4
0
 // Entry point of the snippet. NOTE(review): the return statement and the
 // method's closing brace are not part of this excerpt.
 public static int Main()
 {
     // The intrinsic is only invoked when SSE4.1 is reported as supported.
     if (Sse41.IsSupported)
     {
         Vector128 <int> left     = Vector128.Create(1);
         Vector128 <int> right    = Vector128.Create(2);
         // The second operand is supplied through a ref local that aliases 'right'.
         ref var         rightRef = ref right;
         Vector128 <int> mask     = Vector128.Create(3);
         // The blended vector is not stored or inspected here.
         Sse41.BlendVariable(left, rightRef, mask);
     }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the three input buffers into locals with <c>Unsafe.Read</c>, blends
        /// them with <c>Sse41.BlendVariable</c>, writes the result to the output
        /// pointer and passes the locals plus the output pointer to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            Vector128<Byte> left     = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr);
            Vector128<Byte> right    = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray2Ptr);
            Vector128<Byte> selector = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray3Ptr);

            Vector128<Byte> blended = Sse41.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Obtains a value from <c>TestStruct.Create()</c>, blends its three fields
        /// with <c>Sse41.BlendVariable</c>, writes the result to the output pointer
        /// and passes the struct's fields plus that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

            var local = TestStruct.Create();

            var blended = Sse41.BlendVariable(local._fld1, local._fld2, local._fld3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
 /// <summary>
 /// Chooses between <paramref name="a"/> and <paramref name="b"/> under control of
 /// the mask <paramref name="m"/>.
 /// </summary>
 /// <remarks>
 /// With SSE4.1 this is <c>Sse41.BlendVariable(b, a, mask)</c>; otherwise it is the
 /// bitwise form "b XOR (mask AND (a XOR b))" built from the local Xor/And helpers.
 /// </remarks>
 /// <param name="m">Selection mask, reinterpreted through <c>AsSingle()</c>.</param>
 /// <param name="a">Second blend operand (chosen where the mask selects).</param>
 /// <param name="b">First blend operand (kept where the mask does not select).</param>
 /// <returns>The blended vector.</returns>
 public static f32 Select_f32(m32 m, f32 a, f32 b)
 {
     if (!Sse41.IsSupported)
     {
         // Software fallback: flip exactly the bits of b that differ from a under the mask.
         var maskedDifference = And(m.AsSingle(), Xor(a, b));
         return Xor(b, maskedDifference);
     }

     return Sse41.BlendVariable(b, a, m.AsSingle());
 }
Exemplo n.º 8
0
        /// <summary>
        /// Blends the three <c>_clsVar</c> members with <c>Sse41.BlendVariable</c>,
        /// writes the result to the output pointer and passes the same members plus
        /// that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClsVarScenario()
        {
            var blended = Sse41.BlendVariable(_clsVar1, _clsVar2, _clsVar3);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
        }
 /// <summary>
 /// Chooses between <paramref name="a"/> and <paramref name="b"/> under control of
 /// the mask <paramref name="m"/>.
 /// </summary>
 /// <remarks>
 /// With SSE4.1 this is <c>Sse41.BlendVariable(b, a, m)</c>; otherwise it is the
 /// bitwise form "b XOR (m AND (a XOR b))" built from the local Xor/And helpers.
 /// </remarks>
 /// <param name="m">Selection mask.</param>
 /// <param name="a">Second blend operand (chosen where the mask selects).</param>
 /// <param name="b">First blend operand (kept where the mask does not select).</param>
 /// <returns>The blended vector.</returns>
 public static i32 Select_i32(m32 m, i32 a, i32 b)
 {
     if (!Sse41.IsSupported)
     {
         // Software fallback: flip exactly the bits of b that differ from a under the mask.
         var maskedDifference = And(m, Xor(a, b));
         return Xor(b, maskedDifference);
     }

     return Sse41.BlendVariable(b, a, m);
 }
Exemplo n.º 10
0
        /// <summary>
        /// Loads each input buffer with <c>Sse2.LoadAlignedVector128</c> directly in
        /// the argument list of <c>Sse41.BlendVariable</c>, writes the result to the
        /// output pointer and passes the raw input and output pointers to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            var blended = Sse41.BlendVariable(
                Sse2.LoadAlignedVector128((Byte*)_dataTable.inArray1Ptr),
                Sse2.LoadAlignedVector128((Byte*)_dataTable.inArray2Ptr),
                Sse2.LoadAlignedVector128((Byte*)_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Reads each input buffer with <c>Unsafe.Read</c> directly in the argument
        /// list of <c>Sse41.BlendVariable</c>, writes the result to the output
        /// pointer and passes the raw input and output pointers to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            var blended = Sse41.BlendVariable(
                Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<Byte>>(_dataTable.inArray2Ptr),
                Unsafe.Read<Vector128<Byte>>(_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Loads the three input buffers as <c>Int16</c> vectors into locals with
        /// <c>Sse2.LoadAlignedVector128</c>, blends them with
        /// <c>Sse41.BlendVariable</c>, writes the result to the output pointer and
        /// passes the locals plus the output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            Vector128<Int16> left     = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray1Ptr);
            Vector128<Int16> right    = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray2Ptr);
            Vector128<Int16> selector = Sse2.LoadAlignedVector128((Int16*)_dataTable.inArray3Ptr);

            Vector128<Int16> blended = Sse41.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Exemplo n.º 13
0
        /// <summary>
        /// Reads the three input buffers as <c>Int16</c> vectors into locals with
        /// <c>Unsafe.Read</c>, blends them with <c>Sse41.BlendVariable</c>, writes
        /// the result to the output pointer and passes the locals plus the output
        /// pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            Vector128<Int16> left     = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
            Vector128<Int16> right    = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
            Vector128<Int16> selector = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray3Ptr);

            Vector128<Int16> blended = Sse41.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Exemplo n.º 14
0
        /// <summary>
        /// Reads the three input buffers as <c>SByte</c> vectors into locals with
        /// <c>Unsafe.Read</c>, blends them with <c>Sse41.BlendVariable</c>, writes
        /// the result to the output pointer and passes the locals plus the output
        /// pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            Vector128<SByte> left     = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray1Ptr);
            Vector128<SByte> right    = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
            Vector128<SByte> selector = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray3Ptr);

            Vector128<SByte> blended = Sse41.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Loads the three input buffers as <c>UInt16</c> vectors into locals with
        /// <c>Sse2.LoadAlignedVector128</c>, blends them with
        /// <c>Sse41.BlendVariable</c>, writes the result to the output pointer and
        /// passes the locals plus the output pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            Vector128<UInt16> left     = Sse2.LoadAlignedVector128((UInt16*)_dataTable.inArray1Ptr);
            Vector128<UInt16> right    = Sse2.LoadAlignedVector128((UInt16*)_dataTable.inArray2Ptr);
            Vector128<UInt16> selector = Sse2.LoadAlignedVector128((UInt16*)_dataTable.inArray3Ptr);

            Vector128<UInt16> blended = Sse41.BlendVariable(left, right, selector);
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(left, right, selector, _dataTable.outArrayPtr);
        }
Exemplo n.º 16
0
        /// <summary>
        /// Reads each input buffer as a <c>UInt16</c> vector with <c>Unsafe.Read</c>
        /// directly in the argument list of <c>Sse41.BlendVariable</c>, writes the
        /// result to the output pointer and passes the raw input and output
        /// pointers to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            var blended = Sse41.BlendVariable(
                Unsafe.Read<Vector128<UInt16>>(_dataTable.inArray1Ptr),
                Unsafe.Read<Vector128<UInt16>>(_dataTable.inArray2Ptr),
                Unsafe.Read<Vector128<UInt16>>(_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Exemplo n.º 17
0
        /// <summary>
        /// Loads each input buffer as a <c>UInt16</c> vector with
        /// <c>Sse2.LoadAlignedVector128</c> directly in the argument list of
        /// <c>Sse41.BlendVariable</c>, writes the result to the output pointer and
        /// passes the raw input and output pointers to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));

            var blended = Sse41.BlendVariable(
                Sse2.LoadAlignedVector128((UInt16*)_dataTable.inArray1Ptr),
                Sse2.LoadAlignedVector128((UInt16*)_dataTable.inArray2Ptr),
                Sse2.LoadAlignedVector128((UInt16*)_dataTable.inArray3Ptr));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(
                _dataTable.inArray1Ptr,
                _dataTable.inArray2Ptr,
                _dataTable.inArray3Ptr,
                _dataTable.outArrayPtr);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Obtains a value from <c>TestStruct.Create()</c>, loads each of its three
        /// fields through its address with <c>Sse2.LoadVector128</c>, blends them
        /// with <c>Sse41.BlendVariable</c>, writes the result to the output pointer
        /// and passes the struct's fields plus that pointer to
        /// <c>ValidateResult</c>.
        /// </summary>
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var local = TestStruct.Create();

            var blended = Sse41.BlendVariable(
                Sse2.LoadVector128((Int16*)&local._fld1),
                Sse2.LoadVector128((Int16*)&local._fld2),
                Sse2.LoadVector128((Int16*)&local._fld3));
            Unsafe.Write(_dataTable.outArrayPtr, blended);

            ValidateResult(local._fld1, local._fld2, local._fld3, _dataTable.outArrayPtr);
        }
Exemplo n.º 19
0
            /// <summary>
            /// Pins this instance's three fields, loads each through its pointer with
            /// <c>Sse2.LoadVector128</c>, blends them with <c>Sse41.BlendVariable</c>,
            /// then writes the result to <paramref name="testClass"/>'s output pointer
            /// and calls its <c>ValidateResult</c> with the fields and that pointer.
            /// </summary>
            /// <param name="testClass">Test object that supplies the output buffer and the validation routine.</param>
            public void RunStructFldScenario_Load(SimpleTernaryOpTest__BlendVariableInt16 testClass)
            {
                fixed (Vector128<Int16>* leftPtr = &_fld1)
                fixed (Vector128<Int16>* rightPtr = &_fld2)
                fixed (Vector128<Int16>* selectorPtr = &_fld3)
                {
                    var left     = Sse2.LoadVector128((Int16*)leftPtr);
                    var right    = Sse2.LoadVector128((Int16*)rightPtr);
                    var selector = Sse2.LoadVector128((Int16*)selectorPtr);

                    var blended = Sse41.BlendVariable(left, right, selector);
                    Unsafe.Write(testClass._dataTable.outArrayPtr, blended);

                    testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr);
                }
            }
Exemplo n.º 20
0
        /// <summary>
        /// Copies characters from <paramref name="input"/> into <c>LineBuffer</c>,
        /// substituting zero-valued characters along the way.
        /// </summary>
        /// <remarks>
        /// The vectorized path replaces zero lanes with <c>SpaceCharUShort</c>; the
        /// scalar tail replaces them with <c>' '</c>. At most
        /// <c>LineBuffer.Length</c> characters are copied.
        /// </remarks>
        /// <param name="input">Characters to copy.</param>
        /// <returns>The number of characters written: the smaller of the buffer length and the input length.</returns>
        private static unsafe int FillBuffer(ReadOnlySpan<char> input)
        {
            int length = Math.Min(LineBuffer.Length, input.Length);
            int pos    = 0;

            fixed (char* dst = LineBuffer, src = input)
            {
                if (Sse2.IsSupported && length >= Vector128<ushort>.Count)
                {
                    Vector128<ushort> spaces = Vector128.Create(SpaceCharUShort);

                    do
                    {
                        Vector128<ushort> chunk  = Sse2.LoadVector128((ushort*)src + pos);
                        Vector128<ushort> isZero = Sse2.CompareEqual(Vector128<ushort>.Zero, chunk);

                        // Without SSE4.1 the replacement is OR-ed in, which works because
                        // the lanes being replaced are already zero.
                        chunk = Sse41.IsSupported
                            ? Sse41.BlendVariable(chunk, spaces, isZero)
                            : Sse2.Or(chunk, Sse2.And(isZero, spaces));

                        Sse2.Store((ushort*)dst + pos, chunk);
                        pos += Vector128<ushort>.Count;
                    }
                    while (length - pos >= Vector128<ushort>.Count);
                }

                // Scalar tail for whatever the vector loop did not cover.
                for (; pos < length; pos++)
                {
                    char current = src[pos];
                    dst[pos] = current == 0 ? ' ' : current;
                }

                return length;
            }
        }
Exemplo n.º 21
0
        /// <summary>
        /// Pins the three <c>_clsVar</c> members, loads each through its pointer
        /// with <c>Sse2.LoadVector128</c>, blends them with
        /// <c>Sse41.BlendVariable</c>, writes the result to the output pointer and
        /// passes the members plus that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            fixed (Vector128<Int32>* leftPtr = &_clsVar1)
            fixed (Vector128<Int32>* rightPtr = &_clsVar2)
            fixed (Vector128<Int32>* selectorPtr = &_clsVar3)
            {
                var left     = Sse2.LoadVector128((Int32*)leftPtr);
                var right    = Sse2.LoadVector128((Int32*)rightPtr);
                var selector = Sse2.LoadVector128((Int32*)selectorPtr);

                var blended = Sse41.BlendVariable(left, right, selector);
                Unsafe.Write(_dataTable.outArrayPtr, blended);

                ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr);
            }
        }
Exemplo n.º 22
0
        /// <summary>
        /// Pins this object's three fields, loads each through its pointer with
        /// <c>Sse2.LoadVector128</c>, blends them with <c>Sse41.BlendVariable</c>,
        /// writes the result to the output pointer and passes the fields plus that
        /// pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            fixed (Vector128<Int16>* leftPtr = &_fld1)
            fixed (Vector128<Int16>* rightPtr = &_fld2)
            fixed (Vector128<Int16>* selectorPtr = &_fld3)
            {
                var left     = Sse2.LoadVector128((Int16*)leftPtr);
                var right    = Sse2.LoadVector128((Int16*)rightPtr);
                var selector = Sse2.LoadVector128((Int16*)selectorPtr);

                var blended = Sse41.BlendVariable(left, right, selector);
                Unsafe.Write(_dataTable.outArrayPtr, blended);

                ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr);
            }
        }
Exemplo n.º 23
0
        /// <summary>
        /// Blends <paramref name="left"/> and <paramref name="right"/> under control
        /// of <paramref name="selector"/>.
        /// </summary>
        /// <remarks>
        /// With SSE4.1 the work is done by <c>Sse41.BlendVariable</c>: on float lanes
        /// when <typeparamref name="T"/> is <c>float</c>, on double lanes when it is
        /// <c>double</c>, and on byte lanes for every other element type. Without
        /// SSE4.1 the result is built from the local And/AndNot/Or helpers applied to
        /// the selector reinterpreted as <typeparamref name="T"/>.
        /// </remarks>
        /// <typeparam name="T">Element type of the operands and the result.</typeparam>
        /// <typeparam name="U">Element type of the selector.</typeparam>
        /// <param name="left">First blend operand.</param>
        /// <param name="right">Second blend operand.</param>
        /// <param name="selector">Mask controlling which operand each position comes from.</param>
        /// <returns>The blended vector.</returns>
        public static Vector128<T> Select<T, U>(Vector128<T> left, Vector128<T> right, Vector128<U> selector)
            where T : struct where U : struct
        {
            if (!Sse41.IsSupported)
            {
                // Software fallback built from the bitwise helpers.
                Vector128<T> mask = selector.As<U, T>();
                return Or(And(mask, right), AndNot(mask, left));
            }

            if (typeof(T) == typeof(float))
            {
                var blendedSingles = Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), selector.AsSingle());
                return blendedSingles.As<float, T>();
            }

            if (typeof(T) == typeof(double))
            {
                var blendedDoubles = Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), selector.AsDouble());
                return blendedDoubles.As<double, T>();
            }

            var blendedBytes = Sse41.BlendVariable(left.AsByte(), right.AsByte(), selector.AsByte());
            return blendedBytes.As<byte, T>();
        }
Exemplo n.º 24
0
        /// <summary>
        /// Creates a fresh <c>SimpleTernaryOpTest__BlendVariableInt32</c> instance,
        /// pins its three fields, loads each through its pointer with
        /// <c>Sse2.LoadVector128</c>, blends them with <c>Sse41.BlendVariable</c>,
        /// writes the result to this object's output pointer and passes the new
        /// instance's fields plus that pointer to <c>ValidateResult</c>.
        /// </summary>
        public void RunClassLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario_Load));

            var instance = new SimpleTernaryOpTest__BlendVariableInt32();

            fixed (Vector128<Int32>* leftPtr = &instance._fld1)
            fixed (Vector128<Int32>* rightPtr = &instance._fld2)
            fixed (Vector128<Int32>* selectorPtr = &instance._fld3)
            {
                var left     = Sse2.LoadVector128((Int32*)leftPtr);
                var right    = Sse2.LoadVector128((Int32*)rightPtr);
                var selector = Sse2.LoadVector128((Int32*)selectorPtr);

                var blended = Sse41.BlendVariable(left, right, selector);
                Unsafe.Write(_dataTable.outArrayPtr, blended);

                ValidateResult(instance._fld1, instance._fld2, instance._fld3, _dataTable.outArrayPtr);
            }
        }
Exemplo n.º 25
0
            /// <summary>
            /// Copies <c>buffer.Length</c> UTF-16 code units from the memory addressed
            /// by <paramref name="state"/> into <paramref name="buffer"/>, writing a
            /// space wherever the source holds '+'.
            /// </summary>
            /// <param name="buffer">Destination; its length is the number of characters processed.</param>
            /// <param name="state">Address of the source characters, read as <c>ushort*</c>.</param>
            private static unsafe void ReplacePlusWithSpaceCore(Span<char> buffer, IntPtr state)
            {
                fixed (char* pinnedDestination = &MemoryMarshal.GetReference(buffer))
                {
                    ushort* source      = (ushort*)state.ToPointer();
                    ushort* destination = (ushort*)pinnedDestination;

                    nint length = (nint)(uint)buffer.Length;
                    nint pos    = 0;

                    if (Sse41.IsSupported && length >= Vector128<ushort>.Count)
                    {
                        Vector128<ushort> plusSigns = Vector128.Create((ushort)'+');
                        Vector128<ushort> spaces    = Vector128.Create((ushort)' ');

                        // Last start index at which a full vector still fits.
                        nint lastVectorStart = length - Vector128<ushort>.Count;

                        do
                        {
                            Vector128<ushort> chunk  = Sse2.LoadVector128(source + pos);
                            Vector128<ushort> isPlus = Sse2.CompareEqual(chunk, plusSigns);

                            Sse2.Store(destination + pos, Sse41.BlendVariable(chunk, spaces, isPlus));
                            pos += Vector128<ushort>.Count;
                        }
                        while (pos <= lastVectorStart);
                    }

                    // Scalar tail (or the whole input when the vector path is skipped).
                    while (pos < length)
                    {
                        ushort unit = source[pos];
                        destination[pos] = unit == '+' ? (ushort)' ' : unit;
                        ++pos;
                    }
                }
            }
Exemplo n.º 26
0
        /// <summary>
        /// Writes <paramref name="value"/> to <paramref name="writer"/>: nil for a null
        /// array, otherwise an array header followed by every element.
        /// </summary>
        /// <remarks>
        /// When <c>Popcnt.IsSupported</c> is true and the array has at least 32 elements,
        /// blocks of 16 elements are encoded with SSE instructions; everything else goes
        /// through <c>writer.Write</c> one element at a time.
        /// </remarks>
        /// <param name="writer">Destination writer.</param>
        /// <param name="value">Array to serialize; may be null.</param>
        /// <param name="options">Serializer options; not read by this method.</param>
        public unsafe void Serialize(ref MessagePackWriter writer, sbyte[]?value, MessagePackSerializerOptions options)
        {
            if (value == null)
            {
                writer.WriteNil();
                return;
            }

            var inputLength = value.Length;

            writer.WriteArrayHeader(inputLength);
            if (inputLength == 0)
            {
                return;
            }

            fixed(sbyte *pSource = &value[0])
            {
                var inputEnd      = pSource + inputLength;
                var inputIterator = pSource;

                if (Popcnt.IsSupported)
                {
                    const int ShiftCount = 4;
                    const int Stride     = 1 << ShiftCount;
                    // Use the SIMD path only when at least two strides (32 elements) are available,
                    // so that a stride's worth still remains after the alignment adjustment below.
                    if (inputLength < Stride << 1)
                    {
                        goto ProcessEach;
                    }

                    {
                        // Write the leading elements one by one until the iterator has advanced by the
                        // offset reported by CalculateDifferenceAlign16 (presumably the distance to the
                        // next 16-byte boundary - confirm against that helper).
                        var offset = UnsafeMemoryAlignmentUtility.CalculateDifferenceAlign16(inputIterator);
                        inputLength -= offset;
                        var offsetEnd = inputIterator + offset;
                        while (inputIterator != offsetEnd)
                        {
                            writer.Write(*inputIterator++);
                        }
                    }

                    fixed(byte *tablePointer = &ShuffleAndMaskTable[0])
                    {
                        fixed(byte *maskTablePointer = &SingleInstructionMultipleDataPrimitiveArrayFormatterHelper.StoreMaskTable[0])
                        {
                            var vectorMinFixNegInt        = Vector128.Create((sbyte)MessagePackRange.MinFixNegativeInt);
                            var vectorMessagePackCodeInt8 = Vector128.Create(MessagePackCode.Int8);

                            // Walk the remaining input in whole 16-element strides; the leftover
                            // (inputLength % 16) elements are handled at ProcessEach.
                            for (var vectorizedEnd = inputIterator + ((inputLength >> ShiftCount) << ShiftCount); inputIterator != vectorizedEnd; inputIterator += Stride)
                            {
                                var current = Sse2.LoadVector128(inputIterator);
                                // One bit per element, set where the element is below MinFixNegativeInt.
                                var index   = unchecked ((uint)Sse2.MoveMask(Sse2.CompareGreaterThan(vectorMinFixNegInt, current)));

                                if (index == 0)
                                {
                                    // None of the 16 input values is below MinFixNegativeInt, so the
                                    // block is stored unchanged, one byte per element.
                                    // NOTE(review): this pointer is taken without pinning the span,
                                    // unlike the 'fixed' used for the other branch below - confirm
                                    // that the writer's buffer cannot move.
                                    var span = writer.GetSpan(Stride);
                                    Sse2.Store((sbyte *)Unsafe.AsPointer(ref span[0]), current);

                                    writer.Advance(Stride);
                                    continue;
                                }

                                unchecked
                                {
                                    // Split the 16-bit mask into its low and high 8 elements.
                                    var index0      = (byte)index;
                                    var index1      = (byte)(index >> 8);
                                    // Output size per half: 8 bytes plus one extra byte per flagged element.
                                    var count0      = (int)(Popcnt.PopCount(index0) + 8);
                                    var count1      = (int)(Popcnt.PopCount(index1) + 8);
                                    var countTotal  = count0 + count1;
                                    var destination = writer.GetSpan(countTotal);
                                    fixed(byte *pDestination = &destination[0])
                                    {
                                        var tempDestination = pDestination;
                                        // The 16-byte table row selected by the mask byte is used twice:
                                        // as the shuffle control and as the blend mask. Positions whose
                                        // control byte has its high bit set are zeroed by the shuffle and
                                        // then receive the Int8 code byte from the blend.
                                        var shuffle0        = Sse2.LoadVector128(tablePointer + (index0 << 4));
                                        var shuffled0       = Ssse3.Shuffle(current.AsByte(), shuffle0);
                                        var answer0         = Sse41.BlendVariable(shuffled0, vectorMessagePackCodeInt8, shuffle0);

                                        // Store only the bytes selected by the store-mask row for count0
                                        // (presumably the first count0 bytes - confirm against StoreMaskTable).
                                        Sse2.MaskMove(answer0, Sse2.LoadVector128(maskTablePointer + (count0 << 4)), tempDestination);
                                        tempDestination += count0;

                                        // Same procedure for the upper 8 elements, moved down by an 8-byte shift.
                                        var shuffle1  = Sse2.LoadVector128(tablePointer + (index1 << 4));
                                        var shift1    = Sse2.ShiftRightLogical128BitLane(current.AsByte(), 8);
                                        var shuffled1 = Ssse3.Shuffle(shift1, shuffle1);
                                        var answer1   = Sse41.BlendVariable(shuffled1, vectorMessagePackCodeInt8, shuffle1);

                                        Sse2.MaskMove(answer1, Sse2.LoadVector128(maskTablePointer + (count1 << 4)), tempDestination);
                                    }

                                    writer.Advance(countTotal);
                                }
                            }
                        }
                    }
                }

// Scalar path: writes whatever has not been consumed yet, one element at a time.
ProcessEach:
                while (inputIterator != inputEnd)
                {
                    writer.Write(*inputIterator++);
                }
            }
        }
Exemplo n.º 27
0
            /// <summary>
            /// Converts a line of 4-component float pixels to 4-component byte pixels.
            /// </summary>
            /// <remarks>
            /// As the scalar loop at the end shows, the first three components are divided
            /// by the fourth, all four are scaled to the byte range, and a pixel whose
            /// fourth component is below 0.5/255 becomes four zero bytes. The vector paths
            /// are compiled only when HWINTRINSICS is defined and handle as many whole
            /// blocks as fit; the scalar loop finishes the remainder.
            /// </remarks>
            /// <param name="ipstart">Start of the float input.</param>
            /// <param name="opstart">Start of the byte output.</param>
            /// <param name="cb">Size of the input in bytes.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                float *ip = (float *)ipstart, ipe = (float *)(ipstart + cb);
                byte * op = opstart;

#if HWINTRINSICS
                if (Avx2.IsSupported)
                {
                    var vzero  = Vector256 <float> .Zero;
                    var vmin   = Vector256.Create(0.5f / byte.MaxValue);
                    var vscale = Vector256.Create((float)byte.MaxValue);

                    // Permutation indices applied to the packed bytes before the store (presumably to
                    // undo the per-128-bit-lane interleave of the pack instructions - confirm against
                    // HWIntrinsics.PermuteMaskDeinterleave8x32).
                    var vmaskp = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMaskDeinterleave8x32)));

                    // Each iteration consumes 32 floats (four 8-float vectors) and emits 32 bytes.
                    // The end pointer is pulled back so 'ip <= ipe' means a whole block remains.
                    ipe -= Vector256 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Avx.LoadVector256(ip);
                        var vf1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var vf2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        var vf3 = Avx.LoadVector256(ip + Vector256 <float> .Count * 3);
                        ip += Vector256 <byte> .Count;

                        // Shuffle with ShuffleMaskAlpha (presumably broadcasts each pixel's fourth
                        // component across that pixel - confirm against HWIntrinsics).
                        var vfa0 = Avx.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Avx.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Avx.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Avx.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Clamp the divisor from below to 0.5/255.
                        vfa0 = Avx.Max(vfa0, vmin);
                        vfa1 = Avx.Max(vfa1, vmin);
                        vfa2 = Avx.Max(vfa2, vmin);
                        vfa3 = Avx.Max(vfa3, vmin);

                        // Divide by multiplying with the reciprocal estimate.
                        vf0 = Avx.Multiply(vf0, Avx.Reciprocal(vfa0));
                        vf1 = Avx.Multiply(vf1, Avx.Reciprocal(vfa1));
                        vf2 = Avx.Multiply(vf2, Avx.Reciprocal(vfa2));
                        vf3 = Avx.Multiply(vf3, Avx.Reciprocal(vfa3));

                        // Take the lanes selected by BlendMaskAlpha from the clamped divisor vector
                        // (presumably restoring the undivided fourth component - confirm).
                        vf0 = Avx.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Avx.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Avx.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Avx.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Zero every lane whose clamped divisor equals the minimum.
                        // NOTE(review): this zeroes when the divisor is <= 0.5/255, while the scalar
                        // loop below zeroes only when it is strictly less - confirm that is intended.
                        vf0 = Avx.BlendVariable(vf0, vzero, HWIntrinsics.AvxCompareEqual(vfa0, vmin));
                        vf1 = Avx.BlendVariable(vf1, vzero, HWIntrinsics.AvxCompareEqual(vfa1, vmin));
                        vf2 = Avx.BlendVariable(vf2, vzero, HWIntrinsics.AvxCompareEqual(vfa2, vmin));
                        vf3 = Avx.BlendVariable(vf3, vzero, HWIntrinsics.AvxCompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Avx.Multiply(vf0, vscale);
                        vf1 = Avx.Multiply(vf1, vscale);
                        vf2 = Avx.Multiply(vf2, vscale);
                        vf3 = Avx.Multiply(vf3, vscale);

                        var vi0 = Avx.ConvertToVector256Int32(vf0);
                        var vi1 = Avx.ConvertToVector256Int32(vf1);
                        var vi2 = Avx.ConvertToVector256Int32(vf2);
                        var vi3 = Avx.ConvertToVector256Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation.
                        var vs0 = Avx2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Avx2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Avx2.PackUnsignedSaturate(vs0, vs1);
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskp).AsByte();

                        Avx.Store(op, vb0);
                        op += Vector256 <byte> .Count;
                    }
                    // Restore the true end pointer for the scalar loop.
                    ipe += Vector256 <byte> .Count;
                }
                else if (Sse41.IsSupported)
                {
                    var vzero  = Vector128 <float> .Zero;
                    var vmin   = Vector128.Create(0.5f / byte.MaxValue);
                    var vscale = Vector128.Create((float)byte.MaxValue);

                    // Same pipeline as the AVX2 path with 128-bit vectors: each iteration consumes
                    // 16 floats (four 4-float vectors) and emits 16 bytes.
                    ipe -= Vector128 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Sse.LoadVector128(ip);
                        var vf1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                        var vf2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                        var vf3 = Sse.LoadVector128(ip + Vector128 <float> .Count * 3);
                        ip += Vector128 <byte> .Count;

                        var vfa0 = Sse.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Sse.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Sse.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Sse.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Clamp the divisor from below to 0.5/255.
                        vfa0 = Sse.Max(vfa0, vmin);
                        vfa1 = Sse.Max(vfa1, vmin);
                        vfa2 = Sse.Max(vfa2, vmin);
                        vfa3 = Sse.Max(vfa3, vmin);

                        // Divide by multiplying with the reciprocal estimate.
                        vf0 = Sse.Multiply(vf0, Sse.Reciprocal(vfa0));
                        vf1 = Sse.Multiply(vf1, Sse.Reciprocal(vfa1));
                        vf2 = Sse.Multiply(vf2, Sse.Reciprocal(vfa2));
                        vf3 = Sse.Multiply(vf3, Sse.Reciprocal(vfa3));

                        vf0 = Sse41.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Sse41.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Sse41.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Sse41.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Zero every lane whose clamped divisor equals the minimum.
                        vf0 = Sse41.BlendVariable(vf0, vzero, Sse.CompareEqual(vfa0, vmin));
                        vf1 = Sse41.BlendVariable(vf1, vzero, Sse.CompareEqual(vfa1, vmin));
                        vf2 = Sse41.BlendVariable(vf2, vzero, Sse.CompareEqual(vfa2, vmin));
                        vf3 = Sse41.BlendVariable(vf3, vzero, Sse.CompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Sse.Multiply(vf0, vscale);
                        vf1 = Sse.Multiply(vf1, vscale);
                        vf2 = Sse.Multiply(vf2, vscale);
                        vf3 = Sse.Multiply(vf3, vscale);

                        var vi0 = Sse2.ConvertToVector128Int32(vf0);
                        var vi1 = Sse2.ConvertToVector128Int32(vf1);
                        var vi2 = Sse2.ConvertToVector128Int32(vf2);
                        var vi3 = Sse2.ConvertToVector128Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation.
                        var vs0 = Sse2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Sse2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Sse2.PackUnsignedSaturate(vs0, vs1);

                        Sse2.Store(op, vb0);
                        op += Vector128 <byte> .Count;
                    }
                    // Restore the true end pointer for the scalar loop.
                    ipe += Vector128 <byte> .Count;
                }
#endif

                // Scalar constants: 255, 0.5 and 0.5/255. NOTE(review): they are obtained through
                // Vector4(...).X rather than plain literals; the reason is not visible here.
                float fmax = new Vector4(byte.MaxValue).X, fround = new Vector4(0.5f).X, fmin = fround / fmax;

                // Scalar loop: one 4-float pixel per iteration, covering whatever the vector paths left.
                while (ip < ipe)
                {
                    float f3 = ip[3];
                    if (f3 < fmin)
                    {
                        // Fourth component too small: emit four zero bytes in a single 32-bit store.
                        *(uint *)op = 0;
                    }
                    else
                    {
                        // Fold the division by the fourth component and the scale to 255 into one factor.
                        float f3i = fmax / f3;
                        byte  o0  = ClampToByte((int)(ip[0] * f3i + fround));
                        byte  o1  = ClampToByte((int)(ip[1] * f3i + fround));
                        byte  o2  = ClampToByte((int)(ip[2] * f3i + fround));
                        byte  o3  = ClampToByte((int)(f3 * fmax + fround));
                        op[0] = o0;
                        op[1] = o1;
                        op[2] = o2;
                        op[3] = o3;
                    }

                    ip += 4;
                    op += 4;
                }
            }
Exemplo n.º 28
0
 /// <summary>
 /// C-intrinsic-style alias for the signed-byte overload of <see cref="Sse41.BlendVariable(Vector128{sbyte}, Vector128{sbyte}, Vector128{sbyte})"/>.
 /// </summary>
 /// <param name="left">Vector supplying the bytes kept where the mask byte's top bit is clear.</param>
 /// <param name="right">Vector supplying the bytes taken where the mask byte's top bit is set.</param>
 /// <param name="mask">Per-byte selector; only the most significant bit of each byte is consulted.</param>
 /// <returns>The byte-wise blend of <paramref name="left"/> and <paramref name="right"/>.</returns>
 public static Vector128<sbyte> _mm_blendv_epi8(Vector128<sbyte> left, Vector128<sbyte> right, Vector128<sbyte> mask)
     => Sse41.BlendVariable(left, right, mask);
Exemplo n.º 29
0
    /// <summary>
    /// Upscales this bitmap into <paramref name="rtnImage"/> with a separable bicubic filter.
    /// A horizontal pass resamples every source row to the destination width and keeps the
    /// result as floats in a temporary buffer; a vertical pass then blends four buffered rows
    /// per destination pixel, clamps to [0, 255] and writes one packed 32-bit pixel.
    /// </summary>
    /// <remarks>
    /// Pixels are read and written as 4 bytes each (one <c>uint</c> per pixel). The code calls
    /// SSE2, SSSE3, SSE4.1 and AVX2 intrinsics directly and performs no <c>IsSupported</c> check.
    /// Both passes run their rows through <see cref="Parallel.For(int, int, Action{int})"/>.
    /// </remarks>
    /// <param name="rtnImage">Destination bitmap; its width and height define the output size.</param>
    /// <exception cref="NotSupportedException">
    /// The destination is smaller than the source along either axis (only enlargement is implemented).
    /// </exception>
    public void ResizeBicubic(FastBitmap rtnImage)
    {
        // Source pixels advanced per destination pixel; values above 1 would mean shrinking.
        float scaleX = (float)this.width / rtnImage.width;
        float scaleY = (float)this.height / rtnImage.height;

        if (scaleX > 1 || scaleY > 1)
        {
            // A specific exception type instead of the reserved System.Exception base type;
            // existing catch (Exception) handlers still see it. The message is unchanged.
            throw new NotSupportedException("拡大のみ対応");
        }

        // Intermediate image: this.height rows, each rtnImage.width pixels of 4 floats.
        float[] tmpa = new float[rtnImage.width * 4 * this.height];
        fixed(float *tmpp = tmpa)
        {
            // Copy of the pinned pointer so the lambdas below can capture it.
            float *tmp = tmpp;

            // Byte shuffles that widen the four bytes of gathered pixel 0/1/2/3 into four
            // int32 lanes (index 255 has its top bit set, which makes the shuffle emit zero).
            var _00mask = Vector128.Create(0, 255, 255, 255, 1, 255, 255, 255, 2, 255, 255, 255, 3, 255, 255, 255);
            var _01mask = Vector128.Create(4, 255, 255, 255, 5, 255, 255, 255, 6, 255, 255, 255, 7, 255, 255, 255);
            var _10mask = Vector128.Create(8, 255, 255, 255, 9, 255, 255, 255, 10, 255, 255, 255, 11, 255, 255, 255);
            var _11mask = Vector128.Create(12, 255, 255, 255, 13, 255, 255, 255, 14, 255, 255, 255, 15, 255, 255, 255);
            // Inverse operation: collect the low byte of each int32 lane into bytes 0..3.
            var _vmask = Vector128.Create(0, 4, 8, 12, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255);

            // Offsets of the four filter taps relative to the integer sample position.
            var _1012  = Vector128.Create(-1, 0, 1, 2);
            var _0123i = Vector128.Create(0, 1, 2, 3);

            var _0000  = Vector128.Create(0, 0, 0, 0);
            var _0000f = Vector128.Create(0f, 0, 0, 0);
            var _255f  = Vector128.Create(255f, 255, 255, 255);
            var _1111  = Vector128.Create(1, 1, 1, 1);
            var _1111f = Vector128.Create(1f, 1, 1, 1);
            var _4444f = Vector128.Create(4f, 4, 4, 4);
            var _4444  = Vector128.Create(4, 4, 4, 4);
            var _5555f = Vector128.Create(5f, 5, 5, 5);
            var _2222f = Vector128.Create(2f, 2, 2, 2);
            var _8888f = Vector128.Create(8f, 8, 8, 8);
            // Clears the sign bit of each float lane, i.e. takes the absolute value.
            var _7f = Vector128.Create(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff).AsSingle();
            var _ff = Vector128.Create(-1, -1, -1, -1);
            // Floats per row of the temporary buffer.
            var _stride = Vector128.Create(rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4, rtnImage.width * 4);

            // Source dimensions broadcast once; they do not change per pixel or per row.
            var _width  = Vector128.Create(this.width, this.width, this.width, this.width);
            var _height = Vector128.Create(this.height, this.height, this.height, this.height);

            // ---- Horizontal pass: one source row -> one float row of destination width ----
            Parallel.For(0, this.height, (y) =>
            {
                float *tmpPos = tmp + y * rtnImage.width * 4;
                for (int x = 0; x < rtnImage.width; x++)
                {
                    float px = (x * scaleX);
                    int sx   = (int)px;

                    var _px = Vector128.CreateScalar(px);
                    _px     = Sse.Shuffle(_px, _px, 0);

                    var _sx = Vector128.CreateScalar(sx);
                    _sx     = Sse2.Shuffle(_sx, 0);

                    // Source columns of the four taps: sx-1, sx, sx+1, sx+2.
                    var _x2 = Sse2.Add(_sx, _1012);

                    // d = |px - tap| per lane, plus its square and cube.
                    var _d  = Sse.And(Sse.Subtract(_px, Sse2.ConvertToVector128Single(_x2)), _7f);
                    var _d2 = Sse.Multiply(_d, _d);
                    var _d3 = Sse.Multiply(_d2, _d);

                    // Cubic convolution weights (kernel parameter a = -1):
                    //   d <= 1 : 1 - 2d^2 + d^3
                    //   d >  1 : 4 - 8d + 5d^2 - d^3
                    var w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2)));
                    var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3);
                    var wb = Sse2.CompareGreaterThan(_d, _1111f);
                    var _w = Sse41.BlendVariable(w1, w2, wb);

                    // All-ones in lanes whose tap column is < 0 or >= width; those lanes
                    // read column sx instead so the gather stays inside the row.
                    var _xpb = Sse2.Or(Sse2.CompareLessThan(_x2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_x2, _width), _1111).AsInt32(), _ff));
                    var _xpp = Sse2.And(_sx, _xpb);
                    var _xp  = Sse41.BlendVariable(_x2, _xpp, _xpb);

                    // Fetch the four 32-bit source pixels of this row in one gather.
                    var p = Avx2.GatherVector128((uint *)(this._ptr + this._stride * y), _xp, 4).AsByte();

                    // Widen each pixel's four bytes to floats.
                    var _p0 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _00mask).AsInt32());
                    var _p1 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _01mask).AsInt32());
                    var _p2 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _10mask).AsInt32());
                    var _p3 = Sse2.ConvertToVector128Single(Ssse3.Shuffle(p, _11mask).AsInt32());

                    // Broadcast each tap weight across all four channels.
                    var _w0 = Sse.Shuffle(_w, _w, 0);
                    var _w1 = Sse.Shuffle(_w, _w, 0b01010101);
                    var _w2 = Sse.Shuffle(_w, _w, 0b10101010);
                    var _w3 = Sse.Shuffle(_w, _w, 0b11111111);

                    var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3)));

                    Sse2.Store(tmpPos + x * 4, rgbaf);
                }
            });

            // ---- Vertical pass: four buffered rows -> one destination row of packed pixels ----
            Parallel.For(0, rtnImage.height, (y) =>
            {
                float py = (y * scaleY);
                int sy   = (int)py;

                // Scratch space for one 128-bit store; only its first uint is read back.
                uint *store = stackalloc uint[4];

                var _py = Vector128.CreateScalar(py);
                _py     = Sse.Shuffle(_py, _py, 0);

                var _sy = Vector128.CreateScalar(sy);
                _sy     = Sse2.Shuffle(_sy, 0);

                // Buffered rows of the four taps: sy-1, sy, sy+1, sy+2.
                var _y2 = Sse2.Add(_sy, _1012);

                var _d  = Sse.And(Sse.Subtract(_py, Sse2.ConvertToVector128Single(_y2)), _7f);
                var _d2 = Sse.Multiply(_d, _d);
                var _d3 = Sse.Multiply(_d2, _d);

                // Same weight polynomials as in the horizontal pass.
                var w1 = Sse.Add(_1111f, Sse.Subtract(_d3, Sse.Multiply(_2222f, _d2)));
                var w2 = Sse.Subtract(Sse.Subtract(Sse.Add(_4444f, Sse.Multiply(_5555f, _d2)), Sse.Multiply(_d, _8888f)), _d3);
                var wb = Sse2.CompareGreaterThan(_d, _1111f);
                var _w = Sse41.BlendVariable(w1, w2, wb);

                // The weights depend only on y, so broadcast them once per row.
                var _w0 = Sse.Shuffle(_w, _w, 0);
                var _w1 = Sse.Shuffle(_w, _w, 0b01010101);
                var _w2 = Sse.Shuffle(_w, _w, 0b10101010);
                var _w3 = Sse.Shuffle(_w, _w, 0b11111111);

                // Rows outside [0, height) are replaced by row sy.
                var _ypb = Sse2.Or(Sse2.CompareLessThan(_y2, _0000), Sse41.MultiplyLow(Sse2.AndNot(Sse2.CompareLessThan(_y2, _height), _1111).AsInt32(), _ff));
                var _ypp = Sse2.And(_sy, _ypb);
                var _yp  = Sse41.BlendVariable(_y2, _ypp, _ypb);
                // Float offset of the start of each tap row inside the temporary buffer.
                var _yps = Sse41.MultiplyLow(_yp, _stride);

                // Gather indices of the four channels of pixel x = 0 in each tap row.
                var _yp0  = Sse2.Add(Sse2.Shuffle(_yps, 0), _0123i);
                var _yp1  = Sse2.Add(Sse2.Shuffle(_yps, 0b01010101), _0123i);
                var _yp2  = Sse2.Add(Sse2.Shuffle(_yps, 0b10101010), _0123i);
                var _yp3  = Sse2.Add(Sse2.Shuffle(_yps, 0b11111111), _0123i);
                uint *rtn = (uint *)(rtnImage._ptr + rtnImage._stride * y);

                for (int x = 0; x < rtnImage.width; x++)
                {
                    var _p0 = Avx2.GatherVector128((float *)(tmp), _yp0, 4);
                    var _p1 = Avx2.GatherVector128((float *)(tmp), _yp1, 4);
                    var _p2 = Avx2.GatherVector128((float *)(tmp), _yp2, 4);
                    var _p3 = Avx2.GatherVector128((float *)(tmp), _yp3, 4);

                    var rgbaf = Sse.Add(Sse.Add(Sse.Multiply(_p0, _w0), Sse.Multiply(_p1, _w1)), Sse.Add(Sse.Multiply(_p2, _w2), Sse.Multiply(_p3, _w3)));

                    // Clamp each channel to [0, 255] before narrowing.
                    var _b0 = Sse.CompareLessThan(rgbaf, _0000f);
                    rgbaf   = Sse41.BlendVariable(rgbaf, _0000f, _b0);
                    var _b1 = Sse.CompareGreaterThan(rgbaf, _255f);
                    rgbaf   = Sse41.BlendVariable(rgbaf, _255f, _b1);

                    // float -> int32, then pack the low byte of every lane into one uint.
                    var rgbab = Sse2.ConvertToVector128Int32(rgbaf).AsByte();
                    var rgba  = Ssse3.Shuffle(rgbab, _vmask).AsUInt32();

                    Sse2.Store(store, rgba);

                    // Advance all four tap rows by one pixel (4 floats).
                    _yp0 = Sse2.Add(_yp0, _4444);
                    _yp1 = Sse2.Add(_yp1, _4444);
                    _yp2 = Sse2.Add(_yp2, _4444);
                    _yp3 = Sse2.Add(_yp3, _4444);
                    *rtn = *store;
                    rtn++;
                }
            });
Exemplo n.º 30
0
 /// <summary>
 /// C-intrinsic-style alias for the single-precision overload of <see cref="Sse41.BlendVariable(Vector128{float}, Vector128{float}, Vector128{float})"/>.
 /// </summary>
 /// <param name="left">Vector supplying the lanes kept where the mask lane's sign bit is clear.</param>
 /// <param name="right">Vector supplying the lanes taken where the mask lane's sign bit is set.</param>
 /// <param name="mask">Per-lane selector; only the most significant bit of each 32-bit lane is consulted.</param>
 /// <returns>The lane-wise blend of <paramref name="left"/> and <paramref name="right"/>.</returns>
 public static Vector128<float> _mm_blendv_ps(Vector128<float> left, Vector128<float> right, Vector128<float> mask)
     => Sse41.BlendVariable(left, right, mask);