/// <inheritdoc />
        public ValueTask InvokeAsync(TiffImageDecoderContext context, ITiffImageDecoderPipelineNode next)
        {
            // Reads consecutive R, G and B samples from each scanline of the uncompressed data,
            // scales them to 16 bits per channel, writes opaque TiffRgba64 pixels to the
            // context's writer and finally hands over to the next pipeline node.
            if (context is null)
            {
                throw new ArgumentNullException(nameof(context));
            }

            if (next is null)
            {
                throw new ArgumentNullException(nameof(next));
            }

            // Copy the per-channel bit depths to the stack and cache them in locals.
            Span<ushort> depths = stackalloc ushort[3];
            _bitsPerSample.CopyTo(depths);
            ushort bitsR = depths[0];
            ushort bitsG = depths[1];
            ushort bitsB = depths[2];
            int bitsPerPixel = bitsR + bitsG + bitsB;

            bool msbFirst = _fillOrder != TiffFillOrder.LowerOrderBitsFirst;

            // Every scanline is rounded up to a whole number of bytes.
            int stride = (context.SourceImageSize.Width * bitsPerPixel + 7) / 8;

            // Skip the scanlines located above the requested read region.
            Memory<byte> regionData = context.UncompressedData.Slice(context.SourceReadOffset.Y * stride);
            ReadOnlySpan<byte> scanlines = regionData.Span;

            using TiffPixelBufferWriter<TiffRgba64> writer = context.GetWriter<TiffRgba64>();

            int height = context.ReadSize.Height;
            int width = context.ReadSize.Width;

            // BitReader.Read reads bytes in a big-endian way, so a byte swap is only requested
            // for little-endian sources, and only for channels made of whole bytes.
            bool littleEndian = context.IsLittleEndian;
            bool swapR = littleEndian && bitsR % 8 == 0;
            bool swapG = littleEndian && bitsG % 8 == 0;
            bool swapB = littleEndian && bitsB % 8 == 0;

            // The fast path requires at least 16 bits in every channel and no byte swapping at all.
            bool allChannelsWide = bitsR >= 16 && bitsG >= 16 && bitsB >= 16;
            bool useFastPath = allChannelsWide && !(swapR || swapG || swapB);

            // Alpha is set once; only the colour channels change from pixel to pixel.
            TiffRgba64 pixel = default;
            pixel.A = ushort.MaxValue;

            for (int y = 0; y < height; y++)
            {
                using TiffPixelSpanHandle<TiffRgba64> rowHandle = writer.GetRowSpan(y);
                Span<TiffRgba64> destination = rowHandle.GetSpan();

                // Position a reader on this row's scanline and skip the pixels left of the read region.
                var reader = new BitReader(scanlines.Slice(y * stride, stride), msbFirst);
                reader.Advance(context.SourceReadOffset.X * bitsPerPixel);

                if (useFastPath)
                {
                    for (int x = 0; x < width; x++)
                    {
                        // Expand to 32 bits and keep the upper 16.
                        pixel.R = (ushort)(FastExpandBits(reader.Read(bitsR), bitsR, 32) >> 16);
                        pixel.G = (ushort)(FastExpandBits(reader.Read(bitsG), bitsG, 32) >> 16);
                        pixel.B = (ushort)(FastExpandBits(reader.Read(bitsB), bitsB, 32) >> 16);
                        destination[x] = pixel;
                    }

                    continue;
                }

                // General path: also handles narrow channels and byte swapping.
                for (int x = 0; x < width; x++)
                {
                    pixel.R = (ushort)(ExpandBits(reader.Read(bitsR), bitsR, 32, swapR) >> 16);
                    pixel.G = (ushort)(ExpandBits(reader.Read(bitsG), bitsG, 32, swapG) >> 16);
                    pixel.B = (ushort)(ExpandBits(reader.Read(bitsB), bitsB, 32, swapB) >> 16);
                    destination[x] = pixel;
                }
            }

            return next.RunAsync(context);
        }
        /// <inheritdoc />
        public ValueTask InvokeAsync(TiffImageDecoderContext context, ITiffImageDecoderPipelineNode next)
        {
            // Reads consecutive R, G and B samples from each scanline of the uncompressed data,
            // scales them to 8 bits per channel, writes TiffRgb24 pixels to the context's writer
            // and finally hands over to the next pipeline node.
            if (context is null)
            {
                throw new ArgumentNullException(nameof(context));
            }

            if (next is null)
            {
                throw new ArgumentNullException(nameof(next));
            }

            // Copy the per-channel bit depths to the stack and cache them in locals.
            Span<ushort> depths = stackalloc ushort[3];
            _bitsPerSample.CopyTo(depths);
            ushort bitsR = depths[0];
            ushort bitsG = depths[1];
            ushort bitsB = depths[2];

            bool msbFirst = _fillOrder != TiffFillOrder.LowerOrderBitsFirst;

            // The fast path is taken only when every channel carries at least 4 bits.
            bool useFastPath = !(bitsR < 4 || bitsG < 4 || bitsB < 4);
            int bitsPerPixel = bitsR + bitsG + bitsB;

            // Every scanline is rounded up to a whole number of bytes.
            int stride = (context.SourceImageSize.Width * bitsPerPixel + 7) / 8;

            // Skip the scanlines located above the requested read region.
            Memory<byte> regionData = context.UncompressedData.Slice(context.SourceReadOffset.Y * stride);
            ReadOnlySpan<byte> scanlines = regionData.Span;

            using TiffPixelBufferWriter<TiffRgb24> writer = context.GetWriter<TiffRgb24>();

            int height = context.ReadSize.Height;
            int width = context.ReadSize.Width;

            TiffRgb24 pixel = default;

            for (int y = 0; y < height; y++)
            {
                using TiffPixelSpanHandle<TiffRgb24> rowHandle = writer.GetRowSpan(y);
                Span<TiffRgb24> destination = rowHandle.GetSpan();

                // Position a reader on this row's scanline and skip the pixels left of the read region.
                var reader = new BitReader(scanlines.Slice(y * stride, stride), msbFirst);
                reader.Advance(context.SourceReadOffset.X * bitsPerPixel);

                if (useFastPath)
                {
                    for (int x = 0; x < width; x++)
                    {
                        pixel.R = (byte)FastExpandBits(reader.Read(bitsR), bitsR, 8);
                        pixel.G = (byte)FastExpandBits(reader.Read(bitsG), bitsG, 8);
                        pixel.B = (byte)FastExpandBits(reader.Read(bitsB), bitsB, 8);
                        destination[x] = pixel;
                    }

                    continue;
                }

                // General path, used when at least one channel has fewer than 4 bits.
                for (int x = 0; x < width; x++)
                {
                    pixel.R = (byte)ExpandBits(reader.Read(bitsR), bitsR, 8);
                    pixel.G = (byte)ExpandBits(reader.Read(bitsG), bitsG, 8);
                    pixel.B = (byte)ExpandBits(reader.Read(bitsB), bitsB, 8);
                    destination[x] = pixel;
                }
            }

            return next.RunAsync(context);
        }
        /// <inheritdoc />
        public ValueTask InvokeAsync(TiffImageDecoderContext context, ITiffImageDecoderPipelineNode next)
        {
            // Reads R, G, B and A samples from four separate planes of the uncompressed data,
            // scales them to 16 bits per channel, writes TiffRgba64 pixels to the context's writer
            // and finally hands over to the next pipeline node.
            if (context is null)
            {
                throw new ArgumentNullException(nameof(context));
            }

            if (next is null)
            {
                throw new ArgumentNullException(nameof(next));
            }

            Span<ushort> bitsPerSample = stackalloc ushort[4];

            _bitsPerSample.CopyTo(bitsPerSample);
            bool isHigherOrderBitsFirst = _fillOrder != TiffFillOrder.LowerOrderBitsFirst;

            // Each plane has its own scanline size, rounded up to a whole number of bytes.
            int bytesPerScanlineR = (context.SourceImageSize.Width * bitsPerSample[0] + 7) / 8;
            int bytesPerScanlineG = (context.SourceImageSize.Width * bitsPerSample[1] + 7) / 8;
            int bytesPerScanlineB = (context.SourceImageSize.Width * bitsPerSample[2] + 7) / 8;
            int bytesPerScanlineA = (context.SourceImageSize.Width * bitsPerSample[3] + 7) / 8;

            // The planes are laid out back to back in R, G, B, A order; each one spans
            // SourceImageSize.Height scanlines of its own scanline size.
            ReadOnlySpan<byte> sourceSpan = context.UncompressedData.Span;
            ReadOnlySpan<byte> sourceR = sourceSpan.Slice(0, context.SourceImageSize.Height * bytesPerScanlineR);
            ReadOnlySpan<byte> sourceG = sourceSpan.Slice(sourceR.Length, context.SourceImageSize.Height * bytesPerScanlineG);
            ReadOnlySpan<byte> sourceB = sourceSpan.Slice(sourceR.Length + sourceG.Length, context.SourceImageSize.Height * bytesPerScanlineB);
            ReadOnlySpan<byte> sourceA = sourceSpan.Slice(sourceR.Length + sourceG.Length + sourceB.Length, context.SourceImageSize.Height * bytesPerScanlineA);

            // Skip the scanlines above the requested read region. Every plane must be offset
            // within itself: the alpha plane is sliced from sourceA (not from sourceB), otherwise
            // alpha samples would be read out of the blue plane.
            sourceR = sourceR.Slice(context.SourceReadOffset.Y * bytesPerScanlineR);
            sourceG = sourceG.Slice(context.SourceReadOffset.Y * bytesPerScanlineG);
            sourceB = sourceB.Slice(context.SourceReadOffset.Y * bytesPerScanlineB);
            sourceA = sourceA.Slice(context.SourceReadOffset.Y * bytesPerScanlineA);

            using TiffPixelBufferWriter<TiffRgba64> writer = context.GetWriter<TiffRgba64>();

            int rows = context.ReadSize.Height;
            int cols = context.ReadSize.Width;

            // BitReader.Read reads bytes in big-endian way, we only need to reverse the endianness if the source is little-endian.
            bool isLittleEndian = context.IsLittleEndian;
            bool reverseEndiannessR = isLittleEndian && bitsPerSample[0] % 8 == 0;
            bool reverseEndiannessG = isLittleEndian && bitsPerSample[1] % 8 == 0;
            bool reverseEndiannessB = isLittleEndian && bitsPerSample[2] % 8 == 0;
            bool reverseEndiannessA = isLittleEndian && bitsPerSample[3] % 8 == 0;

            // The fast path requires at least 16 bits in every channel and no byte swapping at all.
            bool canDoFastPath = bitsPerSample[0] >= 16 && bitsPerSample[1] >= 16 && bitsPerSample[2] >= 16 && bitsPerSample[3] >= 16 &&
                                 !reverseEndiannessR && !reverseEndiannessG && !reverseEndiannessB && !reverseEndiannessA;

            TiffRgba64 pixel = default;

            for (int row = 0; row < rows; row++)
            {
                using TiffPixelSpanHandle<TiffRgba64> pixelSpanHandle = writer.GetRowSpan(row);
                Span<TiffRgba64> pixelSpan = pixelSpanHandle.GetSpan();

                // One reader per plane, each positioned on the current scanline of its plane
                // and advanced past the pixels left of the read region.
                var bitReaderR = new BitReader(sourceR.Slice(0, bytesPerScanlineR), isHigherOrderBitsFirst);
                var bitReaderG = new BitReader(sourceG.Slice(0, bytesPerScanlineG), isHigherOrderBitsFirst);
                var bitReaderB = new BitReader(sourceB.Slice(0, bytesPerScanlineB), isHigherOrderBitsFirst);
                var bitReaderA = new BitReader(sourceA.Slice(0, bytesPerScanlineA), isHigherOrderBitsFirst);
                bitReaderR.Advance(context.SourceReadOffset.X * bitsPerSample[0]);
                bitReaderG.Advance(context.SourceReadOffset.X * bitsPerSample[1]);
                bitReaderB.Advance(context.SourceReadOffset.X * bitsPerSample[2]);
                bitReaderA.Advance(context.SourceReadOffset.X * bitsPerSample[3]);

                if (canDoFastPath)
                {
                    // Fast path: expand to 32 bits and keep the upper 16.
                    for (int col = 0; col < cols; col++)
                    {
                        pixel.R = (ushort)(FastExpandBits(bitReaderR.Read(bitsPerSample[0]), bitsPerSample[0], 32) >> 16);
                        pixel.G = (ushort)(FastExpandBits(bitReaderG.Read(bitsPerSample[1]), bitsPerSample[1], 32) >> 16);
                        pixel.B = (ushort)(FastExpandBits(bitReaderB.Read(bitsPerSample[2]), bitsPerSample[2], 32) >> 16);
                        pixel.A = (ushort)(FastExpandBits(bitReaderA.Read(bitsPerSample[3]), bitsPerSample[3], 32) >> 16);
                        pixelSpan[col] = pixel;
                    }
                }
                else
                {
                    // Slow path: also handles narrow channels and byte swapping.
                    for (int col = 0; col < cols; col++)
                    {
                        pixel.R = (ushort)(ExpandBits(bitReaderR.Read(bitsPerSample[0]), bitsPerSample[0], 32, reverseEndiannessR) >> 16);
                        pixel.G = (ushort)(ExpandBits(bitReaderG.Read(bitsPerSample[1]), bitsPerSample[1], 32, reverseEndiannessG) >> 16);
                        pixel.B = (ushort)(ExpandBits(bitReaderB.Read(bitsPerSample[2]), bitsPerSample[2], 32, reverseEndiannessB) >> 16);
                        pixel.A = (ushort)(ExpandBits(bitReaderA.Read(bitsPerSample[3]), bitsPerSample[3], 32, reverseEndiannessA) >> 16);
                        pixelSpan[col] = pixel;
                    }
                }

                // For associated alpha, post-process the decoded row with one of the two helpers,
                // selected by the _undoColorPreMultiplying flag.
                if (_isAlphaAssociated)
                {
                    if (_undoColorPreMultiplying)
                    {
                        UndoColorPreMultiplying(pixelSpan);
                    }
                    else
                    {
                        WipeAlphaChanel(pixelSpan);
                    }
                }

                // Move every plane on to its next scanline.
                sourceR = sourceR.Slice(bytesPerScanlineR);
                sourceG = sourceG.Slice(bytesPerScanlineG);
                sourceB = sourceB.Slice(bytesPerScanlineB);
                sourceA = sourceA.Slice(bytesPerScanlineA);
            }

            return next.RunAsync(context);
        }