示例#1
0
        /// <summary>
        /// Inverse DCT, de-quantization and level shift part of the JPEG decoding.
        /// Input is expected in 64x1 macro blocks and output is expected to be in 8x8
        /// macro blocks.
        /// </summary>
        /// <param name="src">Source image.</param>
        /// <param name="dst">Destination image</param>
        /// <param name="QuantInvTable">Inverse quantization tables for JPEG decoding created using QuantInvTableInit()</param>
        /// <param name="oSizeRoi">Roi size (in macro blocks?).</param>
        public static void DCTQuantInv8x8LS(NPPImage_16sC1 src, NPPImage_8uC1 dst, CudaDeviceVariable <ushort> QuantInvTable, NppiSize oSizeRoi)
        {
            NppStatus status;

            status = NPPNativeMethods.NPPi.ImageCompression.nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R(src.DevicePointer, src.Pitch, dst.DevicePointer, dst.Pitch, QuantInvTable.DevicePointer, oSizeRoi);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiDCTQuantInv8x8LS_JPEG_16s8u_C1R", status));
            NPPException.CheckNppStatus(status, null);
        }
示例#2
0
        /// <summary>
        /// Huffman Encoding of the JPEG Encoding.<para/>
        /// Input is expected to be 64x1 macro blocks and output is expected as byte stuffed huffman encoded JPEG scan.
        /// </summary>
        /// <param name="pSrc">Source image.</param>
        /// <param name="restartInterval">Restart Interval, see JPEG standard.</param>
        /// <param name="Ss">Start Coefficient, see JPEG standard.</param>
        /// <param name="Se">End Coefficient, see JPEG standard.</param>
        /// <param name="Ah">Bit Approximation High, see JPEG standard.</param>
        /// <param name="Al">Bit Approximation Low, see JPEG standard.</param>
        /// <param name="pDst">Byte-stuffed huffman encoded JPEG scan.</param>
        /// <param name="nLength">Byte length of the huffman encoded JPEG scan.</param>
        /// <param name="pHuffmanTableDC">DC Huffman table.</param>
        /// <param name="pHuffmanTableAC">AC Huffman table.</param>
        /// <param name="oSizeROI">ROI</param>
        /// <param name="buffer">Scratch buffer</param>
        public static void EncodeHuffmanScan(NPPImage_16sC1 pSrc, int restartInterval, int Ss, int Se, int Ah, int Al,
                                             CudaDeviceVariable <byte> pDst, ref int nLength, NppiEncodeHuffmanSpec pHuffmanTableDC, NppiEncodeHuffmanSpec pHuffmanTableAC, NppiSize oSizeROI, CudaDeviceVariable <byte> buffer)
        {
            NppStatus status;

            status = NPPNativeMethods.NPPi.CompressionDCT.nppiEncodeHuffmanScan_JPEG_8u16s_P1R(pSrc.DevicePointer, pSrc.Pitch, restartInterval, Ss, Se, Ah, Al, pDst.DevicePointer, ref nLength, pHuffmanTableDC, pHuffmanTableAC, oSizeROI, buffer.DevicePointer);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiEncodeHuffmanScan_JPEG_8u16s_P1R", status));
            NPPException.CheckNppStatus(status, null);
        }
 /// <summary>
 /// 32-bit unsigned to 16-bit signed conversion.
 /// </summary>
 /// <param name="dst">Destination image</param>
 /// <param name="roundMode">Round mode</param>
 /// <param name="scaleFactor">scaling factor</param>
 public void Convert(NPPImage_16sC1 dst, NppRoundMode roundMode, int scaleFactor)
 {
     status = NPPNativeMethods.NPPi.BitDepthConversion.nppiConvert_32u16s_C1RSfs(_devPtrRoi, _pitch, dst.DevicePointerRoi, dst.Pitch, _sizeRoi, roundMode, scaleFactor);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiConvert_32u16s_C1RSfs", status));
     NPPException.CheckNppStatus(status, this);
 }
示例#4
0
 /// <summary>
 /// Inverse DCT in WebP decoding. Input is the bitstream that contains the coefficients of 16x16 blocks.
 /// These coefficients are based on a 4x4 sub-block unit, e.g.,
 /// Coeffs in 0th 4x4 block, 1st 4x4 block 2nd 4x4 block, etc.
 /// Output is the coefficients after inverse DCT transform.
 /// The output is put in an image format (i.e. raster scan order), different from the input order.
 /// </summary>
 /// <param name="src">Source image.</param>
 /// <param name="dst">Destination image</param>
 /// <param name="oSizeRoi">Roi size (in pixels).</param>
 public void DCTQuant16Fwd8x8LS(NPPImage_16sC1 src, NPPImage_16sC1 dst, NppiSize oSizeRoi)
 {
     status = NPPNativeMethods.NPPi.CompressionDCT.nppiDCTInv4x4_WebP_16s_C1R(src.DevicePointer, src.Pitch, dst.DevicePointer, dst.Pitch, oSizeRoi);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiDCTInv4x4_WebP_16s_C1R", status));
     NPPException.CheckNppStatus(status, this);
 }
示例#5
0
 /// <summary>
 /// Inverse DCT, de-quantization and level shift part of the JPEG decoding, 16-bit short integer.
 /// Input is expected in 64x1 macro blocks and output is expected to be in 8x8
 /// macro blocks. The new version of the primitive takes the ROI in image pixel size and
 /// works with DCT coefficients that are in zig-zag order.
 /// </summary>
 /// <param name="src">Source image.</param>
 /// <param name="dst">Destination image</param>
 /// <param name="pQuantizationTable">Quantization Table in zig-zag order.</param>
 /// <param name="oSizeRoi">Roi size (in pixels).</param>
 public void DCTQuant16Inv8x8LS(NPPImage_16sC1 src, NPPImage_8uC1 dst, NppiSize oSizeRoi, CudaDeviceVariable <ushort> pQuantizationTable)
 {
     status = NPPNativeMethods.NPPi.CompressionDCT.nppiDCTQuant16Inv8x8LS_JPEG_16s8u_C1R_NEW(src.DevicePointer, src.Pitch, dst.DevicePointer, dst.Pitch, pQuantizationTable.DevicePointer, oSizeRoi, _state);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiDCTQuant16Inv8x8LS_JPEG_16s8u_C1R_NEW", status));
     NPPException.CheckNppStatus(status, this);
 }
示例#6
0
        public static void SaveJpeg(string aFilename, int aQuality, Bitmap aImage)
        {
            if (aImage.PixelFormat != System.Drawing.Imaging.PixelFormat.Format24bppRgb)
            {
                throw new ArgumentException("Only three channel color images are supported.");
            }

            if (aImage.Width % 16 != 0 || aImage.Height % 16 != 0)
            {
                throw new ArgumentException("The provided bitmap must have a height and width of a multiple of 16.");
            }

            JPEGCompression compression = new JPEGCompression();

            NPPImage_8uC3 src   = new NPPImage_8uC3(aImage.Width, aImage.Height);
            NPPImage_8uC1 srcY  = new NPPImage_8uC1(aImage.Width, aImage.Height);
            NPPImage_8uC1 srcCb = new NPPImage_8uC1(aImage.Width / 2, aImage.Height / 2);
            NPPImage_8uC1 srcCr = new NPPImage_8uC1(aImage.Width / 2, aImage.Height / 2);

            src.CopyToDevice(aImage);

            //System.Drawing.Bitmap is ordered BGR not RGB
            //The NPP routine BGR to YCbCR outputs the values in clamped range, following the YCbCr standard.
            //But JPEG uses unclamped values ranging all from [0..255], thus use our own color matrix:
            float[,] BgrToYCbCr = new float[3, 4]
            {
                { 0.114f, 0.587f, 0.299f, 0 },
                { 0.5f, -0.33126f, -0.16874f, 128 },
                { -0.08131f, -0.41869f, 0.5f, 128 }
            };


            src.ColorTwist(BgrToYCbCr);

            //Reduce size of of Cb and Cr channel
            src.Copy(srcY, 2);
            srcY.Resize(srcCr, 0.5, 0.5, InterpolationMode.SuperSampling);
            src.Copy(srcY, 1);
            srcY.Resize(srcCb, 0.5, 0.5, InterpolationMode.SuperSampling);
            src.Copy(srcY, 0);


            FrameHeader oFrameHeader = new FrameHeader();

            oFrameHeader.nComponents                = 3;
            oFrameHeader.nHeight                    = (ushort)aImage.Height;
            oFrameHeader.nSamplePrecision           = 8;
            oFrameHeader.nWidth                     = (ushort)aImage.Width;
            oFrameHeader.aComponentIdentifier       = new byte[] { 1, 2, 3 };
            oFrameHeader.aSamplingFactors           = new byte[] { 34, 17, 17 };   //Y channel is twice the sice of Cb/Cr channel
            oFrameHeader.aQuantizationTableSelector = new byte[] { 0, 1, 1 };

            //Get quantization tables from JPEG standard with quality scaling
            QuantizationTable[] aQuantizationTables = new QuantizationTable[2];
            aQuantizationTables[0] = new QuantizationTable(QuantizationTable.QuantizationType.Luminance, aQuality);
            aQuantizationTables[1] = new QuantizationTable(QuantizationTable.QuantizationType.Chroma, aQuality);


            CudaDeviceVariable <byte>[] pdQuantizationTables = new CudaDeviceVariable <byte> [2];
            pdQuantizationTables[0] = aQuantizationTables[0].aTable;
            pdQuantizationTables[1] = aQuantizationTables[1].aTable;


            //Get Huffman tables from JPEG standard
            HuffmanTable[] aHuffmanTables = new HuffmanTable[4];
            aHuffmanTables[0] = new HuffmanTable(HuffmanTable.HuffmanType.LuminanceDC);
            aHuffmanTables[1] = new HuffmanTable(HuffmanTable.HuffmanType.ChromaDC);
            aHuffmanTables[2] = new HuffmanTable(HuffmanTable.HuffmanType.LuminanceAC);
            aHuffmanTables[3] = new HuffmanTable(HuffmanTable.HuffmanType.ChromaAC);

            //Set header
            ScanHeader oScanHeader = new ScanHeader();

            oScanHeader.nA                     = 0;
            oScanHeader.nComponents            = 3;
            oScanHeader.nSe                    = 63;
            oScanHeader.nSs                    = 0;
            oScanHeader.aComponentSelector     = new byte[] { 1, 2, 3 };
            oScanHeader.aHuffmanTablesSelector = new byte[] { 0, 17, 17 };


            NPPImage_16sC1[] apdDCT = new NPPImage_16sC1[3];

            NPPImage_8uC1[] apDstImage = new NPPImage_8uC1[3];
            NppiSize[]      aDstSize   = new NppiSize[3];
            aDstSize[0] = new NppiSize(srcY.Width, srcY.Height);
            aDstSize[1] = new NppiSize(srcCb.Width, srcCb.Height);
            aDstSize[2] = new NppiSize(srcCr.Width, srcCr.Height);


            // Compute channel sizes as stored in the output JPEG (8x8 blocks & MCU block layout)
            NppiSize oDstImageSize = new NppiSize();
            float    frameWidth    = (float)Math.Floor((float)oFrameHeader.nWidth);
            float    frameHeight   = (float)Math.Floor((float)oFrameHeader.nHeight);

            oDstImageSize.width  = (int)Math.Max(1.0f, frameWidth);
            oDstImageSize.height = (int)Math.Max(1.0f, frameHeight);

            //Console.WriteLine("Output Size: " + oDstImageSize.width + "x" + oDstImageSize.height + "x" + (int)(oFrameHeader.nComponents));

            apDstImage[0] = srcY;
            apDstImage[1] = srcCb;
            apDstImage[2] = srcCr;

            int nMCUBlocksH = 0;
            int nMCUBlocksV = 0;

            // Compute channel sizes as stored in the JPEG (8x8 blocks & MCU block layout)
            for (int i = 0; i < oFrameHeader.nComponents; ++i)
            {
                nMCUBlocksV = Math.Max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] >> 4);
                nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
            }

            for (int i = 0; i < oFrameHeader.nComponents; ++i)
            {
                NppiSize oBlocks       = new NppiSize();
                NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);

                oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 *
                                                  (float)(oBlocksPerMCU.width) / nMCUBlocksH);
                oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;

                oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 *
                                                   (float)(oBlocksPerMCU.height) / nMCUBlocksV);
                oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;

                // Allocate Memory
                apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
            }



            /***************************
            *
            *   Output
            *
            ***************************/


            // Forward DCT
            for (int i = 0; i < 3; ++i)
            {
                compression.DCTQuantFwd8x8LS(apDstImage[i], apdDCT[i], aDstSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
            }


            // Huffman Encoding
            CudaDeviceVariable <byte> pdScan = new CudaDeviceVariable <byte>(BUFFER_SIZE);
            int nScanLength = 0;

            int nTempSize = JPEGCompression.EncodeHuffmanGetSize(aDstSize[0], 3);
            CudaDeviceVariable <byte> pJpegEncoderTemp = new CudaDeviceVariable <byte>(nTempSize);

            NppiEncodeHuffmanSpec[] apHuffmanDCTableEnc = new NppiEncodeHuffmanSpec[3];
            NppiEncodeHuffmanSpec[] apHuffmanACTableEnc = new NppiEncodeHuffmanSpec[3];

            for (int i = 0; i < 3; ++i)
            {
                apHuffmanDCTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
                apHuffmanACTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
            }

            JPEGCompression.EncodeHuffmanScan(apdDCT, 0, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, pdScan, ref nScanLength, apHuffmanDCTableEnc, apHuffmanACTableEnc, aDstSize, pJpegEncoderTemp);

            for (int i = 0; i < 3; ++i)
            {
                JPEGCompression.EncodeHuffmanSpecFree(apHuffmanDCTableEnc[i]);
                JPEGCompression.EncodeHuffmanSpecFree(apHuffmanACTableEnc[i]);
            }

            // Write JPEG to byte array, as in original sample code
            byte[] pDstOutput = new byte[BUFFER_SIZE];
            int    pos        = 0;

            oFrameHeader.nWidth  = (ushort)oDstImageSize.width;
            oFrameHeader.nHeight = (ushort)oDstImageSize.height;

            writeMarker(0x0D8, pDstOutput, ref pos);
            writeJFIFTag(pDstOutput, ref pos);
            writeQuantizationTable(aQuantizationTables[0], pDstOutput, ref pos);
            writeQuantizationTable(aQuantizationTables[1], pDstOutput, ref pos);
            writeFrameHeader(oFrameHeader, pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[0], pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[1], pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[2], pDstOutput, ref pos);
            writeHuffmanTable(aHuffmanTables[3], pDstOutput, ref pos);
            writeScanHeader(oScanHeader, pDstOutput, ref pos);

            pdScan.CopyToHost(pDstOutput, 0, pos, nScanLength);

            pos += nScanLength;
            writeMarker(0x0D9, pDstOutput, ref pos);

            FileStream fs = new FileStream(aFilename, FileMode.Create, FileAccess.Write);

            fs.Write(pDstOutput, 0, pos);
            fs.Close();

            //cleanup:
            fs.Dispose();
            pJpegEncoderTemp.Dispose();
            pdScan.Dispose();
            apdDCT[2].Dispose();
            apdDCT[1].Dispose();
            apdDCT[0].Dispose();
            pdQuantizationTables[1].Dispose();
            pdQuantizationTables[0].Dispose();

            srcCr.Dispose();
            srcCb.Dispose();
            srcY.Dispose();
            src.Dispose();
            compression.Dispose();
        }
示例#7
0
        public static Bitmap LoadJpeg(string aFilename)
        {
            JPEGCompression compression = new JPEGCompression();

            byte[] pJpegData    = File.ReadAllBytes(aFilename);
            int    nInputLength = pJpegData.Length;


            // Check if this is a valid JPEG file
            int nPos    = 0;
            int nMarker = nextMarker(pJpegData, ref nPos, nInputLength);

            if (nMarker != 0x0D8)
            {
                throw new ArgumentException(aFilename + " is not a JPEG file.");
            }

            nMarker = nextMarker(pJpegData, ref nPos, nInputLength);

            // Parsing and Huffman Decoding (on host)
            FrameHeader oFrameHeader = new FrameHeader();

            oFrameHeader.aComponentIdentifier       = new byte[3];
            oFrameHeader.aSamplingFactors           = new byte[3];
            oFrameHeader.aQuantizationTableSelector = new byte[3];

            QuantizationTable[] aQuantizationTables = new QuantizationTable[4];
            aQuantizationTables[0] = new QuantizationTable();
            aQuantizationTables[1] = new QuantizationTable();
            aQuantizationTables[2] = new QuantizationTable();
            aQuantizationTables[3] = new QuantizationTable();


            CudaDeviceVariable <byte>[] pdQuantizationTables = new CudaDeviceVariable <byte> [4];
            pdQuantizationTables[0] = new CudaDeviceVariable <byte>(64);
            pdQuantizationTables[1] = new CudaDeviceVariable <byte>(64);
            pdQuantizationTables[2] = new CudaDeviceVariable <byte>(64);
            pdQuantizationTables[3] = new CudaDeviceVariable <byte>(64);

            HuffmanTable[] aHuffmanTables = new HuffmanTable[4];
            aHuffmanTables[0] = new HuffmanTable();
            aHuffmanTables[1] = new HuffmanTable();
            aHuffmanTables[2] = new HuffmanTable();
            aHuffmanTables[3] = new HuffmanTable();


            ScanHeader oScanHeader = new ScanHeader();

            oScanHeader.aComponentSelector     = new byte[3];
            oScanHeader.aHuffmanTablesSelector = new byte[3];


            int nMCUBlocksH = 0;
            int nMCUBlocksV = 0;

            int nRestartInterval = -1;

            NppiSize[] aSrcSize = new NppiSize[3];

            short[][]        aphDCT   = new short[3][];
            NPPImage_16sC1[] apdDCT   = new NPPImage_16sC1[3];
            int[]            aDCTStep = new int[3];

            NPPImage_8uC1[] apSrcImage    = new NPPImage_8uC1[3];
            int[]           aSrcImageStep = new int[3];

            NPPImage_8uC1[] apDstImage    = new NPPImage_8uC1[3];
            int[]           aDstImageStep = new int[3];
            NppiSize[]      aDstSize      = new NppiSize[3];

            //Same read routine as in NPP JPEG sample from Nvidia
            while (nMarker != -1)
            {
                if (nMarker == 0x0D8)
                {
                    // Embeded Thumbnail, skip it
                    int nNextMarker = nextMarker(pJpegData, ref nPos, nInputLength);

                    while (nNextMarker != -1 && nNextMarker != 0x0D9)
                    {
                        nNextMarker = nextMarker(pJpegData, ref nPos, nInputLength);
                    }
                }

                if (nMarker == 0x0DD)
                {
                    readRestartInterval(pJpegData, ref nPos, ref nRestartInterval);
                }

                if ((nMarker == 0x0C0) | (nMarker == 0x0C2))
                {
                    //Assert Baseline for this Sample
                    //Note: NPP does support progressive jpegs for both encode and decode
                    if (nMarker != 0x0C0)
                    {
                        pdQuantizationTables[0].Dispose();
                        pdQuantizationTables[1].Dispose();
                        pdQuantizationTables[2].Dispose();
                        pdQuantizationTables[3].Dispose();

                        throw new ArgumentException(aFilename + " is not a Baseline-JPEG file.");
                    }

                    // Baseline or Progressive Frame Header
                    readFrameHeader(pJpegData, ref nPos, ref oFrameHeader);
                    //Console.WriteLine("Image Size: " + oFrameHeader.nWidth + "x" + oFrameHeader.nHeight + "x" + (int)(oFrameHeader.nComponents));

                    //Assert 3-Channel Image for this Sample
                    if (oFrameHeader.nComponents != 3)
                    {
                        pdQuantizationTables[0].Dispose();
                        pdQuantizationTables[1].Dispose();
                        pdQuantizationTables[2].Dispose();
                        pdQuantizationTables[3].Dispose();

                        throw new ArgumentException(aFilename + " is not a three channel JPEG file.");
                    }

                    // Compute channel sizes as stored in the JPEG (8x8 blocks & MCU block layout)
                    for (int i = 0; i < oFrameHeader.nComponents; ++i)
                    {
                        nMCUBlocksV = Math.Max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] >> 4);
                        nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
                    }

                    for (int i = 0; i < oFrameHeader.nComponents; ++i)
                    {
                        NppiSize oBlocks       = new NppiSize();
                        NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);

                        oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 *
                                                          (float)(oBlocksPerMCU.width) / nMCUBlocksH);
                        oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;

                        oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 *
                                                           (float)(oBlocksPerMCU.height) / nMCUBlocksV);
                        oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;

                        aSrcSize[i].width  = oBlocks.width * 8;
                        aSrcSize[i].height = oBlocks.height * 8;

                        // Allocate Memory
                        apdDCT[i]   = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
                        aDCTStep[i] = apdDCT[i].Pitch;

                        apSrcImage[i]    = new NPPImage_8uC1(aSrcSize[i].width, aSrcSize[i].height);
                        aSrcImageStep[i] = apSrcImage[i].Pitch;

                        aphDCT[i] = new short[aDCTStep[i] * oBlocks.height];
                    }
                }

                if (nMarker == 0x0DB)
                {
                    // Quantization Tables
                    readQuantizationTables(pJpegData, ref nPos, aQuantizationTables);
                }

                if (nMarker == 0x0C4)
                {
                    // Huffman Tables
                    readHuffmanTables(pJpegData, ref nPos, aHuffmanTables);
                }

                if (nMarker == 0x0DA)
                {
                    // Scan
                    readScanHeader(pJpegData, ref nPos, ref oScanHeader);
                    nPos += 6 + oScanHeader.nComponents * 2;

                    int nAfterNextMarkerPos = nPos;
                    int nAfterScanMarker    = nextMarker(pJpegData, ref nAfterNextMarkerPos, nInputLength);

                    if (nRestartInterval > 0)
                    {
                        while (nAfterScanMarker >= 0x0D0 && nAfterScanMarker <= 0x0D7)
                        {
                            // This is a restart marker, go on
                            nAfterScanMarker = nextMarker(pJpegData, ref nAfterNextMarkerPos, nInputLength);
                        }
                    }

                    NppiDecodeHuffmanSpec[] apHuffmanDCTableDec = new NppiDecodeHuffmanSpec[3];
                    NppiDecodeHuffmanSpec[] apHuffmanACTableDec = new NppiDecodeHuffmanSpec[3];

                    for (int i = 0; i < 3; ++i)
                    {
                        apHuffmanDCTableDec[i] = JPEGCompression.DecodeHuffmanSpecInitAllocHost(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
                        apHuffmanACTableDec[i] = JPEGCompression.DecodeHuffmanSpecInitAllocHost(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
                    }

                    byte[] img = new byte[nAfterNextMarkerPos - nPos - 2];
                    Buffer.BlockCopy(pJpegData, nPos, img, 0, nAfterNextMarkerPos - nPos - 2);


                    JPEGCompression.DecodeHuffmanScanHost(img, nRestartInterval, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, aphDCT[0], aphDCT[1], aphDCT[2], aDCTStep, apHuffmanDCTableDec, apHuffmanACTableDec, aSrcSize);

                    for (int i = 0; i < 3; ++i)
                    {
                        JPEGCompression.DecodeHuffmanSpecFreeHost(apHuffmanDCTableDec[i]);
                        JPEGCompression.DecodeHuffmanSpecFreeHost(apHuffmanACTableDec[i]);
                    }
                }

                nMarker = nextMarker(pJpegData, ref nPos, nInputLength);
            }


            // Copy DCT coefficients and Quantization Tables from host to device
            for (int i = 0; i < 4; ++i)
            {
                pdQuantizationTables[i].CopyToDevice(aQuantizationTables[i].aTable);
            }

            for (int i = 0; i < 3; ++i)
            {
                apdDCT[i].CopyToDevice(aphDCT[i], aDCTStep[i]);
            }

            // Inverse DCT
            for (int i = 0; i < 3; ++i)
            {
                compression.DCTQuantInv8x8LS(apdDCT[i], apSrcImage[i], aSrcSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
            }

            //Alloc final image
            NPPImage_8uC3 res = new NPPImage_8uC3(apSrcImage[0].Width, apSrcImage[0].Height);

            //Copy Y color plane to first channel
            apSrcImage[0].Copy(res, 0);

            //Cb anc Cr channel might be smaller
            if ((oFrameHeader.aSamplingFactors[0] & 0x0f) == 1 && oFrameHeader.aSamplingFactors[0] >> 4 == 1)
            {
                //Color planes are of same size as Y channel
                apSrcImage[1].Copy(res, 1);
                apSrcImage[2].Copy(res, 2);
            }
            else
            {
                //rescale color planes to full size
                double scaleX = oFrameHeader.aSamplingFactors[0] & 0x0f;
                double scaleY = oFrameHeader.aSamplingFactors[0] >> 4;

                apSrcImage[1].ResizeSqrPixel(apSrcImage[0], scaleX, scaleY, 0, 0, InterpolationMode.Lanczos);
                apSrcImage[0].Copy(res, 1);
                apSrcImage[2].ResizeSqrPixel(apSrcImage[0], scaleX, scaleY, 0, 0, InterpolationMode.Lanczos);
                apSrcImage[0].Copy(res, 2);
            }


            //System.Drawing.Bitmap is ordered BGR not RGB
            //The NPP routine YCbCR to BGR needs clampled input values, following the YCbCr standard.
            //But JPEG uses unclamped values ranging all from [0..255], thus use our own color matrix:
            float[,] YCbCrToBgr = new float[3, 4]
            {
                { 1.0f, 1.772f, 0.0f, -226.816f },
                { 1.0f, -0.34414f, -0.71414f, 135.45984f },
                { 1.0f, 0.0f, 1.402f, -179.456f }
            };

            //Convert from YCbCr to BGR
            res.ColorTwist(YCbCrToBgr);

            Bitmap bmp = new Bitmap(apSrcImage[0].Width, apSrcImage[0].Height, System.Drawing.Imaging.PixelFormat.Format24bppRgb);

            res.CopyToHost(bmp);

            //Cleanup:
            res.Dispose();
            apSrcImage[2].Dispose();
            apSrcImage[1].Dispose();
            apSrcImage[0].Dispose();

            apdDCT[2].Dispose();
            apdDCT[1].Dispose();
            apdDCT[0].Dispose();

            pdQuantizationTables[0].Dispose();
            pdQuantizationTables[1].Dispose();
            pdQuantizationTables[2].Dispose();
            pdQuantizationTables[3].Dispose();

            compression.Dispose();

            return(bmp);
        }