/// <summary>
/// Encodes a 24 bit colour bitmap as a JPEG file, running colour conversion,
/// chroma down-sampling, forward DCT/quantization and Huffman encoding through NPP.
/// The chroma planes are stored at half resolution in both directions
/// (sampling factors 2x2 for Y, 1x1 for Cb and Cr).
/// </summary>
/// <param name="aFilename">Path of the file to create; an existing file is overwritten.</param>
/// <param name="aQuality">Quality value handed to the <c>QuantizationTable</c> constructors to scale the standard tables.</param>
/// <param name="aImage">Source bitmap; must be <c>Format24bppRgb</c> with width and height being multiples of 16.</param>
/// <exception cref="ArgumentNullException"><paramref name="aFilename"/> or <paramref name="aImage"/> is null.</exception>
/// <exception cref="ArgumentException">The bitmap has an unsupported pixel format or unsupported dimensions.</exception>
/// <exception cref="InvalidOperationException">The encoded stream does not fit into the fixed size output buffer.</exception>
public static void SaveJpeg(string aFilename, int aQuality, Bitmap aImage)
{
    // Fail fast, before any device memory is allocated.
    if (aImage == null)
    {
        throw new ArgumentNullException("aImage");
    }
    if (aFilename == null)
    {
        throw new ArgumentNullException("aFilename");
    }
    if (aImage.PixelFormat != System.Drawing.Imaging.PixelFormat.Format24bppRgb)
    {
        throw new ArgumentException("Only three channel color images are supported.");
    }
    if (aImage.Width % 16 != 0 || aImage.Height % 16 != 0)
    {
        throw new ArgumentException("The provided bitmap must have a height and width of a multiple of 16.");
    }
    // The frame header stores the dimensions as ushort; larger values would be silently truncated below.
    if (aImage.Width > ushort.MaxValue || aImage.Height > ushort.MaxValue)
    {
        throw new ArgumentException("The provided bitmap is too large: JPEG frame dimensions are limited to 65535 pixels.");
    }

    // Everything that owns native/device resources is declared up front so the
    // finally block can release whatever was allocated when an error occurs.
    JPEGCompression compression = null;
    NPPImage_8uC3 src = null;
    NPPImage_8uC1 srcY = null;
    NPPImage_8uC1 srcCb = null;
    NPPImage_8uC1 srcCr = null;
    CudaDeviceVariable<byte>[] pdQuantizationTables = new CudaDeviceVariable<byte>[2];
    NPPImage_16sC1[] apdDCT = new NPPImage_16sC1[3];
    CudaDeviceVariable<byte> pdScan = null;
    CudaDeviceVariable<byte> pJpegEncoderTemp = null;

    try
    {
        compression = new JPEGCompression();
        src = new NPPImage_8uC3(aImage.Width, aImage.Height);
        srcY = new NPPImage_8uC1(aImage.Width, aImage.Height);
        srcCb = new NPPImage_8uC1(aImage.Width / 2, aImage.Height / 2);
        srcCr = new NPPImage_8uC1(aImage.Width / 2, aImage.Height / 2);
        src.CopyToDevice(aImage);

        // System.Drawing.Bitmap is ordered BGR not RGB.
        // The NPP routine BGR to YCbCr outputs the values in clamped range, following the YCbCr standard.
        // But JPEG uses unclamped values ranging all from [0..255], thus use our own color matrix:
        float[,] BgrToYCbCr = new float[3, 4]
        {
            { 0.114f, 0.587f, 0.299f, 0 },
            { 0.5f, -0.33126f, -0.16874f, 128 },
            { -0.08131f, -0.41869f, 0.5f, 128 }
        };
        src.ColorTwist(BgrToYCbCr);

        // Reduce size of Cb and Cr channel. srcY doubles as a full size scratch plane
        // for the chroma channels and receives the actual Y channel last.
        src.Copy(srcY, 2);
        srcY.Resize(srcCr, 0.5, 0.5, InterpolationMode.SuperSampling);
        src.Copy(srcY, 1);
        srcY.Resize(srcCb, 0.5, 0.5, InterpolationMode.SuperSampling);
        src.Copy(srcY, 0);

        FrameHeader oFrameHeader = new FrameHeader();
        oFrameHeader.nComponents = 3;
        oFrameHeader.nHeight = (ushort)aImage.Height;
        oFrameHeader.nSamplePrecision = 8;
        oFrameHeader.nWidth = (ushort)aImage.Width;
        oFrameHeader.aComponentIdentifier = new byte[] { 1, 2, 3 };
        // 34 = 0x22 and 17 = 0x11: the Y channel is twice the size of the Cb/Cr channels.
        oFrameHeader.aSamplingFactors = new byte[] { 34, 17, 17 };
        oFrameHeader.aQuantizationTableSelector = new byte[] { 0, 1, 1 };

        // Get quantization tables from JPEG standard with quality scaling
        QuantizationTable[] aQuantizationTables = new QuantizationTable[2];
        aQuantizationTables[0] = new QuantizationTable(QuantizationTable.QuantizationType.Luminance, aQuality);
        aQuantizationTables[1] = new QuantizationTable(QuantizationTable.QuantizationType.Chroma, aQuality);

        pdQuantizationTables[0] = aQuantizationTables[0].aTable;
        pdQuantizationTables[1] = aQuantizationTables[1].aTable;

        // Get Huffman tables from JPEG standard
        HuffmanTable[] aHuffmanTables = new HuffmanTable[4];
        aHuffmanTables[0] = new HuffmanTable(HuffmanTable.HuffmanType.LuminanceDC);
        aHuffmanTables[1] = new HuffmanTable(HuffmanTable.HuffmanType.ChromaDC);
        aHuffmanTables[2] = new HuffmanTable(HuffmanTable.HuffmanType.LuminanceAC);
        aHuffmanTables[3] = new HuffmanTable(HuffmanTable.HuffmanType.ChromaAC);

        // Set scan header: one scan over all three components and all 64 coefficients.
        ScanHeader oScanHeader = new ScanHeader();
        oScanHeader.nA = 0;
        oScanHeader.nComponents = 3;
        oScanHeader.nSe = 63;
        oScanHeader.nSs = 0;
        oScanHeader.aComponentSelector = new byte[] { 1, 2, 3 };
        // High nibble selects the DC table, low nibble the AC table (17 = 0x11).
        oScanHeader.aHuffmanTablesSelector = new byte[] { 0, 17, 17 };

        NPPImage_8uC1[] apDstImage = new NPPImage_8uC1[] { srcY, srcCb, srcCr };
        NppiSize[] aDstSize = new NppiSize[]
        {
            new NppiSize(srcY.Width, srcY.Height),
            new NppiSize(srcCb.Width, srcCb.Height),
            new NppiSize(srcCr.Width, srcCr.Height)
        };

        // Compute channel sizes as stored in the JPEG (8x8 blocks & MCU block layout)
        int nMCUBlocksH = 0;
        int nMCUBlocksV = 0;
        for (int i = 0; i < oFrameHeader.nComponents; ++i)
        {
            nMCUBlocksV = Math.Max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] >> 4);
            nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
        }

        for (int i = 0; i < oFrameHeader.nComponents; ++i)
        {
            NppiSize oBlocks = new NppiSize();
            NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);

            oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 * (float)(oBlocksPerMCU.width) / nMCUBlocksH);
            oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;

            oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 * (float)(oBlocksPerMCU.height) / nMCUBlocksV);
            oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;

            // One row of the DCT image holds the 64 coefficients of every block in a block row.
            apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
        }

        // Forward DCT and quantization
        for (int i = 0; i < 3; ++i)
        {
            compression.DCTQuantFwd8x8LS(apDstImage[i], apdDCT[i], aDstSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
        }

        // Huffman Encoding
        pdScan = new CudaDeviceVariable<byte>(BUFFER_SIZE);
        int nScanLength = 0;

        int nTempSize = JPEGCompression.EncodeHuffmanGetSize(aDstSize[0], 3);
        pJpegEncoderTemp = new CudaDeviceVariable<byte>(nTempSize);

        NppiEncodeHuffmanSpec[] apHuffmanDCTableEnc = new NppiEncodeHuffmanSpec[3];
        NppiEncodeHuffmanSpec[] apHuffmanACTableEnc = new NppiEncodeHuffmanSpec[3];
        // Count successful allocations so that exactly those specs are freed again.
        int nDCSpecs = 0;
        int nACSpecs = 0;
        try
        {
            for (int i = 0; i < 3; ++i)
            {
                apHuffmanDCTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
                ++nDCSpecs;
                apHuffmanACTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
                ++nACSpecs;
            }

            JPEGCompression.EncodeHuffmanScan(apdDCT, 0, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, pdScan, ref nScanLength, apHuffmanDCTableEnc, apHuffmanACTableEnc, aDstSize, pJpegEncoderTemp);
        }
        finally
        {
            for (int i = 0; i < 3; ++i)
            {
                if (i < nDCSpecs)
                {
                    JPEGCompression.EncodeHuffmanSpecFree(apHuffmanDCTableEnc[i]);
                }
                if (i < nACSpecs)
                {
                    JPEGCompression.EncodeHuffmanSpecFree(apHuffmanACTableEnc[i]);
                }
            }
        }

        // Write JPEG to byte array, as in original sample code.
        // The frame header already carries the bitmap dimensions, so it is written unchanged.
        byte[] pDstOutput = new byte[BUFFER_SIZE];
        int pos = 0;

        writeMarker(0x0D8, pDstOutput, ref pos);
        writeJFIFTag(pDstOutput, ref pos);
        writeQuantizationTable(aQuantizationTables[0], pDstOutput, ref pos);
        writeQuantizationTable(aQuantizationTables[1], pDstOutput, ref pos);
        writeFrameHeader(oFrameHeader, pDstOutput, ref pos);
        writeHuffmanTable(aHuffmanTables[0], pDstOutput, ref pos);
        writeHuffmanTable(aHuffmanTables[1], pDstOutput, ref pos);
        writeHuffmanTable(aHuffmanTables[2], pDstOutput, ref pos);
        writeHuffmanTable(aHuffmanTables[3], pDstOutput, ref pos);
        writeScanHeader(oScanHeader, pDstOutput, ref pos);

        // Guard the copy into the fixed size host buffer: the scan data plus the
        // end-of-image marker must fit behind the headers.
        // NOTE(review): assumes writeMarker emits the standard two byte JPEG marker - confirm.
        if ((long)pos + nScanLength + 2 > pDstOutput.Length)
        {
            throw new InvalidOperationException("The encoded JPEG does not fit into the output buffer.");
        }

        pdScan.CopyToHost(pDstOutput, 0, pos, nScanLength);
        pos += nScanLength;
        writeMarker(0x0D9, pDstOutput, ref pos);

        using (FileStream fs = new FileStream(aFilename, FileMode.Create, FileAccess.Write))
        {
            fs.Write(pDstOutput, 0, pos);
        }
    }
    finally
    {
        // Cleanup in reverse allocation order; entries that were never allocated are still null.
        if (pJpegEncoderTemp != null)
        {
            pJpegEncoderTemp.Dispose();
        }
        if (pdScan != null)
        {
            pdScan.Dispose();
        }
        for (int i = apdDCT.Length - 1; i >= 0; --i)
        {
            if (apdDCT[i] != null)
            {
                apdDCT[i].Dispose();
            }
        }
        for (int i = pdQuantizationTables.Length - 1; i >= 0; --i)
        {
            if (pdQuantizationTables[i] != null)
            {
                pdQuantizationTables[i].Dispose();
            }
        }
        if (srcCr != null)
        {
            srcCr.Dispose();
        }
        if (srcCb != null)
        {
            srcCb.Dispose();
        }
        if (srcY != null)
        {
            srcY.Dispose();
        }
        if (src != null)
        {
            src.Dispose();
        }
        if (compression != null)
        {
            compression.Dispose();
        }
    }
}
/// <summary>
/// Decodes a three channel baseline JPEG file into a 24 bit bitmap.
/// Marker parsing and Huffman decoding run on the host; inverse DCT, chroma
/// up-sampling and the YCbCr to BGR conversion run through NPP.
/// </summary>
/// <param name="aFilename">Path of the JPEG file to read.</param>
/// <returns>
/// A new <c>Format24bppRgb</c> bitmap owned by the caller. Its size is the size of the decoded
/// Y plane, i.e. the frame size rounded up to whole MCU blocks.
/// </returns>
/// <exception cref="ArgumentException">
/// The file does not start with a JPEG start-of-image marker, is not a baseline JPEG,
/// does not have exactly three components, or contains no frame header.
/// </exception>
public static Bitmap LoadJpeg(string aFilename)
{
    JPEGCompression compression = new JPEGCompression();

    // Device resources are tracked here so the finally block can release whatever
    // has been allocated, no matter where parsing or decoding fails.
    CudaDeviceVariable<byte>[] pdQuantizationTables = new CudaDeviceVariable<byte>[4];
    NPPImage_16sC1[] apdDCT = new NPPImage_16sC1[3];
    NPPImage_8uC1[] apSrcImage = new NPPImage_8uC1[3];
    NPPImage_8uC3 res = null;

    try
    {
        byte[] pJpegData = File.ReadAllBytes(aFilename);
        int nInputLength = pJpegData.Length;

        // Check if this is a valid JPEG file
        int nPos = 0;
        int nMarker = nextMarker(pJpegData, ref nPos, nInputLength);
        if (nMarker != 0x0D8)
        {
            throw new ArgumentException(aFilename + " is not a JPEG file.");
        }
        nMarker = nextMarker(pJpegData, ref nPos, nInputLength);

        // Parsing and Huffman Decoding (on host)
        FrameHeader oFrameHeader = new FrameHeader();
        oFrameHeader.aComponentIdentifier = new byte[3];
        oFrameHeader.aSamplingFactors = new byte[3];
        oFrameHeader.aQuantizationTableSelector = new byte[3];

        QuantizationTable[] aQuantizationTables = new QuantizationTable[4];
        HuffmanTable[] aHuffmanTables = new HuffmanTable[4];
        for (int i = 0; i < 4; ++i)
        {
            aQuantizationTables[i] = new QuantizationTable();
            aHuffmanTables[i] = new HuffmanTable();
        }
        for (int i = 0; i < 4; ++i)
        {
            pdQuantizationTables[i] = new CudaDeviceVariable<byte>(64);
        }

        ScanHeader oScanHeader = new ScanHeader();
        oScanHeader.aComponentSelector = new byte[3];
        oScanHeader.aHuffmanTablesSelector = new byte[3];

        int nMCUBlocksH = 0;
        int nMCUBlocksV = 0;
        int nRestartInterval = -1;
        bool bFrameHeaderRead = false;

        NppiSize[] aSrcSize = new NppiSize[3];
        short[][] aphDCT = new short[3][];
        int[] aDCTStep = new int[3];

        // Same read routine as in NPP JPEG sample from Nvidia
        while (nMarker != -1)
        {
            if (nMarker == 0x0D8)
            {
                // Embedded thumbnail, skip it up to its end-of-image marker
                int nNextMarker = nextMarker(pJpegData, ref nPos, nInputLength);
                while (nNextMarker != -1 && nNextMarker != 0x0D9)
                {
                    nNextMarker = nextMarker(pJpegData, ref nPos, nInputLength);
                }
            }

            if (nMarker == 0x0DD)
            {
                readRestartInterval(pJpegData, ref nPos, ref nRestartInterval);
            }

            if (nMarker == 0x0C0 || nMarker == 0x0C2)
            {
                // Assert Baseline for this Sample
                // Note: NPP does support progressive jpegs for both encode and decode
                if (nMarker != 0x0C0)
                {
                    throw new ArgumentException(aFilename + " is not a Baseline-JPEG file.");
                }

                // Baseline Frame Header
                readFrameHeader(pJpegData, ref nPos, ref oFrameHeader);

                // Assert 3-Channel Image for this Sample
                if (oFrameHeader.nComponents != 3)
                {
                    throw new ArgumentException(aFilename + " is not a three channel JPEG file.");
                }

                // Compute channel sizes as stored in the JPEG (8x8 blocks & MCU block layout)
                for (int i = 0; i < oFrameHeader.nComponents; ++i)
                {
                    nMCUBlocksV = Math.Max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] >> 4);
                    nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
                }

                for (int i = 0; i < oFrameHeader.nComponents; ++i)
                {
                    NppiSize oBlocks = new NppiSize();
                    NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);

                    oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 * (float)(oBlocksPerMCU.width) / nMCUBlocksH);
                    oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;

                    oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 * (float)(oBlocksPerMCU.height) / nMCUBlocksV);
                    oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;

                    aSrcSize[i].width = oBlocks.width * 8;
                    aSrcSize[i].height = oBlocks.height * 8;

                    // Release buffers of an earlier frame header so they are not leaked
                    // when they get replaced below.
                    if (apdDCT[i] != null)
                    {
                        apdDCT[i].Dispose();
                        apdDCT[i] = null;
                    }
                    if (apSrcImage[i] != null)
                    {
                        apSrcImage[i].Dispose();
                        apSrcImage[i] = null;
                    }

                    // Allocate Memory: device DCT image, device pixel plane and a host
                    // coefficient buffer sized with the device pitch.
                    apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
                    aDCTStep[i] = apdDCT[i].Pitch;

                    apSrcImage[i] = new NPPImage_8uC1(aSrcSize[i].width, aSrcSize[i].height);

                    aphDCT[i] = new short[aDCTStep[i] * oBlocks.height];
                }

                bFrameHeaderRead = true;
            }

            if (nMarker == 0x0DB)
            {
                // Quantization Tables
                readQuantizationTables(pJpegData, ref nPos, aQuantizationTables);
            }

            if (nMarker == 0x0C4)
            {
                // Huffman Tables
                readHuffmanTables(pJpegData, ref nPos, aHuffmanTables);
            }

            if (nMarker == 0x0DA)
            {
                // Scan
                readScanHeader(pJpegData, ref nPos, ref oScanHeader);
                // 6 + 2 * components is the length of a scan header segment.
                // NOTE(review): presumably readScanHeader leaves nPos at the segment start - confirm.
                nPos += 6 + oScanHeader.nComponents * 2;

                // Find the marker that terminates the entropy coded data.
                int nAfterNextMarkerPos = nPos;
                int nAfterScanMarker = nextMarker(pJpegData, ref nAfterNextMarkerPos, nInputLength);

                if (nRestartInterval > 0)
                {
                    while (nAfterScanMarker >= 0x0D0 && nAfterScanMarker <= 0x0D7)
                    {
                        // This is a restart marker, go on
                        nAfterScanMarker = nextMarker(pJpegData, ref nAfterNextMarkerPos, nInputLength);
                    }
                }

                NppiDecodeHuffmanSpec[] apHuffmanDCTableDec = new NppiDecodeHuffmanSpec[3];
                NppiDecodeHuffmanSpec[] apHuffmanACTableDec = new NppiDecodeHuffmanSpec[3];
                // Count successful allocations so that exactly those specs are freed again.
                int nDCSpecs = 0;
                int nACSpecs = 0;
                try
                {
                    for (int i = 0; i < 3; ++i)
                    {
                        // High nibble selects the DC table, low nibble the AC table.
                        apHuffmanDCTableDec[i] = JPEGCompression.DecodeHuffmanSpecInitAllocHost(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
                        ++nDCSpecs;
                        apHuffmanACTableDec[i] = JPEGCompression.DecodeHuffmanSpecInitAllocHost(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
                        ++nACSpecs;
                    }

                    // Entropy coded bytes between the scan header and the terminating marker.
                    int nScanBytes = nAfterNextMarkerPos - nPos - 2;
                    byte[] img = new byte[nScanBytes];
                    Buffer.BlockCopy(pJpegData, nPos, img, 0, nScanBytes);

                    JPEGCompression.DecodeHuffmanScanHost(img, nRestartInterval, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, aphDCT[0], aphDCT[1], aphDCT[2], aDCTStep, apHuffmanDCTableDec, apHuffmanACTableDec, aSrcSize);
                }
                finally
                {
                    for (int i = 0; i < 3; ++i)
                    {
                        if (i < nDCSpecs)
                        {
                            JPEGCompression.DecodeHuffmanSpecFreeHost(apHuffmanDCTableDec[i]);
                        }
                        if (i < nACSpecs)
                        {
                            JPEGCompression.DecodeHuffmanSpecFreeHost(apHuffmanACTableDec[i]);
                        }
                    }
                }
            }

            nMarker = nextMarker(pJpegData, ref nPos, nInputLength);
        }

        // Without a frame header no buffers exist; report that instead of failing
        // with a NullReferenceException further down.
        if (!bFrameHeaderRead)
        {
            throw new ArgumentException(aFilename + " does not contain a JPEG frame header.");
        }

        // Copy DCT coefficients and Quantization Tables from host to device
        for (int i = 0; i < 4; ++i)
        {
            pdQuantizationTables[i].CopyToDevice(aQuantizationTables[i].aTable);
        }

        for (int i = 0; i < 3; ++i)
        {
            apdDCT[i].CopyToDevice(aphDCT[i], aDCTStep[i]);
        }

        // Inverse DCT
        for (int i = 0; i < 3; ++i)
        {
            compression.DCTQuantInv8x8LS(apdDCT[i], apSrcImage[i], aSrcSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
        }

        // Alloc final image
        res = new NPPImage_8uC3(apSrcImage[0].Width, apSrcImage[0].Height);

        // Copy Y color plane to first channel
        apSrcImage[0].Copy(res, 0);

        // Cb and Cr channel might be smaller
        int nSamplingH = oFrameHeader.aSamplingFactors[0] & 0x0f;
        int nSamplingV = oFrameHeader.aSamplingFactors[0] >> 4;
        if (nSamplingH == 1 && nSamplingV == 1)
        {
            // Color planes are of same size as Y channel
            apSrcImage[1].Copy(res, 1);
            apSrcImage[2].Copy(res, 2);
        }
        else
        {
            // Rescale color planes to full size. The Y plane has already been copied
            // into res, so its buffer is reused as the up-sampling target.
            // NOTE(review): the scale uses the Y sampling factors only, i.e. it assumes
            // the chroma components are sampled 1x1 - confirm for other layouts.
            double scaleX = nSamplingH;
            double scaleY = nSamplingV;

            apSrcImage[1].ResizeSqrPixel(apSrcImage[0], scaleX, scaleY, 0, 0, InterpolationMode.Lanczos);
            apSrcImage[0].Copy(res, 1);
            apSrcImage[2].ResizeSqrPixel(apSrcImage[0], scaleX, scaleY, 0, 0, InterpolationMode.Lanczos);
            apSrcImage[0].Copy(res, 2);
        }

        // System.Drawing.Bitmap is ordered BGR not RGB.
        // The NPP routine YCbCr to BGR needs clamped input values, following the YCbCr standard.
        // But JPEG uses unclamped values ranging all from [0..255], thus use our own color matrix:
        float[,] YCbCrToBgr = new float[3, 4]
        {
            { 1.0f, 1.772f, 0.0f, -226.816f },
            { 1.0f, -0.34414f, -0.71414f, 135.45984f },
            { 1.0f, 0.0f, 1.402f, -179.456f }
        };

        // Convert from YCbCr to BGR
        res.ColorTwist(YCbCrToBgr);

        Bitmap bmp = new Bitmap(apSrcImage[0].Width, apSrcImage[0].Height, System.Drawing.Imaging.PixelFormat.Format24bppRgb);
        try
        {
            res.CopyToHost(bmp);
        }
        catch
        {
            // The bitmap never reaches the caller in this case, so release it here.
            bmp.Dispose();
            throw;
        }

        return bmp;
    }
    finally
    {
        // Cleanup; entries that were never allocated are still null.
        if (res != null)
        {
            res.Dispose();
        }
        for (int i = apSrcImage.Length - 1; i >= 0; --i)
        {
            if (apSrcImage[i] != null)
            {
                apSrcImage[i].Dispose();
            }
        }
        for (int i = apdDCT.Length - 1; i >= 0; --i)
        {
            if (apdDCT[i] != null)
            {
                apdDCT[i].Dispose();
            }
        }
        for (int i = 0; i < pdQuantizationTables.Length; ++i)
        {
            if (pdQuantizationTables[i] != null)
            {
                pdQuantizationTables[i].Dispose();
            }
        }
        compression.Dispose();
    }
}