/// <summary>
/// Ensures the NPP working images match the dimensions of <paramref name="size"/>.
/// First call allocates everything; later calls reuse the existing buffers via an
/// ROI when they are large enough, otherwise they are disposed and reallocated.
/// </summary>
/// <param name="size">Bitmap whose width/height define the required image size.</param>
private void AllocateImagesNPP(Bitmap size)
{
    int width = size.Width;
    int height = size.Height;

    // Single allocation routine keeps the first-call and reallocation paths in sync.
    void AllocateAll()
    {
        inputImage8uC1 = new NPPImage_8uC1(width, height);
        inputImage8uC3 = new NPPImage_8uC3(width, height);
        inputImage8uC4 = new NPPImage_8uC4(width, height);
        imageBayer = new NPPImage_32fC1(width, height);
        inputImage32f = new NPPImage_32fC3(width, height);
        noisyImage8u = new NPPImage_8uC3(width, height);
        noiseImage32f = new NPPImage_32fC3(width, height);
        resultImage8u = new NPPImage_8uC3(width, height);
        resultImage32f = new NPPImage_32fC3(width, height);
    }

    // inputImage8uC3 acts as the sentinel: all images are created/destroyed together.
    if (inputImage8uC3 == null)
    {
        AllocateAll();
        return;
    }

    if (inputImage8uC3.Width >= width && inputImage8uC3.Height >= height)
    {
        // Existing allocations are big enough — just restrict the working ROI.
        inputImage8uC1.SetRoi(0, 0, width, height);
        inputImage8uC3.SetRoi(0, 0, width, height);
        inputImage8uC4.SetRoi(0, 0, width, height);
        imageBayer.SetRoi(0, 0, width, height);
        inputImage32f.SetRoi(0, 0, width, height);
        noisyImage8u.SetRoi(0, 0, width, height);
        noiseImage32f.SetRoi(0, 0, width, height);
        resultImage8u.SetRoi(0, 0, width, height);
        resultImage32f.SetRoi(0, 0, width, height);
    }
    else
    {
        // Too small — free the device memory and start over at the new size.
        inputImage8uC1.Dispose();
        inputImage8uC3.Dispose();
        inputImage8uC4.Dispose();
        imageBayer.Dispose();
        inputImage32f.Dispose();
        noisyImage8u.Dispose();
        noiseImage32f.Dispose();
        resultImage8u.Dispose();
        resultImage32f.Dispose();
        AllocateAll();
    }
}
/// <summary>
/// Entry point of the CNN denoising trainer. Parses command-line options,
/// initializes CUDA/cuBLAS/cuDNN, loads the training file lists and white-balance
/// factors, builds a 3-layer convolutional network and then either cross-validates
/// previously saved epochs (-t) or trains the network, saving one .cnn file per epoch.
/// Options: -d deviceID, -lr learning rate, -iso ISO string, -t crosscheck,
/// -w warm-start epoch, -s save sample images.
/// </summary>
static void Main(string[] args)
{
    // Read CL arguments. Note: value options consume the following argument via ++i.
    for (int i = 0; i < args.Length; i++)
    {
        if (args[i] == "-d") { deviceID = int.Parse(args[++i]); }
        if (args[i] == "-lr") { learning_rate = double.Parse(args[++i], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture); }
        if (args[i] == "-iso") { ISO = args[++i]; }
        if (args[i] == "-t") { crosscheck = true; }
        if (args[i] == "-w") { warmStart = int.Parse(args[++i]); Console.WriteLine("Start with epoch " + warmStart); }
        if (args[i] == "-s") { saveImages = true; }
    }
    Console.WriteLine("Using device ID: " + deviceID);
    Console.WriteLine("Learning rate: " + learning_rate);

    // Init Cuda stuff: context plus the PTX modules holding the custom kernels.
    ctx = new PrimaryContext(deviceID);
    ctx.SetCurrent();
    Console.WriteLine("Context created");
    CUmodule modPatch = ctx.LoadModulePTX("PatchProcessing.ptx");
    Console.WriteLine("modPatch loaded");
    CUmodule modBorder = ctx.LoadModulePTX("BorderTreatment.ptx");
    Console.WriteLine("modBorder loaded");
    CUmodule modError = ctx.LoadModulePTX("ErrorComputation.ptx");
    Console.WriteLine("modError loaded");
    CUmodule modPRelu = ctx.LoadModulePTX("PRelu.ptx");
    Console.WriteLine("modPRelu loaded");
    CUmodule modDeBayer = ctx.LoadModulePTX("DeBayer.ptx");
    Console.WriteLine("all modules loaded");
    deBayerGreenKernel = new DeBayerGreenKernel(modDeBayer, ctx);
    deBayerRedBlueKernel = new DeBayerRedBlueKernel(modDeBayer, ctx);
    // Both deBayer kernels are loaded from the same module: setting the constant
    // variable for the bayer pattern on one of them is enough.
    deBayerGreenKernel.BayerPattern = new BayerColor[] { BayerColor.Red, BayerColor.Green, BayerColor.Green, BayerColor.Blue };
    prepareDataKernel = new PrepareDataKernel(modPatch, ctx);
    restoreImageKernel = new RestoreImageKernel(modPatch, ctx);
    Console.WriteLine("kernels loaded");

    // Dataset sizes (fixed counts of patch files on disk for each source dataset).
    int countOwn = 468083;
    int count5k = 33408;
    string fileBase = @"/ssd/data/TrainingsDataNN/";

    // Read the per-image white balance factors; one line per patch file.
    // NOTE(review): values[1..3] are parsed, values[0] is skipped — presumably an
    // index/filename column; verify against the generator of these .txt files.
    List<float3> WhiteBalanceFactors = new List<float3>();
    FileStream fs1 = new FileStream(fileBase + "FromOwnDataset/WhiteBalancesOwn.txt", FileMode.Open, FileAccess.Read);
    FileStream fs2 = new FileStream(fileBase + "From5kDataset/WhiteBalances5k.txt", FileMode.Open, FileAccess.Read);
    StreamReader sr1 = new StreamReader(fs1);
    StreamReader sr2 = new StreamReader(fs2);
    for (int i = 0; i < countOwn; i++)
    {
        fileRawList.Add(fileBase + "FromOwnDataset/ISO" + ISO + "/img_" + i.ToString("0000000") + ".bin");
        fileTrouthList.Add(fileBase + "FromOwnDataset/GroundTruth/img_" + i.ToString("0000000") + ".bin");
        string line = sr1.ReadLine();
        string[] values = line.Split('\t');
        float3 wb = new float3(float.Parse(values[1], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture),
            float.Parse(values[2], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture),
            float.Parse(values[3], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture));
        WhiteBalanceFactors.Add(wb);
    }
    for (int i = 0; i < count5k; i++)
    {
        fileRawList.Add(fileBase + "From5kDataset/ISO" + ISO + "/img_" + i.ToString("0000000") + ".bin");
        fileTrouthList.Add(fileBase + "From5kDataset/GroundTruth/img_" + i.ToString("0000000") + ".bin");
        string line = sr2.ReadLine();
        string[] values = line.Split('\t');
        float3 wb = new float3(float.Parse(values[1], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture),
            float.Parse(values[2], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture),
            float.Parse(values[3], System.Globalization.NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture));
        WhiteBalanceFactors.Add(wb);
    }
    sr2.Close();
    sr1.Close();

    // In-memory caches of decoded patches; filled lazily during the first epoch.
    baOriginal = new float3[countOwn + count5k][];
    baRAW = new float[countOwn + count5k][];

    Random rand = new Random(0); // fixed seed => reproducible shuffle
    // random order for the image patches (Fisher-Yates style; the three lists are
    // permuted identically so raw/ground-truth/white-balance stay aligned).
    for (int i = 0; i < countOwn + count5k - 1; i++)
    {
        int r = i + (rand.Next() % (countOwn + count5k - i));
        string temp = fileRawList[i];
        fileRawList[i] = fileRawList[r];
        fileRawList[r] = temp;
        temp = fileTrouthList[i];
        fileTrouthList[i] = fileTrouthList[r];
        fileTrouthList[r] = temp;
        float3 tempf = WhiteBalanceFactors[i];
        WhiteBalanceFactors[i] = WhiteBalanceFactors[r];
        WhiteBalanceFactors[r] = tempf;
    }
    Console.WriteLine("Initialization done!");

    // 90/10 train/validation split of the shuffled file list.
    int trainingSize = (int)((countOwn + count5k) * 0.9f); //4 patches per file
    int testSize = fileRawList.Count - trainingSize;

    CudaBlas blas = new CudaBlas(PointerMode.Host);
    CudaDNNContext cudnn = new CudaDNNContext();
    int patchSize = 31;   // network input/output patch size in pixels
    int patchSize4 = 66;  //Size of an 2x2 patch read from file
    int batch = 64;
    float normalization = 0.5f;

    // define neural network: 3 conv layers (9x9/64, 5x5/64, 5x5/3) with PRelu
    // activations; the final layer computes the mixed training error.
    StartLayer start = new StartLayer(patchSize, patchSize, 3, batch);
    FinalLayer final = new FinalLayer(patchSize, patchSize, 3, batch, FinalLayer.Norm.Mix, ctx, modError);
    ConvolutionalLayer conv1 = new ConvolutionalLayer(patchSize, patchSize, 3, patchSize, patchSize, 64, batch, 9, 9, ConvolutionalLayer.Activation.PRelu, blas, cudnn, ctx, modBorder, modPRelu);
    ConvolutionalLayer conv2 = new ConvolutionalLayer(patchSize, patchSize, 64, patchSize, patchSize, 64, batch, 5, 5, ConvolutionalLayer.Activation.PRelu, blas, cudnn, ctx, modBorder, modPRelu);
    ConvolutionalLayer conv3 = new ConvolutionalLayer(patchSize, patchSize, 64, patchSize, patchSize, 3, batch, 5, 5, ConvolutionalLayer.Activation.None, blas, cudnn, ctx, modBorder, modPRelu);
    start.ConnectFollowingLayer(conv1);
    conv1.ConnectFollowingLayer(conv2);
    conv2.ConnectFollowingLayer(conv3);
    conv3.ConnectFollowingLayer(final);

    // Device buffers reused for every batch.
    CudaDeviceVariable<float3> imgA = new CudaDeviceVariable<float3>(patchSize4 * patchSize4);
    CudaDeviceVariable<float3> imgB = new CudaDeviceVariable<float3>(patchSize4 * patchSize4);
    CudaDeviceVariable<float> rawd = new CudaDeviceVariable<float>(patchSize4 * patchSize4);
    CudaDeviceVariable<float> inputImgs = new CudaDeviceVariable<float>(patchSize * patchSize * 3 * batch);
    CudaDeviceVariable<float> groundTrouth = new CudaDeviceVariable<float>(patchSize * patchSize * 3 * batch);
    // Scratch images/bitmaps for optional sample-image output (-s).
    NPPImage_8uC3 imgU3a = new NPPImage_8uC3(patchSize, patchSize);
    NPPImage_8uC3 imgU3b = new NPPImage_8uC3(patchSize, patchSize);
    NPPImage_8uC3 imgU3c = new NPPImage_8uC3(patchSize, patchSize);
    Bitmap a = new Bitmap(patchSize, patchSize, PixelFormat.Format24bppRgb);
    Bitmap b = new Bitmap(patchSize, patchSize, PixelFormat.Format24bppRgb);
    Bitmap c = new Bitmap(patchSize, patchSize, PixelFormat.Format24bppRgb);
    Random randImageOutput = new Random(0);
    Random randForInit = new Random(0); // fixed seed => reproducible weight init
    start.InitRandomWeight(randForInit);
    conv1.SetActivation(0.1f);
    conv2.SetActivation(0.1f);
    int startEpoch = warmStart;
    FileStream fs;

    // restore network in case of warm start:
    if (warmStart > 0)
    {
        fs = new FileStream("epoch_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + (warmStart - 1) + ".cnn", FileMode.Open, FileAccess.Read);
        start.RestoreValues(fs);
        fs.Close();
        fs.Dispose();
    }

    // validate results on validation data set
    if (crosscheck)
    {
        FileStream csvResult = new FileStream("results_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + ".csv", FileMode.Append, FileAccess.Write);
        StreamWriter sw = new StreamWriter(csvResult);
        sw.WriteLine("L1;L2;Mix;Filename");
        // Try every possible epoch file; missing files are skipped via the catch.
        for (int i = 0; i < 2000; i += 1)
        {
            string filename = "epoch_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + i + ".cnn";
            try
            {
                FileStream cnn = new FileStream(filename, FileMode.Open, FileAccess.Read);
                start.RestoreValues(cnn);
                cnn.Close();
                cnn.Dispose();
            }
            catch (Exception)
            {
                // Epoch file does not exist (or could not be read): skip it.
                Console.WriteLine("Skipping: " + i);
                continue;
            }
            double errorL1 = 0;
            double errorL2 = 0;
            double errorMix = 0;
            for (int iter = 0; iter < testSize / batch * 4; iter++)
            {
                //Prepare batch for training: each file yields 4 patches, so a batch
                //of 64 consumes batch/4 = 16 files.
                for (int ba = 0; ba < batch / 4; ba++)
                {
                    int idx = iter * (batch / 4) + ba + trainingSize;
                    float3[] original;
                    float[] raw;
                    // Lazy cache: read from disk only on first use of this patch.
                    if (baRAW[idx - trainingSize] == null)
                    {
                        original = ReadRAWFloat3(fileTrouthList[idx]);
                        raw = ReadRAWFloat(fileRawList[idx]);
                        baOriginal[idx - trainingSize] = original;
                        baRAW[idx - trainingSize] = raw;
                    }
                    else
                    {
                        original = baOriginal[idx - trainingSize];
                        raw = baRAW[idx - trainingSize];
                    }
                    rawd.CopyToDevice(raw);
                    imgA.CopyToDevice(original);
                    // Demosaic the raw patch (green first, then red/blue planes).
                    deBayerGreenKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), WhiteBalanceFactors[idx]);
                    deBayerRedBlueKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), WhiteBalanceFactors[idx]);
                    prepareDataKernel.RunSafe(imgA, imgB, groundTrouth, inputImgs, ba, normalization, WhiteBalanceFactors[idx]);
                }
                start.SetData(inputImgs);
                final.SetGroundTrouth(groundTrouth);
                float err = start.InferenceTraining(inputImgs);
                errorMix += err;
                errorL1 += final.GetError(FinalLayer.Norm.L1);
                errorL2 += final.GetError(FinalLayer.Norm.L2);
            }
            Console.WriteLine("Results for: " + filename);
            // NOTE(review): relies on left-to-right evaluation:
            // (error / testSize) * batch / 4 — confirm this is the intended mean.
            Console.WriteLine("Mean Error L1: " + errorL1 / testSize * batch / 4);
            Console.WriteLine("Mean Error L2: " + errorL2 / testSize * batch / 4);
            Console.WriteLine("Mean Error Mix: " + errorMix / testSize * batch / 4);
            // Decimal point replaced by comma for locale-specific CSV consumers.
            sw.Write((errorL1 / testSize * batch / 4).ToString().Replace(".", ","));
            sw.Write(";");
            sw.Write((errorL2 / testSize * batch / 4).ToString().Replace(".", ","));
            sw.Write(";");
            sw.Write((errorMix / testSize * batch / 4).ToString().Replace(".", ","));
            sw.Write(";");
            sw.WriteLine(filename);
            sw.Flush();
        }
        sw.Close();
        csvResult.Close();
        csvResult.Dispose();
    }
    // or train existing network:
    else
    {
        double error = 0;       // running error, reset every 1000 iterations
        double errorEpoch = 0;  // accumulated error over the current epoch
        for (int epoch = startEpoch; epoch < 2000; epoch++)
        {
            errorEpoch = 0;
            error = 0;
            for (int iter = 0; iter < trainingSize / batch * 4; iter++)
            {
                //Prepare batch for training: (same lazy-cache batching as above,
                //but indexing the training portion of the shuffled lists)
                for (int ba = 0; ba < batch / 4; ba++)
                {
                    int idx = iter * (batch / 4) + ba;
                    float3[] original;
                    float[] raw;
                    if (baRAW[idx] == null)
                    {
                        original = ReadRAWFloat3(fileTrouthList[idx]);
                        raw = ReadRAWFloat(fileRawList[idx]);
                        baOriginal[idx] = original;
                        baRAW[idx] = raw;
                    }
                    else
                    {
                        original = baOriginal[idx];
                        raw = baRAW[idx];
                    }
                    rawd.CopyToDevice(raw);
                    imgA.CopyToDevice(original);
                    deBayerGreenKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), WhiteBalanceFactors[idx]);
                    deBayerRedBlueKernel.RunSafe(rawd, imgB, patchSize4, new float3(0, 0, 0), WhiteBalanceFactors[idx]);
                    prepareDataKernel.RunSafe(imgA, imgB, groundTrouth, inputImgs, ba, normalization, WhiteBalanceFactors[idx]);
                }
                // Forward pass, backpropagation and weight update with a
                // learning rate scheduled by the global iteration count.
                start.SetData(inputImgs);
                final.SetGroundTrouth(groundTrouth);
                float err = start.InferenceTraining(inputImgs);
                final.BackPropagation(groundTrouth);
                start.UpdateWeights(GetLearningRate(epoch * (trainingSize) / batch * 4 + iter));//*0+951342
                error += err;
                errorEpoch += err;
                // Append running-error status every 1000 global iterations.
                if ((epoch * trainingSize / batch * 4 + iter) % 1000 == 0 && iter != 0)
                {
                    FileStream status = new FileStream("status_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + ".csv", FileMode.Append, FileAccess.Write);
                    StreamWriter sw = new StreamWriter(status);
                    sw.WriteLine((error / 1000.0).ToString().Replace(".", ",") + ";" + GetLearningRate(epoch * trainingSize / batch * 4 + iter).ToString().Replace(".", ","));
                    sw.Close();
                    status.Close();
                    status.Dispose();
                    error = 0;
                }
                //if ((epoch * trainingSize / batch * 4 + iter) % 10000 == 0)
                //{
                //    fs = new FileStream("iter_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + (epoch * trainingSize / batch * 4 + iter) + ".cnn", FileMode.Create, FileAccess.Write);
                //    start.SaveValues(fs);
                //    fs.Close();
                //    fs.Dispose();
                //    Console.WriteLine("Network saved for iteration " + (epoch * trainingSize / batch * 4 + iter) + "!");
                //}
                Console.WriteLine("Epoch: " + epoch + " Iteration: " + (epoch * trainingSize / batch * 4 + iter) + ", Error: " + err);
                // Optionally dump one random ground-truth/input/result patch triple
                // as PNGs at the start of each epoch.
                if (saveImages && iter == 0)//(epoch * trainingSize / batch * 4 + iter) % 10000 == 0 &&
                {
                    for (int i = 0; i < 1; i++)
                    {
                        int imgidx = randImageOutput.Next(batch);
                        float3 wb = WhiteBalanceFactors[iter * (batch / 4) + imgidx / 4];
                        restoreImageKernel.RunSafe(groundTrouth, imgU3a, imgidx, wb.x, wb.y, wb.z, normalization);
                        restoreImageKernel.RunSafe(inputImgs, imgU3b, imgidx, wb.x, wb.y, wb.z, normalization);
                        CudaDeviceVariable<float> res = final.GetResult();
                        restoreImageKernel.RunSafe(res, imgU3c, imgidx, wb.x, wb.y, wb.z, normalization);
                        imgU3a.CopyToHost(a);
                        imgU3b.CopyToHost(b);
                        imgU3c.CopyToHost(c);
                        a.Save("GroundTrouth_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + epoch + "_" + imgidx + ".png");// * trainingSize / batch * 4 + iter
                        b.Save("Input_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + epoch + "_" + imgidx + ".png");
                        c.Save("Result_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + epoch + "_" + imgidx + ".png");
                    }
                }
            }
            // End of epoch: log the mean epoch error and checkpoint the network.
            errorEpoch /= trainingSize / batch * 4;
            fs = new FileStream("errorEpoch_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + ".csv", FileMode.Append, FileAccess.Write);
            StreamWriter sw2 = new StreamWriter(fs);
            sw2.WriteLine(errorEpoch.ToString().Replace(".", ","));
            sw2.Close();
            fs.Close();
            fs.Dispose();
            fs = new FileStream("epoch_" + learning_rate.ToString(CultureInfo.InvariantCulture) + "_" + ISO + "_" + epoch + ".cnn", FileMode.Create, FileAccess.Write);
            start.SaveValues(fs);
            fs.Close();
            fs.Dispose();
        }
    }
}
/// <summary>
/// Lets the user pick an image file, determines its channel count from the
/// pixel format and uploads it to NPP device memory. The original image is
/// shown in the source picture box regardless of upload success.
/// </summary>
private void btn_open_Click(object sender, EventArgs e)
{
    if (!_nppOK)
        return;

    CleanUp();

    OpenFileDialog ofd = new OpenFileDialog();
    ofd.Filter = "Images|*.jpg;*.bmp;*.png;*.tif";
    if (ofd.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        return;

    Bitmap src = new Bitmap(ofd.FileName);

    // Map the GDI+ pixel format to the channel count we support on the device.
    switch (src.PixelFormat)
    {
        case PixelFormat.Format24bppRgb:
            _colorChannels = 3;
            break;
        case PixelFormat.Format32bppArgb:
        case PixelFormat.Format32bppRgb:
            _colorChannels = 4;
            break;
        case PixelFormat.Format8bppIndexed:
            _colorChannels = 1;
            break;
        default:
            _colorChannels = 0;
            txt_info.AppendText(ofd.FileName + " has an unsupported pixel format.\n");
            break;
    }

    // Shared success message (the original repeated this string three times).
    void ReportLoaded()
    {
        txt_info.AppendText("Info: Loaded image '" + ofd.FileName + "' succesfully (Size: " + src.Width.ToString() + " x " + src.Height.ToString() + ", color channels: " + _colorChannels.ToString() + ")\n");
    }

    try
    {
        switch (_colorChannels)
        {
            case 1:
                //Allocate memory on device for one channel images...
                src_c1 = new NPPImage_8uC1(src.Width, src.Height);
                dest_c1 = new NPPImage_8uC1(src.Width, src.Height);
                src_c1.CopyToDevice(src);
                ReportLoaded();
                break;
            case 3:
                //As of version 5, NPP has histogram and LUT functions for three channel
                //images, so there is no need to convert to 4 channels first.
                src_c3 = new NPPImage_8uC3(src.Width, src.Height);
                dest_c3 = new NPPImage_8uC3(src.Width, src.Height);
                //Fill 3 channel image in device memory
                src_c3.CopyToDevice(src);
                ReportLoaded();
                break;
            case 4:
                //Allocate memory on device for four channel images...
                src_c4 = new NPPImage_8uC4(src.Width, src.Height);
                dest_c4 = new NPPImage_8uC4(src.Width, src.Height);
                src_c4.CopyToDevice(src);
                ReportLoaded();
                break;
        }
    }
    catch (NPPException ex)
    {
        txt_info.AppendText("NPPException: " + ex.Message + "\n");
        CleanUp();
    }
    catch (CudaException ex)
    {
        txt_info.AppendText("CudaException: " + ex.Message + "\n");
        CleanUp();
    }
    // Any other exception type propagates, exactly as the original's rethrow did.

    //Show original image
    pictureBox_src.Image = src;
}
/// <summary>
/// Resizes the currently displayed image on the GPU by the percentage selected
/// with trk_Size. The target size is rounded down to a multiple of 16 (minimum
/// 16 pixels). Down-scaling uses super-sampling; up-scaling uses Lanczos.
/// Only 24-bit RGB bitmaps are supported.
/// </summary>
private void btn_Resize_Click(object sender, EventArgs e)
{
    // Fix: the original cast `(Bitmap)pic_Image.Image` threw InvalidCastException
    // for non-Bitmap images; `as` makes the null/type check safe.
    Bitmap bmp = pic_Image.Image as Bitmap;
    if (bmp == null)
    {
        return;
    }

    int w = bmp.Width;
    int h = bmp.Height;
    if ((w <= 16 || h <= 16) && trk_Size.Value < 100)
    {
        MessageBox.Show("Image is too small for resizing.");
        return;
    }

    // Target size: slider percentage of the original, rounded down to a
    // multiple of 16 with a lower bound of 16 (NPP sample constraint).
    int newW = (int)(trk_Size.Value / 100.0f * w);
    int newH = (int)(trk_Size.Value / 100.0f * h);
    newW -= newW % 16;
    if (newW < 16) { newW = 16; }
    newH -= newH % 16;
    if (newH < 16) { newH = 16; }

    double ratioW = newW / (double)w;
    double ratioH = newH / (double)h;
    if (ratioW == 1 && ratioH == 1)
    {
        return; // nothing to do, size unchanged
    }

    if (bmp.PixelFormat != System.Drawing.Imaging.PixelFormat.Format24bppRgb)
    {
        MessageBox.Show("Only three channel color images are supported!");
        return;
    }

    // SuperSampling is only valid when shrinking; use Lanczos when enlarging.
    InterpolationMode interpol = InterpolationMode.SuperSampling;
    if (ratioH >= 1 || ratioW >= 1)
    {
        interpol = InterpolationMode.Lanczos;
    }

    // Fix: using-blocks free the device images even when an NPP call throws
    // (the original leaked them on exception).
    using (NPPImage_8uC3 imgIn = new NPPImage_8uC3(w, h))
    using (NPPImage_8uC3 imgOut = new NPPImage_8uC3(newW, newH))
    {
        imgIn.CopyToDevice(bmp);
        imgIn.ResizeSqrPixel(imgOut, ratioW, ratioH, 0, 0, interpol);
        Bitmap bmpRes = new Bitmap(newW, newH, System.Drawing.Imaging.PixelFormat.Format24bppRgb);
        imgOut.CopyToHost(bmpRes);
        pic_Image.Image = bmpRes;
    }
}
/// <summary>
/// Launches the restoreImage kernel for the patch at <paramref name="imgOffset"/>
/// in the batch buffer, writing an 8-bit three-channel image with the given
/// per-channel factors and offset applied.
/// </summary>
/// <returns>The value returned by the kernel launch (execution time).</returns>
public float RunSafe(CudaDeviceVariable<float> imgIn, NPPImage_8uC3 imgOut, int imgOffset, float facR, float facG, float facB, float add)
{
    // Device signature: restoreImage(const float* __restrict__ imgIn, uchar3* imgOut, float facR, float facG, float facB)
    float result = base.Run(imgIn.DevicePointer, imgOut.DevicePointer, imgOut.Pitch, imgOffset, facR, facG, facB, add);
    return result;
}
/// <summary>
/// Releases the NPP device images and their pitched wrappers from a previously
/// loaded image, if present. Extracted helper: the original handler duplicated
/// this six-object dispose sequence verbatim in two places.
/// </summary>
private void DisposeDeviceImages()
{
    if (npp_bmp_src != null) { npp_bmp_src.Dispose(); }
    if (npp_bmp_res != null) { npp_bmp_res.Dispose(); }
    if (npp_bmp_mask != null) { npp_bmp_mask.Dispose(); }
    if (d_bmp_src != null) { d_bmp_src.Dispose(); }
    if (d_bmp_res != null) { d_bmp_res.Dispose(); }
    if (d_bmp_mask != null) { d_bmp_mask.Dispose(); }
}

/// <summary>
/// Lets the user pick a 24-bit RGB image, allocates the host/device buffers for
/// GrabCut segmentation, uploads the image and converts it to RGBA on the device.
/// Initializes the default selection rectangle to the centered 80% of the image.
/// </summary>
private void btn_openImg_Click(object sender, EventArgs e)
{
    OpenFileDialog ofd = new OpenFileDialog();
    ofd.Filter = "Images|*.bmp;*.jpg;*.jpeg;*.tiff;*.tif;*.png;*.gif";
    if (ofd.ShowDialog() != System.Windows.Forms.DialogResult.OK)
    {
        return;
    }

    bmp_src = new Bitmap(ofd.FileName);
    if (bmp_src.PixelFormat != PixelFormat.Format24bppRgb)
    {
        MessageBox.Show("Only 24-bit RGB images are supported!");
        // Reset all state so a previously loaded image does not linger.
        bmp_src = null;
        bmp_mask = null;
        bmp_res = null;
        DisposeDeviceImages();
        return;
    }

    width = bmp_src.Width;
    height = bmp_src.Height;
    marker = new int[width * height];
    bmp_res = new Bitmap(width, height, PixelFormat.Format32bppArgb);
    bmp_mask = new Bitmap(width, height, PixelFormat.Format8bppIndexed);
    SetPalette(bmp_mask);
    pictureBox_src.Image = bmp_src;

    // Default selection: centered rectangle covering 80% in each dimension.
    selection.x = (int)Math.Ceiling(width * 0.1);
    selection.y = (int)Math.Ceiling(height * 0.1);
    selection.width = width - 2 * selection.x;
    selection.height = height - 2 * selection.y;

    // Free the buffers of any previously loaded image before reallocating.
    DisposeDeviceImages();

    // Upload the 24-bit source; the pitched wrapper aliases the NPP image memory.
    NPPImage_8uC3 npp_temp = new NPPImage_8uC3(width, height);
    CudaPitchedDeviceVariable<uchar3> d_bmp_temp = new CudaPitchedDeviceVariable<uchar3>(npp_temp.DevicePointer, width, height, npp_temp.Pitch);
    npp_temp.CopyToDevice(bmp_src);

    npp_bmp_src = new NPPImage_8uC4(width, height);
    npp_bmp_res = new NPPImage_8uC4(width, height);
    npp_bmp_mask = new NPPImage_8uC1(width, height);
    d_bmp_src = new CudaPitchedDeviceVariable<uchar4>(npp_bmp_src.DevicePointer, width, height, npp_bmp_src.Pitch);
    d_bmp_res = new CudaPitchedDeviceVariable<uchar4>(npp_bmp_res.DevicePointer, width, height, npp_bmp_res.Pitch);
    d_bmp_mask = new CudaPitchedDeviceVariable<byte>(npp_bmp_mask.DevicePointer, width, height, npp_bmp_mask.Pitch);

    grabcut = new GrabCut(d_bmp_src, d_bmp_mask, width, height);
    // Expand the uploaded RGB image to the RGBA working buffer on the device.
    grabcut.grabCutUtils.convertRGBToRGBA(d_bmp_src, d_bmp_temp, width, height);

    d_bmp_temp.Dispose();
    npp_temp.Dispose();
}
/// <summary>
/// Encodes a 24-bit RGB bitmap as a baseline JPEG using NPP on the GPU:
/// BGR→YCbCr color twist, 4:2:0 chroma subsampling, forward DCT with
/// quantization, and Huffman encoding, then writes the JFIF file on the host.
/// </summary>
/// <param name="aFilename">Destination file path.</param>
/// <param name="aQuality">JPEG quality used to scale the quantization tables.</param>
/// <param name="aImage">Source bitmap; must be Format24bppRgb with width and height divisible by 16.</param>
/// <exception cref="ArgumentException">Wrong pixel format or dimensions not a multiple of 16.</exception>
public static void SaveJpeg(string aFilename, int aQuality, Bitmap aImage)
{
    if (aImage.PixelFormat != System.Drawing.Imaging.PixelFormat.Format24bppRgb)
    {
        throw new ArgumentException("Only three channel color images are supported.");
    }
    if (aImage.Width % 16 != 0 || aImage.Height % 16 != 0)
    {
        throw new ArgumentException("The provided bitmap must have a height and width of a multiple of 16.");
    }

    JPEGCompression compression = new JPEGCompression();
    // Full-size source plus one full-size luma plane and two half-size chroma planes (4:2:0).
    NPPImage_8uC3 src = new NPPImage_8uC3(aImage.Width, aImage.Height);
    NPPImage_8uC1 srcY = new NPPImage_8uC1(aImage.Width, aImage.Height);
    NPPImage_8uC1 srcCb = new NPPImage_8uC1(aImage.Width / 2, aImage.Height / 2);
    NPPImage_8uC1 srcCr = new NPPImage_8uC1(aImage.Width / 2, aImage.Height / 2);
    src.CopyToDevice(aImage);

    //System.Drawing.Bitmap is ordered BGR not RGB
    //The NPP routine BGR to YCbCR outputs the values in clamped range, following the YCbCr standard.
    //But JPEG uses unclamped values ranging all from [0..255], thus use our own color matrix:
    //(row 0 -> Y, row 1 -> Cb, row 2 -> Cr; coefficients ordered for BGR input)
    float[,] BgrToYCbCr = new float[3, 4] { { 0.114f, 0.587f, 0.299f, 0 }, { 0.5f, -0.33126f, -0.16874f, 128 }, { -0.08131f, -0.41869f, 0.5f, 128 } };
    src.ColorTwist(BgrToYCbCr);

    //Reduce size of Cb and Cr channel: extract each plane into srcY, downscale
    //chroma by 0.5; channel 0 (Y) is extracted last and stays full size.
    src.Copy(srcY, 2);
    srcY.Resize(srcCr, 0.5, 0.5, InterpolationMode.SuperSampling);
    src.Copy(srcY, 1);
    srcY.Resize(srcCb, 0.5, 0.5, InterpolationMode.SuperSampling);
    src.Copy(srcY, 0);

    // JFIF frame header: 8-bit precision, 3 components, 4:2:0 sampling (0x22 for Y, 0x11 for Cb/Cr).
    FrameHeader oFrameHeader = new FrameHeader();
    oFrameHeader.nComponents = 3;
    oFrameHeader.nHeight = (ushort)aImage.Height;
    oFrameHeader.nSamplePrecision = 8;
    oFrameHeader.nWidth = (ushort)aImage.Width;
    oFrameHeader.aComponentIdentifier = new byte[] { 1, 2, 3 };
    oFrameHeader.aSamplingFactors = new byte[] { 34, 17, 17 }; //Y channel is twice the size of Cb/Cr channel (34 = 0x22, 17 = 0x11)
    oFrameHeader.aQuantizationTableSelector = new byte[] { 0, 1, 1 };

    //Get quantization tables from JPEG standard with quality scaling
    QuantizationTable[] aQuantizationTables = new QuantizationTable[2];
    aQuantizationTables[0] = new QuantizationTable(QuantizationTable.QuantizationType.Luminance, aQuality);
    aQuantizationTables[1] = new QuantizationTable(QuantizationTable.QuantizationType.Chroma, aQuality);
    // Device-side copies of the two quantization tables.
    CudaDeviceVariable<byte>[] pdQuantizationTables = new CudaDeviceVariable<byte>[2];
    pdQuantizationTables[0] = aQuantizationTables[0].aTable;
    pdQuantizationTables[1] = aQuantizationTables[1].aTable;

    //Get Huffman tables from JPEG standard
    HuffmanTable[] aHuffmanTables = new HuffmanTable[4];
    aHuffmanTables[0] = new HuffmanTable(HuffmanTable.HuffmanType.LuminanceDC);
    aHuffmanTables[1] = new HuffmanTable(HuffmanTable.HuffmanType.ChromaDC);
    aHuffmanTables[2] = new HuffmanTable(HuffmanTable.HuffmanType.LuminanceAC);
    aHuffmanTables[3] = new HuffmanTable(HuffmanTable.HuffmanType.ChromaAC);

    //Set header: full spectral range (Ss=0, Se=63), no successive approximation.
    ScanHeader oScanHeader = new ScanHeader();
    oScanHeader.nA = 0;
    oScanHeader.nComponents = 3;
    oScanHeader.nSe = 63;
    oScanHeader.nSs = 0;
    oScanHeader.aComponentSelector = new byte[] { 1, 2, 3 };
    oScanHeader.aHuffmanTablesSelector = new byte[] { 0, 17, 17 }; // high nibble = DC table, low nibble = AC table

    NPPImage_16sC1[] apdDCT = new NPPImage_16sC1[3];
    NPPImage_8uC1[] apDstImage = new NPPImage_8uC1[3];
    NppiSize[] aDstSize = new NppiSize[3];
    aDstSize[0] = new NppiSize(srcY.Width, srcY.Height);
    aDstSize[1] = new NppiSize(srcCb.Width, srcCb.Height);
    aDstSize[2] = new NppiSize(srcCr.Width, srcCr.Height);

    // Compute channel sizes as stored in the output JPEG (8x8 blocks & MCU block layout)
    // NOTE(review): the Floor/Max dance below is a no-op for valid inputs (width and
    // height are already positive integers); kept from the original NVIDIA sample.
    NppiSize oDstImageSize = new NppiSize();
    float frameWidth = (float)Math.Floor((float)oFrameHeader.nWidth);
    float frameHeight = (float)Math.Floor((float)oFrameHeader.nHeight);
    oDstImageSize.width = (int)Math.Max(1.0f, frameWidth);
    oDstImageSize.height = (int)Math.Max(1.0f, frameHeight);
    //Console.WriteLine("Output Size: " + oDstImageSize.width + "x" + oDstImageSize.height + "x" + (int)(oFrameHeader.nComponents));

    apDstImage[0] = srcY;
    apDstImage[1] = srcCb;
    apDstImage[2] = srcCr;

    // MCU geometry: the largest sampling factor over all components.
    int nMCUBlocksH = 0;
    int nMCUBlocksV = 0;
    // Compute channel sizes as stored in the JPEG (8x8 blocks & MCU block layout)
    for (int i = 0; i < oFrameHeader.nComponents; ++i)
    {
        nMCUBlocksV = Math.Max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] >> 4);
        nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
    }
    for (int i = 0; i < oFrameHeader.nComponents; ++i)
    {
        NppiSize oBlocks = new NppiSize();
        NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);
        // Number of 8x8 blocks per row/column, rounded up to whole MCUs.
        oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 * (float)(oBlocksPerMCU.width) / nMCUBlocksH);
        oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
        oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 * (float)(oBlocksPerMCU.height) / nMCUBlocksV);
        oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
        // Allocate Memory: 64 coefficients per 8x8 block.
        apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
    }

    /***************************
    *
    *   Output
    *
    ***************************/

    // Forward DCT with quantization for each plane.
    for (int i = 0; i < 3; ++i)
    {
        compression.DCTQuantFwd8x8LS(apDstImage[i], apdDCT[i], aDstSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
    }

    // Huffman Encoding of the quantized coefficients into a single scan.
    CudaDeviceVariable<byte> pdScan = new CudaDeviceVariable<byte>(BUFFER_SIZE);
    int nScanLength = 0;
    int nTempSize = JPEGCompression.EncodeHuffmanGetSize(aDstSize[0], 3);
    CudaDeviceVariable<byte> pJpegEncoderTemp = new CudaDeviceVariable<byte>(nTempSize);
    NppiEncodeHuffmanSpec[] apHuffmanDCTableEnc = new NppiEncodeHuffmanSpec[3];
    NppiEncodeHuffmanSpec[] apHuffmanACTableEnc = new NppiEncodeHuffmanSpec[3];
    for (int i = 0; i < 3; ++i)
    {
        // DC table index from the high nibble; AC table from the low nibble (+2 offset into aHuffmanTables).
        apHuffmanDCTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
        apHuffmanACTableEnc[i] = JPEGCompression.EncodeHuffmanSpecInitAlloc(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
    }
    JPEGCompression.EncodeHuffmanScan(apdDCT, 0, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, pdScan, ref nScanLength, apHuffmanDCTableEnc, apHuffmanACTableEnc, aDstSize, pJpegEncoderTemp);
    for (int i = 0; i < 3; ++i)
    {
        JPEGCompression.EncodeHuffmanSpecFree(apHuffmanDCTableEnc[i]);
        JPEGCompression.EncodeHuffmanSpecFree(apHuffmanACTableEnc[i]);
    }

    // Write JPEG to byte array, as in original sample code:
    // SOI, JFIF tag, DQTs, SOF, DHTs, SOS, scan data, EOI.
    byte[] pDstOutput = new byte[BUFFER_SIZE];
    int pos = 0;
    oFrameHeader.nWidth = (ushort)oDstImageSize.width;
    oFrameHeader.nHeight = (ushort)oDstImageSize.height;
    writeMarker(0x0D8, pDstOutput, ref pos);
    writeJFIFTag(pDstOutput, ref pos);
    writeQuantizationTable(aQuantizationTables[0], pDstOutput, ref pos);
    writeQuantizationTable(aQuantizationTables[1], pDstOutput, ref pos);
    writeFrameHeader(oFrameHeader, pDstOutput, ref pos);
    writeHuffmanTable(aHuffmanTables[0], pDstOutput, ref pos);
    writeHuffmanTable(aHuffmanTables[1], pDstOutput, ref pos);
    writeHuffmanTable(aHuffmanTables[2], pDstOutput, ref pos);
    writeHuffmanTable(aHuffmanTables[3], pDstOutput, ref pos);
    writeScanHeader(oScanHeader, pDstOutput, ref pos);
    // Copy the encoded scan from the device directly behind the headers.
    pdScan.CopyToHost(pDstOutput, 0, pos, nScanLength);
    pos += nScanLength;
    writeMarker(0x0D9, pDstOutput, ref pos);
    FileStream fs = new FileStream(aFilename, FileMode.Create, FileAccess.Write);
    fs.Write(pDstOutput, 0, pos);
    fs.Close();

    //cleanup:
    fs.Dispose();
    pJpegEncoderTemp.Dispose();
    pdScan.Dispose();
    apdDCT[2].Dispose();
    apdDCT[1].Dispose();
    apdDCT[0].Dispose();
    pdQuantizationTables[1].Dispose();
    pdQuantizationTables[0].Dispose();
    srcCr.Dispose();
    srcCb.Dispose();
    srcY.Dispose();
    src.Dispose();
    compression.Dispose();
}
/// <summary>
/// Loads a baseline, three-channel JPEG file from disk and decodes it with the NPP
/// JPEG primitives: marker parsing and Huffman decoding on the host, inverse DCT and
/// color conversion on the device. Follows the structure of Nvidia's NPP JPEG sample.
/// </summary>
/// <param name="aFilename">Path of the JPEG file to load.</param>
/// <returns>The decoded image as a 24bpp <see cref="Bitmap"/> (BGR channel order).</returns>
/// <exception cref="ArgumentException">
/// Thrown if the file is not a JPEG, not a baseline JPEG, or not a three-channel JPEG.
/// </exception>
public static Bitmap LoadJpeg(string aFilename)
{
    JPEGCompression compression = new JPEGCompression();

    byte[] pJpegData = File.ReadAllBytes(aFilename);
    int nInputLength = pJpegData.Length;

    // Check if this is a valid JPEG file: it must start with an SOI marker (0xFFD8).
    int nPos = 0;
    int nMarker = nextMarker(pJpegData, ref nPos, nInputLength);

    if (nMarker != 0x0D8)
    {
        compression.Dispose(); // fix: was leaked on this error path
        throw new ArgumentException(aFilename + " is not a JPEG file.");
    }

    nMarker = nextMarker(pJpegData, ref nPos, nInputLength);

    // Parsing and Huffman decoding happen on the host; DCT data is uploaded to the device later.
    FrameHeader oFrameHeader = new FrameHeader();
    oFrameHeader.aComponentIdentifier = new byte[3];
    oFrameHeader.aSamplingFactors = new byte[3];
    oFrameHeader.aQuantizationTableSelector = new byte[3];

    // JPEG allows up to four quantization tables and four Huffman tables (two DC + two AC).
    QuantizationTable[] aQuantizationTables = new QuantizationTable[4];
    CudaDeviceVariable<byte>[] pdQuantizationTables = new CudaDeviceVariable<byte>[4];
    HuffmanTable[] aHuffmanTables = new HuffmanTable[4];
    for (int i = 0; i < 4; ++i)
    {
        aQuantizationTables[i] = new QuantizationTable();
        pdQuantizationTables[i] = new CudaDeviceVariable<byte>(64);
        aHuffmanTables[i] = new HuffmanTable();
    }

    ScanHeader oScanHeader = new ScanHeader();
    oScanHeader.aComponentSelector = new byte[3];
    oScanHeader.aHuffmanTablesSelector = new byte[3];

    int nMCUBlocksH = 0;
    int nMCUBlocksV = 0;
    int nRestartInterval = -1;

    // Per component (Y, Cb, Cr): padded channel size, host-side DCT buffer,
    // device-side DCT image, DCT pitch and decoded 8-bit image plane.
    // (The unused encode-side locals of the original — apDstImage, aDstImageStep,
    // aDstSize, aSrcImageStep — were removed.)
    NppiSize[] aSrcSize = new NppiSize[3];
    short[][] aphDCT = new short[3][];
    NPPImage_16sC1[] apdDCT = new NPPImage_16sC1[3];
    int[] aDCTStep = new int[3];
    NPPImage_8uC1[] apSrcImage = new NPPImage_8uC1[3];

    // Same read routine as in the NPP JPEG sample from Nvidia.
    while (nMarker != -1)
    {
        if (nMarker == 0x0D8)
        {
            // Embedded thumbnail (a nested SOI..EOI stream): skip it entirely.
            int nNextMarker = nextMarker(pJpegData, ref nPos, nInputLength);

            while (nNextMarker != -1 && nNextMarker != 0x0D9)
            {
                nNextMarker = nextMarker(pJpegData, ref nPos, nInputLength);
            }
        }

        if (nMarker == 0x0DD)
        {
            // DRI marker: restart interval definition.
            readRestartInterval(pJpegData, ref nPos, ref nRestartInterval);
        }

        if ((nMarker == 0x0C0) || (nMarker == 0x0C2)) // fix: short-circuit || instead of bitwise | on bools
        {
            // SOF0 (baseline) or SOF2 (progressive) frame header.
            // Assert baseline for this sample.
            // Note: NPP does support progressive jpegs for both encode and decode.
            if (nMarker != 0x0C0)
            {
                for (int i = 0; i < 4; ++i)
                {
                    pdQuantizationTables[i].Dispose();
                }
                compression.Dispose(); // fix: was leaked on this error path
                throw new ArgumentException(aFilename + " is not a Baseline-JPEG file.");
            }

            // Baseline or Progressive Frame Header
            readFrameHeader(pJpegData, ref nPos, ref oFrameHeader);
            //Console.WriteLine("Image Size: " + oFrameHeader.nWidth + "x" + oFrameHeader.nHeight + "x" + (int)(oFrameHeader.nComponents));

            // Assert a three-channel image for this sample.
            if (oFrameHeader.nComponents != 3)
            {
                for (int i = 0; i < 4; ++i)
                {
                    pdQuantizationTables[i].Dispose();
                }
                compression.Dispose(); // fix: was leaked on this error path
                throw new ArgumentException(aFilename + " is not a three channel JPEG file.");
            }

            // Compute channel sizes as stored in the JPEG (8x8 blocks & MCU block layout).
            for (int i = 0; i < oFrameHeader.nComponents; ++i)
            {
                nMCUBlocksV = Math.Max(nMCUBlocksV, oFrameHeader.aSamplingFactors[i] >> 4);
                nMCUBlocksH = Math.Max(nMCUBlocksH, oFrameHeader.aSamplingFactors[i] & 0x0f);
            }

            for (int i = 0; i < oFrameHeader.nComponents; ++i)
            {
                NppiSize oBlocks = new NppiSize();
                // High nibble of the sampling factor = vertical, low nibble = horizontal.
                NppiSize oBlocksPerMCU = new NppiSize(oFrameHeader.aSamplingFactors[i] & 0x0f, oFrameHeader.aSamplingFactors[i] >> 4);

                oBlocks.width = (int)Math.Ceiling((oFrameHeader.nWidth + 7) / 8 * (float)(oBlocksPerMCU.width) / nMCUBlocksH);
                oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;

                oBlocks.height = (int)Math.Ceiling((oFrameHeader.nHeight + 7) / 8 * (float)(oBlocksPerMCU.height) / nMCUBlocksV);
                oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;

                aSrcSize[i].width = oBlocks.width * 8;
                aSrcSize[i].height = oBlocks.height * 8;

                // Allocate memory: each 8x8 block stores 64 DCT coefficients per row.
                apdDCT[i] = new NPPImage_16sC1(oBlocks.width * 64, oBlocks.height);
                aDCTStep[i] = apdDCT[i].Pitch;
                apSrcImage[i] = new NPPImage_8uC1(aSrcSize[i].width, aSrcSize[i].height);
                aphDCT[i] = new short[aDCTStep[i] * oBlocks.height];
            }
        }

        if (nMarker == 0x0DB)
        {
            // DQT marker: quantization tables.
            readQuantizationTables(pJpegData, ref nPos, aQuantizationTables);
        }

        if (nMarker == 0x0C4)
        {
            // DHT marker: Huffman tables.
            readHuffmanTables(pJpegData, ref nPos, aHuffmanTables);
        }

        if (nMarker == 0x0DA)
        {
            // SOS marker: scan header followed by the entropy-coded data.
            readScanHeader(pJpegData, ref nPos, ref oScanHeader);
            nPos += 6 + oScanHeader.nComponents * 2;

            int nAfterNextMarkerPos = nPos;
            int nAfterScanMarker = nextMarker(pJpegData, ref nAfterNextMarkerPos, nInputLength);

            if (nRestartInterval > 0)
            {
                while (nAfterScanMarker >= 0x0D0 && nAfterScanMarker <= 0x0D7)
                {
                    // This is a restart marker (RST0..RST7), go on.
                    nAfterScanMarker = nextMarker(pJpegData, ref nAfterNextMarkerPos, nInputLength);
                }
            }

            NppiDecodeHuffmanSpec[] apHuffmanDCTableDec = new NppiDecodeHuffmanSpec[3];
            NppiDecodeHuffmanSpec[] apHuffmanACTableDec = new NppiDecodeHuffmanSpec[3];

            // High nibble of the selector picks the DC table; low nibble picks the AC
            // table (AC tables live at offset 2 in aHuffmanTables).
            for (int i = 0; i < 3; ++i)
            {
                apHuffmanDCTableDec[i] = JPEGCompression.DecodeHuffmanSpecInitAllocHost(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] >> 4)].aCodes, NppiHuffmanTableType.nppiDCTable);
                apHuffmanACTableDec[i] = JPEGCompression.DecodeHuffmanSpecInitAllocHost(aHuffmanTables[(oScanHeader.aHuffmanTablesSelector[i] & 0x0f) + 2].aCodes, NppiHuffmanTableType.nppiACTable);
            }

            // Copy the entropy-coded segment (without the trailing marker bytes)
            // and Huffman-decode it on the host into the per-channel DCT buffers.
            byte[] img = new byte[nAfterNextMarkerPos - nPos - 2];
            Buffer.BlockCopy(pJpegData, nPos, img, 0, nAfterNextMarkerPos - nPos - 2);

            JPEGCompression.DecodeHuffmanScanHost(img, nRestartInterval, oScanHeader.nSs, oScanHeader.nSe, oScanHeader.nA >> 4, oScanHeader.nA & 0x0f, aphDCT[0], aphDCT[1], aphDCT[2], aDCTStep, apHuffmanDCTableDec, apHuffmanACTableDec, aSrcSize);

            for (int i = 0; i < 3; ++i)
            {
                JPEGCompression.DecodeHuffmanSpecFreeHost(apHuffmanDCTableDec[i]);
                JPEGCompression.DecodeHuffmanSpecFreeHost(apHuffmanACTableDec[i]);
            }
        }

        nMarker = nextMarker(pJpegData, ref nPos, nInputLength);
    }

    // Copy DCT coefficients and quantization tables from host to device.
    for (int i = 0; i < 4; ++i)
    {
        pdQuantizationTables[i].CopyToDevice(aQuantizationTables[i].aTable);
    }

    for (int i = 0; i < 3; ++i)
    {
        apdDCT[i].CopyToDevice(aphDCT[i], aDCTStep[i]);
    }

    // Inverse DCT (dequantize + IDCT) on the device, one channel at a time.
    for (int i = 0; i < 3; ++i)
    {
        compression.DCTQuantInv8x8LS(apdDCT[i], apSrcImage[i], aSrcSize[i], pdQuantizationTables[oFrameHeader.aQuantizationTableSelector[i]]);
    }

    // Alloc final image.
    NPPImage_8uC3 res = new NPPImage_8uC3(apSrcImage[0].Width, apSrcImage[0].Height);

    // Copy Y color plane to first channel.
    apSrcImage[0].Copy(res, 0);

    // Cb and Cr channels might be smaller (chroma subsampling).
    if ((oFrameHeader.aSamplingFactors[0] & 0x0f) == 1 && oFrameHeader.aSamplingFactors[0] >> 4 == 1)
    {
        // Color planes are of same size as Y channel.
        apSrcImage[1].Copy(res, 1);
        apSrcImage[2].Copy(res, 2);
    }
    else
    {
        // Rescale color planes to full size. apSrcImage[0] is reused as a full-size
        // scratch target for the resized planes; its Y data was already copied to res.
        double scaleX = oFrameHeader.aSamplingFactors[0] & 0x0f;
        double scaleY = oFrameHeader.aSamplingFactors[0] >> 4;

        apSrcImage[1].ResizeSqrPixel(apSrcImage[0], scaleX, scaleY, 0, 0, InterpolationMode.Lanczos);
        apSrcImage[0].Copy(res, 1);
        apSrcImage[2].ResizeSqrPixel(apSrcImage[0], scaleX, scaleY, 0, 0, InterpolationMode.Lanczos);
        apSrcImage[0].Copy(res, 2);
    }

    // System.Drawing.Bitmap is ordered BGR not RGB.
    // The NPP routine YCbCr to BGR needs clamped input values, following the YCbCr standard.
    // But JPEG uses unclamped values ranging all from [0..255], thus use our own color matrix:
    float[,] YCbCrToBgr = new float[3, 4] { { 1.0f, 1.772f, 0.0f, -226.816f }, { 1.0f, -0.34414f, -0.71414f, 135.45984f }, { 1.0f, 0.0f, 1.402f, -179.456f } };

    // Convert from YCbCr to BGR.
    res.ColorTwist(YCbCrToBgr);

    Bitmap bmp = new Bitmap(apSrcImage[0].Width, apSrcImage[0].Height, System.Drawing.Imaging.PixelFormat.Format24bppRgb);
    res.CopyToHost(bmp);

    // Cleanup:
    res.Dispose();
    for (int i = 2; i >= 0; --i)
    {
        apSrcImage[i].Dispose();
        apdDCT[i].Dispose();
    }
    for (int i = 0; i < 4; ++i)
    {
        pdQuantizationTables[i].Dispose();
    }
    compression.Dispose();

    return bmp;
}