/// <summary>
/// Loads a single-channel image from disk, widens it to 16-bit samples and
/// uploads it to a pitched device allocation.
/// </summary>
/// <param name="path">
/// Image file path. The extension must contain "pgm" (binary PGM with a
/// three-line header: format, "width height", max value) or "png" (only the
/// red channel of each pixel is used).
/// </param>
/// <param name="stream">CUDA stream on which the host-to-device copy is issued.</param>
/// <returns>
/// An <c>NPPImage</c> with <c>width</c>, <c>height</c>, <c>pitch</c>,
/// <c>hostData</c> and <c>deviceData</c> populated.
/// </returns>
/// <exception cref="NotSupportedException">The extension is neither pgm nor png.</exception>
/// <exception cref="InvalidDataException">
/// The PGM header is incomplete or the file holds fewer samples than width * height.
/// </exception>
public static NPPImage Load(string path, cudaStream_t stream)
{
    NPPImage result = new NPPImage();
    byte[] rawData;
    string extension = Path.GetExtension(path);

    if (extension.Contains("pgm"))
    {
        // FileAccess.Read: the FileMode-only constructor requests read/write
        // access, which fails on read-only files although we never write.
        using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read))
        using (TextReader tReader = new StreamReader(fs))
        using (BinaryReader bReader = new BinaryReader(fs))
        {
            string formatLine = tReader.ReadLine();   // magic number, not interpreted
            string sizeLine = tReader.ReadLine();
            string maxValueLine = tReader.ReadLine(); // max sample value, not interpreted
            if (formatLine == null || sizeLine == null || maxValueLine == null)
            {
                throw new InvalidDataException("incomplete PGM header");
            }

            // Tolerate repeated blanks / tabs between width and height.
            string[] splitted = sizeLine.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            if (splitted.Length < 2)
            {
                throw new InvalidDataException("malformed PGM size line");
            }
            result.width = int.Parse(splitted[0]);
            result.height = int.Parse(splitted[1]);

            // The StreamReader has buffered past the header, so reposition the
            // underlying stream explicitly: three header lines plus one
            // terminator character each (assumes single-byte '\n' line endings).
            int pos = formatLine.Length + sizeLine.Length + maxValueLine.Length + 3;
            fs.Seek(pos, SeekOrigin.Begin);
            // TODO: optimize that part
            rawData = bReader.ReadBytes((int)(fs.Length - pos));
        }
    }
    else if (extension.Contains("png"))
    {
        // Dispose the bitmap so the GDI+ handle and the file lock are released.
        using (Bitmap image = Bitmap.FromFile(path) as Bitmap)
        {
            result.width = image.Width;
            result.height = image.Height;
            rawData = new byte[result.width * result.height];
            int index = 0;
            for (int j = 0; j < result.height; ++j)
            {
                for (int i = 0; i < result.width; ++i, ++index)
                {
                    rawData[index] = image.GetPixel(i, j).R;
                }
            }
        }
    }
    else
    {
        throw new NotSupportedException("unsupported file format");
    }

    // Validate before touching the device so a truncated file neither indexes
    // out of range below nor leaves a device allocation behind.
    int pixelCount = result.width * result.height;
    if (rawData.Length < pixelCount)
    {
        throw new InvalidDataException("image file contains fewer samples than width * height");
    }

    IntPtr deviceData;
    size_t p;
    cuda.ERROR_CHECK(cuda.MallocPitch(out deviceData, out p, result.width * sizeof(ushort), result.height));
    result.pitch = (int)p;

    // Widen the 8-bit samples to 16 bits; the host copy is tightly packed (no row padding).
    result.hostData = new ushort[pixelCount];
    for (int k = 0; k < pixelCount; ++k)
    {
        result.hostData[k] = rawData[k];
    }

    GCHandle handle = GCHandle.Alloc(result.hostData, GCHandleType.Pinned);
    try
    {
        // Source rows are width * sizeof(ushort) bytes apart; destination rows are p bytes apart.
        cuda.ERROR_CHECK(cuda.Memcpy2DAsync(deviceData, p, handle.AddrOfPinnedObject(),
            result.width * sizeof(ushort), result.width * sizeof(ushort), result.height,
            cudaMemcpyKind.cudaMemcpyHostToDevice, stream));
    }
    catch
    {
        // Presumably ERROR_CHECK throws on failure; do not leak the device buffer then.
        cuda.Free(deviceData);
        throw;
    }
    finally
    {
        // NOTE(review): the handle is released right after an *async* copy is
        // enqueued, as in the original. This relies on the runtime having
        // consumed the host buffer before returning -- confirm, or synchronize
        // the stream before unpinning.
        handle.Free();
    }

    result.deviceData = deviceData;
    return result;
}
/// <summary>
/// Entry point: loads the input image, labels its connected regions with NPP,
/// compresses the label range, colours every label with a random colour via
/// the generated ColorizeLabels kernel wrapper, saves the result to
/// <c>segmentsFileName</c> and opens it.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // init CUDA
    IntPtr d;
    cuda.ERROR_CHECK(cuda.Malloc(out d, sizeof(int)));
    cuda.ERROR_CHECK(cuda.Free(d));

    HybRunner runner = HybRunner.Cuda();
    cudaDeviceProp prop;
    cuda.ERROR_CHECK(cuda.GetDeviceProperties(out prop, 0));
    dynamic wrapped = runner.Wrap(new Program());
    runner.saveAssembly();

    cudaStream_t stream;
    cuda.ERROR_CHECK(cuda.StreamCreate(out stream));

    NppStreamContext context = new NppStreamContext
    {
        hStream = stream,
        nCudaDevAttrComputeCapabilityMajor = prop.major,
        nCudaDevAttrComputeCapabilityMinor = prop.minor,
        nCudaDeviceId = 0,
        nMaxThreadsPerBlock = prop.maxThreadsPerBlock,
        nMaxThreadsPerMultiProcessor = prop.maxThreadsPerMultiProcessor,
        nMultiProcessorCount = prop.multiProcessorCount,
        nSharedMemPerBlock = 0
    };

    Random rand = new Random();

    using (NPPImage input = NPPImage.Load(inputFileName, stream))
    {
        int outputBytes = input.width * input.height * 4 * sizeof(byte);
        uchar4[] output = new uchar4[input.width * input.height];
        IntPtr d_output;
        cuda.ERROR_CHECK(cuda.Malloc(out d_output, outputBytes));

        // working area (allocated and released here, not otherwise used in this method)
        IntPtr oDeviceDst32u;
        size_t oDeviceDst32uPitch;
        cuda.ERROR_CHECK(cuda.MallocPitch(out oDeviceDst32u, out oDeviceDst32uPitch, input.width * sizeof(int), input.height));
        IntPtr segments;
        size_t segmentsPitch;
        cuda.ERROR_CHECK(cuda.MallocPitch(out segments, out segmentsPitch, input.width * sizeof(ushort), input.height));

        NppiSize oSizeROI = new NppiSize { width = input.width, height = input.height };
        int nBufferSize = 0;
        IntPtr pScratchBufferNPP1, pScratchBufferNPP2;

        // compute maximum label (in place on the input image)
        NPPI.ERROR_CHECK(NPPI.LabelMarkersGetBufferSize_16u_C1R(oSizeROI, out nBufferSize));
        cuda.ERROR_CHECK(cuda.Malloc(out pScratchBufferNPP1, nBufferSize));
        int maxLabel;
        NPPI.ERROR_CHECK(NPPI.LabelMarkers_16u_C1IR_Ctx(input.deviceData, input.pitch, oSizeROI, 165, NppiNorm.nppiNormInf, out maxLabel, pScratchBufferNPP1, context));

        // compress labels; maxLabel is passed in and overwritten with the new maximum
        NPPI.ERROR_CHECK(NPPI.CompressMarkerLabelsGetBufferSize_16u_C1R(maxLabel, out nBufferSize));
        cuda.ERROR_CHECK(cuda.Malloc(out pScratchBufferNPP2, nBufferSize));
        NPPI.ERROR_CHECK(NPPI.CompressMarkerLabels_16u_C1IR_Ctx(input.deviceData, input.pitch, oSizeROI, maxLabel, out maxLabel, pScratchBufferNPP2, context));

        // one random colour per label, indices 0..maxLabel inclusive
        int colormapBytes = (maxLabel + 1) * 4 * sizeof(byte);
        uchar4[] colormap = new uchar4[maxLabel + 1];
        for (int i = 0; i <= maxLabel; ++i)
        {
            colormap[i] = new uchar4
            {
                x = (byte)(rand.Next() % 256),
                y = (byte)(rand.Next() % 256),
                z = (byte)(rand.Next() % 256),
                w = 0
            };
        }

        IntPtr d_colormap;
        cuda.ERROR_CHECK(cuda.Malloc(out d_colormap, colormapBytes));
        GCHandle handle = GCHandle.Alloc(colormap, GCHandleType.Pinned);
        try
        {
            cuda.ERROR_CHECK(cuda.Memcpy(d_colormap, handle.AddrOfPinnedObject(), colormapBytes, cudaMemcpyKind.cudaMemcpyHostToDevice));
        }
        finally
        {
            handle.Free();
        }

        NPP_ImageSegmentationx46Programx46ColorizeLabels_ExternCWrapperStream_CUDA(
            8 * prop.multiProcessorCount, 1, 256, 1, 1, 0, stream, // cuda configuration
            input.deviceData, d_output, d_colormap, maxLabel + 1,
            input.pitch * input.height / sizeof(ushort), input.width, input.pitch / sizeof(ushort));

        handle = GCHandle.Alloc(output, GCHandleType.Pinned);
        try
        {
            cuda.ERROR_CHECK(cuda.Memcpy(handle.AddrOfPinnedObject(), d_output, outputBytes, cudaMemcpyKind.cudaMemcpyDeviceToHost));
        }
        finally
        {
            handle.Free();
        }

        NPPImage.Save(segmentsFileName, output, input.width, input.height);
        Process.Start(segmentsFileName);

        cuda.ERROR_CHECK(cuda.Free(oDeviceDst32u));
        cuda.ERROR_CHECK(cuda.Free(segments));
        cuda.ERROR_CHECK(cuda.Free(pScratchBufferNPP1));
        cuda.ERROR_CHECK(cuda.Free(pScratchBufferNPP2));
        // These two buffers were previously never released.
        cuda.ERROR_CHECK(cuda.Free(d_colormap));
        cuda.ERROR_CHECK(cuda.Free(d_output));
    }
}