/// <summary>
        /// Loads a grayscale image from disk, widens it to 16-bit samples and uploads it
        /// to a freshly allocated pitched device buffer.
        /// </summary>
        /// <param name="path">Path to a binary PGM ("P5", 8-bit) or PNG file. The format is
        /// chosen from the file extension (case-insensitive).</param>
        /// <param name="stream">CUDA stream on which the host-to-device copy is enqueued.</param>
        /// <returns>An <c>NPPImage</c> whose <c>width</c>, <c>height</c>, <c>pitch</c>,
        /// <c>hostData</c> and <c>deviceData</c> members are populated.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
        /// <exception cref="NotSupportedException">The extension is neither pgm nor png, or the
        /// file content is a variant this loader does not decode.</exception>
        /// <exception cref="InvalidDataException">The file header is malformed or the file holds
        /// fewer samples than width * height.</exception>
        public static NPPImage Load(string path, cudaStream_t stream)
        {
            if (path == null)
            {
                throw new ArgumentNullException("path");
            }

            string extension = Path.GetExtension(path).ToLowerInvariant();

            int    width, height;
            byte[] samples;
            if (extension.Contains("pgm"))
            {
                samples = ReadPgmSamples(path, out width, out height);
            }
            else if (extension.Contains("png"))
            {
                samples = ReadPngRedChannel(path, out width, out height);
            }
            else
            {
                throw new NotSupportedException("unsupported file format");
            }

            // Validate before touching the GPU so a bad file cannot leak a device allocation.
            int pixelCount = checked(width * height);
            if (samples.Length < pixelCount)
            {
                throw new InvalidDataException("image file is truncated: expected " + pixelCount + " samples, found " + samples.Length);
            }

            // Widen 8-bit samples to the 16-bit layout uploaded to the device.
            ushort[] hostData = new ushort[pixelCount];
            for (int k = 0; k < pixelCount; ++k)
            {
                hostData[k] = samples[k];
            }

            NPPImage result = new NPPImage();
            result.width    = width;
            result.height   = height;
            result.hostData = hostData;

            IntPtr deviceData;
            size_t p;
            cuda.ERROR_CHECK(cuda.MallocPitch(out deviceData, out p, width * sizeof(ushort), height));
            result.pitch = (int)p;

            GCHandle handle = GCHandle.Alloc(hostData, GCHandleType.Pinned);
            try
            {
                // Host rows are tightly packed (width * sizeof(ushort)); device rows use the pitch
                // returned by MallocPitch.
                // NOTE(review): the handle is released as soon as the call returns, as in the
                // original code. This relies on the runtime having consumed the (non page-locked)
                // host buffer before returning - confirm against the CUDA memcpy sync rules.
                cuda.ERROR_CHECK(cuda.Memcpy2DAsync(deviceData, p, handle.AddrOfPinnedObject(), width * sizeof(ushort), width * sizeof(ushort), height, cudaMemcpyKind.cudaMemcpyHostToDevice, stream));
            }
            catch
            {
                // The image is never handed to the caller on failure, so release the buffer here.
                cuda.Free(deviceData);
                throw;
            }
            finally
            {
                handle.Free();
            }

            result.deviceData = deviceData;
            return(result);
        }

        /// <summary>
        /// Reads the raster of a binary ("P5") PGM file with one byte per sample.
        /// </summary>
        /// <param name="path">File to read.</param>
        /// <param name="width">Receives the image width from the header.</param>
        /// <param name="height">Receives the image height from the header.</param>
        /// <returns>Exactly width * height samples in file order.</returns>
        private static byte[] ReadPgmSamples(string path, out int width, out int height)
        {
            byte[] file = File.ReadAllBytes(path);

            if (file.Length < 2 || file[0] != 'P' || file[1] != '5')
            {
                // Only the binary grayscale variant is decoded; ASCII "P2" etc. would be mis-read.
                throw new NotSupportedException("unsupported file format");
            }

            int pos = 2;
            width  = ReadPgmHeaderInt(file, ref pos);
            height = ReadPgmHeaderInt(file, ref pos);
            int maxValue = ReadPgmHeaderInt(file, ref pos);

            if (width <= 0 || height <= 0)
            {
                throw new InvalidDataException("malformed PGM header: non-positive image size");
            }
            if (maxValue <= 0 || maxValue > 255)
            {
                // Larger maxima mean two bytes per sample, which this loader does not decode.
                throw new NotSupportedException("unsupported file format");
            }

            // Exactly one whitespace byte separates the header from the raster.
            if (pos >= file.Length || !IsPgmWhitespace(file[pos]))
            {
                throw new InvalidDataException("malformed PGM header: missing raster separator");
            }
            ++pos;

            int count = checked(width * height);
            if (file.Length - pos < count)
            {
                throw new InvalidDataException("image file is truncated: expected " + count + " samples, found " + (file.Length - pos));
            }

            byte[] samples = new byte[count];
            Buffer.BlockCopy(file, pos, samples, 0, count);
            return samples;
        }

        /// <summary>
        /// Parses the next unsigned decimal integer of a PGM header, skipping leading
        /// whitespace and '#' comments. <paramref name="pos"/> is left just past the last digit.
        /// </summary>
        private static int ReadPgmHeaderInt(byte[] data, ref int pos)
        {
            while (pos < data.Length)
            {
                if (data[pos] == '#')
                {
                    // A comment runs to the end of the current line.
                    while (pos < data.Length && data[pos] != '\n' && data[pos] != '\r')
                    {
                        ++pos;
                    }
                }
                else if (IsPgmWhitespace(data[pos]))
                {
                    ++pos;
                }
                else
                {
                    break;
                }
            }

            int start = pos;
            int value = 0;
            while (pos < data.Length && data[pos] >= '0' && data[pos] <= '9')
            {
                value = checked(value * 10 + (data[pos] - '0'));
                ++pos;
            }

            if (pos == start)
            {
                throw new InvalidDataException("malformed PGM header: expected a number at byte " + start);
            }
            return value;
        }

        /// <summary>Returns true for the bytes the PGM header treats as whitespace.</summary>
        private static bool IsPgmWhitespace(byte b)
        {
            return b == ' ' || b == '\t' || b == '\n' || b == '\r' || b == '\v' || b == '\f';
        }

        /// <summary>
        /// Decodes an image through System.Drawing and returns the red channel of every pixel
        /// in row-major order.
        /// </summary>
        /// <param name="path">File to read.</param>
        /// <param name="width">Receives the bitmap width.</param>
        /// <param name="height">Receives the bitmap height.</param>
        private static byte[] ReadPngRedChannel(string path, out int width, out int height)
        {
            // Dispose the bitmap so the GDI+ handle and the lock on the file are released.
            using (Bitmap image = Bitmap.FromFile(path) as Bitmap)
            {
                if (image == null)
                {
                    throw new NotSupportedException("unsupported file format");
                }

                width  = image.Width;
                height = image.Height;

                byte[] samples = new byte[width * height];
                int    index   = 0;
                for (int j = 0; j < height; ++j)
                {
                    for (int i = 0; i < width; ++i, ++index)
                    {
                        samples[index] = image.GetPixel(i, j).R;
                    }
                }
                return samples;
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Labels the connected regions of the input image with NPP, compresses the label
        /// range, colors each label with a random color on the GPU, then saves and opens the
        /// resulting image.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // init CUDA
            IntPtr d;

            cuda.ERROR_CHECK(cuda.Malloc(out d, sizeof(int)));
            cuda.ERROR_CHECK(cuda.Free(d));

            HybRunner      runner = HybRunner.Cuda();
            cudaDeviceProp prop;

            cuda.ERROR_CHECK(cuda.GetDeviceProperties(out prop, 0));
            // The wrapper object itself is not used below; the call is kept because the original
            // performs it before saveAssembly().
            dynamic wrapped = runner.Wrap(new Program());

            runner.saveAssembly();
            cudaStream_t stream;

            cuda.ERROR_CHECK(cuda.StreamCreate(out stream));

            try
            {
                NppStreamContext context = new NppStreamContext
                {
                    hStream = stream,
                    nCudaDevAttrComputeCapabilityMajor = prop.major,
                    nCudaDevAttrComputeCapabilityMinor = prop.minor,
                    nCudaDeviceId                = 0,
                    nMaxThreadsPerBlock          = prop.maxThreadsPerBlock,
                    nMaxThreadsPerMultiProcessor = prop.maxThreadsPerMultiProcessor,
                    nMultiProcessorCount         = prop.multiProcessorCount,
                    nSharedMemPerBlock           = 0
                };

                Random rand = new Random();

                using (NPPImage input = NPPImage.Load(inputFileName, stream))
                {
                    int      pixelCount = input.width * input.height;
                    uchar4[] output     = new uchar4[pixelCount];

                    // Initialised to Zero so the cleanup below can tell what was actually allocated.
                    IntPtr d_output           = IntPtr.Zero;
                    IntPtr d_colormap         = IntPtr.Zero;
                    IntPtr pScratchBufferNPP1 = IntPtr.Zero;
                    IntPtr pScratchBufferNPP2 = IntPtr.Zero;

                    try
                    {
                        cuda.ERROR_CHECK(cuda.Malloc(out d_output, pixelCount * 4 * sizeof(byte)));

                        NppiSize oSizeROI = new NppiSize {
                            width = input.width, height = input.height
                        };
                        int nBufferSize = 0;

                        // compute maximum label
                        NPPI.ERROR_CHECK(NPPI.LabelMarkersGetBufferSize_16u_C1R(oSizeROI, out nBufferSize));
                        cuda.ERROR_CHECK(cuda.Malloc(out pScratchBufferNPP1, nBufferSize));
                        int maxLabel;
                        NPPI.ERROR_CHECK(NPPI.LabelMarkers_16u_C1IR_Ctx(input.deviceData, input.pitch, oSizeROI, 165, NppiNorm.nppiNormInf, out maxLabel, pScratchBufferNPP1, context));

                        // compress labels
                        NPPI.ERROR_CHECK(NPPI.CompressMarkerLabelsGetBufferSize_16u_C1R(maxLabel, out nBufferSize));
                        cuda.ERROR_CHECK(cuda.Malloc(out pScratchBufferNPP2, nBufferSize));
                        NPPI.ERROR_CHECK(NPPI.CompressMarkerLabels_16u_C1IR_Ctx(input.deviceData, input.pitch, oSizeROI, maxLabel, out maxLabel, pScratchBufferNPP2, context));

                        // one random color per label, index 0..maxLabel inclusive
                        uchar4[] colormap = new uchar4[maxLabel + 1];
                        for (int i = 0; i <= maxLabel; ++i)
                        {
                            colormap[i] = new uchar4 {
                                x = (byte)rand.Next(256), y = (byte)rand.Next(256), z = (byte)rand.Next(256), w = 0
                            };
                        }

                        cuda.ERROR_CHECK(cuda.Malloc(out d_colormap, (maxLabel + 1) * 4 * sizeof(byte)));
                        GCHandle handle = GCHandle.Alloc(colormap, GCHandleType.Pinned);
                        try
                        {
                            cuda.ERROR_CHECK(cuda.Memcpy(d_colormap, handle.AddrOfPinnedObject(), (maxLabel + 1) * 4 * sizeof(byte), cudaMemcpyKind.cudaMemcpyHostToDevice));
                        }
                        finally
                        {
                            handle.Free();
                        }

                        NPP_ImageSegmentationx46Programx46ColorizeLabels_ExternCWrapperStream_CUDA(
                            8 * prop.multiProcessorCount, 1, 256, 1, 1, 0, stream, // cuda configuration
                            input.deviceData, d_output, d_colormap, maxLabel + 1, input.pitch * input.height / sizeof(ushort), input.width, input.pitch / sizeof(ushort));

                        // Wait for the kernel so execution errors surface here rather than at the
                        // read-back, and so the copy below never races the launch.
                        cuda.ERROR_CHECK(cuda.DeviceSynchronize());

                        handle = GCHandle.Alloc(output, GCHandleType.Pinned);
                        try
                        {
                            cuda.ERROR_CHECK(cuda.Memcpy(handle.AddrOfPinnedObject(), d_output, pixelCount * sizeof(byte) * 4, cudaMemcpyKind.cudaMemcpyDeviceToHost));
                        }
                        finally
                        {
                            handle.Free();
                        }

                        NPPImage.Save(segmentsFileName, output, input.width, input.height);
                        Process.Start(segmentsFileName);
                    }
                    finally
                    {
                        // Release every device buffer that was allocated, including the output
                        // and colormap buffers.
                        if (pScratchBufferNPP2 != IntPtr.Zero)
                        {
                            cuda.ERROR_CHECK(cuda.Free(pScratchBufferNPP2));
                        }
                        if (pScratchBufferNPP1 != IntPtr.Zero)
                        {
                            cuda.ERROR_CHECK(cuda.Free(pScratchBufferNPP1));
                        }
                        if (d_colormap != IntPtr.Zero)
                        {
                            cuda.ERROR_CHECK(cuda.Free(d_colormap));
                        }
                        if (d_output != IntPtr.Zero)
                        {
                            cuda.ERROR_CHECK(cuda.Free(d_output));
                        }
                    }
                }
            }
            finally
            {
                cuda.StreamDestroy(stream);
            }
        }