Exemplo n.º 1
0
        static void Main(string[] args)
        {
            // init CUDA
            IntPtr d;

            cuda.Malloc(out d, sizeof(int));
            cuda.Free(d);

            HybRunner      runner = HybRunner.Cuda();
            cudaDeviceProp prop;

            cuda.GetDeviceProperties(out prop, 0);
            dynamic wrapped = runner.Wrap(new Program());

            runner.saveAssembly();
            cudaStream_t stream;

            cuda.StreamCreate(out stream);

            NppStreamContext context = new NppStreamContext
            {
                hStream = stream,
                nCudaDevAttrComputeCapabilityMajor = prop.major,
                nCudaDevAttrComputeCapabilityMinor = prop.minor,
                nCudaDeviceId                = 0,
                nMaxThreadsPerBlock          = prop.maxThreadsPerBlock,
                nMaxThreadsPerMultiProcessor = prop.maxThreadsPerMultiProcessor,
                nMultiProcessorCount         = prop.multiProcessorCount,
                nSharedMemPerBlock           = 0
            };

            Random rand = new Random();

            using (NPPImage input = NPPImage.Load(inputFileName, stream))
            {
                uchar4[] output = new uchar4[input.width * input.height];
                IntPtr   d_output;
                cuda.Malloc(out d_output, input.width * input.height * 4 * sizeof(byte));

                // working area
                IntPtr oDeviceDst32u;
                size_t oDeviceDst32uPitch;
                cuda.ERROR_CHECK(cuda.MallocPitch(out oDeviceDst32u, out oDeviceDst32uPitch, input.width * sizeof(int), input.height));
                IntPtr segments;
                size_t segmentsPitch;
                cuda.ERROR_CHECK(cuda.MallocPitch(out segments, out segmentsPitch, input.width * sizeof(ushort), input.height));

                NppiSize oSizeROI = new NppiSize {
                    width = input.width, height = input.height
                };
                int    nBufferSize = 0;
                IntPtr pScratchBufferNPP1, pScratchBufferNPP2;

                // compute maximum label
                NPPI.ERROR_CHECK(NPPI.LabelMarkersGetBufferSize_16u_C1R(oSizeROI, out nBufferSize));
                cuda.ERROR_CHECK(cuda.Malloc(out pScratchBufferNPP1, nBufferSize));
                int maxLabel;
                NPPI.ERROR_CHECK(NPPI.LabelMarkers_16u_C1IR_Ctx(input.deviceData, input.pitch, oSizeROI, 165, NppiNorm.nppiNormInf, out maxLabel, pScratchBufferNPP1, context));


                // compress labels
                NPPI.ERROR_CHECK(NPPI.CompressMarkerLabelsGetBufferSize_16u_C1R(maxLabel, out nBufferSize));
                cuda.ERROR_CHECK(cuda.Malloc(out pScratchBufferNPP2, nBufferSize));
                NPPI.ERROR_CHECK(NPPI.CompressMarkerLabels_16u_C1IR_Ctx(input.deviceData, input.pitch, oSizeROI, maxLabel, out maxLabel, pScratchBufferNPP2, context));

                uchar4[] colormap = new uchar4[maxLabel + 1];
                for (int i = 0; i <= maxLabel; ++i)
                {
                    colormap[i] = new uchar4 {
                        x = (byte)(rand.Next() % 256), y = (byte)(rand.Next() % 256), z = (byte)(rand.Next() % 256), w = 0
                    };
                }

                IntPtr d_colormap;
                cuda.Malloc(out d_colormap, (maxLabel + 1) * 4 * sizeof(byte));
                var handle = GCHandle.Alloc(colormap, GCHandleType.Pinned);
                cuda.Memcpy(d_colormap, handle.AddrOfPinnedObject(), (maxLabel + 1) * 4 * sizeof(byte), cudaMemcpyKind.cudaMemcpyHostToDevice);
                handle.Free();

                NPP_ImageSegmentationx46Programx46ColorizeLabels_ExternCWrapperStream_CUDA(
                    8 * prop.multiProcessorCount, 1, 256, 1, 1, 0, stream, // cuda configuration
                    input.deviceData, d_output, d_colormap, maxLabel + 1, input.pitch * input.height / sizeof(ushort), input.width, input.pitch / sizeof(ushort));

                handle = GCHandle.Alloc(output, GCHandleType.Pinned);
                cuda.Memcpy(handle.AddrOfPinnedObject(), d_output, input.width * input.height * sizeof(byte) * 4, cudaMemcpyKind.cudaMemcpyDeviceToHost);
                handle.Free();
                NPPImage.Save(segmentsFileName, output, input.width, input.height);
                Process.Start(segmentsFileName);

                cuda.ERROR_CHECK(cuda.Free(oDeviceDst32u));
                cuda.ERROR_CHECK(cuda.Free(segments));
                cuda.ERROR_CHECK(cuda.Free(pScratchBufferNPP1));
                cuda.ERROR_CHECK(cuda.Free(pScratchBufferNPP2));
            }
        }