コード例 #1
1
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        private void initGLAndCuda()
        {
            //Create render target control
            m_renderControl = new OpenTK.GLControl(GraphicsMode.Default, 1, 0, GraphicsContextFlags.Default);
            m_renderControl.Dock = DockStyle.Fill;
            m_renderControl.BackColor = Color.White;
            m_renderControl.BorderStyle = BorderStyle.FixedSingle;
            m_renderControl.KeyDown += new KeyEventHandler(m_renderControl_KeyDown);
            m_renderControl.MouseMove += new MouseEventHandler(m_renderControl_MouseMove);
            m_renderControl.MouseDown += new MouseEventHandler(m_renderControl_MouseDown);
            m_renderControl.SizeChanged += new EventHandler(m_renderControl_SizeChanged);

            panel1.Controls.Add(m_renderControl);
            Console.WriteLine("   OpenGL device is Available");

            int deviceID = CudaContext.GetMaxGflopsDeviceId();

            ctx = CudaContext.CreateOpenGLContext(deviceID, CUCtxFlags.BlockingSync);
            string console = string.Format("CUDA device [{0}] has {1} Multi-Processors", ctx.GetDeviceName(), ctx.GetDeviceInfo().MultiProcessorCount);
            Console.WriteLine(console);

            CUmodule module = ctx.LoadModulePTX("kernel.ptx");

            addForces_k = new CudaKernel("addForces_k", module, ctx);
            advectVelocity_k = new CudaKernel("advectVelocity_k", module, ctx);
            diffuseProject_k = new CudaKernel("diffuseProject_k", module, ctx);
            updateVelocity_k = new CudaKernel("updateVelocity_k", module, ctx);
            advectParticles_k = new CudaKernel("advectParticles_OGL", module, ctx);

            hvfield = new cData[DS];
            dvfield = new CudaPitchedDeviceVariable<cData>(DIM, DIM);
            tPitch = dvfield.Pitch;

            dvfield.CopyToDevice(hvfield);

            vxfield = new CudaDeviceVariable<cData>(DS);
            vyfield = new CudaDeviceVariable<cData>(DS);

            // Create particle array
            particles = new cData[DS];
            initParticles(particles, DIM, DIM);

            // TODO: update kernels to use the new unpadded memory layout for perf
            // rather than the old FFTW-compatible layout
            planr2c = new CudaFFTPlan2D(DIM, DIM, cufftType.R2C, Compatibility.FFTWPadding);
            planc2r = new CudaFFTPlan2D(DIM, DIM, cufftType.C2R, Compatibility.FFTWPadding);

            GL.GenBuffers(1, out vbo);
            GL.BindBuffer(BufferTarget.ArrayBuffer, vbo);
            GL.BufferData<cData>(BufferTarget.ArrayBuffer, new IntPtr(cData.SizeOf * DS), particles, BufferUsageHint.DynamicDraw);
            int bsize;
            GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, out bsize);

            if (bsize != DS * cData.SizeOf)
                throw new Exception("Sizes don't match.");

            GL.BindBuffer(BufferTarget.ArrayBuffer, 0);

            cuda_vbo_resource = new CudaGraphicsInteropResourceCollection();
            cuda_vbo_resource.Add(new CudaOpenGLBufferInteropResource(vbo, CUGraphicsRegisterFlags.None));

            texref = new CudaTextureArray2D(advectVelocity_k, "texref", CUAddressMode.Wrap, CUFilterMode.Linear, 0, CUArrayFormat.Float, DIM, DIM, CudaArray2DNumChannels.Two);

            stopwatch = new CudaStopWatch(CUEventFlags.Default);

            reshape();
            isInit = true;
            display();
        }
コード例 #2
0
        public void ApplyMatte(int mode, CudaPitchedDeviceVariable <uchar4> result, CudaPitchedDeviceVariable <uchar4> image, CudaPitchedDeviceVariable <byte> matte, int width, int height)
        {
            dim3 block = new dim3(32, 8, 1);
            dim3 grid  = new dim3((width + 31) / 32, (height + 31) / 32, 1);

            switch (mode)
            {
            case 0:
                ApplyMatteKernelMode0.BlockDimensions = block;
                ApplyMatteKernelMode0.GridDimensions  = grid;
                ApplyMatteKernelMode0.Run(result.DevicePointer, (int)result.Pitch / 4, image.DevicePointer, (int)image.Pitch / 4, matte.DevicePointer, (int)matte.Pitch, width, height);

                //ApplyMatteKernel<0><<<grid, block>>>(result, result_pitch/4, image, image_pitch/4, matte, matte_pitch, width, height);
                break;

            case 1:
                ApplyMatteKernelMode1.BlockDimensions = block;
                ApplyMatteKernelMode1.GridDimensions  = grid;
                ApplyMatteKernelMode1.Run(result.DevicePointer, (int)result.Pitch / 4, image.DevicePointer, (int)image.Pitch / 4, matte.DevicePointer, (int)matte.Pitch, width, height);

                //ApplyMatteKernel<1><<<grid, block>>>(result, result_pitch/4, image, image_pitch/4, matte, matte_pitch, width, height);
                break;

            case 2:
                ApplyMatteKernelMode2.BlockDimensions = block;
                ApplyMatteKernelMode2.GridDimensions  = grid;
                ApplyMatteKernelMode2.Run(result.DevicePointer, (int)result.Pitch / 4, image.DevicePointer, (int)image.Pitch / 4, matte.DevicePointer, (int)matte.Pitch, width, height);

                //ApplyMatteKernel<2><<<grid, block>>>(result, result_pitch/4, image, image_pitch/4, matte, matte_pitch, width, height);
                break;
            }
        }
コード例 #3
0
        /// <summary>
        /// Binds a linear address range to the texture reference. <para/>
        /// Any previous address or CUDA array state associated with the texture reference is superseded by this function. <para/>
        /// Any memory previously bound to the texture reference is unbound.<para/>
        /// Size my differ to the previous bound variable, but type must be the same.
        /// </summary>
        /// <param name="deviceVar">New device variable to bind this texture reference to.</param>
        public void Reset(CudaPitchedDeviceVariable <T> deviceVar)
        {
            if (disposed)
            {
                throw new ObjectDisposedException(this.ToString());
            }

            _height   = deviceVar.Height;
            _width    = deviceVar.Width;
            _dataSize = deviceVar.TotalSizeInBytes;
            _devVar   = deviceVar;


            CUDAArrayDescriptor arrayDescr = new CUDAArrayDescriptor();

            arrayDescr.Format      = _format;
            arrayDescr.Height      = _height;
            arrayDescr.NumChannels = (uint)_numChannels;
            arrayDescr.Width       = _width;
            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddress2D_v2(_texref, ref arrayDescr, _devVar.DevicePointer, _devVar.Pitch);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetAddress2D", res));
            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }
        }
コード例 #4
0
ファイル: Form1.cs プロジェクト: xiaotie/DNCCuda
        private void addForces(CudaPitchedDeviceVariable <float2> v, int dx, int dy, int spx, int spy, float fx, float fy, int r, SizeT tPitch)
        {
            dim3 tids = new dim3((uint)(2 * r + 1), (uint)(2 * r + 1), 1);

            addForces_k.GridDimensions  = new dim3(1);
            addForces_k.BlockDimensions = tids;
            addForces_k.Run(v.DevicePointer, dx, dy, spx, spy, fx, fy, r, tPitch);
        }
コード例 #5
0
ファイル: GraphCut.cs プロジェクト: furusdara/cuda
 /// <summary>
 /// Graphcut of a flow network (32bit floating point edge capacities). The
 /// function computes the minimal cut (graphcut) of a 2D regular 4-connected
 /// graph. <para/>
 /// The inputs are the capacities of the horizontal (in transposed form),
 /// vertical and terminal (source and sink) edges. The capacities to source and
 /// sink
 /// are stored as capacity differences in the terminals array
 /// ( terminals(x) = source(x) - sink(x) ). The implementation assumes that the
 /// edge capacities
 /// for boundary edges that would connect to nodes outside the specified domain
 /// are set to 0 (for example left(0,*) == 0). If this is not fulfilled the
 /// computed labeling may be wrong!<para/>
 /// The computed binary labeling is encoded as unsigned 8bit values (0 and >0).
 /// </summary>
 /// <param name="Terminals">Pointer to differences of terminal edge capacities</param>
 /// <param name="LeftTransposed">Pointer to transposed left edge capacities</param>
 /// <param name="RightTransposed">Pointer to transposed right edge capacities</param>
 /// <param name="Top">Pointer to top edge capacities (top(*,0) must be 0)</param>
 /// <param name="Bottom">Pointer to bottom edge capacities (bottom(*,height-1)</param>
 /// <param name="Label">Pointer to destination label image </param>
 /// <returns></returns>
 public void GraphCut(CudaPitchedDeviceVariable <float> Terminals, CudaPitchedDeviceVariable <float> LeftTransposed, CudaPitchedDeviceVariable <float> RightTransposed,
                      CudaPitchedDeviceVariable <float> Top, CudaPitchedDeviceVariable <float> Bottom, CudaPitchedDeviceVariable <byte> Label)
 {
     status = NPPNativeMethods.NPPi.ImageLabeling.nppiGraphcut_32f8u(Terminals.DevicePointer, LeftTransposed.DevicePointer,
                                                                     RightTransposed.DevicePointer, Top.DevicePointer, Bottom.DevicePointer, Terminals.Pitch, LeftTransposed.Pitch, _size,
                                                                     Label.DevicePointer, Label.Pitch, _state);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiGraphcut_32f8u", status));
     NPPException.CheckNppStatus(status, this);
 }
コード例 #6
0
ファイル: Form1.cs プロジェクト: xiaotie/DNCCuda
        private void advectParticles(CudaDeviceVariable <vertex> p, CudaPitchedDeviceVariable <float2> v, int dx, int dy, float dt, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            advectParticles_k.GridDimensions  = grid;
            advectParticles_k.BlockDimensions = tids;
            advectParticles_k.Run(p.DevicePointer, v.DevicePointer, dx, dy, dt, TILEY / TIDSY, tPitch);
        }
コード例 #7
0
        public void DownscaleTrimap(CudaPitchedDeviceVariable <byte> small_image, int small_width, int small_height, CudaPitchedDeviceVariable <byte> image, int width, int height)
        {
            dim3 grid  = new dim3((width + 63) / 64, (height + 63) / 64, 1);
            dim3 block = new dim3(32, 8, 1);

            downscaleKernel2.BlockDimensions = block;
            downscaleKernel2.GridDimensions  = grid;
            downscaleKernel2.Run(small_image.DevicePointer, (int)small_image.Pitch, small_width, small_height, image.DevicePointer, (int)image.Pitch, width, height);
            //downscaleKernel<<<grid, block>>>(small_image, small_pitch, small_width, small_height, image, pitch, width, height, maxfilter_functor());
        }
コード例 #8
0
        public void DataTerm(CudaPitchedDeviceVariable <int> terminals, int gmmN, CudaDeviceVariable <float> gmm, int gmm_pitch, CudaPitchedDeviceVariable <uchar4> image, CudaPitchedDeviceVariable <byte> trimap, int width, int height)
        {
            dim3 block = new dim3(32, 8, 1);
            dim3 grid  = new dim3((int)((width + block.x - 1) / block.x), (int)((height + block.y - 1) / block.y), 1);

            DataTermKernel.BlockDimensions = block;
            DataTermKernel.GridDimensions  = grid;
            DataTermKernel.Run(terminals.DevicePointer, (int)terminals.Pitch / 4, gmmN, gmm.DevicePointer, (int)gmm_pitch / 4, image.DevicePointer, (int)image.Pitch / 4, trimap.DevicePointer, (int)trimap.Pitch, width, height);
            //DataTermKernel<<<grid, block>>>(terminals, terminal_pitch/4, gmmN, gmm, gmm_pitch/4, image, image_pitch/4, trimap, trimap_pitch, width, height);
        }
コード例 #9
0
ファイル: Form1.cs プロジェクト: xiaotie/DNCCuda
        private void updateVelocity(CudaPitchedDeviceVariable <float2> v, CudaDeviceVariable <float2> vx, CudaDeviceVariable <float2> vy, int dx, int pdx, int dy, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            updateVelocity_k.GridDimensions  = grid;
            updateVelocity_k.BlockDimensions = tids;
            updateVelocity_k.Run(v.DevicePointer, vx.DevicePointer, vy.DevicePointer, dx, pdx, dy, TILEY / TIDSY, tPitch);
        }
コード例 #10
0
 /// <summary>
 /// 1 channel connected region contours image to generate contours geometry info list in host memory.
 ///  <para/>
 /// Note that ALL input and output data for the function MUST be in device memory except where noted otherwise.
 /// Also nFirstContourID and nLastContourID allow only a portion of the contour geometry lists in the image to be output.
 ///  <para/>
 /// Note that the geometry list for each contour will begin at pContoursGeometryListsHost[pContoursPixelStartingOffsetHost[nContourID]		/// sizeof(NppiContourPixelGeometryInfo).
 ///  <para/>
 /// Note that due to the nature of some imput images contour ID 0 can sometimes contain ALL contours in the image which
 /// can significantly increase the time taken to output the geometry lists.  In these cases setting nFirstContourGeometryListID to >= 1
 /// significantly speed up geometry list output performance and all individual contours will still be output.
 /// </summary>
 /// <param name="pMarkerLabelsInfoListDev">pointer to device memory buffer which contains the output returned by the corresponding nppiCompressedMarkerLabelsUFInfo_32u_C1R_Ctx call.</param>
 /// <param name="pMarkerLabelsInfoListHost">pointer to host memory buffer which will be output by this function with additional information added.</param>
 /// <param name="pContoursDirectionImageDev">Source-Image Pointer to output image in device memory containing per contour pixel direction info around each uniquely labeled connected pixel region returned by corresponding nppiCompressedMarkerLabelsUFInfo call. </param>
 /// <param name="pContoursPixelGeometryListsHost">pointer to host memory buffer allocated to be at least as big as size returned by corresponding nppiCompressedMarkerLabelsUFGetGeometryListsSize call. </param>
 /// <param name="pContoursPixelCountsListHost">host memory pointer to array of nMaxMarkerLabelID uintegers returned by previous call to nppiCompressedMarkerLabelsUFContoursPixelGeometryLists_C1R_Ctx. </param>
 /// <param name="pContoursPixelsFoundListHost">host memory pointer to array of nMaxMarkerLabelID uintegers returned by this call representing the number of contour pixels found during geometry list generation. </param>
 /// <param name="pContoursPixelsStartingOffsetHost">host memory pointer to array of uintegers returned by this call representing the starting offset index of each contour found during geometry list generation. </param>
 /// <param name="nMaxMarkerLabelID">the value of the maximum marker label ID returned by corresponding compress marker labels UF call. </param>
 /// <param name="nFirstContourGeometryListID">the ID of the first contour geometry list to output. </param>
 /// <param name="nLastContourGeometryListID">the ID of the last contour geometry list to output.  </param>
 /// <param name="nppStreamCtx">NPP stream context.</param>
 public void CompressedMarkerLabelsUFContoursGenerateGeometryLists(CudaDeviceVariable <NppiCompressedMarkerLabelsInfo> pMarkerLabelsInfoListDev, NppiCompressedMarkerLabelsInfo[] pMarkerLabelsInfoListHost,
                                                                   CudaPitchedDeviceVariable <NppiContourPixelDirectionInfo> pContoursDirectionImageDev, NppiContourPixelGeometryInfo[] pContoursPixelGeometryListsHost,
                                                                   uint[] pContoursPixelCountsListHost, uint[] pContoursPixelsFoundListHost, uint[] pContoursPixelsStartingOffsetHost, uint nMaxMarkerLabelID, uint nFirstContourGeometryListID,
                                                                   uint nLastContourGeometryListID, NppStreamContext nppStreamCtx)
 {
     status = NPPNativeMethods_Ctx.NPPi.LabelMarkers.nppiCompressedMarkerLabelsUFContoursGenerateGeometryLists_C1R_Ctx(pMarkerLabelsInfoListDev.DevicePointer, pMarkerLabelsInfoListHost,
                                                                                                                       pContoursDirectionImageDev.DevicePointer, pContoursDirectionImageDev.Pitch, pContoursPixelGeometryListsHost, pContoursPixelCountsListHost,
                                                                                                                       pContoursPixelsFoundListHost, pContoursPixelsStartingOffsetHost, nMaxMarkerLabelID, nFirstContourGeometryListID, nLastContourGeometryListID, _sizeRoi, nppStreamCtx);
     Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiCompressedMarkerLabelsUFContoursGenerateGeometryLists_C1R_Ctx", status));
     NPPException.CheckNppStatus(status, this);
 }
コード例 #11
0
        public void convertRGBToRGBA(CudaPitchedDeviceVariable <uchar4> i4, CudaPitchedDeviceVariable <uchar3> i3, int width, int height)
        {
            dim3 block = new dim3(32, 8, 1);
            dim3 grid  = new dim3((width + 31) / 32, (height + 31) / 32, 1);

            convertRGBToRGBAKernel.BlockDimensions = block;
            convertRGBToRGBAKernel.GridDimensions  = grid;

            convertRGBToRGBAKernel.Run(i4.DevicePointer, (int)i4.Pitch, i3.DevicePointer, (int)i3.Pitch, width, height);
            //convertRGBToRGBAKernel<<<grid, block>>>(i4, i4_pitch, i3, i3_pitch, width, height);
        }
コード例 #12
0
        public GrabCut(CudaPitchedDeviceVariable <uchar4> image, CudaPitchedDeviceVariable <byte> trimap, int width, int height)
        {
            d_trimap = trimap;
            //The first one will also init the CUDA context!
            grabCutUtils = new GrabCutUtils();
            grabCutGMM   = new GrabCutGMM();

            size.width  = width;
            size.height = height;
            graphcut8   = new GraphCut8(size);

            gmms          = 2 * COLOR_CLUSTER;
            edge_strength = EDGE_STRENGTH;

            blocks    = ((width + 31) / 32) * ((height + 31) / 32);
            gmm_pitch = 11 * sizeof(float);

            //d_image =  new CudaPitchedDeviceVariable<uchar4>(size.width, size.height);
            //d_image.CopyToDevice(image);
            d_image = image;

            // Doublebuffered alpha
            d_alpha    = new CudaPitchedDeviceVariable <byte> [2];
            d_alpha[0] = new CudaPitchedDeviceVariable <byte>(size.width, size.height, 4);
            d_alpha[1] = new CudaPitchedDeviceVariable <byte>(size.width, size.height, 4);

            // Graph
            d_terminals        = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_top              = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_topleft          = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_topright         = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_bottom           = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_bottomleft       = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_bottomright      = new CudaPitchedDeviceVariable <int>(size.width, size.height);
            d_left_transposed  = new CudaPitchedDeviceVariable <int>(size.height, size.width);
            d_right_transposed = new CudaPitchedDeviceVariable <int>(size.height, size.width);


            //int scratch_gc_size = 0;
            //nppiGraphcut8GetSize(size, &scratch_gc_size);

            int scratch_gmm_size = (int)(blocks * gmm_pitch * gmms + blocks * 4);

            d_scratch_mem = new CudaDeviceVariable <byte>(scratch_gmm_size);
            //CUDA_SAFE_CALL( cudaMalloc(&d_scratch_mem, MAX(scratch_gmm_size, scratch_gc_size)) );

            //NPP_CHECK_NPP(nppiGraphcutInitAlloc(size, &pState, d_scratch_mem) );
            d_gmm = new CudaDeviceVariable <float>(gmm_pitch * gmms);
            //CUDA_SAFE_CALL( cudaMalloc(&d_gmm, gmm_pitch * gmms) );

            // Estimate color models on lower res input image first
            createSmallImage(Math.Max(width / 4, height / 4));
        }
コード例 #13
0
        public void TrimapFromRect(CudaPitchedDeviceVariable <byte> alpha, NppiRect rect, int width, int height)
        {
            dim3 block = new dim3(32, 8, 1);
            dim3 grid  = new dim3((int)((width + (block.x * 4) - 1) / (block.x * 4)), (height + 31) / 32, 1);

            //rect.y = height - 1 - (rect.y + rect.height - 1) ; // Flip horizontal (FreeImage inverts y axis)

            TrimapFromRectKernel.BlockDimensions = block;
            TrimapFromRectKernel.GridDimensions  = grid;
            TrimapFromRectKernel.Run(alpha.DevicePointer, (int)alpha.Pitch, rect, width, height);
            //TrimapFromRectKernel<<<grid, block>>>(alpha, alpha_pitch, rect, width, height );
        }
コード例 #14
0
ファイル: Form1.cs プロジェクト: xiaotie/DNCCuda
        void advectVelocity(CudaPitchedDeviceVariable <cData> v, CudaDeviceVariable <cData> vx, CudaDeviceVariable <cData> vy, int dx, int pdx, int dy, float dt, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            updateTexture(v, DIM * float2.SizeOf, DIM, tPitch);

            advectVelocity_k.GridDimensions  = grid;
            advectVelocity_k.BlockDimensions = tids;
            advectVelocity_k.Run(v.DevicePointer, vx.DevicePointer, vy.DevicePointer, dx, pdx, dy, dt, TILEY / TIDSY);
        }
コード例 #15
0
ファイル: Form1.cs プロジェクト: xiaotie/DNCCuda
        void advectParticles(uint vbo, CudaPitchedDeviceVariable <cData> v, int dx, int dy, float dt, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            cuda_vbo_resource.MapAllResources();
            CUdeviceptr p = cuda_vbo_resource[0].GetMappedPointer();

            advectParticles_k.GridDimensions  = grid;
            advectParticles_k.BlockDimensions = tids;
            advectParticles_k.Run(p, v.DevicePointer, dx, dy, dt, TILEY / TIDSY, tPitch);
            cuda_vbo_resource.UnmapAllResources();
        }
コード例 #16
0
        public void UpsampleAlpha(CudaPitchedDeviceVariable <byte> alpha, CudaPitchedDeviceVariable <byte> small_alpha, int width, int height, int small_width, int small_height)
        {
            dim3 grid  = new dim3((width + 127) / 128, (height + 31) / 32, 1);
            dim3 block = new dim3(32, 8, 1);

            int factor = width / small_width;
            int shift  = 0;

            while (factor > (1 << shift))
            {
                shift++;
            }

            upsampleAlphaKernel.BlockDimensions = block;
            upsampleAlphaKernel.GridDimensions  = grid;
            upsampleAlphaKernel.Run(alpha.DevicePointer, small_alpha.DevicePointer, (int)alpha.Pitch, width, height, shift);
            //upsampleAlphaKernel<<<grid, block>>>(alpha, small_alpha, alpha_pitch, width, height, shift);
        }
コード例 #17
0
        public void EdgeCues(float alpha, CudaPitchedDeviceVariable <uchar4> image, CudaPitchedDeviceVariable <int> left_transposed, CudaPitchedDeviceVariable <int> right_transposed,
                             CudaPitchedDeviceVariable <int> top, CudaPitchedDeviceVariable <int> bottom, CudaPitchedDeviceVariable <int> topleft, CudaPitchedDeviceVariable <int> topright,
                             CudaPitchedDeviceVariable <int> bottomleft, CudaPitchedDeviceVariable <int> bottomright, int width, int height, CudaDeviceVariable <byte> scratch_mem)
        {
            if (texref == null)
            {
                texref = new CudaTextureLinearPitched2D <uchar4>(MeanEdgeStrengthReductionKernel, "imageTex", CUAddressMode.Clamp, CUFilterMode.Point, CUTexRefSetFlags.ReadAsInteger, CUArrayFormat.UnsignedInt8, image);
            }
            else
            {
                texref.Reset(image);
            }

            if (texref2 == null)
            {
                texref2 = new CudaTextureLinearPitched2D <uchar4>(EdgeCuesKernel, "imageTex", CUAddressMode.Clamp, CUFilterMode.Point, CUTexRefSetFlags.ReadAsInteger, CUArrayFormat.UnsignedInt8, image);
            }
            else
            {
                texref2.Reset(image);
            }


            dim3 grid        = new dim3((width + 31) / 32, (height + 31) / 32, 1);
            dim3 block       = new dim3(32, 4, 1);
            dim3 large_block = new dim3(32, 8, 1);

            MeanEdgeStrengthReductionKernel.BlockDimensions = large_block;
            MeanEdgeStrengthReductionKernel.GridDimensions  = grid;
            MeanEdgeStrengthFinalKernel.BlockDimensions     = block;
            MeanEdgeStrengthFinalKernel.GridDimensions      = new dim3(1, 1, 1);
            EdgeCuesKernel.BlockDimensions = block;
            EdgeCuesKernel.GridDimensions  = grid;

            MeanEdgeStrengthReductionKernel.Run(width, height, scratch_mem.DevicePointer);

            //MeanEdgeStrengthReductionKernel<<<grid, large_block>>>( width, height, scratch_mem);
            MeanEdgeStrengthFinalKernel.Run(scratch_mem.DevicePointer, grid.x * grid.y);
            //MeanEdgeStrengthFinalKernel<<<1,block>>>( scratch_mem, grid.x * grid.y);

            EdgeCuesKernel.Run(alpha, scratch_mem.DevicePointer, left_transposed.DevicePointer, right_transposed.DevicePointer, top.DevicePointer, bottom.DevicePointer, topleft.DevicePointer, topright.DevicePointer,
                               bottomleft.DevicePointer, bottomright.DevicePointer, (int)top.Pitch / 4, (int)right_transposed.Pitch / 4, width, height);
            //EdgeCuesKernel<<<grid, block>>>( alpha , scratch_mem, left_transposed, right_transposed, top, bottom, topleft, topright, bottomleft, bottomright, pitch / 4, transposed_pitch/ 4, width, height );
        }
コード例 #18
0
ファイル: CudaArray2D.cs プロジェクト: SciSharp/TensorSharp
        /// <summary>
        /// Copy from this array to a pitched device variable
        /// </summary>
        /// <typeparam name="T">device variable base type</typeparam>
        /// <param name="aDeviceVariable">Destination</param>
        public void CopyFromThisToDevice <T>(CudaPitchedDeviceVariable <T> aDeviceVariable) where T : struct
        {
            CUDAMemCpy2D copyParams = new CUDAMemCpy2D();

            copyParams.dstDevice     = aDeviceVariable.DevicePointer;
            copyParams.dstMemoryType = CUMemoryType.Device;
            copyParams.dstPitch      = aDeviceVariable.Pitch;
            copyParams.srcArray      = _cuArray;
            copyParams.srcMemoryType = CUMemoryType.Array;
            copyParams.Height        = aDeviceVariable.Height;
            copyParams.WidthInBytes  = aDeviceVariable.WidthInBytes;

            res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpy2D_v2(ref copyParams);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpy2D", res));
            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }
        }
コード例 #19
0
        private void InitializeCUFFT()
        {
            g_hvfield = new float2[DS];

            g_dvfield = new CudaPitchedDeviceVariable <float2>(DIM, DIM);
            g_tPitch  = g_dvfield.Pitch; //Store pitch in g_tPitch to keep consistency to the C++ code
            g_dvfield.CopyToDevice(g_hvfield);

            // Temporary complex velocity field data
            g_vxfield = new CudaDeviceVariable <float2>(PDS);
            g_vyfield = new CudaDeviceVariable <float2>(PDS);

            texref = new CudaTextureArray2D(advectVelocity_k, "texref", CUAddressMode.Wrap, CUFilterMode.Linear, 0, CUArrayFormat.Float, DIM, DIM, CudaArray2DNumChannels.Two);

            g_particles = new float2[DS];
            initParticles(g_particles, DIM, DIM);

            g_planr2c = new CudaFFTPlan2D(DIM, DIM, cufftType.R2C, Compatibility.PADDING);
            g_planc2r = new CudaFFTPlan2D(DIM, DIM, cufftType.C2R, Compatibility.PADDING);
        }
コード例 #20
0
        public bool SegmentationChanged(CudaDeviceVariable <byte> d_changed, CudaPitchedDeviceVariable <byte> alpha_old, CudaPitchedDeviceVariable <byte> alpha_new, int width, int height)
        {
            dim3 grid  = new dim3((width + 31) / 32, (height + 31) / 32, 1);
            dim3 block = new dim3(32, 8, 1);

            CudaDeviceVariable <int> d_changedInt = new CudaDeviceVariable <int>(d_changed.DevicePointer, false);

            d_changedInt[0] = 0;

            SegmentationChangedKernel.BlockDimensions = block;
            SegmentationChangedKernel.GridDimensions  = grid;
            SegmentationChangedKernel.Run(d_changedInt.DevicePointer, alpha_old.DevicePointer, alpha_new.DevicePointer, (int)alpha_old.Pitch, width, height);
            //SegmentationChangedKernel<<<grid, block>>>(d_changed, alpha_old, alpha_new, alpha_pitch, width, height);

            int h_changed = d_changedInt[0];

            //error = cudaMemcpy(&h_changed, d_changed, 4, cudaMemcpyDeviceToHost);

            return(h_changed != 0);
        }
コード例 #21
0
ファイル: CudaGraph.cs プロジェクト: zhongkaifu/managedCuda
        /// <summary>
        /// Creates a memset node and adds it to a graph<para/>
        /// Creates a new memset node and adds it to graph with
        /// dependencies specified via dependencies.<para/>
        /// It is possible for dependencies to be null, in which case the node will be placed
        /// at the root of the graph. Dependencies may not have any duplicate entries.<para/>
        /// The element size must be 1, 2, or 4 bytes.<para/>
        /// When the graph is launched, the node will perform the memset described by memsetParams.
        /// </summary>
        /// <param name="dependencies">can be null</param>
        /// <param name="deviceVariable">When the graph is launched, the node will perform the memset on deviceVariable.</param>
        /// <param name="value">Value to set</param>
        /// <param name="ctx">Cuda context used for the operation</param>
        /// <returns>A handle to the new node will be returned.</returns>
        public CUgraphNode AddMemsetNode <T>(CUgraphNode[] dependencies, CudaPitchedDeviceVariable <T> deviceVariable, uint value, CudaContext ctx) where T : struct
        {
            CUgraphNode node            = new CUgraphNode();
            SizeT       numDependencies = 0;

            if (dependencies != null)
            {
                numDependencies = dependencies.Length;
            }

            CudaMemsetNodeParams memsetParams = CudaMemsetNodeParams.init <T>(deviceVariable, value);

            res = DriverAPINativeMethods.GraphManagment.cuGraphAddMemsetNode(ref node, _graph, dependencies, numDependencies, ref memsetParams, ctx.Context);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuGraphAddMemsetNode", res));
            if (res != CUResult.Success)
            {
                throw new CudaException(res);
            }

            return(node);
        }
コード例 #22
0
        private void createSmallImage(int max_dim)
        {
            int[] temp_width  = new int[2];
            int[] temp_height = new int[2];

            CudaPitchedDeviceVariable <uchar4>[] d_temp = new CudaPitchedDeviceVariable <uchar4> [2];

            temp_width[0]  = (int)Math.Ceiling(size.width * 0.5f);
            temp_height[0] = (int)Math.Ceiling(size.height * 0.5f);

            temp_width[1]  = (int)Math.Ceiling(temp_width[0] * 0.5f);
            temp_height[1] = (int)Math.Ceiling(temp_height[0] * 0.5f);

            d_temp[0] = new CudaPitchedDeviceVariable <uchar4>(temp_width[0], temp_height[0]);
            d_temp[1] = new CudaPitchedDeviceVariable <uchar4>(temp_width[1], temp_height[1]);

            // Alloc also the small trimaps
            d_small_trimap    = new CudaPitchedDeviceVariable <byte> [2];
            d_small_trimap[0] = new CudaPitchedDeviceVariable <byte>(temp_width[0], temp_height[0], 4);
            d_small_trimap[1] = new CudaPitchedDeviceVariable <byte>(temp_width[1], temp_height[1], 4);

            grabCutGMM.Downscale(d_temp[0], temp_width[0], temp_height[0], d_image, size.width, size.height);
            int current = 0;

            while (temp_width[current] > max_dim || temp_height[current] > max_dim)
            {
                grabCutGMM.Downscale(d_temp[1 - current], temp_width[1 - current], temp_height[1 - current], d_temp[current], temp_width[current], temp_height[current]);
                current ^= 1;
                temp_width[1 - current]  = (int)Math.Ceiling(temp_width[current] * 0.5f);
                temp_height[1 - current] = (int)Math.Ceiling(temp_height[current] * 0.5f);
            }

            d_small_image     = d_temp[current];
            small_size.width  = temp_width[current];
            small_size.height = temp_height[current];

            graphcut8Small = new GraphCut8(small_size);
            d_temp[1 - current].Dispose();
        }
コード例 #23
0
        /// <summary>
        /// 1 channel 32-bit uinteger connected region marker label renumbered from a previous call to nppiCompressMarkerLabelsUF or
        /// nppiCmpressMarkerLabelsUFBatch functions to eliminate label ID sparseness.
        /// </summary>
        /// <param name="nMaxMarkerLabelID">the value of the maximum marker label ID returned by corresponding compress marker labels UF call. </param>
        /// <param name="pMarkerLabelsInfoList">pointer to device memory buffer at least as large as value returned by the corresponding CompressedMarkerLabelsGetInfoListSize call.</param>
        /// <param name="pContoursImage">optional output image containing contours (boundaries) around each uniquely labeled connected pixel region, set to NULL if not needed. </param>
        /// <param name="pContoursDirectionImage">optional output image containing per contour pixel direction info around each uniquely labeled connected pixel region, set to NULL if not needed. </param>
        /// <param name="pContoursTotalsInfoHost">unique per call optional host memory pointer to NppiContourTotalsInfo structure in host memory, MUST be set if pContoursDirectionImage is set. </param>
        /// <param name="pContoursPixelCountsListDev">unique per call optional device memory pointer to array of nMaxMarkerLabelID uintegers in host memory, MUST be set if pContoursDirectionImage is set. </param>
        /// <param name="pContoursPixelCountsListHost">unique per call optional host memory pointer to array of nMaxMarkerLabelID uintegers in host memory, MUST be set if pContoursDirectionImage is set. </param>
        /// <param name="pContoursPixelStartingOffsetHost">unique per call optional host memory pointer to array of uintegers returned by this call representing the starting offset index of each contour found during geometry list generation </param>
        /// <param name="nppStreamCtx">NPP stream context.</param>
        public void CompressedMarkerLabelsUFInfo(
            uint nMaxMarkerLabelID, CudaDeviceVariable <NppiCompressedMarkerLabelsInfo> pMarkerLabelsInfoList, NPPImage_8uC1 pContoursImage,
            CudaPitchedDeviceVariable <NppiContourPixelDirectionInfo> pContoursDirectionImage, NppiContourTotalsInfo[] pContoursTotalsInfoHost,
            CudaDeviceVariable <uint> pContoursPixelCountsListDev, uint[] pContoursPixelCountsListHost, uint[] pContoursPixelStartingOffsetHost, NppStreamContext nppStreamCtx)
        {
            CUdeviceptr ptrMarkerLabelsInfoList       = new CUdeviceptr();
            CUdeviceptr ptrContoursImage              = new CUdeviceptr();
            CUdeviceptr ptrContoursDirectionImage     = new CUdeviceptr();
            CUdeviceptr ptrContoursPixelCountsListDev = new CUdeviceptr();
            int         pitchContoursImage            = 0;
            int         pitchContoursDirectionImage   = 0;

            if (pMarkerLabelsInfoList != null)
            {
                ptrMarkerLabelsInfoList = pMarkerLabelsInfoList.DevicePointer;
            }
            if (pContoursImage != null)
            {
                ptrContoursImage   = pContoursImage.DevicePointerRoi;
                pitchContoursImage = pContoursImage.Pitch;
            }
            if (pContoursDirectionImage != null)
            {
                ptrContoursDirectionImage   = pContoursDirectionImage.DevicePointer;
                pitchContoursDirectionImage = pContoursDirectionImage.Pitch;
            }
            if (pContoursPixelCountsListDev != null)
            {
                ptrContoursPixelCountsListDev = pContoursPixelCountsListDev.DevicePointer;
            }


            status = NPPNativeMethods_Ctx.NPPi.LabelMarkers.nppiCompressedMarkerLabelsUFInfo_32u_C1R_Ctx(_devPtrRoi, _pitch, _sizeRoi, nMaxMarkerLabelID, ptrMarkerLabelsInfoList,
                                                                                                         ptrContoursImage, pitchContoursImage, ptrContoursDirectionImage, pitchContoursDirectionImage, pContoursTotalsInfoHost, ptrContoursPixelCountsListDev,
                                                                                                         pContoursPixelCountsListHost, pContoursPixelStartingOffsetHost, nppStreamCtx);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiCompressedMarkerLabelsUFInfo_32u_C1R_Ctx", status));
            NPPException.CheckNppStatus(status, this);
        }
コード例 #24
0
 /// <summary>
 /// Asynchron copy device to host
 /// </summary>
 /// <param name="deviceVar"></param>
 /// <param name="stream"></param>
 public void AsyncCopyFromDevice(CudaPitchedDeviceVariable <T> deviceVar, CUstream stream)
 {
     AsyncCopyFromDevice(deviceVar.DevicePointer, deviceVar.Pitch, stream);
 }
コード例 #25
0
 /// <summary>
 /// Synchron copy device to host
 /// </summary>
 /// <param name="deviceVar"></param>
 public void SynchronCopyFromDevice(CudaPitchedDeviceVariable <T> deviceVar)
 {
     SynchronCopyFromDevice(deviceVar.DevicePointer, deviceVar.Pitch);
 }
コード例 #26
0
 public static NPPImage_16uC4 ToNPPImage(this CudaPitchedDeviceVariable <ushort4> deviceVar)
 {
     return(new NPPImage_16uC4(deviceVar.DevicePointer, deviceVar.Width, deviceVar.Height, deviceVar.Pitch));
 }
コード例 #27
0
ファイル: BasicTypes.cs プロジェクト: lvaleriu/managedCuda
		/// <summary>
		/// 
		/// </summary>
		/// <param name="var"></param>
		public CudaResourceDesc(CudaPitchedDeviceVariable<VectorTypes.uint4> var)
		{
			resType = CUResourceType.Pitch2D;
			flags = 0;
			res = new CudaResourceDescUnion();
			res.hArray = new CUarray();
			res.hMipmappedArray = new CUmipmappedArray();
			res.linear = new CudaResourceDescLinear();
			res.pitch2D = new CudaResourceDescPitch2D();
			res.pitch2D.devPtr = var.DevicePointer;
			res.pitch2D.format = CUArrayFormat.UnsignedInt32;
			res.pitch2D.height = var.Height;
			res.pitch2D.numChannels = 4;
			res.pitch2D.pitchInBytes = var.Pitch;
			res.pitch2D.width = var.Width;
		}
コード例 #28
0
ファイル: NPPImage_8uC4.cs プロジェクト: lvaleriu/managedCuda
		/// <summary>
		/// Creates a new NPPImage from allocated device ptr.
		/// </summary>
		/// <param name="devPtr">Already allocated device ptr.</param>
		public NPPImage_8uC4(CudaPitchedDeviceVariable<ManagedCuda.VectorTypes.uchar4> devPtr)
			:this(devPtr, false)
		{
		}
コード例 #29
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        void updateVelocity(CudaPitchedDeviceVariable<cData> v, CudaDeviceVariable<cData> vx, CudaDeviceVariable<cData> vy, int dx, int pdx, int dy, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            updateVelocity_k.GridDimensions = grid;
            updateVelocity_k.BlockDimensions = tids;
            updateVelocity_k.Run(v.DevicePointer, vx.DevicePointer, vy.DevicePointer, dx, pdx, dy, TILEY / TIDSY, tPitch);
        }
コード例 #30
0
ファイル: GraphCut.cs プロジェクト: lvaleriu/managedCuda
		/// <summary>
		/// Graphcut of a flow network (32bit floating point edge capacities). The
		/// function computes the minimal cut (graphcut) of a 2D regular 8-connected
		/// graph. <para/>
		/// The inputs are the capacities of the horizontal (in transposed form),
		/// vertical and terminal (source and sink) edges. The capacities to source and
		/// sink 
		/// are stored as capacity differences in the terminals array 
		/// ( terminals(x) = source(x) - sink(x) ). The implementation assumes that the
		/// edge capacities 
		/// for boundary edges that would connect to nodes outside the specified domain
		/// are set to 0 (for example left(0,*) == 0). If this is not fulfilled the
		/// computed labeling may be wrong!<para/>
		/// The computed binary labeling is encoded as unsigned 8bit values (0 and >0).
		/// </summary>
		/// <param name="Terminals">Pointer to differences of terminal edge capacities</param>
		/// <param name="LeftTransposed">Pointer to transposed left edge capacities</param>
		/// <param name="RightTransposed">Pointer to transposed right edge capacities</param>
		/// <param name="Top">Pointer to top edge capacities (top(*,0) must be 0)</param>
		/// <param name="TopLeft">Pointer to top left edge capacities (topleft(*,0) </param>
		/// <param name="TopRight">Pointer to top right edge capacities (topright(*,0)</param>
		/// <param name="Bottom">Pointer to bottom edge capacities (bottom(*,height-1)</param>
		/// <param name="BottomLeft">Pointer to bottom left edge capacities </param>
		/// <param name="BottomRight">Pointer to bottom right edge capacities </param>
		/// <param name="Label">Pointer to destination label image </param>
		/// <returns></returns>
		public void GraphCut(CudaPitchedDeviceVariable<float> Terminals, CudaPitchedDeviceVariable<float> LeftTransposed, CudaPitchedDeviceVariable<float> RightTransposed,
			CudaPitchedDeviceVariable<float> Top, CudaPitchedDeviceVariable<float> TopLeft, CudaPitchedDeviceVariable<float> TopRight, CudaPitchedDeviceVariable<float> Bottom, CudaPitchedDeviceVariable<float> BottomLeft,
			CudaPitchedDeviceVariable<float> BottomRight, CudaPitchedDeviceVariable<byte> Label)
		{
			status = NPPNativeMethods.NPPi.ImageLabeling.nppiGraphcut8_32f8u(Terminals.DevicePointer, LeftTransposed.DevicePointer,
				RightTransposed.DevicePointer, Top.DevicePointer, TopLeft.DevicePointer, TopRight.DevicePointer, Bottom.DevicePointer,
				BottomLeft.DevicePointer, BottomRight.DevicePointer, Terminals.Pitch, LeftTransposed.Pitch, _size, Label.DevicePointer,
				Label.Pitch, _state);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiGraphcut8_32f8u", status));
			NPPException.CheckNppStatus(status, this);
		}
コード例 #31
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        private void advectParticles(CudaDeviceVariable<vertex> p, CudaPitchedDeviceVariable<float2> v, int dx, int dy, float dt, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            advectParticles_k.GridDimensions = grid;
            advectParticles_k.BlockDimensions = tids;
            advectParticles_k.Run(p.DevicePointer, v.DevicePointer, dx, dy, dt, TILEY / TIDSY, tPitch);
        }
コード例 #32
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        private void InitializeCUFFT()
        {
            g_hvfield = new float2[DS];

            g_dvfield = new CudaPitchedDeviceVariable<float2>(DIM, DIM);
            g_tPitch = g_dvfield.Pitch; //Store pitch in g_tPitch to keep consistency to the C++ code
            g_dvfield.CopyToDevice(g_hvfield);

            // Temporary complex velocity field data
            g_vxfield = new CudaDeviceVariable<float2>(PDS);
            g_vyfield = new CudaDeviceVariable<float2>(PDS);

            //create the texture reference explicitly
            texref = new CudaTextureArray2D(advectVelocity_k, "texref", CUAddressMode.Wrap, CUFilterMode.Linear, 0, CUArrayFormat.Float, DIM, DIM, CudaArray2DNumChannels.Two);

            g_particles = new float2[DS];
            initParticles(g_particles, DIM, DIM);

            g_planr2c = new CudaFFTPlan2D(DIM, DIM, cufftType.R2C, Compatibility.FFTWPadding);
            g_planc2r = new CudaFFTPlan2D(DIM, DIM, cufftType.C2R, Compatibility.FFTWPadding);
        }
コード例 #33
0
ファイル: Form1.cs プロジェクト: xiaotie/DNCCuda
 private void updateTexture(CudaPitchedDeviceVariable <float2> data, SizeT wib, SizeT h, SizeT pitch)
 {
     texref.Array.CopyFromDeviceToThis <float2>(data);
 }
コード例 #34
0
ファイル: NPPImage_8uC1.cs プロジェクト: lvaleriu/managedCuda
		/// <summary>
		/// One-channel 8-bit unsigned image SqrIntegral. 
		/// Destination integral image is 32-bit signed int.
		/// Destination square integral image is 64-bit double floating point.
		/// </summary>
		/// <param name="dst">Destination-Image</param>
		/// <param name="sqr">Destination-Image</param>
		/// <param name="nVal">The value to add to pDst image pixels</param>
		/// <param name="nValSqr">The value to add to pSqr image pixels</param>
		public void Integral(NPPImage_32sC1 dst, CudaPitchedDeviceVariable<double> sqr, int nVal, double nValSqr)
		{
			status = NPPNativeMethods.NPPi.Integral.nppiSqrIntegral_8u32s64f_C1R(_devPtrRoi, _pitch, dst.DevicePointerRoi, dst.Pitch, sqr.DevicePointer, sqr.Pitch, _sizeRoi, nVal, nValSqr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiSqrIntegral_8u32s64f_C1R", status));
			NPPException.CheckNppStatus(status, this);
		}
コード例 #35
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        void advectParticles(uint vbo, CudaPitchedDeviceVariable<cData> v, int dx, int dy, float dt, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            cuda_vbo_resource.MapAllResources();
            CUdeviceptr p = cuda_vbo_resource[0].GetMappedPointer();
            advectParticles_k.GridDimensions = grid;
            advectParticles_k.BlockDimensions = tids;
            advectParticles_k.Run(p, v.DevicePointer, dx, dy, dt, TILEY / TIDSY, tPitch);
            cuda_vbo_resource.UnmapAllResources();
        }
コード例 #36
0
 public static NPPImage_32fC2 ToNPPImage(this CudaPitchedDeviceVariable <float2> deviceVar)
 {
     return(new NPPImage_32fC2(deviceVar.DevicePointer, deviceVar.Width, deviceVar.Height, deviceVar.Pitch));
 }
コード例 #37
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        void advectVelocity(CudaPitchedDeviceVariable<cData> v, CudaDeviceVariable<cData> vx, CudaDeviceVariable<cData> vy, int dx, int pdx, int dy, float dt, SizeT tPitch)
        {
            dim3 grid = new dim3((uint)((dx / TILEX) + (!(dx % TILEX != 0) ? 0 : 1)), (uint)((dy / TILEY) + (!(dy % TILEY != 0) ? 0 : 1)), 1);

            dim3 tids = new dim3(TIDSX, TIDSY, 1);

            updateTexture(v, DIM * float2.SizeOf, DIM, tPitch);

            advectVelocity_k.GridDimensions = grid;
            advectVelocity_k.BlockDimensions = tids;
            advectVelocity_k.Run(v.DevicePointer, vx.DevicePointer, vy.DevicePointer, dx, pdx, dy, dt, TILEY / TIDSY);
        }
コード例 #38
0
 public static NPPImage_32fcC1 ToNPPImage(this CudaPitchedDeviceVariable <cuFloatComplex> deviceVar)
 {
     return(new NPPImage_32fcC1(deviceVar.DevicePointer, deviceVar.Width, deviceVar.Height, deviceVar.Pitch));
 }
コード例 #39
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
 void updateTexture(CudaPitchedDeviceVariable<cData> data, SizeT wib, SizeT h, SizeT pitch)
 {
     texref.Array.CopyFromDeviceToThis<float2>(data);
 }
コード例 #40
0
 public static NPPImage_8uC1 ToNPPImage(this CudaPitchedDeviceVariable <byte> deviceVar)
 {
     return(new NPPImage_8uC1(deviceVar.DevicePointer, deviceVar.Width, deviceVar.Height, deviceVar.Pitch));
 }
コード例 #41
0
ファイル: Form1.cs プロジェクト: kunzmi/managedCuda
        void addForces(CudaPitchedDeviceVariable<float2> v, int dx, int dy, int spx, int spy, float fx, float fy, int r, SizeT tPitch)
        {
            dim3 tids = new dim3((uint)(2 * r + 1), (uint)(2 * r + 1), 1);

            addForces_k.GridDimensions = new dim3(1);
            addForces_k.BlockDimensions = tids;
            addForces_k.Run(v.DevicePointer, dx, dy, spx, spy, fx, fy, r, tPitch);
        }
コード例 #42
0
 public static NPPImage_8sC3 ToNPPImage(this CudaPitchedDeviceVariable <char3> deviceVar)
 {
     return(new NPPImage_8sC3(deviceVar.DevicePointer, deviceVar.Width, deviceVar.Height, deviceVar.Pitch));
 }
コード例 #43
0
ファイル: NPPImage_8uC4.cs プロジェクト: lvaleriu/managedCuda
		/// <summary>
		/// Creates a new NPPImage from allocated device ptr.
		/// </summary>
		/// <param name="devPtr">Already allocated device ptr.</param>
		/// <param name="isOwner">If TRUE, devPtr is freed when disposing</param>
		public NPPImage_8uC4(CudaPitchedDeviceVariable<ManagedCuda.VectorTypes.uchar4> devPtr, bool isOwner)
		{
			_devPtr = devPtr.DevicePointer;
			_devPtrRoi = _devPtr;
			_sizeOriginal.width = devPtr.Width;
			_sizeOriginal.height = devPtr.Height;
			_sizeRoi.width = devPtr.Width;
			_sizeRoi.height = devPtr.Height;
			_pitch = devPtr.Pitch;
			_channels = 4;
			_isOwner = isOwner;
			_typeSize = sizeof(byte);
		}
コード例 #44
0
ファイル: Program.cs プロジェクト: kunzmi/managedCuda
        static void Main(string[] args)
        {
            const int nx = 2048;
            const int ny = 2048;

            // shifts applied to x and y data
            const int x_shift = 5;
            const int y_shift = 7;

            ShrQATest.shrQAStart(args);

            if ((nx%TILE_DIM != 0)  || (ny%TILE_DIM != 0))
            {
                Console.Write("nx and ny must be multiples of TILE_DIM\n");
                ShrQATest.shrQAFinishExit(args, ShrQATest.eQAstatus.QA_WAIVED);
            }

            // execution configuration parameters
            dim3 grid = new dim3(nx/TILE_DIM, ny/TILE_DIM, 1);
            dim3 threads = new dim3(TILE_DIM, TILE_DIM, 1);

            // This will pick the best possible CUDA capable device
            int devID = findCudaDevice(args);

            //Load Kernel image from resources
            string resName;
            if (IntPtr.Size == 8)
                resName = "simplePitchLinearTexture_x64.ptx";
            else
                resName = "simplePitchLinearTexture.ptx";

            string resNamespace = "simplePitchLinearTexture";
            string resource = resNamespace + "." + resName;
            Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resource);
            if (stream == null) throw new ArgumentException("Kernel not found in resources.");
            byte[] kernels = new byte[stream.Length];

            int bytesToRead = (int)stream.Length;
            while (bytesToRead > 0)
            {
                bytesToRead -= stream.Read(kernels, (int)stream.Position, bytesToRead);
            }

            CudaKernel PLKernel = ctx.LoadKernelPTX(kernels, "shiftPitchLinear");
            CudaKernel ArrayKernel = ctx.LoadKernelPTX(kernels, "shiftArray");

            CudaStopWatch stopwatch = new CudaStopWatch();

            // ----------------------------------
            // Host allocation and initialization
            // ----------------------------------

            float[] h_idata = new float[nx * ny];
            float[] h_odata = new float[nx * ny];
            float[] gold = new float[nx * ny];

            for (int i = 0; i < nx * ny; ++i) h_idata[i] = (float)i;

            // ------------------------
            // Device memory allocation
            // ------------------------

            // Pitch linear input data
            CudaPitchedDeviceVariable<float> d_idataPL = new CudaPitchedDeviceVariable<float>(nx, ny);

            // Array input data
            CudaArray2D d_idataArray = new CudaArray2D(CUArrayFormat.Float, nx, ny, CudaArray2DNumChannels.One);

            // Pitch linear output data
            CudaPitchedDeviceVariable<float> d_odata = new CudaPitchedDeviceVariable<float>(nx, ny);

            // ------------------------
            // copy host data to device
            // ------------------------

            // Pitch linear
            d_idataPL.CopyToDevice(h_idata);

            // Array
            d_idataArray.CopyFromHostToThis<float>(h_idata);

            // ----------------------
            // Bind texture to memory
            // ----------------------

            // Pitch linear
            CudaTextureLinearPitched2D<float> texRefPL = new CudaTextureLinearPitched2D<float>(PLKernel, "texRefPL", CUAddressMode.Wrap, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates, CUArrayFormat.Float, d_idataPL);
            CudaTextureArray2D texRefArray = new CudaTextureArray2D(ArrayKernel, "texRefArray", CUAddressMode.Wrap, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates, d_idataArray);

            // ---------------------
            // reference calculation
            // ---------------------

            for (int j = 0; j < ny; j++)
            {
                int jshift = (j + y_shift) % ny;
                for (int i = 0; i < nx; i++)
                {
                    int ishift = (i + x_shift) % nx;
                    gold[j * nx + i] = h_idata[jshift * nx + ishift];
                }
            }

            // ----------------
            // shiftPitchLinear
            // ----------------

            ctx.ClearMemory(d_odata.DevicePointer, 0, d_odata.TotalSizeInBytes);
            PLKernel.BlockDimensions = threads;
            PLKernel.GridDimensions = grid;
            stopwatch.Start();
            for (int i=0; i < NUM_REPS; i++)
            {
                PLKernel.Run(d_odata.DevicePointer, (int)(d_odata.Pitch/sizeof(float)), nx, ny, x_shift, y_shift);
            }
            stopwatch.Stop();
            stopwatch.StopEvent.Synchronize();
            float timePL = stopwatch.GetElapsedTime();

            // check results
            d_odata.CopyToHost(h_odata);

            bool res = cutComparef(gold, h_odata);

            bool success = true;
            if (res == false) {
                Console.Write("*** shiftPitchLinear failed ***\n");
                success = false;
            }

            // ----------
            // shiftArray
            // ----------

            ctx.ClearMemory(d_odata.DevicePointer, 0, d_odata.TotalSizeInBytes);
            ArrayKernel.BlockDimensions = threads;
            ArrayKernel.GridDimensions = grid;
            stopwatch.Start();
            for (int i=0; i < NUM_REPS; i++) {
                ArrayKernel.Run(d_odata.DevicePointer, (int)(d_odata.Pitch/sizeof(float)), nx, ny, x_shift, y_shift);

            }

            stopwatch.Stop();
            stopwatch.StopEvent.Synchronize();
            float timeArray = stopwatch.GetElapsedTime();

            // check results
            d_odata.CopyToHost(h_odata);

            res = cutComparef(gold, h_odata);

            if (res == false) {
                Console.Write("*** shiftArray failed ***\n");
                success = false;
            }

            float bandwidthPL = 2.0f*1000.0f*nx*ny*sizeof(float)/(1e+9f)/(timePL/NUM_REPS);
            float bandwidthArray = 2.0f*1000.0f*nx*ny*sizeof(float)/(1e+9f)/(timeArray/NUM_REPS);
            Console.Write("\nBandwidth (GB/s) for pitch linear: {0}; for array: {1}\n",
                bandwidthPL, bandwidthArray);

            float fetchRatePL = nx*ny/1e+6f/(timePL/(1000.0f*NUM_REPS));
            float fetchRateArray = nx*ny/1e+6f/(timeArray/(1000.0f*NUM_REPS));
            Console.Write("\nTexture fetch rate (Mpix/s) for pitch linear: {0}; for array: {1}\n\n",
                fetchRatePL, fetchRateArray);

            // cleanup
            texRefPL.Dispose();
            texRefArray.Dispose();
            d_idataPL.Dispose();
            d_idataArray.Dispose();
            d_odata.Dispose();
            stopwatch.Dispose();
            ctx.Dispose();

            ShrQATest.shrQAFinishExit(args, (success == true) ? ShrQATest.eQAstatus.QA_PASSED : ShrQATest.eQAstatus.QA_FAILED);
        }