/// <summary>
/// Advances the velocity-field window by one time step.
/// The arrays currently held as t1 become t0 and are re-bound to the "vX_t0"/"vY_t0"
/// textures of the advection kernel, <c>CurrentTime</c> is incremented, and the field
/// slices of the new time step are loaded, uploaded into freshly allocated arrays and
/// bound to the "vX_t1"/"vY_t1" textures.
/// </summary>
/// <remarks>
/// The arrays that were held as t0 before the call are disposed after the t0 textures
/// have been re-bound; otherwise one pair of device arrays would be lost per call.
/// </remarks>
protected void LoadNextField()
{
    // Remember the outgoing t0 arrays so they can be released once nothing is bound to them.
    CudaArray2D staleX = _t0X;
    CudaArray2D staleY = _t0Y;

    // Keep t1 timestep as new t0. Update mapping on device side.
    // The texture wrapper objects are only created for their binding side effect.
    _t0X = _t1X;
    new CudaTextureArray2D(_advectParticlesKernel, "vX_t0", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t0X);
    _t0Y = _t1Y;
    new CudaTextureArray2D(_advectParticlesKernel, "vY_t0", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t0Y);

    // Release the previous t0 arrays (they may be null before the first call).
    if (!object.ReferenceEquals(staleX, null) && !object.ReferenceEquals(staleX, _t0X))
    {
        staleX.Dispose();
    }
    if (!object.ReferenceEquals(staleY, null) && !object.ReferenceEquals(staleY, _t0Y))
    {
        staleY.Dispose();
    }

    CurrentTime++;

    // Load new t1. Close the loader even if reading a slice fails.
    LoaderNCF ncFile = RedSea.Singleton.GetLoaderNCF(CurrentTime);
    ScalarField t1X;
    ScalarField t1Y;
    try
    {
        t1X = ncFile.LoadFieldSlice(_ensembleRanges[0]);
        t1Y = ncFile.LoadFieldSlice(_ensembleRanges[1]);
    }
    finally
    {
        ncFile.Close();
    }

    // All members are above each other.
    int vHeight = _height * _numMembers;

    // vX, t=1
    _t1X = new CudaArray2D(CUArrayFormat.Float, _width, vHeight, CudaArray2DNumChannels.One);
    _t1X.CopyFromHostToThis(t1X.Data);
    new CudaTextureArray2D(_advectParticlesKernel, "vX_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1X);

    // vY, t=1
    _t1Y = new CudaArray2D(CUArrayFormat.Float, _width, vHeight, CudaArray2DNumChannels.One);
    _t1Y.CopyFromHostToThis(t1Y.Data);
    new CudaTextureArray2D(_advectParticlesKernel, "vY_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1Y);
}
/// <summary>
/// Performs one advection step: loads the next velocity fields, runs the advection
/// kernel for every member, and copies the resulting map into the mapped flow-map texture.
/// </summary>
/// <param name="stepSize">Step size handed to the advection kernel.</param>
public void Step(float stepSize)
{
    // Load the next vector fields to device memory.
    LoadNextField();

    CudaSurfObject surf = null;
    _cudaDxMapper.MapAllResources();
    try
    {
        CudaArray2D lastFlowMap = _cudaDxMapper[0].GetMappedArray2D(0, 0);

        // Advect from each member to each member. In each block, the same configuration is chosen.
        // Ceiling division: a partially filled block at the right/bottom border still has to be launched,
        // otherwise the trailing columns/rows are never processed.
        int blocksX = (int)((_width + BLOCK_SIZE - 1) / BLOCK_SIZE);
        int blocksY = (int)((_height + BLOCK_SIZE - 1) / BLOCK_SIZE);
        dim3 grid = new dim3(blocksX, blocksY, _numMembers);

        // Advect a block in each member-member combination.
        dim3 threads = new dim3(BLOCK_SIZE, BLOCK_SIZE);

        _advectParticlesKernel.GridDimensions = grid;
        _advectParticlesKernel.BlockDimensions = threads;
        // (float* mapT1, const int width, const int height, const int numMembers, /*float timeScale, */ float stepSize, float minDensity, float invalid)
        _advectParticlesKernel.Run(_pongFlowMap.DevicePointer, _width, _height, _numMembers, stepSize, 0.000001f, _texInvalidValue);

        // Write the advected map back into the mapped texture through a surface object.
        surf = new CudaSurfObject(lastFlowMap);
        grid.z = 1;
        _copyMapDataKernel.GridDimensions = grid;
        _copyMapDataKernel.BlockDimensions = threads;
        _copyMapDataKernel.Run(surf.SurfObject, _pongFlowMap.DevicePointer, _width, _height);
    }
    finally
    {
        // Always hand the resource back, even if a kernel launch failed.
        _cudaDxMapper.UnmapAllResources();

        // The surface object is only needed for this step; release it so it does not accumulate.
        if (surf != null)
        {
            surf.Dispose();
        }
    }
}
/// <summary>
/// Creates a surface object for the given CUDA array. A resource descriptor is built
/// from <paramref name="array"/> and passed to <c>cuSurfObjectCreate</c>; the descriptor's
/// <c>ResDesc.resType</c> must be <see cref="CUResourceType.Array"/> and
/// <c>ResDesc.hArray</c> must be a valid CUDA array handle.
/// </summary>
/// <param name="array">CudaArray2D the surface loads/stores operate on</param>
public CudaSurfObject(CudaArray2D array)
{
    _surfObject = new CUsurfObject();
    _resDesc = new CudaResourceDesc(array);

    res = DriverAPINativeMethods.SurfaceObjects.cuSurfObjectCreate(ref _surfObject, ref _resDesc);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuSurfObjectCreate", res));

    // Any driver result other than success is surfaced as an exception.
    if (res != CUResult.Success)
    {
        throw new CudaException(res);
    }
}
/// <summary>
/// Allocates the four single-channel float arrays (X and Y component for t0 and t1)
/// and marks the instance as initialized.
/// </summary>
private void InitializeResources()
{
    // Every array is _width wide and _height * _numMembers high.
    int stackedHeight = _height * _numMembers;

    // Time step t0.
    _t0Y = new CudaArray2D(CUArrayFormat.Float, _width, stackedHeight, CudaArray2DNumChannels.One);
    _t0X = new CudaArray2D(CUArrayFormat.Float, _width, stackedHeight, CudaArray2DNumChannels.One);

    // Time step t1.
    _t1Y = new CudaArray2D(CUArrayFormat.Float, _width, stackedHeight, CudaArray2DNumChannels.One);
    _t1X = new CudaArray2D(CUArrayFormat.Float, _width, stackedHeight, CudaArray2DNumChannels.One);

    _initialized = true;
}
/// <summary>
/// Advances the velocity-field window by one time step without allocating device memory.
/// The t0 and t1 arrays are swapped per component, the textures of the advection kernel
/// are re-bound, <c>CurrentTime</c> is incremented, and the slices of the new time step
/// are uploaded into the arrays that now play the t1 role.
/// </summary>
/// <remarks>
/// No arrays are created here: all four field arrays must already exist when this is called.
/// </remarks>
protected void LoadNextField()
{
    // Keep t1 timestep as new t0. Update mapping on device side.
    // The former t0 arrays are recycled as upload targets for the new t1 data.
    CudaArray2D recycledX = _t0X;
    _t0X = _t1X;
    _t1X = recycledX;
    new CudaTextureArray2D(_advectParticlesKernel, "vX_t0", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t0X);

    // The Y swap must start from _t0Y. Taking _t0X here would make _t1Y alias the X array,
    // so the Y upload below would overwrite the X data of t0.
    CudaArray2D recycledY = _t0Y;
    _t0Y = _t1Y;
    _t1Y = recycledY;
    new CudaTextureArray2D(_advectParticlesKernel, "vY_t0", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t0Y);

    CurrentTime++;

    // Load new t1. Close the loader even if reading a slice fails.
    LoaderNCF ncFile = RedSea.Singleton.GetLoaderNCF(CurrentTime);
    ScalarField t1X;
    ScalarField t1Y;
    try
    {
        t1X = ncFile.LoadFieldSlice(_ensembleRanges[0]);
        t1Y = ncFile.LoadFieldSlice(_ensembleRanges[1]);
    }
    finally
    {
        ncFile.Close();
    }

    // Host staging buffer sized like the device array; the field data is copied to its start.
    float[] paddedArray = new float[_t1X.Width * _t1X.Height];

    // vX, t=1
    Array.Copy(t1X.Data, paddedArray, t1X.Data.Length);
    _t1X.CopyFromHostToThis<float>(paddedArray);
    new CudaTextureArray2D(_advectParticlesKernel, "vX_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1X);

    // vY, t=1 (the staging buffer is reused)
    Array.Copy(t1Y.Data, paddedArray, t1Y.Data.Length);
    _t1Y.CopyFromHostToThis<float>(paddedArray);
    new CudaTextureArray2D(_advectParticlesKernel, "vY_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1Y);
}
/// <summary>
/// Entry point of the pitch-linear texture sample. Shifts a 2048 x 2048 float image by a
/// fixed offset once through a pitch-linear texture and once through an array texture,
/// compares both results against a host reference, and prints bandwidth and fetch rate.
/// </summary>
/// <param name="args">Command line arguments; forwarded to the QA helpers and to device selection.</param>
static void Main(string[] args)
{
    const int nx = 2048;
    const int ny = 2048;

    // shifts applied to x and y data
    const int x_shift = 5;
    const int y_shift = 7;

    ShrQATest.shrQAStart(args);

    if ((nx % TILE_DIM != 0) || (ny % TILE_DIM != 0))
    {
        Console.Write("nx and ny must be multiples of TILE_DIM\n");
        ShrQATest.shrQAFinishExit(args, ShrQATest.eQAstatus.QA_WAIVED);
    }

    // execution configuration parameters
    dim3 grid = new dim3(nx / TILE_DIM, ny / TILE_DIM, 1);
    dim3 threads = new dim3(TILE_DIM, TILE_DIM, 1);

    // This will pick the best possible CUDA capable device
    findCudaDevice(args);

    // Load Kernel image from resources
    string resName;
    if (IntPtr.Size == 8)
    {
        resName = "simplePitchLinearTexture_x64.ptx";
    }
    else
    {
        resName = "simplePitchLinearTexture.ptx";
    }

    string resNamespace = "simplePitchLinearTexture";
    string resource = resNamespace + "." + resName;

    byte[] kernels;
    using (Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resource))
    {
        if (stream == null)
        {
            throw new ArgumentException("Kernel not found in resources.");
        }

        kernels = new byte[stream.Length];

        // Read may return fewer bytes than requested; a return value of 0 means the stream
        // ended early and must not be retried forever.
        int offset = 0;
        while (offset < kernels.Length)
        {
            int read = stream.Read(kernels, offset, kernels.Length - offset);
            if (read <= 0)
            {
                throw new EndOfStreamException("Kernel resource ended before all bytes were read.");
            }
            offset += read;
        }
    }

    CudaKernel PLKernel = ctx.LoadKernelPTX(kernels, "shiftPitchLinear");
    CudaKernel ArrayKernel = ctx.LoadKernelPTX(kernels, "shiftArray");

    CudaStopWatch stopwatch = new CudaStopWatch();

    // ----------------------------------
    // Host allocation and initialization
    // ----------------------------------
    float[] h_idata = new float[nx * ny];
    float[] h_odata = new float[nx * ny];
    float[] gold = new float[nx * ny];

    for (int i = 0; i < nx * ny; ++i)
    {
        h_idata[i] = (float)i;
    }

    // ------------------------
    // Device memory allocation
    // ------------------------
    // Pitch linear input data
    CudaPitchedDeviceVariable<float> d_idataPL = new CudaPitchedDeviceVariable<float>(nx, ny);

    // Array input data
    CudaArray2D d_idataArray = new CudaArray2D(CUArrayFormat.Float, nx, ny, CudaArray2DNumChannels.One);

    // Pitch linear output data
    CudaPitchedDeviceVariable<float> d_odata = new CudaPitchedDeviceVariable<float>(nx, ny);

    // ------------------------
    // copy host data to device
    // ------------------------
    // Pitch linear
    d_idataPL.CopyToDevice(h_idata);
    // Array
    d_idataArray.CopyFromHostToThis<float>(h_idata);

    // ----------------------
    // Bind texture to memory
    // ----------------------
    // Pitch linear
    CudaTextureLinearPitched2D<float> texRefPL = new CudaTextureLinearPitched2D<float>(PLKernel, "texRefPL", CUAddressMode.Wrap, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates, CUArrayFormat.Float, d_idataPL);
    CudaTextureArray2D texRefArray = new CudaTextureArray2D(ArrayKernel, "texRefArray", CUAddressMode.Wrap, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates, d_idataArray);

    // ---------------------
    // reference calculation
    // ---------------------
    for (int j = 0; j < ny; j++)
    {
        int jshift = (j + y_shift) % ny;
        for (int i = 0; i < nx; i++)
        {
            int ishift = (i + x_shift) % nx;
            gold[j * nx + i] = h_idata[jshift * nx + ishift];
        }
    }

    // ----------------
    // shiftPitchLinear
    // ----------------
    ctx.ClearMemory(d_odata.DevicePointer, 0, d_odata.TotalSizeInBytes);
    PLKernel.BlockDimensions = threads;
    PLKernel.GridDimensions = grid;
    stopwatch.Start();
    for (int i = 0; i < NUM_REPS; i++)
    {
        PLKernel.Run(d_odata.DevicePointer, (int)(d_odata.Pitch / sizeof(float)), nx, ny, x_shift, y_shift);
    }
    stopwatch.Stop();
    stopwatch.StopEvent.Synchronize();
    float timePL = stopwatch.GetElapsedTime();

    // check results
    d_odata.CopyToHost(h_odata);

    bool res = cutComparef(gold, h_odata);
    bool success = true;
    if (!res)
    {
        Console.Write("*** shiftPitchLinear failed ***\n");
        success = false;
    }

    // ----------
    // shiftArray
    // ----------
    ctx.ClearMemory(d_odata.DevicePointer, 0, d_odata.TotalSizeInBytes);
    ArrayKernel.BlockDimensions = threads;
    ArrayKernel.GridDimensions = grid;
    stopwatch.Start();
    for (int i = 0; i < NUM_REPS; i++)
    {
        ArrayKernel.Run(d_odata.DevicePointer, (int)(d_odata.Pitch / sizeof(float)), nx, ny, x_shift, y_shift);
    }
    stopwatch.Stop();
    stopwatch.StopEvent.Synchronize();
    float timeArray = stopwatch.GetElapsedTime();

    // check results
    d_odata.CopyToHost(h_odata);

    res = cutComparef(gold, h_odata);
    if (!res)
    {
        Console.Write("*** shiftArray failed ***\n");
        success = false;
    }

    // Each element is read once and written once, hence the factor 2; times are in milliseconds.
    float bandwidthPL = 2.0f * 1000.0f * nx * ny * sizeof(float) / (1e+9f) / (timePL / NUM_REPS);
    float bandwidthArray = 2.0f * 1000.0f * nx * ny * sizeof(float) / (1e+9f) / (timeArray / NUM_REPS);
    Console.Write("\nBandwidth (GB/s) for pitch linear: {0}; for array: {1}\n", bandwidthPL, bandwidthArray);

    float fetchRatePL = nx * ny / 1e+6f / (timePL / (1000.0f * NUM_REPS));
    float fetchRateArray = nx * ny / 1e+6f / (timeArray / (1000.0f * NUM_REPS));
    Console.Write("\nTexture fetch rate (Mpix/s) for pitch linear: {0}; for array: {1}\n\n", fetchRatePL, fetchRateArray);

    // cleanup
    texRefPL.Dispose();
    texRefArray.Dispose();
    d_idataPL.Dispose();
    d_idataArray.Dispose();
    d_odata.Dispose();
    stopwatch.Dispose();
    ctx.Dispose();

    ShrQATest.shrQAFinishExit(args, success ? ShrQATest.eQAstatus.QA_PASSED : ShrQATest.eQAstatus.QA_FAILED);
}
/// <summary>
/// Initializes a resource descriptor of type <see cref="CUResourceType.Array"/>
/// that refers to the CUDA array handle of <paramref name="var"/>.
/// </summary>
/// <param name="var">CudaArray2D whose <c>CUArray</c> handle is stored in the descriptor</param>
public CudaResourceDesc(CudaArray2D var)
{
    flags = 0;
    resType = CUResourceType.Array;

    // The members are set in this exact order and hArray is written last.
    // NOTE(review): the members presumably share storage, so the order matters - confirm before reordering.
    res = new CudaResourceDescUnion
    {
        hMipmappedArray = new CUmipmappedArray(),
        linear = new CudaResourceDescLinear(),
        pitch2D = new CudaResourceDescPitch2D(),
        hArray = var.CUArray
    };
}
/// <summary>
/// Entry point of the pitch-linear texture sample. Shifts a 2048 x 2048 float image by a
/// fixed offset once through a pitch-linear texture and once through an array texture,
/// compares both results against a host reference, and prints bandwidth and fetch rate.
/// </summary>
/// <param name="args">Command line arguments; forwarded to the QA helpers and to device selection.</param>
static void Main(string[] args)
{
    const int nx = 2048;
    const int ny = 2048;

    // shifts applied to x and y data
    const int x_shift = 5;
    const int y_shift = 7;

    ShrQATest.shrQAStart(args);

    if ((nx % TILE_DIM != 0) || (ny % TILE_DIM != 0))
    {
        Console.Write("nx and ny must be multiples of TILE_DIM\n");
        ShrQATest.shrQAFinishExit(args, ShrQATest.eQAstatus.QA_WAIVED);
    }

    // execution configuration parameters
    dim3 grid = new dim3(nx / TILE_DIM, ny / TILE_DIM, 1);
    dim3 threads = new dim3(TILE_DIM, TILE_DIM, 1);

    // This will pick the best possible CUDA capable device
    findCudaDevice(args);

    // Load Kernel image from resources
    string resName;
    if (IntPtr.Size == 8)
    {
        resName = "simplePitchLinearTexture_x64.ptx";
    }
    else
    {
        resName = "simplePitchLinearTexture.ptx";
    }

    string resNamespace = "simplePitchLinearTexture";
    string resource = resNamespace + "." + resName;

    byte[] kernels;
    using (Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resource))
    {
        if (stream == null)
        {
            throw new ArgumentException("Kernel not found in resources.");
        }

        kernels = new byte[stream.Length];

        // Read may deliver the data in several pieces. A return value of 0 signals the end
        // of the stream, so looping on it would never finish.
        int filled = 0;
        while (filled < kernels.Length)
        {
            int chunk = stream.Read(kernels, filled, kernels.Length - filled);
            if (chunk <= 0)
            {
                throw new EndOfStreamException("Kernel resource ended before all bytes were read.");
            }
            filled += chunk;
        }
    }

    CudaKernel PLKernel = ctx.LoadKernelPTX(kernels, "shiftPitchLinear");
    CudaKernel ArrayKernel = ctx.LoadKernelPTX(kernels, "shiftArray");

    CudaStopWatch stopwatch = new CudaStopWatch();

    // ----------------------------------
    // Host allocation and initialization
    // ----------------------------------
    float[] h_idata = new float[nx * ny];
    float[] h_odata = new float[nx * ny];
    float[] gold = new float[nx * ny];

    for (int i = 0; i < nx * ny; ++i)
    {
        h_idata[i] = (float)i;
    }

    // ------------------------
    // Device memory allocation
    // ------------------------
    // Pitch linear input data
    CudaPitchedDeviceVariable<float> d_idataPL = new CudaPitchedDeviceVariable<float>(nx, ny);

    // Array input data
    CudaArray2D d_idataArray = new CudaArray2D(CUArrayFormat.Float, nx, ny, CudaArray2DNumChannels.One);

    // Pitch linear output data
    CudaPitchedDeviceVariable<float> d_odata = new CudaPitchedDeviceVariable<float>(nx, ny);

    // ------------------------
    // copy host data to device
    // ------------------------
    // Pitch linear
    d_idataPL.CopyToDevice(h_idata);
    // Array
    d_idataArray.CopyFromHostToThis<float>(h_idata);

    // ----------------------
    // Bind texture to memory
    // ----------------------
    // Pitch linear
    CudaTextureLinearPitched2D<float> texRefPL = new CudaTextureLinearPitched2D<float>(PLKernel, "texRefPL", CUAddressMode.Wrap, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates, CUArrayFormat.Float, d_idataPL);
    CudaTextureArray2D texRefArray = new CudaTextureArray2D(ArrayKernel, "texRefArray", CUAddressMode.Wrap, CUFilterMode.Point, CUTexRefSetFlags.NormalizedCoordinates, d_idataArray);

    // ---------------------
    // reference calculation
    // ---------------------
    for (int j = 0; j < ny; j++)
    {
        int jshift = (j + y_shift) % ny;
        for (int i = 0; i < nx; i++)
        {
            int ishift = (i + x_shift) % nx;
            gold[j * nx + i] = h_idata[jshift * nx + ishift];
        }
    }

    // ----------------
    // shiftPitchLinear
    // ----------------
    ctx.ClearMemory(d_odata.DevicePointer, 0, d_odata.TotalSizeInBytes);
    PLKernel.BlockDimensions = threads;
    PLKernel.GridDimensions = grid;
    stopwatch.Start();
    for (int i = 0; i < NUM_REPS; i++)
    {
        PLKernel.Run(d_odata.DevicePointer, (int)(d_odata.Pitch / sizeof(float)), nx, ny, x_shift, y_shift);
    }
    stopwatch.Stop();
    stopwatch.StopEvent.Synchronize();
    float timePL = stopwatch.GetElapsedTime();

    // check results
    d_odata.CopyToHost(h_odata);

    bool res = cutComparef(gold, h_odata);
    bool success = true;
    if (!res)
    {
        Console.Write("*** shiftPitchLinear failed ***\n");
        success = false;
    }

    // ----------
    // shiftArray
    // ----------
    ctx.ClearMemory(d_odata.DevicePointer, 0, d_odata.TotalSizeInBytes);
    ArrayKernel.BlockDimensions = threads;
    ArrayKernel.GridDimensions = grid;
    stopwatch.Start();
    for (int i = 0; i < NUM_REPS; i++)
    {
        ArrayKernel.Run(d_odata.DevicePointer, (int)(d_odata.Pitch / sizeof(float)), nx, ny, x_shift, y_shift);
    }
    stopwatch.Stop();
    stopwatch.StopEvent.Synchronize();
    float timeArray = stopwatch.GetElapsedTime();

    // check results
    d_odata.CopyToHost(h_odata);

    res = cutComparef(gold, h_odata);
    if (!res)
    {
        Console.Write("*** shiftArray failed ***\n");
        success = false;
    }

    // Each element is read once and written once, hence the factor 2; times are in milliseconds.
    float bandwidthPL = 2.0f * 1000.0f * nx * ny * sizeof(float) / (1e+9f) / (timePL / NUM_REPS);
    float bandwidthArray = 2.0f * 1000.0f * nx * ny * sizeof(float) / (1e+9f) / (timeArray / NUM_REPS);
    Console.Write("\nBandwidth (GB/s) for pitch linear: {0}; for array: {1}\n", bandwidthPL, bandwidthArray);

    float fetchRatePL = nx * ny / 1e+6f / (timePL / (1000.0f * NUM_REPS));
    float fetchRateArray = nx * ny / 1e+6f / (timeArray / (1000.0f * NUM_REPS));
    Console.Write("\nTexture fetch rate (Mpix/s) for pitch linear: {0}; for array: {1}\n\n", fetchRatePL, fetchRateArray);

    // cleanup
    texRefPL.Dispose();
    texRefArray.Dispose();
    d_idataPL.Dispose();
    d_idataArray.Dispose();
    d_odata.Dispose();
    stopwatch.Dispose();
    ctx.Dispose();

    ShrQATest.shrQAFinishExit(args, success ? ShrQATest.eQAstatus.QA_PASSED : ShrQATest.eQAstatus.QA_FAILED);
}
/// <summary>
/// Creates a new 2D texture from array memory. Allocates a new 2D array.
/// </summary>
/// <param name="kernel">Kernel whose module contains the texture reference</param>
/// <param name="texName">Name of the texture reference inside the module</param>
/// <param name="addressMode0">Address mode set for dimension 0</param>
/// <param name="addressMode1">Address mode set for dimension 1</param>
/// <param name="filterMode">Filter mode set on the texture reference</param>
/// <param name="flags">Flags set on the texture reference</param>
/// <param name="format">Element format of the texture reference and of the allocated array</param>
/// <param name="width">In elements</param>
/// <param name="height">In elements</param>
/// <param name="numChannels">1,2 or 4</param>
/// <exception cref="CudaException">Thrown when any driver call does not return <see cref="CUResult.Success"/>.</exception>
public CudaTextureArray2D(CudaKernel kernel, string texName, CUAddressMode addressMode0, CUAddressMode addressMode1, CUFilterMode filterMode, CUTexRefSetFlags flags, CUArrayFormat format, SizeT width, SizeT height, CudaArray2DNumChannels numChannels)
{
    _texref = new CUtexref();
    res = DriverAPINativeMethods.ModuleManagement.cuModuleGetTexRef(ref _texref, kernel.CUModule, texName);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}, Texture name: {3}", DateTime.Now, "cuModuleGetTexRef", res, texName));
    if (res != CUResult.Success) throw new CudaException(res);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 0, addressMode0);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetAddressMode", res));
    if (res != CUResult.Success) throw new CudaException(res);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 1, addressMode1);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetAddressMode", res));
    if (res != CUResult.Success) throw new CudaException(res);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFilterMode(_texref, filterMode);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetFilterMode", res));
    if (res != CUResult.Success) throw new CudaException(res);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFlags(_texref, flags);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetFlags", res));
    if (res != CUResult.Success) throw new CudaException(res);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFormat(_texref, format, (int)numChannels);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetFormat", res));
    if (res != CUResult.Success) throw new CudaException(res);

    // Remember the configuration of this texture.
    _filtermode = filterMode;
    _flags = flags;
    _addressMode0 = addressMode0;
    _addressMode1 = addressMode1;
    _format = format;
    _height = height;
    _width = width;
    _numChannels = (int)numChannels;
    _name = texName;
    _module = kernel.CUModule;
    _cufunction = kernel.CUFunction;

    _channelSize = CudaHelperMethods.GetChannelSize(format);
    _dataSize = height * width * _numChannels * _channelSize;

    // Allocate the backing array and bind it to the texture reference.
    _array = new CudaArray2D(format, width, height, numChannels);

    res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetArray(_texref, _array.CUArray, CUTexRefSetArrayFlags.OverrideFormat);
    Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetArray", res));
    if (res != CUResult.Success)
    {
        // The caller never receives this instance when the constructor throws,
        // so the array allocated above has to be released here.
        _array.Dispose();
        throw new CudaException(res);
    }

    //res = DriverAPINativeMethods.ParameterManagement.cuParamSetTexRef(kernel.CUFunction, CUParameterTexRef.Default, _texref);
    //Debug.WriteLine("{0:G}, {1}: {2}", DateTime.Now, "cuParamSetTexRef", res);
    //if (res != CUResult.Success) throw new CudaException(res);
}
/// <summary>
/// Creates a new 2D texture from array memory.
/// Convenience overload: delegates to the overload that takes two address modes and
/// passes <paramref name="addressMode"/> for both of them.
/// </summary>
/// <param name="kernel">Kernel forwarded to the delegated constructor</param>
/// <param name="texName">Texture name forwarded to the delegated constructor</param>
/// <param name="addressMode">Address mode used for both address-mode arguments</param>
/// <param name="filterMode">Filter mode forwarded to the delegated constructor</param>
/// <param name="flags">Flags forwarded to the delegated constructor</param>
/// <param name="array">Existing array forwarded to the delegated constructor</param>
public CudaTextureArray2D(CudaKernel kernel, string texName, CUAddressMode addressMode, CUFilterMode filterMode, CUTexRefSetFlags flags, CudaArray2D array)
    : this(kernel, texName, addressMode, addressMode, filterMode, flags, array)
{

}
/// <summary>
/// Setup as empty map with only one value at 1.
/// Loads the velocity slices of <paramref name="startTime"/>, uploads them as the t1
/// textures of the advection kernel, creates a zero-filled device buffer and a flow-map
/// texture that is zero everywhere except at <paramref name="pos"/>, and registers that
/// texture for CUDA interop.
/// </summary>
/// <param name="pos">Pixel of the flow map that is initialized to 1.</param>
/// <param name="startTime">Time step to start from; also becomes <c>CurrentTime</c>.</param>
public void SetupPoint(Int2 pos, int startTime)
{
    // ~~~~~~~~~~~~~~ Copy relevant data ~~~~~~~~~~~~~~ \\
    // Count up when advection was executed.
    CurrentTime = startTime;
    _startTime = startTime;

    // ~~~~~~~~~~~~ Load ensemble ~~~~~~~~~~~~ \\
    // Close the loader even if reading a slice fails.
    LoaderNCF ncFile = RedSea.Singleton.GetLoaderNCF(_startTime);
    ScalarField t1X;
    ScalarField t1Y;
    try
    {
        t1X = ncFile.LoadFieldSlice(_ensembleRanges[0]);
        t1Y = ncFile.LoadFieldSlice(_ensembleRanges[1]);
    }
    finally
    {
        ncFile.Close();
    }

    // ~~~~~~~~~~~~~~ Copy relevant data ~~~~~~~~~~~~~~ \\
    // Keep for plane creation and size reference.
    _ensembleGrid = t1X.Grid as RectlinearGrid;

    // Mapper for binding the SlimDX texture to CUDA easily.
    _cudaDxMapper = new CudaGraphicsInteropResourceCollection();

    // Tell CUDA which value is a border.
    _texInvalidValue = t1X.InvalidValue ?? float.MaxValue;

    // ~~~~~~~~~~~~ Fill CUDA resources ~~~~~~~~~~~~ \\
    // All members are above each other.
    int vHeight = _height * _numMembers;

    // Only the t1 textures are bound here; the t0 textures are not set up in this method.

    // vX, t=1
    _t1X = new CudaArray2D(CUArrayFormat.Float, _width, vHeight, CudaArray2DNumChannels.One);
    _t1X.CopyFromHostToThis<float>(t1X.Data);
    new CudaTextureArray2D(_advectParticlesKernel, "vX_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1X);

    // vY, t=1
    _t1Y = new CudaArray2D(CUArrayFormat.Float, _width, vHeight, CudaArray2DNumChannels.One);
    _t1Y.CopyFromHostToThis<float>(t1Y.Data);
    new CudaTextureArray2D(_advectParticlesKernel, "vY_t1", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, _t1Y);

    // ~~~~~~~~~~~~~ Create texture ~~~~~~~~~~~~~~~~~~~~ \\
    // Create texture. Completely zero, except for one point.
    Texture2DDescription desc = new Texture2DDescription
    {
        ArraySize = 1,
        BindFlags = BindFlags.ShaderResource,
        CpuAccessFlags = CpuAccessFlags.None,
        Format = Format.R32_Float,
        Width = _width,
        Height = _height,
        MipLevels = 1,
        OptionFlags = ResourceOptionFlags.None,
        SampleDescription = new SampleDescription(1, 0),
        Usage = ResourceUsage.Default
    };

    // A freshly allocated array is already zero-filled.
    float[] zeros = new float[_width * _height];

    // Allocate the device buffer once and copy the zeros into it explicitly.
    // Assigning the host array to the field would create a second device buffer
    // and leave this one unreferenced.
    _pongFlowMap = new CudaDeviceVariable<float>(_width * _height);
    _pongFlowMap.CopyToDevice(zeros);

    // Add one pixel for integration.
    zeros[pos.X + pos.Y * _width] = 1;

    // Create texture. The stream is only needed while the texture is being created.
    using (DataStream seedPixels = new DataStream(zeros, true, true))
    {
        DataRectangle texData = new DataRectangle(_width * sizeof(float), seedPixels);
        FlowMap = new Texture2D(_device, desc, texData);
    }

    // ~~~~~~~~~ Make textures mappable to CUDA ~~~~~~~~~~ \\
    _cudaDxMapper.Add(new CudaDirectXInteropResource(FlowMap.ComPointer, CUGraphicsRegisterFlags.None, CudaContext.DirectXVersion.D3D11));

    _cudaDxMapper.MapAllResources();
    try
    {
        CudaArray2D lastFlowMap = _cudaDxMapper[0].GetMappedArray2D(0, 0);
        new CudaTextureArray2D(_advectParticlesKernel, "flowMap", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.None, lastFlowMap);
    }
    finally
    {
        // Always hand the resource back, even if binding the texture failed.
        _cudaDxMapper.UnmapAllResources();
    }
}
/// <summary>
/// Displays a new image: re-creates the texture, copies the device image into the mapped
/// texture array, updates the view transform when size or orientation changed, and
/// refreshes the frame.
/// </summary>
/// <param name="image">Device image to display.</param>
/// <param name="orientation">Rotation the image is shown with.</param>
public void SetImage(CudaPitchedDeviceVariable<uchar4> image, Rotation orientation)
{
    // Evaluate before the fields below are overwritten.
    bool sizeChanged = _realImageWidth != image.Width
        || _realImageHeight != image.Height
        || _rotation != orientation;

    // Always create a new texture to recreate the mipmaps...
    _rotation = orientation;
    _realImageWidth = image.Width;
    _realImageHeight = image.Height;
    _imageWidth = image.Width;
    _imageHeight = image.Height;
    if (_rotation == Rotation._90 || _rotation == Rotation._270)
    {
        // Quarter turns swap the displayed width and height.
        _imageWidth = _realImageHeight;
        _imageHeight = _realImageWidth;
    }
    initTexture(image.Width, image.Height);

    // Lock and mapping are released in finally blocks so that a failing copy
    // cannot leave the image locked or the resource mapped.
    d3dimage.Lock();
    try
    {
        _graphicsres.MapAllResources();
        CudaArray2D arr = null;
        try
        {
            arr = _graphicsres[0].GetMappedArray2D(0, 0);
            arr.CopyFromDeviceToThis<uchar4>(image);
        }
        finally
        {
            _graphicsres.UnmapAllResources();
            if (arr != null)
            {
                arr.Dispose();
            }
        }
    }
    finally
    {
        d3dimage.Unlock();
    }

    // if size didn't change, don't change geometry
    if (sizeChanged)
    {
        SetProjectionTransform();
        SlimDX.Matrix rotMat = new SlimDX.Matrix();
        switch (_rotation)
        {
            case Rotation._0:
                rotMat = SlimDX.Matrix.RotationZ(0);
                break;
            case Rotation._90:
                rotMat = SlimDX.Matrix.RotationZ((float)(90.0 / 180.0 * Math.PI));
                break;
            case Rotation._180:
                rotMat = SlimDX.Matrix.RotationZ((float)(180.0 / 180.0 * Math.PI));
                break;
            case Rotation._270:
                rotMat = SlimDX.Matrix.RotationZ((float)(270.0 / 180.0 * Math.PI));
                break;
        }

        _device.SetTransform(TransformState.View, rotMat);
    }

    updateFrame();
}
/// <summary>
/// Mouse-move handler: reads the colour of the image pixel under the cursor from the
/// mapped texture array, publishes colour and pixel coordinate through the dependency
/// properties, and pans the view while the mouse button is held.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Mouse event data used to determine the cursor position.</param>
private void Image_MouseMove(object sender, MouseEventArgs e)
{
    System.Windows.Point position = GetPositionWithDpi(e);
    int pX = (int)position.X;
    int pY = (int)position.Y;

    Point pixel = GetImagePixelFromMouseCoordinate(position);

    // Stays at its default value when no pixel can be read.
    uchar4[] p = new uchar4[1];

    // Infinite or NaN coordinates cannot be converted to a valid pixel index.
    bool pixelIsUsable = !double.IsInfinity(pixel.X) && !double.IsInfinity(pixel.Y)
        && !double.IsNaN(pixel.X) && !double.IsNaN(pixel.Y);

    if (pixelIsUsable)
    {
        // Pin the destination so the driver can write into it. The pin, the lock and the
        // mapping are all released in finally blocks, so a failing copy cannot leak them.
        GCHandle handle = GCHandle.Alloc(p, GCHandleType.Pinned);
        try
        {
            d3dimage.Lock();
            try
            {
                _graphicsres.MapAllResources();
                CudaArray2D arr = null;
                try
                {
                    arr = _graphicsres[0].GetMappedArray2D(0, 0);

                    // Copy exactly one 4-byte pixel from the array to the host buffer.
                    CUDAMemCpy2D copy = new CUDAMemCpy2D();
                    copy.dstHost = handle.AddrOfPinnedObject();
                    copy.srcArray = arr.CUArray;
                    copy.srcMemoryType = CUMemoryType.Array;
                    copy.dstMemoryType = CUMemoryType.Host;
                    copy.Height = 1;
                    copy.WidthInBytes = 4;
                    copy.srcXInBytes = (int)pixel.X * 4;
                    copy.srcY = (int)pixel.Y;

                    arr.CopyData(copy);
                }
                finally
                {
                    _graphicsres.UnmapAllResources();
                    if (arr != null)
                    {
                        arr.Dispose();
                    }
                }
            }
            finally
            {
                d3dimage.Unlock();
            }
        }
        finally
        {
            handle.Free();
        }
    }

    SetValue(ColorOfPixelProperty, Color.FromArgb(p[0].w, p[0].z, p[0].y, p[0].x));
    SetValue(PixelCoordinateProperty, pixel);

    if (_clicked)
    {
        // Accumulate the drag distance since the last event.
        _viewShiftX += (-_lastX + pX) / _scaleFac * _projFac;
        _viewShiftY += (-_lastY + pY) / _scaleFac * _projFac;

        _lastX = pX;
        _lastY = pY;

        SlimDX.Matrix matScale = SlimDX.Matrix.Scaling(_scaleFac, _scaleFac, 1);
        float shiftScale = Math.Min((float)ActualWidthDpi, (float)ActualHeightDpi);
        SlimDX.Matrix matTrans = SlimDX.Matrix.Translation(_viewShiftX / shiftScale * _scaleFac, _viewShiftY / shiftScale * _scaleFac, 0);

        SlimDX.Matrix mat = matScale * matTrans;
        SlimDX.Matrix rotMat = new SlimDX.Matrix();
        switch (_rotation)
        {
            case Rotation._0:
                rotMat = SlimDX.Matrix.RotationZ(0);
                break;
            case Rotation._90:
                rotMat = SlimDX.Matrix.RotationZ((float)(90.0 / 180.0 * Math.PI));
                break;
            case Rotation._180:
                rotMat = SlimDX.Matrix.RotationZ((float)(180.0 / 180.0 * Math.PI));
                break;
            case Rotation._270:
                rotMat = SlimDX.Matrix.RotationZ((float)(270.0 / 180.0 * Math.PI));
                break;
        }

        _device.SetTransform(TransformState.View, rotMat * mat);
        updateFrame();
    }
}