/// <summary>
/// Exchanges the device buffers of this vector and <paramref name="other"/> by swapping
/// their <c>d_data</c> pointers. Only the two pointer fields are reassigned here; this
/// method itself copies no element data.
/// </summary>
/// <param name="other">
/// The vector to swap with. Must be a <c>CudaVector</c>, must be locked, and must have
/// the same <c>Part.LocalLength</c> as this vector.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="ApplicationException">This vector or <paramref name="other"/> is not locked.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ.
/// </exception>
public override void Swap(VectorBase other) {
    // Reject null explicitly: the lock check below dereferences 'other' and would
    // otherwise fail with an uninformative NullReferenceException.
    if (other == null) {
        throw new ArgumentNullException("other");
    }

    if (!this.IsLocked || !other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    CudaVector _other = other as CudaVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type CudaVector.", "other");
    }

    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Plain three-step exchange of the two device pointers.
    CUdeviceptr temp = _other.d_data;
    _other.d_data = this.d_data;
    this.d_data = temp;
}
/// <summary>
/// Fills the parameter block of the <c>sparseMultiply</c> function, configures its block
/// shape and shared-memory size, and starts it on <paramref name="stream"/> through
/// <c>cu.LaunchGridAsync</c>.
/// </summary>
/// <param name="stream">Stream handed to <c>cu.LaunchGridAsync</c>.</param>
/// <param name="alpha">First scalar argument of the function (presumably the factor on the matrix-vector product; confirm against the kernel source).</param>
/// <param name="a">Vector whose device pointer is passed as the second pointer argument.</param>
/// <param name="beta">Second scalar argument of the function (presumably the factor on the accumulator; confirm against the kernel source).</param>
/// <param name="acc">Vector whose device pointer is passed as the fourth pointer argument.</param>
internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc) {
    CUdeviceptr inputPtr = a.GetDevicePointer();
    CUdeviceptr accumulatorPtr = acc.GetDevicePointer();

    // Arguments are written back to back; argOffset is the running byte position
    // of the next argument inside the parameter block.
    int argOffset = 0;

    // Four device pointers, each occupying sizeof(long) bytes.
    cu.ParamSetp(sparseMultiply, argOffset, d_cellData);
    argOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, argOffset, inputPtr);
    argOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, argOffset, d_cellColIdx);
    argOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, argOffset, accumulatorPtr);
    argOffset += sizeof(long);

    // Two double-precision scalars.
    cu.ParamSetd(sparseMultiply, argOffset, alpha);
    argOffset += sizeof(double);
    cu.ParamSetd(sparseMultiply, argOffset, beta);
    argOffset += sizeof(double);

    // Five integer arguments, each occupying sizeof(uint) bytes.
    cu.ParamSeti(sparseMultiply, argOffset, cellsize);
    argOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, argOffset, cellrowsperblock);
    argOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, argOffset, cellsperrow);
    argOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, argOffset, stride);
    argOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, argOffset, rowcount);
    argOffset += sizeof(uint);

    // The final offset equals the total size of the parameter block.
    cu.ParamSetSize(sparseMultiply, (uint)argOffset);

    cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);

    // Shared memory request: one double per 'blocksize' entry plus two ints per cell row of a block.
    uint sharedBytes = (uint)(blocksize * sizeof(double) + 2 * cellrowsperblock * sizeof(int));
    cu.FuncSetSharedSize(sparseMultiply, sharedBytes);

    cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
}
/// <summary>
/// Forwards to the native <c>cuMemcpyDtoD</c> entry point and passes its return value to
/// <c>testResult</c> (presumably raising on a non-success status; confirm in
/// <c>testResult</c>). See the CUDA driver API documentation for the call's semantics.
/// </summary>
/// <param name="dstDevice">Destination device pointer, passed through unchanged.</param>
/// <param name="srcDevice">Source device pointer, passed through unchanged.</param>
/// <param name="ByteCount">Byte count, passed through unchanged.</param>
public static void MemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, uint ByteCount) {
    var status = my.cuMemcpyDtoD(dstDevice, srcDevice, ByteCount);
    testResult(status);
}
/// <summary>
/// Sets a device-pointer parameter: forwards the <c>p</c> field of <paramref name="ptr"/>,
/// cast to <c>long</c>, to <c>ParamSetl</c> together with <paramref name="hfunc"/> and
/// <paramref name="offset"/>. See the CUDA driver API documentation for parameter passing.
/// </summary>
/// <param name="hfunc">Function handle, passed through to <c>ParamSetl</c>.</param>
/// <param name="offset">Offset of the parameter, passed through to <c>ParamSetl</c>.</param>
/// <param name="ptr">Device pointer whose <c>p</c> value is written as a <c>long</c>.</param>
public static void ParamSetp(CUfunction hfunc, int offset, CUdeviceptr ptr) {
    ParamSetl(hfunc, offset, (long)ptr.p);
}
/// <summary>
/// Forwards to the native <c>cuMemcpyHtoD</c> entry point and passes its return value to
/// <c>testResult</c> (presumably raising on a non-success status; confirm in
/// <c>testResult</c>). See the CUDA driver API documentation for the call's semantics.
/// </summary>
/// <param name="dstDevice">Destination device pointer, passed through unchanged.</param>
/// <param name="srcHost">Host-side source address, passed through unchanged.</param>
/// <param name="ByteCount">Byte count, passed through unchanged.</param>
public static void MemcpyHtoD(CUdeviceptr dstDevice, IntPtr srcHost, uint ByteCount) {
    var status = my.cuMemcpyHtoD(dstDevice, srcHost, ByteCount);
    testResult(status);
}
/// <summary>
/// Forwards to the native <c>cuMemFree</c> entry point and passes its return value to
/// <c>testResult</c> (presumably raising on a non-success status; confirm in
/// <c>testResult</c>). See the CUDA driver API documentation for the call's semantics.
/// </summary>
/// <param name="dptr">Device pointer handed to <c>cuMemFree</c>.</param>
public static void MemFree(CUdeviceptr dptr) {
    testResult(my.cuMemFree(dptr));
}
/// <summary>
/// Forwards to the native <c>cuMemHostGetDevicePointer</c> entry point and passes its
/// return value to <c>testResult</c> (presumably raising on a non-success status; confirm
/// in <c>testResult</c>). See the CUDA driver API documentation for the call's semantics.
/// </summary>
/// <param name="dptr">Receives the device pointer written by the native call.</param>
/// <param name="p">Host address, passed through unchanged.</param>
/// <param name="flags">Flags value, passed through unchanged.</param>
public static void MemHostGetDevicePointer(out CUdeviceptr dptr, IntPtr p, uint flags) {
    var status = my.cuMemHostGetDevicePointer(out dptr, p, flags);
    testResult(status);
}
/// <summary>
/// Forwards to the native <c>cuMemsetD8</c> entry point and passes its return value to
/// <c>testResult</c> (presumably raising on a non-success status; confirm in
/// <c>testResult</c>). See the CUDA driver API documentation for the call's semantics.
/// </summary>
/// <param name="dstDevice">Destination device pointer, passed through unchanged.</param>
/// <param name="uc">Byte value, passed through unchanged.</param>
/// <param name="N">Count argument, passed through unchanged.</param>
public static void MemsetD8(CUdeviceptr dstDevice, byte uc, uint N) {
    var status = my.cuMemsetD8(dstDevice, uc, N);
    testResult(status);
}
/// <summary>
/// Forwards to the native <c>cuMemAlloc</c> entry point and passes its return value to
/// <c>testResult</c> (presumably raising on a non-success status; confirm in
/// <c>testResult</c>). See the CUDA driver API documentation for the call's semantics.
/// </summary>
/// <param name="dptr">Receives the device pointer written by the native call.</param>
/// <param name="bytesize">Requested size in bytes, passed through unchanged.</param>
public static void MemAlloc(out CUdeviceptr dptr, uint bytesize) {
    var status = my.cuMemAlloc(out dptr, bytesize);
    testResult(status);
}