/// <summary>
/// Runs the base-class buffer preparation and then, when <c>size</c> is positive,
/// launches the <c>cufill</c> kernel on <c>stream</c> and waits for that stream to finish.
/// </summary>
/// <exception cref="ApplicationException">
/// Thrown when <c>owner</c> is not locked.
/// </exception>
public override void FillSendBuffer() {
    if (!owner.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    base.FillSendBuffer();

    // Nothing to launch when there are no entries.
    if (size <= 0) {
        return;
    }

    // Kernel argument block: three pointers followed by one integer.
    // Each pointer slot advances the running offset by sizeof(long).
    int paramOffset = 0;

    cu.ParamSetp(cufill, paramOffset, d_SendBuffer);
    paramOffset += sizeof(long);

    cu.ParamSetp(cufill, paramOffset, d_IndicesToSend);
    paramOffset += sizeof(long);

    cu.ParamSetp(cufill, paramOffset, owner.GetDevicePointer());
    paramOffset += sizeof(long);

    cu.ParamSeti(cufill, paramOffset, size);
    paramOffset += sizeof(uint);

    cu.ParamSetSize(cufill, (uint)paramOffset);
    cu.FuncSetBlockShape(cufill, blocksize, 1, 1);

    // NOTE: a cache-configuration step (CU_FUNC_CACHE_PREFER_L1 on devices with
    // compute capability major >= 2) previously existed here only as commented-out
    // code; it is not executed.

    // Queue the kernel on the stream, then block until the stream has drained.
    cu.LaunchGridAsync(cufill, blockcount, 1, stream);
    cu.StreamSynchronize(stream);
}
/// <summary>
/// Binds the arguments of the <c>sparseMultiply</c> kernel and queues it on
/// <paramref name="stream"/>. This method does not synchronize with the stream.
/// </summary>
/// <param name="stream">Stream on which the kernel launch is queued.</param>
/// <param name="alpha">Scalar passed to the kernel as its first double argument.</param>
/// <param name="a">Vector whose device pointer is passed as the kernel's input.</param>
/// <param name="beta">Scalar passed to the kernel as its second double argument.</param>
/// <param name="acc">Vector whose device pointer is passed as the kernel's result buffer.</param>
/// <remarks>
/// NOTE(review): presumably the kernel computes acc = beta*acc + alpha*M*a — confirm against the kernel source.
/// </remarks>
internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc) {
    CUdeviceptr inputPtr = a.GetDevicePointer();
    CUdeviceptr outputPtr = acc.GetDevicePointer();

    int paramOffset = 0;

    // Pointer arguments; each slot advances the offset by sizeof(long).
    cu.ParamSetp(sparseMultiply, paramOffset, d_val);
    paramOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, paramOffset, d_colIdx);
    paramOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, paramOffset, inputPtr);
    paramOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, paramOffset, outputPtr);
    paramOffset += sizeof(long);

    // Scalar factors.
    cu.ParamSetd(sparseMultiply, paramOffset, alpha);
    paramOffset += sizeof(double);
    cu.ParamSetd(sparseMultiply, paramOffset, beta);
    paramOffset += sizeof(double);

    // Integer layout arguments.
    cu.ParamSeti(sparseMultiply, paramOffset, size);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, colCount);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, valStride);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, colStride);
    paramOffset += sizeof(uint);

    cu.ParamSetSize(sparseMultiply, (uint)paramOffset);

    cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);
    cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
}
/// <summary>
/// For each <em>j</em>, <br/>
/// this[j] = this[j]*<paramref name="other"/>[j]
/// </summary>
/// <param name="other">
/// Second factor; must be a locked <c>CudaVector</c> with the same local length as this vector.
/// </param>
/// <exception cref="ApplicationException">
/// Thrown when this vector or <paramref name="other"/> is not locked.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="other"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="other"/> is not a <c>CudaVector</c> or its local length differs.
/// </exception>
public override void MultiplyElementWise(VectorBase other) {
    if (!this.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    // Reject null explicitly; otherwise the IsLocked access below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }

    if (!other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    CudaVector _other = other as CudaVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type CudaVector.", "other");
    }

    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Kernel argument block: two pointers (this vector's data, other's data)
    // followed by the element count. Pointer slots advance by sizeof(long).
    int offset = 0;

    cu.ParamSetp(cumew, offset, d_data);
    offset += sizeof(long);

    cu.ParamSetp(cumew, offset, _other.GetDevicePointer());
    offset += sizeof(long);

    cu.ParamSeti(cumew, offset, size);
    offset += sizeof(uint);

    cu.ParamSetSize(cumew, (uint)offset);

    cu.FuncSetBlockShape(cumew, blocksize, 1, 1);
    cu.LaunchGrid(cumew, blockcountfull, 1);
}
/// <summary>
/// Computes the inner product of this vector with <paramref name="other"/>.
/// The <c>cuinnerprod</c> kernel is launched, the first <c>blockcounthalf</c> values
/// found in <c>h_result</c> are summed on the host, and that local sum is then
/// combined across all processes with an MPI sum-Allreduce on the world communicator.
/// </summary>
/// <param name="other">
/// Second operand; must be a locked <c>CudaVector</c> with the same local length as this vector.
/// </param>
/// <returns>The sum, over all MPI processes, of the locally accumulated results.</returns>
/// <exception cref="ApplicationException">
/// Thrown when this vector or <paramref name="other"/> is not locked.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="other"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="other"/> is not a <c>CudaVector</c> or its local length differs.
/// </exception>
public override double InnerProd(VectorBase other) {
    if (!this.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    // Reject null explicitly; otherwise the IsLocked access below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }

    if (!other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    CudaVector _other = other as CudaVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type CudaVector.", "other");
    }

    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Kernel argument block: three pointers (this data, other data, result buffer)
    // followed by the element count. Pointer slots advance by sizeof(long).
    int offset = 0;

    cu.ParamSetp(cuinnerprod, offset, d_data);
    offset += sizeof(long);

    cu.ParamSetp(cuinnerprod, offset, _other.GetDevicePointer());
    offset += sizeof(long);

    cu.ParamSetp(cuinnerprod, offset, d_result);
    offset += sizeof(long);

    cu.ParamSeti(cuinnerprod, offset, size);
    offset += sizeof(uint);

    cu.ParamSetSize(cuinnerprod, (uint)offset);

    cu.FuncSetBlockShape(cuinnerprod, blocksize, 1, 1);
    // One double of shared memory per thread in a block.
    cu.FuncSetSharedSize(cuinnerprod, (uint)(blocksize * sizeof(double)));
    cu.LaunchGrid(cuinnerprod, blockcounthalf, 1);

    // Wait for the context to finish before reading results on the host.
    cu.CtxSynchronize();

    // Sum one value per launched block.
    // NOTE(review): no explicit device-to-host copy happens here; presumably
    // h_result is a host view of the buffer behind d_result — confirm at the allocation site.
    double localSum = 0.0;
    unsafe {
        double *partials = (double *)h_result;
        for (int i = 0; i < blockcounthalf; i++) {
            localSum += partials[i];
        }
    }

    // Combine the per-process sums across the world communicator.
    double dotProdGlobal = double.NaN;
    unsafe {
        csMPI.Raw.Allreduce(
            (IntPtr)(&localSum),
            (IntPtr)(&dotProdGlobal),
            1,
            csMPI.Raw._DATATYPE.DOUBLE,
            csMPI.Raw._OP.SUM,
            csMPI.Raw._COMM.WORLD);
    }

    return dotProdGlobal;
}
/// <summary>
/// Prepares the external part of a sparse matrix-vector product: stores
/// <paramref name="alpha"/> in <c>m_alpha</c>, stores the device pointer of
/// <paramref name="acc"/> in <c>d_acc</c>, and zeroes the first <c>extSize</c>
/// doubles of the buffer at <c>h_ElementsToAcc</c>.
/// </summary>
/// <param name="alpha">Scaling factor stored in <c>m_alpha</c>.</param>
/// <param name="beta">Not used by this method.</param>
/// <param name="acc">
/// Accumulator vector; cast directly to <c>CudaVector</c>, so any other type
/// causes an <see cref="InvalidCastException"/>.
/// </param>
internal override void SpMV_External_Begin(double alpha, double beta, VectorBase acc) {
    m_alpha = alpha;

    CudaVector accumulator = (CudaVector)acc;
    d_acc = accumulator.GetDevicePointer();

    // Clear the host-side accumulation buffer.
    unsafe {
        double *storage = (double *)h_ElementsToAcc;
        int count = (int)extSize;
        for (int k = 0; k < count; k++) {
            storage[k] = 0;
        }
    }
}
/// <summary>
/// Copies the contents of <paramref name="other"/> into this vector with a
/// device-to-device copy of <c>size</c> doubles.
/// </summary>
/// <param name="other">
/// Source vector; must be a locked <c>CudaVector</c> with the same local length as this vector.
/// </param>
/// <exception cref="ApplicationException">
/// Thrown when this vector or <paramref name="other"/> is not locked.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="other"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="other"/> is not a <c>CudaVector</c> or its local length differs.
/// </exception>
public override void CopyFrom(VectorBase other) {
    if (!this.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    // Reject null explicitly; otherwise the IsLocked access below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }

    if (!other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    CudaVector _other = other as CudaVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type CudaVector.", "other");
    }

    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Byte count = number of elements * sizeof(double).
    cu.MemcpyDtoD(d_data, _other.GetDevicePointer(), (uint)(size * sizeof(double)));
}
/// <summary>
/// Binds the arguments of the cell-based <c>sparseMultiply</c> kernel, configures its
/// block shape and shared-memory size, and queues it on <paramref name="stream"/>.
/// This method does not synchronize with the stream.
/// </summary>
/// <param name="stream">Stream on which the kernel launch is queued.</param>
/// <param name="alpha">Scalar passed to the kernel as its first double argument.</param>
/// <param name="a">Vector whose device pointer is passed as the kernel's input.</param>
/// <param name="beta">Scalar passed to the kernel as its second double argument.</param>
/// <param name="acc">Vector whose device pointer is passed as the kernel's result buffer.</param>
/// <remarks>
/// NOTE(review): presumably the kernel computes acc = beta*acc + alpha*M*a — confirm against the kernel source.
/// </remarks>
internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc) {
    CUdeviceptr inputPtr = a.GetDevicePointer();
    CUdeviceptr outputPtr = acc.GetDevicePointer();

    int paramOffset = 0;

    // Pointer arguments, in kernel order: cell data, input vector, cell column
    // indices, result vector. Each slot advances the offset by sizeof(long).
    cu.ParamSetp(sparseMultiply, paramOffset, d_cellData);
    paramOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, paramOffset, inputPtr);
    paramOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, paramOffset, d_cellColIdx);
    paramOffset += sizeof(long);
    cu.ParamSetp(sparseMultiply, paramOffset, outputPtr);
    paramOffset += sizeof(long);

    // Scalar factors.
    cu.ParamSetd(sparseMultiply, paramOffset, alpha);
    paramOffset += sizeof(double);
    cu.ParamSetd(sparseMultiply, paramOffset, beta);
    paramOffset += sizeof(double);

    // Integer layout arguments.
    cu.ParamSeti(sparseMultiply, paramOffset, cellsize);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, cellrowsperblock);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, cellsperrow);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, stride);
    paramOffset += sizeof(uint);
    cu.ParamSeti(sparseMultiply, paramOffset, rowcount);
    paramOffset += sizeof(uint);

    cu.ParamSetSize(sparseMultiply, (uint)paramOffset);

    cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);

    // Shared memory: one double per thread plus two ints per cell row in a block.
    cu.FuncSetSharedSize(
        sparseMultiply,
        (uint)(blocksize * sizeof(double) + 2 * cellrowsperblock * sizeof(int)));

    cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
}