/// <summary>
/// Multiply with other vector element-wise
/// </summary>
/// <param name="other">
/// Other vector; must be a locked <c>clVector</c> whose local length equals
/// the local length of this vector.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="ApplicationException">This vector or <paramref name="other"/> is not locked.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="other"/> is not a <c>clVector</c>, or the local lengths differ.
/// </exception>
public override void MultiplyElementWise(VectorBase other) {
    // Reject null explicitly; otherwise the lock check below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }
    if (!this.IsLocked || !other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    clVector _other = other as clVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type clVector.", "other");
    }
    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Kernel arguments: 0 = this vector's device buffer, 1 = other's device buffer, 2 = size.
    cl.SetKernelArg(clmew, 0, d_data);
    cl.SetKernelArg(clmew, 1, _other.GetDevicePointer());
    cl.SetKernelArg(clmew, 2, size);

    // One-dimensional launch using the global/local sizes stored on this vector.
    int[] global = { globalsize };
    int[] local = { localsize };
    cl.EnqueueNDRangeKernel(device.cq, clmew, 1, global, local);
}
/// <summary>
/// Sets the arguments of the multiply kernel (<c>clmultiply</c>) via
/// <c>SetArguments</c> and enqueues it as a one-dimensional launch; the
/// value returned by the enqueue call is stored in <c>localEvent</c>.
/// </summary>
/// <param name="alpha">Forwarded unchanged to <c>SetArguments</c>.</param>
/// <param name="a">Input vector; must be a <c>clVector</c>.</param>
/// <param name="beta">Forwarded unchanged to <c>SetArguments</c>.</param>
/// <param name="acc">Accumulator vector; must be a <c>clVector</c>.</param>
/// <exception cref="ApplicationException">This object is not locked.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="a"/> or <paramref name="acc"/> is not a <c>clVector</c>.
/// </exception>
internal override void SpMV_Local_Start(double alpha, VectorBase a, double beta, VectorBase acc) {
    if (!m_IsLocked) {
        throw new ApplicationException("object must be locked.");
    }

    // Validate the input vector first, then the accumulator.
    clVector input = a as clVector;
    if (input == null) {
        throw new ArgumentException("a must be of type clVector.", "a");
    }

    clVector accumulator = acc as clVector;
    if (accumulator == null) {
        throw new ArgumentException("acc must be of type clVector.", "acc");
    }

    SetArguments(alpha, input, beta, accumulator);

    int[] workGroupSize = { localsize };
    int[] totalWorkSize = { globalsize };
    localEvent = cl.EnqueueNDRangeKernel(device.cq, clmultiply, 1, totalWorkSize, workGroupSize);
}
/// <summary>
/// Builds the send-side communication buffers for vector <paramref name="v"/>
/// from the SpMV communication pattern of matrix <paramref name="M"/>.
/// </summary>
/// <param name="M">Matrix whose <c>_SpmvCommPattern.ComLists</c> supplies, per process rank, the list of indices to send.</param>
/// <param name="v">Owning vector; its device provides the OpenCL program, context and command queue used here.</param>
internal clCommVector(MatrixBase M, clVector v)
    : base(M, v) {
    this.owner = v;
    clfill = cl.CreateKernel(owner.device.vectorProgram, "fillSendBuffer");

    IDictionary<int, int[]> comLists = M._SpmvCommPattern.ComLists;

    // First pass: record the send length for every process rank and sum them up.
    int totalEntries = 0;
    foreach (int procRnk in comLists.Keys) {
        int count = comLists[procRnk].Length;
        base.SendBuffersLengths[procRnk] = count;
        totalEntries += count;
    }

    // Round the global work size up to the next multiple of the local work size.
    size = totalEntries;
    globalsize = size;
    int remainder = size % localsize;
    if (remainder > 0) {
        globalsize += localsize - remainder;
    }

    // Nothing to send: no host or device buffers are allocated.
    if (size <= 0) {
        return;
    }

    // Allocate host and device storage for the index list and the send buffer.
    h_IndicesToSend = new int[size];
    d_IndicesToSend = cl.CreateBuffer(owner.device.env.context, cl_mem_flags.CL_MEM_READ_ONLY, (uint)size * sizeof(int));
    h_SendBuffer = Marshal.AllocHGlobal(size * sizeof(double));
    d_SendBuffer = cl.CreateBuffer(owner.device.env.context, cl_mem_flags.CL_MEM_WRITE_ONLY, (uint)size * sizeof(double));

    // Second pass: concatenate the per-process index lists into one array and
    // hand each process the start address of its slice of the host send buffer.
    int offset = 0;
    unsafe {
        double* cursor = (double*)h_SendBuffer;
        foreach (int procRnk in comLists.Keys) {
            base.SendBuffers[procRnk] = (IntPtr)cursor; // start address for sending to process 'procRnk'
            int count = base.SendBuffersLengths[procRnk];
            Array.Copy(comLists[procRnk], 0, h_IndicesToSend, offset, count);
            cursor += count;
            offset += count;
        }
    }

    // Upload the concatenated index list (third argument 'true' as in the blocking-write convention).
    cl.EnqueueWriteBuffer(owner.device.cq, d_IndicesToSend, true, 0, (uint)size * sizeof(int), h_IndicesToSend);
}
/// <summary>
/// Get the inner product with other vector
/// </summary>
/// <param name="other">
/// Other vector; must be a locked <c>clVector</c> whose local length equals
/// the local length of this vector.
/// </param>
/// <returns>
/// Returns the inner product: the local contribution computed on the device,
/// summed over all processes of the MPI world communicator.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="ApplicationException">This vector or <paramref name="other"/> is not locked.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="other"/> is not a <c>clVector</c>, or the local lengths differ.
/// </exception>
public override double InnerProd(VectorBase other) {
    // Reject null explicitly; otherwise the lock check below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }
    if (!this.IsLocked || !other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    clVector _other = other as clVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type clVector.", "other");
    }
    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    double InnerProdLocal = 0.0;

    // Kernel arguments: both input buffers, the result buffer, a local-size
    // argument of 'localsize' doubles, and the vector size.
    cl.SetKernelArg(clinnerprod, 0, d_data);
    cl.SetKernelArg(clinnerprod, 1, _other.GetDevicePointer());
    cl.SetKernelArg(clinnerprod, 2, d_result);
    cl.SetKernelArgLocalSize(clinnerprod, 3, (uint)localsize * sizeof(double));
    cl.SetKernelArg(clinnerprod, 4, size);

    int[] global = { globalsizehalf };
    int[] local = { localsize };
    cl.EnqueueNDRangeKernel(device.cq, clinnerprod, 1, global, local);

    // Map the result buffer for reading and add up its 'groups' entries on the host.
    IntPtr h_result;
    cl.EnqueueMapBuffer(device.cq, d_result, out h_result, true, cl_map_flags.CL_MAP_READ, 0, (uint)groups * sizeof(double));
    unsafe {
        double* ptr = (double*)h_result;
        for (int i = 0; i < groups; i++) {
            InnerProdLocal += ptr[i];
        }
    }
    cl.EnqueueUnmapMemObject(device.cq, d_result, h_result);

    // Combine the local sums of all processes (SUM over WORLD).
    double InnerProdGlobal = double.NaN;
    unsafe {
        csMPI.Raw.Allreduce((IntPtr)(&InnerProdLocal), (IntPtr)(&InnerProdGlobal), 1, csMPI.Raw._DATATYPE.DOUBLE, csMPI.Raw._OP.SUM, csMPI.Raw._COMM.WORLD);
    }

    return InnerProdGlobal;
}
/// <summary>
/// Binds all arguments of the multiply kernel (<c>clmultiply</c>) for this
/// matrix format: value and column-index buffers, the row-start buffer, the
/// accumulator and input vector buffers, a local-size argument, the two
/// scalars and the size.
/// </summary>
/// <param name="alpha">Scalar bound as kernel argument 6.</param>
/// <param name="a">Vector whose device pointer is bound as kernel argument 4.</param>
/// <param name="beta">Scalar bound as kernel argument 7.</param>
/// <param name="acc">Vector whose device pointer is bound as kernel argument 3.</param>
internal override void SetArguments(double alpha, clVector a, double beta, clVector acc) {
    // Fetch both device pointers up front (input first, then accumulator).
    cl_mem inputBuffer = a.GetDevicePointer();
    cl_mem accBuffer = acc.GetDevicePointer();

    // Matrix storage.
    cl.SetKernelArg(clmultiply, 0, d_val);
    cl.SetKernelArg(clmultiply, 1, d_colIdx);
    cl.SetKernelArg(clmultiply, 2, d_rowStart);

    // Vectors: accumulator precedes the input for this kernel.
    cl.SetKernelArg(clmultiply, 3, accBuffer);
    cl.SetKernelArg(clmultiply, 4, inputBuffer);

    // Local-size argument: (localsize + 1) ints.
    cl.SetKernelArgLocalSize(clmultiply, 5, (uint)(localsize + 1) * sizeof(int));

    // Scalars and problem size.
    cl.SetKernelArg(clmultiply, 6, alpha);
    cl.SetKernelArg(clmultiply, 7, beta);
    cl.SetKernelArg(clmultiply, 8, size);
}
/// <summary>
/// Binds all arguments of the multiply kernel (<c>clmultiply</c>) for this
/// matrix format: value and column-index buffers, the input and accumulator
/// vector buffers, the two scalars, the size, the column count and the two
/// strides.
/// </summary>
/// <param name="alpha">Scalar bound as kernel argument 4.</param>
/// <param name="a">Vector whose device pointer is bound as kernel argument 2.</param>
/// <param name="beta">Scalar bound as kernel argument 5.</param>
/// <param name="acc">Vector whose device pointer is bound as kernel argument 3.</param>
internal override void SetArguments(double alpha, clVector a, double beta, clVector acc) {
    // Fetch both device pointers up front (input first, then accumulator).
    cl_mem inputBuffer = a.GetDevicePointer();
    cl_mem accBuffer = acc.GetDevicePointer();

    // Matrix storage.
    cl.SetKernelArg(clmultiply, 0, d_val);
    cl.SetKernelArg(clmultiply, 1, d_colIdx);

    // Vectors.
    cl.SetKernelArg(clmultiply, 2, inputBuffer);
    cl.SetKernelArg(clmultiply, 3, accBuffer);

    // Scalars.
    cl.SetKernelArg(clmultiply, 4, alpha);
    cl.SetKernelArg(clmultiply, 5, beta);

    // Layout description.
    cl.SetKernelArg(clmultiply, 6, size);
    cl.SetKernelArg(clmultiply, 7, colCount);
    cl.SetKernelArg(clmultiply, 8, valStride);
    cl.SetKernelArg(clmultiply, 9, colStride);
}
/// <summary>
/// Prepares the external part of an SpMV: stores <paramref name="alpha"/>
/// in <c>m_alpha</c>, stores the device pointer of <paramref name="acc"/>
/// in <c>d_acc</c>, and sets the first <c>extSize</c> doubles of the host
/// buffer <c>h_ElementsToAcc</c> to zero.
/// </summary>
/// <param name="alpha">Scalar remembered in <c>m_alpha</c>.</param>
/// <param name="beta">Not used by this method.</param>
/// <param name="acc">Accumulator vector; cast to <c>clVector</c> (an invalid cast throws).</param>
internal override void SpMV_External_Begin(double alpha, double beta, VectorBase acc) {
    m_alpha = alpha;

    clVector accumulator = (clVector)acc;
    d_acc = accumulator.GetDevicePointer();

    unsafe {
        double* store = (double*)h_ElementsToAcc;
        int count = (int)extSize;

        // Clear the host-side accumulation buffer, front to back.
        for (int i = 0; i < count; i++) {
            store[i] = 0;
        }
    }
}
/// <summary>
/// Binds all arguments of the multiply kernel (<c>clmultiply</c>) for this
/// matrix format: value and column-index buffers, the sub-vector start and
/// block sub-vector buffers, the input and accumulator vector buffers, a
/// local-size argument, the two scalars, the size, the column count and the
/// two strides.
/// </summary>
/// <param name="alpha">Scalar bound as kernel argument 7.</param>
/// <param name="a">Vector whose device pointer is bound as kernel argument 4.</param>
/// <param name="beta">Scalar bound as kernel argument 8.</param>
/// <param name="acc">Vector whose device pointer is bound as kernel argument 5.</param>
internal override void SetArguments(double alpha, clVector a, double beta, clVector acc) {
    // Fetch both device pointers up front (input first, then accumulator).
    cl_mem inputBuffer = a.GetDevicePointer();
    cl_mem accBuffer = acc.GetDevicePointer();

    // Matrix storage.
    cl.SetKernelArg(clmultiply, 0, d_val);
    cl.SetKernelArg(clmultiply, 1, d_colIdx);

    // Sub-vector bookkeeping buffers.
    cl.SetKernelArg(clmultiply, 2, d_xSubStart);
    cl.SetKernelArg(clmultiply, 3, d_blockSubVector);

    // Vectors.
    cl.SetKernelArg(clmultiply, 4, inputBuffer);
    cl.SetKernelArg(clmultiply, 5, accBuffer);

    // Local-size argument: maxBlockSubVectorLength doubles.
    cl.SetKernelArgLocalSize(clmultiply, 6, (uint)maxBlockSubVectorLength * sizeof(double));

    // Scalars.
    cl.SetKernelArg(clmultiply, 7, alpha);
    cl.SetKernelArg(clmultiply, 8, beta);

    // Layout description.
    cl.SetKernelArg(clmultiply, 9, size);
    cl.SetKernelArg(clmultiply, 10, colCount);
    cl.SetKernelArg(clmultiply, 11, valStride);
    cl.SetKernelArg(clmultiply, 12, colStride);
}
/// <summary>
/// Binds all arguments of the multiply kernel (<c>clmultiply</c>) for this
/// matrix format: the cell data buffer, the input vector buffer, the cell
/// column-index buffer, the accumulator buffer, three local-size arguments,
/// the two scalars and the cell layout parameters.
/// </summary>
/// <param name="alpha">Scalar bound as kernel argument 7.</param>
/// <param name="a">Vector whose device pointer is bound as kernel argument 1.</param>
/// <param name="beta">Scalar bound as kernel argument 8.</param>
/// <param name="acc">Vector whose device pointer is bound as kernel argument 3.</param>
internal override void SetArguments(double alpha, clVector a, double beta, clVector acc) {
    // Fetch both device pointers up front (input first, then accumulator).
    cl_mem inputBuffer = a.GetDevicePointer();
    cl_mem accBuffer = acc.GetDevicePointer();

    // Device buffers: matrix data interleaved with the vectors.
    cl.SetKernelArg(clmultiply, 0, d_cellData);
    cl.SetKernelArg(clmultiply, 1, inputBuffer);
    cl.SetKernelArg(clmultiply, 2, d_cellColIdx);
    cl.SetKernelArg(clmultiply, 3, accBuffer);

    // Local-size arguments: localsize doubles, then two blocks of cellrowsperblock ints.
    cl.SetKernelArgLocalSize(clmultiply, 4, (uint)(localsize * sizeof(double)));
    cl.SetKernelArgLocalSize(clmultiply, 5, (uint)(cellrowsperblock * sizeof(int)));
    cl.SetKernelArgLocalSize(clmultiply, 6, (uint)(cellrowsperblock * sizeof(int)));

    // Scalars.
    cl.SetKernelArg(clmultiply, 7, alpha);
    cl.SetKernelArg(clmultiply, 8, beta);

    // Layout description.
    cl.SetKernelArg(clmultiply, 9, cellsize);
    cl.SetKernelArg(clmultiply, 10, cellrowsperblock);
    cl.SetKernelArg(clmultiply, 11, cellsperrow);
    cl.SetKernelArg(clmultiply, 12, stride);
    cl.SetKernelArg(clmultiply, 13, size);
}
/// <summary>
/// Copy content from other vector
/// </summary>
/// <param name="other">
/// Other vector; must be a locked <c>clVector</c> whose local length equals
/// the local length of this vector.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="ApplicationException">This vector or <paramref name="other"/> is not locked.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="other"/> is not a <c>clVector</c>, or the local lengths differ.
/// </exception>
public override void CopyFrom(VectorBase other) {
    // Reject null explicitly; otherwise the lock check below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }
    if (!this.IsLocked || !other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    clVector _other = other as clVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type clVector.", "other");
    }
    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Device-to-device copy: other's buffer is the source, this vector's
    // buffer the destination, both at offset 0, 'size' doubles in total.
    cl.EnqueueCopyBuffer(device.cq, _other.GetDevicePointer(), d_data, 0, 0, (uint)(size * sizeof(double)));
}
/// <summary>
/// Swap contents with other vector
/// </summary>
/// <remarks>
/// Only the <c>d_data</c> device buffer handles of the two vectors are
/// exchanged; no element data is copied.
/// </remarks>
/// <param name="other">
/// Other vector; must be a locked <c>clVector</c> whose local length equals
/// the local length of this vector.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
/// <exception cref="ApplicationException">This vector or <paramref name="other"/> is not locked.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="other"/> is not a <c>clVector</c>, or the local lengths differ.
/// </exception>
public override void Swap(VectorBase other) {
    // Reject null explicitly; otherwise the lock check below would fail
    // with a NullReferenceException instead of an argument error.
    if (other == null) {
        throw new ArgumentNullException("other");
    }
    if (!this.IsLocked || !other.IsLocked) {
        throw new ApplicationException("works only in locked mode");
    }

    clVector _other = other as clVector;
    if (_other == null) {
        throw new ArgumentException("other must be of type clVector.", "other");
    }
    if (_other.Part.LocalLength != this.Part.LocalLength) {
        throw new ArgumentException("mismatch in vector size.");
    }

    // Exchange the device buffer handles.
    cl_mem temp = _other.d_data;
    _other.d_data = this.d_data;
    this.d_data = temp;
}
/// <summary>
/// Sets the arguments of the multiply kernel for one sparse matrix-vector
/// product. The overrides visible in this file obtain the device pointers
/// of <paramref name="a"/> and <paramref name="acc"/> and bind them,
/// together with <paramref name="alpha"/>, <paramref name="beta"/> and
/// format-specific buffers and sizes, via <c>cl.SetKernelArg</c>.
/// </summary>
/// <param name="alpha">Scalar passed to the kernel.</param>
/// <param name="a">Input vector; its device pointer is passed to the kernel.</param>
/// <param name="beta">Scalar passed to the kernel.</param>
/// <param name="acc">Accumulator vector; its device pointer is passed to the kernel.</param>
abstract internal void SetArguments(double alpha, clVector a, double beta, clVector acc);