Пример #1
0
        /// <summary>
        /// Binds the arguments of the <c>sparseMultiply</c> kernel function and
        /// queues its launch on <paramref name="stream"/>.
        /// </summary>
        /// <remarks>
        /// Argument order: <c>d_val</c>, <c>d_colIdx</c>, device pointer of
        /// <paramref name="a"/>, device pointer of <paramref name="acc"/>,
        /// <paramref name="alpha"/>, <paramref name="beta"/>, <c>size</c>,
        /// <c>colCount</c>, <c>valStride</c>, <c>colStride</c>.
        /// The launch is issued with <c>LaunchGridAsync</c>; this method does
        /// not synchronize afterwards.
        /// </remarks>
        /// <param name="stream">stream on which the launch is queued</param>
        /// <param name="alpha">scalar passed to the kernel (5th argument)</param>
        /// <param name="a">vector whose device pointer is the 3rd kernel argument</param>
        /// <param name="beta">scalar passed to the kernel (6th argument)</param>
        /// <param name="acc">vector whose device pointer is the 4th kernel argument</param>
        internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc)
        {
            // Number of bytes the argument cursor advances per argument kind.
            const int PointerBytes = sizeof(long);
            const int DoubleBytes  = sizeof(double);
            const int IntBytes     = sizeof(uint);

            CUdeviceptr inputPtr  = a.GetDevicePointer();
            CUdeviceptr outputPtr = acc.GetDevicePointer();

            int cursor = 0;

            // --- device pointers ---
            cu.ParamSetp(sparseMultiply, cursor, d_val);
            cursor += PointerBytes;
            cu.ParamSetp(sparseMultiply, cursor, d_colIdx);
            cursor += PointerBytes;
            cu.ParamSetp(sparseMultiply, cursor, inputPtr);
            cursor += PointerBytes;
            cu.ParamSetp(sparseMultiply, cursor, outputPtr);
            cursor += PointerBytes;

            // --- scalars ---
            cu.ParamSetd(sparseMultiply, cursor, alpha);
            cursor += DoubleBytes;
            cu.ParamSetd(sparseMultiply, cursor, beta);
            cursor += DoubleBytes;

            // --- integer layout parameters ---
            cu.ParamSeti(sparseMultiply, cursor, size);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, colCount);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, valStride);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, colStride);
            cursor += IntBytes;

            // The cursor now equals the total size of the argument block.
            cu.ParamSetSize(sparseMultiply, (uint)cursor);
            cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);

            cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
        }
Пример #2
0
        /// <summary>
        /// In-place element-wise product: for each <em>j</em>, <br/>
        /// this[j] = this[j]*<paramref name="other"/>[j]
        /// </summary>
        /// <param name="other">
        /// second factor; must be a <c>CudaVector</c> whose local length equals
        /// that of this vector. Both vectors must be locked.
        /// </param>
        /// <exception cref="ApplicationException">this vector or <paramref name="other"/> is not locked</exception>
        /// <exception cref="ArgumentException"><paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ</exception>
        public override void MultiplyElementWise(VectorBase other)
        {
            bool bothLocked = this.IsLocked && other.IsLocked;
            if (!bothLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector factor = other as CudaVector;
            if (factor == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (factor.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            // Kernel arguments: own data pointer, other data pointer, element count.
            const int PointerBytes = sizeof(long);
            const int IntBytes     = sizeof(uint);
            int       cursor       = 0;

            cu.ParamSetp(cumew, cursor, d_data);
            cursor += PointerBytes;
            cu.ParamSetp(cumew, cursor, factor.GetDevicePointer());
            cursor += PointerBytes;
            cu.ParamSeti(cumew, cursor, size);
            cursor += IntBytes;

            cu.ParamSetSize(cumew, (uint)cursor);
            cu.FuncSetBlockShape(cumew, blocksize, 1, 1);

            cu.LaunchGrid(cumew, blockcountfull, 1);
        }
Пример #3
0
        /// <summary>
        /// Inner product of this vector with <paramref name="other"/>, summed
        /// over all MPI processes.
        /// </summary>
        /// <remarks>
        /// Launches the <c>cuinnerprod</c> kernel, waits for the context to
        /// finish, adds up <c>blockcounthalf</c> partial results read from
        /// <c>h_result</c>, and finally combines the per-process sums with an
        /// MPI all-reduce (SUM over WORLD).
        /// NOTE(review): <c>h_result</c> is presumably the host view of
        /// <c>d_result</c> - confirm where both are allocated.
        /// </remarks>
        /// <param name="other">
        /// second operand; must be a <c>CudaVector</c> whose local length equals
        /// that of this vector. Both vectors must be locked.
        /// </param>
        /// <returns>the all-reduced sum of the per-process results</returns>
        /// <exception cref="ApplicationException">this vector or <paramref name="other"/> is not locked</exception>
        /// <exception cref="ArgumentException"><paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ</exception>
        public override double InnerProd(VectorBase other)
        {
            bool bothLocked = this.IsLocked && other.IsLocked;
            if (!bothLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector rhs = other as CudaVector;
            if (rhs == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (rhs.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            // Kernel arguments: own data, other data, result buffer, element count.
            const int PointerBytes = sizeof(long);
            const int IntBytes     = sizeof(uint);
            int       cursor       = 0;

            cu.ParamSetp(cuinnerprod, cursor, d_data);
            cursor += PointerBytes;
            cu.ParamSetp(cuinnerprod, cursor, rhs.GetDevicePointer());
            cursor += PointerBytes;
            cu.ParamSetp(cuinnerprod, cursor, d_result);
            cursor += PointerBytes;
            cu.ParamSeti(cuinnerprod, cursor, size);
            cursor += IntBytes;

            cu.ParamSetSize(cuinnerprod, (uint)cursor);
            cu.FuncSetBlockShape(cuinnerprod, blocksize, 1, 1);
            // One double of shared memory per thread of a block.
            cu.FuncSetSharedSize(cuinnerprod, (uint)(blocksize * sizeof(double)));

            cu.LaunchGrid(cuinnerprod, blockcounthalf, 1);
            cu.CtxSynchronize();

            double localSum  = 0.0;
            double globalSum = double.NaN;

            unsafe {
                // Add up the partial results in ascending index order.
                double *partials = (double *)h_result;
                int     k        = 0;
                while (k < blockcounthalf)
                {
                    localSum += partials[k];
                    k++;
                }

                csMPI.Raw.Allreduce((IntPtr)(&localSum), (IntPtr)(&globalSum), 1, csMPI.Raw._DATATYPE.DOUBLE, csMPI.Raw._OP.SUM, csMPI.Raw._COMM.WORLD);
            }

            return globalSum;
        }
Пример #4
0
            /// <summary>
            /// Sets up the send side of the communication vector for
            /// <paramref name="v"/> using the communication lists of
            /// <paramref name="M"/>.
            /// </summary>
            /// <remarks>
            /// The per-process index lists from <c>M._SpmvCommPattern.ComLists</c>
            /// are concatenated into one index array that is copied to the device.
            /// One host buffer (allocated with the DEVICEMAP flag) is carved into
            /// consecutive segments, one per target process; the start address of
            /// each segment is stored in <c>SendBuffers</c>.
            /// Nothing is allocated when the total number of entries is zero.
            /// </remarks>
            /// <param name="M">matrix providing the communication pattern</param>
            /// <param name="v">vector that owns this communication vector</param>
            /// <param name="stream">stream stored for later use by this object</param>
            internal CudaCommVector(MatrixBase M, CudaVector v, CUstream stream)
                : base(M, v)
            {
                this.owner  = v;
                this.stream = stream;
                cufill      = owner.m_env.Get_CudaVectorKernelDP_Function("fillSendBuffer");

                IDictionary <int, int[]> sendLists = M._SpmvCommPattern.ComLists;

                // Pass 1: remember the send length per target process and
                // accumulate the overall number of entries.
                int total = 0;
                foreach (int rank in sendLists.Keys)
                {
                    int len = sendLists[rank].Length;
                    base.SendBuffersLengths[rank] = len;
                    total += len;
                }

                size       = total;
                blockcount = (int)Math.Ceiling((decimal)size / blocksize);

                if (!(size > 0))
                {
                    // nothing to send: no buffers are needed
                    return;
                }

                // Index list: host array plus device copy.
                h_IndicesToSend = new int[size];
                cu.MemAlloc(out d_IndicesToSend, (uint)size * sizeof(int));

                // Send buffer: host allocation with a device-side pointer to it.
                cu.MemHostAlloc(out h_SendBuffer, sizeof(double) * (uint)size, CUmem_host_alloc.CU_MEMHOSTALLOC_DEVICEMAP);
                cu.MemHostGetDevicePointer(out d_SendBuffer, h_SendBuffer, 0);

                // Pass 2: hand out buffer segments and concatenate the index lists.
                int writePos = 0;
                unsafe {
                    double *segment = (double *)h_SendBuffer;

                    foreach (int rank in sendLists.Keys)
                    {
                        int len = base.SendBuffersLengths[rank];

                        // start address of the data sent to process 'rank'
                        base.SendBuffers[rank] = (IntPtr)segment;

                        Array.Copy(sendLists[rank], 0, h_IndicesToSend, writePos, len);

                        segment  += len;
                        writePos += len;
                    }
                }

                cu.MemcpyHtoD(d_IndicesToSend, h_IndicesToSend, (uint)size * sizeof(int));
            }
Пример #5
0
        /// <summary>
        /// Prepares the external part of a sparse matrix-vector product:
        /// stores <paramref name="alpha"/>, remembers the device pointer of
        /// <paramref name="acc"/> and zeroes the host accumulation buffer.
        /// </summary>
        /// <param name="alpha">scaling factor, stored in <c>m_alpha</c></param>
        /// <param name="beta">not used by this method</param>
        /// <param name="acc">accumulator vector; must be a <c>CudaVector</c></param>
        internal override void SpMV_External_Begin(double alpha, double beta, VectorBase acc)
        {
            m_alpha = alpha;
            d_acc   = ((CudaVector)acc).GetDevicePointer();

            unsafe {
                // Clear the first extSize doubles of the accumulation buffer.
                double *buffer = (double *)h_ElementsToAcc;
                int     count  = (int)extSize;

                for (int k = 0; k < count; k++)
                {
                    buffer[k] = 0;
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Overwrites the device data of this vector with the device data of
        /// <paramref name="other"/> (device-to-device copy of <c>size</c> doubles).
        /// </summary>
        /// <param name="other">
        /// source vector; must be a <c>CudaVector</c> whose local length equals
        /// that of this vector. Both vectors must be locked.
        /// </param>
        /// <exception cref="ApplicationException">this vector or <paramref name="other"/> is not locked</exception>
        /// <exception cref="ArgumentException"><paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ</exception>
        public override void CopyFrom(VectorBase other)
        {
            bool bothLocked = this.IsLocked && other.IsLocked;
            if (!bothLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector source = other as CudaVector;
            if (source == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (source.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            uint byteCount = (uint)(size * sizeof(double));
            cu.MemcpyDtoD(d_data, source.GetDevicePointer(), byteCount);
        }
Пример #7
0
        /// <summary>
        /// Validates the operands and forwards to <c>CallDriver</c> on the
        /// stream <c>intStream</c>.
        /// </summary>
        /// <param name="alpha">scalar forwarded to <c>CallDriver</c></param>
        /// <param name="a">input vector; must be a <c>CudaVector</c></param>
        /// <param name="beta">scalar forwarded to <c>CallDriver</c></param>
        /// <param name="acc">accumulator vector; must be a <c>CudaVector</c></param>
        /// <exception cref="ApplicationException">this object is not locked</exception>
        /// <exception cref="ArgumentException"><paramref name="a"/> or <paramref name="acc"/> is not a <c>CudaVector</c></exception>
        internal override void SpMV_Local_Start(double alpha, VectorBase a, double beta, VectorBase acc)
        {
            if (!m_IsLocked)
            {
                throw new ApplicationException("object must be locked.");
            }

            CudaVector input = a as CudaVector;
            if (input == null)
            {
                throw new ArgumentException("a must be of type CudaVector.", "a");
            }

            CudaVector output = acc as CudaVector;
            if (output == null)
            {
                throw new ArgumentException("acc must be of type CudaVector.", "acc");
            }

            CallDriver(intStream, alpha, input, beta, output);
        }
Пример #8
0
        /// <summary>
        /// Binds the arguments of the <c>sparseMultiply</c> kernel function,
        /// sets its block shape and shared-memory size, and queues its launch
        /// on <paramref name="stream"/>.
        /// </summary>
        /// <remarks>
        /// Argument order: <c>d_cellData</c>, device pointer of
        /// <paramref name="a"/>, <c>d_cellColIdx</c>, device pointer of
        /// <paramref name="acc"/>, <paramref name="alpha"/>,
        /// <paramref name="beta"/>, <c>cellsize</c>, <c>cellrowsperblock</c>,
        /// <c>cellsperrow</c>, <c>stride</c>, <c>rowcount</c>.
        /// The launch is issued with <c>LaunchGridAsync</c>; this method does
        /// not synchronize afterwards.
        /// </remarks>
        /// <param name="stream">stream on which the launch is queued</param>
        /// <param name="alpha">scalar passed to the kernel (5th argument)</param>
        /// <param name="a">vector whose device pointer is the 2nd kernel argument</param>
        /// <param name="beta">scalar passed to the kernel (6th argument)</param>
        /// <param name="acc">vector whose device pointer is the 4th kernel argument</param>
        internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc)
        {
            // Number of bytes the argument cursor advances per argument kind.
            const int PointerBytes = sizeof(long);
            const int DoubleBytes  = sizeof(double);
            const int IntBytes     = sizeof(uint);

            CUdeviceptr inputPtr  = a.GetDevicePointer();
            CUdeviceptr outputPtr = acc.GetDevicePointer();

            int cursor = 0;

            // --- device pointers ---
            cu.ParamSetp(sparseMultiply, cursor, d_cellData);
            cursor += PointerBytes;
            cu.ParamSetp(sparseMultiply, cursor, inputPtr);
            cursor += PointerBytes;
            cu.ParamSetp(sparseMultiply, cursor, d_cellColIdx);
            cursor += PointerBytes;
            cu.ParamSetp(sparseMultiply, cursor, outputPtr);
            cursor += PointerBytes;

            // --- scalars ---
            cu.ParamSetd(sparseMultiply, cursor, alpha);
            cursor += DoubleBytes;
            cu.ParamSetd(sparseMultiply, cursor, beta);
            cursor += DoubleBytes;

            // --- integer layout parameters ---
            cu.ParamSeti(sparseMultiply, cursor, cellsize);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, cellrowsperblock);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, cellsperrow);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, stride);
            cursor += IntBytes;
            cu.ParamSeti(sparseMultiply, cursor, rowcount);
            cursor += IntBytes;

            // The cursor now equals the total size of the argument block.
            cu.ParamSetSize(sparseMultiply, (uint)cursor);
            cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);
            // Shared memory: one double per thread plus two ints per cell row of a block.
            cu.FuncSetSharedSize(sparseMultiply, (uint)(blocksize * sizeof(double) + 2 * cellrowsperblock * sizeof(int)));

            cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
        }
Пример #9
0
        /// <summary>
        /// Exchanges the contents of this vector and <paramref name="other"/>
        /// by swapping their device data pointers; no data is copied.
        /// </summary>
        /// <param name="other">
        /// swap partner; must be a <c>CudaVector</c> whose local length equals
        /// that of this vector. Both vectors must be locked.
        /// </param>
        /// <exception cref="ApplicationException">this vector or <paramref name="other"/> is not locked</exception>
        /// <exception cref="ArgumentException"><paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ</exception>
        public override void Swap(VectorBase other)
        {
            bool bothLocked = this.IsLocked && other.IsLocked;
            if (!bothLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector partner = other as CudaVector;
            if (partner == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (partner.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            // Exchange the two device pointers through a temporary.
            CUdeviceptr mine = this.d_data;

            this.d_data    = partner.d_data;
            partner.d_data = mine;
        }
Пример #10
0
 /// <summary>
 /// Format-specific kernel invocation, supplied by derived classes.
 /// The overrides visible in this file bind the arguments of a
 /// <c>sparseMultiply</c> kernel and launch it asynchronously on
 /// <paramref name="stream"/>.
 /// NOTE(review): presumably computes acc = alpha*(this*a) + beta*acc;
 /// confirm against the kernel source.
 /// </summary>
 /// <param name="stream">stream on which the kernel launch is queued</param>
 /// <param name="alpha">scalar passed through to the kernel</param>
 /// <param name="a">vector whose device pointer is passed to the kernel</param>
 /// <param name="beta">scalar passed through to the kernel</param>
 /// <param name="acc">vector whose device pointer is passed to the kernel as the result buffer</param>
 abstract internal void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc);