Exemple #1
0
            /// <summary>
            /// Runs the base-class fill step and then, when <c>size</c> is positive,
            /// launches the <c>cufill</c> kernel on <c>stream</c> and blocks until that
            /// stream has been synchronized.
            /// </summary>
            /// <remarks>
            /// Kernel arguments, in order: <c>d_SendBuffer</c>, <c>d_IndicesToSend</c>,
            /// the owner's device pointer, and <c>size</c>.
            /// </remarks>
            /// <exception cref="System.ApplicationException">
            /// <c>owner</c> is not in locked mode.
            /// </exception>
            public override void FillSendBuffer()
            {
                if (!owner.IsLocked)
                {
                    throw new ApplicationException("works only in locked mode");
                }

                base.FillSendBuffer();

                // Nothing to gather: skip the kernel launch entirely.
                if (!(size > 0))
                {
                    return;
                }

                // Pointer arguments are laid out with 8-byte spacing in the parameter block.
                const int PointerBytes = sizeof(long);
                int paramOffset = 0;

                cu.ParamSetp(cufill, paramOffset, d_SendBuffer);
                paramOffset += PointerBytes;

                cu.ParamSetp(cufill, paramOffset, d_IndicesToSend);
                paramOffset += PointerBytes;

                cu.ParamSetp(cufill, paramOffset, owner.GetDevicePointer());
                paramOffset += PointerBytes;

                cu.ParamSeti(cufill, paramOffset, size);
                paramOffset += sizeof(uint);

                // The accumulated offset is the total size of the parameter block.
                cu.ParamSetSize(cufill, (uint)paramOffset);
                cu.FuncSetBlockShape(cufill, blocksize, 1, 1);

                // NOTE: a cache-config hint (CU_FUNC_CACHE_PREFER_L1 for compute
                // capability >= 2) previously existed here only as commented-out code;
                // it is not applied.

                cu.LaunchGridAsync(cufill, blockcount, 1, stream);

                // Wait for the launch above before returning to the caller.
                cu.StreamSynchronize(stream);
            }
Exemple #2
0
        /// <summary>
        /// Packs the arguments of the <c>sparseMultiply</c> kernel and enqueues the
        /// launch on <paramref name="stream"/>. No synchronization is performed here.
        /// </summary>
        /// <remarks>
        /// Kernel arguments, in order: <c>d_val</c>, <c>d_colIdx</c>, the device pointer of
        /// <paramref name="a"/>, the device pointer of <paramref name="acc"/>,
        /// <paramref name="alpha"/>, <paramref name="beta"/>, <c>size</c>, <c>colCount</c>,
        /// <c>valStride</c>, <c>colStride</c>.
        /// </remarks>
        /// <param name="stream">Stream on which the kernel launch is enqueued.</param>
        /// <param name="alpha">Scalar passed through to the kernel.</param>
        /// <param name="a">Vector whose device pointer is the kernel's third argument.</param>
        /// <param name="beta">Scalar passed through to the kernel.</param>
        /// <param name="acc">Vector whose device pointer is the kernel's fourth argument.</param>
        internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc)
        {
            // Resolve both device pointers up front, before touching the kernel parameters.
            CUdeviceptr inputPtr  = a.GetDevicePointer();
            CUdeviceptr outputPtr = acc.GetDevicePointer();

            // Pointer arguments are laid out with 8-byte spacing in the parameter block.
            const int PointerBytes = sizeof(long);
            int paramOffset = 0;

            // --- pointer arguments ---
            cu.ParamSetp(sparseMultiply, paramOffset, d_val);
            paramOffset += PointerBytes;

            cu.ParamSetp(sparseMultiply, paramOffset, d_colIdx);
            paramOffset += PointerBytes;

            cu.ParamSetp(sparseMultiply, paramOffset, inputPtr);
            paramOffset += PointerBytes;

            cu.ParamSetp(sparseMultiply, paramOffset, outputPtr);
            paramOffset += PointerBytes;

            // --- scalar factors ---
            cu.ParamSetd(sparseMultiply, paramOffset, alpha);
            paramOffset += sizeof(double);

            cu.ParamSetd(sparseMultiply, paramOffset, beta);
            paramOffset += sizeof(double);

            // --- integer layout arguments ---
            cu.ParamSeti(sparseMultiply, paramOffset, size);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, colCount);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, valStride);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, colStride);
            paramOffset += sizeof(uint);

            // The accumulated offset is the total size of the parameter block.
            cu.ParamSetSize(sparseMultiply, (uint)paramOffset);
            cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);

            cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
        }
Exemple #3
0
        /// <summary>
        /// For each <em>j</em>, <br/>
        /// this[j] = this[j]*<paramref name="other"/>[j]
        /// </summary>
        /// <remarks>
        /// The product is computed by launching the <c>cumew</c> kernel with the arguments
        /// <c>d_data</c>, the device pointer of <paramref name="other"/>, and <c>size</c>.
        /// </remarks>
        /// <param name="other">
        /// The second factor; must be a non-null, locked <c>CudaVector</c> with the same
        /// local length as this vector.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="other"/> is null.
        /// </exception>
        /// <exception cref="System.ApplicationException">
        /// This vector or <paramref name="other"/> is not in locked mode.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ.
        /// </exception>
        public override void MultiplyElementWise(VectorBase other)
        {
            // Validate null first: the lock check below dereferences 'other', which would
            // otherwise surface as an uninformative NullReferenceException.
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            if (!this.IsLocked || !other.IsLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector _other = other as CudaVector;

            if (_other == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (_other.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            // Pointer arguments are laid out with 8-byte spacing in the parameter block.
            int offset = 0;

            cu.ParamSetp(cumew, offset, d_data);
            offset += sizeof(long);
            cu.ParamSetp(cumew, offset, _other.GetDevicePointer());
            offset += sizeof(long);
            cu.ParamSeti(cumew, offset, size);
            offset += sizeof(uint);

            // The accumulated offset is the total size of the parameter block.
            cu.ParamSetSize(cumew, (uint)offset);
            cu.FuncSetBlockShape(cumew, blocksize, 1, 1);

            cu.LaunchGrid(cumew, blockcountfull, 1);
        }
Exemple #4
0
        /// <summary>
        /// Computes the inner product of this vector with <paramref name="other"/>,
        /// summed over all processes of the MPI world communicator.
        /// </summary>
        /// <remarks>
        /// The <c>cuinnerprod</c> kernel is launched with <c>blockcounthalf</c> blocks; after
        /// a context synchronization, <c>blockcounthalf</c> partial results are read from
        /// <c>h_result</c> and summed on the host, and the local sum is then combined across
        /// processes with an MPI all-reduce (SUM).
        /// NOTE(review): presumably <c>h_result</c> is the host-visible view of the memory
        /// behind <c>d_result</c> - confirm against the allocation code.
        /// </remarks>
        /// <param name="other">
        /// The second operand; must be a non-null, locked <c>CudaVector</c> with the same
        /// local length as this vector.
        /// </param>
        /// <returns>The globally reduced inner product.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="other"/> is null.
        /// </exception>
        /// <exception cref="System.ApplicationException">
        /// This vector or <paramref name="other"/> is not in locked mode.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ.
        /// </exception>
        public override double InnerProd(VectorBase other)
        {
            // Validate null first: the lock check below dereferences 'other', which would
            // otherwise surface as an uninformative NullReferenceException.
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            if (!this.IsLocked || !other.IsLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector _other = other as CudaVector;

            if (_other == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (_other.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            int    offset      = 0;
            double finalResult = 0.0;

            // Pointer arguments are laid out with 8-byte spacing in the parameter block.
            cu.ParamSetp(cuinnerprod, offset, d_data);
            offset += sizeof(long);
            cu.ParamSetp(cuinnerprod, offset, _other.GetDevicePointer());
            offset += sizeof(long);
            cu.ParamSetp(cuinnerprod, offset, d_result);
            offset += sizeof(long);
            cu.ParamSeti(cuinnerprod, offset, size);
            offset += sizeof(uint);

            // The accumulated offset is the total size of the parameter block.
            cu.ParamSetSize(cuinnerprod, (uint)offset);
            cu.FuncSetBlockShape(cuinnerprod, blocksize, 1, 1);
            // One double of dynamic shared memory per thread of a block.
            cu.FuncSetSharedSize(cuinnerprod, (uint)(blocksize * sizeof(double)));

            cu.LaunchGrid(cuinnerprod, blockcounthalf, 1);
            // Wait for the context before reading the partial results on the host.
            cu.CtxSynchronize();

            unsafe {
                // Sum the per-block partial results (one per launched block).
                double *ptr = (double *)h_result;
                for (int i = 0; i < blockcounthalf; i++)
                {
                    finalResult += ptr[i];
                }
            }

            double dotProdGlobal = double.NaN;

            unsafe {
                // Combine the local sums of all processes; every process receives the total.
                csMPI.Raw.Allreduce((IntPtr)(&finalResult), (IntPtr)(&dotProdGlobal), 1, csMPI.Raw._DATATYPE.DOUBLE, csMPI.Raw._OP.SUM, csMPI.Raw._COMM.WORLD);
            }
            return dotProdGlobal;
        }
Exemple #5
0
        /// <summary>
        /// Prepares the external part of a sparse matrix-vector product: stores
        /// <paramref name="alpha"/> in <c>m_alpha</c>, stores the device pointer of
        /// <paramref name="acc"/> in <c>d_acc</c>, and zeroes the first <c>extSize</c>
        /// doubles of the buffer at <c>h_ElementsToAcc</c>.
        /// </summary>
        /// <param name="alpha">Scaling factor, stored in <c>m_alpha</c>.</param>
        /// <param name="beta">Not used by this method.</param>
        /// <param name="acc">
        /// Accumulator vector; it is cast to <c>CudaVector</c>, so any other
        /// runtime type makes the cast fail.
        /// </param>
        internal override void SpMV_External_Begin(double alpha, double beta, VectorBase acc)
        {
            m_alpha = alpha;
            d_acc   = ((CudaVector)acc).GetDevicePointer();

            unsafe
            {
                // Clear the host-side accumulation buffer, front to back.
                double* accumulator = (double*)h_ElementsToAcc;
                int     count       = (int)extSize;

                for (int i = 0; i < count; i++)
                {
                    accumulator[i] = 0;
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Copies the contents of <paramref name="other"/> into this vector with a
        /// device-to-device memory copy of <c>size</c> doubles.
        /// </summary>
        /// <param name="other">
        /// The source vector; must be a non-null, locked <c>CudaVector</c> with the same
        /// local length as this vector.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="other"/> is null.
        /// </exception>
        /// <exception cref="System.ApplicationException">
        /// This vector or <paramref name="other"/> is not in locked mode.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="other"/> is not a <c>CudaVector</c>, or the local lengths differ.
        /// </exception>
        public override void CopyFrom(VectorBase other)
        {
            // Validate null first: the lock check below dereferences 'other', which would
            // otherwise surface as an uninformative NullReferenceException.
            if (other == null)
            {
                throw new ArgumentNullException("other");
            }

            if (!this.IsLocked || !other.IsLocked)
            {
                throw new ApplicationException("works only in locked mode");
            }

            CudaVector _other = other as CudaVector;

            if (_other == null)
            {
                throw new ArgumentException("other must be of type CudaVector.", "other");
            }

            if (_other.Part.LocalLength != this.Part.LocalLength)
            {
                throw new ArgumentException("mismatch in vector size.");
            }

            // Destination is this vector's device buffer; the byte count is size doubles.
            cu.MemcpyDtoD(d_data, _other.GetDevicePointer(), (uint)(size * sizeof(double)));
        }
Exemple #7
0
        /// <summary>
        /// Packs the arguments of the <c>sparseMultiply</c> kernel, configures its block
        /// shape and dynamic shared memory size, and enqueues the launch on
        /// <paramref name="stream"/>. No synchronization is performed here.
        /// </summary>
        /// <remarks>
        /// Kernel arguments, in order: <c>d_cellData</c>, the device pointer of
        /// <paramref name="a"/>, <c>d_cellColIdx</c>, the device pointer of
        /// <paramref name="acc"/>, <paramref name="alpha"/>, <paramref name="beta"/>,
        /// <c>cellsize</c>, <c>cellrowsperblock</c>, <c>cellsperrow</c>, <c>stride</c>,
        /// <c>rowcount</c>.
        /// </remarks>
        /// <param name="stream">Stream on which the kernel launch is enqueued.</param>
        /// <param name="alpha">Scalar passed through to the kernel.</param>
        /// <param name="a">Vector whose device pointer is the kernel's second argument.</param>
        /// <param name="beta">Scalar passed through to the kernel.</param>
        /// <param name="acc">Vector whose device pointer is the kernel's fourth argument.</param>
        internal override void CallDriver(CUstream stream, double alpha, CudaVector a, double beta, CudaVector acc)
        {
            // Resolve both device pointers up front, before touching the kernel parameters.
            CUdeviceptr inputPtr  = a.GetDevicePointer();
            CUdeviceptr outputPtr = acc.GetDevicePointer();

            // Pointer arguments are laid out with 8-byte spacing in the parameter block.
            const int PointerBytes = sizeof(long);
            int paramOffset = 0;

            // --- pointer arguments ---
            cu.ParamSetp(sparseMultiply, paramOffset, d_cellData);
            paramOffset += PointerBytes;

            cu.ParamSetp(sparseMultiply, paramOffset, inputPtr);
            paramOffset += PointerBytes;

            cu.ParamSetp(sparseMultiply, paramOffset, d_cellColIdx);
            paramOffset += PointerBytes;

            cu.ParamSetp(sparseMultiply, paramOffset, outputPtr);
            paramOffset += PointerBytes;

            // --- scalar factors ---
            cu.ParamSetd(sparseMultiply, paramOffset, alpha);
            paramOffset += sizeof(double);

            cu.ParamSetd(sparseMultiply, paramOffset, beta);
            paramOffset += sizeof(double);

            // --- integer layout arguments ---
            cu.ParamSeti(sparseMultiply, paramOffset, cellsize);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, cellrowsperblock);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, cellsperrow);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, stride);
            paramOffset += sizeof(uint);

            cu.ParamSeti(sparseMultiply, paramOffset, rowcount);
            paramOffset += sizeof(uint);

            // The accumulated offset is the total size of the parameter block.
            cu.ParamSetSize(sparseMultiply, (uint)paramOffset);
            cu.FuncSetBlockShape(sparseMultiply, blocksize, 1, 1);

            // Dynamic shared memory: one double per thread of a block plus
            // two ints per cell row handled by a block.
            uint sharedBytes = (uint)(blocksize * sizeof(double) + 2 * cellrowsperblock * sizeof(int));
            cu.FuncSetSharedSize(sparseMultiply, sharedBytes);

            cu.LaunchGridAsync(sparseMultiply, blockcount, 1, stream);
        }