// Computes the launch configuration (evalThreads, evalBlocks, align), converts
// TrainedModel.SupportElements with CudaHelpers.TransformToSlicedEllpack and
// copies the resulting arrays, plus the per-element sum of values, to the device.
private void SetCudaDataForFormat()
{
    evalThreads = threadsPerRow * sliceSize;

    int N = sizeSV;
    // ceil(N * threadsPerRow / evalThreads), computed in floating point.
    evalBlocks = (int)Math.Ceiling(1.0 * N * threadsPerRow / evalThreads);

    // sliceSize * threadsPerRow rounded up to the next multiple of 64.
    align = (int)Math.Ceiling(1.0 * sliceSize * threadsPerRow / 64) * 64;

    float[] vecVals;
    int[] vecColIdx;
    int[] vecLength;
    int[] sliceStart;
    CudaHelpers.TransformToSlicedEllpack(out vecVals, out vecColIdx, out sliceStart, out vecLength, TrainedModel.SupportElements, threadsPerRow, sliceSize);

    // AsOrdered() keeps selfSum[i] paired with SupportElements[i]; a plain
    // AsParallel() query does not guarantee the order of its results.
    float[] selfSum = TrainedModel.SupportElements
        .AsParallel()
        .AsOrdered()
        .Select(c => c.Values.Sum())
        .ToArray();

    // copy data to device
    valsPtr = cuda.CopyHostToDevice(vecVals);
    idxPtr = cuda.CopyHostToDevice(vecColIdx);
    vecLengthPtr = cuda.CopyHostToDevice(vecLength);
    sliceStartPtr = cuda.CopyHostToDevice(sliceStart);
    selfSumPtr = cuda.CopyHostToDevice(selfSum);
}
// Converts TrainedModel.SupportElements with CudaHelpers.TransformToCSRFormat,
// sets evalBlocks and copies the three resulting arrays to the device.
private void SetCudaData()
{
    float[] values;
    int[] indices;
    int[] pointers;

    CudaHelpers.TransformToCSRFormat(
        out values,
        out indices,
        out pointers,
        TrainedModel.SupportElements);

    // sizeSV / evalThreads rounded up using integer arithmetic.
    evalBlocks = (sizeSV + evalThreads - 1) / evalThreads;

    // Upload the converted arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(indices);
    vecPointerPtr = cuda.CopyHostToDevice(pointers);
}
// Sets evalBlocks, converts TrainedModel.SupportElements with
// CudaHelpers.TransformToERTILPFormat and copies the results to the device.
private void SetCudaDataForERTILP()
{
    // ceil(ThreadsPerRow * sizeSV / evalThreads), computed in floating point.
    double requestedThreads = ThreadsPerRow * sizeSV;
    evalBlocks = (int)Math.Ceiling(requestedThreads / evalThreads);

    // Alignment handed to the transform: ThreadsPerRow * Prefetch.
    int alignment = ThreadsPerRow * Prefetch;

    float[] values;
    int[] columnIndices;
    int[] lengths;
    CudaHelpers.TransformToERTILPFormat(
        out values,
        out columnIndices,
        out lengths,
        TrainedModel.SupportElements,
        alignment,
        ThreadsPerRow);

    // Upload the converted arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIndices);
    vecLengthPtr = cuda.CopyHostToDevice(lengths);
}
// Converts TrainedModel.SupportElements with CudaHelpers.TransformToEllpackRFormat,
// computes each element's DotProduct(), sets evalBlocks and copies everything
// to the device.
private void SetCudaDataForEllpack()
{
    float[] values;
    int[] columnIndices;
    int[] lengths;

    CudaHelpers.TransformToEllpackRFormat(
        out values,
        out columnIndices,
        out lengths,
        TrainedModel.SupportElements);

    // One DotProduct() result per support element, in source order.
    float[] selfDots = TrainedModel.SupportElements
        .Select(element => element.DotProduct())
        .ToArray();

    // sizeSV / evalThreads rounded up using integer arithmetic.
    evalBlocks = (sizeSV + evalThreads - 1) / evalThreads;

    // Upload the arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIndices);
    vecLengthPtr = cuda.CopyHostToDevice(lengths);
    selfDotPtr = cuda.CopyHostToDevice(selfDots);
}
// Computes the launch configuration (evalThreads, evalBlocks, align), converts
// TrainedModel.SupportElements with CudaHelpers.TransformToSERTILP and copies
// the resulting arrays to the device.
private void SetCudaDataForFormat()
{
    evalThreads = threadsPerRow * sliceSize;

    // ceil(sizeSV * threadsPerRow / evalThreads), computed in floating point.
    double requestedThreads = 1.0 * sizeSV * threadsPerRow;
    evalBlocks = (int)Math.Ceiling(requestedThreads / evalThreads);

    // sliceSize * threadsPerRow rounded up to the next multiple of 64.
    int chunksOf64 = (int)Math.Ceiling(1.0 * sliceSize * threadsPerRow / 64);
    align = chunksOf64 * 64;

    float[] values;
    int[] columnIndices;
    int[] lengths;
    int[] sliceOffsets;
    CudaHelpers.TransformToSERTILP(
        out values,
        out columnIndices,
        out sliceOffsets,
        out lengths,
        TrainedModel.SupportElements,
        threadsPerRow,
        sliceSize,
        preFechSize);

    // Upload the converted arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIndices);
    vecLengthPtr = cuda.CopyHostToDevice(lengths);
    sliceStartPtr = cuda.CopyHostToDevice(sliceOffsets);
}
// Builds the CSR arrays for TrainedModel.SupportElements through
// CudaHelpers.TransformToCSRFormat, sets evalBlocks, then uploads the arrays.
private void SetCudaData()
{
    float[] csrValues;
    int[] csrIndices;
    int[] csrPointers;

    CudaHelpers.TransformToCSRFormat(
        out csrValues,
        out csrIndices,
        out csrPointers,
        TrainedModel.SupportElements);

    // Integer round-up of sizeSV / evalThreads.
    evalBlocks = (sizeSV + evalThreads - 1) / evalThreads;

    // Copy each array to the device and keep the returned pointer.
    valsPtr = cuda.CopyHostToDevice(csrValues);
    idxPtr = cuda.CopyHostToDevice(csrIndices);
    vecPointerPtr = cuda.CopyHostToDevice(csrPointers);
}
// Converts TrainedModel.SupportElements with CudaHelpers.TransformToEllpackRFormat
// (alignment argument 2), computes each element's DotProduct(), sets evalBlocks
// and copies everything to the device.
private void SetCudaDataForEllpack()
{
    // Alignment value passed to the transform.
    int alignment = 2;

    float[] values;
    int[] columnIndices;
    int[] lengths;
    CudaHelpers.TransformToEllpackRFormat(
        out values,
        out columnIndices,
        out lengths,
        TrainedModel.SupportElements,
        alignment);

    // One DotProduct() result per support element, in source order.
    float[] selfDots = TrainedModel.SupportElements
        .Select(element => element.DotProduct())
        .ToArray();

    // sizeSV / evalThreads rounded up using integer arithmetic.
    evalBlocks = (sizeSV + evalThreads - 1) / evalThreads;

    // Upload the arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIndices);
    vecLengthPtr = cuda.CopyHostToDevice(lengths);
    selfDotPtr = cuda.CopyHostToDevice(selfDots);
}
// Computes the launch configuration (evalThreads, evalBlocks, align), converts
// TrainedModel.SupportElements with CudaHelpers.TransformToSlicedEllpack and
// copies the resulting arrays, plus the per-element sum of values, to the device.
private void SetCudaDataForFormat()
{
    evalThreads = threadsPerRow * sliceSize;

    int N = sizeSV;
    // ceil(N * threadsPerRow / evalThreads), computed in floating point.
    evalBlocks = (int)Math.Ceiling(1.0 * N * threadsPerRow / evalThreads);

    // sliceSize * threadsPerRow rounded up to the next multiple of 64.
    align = (int)Math.Ceiling(1.0 * sliceSize * threadsPerRow / 64) * 64;

    float[] vecVals;
    int[] vecColIdx;
    int[] vecLength;
    int[] sliceStart;
    CudaHelpers.TransformToSlicedEllpack(out vecVals, out vecColIdx, out sliceStart, out vecLength, TrainedModel.SupportElements, threadsPerRow, sliceSize);

    // AsOrdered() keeps selfSum[i] paired with SupportElements[i]; a plain
    // AsParallel() query does not guarantee the order of its results.
    float[] selfSum = TrainedModel.SupportElements
        .AsParallel()
        .AsOrdered()
        .Select(c => c.Values.Sum())
        .ToArray();

    // copy data to device
    valsPtr = cuda.CopyHostToDevice(vecVals);
    idxPtr = cuda.CopyHostToDevice(vecColIdx);
    vecLengthPtr = cuda.CopyHostToDevice(vecLength);
    sliceStartPtr = cuda.CopyHostToDevice(sliceStart);
    selfSumPtr = cuda.CopyHostToDevice(selfSum);
}
// Computes the launch configuration (evalThreads, evalBlocks, align), converts
// TrainedModel.SupportElements with CudaHelpers.TransformToSERTILP and copies
// the resulting arrays to the device.
private void SetCudaDataForFormat()
{
    evalThreads = threadsPerRow * sliceSize;

    // ceil(sizeSV * threadsPerRow / evalThreads), computed in floating point.
    double requestedThreads = 1.0 * sizeSV * threadsPerRow;
    evalBlocks = (int)Math.Ceiling(requestedThreads / evalThreads);

    // sliceSize * threadsPerRow rounded up to the next multiple of 64.
    int chunksOf64 = (int)Math.Ceiling(1.0 * sliceSize * threadsPerRow / 64);
    align = chunksOf64 * 64;

    float[] values;
    int[] columnIndices;
    int[] lengths;
    int[] sliceOffsets;
    CudaHelpers.TransformToSERTILP(
        out values,
        out columnIndices,
        out sliceOffsets,
        out lengths,
        TrainedModel.SupportElements,
        threadsPerRow,
        sliceSize,
        preFechSize);

    // Upload the converted arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIndices);
    vecLengthPtr = cuda.CopyHostToDevice(lengths);
    sliceStartPtr = cuda.CopyHostToDevice(sliceOffsets);
}
// Loads the PTX module, fills two input vectors of length N with random values,
// launches the entry function on them with a third output vector, copies the
// output back and releases the device buffers.
//
// args is not used by this implementation. The method always returns default(T).
// Throws InvalidOperationException when no entry method has been loaded.
public override T Execute<T>(params object[] args)
{
    if (string.IsNullOrEmpty(m_entryMethod))
    {
        throw new InvalidOperationException("No method is loaded");
    }

    // Create cuda context
    // NOTE(review): neither the context nor the loaded module is released in this
    // method - confirm whether the GASS.CUDA wrapper needs an explicit teardown.
    GASS.CUDA.CUDA cuda = new GASS.CUDA.CUDA(true);

    // Load module (PTX) and get function
    GASS.CUDA.Types.CUmodule module = cuda.LoadModule(m_ptx);
    GASS.CUDA.Types.CUfunction func = cuda.GetModuleFunction(module, m_entryMethod);

    // Create data
    float[] A = new float[N];
    float[] B = new float[N];
    float[] C = new float[N];
    Random rand = new Random();
    for (int i = 0; i < N; i++)
    {
        A[i] = rand.Next(1000);
        B[i] = rand.Next(1000);
        C[i] = -1;
    }

    // Track every successful allocation so the finally block frees exactly the
    // buffers that exist, even when a later call throws.
    GASS.CUDA.Types.CUdeviceptr[] allocated = new GASS.CUDA.Types.CUdeviceptr[3];
    int allocatedCount = 0;
    try
    {
        // Allocate data on device
        GASS.CUDA.Types.CUdeviceptr dA = cuda.Allocate<float>(A);
        allocated[allocatedCount++] = dA;
        GASS.CUDA.Types.CUdeviceptr dB = cuda.Allocate<float>(B);
        allocated[allocatedCount++] = dB;
        GASS.CUDA.Types.CUdeviceptr dC = cuda.Allocate<float>(C);
        allocated[allocatedCount++] = dC;

        // Copy data to device
        cuda.CopyHostToDevice<float>(dA, A);
        cuda.CopyHostToDevice<float>(dB, B);
        cuda.CopyHostToDevice<float>(dC, C);

        // Load parameters: three pointer arguments placed back to back.
        int pointerSize = Marshal.SizeOf(typeof(IntPtr));
        int offset = 0;
        cuda.SetParameter(func, offset, dA);
        offset += pointerSize;
        cuda.SetParameter(func, offset, dB);
        offset += pointerSize;
        cuda.SetParameter(func, offset, dC);
        offset += pointerSize;
        cuda.SetParameterSize(func, (uint)offset);

        // Setup execution: enough blocks to cover N elements (integer round-up).
        int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
        cuda.SetFunctionBlockShape(func, threadsPerBlock, 1, 1);

        // Launch execution
        cuda.Launch(func, blocksPerGrid, 1);

        // Wait for and retrieve result
        cuda.SynchronizeContext();
        cuda.CopyDeviceToHost<float>(dC, C);
    }
    finally
    {
        // Clean device memory
        for (int i = 0; i < allocatedCount; i++)
        {
            cuda.Free(allocated[i]);
        }
    }

    return default(T);
}
// Sets evalBlocks, converts TrainedModel.SupportElements with
// CudaHelpers.TransformToERTILPFormat and copies the results to the device.
private void SetCudaDataForERTILP()
{
    // ceil(ThreadsPerRow * sizeSV / evalThreads), computed in floating point.
    double requestedThreads = ThreadsPerRow * sizeSV;
    evalBlocks = (int)Math.Ceiling(requestedThreads / evalThreads);

    // Alignment handed to the transform: ThreadsPerRow * Prefetch.
    int alignment = ThreadsPerRow * Prefetch;

    float[] values;
    int[] columnIndices;
    int[] lengths;
    CudaHelpers.TransformToERTILPFormat(
        out values,
        out columnIndices,
        out lengths,
        TrainedModel.SupportElements,
        alignment,
        ThreadsPerRow);

    // Upload the converted arrays and remember the device pointers.
    valsPtr = cuda.CopyHostToDevice(values);
    idxPtr = cuda.CopyHostToDevice(columnIndices);
    vecLengthPtr = cuda.CopyHostToDevice(lengths);
}