/// <summary>
/// Writes <c>2 * dy[k] - dx[k]</c> into <paramref name="e"/> for the first <paramref name="n"/> elements.
/// </summary>
/// <param name="thread">Not used by this method.</param>
/// <param name="n">Number of elements to process.</param>
/// <param name="dx">First input array.</param>
/// <param name="dy">Second input array; each element is doubled.</param>
/// <param name="e">Output array.</param>
public static void calc_e(GThread thread, int n, int[] dx, int[] dy, int[] e)
{
    int k = 0;
    while (k < n)
    {
        int doubledDy = 2 * dy[k];
        e[k] = doubledDy - dx[k];
        k++;
    }
}
/// <summary>
/// Convolves one column of <c>MemoryMain2D</c> with <c>MemoryKernel</c> and stores the
/// truncated results in <paramref name="outputData"/>.
/// </summary>
/// <param name="thread">Supplies <c>blockIdx.x</c>, which selects the column to process.</param>
/// <param name="outputData">Flat output; element (x, y) is written at <c>x + y * X_SIZE</c>.</param>
public static void ApplyKernel(GThread thread, int[] outputData)
{
    int column = thread.blockIdx.x;
    int lowOffset = KERNEL_SIZE / -2;
    int highOffset = KERNEL_SIZE / 2;

    for (int row = 0; row < Y_SIZE; row++)
    {
        // Accumulator is restarted for every output element.
        float acc = 0;

        for (int dx = lowOffset; dx <= highOffset; dx++)
        {
            int srcX = column + dx;
            for (int dy = lowOffset; dy <= highOffset; dy++)
            {
                int srcY = row + dy;

                // Taps that fall outside the X_SIZE x Y_SIZE area contribute nothing.
                bool outside = srcX < 0 || srcX >= X_SIZE || srcY < 0 || srcY >= Y_SIZE;
                if (!outside)
                {
                    acc += MemoryKernel[dx + KERNEL_SIZE / 2, dy + KERNEL_SIZE / 2] * MemoryMain2D[srcX, srcY];
                }
            }
        }

        outputData[column + row * X_SIZE] = (int)acc;
    }
}
/// <summary>
/// Computes per-block partial sums of the element-wise product of <paramref name="a"/> and
/// <paramref name="b"/> over the first <c>N</c> elements.
/// </summary>
/// <param name="thread">Thread context providing indices, shared memory and barriers.</param>
/// <param name="a">First input vector.</param>
/// <param name="b">Second input vector.</param>
/// <param name="c">Receives one partial sum per block at index <c>blockIdx.x</c>.</param>
public static void Dot(GThread thread, float[] a, float[] b, float[] c)
{
    float[] cache = thread.AllocateShared<float>("cache", threadsPerBlock);
    int lane = thread.threadIdx.x;
    int stride = thread.blockDim.x * thread.gridDim.x;

    // Each thread accumulates its own strided slice of the product.
    float partial = 0;
    for (int idx = lane + thread.blockIdx.x * thread.blockDim.x; idx < N; idx += stride)
    {
        partial += a[idx] * b[idx];
    }

    cache[lane] = partial;
    thread.SyncThreads();

    // Pairwise reduction; the halving scheme requires blockDim.x to be a power of 2.
    for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
    {
        if (lane < half)
        {
            cache[lane] += cache[lane + half];
        }
        thread.SyncThreads();
    }

    if (lane == 0)
    {
        c[thread.blockIdx.x] = cache[0];
    }
}
/// <summary>
/// Builds a 256-bin histogram of <paramref name="buffer"/>: each block accumulates into a
/// shared array and then adds its bins into <paramref name="histo"/> atomically.
/// </summary>
/// <param name="thread">Thread context; the per-thread bin handling assumes 256 threads per block.</param>
/// <param name="buffer">Input bytes.</param>
/// <param name="size">Number of bytes of <paramref name="buffer"/> to process.</param>
/// <param name="histo">Global histogram that is added to (not overwritten).</param>
public static void histo_kernel(GThread thread, byte[] buffer, int size, uint[] histo)
{
    int lane = thread.threadIdx.x;

    // One write per thread clears the block-local bins.
    uint[] temp = thread.AllocateShared<uint>("temp", 256);
    temp[lane] = 0;
    thread.SyncThreads();

    // Walk the input starting at this thread's global index, stepping by the total thread count.
    int stride = thread.blockDim.x * thread.gridDim.x;
    for (int pos = lane + thread.blockIdx.x * thread.blockDim.x; pos < size; pos += stride)
    {
        thread.atomicAdd(ref temp[buffer[pos]], 1);
    }

    // Wait for all local updates, then merge this thread's bin into the global histogram.
    thread.SyncThreads();
    thread.atomicAdd(ref histo[lane], temp[lane]);
}
/// <summary>
/// Shades one pixel: tests every sphere with <c>hit</c>, keeps the colour of the hit with the
/// largest returned value, and writes it as four bytes per pixel.
/// </summary>
/// <param name="thread">Thread context providing the 2D pixel coordinates.</param>
/// <param name="s">Spheres to test; the first <c>SPHERES</c> entries are used.</param>
/// <param name="ptr">Output buffer, four bytes per pixel; the fourth byte is set to 255.</param>
public static void thekernel(GThread thread, SphereOpenCL[] s, byte[] ptr)
{
    int px = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
    int py = thread.threadIdx.y + thread.blockIdx.y * thread.blockDim.y;
    int pixel = px + py * thread.blockDim.x * thread.gridDim.x;

    // Shift the pixel coordinates by half of ray_gui.DIM.
    float ox = (px - ray_gui.DIM / 2);
    float oy = (py - ray_gui.DIM / 2);

    float red = 0;
    float green = 0;
    float blue = 0;
    float best = -INF;

    for (int k = 0; k < SPHERES; k++)
    {
        float shade = 0;
        float depth = hit(s[k], ox, oy, ref shade);
        if (depth <= best)
        {
            continue;
        }

        red = s[k].r * shade;
        green = s[k].g * shade;
        blue = s[k].b * shade;
        best = depth;
    }

    ptr[pixel * 4 + 0] = (byte)(red * 255);
    ptr[pixel * 4 + 1] = (byte)(green * 255);
    ptr[pixel * 4 + 2] = (byte)(blue * 255);
    ptr[pixel * 4 + 3] = 255;
}
/// <summary>
/// Multiplies one pair of elements: <c>c[k] = a[k] * b[k]</c>, where k is <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context; only <c>blockIdx.x</c> is read.</param>
/// <param name="a">First factor array.</param>
/// <param name="b">Second factor array.</param>
/// <param name="c">Output array.</param>
public static void MuxArray(GThread thread, int[] a, int[] b, int[] c)
{
    int element = thread.blockIdx.x;

    // Blocks at or beyond N have nothing to do.
    if (element >= N)
    {
        return;
    }

    c[element] = a[element] * b[element];
}
/// <summary>
/// Computes per-block partial sums of the element-wise product of <paramref name="a"/> and
/// <paramref name="b"/> over the first <c>N</c> elements.
/// </summary>
/// <param name="thread">Thread context providing indices, shared memory and barriers.</param>
/// <param name="a">First input vector.</param>
/// <param name="b">Second input vector.</param>
/// <param name="c">Receives one partial sum per block at index <c>blockIdx.x</c>.</param>
public static void Product(GThread thread, int[] a, int[] b, int[] c)
{
    int lane = thread.threadIdx.x;
    int step = thread.blockDim.x * thread.gridDim.x;

    // NOTE(review): the shared cache holds 4 entries, so this presumably expects
    // at most 4 threads per block - confirm against the launch configuration.
    int[] cache = thread.AllocateShared<int>("cache", 4);

    int partial = 0;
    for (int idx = lane + thread.blockIdx.x * thread.blockDim.x; idx < N; idx += step)
    {
        partial = partial + a[idx] * b[idx];
    }

    cache[lane] = partial;
    thread.SyncThreads();

    // Pairwise reduction of the per-thread partial sums.
    for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
    {
        if (lane < half)
        {
            cache[lane] += cache[lane + half];
        }
        thread.SyncThreads();
    }

    if (lane == 0)
    {
        c[thread.blockIdx.x] = cache[0];
    }
}
/// <summary>
/// Adds <c>a[k] + b[k]</c> onto the existing value of <c>sum[k]</c> for indices below <c>N</c>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="a">First addend array.</param>
/// <param name="b">Second addend array.</param>
/// <param name="sum">Accumulator array; read and then overwritten.</param>
public static void add(GThread thread, int[] a, int[] b, int[] sum)
{
    int step = thread.blockDim.x * thread.gridDim.x;
    for (int k = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x; k < N; k += step)
    {
        int total = sum[k];
        total = total + a[k] + b[k];
        sum[k] = total;
    }
}
/// <summary>
/// Writes <c>2 * dy[k] - dx[k]</c> into <paramref name="e"/> for indices below <paramref name="n"/>,
/// starting at this thread's global index and stepping by the total thread count.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="n">Number of elements to process.</param>
/// <param name="dx">First input array.</param>
/// <param name="dy">Second input array; each element is doubled.</param>
/// <param name="e">Output array.</param>
public static void calc_e_v2(GThread thread, int n, int[] dx, int[] dy, int[] e)
{
    int step = thread.blockDim.x * thread.gridDim.x;
    for (int k = thread.blockDim.x * thread.blockIdx.x + thread.threadIdx.x; k < n; k += step)
    {
        e[k] = 2 * dy[k] - dx[k];
    }
}
/// <summary>
/// Element-wise sum <c>c[k] = a[k] + b[k]</c> for indices below <c>N</c>, starting at
/// <c>blockIdx.x</c> and stepping by <c>gridDim.x</c>.
/// </summary>
/// <param name="thread">Thread context; only block index and grid size are read.</param>
/// <param name="a">First addend array.</param>
/// <param name="b">Second addend array.</param>
/// <param name="c">Output array.</param>
public static void add_0(GThread thread, int[] a, int[] b, int[] c)
{
    for (int k = thread.blockIdx.x; k < N; k += thread.gridDim.x)
    {
        c[k] = a[k] + b[k];
    }
}
/// <summary>
/// Stores <paramref name="value"/> into the element of <paramref name="vector"/> selected by <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context; only <c>blockIdx.x</c> is read.</param>
/// <param name="n">Exclusive upper bound on the index that may be written.</param>
/// <param name="vector">Array to write into.</param>
/// <param name="value">Value to store.</param>
public static void SetValueGPUDouble(GThread thread, int n, double[] vector, double value)
{
    int slot = thread.blockIdx.x;
    if (slot >= n)
    {
        return;
    }

    vector[slot] = value;
}
/// <summary>
/// Stores <paramref name="value"/> into the element of <paramref name="vector"/> selected by <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context; only <c>blockIdx.x</c> is read.</param>
/// <param name="n">Exclusive upper bound on the index that may be written.</param>
/// <param name="vector">Array to write into.</param>
/// <param name="value">Value to store.</param>
public static void SetValueGPUSingle(GThread thread, int n, float[] vector, float value)
{
    int slot = thread.blockIdx.x;
    if (slot >= n)
    {
        return;
    }

    vector[slot] = value;
}
/// <summary>
/// Writes the results of four 64-bit integer intrinsics, evaluated on fixed constants,
/// into the first four elements of <paramref name="output"/>.
/// </summary>
/// <param name="thread">Thread context that provides the intrinsics.</param>
/// <param name="input">Not read by this method.</param>
/// <param name="output">Receives popcountll, clzll, umul64hi and mul64hi results at indices 0..3.</param>
public static void integerIntrinsicsInt64(GThread thread, long[] input, long[] output)
{
    output[0] = thread.popcountll(0x5555555555555555); // 32
    output[1] = thread.clzll(0x1FFFFFFFFF000); // 15
    output[2] = (long)thread.umul64hi(0x0FFFFFFFFF000, 0x0555555555555555);
    output[3] = (long)thread.mul64hi(0x0FFFFFFFFF000, 0x0555555555555555);
}
/// <summary>
/// Copies every element of <paramref name="prev"/> into the same position of <paramref name="next"/>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="prev">Source array; its length bounds the copy.</param>
/// <param name="next">Destination array.</param>
public static void Copy(GThread thread, double[] prev, double[] next)
{
    int k = thread.blockDim.x * thread.blockIdx.x + thread.threadIdx.x;
    while (k < prev.Length)
    {
        next[k] = prev[k];
        k += thread.blockDim.x * thread.gridDim.x;
    }
}
/// <summary>
/// Writes <c>2 * dy[k] - dx[k]</c> into <paramref name="e"/> for indices below <paramref name="n"/>,
/// starting at this thread's global index and stepping by the total thread count.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="n">Number of elements to process.</param>
/// <param name="dx">First input array.</param>
/// <param name="dy">Second input array; each element is doubled.</param>
/// <param name="e">Output array.</param>
public static void calc_e_v2(GThread thread, int n, int[] dx, int[] dy, int[] e)
{
    int totalThreads = thread.blockDim.x * thread.gridDim.x;
    int position = thread.blockDim.x * thread.blockIdx.x + thread.threadIdx.x;

    for (; position < n; position += totalThreads)
    {
        int doubled = 2 * dy[position];
        e[position] = doubled - dx[position];
    }
}
/// <summary>
/// Launches <c>childKernel</c> from device code, waits for the device, and queries the device count.
/// </summary>
/// <param name="thread">Thread context used for the nested launch and the device calls.</param>
/// <param name="a">Forwarded to <c>childKernel</c>.</param>
/// <param name="c">Forwarded to <c>childKernel</c>.</param>
/// <param name="coeff">Multiplied by <c>numberYouFirstThoughtOf()</c> before being forwarded.</param>
public static void parentKernel(GThread thread, int[] a, int[] c, short coeff)
{
    // An earlier variant that launched N / 2 with a computed second launch argument
    // was marked BROKEN in the original and stays disabled.
    // Launch arguments (N, 1) are presumably (grid size, block size) - confirm.
    thread.Launch(N, 1, "childKernel", a, c, coeff * numberYouFirstThoughtOf());

    // The return codes are captured but not inspected.
    int status = thread.SynchronizeDevice();

    int deviceCount = 0;
    status = thread.GetDeviceCount(ref deviceCount);
}
/// <summary>
/// Handles a failed thread: logs the failure, refreshes the status and prints the thread's std files.
/// </summary>
/// <param name="thread">The thread that failed.</param>
/// <param name="e">The exception reported for the failure.</param>
private void ga_ThreadFailed(GThread thread, Exception e)
{
    // Handle one completion/failure event at a time so they do not overwrite each other.
    lock (threadCompleteLock)
    {
        string message = "Thread failed: " + thread.Id + "\n" + e.ToString();
        logger.Debug(message);

        UpdateStatus();

        RenderThread renderThread = thread as RenderThread;
        printStdFiles(renderThread);
    }
}
/// <summary>
/// Adds <c>thread.warpSize</c> to the element of <paramref name="a"/> selected by <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context providing <c>blockIdx.x</c> and <c>warpSize</c>.</param>
/// <param name="a">Array updated in place.</param>
public static void warpSizeDevice(GThread thread, int[] a)
{
    int slot = thread.blockIdx.x;
    if (slot >= N)
    {
        return;
    }

    int bumped = thread.warpSize + a[slot];
    a[slot] = bumped;
}
/// <summary>
/// Sets <c>next[k] = prev[k] * k</c> for every index of <paramref name="next"/>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="prev">Source array.</param>
/// <param name="next">Destination array; its length bounds the loop.</param>
public static void MulByIndex(GThread thread, int[] prev, int[] next)
{
    int k = thread.blockDim.x * thread.blockIdx.x + thread.threadIdx.x;
    while (k < next.Length)
    {
        next[k] = prev[k] * k;
        k += thread.blockDim.x * thread.gridDim.x;
    }
}
/// <summary>
/// Sets the first <paramref name="kernelCount"/> elements of <paramref name="array"/> to <paramref name="value"/>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="kernelCount">Number of elements to fill.</param>
/// <param name="array">Array to fill.</param>
/// <param name="value">Value to store.</param>
public static void FillArrayIntGPU(GThread thread, int kernelCount, int[] array, int value)
{
    var step = thread.blockDim.x * thread.gridDim.x;
    for (var slot = (thread.blockIdx.x * thread.blockDim.x) + thread.threadIdx.x; slot < kernelCount; slot += step)
    {
        array[slot] = value;
    }
}
/// <summary>
/// Element-wise sum <c>c[k] = a[k] + b[k]</c> for indices below <c>N</c>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="a">First addend array.</param>
/// <param name="b">Second addend array.</param>
/// <param name="c">Output array.</param>
public static void add(GThread thread, int[] a, int[] b, int[] c)
{
    int step = thread.blockDim.x * thread.gridDim.x;
    for (int k = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x; k < N; k += step)
    {
        c[k] = a[k] + b[k];
    }
}
/// <summary>
/// Writes <c>GMath.Tanh(inputs[k])</c> into <c>outputs[k]</c> for the first <paramref name="size"/> elements.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the launch dimensions.</param>
/// <param name="inputs">Input values.</param>
/// <param name="outputs">Receives the hyperbolic tangent of each input.</param>
/// <param name="size">Number of elements to process.</param>
public static void CalculateTanhGPU(GThread thread, float[] inputs, float[] outputs, int size)
{
    var index = (thread.blockIdx.x * thread.blockDim.x) + thread.threadIdx.x;

    // Step by the number of threads actually launched. A stride built from fixed
    // maximum constants skips elements whenever the launch is smaller than those maxima.
    var stride = thread.blockDim.x * thread.gridDim.x;

    while (index < size)
    {
        outputs[index] = GMath.Tanh(inputs[index]);
        index += stride;
    }
}
/// <summary>
/// Initialises this thread's slice of <paramref name="workMemory"/>: the first
/// <paramref name="nodeAmount"/> entries get 0, 1, 2, ... and the next <paramref name="nodeAmount"/> entries get 0.
/// </summary>
/// <param name="thread">Thread context; the global thread index selects the slice.</param>
/// <param name="nodeAmount">Half the slice length.</param>
/// <param name="workMemory">Buffer holding one slice of <c>2 * nodeAmount</c> entries per thread.</param>
private static void fillWorkMemory(GThread thread, int nodeAmount, int[] workMemory)
{
    var globalThread = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x;
    var sliceStart = globalThread * nodeAmount * 2;

    int k = 0;
    while (k < nodeAmount)
    {
        workMemory[sliceStart + k] = k;
        workMemory[sliceStart + nodeAmount + k] = 0;
        k++;
    }
}
/// <summary>
/// Prints the id and result of a finished thread to the console.
/// </summary>
/// <param name="th">The finished thread; must be a <c>MultiplierThread</c>.</param>
static void ThreadFinished(GThread th)
{
    // The callback receives the base type, so cast back to reach Result.
    var finished = (MultiplierThread)th;

    Console.WriteLine("thread # {0} finished with result '{1}'", finished.Id, finished.Result);
}
/// <summary>
/// Element-wise sum <c>c[k] = a[k] + b[k]</c> for every index of <paramref name="a"/>,
/// starting at <c>blockIdx.x</c> and stepping by <c>gridDim.x</c>.
/// </summary>
/// <param name="thread">Thread context; only block index and grid size are read.</param>
/// <param name="a">First addend array; its length bounds the loop.</param>
/// <param name="b">Second addend array.</param>
/// <param name="c">Output array.</param>
public static void add_1(GThread thread, int[] a, int[] b, int[] c)
{
    for (int k = thread.blockIdx.x; k < a.Length; k += thread.gridDim.x)
    {
        c[k] = a[k] + b[k];
    }
}
/// <summary>
/// Adds one pair of elements: <c>c[k] = a[k] + b[k]</c>, where k is <c>get_global_id(0)</c>.
/// </summary>
/// <param name="thread">Thread context providing the global id.</param>
/// <param name="a">First addend array.</param>
/// <param name="b">Second addend array.</param>
/// <param name="c">Output array.</param>
public static void adder(GThread thread, int[] a, int[] b, int[] c)
{
    int element = thread.get_global_id(0);

    // Work-items at or beyond N have nothing to do.
    if (element >= N)
    {
        return;
    }

    c[element] = a[element] + b[element];
}
/// <summary>
/// Evaluates path permutations, keeps the one with the smallest distance per thread, and
/// reduces those to one best (distance, permutation) pair per block.
/// </summary>
/// <param name="thread">Thread context providing indices, shared memory and barriers.</param>
/// <param name="permutations">Total number of permutations to evaluate.</param>
/// <param name="cities">Number of cities, forwarded to <c>FindPathDistance</c>.</param>
/// <param name="latitudes">City latitudes, forwarded to <c>FindPathDistance</c>.</param>
/// <param name="longitudes">City longitudes, forwarded to <c>FindPathDistance</c>.</param>
/// <param name="answer">Receives one result per block at index <c>blockIdx.x</c>.</param>
public static void GpuFindPathDistance(GThread thread, int permutations, int cities, float[] latitudes, float[] longitudes, AnswerStruct[] answer)
{
    var lane = thread.threadIdx.x;
    var blockWidth = thread.blockDim.x;
    var totalThreads = blockWidth * thread.gridDim.x;

    var paths = thread.AllocateShared<int>("path", _threadsPerBlock, _cities);
    var sharedDistances = thread.AllocateShared<float>("dist", _threadsPerBlock);
    var sharedPermutations = thread.AllocateShared<int>("perm", _threadsPerBlock);

    // Per-thread search over this thread's strided share of the permutations.
    var shortest = float.MaxValue;
    var shortestId = 0;
    for (var candidate = lane + thread.blockIdx.x * blockWidth; candidate < permutations; candidate += totalThreads)
    {
        var distance = FindPathDistance(permutations, candidate, cities, latitudes, longitudes, paths, lane);
        if (distance < shortest)
        {
            shortest = distance;
            shortestId = candidate;
        }
    }

    sharedDistances[lane] = shortest;
    sharedPermutations[lane] = shortestId;
    thread.SyncThreads();

    // Pairwise min-reduction across the block (pattern from "CUDA By Example", page 79).
    var half = blockWidth / 2;
    while (half > 0)
    {
        if (lane < half && sharedDistances[lane] > sharedDistances[lane + half])
        {
            sharedDistances[lane] = sharedDistances[lane + half];
            sharedPermutations[lane] = sharedPermutations[lane + half];
        }
        thread.SyncThreads();
        half /= 2;
    }

    if (lane == 0)
    {
        answer[thread.blockIdx.x].distance = sharedDistances[0];
        answer[thread.blockIdx.x].pathNo = sharedPermutations[0];
    }
}
/// <summary>
/// Computes one cell of <paramref name="output"/> by tracing back along (<paramref name="u"/>,
/// <paramref name="v"/>) and bilinearly interpolating <paramref name="input"/> at that position.
/// </summary>
/// <param name="thread">Thread context used to derive the cell coordinates.</param>
/// <param name="N">Grid size; cells with a coordinate at or above <c>N + 2</c> are skipped.</param>
/// <param name="b">Not used by this method.</param>
/// <param name="dt">Time step; scaled by <paramref name="N"/> before use.</param>
/// <param name="output">Destination field.</param>
/// <param name="input">Source field that is sampled.</param>
/// <param name="u">Velocity component applied along the first coordinate.</param>
/// <param name="v">Velocity component applied along the second coordinate.</param>
public static void Advect(GThread thread, int N, int b, float dt, float[] output, float[] input, float[] u, float[] v)
{
    int col = CalculateThreadIndexX(thread);
    int row = CalculateThreadIndexY(thread);

    int size = N + 2;
    if (col >= size || row >= size)
    {
        return;
    }

    // Trace back from the cell to the position the value is taken from.
    float scaledDt = dt * N;
    float srcX = col - scaledDt * u[IX(col, row, N)];
    float srcY = row - scaledDt * v[IX(col, row, N)];

    // Clamp the source position to [0.5, N + 0.5] on the first axis.
    if (srcX < 0.5f)
    {
        srcX = 0.5f;
    }
    if (srcX > N + 0.5f)
    {
        srcX = N + 0.5f;
    }
    int x0 = (int)srcX;
    int x1 = x0 + 1;

    // Same clamp on the second axis.
    if (srcY < 0.5f)
    {
        srcY = 0.5f;
    }
    if (srcY > N + 0.5f)
    {
        srcY = N + 0.5f;
    }
    int y0 = (int)srcY;
    int y1 = y0 + 1;

    // Interpolation weights from the fractional parts.
    float wx1 = srcX - x0;
    float wx0 = 1 - wx1;
    float wy1 = srcY - y0;
    float wy0 = 1 - wy1;

    output[IX(col, row, N)] = wx0 * (wy0 * input[IX(x0, y0, N)] + wy1 * input[IX(x0, y1, N)]) + wx1 * (wy0 * input[IX(x1, y0, N)] + wy1 * input[IX(x1, y1, N)]);
}
/// <summary>
/// Checks that a working directory set through the <c>GThread</c> base reference can be read
/// back from the mock.
/// </summary>
public void SetWorkingDirectoryTestSimpleScenario()
{
    string expectedDirectory = @"C:\";
    GThreadMock mock = new GThreadMock();

    // Call the setter through the base type.
    GThread baseReference = mock;
    baseReference.SetWorkingDirectory(expectedDirectory);

    var actualDirectory = mock.GetWorkingDirectory();
    Assert.AreEqual(expectedDirectory, actualDirectory);
}
/// <summary>
/// Replaces every element in row <c>blockIdx.x</c> of <paramref name="result"/> with the result
/// of calling <c>Add</c> on the element with itself as argument.
/// </summary>
/// <param name="thread">Thread context; <c>blockIdx.x</c> selects the first index.</param>
/// <param name="result">2D array updated in place.</param>
public static void global2DStructArray(GThread thread, ComplexFloat[,] result)
{
    int row = thread.blockIdx.x;
    for (int col = 0; col < result.GetLength(1); col++)
    {
        result[row, col] = result[row, col].Add(result[row, col]);
    }
}
/// <summary>
/// Multiplies the first <c>YSIZE</c> elements in row <c>blockIdx.x</c> of
/// <paramref name="result"/> by the array's rank.
/// </summary>
/// <param name="thread">Thread context; <c>blockIdx.x</c> selects the first index.</param>
/// <param name="result">2D array updated in place.</param>
public static void global2DArray(GThread thread, int[,] result)
{
    int row = thread.blockIdx.x;
    for (int col = 0; col < YSIZE; col++)
    {
        result[row, col] = result[row, col] * result.Rank;
    }
}
/// <summary>
/// Handles a finished thread: logs it, refreshes the status, unpacks the thread and prints its std files.
/// </summary>
/// <param name="thread">The thread that finished.</param>
private void ga_ThreadFinish(GThread thread)
{
    // Handle one completion event at a time so they do not overwrite each other.
    lock (threadCompleteLock)
    {
        string message = "Thread finished: " + thread.Id;
        logger.Debug(message);

        UpdateStatus();
        unpackThread(thread);

        RenderThread renderThread = thread as RenderThread;
        printStdFiles(renderThread);
    }
}
/// <summary>
/// Appends the result of a finished thread to the query set as a "Bot" entry and refreshes the display.
/// </summary>
/// <param name="th">The finished thread; must be an <c>eduGRID_Thread</c>.</param>
private void ThreadFinished(GThread th)
{
    // The callback receives the base type, so cast back to reach Result.
    var finished = (eduGRID_Thread)th;

    var lastIndex = this.Queryset.GetUpperBound(0);
    this.Append_Queryset(lastIndex, "", "", "Bot", finished.Result);

    this.Refresh_Display();
}
/// <summary>
/// Replaces every element x of <paramref name="d"/> with <c>(x - 1) * (x - 2)</c>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="a">Not used by this method.</param>
/// <param name="b">Not used by this method.</param>
/// <param name="c">Not used by this method.</param>
/// <param name="d">Array updated in place.</param>
/// <param name="e">Not used by this method.</param>
public static void MacLane(GThread thread, int[,] a, int[,] b, int[] c, int[] d, int[] e)
{
    int columns = d.Length;
    int k = thread.blockDim.x * thread.blockIdx.x + thread.threadIdx.x;

    while (k < columns)
    {
        int current = d[k];
        d[k] = (current - 1) * (current - 2);
        k += thread.blockDim.x * thread.gridDim.x;
    }
}
/// <summary>
/// Stores <c>ComplexD.Subtract(a[x, y], b[x, y])</c> in <paramref name="c"/> for the first
/// <c>YSIZE</c> elements of row <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context; <c>blockIdx.x</c> selects the first index.</param>
/// <param name="a">Left operands.</param>
/// <param name="b">Right operands.</param>
/// <param name="c">Output array.</param>
public static void complexSub(GThread thread, ComplexD[,] a, ComplexD[,] b, ComplexD[,] c)
{
    int row = thread.blockIdx.x;
    for (int col = 0; col < YSIZE; col++)
    {
        c[row, col] = ComplexD.Subtract(a[row, col], b[row, col]);
    }
}
/// <summary>
/// Computes <c>result[k] = t1[k] - t2[k % t2.Length]</c> for this thread's global index k.
/// </summary>
/// <param name="thread">Thread context providing the global index.</param>
/// <param name="t1">Minuend array.</param>
/// <param name="t2">Subtrahend array; indexed modulo its length.</param>
/// <param name="result">Output array; its length bounds the valid indices.</param>
private static void GpuSub(GThread thread, float[] t1, float[] t2, float[] result)
{
    int k = (thread.blockDim.x * thread.blockIdx.x) + thread.threadIdx.x;
    if (k < result.Length)
    {
        int wrapped = k % t2.Length;
        result[k] = t1[k] - t2[wrapped];
    }
}
/// <summary>
/// Stores <c>ComplexD.Multiply(a[x, y], b[x, y])</c> in <paramref name="c"/> for the first
/// <c>YSIZE</c> elements of row <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context; <c>blockIdx.x</c> selects the first index.</param>
/// <param name="a">Left operands.</param>
/// <param name="b">Right operands.</param>
/// <param name="c">Output array.</param>
public static void complexMpy(GThread thread, ComplexD[,] a, ComplexD[,] b, ComplexD[,] c)
{
    int row = thread.blockIdx.x;
    for (int col = 0; col < YSIZE; col++)
    {
        c[row, col] = ComplexD.Multiply(a[row, col], b[row, col]);
    }
}
/// <summary>
/// For each index k of <paramref name="c"/>, replaces <c>c[k]</c> with <c>a[k, c[k]]</c>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="a">2D lookup table.</param>
/// <param name="b">Not used by this method.</param>
/// <param name="c">On entry the second index to look up per row; on exit the looked-up values.</param>
/// <param name="d">Not used by this method.</param>
/// <param name="e">Not used by this method.</param>
public static void Select(GThread thread, int[,] a, int[,] b, int[] c, int[] d, int[] e)
{
    int rows = c.GetLength(0);
    int k = thread.blockDim.x * thread.blockIdx.x + thread.threadIdx.x;

    while (k < rows)
    {
        int chosenColumn = c[k];
        c[k] = a[k, chosenColumn];
        k += thread.blockDim.x * thread.gridDim.x;
    }
}
/// <summary>
/// Element-wise sum <c>c[k] = a[k] + b[k]</c> for every index of <paramref name="c"/>,
/// starting at <c>blockIdx.x</c> and stepping by <c>gridDim.x</c>.
/// </summary>
/// <param name="thread">Thread context; only block index and grid size are read.</param>
/// <param name="a">First addend array.</param>
/// <param name="b">Second addend array.</param>
/// <param name="c">Output array; its length bounds the loop.</param>
public static void add_4(GThread thread, int[] a, int[] b, int[] c)
{
    // The rank is read but never used; presumably kept to exercise the Rank property - confirm.
    int rank = a.Rank;

    for (int k = thread.blockIdx.x; k < c.Length; k += thread.gridDim.x)
    {
        c[k] = a[k] + b[k];
    }
}
/// <summary>
/// Computes the upper 64 bits of the full 128-bit product of two unsigned 64-bit integers.
/// </summary>
/// <param name="thread">The thread (not used by the computation).</param>
/// <param name="x">First factor.</param>
/// <param name="y">Second factor.</param>
/// <returns>The most significant 64 bits of <c>x * y</c>.</returns>
/// <exception cref="NotSupportedException">Thrown when compiled with <c>NET35</c> defined.</exception>
public static ulong umul64hi(this GThread thread, ulong x, ulong y)
{
#if !NET35
    // Multiply at arbitrary precision, then drop the low 64 bits.
    BigInteger wideProduct = BigInteger.Multiply(x, y);
    BigInteger highPart = wideProduct >> 64;
    return (ulong)highPart;
#else
    throw new NotSupportedException();
#endif
}
/// <summary>
/// Stores <c>ComplexF.Divide(a[x, y], b[x, y])</c> in <paramref name="c"/> for the first
/// <c>YSIZE</c> elements of row <c>blockIdx.x</c>.
/// </summary>
/// <param name="thread">Thread context; <c>blockIdx.x</c> selects the first index.</param>
/// <param name="a">Dividends.</param>
/// <param name="b">Divisors.</param>
/// <param name="c">Output array.</param>
public static void complexDiv(GThread thread, ComplexF[,] a, ComplexF[,] b, ComplexF[,] c)
{
    int row = thread.blockIdx.x;
    for (int col = 0; col < YSIZE; col++)
    {
        c[row, col] = ComplexF.Divide(a[row, col], b[row, col]);
    }
}
/// <summary>
/// Passes "this thread's input equals 1" to <c>SyncThreadsCount</c> and has thread 0 store the
/// returned count in <c>output[0]</c>.
/// </summary>
/// <param name="thread">Thread context providing <c>threadIdx.x</c> and <c>SyncThreadsCount</c>.</param>
/// <param name="input">One value per thread.</param>
/// <param name="output">Receives the count at index 0.</param>
public static void SyncThreadCountKernel(GThread thread, int[] input, int[] output)
{
    var lane = thread.threadIdx.x;
    bool isOne = input[lane] == 1;

    // Every thread makes the call; only thread 0 publishes the result.
    var matches = thread.SyncThreadsCount(isOne);
    if (lane == 0)
    {
        output[0] = matches;
    }
}
/// <summary>
/// Count the number of consecutive leading zero bits, starting at the most significant bit (bit 63) of <paramref name="val"/>.
/// </summary>
/// <param name="thread">The thread (not used by the computation).</param>
/// <param name="val">The value whose 64-bit pattern is examined.</param>
/// <returns>Returns a value between 0 and 64 inclusive representing the number of zero bits;
/// 64 for zero and 0 for any negative value (bit 63 set).</returns>
public static int clzll(this GThread thread, long val)
{
    // Work on the unsigned bit pattern: '>>' on a signed long is an arithmetic shift,
    // so a negative value would converge to -1 and the loop would never terminate.
    ulong bits = unchecked((ulong)val);

    int significantBits = 0;
    while (bits != 0)
    {
        bits >>= 1;
        significantBits++;
    }

    return 64 - significantBits;
}
/// <summary>
/// Count the number of consecutive leading zero bits, starting at the most significant bit (bit 31) of <paramref name="val"/>.
/// </summary>
/// <param name="thread">The thread (not used by the computation).</param>
/// <param name="val">The value whose 32-bit pattern is examined.</param>
/// <returns>Returns a value between 0 and 32 inclusive representing the number of zero bits;
/// 32 for zero and 0 for any negative value (bit 31 set).</returns>
public static int clz(this GThread thread, int val)
{
    // Work on the unsigned bit pattern: '>>' on a signed int is an arithmetic shift,
    // so a negative value would converge to -1 and the loop would never terminate.
    uint bits = unchecked((uint)val);

    int significantBits = 0;
    while (bits != 0)
    {
        bits >>= 1;
        significantBits++;
    }

    return 32 - significantBits;
}
/// <summary>
/// Sums five arrays element-wise into <paramref name="sum"/> for indices below <c>N</c>.
/// </summary>
/// <param name="thread">Thread context providing the starting index and the step.</param>
/// <param name="a">First addend array.</param>
/// <param name="b">Second addend array.</param>
/// <param name="c">Third addend array.</param>
/// <param name="d">Fourth addend array.</param>
/// <param name="e">Fifth addend array.</param>
/// <param name="sum">Output array.</param>
public static void add(GThread thread, int[] a, int[] b, int[] c, int[] d, int[] e, int[] sum)
{
    int step = thread.blockDim.x * thread.gridDim.x;

    // Start at this thread's global index.
    for (int k = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x; k < N; k += step)
    {
        sum[k] = a[k] + b[k] + c[k] + d[k] + e[k];
    }
}
/// <summary>
/// Writes one four-byte pixel: the first byte is <c>255 * julia(x, y)</c>, the next two are 0
/// and the last is 255.
/// </summary>
/// <param name="thread">Thread context; the block indices give the pixel coordinates.</param>
/// <param name="ptr">Output buffer, four bytes per pixel.</param>
public static void thekernel(GThread thread, byte[] ptr)
{
    int px = thread.blockIdx.x;
    int py = thread.blockIdx.y;
    int pixel = px + py * thread.gridDim.x;

    int inSet = julia(px, py);

    ptr[pixel * 4 + 0] = (byte)(255.0F * inSet);
    ptr[pixel * 4 + 1] = 0;
    ptr[pixel * 4 + 2] = 0;
    ptr[pixel * 4 + 3] = 255;
}
/// <summary>
/// For this thread's index, averages three consecutive elements (wrapping at 256) of
/// <paramref name="a"/> and of <paramref name="b"/>, and stores half of their truncated sum in <paramref name="c"/>.
/// </summary>
/// <param name="thread">Thread context providing the global index.</param>
/// <param name="a">First input array.</param>
/// <param name="b">Second input array.</param>
/// <param name="c">Output array.</param>
public static void thekernel(GThread thread, int[] a, int[] b, int[] c)
{
    int idx = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
    if (idx >= N)
    {
        return;
    }

    // NOTE(review): the neighbours wrap at a hard-coded 256, which presumably matches N - confirm.
    int next = (idx + 1) % 256;
    int afterNext = (idx + 2) % 256;

    float meanA = (a[idx] + a[next] + a[afterNext]) / 3.0f;
    float meanB = (b[idx] + b[next] + b[afterNext]) / 3.0f;

    // The sum is truncated to int first and then halved with integer division.
    int truncatedSum = (int)(meanA + meanB);
    c[idx] = truncatedSum / 2;
}
/// <summary>
/// Passes "this thread's input equals 1" to <c>Ballot</c>; the first thread of every group of
/// 32 stores the returned value in <paramref name="output"/> at the group's index.
/// </summary>
/// <param name="thread">Thread context providing <c>threadIdx.x</c> and <c>Ballot</c>.</param>
/// <param name="input">One value per thread.</param>
/// <param name="output">Receives one ballot value per group of 32 threads.</param>
public static void BallotKernel(GThread thread, int[] input, int[] output)
{
    var lane = thread.threadIdx.x;
    var group = thread.threadIdx.x / 32;
    var laneInGroup = thread.threadIdx.x % 32;

    bool isOne = input[lane] == 1;

    // Every thread makes the call; only the first lane of each group publishes the result.
    var votes = thread.Ballot(isOne);
    if (laneInGroup == 0)
    {
        output[group] = votes;
    }
}
/// <summary>
/// Computes one element of the matrix product <c>GPU_C = GPU_A * GPU_B</c> for square
/// matrices of dimension <paramref name="Size"/>.
/// </summary>
/// <param name="thread">Thread context providing the 2D element coordinates.</param>
/// <param name="GPU_A">Left matrix.</param>
/// <param name="GPU_B">Right matrix.</param>
/// <param name="GPU_C">Result matrix; the element is zeroed and then accumulated in place.</param>
/// <param name="Size">Matrix dimension.</param>
public static void GPU_MA(GThread thread, int[,] GPU_A, int[,] GPU_B, int[,] GPU_C, int Size)
{
    int col = thread.threadIdx.x + thread.blockDim.x * thread.blockIdx.x;
    int row = thread.threadIdx.y + thread.blockDim.y * thread.blockIdx.y;

    // Threads outside the matrix have nothing to compute.
    if (col >= Size || row >= Size)
    {
        return;
    }

    GPU_C[row, col] = 0;
    int k = 0;
    while (k < Size)
    {
        GPU_C[row, col] += GPU_A[row, k] * GPU_B[k, col];
        k++;
    }
}
/// <summary>
/// Fills the first <c>end[0] - start[0]</c> elements of <paramref name="result"/> with
/// <c>Math.Sin(i + 3.14)</c>.
/// </summary>
/// <param name="thread">Thread context; block index and grid size drive the outer loop.</param>
/// <param name="start">Only element 0 is read.</param>
/// <param name="end">Only element 0 is read.</param>
/// <param name="result">Output array.</param>
public static void DoSomeMath(GThread thread, int[] start, int[] end, double[] result)
{
    int count = end[0] - start[0];

    // The fill index is not reset between outer iterations, so the fill
    // happens on the first pass only and later passes do no writes.
    int i = 0;
    for (int tid = thread.blockIdx.x; tid < N; tid += thread.gridDim.x)
    {
        for (; i < count; i++)
        {
            result[i] = Math.Sin((i + 3.14));
        }
    }
}
/// <summary>
/// Converts interleaved byte samples to floats via <c>sample / 128 - 1</c> and stores them
/// per channel in <paramref name="output"/>.
/// </summary>
/// <param name="thread">Thread context; block index and grid size select the sample positions.</param>
/// <param name="samples">Interleaved input; sample t of channel c is at <c>t * channels + c</c>.</param>
/// <param name="output">Output indexed as [channel, sample]; its dimensions define both counts.</param>
public static void GetSamplesByte(GThread thread, byte[] samples, float[,] output)
{
    var channels = output.GetLength(0);
    var sampleCount = output.GetLength(1);
    const float mid = 128;

    for (int t = thread.blockIdx.x; t < sampleCount; t += thread.gridDim.x)
    {
        int ch = 0;
        while (ch < channels)
        {
            output[ch, t] = (samples[(t * channels) + ch] / mid) - 1.0f;
            ch++;
        }
    }
}
/// <summary>
/// Computes the Mandelbrot iteration count for one cell of <paramref name="result"/>.
/// </summary>
/// <param name="thread">Thread context; global id 0 is the row and global id 1 is the column.</param>
/// <param name="minX">Real coordinate of column 0.</param>
/// <param name="maxY">Imaginary coordinate of row 0.</param>
/// <param name="stepX">Real increment per column.</param>
/// <param name="stepY">Imaginary decrement per row.</param>
/// <param name="result">Output indexed as [row, column].</param>
public static void CalculateMandelbrot(GThread thread, float minX, float maxY, float stepX, float stepY, int[,] result)
{
    var row = thread.get_global_id(0);
    var col = thread.get_global_id(1);

    // Only work-items that map to a cell inside the output do any work.
    if (col < result.GetLength(1) && row < result.GetLength(0))
    {
        float real = minX + col * stepX;
        float imaginary = maxY - row * stepY;
        result[row, col] = GetMandelbrotIterationsFor(real, imaginary);
    }
}
/// <summary>
/// Computes one element of a matrix product where the square matrices are stored as flat
/// arrays with row stride <paramref name="Size"/>.
/// </summary>
/// <param name="thread">Thread context providing the flat element index.</param>
/// <param name="GPU_A">Left matrix, flat.</param>
/// <param name="GPU_B">Right matrix, flat.</param>
/// <param name="GPU_C">Result matrix, flat; the element is zeroed and then accumulated in place.</param>
/// <param name="Size">Matrix dimension.</param>
/// <param name="Size1d">Exclusive upper bound on the flat index that is processed.</param>
public static void GPU_MA(GThread thread, int[] GPU_A, int[] GPU_B, int[] GPU_C, int Size, int Size1d)
{
    int flat = thread.threadIdx.x + thread.blockDim.x * thread.blockIdx.x;
    if (flat >= Size1d)
    {
        return;
    }

    GPU_C[flat] = 0;

    // Split the flat index into its row and column.
    int row = flat / Size;
    int col = flat % Size;

    int k = 0;
    while (k < Size)
    {
        GPU_C[flat] += GPU_A[(row * Size) + k] * GPU_B[(k * Size) + col];
        k++;
    }
}
/// <summary>
/// Multiplies every sample of every channel by <paramref name="amplication"/> in place.
/// </summary>
/// <param name="thread">Thread context; block index and grid size select the sample positions.</param>
/// <param name="samples">Samples indexed as [channel, sample]; updated in place.</param>
/// <param name="amplication">Gain factor applied to each sample.</param>
public static void Amplify(GThread thread, float[,] samples, float amplication)
{
    var channels = samples.GetLength(0);
    var sampleCount = samples.GetLength(1);

    for (int t = thread.blockIdx.x; t < sampleCount; t += thread.gridDim.x)
    {
        int ch = 0;
        while (ch < channels)
        {
            // No clipping happens here; per the original author's note the framework clips
            // over-amplified values, so quality is limited - this is example code.
            samples[ch, t] *= amplication;
            ch++;
        }
    }
}
/// <summary>
/// Converts interleaved 16-bit samples to floats via <c>(sample - min) / mid - 1</c> and stores
/// them per channel in <paramref name="output"/>.
/// </summary>
/// <param name="thread">Thread context; block index and grid size select the sample positions.</param>
/// <param name="samples">Interleaved input; sample t of channel c is at <c>t * channels + c</c>.</param>
/// <param name="output">Output indexed as [channel, sample]; its dimensions define both counts.</param>
public static void GetSamplesInt16(GThread thread, short[] samples, float[,] output)
{
    var channels = output.GetLength(0);
    var sampleCount = output.GetLength(1);

    // mid is 32768 and min is -32767.
    const float mid = -short.MinValue;
    const float min = -short.MaxValue;

    for (int t = thread.blockIdx.x; t < sampleCount; t += thread.gridDim.x)
    {
        int ch = 0;
        while (ch < channels)
        {
            output[ch, t] = ((samples[(t * channels) + ch] - min) / mid) - 1.0f;
            ch++;
        }
    }
}
/// <summary>
/// Converts interleaved 64-bit samples with <c>ConvertDoubleToFloat</c> and stores them per
/// channel in <paramref name="output"/>.
/// </summary>
/// <param name="thread">Thread context; block index and grid size select the sample positions.</param>
/// <param name="samples">Interleaved input; sample t of channel c is at <c>t * channels + c</c>.</param>
/// <param name="output">Output indexed as [channel, sample]; its dimensions define both counts.</param>
public static void GetSamplesDouble(GThread thread, ulong[] samples, float[,] output)
{
    // Warning from the original author: untested, because the LAV Audio Decoder
    // does not support output in double format.
    var channels = output.GetLength(0);
    var sampleCount = output.GetLength(1);

    for (int t = thread.blockIdx.x; t < sampleCount; t += thread.gridDim.x)
    {
        int ch = 0;
        while (ch < channels)
        {
            output[ch, t] = ConvertDoubleToFloat(samples[(t * channels) + ch]);
            ch++;
        }
    }
}
/// <summary>
/// Writes one grey pixel whose intensity depends on its distance from the point
/// (<c>DIM/2</c>, <c>DIM/2</c>) and on <paramref name="ticks"/>.
/// </summary>
/// <param name="thread">Thread context providing the 2D pixel coordinates.</param>
/// <param name="ptr">Output buffer, four bytes per pixel; the fourth byte is set to 255.</param>
/// <param name="ticks">Value that shifts the cosine argument.</param>
public static void thekernel(GThread thread, byte[] ptr, int ticks)
{
    // Map thread and block indices to a pixel position.
    int px = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
    int py = thread.threadIdx.y + thread.blockIdx.y * thread.blockDim.y;
    int pixel = px + py * thread.blockDim.x * thread.gridDim.x;

    // Distance from (DIM/2, DIM/2).
    float relX = px - DIM / 2;
    float relY = py - DIM / 2;
    float dist = GMath.Sqrt(relX * relX + relY * relY);

    // A cosine of the distance, shifted by ticks and damped as the distance grows.
    byte shade = (byte)(128.0f + 127.0f * GMath.Cos(dist / 10.0f - ticks / 7.0f) / (dist / 10.0f + 1.0f));

    ptr[pixel * 4 + 0] = shade;
    ptr[pixel * 4 + 1] = shade;
    ptr[pixel * 4 + 2] = shade;
    ptr[pixel * 4 + 3] = 255;
}
/// <summary>
/// Fills row-start and column-index arrays for a pattern in which row 0 has the single
/// column 0 and every row k from 1 to n-1 has the two columns k-1 and k.
/// </summary>
/// <param name="thread">Not used by this method.</param>
/// <param name="n">Number of rows.</param>
/// <param name="rowsICP">Receives the start offset of each row, plus the total entry count at index <paramref name="n"/>.</param>
/// <param name="colsICP">Receives the column index of each entry.</param>
public static void DefineLower(GThread thread, int n, int[] rowsICP, int[] colsICP)
{
    // NOTE(review): this looks like a compressed-row layout of a lower bidiagonal pattern - confirm.
    rowsICP[0] = 0;
    colsICP[0] = 0;

    int next = 1;
    for (int row = 1; row < n; row++)
    {
        rowsICP[row] = next;

        // Each row has exactly two entries: the sub-diagonal and the diagonal.
        colsICP[next] = row - 1;
        next++;
        colsICP[next] = row;
        next++;
    }

    rowsICP[n] = next;
}
/// <summary>
/// Runs the <c>SIMDFunctionTest</c> kernel three times on 1024 x 1024 element arrays, prints
/// the time of each run, and on the first run checks the result against <c>vadd2</c> on the host.
/// </summary>
public static void Execute()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(SIMDFunctions));
    _gpu.LoadModule(module);

    int w = 1024;
    int h = 1024;

    int run = 0;
    while (run < 3)
    {
        // Prepare two filled inputs on the device and an output buffer.
        uint[] a = new uint[w * h];
        Fill(a);
        uint[] dev_a = _gpu.CopyToDevice(a);

        uint[] b = new uint[w * h];
        Fill(b);
        uint[] dev_b = _gpu.CopyToDevice(b);

        uint[] c = new uint[w * h];
        uint[] dev_c = _gpu.Allocate(c);

        // The timed section covers the launch and the copy back to the host.
        _gpu.StartTimer();
        _gpu.Launch(h, w, "SIMDFunctionTest", dev_a, dev_b, dev_c);
        _gpu.CopyFromDevice(dev_c, c);
        float elapsed = _gpu.StopTimer();
        Console.WriteLine("Time: {0}", elapsed);

        // Only the first run is verified against the host implementation.
        if (run == 0)
        {
            bool passed = true;
            GThread hostThread = new GThread(1, 1, null);

            int k = 0;
            while (k < w * h)
            {
                uint expected = hostThread.vadd2(a[k], b[k]);
                if (expected != c[k])
                {
                    passed = false;
                }
                k++;
            }

            Console.WriteLine("Test {0}", passed ? "passed. " : "failed!");
        }

        _gpu.FreeAll();
        run++;
    }
}
/// <summary>
/// Fills one cell of <paramref name="f"/> with <c>distance[i - 1, j - 1]</c> plus the smallest
/// of its three neighbours <c>f[i - 1, j]</c>, <c>f[i, j - 1]</c> and <c>f[i - 1, j - 1]</c>.
/// The cell is left untouched when any of those neighbours equals -1.
/// </summary>
/// <param name="thread">Thread context; the 2D global index plus one selects the cell.</param>
/// <param name="distance">Cost matrix, read at <c>[i - 1, j - 1]</c>.</param>
/// <param name="f">Accumulated matrix, updated in place.</param>
/// <param name="x">Not used by this method.</param>
/// <param name="y">Not used by this method.</param>
public static void Calculate(GThread thread, double[,] distance, double[,] f, double[] x, double[] y)
{
    int i = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x + 1;
    int j = thread.blockIdx.y * thread.blockDim.y + thread.threadIdx.y + 1;

    // A neighbour of -1 stops the computation for this cell.
    if (f[i - 1, j] == -1 || f[i - 1, j - 1] == -1 || f[i, j - 1] == -1)
    {
        return;
    }

    double up = f[i - 1, j];
    double diagonal = f[i - 1, j - 1];
    double left = f[i, j - 1];

    // Pick the smallest neighbour; ties resolve in the order up, left, diagonal.
    double cheapest;
    if (up <= diagonal && up <= left)
    {
        cheapest = up;
    }
    else if (left <= diagonal && left <= up)
    {
        cheapest = left;
    }
    else if (diagonal <= left && diagonal <= up)
    {
        cheapest = diagonal;
    }
    else
    {
        // None of the comparisons held, so the cell is left unchanged.
        return;
    }

    f[i, j] = distance[i - 1, j - 1] + cheapest;
}