/// <summary>
/// ILGPU kernel that computes one tile of squared differences between coordinate columns,
/// staging both coordinate tiles in dynamic shared memory and writing results two at a time.
/// </summary>
/// <remarks>
/// Same as CudaKernelOptimised2, but the number of coordinates is given as a meta-constant.
/// Also, we write the results as float2 (here: <c>IlReal2</c>).
/// The dynamic shared memory view is indexed over <c>2 * c * Group.DimX</c> elements
/// (the first half for the I tile, the second half for the J tile).
/// NOTE(review): the paired <c>IlReal2</c> accesses look like they require Group.DimX and n
/// to be even - confirm against the launch code.
/// </remarks>
/// <param name="mSquaredDistances">Output view; written through an <c>IlReal2</c> cast at <c>[bJ / 2 + tid, bI + i]</c>.</param>
/// <param name="mCoordinates">Input values, read at <c>k * n + index</c> for coordinate <c>k</c>.</param>
/// <param name="c">Number of coordinates, supplied as a specialized (meta-constant) value.</param>
/// <param name="n">Upper bound for the I/J indices and stride between consecutive coordinates in <paramref name="mCoordinates"/> (presumably the number of points).</param>
private static void IlGpuKernelConstants(
    ArrayView2D<Real> mSquaredDistances,
    ArrayView<Real> mCoordinates,
    SpecializedValue<int> c,
    int n)
{
    // Split the dynamic shared memory into the I tile followed by the J tile.
    var shared = SharedMemory.GetDynamic<Real>();
    var coordinatesI = shared.GetSubView(0, c * Group.DimX);
    var coordinatesJ = shared.GetSubView(c * Group.DimX);

    // First I / J index handled by this group.
    var bI = Grid.IdxY * Group.DimX;
    var bJ = Grid.IdxX * Group.DimX;

    // Each thread copies one I element and one J element per coordinate, skipping indices >= n.
    for (int k = 0; k != c; ++k)
    {
        if (bI + Group.IdxX < n)
        {
            coordinatesI[k * Group.DimX + Group.IdxX] = mCoordinates[k * n + bI + Group.IdxX];
        }

        if (bJ + Group.IdxX < n)
        {
            coordinatesJ[k * Group.DimX + Group.IdxX] = mCoordinates[k * n + bJ + Group.IdxX];
        }
    }

    // All tile loads must be finished before any thread reads the shared tiles.
    Group.Barrier();

    // The group is split in two halves: 'line' selects the starting row (0 or 1),
    // 'tid' selects which pair of adjacent J entries this thread processes.
    var line = Group.IdxX / (Group.DimX / 2);
    var tid = Group.IdxX % (Group.DimX / 2);

    if (bJ + tid * 2 < n)
    {
        // Both pair-typed views are loop-invariant, so create them once.
        var coordinatesJ2 = coordinatesJ.Cast<IlReal2>();
        var dst = mSquaredDistances.Cast<IlReal2>();

        // Every second row, starting at 'line'; short-circuit '&&' matches IlGpuKernelLocalMemory.
        for (int i = line; i < Group.DimX && bI + i < n; i += 2)
        {
            var dist = default(IlReal2);

            for (int k = 0; k != c; ++k)
            {
                var coord1 = coordinatesI[k * Group.DimX + i];
                var coord2 = coordinatesJ2[(k * Group.DimX / 2) + tid];
                var diff = new IlReal2(coord1 - coord2.X, coord1 - coord2.Y);

                dist += diff * diff;
            }

            dst[bJ / 2 + tid, bI + i] = dist;
        }
    }
}
/// <summary>
/// Variant of the constants kernel that keeps the I tile in shared memory and this thread's
/// J pairs in a per-thread array, to increase the effective shared memory.
/// </summary>
/// <remarks>
/// NOTE(review): indexing by <c>Group.IdxX</c> against <paramref name="dimX"/> presumes the group
/// is launched with <c>dimX</c> threads, and the <c>IlReal2</c> reads at <c>(k * n + bJ) / 2 + tid</c>
/// look like they require <c>n</c> and <c>dimX</c> to be even - confirm against the launch code.
/// </remarks>
/// <param name="mSquaredDistances">Output view; written through an <c>IlReal2</c> cast at <c>[bJ / 2 + tid, bI + i]</c>.</param>
/// <param name="mCoordinates">Input values, read at <c>k * n + index</c> for coordinate <c>k</c>.</param>
/// <param name="dimX">Tile width, supplied as a specialized (meta-constant) value.</param>
/// <param name="c">Number of coordinates, supplied as a specialized (meta-constant) value.</param>
/// <param name="n">Upper bound for the I/J indices and stride between consecutive coordinates in <paramref name="mCoordinates"/> (presumably the number of points).</param>
private static void IlGpuKernelLocalMemory(
    ArrayView2D<Real> mSquaredDistances,
    ArrayView<Real> mCoordinates,
    SpecializedValue<int> dimX,
    SpecializedValue<int> c,
    int n)
{
    // Same as KernelConstants, but use both local and shared memory to increase the effective shared memory.
    var tileI = SharedMemory.Allocate<Real>(c * dimX);
    var pairsJ = new IlReal2[c.Value];

    // First I / J index handled by this group.
    var rowBase = Grid.IdxY * dimX;
    var colBase = Grid.IdxX * dimX;

    // The group is split in two halves: 'startRow' is 0 or 1, 'pairIdx' picks the J pair.
    var startRow = Group.IdxX / (dimX / 2);
    var pairIdx = Group.IdxX % (dimX / 2);
    var isActive = colBase + pairIdx * 2 < n;

    // Stage this thread's element of the I tile for every coordinate.
    if (rowBase + Group.IdxX < n)
    {
        for (int k = 0; k != c.Value; ++k)
        {
            tileI[k * dimX + Group.IdxX] = mCoordinates[k * n + rowBase + Group.IdxX];
        }
    }

    // Fetch this thread's J pair for every coordinate straight into the per-thread array.
    if (isActive)
    {
        var pairedCoordinates = mCoordinates.Cast<IlReal2>();
        for (int k = 0; k != c.Value; ++k)
        {
            pairsJ[k] = pairedCoordinates[(k * n + colBase) / 2 + pairIdx];
        }
    }

    // Every thread must reach the barrier; inactive threads may only leave afterwards.
    Group.Barrier();

    if (!isActive)
    {
        return;
    }

    var output = mSquaredDistances.Cast<IlReal2>();

    // Walk every second row, starting at 'startRow', while inside both the tile and the data.
    int row = startRow;
    while (row < dimX && rowBase + row < n)
    {
        var sum = default(IlReal2);

        for (int k = 0; k != c.Value; ++k)
        {
            var valueI = tileI[k * dimX + row];
            var valueJ = pairsJ[k];
            var delta = new IlReal2(valueI - valueJ.X, valueI - valueJ.Y);

            sum += delta * delta;
        }

        output[colBase / 2 + pairIdx, rowBase + row] = sum;
        row += 2;
    }
}