/// <summary>
/// Sorts <c>value</c> by queueing one <c>bitonic_merge</c> shader pass for every
/// (start_level, level) pair, with level counting down from start_level to 1.
/// </summary>
/// <remarks>
/// Nothing is queued when <c>value</c> holds fewer than two elements. After each pass
/// <c>value</c> and <c>output</c> are swapped, so the most recent pass result is
/// always the one referred to by <c>value</c>.
/// </remarks>
public void Sort()
{
    int n = value.Length;
    if (n <= 1)
    {
        return;
    }

    // Number of merge levels. Assuming the __builtin_* helpers mirror GCC's 32-bit
    // builtins, this is ceil(log2(n)): exactly log2(n) for a power of two, otherwise
    // floor(log2(n)) + 1.
    bool isPowerOfTwo = __builtin_popcount(n) == 1;
    int levelCount = (isPowerOfTwo ? 31 : 32) - __builtin_clz(n);

    // Seed the two helper buffers with 0 and n - 1
    // (presumably the inclusive bounds of the range being sorted - TODO confirm).
    left.Value(0);
    rigt.Value(n - 1);

    for (int startLevel = 1; startLevel <= levelCount; startLevel++)
    {
        int level = startLevel;
        while (level >= 1)
        {
            // Only the first pass of each start level is flagged as "reverse".
            int reverse = level == startLevel ? 1 : 0;

            new Shader("SIFT.shaders.bitonic_merge.glsl").QueueForRunInSequence(
                n,
                ("start_level", startLevel),
                ("level", level),
                ("reverse", reverse),
                value, left, rigt, output);

            // The pass wrote into output; make it the input of the next pass.
            value.Swap(output);
            level--;
        }
    }
}
/// <summary>
/// Sorts <c>value</c> by queueing one <c>merge_sort</c> shader pass per level,
/// for levels 1 up to the computed maximum level.
/// </summary>
/// <remarks>
/// Does nothing when <c>value</c> holds fewer than two elements. <c>value</c> and
/// <c>output</c> are swapped after every pass so that <c>value</c> always refers to
/// the latest result.
/// </remarks>
public void Sort()
{
    int n = value.Length;
    if (n > 1)
    {
        // Assuming GCC-like semantics for the __builtin_* helpers this is ceil(log2(n)).
        int bitWidth = __builtin_popcount(n) == 1 ? 31 : 32;
        int maxLevel = bitWidth - __builtin_clz(n);

        // Seed the helper buffers with 0 and n - 1
        // (presumably the inclusive range bounds - TODO confirm).
        left.Value(0);
        rigt.Value(n - 1);

        // n > 1 guarantees maxLevel >= 1 under the assumption above, so the body
        // runs at least once, exactly like the original counting loop.
        int level = 1;
        while (level <= maxLevel)
        {
            new Shader("SIFT.shaders.merge_sort.glsl").QueueForRunInSequence(
                n,
                ("level", level),
                value, left, rigt, output);

            // Ping-pong: the freshly written output becomes the next pass's input.
            value.Swap(output);
            level++;
        }
    }
}
/// <summary>
/// Sorts <c>value</c> by queueing one <c>merge_batch_sort</c> shader pass per level,
/// where each pass is dispatched over ceil(n / stride) work items instead of n.
/// </summary>
/// <remarks>
/// Does nothing when <c>value</c> holds fewer than two elements. <c>value</c> and
/// <c>output</c> are swapped after every pass.
/// </remarks>
public void Sort()
{
    int n = value.Length;
    if (n <= 1)
    {
        return;
    }

    // Assuming GCC-like semantics for the __builtin_* helpers this is ceil(log2(n)).
    bool isPowerOfTwo = __builtin_popcount(n) == 1;
    int maxLevel = (isPowerOfTwo ? 31 : 32) - __builtin_clz(n);

    // Seed the helper buffers with 0 and n - 1
    // (presumably the inclusive range bounds - TODO confirm).
    left.Value(0);
    rigt.Value(n - 1);

    // Fixed batch size. An earlier, disabled variant derived it as
    // Math.Max(1, n / Shader.default_group_size_x / gpu_execute_unit_count).
    int stride = 16;

    // Round up so a trailing partial batch still gets a work item.
    int batchCount = (n + stride - 1) / stride;

    for (int level = 1; level <= maxLevel; ++level)
    {
        Shader pass = new Shader("SIFT.shaders.merge_batch_sort.glsl");
        pass.QueueForRunInSequence(
            batchCount,
            ("level", level),
            ("stride", stride),
            value, left, rigt, output);

        // The pass wrote into output; make it the input of the next level.
        value.Swap(output);
    }
}
/// <summary>
/// Sorts <c>value</c> level by level. Each level first queues a series of
/// <c>merge_sort_write_a</c> passes (one per stride level, from the maximum level
/// down to 0) that fill <c>a</c>, then a single <c>merge_sort_read_a_write_o</c>
/// pass over all n elements that writes <c>output</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>value</c> holds fewer than two elements. <c>value</c> and
/// <c>output</c> are swapped at the end of every level.
/// </remarks>
public void Sort()
{
    int n = value.Length;
    if (n <= 1)
    {
        return;
    }

    // Assuming GCC-like semantics for the __builtin_* helpers this is ceil(log2(n)).
    int bitWidth = __builtin_popcount(n) == 1 ? 31 : 32;
    int maxLevel = bitWidth - __builtin_clz(n);

    // Seed the helper buffers with 0 and n - 1
    // (presumably the inclusive range bounds - TODO confirm).
    left.Value(0);
    rigt.Value(n - 1);

    for (int level = 1; level <= maxLevel; level++)
    {
        int scheduled = 0;

        for (int strideLevel = maxLevel; strideLevel >= 0; strideLevel--)
        {
            // Per the original author's note, the indices covered at stride level sl are
            //   (1 << sl) - 1, 3 * (1 << sl) - 1, 5 * (1 << sl) - 1, ...
            // i.e. they start at firstIndex and advance by 1 << (sl + 1).
            int firstIndex = (1 << strideLevel) - 1;

            // Count the k >= 0 with firstIndex + k * (1 << (sl + 1)) <= n - 1.
            // The two right shifts divide by that step; on a negative numerator the
            // arithmetic shift yields -1, so the count becomes 0.
            // NOTE(review): a count of 0 is still queued - confirm Shader tolerates an empty dispatch.
            int workItems = ((n - 1 - firstIndex) >> strideLevel >> 1) + 1;
            scheduled += workItems;

            new Shader("SIFT.shaders.merge_sort_write_a.glsl").QueueForRunInSequence(
                workItems,
                ("level", level),
                ("stride_level", strideLevel),
                value, left, rigt, a);
        }

        // Across all stride levels every element must have been scheduled exactly once.
        Assert(scheduled == n);

        new Shader("SIFT.shaders.merge_sort_read_a_write_o.glsl").QueueForRunInSequence(
            n,
            ("level", level),
            value, left, rigt, a, output);

        // The final pass wrote into output; make it the input of the next level.
        value.Swap(output);
    }
}
/// <summary>
/// Sorts <c>value</c> through a binary tree stored in <c>left</c>/<c>rigt</c>:
/// links nodes 1 .. n-1 heap-style, runs <c>BiSort</c> from node 1 with node 0 as
/// the spare, flattens the tree into <c>output</c> and finally swaps <c>value</c>
/// with <c>output</c>.
/// </summary>
/// <remarks>
/// Does nothing when <c>value</c> holds fewer than two elements; otherwise asserts
/// that the element count has exactly one bit set (a power of two).
/// </remarks>
public void Sort()
{
    int n = value.Length;
    if (n <= 1)
    {
        return;
    }

    Assert(__builtin_popcount(n) == 1);

    // Node i gets children 2i and 2i + 1; a node whose right child would fall
    // outside 0 .. n-1 is a leaf and has both links set to -1.
    for (int node = 1; node < n; node++)
    {
        int leftChild = node << 1;
        int rightChild = leftChild | 1;

        if (rightChild > n - 1)
        {
            (left[node], rigt[node]) = (-1, -1);
        }
        else
        {
            (left[node], rigt[node]) = (leftChild, rightChild);
        }
    }

    int rootNode = 1;
    int spareNode = 0;

    BiSort(rootNode, spareNode, true);

    // Flatten is invoked for output positions 0 .. n-2; the last position is
    // filled from the spare node's value.
    Flatten(rootNode, 0, n - 2);
    output[n - 1] = value[spareNode];

    value.Swap(output);
}
/// <summary>
/// Level-by-level compare/exchange sort over a binary tree held in
/// <c>left</c>/<c>rigt</c>, using <c>roots</c>/<c>spares</c> as the per-slot work
/// list. The result is gathered into <c>spares</c> and swapped into <c>value</c>.
/// </summary>
/// <param name="n">Number of elements to process; asserted to have exactly one bit set.</param>
/// <remarks>
/// The loops that iterate over a "worker" index look like a sequential emulation of
/// one pass per GPU thread (the original named the index thread_id) - TODO confirm.
/// </remarks>
private void PMerge(int n)
{
    Assert(__builtin_popcount(n) == 1);

    // Initial tree over slots 0 .. n-2. A slot's number of trailing one bits decides
    // its role: none means leaf, otherwise its children sit half a subtree to either side.
    for (int slot = 0; slot < n - 1; slot++)
    {
        int trailingOnes = __builtin_ctz(~slot);

        if (trailingOnes == 0)
        {
            (left[slot], rigt[slot]) = (-1, -1);
        }
        else
        {
            int half = 1 << (trailingOnes - 1);
            (left[slot], rigt[slot]) = (slot - half, slot + half);
        }

        roots[slot] = slot;
        spares[slot] = slot + (1 << trailingOnes);
    }

    int topLevel = __builtin_ctz(n);

    for (int startLevel = 1; startLevel <= topLevel; startLevel++)
    {
        for (int level = startLevel; level >= 1; level--)
        {
            int workerCount = n >> level;

            for (int worker = 0; worker < workerCount; worker++)
            {
                // Slot on the original (pre-swap) tree layout handled by this worker.
                int slot = (worker << level) + (1 << (level - 1)) - 1;

                int root = roots[slot];
                int spare = spares[slot];

                // Direction alternates with the parity of the set bits above startLevel.
                bool ascending = __builtin_popcount(slot >> startLevel) % 2 == 0;

                bool rootBelowSpare = value[root] < value[spare];
                if (rootBelowSpare != ascending)
                {
                    // Exchange root and spare values and mirror the root's children.
                    (value[root], value[spare]) = (value[spare], value[root]);
                    (left[root], rigt[root]) = (rigt[root], left[root]);
                }

                if (level <= 1)
                {
                    continue;
                }

                // Walk both subtrees in lock-step down to the leaves.
                int p = left[root];
                int q = rigt[root];
                while (p != -1)
                {
                    bool pBelowQ = value[p] < value[q];
                    if (pBelowQ != ascending)
                    {
                        // Swap the values and the left subtrees of p and q, then go right.
                        (value[p], value[q]) = (value[q], value[p]);
                        (left[p], left[q]) = (left[q], left[p]);
                        (p, q) = (rigt[p], rigt[q]);
                    }
                    else
                    {
                        // Already in order here: go left.
                        (p, q) = (left[p], left[q]);
                    }
                }

                // Hand the two half-size sub-problems to the slots of the next lower level.
                int quarter = 1 << (level - 2);
                (roots[slot - quarter], spares[slot - quarter]) = (left[root], root);
                (roots[slot + quarter], spares[slot + quarter]) = (rigt[root], spare);
            }
        }
    }

    // Gather: every slot 0 .. n-2 receives the value of the node recorded as its root.
    // spares is reused as the destination buffer from here on.
    for (int level = topLevel; level >= 1; level--)
    {
        int workerCount = n >> level;

        for (int worker = 0; worker < workerCount; worker++)
        {
            int slot = (worker << level) + (1 << (level - 1)) - 1;
            int root = roots[slot];
            spares[slot] = value[root];
        }
    }

    // The last slot has no root entry; its value is copied straight across.
    spares[n - 1] = value[n - 1];
    value.Swap(spares);
}