/// <summary>
/// Builds a byte-value histogram of the first <paramref name="size"/> entries of
/// <paramref name="buffer"/> and adds this block's counts into <paramref name="histo"/>.
/// </summary>
/// <param name="thread">Cudafy thread context (indices, shared memory, barriers, atomics).</param>
/// <param name="buffer">Input bytes; each value selects the bin to increment.</param>
/// <param name="size">Number of entries of <paramref name="buffer"/> to process.</param>
/// <param name="histo">Histogram that receives this block's per-bin counts via atomic adds.</param>
public static void histo_kernel(GThread thread, byte[] buffer, int size, uint[] histo)
        {
            // Block-local bins, one per possible byte value. Zeroing (and the final
            // merge) touches exactly one bin per thread, so this relies on the block
            // being launched with 256 threads.
            uint[] localBins = thread.AllocateShared<uint>("temp", 256);
            int lane = thread.threadIdx.x;
            localBins[lane] = 0;
            thread.SyncThreads();

            // Start at this thread's global index and advance by the total number of
            // launched threads until the input is exhausted.
            int step = thread.blockDim.x * thread.gridDim.x;
            for (int pos = lane + thread.blockIdx.x * thread.blockDim.x; pos < size; pos += step)
            {
                thread.atomicAdd(ref localBins[buffer[pos]], 1);
            }

            // Wait until every thread of the block has finished counting, then merge
            // the block-local bins into the output histogram: one atomic add per thread.
            thread.SyncThreads();

            thread.atomicAdd(ref (histo[lane]), localBins[lane]);
        }
Example #2
0
        /// <summary>
        /// Counts how often each byte value occurs in the first <paramref name="size"/>
        /// entries of <paramref name="buffer"/>, accumulating per block in shared memory
        /// and then adding the block's counts into <paramref name="histo"/>.
        /// </summary>
        public static void histo_kernel(GThread thread, byte[] buffer, int size, uint[] histo)
        {
            int self = thread.threadIdx.x;

            // One shared counter per byte value; every thread clears its own slot,
            // which covers all 256 slots only when the block has 256 threads.
            uint[] counts = thread.AllocateShared<uint>("temp", 256);
            counts[self] = 0;
            thread.SyncThreads();

            // Walk the input starting at the global thread index, jumping by the
            // total thread count of the launch each time.
            int cursor = self + thread.blockIdx.x * thread.blockDim.x;
            int jump = thread.blockDim.x * thread.gridDim.x;
            for (; cursor < size; cursor += jump)
            {
                thread.atomicAdd(ref counts[buffer[cursor]], 1);
            }

            // All shared-memory increments must be complete before the counts are
            // published; each thread then publishes exactly one bin.
            thread.SyncThreads();

            thread.atomicAdd(ref (histo[self]), counts[self]);
        }
Example #3
0
        /// <summary>
        /// Runs Utils.LAYER_SIZE passes. In each pass every thread computes, for its
        /// Utils.CHUNK_SIZE consecutive items, the dot product of row b[layer, item, *]
        /// with the first Utils.N entries of <paramref name="a"/>, stores it in
        /// <paramref name="c"/>, and then copies its items from c back into a.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the two barriers only order threads if SyncThreads spans every
        /// thread that reads <paramref name="a"/>; if it is a per-block barrier (as CUDA's
        /// __syncthreads is), multi-block launches are not ordered between passes — confirm.
        /// </remarks>
        public static void CalculateNeuralNetwork(GThread thread, float[] a, float[,,] b, float[] c)
        {
            // First item owned by this thread.
            int firstItem = thread.blockIdx.x * Utils.BLOCK_SIZE * Utils.CHUNK_SIZE + thread.threadIdx.x * Utils.CHUNK_SIZE;

            for (int layer = 0; layer < Utils.LAYER_SIZE; layer++)
            {
                // Phase 1: weighted sums for this thread's items, written to c.
                int offset = 0;
                while (offset < Utils.CHUNK_SIZE)
                {
                    int item = firstItem + offset;
                    float acc = 0;
                    for (int k = 0; k < Utils.N; k++)
                    {
                        acc += b[layer, item, k] * a[k];
                    }
                    c[item] = acc;
                    offset++;
                }
                thread.SyncThreads();

                // Phase 2: feed the results back as the next pass's input.
                for (offset = 0; offset < Utils.CHUNK_SIZE; offset++)
                {
                    int item = firstItem + offset;
                    a[item] = c[item];
                }
                thread.SyncThreads();
            }
        }
Example #4
0
        /// <summary>
        /// Computes per-block partial dot products of <paramref name="a"/> and
        /// <paramref name="b"/> over the first N elements; block k writes its partial
        /// sum to c[k].
        /// </summary>
        public static void Dot(GThread thread, float[] a, float[] b, float[] c)
        {
            float[] partials = thread.AllocateShared<float>("cache", threadsPerBlock);

            int lane = thread.threadIdx.x;
            int step = thread.blockDim.x * thread.gridDim.x;

            // Each thread accumulates the products at its global index and every
            // 'step' elements after it.
            float acc = 0;
            for (int k = lane + thread.blockIdx.x * thread.blockDim.x; k < N; k += step)
            {
                acc += a[k] * b[k];
            }

            // Publish the per-thread sum and wait for the whole block.
            partials[lane] = acc;
            thread.SyncThreads();

            // Halving reduction in shared memory; the block size must be a power of
            // two for every slot to be folded in.
            for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
            {
                if (lane < half)
                {
                    partials[lane] += partials[lane + half];
                }
                thread.SyncThreads();
            }

            if (lane == 0)
            {
                c[thread.blockIdx.x] = partials[0];
            }
        }
        /// <summary>
        /// Evaluates permutation numbers below <paramref name="permutations"/> with
        /// FindPathDistance, keeps the smallest distance seen by each thread, reduces
        /// those to one winner per block, and stores it in answer[blockIdx.x].
        /// </summary>
        public static void GpuFindPathDistance(GThread thread, int permutations, int cities,
                                               float[] latitudes, float[] longitudes, AnswerStruct[] answer)
        {
            var lane      = thread.threadIdx.x;
            var blockSize = thread.blockDim.x;
            var gridStep  = blockSize * thread.gridDim.x;   // total threads in the launch

            // Shared scratch: a path buffer handed to FindPathDistance plus the
            // per-thread best results used by the reduction below.
            var paths             = thread.AllocateShared<int>("path", _threadsPerBlock, _cities);
            var blockDistances    = thread.AllocateShared<float>("dist", _threadsPerBlock);
            var blockPermutations = thread.AllocateShared<int>("perm", _threadsPerBlock);

            var shortest            = float.MaxValue;
            var shortestPermutation = 0;

            // Each thread starts at its global index and skips ahead by the launch size.
            for (var candidate = lane + thread.blockIdx.x * blockSize; candidate < permutations; candidate += gridStep)
            {
                var distance = FindPathDistance(permutations, candidate,
                                                cities, latitudes, longitudes, paths, lane);
                if (distance < shortest)
                {
                    shortest            = distance;
                    shortestPermutation = candidate;
                }
            }

            blockDistances[lane]    = shortest;
            blockPermutations[lane] = shortestPermutation;

            thread.SyncThreads();

            // Halving min-reduction (credit: CUDA By Example, page 79):
            // http://www.amazon.com/CUDA-Example-Introduction-General-Purpose-Programming/dp/0131387685
            var half = blockSize / 2;
            while (half > 0)
            {
                if (lane < half && blockDistances[lane] > blockDistances[lane + half])
                {
                    blockDistances[lane]    = blockDistances[lane + half];
                    blockPermutations[lane] = blockPermutations[lane + half];
                }
                thread.SyncThreads();
                half /= 2;
            }

            // Thread 0 publishes the block's winner.
            if (lane == 0)
            {
                answer[thread.blockIdx.x].distance = blockDistances[0];
                answer[thread.blockIdx.x].pathNo   = blockPermutations[0];
            }
        }
Example #6
0
        /// <summary>
        /// Fills <paramref name="differencesArray"/> with absolute differences between
        /// in-silico masses and peak masses in three consecutive sections (left, right,
        /// modified ions), then sets MatchesFound[tidx] to 1 when differencesArray[tidx]
        /// is below Tolerance[0] and to 0 otherwise.
        /// </summary>
        /// <remarks>
        /// Changes from the previous version: the peak mass is only loaded by threads
        /// that passed a range check (it used to be read unconditionally, which indexes
        /// past the end of peakListMasses for surplus threads), and the barrier that sat
        /// inside the thread-dependent left/right branch was removed (a barrier not
        /// reached by every thread of a block is undefined; nothing there depended on it,
        /// since each thread only writes its own slots).
        /// <paramref name="arraySize"/> and <paramref name="numberOfMatches"/> are not used here.
        /// </remarks>
        public static void PeaksCompare_Da(GThread thread, double[] LeftInsilicoMasses, double[] RightInsilicoMasses, double[] peakListMasses, double[] differencesArray, int[] MatchesFound, int[] arraySize, int[] numberOfMatches, double[] Tolerance, double[] modifiedIonsArray)
        {
            int tidx = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x;

            // Section sizes / offsets inside differencesArray.
            int leftTotal      = LeftInsilicoMasses.Length * peakListMasses.Length;
            int rightOffset    = leftTotal;
            int modifiedOffset = leftTotal + (RightInsilicoMasses.Length * peakListMasses.Length);

            if (tidx < leftTotal)
            {
                // tidx < leftTotal guarantees Pindex < peakListMasses.Length.
                int    ILindex = tidx % LeftInsilicoMasses.Length;
                int    IRindex = tidx % RightInsilicoMasses.Length;
                int    Pindex  = tidx / LeftInsilicoMasses.Length;
                double peak    = peakListMasses[Pindex];

                double leftDiff = LeftInsilicoMasses[ILindex] - peak;
                if (leftDiff < 0)
                {
                    leftDiff = leftDiff * -1;
                }
                differencesArray[tidx] = leftDiff;

                // NOTE(review): the right section is indexed with the left section's
                // bounds and peak index — presumably both mass arrays have equal length; confirm.
                double rightDiff = RightInsilicoMasses[IRindex] - peak;
                if (rightDiff < 0)
                {
                    rightDiff = rightDiff * -1;
                }
                differencesArray[tidx + rightOffset] = rightDiff;
            }
            thread.SyncThreads();

            if (tidx < modifiedIonsArray.Length * peakListMasses.Length)
            {
                // NOTE(review): the peak index is still derived from the LEFT array's
                // length, as before; if modifiedIonsArray is longer than
                // LeftInsilicoMasses this can exceed peakListMasses — confirm with caller.
                int    MIindex = tidx % modifiedIonsArray.Length;
                int    Pindex  = tidx / LeftInsilicoMasses.Length;
                double peak    = peakListMasses[Pindex];

                double modifiedDiff = modifiedIonsArray[MIindex] - peak;
                if (modifiedDiff < 0)
                {
                    modifiedDiff = modifiedDiff * -1;
                }
                differencesArray[tidx + modifiedOffset] = modifiedDiff;
            }
            thread.SyncThreads();

            // NOTE(review): executed by every launched thread without a range check,
            // exactly as before; differencesArray and MatchesFound must be at least as
            // long as the launch — confirm.
            if (differencesArray[tidx] < Tolerance[0])
            {
                MatchesFound[tidx] = 1;
            }
            else
            {
                MatchesFound[tidx] = 0;
            }

            thread.SyncThreads();
        }
Example #7
0
        /// <summary>
        /// Evaluates permutation numbers below <paramref name="permutations"/> with
        /// FindPathDistance, tracks the smallest distance per thread, reduces to one
        /// winner per block and writes it to answer[blockIdx.x].
        /// </summary>
        public static void GpuFindPathDistance(GThread thread,
                                               long permutations, LatLongStruct[] gpuLatLong, AnswerStruct[] answer)
        {
            var lane     = thread.threadIdx.x;
            var gridStep = thread.blockDim.x * thread.gridDim.x;   // total threads in the launch

            // Shared scratch: path buffer for FindPathDistance and the per-thread results.
            var path              = thread.AllocateShared<int>("path", _cities, _threadsPerBlock);
            var blockDistances    = thread.AllocateShared<float>("dist", _threadsPerBlock);
            var blockPermutations = thread.AllocateShared<long>("perm", _threadsPerBlock);

            var shortest            = float.MaxValue;
            var shortestPermutation = 0L;

            // Start at the global thread index and advance by the launch size.
            for (var candidate = (long)(thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x);
                 candidate < permutations;
                 candidate += gridStep)
            {
                var distance = FindPathDistance(thread, permutations, candidate, gpuLatLong, path);
                if (distance < shortest)
                {
                    shortest            = distance;
                    shortestPermutation = candidate;
                }
            }

            blockDistances[lane]    = shortest;
            blockPermutations[lane] = shortestPermutation;
            thread.SyncThreads();

            // Halving min-reduction (credit: CUDA By Example, page 79):
            // http://www.amazon.com/CUDA-Example-Introduction-General-Purpose-Programming/dp/0131387685
            int half = thread.blockDim.x / 2;
            while (half > 0)
            {
                if (lane < half && blockDistances[lane] > blockDistances[lane + half])
                {
                    blockDistances[lane]    = blockDistances[lane + half];
                    blockPermutations[lane] = blockPermutations[lane + half];
                }
                thread.SyncThreads();
                half /= 2;
            }

            // Thread 0 publishes the block's winner.
            if (lane == 0)
            {
                answer[thread.blockIdx.x].distance = blockDistances[0];
                answer[thread.blockIdx.x].pathNo   = blockPermutations[0];
            }
        }
Example #8
0
        /// <summary>
        /// Copies wyjscie[x] into macierzSum[warstwa[0], x], where x is the block's
        /// x index (one element per block).
        /// </summary>
        public static void PrzeniesWynikDoMacierzySumGPU(GThread watek, float[,] macierzSum, float[] wyjscie, int[] warstwa)
        {
            int layer  = warstwa[0];        // row selected by the caller
            int column = watek.blockIdx.x;  // element handled by this block

            macierzSum[layer, column] = wyjscie[column];
            watek.SyncThreads();
        }
Example #9
0
        /// <summary>
        /// For row warstwa[0] and column blockIdx.x, stores
        /// macierzSum * macierzWyjsc * (1 - macierzWyjsc) into macierzDelt.
        /// </summary>
        public static void LiczDeltyWarstwyGPU(GThread watek, int[] warstwa, float[,] macierzDelt, float[,] macierzWyjsc, float[,] macierzSum)
        {
            int layer  = warstwa[0];
            int column = watek.blockIdx.x;

            float output = macierzWyjsc[layer, column];

            macierzDelt[layer, column] = macierzSum[layer, column] * output * (1 - output);
            watek.SyncThreads();
        }
Example #10
0
        /// <summary>
        /// For row warstwa[0] and column blockIdx.x, stores
        /// -(output - odpowiedz[x]) * output * (1 - output) into macierzDelt, where
        /// output is the matching macierzWyjsc entry.
        /// </summary>
        public static void LiczDelteOstatniejWarstwyGPU(GThread watek, float[,] macierzDelt, float[,] macierzWyjsc, int[] warstwa, float[] odpowiedz)
        {
            int layer  = warstwa[0];
            int column = watek.blockIdx.x;

            float output = macierzWyjsc[layer, column];

            macierzDelt[layer, column] = -1f * (output - odpowiedz[column]) * output * (1f - output);
            watek.SyncThreads();
        }
Example #11
0
        /// <summary>
        /// Integer dot product of <paramref name="a"/> and <paramref name="b"/> over the
        /// first N elements; each block reduces its threads' sums in shared memory and
        /// writes the block total to c[blockIdx.x].
        /// </summary>
        /// <remarks>
        /// NOTE(review): the shared buffer has a fixed size of 4 but is indexed by
        /// threadIdx.x, so this presumably expects at most 4 threads per block — confirm
        /// against the launch configuration.
        /// </remarks>
        public static void Product(GThread thread, int[] a, int[] b, int[] c)
        {
            int lane = thread.threadIdx.x;
            int step = thread.blockDim.x * thread.gridDim.x;
            int[] partials = thread.AllocateShared<int>("cache", 4);

            // Per-thread sum over the elements at the global index and every 'step' after it.
            int acc = 0;
            for (int k = lane + thread.blockIdx.x * thread.blockDim.x; k < N; k += step)
            {
                acc = acc + a[k] * b[k];
            }
            partials[lane] = acc;

            thread.SyncThreads();

            // Halving reduction; a barrier follows every step.
            for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
            {
                if (lane < half)
                {
                    partials[lane] += partials[lane + half];
                }
                thread.SyncThreads();
            }

            if (lane == 0)
            {
                c[thread.blockIdx.x] = partials[0];
            }
        }
Example #12
0
        /// <summary>
        /// Copies wektorWejsciowy[x] into macierzWejsc[0, x], where x is the block's
        /// x index (one element per block).
        /// </summary>
        public static void UzupelnijWejscie(GThread watek, float[,] macierzWejsc, float[] wektorWejsciowy)
        {
            int column = watek.blockIdx.x;
            float value = wektorWejsciowy[column];

            macierzWejsc[0, column] = value;
            watek.SyncThreads();
        }
Example #13
0
        /// <summary>
        /// Sets wektor[blockIdx.x] to zero (one element per block).
        /// </summary>
        public static void ZerujWektorFloat(GThread watek, float[] wektor)
        {
            int index = watek.blockIdx.x;

            wektor[index] = 0f;
            watek.SyncThreads();
        }
Example #14
0
        /// <summary>
        /// Atomically adds neuron[l, x, y] * macierzWejsc[l, y] to macierzWyjsc[l, x]
        /// (l = warstwa[0], x/y = block indices), then copies macierzWyjsc[l, x] into
        /// macierzWejsc[l + 1, x].
        /// </summary>
        /// <remarks>
        /// NOTE(review): the copy reads the accumulator immediately after this block's
        /// own add; blocks with other y values add to the same element, so the copied
        /// value may be a partial sum depending on scheduling — confirm intent.
        /// </remarks>
        public static void UzupelnijWarstwe(GThread watek, float[, ,] neuron, float[,] macierzWejsc, float[,] macierzWyjsc, int[] warstwa)
        {
            int outIndex = watek.blockIdx.x;
            int inIndex  = watek.blockIdx.y;

            float contribution = neuron[warstwa[0], outIndex, inIndex] * macierzWejsc[warstwa[0], inIndex];
            watek.atomicAdd(ref macierzWyjsc[warstwa[0], outIndex], contribution);

            macierzWejsc[warstwa[0] + 1, outIndex] = macierzWyjsc[warstwa[0], outIndex];
            watek.SyncThreads();
        }
Example #15
0
        /// <summary>
        /// Sets macierz[blockIdx.x, blockIdx.y] to zero (one element per block).
        /// </summary>
        public static void ZerujMacierzFloat(GThread watek, float[,] macierz)
        {
            int row    = watek.blockIdx.x;
            int column = watek.blockIdx.y;

            macierz[row, column] = 0f;
            watek.SyncThreads();
        }
Example #16
0
        /// <summary>
        /// Stores 1 / (1 + exp(-sumy[x])) in macierzWyjsc[warstwa[0], x] and copies that
        /// entry into macierzWejsc[warstwa[0] + 1, x], where x is the block's x index.
        /// </summary>
        public static void PoliczFunkcjeAktywacji(GThread watek, int[] warstwa, float[,] macierzWejsc, float[,] macierzWyjsc, float[] sumy)
        {
            int column = watek.blockIdx.x;
            int layer  = warstwa[0];

            // Logistic function of the summed input.
            macierzWyjsc[layer, column] = 1f / (1f + GMath.Exp(-sumy[column]));

            // The stored output becomes the next row's input.
            macierzWejsc[layer + 1, column] = macierzWyjsc[layer, column];
            watek.SyncThreads();
        }
Example #17
0
        /// <summary>
        /// Atomically adds neuron[warstwa[0], x, y] * macierzWejsc[warstwa[0], y] to
        /// wyjscie[x], with x and y taken from the block indices.
        /// <paramref name="macierzSum"/> is not used here.
        /// </summary>
        public static void LiczWarstweGPU(GThread watek, float[, ,] neuron, float[] wyjscie, int[] warstwa, float[,] macierzWejsc, float[,] macierzSum)
        {
            int outIndex = watek.blockIdx.x;
            int inIndex  = watek.blockIdx.y;

            // Atomic because blocks with different y add into the same wyjscie[x].
            float contribution = neuron[warstwa[0], outIndex, inIndex] * macierzWejsc[warstwa[0], inIndex];
            watek.atomicAdd(ref wyjscie[outIndex], contribution);
            watek.SyncThreads();
        }
Example #18
0
        /// <summary>
        /// Atomically adds macierzDelt[l + 1, y] * neuron[l + 1, y, x] to
        /// macierzSum[l, x], where l = warstwa[0] and x/y are the block indices.
        /// </summary>
        public static void LiczSumyWarstwyGPU(GThread watek, int[] warstwa, float[,] macierzDelt, float[,] macierzSum, float[, ,] neuron)
        {
            int target = watek.blockIdx.x;
            int source = watek.blockIdx.y;

            // Atomic because blocks with different y add into the same macierzSum element.
            float contribution = macierzDelt[warstwa[0] + 1, source] * neuron[warstwa[0] + 1, source, target];
            watek.atomicAdd(ref macierzSum[warstwa[0], target], contribution);
            watek.SyncThreads();
        }
        /// <summary>
        /// Per-block partial dot product of <paramref name="a"/> and <paramref name="b"/>
        /// over the first N elements; the block's partial sum is stored in c[blockIdx.x].
        /// </summary>
        public static void dot(GThread thread, float[] a, float[] b, float[] c)
        {
            float[] scratch = thread.AllocateShared<float>("cache", threadsPerBlock);

            int slot = thread.threadIdx.x;

            // Accumulate this thread's share of the products, stepping by the total
            // number of launched threads.
            float running = 0;
            for (int idx = slot + thread.blockIdx.x * thread.blockDim.x;
                 idx < N;
                 idx += thread.blockDim.x * thread.gridDim.x)
            {
                running += a[idx] * b[idx];
            }

            scratch[slot] = running;

            // Every slot must be written before the reduction reads it.
            thread.SyncThreads();

            // Halving reduction: requires threadsPerBlock to be a power of 2.
            int width = thread.blockDim.x / 2;
            while (width != 0)
            {
                if (slot < width)
                {
                    scratch[slot] += scratch[slot + width];
                }
                thread.SyncThreads();
                width /= 2;
            }

            if (slot == 0)
            {
                c[thread.blockIdx.x] = scratch[0];
            }
        }
Example #20
0
        /// <summary>
        /// Writes (a[i] + b[i]) * ConstantMemory[i] to c[i], where i is the global
        /// thread index. A shared buffer of Program.N ints is allocated but not
        /// otherwise used in this method.
        /// </summary>
        public static void VectorAdd(GThread thread,
                                     [CudafyAddressSpace(eCudafyAddressSpace.Global)] int[] a,
                                     int[] b,
                                     int[] c)
        {
            int[] scratch = thread.AllocateShared<int>("shared", Program.N);
            int   gid     = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;

            int sum = a[gid] + b[gid];
            c[gid] = sum * ConstantMemory[gid];
            thread.SyncThreads();
        }
Example #21
0
        /// <summary>
        /// Runs a state-table matcher over <paramref name="input"/>. From each start
        /// position owned by this thread it follows <paramref name="table1"/>
        /// (row = state, column = character - 'A') until the state becomes -1 or the
        /// input ends; whenever output_table[state] is positive it is written (as a char)
        /// to matched_result[start].
        /// </summary>
        /// <remarks>
        /// Changes from the previous version: the three SyncThreads calls were removed.
        /// They sat inside loops and branches whose execution depends on per-thread data,
        /// so not every thread of a block reached them the same number of times
        /// (undefined for a block barrier), and no shared memory is used that would need
        /// them. The unused local cacheIndex was dropped.
        /// NOTE(review): start positions advance by N * gridDim.x rather than
        /// blockDim.x * gridDim.x — presumably N equals the block size; confirm.
        /// NOTE(review): characters below 'A' would give a negative column index — confirm
        /// the input alphabet.
        /// </remarks>
        /// <param name="size1">size1[0] is the number of input characters to scan.</param>
        public static void Dot(GThread thread, int[,] table1, int[] output_table, char[] matched_result, char[] input, int[] size1)
        {
            int length = size1[0];
            int step   = N * thread.gridDim.x;
            int start  = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;

            while (start < length)
            {
                int state = 0;
                int pos   = start;

                // Follow transitions until there is none (-1) or the input is exhausted.
                while ((state != -1) && (pos < length))
                {
                    char ch        = input[pos];
                    int  nextState = table1[state, (int)(ch) - (int)('A')];
                    pos = pos + 1;

                    if (nextState != -1)
                    {
                        int matchVec = output_table[nextState];
                        if (matchVec > 0)
                        {
                            matched_result[start] = (char)matchVec;
                        }
                    }

                    state = nextState;
                }

                start += step;
            }
        }
Example #22
0
        /// <summary>
        /// For the (x, y) pair given by the block indices, atomically adds
        /// stala[0] * macierzDelt[x, y] * macierzWejsc[x, z] to neuron[x, y, z] for
        /// every z along the third dimension of <paramref name="neuron"/>.
        /// </summary>
        public static void UaktualnijWagiNeuronowGPU(GThread watek, float[, ,] neuron, float[,] macierzDelt, float[,] macierzWejsc, float[] stala)
        {
            int layer      = watek.blockIdx.x;
            int unit       = watek.blockIdx.y;
            int inputCount = neuron.GetLength(2);

            for (int input = 0; input < inputCount; input++)
            {
                watek.atomicAdd(ref neuron[layer, unit, input], stala[0] * macierzDelt[layer, unit] * macierzWejsc[layer, input]);
            }
            watek.SyncThreads();
        }
Example #23
0
        /// <summary>
        /// Scans <paramref name="buffer"/> with the state table <paramref name="lookup"/>.
        /// From every start offset owned by this thread it follows transitions until
        /// state 0 or the end of the buffer. States below <paramref name="initialState"/>
        /// are treated as hits: odd states bump resultCount and are recorded; even states
        /// are recorded only if the bytes at the computed end offset pass the 0x38/0x3B check.
        /// </summary>
        /// <remarks>
        /// Change from the previous version: the end offset (start + targetEndLength[...])
        /// is now range-checked before buffer[fileEnd] and buffer[fileEnd + 1] are read;
        /// previously a hit near the end of the buffer read past it. Hits whose end
        /// offset lies outside the buffer are not recorded.
        /// NOTE(review): foundID/foundSOF are written at the index returned by the atomic
        /// counter with no capacity check — confirm they are sized for the worst case.
        /// </remarks>
        /// <param name="foundCount">foundCount[0] is the shared counter of recorded hits.</param>
        /// <param name="foundID">Receives the state (as a byte) of each recorded hit.</param>
        /// <param name="foundSOF">Receives the start offset of each recorded hit.</param>
        public static void PFACAnalyse(GThread thread, byte[] buffer, int initialState, int[,] lookup, int[] targetEndLength, uint[] resultCount, int[] foundCount, byte[] foundID, int[] foundSOF)
        {
            int n = buffer.Length;

            int i      = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x; // first start offset of this thread
            int stride = thread.blockDim.x * thread.gridDim.x;                       // distance to this thread's next start offset

            for (; i < n; i += stride)
            {
                int state = initialState;
                int pos   = i;

                while (pos < n)
                {
                    state = lookup[state, buffer[pos]];
                    if (state == 0)
                    {
                        break;  // no transition: nothing starts at offset i
                    }
                    if (state < initialState)
                    {
                        // Index shared by resultCount and targetEndLength.
                        int target = ((state + 1) / 2) - 1;

                        if ((state - 1) % 2 == 0)
                        {
                            thread.atomicAdd(ref resultCount[target], 1);
                            int counter = thread.atomicAdd(ref foundCount[0], 1);
                            foundID[counter]  = (byte)state;
                            foundSOF[counter] = i;
                        }
                        else
                        {
                            int fileEnd = i + targetEndLength[target];

                            // Both inspected bytes must lie inside the buffer.
                            if (fileEnd >= 0 && fileEnd + 1 < n)
                            {
                                if (buffer[fileEnd] != 0x38 && buffer[fileEnd + 1] != 0x38 && buffer[fileEnd + 1] != 0x3B)
                                {
                                    int counter = thread.atomicAdd(ref foundCount[0], 1);
                                    foundID[counter]  = (byte)state;
                                    foundSOF[counter] = i;
                                }
                            }
                        }
                    }
                    pos++;
                }
            }

            thread.SyncThreads();
        }
Example #24
0
        /// <summary>
        /// For the pixel at this thread's global index o, writes o to output[o] if
        /// rgbColors[o] has the same Red, Green and Blue values as any entry of
        /// <paramref name="colors"/>, and 0 otherwise.
        /// <paramref name="indices"/> is not used here.
        /// </summary>
        /// <remarks>
        /// Changes from the previous version: the SyncThreads call inside the match
        /// branch was removed (only matching threads reached it, which is undefined for
        /// a block barrier, and no shared memory is involved); threads whose index lies
        /// beyond the arrays no longer touch them; dead commented-out experiments were
        /// deleted.
        /// NOTE(review): index 0 is indistinguishable from "no match" in the output,
        /// as before.
        /// </remarks>
        public static void FindPixel(GThread thread, GPUColorBGRA[] rgbColors, GPUColorBGRA[] colors, int[] indices, float[] output)
        {
            // Global pixel index: position in the block plus the block's offset.
            int o = thread.threadIdx.x + thread.blockDim.x * thread.blockIdx.x;

            // Surplus threads of the last block have no output slot.
            if (o >= output.Length)
            {
                return;
            }

            output[o] = 0;

            // No source pixel to compare for this slot.
            if (o >= rgbColors.Length)
            {
                return;
            }

            for (int i = 0; i < colors.Length; i++)
            {
                if (rgbColors[o].Red == colors[i].Red &&
                    rgbColors[o].Green == colors[i].Green &&
                    rgbColors[o].Blue == colors[i].Blue)
                {
                    output[o] = o;
                    break;
                }
            }
        }
Example #25
0
        /// <summary>
        /// Each thread stores gridDim.x as distance and blockDim.x as pathNo in its
        /// shared slot; after a barrier, thread 0 copies slot 0 to answer[blockIdx.x].
        /// The summation loop's result is not used afterwards.
        /// </summary>
        public static void GpuFindPathDistance(GThread thread, AnswerStruct[] answer)
        {
            var blockAnswers = thread.AllocateShared<AnswerStruct>("ansL", ThreadsPerBlock);

            var gridWidth  = thread.gridDim.x;
            var blockWidth = thread.blockDim.x;

            // Arithmetic whose result is never read.
            var sum = 0;
            var i   = 0;
            while (i < thread.blockDim.x)
            {
                sum += i * thread.threadIdx.x;
                i++;
            }

            blockAnswers[thread.threadIdx.x].distance = gridWidth;
            blockAnswers[thread.threadIdx.x].pathNo   = blockWidth;
            thread.SyncThreads();

            // Thread 0 publishes the block's entry.
            if (thread.threadIdx.x == 0)
            {
                answer[thread.blockIdx.x] = blockAnswers[0];
            }
        }
Example #26
0
        /// <summary>
        /// Computes partial sums of the convolution-kernel gradient. Block x selects one
        /// kernel element; each thread contributes input * gradient for one
        /// (batch, outH, outW) position, the block reduces those contributions in shared
        /// memory, and thread 0 stores the block total in
        /// resultPartials[blockIdx.x, blockIdx.y].
        /// </summary>
        /// <remarks>
        /// As used here: shapes[0] indexes <paramref name="input"/>, shapes[1] decomposes
        /// the kernel element id, shapes[2] indexes <paramref name="gradient"/>, and
        /// shapes[4] decomposes the per-element work item id.
        /// </remarks>
        private static void GpuConv2DKernelsGradient(GThread thread, float[] input, float[] gradient, float[,] resultPartials, GpuShape[] shapes, int paddingX, int paddingY, int stride)
        {
            /*
             * Reference (sequential) formulation:
             *
             * for (int kernelD = 0; kernelD < kernels.Depth; ++kernelD)
             * for (int kernelH = 0; kernelH < kernels.Height; ++kernelH)
             * for (int kernelW = 0; kernelW < kernels.Width; ++kernelW)
             * for (int kernelN = 0; kernelN < kernels.BatchSize; ++kernelN)
             * {
             *  for (int n = 0; n < gradient.BatchSize; ++n)
             *  for (int h = -paddingY, outH = 0; outH < gradient.Height; h += stride, ++outH)
             *  for (int w = -paddingX, outW = 0; outW < gradient.Width; w += stride, ++outW)
             *  {
             *      float grad = gradient[outW, outH, kernelN, n];
             *      float kernGradVal = input.TryGet(0, w + kernelW, h + kernelH, kernelD, n) * grad;
             *      kernelsGradient[kernelW, kernelH, kernelD, kernelN] += kernGradVal;
             *  }
             * }
             */

            // Shared buffer holding one contribution per thread until it is reduced.
            float[] partialSums = thread.AllocateShared<float>("sdata", THREADS_PER_BLOCK);

            int lane       = thread.threadIdx.x;
            int kernelElem = thread.blockIdx.x;
            int workItem   = (thread.blockDim.x * thread.blockIdx.y) + lane;

            // Number of (batch, height, width) positions feeding one kernel element.
            int workItemCount = shapes[4].BatchSize * shapes[4].Height * shapes[4].Width;

            // Coordinates of the kernel element this block works on.
            int kernelN = shapes[1].GetBatch(kernelElem);
            int kernelD = shapes[1].GetDepth(kernelElem);
            int kernelH = shapes[1].GetHeight(kernelElem);
            int kernelW = shapes[1].GetWidth(kernelElem);

            // Gradient position this thread works on.
            int n    = shapes[4].GetBatch(workItem);
            int outH = shapes[4].GetHeight(workItem);
            int outW = shapes[4].GetWidth(workItem);

            // Top-left input coordinate of the window for that gradient position.
            int h = -paddingY + stride * outH;
            int w = -paddingX + stride * outW;

            // Threads beyond the work-item count, or whose input index is negative,
            // contribute zero.
            float contribution = 0;
            if (workItem < workItemCount)
            {
                int inputIndex = shapes[0].TryGetIndex(w + kernelW, h + kernelH, kernelD, n);
                if (inputIndex >= 0)
                {
                    contribution = input[inputIndex] * gradient[shapes[2].GetIndex(outW, outH, kernelN, n)];
                }
            }
            partialSums[lane] = contribution;

            thread.SyncThreads();

            // Halving reduction with a barrier after every step.
            for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
            {
                if (lane < half)
                {
                    partialSums[lane] += partialSums[lane + half];
                }
                thread.SyncThreads();
            }

            if (lane == 0)
            {
                resultPartials[thread.blockIdx.x, thread.blockIdx.y] = partialSums[0];
            }
        }
Example #27
0
        /// <summary>
        /// Fills this thread's shared AnswerStruct slot with gridDim.x (distance) and
        /// blockDim.x (pathNo); once the block has synchronised, thread 0 copies slot 0
        /// into answer[blockIdx.x]. The loop total is computed but never read.
        /// </summary>
        public static void GpuFindPathDistance(GThread thread, AnswerStruct[] answer)
        {
            var perThread = thread.AllocateShared<AnswerStruct>("ansL", ThreadsPerBlock);

            var distanceValue = thread.gridDim.x;
            var pathValue     = thread.blockDim.x;

            // Result is discarded.
            var total = 0;
            for (int step = 0; step < thread.blockDim.x; step++)
            {
                total += step * thread.threadIdx.x;
            }

            perThread[thread.threadIdx.x].distance = distanceValue;
            perThread[thread.threadIdx.x].pathNo   = pathValue;
            thread.SyncThreads();

            if (thread.threadIdx.x == 0)
            {
                answer[thread.blockIdx.x] = perThread[0];
            }
        }
Example #28
0
        /// <summary>
        /// Computes a per-pixel error between the patterns image and the source image
        /// (via ComputePixelError), sums the errors of each thread block, and stores the
        /// rounded block sums in <paramref name="blockAvgs"/>. A block that straddles two
        /// patterns splits its sum between the current pattern and the next one.
        /// </summary>
        /// <remarks>
        /// Changes from the previous version:
        /// (1) patternLocations is only read by threads inside the patterns image;
        /// previously surplus threads of the last block column could index past its end.
        /// (2) blocksPerPattern is a true ceiling division. The old expression divided two
        /// integers before calling Ceiling, so it truncated, and yielded 0 (then used as
        /// a modulus) when patternWidth was smaller than the block width.
        /// NOTE(review): (2) only changes indices when patternWidth is not a multiple of
        /// the block width; confirm blockAvgs' second dimension is sized with the same
        /// ceiling on the host.
        /// </remarks>
        public static void RepeatedAverageColorDifferenceKernel(GThread thread,
                                                                byte[] source, int sourceStride, int sourceWidth, int sourceHeight,
                                                                byte[] patterns, int patternsStride, int patternsWidth, int patternsHeight, int patternsOffsetX,
                                                                int patternStride, int patternWidth, int patternHeight,
                                                                int[,] patternLocations, int[,,] blockAvgs)
        {
            var threadX = thread.threadIdx.x;
            var threadY = thread.threadIdx.y;

            int x = thread.blockIdx.x * thread.blockDim.x + threadX;
            int y = thread.blockIdx.y * thread.blockDim.y + threadY;

            // Per-pixel errors of this block, kept in shared memory.
            var pixelAvgDistance = thread.AllocateShared <float>("pixelSums", (int)Filters.BlockSideLength, (int)Filters.BlockSideLength);

            // Threads outside the image contribute zero.
            pixelAvgDistance[threadX, threadY] = 0;

            // Pattern that column x belongs to.
            var patternId = x / patternWidth;

            if (x < patternsWidth && y < patternsHeight)
            {
                // Where this pattern is compared against inside the source image.
                var sourceStartX = patternLocations[patternId, 0];
                var sourceStartY = patternLocations[patternId, 1];

                // The factor 3 is the per-pixel byte step used by both index computations.
                var patternIndex = 3 * (x + patternsOffsetX) + y * patternsStride;
                var sourceIndex  = 3 * ((x % patternWidth) + sourceStartX) + (y + sourceStartY) * sourceStride;

                pixelAvgDistance[threadX, threadY] = ComputePixelError(patterns, patternIndex, source, sourceIndex);
            }

            // Wait until all threads of the block have stored their value.
            thread.SyncThreads();

            // The first thread of each block adds up the block's values; the per-block
            // results are left in blockAvgs for the caller to combine.
            if (threadX == 0 && threadY == 0)
            {
                float thisBlockAvg = 0;
                float nextBlockAvg = 0;

                for (var ty = 0; ty < (int)Filters.BlockSideLength; ty++)
                {
                    for (var tx = 0; tx < (int)Filters.BlockSideLength; tx++)
                    {
                        // Columns past the pattern boundary belong to the next pattern.
                        if ((x + tx) / patternWidth == patternId)
                        {
                            thisBlockAvg += pixelAvgDistance[tx, ty];
                        }
                        else
                        {
                            nextBlockAvg += pixelAvgDistance[tx, ty];
                        }
                    }
                }

                // Ceiling of patternWidth / blockDim.x, computed in integers.
                var blocksPerPattern = (patternWidth + thread.blockDim.x - 1) / thread.blockDim.x;

                // Store the block's sums; zero sums are not written.
                if (thisBlockAvg > 0)
                {
                    blockAvgs[patternId, thread.blockIdx.x % blocksPerPattern, thread.blockIdx.y] = (int)GMath.Round(thisBlockAvg);
                }

                if (nextBlockAvg > 0)
                {
                    var isLastLastPattern = (patternsWidth / patternWidth) - 1 == patternId;

                    // The tail of this block is the first block of the next pattern,
                    // unless this is already the last pattern.
                    if (!isLastLastPattern)
                    {
                        blockAvgs[patternId + 1, 0, thread.blockIdx.y] = (int)GMath.Round(nextBlockAvg);
                    }
                }
            }
        }
Example #29
0
        /// <summary>
        /// Scores one tile permutation per thread and writes the lowest-metric evaluation of
        /// each block to <paramref name="evaluations"/>.
        /// </summary>
        /// <param name="gThread">Cudafy thread context.</param>
        /// <param name="evaluations">Receives one entry per block, at index blockIdx.x.</param>
        /// <remarks>
        /// The shared buffers and the reduction are hard-coded to 256 entries; presumably the
        /// kernel is launched with 256 threads per block (confirm against the launch site).
        /// </remarks>
        public static void ExplorePermutationsKernel(GThread gThread, Evaluation[] evaluations)
        {
            var best = gThread.AllocateShared<Evaluation>("be", 256);
            var tiles = gThread.AllocateShared<byte>("v", 256, 9);
            var slot = gThread.threadIdx.x;

            var permutation = gThread.threadIdx.x + gThread.blockIdx.x * gThread.blockDim.x;

            // Grid positions held in tiles[slot, 0..8]:
            // 0 1 2
            // 3 4 5
            // 6 7 8
            TileOrderFromPermutation(Permutations, permutation, 9, tiles, slot);

            var metric = 0f;

            // Horizontal neighbours, one row at a time.
            for (var row = 0; row < 3; row++)
            {
                var left = 3 * row;
                metric += LeftRightFit[tiles[slot, left], tiles[slot, left + 1]] + LeftRightFit[tiles[slot, left + 1], tiles[slot, left + 2]];
            }

            // Vertical neighbours, one column at a time.
            for (var column = 0; column < 3; column++)
            {
                metric += TopBottomFit[tiles[slot, column], tiles[slot, column + 3]] + TopBottomFit[tiles[slot, column + 3], tiles[slot, column + 6]];
            }

            best[slot].Permutation = permutation;
            best[slot].Metric = metric;

            gThread.SyncThreads();

            // Pairwise minimum reduction over the 256 shared entries.
            var half = 256 / 2;
            while (half > 0)
            {
                if (slot < half && best[slot].Metric > best[slot + half].Metric)
                {
                    best[slot] = best[slot + half];
                }

                gThread.SyncThreads();
                half /= 2;
            }

            if (slot == 0)
            {
                evaluations[gThread.blockIdx.x] = best[0];
            }
        }
Example #30
0
        /// <summary>
        /// Adds the red, green and blue components of one screenshot pixel per thread to the
        /// output slot of the key region that the pixel falls in.
        /// </summary>
        /// <param name="thread">Cudafy thread context.</param>
        /// <param name="screenshotColors">Screenshot pixels, indexed by the flat pixel index computed below.</param>
        /// <param name="colors">Not used by this kernel.</param>
        /// <param name="indices">Not used by this kernel.</param>
        /// <param name="output">Per-key channel sums: three consecutive floats (red, green, blue) per key, at ((keyColumn * keysHeight) + keyRow) * 3.</param>
        /// <param name="debugOutput">Receives blockIdx.x at the thread's flat pixel index.</param>
        /// <remarks>
        /// Only sums are produced here; presumably the host divides by the pixel count per key
        /// to obtain the average (confirm against the caller).
        /// </remarks>
        public static void AverageColor(GThread thread, GPUColorBGRA[] screenshotColors, GPUColorBGRA[] colors, int[] indices, float[] output, float[] debugOutput)
        {
            // threadIdx: thread within the block; blockIdx: block within the grid.
            // Original author's notes mention 900 threads per block and a 100 x 24 grid (unverified).
            // NOTE(review): the blockIdx.y term is scaled by blockDim.y only - confirm that this
            // matches the launch configuration and really yields a unique pixel per thread.
            int o = thread.threadIdx.x + thread.blockDim.x * thread.blockIdx.x + thread.blockDim.y * thread.blockIdx.y;

            var column = o % screenWidth;
            var row    = o / screenWidth;

            var keyColumnIndex = column / pixelPerKeyColumn;
            var keyRowIndex    = row / pixelPerKeyRow;

            thread.SyncThreads();

            debugOutput[o] = thread.blockIdx.x;

            // Many pixels (threads, possibly in different blocks) share one key slot, so the
            // accumulation must be atomic; a plain "+=" loses updates when threads collide.
            var outputBase = ((keyColumnIndex * keysHeight) + keyRowIndex) * 3;

            thread.atomicAdd(ref output[outputBase + 0], (float)screenshotColors[o].Red);
            thread.atomicAdd(ref output[outputBase + 1], (float)screenshotColors[o].Green);
            thread.atomicAdd(ref output[outputBase + 2], (float)screenshotColors[o].Blue);

            thread.SyncThreads();
        }
Example #31
0
        /// <summary>
        /// Casts one ray per pixel through the chunked voxel data and writes the colour of the
        /// first non-black voxel hit (black when nothing is hit) to <paramref name="imageBytes"/>.
        /// </summary>
        /// <param name="thread">Cudafy thread context; the x/y thread and block indices select the pixel.</param>
        /// <param name="voxelMap">Not used by this kernel.</param>
        /// <param name="pixelMap">Its dimensions define the image size; each entry receives the final ray direction (red = x, green = y, blue = z).</param>
        /// <param name="imageBytes">Output image, four bytes per pixel written in blue, green, red, alpha order.</param>
        /// <param name="camera">Only element 0 is used: position and rotation are read, the right/up/forward vectors are written back.</param>
        /// <param name="chunkData">Chunk table indexed by (voxel / 8) + 16 on each axis.</param>
        /// <param name="voxelData">Flat voxel colours; a non-empty chunk addresses 8 x 8 x 8 entries starting at its voxelDataIndex.</param>
        /// <param name="units">Test data: threads with ty == 0 and tx below 64 write four marker values into units[tx].</param>
        public static void RayTraceMain(GThread thread, Color3[,,] voxelMap, Color3[,] pixelMap, byte[] imageBytes, Camera[] camera, ChunkData[,,] chunkData, Color3[] voxelData, FSMUnit[] units)
        {
            int tx = thread.threadIdx.x + (thread.blockIdx.x * thread.blockDim.x);
            int ty = thread.threadIdx.y + (thread.blockIdx.y * thread.blockDim.y);

            // Valid pixel coordinates are 0 .. length - 1, so a coordinate equal to the length
            // is already outside the image and must not touch pixelMap or imageBytes.
            if (tx >= pixelMap.GetLength(0) || ty >= pixelMap.GetLength(1))
            {
                return; // out of bounds, do no work
            }

            if (tx < 64 && ty == 0)
            {
                // test
                unsafe
                {
                    units[tx].values[0] = tx + 0;
                    units[tx].values[1] = tx + 1;
                    units[tx].values[2] = tx + 2;
                    units[tx].values[3] = tx + 3;
                }
            }

            // camera (angles are presumably in degrees, given the degcos/degsin helpers)
            float hRot = camera[0].hRotation;
            float vRot = camera[0].vRotation;

            vRot = Clamp(vRot, -90.0f, 90.0f);

            float yaw      = hRot;
            float pitch    = vRot;
            float cosPitch = degcos(pitch);
            float sinPitch = degsin(pitch);
            float cosYaw   = degcos(yaw);
            float sinYaw   = degsin(yaw);

            // Every thread stores the same basis vectors into camera[0].
            camera[0].rightX = cosYaw;
            camera[0].rightY = 0f;
            camera[0].rightZ = -sinYaw;

            camera[0].upX = sinYaw * sinPitch;
            camera[0].upY = cosPitch;
            camera[0].upZ = cosYaw * sinPitch;

            camera[0].forwardX = sinYaw * cosPitch;
            camera[0].forwardY = -sinPitch;
            camera[0].forwardZ = cosPitch * cosYaw;

            // raster coordinates (0..1, 0..1)
            float px    = ((float)(tx + 0.5f) / (float)pixelMap.GetLength(0));
            float py    = ((float)(ty + 0.5f) / (float)pixelMap.GetLength(1));
            float ratio = (float)pixelMap.GetLength(0) / (float)pixelMap.GetLength(1); // should be > 1.0, normalized to Y-axis of screen

            float FOV     = 90.0f;
            float halfFOV = FOV / 2f;

            // middle of screen is 0,0 in this frame
            px = (px - 0.5f) * 2; // normalize: -1...+1
            py = (py - 0.5f) * 2; // normalize: -1...+1

            float vx = px * degtan(halfFOV) * ratio;
            float vy = py * degtan(halfFOV);
            float vz = -1.0f;

            // normalized vector to rotate
            float vlength         = GMath.Sqrt(vx * vx + vy * vy + vz * vz);
            float norm_starting_x = vx / vlength;
            float norm_starting_y = vy / vlength;
            float norm_starting_z = vz / vlength;

            // First rotation: by hRot around the world Y axis (axis-angle rotation matrix).
            float x = 0f;
            float y = 1f;
            float z = 0f;

            float rho_deg = hRot;
            float c       = degcos(rho_deg);
            float s       = degsin(rho_deg);
            float t       = (1 - degcos(rho_deg));

            float norm_final_x = norm_starting_x * (t * x * x + c) + norm_starting_y * (t * x * y - s * z) + norm_starting_z * (t * x * z + s * y);
            float norm_final_y = norm_starting_x * (t * x * y + s * z) + norm_starting_y * (t * y * y + c) + norm_starting_z * (t * y * z - s * x);
            float norm_final_z = norm_starting_x * (t * x * z - s * y) + norm_starting_y * (t * y * z + s * x) + norm_starting_z * (t * z * z + c);

            norm_starting_x = norm_final_x;
            norm_starting_y = norm_final_y;
            norm_starting_z = norm_final_z;

            // Second rotation: by vRot around the NEW local 'right' vector.
            x = camera[0].rightX;
            y = camera[0].rightY;
            z = camera[0].rightZ;

            rho_deg = vRot;
            c       = degcos(rho_deg);
            s       = degsin(rho_deg);
            t       = (1 - degcos(rho_deg));

            norm_final_x = norm_starting_x * (t * x * x + c) + norm_starting_y * (t * x * y - s * z) + norm_starting_z * (t * x * z + s * y);
            norm_final_y = norm_starting_x * (t * x * y + s * z) + norm_starting_y * (t * y * y + c) + norm_starting_z * (t * y * z - s * x);
            norm_final_z = norm_starting_x * (t * x * z - s * y) + norm_starting_y * (t * y * z + s * x) + norm_starting_z * (t * z * z + c);

            // Both rotation axes are unit vectors, so the direction is not re-normalised here.
            vx = norm_final_x;
            vy = norm_final_y;
            vz = norm_final_z;

            // The ray starts at the camera position.
            float rayx = camera[0].x;
            float rayy = camera[0].y;
            float rayz = camera[0].z;

            float red   = 0f;
            float green = 0f;
            float blue  = 0f;

            float maxDistance     = 64.0f * 2f;
            float currentDistance = 0.0f;

            // ray cast: step from voxel boundary to voxel boundary until a voxel is hit
            while (currentDistance < maxDistance)
            {
                int voxelx = (int)GMath.Floor(rayx);
                int voxely = (int)GMath.Floor(rayy);
                int voxelz = (int)GMath.Floor(rayz);

                // NOTE(review): for negative coordinates "/ 8" truncates toward zero while
                // "& 7" wraps (voxel -1 gives chunk offset 0, internal 7), so voxels -7..-1
                // share a chunk with 0..7. Confirm this matches how the host fills chunkData.
                int chunkX = voxelx / 8 + 16; // +16 in the array dimension
                int chunkY = voxely / 8 + 16;
                int chunkZ = voxelz / 8 + 16;

                int chunkInternalX = voxelx & 7; // 0,0,0 to 7,7,7
                int chunkInternalY = voxely & 7;
                int chunkInternalZ = voxelz & 7;

                // NOTE(review): indices 0..32 are accepted, which assumes every chunkData
                // dimension has at least 33 entries - confirm against the allocation.
                if (chunkX < 0 || chunkX > 32 || chunkY < 0 || chunkY > 32 || chunkZ < 0 || chunkZ > 32)
                {
                    // out of camera bounds, ignore
                }
                else
                {
                    if (chunkData[chunkX, chunkY, chunkZ].empty == 0)
                    {
                        int index    = chunkData[chunkX, chunkY, chunkZ].voxelDataIndex;
                        int adjIndex = index + (chunkInternalX + chunkInternalY * 8 + chunkInternalZ * 64);

                        // A voxel counts as solid when any colour channel is above zero.
                        if (voxelData[adjIndex].red > 0f || voxelData[adjIndex].green > 0f || voxelData[adjIndex].blue > 0f)
                        {
                            red   = voxelData[adjIndex].red;
                            green = voxelData[adjIndex].green;
                            blue  = voxelData[adjIndex].blue;
                            break;
                        }
                    }
                }

                // inner x/y/z of cube volume
                float ix = rayx - GMath.Floor(rayx);
                float iy = rayy - GMath.Floor(rayy);
                float iz = rayz - GMath.Floor(rayz);

                // get dist remaining in cube axis
                if (vx > 0)
                {
                    ix = 1f - ix;
                }
                if (vy > 0)
                {
                    iy = 1f - iy;
                }
                if (vz > 0)
                {
                    iz = 1f - iz;
                }

                // Convert the per-axis remainders into distances along the ray.
                ix = flabs(ix / vx);
                iy = flabs(iy / vy);
                iz = flabs(iz / vz);

                float nextDistance = GMath.Min(iz, GMath.Min(ix, iy)) + 0.01f; // step just over boundary

                rayx += vx * nextDistance;
                rayy += vy * nextDistance;
                rayz += vz * nextDistance;

                currentDistance += nextDistance; // add step length
            }

            // render a pixel to buffer
            int imageByteIndex = (tx + (ty * pixelMap.GetLength(0))) * 4;

            imageBytes[imageByteIndex + 0] = (byte)(blue * 255f);  // blue
            imageBytes[imageByteIndex + 1] = (byte)(green * 255f); // green
            imageBytes[imageByteIndex + 2] = (byte)(red * 255f);   // red
            imageBytes[imageByteIndex + 3] = 255;                  // alpha channel

            // Keep the final ray direction per pixel.
            pixelMap[tx, ty].red   = vx;
            pixelMap[tx, ty].green = vy;
            pixelMap[tx, ty].blue  = vz;
        }
Example #32
0
        /// <summary>
        /// Computes, for one input-gradient element per blockIdx.x, the partial sum of
        /// gradient * rotated-kernel products handled by this block and stores it in
        /// <paramref name="resultPartials"/> at [blockIdx.x, blockIdx.y].
        /// </summary>
        /// <param name="thread">Cudafy thread context.</param>
        /// <param name="gradient">Flattened output gradients, addressed through shapes[0].</param>
        /// <param name="rotKernels">Flattened rotated kernels, addressed through shapes[1].</param>
        /// <param name="resultPartials">Receives one partial sum per (result element, block row).</param>
        /// <param name="shapes">Shape descriptors: [0] gradient, [1] kernels, [2] result element decoding, [3] work-item decoding.</param>
        /// <param name="paddingX">Horizontal padding subtracted from the window origin.</param>
        /// <param name="paddingY">Vertical padding subtracted from the window origin.</param>
        /// <param name="stride">Step between consecutive window origins.</param>
        private static void GpuConv2DInputGradient(GThread thread, float[] gradient, float[] rotKernels, float[,] resultPartials, GpuShape[] shapes, int paddingX, int paddingY, int stride)
        {
            /*
             * CPU reference for what the whole launch computes:
             *
             * for (int n = 0; n < gradients.BatchSize; ++n)
             * for (int outW = 0, w = -paddingX; outW < inputGradients.Width; w += stride, ++outW)
             * for (int outH = 0, h = -paddingY; outH < inputGradients.Height; h += stride, ++outH)
             * for (int outD = 0; outD < inputGradients.Depth; ++outD)
             * {
             *  for (int kernelN = 0; kernelN < rotKernels.BatchSize; ++kernelN)
             *  for (int kernelH = 0; kernelH < rotKernels.Height; ++kernelH)
             *  for (int kernelW = 0; kernelW < rotKernels.Width; ++kernelW)
             *      inputGradients[outW, outH, outD, n] += gradients.TryGet(0, w + kernelW, h + kernelH, kernelN, n) * rotKernels[kernelW, kernelH, outD, kernelN];
             * }
             */

            // One slot per thread; reduced to a single block sum further down.
            float[] partials = thread.AllocateShared<float>("sdata", THREADS_PER_BLOCK);

            int outputElement = thread.blockIdx.x;
            int lane          = thread.threadIdx.x;
            int workItem      = thread.threadIdx.x + (thread.blockIdx.y * thread.blockDim.x);

            // Number of kernel taps that contribute to a single result element.
            int workItemCount = shapes[1].BatchSize * shapes[1].Height * shapes[1].Width;

            int outN = shapes[2].GetBatch(outputElement);
            int outD = shapes[2].GetDepth(outputElement);
            int outH = shapes[2].GetHeight(outputElement);
            int outW = shapes[2].GetWidth(outputElement);

            int kernelN = shapes[3].GetBatch(workItem);
            int kernelH = shapes[3].GetHeight(workItem);
            int kernelW = shapes[3].GetWidth(workItem);

            int h = stride * outH - paddingY;
            int w = stride * outW - paddingX;

            float product = 0;

            // Threads beyond the work-item count, or whose tap lies outside the gradient
            // (negative index from TryGetIndex), contribute zero.
            if (workItem < workItemCount)
            {
                int gradientIndex = shapes[0].TryGetIndex(w + kernelW, h + kernelH, kernelN, outN);
                if (gradientIndex >= 0)
                {
                    product = gradient[gradientIndex] * rotKernels[shapes[1].GetIndex(kernelW, kernelH, outD, kernelN)];
                }
            }

            partials[lane] = product;

            thread.SyncThreads();

            // Pairwise reduction: each pass folds the upper half of the active range onto the lower half.
            for (int half = thread.blockDim.x / 2; half != 0; half /= 2)
            {
                if (lane < half)
                {
                    partials[lane] += partials[lane + half];
                }

                thread.SyncThreads();
            }

            if (lane == 0)
            {
                resultPartials[outputElement, thread.blockIdx.y] = partials[0];
            }
        }
Example #33
0
        /// <summary>
        /// Replaces each pixel, in place, with a grey value equal to the mean absolute channel
        /// difference between the pixel and its neighbours <paramref name="filterSize"/> pixels
        /// to the left, right, above and below.
        /// </summary>
        /// <param name="thread">Cudafy thread context; the x/y thread and block indices select the pixel.</param>
        /// <param name="source">Image bytes, three bytes per pixel; read and overwritten by this kernel.</param>
        /// <param name="stride">Number of bytes between the starts of consecutive rows.</param>
        /// <param name="imageWidth">Image width in pixels.</param>
        /// <param name="imageHeight">Image height in pixels.</param>
        /// <param name="filterSize">Distance, in pixels, to the neighbours that are compared.</param>
        public static void EdgeFilterKernel(GThread thread,
                                            byte[] source, int stride, int imageWidth, int imageHeight, int filterSize)
        {
            int x = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x;
            int y = thread.blockIdx.y * thread.blockDim.y + thread.threadIdx.y;

            // Threads outside the image must not return early: every thread of the block has
            // to reach SyncThreads() below, so the out-of-image case is handled with a flag.
            bool insideImage = x < imageWidth && y < imageHeight;

            var index = 3 * x + y * stride;

            //take the pixel above, left, right, below and compare to the middle pixel
            var colorDifference = 0.0;

            if (insideImage)
            {
                var xLeft  = x - filterSize;
                var xRight = x + filterSize;
                var yUp    = y - filterSize;
                var yDown  = y + filterSize;

                var middleB = source[index];
                var middleG = source[index + 1];
                var middleR = source[index + 2];

                if (xLeft >= 0)
                {
                    colorDifference +=
                        GMath.Abs(middleB - source[xLeft * 3 + y * stride]) +
                        GMath.Abs(middleG - source[xLeft * 3 + y * stride + 1]) +
                        GMath.Abs(middleR - source[xLeft * 3 + y * stride + 2]);
                }

                if (xRight < imageWidth)
                {
                    colorDifference +=
                        GMath.Abs(middleB - source[xRight * 3 + y * stride]) +
                        GMath.Abs(middleG - source[xRight * 3 + y * stride + 1]) +
                        GMath.Abs(middleR - source[xRight * 3 + y * stride + 2]);
                }

                if (yUp >= 0)
                {
                    colorDifference +=
                        GMath.Abs(middleB - source[x * 3 + yUp * stride]) +
                        GMath.Abs(middleG - source[x * 3 + yUp * stride + 1]) +
                        GMath.Abs(middleR - source[x * 3 + yUp * stride + 2]);
                }

                if (yDown < imageHeight)
                {
                    colorDifference +=
                        GMath.Abs(middleB - source[x * 3 + yDown * stride]) +
                        GMath.Abs(middleG - source[x * 3 + yDown * stride + 1]) +
                        GMath.Abs(middleR - source[x * 3 + yDown * stride + 2]);
                }
            }

            //This will only work within blocks, pixels across block edge based on scheduling
            //That's ok for this task, as it will have small impact
            thread.SyncThreads();

            if (insideImage)
            {
                // Up to 4 neighbours x 3 channels were summed, hence the division by 12.
                source[index] = source[index + 1] = source[index + 2] =
                    (byte)(colorDifference / 12);
            }
        }
Example #34
0
        /// <summary>
        /// Computes the sum of squared differences between one edge of tile blockIdx.x and one
        /// edge of tile blockIdx.y, and stores it in <paramref name="fit"/>.
        /// </summary>
        /// <param name="gThread">Cudafy thread context; threadIdx.x selects the pixel along the edge.</param>
        /// <param name="edgeIndexA">Edge of the first tile to compare.</param>
        /// <param name="edgeIndexB">Edge of the second tile to compare.</param>
        /// <param name="fit">Receives the result at [blockIdx.x, blockIdx.y].</param>
        /// <remarks>
        /// The shared buffer and the reduction are hard-coded to 64 entries; presumably the
        /// kernel is launched with 64 threads per block (confirm against the launch site).
        /// </remarks>
        public static void ComputeFitsKernel(GThread gThread, int edgeIndexA, int edgeIndexB, float[,] fit)
        {
            var squares = gThread.AllocateShared<float>("sum", 64);

            var lane = gThread.threadIdx.x;
            var tileA = gThread.blockIdx.x;
            var tileB = gThread.blockIdx.y;

            // Each thread contributes the squared difference of one pixel pair.
            var delta = Edges[tileA, edgeIndexA, lane] - Edges[tileB, edgeIndexB, lane];
            squares[lane] = delta * delta;

            gThread.SyncThreads();

            // Pairwise reduction of the 64 squared differences into squares[0].
            var half = 64 / 2;
            while (half > 0)
            {
                if (lane < half)
                {
                    squares[lane] += squares[lane + half];
                }

                gThread.SyncThreads();
                half /= 2;
            }

            if (lane == 0)
            {
                fit[tileA, tileB] = squares[0];
            }
        }
Example #35
0
        /// <summary>
        /// Sums, per block, the channel distances between a pattern and the region of the
        /// source image that starts at (<paramref name="sourceStartX"/>, <paramref name="sourceStartY"/>).
        /// The host is expected to add the per-block sums together.
        /// </summary>
        /// <param name="thread">Cudafy thread context; the x/y thread and block indices select the pattern pixel.</param>
        /// <param name="source">Source image bytes, three bytes per pixel.</param>
        /// <param name="sourceStride">Bytes per source row.</param>
        /// <param name="sourceWidth">Not used by this kernel.</param>
        /// <param name="sourceHeight">Not used by this kernel.</param>
        /// <param name="pattern">Pattern image bytes, three bytes per pixel.</param>
        /// <param name="patternStride">Bytes per pattern row.</param>
        /// <param name="patternWidth">Pattern width in pixels.</param>
        /// <param name="patternHeight">Pattern height in pixels.</param>
        /// <param name="sourceStartX">X offset of the compared region inside the source image.</param>
        /// <param name="sourceStartY">Y offset of the compared region inside the source image.</param>
        /// <param name="blockSums">Receives one sum per block at [blockIdx.x, blockIdx.y].</param>
        public static void AverageColorDifferenceKernel(GThread thread,
                                                        byte[] source, int sourceStride, int sourceWidth, int sourceHeight,
                                                        byte[] pattern, int patternStride, int patternWidth, int patternHeight,
                                                        int sourceStartX, int sourceStartY, int[,] blockSums)
        {
            // One shared slot per thread of the block.
            var pixelSums = thread.AllocateShared<int>("pixelSums", (int)Filters.BlockSideLength, (int)Filters.BlockSideLength);

            var localX = thread.threadIdx.x;
            var localY = thread.threadIdx.y;

            int x = localX + thread.blockIdx.x * thread.blockDim.x;
            int y = localY + thread.blockIdx.y * thread.blockDim.y;

            // Threads outside the pattern, and pattern pixels whose first channel is close to
            // black, contribute zero.
            int contribution = 0;

            if (x < patternWidth && y < patternHeight)
            {
                var patternIndex = y * patternStride + 3 * x;
                var sourceIndex  = (y + sourceStartY) * sourceStride + 3 * (x + sourceStartX);

                if (pattern[patternIndex] > 3)
                {
                    contribution =
                        Distance(pattern[patternIndex], source[sourceIndex]) +
                        Distance(pattern[patternIndex + 1], source[sourceIndex + 1]) +
                        Distance(pattern[patternIndex + 2], source[sourceIndex + 2]);
                }
            }

            pixelSums[localX, localY] = contribution;

            // All slots must be written before the first thread reads them.
            thread.SyncThreads();

            // The first thread of the block adds the slots up serially.
            if (localX == 0 && localY == 0)
            {
                var side = (int)Filters.BlockSideLength;
                int total = 0;

                for (var ty = 0; ty < side; ty++)
                {
                    for (var tx = 0; tx < side; tx++)
                    {
                        total += pixelSums[tx, ty];
                    }
                }

                blockSums[thread.blockIdx.x, thread.blockIdx.y] = total;
            }
        }
Example #36
0
 /// <summary>
 /// Writes (a[i] + b[i]) * ConstantMemory[i] to c[i] for the element i owned by this thread.
 /// </summary>
 /// <param name="thread">Cudafy thread context.</param>
 /// <param name="a">First addend; explicitly placed in the global address space.</param>
 /// <param name="b">Second addend.</param>
 /// <param name="c">Receives the result.</param>
 public static void VectorAdd(GThread thread,
                         [CudafyAddressSpace(eCudafyAddressSpace.Global)] int[] a,
                         int[] b,
                         int[] c)
 {
     // Allocated but never read or written; kept as in the original
     // (presumably it exercises shared-memory allocation).
     int[] scratch = thread.AllocateShared<int>("shared", Program.N);

     // Global element index of this thread.
     int element = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x;

     int sum = a[element] + b[element];
     c[element] = sum * ConstantMemory[element];

     thread.SyncThreads();
 }
Example #37
0
        /// <summary>
        /// Compares every (in-silico ion, peak) pair against ten in-silico mass arrays.
        /// For each pair the absolute difference <c>|mass - peak|</c> is stored in
        /// <paramref name="differencesArray"/> and a 1/0 flag (difference strictly below
        /// <c>Tolerance[0]</c>) is stored in <paramref name="matchesFound"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Output layout: ten consecutive "planes" of
        /// <c>leftInsilicoMasses.Length * peakListMasses.Length</c> elements each, in the order
        /// left, right, LeftAo, LeftAstar, LeftBo, LeftBstar, RightYo, RightYstar, RightZo, RightZoo.
        /// Inside a plane, element <c>t</c> pairs ion index <c>t % leftInsilicoMasses.Length</c>
        /// with peak index <c>t / leftInsilicoMasses.Length</c>.
        /// </para>
        /// <para>
        /// Both output arrays therefore need at least ten planes of capacity, and every mass
        /// array is indexed with an ion index bounded only by <c>leftInsilicoMasses.Length</c>,
        /// so all ten are assumed to be at least that long - TODO confirm against the caller.
        /// </para>
        /// <para>
        /// Each thread touches only its own output slots and no shared memory is used, so no
        /// block-level barrier is needed. Barriers are deliberately absent: placing one after
        /// the bounds guard would be reached by only part of a block whenever the launch has
        /// more threads than pairs.
        /// </para>
        /// </remarks>
        /// <param name="thread">Cudafy thread context supplying thread/block indices and dimensions.</param>
        /// <param name="leftInsilicoMasses">Masses for plane 0; its length defines the ion dimension.</param>
        /// <param name="rightInsilicoMasses">Masses for plane 1.</param>
        /// <param name="insilicoMassLeftAo">Masses for plane 2.</param>
        /// <param name="insilicoMassLeftAstar">Masses for plane 3.</param>
        /// <param name="insilicoMassLeftBo">Masses for plane 4.</param>
        /// <param name="insilicoMassLeftBstar">Masses for plane 5.</param>
        /// <param name="insilicoMassRightYo">Masses for plane 6.</param>
        /// <param name="insilicoMassRightYstar">Masses for plane 7.</param>
        /// <param name="insilicoMassRightZo">Masses for plane 8.</param>
        /// <param name="insilicoMassRightZoo">Masses for plane 9.</param>
        /// <param name="peakListMasses">Peak masses; its length defines the peak dimension.</param>
        /// <param name="differencesArray">Output: absolute mass differences, ten planes.</param>
        /// <param name="matchesFound">Output: 1 where the difference is below the tolerance, else 0.</param>
        /// <param name="arraySize">Not used by this kernel.</param>
        /// <param name="numberOfMatches">Not used by this kernel.</param>
        /// <param name="Tolerance">Only element 0 is read: the match threshold (presumably in Daltons, going by the method name).</param>
        /// <param name="modifiedIonsArray">Not used by this kernel.</param>
        public static void PeaksCompare_Dalton(GThread thread, double[] leftInsilicoMasses, double[] rightInsilicoMasses,
                                               double[] insilicoMassLeftAo, double[] insilicoMassLeftAstar, double[] insilicoMassLeftBo, double[] insilicoMassLeftBstar,
                                               double[] insilicoMassRightYo, double[] insilicoMassRightYstar, double[] insilicoMassRightZo, double[] insilicoMassRightZoo,
                                               double[] peakListMasses, double[] differencesArray, int[] matchesFound, int[] arraySize, int[] numberOfMatches,
                                               double[] Tolerance, double[] modifiedIonsArray)
        {
            int tidx = thread.blockIdx.x * thread.blockDim.x + thread.threadIdx.x;
            int planeSize = leftInsilicoMasses.Length * peakListMasses.Length;

            // Surplus threads have no pair to process. Bailing out before any index math
            // also keeps the divisions below from ever seeing a zero length (planeSize
            // is 0 in that case, so no thread gets past this guard).
            if (tidx >= planeSize)
            {
                return;
            }

            int Iindex = tidx % leftInsilicoMasses.Length;
            int Pindex = (tidx / leftInsilicoMasses.Length) % peakListMasses.Length;
            double peak = peakListMasses[Pindex];
            double tolerance = Tolerance[0];

            // 'slot' walks through the ten output planes; 'diff' keeps the working value
            // in a local so each output element is written exactly once.
            int slot = tidx;
            double diff;

            // Plane 0: left in-silico masses
            diff = leftInsilicoMasses[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 1: right in-silico masses
            diff = rightInsilicoMasses[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 2: left Ao
            diff = insilicoMassLeftAo[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 3: left Astar
            diff = insilicoMassLeftAstar[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 4: left Bo
            diff = insilicoMassLeftBo[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 5: left Bstar
            diff = insilicoMassLeftBstar[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 6: right Yo
            diff = insilicoMassRightYo[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 7: right Ystar
            diff = insilicoMassRightYstar[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 8: right Zo
            diff = insilicoMassRightZo[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
            slot += planeSize;

            // Plane 9: right Zoo
            diff = insilicoMassRightZoo[Iindex] - peak;
            if (diff < 0)
            {
                diff = -diff;
            }
            differencesArray[slot] = diff;
            matchesFound[slot] = diff < tolerance ? 1 : 0;
        }