Exemplo n.º 1
0
        public static void Kernel(
            int[] precomputedStateTransitioningMatrixA,
            int[] precomputedStateTransitioningMatrixB,
            bool[] isSynchronizing,
            int[] shortestSynchronizingWordLength)
        {
            var n          = problemSize.Value;
            var arrayCount = precomputedStateTransitioningMatrixA.Length / n;
            var power      = 1 << n;

            #region Pointer setup
            var byteOffset = 0;

            var minEven = DeviceFunction.AddressOfArray(__shared__.ExternArray <int>())
                          .Ptr(byteOffset / sizeof(int));
            byteOffset += blockDim.x * sizeof(int);

            var minOdd = DeviceFunction.AddressOfArray(__shared__.ExternArray <int>())
                         .Ptr(byteOffset / sizeof(int));
            byteOffset += blockDim.x * sizeof(int);

            var maxEven = DeviceFunction.AddressOfArray(__shared__.ExternArray <int>())
                          .Ptr(byteOffset / sizeof(int));
            byteOffset += blockDim.x * sizeof(int);

            var maxOdd = DeviceFunction.AddressOfArray(__shared__.ExternArray <int>())
                         .Ptr(byteOffset / sizeof(int));
            byteOffset += blockDim.x * sizeof(int);

            var gpuA = DeviceFunction.AddressOfArray(__shared__.ExternArray <ushort>())
                       .Ptr(byteOffset / sizeof(ushort))
                       .Volatile();
            byteOffset += n * sizeof(ushort);

            var gpuB = DeviceFunction.AddressOfArray(__shared__.ExternArray <ushort>())
                       .Ptr(byteOffset / sizeof(ushort))
                       .Volatile();
            byteOffset += n * sizeof(ushort);

            var isActiveEven = DeviceFunction.AddressOfArray(__shared__.ExternArray <bool>())
                               .Ptr(byteOffset / sizeof(bool));
            byteOffset += power * sizeof(bool);

            var isActiveOdd = DeviceFunction.AddressOfArray(__shared__.ExternArray <bool>())
                              .Ptr(byteOffset / sizeof(bool));
            byteOffset += power * sizeof(bool);

            var isDiscovered = DeviceFunction.AddressOfArray(__shared__.ExternArray <bool>())
                               .Ptr(byteOffset / sizeof(bool));
            byteOffset += power * sizeof(bool);

            var addedAnythingOdd = DeviceFunction.AddressOfArray(__shared__.ExternArray <bool>())
                                   .Ptr(byteOffset / sizeof(bool))
                                   .Volatile();
            byteOffset += sizeof(bool);

            var addedAnythingEven = DeviceFunction.AddressOfArray(__shared__.ExternArray <bool>())
                                    .Ptr(byteOffset / sizeof(bool))
                                    .Volatile();
            byteOffset += sizeof(bool);

            var shouldStop = DeviceFunction.AddressOfArray(__shared__.ExternArray <bool>())
                             .Ptr(byteOffset / sizeof(bool))
                             .Volatile();
            byteOffset += sizeof(bool);
            #endregion


            ushort nextDistance;
            int    vertexAfterTransitionA,
                   vertexAfterTransitionB,
                   index;
            var acPart  = (arrayCount + gridDim.x - 1) / gridDim.x;
            var acBegin = blockIdx.x * acPart;
            var acEnd   = acBegin + acPart;
            if (arrayCount < acEnd)
            {
                acEnd = arrayCount;
            }
            index = acBegin * n;
            var threadWork = (power + blockDim.x - 1) / blockDim.x;
            DeviceFunction.SyncThreads();
            for (int ac = acBegin; ac < acEnd; ac++, index += n)
            {
                //Console.WriteLine("Begin {0}", threadIdx.x);
                // cleanup
                var readingActive        = isActiveOdd;
                var writingActive        = isActiveEven;
                var readingAnythingAdded = addedAnythingOdd;
                var writingAnythingAdded = addedAnythingEven;
                var minRead  = minOdd;
                var minWrite = minEven;
                var maxRead  = maxOdd;
                var maxWrite = maxEven;

                if (threadIdx.x == DeviceFunction.WarpSize || (threadIdx.x == 0 && blockDim.x <= DeviceFunction.WarpSize))
                {
                    for (int i = 0; i < n; i++)
                    {
                        gpuA[i] = (ushort)(1 << precomputedStateTransitioningMatrixA[index + i]);
                        gpuB[i] = (ushort)(1 << precomputedStateTransitioningMatrixB[index + i]);
                    }
                }

                minRead[threadIdx.x]  = int.MaxValue;
                maxRead[threadIdx.x]  = 0;
                minWrite[threadIdx.x] = int.MaxValue;
                maxWrite[threadIdx.x] = 0;
                if (threadIdx.x == 0)
                {
                    shouldStop[0]           = false;
                    readingAnythingAdded[0] = true;
                }
                else if (threadIdx.x == blockDim.x - 1)
                {
                    minRead[blockDim.x - 1] = (power - 1) % threadWork;
                    maxRead[blockDim.x - 1] = power % threadWork;
                }
                nextDistance = 1;
                for (int consideringVertex = threadIdx.x, endingVertex = power, powerm1 = power - 1;
                     consideringVertex < endingVertex;
                     consideringVertex += blockDim.x)
                {
                    isActiveEven[consideringVertex] = false;
                    isActiveOdd[consideringVertex]  = consideringVertex == powerm1;
                    isDiscovered[consideringVertex] = consideringVertex == powerm1;
                    //Console.WriteLine("cleaning {0}, {1} {2} {3}", consideringVertex,
                    //    readingActive[consideringVertex], writingActive[consideringVertex], isDiscovered[consideringVertex]);
                }
                DeviceFunction.SyncThreads();

                while (readingAnythingAdded[0] && !shouldStop[0])
                {
                    if (threadIdx.x == 0)
                    {
                        readingAnythingAdded[0] = false;
                        writingAnythingAdded[0] = false;
                        //Console.WriteLine("distance {0}", nextDistance);
                    }

                    int myPart           = (power + blockDim.x - 1) / blockDim.x;
                    int beginningPointer = threadIdx.x * myPart;
                    int endingPointer    = beginningPointer + myPart;
                    if (power < endingPointer)
                    {
                        endingPointer = power;
                    }
                    if (minRead[threadIdx.x] > beginningPointer)
                    {
                        beginningPointer = minRead[threadIdx.x];
                    }
                    //if (maxRead[threadIdx.x] < endingPointer)
                    //    endingPointer = maxRead[threadIdx.x];
                    minRead[threadIdx.x]  = int.MaxValue;
                    maxRead[threadIdx.x]  = 0;
                    minWrite[threadIdx.x] = int.MaxValue;
                    maxWrite[threadIdx.x] = 0;
                    DeviceFunction.SyncThreads();
                    //Console.WriteLine("ac {3}, threadix {0}, begin {1}, end {2}", threadIdx.x, beginningPointer, endingPointer, ac);
                    for (int consideringVertex = beginningPointer; consideringVertex < endingPointer; ++consideringVertex)
                    {
                        //Console.WriteLine("writeIsActive[{0}]=={1}", consideringVertex, writingActive[consideringVertex]);
                        if (!readingActive[consideringVertex])
                        {
                            //Console.WriteLine("Skipping {0}", consideringVertex);
                            continue;
                        }
                        else
                        {
                            //Console.WriteLine("Considering {0} dist {1}", consideringVertex, nextDistance);
                            readingActive[consideringVertex] = false;
                        }

                        vertexAfterTransitionA = vertexAfterTransitionB = 0;
                        for (int i = 0, mask = 1; i < n; i++, mask <<= 1)
                        {
                            if (0 != (mask & consideringVertex))
                            {
                                vertexAfterTransitionA |= gpuA[i];
                                vertexAfterTransitionB |= gpuB[i];
                            }
                        }

                        if (!isDiscovered[vertexAfterTransitionA])
                        {
                            isDiscovered[vertexAfterTransitionA] = true;
                            //Console.WriteLine("Discovered {0} by {1}", vertexAfterTransitionA, consideringVertex);
                            DeviceFunction.AtomicMin(minWrite.Ptr(vertexAfterTransitionA / threadWork), vertexAfterTransitionA % threadWork);
                            DeviceFunction.AtomicMax(maxWrite.Ptr(vertexAfterTransitionA / threadWork), 1 + (vertexAfterTransitionA % threadWork));

                            if (0 == (vertexAfterTransitionA & (vertexAfterTransitionA - 1)))
                            {
                                shortestSynchronizingWordLength[ac] = nextDistance;
                                isSynchronizing[ac] = true;
                                shouldStop[0]       = true;
                                break;
                            }

                            writingActive[vertexAfterTransitionA] = true;
                            writingAnythingAdded[0] = true;
                        }

                        if (!isDiscovered[vertexAfterTransitionB])
                        {
                            isDiscovered[vertexAfterTransitionB] = true;
                            //Console.WriteLine("Discovered {0} by {1}", vertexAfterTransitionB, consideringVertex);
                            DeviceFunction.AtomicMin(minWrite.Ptr(vertexAfterTransitionB / threadWork), vertexAfterTransitionB % threadWork);
                            DeviceFunction.AtomicMax(maxWrite.Ptr(vertexAfterTransitionB / threadWork), 1 + (vertexAfterTransitionB % threadWork));

                            if (0 == (vertexAfterTransitionB & (vertexAfterTransitionB - 1)))
                            {
                                shortestSynchronizingWordLength[ac] = nextDistance;
                                isSynchronizing[ac] = true;
                                shouldStop[0]       = true;
                                break;
                            }

                            writingActive[vertexAfterTransitionB] = true;
                            writingAnythingAdded[0] = true;
                        }
                    }
                    ++nextDistance;
                    readingActive        = nextDistance % 2 == 0 ? isActiveEven : isActiveOdd;
                    writingActive        = nextDistance % 2 != 0 ? isActiveEven : isActiveOdd;
                    readingAnythingAdded = nextDistance % 2 == 0 ? addedAnythingEven : addedAnythingOdd;
                    writingAnythingAdded = nextDistance % 2 != 0 ? addedAnythingEven : addedAnythingOdd;
                    minRead  = nextDistance % 2 == 0 ? minEven : minOdd;
                    minWrite = nextDistance % 2 != 0 ? minEven : minOdd;
                    maxRead  = nextDistance % 2 == 0 ? maxEven : maxOdd;
                    maxWrite = nextDistance % 2 != 0 ? maxEven : maxOdd;
                    DeviceFunction.SyncThreads();
                }
            }
        }