/// <summary>
        /// GPU kernel that, for every transition-table pair in the input, applies
        /// 9-step sequences of the two tables to the full state set and flags the
        /// entry when a sequence leaves at most one state.
        /// </summary>
        /// <remarks>
        /// Entries are split into contiguous ranges, one range per block. For each
        /// entry, thread 0 stages both tables in extern shared memory (the kernel
        /// therefore needs at least <c>2 * n</c> ushorts of dynamic shared memory,
        /// where <c>n = problemSize.Value</c>). Every thread then uses the low 9 bits
        /// of its <c>threadIdx.x</c> as the sequence: a 0 bit applies table A, a 1 bit
        /// applies table B, lowest bit first.
        /// NOTE(review): the naming suggests a synchronizing-word search on
        /// two-letter automata - confirm against the caller.
        /// </remarks>
        /// <param name="precomputedStateTransitioningMatrixA">
        /// Flattened tables for the first transition; element <c>k * n + i</c> is read
        /// as the target state index of state <c>i</c> in entry <c>k</c>.
        /// </param>
        /// <param name="precomputedStateTransitioningMatrixB">
        /// Flattened tables for the second transition, indexed the same way as
        /// <paramref name="precomputedStateTransitioningMatrixA"/>.
        /// </param>
        /// <param name="statusOfSynchronization">
        /// One flag per entry. This kernel only ever writes <c>true</c>; it never
        /// clears a flag (presumably the caller zero-initialises it - verify).
        /// </param>
        public static void Kernel(
            int[] precomputedStateTransitioningMatrixA,
            int[] precomputedStateTransitioningMatrixB,
            bool[] statusOfSynchronization)
        {
            // TODO: the status might need to be YES / NO / DUNNO instead of a bool (an Alea GPU enum?).
            // TODO: change this kernel and its computation!

            // Number of transitions applied per thread; also the number of
            // threadIdx.x bits consumed as the sequence.
            const int wordLength = 9;

            var stateCount    = problemSize.Value;
            var automataCount = precomputedStateTransitioningMatrixA.Length / stateCount;
            var allStates     = (1 << stateCount) - 1; // bit i set <=> state i is in the set

            // Both staged tables live in the same extern shared array:
            // A occupies ushorts [0, stateCount), B occupies [stateCount, 2 * stateCount).
            var imageUnderA = DeviceFunction.AddressOfArray(__shared__.ExternArray <ushort>())
                              .Ptr(0)
                              .Volatile();
            var imageUnderB = DeviceFunction.AddressOfArray(__shared__.ExternArray <ushort>())
                              .Ptr(stateCount)
                              .Volatile();

            // Contiguous range of entries handled by this block (ceil-divided, clamped).
            var perBlock       = (automataCount + gridDim.x - 1) / gridDim.x;
            var firstAutomaton = blockIdx.x * perBlock;
            var endAutomaton   = firstAutomaton + perBlock;
            if (endAutomaton > automataCount)
            {
                endAutomaton = automataCount;
            }

            for (int automaton = firstAutomaton; automaton < endAutomaton; automaton++)
            {
                var tableOffset = automaton * stateCount;

                // Barrier before thread 0 overwrites the shared tables, so no thread
                // is still reading the previous entry's data.
                DeviceFunction.SyncThreads();
                if (threadIdx.x == 0)
                {
                    // Store each target state as a one-hot bit mask.
                    for (int state = 0; state < stateCount; state++)
                    {
                        imageUnderA[state] = (ushort)(1 << precomputedStateTransitioningMatrixA[tableOffset + state]);
                        imageUnderB[state] = (ushort)(1 << precomputedStateTransitioningMatrixB[tableOffset + state]);
                    }
                }
                // Barrier so every thread sees the freshly staged tables.
                DeviceFunction.SyncThreads();

                var word    = threadIdx.x;
                var reached = allStates;
                for (int step = 0; step < wordLength; step++)
                {
                    var applyA   = (word & 1) == 0;
                    var image    = 0;
                    var stateBit = 1;
                    for (int state = 0; state < stateCount; state++)
                    {
                        if ((reached & stateBit) != 0)
                        {
                            // Union the one-hot images of all states currently in the set.
                            if (applyA)
                            {
                                image |= imageUnderA[state];
                            }
                            else
                            {
                                image |= imageUnderB[state];
                            }
                        }
                        stateBit <<= 1;
                    }
                    reached = image;
                    word >>= 1;
                }

                // x & (x - 1) == 0 holds exactly when x has at most one bit set.
                var collapsed = (reached & (reached - 1)) == 0;

                // NOTE(review): DeviceFunction.Any is presumably a warp-wide vote, which
                // would explain why only the first lane of each warp writes - confirm.
                var anyCollapsed = DeviceFunction.Any(collapsed);
                if (anyCollapsed && threadIdx.x % DeviceFunction.WarpSize == 0)
                {
                    statusOfSynchronization[automaton] = true;
                }
            }
        }