Beispiel #1
0
        /// <summary>
        /// Init grid information.
        /// </summary>
        /// <remarks>
        /// Sets <c>MaxGridSize</c>, <c>MaxNumThreadsPerGroup</c>,
        /// <c>MaxGroupSize</c> and <c>MaxNumThreadsPerMultiprocessor</c> from the
        /// OpenCL device identified by <c>DeviceId</c>.
        /// </remarks>
        private void InitGridInfo()
        {
            // OpenCL has no device query for a maximum grid (global work) size, so
            // fixed upper bounds are used instead of a device-reported value.
            MaxGridSize = new Index3D(int.MaxValue, ushort.MaxValue, ushort.MaxValue);

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CurrentAPI.GetDeviceInfo<IntPtr>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

            // Allocate at least three entries so that the X/Y/Z reads below are
            // always in range, regardless of the reported dimension count.
            int workItemDimensions = IntrinsicMath.Max(
                CurrentAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS),
                3);
            var workItemSizes = new IntPtr[workItemDimensions];

            // CL_DEVICE_MAX_WORK_ITEM_SIZES is the per-dimension limit of a single
            // work-group (see the OpenCL clGetDeviceInfo documentation), i.e. it
            // describes the maximum group size and not the maximum grid size.
            CurrentAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
                workItemSizes);
            MaxGroupSize = new Index3D(
                workItemSizes[0].ToInt32(),
                workItemSizes[1].ToInt32(),
                workItemSizes[2].ToInt32());

            // Result max number of threads per multiprocessor
            MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;
        }
Beispiel #2
0
        /// <summary>
        /// Initializes the grid- and group-size limits of this accelerator.
        /// </summary>
        private void InitGridInfo()
        {
            // Number of work-item dimensions reported by the device.
            int reportedDimensions = CurrentAPI.GetDeviceInfo<int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);

            // OpenCL does not report maximum grid sizes. The value chosen here is
            // consistent with the CPU accelerator and with the values returned by
            // CUDA accelerators. The usable grid size is ultimately constrained by
            // system and device memory and by how each kernel manages memory.
            MaxGridSize = new Index3D(int.MaxValue, ushort.MaxValue, ushort.MaxValue);

            // Total number of threads allowed in a single group.
            IntPtr maxWorkGroupSize = CurrentAPI.GetDeviceInfo<IntPtr>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE);
            MaxNumThreadsPerGroup = maxWorkGroupSize.ToInt32();

            // Per-dimension work-item limits. The buffer always holds at least
            // three entries so that the X/Y/Z reads below are in range.
            var perDimensionLimits = new IntPtr[IntrinsicMath.Max(reportedDimensions, 3)];
            CurrentAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
                perDimensionLimits);

            int limitX = perDimensionLimits[0].ToInt32();
            int limitY = perDimensionLimits[1].ToInt32();
            int limitZ = perDimensionLimits[2].ToInt32();
            MaxGroupSize = new Index3D(limitX, limitY, limitZ);

            // The per-multiprocessor thread limit mirrors the per-group limit.
            MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;
        }
Beispiel #3
0
        /// <summary>
        /// Constructs a new use distribution.
        /// </summary>
        /// <remarks>
        /// Walks every value yielded by <c>method.Blocks.Values</c> for each method
        /// of the context and builds two summaries, both sorted in descending
        /// order of their first tuple element:
        /// <c>Uses</c> holds (use count, number of values with that use count) and
        /// <c>UsesPerType</c> holds (maximum use count, value type, number of
        /// values of that type).
        /// </remarks>
        /// <param name="context">The target context.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="context"/> is null.
        /// </exception>
        public UseDistribution(IRContext context)
        {
            if (context == null)
            {
                throw new ArgumentNullException(nameof(context));
            }

            // Use count -> number of values having exactly that use count
            var groupedUses = new Dictionary<int, int>();
            // Value type -> (maximum use count seen, number of values of that type)
            var usesPerType = new Dictionary<Type, (int, int)>();

            foreach (var method in context.Methods)
            {
                foreach (Value value in method.Blocks.Values)
                {
                    // Query the use count once per value instead of three times
                    int numUses = value.AllNumUses;

                    // A failed lookup leaves the out value at its default
                    // (0 or (0, 0)), which is exactly the required start state.
                    groupedUses.TryGetValue(numUses, out int count);
                    groupedUses[numUses] = count + 1;

                    var type = value.GetType();
                    usesPerType.TryGetValue(type, out var entry);
                    usesPerType[type] =
                        (IntrinsicMath.Max(numUses, entry.Item1),
                         entry.Item2 + 1);
                }
            }

            var groupedUsesList = new List<(int, int)>(groupedUses.Count);
            foreach (var entry in groupedUses)
            {
                groupedUsesList.Add((entry.Key, entry.Value));
            }
            // Descending by use count
            groupedUsesList.Sort((x, y) => y.Item1.CompareTo(x.Item1));

            var groupedUsesPerTypeList = new List<(int, Type, int)>(usesPerType.Count);
            foreach (var entry in usesPerType)
            {
                groupedUsesPerTypeList.Add(
                    (entry.Value.Item1, entry.Key, entry.Value.Item2));
            }
            // Descending by maximum use count
            groupedUsesPerTypeList.Sort((x, y) => y.Item1.CompareTo(x.Item1));

            // Materialize each result exactly once
            Uses = groupedUsesList.ToImmutableArray();
            UsesPerType = groupedUsesPerTypeList.ToImmutableArray();
        }
        /// <summary>
        /// Reserves a shared-memory region for <paramref name="extent"/> elements
        /// of type <typeparamref name="T"/> and publishes it via
        /// <c>currentSharedMemoryView</c>.
        /// </summary>
        /// <typeparam name="T">The element type.</typeparam>
        /// <param name="extent">The number of elements to allocate.</param>
        private void AllocateSharedMemoryInternal<T>(int extent)
            where T : unmanaged
        {
            var requiredBytes = extent * Interop.SizeOf<T>();

            if (advancedSharedMemoryBufferIndex >= 0)
            {
                // An additional buffer is already active: try to carve the
                // region out of its remaining space.
                var activeBuffer = advancedSharedMemoryBuffer[advancedSharedMemoryBufferIndex];
                if (sharedMemoryOffset + requiredBytes <= activeBuffer.Length)
                {
                    currentSharedMemoryView = activeBuffer.View.GetSubView(
                        sharedMemoryOffset,
                        requiredBytes);
                    sharedMemoryOffset += requiredBytes;
                    return;
                }
            }
            else if (sharedMemoryOffset + requiredBytes <= SharedMemory.Length)
            {
                // No additional buffer is in use yet and the primary
                // shared-memory region still has enough room.
                currentSharedMemoryView = SharedMemory.GetSubView(
                    sharedMemoryOffset,
                    requiredBytes);
                sharedMemoryOffset += requiredBytes;
                return;
            }

            // The current region cannot hold the request: allocate a new chunk
            // that is at least SharedMemoryChunkSize bytes large and place the
            // requested region at its start.
            var chunkLength = IntrinsicMath.Max(requiredBytes, SharedMemoryChunkSize);
            var freshBuffer = Accelerator.Allocate<byte>(chunkLength);
            advancedSharedMemoryBuffer.Add(freshBuffer);

            currentSharedMemoryView = freshBuffer.View.GetSubView(0, requiredBytes);
            ++advancedSharedMemoryBufferIndex;
            sharedMemoryOffset = requiredBytes;
        }
Beispiel #5
0
        /// <summary>
        /// Constructs a new OpenCL accelerator.
        /// </summary>
        /// <remarks>
        /// Creates an OpenCL context for the given device, queries the platform
        /// and device properties exposed by this accelerator, and finally binds
        /// the accelerator and creates its default stream and backend.
        /// </remarks>
        /// <param name="context">The ILGPU context.</param>
        /// <param name="acceleratorId">The accelerator id.</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="acceleratorId"/> is null.
        /// </exception>
        public CLAccelerator(Context context, CLAcceleratorId acceleratorId)
            : base(context, AcceleratorType.OpenCL)
        {
            if (acceleratorId == null)
            {
                throw new ArgumentNullException(nameof(acceleratorId));
            }

            PlatformId = acceleratorId.PlatformId;
            DeviceId   = acceleratorId.DeviceId;

            // Resolve platform info
            PlatformName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_NAME);

            VendorName = CLAPI.GetPlatformInfo(
                PlatformId,
                CLPlatformInfoType.CL_PLATFORM_VENDOR);

            // Create new context
            CLException.ThrowIfFailed(
                CLAPI.CreateContext(DeviceId, out contextPtr));

            // Resolve device info
            Name = CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_NAME);

            MemorySize = CLAPI.GetDeviceInfo<long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_GLOBAL_MEM_SIZE);

            DeviceType = (CLDeviceType)CLAPI.GetDeviceInfo<long>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_TYPE);

            // Max grid size.
            // Allocate at least three entries so that the X/Y/Z reads below are
            // always in range, regardless of the reported dimension count.
            // NOTE(review): the OpenCL specification describes
            // CL_DEVICE_MAX_WORK_ITEM_SIZES as the per-dimension limit of a single
            // work-group rather than of the whole grid - confirm that this is the
            // intended source for MaxGridSize.
            int workItemDimensions = IntrinsicMath.Max(
                CLAPI.GetDeviceInfo<int>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS),
                3);
            var workItemSizes = new IntPtr[workItemDimensions];

            CLAPI.GetDeviceInfo(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_ITEM_SIZES,
                workItemSizes);
            MaxGridSize = new Index3(
                workItemSizes[0].ToInt32(),
                workItemSizes[1].ToInt32(),
                workItemSizes[2].ToInt32());

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CLAPI.GetDeviceInfo<IntPtr>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_WORK_GROUP_SIZE).ToInt32();

            // Resolve max shared memory per block (clamped to the int range)
            MaxSharedMemoryPerGroup = (int)IntrinsicMath.Min(
                CLAPI.GetDeviceInfo<long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_LOCAL_MEM_SIZE),
                int.MaxValue);

            // Resolve total constant memory. The value is clamped to the int range
            // in the same way as the shared-memory size above, because a plain
            // (int) cast of a larger long value would silently wrap around.
            MaxConstantMemory = (int)IntrinsicMath.Min(
                CLAPI.GetDeviceInfo<long>(
                    DeviceId,
                    CLDeviceInfoType.CL_DEVICE_MAX_PARAMETER_SIZE),
                int.MaxValue);

            // Resolve clock rate
            ClockRate = CLAPI.GetDeviceInfo<int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_CLOCK_FREQUENCY);

            // Resolve number of multiprocessors
            NumMultiprocessors = CLAPI.GetDeviceInfo<int>(
                DeviceId,
                CLDeviceInfoType.CL_DEVICE_MAX_COMPUTE_UNITS);

            // Result max number of threads per multiprocessor
            MaxNumThreadsPerMultiprocessor = MaxNumThreadsPerGroup;

            InitVendorFeatures();
            InitSubGroupSupport(acceleratorId);

            // Bind this accelerator before creating the default stream and backend
            Bind();
            DefaultStream = CreateStreamInternal();
            base.Backend  = new CLBackend(Context, Backends.Backend.OSPlatform, Vendor);
        }
Beispiel #6
0
 /// <summary>
 /// Returns the last (inclusive) field access of this span.
 /// </summary>
 /// <returns>
 /// The result of adding <c>Span - 1</c>, clamped to a minimum of zero,
 /// to <c>Access</c>.
 /// </returns>
 public FieldAccess GetLastAccess()
 {
     // Clamp so that the added offset is never negative.
     var lastOffset = IntrinsicMath.Max(Span - 1, 0);
     return Access.Add(lastOffset);
 }
Beispiel #7
0
 /// <summary>
 /// Forwards both <see cref="ulong"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static ulong Max(ulong first, ulong second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #8
0
 /// <summary>
 /// Forwards both <see cref="uint"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static uint Max(uint first, uint second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #9
0
 /// <summary>
 /// Forwards both <see cref="ushort"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static ushort Max(ushort first, ushort second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #10
0
 /// <summary>
 /// Forwards both <see cref="byte"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static byte Max(byte first, byte second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #11
0
 /// <summary>
 /// Forwards both <see cref="int"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static int Max(int first, int second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #12
0
 /// <summary>
 /// Forwards both <see cref="float"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static float Max(float first, float second)
 {
     return IntrinsicMath.Max(first, second);
 }
Beispiel #13
0
 /// <summary>
 /// Forwards both <see cref="double"/> arguments to <c>IntrinsicMath.Max</c>.
 /// </summary>
 /// <param name="first">The first argument.</param>
 /// <param name="second">The second argument.</param>
 /// <returns>The value returned by <c>IntrinsicMath.Max</c>.</returns>
 public static double Max(double first, double second)
 {
     return IntrinsicMath.Max(first, second);
 }