コード例 #1
0
 /// <summary>
 /// Creates a collector that keeps references to the given collaborators.
 /// </summary>
 /// <param name="wordsPerNumber"> Stored as-is in the <c>wordsPerNumber</c> field. </param>
 /// <param name="pRho"> Stored as-is in the <c>pRho</c> field. </param>
 /// <param name="gpuSpecialPointsBuffer"> Stored as-is in the <c>gpuSpecialPointsBuffer</c> field. </param>
 /// <param name="startingPointGenerator"> Stored as-is in the <c>startingPointGenerator</c> field. </param>
 public SpecialPointCollector(int wordsPerNumber, Pollard_Rho pRho, OpenCLBuffer <uint> gpuSpecialPointsBuffer, StartingPointGenerator startingPointGenerator)
 {
     // Collaborating objects.
     this.startingPointGenerator = startingPointGenerator;
     this.gpuSpecialPointsBuffer = gpuSpecialPointsBuffer;
     this.pRho = pRho;

     // Scalar configuration.
     this.wordsPerNumber = wordsPerNumber;
 }
コード例 #2
0
        /// <summary>
        /// Initializes the solver from <paramref name="input"/>: copies the problem values, derives the
        /// word count and <c>rAsPower</c>, and creates the starting point generator and the special
        /// point collector together with their buffers.
        /// </summary>
        /// <param name="input"> Supplies the modulus, generator, order and element. </param>
        /// <exception cref="NotImplementedException"> The modulus is even. </exception>
        public DLPSolver(InputTuple input)
        {
            modulus = input.Modulus;

            // Even moduli are not supported yet; reject them before anything else is set up.
            bool modulusIsEven = modulus % 2 == 0;
            if (modulusIsEven)
            {
                throw new NotImplementedException("At the moment, it is not possible to use an even number for a modulus.");
            }

            element   = input.Element;
            order     = input.Order;
            generator = input.Generator;

            // Every number is stored in wordsPerNumber 32-bit words, so rAsPower is that size in bits.
            wordsPerNumber = modulus.ToUintArray().Length;
            rAsPower       = 32 * wordsPerNumber;

            var pollardRho = new Pollard_Rho(input, rAsPower);

            // Starting point generator and its buffer.
            var startingPointsLength = 4 * NUM_GPU_THREADS * wordsPerNumber;
            gpuStartingPointsBuffer = new OpenCLBuffer <uint>(program, new uint[startingPointsLength]);
            startingPointGenerator  = new StartingPointGenerator(input, rAsPower, wordsPerNumber, pollardRho, program, gpuStartingPointsBuffer);

            // Special point collector and its buffer.
            var specialPointsLength = 2 * wordsPerNumber * 4 * NUM_GPU_THREADS;
            gpuSpecialPointsBuffer = new OpenCLBuffer <uint>(program, new uint[specialPointsLength]);
            specialPointCollector  = new SpecialPointCollector(wordsPerNumber, pollardRho, gpuSpecialPointsBuffer, startingPointGenerator);
        }
コード例 #3
0
 /// <summary>
 /// Makes sure <c>_resultsBuffer</c> has exactly <paramref name="numResults"/> elements,
 /// disposing and recreating it when the current length differs.
 /// </summary>
 /// <param name="numResults"> The required number of elements. </param>
 private void ReallocateResultsBufferIfNecessary(int numResults)
 {
     // A buffer that has not been created yet counts as having length 0.
     var lengthMatches = (_resultsBuffer?.Length ?? 0) == numResults;

     if (!lengthMatches)
     {
         _resultsBuffer?.Dispose();
         _resultsBuffer = _runner.CreateWriteOnlyBuffer <ushort>(numResults);
     }
 }
コード例 #4
0
        /// <summary>
        /// Creates the "generate_chain" kernel: converts the inputs to uint arrays, allocates the
        /// buffers the kernel works on and binds all fifteen kernel arguments.
        /// </summary>
        /// <param name="elementMontgomery"> The element (presumably already in Montgomery form, per its name); uploaded as kernel argument 7. </param>
        /// <param name="generatorMontgomery"> The generator (presumably already in Montgomery form, per its name); uploaded as kernel argument 6. </param>
        /// <param name="gpuCounterBuffer"> Receives the newly created one-element counter buffer (kernel argument 10). </param>
        /// <param name="answer">
        /// Receives the value reported by <c>FillStartingPointsBuffer</c> when it has one; otherwise the value
        /// reported by <c>GetVerticalStartingPointsArray</c>.
        /// </param>
        /// <returns> The kernel with all arguments set. </returns>
        private OpenCLKernel InitKernel(BigInteger elementMontgomery, BigInteger generatorMontgomery, out OpenCLBuffer <int> gpuCounterBuffer, out BigInteger? answer)
        {
            // Make all inputs GPU ready, by converting them to uint arrays.
            // Note: each number (e.g. special points, the modulus etc.) will be represented with
            // wordsPerNumber uints (wordsPerNumber * 32 bits).
            uint[] gpuModulus = modulus.ToUintArray().PadWithDefaultForLength(wordsPerNumber); // TODO: Remove padding?

            uint[] gpuModulusPrime = modulusPrime.ToUintArray().PadWithDefaultForLength(wordsPerNumber);
            uint[] gpuElement      = elementMontgomery.ToUintArray().PadWithDefaultForLength(wordsPerNumber);
            uint[] gpuGenerator    = generatorMontgomery.ToUintArray().PadWithDefaultForLength(wordsPerNumber);

            // Input buffers.
            OpenCLBuffer <uint> gpuModulusBuffer   = new OpenCLBuffer <uint>(program, gpuModulus);
            OpenCLBuffer <uint> gpuGeneratorBuffer = new OpenCLBuffer <uint>(program, gpuGenerator);
            OpenCLBuffer <uint> gpuElementBuffer   = new OpenCLBuffer <uint>(program, gpuElement);

            // Buffers for local memory. There is room for an additional 2 numbers, which will be used to store
            // the generator and element in local memory.
            OpenCLBuffer <uint> gpuNumbersBuffer = new OpenCLBuffer <uint>(program, new uint[wordsPerNumber * (2 + 32)]);

            // Counter buffer.
            gpuCounterBuffer = new OpenCLBuffer <int>(program, new int[1]);

            // Buffers for saving numbers between kernel executions.
            uint[] startingPointsArray = startingPointGenerator.GetVerticalStartingPointsArray(NUM_GPU_THREADS, out answer);
            OpenCLBuffer <uint> gpuSavedNumbersBuffer      = new OpenCLBuffer <uint>(program, startingPointsArray);
            OpenCLBuffer <uint> gpuUsedStartingPointBuffer = new OpenCLBuffer <uint>(program, startingPointsArray);
            OpenCLBuffer <long> gpuIterationCounts         = new OpenCLBuffer <long>(program, new long[NUM_GPU_THREADS]);

            // Fill the gpuStartingPointBuffer. The call always runs (the buffer must be filled), but its
            // result only replaces the earlier answer when it actually carries one; previously an answer
            // reported by GetVerticalStartingPointsArray was silently overwritten here.
            BigInteger? refillAnswer = startingPointGenerator.FillStartingPointsBuffer(4 * NUM_GPU_THREADS);
            if (refillAnswer.HasValue)
            {
                answer = refillAnswer;
            }

            OpenCLKernel kernel = new OpenCLKernel(program, "generate_chain");

            // Set the kernel arguments.
            kernel.SetArgument(0, gpuStartingPointsBuffer);
            kernel.SetArgument(1, gpuSavedNumbersBuffer);
            kernel.SetArgument(2, gpuUsedStartingPointBuffer);
            kernel.SetLocalArgument(3, gpuNumbersBuffer);
            kernel.SetArgument(4, gpuModulusBuffer);
            kernel.SetArgument <uint>(5, gpuModulusPrime[0]);
            kernel.SetArgument(6, gpuGeneratorBuffer);
            kernel.SetArgument(7, gpuElementBuffer);

            kernel.SetArgument(8, gpuSpecialPointsBuffer);

            kernel.SetArgument <int>(9, wordsPerNumber);
            kernel.SetArgument(10, gpuCounterBuffer);

            kernel.SetArgument(11, gpuIterationCounts);

            // Maximum chain length is 16 * 2^k. The shift must be done on a long: an int shift only uses
            // the low five bits of the count, so "1 << (K + 4)" wraps around as soon as K + 4 reaches 32.
            kernel.SetArgument <long>(12, 1L << (Program.K + 4));
            kernel.SetArgument <int>(13, Program.K / 32);        // Value of k, in words.
            kernel.SetArgument <int>(14, Program.K % 32);        // Remaining value of k.

            return(kernel);
        }
コード例 #5
0
 /// <summary>
 /// Maps <paramref name="sourceBuffer"/> for reading, copies as many bytes as
 /// <paramref name="destinationArray"/> holds into that array, and unmaps the buffer again.
 /// </summary>
 /// <typeparam name="T"> Element type of the destination array; its size is taken from <c>Marshal.SizeOf</c>. </typeparam>
 /// <param name="sourceBuffer"> The buffer to map and read from. </param>
 /// <param name="destinationArray"> The array that receives the data; it is pinned for the duration of the copy. </param>
 /// <exception cref="OverflowException"> The byte size of <paramref name="destinationArray"/> does not fit in a <c>uint</c>. </exception>
 public void ReadMappedBuffer <T>(OpenCLBuffer sourceBuffer, T[] destinationArray)
 {
     using (var destinationArrayHandle = new PinnedObject(destinationArray))
     {
         // Work out the byte count before mapping, so a failure here cannot leave the buffer mapped.
         // The product is computed in 64 bits and range-checked instead of being silently truncated.
         var cb = checked((uint)((long)destinationArray.Length * Marshal.SizeOf(typeof(T))));

         var mappedPtr = MapBufferForReading(sourceBuffer);
         try
         {
             CopyMemory(destinationArrayHandle, mappedPtr, cb);
         }
         finally
         {
             // Always release the mapping, even when the copy throws.
             UnmapBuffer(sourceBuffer, ref mappedPtr);
         }
     }
 }
コード例 #6
0
        /// <summary>
        /// Initializes the base buffer with the context and element type of <paramref name="buffer"/>, sized for
        /// <paramref name="count"/> elements, and then requests a region sub-buffer from OpenCL.
        /// </summary>
        /// <param name="buffer"> The buffer whose context and element type are used. </param>
        /// <param name="flags"> The memory flags passed both to the base constructor and to <c>CL11.CreateSubBuffer</c>. </param>
        /// <param name="offset"> The index of the first element of the region; converted to a byte offset. </param>
        /// <param name="count"> The number of elements in the region; converted to a byte count. </param>
        public OpenCLSubBuffer(OpenCLBuffer buffer, OpenCLMemoryFlags flags, long offset, long count)
            : base(buffer.Context, flags, buffer.ElementType, new long[] { count })
        {
            // The region is expressed in bytes, so scale both values by the element size.
            var elementSize = Marshal.SizeOf(buffer.ElementType);
            var byteOffset  = offset * elementSize;
            var byteCount   = count * elementSize;
            SysIntX2 region = new SysIntX2(byteOffset, byteCount);

            // NOTE(review): the call receives this object's own Handle rather than buffer.Handle, and the
            // handle it returns is never stored. That looks unintended - confirm against the base class.
            OpenCLErrorCode status;
            CLMemoryHandle subBufferHandle = CL11.CreateSubBuffer(Handle, flags, OpenCLBufferCreateType.Region, ref region, out status);
            OpenCLException.ThrowOnError(status);

            Init();
        }
コード例 #7
0
ファイル: OpenCLSubBuffer.cs プロジェクト: forki/FSCL.Runtime
        /// <summary>
        /// Initializes the base buffer with the context and element type of <paramref name="buffer"/>, sized for
        /// <paramref name="count"/> elements, and then requests a region sub-buffer from OpenCL.
        /// </summary>
        /// <param name="buffer"> The buffer whose context and element type are used. </param>
        /// <param name="flags"> The memory flags passed both to the base constructor and to <c>CL11.CreateSubBuffer</c>. </param>
        /// <param name="offset"> The index of the first element of the region; converted to a byte offset. </param>
        /// <param name="count"> The number of elements in the region; converted to a byte count. </param>
        public OpenCLSubBuffer(OpenCLBuffer buffer, OpenCLMemoryFlags flags, long offset, long count)
            : base(buffer.Context, flags, buffer.ElementType, new long[] { count })
        {
            // Both region components are byte quantities: element index/count times the element size.
            var bytesPerElement = Marshal.SizeOf(buffer.ElementType);
            SysIntX2 region = new SysIntX2(offset * bytesPerElement, count * bytesPerElement);

            // NOTE(review): the call receives this object's own Handle rather than buffer.Handle, and the
            // handle it returns is never stored. That looks unintended - confirm against the base class.
            OpenCLErrorCode errorCode;
            CLMemoryHandle createdHandle = CL11.CreateSubBuffer(Handle, flags, OpenCLBufferCreateType.Region, ref region, out errorCode);

            OpenCLException.ThrowOnError(errorCode);

            Init();
        }
コード例 #8
0
 /// <summary>
 /// Unmaps <paramref name="mappedPtr"/> from <paramref name="buffer"/> via the command queue.
 /// Does nothing when the pointer is <c>IntPtr.Zero</c>.
 /// </summary>
 /// <param name="buffer"> The buffer the pointer was mapped from. </param>
 /// <param name="mappedPtr"> The mapped pointer; passed by reference to <c>CommandQueue.Unmap</c>. </param>
 /// <exception cref="ArgumentNullException"> <paramref name="buffer"/> is null. </exception>
 private void UnmapBuffer(OpenCLBuffer buffer, ref IntPtr mappedPtr)
 {
     if (buffer == null)
     {
         throw new ArgumentNullException(nameof(buffer));
     }

     // A zero pointer means there is nothing to unmap.
     var hasMapping = mappedPtr != IntPtr.Zero;
     if (hasMapping)
     {
         CommandQueue.Unmap(buffer, ref mappedPtr);
     }
 }
コード例 #9
0
 /// <summary>
 /// Reads from <paramref name="sourceBuffer"/> into <paramref name="destinationArray"/> with a
 /// blocking read that starts at offset 0 and spans the length of the array.
 /// </summary>
 /// <typeparam name="T"> Element type of the destination array. </typeparam>
 /// <param name="sourceBuffer"> The buffer to read from. </param>
 /// <param name="destinationArray"> The array that receives the data; pinned while the read runs. </param>
 public void ReadBuffer <T>(OpenCLBuffer sourceBuffer, T[] destinationArray)
 {
     const bool blocking = true;
     const long offset   = 0L;

     using (var pinnedDestination = new PinnedObject(destinationArray))
     {
         var region = destinationArray.Length;
         CommandQueue.ReadFromBuffer(sourceBuffer, pinnedDestination, blocking, offset, region);
     }
 }
コード例 #10
0
        /// <summary>
        /// Stores the problem values and settings, creates the "add_new_starting_points" kernel together with
        /// the new-starting-points buffer and the host-side pool, and binds the kernel's two arguments.
        /// </summary>
        /// <param name="input"> Supplies the modulus, generator, order and element. </param>
        /// <param name="rAsPower"> Stored as-is in the <c>rAsPower</c> field. </param>
        /// <param name="wordsPerNumber"> Stored as-is; also determines the size of the buffer and the pool. </param>
        /// <param name="pRho"> Stored as-is in the <c>pRho</c> field. </param>
        /// <param name="program"> The program used to create the kernel and the buffer. </param>
        /// <param name="startingPointsBuffer"> Bound as kernel argument 0. </param>
        public StartingPointGenerator(InputTuple input, int rAsPower, int wordsPerNumber, Pollard_Rho pRho, OpenCLProgram program, OpenCLBuffer <uint> startingPointsBuffer)
        {
            // Settings and collaborators.
            this.pRho           = pRho;
            this.wordsPerNumber = wordsPerNumber;
            this.rAsPower       = rAsPower;

            // Problem values.
            this.element   = input.Element;
            this.order     = input.Order;
            this.generator = input.Generator;
            this.modulus   = input.Modulus;

            this.kernel = new OpenCLKernel(program, "add_new_starting_points");

            // The device buffer and the host pool have the same number of words.
            var poolLength = 4 * DLPSolver.NUM_GPU_THREADS * wordsPerNumber;
            this.newStartingPointsBuffer = new OpenCLBuffer <uint>(program, new uint[poolLength]);
            this.startingPointPool       = new uint[poolLength];

            this.kernel.SetArgument(0, startingPointsBuffer);
            this.kernel.SetArgument(1, this.newStartingPointsBuffer);
        }
コード例 #11
0
 /// <summary>
 /// Disposes the value buffer, the queue and the context, in that order, and clears the fields.
 /// Safe to call more than once or when some of the fields were never assigned: each field is
 /// only disposed when it is non-null, so a repeated call no longer throws a NullReferenceException.
 /// </summary>
 private void DisposeUnmanagedResources()
 {
     if (oclValueBuffer != null)
     {
         oclValueBuffer.Dispose();
         oclValueBuffer = null;
     }

     if (oclQueue != null)
     {
         oclQueue.Dispose();
         oclQueue = null;
     }

     if (oclContext != null)
     {
         oclContext.Dispose();
         oclContext = null;
     }
 }
コード例 #12
0
        /// <summary>
        /// Creates the value buffer with <c>allocator.Size</c> float elements and overwrites it with zeros,
        /// then waits for the command queue to finish.
        /// </summary>
        /// <param name="allocator"> Provides the size of the value buffer. </param>
        /// <param name="connectedLayerGroups"> Not used by this method. </param>
        /// <param name="initPars"> Not used by this method. </param>
        protected override void Built(BufferAllocator allocator, ConnectedLayerGroups connectedLayerGroups, NNInitParameters initPars)
        {
            oclValueBuffer = oclContext.CreateBuffer<float>(allocator.Size, ComputeMemoryFlags.ReadWrite);

            // Zero the buffer by writing one reusable block of zeros chunk by chunk.
            // TODO: move this into a shared OpenCL utility class.
            int chunkLength = 1000;
            float[] zeroChunk = new float[chunkLength];

            // A shorter leading write absorbs the remainder, so every following write is a full chunk.
            int leadingLength = allocator.Size % chunkLength;
            if (leadingLength != 0)
            {
                oclQueue.Write(oclValueBuffer, zeroChunk, 0, leadingLength, false);
            }

            int offset = leadingLength;
            while (offset < allocator.Size)
            {
                oclQueue.Write(oclValueBuffer, zeroChunk, offset, chunkLength, false);
                offset += chunkLength;
            }

            // The writes pass false as their last argument (presumably non-blocking - confirm against the
            // queue API), so wait here until the queue has processed them.
            oclQueue.ComputeCommandQueue.Finish();
        }
コード例 #13
0
        /// <summary>
        /// Sums an array of identical float values on the given device: runs the "reductionVector" kernel
        /// repeatedly, alternating between two buffers, until the work size is at most one work group, then
        /// runs "reductionComplete" and prints the result next to the expected sum.
        /// </summary>
        /// <param name="platform"> The platform used for the context properties. </param>
        /// <param name="device"> The device the context, program and command queue are created for. </param>
        private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
        {
            const int numValues            = 1024 * 1024;
            const int numValuesPerWorkItem = 4;
            const int localWorkSize        = 32;
            const int value                = 42;
            const int correctAnswer        = numValues * value;

            var devices = new List <OpenCLDevice> {
                device
            };
            var context = new OpenCLContext(devices, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
            var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
            var kernel1 = program.CreateKernel("reductionVector");
            var kernel2 = program.CreateKernel("reductionComplete");

            var globalWorkSize       = numValues / numValuesPerWorkItem;
            var initialNumWorkGroups = globalWorkSize / localWorkSize;
            var data                 = Enumerable.Repeat((float)value, numValues).ToArray();

            var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

            var floatType = typeof(float);
            var floatSize = sizeof(float);

            // Two data buffers are used alternately as input and output of the first kernel.
            var readWriteFlags   = OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer;
            var dataBuffer1      = new OpenCLBuffer(context, readWriteFlags, floatType, new long[] { numValues });
            var dataBuffer2      = new OpenCLBuffer(context, readWriteFlags, floatType, new long[] { initialNumWorkGroups * numValuesPerWorkItem });
            var sumBuffer        = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 });
            var resultDataBuffer = dataBuffer2;

            using (var pinnedData = new PinnedObject(data))
            {
                commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
            }

            var localBytes = localWorkSize * numValuesPerWorkItem * floatSize;

            for (var pass = 0; pass < int.MaxValue; pass++)
            {
                // Even passes read buffer 1 and write buffer 2; odd passes go the other way.
                var evenPass = pass % 2 == 0;
                var source   = evenPass ? dataBuffer1 : dataBuffer2;
                var target   = evenPass ? dataBuffer2 : dataBuffer1;
                resultDataBuffer = target;

                kernel1.SetMemoryArgument(0, source);
                kernel1.SetMemoryArgument(1, target);
                kernel1.SetLocalArgument(2, localBytes);

                Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");

                commandQueue.Execute(kernel1, null, new long[] { globalWorkSize }, new long[] { localWorkSize });

                // The next pass works on one item per work group of this pass.
                globalWorkSize /= localWorkSize;
                if (globalWorkSize <= localWorkSize)
                {
                    break;
                }
            }

            // Final stage: one work group whose size equals the remaining work size.
            kernel2.SetMemoryArgument(0, resultDataBuffer);
            kernel2.SetLocalArgument(1, globalWorkSize * numValuesPerWorkItem * floatSize);
            kernel2.SetMemoryArgument(2, sumBuffer);

            Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");

            commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize });

            commandQueue.Finish();

            var sum = new float[1];

            using (var pinnedSum = new PinnedObject(sum))
            {
                commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
            }

            Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
        }
コード例 #14
0
        /// <summary>
        /// Sums an array of identical float values on the given device: runs the "reductionVector" kernel
        /// repeatedly, swapping the input and output buffers after each pass, until the work size is at most
        /// one work group, then runs "reductionComplete" and prints the result next to the expected sum.
        /// </summary>
        /// <param name="platform"> The platform used for the context properties. </param>
        /// <param name="device"> The device the context, program and command queue are created for. </param>
        private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
        {
            const int numValues = 1024 * 1024;
            const int numValuesPerWorkItem = 4;
            const int localWorkSize = 32;
            const int value = 42;

            var context = new OpenCLContext(new List<OpenCLDevice> { device }, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
            var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
            var kernel1 = program.CreateKernel("reductionVector");
            var kernel2 = program.CreateKernel("reductionComplete");

            var globalWorkSize = numValues / numValuesPerWorkItem;
            var initialNumWorkGroups = globalWorkSize / localWorkSize;
            var data = Enumerable.Repeat(value, numValues).Select(n => (float)n).ToArray();

            var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

            var floatType = typeof(float);
            var floatSize = sizeof(float);

            var dataBuffer1 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { numValues });
            var dataBuffer2 = new OpenCLBuffer(context, OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { initialNumWorkGroups * numValuesPerWorkItem });
            var sumBuffer = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 });

            using (var pinnedData = new PinnedObject(data))
            {
                commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
            }

            // The first pass reads buffer 1 and writes buffer 2; the roles are swapped after every pass.
            var source = dataBuffer1;
            var target = dataBuffer2;

            while (true)
            {
                kernel1.SetMemoryArgument(0, source);
                kernel1.SetMemoryArgument(1, target);
                kernel1.SetLocalArgument(2, localWorkSize * numValuesPerWorkItem * floatSize);

                Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");

                commandQueue.Execute(kernel1, null, new long[] { globalWorkSize }, new long[] { localWorkSize });

                // Each division shrinks the work size, so this exit is always reached after a few passes.
                globalWorkSize /= localWorkSize;
                if (globalWorkSize <= localWorkSize)
                {
                    break;
                }

                var previousSource = source;
                source = target;
                target = previousSource;
            }

            // "target" is the buffer written by the last pass; it feeds the final stage.
            kernel2.SetMemoryArgument(0, target);
            kernel2.SetLocalArgument(1, globalWorkSize * numValuesPerWorkItem * floatSize);
            kernel2.SetMemoryArgument(2, sumBuffer);

            Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");

            var finalWorkSize = new long[] { globalWorkSize };
            var finalGroupSize = new long[] { globalWorkSize };
            commandQueue.Execute(kernel2, null, finalWorkSize, finalGroupSize);

            commandQueue.Finish();

            var sum = new float[1];
            using (var pinnedSum = new PinnedObject(sum))
            {
                commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
            }

            const int correctAnswer = numValues * value;

            Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
        }