/// <summary>
/// Creates a collector that keeps references to the objects it is handed; no other work is done here.
/// </summary>
/// <param name="wordsPerNumber">Number of 32-bit words used to represent a single number.</param>
/// <param name="pRho">The <see cref="Pollard_Rho"/> instance to store.</param>
/// <param name="gpuSpecialPointsBuffer">The GPU buffer of special points to store.</param>
/// <param name="startingPointGenerator">The starting point generator to store.</param>
public SpecialPointCollector(int wordsPerNumber, Pollard_Rho pRho, OpenCLBuffer<uint> gpuSpecialPointsBuffer, StartingPointGenerator startingPointGenerator)
{
    // Collaborating objects.
    this.startingPointGenerator = startingPointGenerator;
    this.gpuSpecialPointsBuffer = gpuSpecialPointsBuffer;
    this.pRho = pRho;

    // Scalar configuration.
    this.wordsPerNumber = wordsPerNumber;
}
/// <summary>
/// Creates a solver for the problem described by <paramref name="input"/>: copies the problem
/// parameters, derives the word count per number, and allocates the GPU buffers together with
/// the starting point generator and the special point collector.
/// </summary>
/// <param name="input">Tuple providing the modulus, generator, order and element.</param>
/// <exception cref="NotImplementedException">Thrown when the modulus is even.</exception>
public DLPSolver(InputTuple input)
{
    modulus = input.Modulus;
    if (modulus % 2 == 0)
    {
        throw new NotImplementedException("At the moment, it is not possible to use an even number for a modulus.");
    }

    generator = input.Generator;
    order = input.Order;
    element = input.Element;

    // Every number is stored as wordsPerNumber uints, i.e. rAsPower = 32 * wordsPerNumber bits.
    wordsPerNumber = modulus.ToUintArray().Length;
    rAsPower = 32 * wordsPerNumber;

    var pRho = new Pollard_Rho(input, rAsPower);

    // Initialize the startingPointGenerator.
    var startingPointWords = new uint[4 * NUM_GPU_THREADS * wordsPerNumber];
    gpuStartingPointsBuffer = new OpenCLBuffer<uint>(program, startingPointWords);
    startingPointGenerator = new StartingPointGenerator(
        input,
        rAsPower,
        wordsPerNumber,
        pRho,
        program,
        gpuStartingPointsBuffer);

    // Initialize the specialPointCollector.
    var specialPointWords = new uint[2 * wordsPerNumber * 4 * NUM_GPU_THREADS];
    gpuSpecialPointsBuffer = new OpenCLBuffer<uint>(program, specialPointWords);
    specialPointCollector = new SpecialPointCollector(
        wordsPerNumber,
        pRho,
        gpuSpecialPointsBuffer,
        startingPointGenerator);
}
/// <summary>
/// Ensures the results buffer holds exactly <paramref name="numResults"/> elements.
/// A missing buffer counts as length 0; when the length already matches nothing happens,
/// otherwise the old buffer (if any) is disposed and a new write-only buffer is created.
/// </summary>
/// <param name="numResults">The required number of elements in the results buffer.</param>
private void ReallocateResultsBufferIfNecessary(int numResults)
{
    var currentLength = _resultsBuffer?.Length ?? 0;

    if (currentLength != numResults)
    {
        _resultsBuffer?.Dispose();
        _resultsBuffer = _runner.CreateWriteOnlyBuffer<ushort>(numResults);
    }
}
/// <summary>
/// Creates the "generate_chain" kernel, allocates the GPU buffers it needs and binds all
/// fifteen kernel arguments.
/// </summary>
/// <param name="elementMontgomery">
/// The element, converted to a uint array and uploaded to the element buffer
/// (presumably already in Montgomery form, as the name suggests — confirm with the caller).
/// </param>
/// <param name="generatorMontgomery">
/// The generator, converted to a uint array and uploaded to the generator buffer
/// (presumably already in Montgomery form — confirm with the caller).
/// </param>
/// <param name="gpuCounterBuffer">Receives the newly created one-element counter buffer bound to kernel argument 10.</param>
/// <param name="answer">
/// Receives the answer reported while the starting points were generated: the value from
/// <c>GetVerticalStartingPointsArray</c> when it is non-null, otherwise the value returned by
/// <c>FillStartingPointsBuffer</c>.
/// </param>
/// <returns>The kernel with all arguments set.</returns>
private OpenCLKernel InitKernel(BigInteger elementMontgomery, BigInteger generatorMontgomery, out OpenCLBuffer<int> gpuCounterBuffer, out BigInteger? answer)
{
    // Make all inputs GPU ready, by converting them to uint arrays.
    // Note: each number (e.g. special points, the modulus etc.) will be represented with
    // wordsPerNumber uints (wordsPerNumber * 32 bits).
    uint[] gpuModulus = modulus.ToUintArray().PadWithDefaultForLength(wordsPerNumber); // TODO: Remove padding?
    uint[] gpuModulusPrime = modulusPrime.ToUintArray().PadWithDefaultForLength(wordsPerNumber);
    uint[] gpuElement = elementMontgomery.ToUintArray().PadWithDefaultForLength(wordsPerNumber);
    uint[] gpuGenerator = generatorMontgomery.ToUintArray().PadWithDefaultForLength(wordsPerNumber);

    // Input buffers.
    OpenCLBuffer<uint> gpuModulusBuffer = new OpenCLBuffer<uint>(program, gpuModulus);
    OpenCLBuffer<uint> gpuGeneratorBuffer = new OpenCLBuffer<uint>(program, gpuGenerator);
    OpenCLBuffer<uint> gpuElementBuffer = new OpenCLBuffer<uint>(program, gpuElement);

    // Buffers for local memory. There is room for an additional 2 numbers, which will be used to store
    // the generator and element in local memory.
    OpenCLBuffer<uint> gpuNumbersBuffer = new OpenCLBuffer<uint>(program, new uint[wordsPerNumber * (2 + 32)]);

    // Counter buffer.
    gpuCounterBuffer = new OpenCLBuffer<int>(program, new int[1]);

    // Buffers for saving numbers between kernel executions.
    uint[] startingPointsArray = startingPointGenerator.GetVerticalStartingPointsArray(NUM_GPU_THREADS, out answer);
    OpenCLBuffer<uint> gpuSavedNumbersBuffer = new OpenCLBuffer<uint>(program, startingPointsArray);
    OpenCLBuffer<uint> gpuUsedStartingPointBuffer = new OpenCLBuffer<uint>(program, startingPointsArray);
    OpenCLBuffer<long> gpuIterationCounts = new OpenCLBuffer<long>(program, new long[NUM_GPU_THREADS]);

    // Fill the gpuStartingPointBuffer. The buffer is always filled, but an answer that was
    // already reported by GetVerticalStartingPointsArray must not be overwritten (and lost)
    // by the result of this second call.
    BigInteger? fillAnswer = startingPointGenerator.FillStartingPointsBuffer(4 * NUM_GPU_THREADS);
    answer = answer ?? fillAnswer;

    OpenCLKernel kernel = new OpenCLKernel(program, "generate_chain");

    // Set the kernel arguments.
    kernel.SetArgument(0, gpuStartingPointsBuffer);
    kernel.SetArgument(1, gpuSavedNumbersBuffer);
    kernel.SetArgument(2, gpuUsedStartingPointBuffer);
    kernel.SetLocalArgument(3, gpuNumbersBuffer);
    kernel.SetArgument(4, gpuModulusBuffer);
    kernel.SetArgument<uint>(5, gpuModulusPrime[0]);
    kernel.SetArgument(6, gpuGeneratorBuffer);
    kernel.SetArgument(7, gpuElementBuffer);
    kernel.SetArgument(8, gpuSpecialPointsBuffer);
    kernel.SetArgument<int>(9, wordsPerNumber);
    kernel.SetArgument(10, gpuCounterBuffer);
    kernel.SetArgument(11, gpuIterationCounts);

    // Maximum chain length is 16 * 2^k. The shift is done on a long: a 32-bit shift would
    // overflow (or wrap its shift count) as soon as k + 4 reaches 31.
    kernel.SetArgument<long>(12, 1L << (Program.K + 4));
    kernel.SetArgument<int>(13, Program.K / 32); // Value of k, in words.
    kernel.SetArgument<int>(14, Program.K % 32); // Remaining value of k.

    return kernel;
}
/// <summary>
/// Copies the contents of <paramref name="sourceBuffer"/> into <paramref name="destinationArray"/>
/// by mapping the buffer for reading, copying the bytes and unmapping it again.
/// </summary>
/// <typeparam name="T">Element type of the destination array.</typeparam>
/// <param name="sourceBuffer">The buffer to read from.</param>
/// <param name="destinationArray">
/// The array that receives the data; <c>destinationArray.Length * sizeof(T)</c> bytes are copied.
/// </param>
public void ReadMappedBuffer<T>(OpenCLBuffer sourceBuffer, T[] destinationArray)
{
    using (var destinationArrayHandle = new PinnedObject(destinationArray))
    {
        var mappedPtr = MapBufferForReading(sourceBuffer);
        try
        {
            var cb = (uint)(destinationArray.Length * Marshal.SizeOf(typeof(T)));
            CopyMemory(destinationArrayHandle, mappedPtr, cb);
        }
        finally
        {
            // Always release the mapping, even when the copy fails; otherwise the buffer
            // would stay mapped.
            UnmapBuffer(sourceBuffer, ref mappedPtr);
        }
    }
}
/// <summary>
/// Creates a new <see cref="OpenCLSubBuffer{T}"/> from a specified <see cref="OpenCLBuffer{T}"/>.
/// </summary>
/// <param name="buffer"> The buffer the <see cref="OpenCLSubBuffer{T}"/> is derived from; its context and element type are passed to the base constructor. </param>
/// <param name="flags"> A bit-field specifying allocation and usage information; passed both to the base constructor and to the sub-buffer creation call. </param>
/// <param name="offset"> The index of the first element of <paramref name="buffer"/> covered by the region (converted to bytes using the element size). </param>
/// <param name="count"> The number of elements of <paramref name="buffer"/> covered by the region (converted to bytes using the element size). </param>
public OpenCLSubBuffer(OpenCLBuffer buffer, OpenCLMemoryFlags flags, long offset, long count)
    : base(buffer.Context, flags, buffer.ElementType, new long[] { count })
{
    // The region is expressed in bytes: (origin, size).
    int elementSize = Marshal.SizeOf(buffer.ElementType);
    SysIntX2 byteRegion = new SysIntX2(offset * elementSize, count * elementSize);

    // NOTE(review): the call below is given this instance's own Handle rather than the handle
    // of 'buffer', and the returned handle is only kept in a local that is never used.
    // Confirm whether this is intended.
    OpenCLErrorCode status;
    CLMemoryHandle handle = CL11.CreateSubBuffer(Handle, flags, OpenCLBufferCreateType.Region, ref byteRegion, out status);
    OpenCLException.ThrowOnError(status);

    Init();
}
/// <summary>
/// Unmaps a previously mapped region of <paramref name="buffer"/> through the command queue.
/// A zero pointer is treated as "nothing mapped" and is ignored.
/// </summary>
/// <param name="buffer">The buffer the pointer was mapped from.</param>
/// <param name="mappedPtr">The mapped pointer; passed by reference to the command queue's unmap call.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="buffer"/> is <c>null</c>.</exception>
private void UnmapBuffer(OpenCLBuffer buffer, ref IntPtr mappedPtr)
{
    // The buffer is validated first, so a null buffer throws even for a zero pointer.
    if (buffer == null)
    {
        throw new ArgumentNullException(nameof(buffer));
    }

    if (mappedPtr != IntPtr.Zero)
    {
        CommandQueue.Unmap(buffer, ref mappedPtr);
    }
}
/// <summary>
/// Reads <c>destinationArray.Length</c> elements from the start of <paramref name="sourceBuffer"/>
/// into <paramref name="destinationArray"/> using a blocking read on the command queue.
/// </summary>
/// <typeparam name="T">Element type of the destination array.</typeparam>
/// <param name="sourceBuffer">The buffer to read from.</param>
/// <param name="destinationArray">The array that receives the data; it is pinned for the duration of the read.</param>
public void ReadBuffer<T>(OpenCLBuffer sourceBuffer, T[] destinationArray)
{
    const bool blocking = true;
    const long offset = 0L;

    using (var pinnedDestination = new PinnedObject(destinationArray))
    {
        CommandQueue.ReadFromBuffer(sourceBuffer, pinnedDestination, blocking, offset, destinationArray.Length);
    }
}
/// <summary>
/// Creates a starting point generator: stores the problem parameters, creates the
/// "add_new_starting_points" kernel, allocates the buffer and pool for new starting points,
/// and binds the two buffers as kernel arguments 0 and 1.
/// </summary>
/// <param name="input">Tuple providing the modulus, generator, order and element.</param>
/// <param name="rAsPower">Value stored in the <c>rAsPower</c> field.</param>
/// <param name="wordsPerNumber">Number of 32-bit words used to represent a single number.</param>
/// <param name="pRho">The <see cref="Pollard_Rho"/> instance to store.</param>
/// <param name="program">The OpenCL program the kernel and buffer are created from.</param>
/// <param name="startingPointsBuffer">The existing starting points buffer, bound as kernel argument 0.</param>
public StartingPointGenerator(InputTuple input, int rAsPower, int wordsPerNumber, Pollard_Rho pRho, OpenCLProgram program, OpenCLBuffer<uint> startingPointsBuffer)
{
    // Problem parameters taken from the input tuple.
    this.modulus = input.Modulus;
    this.generator = input.Generator;
    this.order = input.Order;
    this.element = input.Element;

    // Configuration handed in by the caller.
    this.rAsPower = rAsPower;
    this.wordsPerNumber = wordsPerNumber;
    this.pRho = pRho;

    // The GPU-side buffer and the host-side pool have the same number of words.
    int poolWordCount = 4 * DLPSolver.NUM_GPU_THREADS * wordsPerNumber;

    this.kernel = new OpenCLKernel(program, "add_new_starting_points");
    this.newStartingPointsBuffer = new OpenCLBuffer<uint>(program, new uint[poolWordCount]);
    this.startingPointPool = new uint[poolWordCount];

    this.kernel.SetArgument(0, startingPointsBuffer);
    this.kernel.SetArgument(1, this.newStartingPointsBuffer);
}
/// <summary>
/// Disposes the OpenCL value buffer, command queue and context (in that order) and clears
/// the corresponding fields.
/// </summary>
/// <remarks>
/// Safe to call more than once, and safe to call when a resource was never created: fields
/// that are already <c>null</c> are skipped instead of causing a <see cref="NullReferenceException"/>.
/// </remarks>
private void DisposeUnmanagedResources()
{
    oclValueBuffer?.Dispose();
    oclValueBuffer = null;

    oclQueue?.Dispose();
    oclQueue = null;

    oclContext?.Dispose();
    oclContext = null;
}
/// <summary>
/// Allocates the read/write float value buffer with <c>allocator.Size</c> elements and
/// fills it with zeros in chunks of at most 1000 elements, then waits for the queue to finish.
/// </summary>
/// <param name="allocator">Supplies the size of the value buffer.</param>
/// <param name="connectedLayerGroups">Not used by this implementation.</param>
/// <param name="initPars">Not used by this implementation.</param>
protected override void Built(BufferAllocator allocator, ConnectedLayerGroups connectedLayerGroups, NNInitParameters initPars)
{
    // Create buffer:
    oclValueBuffer = oclContext.CreateBuffer<float>(allocator.Size, ComputeMemoryFlags.ReadWrite);

    // Fill with zeros:
    // TODO: Move this into an OpenCLUtils class or similar.
    int chunkLength = 1000;
    float[] zeroChunk = new float[chunkLength];

    // A partial chunk (if any) goes first, so that every following write is a full chunk.
    int leadingLength = allocator.Size % chunkLength;
    if (leadingLength != 0)
    {
        oclQueue.Write(oclValueBuffer, zeroChunk, 0, leadingLength, false);
    }

    int position = leadingLength;
    while (position < allocator.Size)
    {
        oclQueue.Write(oclValueBuffer, zeroChunk, position, chunkLength, false);
        position += chunkLength;
    }

    // The writes above pass 'false' as their last argument (presumably non-blocking — confirm);
    // wait here until the queue has finished.
    oclQueue.ComputeCommandQueue.Finish();
}
/// <summary>
/// Sums 1024 * 1024 floats (each 42) on the given device using the "reductionVector" and
/// "reductionComplete" kernels, and prints the OpenCL result next to the expected value.
/// </summary>
/// <param name="platform">The OpenCL platform used to build the context properties.</param>
/// <param name="device">The OpenCL device the context, program and command queue are created for.</param>
private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
{
    const int numValues = 1024 * 1024;
    const int numValuesPerWorkItem = 4;
    const int localWorkSize = 32;
    const int value = 42;
    const int correctAnswer = numValues * value;

    var context = new OpenCLContext(new List<OpenCLDevice> { device }, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
    var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
    var kernel1 = program.CreateKernel("reductionVector");
    var kernel2 = program.CreateKernel("reductionComplete");

    var globalWorkSize = numValues / numValuesPerWorkItem;
    var initialNumWorkGroups = globalWorkSize / localWorkSize;
    var data = Enumerable.Repeat((float)value, numValues).ToArray();

    var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

    var floatType = typeof(float);
    var floatSize = sizeof(float);
    var dataBuffer1 = new OpenCLBuffer(
        context,
        OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer,
        floatType,
        new long[] { numValues });
    var dataBuffer2 = new OpenCLBuffer(
        context,
        OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer,
        floatType,
        new long[] { initialNumWorkGroups * numValuesPerWorkItem });
    var sumBuffer = new OpenCLBuffer(
        context,
        OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer,
        floatType,
        new long[] { 1 });

    // Upload the input values (blocking write).
    using (var pinnedData = new PinnedObject(data))
    {
        commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
    }

    // Run kernel1 repeatedly, alternating the two data buffers as input and output, until
    // the remaining global work size is no larger than one work group.
    var resultDataBuffer = dataBuffer2;
    for (var pass = 0; ; pass++)
    {
        var evenPass = pass % 2 == 0;
        var dataBufferIn = evenPass ? dataBuffer1 : dataBuffer2;
        var dataBufferOut = evenPass ? dataBuffer2 : dataBuffer1;
        resultDataBuffer = dataBufferOut;

        kernel1.SetMemoryArgument(0, dataBufferIn);
        kernel1.SetMemoryArgument(1, dataBufferOut);
        kernel1.SetLocalArgument(2, localWorkSize * numValuesPerWorkItem * floatSize);

        Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");
        commandQueue.Execute(kernel1, null, new long[] { globalWorkSize }, new long[] { localWorkSize });

        globalWorkSize /= localWorkSize;
        if (globalWorkSize <= localWorkSize)
        {
            break;
        }
    }

    // Final pass: a single work group of globalWorkSize items writes the total into sumBuffer.
    kernel2.SetMemoryArgument(0, resultDataBuffer);
    kernel2.SetLocalArgument(1, globalWorkSize * numValuesPerWorkItem * floatSize);
    kernel2.SetMemoryArgument(2, sumBuffer);

    Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");
    commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize });

    commandQueue.Finish();

    // Read the single result value back (blocking read).
    var sum = new float[1];
    using (var pinnedSum = new PinnedObject(sum))
    {
        commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
    }

    Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
}
/// <summary>
/// Runs a two-kernel sum reduction ("reductionVector" followed by "reductionComplete") over
/// 1024 * 1024 floats of value 42 and prints the computed sum together with the expected one.
/// </summary>
/// <param name="platform">The OpenCL platform used to build the context properties.</param>
/// <param name="device">The OpenCL device used for the context, the program and the command queue.</param>
private static void RunKernel(OpenCLPlatform platform, OpenCLDevice device)
{
    var context = new OpenCLContext(new List<OpenCLDevice> { device }, new OpenCLContextPropertyList(platform), null, IntPtr.Zero);
    var program = LoadProgram(context, device, "ReductionUsingFSCLOpenCLManagedWrapper.reduction.cl");
    var kernel1 = program.CreateKernel("reductionVector");
    var kernel2 = program.CreateKernel("reductionComplete");

    // Problem size and work partitioning.
    const int numValues = 1024 * 1024;
    const int numValuesPerWorkItem = 4;
    const int localWorkSize = 32;
    var globalWorkSize = numValues / numValuesPerWorkItem;
    var initialNumWorkGroups = globalWorkSize / localWorkSize;

    // Input data: numValues copies of the same value.
    const int value = 42;
    var data = Enumerable.Repeat(value, numValues).Select(n => (float)n).ToArray();

    var commandQueue = new OpenCLCommandQueue(context, device, OpenCLCommandQueueProperties.None);

    var floatType = typeof(float);
    var floatSize = sizeof(float);
    var readWriteFlags = OpenCLMemoryFlags.ReadWrite | OpenCLMemoryFlags.AllocateHostPointer;
    var dataBuffer1 = new OpenCLBuffer(context, readWriteFlags, floatType, new long[] { numValues });
    var dataBuffer2 = new OpenCLBuffer(context, readWriteFlags, floatType, new long[] { initialNumWorkGroups * numValuesPerWorkItem });
    var sumBuffer = new OpenCLBuffer(context, OpenCLMemoryFlags.WriteOnly | OpenCLMemoryFlags.AllocateHostPointer, floatType, new long[] { 1 });

    using (var pinnedData = new PinnedObject(data))
    {
        commandQueue.WriteToBuffer(pinnedData, dataBuffer1, true, 0L, numValues);
    }

    // The two data buffers swap roles after every pass; the first pass reads dataBuffer1 and
    // writes dataBuffer2. The loop stops once the global work size fits into one work group.
    var dataBufferIn = dataBuffer1;
    var dataBufferOut = dataBuffer2;
    var resultDataBuffer = dataBuffer2;
    do
    {
        resultDataBuffer = dataBufferOut;

        kernel1.SetMemoryArgument(0, dataBufferIn);
        kernel1.SetMemoryArgument(1, dataBufferOut);
        kernel1.SetLocalArgument(2, localWorkSize * numValuesPerWorkItem * floatSize);

        Console.WriteLine($"Calling commandQueue.Execute(kernel1) with globalWorkSize: {globalWorkSize}; localWorkSize: {localWorkSize}; num work groups: {globalWorkSize / localWorkSize}");
        commandQueue.Execute(kernel1, null, new long[] { globalWorkSize }, new long[] { localWorkSize });

        globalWorkSize /= localWorkSize;

        // Swap input and output for the next pass (harmless after the last pass, because the
        // result buffer has already been captured above).
        var previousIn = dataBufferIn;
        dataBufferIn = dataBufferOut;
        dataBufferOut = previousIn;
    }
    while (globalWorkSize > localWorkSize);

    kernel2.SetMemoryArgument(0, resultDataBuffer);
    kernel2.SetLocalArgument(1, globalWorkSize * numValuesPerWorkItem * floatSize);
    kernel2.SetMemoryArgument(2, sumBuffer);

    Console.WriteLine($"Calling commandQueue.Execute(kernel2) with globalWorkSize: {globalWorkSize}; localWorkSize: {globalWorkSize}");
    commandQueue.Execute(kernel2, null, new long[] { globalWorkSize }, new long[] { globalWorkSize });

    commandQueue.Finish();

    var sum = new float[1];
    using (var pinnedSum = new PinnedObject(sum))
    {
        commandQueue.ReadFromBuffer(sumBuffer, pinnedSum, true, 0L, 1L);
    }

    const int correctAnswer = numValues * value;
    Console.WriteLine($"OpenCL final answer: {Math.Truncate(sum[0]):N0}; Correct answer: {correctAnswer:N0}");
}