/// <summary>
/// Determines, in a parallelized way, whether each of the input numbers is a prime.
/// </summary>
/// <remarks>
/// This shows how to write parallelized code that Hastlayer turns into hardware-level parallelization: the
/// bodies of the Tasks are copied in hardware as many times as Tasks are started, so the degree of parallelism
/// achieved on the hardware follows the number of Tasks, not the number of CPU cores.
/// </remarks>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> holding the number count and the input numbers; the boolean results are
/// written back into it.
/// </param>
public virtual void ParallelizedArePrimeNumbers(SimpleMemory memory)
{
    // The count has to be passed explicitly because arrays can't be stored directly in memory.
    uint numberCount = memory.ReadUInt32(ArePrimeNumbers_InputUInt32CountIndex);

    // Hastlayer currently only supports a fixed degree of parallelism, so the input array has to be padded
    // if necessary, see PrimeCalculatorExtensions.
    var primeChecks = new Task<bool>[MaxDegreeOfParallelism];

    // The input is processed in batches of MaxDegreeOfParallelism numbers.
    for (int batchStart = 0; batchStart < numberCount; batchStart += MaxDegreeOfParallelism)
    {
        for (int slot = 0; slot < MaxDegreeOfParallelism; slot++)
        {
            var candidate = memory.ReadUInt32(ArePrimeNumbers_InputUInt32sStartIndex + batchStart + slot);

            // (Thread-safe) methods can be called from inside Tasks as usual. In hardware the invoked
            // methods are copied together with the Tasks' bodies.
            primeChecks[slot] = Task.Factory.StartNew(
                boxedNumber => IsPrimeNumberInternal((uint)boxedNumber),
                candidate);
        }

        // Hastlayer doesn't support async code at the moment since ILSpy doesn't handle the new
        // Roslyn-compiled code. See: https://github.com/icsharpcode/ILSpy/issues/502
        Task.WhenAll(primeChecks).Wait();

        for (int slot = 0; slot < MaxDegreeOfParallelism; slot++)
        {
            memory.WriteBoolean(
                ArePrimeNumbers_OutputBooleansStartIndex + batchStart + slot,
                primeChecks[slot].Result);
        }
    }
}
/// <summary>
/// Generates pseudo-random 16-bit coordinate pairs in parallel Tasks, counts how many of them satisfy
/// a² + b² &lt;= ushort.MaxValue², and writes the total count to memory.
/// </summary>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> holding the iteration count and the random seed; the summed in-circle
/// count is written back into it.
/// </param>
public virtual void EstimatePi(SimpleMemory memory)
{
    // Squared radius of the circle the generated points are tested against.
    const uint radiusSquared = (uint)ushort.MaxValue * ushort.MaxValue;

    var iterationsCount = memory.ReadUInt32(EstimatePi_IteractionsCountUInt32Index);
    var randomSeed = (ushort)memory.ReadUInt32(EstimatePi_RandomSeedUInt32Index);

    // Every Task runs the same share of the iterations.
    var iterationsPerTask = iterationsCount / MaxDegreeOfParallelism;
    var samplers = new Task<uint>[MaxDegreeOfParallelism];

    for (uint taskIndex = 0; taskIndex < MaxDegreeOfParallelism; taskIndex++)
    {
        samplers[taskIndex] = Task.Factory.StartNew(
            state =>
            {
                var index = (uint)state;

                // A 16b PRNG is enough for this task and the xorshift one has suitable quality. Each Task
                // gets its own state, derived from the seed and the Task's index.
                var random = new RandomXorshiftLfsr16 { State = (ushort)(randomSeed + index) };
                uint hits = 0;

                for (int iteration = 0; iteration < iterationsPerTask; iteration++)
                {
                    uint x = random.NextUInt16();
                    uint y = random.NextUInt16();

                    // Further parallelization could be exploited with SIMD (multiplying the two numbers as
                    // vectors with Common.Numerics.SimdOperations.MultiplyVectors) to shave off some
                    // execution time. However, that needs so many hardware resources that the degree of
                    // parallelism would have to be lowered substantially (below 60).
                    uint xSquared = x * x;
                    uint ySquared = y * y;

                    // The sum is computed as ulong so adding the two squares can't overflow.
                    if ((ulong)xSquared + ySquared <= radiusSquared)
                    {
                        hits++;
                    }
                }

                return hits;
            },
            taskIndex);
    }

    Task.WhenAll(samplers).Wait();

    uint inCircleCountSum = 0;
    for (int taskIndex = 0; taskIndex < MaxDegreeOfParallelism; taskIndex++)
    {
        inCircleCountSum += samplers[taskIndex].Result;
    }

    memory.WriteUInt32(EstimatePi_InCircleCountSumUInt32Index, inCircleCountSum);
}
/// <summary>
/// Checks every input number stored in memory for primality, one after the other.
/// </summary>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> holding the number count and the input numbers; one boolean result per
/// number is written back into it.
/// </param>
public virtual void ArePrimeNumbers(SimpleMemory memory)
{
    uint numberCount = memory.ReadUInt32(ArePrimeNumbers_InputUInt32CountIndex);

    int offset = 0;
    while (offset < numberCount)
    {
        // The result of the number at a given offset goes to the same offset of the output area.
        memory.WriteBoolean(
            ArePrimeNumbers_OutputUInt32sStartIndex + offset,
            IsPrimeNumberInternal(memory.ReadUInt32(ArePrimeNumbers_InputUInt32sStartIndex + offset)));

        offset++;
    }
}
/// <summary>
/// Checks every input number stored in memory for primality, one after the other.
/// </summary>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> holding the number count and the input numbers; one boolean result per
/// number is written back into it.
/// </param>
public virtual void ArePrimeNumbers(SimpleMemory memory)
{
    // The count has to be passed explicitly because arrays can't be stored directly in memory.
    uint numberCount = memory.ReadUInt32(ArePrimeNumbers_InputUInt32CountIndex);

    for (int position = 0; position < numberCount; position++)
    {
        uint candidate = memory.ReadUInt32(ArePrimeNumbers_InputUInt32sStartIndex + position);
        var isPrime = IsPrimeNumberInternal(candidate);

        memory.WriteBoolean(ArePrimeNumbers_OutputBooleansStartIndex + position, isPrime);
    }
}
/// <summary>
/// Advances the 64-bit state kept in memory cells 0 and 1 by one step and writes a random word to cell 2.
/// </summary>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> with the state's low 32 bits in cell 0 and its high 32 bits in cell 1. The
/// new state is written back to the same cells and the generated word to cell 2.
/// </param>
public virtual void MWC64X(SimpleMemory memory)
{
    uint upperWord = memory.ReadUInt32(1);
    uint lowerWord = memory.ReadUInt32(0);

    // The multiplication is carried out on 64 bits because of the ulong literal.
    ulong nextState = lowerWord * 0xFFFEB81BUL + upperWord;

    // The output is derived from the state as it was before this step.
    uint output = lowerWord ^ upperWord;

    // Cell 0 receives the low 32 bits of the new state, cell 1 the high 32 bits.
    memory.WriteUInt32(0, (uint)nextState);
    memory.WriteUInt32(1, (uint)(nextState >> 32));
    memory.WriteUInt32(2, output);
}
/// <summary>
/// Determines, in a parallelized way, whether each of the input numbers is a prime.
/// </summary>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> holding the number count and the input numbers; the boolean results are
/// written back into it.
/// </param>
public virtual void ParallelizedArePrimeNumbers(SimpleMemory memory)
{
    // The count has to be passed explicitly because arrays can't be stored directly in memory.
    uint numberCount = memory.ReadUInt32(ArePrimeNumbers_InputUInt32CountIndex);

    // Hastlayer currently only supports a fixed degree of parallelism, so the input array has to be padded
    // if necessary, see PrimeCalculatorExtensions.
    var primeChecks = new Task<bool>[MaxDegreeOfParallelism];

    // The input is processed in batches of MaxDegreeOfParallelism numbers.
    for (int batchStart = 0; batchStart < numberCount; batchStart += MaxDegreeOfParallelism)
    {
        for (int slot = 0; slot < MaxDegreeOfParallelism; slot++)
        {
            var candidate = memory.ReadUInt32(ArePrimeNumbers_InputUInt32sStartIndex + batchStart + slot);

            primeChecks[slot] = Task.Factory.StartNew(
                boxedNumber =>
                {
                    // This duplicates the body of IsPrimeNumberInternal(). That method could be called from
                    // here too, but doing the check inline is more efficient as it adds no indirection.
                    var value = (uint)boxedNumber;
                    uint largestDivisor = value / 2;

                    uint divisor = 2;
                    while (divisor <= largestDivisor)
                    {
                        if ((value % divisor) == 0)
                        {
                            return false;
                        }

                        divisor++;
                    }

                    return true;
                },
                candidate);
        }

        // Hastlayer doesn't support async code at the moment since ILSpy doesn't handle the new
        // Roslyn-compiled code. See: https://github.com/icsharpcode/ILSpy/issues/502
        Task.WhenAll(primeChecks).Wait();

        for (int slot = 0; slot < MaxDegreeOfParallelism; slot++)
        {
            memory.WriteBoolean(
                ArePrimeNumbers_OutputBooleansStartIndex + batchStart + slot,
                primeChecks[slot].Result);
        }
    }
}
/// <summary>
/// Convenience wrapper that runs the algorithm on a single input number and returns its output.
/// </summary>
/// <param name="algorithm">The algorithm instance to run.</param>
/// <param name="input">The number to write to the input cell.</param>
/// <returns>The number read back from the output cell after the run.</returns>
public static uint Run(this ParallelAlgorithm algorithm, uint input)
{
    // A single memory cell is allocated for the run.
    var cells = new SimpleMemory(1);
    cells.WriteUInt32(ParallelAlgorithm.Run_InputUInt32Index, input);

    algorithm.Run(cells);

    var output = cells.ReadUInt32(ParallelAlgorithm.Run_OutputUInt32Index);
    return output;
}
public virtual void Run(SimpleMemory memory) { var inputNumber = memory.ReadUInt32(Run_InputUInt32Index); // Or: inputNumber = new MemoryContainer(memory).GetInput(); // Arrays can be initialized as usual, as well as objects. var numberContainers1 = new[] { new NumberContainer { Number = inputNumber }, new NumberContainer { Number = inputNumber + 4 }, new NumberContainer { Number = 24 }, new NumberContainer(9) }; // Array elements can be accessed and modified as usual. numberContainers1[0].NumberPlusFive = inputNumber + 10; numberContainers1[1].IncreaseNumber(5); numberContainers1[2].IncreaseNumberBy10(); // Using ref and out. uint increaseBy = 10; numberContainers1[3].IncreaseNumberByParameterTimes10(ref increaseBy, out uint originalNumber);
/// <summary>
/// Reads a number from memory, checks whether it is a prime and writes the boolean result back.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> holding the input and receiving the output.</param>
public virtual void IsPrimeNumber(SimpleMemory memory)
{
    var candidate = memory.ReadUInt32(IsPrimeNumber_InputUInt32Index);

    memory.WriteBoolean(IsPrimeNumber_OutputBooleanIndex, IsPrimeNumberInternal(candidate));
}
/// <summary>
/// Advances the 64-bit state kept in memory cells 0 and 1 by one step and writes a random word to cell 2.
/// </summary>
/// <param name="memory">
/// The <see cref="SimpleMemory"/> with the state's low 32 bits in cell 0 and its high 32 bits in cell 1. The
/// new state is written back to the same cells and the generated word to cell 2.
/// </param>
public virtual void MWC64X(SimpleMemory memory)
{
    uint upperWord = memory.ReadUInt32(1);
    uint lowerWord = memory.ReadUInt32(0);

    // The multiplier 0xFFFEB81B is assembled from its two 16-bit halves because the literal can't be used
    // directly due to an ILSpy bug, see: https://github.com/icsharpcode/ILSpy/issues/807
    uint multiplierUpperHalf = 0xFFFE;
    uint multiplierLowerHalf = 0xB81B;
    uint multiplier = (multiplierUpperHalf << 16) | multiplierLowerHalf;

    // Widening to ulong first keeps all 64 bits of the product.
    ulong product = (ulong)lowerWord * multiplier;
    ulong nextState = product + upperWord;

    // The output is derived from the state as it was before this step.
    uint output = lowerWord ^ upperWord;

    // Cell 0 receives the low 32 bits of the new state, cell 1 the high 32 bits.
    memory.WriteUInt32(0, (uint)nextState);
    memory.WriteUInt32(1, (uint)(nextState >> 32));
    memory.WriteUInt32(2, output);
}
/// <summary>
/// Convenience overload that runs <c>CalculateFibonacchiSeries(SimpleMemory)</c> for a single number.
/// </summary>
/// <param name="number">The input number, written to the input cell as a 32-bit integer.</param>
/// <returns>The value read back from the output cell after the run.</returns>
public uint CalculateFibonacchiSeries(short number)
{
    // Two memory cells are allocated for the run.
    var cells = new SimpleMemory(2);
    cells.WriteInt32(CalculateFibonacchiSeries_InputShortIndex, number);

    CalculateFibonacchiSeries(cells);

    var output = cells.ReadUInt32(CalculateFibonacchiSeries_OutputUInt32Index);
    return output;
}
/// <summary>
/// Convenience wrapper that runs <c>CalculateFactorial(SimpleMemory)</c> for a single number.
/// </summary>
/// <param name="recursiveAlgorithms">The algorithm instance to run.</param>
/// <param name="number">The input number, written to the input cell as a 32-bit integer.</param>
/// <returns>The value read back from the output cell after the run.</returns>
public static uint CalculateFactorial(this RecursiveAlgorithms recursiveAlgorithms, short number)
{
    // Two memory cells are allocated for the run.
    var cells = new SimpleMemory(2);
    cells.WriteInt32(RecursiveAlgorithms.CalculateFactorial_InputShortIndex, number);

    recursiveAlgorithms.CalculateFactorial(cells);

    var output = cells.ReadUInt32(RecursiveAlgorithms.CalculateFactorial_OutputUInt32Index);
    return output;
}
/// <summary>
/// Convenience overload that runs <c>CalculateFactorial(SimpleMemory)</c> for a single number.
/// </summary>
/// <param name="number">The input number, written to the input cell as a 32-bit integer.</param>
/// <returns>The value read back from the output cell after the run.</returns>
public uint CalculateFactorial(short number)
{
    // Two memory cells are allocated for the run.
    var cells = new SimpleMemory(2);
    cells.WriteInt32(CalculateFactorial_InputShortIndex, number);

    CalculateFactorial(cells);

    var output = cells.ReadUInt32(CalculateFactorial_OutputUInt32Index);
    return output;
}
/// <summary>
/// Convenience wrapper that runs the showcase on a single input number and returns its output.
/// </summary>
/// <param name="algorithm">The showcase instance to run.</param>
/// <param name="input">The number to write to the input cell.</param>
/// <returns>The number read back from the output cell after the run.</returns>
public static uint Run(this ObjectOrientedShowcase algorithm, uint input)
{
    // A single memory cell is allocated for the run.
    var cells = new SimpleMemory(1);
    cells.WriteUInt32(ObjectOrientedShowcase.Run_InputUInt32Index, input);

    algorithm.Run(cells);

    var output = cells.ReadUInt32(ObjectOrientedShowcase.Run_OutputUInt32Index);
    return output;
}
/// <summary>
/// Convenience overload that runs <c>Run(SimpleMemory)</c> on a single input number.
/// </summary>
/// <param name="input">The number to write to the input cell.</param>
/// <returns>The number read back from the output cell after the run.</returns>
public uint Run(uint input)
{
    // A single memory cell is allocated for the run.
    var cells = new SimpleMemory(1);
    cells.WriteUInt32(Run_InputUInt32Index, input);

    Run(cells);

    var output = cells.ReadUInt32(Run_OutputUInt32Index);
    return output;
}
/// <summary>
/// Checks whether the number stored in memory is a prime.
/// </summary>
/// <remarks>
/// Entry points of algorithms using SimpleMemory should be void methods with a single
/// <see cref="SimpleMemory"/> argument.
/// </remarks>
/// <param name="memory">The <see cref="SimpleMemory"/> object representing the accessible memory space.</param>
public virtual void IsPrimeNumber(SimpleMemory memory)
{
    // The input parameter comes from memory...
    var candidate = memory.ReadUInt32(IsPrimeNumber_InputUInt32Index);
    var isPrime = IsPrimeNumberInternal(candidate);

    // ...and the output goes back there too.
    memory.WriteBoolean(IsPrimeNumber_OutputBooleanIndex, isPrime);
}
/// <summary>
/// Adds two numbers on the FPGA through <see cref="KpzKernelsInterface.TestAdd(SimpleMemory)"/>.
/// </summary>
/// <param name="kernels">The kernel interface whose <c>TestAdd</c> is invoked.</param>
/// <param name="a">The first operand, written to memory cell 0.</param>
/// <param name="b">The second operand, written to memory cell 1.</param>
/// <returns>The value read from memory cell 2 after the call.</returns>
public static uint TestAddWrapper(this KpzKernelsInterface kernels, uint a, uint b)
{
    // Three cells: two for the operands and one for the result.
    var cells = new SimpleMemory(3);
    cells.WriteUInt32(0, a);
    cells.WriteUInt32(1, b);

    kernels.TestAdd(cells);

    var sum = cells.ReadUInt32(2);
    return sum;
}
/// <summary>Pulls the table from the FPGA by deserializing every grid cell stored in memory.</summary>
/// <param name="gridDst">The grid to fill, indexed as [x, y].</param>
/// <param name="memorySrc">The <see cref="SimpleMemory"/> to read the serialized nodes from.</param>
public static void CopyFromSimpleMemoryToGrid(KpzNode[,] gridDst, SimpleMemory memorySrc)
{
    for (int column = 0; column < KpzKernels.GridWidth; column++)
    {
        for (int row = 0; row < KpzKernels.GridHeight; row++)
        {
            // The grid is laid out row by row in memory, starting at MemIndexGrid.
            var cellIndex = KpzKernels.MemIndexGrid + row * KpzKernels.GridWidth + column;
            var serializedNode = memorySrc.ReadUInt32(cellIndex);

            gridDst[column, row] = KpzNode.DeserializeFromUInt32(serializedNode);
        }
    }
}
/// <summary>
/// Copies the grid data to BRAM/LUT RAM from DDR.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> to read the grid cells from.</param>
public void CopyFromSimpleMemoryToRawGrid(SimpleMemory memory)
{
    for (int column = 0; column < GridWidth; column++)
    {
        for (int row = 0; row < GridHeight; row++)
        {
            // The raw grid uses the same row-by-row layout as the memory area starting at MemIndexGrid.
            int rawIndex = row * GridWidth + column;
            var cellValue = memory.ReadUInt32(MemIndexGrid + rawIndex);

            _gridRaw[rawIndex] = cellValue;
        }
    }
}
/// <summary>
/// Writes the cells of the memory as 8-digit uppercase hexadecimal numbers, HexDumpBlocksPerLine cells per
/// line, separated by single spaces.
/// </summary>
/// <param name="writer">The writer receiving the dump.</param>
/// <param name="memory">The <see cref="SimpleMemory"/> whose cells are dumped.</param>
public static void WriteHexdump(TextWriter writer, SimpleMemory memory)
{
    int lineStart = 0;
    while (lineStart < memory.CellCount)
    {
        for (int column = 0; column < HexDumpBlocksPerLine; column++)
        {
            int cellIndex = lineStart + column;

            // The last line may be shorter than the others.
            if (cellIndex >= memory.CellCount)
            {
                break;
            }

            // Every cell but the first one of the line is preceded by a space.
            string separator;
            if (column == 0)
            {
                separator = "";
            }
            else
            {
                separator = " ";
            }

            writer.Write("{0}{1:X8}", separator, memory.ReadUInt32(cellIndex));
        }

        writer.WriteLine();
        lineStart += HexDumpBlocksPerLine;
    }
}
/// <summary>Pulls the table from the FPGA by deserializing every grid cell stored in memory.</summary>
/// <param name="gridDst">The grid to fill, indexed as [x, y].</param>
/// <param name="memorySrc">The <see cref="SimpleMemory"/> to read the serialized nodes from.</param>
public static void CopyFromSimpleMemoryToGrid(KpzNode[,] gridDst, SimpleMemory memorySrc)
{
    for (int column = 0; column < KpzKernelsParallelizedInterface.GridSize; column++)
    {
        for (int row = 0; row < KpzKernelsParallelizedInterface.GridSize; row++)
        {
            // The grid is laid out row by row in memory, starting at MemIndexGrid.
            var cellIndex = KpzKernelsParallelizedInterface.MemIndexGrid
                + row * KpzKernelsParallelizedInterface.GridSize
                + column;
            var serializedNode = memorySrc.ReadUInt32(cellIndex);

            gridDst[column, row] = KpzNode.DeserializeFromUInt32(serializedNode);
        }
    }
}
/// <summary>
/// Recursively computes the factorial of the given number, incrementing the invocation counter stored in
/// memory on every call.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> holding the invocation counter.</param>
/// <param name="number">The number to compute the factorial of.</param>
/// <returns>The factorial, truncated to 32 bits.</returns>
private uint RecursivelyCalculateFactorial(SimpleMemory memory, short number)
{
    // Every invocation, including the terminating one, is counted.
    var invocationsSoFar = memory.ReadUInt32(CalculateFactorial_InvocationCounterUInt32Index);
    memory.WriteUInt32(CalculateFactorial_InvocationCounterUInt32Index, invocationsSoFar + 1);

    if (number != 0)
    {
        var factorialOfPrevious = RecursivelyCalculateFactorial(memory, (short)(number - 1));
        return (uint)(number * factorialOfPrevious);
    }

    return 1;
}
/// <summary>
/// Recursively computes the element of the Fibonacci series at the given position, incrementing the
/// invocation counter stored in memory on every call.
/// </summary>
/// <remarks>
/// The return value should be a type with a bigger range than the input. Although 64b numbers can be used
/// internally they can't be written to memory yet, so the input needs to be a short.
/// </remarks>
/// <param name="memory">The <see cref="SimpleMemory"/> holding the invocation counter.</param>
/// <param name="number">The position in the series.</param>
/// <returns>The element of the series at the given position.</returns>
private uint RecursivelyCalculateFibonacchiSeries(SimpleMemory memory, short number)
{
    // Every invocation, including the terminating ones, is counted.
    var invocationsSoFar = memory.ReadUInt32(CalculateFibonacchiSeries_InvocationCounterUInt32Index);
    memory.WriteUInt32(CalculateFibonacchiSeries_InvocationCounterUInt32Index, invocationsSoFar + 1);

    // The first two elements of the series are the positions themselves.
    if (number == 0 || number == 1)
    {
        return (uint)number;
    }

    var twoBefore = RecursivelyCalculateFibonacchiSeries(memory, (short)(number - 2));
    var oneBefore = RecursivelyCalculateFibonacchiSeries(memory, (short)(number - 1));

    return twoBefore + oneBefore;
}
/// <summary>
/// Reads the weight and centre of mass of a section of torus with varying density from a
/// <see cref="SimpleMemory"/> object.
/// </summary>
/// <param name="simpleMemory">The <see cref="SimpleMemory"/> object that contains the result.</param>
/// <returns>
/// A <see cref="MonteCarloResult"/> whose W, X, Y, Z, DW, DX, DY and DZ values are taken from the
/// corresponding memory cells.
/// </returns>
private static MonteCarloResult GetResult(SimpleMemory simpleMemory)
{
    var result = new MonteCarloResult();

    result.W = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_WIndex);
    result.X = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_XIndex);
    result.Y = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_YIndex);
    result.Z = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_ZIndex);

    result.DW = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_DWIndex);
    result.DX = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_DXIndex);
    result.DY = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_DYIndex);
    result.DZ = simpleMemory.ReadUInt32(MonteCarloAlgorithm.MonteCarloAlgorithm_DZIndex);

    return result;
}
/// <summary>
/// Loads the TestMode and NumberOfIterations parameters as well as the seeds of the two PRNGs from the
/// SimpleMemory at the beginning.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> to read the parameters from.</param>
public void InitializeParametersFromMemory(SimpleMemory memory)
{
    // Each 64-bit seed occupies two consecutive cells: the high 32 bits come first, then the low 32 bits.
    ulong firstSeedHigh = memory.ReadUInt32(MemIndexRandomStates);
    ulong firstSeedLow = memory.ReadUInt32(MemIndexRandomStates + 1);
    Prng1 = new PrngMWC64X((firstSeedHigh << 32) | firstSeedLow);

    ulong secondSeedHigh = memory.ReadUInt32(MemIndexRandomStates + 2);
    ulong secondSeedLow = memory.ReadUInt32(MemIndexRandomStates + 3);
    Prng2 = new PrngMWC64X((secondSeedHigh << 32) | secondSeedLow);

    // Only the lowest bit of the step mode cell decides about the test mode.
    var stepMode = memory.ReadUInt32(MemIndexStepMode);
    TestMode = (stepMode & 1) == 1;

    NumberOfIterations = memory.ReadUInt32(MemIndexNumberOfIterations);
}
/// <summary>
/// Showcases object-oriented constructs: builds and modifies <c>NumberContainer</c> objects and arrays of
/// them, then writes the sum computed by <c>SumNumberCointainers</c> to memory.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> holding the input number and receiving the output.</param>
public virtual void Run(SimpleMemory memory)
{
    var inputNumber = memory.ReadUInt32(Run_InputUInt32Index);
    // Or: inputNumber = new MemoryContainer(memory).GetInput();

    // Arrays, just like objects, can be initialized as usual.
    var containers = new[]
    {
        new NumberContainer { Number = inputNumber },
        new NumberContainer { Number = inputNumber + 4 },
        new NumberContainer { Number = 24 },
        new NumberContainer(9)
    };

    // Accessing and modifying array elements works as usual too.
    containers[0].NumberPlusFive = inputNumber + 10;
    containers[1].IncreaseNumber(5);

    // Array dimensions need to be defined compile-time. They needn't be constants directly used when
    // instantiating the array, but the size argument needs to be resolvable compile-time (so if it's a
    // variable then its value should be computable from all other values at compile-time).
    var singleContainerArray = new NumberContainer[1];

    var standaloneContainer = new NumberContainer();
    standaloneContainer.Number = 5;
    standaloneContainer.Number = standaloneContainer.NumberPlusFive;

    if (!standaloneContainer.WasIncreased)
    {
        standaloneContainer.IncreaseNumber(5);
    }

    singleContainerArray[0] = standaloneContainer;

    int position = 0;
    while (position < containers.Length)
    {
        containers[position].IncreaseNumber(singleContainerArray[0].Number);
        position++;
    }

    // Arrays and other objects can also be passed around to other methods.
    var sum = SumNumberCointainers(containers);
    memory.WriteUInt32(Run_OutputUInt32Index, sum);
}
/// <summary>
/// Starts MaxDegreeOfParallelism Tasks that each run the same long computation on the input number and
/// the Task's index, then writes the sum of their results to memory.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> holding the input number and receiving the output.</param>
public virtual void Run(SimpleMemory memory)
{
    var input = memory.ReadUInt32(Run_InputUInt32Index);
    var workers = new Task<uint>[MaxDegreeOfParallelism];

    // Hastlayer figures out how many Tasks are going to be started if they're kicked off in a loop like
    // this. If this is more involved then Hastlayer needs to be told the level of parallelism, see the
    // comment in ParallelAlgorithmSampleRunner.
    for (uint taskIndex = 0; taskIndex < MaxDegreeOfParallelism; taskIndex++)
    {
        workers[taskIndex] = Task.Factory.StartNew(
            state =>
            {
                var index = (uint)state;
                uint accumulator = input + index * 2;

                // The index is alternately added to and subtracted from the accumulator.
                var shouldAdd = true;
                for (int step = 2; step < 9999999; step++)
                {
                    if (shouldAdd)
                    {
                        accumulator += index;
                    }
                    else
                    {
                        accumulator -= index;
                    }

                    shouldAdd = !shouldAdd;
                }

                return accumulator;
            },
            taskIndex);
    }

    // Task.WhenAny() can be used too.
    Task.WhenAll(workers).Wait();

    uint total = 0;
    for (int taskIndex = 0; taskIndex < MaxDegreeOfParallelism; taskIndex++)
    {
        total += workers[taskIndex].Result;
    }

    memory.WriteUInt32(Run_OutputUInt32Index, total);
}
/// <summary>
/// Generates random numbers on the FPGA through
/// <see cref="KpzKernelsInterface.TestPrng(SimpleMemory)"/>.
/// </summary>
/// <param name="kernels">The kernel interface whose <c>TestPrng</c> is invoked.</param>
/// <returns>
/// The first GridWidth * GridHeight memory cells as they are after the call.
/// </returns>
public static uint[] TestPrngWrapper(this KpzKernelsInterface kernels)
{
    // The same fixed seed is passed for both seed parameters.
    const long prngSeed = 0x5289a3b89ac5f211;

    var cellCount = KpzKernels.GridWidth * KpzKernels.GridHeight;
    var randomNumbers = new uint[cellCount];

    var cells = new SimpleMemory(KpzKernels.SizeOfSimpleMemory);
    CopyParametersToMemory(cells, false, prngSeed, prngSeed, 0);

    kernels.TestPrng(cells);

    for (int position = 0; position < cellCount; position++)
    {
        randomNumbers[position] = cells.ReadUInt32(position);
    }

    return randomNumbers;
}
/// <summary>
/// Starts MaxDegreeOfParallelism Tasks that each run the same long computation on the input number and
/// the Task's index, then writes the sum of their results to memory.
/// </summary>
/// <param name="memory">The <see cref="SimpleMemory"/> holding the input number and receiving the output.</param>
public virtual void Run(SimpleMemory memory)
{
    var input = memory.ReadUInt32(Run_InputUInt32Index);
    var workers = new Task<uint>[MaxDegreeOfParallelism];

    for (uint taskIndex = 0; taskIndex < MaxDegreeOfParallelism; taskIndex++)
    {
        workers[taskIndex] = Task.Factory.StartNew(
            state =>
            {
                var index = (uint)state;
                uint accumulator = input + index * 2;

                // The index is alternately added to and subtracted from the accumulator.
                var shouldAdd = true;
                int step = 2;
                while (step < 9999999)
                {
                    if (shouldAdd)
                    {
                        accumulator += index;
                    }
                    else
                    {
                        accumulator -= index;
                    }

                    shouldAdd = !shouldAdd;
                    step++;
                }

                return accumulator;
            },
            taskIndex);
    }

    // Task.WhenAny() can be used too.
    Task.WhenAll(workers).Wait();

    uint total = 0;
    for (int taskIndex = 0; taskIndex < MaxDegreeOfParallelism; taskIndex++)
    {
        total += workers[taskIndex].Result;
    }

    memory.WriteUInt32(Run_OutputUInt32Index, total);
}
/// <summary>
/// Extracts the longest common subsequence from the <see cref="SimpleMemory"/> object.
/// </summary>
/// <param name="simpleMemory">The <see cref="SimpleMemory"/> object that contains the result.</param>
/// <param name="inputOne">The first string to compare.</param>
/// <param name="inputTwo">The second string to compare.</param>
/// <returns>Returns the longest common subsequence.</returns>
private string GetResult(SimpleMemory simpleMemory, string inputOne, string inputTwo)
{
    // The result occupies at most as many cells as the longer input has characters.
    var maxInputLength = Math.Max(inputOne.Length, inputTwo.Length);

    // The result cells start right after the two inputs and an area of twice
    // inputOne.Length * inputTwo.Length cells.
    var startIndex = GetLCS_InputOneStartIndex + inputOne.Length + inputTwo.Length + (inputOne.Length * inputTwo.Length) * 2;

    // A presized StringBuilder avoids allocating a new string for every appended character.
    var result = new StringBuilder(maxInputLength);

    for (int i = 0; i < maxInputLength; i++)
    {
        var currentChar = simpleMemory.ReadUInt32(startIndex + i);
        var currentCharBytes = BitConverter.GetBytes(currentChar);

        // Only the first character decoded from the cell's four bytes is used.
        var firstChar = Encoding.UTF8.GetChars(currentCharBytes)[0];

        // NUL characters (e.g. coming from unused cells) aren't part of the result.
        if (firstChar != '\0')
        {
            result.Append(firstChar);
        }
    }

    return result.ToString();
}