示例#1
0
        /// <summary>
        /// Runs a kernel executable under <c>mpirun</c> and returns the bytes it writes to
        /// standard output, reinterpreted as an array of <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">
        /// Element type of the result. The conversion uses <c>Buffer.BlockCopy</c>, which only
        /// accepts arrays of primitive types.
        /// </typeparam>
        /// <param name="kernelBinary">Executable image; written to a temporary file that is removed before returning.</param>
        /// <param name="function">Function name forwarded to the executable as a command-line argument.</param>
        /// <param name="bufferSize">Buffer size forwarded to the executable as a command-line argument; the length of the returned array is derived from the captured output, not from this value.</param>
        /// <param name="loaderParams">Supplies <c>ProcessCount</c>, passed to <c>mpirun -n</c>.</param>
        /// <param name="kernelParams">Extra arguments appended to the command line via <c>ToString()</c>, space separated.</param>
        /// <returns>The captured standard output copied into a <typeparamref name="T"/> array (zero padded when the byte count is not a multiple of the element size).</returns>
        protected T[] InternalExecuteMPI <T>(
            byte[] kernelBinary,
            String function,
            int bufferSize,
            ParallelTaskParams loaderParams,
            params Object[] kernelParams) where T : struct
        {
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

            String binaryPath = Path.GetTempFileName();

            try
            {
                File.WriteAllBytes(binaryPath, kernelBinary);
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

                String mpiDirectory = SystemArchitecture.ProgramFolder(ArchitectureType.x86, @"OpenMPI*");

                using (Process mpirunProcess = new Process())
                {
                    mpirunProcess.StartInfo.CreateNoWindow                = true;
                    mpirunProcess.StartInfo.UseShellExecute               = false;
                    mpirunProcess.StartInfo.RedirectStandardOutput        = true;
                    mpirunProcess.StartInfo.EnvironmentVariables["PATH"] += @";" + mpiDirectory + @"\bin";
                    mpirunProcess.StartInfo.FileName = mpiDirectory + @"\bin\mpirun.exe";
                    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

                    // Build the command line in a local and assign it once instead of
                    // repeatedly reading and rewriting StartInfo.Arguments.
                    String arguments = String.Format("-n {0} \"{1}\" {2} {3} {4}",
                                                     loaderParams.ProcessCount,
                                                     ShortPath(binaryPath),
                                                     TypeName(typeof(T)),
                                                     function,
                                                     bufferSize);

                    foreach (Object param in kernelParams)
                    {
                        arguments += " " + param.ToString();
                    }

                    mpirunProcess.StartInfo.Arguments = arguments;
                    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

                    mpirunProcess.Start();
                    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

                    byte[] processOutput;

                    using (MemoryStream resultStream = new MemoryStream())
                    {
                        // Reads until the child closes its standard output.
                        mpirunProcess.StandardOutput.BaseStream.CopyTo(resultStream);
                        processOutput = resultStream.ToArray();
                    }

                    // Make sure the launcher has exited before the binary is deleted below.
                    mpirunProcess.WaitForExit();
                    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

                    // NOTE(review): Marshal.SizeOf reports the marshaled size, which differs from
                    // the managed element size for bool and char - confirm those are never used as T.
                    int elementSize = Marshal.SizeOf(typeof(T));

                    // Integer ceiling division. The previous float-based Math.Ceiling lost precision
                    // for outputs larger than 2^24 bytes and could under-allocate the result array.
                    int elementCount = (int)(((long)processOutput.Length + elementSize - 1) / elementSize);

                    T[] result = new T[elementCount];
                    Buffer.BlockCopy(processOutput, 0, result, 0, processOutput.Length);
                    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

                    return(result);
                }
            }
            finally
            {
                // Best-effort cleanup of the temporary binary; a failure to delete must not
                // mask the result or an exception that is already propagating.
                try
                {
                    File.Delete(binaryPath);
                }
                catch (IOException)
                {
                }
                catch (UnauthorizedAccessException)
                {
                }
            }
        }
示例#2
0
        /// <summary>
        /// Builds the given OpenCL source, runs one kernel from it over a two-dimensional
        /// global work size and returns the contents of the kernel's first argument,
        /// a write-only result buffer of <paramref name="bufferSize"/> elements.
        /// </summary>
        /// <typeparam name="T">Element type of the result buffer.</typeparam>
        /// <param name="source">OpenCL program source; <c>#define OpenCL</c> is prepended before compilation.</param>
        /// <param name="function">Name of the kernel to create from the built program.</param>
        /// <param name="bufferSize">Number of elements in the result buffer and in the returned array.</param>
        /// <param name="loaderParams">Supplies the OpenCL device and the global worker dimensions.</param>
        /// <param name="kernelParams">Additional kernel arguments; wrapped by <c>WrapDeviceVariables</c> and bound after the result buffer, in order.</param>
        /// <returns>The host array the result buffer was read into.</returns>
        protected T[] InternalExecuteOpencl <T>(
            String source,
            String function,
            int bufferSize,
            ParallelTaskParams loaderParams,
            params Object[] kernelParams)
            where T : struct
        {
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

            ComputeCommandQueue queue = QueueWithDevice(loaderParams.OpenCLDevice);

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

            String updatedSource = "#define OpenCL\r\n" + source;

            // The program, result buffer and kernel are created here and used nowhere else,
            // so they are released deterministically instead of waiting for finalization.
            using (ComputeProgram program = new ComputeProgram(queue.Context, updatedSource))
            {
                program.Build(new ComputeDevice[] { queue.Device }, null, null, IntPtr.Zero);
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

                T[] resultBuffer = new T[bufferSize];

                using (ComputeBuffer <T> resultBufferVar = new ComputeBuffer <T>(queue.Context, ComputeMemoryFlags.WriteOnly, bufferSize))
                {
                    // Argument 0 is always the result buffer; caller-supplied arguments follow.
                    // NOTE(review): the objects returned by WrapDeviceVariables are not disposed
                    // here because their ownership is not visible from this method - confirm.
                    List <ComputeMemory> vars = new List <ComputeMemory>();

                    vars.Add(resultBufferVar);
                    vars.AddRange(WrapDeviceVariables(kernelParams, queue.Context));

                    using (ComputeKernel kernel = program.CreateKernel(function))
                    {
                        for (int i = 0; i < vars.Count; i++)
                        {
                            kernel.SetMemoryArgument(i, vars[i]);
                        }

                        TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

                        long[] workersGlobal = new long[2] {
                            loaderParams.GlobalWorkers.Width, loaderParams.GlobalWorkers.Height
                        };
                        queue.Execute(kernel, null, workersGlobal, null, null);
                        TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

                        // Non-blocking read; Finish() below waits for it to complete before
                        // the device objects are released and the array is handed back.
                        queue.ReadFromBuffer <T>(resultBufferVar, ref resultBuffer, false, null);
                        TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

                        queue.Finish();
                        TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

                        return(resultBuffer);
                    }
                }
            }
        }
示例#3
0
        /// <summary>
        /// Loads a kernel from a PTX image, runs it with a zero-initialised result buffer as
        /// its first argument and returns that buffer's contents as a host array.
        /// </summary>
        /// <typeparam name="T">Element type of the result buffer.</typeparam>
        /// <param name="kernelBinary">PTX image passed to <c>LoadKernelPTX</c>.</param>
        /// <param name="function">Name of the kernel to load from the image.</param>
        /// <param name="bufferSize">Size passed to the <c>CudaDeviceVariable</c> constructor for the result buffer.</param>
        /// <param name="loaderParams">Supplies the CUDA device plus block and grid dimensions.</param>
        /// <param name="kernelParams">Additional kernel arguments; wrapped by <c>WrapDeviceVariables</c> and passed after the result buffer, in order.</param>
        /// <returns>The result buffer converted to a host array.</returns>
        protected T[] InternalExecuteCuda <T>(
            byte[] kernelBinary,
            String function,
            int bufferSize,
            ParallelTaskParams loaderParams,
            params Object[] kernelParams) where T : struct
        {
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

            CudaContext context = ContextWithDevice(loaderParams.CudaDevice);

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);
            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

            // Item1 is the value handed to the kernel, Item2 the resource to release afterwards
            // (null when there is nothing to release).
            List <Tuple <Object, IDisposable> > vars = new List <Tuple <Object, IDisposable> >();
            T[] resultBuffer;

            try
            {
                CudaDeviceVariable <T> resultBufferVar = new CudaDeviceVariable <T>(bufferSize);

                // Register the allocation immediately so it is released even if a later step throws.
                vars.Add(new Tuple <Object, IDisposable>(resultBufferVar.DevicePointer, resultBufferVar));
                resultBufferVar.Memset(0);

                vars.AddRange(WrapDeviceVariables(kernelParams, true));
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

                CudaKernel kernel = context.LoadKernelPTX(kernelBinary, function);

                kernel.BlockDimensions = new dim3(loaderParams.BlockSize.Width, loaderParams.BlockSize.Height);
                kernel.GridDimensions  = new dim3(loaderParams.GridSize.Width, loaderParams.GridSize.Height);
                kernel.Run(vars.Select(tuple => tuple.Item1).ToArray());
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

                // Implicit conversion to T[]; presumably copies the device buffer to the host - confirm.
                resultBuffer = resultBufferVar;
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);
            }
            finally
            {
                // Previously this ran only on the success path, leaking the tracked
                // resources whenever kernel loading, launch or read-back failed.
                foreach (Tuple <Object, IDisposable> tuple in vars)
                {
                    if (tuple.Item2 != null)
                    {
                        tuple.Item2.Dispose();
                    }
                }
            }

            TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

            return(resultBuffer);
        }