/// <summary>
/// Public entry point: runs <c>_Calc</c> through the default worker when
/// <c>CanDoGPUCalc</c> is true.
/// </summary>
/// <param name="param">Calculation parameters, forwarded unchanged to <c>_Calc</c>.</param>
/// <returns>
/// <c>Some(value)</c> holding the value produced by <c>_Calc</c> when
/// <c>CanDoGPUCalc</c> is true; otherwise <c>None</c>.
/// </returns>
static public FSharpOption<double> Calc(CalcPIParam param)
{
    // Guard: without GPU support there is nothing to compute.
    if (!CanDoGPUCalc)
    {
        return FSharpOption<double>.None;
    }

    var gpu = Worker.Default;

    // The computation is handed to the worker via EvalFunc rather than being called directly.
    var estimate = gpu.EvalFunc(() => _Calc(gpu, param));

    return FSharpOption<double>.Some(estimate);
}
/// <summary>
/// Processes every stream id from <c>param.StartStreamId</c> through
/// <c>param.StopStreamId</c> (inclusive) and sums the per-stream results.
/// For each stream the point buffer is refilled, <c>KernelCountInside</c> is
/// launched over it, and the kernel output is reduced to a single count.
/// Each stream contributes <c>4 * count / numPoints / streamCount</c>.
/// A summary line is written to the console before returning.
/// </summary>
/// <param name="worker">Worker used for device allocation and the kernel launch.</param>
/// <param name="param">Supplies the seed, stream range, point count and random generator.</param>
/// <returns>The sum of the per-stream contributions (presumably an estimate of pi — confirm against the kernel).</returns>
/// <remarks>
/// The sum uses <c>Aggregate</c> without a seed, so an empty stream range
/// (StopStreamId + 1 == StartStreamId) raises <c>InvalidOperationException</c>.
/// </remarks>
static private double _Calc(Worker worker, CalcPIParam param)
{
    const int dimensions = 2;

    var seed = param.Seed;
    var streamTotal = param.NumStreams;
    var pointCount = param.NumPoints;
    var smCount = worker.Device.Attributes.MULTIPROCESSOR_COUNT;
    var firstStream = param.StartStreamId;
    var lastStream = param.StopStreamId;
    var generator = param.GetRandom(seed, streamTotal, dimensions);

    using (var summer = DeviceSumModuleI32.Default.Create(pointCount))
    using (var coords = generator.AllocCUDAStreamBuffer(pointCount))
    using (var insideBuffer = worker.Malloc<int>(pointCount))
    {
        // One buffer is passed as two pointers: the second starts pointCount elements after the first.
        var xs = coords.Ptr;
        var ys = coords.Ptr + pointCount;

        var launch = new LaunchParam(smCount * 8, 256);
        var streamCount = lastStream + 1 - firstStream;

        // Work for a single stream: refill, launch the kernel, reduce, then scale.
        Func<int, double> contributionOf = streamId =>
        {
            generator.Fill(streamId, pointCount, coords);
            worker.Launch(KernelCountInside, launch, xs, ys, pointCount, insideBuffer.Ptr);
            var insideTotal = summer.Reduce(insideBuffer.Ptr, pointCount);

            // Dividing by streamCount here makes the final sum an average across streams.
            return 4.0 * (double)insideTotal / (double)pointCount / (double)streamCount;
        };

        var estimate = Enumerable
            .Range(firstStream, streamCount)
            .Select(contributionOf)
            .Aggregate((sum, next) => sum + next);

        Console.WriteLine("Streams({0}-{1}/{2}) Points({3}) : {4}", firstStream + 1, lastStream + 1, streamTotal, pointCount, estimate);

        return estimate;
    }
}