public override void SetElementAsFloat(long index, float value)
{
    // Writes one scalar at the given element index, converting the incoming
    // float to the storage's on-device element type before the host->device copy.
    var elementPtr = DevicePtrAtElement(index);

    if (ElementType == DType.Float32)
    {
        context.CopyToDevice(elementPtr, value);
    }
    else if (ElementType == DType.Float64)
    {
        context.CopyToDevice(elementPtr, (double)value);
    }
    else if (ElementType == DType.Int32)
    {
        context.CopyToDevice(elementPtr, (int)value);
    }
    else if (ElementType == DType.UInt8)
    {
        context.CopyToDevice(elementPtr, (byte)value);
    }
    else
    {
        // Unknown element type: refuse rather than write garbage.
        throw new NotSupportedException("Element type " + ElementType + " not supported");
    }
}
/// <summary>
/// Sums a stack of Z matrices (each Y x X) element-wise on the GPU and
/// returns the Y*X result as a flat row-major array.
/// </summary>
/// <param name="matrix">Jagged stack of 2-D matrices; all assumed to share the
/// dimensions of <c>matrix[0]</c> — TODO confirm callers guarantee this.</param>
/// <returns>Flat array of length Y*X holding the element-wise sums.</returns>
static double[] SumMatrixManagedCuda(double[][,] matrix)
{
    int Z = matrix.Length;
    int Y = matrix[0].GetLength(0);
    int X = matrix[0].GetLength(1);

    var result = new double[Y * X];
    var lm = ToLinearArray(matrix);
    int N = lm.Length;

    matrixSumCude.SetComputeSize((uint)X, (uint)Y);

    // The original version leaked both device buffers; free them even when
    // a copy or the kernel launch throws.
    var da = cntxt.AllocateMemory(N * sizeof(double));
    try
    {
        var db = cntxt.AllocateMemory(result.Length * sizeof(double));
        try
        {
            cntxt.CopyToDevice(da, lm);
            cntxt.CopyToDevice(db, result);

            // Kernel argument order: (output, input, X, Y, Z).
            matrixSumCude.Run(db, da, X, Y, Z);

            cntxt.CopyToHost<double>(result, db);
        }
        finally
        {
            cntxt.FreeMemory(db);
        }
    }
    finally
    {
        cntxt.FreeMemory(da);
    }
    return result;
}
// Computes the euclidean distance between one device vector and each of the
// vectors referenced by compareTo, writing compareTo.Length results of
// `size` elements each into a freshly allocated device buffer.
// NOTE(review): if the kernel launch throws after Allocate succeeds, `ret`
// is not released here — confirm whether IDeviceMemoryPtr cleanup happens
// elsewhere (e.g. a tracking allocator).
internal IDeviceMemoryPtr MultiEuclideanDistance(IDeviceMemoryPtr vector, CUdeviceptr[] compareTo, int size)
{
    IDeviceMemoryPtr ret = null;

    // 8 bytes per entry: one 64-bit device pointer for each comparison vector.
    var devicePtrTable = _cuda.AllocateMemory(8 * compareTo.Length);
    try
    {
        _cuda.CopyToDevice(devicePtrTable, compareTo);
        ret = Allocate(size * compareTo.Length);
        _Use(_multiEuclidean, size, compareTo.Length,
            k => k.Run(0, vector.DevicePointer, devicePtrTable, ret.DevicePointer, size, compareTo.Length));
    }
    finally
    {
        _cuda.FreeMemory(devicePtrTable);
    }
    return ret;
}
/// <summary>
/// Computes the euclidean distance between one device vector and each of the
/// vectors referenced by <paramref name="compareTo"/>, returning a device
/// buffer of <c>size * compareTo.Length</c> floats.
/// </summary>
/// <param name="vector">Device-resident input vector.</param>
/// <param name="compareTo">Device pointers to the vectors to compare against.</param>
/// <param name="size">Element count per comparison vector.</param>
/// <returns>Newly allocated device buffer holding the results; caller owns it.</returns>
internal CudaDeviceVariable<float> MultiEuclideanDistance(CudaDeviceVariable<float> vector, CUdeviceptr[] compareTo, int size)
{
    CudaDeviceVariable<float> ret = null;

    // 8 bytes per entry: one 64-bit device pointer per comparison vector.
    var buffer = _cuda.AllocateMemory(8 * compareTo.Length);
    try
    {
        _cuda.CopyToDevice(buffer, compareTo);
        ret = new CudaDeviceVariable<float>(size * compareTo.Length);
        _Use(_multiEuclidean, size, compareTo.Length,
            k => k.Run(0, vector.DevicePointer, buffer, ret.DevicePointer, size, compareTo.Length));
    }
    catch
    {
        // Fix: the result allocation leaked when the kernel launch threw;
        // release it before propagating the failure.
        ret?.Dispose();
        throw;
    }
    finally
    {
        _cuda.FreeMemory(buffer);
    }
    return ret;
}
/// <summary>
/// Renders the Buddhabrot on the GPU: seeds cuRAND state, then repeatedly
/// launches the sampling kernel until <c>IsStopping</c> is set or the optional
/// <c>maxSamples</c> budget is reached, finally copying the hit-count image back.
/// </summary>
/// <returns>Hit counts, one uint per pixel (Width * Height, row-major assumed — TODO confirm kernel layout).</returns>
public uint[] Run()
{
    // NOTE(review): absolute build-machine path; consider resolving the PTX
    // relative to the application base directory or embedding it as a resource.
    var ptx = @"C:\Src\_Tree\SmallPrograms\Buddhabrot\Buddhabrot.Cuda70\x64\Release\Buddhabrot.ptx";

    // Fix: the original never disposed the context nor freed any device
    // allocation; clean up deterministically even on failure.
    var context = new CudaContext();
    try
    {
        var module = new CudaModuleHelper(context, ptx);
        var init = module.GetKernel("Init");
        var setSettings = module.GetKernel("SetSettings");
        var runBuddha = module.GetKernel("RunBuddha");

        var nBlocks = 4196;  // NOTE(review): 4196 looks like a typo for 4096 — confirm intent.
        var nThreads = 256;

        var array = new uint[settings.Width * settings.Height];

        var dSettings = context.AllocateMemoryFor(settings);
        var dState = context.AllocateMemory(nThreads * nBlocks * SizeOfCurandState);
        var dArray = context.AllocateMemoryFor(array);
        try
        {
            context.CopyToDevice(dSettings, settings);
            context.CopyToDevice(dArray, array);

            init.Launch(nBlocks, nThreads, dState);
            setSettings.Launch(1, 1, dSettings);

            Console.WriteLine("Starting...");
            var sw = Stopwatch.StartNew();
            long i = 0;
            while (!IsStopping)
            {
                runBuddha.Launch(nBlocks, nThreads, dArray, dState);
                double count = (++i * nBlocks * nThreads);
                if (i % 5 == 0)
                {
                    Console.WriteLine("Generated {0:0.0} Million samples in {1:0.000} sec",
                        count / 1000000.0, sw.ElapsedMilliseconds / 1000.0);
                }
                if (maxSamples.HasValue && count >= maxSamples)
                    break;
            }
            context.CopyToHost(array, dArray);
        }
        finally
        {
            context.FreeMemory(dArray);
            context.FreeMemory(dState);
            context.FreeMemory(dSettings);
        }
        return array;
    }
    finally
    {
        context.Dispose();
    }
}
public override void SetElementsAsInt(long index, int[] value)
{
    // Bulk-writes the int buffer starting at the given element index.
    // Only Int32 storage supports copying the array directly.
    CUdeviceptr target = DevicePtrAtElement(index);
    if (ElementType == DType.Int32)
    {
        context.CopyToDevice(target, value);
        return;
    }
    throw new NotSupportedException("Element type " + ElementType + " not supported");
}
// Runs the distance kernel for one query vector against every vector flagged
// as present in the dataset, then keeps the nearest candidates in a fixed-size
// max-heap ("heap") and returns their indices sorted by the Comparer.
// vectorsInDataset: membership mask, 1 = vector participates in the search.
// vectorsFound: number of candidates accepted (see NOTE below on its semantics).
public int[] Find(byte[] vectorsInDataset, int vectorToExamine, out int vectorsFound)
{
    // Upload the membership mask to the device before launching the kernel.
    context.CopyToDevice( deviceIsInDataSet.DevicePointer, vectorsInDataset);
    kernel.Run(
        deviceVectors.DevicePointer,
        vectorCount,
        vectorToExamine,
        attrCount,
        deviceIsInDataSet.DevicePointer,
        deviceResult.DevicePointer
        );
    // Device->host copy; presumably via an implicit conversion on the
    // device-variable type — verify against its declaration.
    float[] hostResult = deviceResult;
    // Reset the heap: the root (heap[0]) holds the current worst retained distance.
    for (int i = 0; i < heap.Length; i++)
    {
        heap[i].val = float.MaxValue;
    }
    vectorsFound = 0;
    for (int i = 0; i < hostResult.Length; i++)
    {
        // Accept a candidate when it is in the dataset, beats the current
        // worst retained distance, and is not the query vector itself.
        if (vectorsInDataset[i] == 1 && hostResult[i] < heap[0].val && i != vectorToExamine)
        {
            // NOTE(review): this counts every accepted replacement, not the
            // number of distinct neighbours retained — confirm callers expect
            // that when fewer than heap.Length vectors qualify.
            vectorsFound++;
            heap[0].val = hostResult[i];
            heap[0].index = i;
            // Restore the heap property after replacing the root.
            Utils.hipify(heap);
        }
    }
    // NOTE(review): the clamp keys off vectorCount rather than the accepted
    // count; with many masked-out vectors this may over-report — TODO confirm.
    if (vectorCount > heap.Length)
    {
        vectorsFound = heap.Length;
    }
    // Sort retained entries (by distance, per Comparer) and return the indices.
    Array.Sort(heap, new Comparer());
    int[] result = new int[heap.Length];
    for (int i = 0; i < result.Length; i++)
    {
        result[i] = heap[i].index;
    }
    return(result);
}
// Copies totalElements elements from a CPU tensor into a GPU tensor,
// converting the element type on the CPU first when src and result differ.
public void CopyCpuToGpu(Tensor result, Tensor src, long totalElements)
{
    TSCudaContext tsContext = CudaHelpers.TSContextForTensor(result);
    CudaContext cudaContext = tsContext.CudaContextForTensor(result);

    // If types of src and result are different, convert on the CPU first.
    using (Tensor cpuSource = AsTypeCpu(src, result.ElementType, true))
    using (Tensor gpuTarget = Ops.AsContiguous(result))
    {
        CUdeviceptr targetPtr = ((CudaStorage)gpuTarget.Storage).DevicePtrAtElement(gpuTarget.StorageOffset);
        IntPtr sourcePtr = ((Cpu.CpuStorage)cpuSource.Storage).PtrAtElement(cpuSource.StorageOffset);

        cudaContext.CopyToDevice(targetPtr, sourcePtr, totalElements * cpuSource.ElementType.Size());

        // AsContiguous may have produced a temporary buffer; if so, write the
        // copied data back into the original (non-contiguous) result storage.
        if (result.Storage != gpuTarget.Storage)
        {
            CopyGpuDirect(result, gpuTarget, cudaContext);
        }
    }
}
/// <summary>
/// Sets up the GPU genetic algorithm: allocates gene/fitness buffers sized
/// from the initial population, uploads that population, loads the kernels,
/// and applies default hyper-parameters.
/// </summary>
/// <param name="context">CUDA context used for allocations and copies.</param>
/// <param name="fitnessCalc">Fitness evaluator invoked per generation.</param>
/// <param name="initialPopulation">popSize x genLength gene matrix.</param>
public Evolutionary2(
    CudaContext context,
    IFitnessFunction fitnessCalc,
    FlattArray<byte> initialPopulation
    )
{
    this.context = context;
    this.popSize = initialPopulation.GetLength(0);
    this.genLength = initialPopulation.GetLength(1);

    // Fix: round the gene-buffer size UP to a multiple of sizeof(int).
    // The original expression (size + size % 4) does not align for all sizes
    // (e.g. 5 -> 6, 7 -> 10); this form always yields the next multiple of 4.
    int geneBytes = popSize * genLength;
    int alignedPopSizeMemory = (geneBytes + sizeof(int) - 1) / sizeof(int) * sizeof(int);

    populationGens = new CudaDeviceVariable<byte>(alignedPopSizeMemory);
    populationGens2 = new CudaDeviceVariable<byte>(alignedPopSizeMemory);

    // Upload the initial population into the secondary gene buffer.
    context.CopyToDevice(populationGens2.DevicePointer, initialPopulation.Raw);

    deviceFitnes = new CudaDeviceVariable<float>(popSize);
    fitnessIndeces = new CudaDeviceVariable<int>(popSize);

    LoadKernels();

    // Default GA hyper-parameters; callers may overwrite via the properties.
    MutationRate = 0.01f;
    CrossOverRate = 0.7f;
    Alpha = 0.7f;
    Elitism = 0.2f;

    this.fitnessCalc = fitnessCalc;

    // Publish the problem dimensions to device constant memory.
    performGeneticAlgorythm.SetConstantVariable("popSize", popSize);
    performGeneticAlgorythm.SetConstantVariable("genLength", genLength);
}