Esempio n. 1
0
        /// <summary>
        /// Stores a single value in device memory at the given element index,
        /// converting it to the storage's element type first.
        /// </summary>
        /// <param name="index">Element index passed to <c>DevicePtrAtElement</c> to locate the destination.</param>
        /// <param name="value">Value to write; cast to the element type before the copy.</param>
        /// <exception cref="NotSupportedException">
        /// The element type is not Float32, Float64, Int32 or UInt8.
        /// </exception>
        public override void SetElementAsFloat(long index, float value)
        {
            var destination = DevicePtrAtElement(index);

            if (ElementType == DType.Float32)
            {
                // Already the right type: copy as-is.
                context.CopyToDevice(destination, value);
                return;
            }

            if (ElementType == DType.Float64)
            {
                double widened = value;
                context.CopyToDevice(destination, widened);
                return;
            }

            if (ElementType == DType.Int32)
            {
                int truncated = (int)value;
                context.CopyToDevice(destination, truncated);
                return;
            }

            if (ElementType == DType.UInt8)
            {
                byte narrowed = (byte)value;
                context.CopyToDevice(destination, narrowed);
                return;
            }

            throw new NotSupportedException("Element type " + ElementType + " not supported");
        }
Esempio n. 2
0
        /// <summary>
        /// Flattens a stack of 2-D matrices, uploads it to the device, runs the
        /// kernel held in <c>matrixSumCude</c> and returns the Y * X result buffer.
        /// </summary>
        /// <param name="matrix">
        /// Stack of matrices. The Y and X extents are read from the first entry only,
        /// so all entries are assumed to share that shape (TODO confirm against ToLinearArray).
        /// </param>
        /// <returns>Host array of length Y * X copied back from the device after the kernel ran.</returns>
        static double[] SumMatrixManagedCuda(double[][,] matrix)
        {
            int Z = matrix.Length;
            int Y = matrix[0].GetLength(0);
            int X = matrix[0].GetLength(1);

            var result = new double[Y * X];
            var lm     = ToLinearArray(matrix);
            int N      = lm.Length;

            matrixSumCude.SetComputeSize((uint)X, (uint)Y);

            // Device buffers are released in finally blocks so that neither a failed
            // copy nor a failed kernel launch leaks GPU memory.
            var da = cntxt.AllocateMemory(N * sizeof(double));
            try
            {
                var db = cntxt.AllocateMemory(result.Length * sizeof(double));
                try
                {
                    cntxt.CopyToDevice(da, lm);
                    // The output buffer is seeded with the zero-initialised host array.
                    cntxt.CopyToDevice(db, result);

                    // Invoke kernel
                    matrixSumCude.Run(db, da, X, Y, Z);

                    cntxt.CopyToHost<double>(result, db);
                }
                finally
                {
                    cntxt.FreeMemory(db);
                }
            }
            finally
            {
                cntxt.FreeMemory(da);
            }

            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// Uploads the table of device pointers in <paramref name="compareTo"/>, allocates an output
        /// block of <c>size * compareTo.Length</c> elements and runs the <c>_multiEuclidean</c> kernel
        /// against <paramref name="vector"/>.
        /// </summary>
        /// <param name="vector">Device memory passed to the kernel as its first pointer argument.</param>
        /// <param name="compareTo">Device pointers copied into a temporary device buffer for the kernel.</param>
        /// <param name="size">Row count handed to the kernel; also one factor of the output size.</param>
        /// <returns>The freshly allocated output block.</returns>
        internal IDeviceMemoryPtr MultiEuclideanDistance(IDeviceMemoryPtr vector, CUdeviceptr[] compareTo, int size)
        {
            var columnCount = compareTo.Length;

            // 8 bytes are reserved per pointer (presumably the size of a 64-bit CUdeviceptr - TODO confirm).
            var pointerTable = _cuda.AllocateMemory(8 * columnCount);
            IDeviceMemoryPtr output = null;

            try {
                _cuda.CopyToDevice(pointerTable, compareTo);
                output = Allocate(size * columnCount);
                _Use(
                    _multiEuclidean,
                    size,
                    columnCount,
                    k => k.Run(0, vector.DevicePointer, pointerTable, output.DevicePointer, size, columnCount)
                );
            }
            finally {
                // The pointer table is only needed for the duration of the kernel call.
                // NOTE(review): `output` is not released if the kernel call throws - confirm whether that matters.
                _cuda.FreeMemory(pointerTable);
            }

            return output;
        }
Esempio n. 4
0
        /// <summary>
        /// Uploads the table of device pointers in <paramref name="compareTo"/>, allocates an output
        /// variable of <c>size * compareTo.Length</c> floats and runs the <c>_multiEuclidean</c> kernel
        /// against <paramref name="vector"/>.
        /// </summary>
        /// <param name="vector">Device variable passed to the kernel as its first pointer argument.</param>
        /// <param name="compareTo">Device pointers copied into a temporary device buffer for the kernel.</param>
        /// <param name="size">Row count handed to the kernel; also one factor of the output size.</param>
        /// <returns>The freshly allocated output variable; the caller owns and must dispose it.</returns>
        internal CudaDeviceVariable <float> MultiEuclideanDistance(CudaDeviceVariable <float> vector, CUdeviceptr[] compareTo, int size)
        {
            CudaDeviceVariable <float> ret = null;

            // 8 bytes are reserved per pointer (presumably the size of a 64-bit CUdeviceptr - TODO confirm).
            var buffer = _cuda.AllocateMemory(8 * compareTo.Length);

            try {
                _cuda.CopyToDevice(buffer, compareTo);
                ret = new CudaDeviceVariable <float>(size * compareTo.Length);
                _Use(_multiEuclidean, size, compareTo.Length, k => k.Run(0, vector.DevicePointer, buffer, ret.DevicePointer, size, compareTo.Length));
            }
            catch {
                // The caller never receives `ret` on failure, so release it here
                // instead of leaking the device allocation, then propagate the error.
                if (ret != null) {
                    ret.Dispose();
                }
                throw;
            }
            finally {
                // The pointer table is only needed for the duration of the kernel call.
                _cuda.FreeMemory(buffer);
            }
            return(ret);
        }
Esempio n. 5
0
        /// <summary>
        /// Loads the Buddhabrot PTX module, launches the <c>Init</c> and <c>SetSettings</c> kernels once,
        /// then launches <c>RunBuddha</c> repeatedly until <c>IsStopping</c> is set or
        /// <c>maxSamples</c> (when it has a value) is reached.
        /// </summary>
        /// <returns>
        /// Host array of <c>settings.Width * settings.Height</c> entries copied back from the device
        /// after the loop ends.
        /// </returns>
        public uint[] Run()
        {
            // NOTE(review): absolute, machine-specific path - consider making it configurable.
            var ptx = @"C:\Src\_Tree\SmallPrograms\Buddhabrot\Buddhabrot.Cuda70\x64\Release\Buddhabrot.ptx";

            // The context is created per call, so it must be disposed per call; otherwise every
            // invocation leaks a CUDA context together with the device memory allocated in it.
            using (var context = new CudaContext())
            {
                var module = new CudaModuleHelper(context, ptx);

                var init = module.GetKernel("Init");
                var setSettings = module.GetKernel("SetSettings");
                var runBuddha = module.GetKernel("RunBuddha");

                var nBlocks = 4196;
                var nThreads = 256;

                var dSettings = context.AllocateMemoryFor(settings);
                context.CopyToDevice(dSettings, settings);

                var array = new uint[settings.Width * settings.Height];
                // One SizeOfCurandState-sized slot per launched thread.
                var dState = context.AllocateMemory(nThreads * nBlocks * SizeOfCurandState);
                var dArray = context.AllocateMemoryFor(array);
                context.CopyToDevice(dArray, array);

                init.Launch(nBlocks, nThreads, dState);
                setSettings.Launch(1, 1, dSettings);

                Console.WriteLine("Starting...");
                var sw = Stopwatch.StartNew();
                long i = 0;

                while (!IsStopping)
                {
                    runBuddha.Launch(nBlocks, nThreads, dArray, dState);

                    // Running total of launched threads (iterations * blocks * threads).
                    double count = (++i * nBlocks * nThreads);
                    if (i % 5 == 0)
                    {
                        Console.WriteLine("Generated {0:0.0} Million samples in {1:0.000} sec", count / 1000000.0, sw.ElapsedMilliseconds / 1000.0);
                    }

                    if (maxSamples.HasValue && count >= maxSamples)
                    {
                        break;
                    }
                }

                // Copy the result to the host before the context (and its memory) is torn down.
                context.CopyToHost(array, dArray);
                return array;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Copies an array of ints into device memory starting at the given element index.
        /// </summary>
        /// <param name="index">Element index passed to <c>DevicePtrAtElement</c> to locate the destination.</param>
        /// <param name="value">Host values to upload.</param>
        /// <exception cref="NotSupportedException">The element type is not Int32.</exception>
        public override void SetElementsAsInt(long index, int[] value)
        {
            CUdeviceptr destination = DevicePtrAtElement(index);

            // Only Int32 storage can take the array without conversion.
            if (ElementType != DType.Int32)
            {
                throw new NotSupportedException("Element type " + ElementType + " not supported");
            }

            context.CopyToDevice(destination, value);
        }
    /// <summary>
    /// Uploads the membership mask, runs the kernel for <paramref name="vectorToExamine"/> and
    /// collects the indices with the smallest kernel outputs (presumably distances - TODO confirm)
    /// into the shared <c>heap</c>.
    /// </summary>
    /// <param name="vectorsInDataset">Per-vector flags; only entries equal to 1 are considered.</param>
    /// <param name="vectorToExamine">Index passed to the kernel; it is excluded from the result.</param>
    /// <param name="vectorsFound">
    /// Number of heap replacements performed, or <c>heap.Length</c> when <c>vectorCount</c> exceeds it.
    /// </param>
    /// <returns>The <c>index</c> fields of the heap entries after sorting with <c>Comparer</c>.</returns>
    public int[] Find(byte[] vectorsInDataset, int vectorToExamine, out int vectorsFound)
    {
        context.CopyToDevice(deviceIsInDataSet.DevicePointer, vectorsInDataset);

        kernel.Run(
            deviceVectors.DevicePointer,
            vectorCount,
            vectorToExamine,
            attrCount,
            deviceIsInDataSet.DevicePointer,
            deviceResult.DevicePointer
            );

        // Assigning the device variable to a host array pulls the kernel output back to the host.
        float[] kernelOutput = deviceResult;

        // Reset every slot so that any real value beats it.
        for (int slot = 0; slot < heap.Length; slot++)
        {
            heap[slot].val = float.MaxValue;
        }

        vectorsFound = 0;
        for (int candidate = 0; candidate < kernelOutput.Length; candidate++)
        {
            if (vectorsInDataset[candidate] != 1)
            {
                continue;
            }

            if (!(kernelOutput[candidate] < heap[0].val))
            {
                continue;
            }

            if (candidate == vectorToExamine)
            {
                continue;
            }

            // Replace the root entry and let hipify restore the heap ordering.
            vectorsFound++;
            heap[0].val   = kernelOutput[candidate];
            heap[0].index = candidate;
            Utils.hipify(heap);
        }

        // NOTE(review): the cap is keyed on vectorCount rather than on the number of matches - confirm intent.
        if (vectorCount > heap.Length)
        {
            vectorsFound = heap.Length;
        }

        Array.Sort(heap, new Comparer());

        int[] indices = new int[heap.Length];
        for (int slot = 0; slot < indices.Length; slot++)
        {
            indices[slot] = heap[slot].index;
        }

        return indices;
    }
Esempio n. 8
0
        /// <summary>
        /// Copies <paramref name="totalElements"/> elements from a CPU tensor into a CUDA tensor.
        /// </summary>
        /// <param name="result">Destination tensor; its CUDA context and element type drive the copy.</param>
        /// <param name="src">Source tensor; converted on the CPU to the destination element type first.</param>
        /// <param name="totalElements">Number of elements to transfer.</param>
        public void CopyCpuToGpu(Tensor result, Tensor src, long totalElements)
        {
            TSCudaContext tsContext     = CudaHelpers.TSContextForTensor(result);
            CudaContext   deviceContext = tsContext.CudaContextForTensor(result);

            // Convert on the CPU when the element types differ, and obtain a contiguous
            // destination so a single linear copy is enough.
            using (Tensor hostSource = AsTypeCpu(src, result.ElementType, true))
            using (Tensor deviceTarget = Ops.AsContiguous(result))
            {
                var         targetStorage = (CudaStorage)deviceTarget.Storage;
                CUdeviceptr targetPtr     = targetStorage.DevicePtrAtElement(deviceTarget.StorageOffset);

                var    sourceStorage = (Cpu.CpuStorage)hostSource.Storage;
                IntPtr sourcePtr     = sourceStorage.PtrAtElement(hostSource.StorageOffset);

                var byteCount = totalElements * hostSource.ElementType.Size();
                deviceContext.CopyToDevice(targetPtr, sourcePtr, byteCount);

                // When AsContiguous handed back different storage, the data still has to
                // be copied from it into the real destination.
                bool usedTemporary = result.Storage != deviceTarget.Storage;
                if (usedTemporary)
                {
                    CopyGpuDirect(result, deviceTarget, deviceContext);
                }
            }
        }
Esempio n. 9
0
    /// <summary>
    /// Allocates the device buffers for the population, uploads the initial population into the
    /// second population buffer, loads the kernels and sets the default algorithm parameters.
    /// </summary>
    /// <param name="context">CUDA context used for the initial upload and kept for later use.</param>
    /// <param name="fitnessCalc">Fitness function stored on the instance.</param>
    /// <param name="initialPopulation">
    /// Initial population; dimension 0 is taken as the population size and dimension 1 as the gene length.
    /// </param>
    public Evolutionary2(
        CudaContext context,
        IFitnessFunction fitnessCalc,
        FlattArray <byte> initialPopulation
        )
    {
        this.context = context;

        this.popSize   = initialPopulation.GetLength(0);
        this.genLength = initialPopulation.GetLength(1);

        // Round the byte count UP to a multiple of sizeof(int). The previous formula
        // (n + n % 4) only produced an aligned size when n % 4 was 0 or 2.
        int populationBytes      = popSize * genLength;
        int alignedPopSizeMemory = (populationBytes + sizeof(int) - 1) / sizeof(int) * sizeof(int);

        populationGens =
            new CudaDeviceVariable <byte>(alignedPopSizeMemory);
        populationGens2 =
            new CudaDeviceVariable <byte>(alignedPopSizeMemory);

        // Only the second buffer is seeded with the initial population.
        context.CopyToDevice(populationGens2.DevicePointer, initialPopulation.Raw);

        deviceFitnes   = new CudaDeviceVariable <float>(popSize);
        fitnessIndeces = new CudaDeviceVariable <int>(popSize);

        LoadKernels();

        MutationRate  = 0.01f;
        CrossOverRate = 0.7f;
        Alpha         = 0.7f;
        Elitism       = 0.2f;

        this.fitnessCalc = fitnessCalc;

        performGeneticAlgorythm.SetConstantVariable("popSize", popSize);
        performGeneticAlgorythm.SetConstantVariable("genLength", genLength);
    }