Exemplo n.º 1
0
        /// <summary>
        /// Runs the <c>GpuConv2DInputGradient</c> kernel and accumulates its per-block partial sums
        /// into <paramref name="inputGradients"/>.
        /// </summary>
        /// <param name="gradient">Tensor whose <c>Values</c> are copied to the device as the output gradient.</param>
        /// <param name="rotKernels">Tensor whose <c>Values</c> are copied to the device as the kernels.</param>
        /// <param name="stride">Stride forwarded to the kernel.</param>
        /// <param name="paddingX">Horizontal padding forwarded to the kernel.</param>
        /// <param name="paddingY">Vertical padding forwarded to the kernel.</param>
        /// <param name="inputGradients">
        /// Destination tensor. The partial sums are ADDED to its existing <c>Values</c>; nothing here clears them first.
        /// </param>
        public override void Conv2DInputGradient(Tensor gradient, Tensor rotKernels, int stride, int paddingX, int paddingY, Tensor inputGradients)
        {
            GpuShape[] shapes = new[] { new GpuShape(gradient.Shape),
                                        new GpuShape(rotKernels.Shape),
                                        new GpuShape(inputGradients.Shape),
                                        new GpuShape(rotKernels.Width, rotKernels.Height, 1, rotKernels.BatchSize) };

            int threadsRequiredPerResultElem = rotKernels.BatchSize * rotKernels.Height * rotKernels.Width;

            // Computed once and shared by the host buffer and the launch grid so both always agree.
            int blocksPerResultElem = GetBlocksNum(threadsRequiredPerResultElem);

            // One row per input-gradient element, one column per block contributing to that element.
            float[,] resultPartials = new float[inputGradients.Length, blocksPerResultElem];

            try
            {
                float[]    devGradient   = Gpu.CopyToDevice(gradient.Values);
                float[]    devRotKernels = Gpu.CopyToDevice(rotKernels.Values);
                GpuShape[] devShapes     = Gpu.CopyToDevice(shapes);

                // NOTE(review): presumably Allocate only reserves device memory without zeroing it, so the
                // kernel is expected to write every partial - confirm against the kernel implementation.
                float[,] devResultPartials = Gpu.Allocate(resultPartials);

                // simulate
                //GpuConv2DInputGradient(GetSimulatedThread(blockSize, new dim3(bx, by, bz), new dim3(tx, ty, tz)), gradient.Values, rotKernels.Values, resultPartials, shapes, paddingX, paddingY, stride);

                Gpu.Launch(new dim3(inputGradients.Length, blocksPerResultElem), THREADS_PER_BLOCK).GpuConv2DInputGradient(devGradient, devRotKernels, devResultPartials, devShapes, paddingX, paddingY, stride);
                Gpu.Synchronize();

                Gpu.CopyFromDevice(devResultPartials, resultPartials);
            }
            finally
            {
                // Runs even when a copy, the launch or the synchronize throws, so the device
                // buffers created above are not left behind.
                Gpu.FreeAll();
            }

            // Host-side reduction of the per-block partial sums.
            for (int k = 0; k < resultPartials.GetLength(0); ++k)
            {
                for (int partialId = 0; partialId < resultPartials.GetLength(1); ++partialId)
                {
                    inputGradients.Values[k] += resultPartials[k, partialId];
                }
            }
        }
            /// <summary>
            /// Ensures the device buffer <c>m_DevOverdBs</c> is sized for <paramref name="sampleCount"/> samples.
            /// Does nothing when the recorded sample count already matches.
            /// </summary>
            /// <param name="sampleCount">Number of float elements the device buffer must hold.</param>
            private void AllocateGpuResources(int sampleCount)
            {
                if (m_SampleCount != sampleCount)
                {
                    // Size changed: drop the old resources before allocating the replacement buffer.
                    DisposeGpuResources();
                    m_DevOverdBs  = Gpu.Allocate <float>(sampleCount);
                    m_SampleCount = sampleCount;
                }
            }
Exemplo n.º 3
0
        /// <summary>
        /// Compares <paramref name="target"/> with <paramref name="source"/> element by element on the default GPU
        /// and counts how many elements lie outside a tolerance band around the source value.
        /// </summary>
        /// <param name="source">Reference values.</param>
        /// <param name="target">Values to check; must have the same length as <paramref name="source"/>.</param>
        /// <param name="tolerance">
        /// Relative tolerance: an element fails when its target is below <c>(1 - tolerance) * source</c>
        /// or above <c>(1 + tolerance) * source</c>.
        /// </param>
        /// <param name="ThreshholdTol">
        /// Cut-off applied directly to the values: an element whose source or target is not greater than
        /// this is set to zero in both arrays before the comparison.
        /// </param>
        /// <returns>
        /// A tuple of (failed, counted): the number of elements outside the band, and the number of elements
        /// whose filtered source value is greater than zero. Empty input yields (0, 0).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> or <paramref name="target"/> is null.</exception>
        /// <exception cref="ArgumentException">The two arrays differ in length.</exception>
        public Tuple <int, int> CompareAbsoluteOpt(double[] source, double[] target, double tolerance, double ThreshholdTol)
        {
            System.Diagnostics.Debug.WriteLine("starting an absolute comparison on GPU");
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (target == null)
            {
                throw new ArgumentNullException("target");
            }
            if (source.Length != target.Length)
            {
                throw new ArgumentException("The source and target lengths need to match");
            }
            if (source.Length == 0)
            {
                // Nothing to compare; skip the GPU round trip entirely.
                return(new Tuple <int, int>(0, 0));
            }

            // NOTE(review): the threshold is used as an absolute value. An earlier revision computed
            // max(source) * ThreshholdTol but never used it - confirm which of the two is intended.
            double epsilon        = ThreshholdTol;
            double zero           = 0.0;
            double lowMultiplier  = (1 - tolerance);
            double highMultiplier = (1 + tolerance);
            int    failed         = 0;
            int    isCounted      = 0;
            int    dimension      = source.Length;
            Gpu    gpu            = Gpu.Default;

            double[] sourceOnGPU     = null;
            double[] targetOnGPU     = null;
            double[] isCountedArray  = null;
            double[] sourceOnGPULow  = null;
            double[] sourceOnGPUHigh = null;
            double[] isGTtol         = null;

            try
            {
                sourceOnGPU     = gpu.Allocate(source);
                targetOnGPU     = gpu.Allocate(target);
                isCountedArray  = gpu.Allocate <double>(dimension);
                sourceOnGPULow  = gpu.Allocate <double>(dimension);
                sourceOnGPUHigh = gpu.Allocate <double>(dimension);
                isGTtol         = gpu.Allocate <double>(dimension);

                // Zero every element where either side is at or below the threshold, in both arrays.
                // TODO: should failure be -1?
                gpu.For(0, dimension, i => sourceOnGPU[i]    = (sourceOnGPU[i] > epsilon) ? sourceOnGPU[i] : zero);
                gpu.For(0, dimension, i => targetOnGPU[i]    = (targetOnGPU[i] > epsilon) ? targetOnGPU[i] : zero);
                gpu.For(0, dimension, i => sourceOnGPU[i]    = (targetOnGPU[i] > epsilon) ? sourceOnGPU[i] : zero);
                gpu.For(0, dimension, i => targetOnGPU[i]    = (sourceOnGPU[i] > epsilon) ? targetOnGPU[i] : zero);

                // 1.0 marks an element that survived the filter and therefore takes part in the comparison.
                gpu.For(0, dimension, i => isCountedArray[i] = (sourceOnGPU[i] > zero) ? 1.0 : zero);

                // Lower and upper bounds of the tolerance band around each source value.
                gpu.For(0, dimension, i => sourceOnGPULow[i]  = lowMultiplier * sourceOnGPU[i]);
                gpu.For(0, dimension, i => sourceOnGPUHigh[i] = highMultiplier * sourceOnGPU[i]);

                // 1 marks an element whose target lies outside the band.
                gpu.For(0, dimension,
                        i => isGTtol[i] = (targetOnGPU[i] < sourceOnGPULow[i] || targetOnGPU[i] > sourceOnGPUHigh[i]) ? 1 : 0);

                isCounted = (int)gpu.Sum(isCountedArray);
                failed    = (int)gpu.Sum(isGTtol);
            }
            finally
            {
                // Free whatever was allocated, even when an allocation or a kernel failed part-way.
                FreeDeviceBuffer(sourceOnGPU);
                FreeDeviceBuffer(targetOnGPU);
                FreeDeviceBuffer(sourceOnGPULow);
                FreeDeviceBuffer(sourceOnGPUHigh);
                FreeDeviceBuffer(isCountedArray);
                FreeDeviceBuffer(isGTtol);
            }

            System.Diagnostics.Debug.WriteLine("finished an absolute comparison on GPU");
            //gpu.Dispose();

            return(new Tuple <int, int>(failed, isCounted));
        }

        /// <summary>Frees a device buffer if it was allocated; a null reference is ignored.</summary>
        private static void FreeDeviceBuffer(double[] buffer)
        {
            if (buffer != null)
            {
                Gpu.Free(buffer);
            }
        }
        /// <summary>
        /// Computes, for every ordered pair of rows in <paramref name="dataset"/>, the value
        /// <c>max(0, 1 - cos(row_i, row_j))</c> using <c>gpu.For</c>, and returns the full square matrix.
        /// </summary>
        /// <param name="gpu">GPU used for allocation and for running the parallel loop.</param>
        /// <param name="dataset">Jagged array of row vectors; row j is read up to the length of row i.</param>
        /// <returns>A <c>dataset.Length x dataset.Length</c> matrix of distances.</returns>
        private static double[,] CosineSimilarityGpu(Gpu gpu, double[][] dataset)
        {
            int rowCount  = dataset.Length;
            int pairCount = rowCount * rowCount;

            // NOTE(review): this device copy is allocated and freed, but the kernel below reads
            // `dataset` rather than this buffer - confirm whether that is intentional.
            var deviceDataset = gpu.Allocate(dataset);

            // Output matrix lives on the device until copied back.
            var deviceDistances = gpu.Allocate <double>(rowCount, rowCount);

            // One iteration per (row, col) pair, addressed through a flattened index.
            gpu.For(0, pairCount, flatIndex =>
            {
                int row = flatIndex / rowCount;
                int col = flatIndex % rowCount;

                double dot       = 0;
                double sqNormRow = 0;
                double sqNormCol = 0;
                for (int k = 0; k < dataset[row].Length; k++)
                {
                    dot       += (dataset[row][k] * dataset[col][k]);
                    sqNormRow += (dataset[row][k] * dataset[row][k]);
                    sqNormCol += (dataset[col][k] * dataset[col][k]);
                }

                double cosine = dot / Math.Sqrt(sqNormRow * sqNormCol);
                deviceDistances[row, col] = Math.Max(0, 1 - cosine);
            });

            // Device -> host.
            var hostDistances = new double[rowCount, rowCount];
            Gpu.Copy(deviceDistances, hostDistances);

            // Release device memory.
            Gpu.Free(deviceDataset);
            Gpu.Free(deviceDistances);

            return(hostDistances);
        }
Exemplo n.º 5
0
        //public override void Add(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.Length;
        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuAdd(devT1, devT2, devResult);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        //public override void Sub(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.Length;
        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuSub(devT1, devT2, devResult);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        //public override void Mul(Tensor t1, Tensor t2, Tensor result)
        //{
        //    int threadsRequired = result.BatchSize * t1.Depth * t1.Height * t2.Width;
        //    GpuShape[] shapes = new [] { new GpuShape(t1.Shape), new GpuShape(t2.Shape), new GpuShape(result.Shape) };

        //    float[] devT1 = Gpu.CopyToDevice(t1.Values);
        //    float[] devT2 = Gpu.CopyToDevice(t2.Values);
        //    float[] devResult = Gpu.Allocate(result.Values);
        //    GpuShape[] devShapes = Gpu.CopyToDevice(shapes);

        //    Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuMul(devT1, devT2, devResult, devShapes);
        //    Gpu.Synchronize();

        //    Gpu.CopyFromDevice(devResult, result.Values);
        //    Gpu.FreeAll();
        //}

        /// <summary>
        /// Runs the <c>GpuConv2D</c> kernel on <paramref name="t"/> and <paramref name="kernels"/> and copies
        /// the device output into <c>result.Values</c>.
        /// </summary>
        /// <param name="t">Tensor whose <c>Values</c> are copied to the device as the input.</param>
        /// <param name="kernels">Tensor whose <c>Values</c> are copied to the device as the kernels.</param>
        /// <param name="stride">Stride forwarded to the kernel.</param>
        /// <param name="paddingX">Horizontal padding forwarded to the kernel.</param>
        /// <param name="paddingY">Vertical padding forwarded to the kernel.</param>
        /// <param name="result">Destination tensor; its <c>Values</c> are overwritten from the device buffer.</param>
        public override void Conv2D(Tensor t, Tensor kernels, int stride, int paddingX, int paddingY, Tensor result)
        {
            // Thread budget: batch size * kernel count * output width * output height.
            int threadsRequired = t.BatchSize * kernels.BatchSize * result.Width * result.Height;

            GpuShape[] shapes = new[] { new GpuShape(t.Shape), new GpuShape(kernels.Shape), new GpuShape(result.Shape) };

            try
            {
                float[]    devT       = Gpu.CopyToDevice(t.Values);
                float[]    devKernels = Gpu.CopyToDevice(kernels.Values);
                float[]    devResult  = Gpu.Allocate(result.Values);
                GpuShape[] devShapes  = Gpu.CopyToDevice(shapes);

                Gpu.Launch(GetBlocksNum(threadsRequired), THREADS_PER_BLOCK).GpuConv2D(devT, devKernels, devResult, devShapes, paddingX, paddingY, stride);
                Gpu.Synchronize();

                Gpu.CopyFromDevice(devResult, result.Values);
            }
            finally
            {
                // Runs even when a copy, the launch or the synchronize throws, so the device
                // buffers created above are not left behind.
                Gpu.FreeAll();
            }
        }
Exemplo n.º 6
0
 /// <summary>
 /// Returns the cached device buffer <c>m_DevNormSamples</c>, allocating it on first use.
 /// </summary>
 /// <param name="channels">First dimension used when the buffer has to be allocated.</param>
 /// <param name="sampleCount">Second dimension used when the buffer has to be allocated.</param>
 /// <returns>The cached buffer; once it exists the arguments are not consulted again.</returns>
 private float[,] GetDevNormSamples(int channels, int sampleCount)
 {
     if (m_DevNormSamples == null)
     {
         m_DevNormSamples = Gpu.Allocate <float>(channels, sampleCount);
     }

     return(m_DevNormSamples);
 }
Exemplo n.º 7
0
 /// <summary>
 /// Returns the cached device buffer <c>m_DevOutputSamples</c> cast to <c>T[]</c>, allocating it on first use.
 /// </summary>
 /// <typeparam name="T">Element type of the buffer.</typeparam>
 /// <param name="length">Element count used when the buffer has to be allocated.</param>
 /// <returns>The cached buffer; once it exists <paramref name="length"/> is not consulted again.</returns>
 private T[] GetDevOutputSamples <T>(int length) where T : struct
 {
     if (m_DevOutputSamples == null)
     {
         m_DevOutputSamples = Gpu.Allocate <T>(length);
     }

     // The cast throws if the buffer was first allocated with a different element type.
     return((T[])m_DevOutputSamples);
 }