Code Example #1
        public MyFourierBinder(MyWorkingNode owner, int inputSize, MyMemoryBlock<float> tempBlock)
            : base(owner, inputSize, tempBlock)
        {
            m_stream = new CudaStream();

            m_fft = new CudaFFTPlan1D(inputSize, cufftType.R2C, 1);
            m_fft.SetStream(m_stream.Stream);
            m_ifft = new CudaFFTPlan1D(inputSize, cufftType.C2R, 1);
            m_ifft.SetStream(m_stream.Stream);

            m_mulkernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "MulComplexElementWise");
            m_mulkernel.SetupExecution(inputSize + 1);

            m_involutionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "InvolveVector");
            m_involutionKernel.SetupExecution(inputSize - 1);

            m_inversionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\InvertValuesKernel", "InvertLengthComplexKernel");
            m_inversionKernel.SetupExecution(inputSize);

            m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(owner, owner.GPU, ProductMode.f_DotProduct_f);

            m_normalKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
            m_normalKernel.SetupExecution(inputSize);

            m_firstFFTOffset = 0;
            m_secondFFTOffset = (inputSize + 1) * 2;
            m_tempOffset = (inputSize + 1) * 4;

            Denominator = inputSize;
        }
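The constructor above appears to wire up an FFT-based circular-convolution binder: R2C/C2R plans plus an element-wise complex multiply, which is the standard way to bind vectors in holographic reduced representations. As a rough orientation only, here is a minimal CPU sketch of the operation such a pipeline computes, assuming that interpretation (the helper names are hypothetical and not part of the project):

        // Hypothetical CPU reference of binding by circular convolution; the FFT path above
        // computes the same result via an element-wise complex multiply in the frequency domain.
        static float[] CircularConvolve(float[] a, float[] b)
        {
            int n = a.Length;
            var result = new float[n];
            for (int i = 0; i < n; i++)
                for (int j = 0; j < n; j++)
                    result[i] += a[j] * b[((i - j) % n + n) % n]; // wrap the index modulo n
            return result;
        }

        // The "InvolveVector" kernel presumably builds the involution b'(i) = b((n - i) mod n),
        // so that binding with the involuted vector acts as an approximate unbinding.
        static float[] Involve(float[] b)
        {
            int n = b.Length;
            var inv = new float[n];
            for (int i = 0; i < n; i++)
                inv[i] = b[(n - i) % n];
            return inv;
        }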
Code Example #2
            float NearestCC_dist; // similarity to the closest cluster center



            public override void Init(int nGPU)
            {
                m_kernel_AddNewCCenter = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "AddDataAsCC");
                m_kernel_AddNewCCenter.SetupExecution(Owner.DescCount);

                m_kernel_UpadteCC_desc = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "UpadateCC_Desc");
                m_kernel_UpadteCC_desc.SetupExecution(Owner.DescCount);

                m_kernel_UpdateCC_XY = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "UpdateCC_XY");
                m_kernel_UpdateCC_XY.SetupExecution(Owner.ObjectXY.Count);


                m_dotKernel = MyKernelFactory.Instance.KernelProduct <float>(Owner, nGPU, ProductMode.f_DotProduct_f);
                m_mulKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
                m_mulKernel.SetupExecution(Owner.DescCount);

                m_matMultpl = MyKernelFactory.Instance.Kernel(Owner.GPU, @"Common\CombineVectorsKernel", "MatMultipl_naive");
                m_matMultpl.GridDimensions  = new ManagedCuda.VectorTypes.dim3(1, Owner.DescCount);
                m_matMultpl.BlockDimensions = new ManagedCuda.VectorTypes.dim3(1, 1);

                m_minIdxKernel = MyKernelFactory.Instance.KernelReduction <float>(Owner, nGPU, ReductionMode.f_MinIdx_ff);

                m_kernel_UpdateXY_basedOnTheBrainsMovement = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "ApplyBrainsMovement");
                m_kernel_UpdateXY_basedOnTheBrainsMovement.SetupExecution(Owner.MaxClusters); // size the launch for all cluster centers
            }
Code Example #3
            public override void Init(int nGPU)
            {
                if (DecayFactor != 1f)
                {
                    if (DecayFactor > 1f)
                    {
                        MyLog.WARNING.WriteLine("Decay factor on a HashingMemoryNode that is greater than one is suspicious...");
                    }

                    _polynomialFuncKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
                    _polynomialFuncKernel.SetupExecution(Memory.Count);
                }

                if (AddFactor != 1f)
                {
                    _constMulKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
                    _constMulKernel.SetupExecution(Owner.SymbolSize);
                }

                if (NormalizeTarget)
                {
                    _combineVectorsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
                    _combineVectorsKernel.SetupExecution(Owner.SymbolSize);
                    _mapToIdcsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MapToIdcs");
                    _mapToIdcsKernel.SetupExecution(Owner.SymbolSize);
                    _dotKernel = MyKernelFactory.Instance.KernelProduct <float>(Owner, nGPU, ProductMode.f_DotProduct_f);
                }
                else
                {
                    _mapToIdcsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "AddToIdcs");
                    _mapToIdcsKernel.SetupExecution(Owner.SymbolSize);
                }

                Temp.SafeCopyToHost();
            }
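For orientation only: the decay branch above most likely amounts to an element-wise scaling of the stored memory, with the PolynomialFunctionKernel configured elsewhere in the node to use DecayFactor as the linear coefficient. A minimal CPU sketch of that assumed behaviour:

            // Assumed CPU equivalent of applying DecayFactor to the memory block each step;
            // with a factor below one the stored values shrink towards zero over time.
            static void DecayMemory(float[] memory, float decayFactor)
            {
                for (int i = 0; i < memory.Length; i++)
                    memory[i] *= decayFactor;
            }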
Code Example #5
File: MyJoin.cs Project: codeaudit/BrainSimulator
            public override void Init(int nGPU)
            {
                in0  = Owner.GetInput(0);
                in1  = Owner.GetInput(1);
                out0 = Owner.GetOutput(0);

                m_kernel = Owner.InputBranches > 2
                    ? MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel")
                    : MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernelVarSize");

                m_kernel.SetupExecution(out0.Count);

                switch (Owner.Operation)
                {
                case MyJoinOperation.AddToIdcs:
                    m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "AddToIdcs");
                    m_kernel.SetupExecution(in1.Count);
                    break;

                case MyJoinOperation.AddToIdcs_Normalize:
                    m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
                    m_kernel.SetupExecution(in1.Count);
                    m_mapToIdcsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MapToIdcs");
                    m_mapToIdcsKernel.SetupExecution(in1.Count);
                    m_dotKernel = MyKernelFactory.Instance.KernelProduct <float>(Owner, nGPU, ProductMode.f_DotProduct_f);
                    break;

                case MyJoinOperation.GatherFromIdcs:
                    m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
                    m_kernel.SetupExecution(in1.Count);
                    break;

                case MyJoinOperation.DotProduct:
                case MyJoinOperation.DistanceSquared:
                    m_kernel.SetupExecution(in0.Count);
                    m_dotKernel = MyKernelFactory.Instance.KernelProduct <float>(Owner, nGPU, ProductMode.f_DotProduct_f);
                    break;

                case MyJoinOperation.CosineDistance:
                    m_dotKernel = MyKernelFactory.Instance.KernelProduct <float>(Owner, nGPU, ProductMode.f_Cosine_f);
                    break;

                case MyJoinOperation.MatMultiplication:
                {
                    // out0.Count / out0.ColumnHint
                    m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MatMultipl_naive");
                    int MAX_BLOCK_SIZE = 1;
                    m_kernel.GridDimensions  = new ManagedCuda.VectorTypes.dim3(out0.ColumnHint / MAX_BLOCK_SIZE, out0.Count / out0.ColumnHint / MAX_BLOCK_SIZE);
                    m_kernel.BlockDimensions = new ManagedCuda.VectorTypes.dim3(MAX_BLOCK_SIZE, MAX_BLOCK_SIZE);
                }
                break;
                }
            }
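In the MatMultiplication branch above, MAX_BLOCK_SIZE is 1, so the grid spans ColumnHint by Count / ColumnHint and the kernel is launched with one thread per output element. A CPU sketch of the naive product that launch corresponds to, with hypothetical parameter names and row-major storage assumed:

            // Naive matrix product C = A * B: A is rowsA x colsA, B is colsA x colsB.
            // Each (row, col) iteration mirrors one thread of the one-thread-per-element launch.
            static float[] MatMulNaive(float[] a, float[] b, int rowsA, int colsA, int colsB)
            {
                var c = new float[rowsA * colsB];
                for (int row = 0; row < rowsA; row++)
                    for (int col = 0; col < colsB; col++)
                    {
                        float sum = 0f;
                        for (int k = 0; k < colsA; k++)
                            sum += a[row * colsA + k] * b[k * colsB + col];
                        c[row * colsB + col] = sum;
                    }
                return c;
            }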
Code Example #6
        /// <summary>
        /// Normalizes vectors along the leading dimension.
        /// </summary>
        public static void NormalizeLeadingDim(
            MyMemoryBlock <float> vectors, MyMemoryBlock <float> temp,
            int leadingDim, int otherDim,
            MyProductKernel <float> dotKernel, MyCudaKernel multKernel, int GPU)
        {
            var count = leadingDim * otherDim;

            Debug.Assert(vectors != null && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
            Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
            Debug.Assert(vectors.Count >= count, "Too few vectors to normalize!");
            Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

            multKernel.SetupExecution(leadingDim);


            for (int i = 0; i < otherDim; i++)
            {
                var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
                //dotKernel.Run(temp, i, seg, seg, leadingDim, /* distributed: */ 0);
                dotKernel.outOffset = i;
                dotKernel.Run(temp, seg, seg, leadingDim);
            }

            temp.SafeCopyToHost(0, otherDim);


            for (int i = 0; i < otherDim; i++)
            {
                if (temp.Host[i] < 0.0000001f)
                {
                    temp.Host[i] = 0;
                }
                else
                {
                    temp.Host[i] = (float)(1 / Math.Sqrt(temp.Host[i]));
                }
            }

            temp.SafeCopyToDevice(0, otherDim);


            for (int i = 0; i < otherDim; i++)
            {
                var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
                var len = temp.GetDevicePtr(GPU, i);
                multKernel.Run(seg, len, seg, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
            }
        }
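As a CPU reference for the three passes above (per-segment dot products, an inverse square root on the host, then a scaling kernel), under the same contiguous-segment layout:

        // CPU sketch: scale each contiguous segment of length leadingDim to unit Euclidean length;
        // near-zero segments end up multiplied by zero, matching the host-side branch above.
        static void NormalizeLeadingDimCpu(float[] vectors, int leadingDim, int otherDim)
        {
            for (int i = 0; i < otherDim; i++)
            {
                double sumSq = 0.0;
                for (int j = 0; j < leadingDim; j++)
                {
                    float v = vectors[i * leadingDim + j];
                    sumSq += v * v;
                }

                float scale = sumSq < 0.0000001 ? 0f : (float)(1.0 / Math.Sqrt(sumSq));

                for (int j = 0; j < leadingDim; j++)
                    vectors[i * leadingDim + j] *= scale;
            }
        }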
Code Example #7
            public override void Init(int nGPU)
            {
                lastIdx = -1;

                if (Owner.UseBSCVariety)
                {
                    m_sum = MyKernelFactory.Instance.KernelReduction <float>(Owner, nGPU, ReductionMode.f_Sum_f);
                }
                else
                {
                    m_dot = MyKernelFactory.Instance.KernelProduct <float>(Owner, nGPU, ProductMode.f_DotProduct_f);
                }

                MyMemoryManager.Instance.ClearGlobalVariable(Owner.GlobalVariableName, nGPU);

                if (Owner.UseBSCVariety)
                {
                    m_similarityKernel = MyKernelFactory.Instance.Kernel(Owner.GPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
                    m_similarityKernel.SetupExecution(Owner.SymbolSize);
                }
            }
Code Example #8
        public MyDistanceOps(MyWorkingNode caller, DistanceOperation operations, MyMemoryBlock <float> tempBlock = null)
        {
            m_caller     = caller;
            m_operations = operations;
            m_temp       = tempBlock;


            if (operations.HasFlag(DistanceOperation.DotProd))
            {
                m_dotKernel = MyKernelFactory.Instance.KernelProduct <float>(caller, caller.GPU, ProductMode.f_DotProduct_f);
            }

            if (operations.HasFlag(DistanceOperation.CosDist))
            {
                m_cosKernel = MyKernelFactory.Instance.KernelProduct <float>(caller, caller.GPU, ProductMode.f_Cosine_f);
            }

            if (operations.HasFlag(DistanceOperation.EuclidDist) || operations.HasFlag(DistanceOperation.EuclidDistSquared))
            {
                // EuclidDist computes EuclidDistSquared first, so keep them together:
                m_operations |= DistanceOperation.EuclidDist | DistanceOperation.EuclidDistSquared;
                m_dotKernel   = MyKernelFactory.Instance.KernelProduct <float>(caller, caller.GPU, ProductMode.f_DotProduct_f);
            }

            if (operations.HasFlag(DistanceOperation.HammingDist))
            {
                m_reduceSumKernel = MyKernelFactory.Instance.KernelReduction <float>(caller, caller.GPU, ReductionMode.f_Sum_f);
            }
            if (operations.HasFlag(DistanceOperation.HammingSim))
            {
                m_reduceSumKernel = MyKernelFactory.Instance.KernelReduction <float>(caller, caller.GPU, ReductionMode.f_Sum_f);
            }

            if (operations.HasFlag(DistanceOperation.EuclidDist) || operations.HasFlag(DistanceOperation.EuclidDistSquared) ||
                operations.HasFlag(DistanceOperation.HammingDist) || operations.HasFlag(DistanceOperation.HammingSim))
            {
                m_combineVecsKernel = MyKernelFactory.Instance.Kernel(m_caller.GPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
            }
        }
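The reason EuclidDist and EuclidDistSquared are kept together above: the distance is just the square root of the squared distance, which is the dot product of the element-wise difference (presumably produced by CombineTwoVectorsKernel and fed to the dot kernel). A plain CPU sketch of that relation:

        // Squared Euclidean distance as the dot product of the difference vector.
        static float EuclideanDistanceSquared(float[] a, float[] b)
        {
            float sum = 0f;
            for (int i = 0; i < a.Length; i++)
            {
                float d = a[i] - b[i];
                sum += d * d;
            }
            return sum;
        }

        // The non-squared distance only adds a final square root.
        static float EuclideanDistance(float[] a, float[] b)
        {
            return (float)Math.Sqrt(EuclideanDistanceSquared(a, b));
        }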
Code Example #10
        /// <summary>
        /// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal using a modified version of the Gram-Schmidt algorithm.
        /// </summary>
        /// <param name="vectors">The vectors to orthonormalize.</param>
        /// <param name="temp">A vector of temporal space.</param>
        /// <param name="xDim">The length of each vector.</param>
        /// <param name="yDim">The number of vectors.</param>
        /// <param name="dotKernel">The kernel to compute a dot product.</param>
        /// <param name="multKernel">The kernel to compute vector combinations.</param>
        public static void OrthonormalizeVectors(MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp, int xDim, int yDim, MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
        {
            int count = xDim * yDim;

            Debug.Assert(vectors != null && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
            Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
            Debug.Assert(vectors.Count >= count, "Too few vectors to orthonormalize!");
            Debug.Assert(temp.Count >= xDim, "Too little temp space!");

            multKernel.SetupExecution(xDim);

            for (int i = 0; i < count; i += xDim)
            {
                var curr = vectors.GetDevicePtr(GPU, i);

                // Normalize the current vector
                {
                    //ZXC dotKernel.Run(temp, 0, curr, curr, xDim, /* distributed: */ 0);
                    dotKernel.Run(temp, curr, curr, xDim);
                    temp.SafeCopyToHost(0, 1); // bring the dot product back to the host before reading temp.Host[0]

                    if (temp.Host[0] < 0.0000001f)
                        continue;

                    temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
                    temp.SafeCopyToDevice(0, 1);

                    multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
                }

                // Make all the remaining vectors orthogonal to the current one
                for (int j = i + xDim; j < count; j += xDim)
                {
                    var next = vectors.GetDevicePtr(GPU, j);

                    // Compute and subtract the projection onto the current vector
                    //ZXC dotKernel.Run(temp, xDim, curr, next, xDim, /* distributed: */ 0);
                    dotKernel.outOffset = xDim;
                    dotKernel.Run(temp, curr, next, xDim);

                    multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
                    multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
                }
            }
        }
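For comparison, a compact CPU version of the same modified Gram-Schmidt pass over vectors of length xDim stored back to back, skipping near-zero vectors just as the code above does:

        // CPU sketch of modified Gram-Schmidt: normalize each vector in turn, then remove its
        // component from all vectors that follow it.
        static void OrthonormalizeCpu(float[] vectors, int xDim, int yDim)
        {
            int count = xDim * yDim;

            for (int i = 0; i < count; i += xDim)
            {
                double normSq = 0.0;
                for (int k = 0; k < xDim; k++)
                    normSq += vectors[i + k] * vectors[i + k];

                if (normSq < 0.0000001)
                    continue; // skip (nearly) zero vectors, as above

                float inv = (float)(1.0 / Math.Sqrt(normSq));
                for (int k = 0; k < xDim; k++)
                    vectors[i + k] *= inv;

                for (int j = i + xDim; j < count; j += xDim)
                {
                    float dot = 0f;
                    for (int k = 0; k < xDim; k++)
                        dot += vectors[i + k] * vectors[j + k];

                    for (int k = 0; k < xDim; k++)
                        vectors[j + k] -= dot * vectors[i + k];
                }
            }
        }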
Code Example #12
        /// <summary>
        /// Generates a matrix with <paramref name="yDim"/> being the leading dimension in column-major storage.
        /// </summary>
        /// <param name="unmanagedVectors">A memory block to store the generated matrix.
        /// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
        /// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
        /// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
        /// Only necessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
        /// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
        /// <param name="random">The random object for number generation.</param>
        /// <param name="xDim">The size of the other dimension.</param>
        /// <param name="yDim">The size of the leading dimension.</param>
        /// <param name="mode">If true, the vectors along the longer dimension will be orthonormalized.</param>
        /// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
        public static void GenerateTransformMatrix(
            MyMemoryBlock<float> unmanagedVectors, MyMemoryBlock<float> unmanagedBaseVectors, MyMemoryBlock<float> temp,
            Random random, int xDim, int yDim,
            MyProductKernel<float> dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
            VectorGenerationMode mode = VectorGenerationMode.Normal, AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
        {
            Debug.Assert(random != null, "Missing random object");
            Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");

            // Mapping to rows --- column-major storage --- rows will be the leading dimension
            // The larger dimension vectors will be orthogonal; the cols dimension vectors will be normalized

            switch (mode)
            {
                case VectorGenerationMode.Normal:
                    if (axisToNormalize == AxisToNormalizeEnum.xDim)
                    {
                        // Generate normalized vectors with xDim as the leading dim
                        GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                        unmanagedVectors.SafeCopyToDevice();

                        // Transpose to the correct position
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    }
                    else
                    {
                        GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                        unmanagedVectors.SafeCopyToDevice();
                    }
                    break;

                case VectorGenerationMode.Orthonormalize:
                    int largerDim = Math.Max(xDim, yDim);
                    int smallerDim = Math.Min(xDim, yDim);

                    // Generate vectors with larger leading dimension
                    GenerateRandomNormalVectors(unmanagedVectors.Host, random, largerDim, smallerDim, normalize: false);
                    unmanagedVectors.SafeCopyToDevice();

                    // Orthonormalize along the larger dimension
                    OrthonormalizeVectors(unmanagedVectors, temp, largerDim, smallerDim, dotKernel, multKernel, GPU);

                    if (xDim > yDim)
                    {
                        // xDim is leading and is normalized
                        // We need to transpose to get the correct dims
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                        if (axisToNormalize == AxisToNormalizeEnum.yDim)
                            NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                    }
                    else
                    {
                        // yDim is leading and is normalized
                        // The matrix is in correct position

                        if (axisToNormalize == AxisToNormalizeEnum.xDim)
                        {
                            // TODO: generate the matrix with transposed dims?
                            // TODO: SMELLY VERSION:
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                            NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                        }
                    }
                    break;

                case VectorGenerationMode.AverageBaseVectors:
                    int longerDim = Math.Max(xDim, yDim);
                    int shorterDim = Math.Min(xDim, yDim);

                    GenerateTransformMatrix(
                        unmanagedBaseVectors, null, temp,
                        random, longerDim, longerDim,
                        dotKernel, multKernel, transposeKernel, GPU,
                        VectorGenerationMode.Orthonormalize);

                    if (shorterDim == longerDim)
                        break;

                    float it = 0f;
                    float step = longerDim / (float)shorterDim;
                    int beg, end = 0;

                    for (int i = 0; i < shorterDim; i++)
                    {
                        beg = end;
                        it += step;
                        end = (int)it;

                        var vect = unmanagedVectors.GetDevicePtr(GPU, i * longerDim);

                        for (int j = beg; j < end; j++)
                        {
                            var baseVect = unmanagedBaseVectors.GetDevicePtr(GPU, j * longerDim);
                            multKernel.Run(baseVect, vect, vect, (int)MyJoin.MyJoinOperation.Addition, longerDim,
                                longerDim);
                        }
                    }

                    if (xDim > yDim)
                    {
                        // xDim is leading and is not normalized
                        // We need to transpose to get the correct dims

                        if (axisToNormalize == AxisToNormalizeEnum.xDim)
                        {
                            NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);

                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                        }
                        else
                        {
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                            NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                        }
                    }
                    else
                    {
                        // yDim is leading and is not normalized
                        // The matrix is in correct position

                        if (axisToNormalize == AxisToNormalizeEnum.yDim)
                            NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                        else
                        {
                            // TODO: SMELLY VERSION:
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                            NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                        }
                    }
                    break;
            }
        }
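One detail of the AverageBaseVectors branch above that is easy to miss: the it/step/beg/end bookkeeping spreads the longerDim orthonormal base vectors over the shorterDim output vectors in contiguous groups of nearly equal size. A small stand-alone illustration of just that bucketing (the helper name is hypothetical):

        // For each of the shorterDim output vectors, returns the exclusive end index of the group
        // of base vectors summed into it; group i covers [ends[i - 1], ends[i]), with ends[-1] = 0.
        static int[] BucketEnds(int longerDim, int shorterDim)
        {
            var ends = new int[shorterDim];
            float it = 0f;
            float step = longerDim / (float)shorterDim;

            for (int i = 0; i < shorterDim; i++)
            {
                it += step;
                ends[i] = (int)it;
            }

            return ends;
        }

        // Example: longerDim = 10, shorterDim = 4 gives ends = { 2, 5, 7, 10 },
        // i.e. groups of sizes 2, 3, 2, 3.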