// Circular-convolution binding of two vectors via FFT (frequency-domain multiplication).
// first/second: input vectors; temp: scratch buffer; destination: result buffer.
// DoQuery: when true, 'second' is involuted before its FFT, which turns the binding
// into the corresponding unbinding (correlation) operation.
// NOTE(review): when the normalization factor equals 1 the result is left in 'temp'
// and never copied to 'destination' — confirm callers handle that case.
public void Binding(MyMemoryBlock <float> first, MyMemoryBlock <float> second, MyMemoryBlock <float> temp, MyMemoryBlock <float> destination, bool DoQuery)
            {
                // Forward FFT of the first operand.
                fft.Exec(first.GetDevicePtr(Owner), Owner.FirstInputFFT.GetDevicePtr(Owner));

                if (DoQuery)
                {
                    // Involute 'second' into 'temp' first, then transform the involuted copy.
                    m_involutionKernel.Run(second, temp, second.Count);
                    fft.Exec(temp.GetDevicePtr(Owner), Owner.SecondInputFFT.GetDevicePtr(Owner));
                }
                else
                {
                    fft.Exec(second.GetDevicePtr(Owner), Owner.SecondInputFFT.GetDevicePtr(Owner));
                }

                // Element-wise complex product in the frequency domain, written in place into SecondInputFFT.
                m_kernel.Run(Owner.FirstInputFFT, Owner.SecondInputFFT, Owner.SecondInputFFT, Owner.InputSize + 1);

                // Inverse FFT of the product back into 'temp'.
                ifft.Exec(Owner.SecondInputFFT.GetDevicePtr(Owner), temp.GetDevicePtr(Owner));

                // Inverse FFTs are typically unnormalized; rescale by 1/N — TODO confirm against the FFT plan used.
                float factor = 1.0f / Owner.Transform.Count;

                if (factor != 1)
                {
                    // Scale 'temp' by 'factor' into 'destination'.
                    m_normalKernel.Run(0, 0, factor, 0, temp, destination, Owner.InputSize);
                }
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Binds <paramref name="firstInput"/> with the vector(s) packed into
        /// <paramref name="secondInput"/>, writing the result to <paramref name="output"/>.
        /// </summary>
        public virtual void Bind(MyMemoryBlock<float> firstInput, MyMemoryBlock<float> secondInput, MyMemoryBlock<float> output)
        {
            int vectorCount = secondInput.Count / m_inputSize;

            IEnumerable<CUdeviceptr> pointers;
            if (vectorCount > 1)
            {
                // One pointer per packed vector, each offset by the vector size in bytes.
                pointers = Enumerable.Range(0, vectorCount)
                    .Select(idx => secondInput.GetDevicePtr(m_owner) + idx * m_inputSize * sizeof(float));
            }
            else
            {
                // Just a single pointer to the whole block.
                pointers = Enumerable.Repeat(secondInput.GetDevicePtr(m_owner), 1);
            }

            Bind(firstInput.GetDevicePtr(m_owner), pointers, output.GetDevicePtr(m_owner));
        }
Ejemplo n.º 3
0

        
Ejemplo n.º 4
0
        /// <summary>
        /// Normalizes vectors along the leading dimension.
        /// </summary>
        /// <param name="vectors">The vectors to normalize, stored consecutively; must hold at least leadingDim * otherDim items.</param>
        /// <param name="temp">Temporary storage for the squared norms; must hold at least Max(leadingDim, otherDim) items.</param>
        /// <param name="leadingDim">The length of each vector.</param>
        /// <param name="otherDim">The number of vectors.</param>
        /// <param name="dotKernel">The kernel used to compute dot products.</param>
        /// <param name="multKernel">The kernel used to scale the vectors.</param>
        /// <param name="GPU">The index of the GPU the memory blocks live on.</param>
        public static void NormalizeLeadingDim(
            MyMemoryBlock <float> vectors, MyMemoryBlock <float> temp,
            int leadingDim, int otherDim,
            MyProductKernel <float> dotKernel, MyCudaKernel multKernel, int GPU)
        {
            var count = leadingDim * otherDim;

            Debug.Assert(vectors != null && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
            Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
            Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
            Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

            multKernel.SetupExecution(leadingDim);


            // Compute the squared norm of each vector, writing vector i's result to temp[i].
            for (int i = 0; i < otherDim; i++)
            {
                var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
                //dotKernel.Run(temp, i, seg, seg, leadingDim, /* distributed: */ 0);
                dotKernel.outOffset = i;
                dotKernel.Run(temp, seg, seg, leadingDim);
            }

            // Bring the squared norms to the host before inspecting them.
            temp.SafeCopyToHost(0, otherDim);


            // Replace each squared norm with its reciprocal square root; near-zero vectors scale to 0.
            for (int i = 0; i < otherDim; i++)
            {
                if (temp.Host[i] < 0.0000001f)
                {
                    temp.Host[i] = 0;
                }
                else
                {
                    temp.Host[i] = (float)(1 / Math.Sqrt(temp.Host[i]));
                }
            }

            // Push the scaling factors back to the device.
            temp.SafeCopyToDevice(0, otherDim);


            // Scale each vector by its factor, in place.
            for (int i = 0; i < otherDim; i++)
            {
                var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
                var len = temp.GetDevicePtr(GPU, i);
                multKernel.Run(seg, len, seg, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal using a modified version of the Gram-Schmidt algorithm.
        /// Vectors whose squared norm is below 1e-7 are skipped entirely.
        /// </summary>
        /// <param name="vectors">The vectors to orthonormalize.</param>
        /// <param name="temp">A vector of temporal space; must hold at least <paramref name="xDim"/> items.</param>
        /// <param name="xDim">The length of each vector.</param>
        /// <param name="yDim">The number of vectors.</param>
        /// <param name="dotKernel">The kernel to compute a dot product.</param>
        /// <param name="multKernel">The kernel to compute vector combinations.</param>
        /// <param name="GPU">The index of the GPU the memory blocks live on.</param>
        public static void OrthonormalizeVectors(MyMemoryBlock <float> vectors, MyMemoryBlock <float> temp, int xDim, int yDim, MyProductKernel <float> dotKernel, MyCudaKernel multKernel, int GPU)
        {
            int count = xDim * yDim;

            Debug.Assert(vectors != null && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
            Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
            Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
            Debug.Assert(temp.Count >= xDim, "Too little temp space!");

            multKernel.SetupExecution(xDim);


            for (int i = 0; i < count; i += xDim)
            {
                var curr = vectors.GetDevicePtr(GPU, i);

                // Normalize the current vector
                {
                    // The squared norm must land at temp[0]; reset the offset because the
                    // orthogonalization loop below leaves it at xDim.
                    dotKernel.outOffset = 0;
                    dotKernel.Run(temp, curr, curr, xDim);
                    // Fetch the device-side result to the host before reading temp.Host[0].
                    temp.SafeCopyToHost(0, 1);

                    if (temp.Host[0] < 0.0000001f)
                    {
                        // Degenerate (near-zero) vector: leave it untouched.
                        continue;
                    }

                    // Replace the squared norm with its reciprocal square root and push it back.
                    temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
                    temp.SafeCopyToDevice(0, 1);

                    multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
                }

                // Make all the remaining vectors orthogonal to the current one
                for (int j = i + xDim; j < count; j += xDim)
                {
                    var next = vectors.GetDevicePtr(GPU, j);

                    // Compute and subtract the projection onto the current vector
                    dotKernel.outOffset = xDim;
                    dotKernel.Run(temp, curr, next, xDim);

                    multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
                    multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
                }
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Binds the vectors packed in <paramref name="inputs"/> together, writing the result
        /// through the final pointer, which targets <paramref name="output"/>.
        /// </summary>
        public virtual void Bind(MyMemoryBlock<float> inputs, MyMemoryBlock<float> output)
        {
            int vectorCount = inputs.Count / m_inputSize;
            CUdeviceptr basePtr = inputs.GetDevicePtr(m_owner);

            // Pointer table: one slot per input after the first, plus one slot for the output.
            CUdeviceptr[] pointers = GetTempArray(vectorCount);
            for (int idx = 1; idx < vectorCount; ++idx)
            {
                pointers[idx - 1] = basePtr + idx * m_inputSize * sizeof(float);
            }

            pointers[vectorCount - 1] = output.GetDevicePtr(m_owner);

            Bind(basePtr, pointers);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Binds the vectors packed in <paramref name="inputs"/> together, writing the result
        /// through the last pointer in the table, which targets <paramref name="output"/>.
        /// </summary>
        public virtual void Bind(MyMemoryBlock <float> inputs, MyMemoryBlock <float> output)
        {
            int         nrInputs = inputs.Count / m_inputSize;
            CUdeviceptr start    = inputs.GetDevicePtr(m_owner);

            // Pointer table: one entry per input after the first, plus one for the output.
            CUdeviceptr[] arr = GetTempArray(nrInputs); //-1 to skip the first +1 to include output
            for (int i = 0; i < nrInputs - 1; ++i)
            {
                arr[i] = start + (i + 1) * m_inputSize * sizeof(float);
            }

            arr[nrInputs - 1] = output.GetDevicePtr(m_owner);

            Bind(start, arr);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Initializes this layer on the given GPU: hooks up the delta memory and publishes
        /// its dimensions/pointer through the network's shared dimension block.
        /// </summary>
        /// <param name="nGPU">The index of the GPU to initialize on.</param>
        public override void Initialize(Int32 nGPU)
        {
            base.Initialize(nGPU);

            if (m_deltaBlock != null)
            {
                m_delta.Ptr = m_deltaBlock.GetDevicePtr(m_network, m_deltaOffset);
            }

            // Send the structures to GPU
            // NOTE(review): only the host copy is written here — presumably the dims block is
            // copied to the device elsewhere; confirm.
            m_network.DataDimsMemoryBlock.Host[m_deltaDimGPUPtrOffset] = Delta;

            // Store the GPU pointers
            DeltaDataPtr = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_deltaDimGPUPtrOffset);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Unbinds <paramref name="firstInput"/> from each of the vectors packed into
        /// <paramref name="otherInputs"/>; the last pointer in the table targets
        /// <paramref name="output"/>.
        /// </summary>
        public virtual void UnbindMultiple(MyMemoryBlock<float> firstInput, MyMemoryBlock<float> otherInputs, MyMemoryBlock<float> output)
        {
            int nrInputs = otherInputs.Count / m_inputSize;
            CUdeviceptr firstPtr = firstInput.GetDevicePtr(m_owner);
            CUdeviceptr start = otherInputs.GetDevicePtr(m_owner);
            CUdeviceptr[] arr = GetTempArray(nrInputs + 1);//+1 for output

            // Fill one pointer per input vector; the last slot is reserved for the output,
            // so loop strictly below nrInputs (the original wrote a dead value there).
            for (int i = 0; i < nrInputs; ++i)
            {
                arr[i] = start + i * m_inputSize * sizeof(float);
            }

            arr[nrInputs] = output.GetDevicePtr(m_owner);

            Unbind(firstPtr, arr);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Unbinds <paramref name="firstInput"/> from each of the vectors packed into
        /// <paramref name="otherInputs"/>; the last pointer in the table targets
        /// <paramref name="output"/>.
        /// </summary>
        public virtual void UnbindMultiple(MyMemoryBlock <float> firstInput, MyMemoryBlock <float> otherInputs, MyMemoryBlock <float> output)
        {
            int         nrInputs = otherInputs.Count / m_inputSize;
            CUdeviceptr firstPtr = firstInput.GetDevicePtr(m_owner);
            CUdeviceptr start    = otherInputs.GetDevicePtr(m_owner);

            CUdeviceptr[] arr = GetTempArray(nrInputs + 1);//+1 for output

            // Fill one pointer per input vector; the last slot is reserved for the output,
            // so loop strictly below nrInputs (the original wrote a dead value there).
            for (int i = 0; i < nrInputs; ++i)
            {
                arr[i] = start + i * m_inputSize * sizeof(float);
            }

            arr[nrInputs] = output.GetDevicePtr(m_owner);

            Unbind(firstPtr, arr);
        }
Ejemplo n.º 11
0
            //--------------------------------------------------------------------
            /// <summary>
            /// Normalizes a vector segment inside <paramref name="Vec"/> to unit length in place;
            /// a zero-length vector causes the block to be filled with zeros instead.
            /// </summary>
            /// <param name="Vec">input vector</param>
            /// <param name="dim">dimension of the vector</param>
            /// <param name="id_start">vector start displacement, in multiples of <paramref name="dim"/></param>
            private void NormalizeVector(MyMemoryBlock <float> Vec, int dim, int id_start = 0)
            {
                // NOTE(review): GPU index 0 is hard-coded here — confirm this task always runs on device 0.
                CUdeviceptr VecDevPtr = Vec.GetDevicePtr(0, id_start * dim);

                // Squared length of the segment, written to TempVal[0] on the device.
                m_dotKernel.Run(Owner.TempVal, 0, VecDevPtr, VecDevPtr, dim);
                Owner.TempVal.SafeCopyToHost();
                float length = (float)Math.Sqrt(Owner.TempVal.Host[0]);

                if (length != 0)
                {
                    // Scale the segment by the reciprocal length, in place.
                    m_mulKernel.Run(0f, 0f, 1.0f / length, 0f, VecDevPtr, VecDevPtr, dim);
                }
                else
                {
                    // NOTE(review): Fill(0) clears the entire memory block, not just the
                    // segment starting at id_start — confirm this is intended.
                    Vec.Fill(0);
                }
            }
Ejemplo n.º 12
0
            /// <summary>
            /// Refreshes the device-side table of input-block pointers for the operations
            /// that consume it; stacking and index-gathering manage their inputs themselves.
            /// </summary>
            public override void Execute()
            {
                switch (Owner.Operation)
                {
                case MyJoinOperation.StackInputs:
                case MyJoinOperation.GatherFromIdcs:
                    // These operations do not read the pointer table.
                    break;

                default:
                    for (int branch = 0; branch < Owner.InputBranches; branch++)
                    {
                        MyMemoryBlock <float> input = Owner.GetInput(branch);

                        if (input != null)
                        {
                            Owner.InputBlocksPointers.Host[branch] = input.GetDevicePtr(Owner);
                        }
                        else
                        {
                            // Missing branch: store a null device pointer.
                            Owner.InputBlocksPointers.Host[branch] = default(CUdeviceptr);
                        }
                    }

                    Owner.InputBlocksPointers.SafeCopyToDevice();
                    break;
                }
            }
Ejemplo n.º 13
0
        /// <summary>
        /// Initializes this layer on the given GPU: hooks up the output (and optional extra)
        /// memory and publishes the output dimensions/pointer through the network's shared
        /// dimension block.
        /// </summary>
        /// <param name="nGPU">The index of the GPU to initialize on.</param>
        public virtual void Initialize(Int32 nGPU)
        {
            // output

            // Set the dimension CUDevicePtr
            if (m_outputBlock != null)
            {
                m_output.Ptr = m_outputBlock.GetDevicePtr(m_network, m_outputOffset);
            }

            // NOTE(review): only the host copy is written here — presumably the dims block is
            // copied to the device elsewhere; confirm.
            m_network.DataDimsMemoryBlock.Host[m_outputDimGPUOffset] = Output;

            // Store the GPU pointers
            OutputDataPtr = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_outputDimGPUOffset);


            // extra
            if (m_extraBlock != null)
            {
                m_extraPtr = m_extraBlock.GetDevicePtr(m_network, m_extraOffset);
            }
        }
        /// <summary>
        /// Initializes this layer on the given GPU: hooks up the weight/bias memory (and their
        /// change buffers), publishes their dimensions and GPU pointers through the network's
        /// shared dimension block, and generates the initial weights.
        /// </summary>
        /// <param name="nGPU">The index of the GPU to initialize on.</param>
        public override void Initialize(Int32 nGPU)
        {
            base.Initialize(nGPU);

            // Set WeightChange and BiasChange dimensions according to respective Weight and Bias

            // NOTE(review): m_weightChangeBlock is dereferenced under the m_weightBlock null
            // check (and likewise for the bias pair) — confirm the blocks are always allocated together.
            if (m_weightBlock != null)
            {
                m_weight.Ptr       = m_weightBlock.GetDevicePtr(m_network, m_weightOffset);
                m_weightChange.Ptr = m_weightChangeBlock.GetDevicePtr(m_network, m_weightChangeOffset);
            }
            if (m_biasBlock != null)
            {
                m_bias.Ptr       = m_biasBlock.GetDevicePtr(m_network, m_biasOffset);
                m_biasChange.Ptr = m_biasChangeBlock.GetDevicePtr(m_network, m_biasChangeOffset);
            }

            // Send the structures to GPU
            m_network.DataDimsMemoryBlock.Host[m_weightDimGPUPtrOffset]          = Weight;
            m_network.DataDimsMemoryBlock.Host[m_weightChangeDimGPUPtrOffset]    = WeightChange;
            m_network.DataDimsMemoryBlock.Host[m_biasDimGPUPtrOffset]            = Bias;
            m_network.DataDimsMemoryBlock.Host[m_biasChangeDimGPUPtrOffset]      = BiasChange;
            m_network.DataDimsMemoryBlock.Host[m_lastWeightDeltaDimGPUPtrOffset] = LastWeightDelta;
            m_network.DataDimsMemoryBlock.Host[m_storedOutputDimGPUPtrOffset]    = StoredOutput;

            // Store the GPU pointers
            WeightDataPtr          = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_weightDimGPUPtrOffset);
            WeightChangeDataPtr    = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_weightChangeDimGPUPtrOffset);
            BiasDataPtr            = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_biasDimGPUPtrOffset);
            BiasChangeDataPtr      = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_biasChangeDimGPUPtrOffset);
            LastWeightDeltaDataPtr = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_lastWeightDeltaDimGPUPtrOffset);
            StoredOutputDataPtr    = m_network.DataDimsMemoryBlock.GetDevicePtr(m_network, (int)m_storedOutputDimGPUPtrOffset);

            // Generate initial weights
            GenerateWeights();
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal using a modified version of the Gram-Schmidt algorithm.
        /// Vectors whose squared norm is below 1e-7 are skipped entirely.
        /// </summary>
        /// <param name="vectors">The vectors to orthonormalize.</param>
        /// <param name="temp">A vector of temporal space; must hold at least <paramref name="xDim"/> items.</param>
        /// <param name="xDim">The length of each vector.</param>
        /// <param name="yDim">The number of vectors.</param>
        /// <param name="dotKernel">The kernel to compute a dot product.</param>
        /// <param name="multKernel">The kernel to compute vector combinations.</param>
        /// <param name="GPU">The index of the GPU the memory blocks live on.</param>
        public static void OrthonormalizeVectors(MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp, int xDim, int yDim, MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
        {
            int count = xDim * yDim;

            Debug.Assert(vectors != null && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
            Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
            Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
            Debug.Assert(temp.Count >= xDim, "Too little temp space!");

            multKernel.SetupExecution(xDim);

            for (int i = 0; i < count; i += xDim)
            {
                var curr = vectors.GetDevicePtr(GPU, i);

                // Normalize the current vector
                {
                    // The squared norm must land at temp[0]; reset the offset because the
                    // orthogonalization loop below leaves it at xDim.
                    dotKernel.outOffset = 0;
                    dotKernel.Run(temp, curr, curr, xDim);
                    // Fetch the device-side result to the host before reading temp.Host[0].
                    temp.SafeCopyToHost(0, 1);

                    if (temp.Host[0] < 0.0000001f)
                        continue;

                    // Replace the squared norm with its reciprocal square root and push it back.
                    temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
                    temp.SafeCopyToDevice(0, 1);

                    multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
                }

                // Make all the remaining vectors orthogonal to the current one
                for (int j = i + xDim; j < count; j += xDim)
                {
                    var next = vectors.GetDevicePtr(GPU, j);

                    // Compute and subtract the projection onto the current vector
                    dotKernel.outOffset = xDim;
                    dotKernel.Run(temp, curr, next, xDim);

                    multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
                    multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
                }
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Normalizes vectors along the leading dimension.
        /// </summary>
        /// <param name="vectors">The vectors to normalize, stored consecutively; must hold at least leadingDim * otherDim items.</param>
        /// <param name="temp">Temporary storage for the squared norms; must hold at least Max(leadingDim, otherDim) items.</param>
        /// <param name="leadingDim">The length of each vector.</param>
        /// <param name="otherDim">The number of vectors.</param>
        /// <param name="dotKernel">The kernel used to compute dot products.</param>
        /// <param name="multKernel">The kernel used to scale the vectors.</param>
        /// <param name="GPU">The index of the GPU the memory blocks live on.</param>
        public static void NormalizeLeadingDim(
            MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp,
            int leadingDim, int otherDim,
            MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
        {
            var count = leadingDim * otherDim;

            Debug.Assert(vectors != null && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
            Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
            Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
            Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

            multKernel.SetupExecution(leadingDim);

            // Compute the squared norm of each vector, writing vector i's result to temp[i].
            for (int i = 0; i < otherDim; i++)
            {
                var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
                //dotKernel.Run(temp, i, seg, seg, leadingDim, /* distributed: */ 0);
                dotKernel.outOffset = i;
                dotKernel.Run(temp, seg, seg, leadingDim);
            }

            // Bring the squared norms to the host before inspecting them.
            temp.SafeCopyToHost(0, otherDim);

            // Replace each squared norm with its reciprocal square root; near-zero vectors scale to 0.
            for (int i = 0; i < otherDim; i++)
            {
                if (temp.Host[i] < 0.0000001f)
                    temp.Host[i] = 0;
                else
                    temp.Host[i] = (float)(1 / Math.Sqrt(temp.Host[i]));
            }

            // Push the scaling factors back to the device.
            temp.SafeCopyToDevice(0, otherDim);

            // Scale each vector by its factor, in place.
            for (int i = 0; i < otherDim; i++)
            {
                var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
                var len = temp.GetDevicePtr(GPU, i);
                multKernel.Run(seg, len, seg, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Generates a transform matrix in column-major storage; see <paramref name="mode"/>
        /// for the generation strategies.
        /// </summary>
        /// <param name="unmanagedVectors">A memory block to store the generated matrix.
        /// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
        /// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
        /// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
        /// Only neccessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
        /// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
        /// <param name="random">The random object for number generation.</param>
        /// <param name="xDim">The size of one matrix dimension (described in the original as "the other dimension" — the roles of xDim/yDim look swapped in these docs; verify against callers).</param>
        /// <param name="yDim">The size of the other matrix dimension (described in the original as "the leading dimension").</param>
        /// <param name="dotKernel">The kernel used to compute dot products.</param>
        /// <param name="multKernel">The kernel used to combine vectors.</param>
        /// <param name="transposeKernel">The kernel used to transpose the matrix in place.</param>
        /// <param name="GPU">The index of the GPU the memory blocks live on.</param>
        /// <param name="mode">The generation strategy: plain random vectors, orthonormalized vectors, or averaged orthonormal base vectors.</param>
        /// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
        public static void GenerateTransformMatrix(
            MyMemoryBlock<float> unmanagedVectors, MyMemoryBlock<float> unmanagedBaseVectors, MyMemoryBlock<float> temp,
            Random random, int xDim, int yDim,
            MyProductKernel<float> dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
            VectorGenerationMode mode = VectorGenerationMode.Normal, AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
        {
            Debug.Assert(random != null, "Missing random object");
            Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");

            // Mapping to rows --- Column-major storage --- rows will the leading dimension
            // The larger dimension vectors will be orthogonal; the cols dimension vectors will be normalized

            switch (mode)
            {
                case VectorGenerationMode.Normal:
                    if (axisToNormalize == AxisToNormalizeEnum.xDim)
                    {
                        // Generate normalized vectors with xDim as the leading dim
                        GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                        unmanagedVectors.SafeCopyToDevice();

                        // Transpose to the correct position
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    }
                    else
                    {
                        // Generate directly with yDim leading; no transpose needed.
                        GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                        unmanagedVectors.SafeCopyToDevice();
                    }
                    break;

                case VectorGenerationMode.Orthonormalize:
                    int largerDim = Math.Max(xDim, yDim);
                    int smallerDim = Math.Min(xDim, yDim);

                    // Generate vectors with larger leading dimension
                    GenerateRandomNormalVectors(unmanagedVectors.Host, random, largerDim, smallerDim, normalize: false);
                    unmanagedVectors.SafeCopyToDevice();

                    // Orthonormalize along the larger dimension
                    OrthonormalizeVectors(unmanagedVectors, temp, largerDim, smallerDim, dotKernel, multKernel, GPU);

                    if (xDim > yDim)
                    {
                        // xDim is leading and is normalized
                        // We need to transpose to get the correct dims
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                        if (axisToNormalize == AxisToNormalizeEnum.yDim)
                            NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                    }
                    else
                    {
                        // yDim is leading and is normalized
                        // The matrix is in correct position

                        if (axisToNormalize == AxisToNormalizeEnum.xDim)
                        {
                            // TODO: generate the matrix with transposed dims?
                            // TODO: SMELLY VERSION:
                            // Transpose so xDim leads, normalize it, then transpose back.
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                            NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                        }
                    }
                    break;

                case VectorGenerationMode.AverageBaseVectors:
                    int longerDim = Math.Max(xDim, yDim);
                    int shorterDim = Math.Min(xDim, yDim);

                    // Build a square orthonormal base, then average groups of base vectors below.
                    GenerateTransformMatrix(
                        unmanagedBaseVectors, null, temp,
                        random, longerDim, longerDim,
                        dotKernel, multKernel, transposeKernel, GPU,
                        VectorGenerationMode.Orthonormalize);

                    if (shorterDim == longerDim)
                        break;

                    // Partition the longerDim base vectors into shorterDim contiguous groups
                    // and sum each group into one output vector.
                    float it = 0f;
                    float step = longerDim / (float)shorterDim;
                    int beg, end = 0;

                    for (int i = 0; i < shorterDim; i++)
                    {
                        beg = end;
                        it += step;
                        end = (int)it;

                        var vect = unmanagedVectors.GetDevicePtr(GPU, i * longerDim);

                        for (int j = beg; j < end; j++)
                        {
                            var baseVect = unmanagedBaseVectors.GetDevicePtr(GPU, j * longerDim);
                            multKernel.Run(baseVect, vect, vect, (int)MyJoin.MyJoinOperation.Addition, longerDim,
                                longerDim);
                        }
                    }

                    if (xDim > yDim)
                    {
                        // xDim is leading and is not normalized
                        // We need to transpose to get the correct dims

                        if (axisToNormalize == AxisToNormalizeEnum.xDim)
                        {
                            // Normalize while xDim still leads, then transpose into place.
                            NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);

                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                        }
                        else
                        {
                            // Transpose into place first, then normalize along yDim.
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                            NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                        }
                    }
                    else
                    {
                        // yDim is leading and is not normalized
                        // The matrix is in correct position

                        if (axisToNormalize == AxisToNormalizeEnum.yDim)
                            NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                        else
                        {
                            // TODO: SMELLY VERSION:
                            // Transpose so xDim leads, normalize it, then transpose back.
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                            NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                            transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                        }
                    }
                    break;
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Generates a matrix with <paramref name="xDim"/> being the leading dimension in column-major storage.
        /// </summary>
        /// <param name="unmanagedVectors">A memory block to store the generated matrix.
        /// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
        /// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
        /// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
        /// Only neccessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
        /// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
        /// <param name="random">The random object for number generation.</param>
        /// <param name="xDim">The size of the other dimension.</param>
        /// <param name="yDim">The size of the leading dimension.</param>
        /// <param name="mode">If true, the vectors along the longer dimension will be orthonormalized.</param>
        /// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
        public static void GenerateTransformMatrix(
            MyMemoryBlock <float> unmanagedVectors, MyMemoryBlock <float> unmanagedBaseVectors, MyMemoryBlock <float> temp,
            Random random, int xDim, int yDim,
            MyCudaKernel dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
            VectorGenerationMode mode = VectorGenerationMode.Normal, AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
        {
            Debug.Assert(random != null, "Missing random object");
            Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
            Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");


            // Mapping to rows --- Column-major storage --- rows will the leading dimension
            // The larger dimension vectors will be orthogonal; the cols dimension vectors will be normalized

            switch (mode)
            {
            case VectorGenerationMode.Normal:
                if (axisToNormalize == AxisToNormalizeEnum.xDim)
                {
                    // Generate normalized vectors with xDim as the leading dim
                    GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                    unmanagedVectors.SafeCopyToDevice();

                    // Transpose to the correct position
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
                else
                {
                    GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                    unmanagedVectors.SafeCopyToDevice();
                }
                break;

            case VectorGenerationMode.Orthonormalize:
                int largerDim  = Math.Max(xDim, yDim);
                int smallerDim = Math.Min(xDim, yDim);

                // Generate vectors with larger leading dimension
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, largerDim, smallerDim, normalize: false);
                unmanagedVectors.SafeCopyToDevice();

                // Orthonormalize along the larger dimension
                OrthonormalizeVectors(unmanagedVectors, temp, largerDim, smallerDim, dotKernel, multKernel, GPU);

                if (xDim > yDim)
                {
                    // xDim is leading and is normalized
                    // We need to transpose to get the correct dims
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                    if (axisToNormalize == AxisToNormalizeEnum.yDim)
                    {
                        NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                    }
                }
                else
                {
                    // yDim is leading and is normalized
                    // The matrix is in correct position

                    if (axisToNormalize == AxisToNormalizeEnum.xDim)
                    {
                        // TODO: generate the matrix with transposed dims?
                        // TODO: SMELLY VERSION:
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                        NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    }
                }
                break;

            case VectorGenerationMode.AverageBaseVectors:
                int longerDim  = Math.Max(xDim, yDim);
                int shorterDim = Math.Min(xDim, yDim);

                GenerateTransformMatrix(
                    unmanagedBaseVectors, null, temp,
                    random, longerDim, longerDim,
                    dotKernel, multKernel, transposeKernel, GPU,
                    VectorGenerationMode.Orthonormalize);

                if (shorterDim == longerDim)
                {
                    break;
                }


                float it = 0f;
                float step = longerDim / (float)shorterDim;
                int   beg, end = 0;

                for (int i = 0; i < shorterDim; i++)
                {
                    beg = end;
                    it += step;
                    end = (int)it;

                    var vect = unmanagedVectors.GetDevicePtr(GPU, i * longerDim);

                    for (int j = beg; j < end; j++)
                    {
                        var baseVect = unmanagedBaseVectors.GetDevicePtr(GPU, j * longerDim);
                        multKernel.Run(baseVect, vect, vect, (int)MyJoin.MyJoinOperation.Addition, longerDim,
                                       longerDim);
                    }
                }

                if (xDim > yDim)
                {
                    // xDim is leading and is not normalized
                    // We need to transpose to get the correct dims

                    if (axisToNormalize == AxisToNormalizeEnum.xDim)
                    {
                        NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);

                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    }
                    else
                    {
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                        NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                    }
                }
                else
                {
                    // yDim is leading and is not normalized
                    // The matrix is in correct position

                    if (axisToNormalize == AxisToNormalizeEnum.yDim)
                    {
                        NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                    }
                    else
                    {
                        // TODO: SMELLY VERSION:
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                        NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                        transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    }
                }
                break;
            }
        }
Ejemplo n.º 19
0
 /// <summary>
 /// Convenience overload: resolves the device pointers of the three memory
 /// blocks (using <c>m_owner</c>) and delegates to the pointer-based Unbind.
 /// </summary>
 /// <param name="firstInput">Memory block holding the first operand.</param>
 /// <param name="secondInput">Memory block holding the second operand.</param>
 /// <param name="output">Memory block receiving the result.</param>
 public virtual void Unbind(MyMemoryBlock<float> firstInput, MyMemoryBlock<float> secondInput, MyMemoryBlock<float> output)
 {
     var firstPtr  = firstInput.GetDevicePtr(m_owner);
     var secondPtr = secondInput.GetDevicePtr(m_owner);
     var outPtr    = output.GetDevicePtr(m_owner);

     Unbind(firstPtr, secondPtr, outPtr);
 }
Ejemplo n.º 20
0
 /// <summary>
 /// Convenience overload: looks up device pointers for each memory block
 /// via <c>m_owner</c> and forwards to the pointer-based Unbind.
 /// </summary>
 /// <param name="firstInput">Memory block with the first operand.</param>
 /// <param name="secondInput">Memory block with the second operand.</param>
 /// <param name="output">Memory block that receives the result.</param>
 public virtual void Unbind(MyMemoryBlock <float> firstInput, MyMemoryBlock <float> secondInput, MyMemoryBlock <float> output)
 {
     var a = firstInput.GetDevicePtr(m_owner);
     var b = secondInput.GetDevicePtr(m_owner);
     var c = output.GetDevicePtr(m_owner);

     Unbind(a, b, c);
 }
Ejemplo n.º 21
0
 /// <summary>
 /// Launches the kernel asynchronously on the given stream. All operands are
 /// taken from memory blocks; the element count is <paramref name="input1"/>.Count.
 /// </summary>
 /// <param name="stream">CUDA stream to enqueue the launch on.</param>
 /// <param name="output">Memory block receiving the kernel output.</param>
 /// <param name="input1">First input block; also supplies the size.</param>
 /// <param name="input2">Second input block.</param>
 public void RunAsync(CudaStream stream, MyMemoryBlock <T> output, MyMemoryBlock <T> input1, MyMemoryBlock <T> input2)
 {
     var outPtr = output.GetDevicePtr(m_nGPU);
     var in1Ptr = input1.GetDevicePtr(m_nGPU);
     var in2Ptr = input2.GetDevicePtr(m_nGPU);

     KernelRun(stream, outPtr, in1Ptr, in2Ptr, input1.Count, true);
 }
Ejemplo n.º 22
0
 /// <summary>
 /// Launches the kernel asynchronously with an explicit first-input device
 /// pointer and an explicit element count.
 /// </summary>
 /// <param name="stream">CUDA stream to enqueue the launch on.</param>
 /// <param name="output">Memory block receiving the kernel output.</param>
 /// <param name="input1Ptr">Raw device pointer to the first input.</param>
 /// <param name="input2">Second input block.</param>
 /// <param name="size">Number of elements to process.</param>
 public void RunAsync(CudaStream stream, MyMemoryBlock <T> output, CUdeviceptr input1Ptr, MyMemoryBlock <T> input2, int size)
 {
     var outPtr = output.GetDevicePtr(m_nGPU);
     var in2Ptr = input2.GetDevicePtr(m_nGPU);

     KernelRun(stream, outPtr, input1Ptr, in2Ptr, size, true);
 }
Ejemplo n.º 23
0
 /// <summary>
 /// Launches the kernel asynchronously with explicit output and second-input
 /// device pointers; the element count comes from <paramref name="input1"/>.
 /// </summary>
 /// <param name="stream">CUDA stream to enqueue the launch on.</param>
 /// <param name="outputPtr">Raw device pointer for the output.</param>
 /// <param name="input1">First input block; also supplies the size.</param>
 /// <param name="input2Ptr">Raw device pointer to the second input.</param>
 public void RunAsync(CudaStream stream, CUdeviceptr outputPtr, MyMemoryBlock <T> input1, CUdeviceptr input2Ptr)
 {
     var in1Ptr = input1.GetDevicePtr(m_nGPU);
     int count  = input1.Count;

     KernelRun(stream, outputPtr, in1Ptr, input2Ptr, count, true);
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Runs the kernel synchronously (no stream). All operands are taken from
 /// memory blocks; the element count is <paramref name="input1"/>.Count.
 /// </summary>
 /// <param name="output">Memory block receiving the kernel output.</param>
 /// <param name="input1">First input block; also supplies the size.</param>
 /// <param name="input2">Second input block.</param>
 public void Run(MyMemoryBlock <T> output, MyMemoryBlock <T> input1, MyMemoryBlock <T> input2)
 {
     var outPtr = output.GetDevicePtr(m_nGPU);
     var in1Ptr = input1.GetDevicePtr(m_nGPU);
     var in2Ptr = input2.GetDevicePtr(m_nGPU);

     KernelRun(null, outPtr, in1Ptr, in2Ptr, input1.Count);
 }
Ejemplo n.º 25
0
 /// <summary>
 /// Runs the kernel synchronously with an explicit first-input device pointer
 /// and an explicit element count.
 /// </summary>
 /// <param name="output">Memory block receiving the kernel output.</param>
 /// <param name="input1Ptr">Raw device pointer to the first input.</param>
 /// <param name="input2">Second input block.</param>
 /// <param name="size">Number of elements to process.</param>
 public void Run(MyMemoryBlock <T> output, CUdeviceptr input1Ptr, MyMemoryBlock <T> input2, int size)
 {
     var outPtr = output.GetDevicePtr(m_nGPU);
     var in2Ptr = input2.GetDevicePtr(m_nGPU);

     KernelRun(null, outPtr, input1Ptr, in2Ptr, size);
 }
Ejemplo n.º 26
0
 /// <summary>
 /// Runs the kernel synchronously with an explicit output device pointer;
 /// the element count comes from <paramref name="input1"/>.
 /// </summary>
 /// <param name="outputPtr">Raw device pointer for the output.</param>
 /// <param name="input1">First input block; also supplies the size.</param>
 /// <param name="input2">Second input block.</param>
 public void Run(CUdeviceptr outputPtr, MyMemoryBlock <T> input1, MyMemoryBlock <T> input2)
 {
     var in1Ptr = input1.GetDevicePtr(m_nGPU);
     var in2Ptr = input2.GetDevicePtr(m_nGPU);
     int count  = input1.Count;

     KernelRun(null, outputPtr, in1Ptr, in2Ptr, count);
 }