/// <summary>
/// Sets up FFT-based circular-convolution binding for vectors of length <paramref name="inputSize"/>:
/// forward/inverse FFT plans on a shared stream plus the kernels used for binding and unbinding.
/// </summary>
/// <param name="owner">Node that owns this binder; supplies the GPU index.</param>
/// <param name="inputSize">Length of the vectors to bind.</param>
/// <param name="tempBlock">Scratch memory holding the FFT intermediates.</param>
public MyFourierBinder(MyWorkingNode owner, int inputSize, MyMemoryBlock<float> tempBlock)
    : base(owner, inputSize, tempBlock)
{
    // Both plans share one stream so the FFT -> multiply -> IFFT pipeline serializes correctly.
    m_stream = new CudaStream();
    m_fft = new CudaFFTPlan1D(inputSize, cufftType.R2C, 1);
    m_fft.SetStream(m_stream.Stream);
    m_ifft = new CudaFFTPlan1D(inputSize, cufftType.C2R, 1);
    m_ifft.SetStream(m_stream.Stream);

    // Element-wise complex multiplication in the frequency domain (the bind step).
    m_mulkernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "MulComplexElementWise");
    m_mulkernel.SetupExecution(inputSize + 1);

    // Involution (index reversal) used for unbinding; launched with inputSize - 1 threads —
    // presumably element 0 stays in place. TODO(review): confirm against the kernel source.
    m_involutionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "InvolveVector");
    m_involutionKernel.SetupExecution(inputSize - 1);

    m_inversionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\InvertValuesKernel", "InvertLengthComplexKernel");
    m_inversionKernel.SetupExecution(inputSize);

    m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(owner, owner.GPU, ProductMode.f_DotProduct_f);

    m_normalKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
    m_normalKernel.SetupExecution(inputSize);

    // Layout of the temp block: two complex spectra of (inputSize + 1) * 2 floats each,
    // followed by general scratch space.
    m_firstFFTOffset = 0;
    m_secondFFTOffset = (inputSize + 1) * 2;
    m_tempOffset = (inputSize + 1) * 4;

    Denominator = inputSize;
}
float NearestCC_dist; // distance of the current sample to the closest cluster center

/// <summary>
/// Loads and configures all CUDA kernels used by the k-means working-memory task.
/// </summary>
/// <param name="nGPU">Index of the GPU to load the kernels on.</param>
public override void Init(int nGPU)
{
    m_kernel_AddNewCCenter = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "AddDataAsCC");
    m_kernel_AddNewCCenter.SetupExecution(Owner.DescCount);

    m_kernel_UpadteCC_desc = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "UpadateCC_Desc");
    m_kernel_UpadteCC_desc.SetupExecution(Owner.DescCount);

    m_kernel_UpdateCC_XY = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "UpdateCC_XY");
    m_kernel_UpdateCC_XY.SetupExecution(Owner.ObjectXY.Count);

    m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(Owner, nGPU, ProductMode.f_DotProduct_f);

    m_mulKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
    m_mulKernel.SetupExecution(Owner.DescCount);

    // Naive matrix multiplication, one thread per descriptor row.
    // BUGFIX: load on nGPU (the GPU being initialized) instead of Owner.GPU,
    // consistently with every other kernel in this method.
    m_matMultpl = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MatMultipl_naive");
    m_matMultpl.GridDimensions = new ManagedCuda.VectorTypes.dim3(1, Owner.DescCount);
    m_matMultpl.BlockDimensions = new ManagedCuda.VectorTypes.dim3(1, 1);

    m_minIdxKernel = MyKernelFactory.Instance.KernelReduction<float>(Owner, nGPU, ReductionMode.f_MinIdx_ff);

    m_kernel_UpdateXY_basedOnTheBrainsMovement = MyKernelFactory.Instance.Kernel(nGPU, @"Vision\KMeansWM", "ApplyBrainsMovement");
    // BUGFIX: the original set up m_kernel_UpdateCC_XY a second time here (copy-paste),
    // leaving the ApplyBrainsMovement kernel without an execution configuration.
    m_kernel_UpdateXY_basedOnTheBrainsMovement.SetupExecution(Owner.MaxClusters);
}
/// <summary>
/// Loads the kernels this task needs, depending on the node's configuration
/// (decay, additive scaling, and optional target normalization).
/// </summary>
/// <param name="nGPU">Index of the GPU to load the kernels on.</param>
public override void Init(int nGPU)
{
    if (DecayFactor != 1f)
    {
        if (DecayFactor > 1f)
        {
            MyLog.WARNING.WriteLine("Decay factor on a HashingMemoryNode that is greater than one is suspicious...");
        }

        _polynomialFuncKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
        _polynomialFuncKernel.SetupExecution(Memory.Count);
    }

    if (AddFactor != 1f)
    {
        _constMulKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
        _constMulKernel.SetupExecution(Owner.SymbolSize);
    }

    if (NormalizeTarget)
    {
        _combineVectorsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
        _combineVectorsKernel.SetupExecution(Owner.SymbolSize);

        _mapToIdcsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MapToIdcs");
        _mapToIdcsKernel.SetupExecution(Owner.SymbolSize);

        _dotKernel = MyKernelFactory.Instance.KernelProduct<float>(Owner, nGPU, ProductMode.f_DotProduct_f);
    }
    else
    {
        // BUGFIX: the kernel path was @"common\..." (lowercase 'c'), unlike every other
        // use of @"Common\CombineVectorsKernel" in this file; resource lookup may be
        // case-sensitive, so normalize the casing.
        _mapToIdcsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "AddToIdcs");
        _mapToIdcsKernel.SetupExecution(Owner.SymbolSize);
    }

    Temp.SafeCopyToHost();
}
/// <summary>
/// Builds the FFT plans and CUDA kernels used for circular-convolution binding
/// of vectors with <paramref name="inputSize"/> elements.
/// </summary>
/// <param name="owner">Node that owns this binder; supplies the GPU index.</param>
/// <param name="inputSize">Length of the vectors to bind.</param>
/// <param name="tempBlock">Scratch memory holding the FFT intermediates.</param>
public MyFourierBinder(MyWorkingNode owner, int inputSize, MyMemoryBlock<float> tempBlock)
    : base(owner, inputSize, tempBlock)
{
    int gpu = owner.GPU;
    int spectrumFloats = (inputSize + 1) * 2; // floats occupied by one complex spectrum

    // Forward and inverse 1D FFT plans, serialized on a single shared stream.
    m_stream = new CudaStream();
    m_fft = new CudaFFTPlan1D(inputSize, cufftType.R2C, 1);
    m_ifft = new CudaFFTPlan1D(inputSize, cufftType.C2R, 1);
    m_fft.SetStream(m_stream.Stream);
    m_ifft.SetStream(m_stream.Stream);

    // Frequency-domain element-wise complex multiply (the bind step).
    m_mulkernel = MyKernelFactory.Instance.Kernel(gpu, @"Common\CombineVectorsKernel", "MulComplexElementWise");
    m_mulkernel.SetupExecution(inputSize + 1);

    // Involution (index reversal) used for unbinding.
    m_involutionKernel = MyKernelFactory.Instance.Kernel(gpu, @"Common\CombineVectorsKernel", "InvolveVector");
    m_involutionKernel.SetupExecution(inputSize - 1);

    m_inversionKernel = MyKernelFactory.Instance.Kernel(gpu, @"Transforms\InvertValuesKernel", "InvertLengthComplexKernel");
    m_inversionKernel.SetupExecution(inputSize);

    m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(owner, gpu, ProductMode.f_DotProduct_f);

    m_normalKernel = MyKernelFactory.Instance.Kernel(gpu, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
    m_normalKernel.SetupExecution(inputSize);

    // Temp-block layout: [spectrum A | spectrum B | scratch].
    m_firstFFTOffset = 0;
    m_secondFFTOffset = spectrumFloats;
    m_tempOffset = spectrumFloats * 2;

    Denominator = inputSize;
}
/// <summary>
/// Picks and configures the kernel(s) that implement the selected join operation.
/// A default combine kernel is set up first, then overridden per operation.
/// </summary>
/// <param name="nGPU">Index of the GPU to load the kernels on.</param>
public override void Init(int nGPU)
{
    in0 = Owner.GetInput(0);
    in1 = Owner.GetInput(1);
    out0 = Owner.GetOutput(0);

    // Default kernel: the multi-input variant when more than two branches are joined,
    // otherwise the two-input variable-size combine.
    m_kernel = Owner.InputBranches > 2
        ? MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel")
        : MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernelVarSize");
    m_kernel.SetupExecution(out0.Count);

    switch (Owner.Operation)
    {
        case MyJoinOperation.AddToIdcs:
            m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "AddToIdcs");
            m_kernel.SetupExecution(in1.Count);
            break;
        case MyJoinOperation.AddToIdcs_Normalize:
            // Combine + scatter via indices + dot product (for the normalization step).
            m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
            m_kernel.SetupExecution(in1.Count);
            m_mapToIdcsKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MapToIdcs");
            m_mapToIdcsKernel.SetupExecution(in1.Count);
            m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(Owner, nGPU, ProductMode.f_DotProduct_f);
            break;
        case MyJoinOperation.GatherFromIdcs:
            m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
            m_kernel.SetupExecution(in1.Count);
            break;
        case MyJoinOperation.DotProduct:
        case MyJoinOperation.DistanceSquared:
            // Reuse the default combine kernel, but sized to the input, plus a dot product.
            m_kernel.SetupExecution(in0.Count);
            m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(Owner, nGPU, ProductMode.f_DotProduct_f);
            break;
        case MyJoinOperation.CosineDistance:
            m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(Owner, nGPU, ProductMode.f_Cosine_f);
            break;
        case MyJoinOperation.MatMultiplication:
        {
            // Naive matrix multiplication; grid is (columns, rows) = (ColumnHint, out0.Count / out0.ColumnHint)
            // with 1x1 blocks (MAX_BLOCK_SIZE = 1).
            m_kernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "MatMultipl_naive");
            int MAX_BLOCK_SIZE = 1;
            m_kernel.GridDimensions = new ManagedCuda.VectorTypes.dim3(out0.ColumnHint / MAX_BLOCK_SIZE, out0.Count / out0.ColumnHint / MAX_BLOCK_SIZE);
            m_kernel.BlockDimensions = new ManagedCuda.VectorTypes.dim3(MAX_BLOCK_SIZE, MAX_BLOCK_SIZE);
        }
            break;
    }
}
/// <summary>
/// Normalizes vectors along the leading dimension: each of the <paramref name="otherDim"/>
/// contiguous segments of length <paramref name="leadingDim"/> is scaled to unit length.
/// Segments with (near-)zero norm are zeroed out instead.
/// </summary>
public static void NormalizeLeadingDim(
    MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp,
    int leadingDim, int otherDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    var count = leadingDim * otherDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
    Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

    multKernel.SetupExecution(leadingDim);

    // Squared norm of every vector, written to temp[vec] on the device.
    for (int vec = 0; vec < otherDim; vec++)
    {
        var segment = vectors.GetDevicePtr(GPU, vec * leadingDim);
        dotKernel.outOffset = vec;
        dotKernel.Run(temp, segment, segment, leadingDim);
    }

    temp.SafeCopyToHost(0, otherDim);

    // Invert the lengths on the host; degenerate vectors get a zero scale factor.
    for (int vec = 0; vec < otherDim; vec++)
    {
        float sqNorm = temp.Host[vec];
        temp.Host[vec] = sqNorm < 0.0000001f ? 0f : (float)(1 / Math.Sqrt(sqNorm));
    }

    temp.SafeCopyToDevice(0, otherDim);

    // Scale each vector by its inverted length.
    for (int vec = 0; vec < otherDim; vec++)
    {
        var segment = vectors.GetDevicePtr(GPU, vec * leadingDim);
        var invLen = temp.GetDevicePtr(GPU, vec);
        multKernel.Run(segment, invLen, segment, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
    }
}
/// <summary>
/// Loads the kernels used to compare symbols: a sum reduction (plus an elementwise
/// combine kernel) for the BSC variety, a plain dot product otherwise.
/// Also clears the shared global variable for this node.
/// </summary>
/// <param name="nGPU">Index of the GPU to load the kernels on.</param>
public override void Init(int nGPU)
{
    lastIdx = -1;

    if (Owner.UseBSCVariety)
    {
        m_sum = MyKernelFactory.Instance.KernelReduction<float>(Owner, nGPU, ReductionMode.f_Sum_f);
    }
    else
    {
        m_dot = MyKernelFactory.Instance.KernelProduct<float>(Owner, nGPU, ProductMode.f_DotProduct_f);
    }

    MyMemoryManager.Instance.ClearGlobalVariable(Owner.GlobalVariableName, nGPU);

    if (Owner.UseBSCVariety)
    {
        // BUGFIX: load on nGPU (the GPU being initialized), not Owner.GPU,
        // consistently with the other kernels in this method.
        m_similarityKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
        m_similarityKernel.SetupExecution(Owner.SymbolSize);
    }
}
/// <summary>
/// Prepares the kernels required by the requested set of distance operations.
/// </summary>
/// <param name="caller">Node on whose behalf (and GPU) the kernels are loaded.</param>
/// <param name="operations">Flags selecting which distance measures will be computed.</param>
/// <param name="tempBlock">Optional scratch memory used by the computations.</param>
public MyDistanceOps(MyWorkingNode caller, DistanceOperation operations, MyMemoryBlock<float> tempBlock = null)
{
    m_caller = caller;
    m_operations = operations;
    m_temp = tempBlock;

    bool wantsEuclid = operations.HasFlag(DistanceOperation.EuclidDist)
                       || operations.HasFlag(DistanceOperation.EuclidDistSquared);
    bool wantsHamming = operations.HasFlag(DistanceOperation.HammingDist)
                        || operations.HasFlag(DistanceOperation.HammingSim);

    if (operations.HasFlag(DistanceOperation.DotProd))
    {
        m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(caller, caller.GPU, ProductMode.f_DotProduct_f);
    }

    if (operations.HasFlag(DistanceOperation.CosDist))
    {
        m_cosKernel = MyKernelFactory.Instance.KernelProduct<float>(caller, caller.GPU, ProductMode.f_Cosine_f);
    }

    if (wantsEuclid)
    {
        // EuclidDist computes EuclidDistSquared first, so keep them together:
        m_operations |= DistanceOperation.EuclidDist | DistanceOperation.EuclidDistSquared;
        m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(caller, caller.GPU, ProductMode.f_DotProduct_f);
    }

    // Both Hamming variants use the same sum reduction.
    if (wantsHamming)
    {
        m_reduceSumKernel = MyKernelFactory.Instance.KernelReduction<float>(caller, caller.GPU, ReductionMode.f_Sum_f);
    }

    // These measures all need an elementwise combine step before reducing.
    if (wantsEuclid || wantsHamming)
    {
        m_combineVecsKernel = MyKernelFactory.Instance.Kernel(m_caller.GPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
    }
}
/// <summary>
/// Prepares the kernels required by the requested set of distance operations.
/// </summary>
/// <param name="caller">Node on whose behalf (and GPU) the kernels are loaded.</param>
/// <param name="operations">Flags selecting which distance measures will be computed.</param>
/// <param name="tempBlock">Optional scratch memory used by the computations.</param>
public MyDistanceOps(MyWorkingNode caller, DistanceOperation operations, MyMemoryBlock<float> tempBlock = null)
{
    m_caller = caller;
    m_operations = operations;
    m_temp = tempBlock;

    if (operations.HasFlag(DistanceOperation.DotProd))
    {
        m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(caller, caller.GPU, ProductMode.f_DotProduct_f);
    }

    if (operations.HasFlag(DistanceOperation.CosDist))
    {
        m_cosKernel = MyKernelFactory.Instance.KernelProduct<float>(caller, caller.GPU, ProductMode.f_Cosine_f);
    }

    if (operations.HasFlag(DistanceOperation.EuclidDist) || operations.HasFlag(DistanceOperation.EuclidDistSquared))
    {
        // EuclidDist computes EuclidDistSquared first, so keep them together:
        m_operations |= DistanceOperation.EuclidDist | DistanceOperation.EuclidDistSquared;
        m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(caller, caller.GPU, ProductMode.f_DotProduct_f);
    }

    // Both Hamming variants reduce an elementwise comparison with the same sum kernel.
    if (operations.HasFlag(DistanceOperation.HammingDist))
    {
        m_reduceSumKernel = MyKernelFactory.Instance.KernelReduction<float>(caller, caller.GPU, ReductionMode.f_Sum_f);
    }

    if (operations.HasFlag(DistanceOperation.HammingSim))
    {
        m_reduceSumKernel = MyKernelFactory.Instance.KernelReduction<float>(caller, caller.GPU, ReductionMode.f_Sum_f);
    }

    // These measures all need an elementwise combine step before reduction.
    if (operations.HasFlag(DistanceOperation.EuclidDist) || operations.HasFlag(DistanceOperation.EuclidDistSquared) ||
        operations.HasFlag(DistanceOperation.HammingDist) || operations.HasFlag(DistanceOperation.HammingSim))
    {
        m_combineVecsKernel = MyKernelFactory.Instance.Kernel(m_caller.GPU, @"Common\CombineVectorsKernel", "CombineTwoVectorsKernel");
    }
}
/// <summary>
/// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal
/// using a modified version of the Gram-Schmidt algorithm.
/// </summary>
/// <param name="vectors">The vectors to orthonormalize.</param>
/// <param name="temp">A vector of temporal space.</param>
/// <param name="xDim">The length of each vector.</param>
/// <param name="yDim">The number of vectors.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
public static void OrthonormalizeVectors(MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp, int xDim, int yDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    int count = xDim * yDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
    Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= xDim, "Too little temp space!");

    multKernel.SetupExecution(xDim);

    for (int i = 0; i < count; i += xDim)
    {
        var curr = vectors.GetDevicePtr(GPU, i);

        // Normalize the current vector
        {
            dotKernel.Run(temp, curr, curr, xDim);
            // BUGFIX: the squared norm lives on the DEVICE after the dot product; it must be
            // copied to the host before temp.Host[0] is read. The original called
            // SafeCopyToDevice here, which clobbered the result with stale host data.
            temp.SafeCopyToHost(0, 1);

            // Skip (near-)zero vectors — they cannot be normalized.
            if (temp.Host[0] < 0.0000001f)
                continue;

            temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
            temp.SafeCopyToDevice(0, 1);

            multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
        }

        // Make all the remaining vectors orthogonal to the current one
        for (int j = i + xDim; j < count; j += xDim)
        {
            var next = vectors.GetDevicePtr(GPU, j);

            // Compute and subtract the projection onto the current vector
            dotKernel.outOffset = xDim;
            dotKernel.Run(temp, curr, next, xDim);

            multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
            multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
        }
    }
}
/// <summary>
/// Normalizes vectors along the leading dimension: each of the <paramref name="otherDim"/>
/// contiguous segments of length <paramref name="leadingDim"/> is scaled to unit length
/// ((near-)zero vectors are zeroed instead).
/// </summary>
public static void NormalizeLeadingDim(
    MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp,
    int leadingDim, int otherDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    var count = leadingDim * otherDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
    Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

    multKernel.SetupExecution(leadingDim);

    // Squared norm of each vector -> temp[i] (on the device).
    for (int i = 0; i < otherDim; i++)
    {
        var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
        //dotKernel.Run(temp, i, seg, seg, leadingDim, /* distributed: */ 0);
        dotKernel.outOffset = i;
        dotKernel.Run(temp, seg, seg, leadingDim);
    }

    temp.SafeCopyToHost(0, otherDim);

    // Invert the norms on the host; degenerate vectors get a zero scale factor.
    for (int i = 0; i < otherDim; i++)
    {
        if (temp.Host[i] < 0.0000001f)
            temp.Host[i] = 0;
        else
            temp.Host[i] = (float)(1 / Math.Sqrt(temp.Host[i]));
    }

    temp.SafeCopyToDevice(0, otherDim);

    // Scale each vector by its inverted norm.
    for (int i = 0; i < otherDim; i++)
    {
        var seg = vectors.GetDevicePtr(GPU, i * leadingDim);
        var len = temp.GetDevicePtr(GPU, i);
        multKernel.Run(seg, len, seg, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
    }
}
/// <summary>
/// Generates a matrix with <paramref name="xDim"/> being the leading dimension in column-major storage.
/// </summary>
/// <param name="unmanagedVectors">A memory block to store the generated matrix.
/// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
/// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
/// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
/// Only necessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
/// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
/// <param name="random">The random object for number generation.</param>
/// <param name="xDim">The size of the other dimension.</param>
/// <param name="yDim">The size of the leading dimension.</param>
/// <param name="mode">Selects how the matrix is generated (plain random, orthonormalized,
/// or averaged from an orthonormal base).</param>
/// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
public static void GenerateTransformMatrix(
    MyMemoryBlock<float> unmanagedVectors, MyMemoryBlock<float> unmanagedBaseVectors, MyMemoryBlock<float> temp,
    Random random, int xDim, int yDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
    VectorGenerationMode mode = VectorGenerationMode.Normal,
    AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
{
    Debug.Assert(random != null, "Missing random object");
    Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");

    // Mapping to rows --- column-major storage --- rows will be the leading dimension.
    // The larger-dimension vectors will be orthogonal; the cols-dimension vectors will be normalized.
    switch (mode)
    {
        case VectorGenerationMode.Normal:
            if (axisToNormalize == AxisToNormalizeEnum.xDim)
            {
                // Generate normalized vectors with xDim as the leading dim
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                unmanagedVectors.SafeCopyToDevice();

                // Transpose to the correct position
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            else
            {
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                unmanagedVectors.SafeCopyToDevice();
            }
            break;

        case VectorGenerationMode.Orthonormalize:
            int largerDim = Math.Max(xDim, yDim);
            int smallerDim = Math.Min(xDim, yDim);

            // Generate vectors with larger leading dimension (unnormalized; Gram-Schmidt
            // normalizes as it goes).
            GenerateRandomNormalVectors(unmanagedVectors.Host, random, largerDim, smallerDim, normalize: false);
            unmanagedVectors.SafeCopyToDevice();

            // Orthonormalize along the larger dimension
            OrthonormalizeVectors(unmanagedVectors, temp, largerDim, smallerDim, dotKernel, multKernel, GPU);

            if (xDim > yDim)
            {
                // xDim is leading and is normalized; we need to transpose to get the correct dims
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                if (axisToNormalize == AxisToNormalizeEnum.yDim)
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
            }
            else
            {
                // yDim is leading and is normalized; the matrix is in correct position
                if (axisToNormalize == AxisToNormalizeEnum.xDim)
                {
                    // TODO: generate the matrix with transposed dims?
                    // TODO: SMELLY VERSION: transpose, normalize the now-leading xDim, transpose back.
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
            }
            break;

        case VectorGenerationMode.AverageBaseVectors:
            int longerDim = Math.Max(xDim, yDim);
            int shorterDim = Math.Min(xDim, yDim);

            // Build an orthonormal square base, then average runs of base vectors
            // into the shorter dimension.
            GenerateTransformMatrix(
                unmanagedBaseVectors, null, temp,
                random, longerDim, longerDim,
                dotKernel, multKernel, transposeKernel, GPU,
                VectorGenerationMode.Orthonormalize);

            if (shorterDim == longerDim)
                break;

            float it = 0f;
            float step = longerDim / (float)shorterDim;
            int beg, end = 0;

            for (int i = 0; i < shorterDim; i++)
            {
                // Each output vector accumulates base vectors [beg, end).
                beg = end;
                it += step;
                end = (int)it;

                var vect = unmanagedVectors.GetDevicePtr(GPU, i * longerDim);

                for (int j = beg; j < end; j++)
                {
                    var baseVect = unmanagedBaseVectors.GetDevicePtr(GPU, j * longerDim);
                    multKernel.Run(baseVect, vect, vect, (int)MyJoin.MyJoinOperation.Addition, longerDim, longerDim);
                }
            }

            if (xDim > yDim)
            {
                // xDim is leading and is not normalized; we need to transpose to get the correct dims
                if (axisToNormalize == AxisToNormalizeEnum.xDim)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
                else
                {
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else
            {
                // yDim is leading and is not normalized; the matrix is in correct position
                if (axisToNormalize == AxisToNormalizeEnum.yDim)
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                else
                {
                    // TODO: SMELLY VERSION: transpose, normalize the now-leading xDim, transpose back.
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
            }
            break;
    }
}
/// <summary>
/// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal
/// using a modified version of the Gram-Schmidt algorithm.
/// </summary>
/// <param name="vectors">The vectors to orthonormalize.</param>
/// <param name="temp">A vector of temporal space.</param>
/// <param name="xDim">The length of each vector.</param>
/// <param name="yDim">The number of vectors.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
public static void OrthonormalizeVectors(MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp, int xDim, int yDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    int count = xDim * yDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
    Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= xDim, "Too little temp space!");

    multKernel.SetupExecution(xDim);

    for (int i = 0; i < count; i += xDim)
    {
        var curr = vectors.GetDevicePtr(GPU, i);

        // Normalize the current vector
        {
            dotKernel.Run(temp, curr, curr, xDim);
            // BUGFIX: the dot-product result is on the DEVICE; copy it to the host before
            // reading temp.Host[0]. The original SafeCopyToDevice overwrote the fresh
            // result with stale host data.
            temp.SafeCopyToHost(0, 1);

            // A (near-)zero vector cannot be normalized; skip it.
            if (temp.Host[0] < 0.0000001f)
            {
                continue;
            }

            temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
            temp.SafeCopyToDevice(0, 1);

            multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
        }

        // Make all the remaining vectors orthogonal to the current one
        for (int j = i + xDim; j < count; j += xDim)
        {
            var next = vectors.GetDevicePtr(GPU, j);

            // Compute and subtract the projection onto the current vector
            dotKernel.outOffset = xDim;
            dotKernel.Run(temp, curr, next, xDim);

            multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
            multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
        }
    }
}
/// <summary>
/// Generates a matrix with <paramref name="xDim"/> being the leading dimension in column-major storage.
/// </summary>
/// <param name="unmanagedVectors">A memory block to store the generated matrix.
/// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
/// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
/// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
/// Only necessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
/// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
/// <param name="random">The random object for number generation.</param>
/// <param name="xDim">The size of the other dimension.</param>
/// <param name="yDim">The size of the leading dimension.</param>
/// <param name="mode">Selects how the matrix is generated (plain random, orthonormalized,
/// or averaged from an orthonormal base).</param>
/// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
public static void GenerateTransformMatrix(
    MyMemoryBlock<float> unmanagedVectors, MyMemoryBlock<float> unmanagedBaseVectors, MyMemoryBlock<float> temp,
    Random random, int xDim, int yDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
    VectorGenerationMode mode = VectorGenerationMode.Normal,
    AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
{
    Debug.Assert(random != null, "Missing random object");
    Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");

    // Mapping to rows --- column-major storage --- rows will be the leading dimension.
    // The larger-dimension vectors will be orthogonal; the cols-dimension vectors will be normalized.
    switch (mode)
    {
        case VectorGenerationMode.Normal:
            if (axisToNormalize == AxisToNormalizeEnum.xDim)
            {
                // Generate normalized vectors with xDim as the leading dim
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                unmanagedVectors.SafeCopyToDevice();

                // Transpose to the correct position
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            else
            {
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                unmanagedVectors.SafeCopyToDevice();
            }
            break;

        case VectorGenerationMode.Orthonormalize:
            int largerDim = Math.Max(xDim, yDim);
            int smallerDim = Math.Min(xDim, yDim);

            // Generate vectors with larger leading dimension (unnormalized; Gram-Schmidt
            // normalizes as it goes).
            GenerateRandomNormalVectors(unmanagedVectors.Host, random, largerDim, smallerDim, normalize: false);
            unmanagedVectors.SafeCopyToDevice();

            // Orthonormalize along the larger dimension
            OrthonormalizeVectors(unmanagedVectors, temp, largerDim, smallerDim, dotKernel, multKernel, GPU);

            if (xDim > yDim)
            {
                // xDim is leading and is normalized; we need to transpose to get the correct dims
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                if (axisToNormalize == AxisToNormalizeEnum.yDim)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else
            {
                // yDim is leading and is normalized; the matrix is in correct position
                if (axisToNormalize == AxisToNormalizeEnum.xDim)
                {
                    // TODO: generate the matrix with transposed dims?
                    // TODO: SMELLY VERSION: transpose, normalize the now-leading xDim, transpose back.
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
            }
            break;

        case VectorGenerationMode.AverageBaseVectors:
            int longerDim = Math.Max(xDim, yDim);
            int shorterDim = Math.Min(xDim, yDim);

            // Build an orthonormal square base, then average runs of base vectors
            // into the shorter dimension.
            GenerateTransformMatrix(
                unmanagedBaseVectors, null, temp,
                random, longerDim, longerDim,
                dotKernel, multKernel, transposeKernel, GPU,
                VectorGenerationMode.Orthonormalize);

            if (shorterDim == longerDim)
            {
                break;
            }

            float it = 0f;
            float step = longerDim / (float)shorterDim;
            int beg, end = 0;

            for (int i = 0; i < shorterDim; i++)
            {
                // Each output vector accumulates base vectors [beg, end).
                beg = end;
                it += step;
                end = (int)it;

                var vect = unmanagedVectors.GetDevicePtr(GPU, i * longerDim);

                for (int j = beg; j < end; j++)
                {
                    var baseVect = unmanagedBaseVectors.GetDevicePtr(GPU, j * longerDim);
                    multKernel.Run(baseVect, vect, vect, (int)MyJoin.MyJoinOperation.Addition, longerDim, longerDim);
                }
            }

            if (xDim > yDim)
            {
                // xDim is leading and is not normalized; we need to transpose to get the correct dims
                if (axisToNormalize == AxisToNormalizeEnum.xDim)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
                else
                {
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else
            {
                // yDim is leading and is not normalized; the matrix is in correct position
                if (axisToNormalize == AxisToNormalizeEnum.yDim)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
                else
                {
                    // TODO: SMELLY VERSION: transpose, normalize the now-leading xDim, transpose back.
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
            }
            break;
    }
}