/// <summary>
/// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal using a modified version of the Gram-Schmidt algorithm.
/// </summary>
/// <remarks>
/// Vectors whose squared length is below 1e-7 are skipped: they are neither normalized nor used to
/// orthogonalize the vectors that follow them.
/// </remarks>
/// <param name="vectors">The vectors to orthonormalize, stored back to back, <paramref name="xDim"/> elements each.</param>
/// <param name="temp">A vector of temporary space used for the kernel results.</param>
/// <param name="xDim">The length of each vector.</param>
/// <param name="yDim">The number of vectors.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
/// <param name="GPU">The GPU index passed to <c>GetDevicePtr</c>.</param>
public static void OrthonormalizeVectors(MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp, int xDim, int yDim, MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    int count = xDim * yDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
    Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= xDim, "Too little temp space!");
    // NOTE(review): the projection step below writes its dot product at temp[xDim], while the assertion
    // above only requires temp.Count >= xDim — confirm that callers provide at least xDim + 1 elements.

    multKernel.SetupExecution(xDim);

    for (int i = 0; i < count; i += xDim)
    {
        var curr = vectors.GetDevicePtr(GPU, i);

        // Normalize the current vector
        {
            // The projection loop leaves outOffset at xDim (and other callers may leave any value),
            // so point the output back at temp[0], which is the element read below.
            dotKernel.outOffset = 0;
            dotKernel.Run(temp, curr, curr, xDim);

            // Fetch the squared length before reading it on the host.
            // (The previous code copied host -> device here, which read a stale host value.)
            temp.SafeCopyToHost(0, 1);

            if (temp.Host[0] < 0.0000001f)
            {
                continue;
            }

            // Upload 1 / |curr| and scale the vector with it
            temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
            temp.SafeCopyToDevice(0, 1);

            multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
        }

        // Make all the remaining vectors orthogonal to the current one
        for (int j = i + xDim; j < count; j += xDim)
        {
            var next = vectors.GetDevicePtr(GPU, j);

            // Compute and subtract the projection onto the current vector
            dotKernel.outOffset = xDim;
            dotKernel.Run(temp, curr, next, xDim);

            multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
            multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
        }
    }
}
/// <summary>
/// CPU implementation of the matrix operations. Only <see cref="MatOperation.EuclidDist"/> is handled:
/// for every row of <paramref name="A"/> the Euclidean distance to the vector <paramref name="B"/> is
/// stored in <paramref name="Result"/>. Any other operation is reported as an error.
/// </summary>
/// <param name="operation">The operation to perform.</param>
/// <param name="A">The matrix; its row length is taken from <c>A.ColumnHint</c>.</param>
/// <param name="B">The vector; nothing is computed unless <c>B.Count</c> equals <c>A.ColumnHint</c>.</param>
/// <param name="Result">Receives one distance per row of <paramref name="A"/>; it is zero-filled first.</param>
public override void Run(MatOperation operation, MyMemoryBlock<float> A, MyMemoryBlock<float> B, MyMemoryBlock<float> Result)
{
    Result.Fill(.0f);

    if (operation != MatOperation.EuclidDist)
    {
        MyLog.Writer.WriteLine(MyLogLevel.ERROR, "Trying to run cpu mat ops. for undefined MatOperation");
        return;
    }

    // The vector has to match the row length of the matrix; otherwise the result stays zero-filled
    if (B.Count != A.ColumnHint)
    {
        return;
    }

    A.SafeCopyToHost();
    B.SafeCopyToHost();

    int rowLength = A.ColumnHint;
    int rowCount = A.Count / rowLength;
    int vectorLength = B.Count;

    for (int row = 0, rowStart = 0; row < rowCount; row++, rowStart += rowLength)
    {
        // Accumulate the squared differences directly in the output element
        Result.Host[row] = 0;

        for (int col = 0; col < vectorLength; col++)
        {
            float diff = B.Host[col] - A.Host[rowStart + col];
            Result.Host[row] += diff * diff;
        }

        Result.Host[row] = (float)Math.Sqrt((double)Result.Host[row]);
    }

    Result.SafeCopyToDevice();
}
/// <summary>
/// CPU implementation of the matrix operations. Only <see cref="MatOperation.EuclidDist"/> is handled:
/// for every row of <paramref name="A"/> the Euclidean distance to the vector <paramref name="B"/> is
/// stored in <paramref name="Result"/>. Any other operation is reported as an error.
/// </summary>
/// <param name="operation">The operation to perform.</param>
/// <param name="A">The matrix; its row length is taken from <c>A.ColumnHint</c>.</param>
/// <param name="B">The vector; nothing is computed unless <c>B.Count</c> equals <c>A.ColumnHint</c>.</param>
/// <param name="Result">Receives one distance per row of <paramref name="A"/>; it is zero-filled first.</param>
public override void Run(MatOperation operation, MyMemoryBlock<float> A, MyMemoryBlock<float> B, MyMemoryBlock<float> Result)
{
    Result.Fill(.0f);

    if (operation != MatOperation.EuclidDist)
    {
        MyLog.Writer.WriteLine(MyLogLevel.ERROR, "Trying to run cpu mat ops. for undefined MatOperation");
        return;
    }

    // The vector has to match the row length of the matrix; otherwise the result stays zero-filled
    if (B.Count != A.ColumnHint)
    {
        return;
    }

    A.SafeCopyToHost();
    B.SafeCopyToHost();

    int rowLength = A.ColumnHint;
    int rowCount = A.Count / rowLength;
    int vectorLength = B.Count;

    for (int row = 0, rowStart = 0; row < rowCount; row++, rowStart += rowLength)
    {
        // Accumulate the squared differences directly in the output element
        Result.Host[row] = 0;

        for (int col = 0; col < vectorLength; col++)
        {
            float diff = B.Host[col] - A.Host[rowStart + col];
            Result.Host[row] += diff * diff;
        }

        Result.Host[row] = (float)Math.Sqrt((double)Result.Host[row]);
    }

    Result.SafeCopyToDevice();
}
/// <summary>
/// Writes the values of <c>m_initialBias</c> into the host buffer of <c>m_biasBlock</c>, starting at
/// <c>m_biasOffset</c>, and then copies the block to the device.
/// </summary>
private void GenerateBiasFromInitialWeights()
{
    // Refresh the host buffer before part of it is overwritten
    m_biasBlock.SafeCopyToHost();

    var host = m_biasBlock.Host;
    var target = m_biasOffset;

    foreach (var bias in m_initialBias)
    {
        host[target] = bias;
        target++;
    }

    m_biasBlock.SafeCopyToDevice();
}
/// <summary>
/// Writes the values of <c>m_initialWeight</c> into the host buffer of <c>m_weightBlock</c>, starting at
/// <c>m_weightOffset</c>, and then copies the block to the device.
/// </summary>
private void GenerateWeightFromInitialWeights()
{
    // Refresh the host buffer before part of it is overwritten
    m_weightBlock.SafeCopyToHost();

    var host = m_weightBlock.Host;
    var target = m_weightOffset;

    foreach (var weight in m_initialWeight)
    {
        host[target] = weight;
        target++;
    }

    m_weightBlock.SafeCopyToDevice();
}
/// <summary>
/// Set values of memory block
/// </summary>
/// <remarks>
/// The first <c>block.Count</c> elements of <paramref name="values"/> are written to the host buffer of
/// the block, which is then copied to the device. The input is validated before anything is written.
/// </remarks>
/// <param name="nodeId">Node ID</param>
/// <param name="values">Values to be set; must contain at least as many elements as the memory block</param>
/// <param name="blockName">Name of memory block</param>
/// <exception cref="System.ArgumentNullException"><paramref name="values"/> is null.</exception>
/// <exception cref="System.ArgumentException"><paramref name="values"/> has fewer elements than the memory block.</exception>
public void SetValues(int nodeId, float[] values, string blockName = "Input")
{
    if (values == null)
    {
        throw new System.ArgumentNullException("values");
    }

    MyLog.INFO.WriteLine("Setting values of " + blockName + "@" + nodeId);
    MyMemoryBlock<float> block = GetMemBlock(nodeId, blockName);

    // Fail before touching the block instead of half-way through the copy
    if (values.Length < block.Count)
    {
        throw new System.ArgumentException(
            "Expected at least " + block.Count + " values for " + blockName + "@" + nodeId + ", but got " + values.Length + ".",
            "values");
    }

    for (int i = 0; i < block.Count; ++i)
    {
        block.Host[i] = values[i];
    }

    block.SafeCopyToDevice();
}
/// <summary>
/// Normalizes vectors along the leading dimension.
/// </summary>
/// <remarks>
/// <paramref name="vectors"/> is treated as <paramref name="otherDim"/> consecutive vectors of
/// <paramref name="leadingDim"/> elements. Vectors whose squared length is below 1e-7 are multiplied by
/// zero; all others are multiplied by the reciprocal of their length.
/// </remarks>
/// <param name="vectors">The vectors to normalize.</param>
/// <param name="temp">Temporary storage; its first <paramref name="otherDim"/> elements hold the per-vector factors.</param>
/// <param name="leadingDim">The length of each vector.</param>
/// <param name="otherDim">The number of vectors.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
/// <param name="GPU">The GPU index passed to <c>GetDevicePtr</c>.</param>
public static void NormalizeLeadingDim(
    MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp,
    int leadingDim, int otherDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    var count = leadingDim * otherDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
    Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

    multKernel.SetupExecution(leadingDim);

    // Pass 1: squared length of vector v goes to temp[v]
    for (int v = 0, offset = 0; v < otherDim; v++, offset += leadingDim)
    {
        var vector = vectors.GetDevicePtr(GPU, offset);
        dotKernel.outOffset = v;
        dotKernel.Run(temp, vector, vector, leadingDim);
    }

    temp.SafeCopyToHost(0, otherDim);

    // Pass 2: turn the squared lengths into scale factors on the host
    for (int v = 0; v < otherDim; v++)
    {
        float squaredLength = temp.Host[v];
        temp.Host[v] = squaredLength < 0.0000001f
            ? 0
            : (float)(1 / Math.Sqrt(squaredLength));
    }

    temp.SafeCopyToDevice(0, otherDim);

    // Pass 3: scale every vector by its own factor
    for (int v = 0, offset = 0; v < otherDim; v++, offset += leadingDim)
    {
        var vector = vectors.GetDevicePtr(GPU, offset);
        var factor = temp.GetDevicePtr(GPU, v);
        multKernel.Run(vector, factor, vector, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
    }
}
/// <summary>
/// Sets up the genetic task: loads the CUDA kernels, locates the first <c>MyNeuralNetworkGroup</c> among
/// the network's top-level children, allocates the memory blocks for the population and generates the
/// initial chromosomes.
/// </summary>
/// <remarks>
/// <c>CoefficientsSaved</c> and <c>DirectEvolution</c> are brought into a consistent state BEFORE any
/// block is sized from them. Previously the blocks were allocated first, so with <c>DirectEvolution</c>
/// set and <c>CoefficientsSaved</c> below <c>m_weights</c> the <c>tempMB</c> and <c>marking</c> blocks
/// were allocated for fewer coefficients than the value <c>CoefficientsSaved</c> was raised to afterwards.
/// </remarks>
/// <param name="nGPU">GPU index used when loading the kernels.</param>
public override void Init(int nGPU)
{
    currentGen = 0;
    m_weights = 0;

    // Load the relevant kernels
    m_coeffGenKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "generateCoefficients");
    m_geneticKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "grow");
    m_extractKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "extractCoeffs");
    m_cosineGenKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "createCosineMatrix");
    m_implantKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "implantCoeffs");

    // Init the random generator
    m_rand = new Random();

    // Set up coefficient Generation
    m_coeffGenKernel.SetupExecution(Owner.PopulationSize);
    // Set up genetic recombination
    m_geneticKernel.SetupExecution(Owner.PopulationSize);

    // This finds the first nn group in the network. Possibility of getting a list of networks and evolving them all separately?
    List<MyNode> ch = Owner.Owner.Network.Children;
    foreach (MyNode n in ch)
    {
        var group = n as MyNeuralNetworkGroup;
        if (group != null)
        {
            nn = group;
            MyLog.INFO.WriteLine("Evolving the layers of node: " + nn.Name);
            break;
        }
    }

    if (nn == null)
    {
        // The exception type is kept as-is so that existing callers observe the same failure
        throw new NullReferenceException("There is no top level NeuralNetworkGroup.");
    }

    // Construct the layerlist which is to be read from and written to
    constructLayerList(nn);

    // This is how big the weight matrix will be
    arr_size = (int)Math.Ceiling(Math.Sqrt(m_weights));

    // Get the relevant execution plan
    m_executionPlan = Owner.Owner.SimulationHandler.Simulation.ExecutionPlan[0];

    // Check saved Coeffs size. This must happen before the memory blocks are sized from CoefficientsSaved.
    if (CoefficientsSaved > m_weights)
    {
        MyLog.WARNING.Write("Saving more Coefficients than exist in the weight matrix. Setting to max permissable value\n");
        CoefficientsSaved = m_weights;
    }

    if (CoefficientsSaved == m_weights)
    {
        MyLog.INFO.Write("Saving a coefficient for every weight. Evolving weights directly\n");
        DirectEvolution = true;
    }

    if (DirectEvolution)
    {
        CoefficientsSaved = m_weights;
    }

    #region MemoryBlocks
    // Initialise the population
    population = new List<MyMemoryBlock<float>>();
    outputPop = new List<MyMemoryBlock<float>>();
    for (int i = 0; i < Owner.PopulationSize; i++)
    {
        var member = new MyMemoryBlock<float> { Owner = Owner, Count = arr_size * arr_size };
        member.AllocateMemory();
        population.Add(member);

        var output = new MyMemoryBlock<float> { Owner = Owner, Count = arr_size * arr_size };
        output.AllocateMemory();
        outputPop.Add(output);
    }

    // Allocate space to manipulate weight matrices on the device
    cudaMatrices = new MyMemoryBlock<float> { Owner = Owner, Count = arr_size * arr_size * Owner.PopulationSize };
    cudaMatrices.AllocateDevice();

    // Allocate a memory block for the Cosine matrix
    multiplier = new MyMemoryBlock<float> { Owner = Owner, Count = arr_size * arr_size };
    multiplier.AllocateDevice();

    // Fill the cosine Matrices
    m_cosineGenKernel.SetupExecution(arr_size);
    m_cosineGenKernel.Run(multiplier, arr_size);

    // Allocate space needed for chromosomes.
    // After the check above CoefficientsSaved equals m_weights whenever DirectEvolution is set,
    // so a single expression covers both modes.
    chromosomePop = new MyMemoryBlock<float> { Owner = Owner, Count = CoefficientsSaved * Owner.PopulationSize };
    chromosomePop.AllocateMemory();

    // Allocate some space for noise to seed the cuda_rand generator
    noise = new MyMemoryBlock<float> { Owner = Owner, Count = Owner.PopulationSize };
    noise.AllocateMemory();

    // Write some noise to the initial array
    for (int i = 0; i < Owner.PopulationSize; i++)
    {
        noise.Host[i] = (float)m_rand.NextDouble() * 100000 + (float)m_rand.NextDouble() * 40;
    }
    noise.SafeCopyToDevice();

    // Allocate space for the fitnesses
    fitnesses = new MyMemoryBlock<float> { Owner = Owner, Count = Owner.PopulationSize };
    fitnesses.AllocateMemory();

    // Allocate some temporary storage
    tempMB = new MyMemoryBlock<float> { Owner = Owner, Count = CoefficientsSaved };
    tempMB.AllocateDevice();

    tempPop = new MyMemoryBlock<float> { Owner = Owner, Count = arr_size * arr_size };
    tempPop.AllocateDevice();

    marking = new MyMemoryBlock<int> { Owner = Owner, Count = CoefficientsSaved * Owner.PopulationSize };
    marking.AllocateDevice();
    #endregion

    // Generate the rest of the population
    if (DirectEvolution)
    {
        m_coeffGenKernel.Run(chromosomePop, CoefficientsSaved, noise, Owner.PopulationSize, WeightMagnitude);
    }
    else
    {
        m_coeffGenKernel.Run(chromosomePop, CoefficientsSaved, noise, Owner.PopulationSize, Alpha);
    }

    //Disable Backprop tasks in Network
    var backpropTask = nn.GetActiveBackpropTask();
    if (backpropTask != null && !backpropTask.DisableLearning)
    {
        MyLog.WARNING.WriteLine("Disabling backprop learning for Neural Network");
        backpropTask.DisableLearning = true;
    }
}
/// <summary>
/// Transforms all the vectors stored in <paramref name="vectors"/> to be pair-wise orthonormal using a modified version of the Gram-Schmidt algorithm.
/// </summary>
/// <remarks>
/// Vectors whose squared length is below 1e-7 are skipped: they are neither normalized nor used to
/// orthogonalize the vectors that follow them.
/// </remarks>
/// <param name="vectors">The vectors to orthonormalize, stored back to back, <paramref name="xDim"/> elements each.</param>
/// <param name="temp">A vector of temporary space used for the kernel results.</param>
/// <param name="xDim">The length of each vector.</param>
/// <param name="yDim">The number of vectors.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
/// <param name="GPU">The GPU index passed to <c>GetDevicePtr</c>.</param>
public static void OrthonormalizeVectors(MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp, int xDim, int yDim, MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    int count = xDim * yDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing a kernel!");
    Debug.Assert(xDim > 0 && yDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= xDim, "Too little temp space!");
    // NOTE(review): the projection step below writes its dot product at temp[xDim], while the assertion
    // above only requires temp.Count >= xDim — confirm that callers provide at least xDim + 1 elements.

    multKernel.SetupExecution(xDim);

    for (int i = 0; i < count; i += xDim)
    {
        var curr = vectors.GetDevicePtr(GPU, i);

        // Normalize the current vector
        {
            // The projection loop leaves outOffset at xDim (and other callers may leave any value),
            // so point the output back at temp[0], which is the element read below.
            dotKernel.outOffset = 0;
            dotKernel.Run(temp, curr, curr, xDim);

            // Fetch the squared length before reading it on the host.
            // (The previous code copied host -> device here, which read a stale host value.)
            temp.SafeCopyToHost(0, 1);

            if (temp.Host[0] < 0.0000001f)
            {
                continue;
            }

            // Upload 1 / |curr| and scale the vector with it
            temp.Host[0] = (float)(1 / Math.Sqrt(temp.Host[0]));
            temp.SafeCopyToDevice(0, 1);

            multKernel.Run(curr, temp, curr, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
        }

        // Make all the remaining vectors orthogonal to the current one
        for (int j = i + xDim; j < count; j += xDim)
        {
            var next = vectors.GetDevicePtr(GPU, j);

            // Compute and subtract the projection onto the current vector
            dotKernel.outOffset = xDim;
            dotKernel.Run(temp, curr, next, xDim);

            multKernel.Run(curr, temp, temp, (int)MyJoin.MyJoinOperation.Multiplication, xDim, 1);
            multKernel.Run(next, temp, next, (int)MyJoin.MyJoinOperation.Subtraction, xDim, xDim);
        }
    }
}
/// <summary>
/// Normalizes vectors along the leading dimension.
/// </summary>
/// <remarks>
/// <paramref name="vectors"/> is treated as <paramref name="otherDim"/> consecutive vectors of
/// <paramref name="leadingDim"/> elements. Vectors whose squared length is below 1e-7 are multiplied by
/// zero; all others are multiplied by the reciprocal of their length.
/// </remarks>
/// <param name="vectors">The vectors to normalize.</param>
/// <param name="temp">Temporary storage; its first <paramref name="otherDim"/> elements hold the per-vector factors.</param>
/// <param name="leadingDim">The length of each vector.</param>
/// <param name="otherDim">The number of vectors.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
/// <param name="GPU">The GPU index passed to <c>GetDevicePtr</c>.</param>
public static void NormalizeLeadingDim(
    MyMemoryBlock<float> vectors, MyMemoryBlock<float> temp,
    int leadingDim, int otherDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, int GPU)
{
    var count = leadingDim * otherDim;

    Debug.Assert(vectors != null && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null, "Missing kernels.");
    Debug.Assert(leadingDim > 0 && otherDim > 0, "Negative matrix dimensions!");
    Debug.Assert(vectors.Count >= count, "Too little vectors to orthonormalize!");
    Debug.Assert(temp.Count >= Math.Max(leadingDim, otherDim), "Too little temp space!");

    multKernel.SetupExecution(leadingDim);

    // Pass 1: squared length of vector v goes to temp[v]
    for (int v = 0, offset = 0; v < otherDim; v++, offset += leadingDim)
    {
        var vector = vectors.GetDevicePtr(GPU, offset);
        dotKernel.outOffset = v;
        dotKernel.Run(temp, vector, vector, leadingDim);
    }

    temp.SafeCopyToHost(0, otherDim);

    // Pass 2: turn the squared lengths into scale factors on the host
    for (int v = 0; v < otherDim; v++)
    {
        float squaredLength = temp.Host[v];
        temp.Host[v] = squaredLength < 0.0000001f
            ? 0
            : (float)(1 / Math.Sqrt(squaredLength));
    }

    temp.SafeCopyToDevice(0, otherDim);

    // Pass 3: scale every vector by its own factor
    for (int v = 0, offset = 0; v < otherDim; v++, offset += leadingDim)
    {
        var vector = vectors.GetDevicePtr(GPU, offset);
        var factor = temp.GetDevicePtr(GPU, v);
        multKernel.Run(vector, factor, vector, (int)MyJoin.MyJoinOperation.Multiplication, leadingDim, 1);
    }
}
/// <summary>
/// Generates a random transform matrix of size <paramref name="xDim"/> x <paramref name="yDim"/> into
/// <paramref name="unmanagedVectors"/>, using the strategy selected by <paramref name="mode"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the previous summary named <paramref name="xDim"/> as the leading dimension of the
/// column-major result, while the parameter descriptions and the comments in the body treat
/// <paramref name="yDim"/> as the leading one — confirm which is intended.
/// </remarks>
/// <param name="unmanagedVectors">A memory block to store the generated matrix.
/// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
/// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
/// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
/// Only necessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
/// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
/// <param name="random">The random object for number generation.</param>
/// <param name="xDim">The size of the other dimension.</param>
/// <param name="yDim">The size of the leading dimension.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
/// <param name="transposeKernel">The kernel used to transpose the matrix (it is run with the same block as input and output).</param>
/// <param name="GPU">The GPU index passed on to the helper routines.</param>
/// <param name="mode">How the vectors are produced: plain random normalized vectors, vectors
/// orthonormalized along the longer dimension, or sums of groups of orthonormal base vectors.</param>
/// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
public static void GenerateTransformMatrix(
    MyMemoryBlock<float> unmanagedVectors, MyMemoryBlock<float> unmanagedBaseVectors, MyMemoryBlock<float> temp,
    Random random, int xDim, int yDim,
    MyProductKernel<float> dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
    VectorGenerationMode mode = VectorGenerationMode.Normal, AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
{
    Debug.Assert(random != null, "Missing random object");
    Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");

    bool normalizeAlongX = axisToNormalize == AxisToNormalizeEnum.xDim;
    bool normalizeAlongY = axisToNormalize == AxisToNormalizeEnum.yDim;
    bool xIsLonger = xDim > yDim;
    int longSide = Math.Max(xDim, yDim);
    int shortSide = Math.Min(xDim, yDim);

    // Column-major storage: the vectors along the larger dimension are the ones made orthogonal,
    // the vectors along the requested axis are the ones normalized.
    switch (mode)
    {
        case VectorGenerationMode.Normal:
            if (normalizeAlongX)
            {
                // Normalized vectors with xDim leading, then transposed into the final position
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                unmanagedVectors.SafeCopyToDevice();
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            else
            {
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                unmanagedVectors.SafeCopyToDevice();
            }
            break;

        case VectorGenerationMode.Orthonormalize:
            // Unnormalized random vectors with the larger dimension leading ...
            GenerateRandomNormalVectors(unmanagedVectors.Host, random, longSide, shortSide, normalize: false);
            unmanagedVectors.SafeCopyToDevice();

            // ... orthonormalized along that larger dimension
            OrthonormalizeVectors(unmanagedVectors, temp, longSide, shortSide, dotKernel, multKernel, GPU);

            if (xIsLonger)
            {
                // xDim is leading and normalized; transpose to get the correct dims
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                if (normalizeAlongY)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else if (normalizeAlongX)
            {
                // yDim is leading and normalized, the matrix is already in position.
                // TODO: generate the matrix with transposed dims?
                // TODO: smelly version: transpose, normalize, transpose back
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            break;

        case VectorGenerationMode.AverageBaseVectors:
            // A square set of orthonormal base vectors
            GenerateTransformMatrix(
                unmanagedBaseVectors, null, temp,
                random, longSide, longSide,
                dotKernel, multKernel, transposeKernel, GPU,
                VectorGenerationMode.Orthonormalize);

            // NOTE(review): for a square matrix nothing is written to unmanagedVectors here — confirm that this is intended.
            if (shortSide == longSide)
            {
                break;
            }

            // Split the base vectors into shortSide consecutive groups and add each group into one
            // output vector. NOTE(review): the sums are added onto the current contents of
            // unmanagedVectors — presumably the block is expected to be zeroed by the caller; confirm.
            float groupSize = longSide / (float)shortSide;
            float cursor = 0f;
            int groupEnd = 0;

            for (int target = 0; target < shortSide; target++)
            {
                int groupStart = groupEnd;
                cursor += groupSize;
                groupEnd = (int)cursor;

                var sum = unmanagedVectors.GetDevicePtr(GPU, target * longSide);

                for (int source = groupStart; source < groupEnd; source++)
                {
                    var baseVector = unmanagedBaseVectors.GetDevicePtr(GPU, source * longSide);
                    multKernel.Run(baseVector, sum, sum, (int)MyJoin.MyJoinOperation.Addition, longSide, longSide);
                }
            }

            if (xIsLonger)
            {
                // xDim is leading and not normalized; a transpose is needed to get the correct dims
                if (normalizeAlongX)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
                else
                {
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else if (normalizeAlongY)
            {
                // yDim is leading and not normalized; the matrix is already in position
                NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
            }
            else
            {
                // TODO: smelly version: transpose, normalize, transpose back
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            break;
    }
}
/// <summary>
/// Executes a vector operation on <paramref name="a"/> and <paramref name="b"/> and stores the outcome
/// in <paramref name="result"/>. Nothing happens when <c>Validate</c> rejects the operation for the
/// given block sizes.
/// </summary>
/// <remarks>
/// <c>Angle</c> and <c>DirectedAngle</c> pass the dot product straight to <see cref="Math.Acos(double)"/>,
/// so they presumably expect unit-length inputs — confirm against the callers.
/// </remarks>
/// <param name="operation">The operation to perform (Rotate, Angle or DirectedAngle are handled here).</param>
/// <param name="a">First operand; the vector to rotate for <c>Rotate</c>.</param>
/// <param name="b">Second operand; for <c>Rotate</c> its first element is the angle in degrees.</param>
/// <param name="result">Receives the rotated vector, or the angle in degrees in its first element.</param>
public void Run(VectorOperation operation, MyMemoryBlock<float> a, MyMemoryBlock<float> b, MyMemoryBlock<float> result)
{
    if (!Validate(operation, a.Count, b.Count))
    {
        return;
    }

    switch (operation)
    {
        case VectorOperation.Rotate:
        {
            b.SafeCopyToHost();
            float rads = DegreeToRadian(b.Host[0]);
            float cos = (float)Math.Cos(rads);
            float sin = (float)Math.Sin(rads);

            // 2x2 rotation matrix, uploaded through m_temp and multiplied with a
            float[] transform = { cos, -sin, sin, cos };
            Array.Copy(transform, m_temp.Host, transform.Length);
            m_temp.SafeCopyToDevice();
            m_matOperation.Run(MatOperation.Multiplication, m_temp, a, result);
        }
        break;

        case VectorOperation.Angle:
        {
            m_matOperation.Run(MatOperation.DotProd, a, b, result);
            result.SafeCopyToHost();

            // Rounding can push the dot product of (nearly) parallel unit vectors slightly outside
            // [-1, 1], for which Math.Acos returns NaN; clamp it into the valid domain first.
            float dotProd = Math.Max(-1f, Math.Min(1f, result.Host[0]));
            float angle = RadianToDegree((float)Math.Acos(dotProd));

            result.Fill(0);
            result.Host[0] = angle;
            result.SafeCopyToDevice();
        }
        break;

        case VectorOperation.DirectedAngle:
        {
            // Rotate a by -90 degrees (result doubles as the angle input) and keep the rotated vector in m_temp
            result.Host[0] = -90;
            result.SafeCopyToDevice();
            Run(VectorOperation.Rotate, a, result, result);
            result.CopyToMemoryBlock(m_temp, 0, 0, result.Count);

            m_matOperation.Run(MatOperation.DotProd, a, b, result);
            result.SafeCopyToHost();
            float dotProd = result.Host[0];
            float angle;

            // NOTE(review): this also maps a dot product close to -1 (opposite directions) to 0 — confirm that is intended.
            if (Math.Abs(Math.Abs(dotProd) - 1) < 1E-4)
            {
                angle = 0;
            }
            else
            {
                angle = RadianToDegree((float)Math.Acos(dotProd));
            }

            // The sign of the angle is taken from the dot product of the rotated a with b
            m_matOperation.Run(MatOperation.DotProd, m_temp, b, result);
            result.SafeCopyToHost();
            float perpDotProd = result.Host[0];

            if (perpDotProd > 0)
            {
                angle *= -1;
            }

            result.Fill(0);
            result.Host[0] = angle;
            result.SafeCopyToDevice();
        }
        break;
    }
}
/// <summary>
/// Executes a vector operation on <paramref name="a"/> and <paramref name="b"/> and stores the outcome
/// in <paramref name="result"/>. Nothing happens when <c>Validate</c> rejects the operation for the
/// given block sizes.
/// </summary>
/// <remarks>
/// <c>Angle</c> and <c>DirectedAngle</c> pass the dot product straight to <see cref="Math.Acos(double)"/>,
/// so they presumably expect unit-length inputs — confirm against the callers.
/// </remarks>
/// <param name="operation">The operation to perform (Rotate, Angle or DirectedAngle are handled here).</param>
/// <param name="a">First operand; the vector to rotate for <c>Rotate</c>.</param>
/// <param name="b">Second operand; for <c>Rotate</c> its first element is the angle in degrees.</param>
/// <param name="result">Receives the rotated vector, or the angle in degrees in its first element.</param>
public void Run(VectorOperation operation, MyMemoryBlock<float> a, MyMemoryBlock<float> b, MyMemoryBlock<float> result)
{
    if (!Validate(operation, a.Count, b.Count))
    {
        return;
    }

    switch (operation)
    {
        case VectorOperation.Rotate:
        {
            b.SafeCopyToHost();
            float rads = DegreeToRadian(b.Host[0]);
            float cos = (float)Math.Cos(rads);
            float sin = (float)Math.Sin(rads);

            // 2x2 rotation matrix, uploaded through m_temp and multiplied with a
            float[] transform = { cos, -sin, sin, cos };
            Array.Copy(transform, m_temp.Host, transform.Length);
            m_temp.SafeCopyToDevice();
            m_matOperation.Run(MatOperation.Multiplication, m_temp, a, result);
        }
        break;

        case VectorOperation.Angle:
        {
            m_matOperation.Run(MatOperation.DotProd, a, b, result);
            result.SafeCopyToHost();

            // Rounding can push the dot product of (nearly) parallel unit vectors slightly outside
            // [-1, 1], for which Math.Acos returns NaN; clamp it into the valid domain first.
            float dotProd = Math.Max(-1f, Math.Min(1f, result.Host[0]));
            float angle = RadianToDegree((float)Math.Acos(dotProd));

            result.Fill(0);
            result.Host[0] = angle;
            result.SafeCopyToDevice();
        }
        break;

        case VectorOperation.DirectedAngle:
        {
            // Rotate a by -90 degrees (result doubles as the angle input) and keep the rotated vector in m_temp
            result.Host[0] = -90;
            result.SafeCopyToDevice();
            Run(VectorOperation.Rotate, a, result, result);
            result.CopyToMemoryBlock(m_temp, 0, 0, result.Count);

            m_matOperation.Run(MatOperation.DotProd, a, b, result);
            result.SafeCopyToHost();
            float dotProd = result.Host[0];
            float angle;

            // NOTE(review): this also maps a dot product close to -1 (opposite directions) to 0 — confirm that is intended.
            if (Math.Abs(Math.Abs(dotProd) - 1) < 1E-4)
            {
                angle = 0;
            }
            else
            {
                angle = RadianToDegree((float)Math.Acos(dotProd));
            }

            // The sign of the angle is taken from the dot product of the rotated a with b
            m_matOperation.Run(MatOperation.DotProd, m_temp, b, result);
            result.SafeCopyToHost();
            float perpDotProd = result.Host[0];

            if (perpDotProd > 0)
            {
                angle *= -1;
            }

            result.Fill(0);
            result.Host[0] = angle;
            result.SafeCopyToDevice();
        }
        break;
    }
}
/// <summary>
/// Generates a random transform matrix of size <paramref name="xDim"/> x <paramref name="yDim"/> into
/// <paramref name="unmanagedVectors"/>, using the strategy selected by <paramref name="mode"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the previous summary named <paramref name="xDim"/> as the leading dimension of the
/// column-major result, while the parameter descriptions and the comments in the body treat
/// <paramref name="yDim"/> as the leading one — confirm which is intended.
/// </remarks>
/// <param name="unmanagedVectors">A memory block to store the generated matrix.
/// Must be as large as <paramref name="xDim"/> x <paramref name="yDim"/>.</param>
/// <param name="unmanagedBaseVectors">A temporary block to store all the base vectors.
/// Must be as large as Max(<paramref name="xDim"/>, <paramref name="yDim"/>)^2.
/// Only necessary when <paramref name="mode"/> is set to <see cref="VectorGenerationMode.AverageBaseVectors"/>.</param>
/// <param name="temp">The temporary storage. It should be as long as the longer of the dimensions.</param>
/// <param name="random">The random object for number generation.</param>
/// <param name="xDim">The size of the other dimension.</param>
/// <param name="yDim">The size of the leading dimension.</param>
/// <param name="dotKernel">The kernel to compute a dot product.</param>
/// <param name="multKernel">The kernel to compute vector combinations.</param>
/// <param name="transposeKernel">The kernel used to transpose the matrix (it is run with the same block as input and output).</param>
/// <param name="GPU">The GPU index passed on to the helper routines.</param>
/// <param name="mode">How the vectors are produced: plain random normalized vectors, vectors
/// orthonormalized along the longer dimension, or sums of groups of orthonormal base vectors.</param>
/// <param name="axisToNormalize">The axis along which to normalize vectors after orthonormalization.</param>
public static void GenerateTransformMatrix(
    MyMemoryBlock<float> unmanagedVectors, MyMemoryBlock<float> unmanagedBaseVectors, MyMemoryBlock<float> temp,
    Random random, int xDim, int yDim,
    MyCudaKernel dotKernel, MyCudaKernel multKernel, MyCudaKernel transposeKernel, int GPU,
    VectorGenerationMode mode = VectorGenerationMode.Normal, AxisToNormalizeEnum axisToNormalize = AxisToNormalizeEnum.yDim)
{
    Debug.Assert(random != null, "Missing random object");
    Debug.Assert(unmanagedVectors != null && (mode != VectorGenerationMode.AverageBaseVectors || unmanagedBaseVectors != null) && temp != null, "Missing data!");
    Debug.Assert(dotKernel != null && multKernel != null && transposeKernel != null, "Missing a kernel!");

    bool normalizeAlongX = axisToNormalize == AxisToNormalizeEnum.xDim;
    bool normalizeAlongY = axisToNormalize == AxisToNormalizeEnum.yDim;
    bool xIsLonger = xDim > yDim;
    int longSide = Math.Max(xDim, yDim);
    int shortSide = Math.Min(xDim, yDim);

    // Column-major storage: the vectors along the larger dimension are the ones made orthogonal,
    // the vectors along the requested axis are the ones normalized.
    switch (mode)
    {
        case VectorGenerationMode.Normal:
            if (normalizeAlongX)
            {
                // Normalized vectors with xDim leading, then transposed into the final position
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, xDim, yDim);
                unmanagedVectors.SafeCopyToDevice();
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            else
            {
                GenerateRandomNormalVectors(unmanagedVectors.Host, random, yDim, xDim);
                unmanagedVectors.SafeCopyToDevice();
            }
            break;

        case VectorGenerationMode.Orthonormalize:
            // Unnormalized random vectors with the larger dimension leading ...
            GenerateRandomNormalVectors(unmanagedVectors.Host, random, longSide, shortSide, normalize: false);
            unmanagedVectors.SafeCopyToDevice();

            // ... orthonormalized along that larger dimension
            OrthonormalizeVectors(unmanagedVectors, temp, longSide, shortSide, dotKernel, multKernel, GPU);

            if (xIsLonger)
            {
                // xDim is leading and normalized; transpose to get the correct dims
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);

                if (normalizeAlongY)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else if (normalizeAlongX)
            {
                // yDim is leading and normalized, the matrix is already in position.
                // TODO: generate the matrix with transposed dims?
                // TODO: smelly version: transpose, normalize, transpose back
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            break;

        case VectorGenerationMode.AverageBaseVectors:
            // A square set of orthonormal base vectors
            GenerateTransformMatrix(
                unmanagedBaseVectors, null, temp,
                random, longSide, longSide,
                dotKernel, multKernel, transposeKernel, GPU,
                VectorGenerationMode.Orthonormalize);

            // NOTE(review): for a square matrix nothing is written to unmanagedVectors here — confirm that this is intended.
            if (shortSide == longSide)
            {
                break;
            }

            // Split the base vectors into shortSide consecutive groups and add each group into one
            // output vector. NOTE(review): the sums are added onto the current contents of
            // unmanagedVectors — presumably the block is expected to be zeroed by the caller; confirm.
            float groupSize = longSide / (float)shortSide;
            float cursor = 0f;
            int groupEnd = 0;

            for (int target = 0; target < shortSide; target++)
            {
                int groupStart = groupEnd;
                cursor += groupSize;
                groupEnd = (int)cursor;

                var sum = unmanagedVectors.GetDevicePtr(GPU, target * longSide);

                for (int source = groupStart; source < groupEnd; source++)
                {
                    var baseVector = unmanagedBaseVectors.GetDevicePtr(GPU, source * longSide);
                    multKernel.Run(baseVector, sum, sum, (int)MyJoin.MyJoinOperation.Addition, longSide, longSide);
                }
            }

            if (xIsLonger)
            {
                // xDim is leading and not normalized; a transpose is needed to get the correct dims
                if (normalizeAlongX)
                {
                    NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                }
                else
                {
                    transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
                    NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
                }
            }
            else if (normalizeAlongY)
            {
                // yDim is leading and not normalized; the matrix is already in position
                NormalizeLeadingDim(unmanagedVectors, temp, yDim, xDim, dotKernel, multKernel, GPU);
            }
            else
            {
                // TODO: smelly version: transpose, normalize, transpose back
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, yDim, xDim);
                NormalizeLeadingDim(unmanagedVectors, temp, xDim, yDim, dotKernel, multKernel, GPU);
                transposeKernel.Run(unmanagedVectors, unmanagedVectors, xDim, yDim);
            }
            break;
    }
}
/// <summary>
/// Runs one generation of the genetic search: reads the current network weights into the first
/// population member, grows the population from the chromosomes on the device, evaluates the fitness of
/// every member, sorts the members by descending fitness on the host and writes the best member back to
/// the network. The simulation is paused when the target fitness or the generation limit is reached.
/// </summary>
public override void Execute()
{
    currentGen++;

    //Get first population member from the network
    getFFWeights(population[0]);
    population[0].SafeCopyToDevice();

    if (!DirectEvolution)
    {
        // Two Gemm calls with the cosine matrix: population[0] -> outputPop[0] -> population[0]
        MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.NonTranspose,
            arr_size, arr_size, arr_size, 1.0f,
            multiplier.GetDevice(Owner), arr_size,
            population[0].GetDevice(Owner), arr_size,
            0.0f, outputPop[0].GetDevice(Owner), arr_size
            );

        MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.Transpose,
            arr_size, arr_size, arr_size, 1.0f,
            outputPop[0].GetDevice(Owner), arr_size,
            multiplier.GetDevice(Owner), arr_size,
            0.0f, population[0].GetDevice(Owner), arr_size
            );
    }

    //Read the saved coeffs from the initial weight matrix into the first chromosome
    population[0].CopyToMemoryBlock(cudaMatrices, 0, 0, arr_size * arr_size);
    m_extractKernel.SetupExecution(1);
    m_extractKernel.Run(cudaMatrices, chromosomePop, CoefficientsSaved, arr_size);

    // Recombine and grow the population
    if (DirectEvolution)
    {
        m_geneticKernel.Run(cudaMatrices, arr_size, m_weights, Owner.PopulationSize, chromosomePop, noise, Owner.MutationRate, Owner.Survivors, fitnesses, marking, WeightMagnitude);
    }
    else
    {
        m_geneticKernel.Run(cudaMatrices, arr_size, CoefficientsSaved, Owner.PopulationSize, chromosomePop, noise, Owner.MutationRate, Owner.Survivors, fitnesses, marking, Alpha);
    }

    chromosomePop.SafeCopyToHost();
    cudaMatrices.Fill(0.0f);

    m_implantKernel.SetupExecution(Owner.PopulationSize);
    m_implantKernel.Run(cudaMatrices, chromosomePop, CoefficientsSaved, arr_size);

    for (int i = 0; i < Owner.PopulationSize; i++)
    {
        // Read the cudaMatrices into the population
        population[i].CopyFromMemoryBlock(cudaMatrices, i * arr_size * arr_size, 0, arr_size * arr_size);

        if (!DirectEvolution)
        {
            // All operands are resolved through Owner, like in the Gemm calls above. The previous code
            // used GetDevice(0) for the population blocks, mixing a hard-coded GPU 0 with the owner's
            // GPU inside a single Gemm call.
            MyCublasFactory.Instance.Gemm(Operation.Transpose, Operation.NonTranspose,
                arr_size, arr_size, arr_size, 1.0f,
                multiplier.GetDevice(Owner), arr_size,
                population[i].GetDevice(Owner), arr_size,
                0.0f, outputPop[i].GetDevice(Owner), arr_size
                );

            MyCublasFactory.Instance.Gemm(Operation.NonTranspose, Operation.NonTranspose,
                arr_size, arr_size, arr_size, 1.0f,
                outputPop[i].GetDevice(Owner), arr_size,
                multiplier.GetDevice(Owner), arr_size,
                0.0f, population[i].GetDevice(Owner), arr_size
                );
        }

        population[i].SafeCopyToHost();
        noise.Host[i] = (float)m_rand.NextDouble();
    }
    noise.SafeCopyToDevice();

    // Determine the fitness of each member
    determineFitnesses();

    chromosomePop.SafeCopyToHost();

    #region Sort Chromosomes
    //sort the chromosomes and populations by fitness
    //bubble sort, can be improved
    // NOTE(review): the sorted chromosomes and fitnesses live in the host buffers only; no upload happens
    // in this method — confirm that the device copies are refreshed elsewhere before the next generation.
    float tmpfit;
    int len = Owner.PopulationSize;
    int newlen;
    while (len != 0)
    {
        newlen = 0;
        for (int i = 1; i < len; i++)
        {
            if (fitnesses.Host[i - 1] < fitnesses.Host[i])
            {
                // Swap fitnesses on the host
                tmpfit = fitnesses.Host[i - 1];
                fitnesses.Host[i - 1] = fitnesses.Host[i];
                fitnesses.Host[i] = tmpfit;
                newlen = i;

                // Swap Chromosomes on the host
                for (int x = 0; x < CoefficientsSaved; x++)
                {
                    tmpfit = chromosomePop.Host[i * CoefficientsSaved + x];
                    chromosomePop.Host[i * CoefficientsSaved + x] = chromosomePop.Host[(i - 1) * CoefficientsSaved + x];
                    chromosomePop.Host[(i - 1) * CoefficientsSaved + x] = tmpfit;
                }

                // Swap the weight matrices on the host
                for (int x = 0; x < arr_size * arr_size; x++)
                {
                    tmpfit = population[i - 1].Host[x];
                    population[i - 1].Host[x] = population[i].Host[x];
                    population[i].Host[x] = tmpfit;
                }
            }
        }
        len = newlen;
    }

    // Never report more networks than exist; Survivors may exceed the population size
    int reported = Math.Min(Owner.PopulationSize, Math.Max(Owner.Survivors, Owner.PopulationSize / 10));

    MyLog.INFO.WriteLine("Top {0} networks:", reported);
    for (int i = 0; i < reported; i++)
    {
        MyLog.INFO.Write("Fitness of network {0} is: {1}", i, fitnesses.Host[i]);
        if (i < Owner.Survivors)
        {
            MyLog.INFO.Write(" - surviving");
        }
        MyLog.INFO.Write(" \n");
    }
    #endregion

    // Best candidate to write to the network is the top of the population list
    MyLog.INFO.WriteLine("Fitness of selected network is: " + fitnesses.Host[0]);
    if (fitnesses.Host[0] >= Owner.TargetFitness)
    {
        MyLog.INFO.WriteLine("Found satisfying network, halting...");
        Owner.Owner.SimulationHandler.PauseSimulation();
    }

    setFFWeights(population[0]);
    MyLog.INFO.WriteLine("Written weights to network");

    if (currentGen >= Owner.Generations && Owner.Generations > 0)
    {
        MyLog.INFO.WriteLine("Generation limit reached, halting...");
        Owner.Owner.SimulationHandler.PauseSimulation();
    }
}
/// <summary>
/// Sets up the genetic task: loads the CUDA kernels, locates the first top-level
/// neural network group, sizes and allocates all working memory blocks, generates
/// the initial chromosome population and disables backprop learning on the network.
/// </summary>
/// <param name="nGPU">Index of the GPU on which the kernels are loaded.</param>
/// <exception cref="NullReferenceException">
/// Thrown when the network has no top-level <c>MyNeuralNetworkGroup</c> and none was set previously.
/// </exception>
public override void Init(int nGPU)
{
    currentGen = 0;
    m_weights = 0;

    // Load the relevant kernels
    m_coeffGenKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "generateCoefficients");
    m_geneticKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "grow");
    m_extractKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "extractCoeffs");
    m_cosineGenKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "createCosineMatrix");
    m_implantKernel = MyKernelFactory.Instance.Kernel(nGPU, @"Genetic\CosyneGenetics", "implantCoeffs");

    // Init the random generator
    m_rand = new Random();

    // Set up coefficient generation
    m_coeffGenKernel.SetupExecution(Owner.PopulationSize);

    // Set up genetic recombination
    m_geneticKernel.SetupExecution(Owner.PopulationSize);

    // This finds the first nn group in the network.
    // Possibility of getting a list of networks and evolving them all separately?
    List<MyNode> ch = Owner.Owner.Network.Children;
    foreach (MyNode n in ch)
    {
        var group = n as MyNeuralNetworkGroup;
        if (group != null)
        {
            nn = group;
            MyLog.INFO.WriteLine("Evolving the layers of node: " + nn.Name);
            break;
        }
    }

    if (nn == null)
    {
        // Exception type kept as-is so that existing callers observe the same failure.
        throw new NullReferenceException("There is no top level NeuralNetworkGroup.");
    }

    // Construct the layer list which is to be read from and written to.
    // NOTE(review): m_weights is reset above and read below, so this call presumably
    // accumulates the total weight count into m_weights - confirm.
    constructLayerList(nn);

    // The weights are packed into a square matrix of this side length
    arr_size = (int)Math.Ceiling(Math.Sqrt(m_weights));

    // Get the relevant execution plan
    m_executionPlan = Owner.Owner.SimulationHandler.Simulation.ExecutionPlan;

    // Validate and normalise the coefficient settings BEFORE any buffer is sized from
    // them. Previously this ran after the allocations, so buffers sized by
    // CoefficientsSaved (tempMB, marking) could end up smaller than the final value.
    if (CoefficientsSaved > m_weights)
    {
        MyLog.WARNING.Write("Saving more Coefficients than exist in the weight matrix. Setting to max permissable value\n");
        CoefficientsSaved = m_weights;
    }

    if (CoefficientsSaved == m_weights)
    {
        MyLog.INFO.Write("Saving a coefficient for every weight. Evolving weights directly\n");
        DirectEvolution = true;
    }

    if (DirectEvolution)
    {
        CoefficientsSaved = m_weights;
    }

    #region MemoryBlocks

    int matrixSize = arr_size * arr_size;

    // Initialise the population
    population = new List<MyMemoryBlock<float>>();
    outputPop = new List<MyMemoryBlock<float>>();
    for (int i = 0; i < Owner.PopulationSize; i++)
    {
        var member = new MyMemoryBlock<float> { Owner = Owner, Count = matrixSize };
        member.AllocateMemory();
        population.Add(member);

        var output = new MyMemoryBlock<float> { Owner = Owner, Count = matrixSize };
        output.AllocateMemory();
        outputPop.Add(output);
    }

    // Allocate space to manipulate weight matrices on the device
    cudaMatrices = new MyMemoryBlock<float> { Owner = Owner, Count = matrixSize * Owner.PopulationSize };
    cudaMatrices.AllocateDevice();

    // Allocate a memory block for the cosine matrix
    multiplier = new MyMemoryBlock<float> { Owner = Owner, Count = matrixSize };
    multiplier.AllocateDevice();

    // Fill the cosine matrix
    m_cosineGenKernel.SetupExecution(arr_size);
    m_cosineGenKernel.Run(multiplier, arr_size);

    // Allocate space needed for chromosomes. After the normalisation above,
    // CoefficientsSaved equals m_weights whenever DirectEvolution is set, so a single
    // expression covers both the direct and the coefficient-based encoding.
    chromosomePop = new MyMemoryBlock<float> { Owner = Owner, Count = CoefficientsSaved * Owner.PopulationSize };
    chromosomePop.AllocateMemory();

    // Allocate some space for noise to seed the cuda_rand generator
    noise = new MyMemoryBlock<float> { Owner = Owner, Count = Owner.PopulationSize };
    noise.AllocateMemory();

    // Write some noise to the initial array
    for (int i = 0; i < Owner.PopulationSize; i++)
    {
        noise.Host[i] = (float)m_rand.NextDouble() * 100000 + (float)m_rand.NextDouble() * 40;
    }
    noise.SafeCopyToDevice();

    // Allocate space for the fitnesses
    fitnesses = new MyMemoryBlock<float> { Owner = Owner, Count = Owner.PopulationSize };
    fitnesses.AllocateMemory();

    // Allocate some temporary storage
    tempMB = new MyMemoryBlock<float> { Owner = Owner, Count = CoefficientsSaved };
    tempMB.AllocateDevice();

    tempPop = new MyMemoryBlock<float> { Owner = Owner, Count = matrixSize };
    tempPop.AllocateDevice();

    marking = new MyMemoryBlock<int> { Owner = Owner, Count = CoefficientsSaved * Owner.PopulationSize };
    marking.AllocateDevice();

    #endregion

    // Generate the rest of the population
    if (DirectEvolution)
    {
        m_coeffGenKernel.Run(chromosomePop, CoefficientsSaved, noise, Owner.PopulationSize, WeightMagnitude);
    }
    else
    {
        m_coeffGenKernel.Run(chromosomePop, CoefficientsSaved, noise, Owner.PopulationSize, Alpha);
    }

    // Disable backprop tasks in the network so that only this task changes the weights
    var backpropTask = nn.GetActiveBackpropTask();
    if (backpropTask != null && !backpropTask.DisableLearning)
    {
        MyLog.WARNING.WriteLine("Disabling backprop learning for Neural Network");
        backpropTask.DisableLearning = true;
    }
}