/***** * * * * * CEM Helpers * * * * * *****/

private Layers TrainRearAxelBicycleModel()
{
    Int32 numElite = (Int32)(cemBatchSize * cemEliteFrac);

    // Initialize our base set of layers. This set is refined on each
    // CEM iteration and is the basis for that iteration's batches.
    Layers bestLayers = new Layers();
    bestLayers.Init(inputSize, hiddenSize, outputSize);
    bestLayers.SetStdDev(cemInitStddev);
    bestLayers.SetMean(0);

    // Run the CEM iterations.
    for (Int32 i = 0; i < cemIterations; i++)
    {
        // Run batches based off of our current 'bestLayers' and store them
        // in order to find the elite results.
        List<Layers> batchLayers = new List<Layers>(cemBatchSize);
        List<double> batchRewards = new List<double>(cemBatchSize);
        for (Int32 j = 0; j < cemBatchSize; j++)
        {
            // Init a set of layers off of our current best set.
            Layers batchLayer = new Layers();
            batchLayer.Init(bestLayers, (float)cemInitStddev);

            // Calculate the reward for this batch member and store it.
            batchRewards.Add(Reward(batchLayer));
            batchLayers.Add(batchLayer);
        }

        double meanReward = batchRewards.Average();

        // Rank the batch by reward and keep the indices of the elite members.
        Int32[] indexArray = batchRewards
            .Select((value, index) => new { value, index })
            .OrderByDescending(item => item.value)
            .Take(numElite)
            .Select(item => item.index)
            .ToArray();

        // Store the mean and stdDev of our elite layers into our best layers,
        // then add decaying noise so exploration does not collapse too early.
        BatchMeanAndStdDev(in batchLayers, in indexArray, ref bestLayers);
        bestLayers.AddNoise(cemNoiseFactor / (i + 1));

        Debug.Log("Iteration: " + i
            + "\tmeanReward: " + meanReward
            + "\tReward(bestLayers): " + Reward(bestLayers));
    }

    Debug.Log("done");
    return bestLayers;
}
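
// The helper below is NOT part of the original source. It is a minimal sketch of
// what BatchMeanAndStdDev could do, assuming Layers exposes flat parameter access
// through hypothetical GetFlatWeights(), SetFlatMean() and SetFlatStdDev() methods
// (these accessors are assumptions for illustration, not the real Layers API).
// It performs the standard CEM distribution update: fit the per-parameter mean and
// standard deviation of the elite batch members. It relies on the same using
// directives already needed above (System, System.Collections.Generic, System.Linq).
private void BatchMeanAndStdDev(in List<Layers> batchLayers, in Int32[] eliteIndices, ref Layers bestLayers)
{
    // Gather the flattened weight vectors of the elite members.
    double[][] elite = eliteIndices
        .Select(idx => batchLayers[idx].GetFlatWeights())   // assumed accessor
        .ToArray();

    Int32 numParams = elite[0].Length;
    double[] mean = new double[numParams];
    double[] stdDev = new double[numParams];

    for (Int32 p = 0; p < numParams; p++)
    {
        // Per-parameter mean across the elite set.
        mean[p] = elite.Average(weights => weights[p]);

        // Per-parameter standard deviation across the elite set.
        double variance = elite.Average(weights => (weights[p] - mean[p]) * (weights[p] - mean[p]));
        stdDev[p] = Math.Sqrt(variance);
    }

    // Write the new sampling distribution back into the best-layer container.
    bestLayers.SetFlatMean(mean);       // assumed accessor
    bestLayers.SetFlatStdDev(stdDev);   // assumed accessor
}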