/// <summary>
/// Per-step hook: optionally stores the transition in <c>Memory</c>, creates
/// <c>TargetModel</c> on first use, and (unless updates are deferred to episode end)
/// copies parameters from <c>Net</c> into <c>TargetModel</c>.
/// </summary>
/// <param name="step">Step index supplied by the caller; not used by this method.</param>
/// <param name="globalStep">Step counter used for the memory and target-update modulo checks.</param>
/// <param name="state">State stored in the pushed <c>Experience</c>.</param>
/// <param name="action">Action stored in the pushed <c>Experience</c>.</param>
/// <param name="reward">Reward stored in the pushed <c>Experience</c>.</param>
/// <param name="nextState">Next state stored in the pushed <c>Experience</c>.</param>
/// <param name="done">Done flag stored in the pushed <c>Experience</c>.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>TargetModelUpdateInterval</c> is zero or negative.
/// </exception>
public override void OnStep(int step, int globalStep, Tensor state, Tensor action, float reward, Tensor nextState, bool done)
{
    // Validate configuration before touching any state, so a misconfigured agent
    // fails without first pushing an experience into memory.
    if (TargetModelUpdateInterval <= 0)
    {
        throw new InvalidOperationException("Target model update has to be positive.");
    }

    // Only every MemoryInterval-th global step is recorded.
    if (globalStep % MemoryInterval == 0)
    {
        Memory.Push(new Experience(state, action, reward, nextState, done));
    }

    // The target model is created lazily as a clone of the online network.
    if (TargetModel == null)
    {
        TargetModel = Net.Clone();
    }

    // When updates are tied to episode end, nothing more happens per step.
    if (TargetModelUpdateOnEpisodeEnd)
    {
        return;
    }

    if (TargetModelUpdateInterval >= 1)
    {
        // Interval >= 1 is treated as a step count: full copy every N global steps.
        if (globalStep % (int)TargetModelUpdateInterval == 0)
        {
            Net.CopyParametersTo(TargetModel);
        }
    }
    else
    {
        // Interval in (0, 1) is forwarded to the soft copy on every step
        // (presumably as a blend factor; confirm against SoftCopyParametersTo).
        Net.SoftCopyParametersTo(TargetModel, TargetModelUpdateInterval);
    }
}
/// <summary>
/// Rebuilds the <c>samples</c> population from the current best sample:
/// slot 0 receives a clone of the best net (with its score and id), slots up to the
/// mutation cutoff receive mutated clones of it, and the remaining slots receive
/// freshly constructed, randomly initialized nets.
/// </summary>
public void NextGeneration()
{
    UpdateBestSample();

    // Look the component up once instead of once per field read.
    NetInterface bestInterface = best_sample.GetComponentInChildren<NetInterface>();
    Net best = bestInterface.Mind.Clone();
    int id = bestInterface.Id;
    float score = bestInterface.Score;

    // Slot 0 carries the best net over together with its score and id.
    Destroy(samples[0]);
    InstantiateWithNetAndScore(0, best, score, id);

    // Loop-invariant boundary between mutated clones and random nets.
    int mutationCutoff = Mathf.FloorToInt(NUMBER_OF_GENERATION_SAMPLES * MUTATION_PERCENTAGE);

    for (int i = 1; i < mutationCutoff; i++)
    {
        Net mutant = best.Clone();
        mutant.Mutate();
        Destroy(samples[i]);
        InstantiateWithNet(i, mutant);
    }

    // Never start below 1: if the cutoff floors to 0, starting there would
    // replace the best net that was just placed in slot 0.
    int randomStart = Mathf.Max(1, mutationCutoff);

    for (int i = randomStart; i < NUMBER_OF_GENERATION_SAMPLES; i++)
    {
        Net fresh = new Net(
            MindConstants.NUMBER_OF_INPUT_NODES,
            MindConstants.NUMBER_OF_OUTPUT_NODES,
            MindConstants.NUMBER_OF_HIDDEN_NODES,
            MindConstants.NUMBER_OF_HIDDEN_LAYERS
        );
        fresh.InitializeRandom();
        Destroy(samples[i]);
        InstantiateWithNet(i, fresh);
    }
}