// Per-frame hook: refresh the cached sensor/parameter values, then record
// the bot's current situation in `dic`. The exact semantics of `dic` and
// `nic` are declared elsewhere in this class — presumably a state-visit map.
private void Update()
{
    SetParameters();

    // Snapshot the current sensor readings as a State key.
    var currentState = new State(DistanceToObstacleX, DistanceToObstacleY, PositionX);
    dic.AddOrUpdate(currentState, nic);
}
// One tick of the Q(lambda) (eligibility-trace) learning loop, driven as a
// two-phase state machine: DoAction chooses and executes an action for the
// current state; Observe measures the outcome, computes the TD error and
// propagates it through every state's eligibility traces.
public override void LearningAlgorithm()
{
    SetParameters();
    switch (state)
    {
        case (int)QLearningStateMachine.DoAction:
        {
            actualState = GetState();
            // Lazily create a Q/E-value row the first time a state is seen.
            if (!qLambdaTable.ContainsKey(actualState))
            {
                qLambdaTable.AddOrUpdate(actualState, new EQValues());
            }
            // NOTE(review): despite the name, GetRandomAction presumably
            // implements the exploration policy (e.g. epsilon-greedy) rather
            // than a uniform pick -- confirm against its definition.
            selectedActionNumber = GetRandomAction(actualState);
            ExecuteAction(selectedActionNumber);
            state = (int)QLearningStateMachine.Observe;
            break;
        }
        case (int)QLearningStateMachine.Observe:
        {
            nextState = GetState();
            DontAllowBlockBot();
            // Learn only once the action actually changed the observed state;
            // otherwise stay in Observe and wait.
            if (actualState != nextState)
            {
                // max_a Q(s', a): best known value of the successor state
                // (0 for a never-before-seen state, which also gets a fresh row).
                float maxQvalue;
                if (qLambdaTable.ContainsKey(nextState))
                {
                    maxQvalue = qLambdaTable[nextState].QValues.Max();
                }
                else
                {
                    qLambdaTable.AddOrUpdate(nextState, new EQValues());
                    maxQvalue = 0;
                }
                // Shaping reward: progress along State1 (position component),
                // used only while no terminal reward has been granted.
                if (!rewardIsReached)
                {
                    Reward = (nextState.State1 - actualState.State1) * Rewards.ForPosition;
                }
                // Touching the enemy overrides any positional reward.
                if (enemyIsReached)
                {
                    Reward = Rewards.EnemyTouched;
                }
                // TD error: delta = r + GAMMA * max_a Q(s',a) - Q(s,a).
                float delta = Reward + GAMMA * maxQvalue - qLambdaTable[actualState].GetQValue(selectedActionNumber);
                // Mark the (state, action) pair just taken as fully eligible
                // (replacing-trace style: set to 1 rather than incremented).
                qLambdaTable[actualState].SetEValue(selectedActionNumber, 1);
                if (!rewardIsReached)
                {
                    // Sweep the whole table: nudge every Q-value in proportion
                    // to its eligibility trace, then decay that state's traces
                    // by LAMBDA * GAMMA. Values are mutated in place; the key
                    // set itself is not modified during this enumeration.
                    foreach (var key in qLambdaTable.Keys)
                    {
                        for (int action = 0; action < qLambdaTable[key].QValues.Length; action++)
                        {
                            qLambdaTable[key].QValues[action] += (ALPHA * delta * qLambdaTable[key].EValues[action]);
                        }
                        qLambdaTable[key].updateEValues(LAMBDA, GAMMA);
                    }
                }
                // End of episode: clear all traces and persist the learned table.
                if (TerminationStateIsReached())
                {
                    ResetETable();
                    SaveQLambdaDataToFile();
                }
                state = (int)QLearningStateMachine.DoAction;
            }
            break;
        }
    }
}