/// <summary>
/// Refreshes the parameters through <c>SetParameters</c> and then stores
/// <c>nic</c> in <c>dic</c> under a key built from the current
/// <c>DistanceToObstacleX</c>, <c>DistanceToObstacleY</c> and <c>PositionX</c>.
/// </summary>
/// <remarks>
/// NOTE(review): presumably invoked once per frame by the host engine — confirm against the caller.
/// </remarks>
private void Update()
 {
     // Parameters must be refreshed first so the key reflects the latest readings.
     SetParameters();

     var observedState = new State(DistanceToObstacleX, DistanceToObstacleY, PositionX);
     dic.AddOrUpdate(observedState, nic);
 }
        /// <summary>
        /// Runs one step of the two-phase learning state machine.
        /// </summary>
        /// <remarks>
        /// <para>
        /// <c>DoAction</c> phase: captures the current state, makes sure it has an entry in
        /// <c>qLambdaTable</c>, picks an action with <c>GetRandomAction</c>, executes it and
        /// switches to the <c>Observe</c> phase.
        /// </para>
        /// <para>
        /// <c>Observe</c> phase: captures the resulting state and, once it differs from the
        /// state the action was taken in, computes a temporal-difference error, marks the
        /// eligibility of the taken action, propagates the error to every table entry
        /// (only while <c>rewardIsReached</c> is false) and switches back to <c>DoAction</c>.
        /// While the state has not changed, the machine stays in <c>Observe</c>.
        /// </para>
        /// <para>
        /// NOTE(review): the update has the shape of a Q(lambda) step with eligibility
        /// traces — confirm against <c>EQValues.updateEValues</c>.
        /// </para>
        /// </remarks>
        public override void LearningAlgorithm()
        {
            SetParameters();

            if (state == (int)QLearningStateMachine.DoAction)
            {
                actualState = GetState();

                // Register states the table has not seen yet before choosing an action.
                if (qLambdaTable.ContainsKey(actualState) == false)
                {
                    qLambdaTable.AddOrUpdate(actualState, new EQValues());
                }

                selectedActionNumber = GetRandomAction(actualState);
                ExecuteAction(selectedActionNumber);

                state = (int)QLearningStateMachine.Observe;
            }
            else if (state == (int)QLearningStateMachine.Observe)
            {
                nextState = GetState();
                DontAllowBlockBot();

                // Keep observing until the state actually differs from the one acted in.
                if (!(actualState != nextState))
                {
                    return;
                }

                // Best known value of the successor state; a newly seen state is
                // registered in the table and contributes 0.
                float bestNextQ = 0;
                if (!qLambdaTable.ContainsKey(nextState))
                {
                    qLambdaTable.AddOrUpdate(nextState, new EQValues());
                }
                else
                {
                    bestNextQ = qLambdaTable[nextState].QValues.Max();
                }

                // When rewardIsReached is true and enemyIsReached is false, Reward keeps
                // whatever value it already had.
                if (!rewardIsReached)
                {
                    Reward = (nextState.State1 - actualState.State1) * Rewards.ForPosition;
                }
                if (enemyIsReached)
                {
                    // Touching the enemy overrides any position-based reward.
                    Reward = Rewards.EnemyTouched;
                }

                var actedEntry = qLambdaTable[actualState];
                float tdError = Reward + GAMMA * bestNextQ - actedEntry.GetQValue(selectedActionNumber);
                actedEntry.SetEValue(selectedActionNumber, 1);

                if (!rewardIsReached)
                {
                    // Spread the error over every entry in proportion to its eligibility,
                    // then let the entry update its own eligibility values.
                    foreach (var tableKey in qLambdaTable.Keys)
                    {
                        var entry = qLambdaTable[tableKey];
                        for (int actionIndex = 0; actionIndex < entry.QValues.Length; actionIndex++)
                        {
                            entry.QValues[actionIndex] += (ALPHA * tdError * entry.EValues[actionIndex]);
                        }
                        entry.updateEValues(LAMBDA, GAMMA);
                    }
                }

                if (TerminationStateIsReached())
                {
                    ResetETable();
                    SaveQLambdaDataToFile();
                }

                state = (int)QLearningStateMachine.DoAction;
            }
        }