Example #1
        static void runMarkovProblemSolver()
        {
            MarkovProblemSolver qlearner = new QLearningAgent(10000, 10000, 1);

            qlearner.accept(new GridProblem());
            qlearner.solve();
        }
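The constructor arguments here are not documented by the example; what it does show is the solver contract: the agent first receives a problem via accept, then solve runs the learning loop. A minimal sketch of that contract follows; the interface names and the IMarkovProblem parameter type are hypothetical, introduced only for illustration.

        // Hypothetical sketch of the contract implied by Example #1.
        // Only accept() and solve() appear above; everything else here
        // is an assumption for illustration.
        public interface IMarkovProblem { }

        public interface IMarkovProblemSolver
        {
            void accept(IMarkovProblem problem); // bind the MDP instance to solve
            void solve();                        // run the learning loop on it
        }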
Example #2
    /*
     * Allows analyzing effectiveness by saving a magnet's lap times to a text file.
     *
     * Also lets learning agents increase their age.
     */
    private void saveLapTimeData()
    {
        if (saveLapDataFileType != null)
        {
            // TODO: fix
            DateTime lapTime = System.DateTime.Now;
            TimeSpan lapse   = lapTime - lastLapTime;
            lastLapTime = lapTime;
            using (StreamWriter file = new StreamWriter(fileName, true))
            {
                file.WriteLine("Lap : " + _lap + " Time: " + lapse.TotalSeconds);
            }
        }

        try{
            //TODO: Put any other learning agents aging function calls here in the future
            QLearningAgent agent = GetComponent <QLearningAgent>();
            if (agent != null)
            {
                agent.increaseMaturity();
            }
        } catch (Exception) {
            // Do nothing if no learning agent is present
        }
    }
Example #3
        public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
        {
            QLearningAgent <CellWorldPosition, String> qla = new QLearningAgent <CellWorldPosition, String>(
                fourByThree);

            CellWorldPosition startingPosition = new CellWorldPosition(1, 4);
            String            action           = qla.decideAction(new MDPPerception <CellWorldPosition>(
                                                                      startingPosition, -0.04));

            Assert.AreEqual(CellWorld.LEFT, action);

            Randomizer betweenEightyAndNinetyPercent = new MockRandomizer(
                new double[] { 0.85 });     // to force left to become an "up"

            qla.execute(action, betweenEightyAndNinetyPercent);
            Assert.AreEqual(new CellWorldPosition(2, 4), qla.getCurrentState());
            Assert.AreEqual(-1.0, qla.getCurrentReward(), 0.001);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                                                           action), 0.001);
            String action2 = qla.decideAction(new MDPPerception <CellWorldPosition>(
                                                  new CellWorldPosition(2, 4), -1));

            Assert.IsNull(action2);
            Assert.AreEqual(-1.0, qla.getQTable().getQValue(startingPosition,
                                                            action), 0.001);
        }
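The -1.0 the test expects is consistent with the standard one-step Q-learning backup, applied only when the next perception arrives. A minimal sketch of that backup is below; the alpha and gamma values in the comment are illustrative, since the agent's actual internals are not shown in this example.

        // One-step Q-learning backup (sketch). For a terminal transition,
        // maxNextQ is 0, so with alpha = 1 and gamma = 1 the entry for
        // (startingPosition, LEFT) becomes the -1.0 reward asserted above.
        static double Update(double q, double reward, double maxNextQ,
                             double alpha, double gamma)
        {
            return q + alpha * (reward + gamma * maxNextQ - q);
        }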
Example #4
        public void testFirstStepsOfQLAAgentUnderNormalProbability()
        {
            QLearningAgent <CellWorldPosition, String> qla = new QLearningAgent <CellWorldPosition, String>(
                fourByThree);

            Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
                new double[] { 0.7 });
            CellWorldPosition startingPosition = new CellWorldPosition(1, 4);
            String            action           = qla.decideAction(new MDPPerception <CellWorldPosition>(
                                                                      startingPosition, -0.04));

            Assert.AreEqual(CellWorld.LEFT, action);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                                                           action), 0.001);

            qla.execute(action, alwaysLessThanEightyPercent);
            Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
            Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
            Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition,
                                                           action), 0.001);
            String action2 = qla.decideAction(new MDPPerception <CellWorldPosition>(
                                                  new CellWorldPosition(1, 3), -0.04));

            Assert.AreEqual(-0.04, qla.getQTable().getQValue(startingPosition,
                                                             action), 0.001);
        }
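Note the same delayed-update behavior on the non-terminal path: immediately after execute the table still reads 0.0, and only the second decideAction call performs the backup for (startingPosition, LEFT). With every Q value still initialized to 0, the backup reduces to the step reward itself, matching the final assertion. Using the Update sketch above (alpha and gamma of 1 remain an assumption):

        // All next-state Q values are still 0, so the backup yields the
        // -0.04 step reward asserted by the test:
        // Update(0.0, -0.04, 0.0, 1.0, 1.0) == -0.04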
Example #5
        static void qLearningAgentDemo()
        {
            CellWorld <double>   cw  = CellWorldFactory.CreateCellWorldForFig17_1();
            CellWorldEnvironment cwe = new CellWorldEnvironment(
                cw.GetCellAt(1, 1),
                cw.GetCells(),
                MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
                CommonFactory.CreateRandom());

            QLearningAgent <Cell <double>, CellWorldAction> qla = new QLearningAgent <Cell <double>, CellWorldAction>(
                MDPFactory.createActionsFunctionForFigure17_1(cw),
                CellWorldAction.None, 0.2, 1.0, 5,
                2.0);

            cwe.AddAgent(qla);

            output_utility_learning_rates(qla, 20, 10000, 500, 20);
        }
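The six constructor arguments read naturally against the AIMA-style signature QLearningAgent(actionsFunction, nullAction, alpha, gamma, Ne, Rplus), which this port appears to follow; the annotation below restates the call above under that assumption.

        // Annotated reading of the construction above, assuming the
        // AIMA-style parameter order (actionsFunction, nullAction,
        // alpha, gamma, Ne, Rplus):
        var annotated = new QLearningAgent<Cell<double>, CellWorldAction>(
            MDPFactory.createActionsFunctionForFigure17_1(cw), // A(s): legal actions per state
            CellWorldAction.None, // action reported in terminal states
            0.2,                  // alpha: learning-rate parameter
            1.0,                  // gamma: discount factor
            5,                    // Ne: visit count before the exploration bonus stops
            2.0);                 // Rplus: optimistic reward for under-tried pairs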
Example #6
        public void testQLearningAgent()
        {
            QLearningAgent <CellWorldPosition, String> qla = new QLearningAgent <CellWorldPosition, String>(
                fourByThree);
            Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8,
                                                             0.3, 0.7, 0.4, 0.6, 0.5 });

            // Randomizer r = new JavaRandomizer();
            Dictionary <Pair <CellWorldPosition, String>, Double> q = null;
            QTable <CellWorldPosition, String> qTable = null;

            for (int i = 0; i < 100; i++)
            {
                qla.executeTrial(r);
                q      = qla.getQ();
                qTable = qla.getQTable();
            }
            // qTable.normalize();
            // System.Console.WriteLine(qTable);
            // System.Console.WriteLine(qTable.getPolicy());
        }
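The commented-out qTable.getPolicy() presumably derives a greedy policy from the learned values. A minimal sketch of that derivation from the dictionary returned by getQ() is below; it assumes Pair exposes getFirst()/getSecond(), mirroring the AIMA types, and requires System.Linq for ToDictionary.

        // Greedy policy extraction sketch: pick the argmax-Q action per state.
        static Dictionary<S, A> GreedyPolicy<S, A>(Dictionary<Pair<S, A>, double> q)
        {
            var best = new Dictionary<S, KeyValuePair<A, double>>();
            foreach (var entry in q)
            {
                S s = entry.Key.getFirst();
                A a = entry.Key.getSecond();
                if (!best.ContainsKey(s) || entry.Value > best[s].Value)
                {
                    best[s] = new KeyValuePair<A, double>(a, entry.Value);
                }
            }
            return best.ToDictionary(kv => kv.Key, kv => kv.Value.Key);
        }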
Example #7
    /// <summary>
    /// Restarts the learning process with a new walking condition.
    /// </summary>
    public void BeginNewGame()
    {
        sendAction = 0;

        SetUp();
        agent = new QLearningAgent();

        // if we are continuing a previous training run
        if (this.loadExistingQTable)
        {
            Debug.Log(this.loadExistingQTable);
            #if UNITY_EDITOR
            ((QLearningAgent)agent).LoadQTable("Assets/Resources/qtable.txt", envParameters.state_size, envParameters.action_size);
            #endif
            ((QLearningAgent)agent).action_size = envParameters.action_size;
        }
        else
        {
            agent.SendParameters(envParameters);
        }

        Reset();
    }
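LoadQTable's save-side counterpart is not shown in this example. If one is needed, a minimal sketch follows; the text layout (one whitespace-separated row of action values per state) is an assumed format and must match whatever LoadQTable actually parses.

    // Hypothetical persistence counterpart to LoadQTable; the row-per-state
    // text layout here is an assumption, not the library's documented format.
    void SaveQTable(string path, float[][] qTable)
    {
        using (var writer = new System.IO.StreamWriter(path, false))
        {
            foreach (var row in qTable)
            {
                writer.WriteLine(string.Join(" ", row));
            }
        }
    }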
Example #8
        private void resetModel()
        {
            _agent = new QLearningAgent();

            trackBarAlpha.Value   = 2;
            trackBarGamma.Value   = 8;
            trackBarEpsilon.Value = 1;

            _agent.alpha   = 0.2;
            _agent.gamma   = 0.8;
            _agent.epsilon = 0.05;

            // register a Q value for every state-action pair
            foreach (var userHp in Enumerable.Range(0, _maxUserHp + 1))
            {
                foreach (var userPos in Enumerable.Range(0, _maxUserPos + 1))
                {
                    foreach (var towerHp in Enumerable.Range(0, _maxTowerHp + 1))
                    {
                        var state        = new GameState(userHp, userPos, towerHp);
                        var compactState = new CompactGameState(userHp, userPos, towerHp);

                        foreach (var action in state.GetActionSet())
                        {
                            if (checkBoxCompact.Checked)
                            {
                                _agent.registerStateQValue(compactState, action, 0);
                            }
                            else
                            {
                                _agent.registerStateQValue(state, action, 0);
                            }
                        }
                    }
                }
            }
        }
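Because resetModel pre-registers a Q value for every (state, action) pair, the table grows with the product of the three ranges. A quick sanity check on that size (a sketch, not part of the original):

            // Number of states enumerated by the triple loop; total entries
            // are this times the actions per state.
            int stateCount = (_maxUserHp + 1) * (_maxUserPos + 1) * (_maxTowerHp + 1);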
Example #9
 public abstract void OnAgentExited(QLearningAgent agent);
Example #10
 public abstract void OnAgentEntered(QLearningAgent agent);
Example #11
        static void AbsoluteMazeProblem()
        {
            // Declare the random pool
            MyRandom.RandomPool.Declare("action", (int)DateTime.Now.Ticks);

            // Environment
            MazeEnvironment mazeEnv = new MazeEnvironment();
            mazeEnv.Map = new MazeMap(new int[,]{
                {  1,  0,  0,  0,  0 },
                {  0,  0,  0,  0,  0 },
                {  0,  0,  0,  0,  0 },
                {  0,  0,  0,  0,  3 },
                {  0,  0,  3,  0,  2 },
            });

            // Declare the agent
            var mazeAgent
                = new QLearningAgent<MazeEnvironment,PositionState,MoveAction>();
                //= new ActorCritic<MazeEnvironment,PositionState,MoveAction>();

            // Set the environment on the agent
            mazeAgent.Environment = mazeEnv;

            // Register the agent with the environment
            mazeEnv.AddAgent(mazeAgent);

            mazeAgent.Id = 1;

            // Declare the second agent
            var mazeAgent2
                //= new QLearningAgent<MazeEnvironment,PositionState,FourDirectionAction>();
                = new ActorCritic<MazeEnvironment,PositionState,MoveAction>();

            // Set the environment on the agent
            mazeAgent2.Environment = mazeEnv;

            // Register the agent with the environment
            mazeEnv.AddAgent(mazeAgent2);

            mazeAgent2.Id = 2;

            // Set the initial state
            mazeAgent2.CurrentState = mazeEnv.StartState;

            // Set the initial state
            mazeAgent.CurrentState = mazeEnv.StartState;

            // Set up the view
            MazeView mazeView = new MazeView();
            mazeView.Maze = mazeEnv;

            while(true)
            {

                mazeAgent.Act();
                mazeAgent2.Act();

                mazeView.show();

                Console.WriteLine();
                Console.WriteLine();
                Console.WriteLine();
                mazeAgent.Show();
                Console.WriteLine();
                mazeAgent2.Show();

                string command = Console.ReadLine();  // do this each time a key is pressed

                if (command.Equals("init"))
                {
                    mazeAgent.Init();
                    mazeAgent2.Init();
                }

                else if (command.Equals("q"))
                {
                    mazeAgent.Show ();
                    continue;
                }

                try // try parsing the command as a number
                {   // if it parses, advance that many steps
                    int num = int.Parse(command);

                    foreach (int j in Enumerable.Range(0, num))
                    {
                        mazeAgent.Act();
                        mazeAgent2.Act();
                    }
                }
                catch (Exception)
                {
                }
            }
        }
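The try/catch around int.Parse uses an exception for ordinary control flow; an equivalent sketch of the same step-count handling with int.TryParse avoids that:

                // Equivalent parse handling without exception-based flow:
                if (int.TryParse(command, out int num))
                {
                    for (int j = 0; j < num; j++)
                    {
                        mazeAgent.Act();
                        mazeAgent2.Act();
                    }
                }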
Example #12
        /// <summary>
        /// Sets up the agent.
        /// </summary>
        private void SetAgent()
        {
            EnvironmentMaze env = new EnvironmentMaze(EnvMaze, IndexData.GoalReword);

            Agent = new QLearningAgent(env, IndexData.Alpha, IndexData.Gamma, IndexData.Epsilon);
        }
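The three hyperparameters passed to the constructor play the usual Q-learning roles: Alpha is the update step size, Gamma the discount factor, and Epsilon the exploration rate. A minimal epsilon-greedy selection sketch showing how such an Epsilon is conventionally used is below; bestAction and randomAction are hypothetical helpers, and the agent's real internals are not shown in this example.

        // Epsilon-greedy action selection sketch: with probability epsilon
        // explore a random action, otherwise exploit the best-known one.
        static TAction ChooseAction<TState, TAction>(
            TState state, Random rng, double epsilon,
            Func<TState, TAction> bestAction,    // hypothetical: argmax-Q action
            Func<TState, TAction> randomAction)  // hypothetical: uniform random action
        {
            return rng.NextDouble() < epsilon
                ? randomAction(state)  // explore
                : bestAction(state);   // exploit current Q estimates
        }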