static void runMarkovProblemSolver()
{
    // Build a Q-learning agent, hand it the grid problem, and let it solve it.
    MarkovProblemSolver qlearner = new QLearningAgent(10000, 10000, 1);
    qlearner.accept(new GridProblem());
    qlearner.solve();
}
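For reference, here is a minimal, self-contained sketch of the tabular Q-learning update that agents like the one above rely on. The QTableSketch class and its member names are illustrative only and are not part of the MarkovProblemSolver or QLearningAgent API shown in these snippets.

// Minimal tabular Q-learning update:
// Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)).
// All names here are illustrative; none come from the snippet above.
using System;
using System.Collections.Generic;

class QTableSketch<TState, TAction>
{
    private readonly Dictionary<(TState, TAction), double> _q = new Dictionary<(TState, TAction), double>();
    private readonly double _alpha;  // learning rate
    private readonly double _gamma;  // discount factor

    public QTableSketch(double alpha, double gamma) { _alpha = alpha; _gamma = gamma; }

    public double Get(TState s, TAction a) => _q.TryGetValue((s, a), out var v) ? v : 0.0;

    public void Update(TState s, TAction a, double reward, TState next, IEnumerable<TAction> nextActions)
    {
        // Max over the next state's actions; a terminal state (no actions) contributes 0.
        double best = 0.0;
        bool first = true;
        foreach (var a2 in nextActions)
        {
            double v = Get(next, a2);
            best = first ? v : Math.Max(best, v);
            first = false;
        }
        double old = Get(s, a);
        _q[(s, a)] = old + _alpha * (reward + _gamma * best - old);
    }
}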
/*
 * Allows effectiveness to be analyzed by saving a magnet's lap times to a text file.
 *
 * Also allows learning agents to increase their age.
 */
private void saveLapTimeData()
{
    if (saveLapDataFileType != null)
    {
        // TODO: fix
        DateTime lapTime = System.DateTime.Now;
        TimeSpan lapse = lapTime - lastLapTime;
        lastLapTime = lapTime;

        // Use for time System.DateTime.Now;
        StreamWriter file = new StreamWriter(fileName, true);
        file.WriteLine("Lap : " + _lap + " Time: " + lapse.TotalSeconds);
        file.Close();
    }

    try
    {
        // TODO: Put any other learning agents' aging function calls here in the future.
        QLearningAgent agent = GetComponent<QLearningAgent>();
        if (agent != null)
        {
            agent.increaseMaturity();
        }
    }
    catch (Exception e)
    {
        // Do nothing if no learning agent is present.
    }
}
public void testFirstStepsOfQLAAgentWhenFirstStepTerminates()
{
    QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
            fourByThree);

    CellWorldPosition startingPosition = new CellWorldPosition(1, 4);

    String action = qla.decideAction(new MDPPerception<CellWorldPosition>(
            startingPosition, -0.04));
    Assert.AreEqual(CellWorld.LEFT, action);

    Randomizer betweenEightyAndNinetyPercent = new MockRandomizer(
            new double[] { 0.85 }); // to force "left" to become an "up"
    qla.execute(action, betweenEightyAndNinetyPercent);
    Assert.AreEqual(new CellWorldPosition(2, 4), qla.getCurrentState());
    Assert.AreEqual(-1.0, qla.getCurrentReward(), 0.001);
    Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

    String action2 = qla.decideAction(new MDPPerception<CellWorldPosition>(
            new CellWorldPosition(2, 4), -1));
    Assert.IsNull(action2);
    Assert.AreEqual(-1.0, qla.getQTable().getQValue(startingPosition, action), 0.001);
}
public void testFirstStepsOfQLAAgentUnderNormalProbability()
{
    QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
            fourByThree);

    Randomizer alwaysLessThanEightyPercent = new MockRandomizer(
            new double[] { 0.7 });

    CellWorldPosition startingPosition = new CellWorldPosition(1, 4);

    String action = qla.decideAction(new MDPPerception<CellWorldPosition>(
            startingPosition, -0.04));
    Assert.AreEqual(CellWorld.LEFT, action);
    Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

    qla.execute(action, alwaysLessThanEightyPercent);
    Assert.AreEqual(new CellWorldPosition(1, 3), qla.getCurrentState());
    Assert.AreEqual(-0.04, qla.getCurrentReward(), 0.001);
    Assert.AreEqual(0.0, qla.getQTable().getQValue(startingPosition, action), 0.001);

    String action2 = qla.decideAction(new MDPPerception<CellWorldPosition>(
            new CellWorldPosition(1, 3), -0.04));
    Assert.AreEqual(-0.04, qla.getQTable().getQValue(startingPosition, action), 0.001);
}
static void qLearningAgentDemo()
{
    CellWorld<double> cw = CellWorldFactory.CreateCellWorldForFig17_1();
    CellWorldEnvironment cwe = new CellWorldEnvironment(
            cw.GetCellAt(1, 1),
            cw.GetCells(),
            MDPFactory.createTransitionProbabilityFunctionForFigure17_1(cw),
            CommonFactory.CreateRandom());

    QLearningAgent<Cell<double>, CellWorldAction> qla = new QLearningAgent<Cell<double>, CellWorldAction>(
            MDPFactory.createActionsFunctionForFigure17_1(cw),
            CellWorldAction.None, 0.2, 1.0, 5, 2.0);

    cwe.AddAgent(qla);
    output_utility_learning_rates(qla, 20, 10000, 500, 20);
}
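Assuming the trailing constructor arguments above follow the AIMA convention (learning rate alpha = 0.2, discount gamma = 1.0, exploration count Ne = 5, optimistic reward estimate R+ = 2.0), the exploration function such an agent typically uses can be sketched as below; ExplorationFunction is an illustrative name, not a member of the library shown.

// f(u, n): stay optimistic (return rPlus) until a state-action pair has been tried
// at least ne times, then fall back to the learned utility estimate u.
// A hedged sketch of the AIMA-style exploration function, not the library's own code.
static double ExplorationFunction(double u, int n, int ne = 5, double rPlus = 2.0)
{
    return n < ne ? rPlus : u;
}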
public void testQLearningAgent()
{
    QLearningAgent<CellWorldPosition, String> qla = new QLearningAgent<CellWorldPosition, String>(
            fourByThree);

    Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8, 0.3,
            0.7, 0.4, 0.6, 0.5 });
    // Randomizer r = new JavaRandomizer();

    Dictionary<Pair<CellWorldPosition, String>, Double> q = null;
    QTable<CellWorldPosition, String> qTable = null;
    for (int i = 0; i < 100; i++)
    {
        qla.executeTrial(r);
        q = qla.getQ();
        qTable = qla.getQTable();
    }

    // qTable.normalize();
    // System.Console.WriteLine(qTable);
    // System.Console.WriteLine(qTable.getPolicy());
}
/// <summary>
/// Restarts the learning process with a new walking condition.
/// </summary>
public void BeginNewGame()
{
    sendAction = 0;
    SetUp();
    agent = new QLearningAgent();

    // If we are continuing a previous training run, reload the saved Q-table.
    if (this.loadExistingQTable)
    {
        Debug.Log(this.loadExistingQTable);
#if UNITY_EDITOR
        ((QLearningAgent)agent).LoadQTable("Assets/Resources/qtable.txt",
                envParameters.state_size, envParameters.action_size);
#endif
        ((QLearningAgent)agent).action_size = envParameters.action_size;
    }
    else
    {
        agent.SendParameters(envParameters);
    }
    Reset();
}
private void resetModel()
{
    _agent = new QLearningAgent();

    trackBarAlpha.Value = 2;
    trackBarGamma.Value = 8;
    trackBarEpsilon.Value = 1;
    _agent.alpha = 0.2;
    _agent.gamma = 0.8;
    _agent.epsilon = 0.05;

    // Register an initial Q-value of 0 for every reachable state-action pair.
    foreach (var userHp in Enumerable.Range(0, _maxUserHp + 1))
    {
        foreach (var userPos in Enumerable.Range(0, _maxUserPos + 1))
        {
            foreach (var towerHp in Enumerable.Range(0, _maxTowerHp + 1))
            {
                var state = new GameState(userHp, userPos, towerHp);
                var compactState = new CompactGameState(userHp, userPos, towerHp);
                foreach (var action in state.GetActionSet())
                {
                    if (checkBoxCompact.Checked)
                    {
                        _agent.registerStateQValue(compactState, action, 0);
                    }
                    else
                    {
                        _agent.registerStateQValue(state, action, 0);
                    }
                }
            }
        }
    }
}
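With epsilon = 0.05 as set above, action selection would typically be epsilon-greedy. The following is a minimal sketch under that assumption; ChooseAction and the getQValue delegate are illustrative names, not members of the QLearningAgent used in the snippet.

// Epsilon-greedy selection: explore with probability epsilon, otherwise exploit the
// highest-valued action. Assumes `using System;` and `using System.Collections.Generic;`.
static TAction ChooseAction<TAction>(IReadOnlyList<TAction> actions,
                                     Func<TAction, double> getQValue,
                                     Random rng, double epsilon = 0.05)
{
    if (rng.NextDouble() < epsilon)
    {
        return actions[rng.Next(actions.Count)];   // random exploratory action
    }

    TAction best = actions[0];
    double bestQ = getQValue(best);
    for (int i = 1; i < actions.Count; i++)
    {
        double q = getQValue(actions[i]);
        if (q > bestQ)
        {
            bestQ = q;
            best = actions[i];
        }
    }
    return best;
}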
public abstract void OnAgentExited(QLearningAgent agent);
public abstract void OnAgentEntered(QLearningAgent agent);
static void AbsoluteMazeProblem()
{
    // Declare the random pool used for action selection.
    MyRandom.RandomPool.Declare("action", (int)DateTime.Now.Ticks);

    // Environment
    MazeEnvironment mazeEnv = new MazeEnvironment();
    mazeEnv.Map = new MazeMap(new int[,]{
        { 1, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 0 },
        { 0, 0, 0, 0, 3 },
        { 0, 0, 3, 0, 2 },
    });

    // Declare the first agent.
    var mazeAgent = new QLearningAgent<MazeEnvironment, PositionState, MoveAction>();
    //= new ActorCritic<MazeEnvironment,PositionState,MoveAction>();
    // Set the environment on the agent.
    mazeAgent.Environment = mazeEnv;
    // Add the agent to the environment.
    mazeEnv.AddAgent(mazeAgent);
    mazeAgent.Id = 1;

    // Declare the second agent.
    var mazeAgent2
        //= new QLearningAgent<MazeEnvironment,PositionState,FourDirectionAction>();
        = new ActorCritic<MazeEnvironment, PositionState, MoveAction>();
    // Set the environment on the agent.
    mazeAgent2.Environment = mazeEnv;
    // Add the agent to the environment.
    mazeEnv.AddAgent(mazeAgent2);
    mazeAgent2.Id = 2;

    // Set the initial states.
    mazeAgent2.CurrentState = mazeEnv.StartState;
    mazeAgent.CurrentState = mazeEnv.StartState;

    // Set up the view.
    MazeView mazeView = new MazeView();
    mazeView.Maze = mazeEnv;

    // Repeat this loop every time a key is pressed.
    while (true)
    {
        mazeAgent.Act();
        mazeAgent2.Act();
        mazeView.show();
        Console.WriteLine();
        Console.WriteLine();
        Console.WriteLine();
        mazeAgent.Show();
        Console.WriteLine();
        mazeAgent2.Show();

        string command = Console.ReadLine();
        if (command.Equals("init"))
        {
            mazeAgent.Init();
            mazeAgent2.Init();
        }
        else if (command.Equals("q"))
        {
            mazeAgent.Show();
            continue;
        }

        try // Try to parse the command as a number.
        {
            // If it parses, advance that many steps.
            int num = int.Parse(command);
            foreach (int j in Enumerable.Range(0, num))
            {
                mazeAgent.Act();
                mazeAgent2.Act();
            }
        }
        catch (Exception)
        {
        }
    }
}
/// <summary>
/// Sets up the agent.
/// </summary>
private void SetAgent()
{
    EnvironmentMaze env = new EnvironmentMaze(EnvMaze, IndexData.GoalReword);
    Agent = new QLearningAgent(env, IndexData.Alpha, IndexData.Gamma, IndexData.Epsilon);
}