/// <summary>
/// Builds a pending-action record in its initial state: nothing applied yet,
/// placeholder positions, and a unit-less Hold as the default action.
/// </summary>
public PendingActionsInfo()
{
    NewPos = new Position();
    DesiredPos = new Position();
    Action = new SoccerAction(ActionTypes.Hold, -1);
    Updated = false;
}
/// <summary>
/// Sets up the NEAT-driven centralized Q-table: caches environment dimensions
/// and action count, optionally opens the fitness-growth logger, then launches
/// the evolutionary thread and blocks until it signals the first network is ready.
/// </summary>
/// <param name="client">RL client supplying environment size and identity info.</param>
/// <param name="numTeammates">Number of teammates (determines action count).</param>
/// <param name="myUnum">My uniform number.</param>
public NeatCentralizedQTableBase(RLClientBase client, int numTeammates, int myUnum)
{
    m_client = client;
    m_rows = client.EnvRows;
    m_cols = client.EnvCols;
    m_numTeammates = numTeammates;
    m_myUnum = myUnum;
    m_numActions = SoccerAction.GetActionCount(Params.MoveKings, m_numTeammates);
    this.PerformanceNetworkToEvaluate = new NeatPlayerPerformanceStats();
    if (NeatExpParams.SaveFitnessGrowth)
    {
        // One EA log file per (team, player, method) combination.
        m_eaLogger = new PerformanceLogger(String.Format("EALogs/{0}-{1}-{2}",
            m_client.MyTeamName, m_myUnum, m_client.PerformanceLoggerMethodName), false);
        m_eaLogger.WriteLine("% Generation BestFitness MeanFitness AvgComplexity");
    }
    // Run evolution on its own thread; WaitOne blocks this constructor until
    // the EA thread publishes the first network to evaluate.
    Thread evoThread = new Thread(EvolutionaryThread);
    evoThread.Start();
    m_eventNewNetReady.WaitOne();
}
/// <summary>
/// Accumulates reward into the current genome's fitness instead of writing a
/// Q-value; every CyclesPerGenome cycles it publishes the genome's fitness and
/// goal-difference gain to the EA thread and waits for the next network.
/// </summary>
/// <param name="s">Not referenced here; present to satisfy the base signature.</param>
/// <param name="ai">Index of the action that was taken.</param>
/// <param name="newValue">Not referenced here; present to satisfy the base signature.</param>
protected override void UpdateQValue(State s, int ai, double newValue)
{
    double reward = EnvironmentModeler.GetReward(base.m_prevState, base.m_curState,
        SoccerAction.GetActionTypeFromIndex(ai, Params.MoveKings));
    double dummy;
    int prevStateGreedyAct = GetMaxQ(base.m_prevState, out dummy);
    if (prevStateGreedyAct == ai) // add reward to fitness only if it was a greedy action
    {
        m_genomeSumRewards += reward;
    }
    if (this.m_client.Cycle % NeatExpParams.CyclesPerGenome == 0)
    {
        Console.Write("Gener: {0}, ", m_evoAlg != null ? m_evoAlg.Generation : 0);
        // Fitness handed to the EA: summed greedy-action rewards over the window;
        // goal difference gained during the window is reported alongside it.
        int curGoalDiff = m_client.OurScore - m_client.OppScore;
        this.GoalDiffGainedByNetworkToEvaluate = curGoalDiff - m_prevGoalDiff;
        this.FitnessNetworkToEvaluate = m_genomeSumRewards;
        m_prevGoalDiff = curGoalDiff;
        m_eventNetFitnessReady.Set();
        m_genomeSumRewards = 0.0;
        // Block the RL loop until the EA thread installs the next genome's network.
        m_eventNewNetReady.WaitOne();
    }
}
/// <summary>
/// Guesses the action performed by the given opponent between two consecutive
/// states and converts it to its linear action index.
/// </summary>
/// <param name="prevState">State before the transition.</param>
/// <param name="curState">State after the transition.</param>
/// <param name="oppIndex">Index of the opponent whose action is guessed.</param>
/// <returns>The linear index of the guessed action.</returns>
public static int GuessOpponentAction(State prevState, State curState, int oppIndex)
{
    int actorUnum;
    SoccerAction guessed = GuessOpponentAction(prevState, curState, oppIndex, out actorUnum);
    return SoccerAction.GetIndexFromAction(guessed, Params.MoveKings, actorUnum);
}
/// <summary>
/// Guesses the action performed by the given teammate between two consecutive
/// states and converts it to its linear action index.
/// </summary>
/// <param name="prevState">State before the transition.</param>
/// <param name="curState">State after the transition.</param>
/// <param name="teammateIndex">Index of the teammate whose action is guessed.</param>
/// <returns>The linear index of the guessed action.</returns>
public static int GuessTeammateAction(State prevState, State curState, int teammateIndex)
{
    int actorUnum;
    SoccerAction guessed = GuessTeammateAction(prevState, curState, teammateIndex, out actorUnum);
    return SoccerAction.GetIndexFromAction(guessed, Params.MoveKings, actorUnum);
}
/// <summary>
/// Maps the highest-signal cell (maxr, maxc) to an action index: a pass when a
/// teammate occupies that cell, Hold when it is my own cell, otherwise a move
/// along the dominant axis toward it.
/// </summary>
private int InterpretHighestSignal(State s, int maxr, int maxc)
{
    var targetCell = new Position(maxr, maxc);
    var myPos = s.Me.Position;
    int rowDelta = maxr - myPos.Row;
    int colDelta = maxc - myPos.Col;

    // A teammate sitting exactly on the target cell means: pass to him.
    foreach (var mate in s.OurPlayersList)
    {
        if (mate.Position.Equals(targetCell))
            return SoccerAction.GetIndexFromAction(
                new SoccerAction(ActionTypes.Pass, mate.Unum), Params.MoveKings, this.MyUnum);
    }

    if (rowDelta == 0 && colDelta == 0)
        return SoccerAction.GetIndexFromAction(
            new SoccerAction(ActionTypes.Hold), Params.MoveKings, this.MyUnum);

    // Horizontal move when the column offset dominates (ties go horizontal).
    if (rowDelta == 0 || (Math.Abs(rowDelta) <= Math.Abs(colDelta)))
    {
        var horizontal = colDelta > 0 ? ActionTypes.MoveEast : ActionTypes.MoveWest;
        return SoccerAction.GetIndexFromAction(
            new SoccerAction(horizontal), Params.MoveKings, this.MyUnum);
    }

    // Vertical move when the row offset dominates.
    if (colDelta == 0 || (Math.Abs(rowDelta) > Math.Abs(colDelta)))
    {
        var vertical = rowDelta > 0 ? ActionTypes.MoveSouth : ActionTypes.MoveNorth;
        return SoccerAction.GetIndexFromAction(
            new SoccerAction(vertical), Params.MoveKings, this.MyUnum);
    }

    return 0;
}
/// <summary>
/// Registers a player's requested action for the current cycle. The request is
/// ignored when updating is disabled, a goal has already been scored, the index
/// is negative, or the player already has a pending action. Right-side actions
/// are mirrored into the canonical (left-side) frame first. If the ball owner's
/// move would carry the ball behind a goal line, a goal is recorded instead of
/// a position update.
/// </summary>
/// <param name="pi">Index of the player in the Players array.</param>
/// <param name="act">The action requested by that player.</param>
public void UpdateActionForPlayer(int pi, SoccerAction act)
{
    if (pi < 0)
    {
        return;
    }
    if (!m_isUpdating)
    {
        return;
    }
    if (m_isGoalScored) // no need to update anything
    {
        return;
    }
    Sides side = Players[pi].Side;
    if (m_pendingActions[pi].Updated)
    {
        return;
    }
    // Mirror the action so both teams are simulated in the same frame.
    if (side == Sides.Right)
    {
        act = GetActionForRightSide(act);
    }
    bool hasBall = BallPosition == Players[pi].Position;
    if (hasBall)
    {
        m_ballOwnerIndex = pi;
    }
    Position newPos = GetMovementDir(act.ActionType) + Players[pi].Position;
    // check if a goal is scored
    if (hasBall)
    {
        Sides recvSide;
        if (IsBallBehindGoal(Players[pi].Position, newPos, out recvSide))
        {
            // Record the goal and bail out: no pending action is stored.
            m_isGoalScored = true;
            m_sideReceivingGoal = recvSide;
            return;
        }
    }
    // Clamp moves that would leave the field.
    if (IsPositionOutsideField(newPos))
    {
        newPos = NormalizePosition(newPos);
    }
    m_pendingActions[pi].DesiredPos.Set(newPos);
    m_pendingActions[pi].NewPos.Set(newPos);
    m_pendingActions[pi].Action.Set(act);
    m_pendingActions[pi].Updated = true;
}
/// <summary>
/// Allocates the Q-table indexed by four player positions (row, col each),
/// a 4-valued ball-owner status, and the action index.
/// </summary>
public QTable(int rows, int cols, int teammatesCount, int myUnum)
{
    m_teammatesCount = teammatesCount;
    m_myUnum = myUnum;
    int actionCount = SoccerAction.GetActionCount(Params.MoveKings, m_teammatesCount);
    m_qTable = new double[rows, cols, rows, cols, rows, cols, rows, cols, 4, actionCount];
}
//private static Random rnd = new Random();
/// <summary>
/// Chooses an action by looking up the precomputed DP solution for the current
/// state (my cell, the first available opponent's cell, and ball ownership).
/// NOTE(review): only the first available opponent (index [0]) is encoded, and
/// GetActionFromIndex is called with moveKings hard-coded to false while other
/// call sites in this file pass Params.MoveKings — confirm both are intentional.
/// </summary>
/// <returns>The action selected by the configured DP method.</returns>
protected override SoccerAction Think()
{
    int oppIndex = this.GetAvailableOpponentsIndeces().ToArray()[0];
    // State vector: my (row, col), opponent's (row, col), ball-ownership flag
    // (0 when I own the ball, 1 otherwise); positions are shifted to 0-based.
    int[] state = new int[] {
        this.PlayerPositions[m_myIndex].Row - 1,
        this.PlayerPositions[m_myIndex].Col - 1,
        this.PlayerPositions[oppIndex].Row - 1,
        this.PlayerPositions[oppIndex].Col - 1,
        this.AmIBallOwner() ? 0 : 1 };
    long linStateInd = Utils.GetLinearIndex(state, m_valueTable.StateDimensions, m_valueTable.NumStates);
    int maxA;
    if (m_dpMethod == DPMethods.QValueIteration)
    {
        GetMaxQForState(linStateInd, out maxA);
        return(SoccerAction.GetActionFromIndex(maxA, false, this.MyUnum));
    }
    else if (m_dpMethod == DPMethods.ValueIteration)
    {
        EstimateNewValue((int)linStateInd, out maxA);
        return(SoccerAction.GetActionFromIndex(maxA, false, this.MyUnum));
    }
    else if (m_dpMethod == DPMethods.PolicyIteration)
    {
        maxA = m_piPolicy.GetValueLinear((int)linStateInd);
        return(SoccerAction.GetActionFromIndex(maxA, false, this.MyUnum));
    }
    else
    {
        throw new Exception("Unknown DP method");
    }
}
/// <summary>
/// SARSA(lambda) update: derives the reward from the modeled environment
/// transition, then delegates to the full overload with both action indices.
/// </summary>
/// <param name="prevActIndex">Index of the previously taken action.</param>
/// <param name="curActIndex">Index of the action chosen for the current state.</param>
public virtual void UpdateQ_SARSA_Lambda(int prevActIndex, int curActIndex)
{
    var prevActType = SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings);
    double r = EnvironmentModeler.GetReward(m_prevState, m_curState, prevActType);
    UpdateQ_SARSA_Lambda(r, m_prevState, m_curState, prevActIndex, curActIndex);
}
/// <summary>
/// Watkins (or naive) Q(lambda) update: derives the reward from the modeled
/// environment transition, then delegates to the full overload.
/// </summary>
/// <param name="prevActIndex">Index of the previously taken action.</param>
/// <param name="curActIndex">Index of the action chosen for the current state.</param>
/// <param name="isNaive">True for the naive variant, false for Watkins's.</param>
public virtual void UpdateQ_QL_Watkins(int prevActIndex, int curActIndex, bool isNaive)
{
    var prevActType = SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings);
    double r = EnvironmentModeler.GetReward(m_prevState, m_curState, prevActType);
    UpdateQ_QL_Watkins(r, m_prevState, m_curState, prevActIndex, curActIndex, isNaive);
}
/// <summary>
/// Keeps the previous state, updates the current state, runs the RL step to
/// pick an action, and records that action's index for the next cycle's update.
/// All Q-table access happens under m_lockQTable.
/// </summary>
/// <returns>The soccer action corresponding to the chosen action index.</returns>
protected override SoccerAction Think()
{
    int actIndex;
    lock (m_lockQTable)
    {
        m_prevState = m_curState;
        m_curState = GetCurrentState();
        m_qTable.SetCurrentState(m_curState);
        try
        {
            actIndex = RLThink();
        }
        catch (Exception ex)
        {
            // Log and rethrow; the bare throw preserves the stack trace.
            Console.WriteLine("-------------");
            Console.WriteLine(ex.ToString());
            throw;
            //act = new SoccerAction(ActionTypes.Hold);
        }
        m_prevActionIndex = actIndex;
    }
    return(SoccerAction.GetActionFromIndex(actIndex, Params.MoveKings, this.MyUnum));
}
/// <summary>
/// Initializes a new instance of the <see cref="TripleModulewIM"/> class,
/// allocating the joint Q-table (indexed by both my action and the monitored
/// opponent's action) and the internal model of the monitored opponent's
/// action distribution.
/// </summary>
/// <param name="rows">The rows.</param>
/// <param name="cols">The cols.</param>
/// <param name="teammatesCount">The teammates count.</param>
/// <param name="opponentsCount">The opponents count.</param>
/// <param name="myUnum">My unum.</param>
/// <param name="teammateToMonitor">0-based index of the teammate to monitor.</param>
/// <param name="opponentToMonitor">0-based index of the opponent to monitor.</param>
public TripleModulewIM(int rows, int cols, int teammatesCount, int opponentsCount, int myUnum, int teammateToMonitor, int opponentToMonitor)
{
    m_myUnum = myUnum;
    m_oppToMonitor = opponentToMonitor;
    m_teammateToMonitor = teammateToMonitor;
    m_teammatesCount = teammatesCount;
    m_opponentsCount = opponentsCount;
    m_QTable = new double[
        rows, cols, // my position
        rows, cols, // my teammate's position
        rows, cols, // one of opponents's position
        5, // ball owner index (0: me)(1: the-teammate)(2:We)(3: the opponent)(4: they)
        SoccerAction.GetActionCount(Params.MoveKings, teammatesCount), // number of actions_self
        SoccerAction.GetActionCount(Params.MoveKings, opponentsCount) // number of actions_opp
        ];
    m_InternalModel = new double[
        rows, cols, // my position
        rows, cols, // my teammate's position
        rows, cols, // one of opponents's position
        5, // ball owner index (0: me)(1: the-teammate)(2:We)(3: the opponent)(4: they)
        SoccerAction.GetActionCount(Params.MoveKings, opponentsCount) // number of actions_opp
        ];
}
/// <summary>
/// Maps the highest-signal cell (maxr, maxc) to an action index: Hold when it
/// is my own cell, otherwise a move along the dominant axis toward it.
/// </summary>
private int InterpretHighestSignal(State s, int maxr, int maxc)
{
    var myPos = s.Me.Position;
    int rowDelta = maxr - myPos.Row;
    int colDelta = maxc - myPos.Col;

    if (rowDelta == 0 && colDelta == 0)
        return SoccerAction.GetIndexFromAction(
            new SoccerAction(ActionTypes.Hold), Params.MoveKings, this.MyUnum);

    // Horizontal move when the column offset dominates (ties go horizontal).
    if (rowDelta == 0 || (Math.Abs(rowDelta) <= Math.Abs(colDelta)))
    {
        var horizontal = colDelta > 0 ? ActionTypes.MoveEast : ActionTypes.MoveWest;
        return SoccerAction.GetIndexFromAction(
            new SoccerAction(horizontal), Params.MoveKings, this.MyUnum);
    }

    // Vertical move when the row offset dominates.
    if (colDelta == 0 || (Math.Abs(rowDelta) > Math.Abs(colDelta)))
    {
        var vertical = rowDelta > 0 ? ActionTypes.MoveSouth : ActionTypes.MoveNorth;
        return SoccerAction.GetIndexFromAction(
            new SoccerAction(vertical), Params.MoveKings, this.MyUnum);
    }

    return 0;
}
/// <summary>
/// One RL step: pick the epsilon-greedy action for the current state, then —
/// from cycle 1 onward — apply the configured temporal-difference update using
/// the reward observed for the previous action, and optionally log performance.
/// </summary>
/// <returns>The index of the chosen action.</returns>
protected virtual int RLThink()
{
    int greedyActIndex = m_qTable.GetCurrentGreedyActionIndex();
    int actIndex = greedyActIndex;
    // Explore with probability Epsillon; otherwise keep the greedy action.
    if (Params.Epsillon > 0.0)
    {
        actIndex = ChooseActionEpsilonGreedy(
            Params.Epsillon,
            SoccerAction.GetActionCount(Params.MoveKings, TeammatesCount),
            greedyActIndex);
    }
    //SoccerAction act = SoccerAction.GetActionFromIndex(actIndex, Params.MoveKings, MyUnum);
    if (Cycle > 0) // because in cycle 0 there is no prev state
    {
        double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
            SoccerAction.GetActionTypeFromIndex(m_prevActionIndex, Params.MoveKings));
        // Dispatch to the update rule selected in Params.RLMethod.
        switch (Params.RLMethod)
        {
            case Params.RLMethods.Evolutionary:
                m_qTable.UpdateQ_Evolutionary(m_prevActionIndex, reward);
                break;
            case Params.RLMethods.Q_Zero:
                m_qTable.UpdateQ_QLearning(m_prevActionIndex);
                break;
            case Params.RLMethods.SARSA_Zero:
                m_qTable.UpdateQ_SARSA(m_prevActionIndex, actIndex);
                break;
            case Params.RLMethods.SARSA_Lambda:
                m_qTable.UpdateQ_SARSA_Lambda(m_prevActionIndex, actIndex);
                break;
            case Params.RLMethods.Q_Lambda_Watkins:
                m_qTable.UpdateQ_QL_Watkins(m_prevActionIndex, actIndex, false);
                break;
            case Params.RLMethods.Q_Lambda_Naive:
                m_qTable.UpdateQ_QL_Watkins(m_prevActionIndex, actIndex, true);
                break;
            default:
                break;
        }
        if (m_performanceLogger.Enabled)
        {
            m_performanceLogger.Log(Cycle, reward, OurScore, OppScore);
        }
    }
    return(actIndex);
}
/// <summary>
/// Busy-waits until one message has been received from every connected client
/// (or the game is stopped), dispatching each parsed message to the simulator.
/// Clients are serviced strictly in order, one at a time.
/// NOTE(review): the bare catch swallows all exceptions for a client (parse or
/// socket errors) and moves on to the next client — confirm this best-effort
/// behavior is intended.
/// </summary>
public void CheckConnectedClientsWaitingForAll()
{
    int count = m_simulator.LeftPlayersCount + m_simulator.RightPlayersCount;
    bool[] recv = new bool[count];
    for (int i = 0; i < count; ++i)
    {
        try
        {
            // Spin until client i sends something or the game stops.
            while (!recv[i] && !m_simulator.IsGameStopped)
            {
                if (m_connectedClients[i].DataAvailable)
                {
                    recv[i] = true;
                    string str = m_connectedClients[i].ReadString();
                    IMessageInfo mi = MessageParser.ParseInputMessage(str);
                    if (mi.MessageType == MessageTypes.Hold)
                    {
                        var act = new SoccerAction(ActionTypes.Hold, -1);
                        m_simulator.UpdateActionForPlayer(i, act);
                    }
                    else if (mi.MessageType == MessageTypes.Move)
                    {
                        var movmsg = mi as MoveMessage;
                        var act = new SoccerAction(movmsg.ActionType, -1);
                        m_simulator.UpdateActionForPlayer(i, act);
                    }
                    else if (mi.MessageType == MessageTypes.Pass)
                    {
                        var pmsg = mi as PassMessage;
                        m_simulator.UpdateActionForPlayer(i, new SoccerAction(ActionTypes.Pass, pmsg.DstUnum));
                    }
                    else if (mi.MessageType == MessageTypes.Home)
                    {
                        var hmsg = mi as HomeMessage;
                        if (!m_simulator.SetHomePos(i, hmsg.R, hmsg.C))
                        {
                            m_connectedClients[i].WriteString("(error could-not-set-home)");
                        }
                    }
                    else if (mi.MessageType == MessageTypes.EpisodeTimeout)
                    {
                        var etmsg = mi as EpisodeTimeoutMessage;
                        m_simulator.EpisodeTimeout(i, etmsg.IsOur, etmsg.IsPass);
                    }
                    else
                    {
                        // Unrecognized message type: report a generic error.
                        m_connectedClients[i].WriteString("(error)");
                    }
                }
            }
        }
        catch
        {
        }
    }
}
/// <summary>
/// Picks a uniformly random action from the full action set (sized by the
/// number of available teammates plus myself).
/// </summary>
/// <returns>The randomly selected soccer action.</returns>
private SoccerAction RandomPlayer()
{
    int[] mates = this.GetAvailableTeammatesUnums().ToArray();
    int actionCount = SoccerAction.GetActionCount(this.MoveKings, mates.Length + 1);
    int choice = rnd.Next(0, actionCount);
    return SoccerAction.GetActionFromIndex(choice, this.MoveKings, this.MyUnum);
}
/// <summary>
/// One-step SARSA update: moves Q(s, a) toward the target
/// reward + gamma * Q(s', a') at learning rate alpha.
/// </summary>
/// <param name="reward">Reward observed for the previous action.</param>
/// <param name="prevState">State in which prevAct was taken.</param>
/// <param name="curState">Resulting state.</param>
/// <param name="prevAct">Action taken in prevState.</param>
/// <param name="curAct">Action chosen in curState.</param>
public void UpdateQ_SARSA(double reward, State prevState, State curState, SoccerAction prevAct, SoccerAction curAct)
{
    double qPrev = GetQValue(prevState, prevAct, MyUnum);
    double qNext = GetQValue(curState, curAct, MyUnum);
    double updated = qPrev + Params.Alpha * (reward + Params.Gamma * qNext - qPrev);
    this.UpdateQValue(prevState, prevAct, MyUnum, updated);
}
/// <summary>
/// Allocates the Q-table for the self-only module:
/// (my position) x (ball-owner status) x (action).
/// </summary>
public SelfOnlyModule(int rows, int cols, int teammatesCount, int myUnum)
{
    m_myUnum = myUnum;
    m_teammatesCount = teammatesCount;
    int actionCount = SoccerAction.GetActionCount(Params.MoveKings, teammatesCount);
    // Ball-owner status: (0: Me)(1: We)(2: Opp) own the ball.
    m_QTable = new double[rows, cols, 3, actionCount];
}
/// <summary>
/// Mirrors a right-side player's action into the canonical (left-side) frame:
/// every move direction is flipped through the field center; Hold passes
/// through unchanged and Pass keeps its destination unum.
/// </summary>
/// <param name="act">The action as requested by the right-side player.</param>
/// <returns>The equivalent action in the left-side frame.</returns>
private SoccerAction GetActionForRightSide(SoccerAction act)
{
    int destination = -1;
    ActionTypes mirrored;
    switch (act.ActionType)
    {
        case ActionTypes.MoveEast: mirrored = ActionTypes.MoveWest; break;
        case ActionTypes.MoveWest: mirrored = ActionTypes.MoveEast; break;
        case ActionTypes.MoveSouth: mirrored = ActionTypes.MoveNorth; break;
        case ActionTypes.MoveNorth: mirrored = ActionTypes.MoveSouth; break;
        case ActionTypes.MoveNorthEast: mirrored = ActionTypes.MoveSouthWest; break;
        case ActionTypes.MoveSouthEast: mirrored = ActionTypes.MoveNorthWest; break;
        case ActionTypes.MoveSouthWest: mirrored = ActionTypes.MoveNorthEast; break;
        case ActionTypes.MoveNorthWest: mirrored = ActionTypes.MoveSouthEast; break;
        case ActionTypes.Pass:
            mirrored = ActionTypes.Pass;
            destination = act.DestinationUnum;
            break;
        default:
            // Hold (and anything unrecognized) is side-symmetric.
            mirrored = act.ActionType;
            break;
    }
    return new SoccerAction(mirrored, destination);
}
/// <summary>
/// Marginalizes the joint Q-value over the other agent's actions, weighting
/// each joint Q(s, a_self, a_other) by the internal model's probability of
/// a_other.
/// </summary>
/// <param name="s">The state to evaluate.</param>
/// <param name="ai_self">Index of my own action.</param>
/// <returns>The expected Q-value under the internal model.</returns>
protected override double GetQValue(State s, int ai_self)
{
    double expected = 0.0;
    int otherActionCount = SoccerAction.GetActionCount(Params.MoveKings, m_opponentsCount);
    for (int aOther = 0; aOther < otherActionCount; ++aOther)
    {
        expected += GetQValue(s, ai_self, aOther) * GetIMValue(s, aOther);
    }
    return expected;
}
/// <summary>
/// Total visit count of a state: the sum of the per-action counters over all
/// actions available in that state.
/// </summary>
/// <param name="s">The state whose visits are counted.</param>
/// <returns>The summed counter value across all actions.</returns>
public virtual int GetStateCount(State s)
{
    int total = 0;
    int actionCount = SoccerAction.GetActionCount(Params.MoveKings, TeammatesCount);
    for (int a = 0; a < actionCount; ++a)
    {
        total += GetCountValue(s, a);
    }
    return total;
}
/// <summary>
/// Initializes a new instance of the <see cref="SelfAndTeammateModule"/> class,
/// allocating the Q-table over my position, the monitored teammate's position,
/// the ball-owner status, and the action index.
/// </summary>
/// <param name="rows">The rows.</param>
/// <param name="cols">The cols.</param>
/// <param name="teammatesCount">The teammates count.</param>
/// <param name="myUnum">My unum.</param>
/// <param name="teammateToMonitor">The 0-based index of the teammate to monitor.</param>
public SelfAndTeammateModule(int rows, int cols, int teammatesCount, int myUnum, int teammateToMonitor)
{
    m_teammateToMonitor = teammateToMonitor;
    m_teammatesCount = teammatesCount;
    m_myUnum = myUnum;
    int actionCount = SoccerAction.GetActionCount(Params.MoveKings, teammatesCount);
    // Ball-owner status: (0: Me)(1: The Teammate)(2: We)(3: Opp) own the ball.
    m_QTable = new double[rows, cols, rows, cols, 4, actionCount];
}
/// <summary>
/// Initializes a new instance of the <see cref="SelfAndOneOpponentModule"/>
/// class, allocating the Q-table over my position, the monitored opponent's
/// position, the ball-owner status, and the action index.
/// </summary>
/// <param name="rows">The rows.</param>
/// <param name="cols">The cols.</param>
/// <param name="teammatesCount">The teammates count.</param>
/// <param name="myUnum">My unum.</param>
/// <param name="opponentToMonitor">0-based index of the opponent to monitor.</param>
public SelfAndOneOpponentModule(int rows, int cols, int teammatesCount, int myUnum, int opponentToMonitor)
{
    m_oppToMonitor = opponentToMonitor;
    m_teammatesCount = teammatesCount;
    m_myUnum = myUnum;
    int actionCount = SoccerAction.GetActionCount(Params.MoveKings, teammatesCount);
    // Ball-owner status: (0: Me)(1: We)(2: The Opponent)(3: They) own the ball.
    m_QTable = new double[rows, cols, rows, cols, 4, actionCount];
}
/// <summary>
/// Allocates the visit-counter table for the self-only module:
/// (my position) x (ball-owner status) x (action).
/// </summary>
public SelfOnlyCounterModule(int rows, int cols, int teammatesCount, int myUnum)
{
    m_rows = rows;
    m_cols = cols;
    m_myUnum = myUnum;
    m_teammatesCount = teammatesCount;
    m_actionsCount = SoccerAction.GetActionCount(Params.MoveKings, teammatesCount);
    m_ballOwnerStatesCount = 3; // ball owner status (0: Me)(1: We)(2: Opp) own the ball
    m_counterTable = new int[rows, cols, m_ballOwnerStatesCount, m_actionsCount];
}
/// <summary>
/// Serializes an action to the server: passes are sent as an explicit
/// "(pass N)" message, every other action is forwarded by its type.
/// A null action is silently ignored.
/// </summary>
/// <param name="act">The action to transmit; may be null.</param>
private void SendAction(SoccerAction act)
{
    if (act == null)
    {
        return;
    }
    if (act.ActionType == ActionTypes.Pass)
    {
        Send(String.Format("(pass {0})", act.DestinationUnum));
    }
    else
    {
        SendAction(act.ActionType);
    }
}
/// <summary>
/// Finds the maximum joint Q-value over my own actions, holding the other
/// agent's action fixed at ai_prime_other.
/// </summary>
/// <param name="s">The state to evaluate.</param>
/// <param name="ai_prime_other">The other agent's (predicted) action index.</param>
/// <returns>The largest Q(s, a_self, ai_prime_other) over all a_self.</returns>
private double GetBestQ(State s, int ai_prime_other)
{
    int selfActionCount = SoccerAction.GetActionCount(Params.MoveKings, m_teammatesCount);
    double best = Double.MinValue;
    for (int a = 0; a < selfActionCount; ++a)
    {
        double q = GetQValue(s, a, ai_prime_other);
        if (q > best)
        {
            best = q;
        }
    }
    return best;
}
/// <summary>
/// Q-learning update over the joint (self, opponent) action space: guesses the
/// opponent's actual action from the state transition, trains the internal
/// model on it, then backs up the joint Q-value toward the best next-state Q
/// under the model's most likely opponent action.
/// </summary>
/// <param name="prevActIndex">Index of my previous action.</param>
public override void UpdateQ_QLearning(int prevActIndex)
{
    double reward = EnvironmentModeler.GetReward(m_prevState, m_curState,
        SoccerAction.GetActionTypeFromIndex(prevActIndex, Params.MoveKings));
    // a*: the opponent action actually observed (guessed from raw states).
    int ai_star_other = EnvironmentModeler.GuessOpponentAction(m_prevOriginalState, m_curOriginalState, m_oppToMonitor);
    UpdateIM_Learning(m_prevState, ai_star_other);
    // a': the opponent action the internal model now considers most likely.
    int ai_prime_other = GetMaxIMIndex(m_prevState);
    double oldQ = GetQValue(m_prevState, prevActIndex, ai_star_other);
    double bestQ = GetBestQ(m_curState, ai_prime_other);
    double newQ = oldQ + Params.Alpha * (reward + Params.Gamma * bestQ - oldQ);
    UpdateQValue(m_prevState, prevActIndex, ai_star_other, newQ);
}
/// <summary>
/// Trains the internal model for state s: every action's weight decays by
/// (1 - Theta), and the observed action ai_star_other additionally gains Theta,
/// pulling the distribution toward the action the opponent actually took.
/// </summary>
/// <param name="s">State in which the opponent acted.</param>
/// <param name="ai_star_other">Index of the opponent's observed action.</param>
private void UpdateIM_Learning(State s, int ai_star_other)
{
    int otherActionCount = SoccerAction.GetActionCount(Params.MoveKings, m_opponentsCount);
    for (int a = 0; a < otherActionCount; ++a)
    {
        double decayed = (1 - Params.Theta) * GetIMValue(s, a);
        UpdateIMValue(s, a, a == ai_star_other ? decayed + Params.Theta : decayed);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="PartialSelfAndOneOpponentModule"/>
/// class. Player positions are encoded relative to a (2*dist+1)-square window,
/// with one extra state for "outside the window".
/// </summary>
/// <param name="rows">The rows. (Not referenced in this constructor body.)</param>
/// <param name="cols">The cols. (Not referenced in this constructor body.)</param>
/// <param name="dist">Half-width of the relative observation window.</param>
/// <param name="teammatesCount">The teammates count.</param>
/// <param name="oppsCount">The opponents count.</param>
/// <param name="myUnum">My unum.</param>
public PartialSelfAndOneOpponentModule(int rows, int cols, int dist, int teammatesCount, int oppsCount, int myUnum)
{
    m_myUnum = myUnum;
    m_teammatesCount = teammatesCount;
    m_oppsCount = oppsCount;
    m_dist = dist;
    // All cells of the (2*dist+1) x (2*dist+1) window, plus one out-of-window state.
    int playerStates = (2 * m_dist + 1) * (2 * m_dist + 1) + 1;
    m_QTable = new double[
        playerStates, // my teammate's position
        playerStates, // one of opponents's position
        playerStates, // the other opponents's position
        5, // ball owner index (0: me)(1: the-teammate)(2:Opp1)(3: Opp2)(4: Unknown)
        SoccerAction.GetActionCount(Params.MoveKings, teammatesCount) // number of actions
        ];
}