Пример #1
0
		/// <summary>
		/// Picks the move for <c>playerNumber</c> with the highest score, where a move's score is the
		/// value stored in <c>Q</c> for the current board and that move (0 when nothing is stored)
		/// plus <c>MinValue</c> of the board that results from the move.
		/// </summary>
		/// <param name="percept">Game from which the current board is copied.</param>
		/// <returns>
		/// The best-scoring move (the earliest one on ties), or <c>default(Move)</c> when no move is possible.
		/// </returns>
		public override Move Move(Virus percept) {
			VirusBoard currentState = percept.GetBoardCopy();
			Move[] actions = currentState.GetPossibleMoves(playerNumber);
			// No legal move: return the default move instead of indexing an empty array.
			if (actions.Length < 1)
				return default(Move);
			Move action = actions[0];

			// The current board is the same for every candidate, so hash it once.
			var stateHash = currentState.CustomHash();

			double max = double.NegativeInfinity;
			foreach (Move a in actions) {
				VirusBoard newState = currentState.GetUpdated(a);
				double q = 0;
				if (Q.ContainsKey(stateHash)) {
					var stored = Q[stateHash];
					var actionHash = a.CustomHash();
					if (stored.ContainsKey(actionHash)) {
						q = stored[actionHash];
					}
				}
				q += MinValue(newState, 0);
				if (q > max) {
					max = q;
					action = a;
				}
				// Early exit on a best score of exactly 1.
				// NOTE(review): presumably 1 is the value of a certain win; the exact
				// floating-point comparison is kept as it was - confirm.
				if (max == 1) {
					break;
				}
			}

			return action;
		}
Пример #2
0
        /// <summary>
        /// Selects the move for <c>playerNumber</c> that maximizes the sum of the value stored in
        /// <c>Q</c> for (current board, move) - 0 when no value is stored - and <c>MinValue</c>
        /// of the board produced by that move.
        /// </summary>
        /// <param name="percept">Game from which the current board is copied.</param>
        /// <returns>
        /// The highest-scoring move (the first one on ties), or <c>default(Move)</c> when there is no possible move.
        /// </returns>
        public override Move Move(Virus percept)
        {
            VirusBoard currentState = percept.GetBoardCopy();

            Move[] actions = currentState.GetPossibleMoves(playerNumber);
            if (actions.Length < 1)
            {
                // No legal move: avoid the out-of-range access on actions[0].
                return default(Move);
            }
            Move action = actions[0];

            // Loop-invariant: the current board does not change while candidates are scored.
            var stateHash = currentState.CustomHash();

            double max = double.NegativeInfinity;

            foreach (Move a in actions)
            {
                VirusBoard newState = currentState.GetUpdated(a);
                double     q        = 0;
                if (Q.ContainsKey(stateHash))
                {
                    var stored     = Q[stateHash];
                    var actionHash = a.CustomHash();
                    if (stored.ContainsKey(actionHash))
                    {
                        q = stored[actionHash];
                    }
                }
                q += MinValue(newState, 0);
                if (q > max)
                {
                    max    = q;
                    action = a;
                }
                // Stop searching once the best score is exactly 1.
                // NOTE(review): presumably the score of a guaranteed win - confirm.
                if (max == 1)
                {
                    break;
                }
            }

            return action;
        }
Пример #3
0
		/// <summary>
		/// Picks the move for <c>playerNumber</c> with the highest score, where a move's score is
		/// <c>Utility(currentState, newState)</c> plus <c>MinValue(newState, 0)</c> for the board
		/// that results from the move.
		/// </summary>
		/// <param name="percept">Game from which the current board is copied.</param>
		/// <returns>
		/// The best-scoring move (the earliest one on ties), or <c>default(Move)</c> when no move is possible.
		/// </returns>
		public override Move Move(Virus percept) {
			VirusBoard currentState = percept.GetBoardCopy();
			Move[] actions = currentState.GetPossibleMoves(playerNumber);
			// No legal move: return the default move instead of indexing an empty array.
			if (actions.Length < 1)
				return default(Move);
			Move action = actions[0];

			double max = double.NegativeInfinity;
			foreach (Move a in actions)
			{
				VirusBoard newState = currentState.GetUpdated(a);
				double q = Utility(currentState, newState);
				q += MinValue(newState, 0);
				if (q > max)
				{
					max = q;
					action = a;
				}
				// Nothing can score higher than positive infinity, so stop searching.
				if (double.IsPositiveInfinity(max)) {
					break;
				}
			}
			return action;
		}
Пример #4
0
        /// <summary>
        /// Selects the move for <c>playerNumber</c> that maximizes
        /// <c>Utility(currentState, newState)</c> plus <c>MinValue(newState, 0)</c>, where
        /// <c>newState</c> is the board produced by the move.
        /// </summary>
        /// <param name="percept">Game from which the current board is copied.</param>
        /// <returns>
        /// The highest-scoring move (the first one on ties), or <c>default(Move)</c> when there is no possible move.
        /// </returns>
        public override Move Move(Virus percept)
        {
            VirusBoard currentState = percept.GetBoardCopy();

            Move[] actions = currentState.GetPossibleMoves(playerNumber);
            if (actions.Length < 1)
            {
                // No legal move: avoid the out-of-range access on actions[0].
                return default(Move);
            }
            Move action = actions[0];

            double max = double.NegativeInfinity;

            foreach (Move a in actions)
            {
                VirusBoard newState = currentState.GetUpdated(a);
                double     q        = Utility(currentState, newState);
                q += MinValue(newState, 0);
                if (q > max)
                {
                    max    = q;
                    action = a;
                }
                // A score of positive infinity cannot be beaten; stop early.
                if (double.IsPositiveInfinity(max))
                {
                    break;
                }
            }

            return action;
        }
Пример #5
0
        /// <summary>
        /// Chooses a move for <c>playerNumber</c>: with a random draw above <c>randomRatio</c> it
        /// takes a move with the highest <c>TakeablePieces</c> count (long moves are penalized by
        /// one), otherwise it takes any possible move.
        /// </summary>
        /// <param name="percept">Game from which the current board is copied.</param>
        /// <returns>
        /// The chosen move, or <c>default(Move)</c> when no move is possible. When
        /// <c>deterministic</c> is set the first candidate is used instead of a random one.
        /// </returns>
        public override Move Move(Virus percept)
        {
            VirusBoard board = percept.GetBoardCopy();

            Move[] legalMoves = board.GetPossibleMoves(playerNumber);
            if (legalMoves.Length == 0)
            {
                return default(Move);
            }

            bool greedy = random.NextDouble() > randomRatio;
            if (!greedy)
            {
                // Random branch: any legal move will do.
                return deterministic ? legalMoves[0] : legalMoves[random.Next(legalMoves.Length)];
            }

            // Greedy branch: collect every move that ties for the best gain.
            List<Move> best     = new List<Move>();
            int        bestGain = -1;

            for (int i = 0; i < legalMoves.Length; i++)
            {
                Move candidate = legalMoves[i];
                int  gain      = board.TakeablePieces(candidate);
                if (candidate.IsLongMove)
                {
                    gain -= 1;
                }

                if (gain < bestGain)
                {
                    continue;
                }
                if (gain > bestGain)
                {
                    // Strictly better: earlier candidates no longer qualify.
                    bestGain = gain;
                    best.Clear();
                }
                best.Add(candidate);
            }

            return deterministic ? best[0] : best[random.Next(best.Count)];
        }
Пример #6
0
        /// <summary>
        /// Updates the stored value of the previous (state, action) pair from the board now
        /// observed, then chooses and remembers the next move.
        /// </summary>
        /// <remarks>
        /// The update only runs when <c>learn</c> is set and <c>prevState</c> is not the default
        /// board. It applies
        /// <c>Q += LearningRate(n) * (prevReward + discount * GetMaxQ(newState) - Q)</c>, where
        /// <c>n</c> is the incremented visit count kept in <c>N</c>. When the game has a winner,
        /// the observed board gets a value under action key 0: 1 if the winner is
        /// <c>playerNumber</c>, -1 for any other non-zero winner.
        /// </remarks>
        /// <param name="percept">Game from which the winner and the current board are read.</param>
        /// <returns>The move returned by <c>GetMaxExplorationFunctionA</c> for the observed board.</returns>
        public override Move Move(Virus percept)
        {
            byte       winner   = percept.Winner;
            VirusBoard newState = percept.GetBoardCopy();

            // Hash the observed board once; it is used for every lookup below.
            var newStateHash = newState.CustomHash();

            if (!Q.ContainsKey(newStateHash))
            {
                Q.Add(newStateHash, new Dictionary<UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                var prevStateHash  = prevState.CustomHash();
                var prevActionHash = prevAction.CustomHash();

                // Make sure the visit counter and the value entry for the previous pair exist.
                if (!N.ContainsKey(prevStateHash))
                {
                    N.Add(prevStateHash, new Dictionary<UInt32, int>());
                }
                var visitCounts = N[prevStateHash];
                if (!visitCounts.ContainsKey(prevActionHash))
                {
                    visitCounts.Add(prevActionHash, 0);
                }
                if (!Q.ContainsKey(prevStateHash))
                {
                    Q.Add(prevStateHash, new Dictionary<UInt32, double>());
                }
                var prevValues = Q[prevStateHash];
                if (!prevValues.ContainsKey(prevActionHash))
                {
                    prevValues.Add(prevActionHash, initvalue);
                }

                // Record the outcome of a finished game on the observed board (action key 0).
                var newValues = Q[newStateHash];
                if (winner == playerNumber)
                {
                    if (!newValues.ContainsKey(0))
                    {
                        newValues.Add(0, 1);
                    }
                }
                else if (winner != 0)
                {
                    if (!newValues.ContainsKey(0))
                    {
                        newValues.Add(0, -1);
                    }
                }

                visitCounts[prevActionHash]++;
                // The entry is read again after GetMaxQ, exactly as the original expression did.
                prevValues[prevActionHash] =
                    prevValues[prevActionHash]
                    + LearningRate(visitCounts[prevActionHash])
                    * (prevReward + discount * GetMaxQ(newState) - prevValues[prevActionHash]);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            return prevAction;
        }
Пример #7
0
		/// <summary>
		/// Updates the stored value of the previous (state, action) pair from the board now
		/// observed, then chooses and remembers the next move.
		/// </summary>
		/// <remarks>
		/// The update only runs when <c>learn</c> is set and <c>prevState</c> is not the default
		/// board. It applies
		/// <c>Q += LearningRate(n) * (prevReward + discount * GetMaxQ(newState) - Q)</c>, where
		/// <c>n</c> is the incremented visit count kept in <c>N</c>. When the game has a winner,
		/// the observed board gets a value under action key 0: 1 if the winner is
		/// <c>playerNumber</c>, -1 for any other non-zero winner.
		/// </remarks>
		/// <param name="percept">Game from which the winner and the current board are read.</param>
		/// <returns>The move returned by <c>GetMaxExplorationFunctionA</c> for the observed board.</returns>
		public override Move Move(Virus percept) {
			byte winner = percept.Winner;
			VirusBoard newState = percept.GetBoardCopy();

			// Hash the observed board once; it is used for every lookup below.
			var newStateHash = newState.CustomHash();
			if (!Q.ContainsKey(newStateHash))
				Q.Add(newStateHash, new Dictionary<UInt32, double>());

			if (learn && !prevState.Equals(default(VirusBoard))) {
				var prevStateHash = prevState.CustomHash();
				var prevActionHash = prevAction.CustomHash();

				// Make sure the visit counter and the value entry for the previous pair exist.
				if (!N.ContainsKey(prevStateHash))
					N.Add(prevStateHash, new Dictionary<UInt32, int>());
				var visitCounts = N[prevStateHash];
				if (!visitCounts.ContainsKey(prevActionHash))
					visitCounts.Add(prevActionHash, 0);
				if (!Q.ContainsKey(prevStateHash))
					Q.Add(prevStateHash, new Dictionary<UInt32, double>());
				var prevValues = Q[prevStateHash];
				if (!prevValues.ContainsKey(prevActionHash))
					prevValues.Add(prevActionHash, initvalue);

				// Record the outcome of a finished game on the observed board (action key 0).
				var newValues = Q[newStateHash];
				if (winner == playerNumber) {
					if (!newValues.ContainsKey(0))
						newValues.Add(0, 1);
				}
				else if (winner != 0) {
					if (!newValues.ContainsKey(0))
						newValues.Add(0, -1);
				}

				visitCounts[prevActionHash]++;
				// The entry is read again after GetMaxQ, exactly as the original expression did.
				prevValues[prevActionHash] =
					prevValues[prevActionHash]
					+ LearningRate(visitCounts[prevActionHash])
					* (prevReward + discount * GetMaxQ(newState) - prevValues[prevActionHash]);
			}

			prevState = newState;
			prevAction = GetMaxExplorationFunctionA(newState);
			prevReward = 0;
			return prevAction;
		}
Пример #8
0
		/// <summary>
		/// Chooses a move for <c>playerNumber</c>. If a random draw exceeds <c>randomRatio</c>,
		/// the move is taken from those with the highest <c>TakeablePieces</c> count (reduced by
		/// one for long moves); otherwise it is taken from all possible moves.
		/// </summary>
		/// <param name="percept">Game from which the current board is copied.</param>
		/// <returns>
		/// The chosen move, or <c>default(Move)</c> when no move is possible. With
		/// <c>deterministic</c> set, the first candidate is returned rather than a random one.
		/// </returns>
		public override Move Move(Virus percept) {
			VirusBoard board = percept.GetBoardCopy();
			Move[] options = board.GetPossibleMoves(playerNumber);
			if (options.Length == 0) {
				return default(Move);
			}

			bool exploit = random.NextDouble() > randomRatio;
			if (exploit) {
				// Keep every option that ties for the top score, in encounter order.
				List<Move> top = new List<Move>();
				int topScore = -1;

				foreach (Move option in options) {
					int score = board.TakeablePieces(option);
					if (option.IsLongMove) {
						score -= 1;
					}

					if (score < topScore) {
						continue;
					}
					if (score > topScore) {
						topScore = score;
						top.Clear();
					}
					top.Add(option);
				}

				return deterministic ? top[0] : top[random.Next(top.Count)];
			}

			// Exploration: any possible move.
			return deterministic ? options[0] : options[random.Next(options.Length)];
		}
Пример #9
0
        /// <summary>
        /// Updates the stored value of the previous (state, action) pair from the board now
        /// observed, then chooses and remembers the next move.
        /// </summary>
        /// <remarks>
        /// The update only runs when <c>learn</c> is set and <c>prevState</c> is not the default
        /// board. It applies
        /// <c>Q += LearningRate(n) * (prevReward + discount * GetMaxQ(newState) - Q)</c>, where
        /// <c>n</c> is the incremented visit count kept in <c>N</c>. While learning, the chosen
        /// move also gets a <c>Q</c> entry initialized to <c>initvalue</c> if it has none.
        /// </remarks>
        /// <param name="percept">Game from which the current board is copied.</param>
        /// <returns>The move returned by <c>GetMaxExplorationFunctionA</c> for the observed board.</returns>
        public override Move Move(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();

            // Hash the observed board once; it is used for every lookup below.
            var newStateHash = newState.CustomHash();

            if (!Q.ContainsKey(newStateHash))
            {
                Q.Add(newStateHash, new Dictionary<UInt32, double>());
            }

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                var prevStateHash  = prevState.CustomHash();
                var prevActionHash = prevAction.CustomHash();

                if (!N.ContainsKey(prevStateHash))
                {
                    N.Add(prevStateHash, new Dictionary<UInt32, int>());
                }
                var visitCounts = N[prevStateHash];
                if (!visitCounts.ContainsKey(prevActionHash))
                {
                    visitCounts.Add(prevActionHash, 0);
                }

                // The value entry is normally created at the end of the previous call, but only
                // when learning was enabled at that time. Ensure it here so that switching
                // learning on between two moves does not fail with a missing key.
                if (!Q.ContainsKey(prevStateHash))
                {
                    Q.Add(prevStateHash, new Dictionary<UInt32, double>());
                }
                var prevValues = Q[prevStateHash];
                if (!prevValues.ContainsKey(prevActionHash))
                {
                    prevValues.Add(prevActionHash, initvalue);
                }

                visitCounts[prevActionHash]++;
                // The entry is read again after GetMaxQ, exactly as the original expression did.
                prevValues[prevActionHash] =
                    prevValues[prevActionHash]
                    + LearningRate(visitCounts[prevActionHash])
                    * (prevReward + discount * GetMaxQ(newState) - prevValues[prevActionHash]);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            if (learn)
            {
                // Seed the value of the chosen move so the next call can update it.
                var chosenValues = Q[newStateHash];
                var chosenHash   = prevAction.CustomHash();
                if (!chosenValues.ContainsKey(chosenHash))
                {
                    chosenValues.Add(chosenHash, initvalue);
                }
            }
            return prevAction;
        }
Пример #10
0
		/// <summary>
		/// Updates the stored value of the previous (state, action) pair from the board now
		/// observed, then chooses and remembers the next move.
		/// </summary>
		/// <remarks>
		/// The update only runs when <c>learn</c> is set and <c>prevState</c> is not the default
		/// board. It applies
		/// <c>Q += LearningRate(n) * (prevReward + discount * GetMaxQ(newState) - Q)</c>, where
		/// <c>n</c> is the incremented visit count kept in <c>N</c>. While learning, the chosen
		/// move also gets a <c>Q</c> entry initialized to <c>initvalue</c> if it has none.
		/// </remarks>
		/// <param name="percept">Game from which the current board is copied.</param>
		/// <returns>The move returned by <c>GetMaxExplorationFunctionA</c> for the observed board.</returns>
		public override Move Move(Virus percept) {
			VirusBoard newState = percept.GetBoardCopy();

			// Hash the observed board once; it is used for every lookup below.
			var newStateHash = newState.CustomHash();
			if (!Q.ContainsKey(newStateHash))
				Q.Add(newStateHash, new Dictionary<UInt32, double>());

			if (learn && !prevState.Equals(default(VirusBoard))) {
				var prevStateHash = prevState.CustomHash();
				var prevActionHash = prevAction.CustomHash();

				if (!N.ContainsKey(prevStateHash))
					N.Add(prevStateHash, new Dictionary<UInt32, int>());
				var visitCounts = N[prevStateHash];
				if (!visitCounts.ContainsKey(prevActionHash))
					visitCounts.Add(prevActionHash, 0);

				// The value entry is normally created at the end of the previous call, but only
				// when learning was enabled at that time. Ensure it here so that switching
				// learning on between two moves does not fail with a missing key.
				if (!Q.ContainsKey(prevStateHash))
					Q.Add(prevStateHash, new Dictionary<UInt32, double>());
				var prevValues = Q[prevStateHash];
				if (!prevValues.ContainsKey(prevActionHash))
					prevValues.Add(prevActionHash, initvalue);

				visitCounts[prevActionHash]++;
				// The entry is read again after GetMaxQ, exactly as the original expression did.
				prevValues[prevActionHash] =
					prevValues[prevActionHash]
					+ LearningRate(visitCounts[prevActionHash])
					* (prevReward + discount * GetMaxQ(newState) - prevValues[prevActionHash]);
			}

			prevState = newState;
			prevAction = GetMaxExplorationFunctionA(newState);
			prevReward = 0;
			if (learn) {
				// Seed the value of the chosen move so the next call can update it.
				var chosenValues = Q[newStateHash];
				var chosenHash = prevAction.CustomHash();
				if (!chosenValues.ContainsKey(chosenHash))
					chosenValues.Add(chosenHash, initvalue);
			}
			return prevAction;
		}