Example #1
        /// <summary>
        /// Returns the maximum Q-value found for any move that can be performed in the given state.
        /// If there is no data for a move, it is assumed to have the value [initvalue].
        /// If there is no data for the state, the return value is 0.
        /// </summary>
        /// <param name="state">The board state to evaluate.</param>
        /// <returns>The maximum Q-value over the moves available in the state.</returns>
        private double GetMaxQ(VirusBoard state)
        {
            if (state.Equals(default(VirusBoard)) || !Q.ContainsKey(state.CustomHash()))
            {
                return 0;
            }

            double max = -10;

            Move[] actions = state.GetPossibleMoves(playerNumber);
            foreach (Move a in actions)
            {
                double value = 0;
                if (!Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                {
                    value = initvalue;
                }
                else
                {
                    value = Q[state.CustomHash()][a.CustomHash()];
                }
                if (value > max)
                {
                    max = value;
                }
            }
            return max;
        }
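
In symbols, this helper computes the greedy value of a state under the current table, which is the quantity the learning updates further below bootstrap from:

	V(s) = \max_{a \in A(s)} \tilde{Q}(s,a), \qquad
	\tilde{Q}(s,a) = \begin{cases} Q(s,a) & \text{if an entry exists} \\ \text{initvalue} & \text{otherwise} \end{cases}

with V(s) taken to be 0 when the state itself has no entry (or is the default board).
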
Example #2
        /// <summary>
        /// Picks the move with the highest adjusted score: unseen moves are scored
        /// optimistically, a stored Q-value of 1 or more is taken immediately, and the
        /// remaining scores come from the Q-table, either randomised (with probability
        /// RandomRate) or boosted by an exploration bonus. Unknown states fall back to
        /// the first legal move.
        /// </summary>
        private Move GetMaxExplorationFunctionA(VirusBoard state)
        {
            double max    = double.NegativeInfinity;
            Move   action = default(Move);

            Move[] actions = state.GetPossibleMoves(playerNumber);
            if (!Q.ContainsKey(state.CustomHash()))
            {
                return actions.Length > 0 ? actions[0] : action;
            }

            bool berandom = random.NextDouble() < RandomRate;

            foreach (Move a in actions)
            {
                double value = 0;

                if (Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                {
                    if (Q[state.CustomHash()][a.CustomHash()] >= 1)
                    {
                        value  = 1;
                        max    = value;
                        action = a;
                        break;
                    }
                    else if (Q[state.CustomHash()][a.CustomHash()] <= -1)
                    {
                        value = -1;
                    }
                    else
                    {
                        if (berandom)
                        {
                            value = random.NextDouble();
                        }
                        else
                        {
                            value = Q[state.CustomHash()][a.CustomHash()] + ((explore && !(RandomRate > 0)) ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
                        }
                    }
                }
                else
                {
                    value = 1;
                }
                if (value > max)
                {
                    max    = value;
                    action = a;
                }
            }
            return action;
        }
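
The selection above mixes optimism for unseen moves, an immediate stop on a move whose stored value has reached 1, occasional random scoring, and a count-based exploration bonus. Below is a minimal, self-contained sketch of the same "argmax over adjusted values" idea; the stand-in tables q and n and the bonus form bonusWeight / (1 + n) are assumptions for illustration, since ExplorationRate itself is not shown in these excerpts.

	using System;
	using System.Collections.Generic;
	using System.Linq;

	static class ExplorationSketch
	{
		// Score every candidate action hash: unseen actions get an optimistic 1,
		// seen actions get their Q-value plus a count-based bonus, then take the argmax.
		public static uint PickActionHash(
			IReadOnlyDictionary<uint, double> q,   // Q-values for the current state (assumed shape)
			IReadOnlyDictionary<uint, int> n,      // visit counts for the current state (assumed shape)
			IReadOnlyList<uint> actionHashes,      // hashes of the legal moves
			double bonusWeight = 1.0)              // hypothetical weight of the exploration bonus
		{
			if (actionHashes.Count == 0)
				throw new ArgumentException("No legal moves.", nameof(actionHashes));
			return actionHashes
				.OrderByDescending(a => q.TryGetValue(a, out double value)
					? value + bonusWeight / (1 + (n.TryGetValue(a, out int count) ? count : 0))
					: 1.0)
				.First();
		}
	}
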
Example #3
		// Depth-limited lookahead: the best total score this player can reach from the given state.
		private double MaxValue(VirusBoard state, int iteration) {
			iteration++;
			if (state.winner == playerNumber) {
				return double.PositiveInfinity;
			}
			if (state.winner != playerNumber && state.winner != 0) {
				return double.NegativeInfinity;
			}

			if (iteration < searchLength) {
				Move[] actions = state.GetPossibleMoves(playerNumber);

				double max = double.NegativeInfinity;
				foreach (Move a in actions) {
					VirusBoard newState = state.GetUpdated(a);
					
					double q = Utility(state, newState);
					if (Q.ContainsKey(state.CustomHash())) {
						if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
							q = Q[state.CustomHash()][a.CustomHash()];
						}
					}

					q += MinValue(newState, iteration);
					if (q > max) {
						max = q;
					}
					if (max == double.PositiveInfinity) {
						return max;
					}
				}

				return max;
			}
			else {
				return 0;
			}
		}
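
Written as a recurrence, the search above is a depth-limited two-player lookahead in which each step is scored by the stored Q-value when one exists and by a utility estimate otherwise (a sketch of what the code computes, not an exact transcription of the iteration counter):

	\mathrm{Max}(s,d) =
	\begin{cases}
	+\infty & \text{if this player has already won in } s \\
	-\infty & \text{if the opponent has already won in } s \\
	0 & \text{if the depth limit (searchLength) is reached} \\
	\max_{a \in A(s)} \big[\, \hat{q}(s,a) + \mathrm{Min}(s_a,\, d+1) \,\big] & \text{otherwise}
	\end{cases}

where s_a = state.GetUpdated(a) and \hat{q}(s,a) is Q(s,a) if stored, otherwise Utility(state, newState).
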
Example #4
		// Depth-limited lookahead: the worst-case score when the opponent chooses the next move.
		private double MinValue(VirusBoard state, int iteration) {
			iteration++;
			if (state.winner == playerNumber) {
				return double.PositiveInfinity;
			}
			if (state.winner != playerNumber && state.winner != 0) {
				return double.NegativeInfinity;
			}

			if (iteration < searchLength) {
				byte opponent = (playerNumber == 1) ? (byte)2 : (byte)1;
				Move[] actions = state.GetPossibleMoves(opponent);

				double min = double.PositiveInfinity;
				foreach (Move a in actions) {
					VirusBoard newState = state.GetUpdated(a);
					double q = Utility(state, newState);
					if (Q.ContainsKey(state.CustomHash())) {
						if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
							q = -Q[state.CustomHash()][a.CustomHash()];
						}
					}

					q += MaxValue(newState, iteration);
					if (q < min) {
						min = q;
					}
					if (min == double.NegativeInfinity) {
						return min;
					}
				}

				return min;
			}
			else {
				return 0;
			}
		}
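
MinValue is the mirror image for the opponent's turn: the same terminal and depth cases, a minimum instead of a maximum, and the stored Q-value negated in the lookup:

	\mathrm{Min}(s,d) = \min_{a \in A_{\mathrm{opp}}(s)} \big[\, \hat{q}_{\mathrm{opp}}(s,a) + \mathrm{Max}(s_a,\, d+1) \,\big],
	\qquad
	\hat{q}_{\mathrm{opp}}(s,a) = \begin{cases} -Q(s,a) & \text{if stored} \\ \mathrm{Utility}(s, s_a) & \text{otherwise} \end{cases}
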
Example #5
		// Alternative exploration-function variant: creates the Q entry for the state on
		// demand and applies the exploration bonus whenever explore is set.
		private Move GetMaxExplorationFunctionA(VirusBoard state) {
			double max = double.NegativeInfinity;
			Move action = default(Move);
			Move[] actions = state.GetPossibleMoves(playerNumber);

			bool berandom = random.NextDouble() < RandomRate;
			foreach (Move a in actions) {
				double value = 0;
				if (!Q.ContainsKey(state.CustomHash())) {
					Q.Add(state.CustomHash(), new Dictionary<UInt32, double>());
				}
				if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
					if (Q[state.CustomHash()][a.CustomHash()] >= 1) {
						value = 1;
						max = value;
						action = a;
						break;
					}
					else {
						if (berandom)
							value = random.NextDouble();
						else
							value = Q[state.CustomHash()][a.CustomHash()] + (explore ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
					}
				}
				else {
					value = 1;
				}
				if (value > max) {
					max = value;
					action = a;
				}
			}
			return action;
		}
Example #6
		// Variant of GetMaxQ that assumes Q already holds an entry for the state and
		// treats moves without data as having value 0.
		private double GetMaxQ(VirusBoard state) {
			double max = -10;
			Move[] actions = state.GetPossibleMoves(playerNumber);
			foreach (Move a in actions) {
				double value = 0;
				if (!Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
					value = 0;
				}
				else {
					value = Q[state.CustomHash()][a.CustomHash()];
				}
				if (value > max)
					max = value;
			}
			if (Q[state.CustomHash()].ContainsKey(0)) {
				if (Q[state.CustomHash()][0] > max)
					max = Q[state.CustomHash()][0];
			}
			return max;
		}
Example #7
		public void Learn(VirusBoard startstate, VirusBoard endstate, Move action, double reward) {
			// -- Make sure the entries for the state and action exist -- 
			if (!Q.ContainsKey(startstate.CustomHash()))
				Q.Add(startstate.CustomHash(), new Dictionary<UInt32, double>());
			if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
				Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);

			if (!N.ContainsKey(startstate.CustomHash()))
				N.Add(startstate.CustomHash(), new Dictionary<UInt32, int>());
			if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
				N[startstate.CustomHash()].Add(action.CustomHash(), 0);

			// -- Perform the update of Q-values --
			N[startstate.CustomHash()][action.CustomHash()]++;
			Q[startstate.CustomHash()][action.CustomHash()] =
				Q[startstate.CustomHash()][action.CustomHash()]
				+ LearningRate(N[startstate.CustomHash()][action.CustomHash()])
				* (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);
		}
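
The assignment in Learn is the standard tabular Q-learning rule, with a visit-count-dependent learning rate and GetMaxQ supplying the greedy value of the successor state:

	Q(s,a) \leftarrow Q(s,a) + \alpha\big(N(s,a)\big)\,\big[\, r + \gamma \max_{a'} Q(s',a') - Q(s,a) \,\big]

where s = startstate, a = action, s' = endstate, r = reward, \alpha = LearningRate and \gamma = discount.
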
Example #8
File: QAgent.cs, Project: Armienn/Virus
		public override Move Move(Virus percept) {
			VirusBoard newState = percept.GetBoardCopy();
			if (!Q.ContainsKey(newState.CustomHash()))
				Q.Add(newState.CustomHash(), new Dictionary<UInt32, double>());

			// Apply the delayed Q-update for the previous move, now that its resulting state is known.
			if (learn && !prevState.Equals(default(VirusBoard))) {

				if (!N.ContainsKey(prevState.CustomHash()))
					N.Add(prevState.CustomHash(), new Dictionary<UInt32, int>());
				if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
					N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);

				N[prevState.CustomHash()][prevAction.CustomHash()]++;
				Q[prevState.CustomHash()][prevAction.CustomHash()] =
					Q[prevState.CustomHash()][prevAction.CustomHash()]
					+ LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
					* (prevReward + discount * GetMaxQ(newState) - Q[prevState.CustomHash()][prevAction.CustomHash()]);
			}

			prevState = newState;
			prevAction = GetMaxExplorationFunctionA(newState);
			prevReward = 0;
			if (learn && !Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
				Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);
			return prevAction;
		}
Example #9
        private double Learn(VirusMemory memory)
        {
            VirusBoard startstate = memory.StartState;
            VirusBoard endstate   = memory.EndState;
            Move       action     = memory.Action;
            double     reward     = memory.Reward;

            // -- Make sure the entries for the state and action exist --
            if (!Q.ContainsKey(startstate.CustomHash()))
            {
                Q.Add(startstate.CustomHash(), new Dictionary<UInt32, double>());
            }
            if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
            {
                Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);
            }

            if (!N.ContainsKey(startstate.CustomHash()))
            {
                N.Add(startstate.CustomHash(), new Dictionary<UInt32, int>());
            }
            if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
            {
                N[startstate.CustomHash()].Add(action.CustomHash(), 0);
            }

            // -- Perform the update of Q-values --
            N[startstate.CustomHash()][action.CustomHash()]++;
            double change = LearningRate(N[startstate.CustomHash()][action.CustomHash()])
                            * (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);

            Q[startstate.CustomHash()][action.CustomHash()] =
                Q[startstate.CustomHash()][action.CustomHash()] + change;
            return change;
        }
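
This replay variant applies the same rule but also returns the correction that was actually added to the table:

	\Delta = \alpha\big(N(s,a)\big)\,\big[\, r + \gamma \max_{a'} Q(s',a') - Q(s,a) \,\big], \qquad Q(s,a) \leftarrow Q(s,a) + \Delta

A caller replaying stored memories could, for instance, watch the magnitude of the returned change to decide when further passes stop being useful; that usage is an assumption here, as the calling code is not part of these excerpts.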