Example #1
		// Minimax max node: value of the best move available to this agent.
		private double MaxValue(VirusBoard state, int iteration)
		{
			iteration++;
			if (state.winner == playerNumber)
			{
				return double.PositiveInfinity;
			}
			if (state.winner != playerNumber && state.winner != 0)
			{
				return double.NegativeInfinity;
			}

			if (iteration < searchLength) {
				Move[] actions = state.GetPossibleMoves(playerNumber);

				double max = double.NegativeInfinity;
				foreach (Move a in actions) {
					VirusBoard newState = state.GetUpdated(a);
					double q = Utility(state, newState);
					q += MinValue(newState, iteration);
					if (q > max) {
						max = q;
					}
					if (max == double.PositiveInfinity) {
						return max;
					}
				}

				return max;
			}
			else {
				// Search depth exhausted: score the position as neutral.
				return 0;
			}
		}
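
MaxValue scores a max node; at the root the agent still has to pick a concrete move. Below is a minimal sketch of such a root-level chooser, reusing only the members visible in these examples (playerNumber, GetPossibleMoves, GetUpdated, Utility, MinValue); ChooseMove is a hypothetical name, not taken from the source.

		// Hypothetical root-level selection built on MaxValue/MinValue above.
		private Move ChooseMove(VirusBoard state)
		{
			Move best = default(Move);
			double bestValue = double.NegativeInfinity;
			foreach (Move a in state.GetPossibleMoves(playerNumber))
			{
				VirusBoard next = state.GetUpdated(a);
				// Immediate utility of the move plus the value of the opponent's best reply.
				double value = Utility(state, next) + MinValue(next, 0);
				if (value > bestValue)
				{
					bestValue = value;
					best = a;
				}
			}
			return best;
		}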
Example #2
        public void Learn(VirusBoard startstate, VirusBoard endstate, Move action)
        {
            // -- Calculate the reward the change of states represents --
            double reward = Reward(startstate, endstate);

            Learn(startstate, endstate, action, reward);
        }
Example #3
        public void LoadlongTermMermory(String file)
        {
            // One episode per line: "<significance>:<start>:<end>:<action>:<reward>:..."
            NeaReader reader = new NeaReader(new StreamReader(file + ".MQ"));

            while (reader.Peek() != -1)
            {
                List <VirusMemory> memories   = new List <VirusMemory>();
                VirusBoard         startState = new VirusBoard();
                VirusBoard         endState   = new VirusBoard();
                Move   action = new Move();
                double reward;
                double significance;
                string data;

                data = reader.ReadLine();
                NeaReader r = new NeaReader(data);
                significance = double.Parse(r.ReadUntil(":"));
                while (r.Peek() != -1)
                {
                    startState.Load(r.ReadUntil(":"));
                    endState.Load(r.ReadUntil(":"));
                    action.Load(r.ReadUntil(":"));
                    reward = double.Parse(r.ReadUntil(":"));
                    memories.Add(new VirusMemory(startState, action, endState, reward));
                }


                LongTermMemory.Add(new VirusMemoryEpisode(memories.ToArray(), significance));
            }
            reader.Close();
        }
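
The reader above implies the on-disk layout: one episode per line, the significance first, then repeated start:end:action:reward records, all colon-separated. A hypothetical writer for that format follows; the repository's actual save routine is not among these excerpts, and Save(), Significance, and Memories are assumed member names inferred from the loader.

        // Hypothetical counterpart to LoadlongTermMermory. The serialization calls and
        // VirusMemoryEpisode members used here are assumptions, not confirmed by the source.
        public void SaveLongTermMemory(String file)
        {
            using (StreamWriter writer = new StreamWriter(file + ".MQ"))
            {
                foreach (VirusMemoryEpisode episode in LongTermMemory)
                {
                    StringBuilder line = new StringBuilder();
                    line.Append(episode.Significance).Append(":");
                    foreach (VirusMemory m in episode.Memories)
                    {
                        line.Append(m.StartState.Save()).Append(":");
                        line.Append(m.EndState.Save()).Append(":");
                        line.Append(m.Action.Save()).Append(":");
                        line.Append(m.Reward).Append(":");
                    }
                    writer.WriteLine(line.ToString());
                }
            }
        }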
Example #4
        /// <summary>
        /// Returns the maximum Q-value found for any moves performable in the given state.
        /// If there is no data for a move, it is considered to have [initvalue].
        /// If there is no data for the state, the return value will be 0.
        /// </summary>
        /// <param name="state"></param>
        /// <returns></returns>
        private double GetMaxQ(VirusBoard state)
        {
            if (state.Equals(default(VirusBoard)) || !Q.ContainsKey(state.CustomHash()))
            {
                return(0);
            }

            double max = -10; // Sentinel lower than any expected Q-value.

            Move[] actions = state.GetPossibleMoves(playerNumber);
            foreach (Move a in actions)
            {
                double value = 0;
                if (!Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                {
                    value = initvalue;
                }
                else
                {
                    value = Q[state.CustomHash()][a.CustomHash()];
                }
                if (value > max)
                {
                    max = value;
                }
            }
            return(max);
        }
Example #5
        private double Learn(VirusMemory memory)
        {
            VirusBoard startstate = memory.StartState;
            VirusBoard endstate   = memory.EndState;
            Move       action     = memory.Action;
            double     reward     = memory.Reward;

            // -- Make sure the entries for the state and action exist --
            if (!Q.ContainsKey(startstate.CustomHash()))
            {
                Q.Add(startstate.CustomHash(), new Dictionary <UInt32, double>());
            }
            if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
            {
                Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);
            }

            if (!N.ContainsKey(startstate.CustomHash()))
            {
                N.Add(startstate.CustomHash(), new Dictionary <UInt32, int>());
            }
            if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
            {
                N[startstate.CustomHash()].Add(action.CustomHash(), 0);
            }

            // -- Perform the update of Q-values --
            N[startstate.CustomHash()][action.CustomHash()]++;
            double change = LearningRate(N[startstate.CustomHash()][action.CustomHash()])
                            * (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);

            Q[startstate.CustomHash()][action.CustomHash()] =
                Q[startstate.CustomHash()][action.CustomHash()] + change;
            return(change);
        }
Example #6
        public void Learn(VirusBoard startstate, VirusBoard endstate, Move action, double reward)
        {
            // -- Make sure the entries for the state and action exist --
            if (!Q.ContainsKey(startstate.CustomHash()))
            {
                Q.Add(startstate.CustomHash(), new Dictionary <UInt32, double>());
            }
            if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
            {
                Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);
            }

            if (!N.ContainsKey(startstate.CustomHash()))
            {
                N.Add(startstate.CustomHash(), new Dictionary <UInt32, int>());
            }
            if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
            {
                N[startstate.CustomHash()].Add(action.CustomHash(), 0);
            }

            // -- Perform the update of Q-values --
            N[startstate.CustomHash()][action.CustomHash()]++;
            Q[startstate.CustomHash()][action.CustomHash()] =
                Q[startstate.CustomHash()][action.CustomHash()]
                + LearningRate(N[startstate.CustomHash()][action.CustomHash()])
                * (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);
        }
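
The assignment above is the standard tabular Q-learning update. Writing alpha(n) for LearningRate(n), gamma for discount, and r for the reward, with GetMaxQ(endstate) supplying the max over follow-up moves, it computes:

    Q(s, a) <- Q(s, a) + alpha(N(s, a)) * (r + gamma * max_a' Q(s', a') - Q(s, a))

The step size is a function of the visit count N(s, a), so in the usual scheme an often-updated state-action pair settles as its count grows.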
Example #7
		public override void EndGame(Virus percept) {
			if (learn) {
				double reward = 0;
				byte winner = percept.Winner;
				if (winner == playerNumber)
					reward = 1;
				else if (winner != 0)
					reward = -1;

				// Ensure the Q and N entries exist before the terminal update.
				if (!N.ContainsKey(prevState.CustomHash()))
					N.Add(prevState.CustomHash(), new Dictionary<UInt32, int>());
				if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
					N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);
				if (!Q.ContainsKey(prevState.CustomHash()))
					Q.Add(prevState.CustomHash(), new Dictionary<UInt32, double>());
				if (!Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
					Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);

				N[prevState.CustomHash()][prevAction.CustomHash()]++;
				// Terminal update: no successor state, so the target is the bare reward.
				Q[prevState.CustomHash()][prevAction.CustomHash()] =
					Q[prevState.CustomHash()][prevAction.CustomHash()]
					+ LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
					* (reward - Q[prevState.CustomHash()][prevAction.CustomHash()]);
			}

			prevState = default(VirusBoard);
			prevAction = default(Move);
			prevReward = 0;
		}
Example #8
        private Move GetMaxExplorationFunctionA(VirusBoard state)
        {
            double max    = double.NegativeInfinity;
            Move   action = default(Move);

            Move[] actions = state.GetPossibleMoves(playerNumber);
            if (!Q.ContainsKey(state.CustomHash()))
            {
                return(actions.Length > 0 ? actions[0] : action);
            }

            bool berandom = random.NextDouble() < RandomRate;

            foreach (Move a in actions)
            {
                double value = 0;

                if (Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                {
                    if (Q[state.CustomHash()][a.CustomHash()] >= 1)
                    {
                        // Known winning move: take it immediately.
                        value  = 1;
                        max    = value;
                        action = a;
                        break;
                    }
                    else if (Q[state.CustomHash()][a.CustomHash()] <= -1)
                    {
                        value = -1;
                    }
                    else
                    {
                        if (berandom)
                        {
                            // Epsilon-style random step: score the move randomly.
                            value = random.NextDouble();
                        }
                        else
                        {
                            // Exploration bonus applies only when epsilon-randomness is disabled.
                            value = Q[state.CustomHash()][a.CustomHash()] + ((explore && RandomRate <= 0) ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
                        }
                    }
                }
                else
                {
                    // Unvisited moves get an optimistic value to encourage trying them.
                    value = 1;
                }
                if (value > max)
                {
                    max    = value;
                    action = a;
                }
            }
            return(action);
        }
Example #9
        public override Move Move(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();

            if (!prevState.Equals(default(VirusBoard)))
            {
                ShortTermMemory.Add(new VirusMemory(prevState, prevAction, newState));
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            return(prevAction);
        }
Example #10
        public override Move Move(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();

            if (learn && !prevState.Equals(default(VirusBoard)))
            {
                Learn(prevState, newState, prevAction);
            }

            prevState  = newState;
            prevAction = GetMaxExplorationFunctionA(newState);
            prevReward = 0;
            return(prevAction);
        }
Example #11
        private Move GetAnnMove(Virus percept)
        {
            VirusBoard currentState = percept.GetBoardCopy();

            Move[] actions = currentState.GetPossibleMoves(playerNumber);
            Move   move    = OutputsToMove(network.Compute(BoardToInput(currentState)));

            if (actions.Contains(move))
            {
                using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
                    writer.WriteLine("using learned move");
                return(move);
            }

            // The network's move is illegal here; fall back to the first legal move.
            using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
                writer.WriteLine("using default move");
            return(actions[0]);
        }
Example #12
		// Encodes the board as a flat vector: 1 = own piece, -1 = opponent, 0 = empty.
		private double[] BoardToInput(VirusBoard board)
		{
			double[] inputs = new double[board.Size * board.Size];
			for(int i = 0; i < board.Size; i++)
			{
				for (int j = 0; j < board.Size; j++)
				{
					byte fieldState = board.board[i, j];
					if (fieldState == 0)
						inputs[i * board.Size + j] = 0;
					else if (fieldState == playerNumber)
						inputs[i * board.Size + j] = 1;
					else
						inputs[i * board.Size + j] = -1;
				}
			}
			return inputs;
		}
Example #13
		public void Learn(VirusBoard startstate, VirusBoard endstate, Move action, double reward) {
			// -- Make sure the entries for the state and action exist -- 
			if (!Q.ContainsKey(startstate.CustomHash()))
				Q.Add(startstate.CustomHash(), new Dictionary<UInt32, double>());
			if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
				Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);

			if (!N.ContainsKey(startstate.CustomHash()))
				N.Add(startstate.CustomHash(), new Dictionary<UInt32, int>());
			if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
				N[startstate.CustomHash()].Add(action.CustomHash(), 0);

			// -- Perform the update of Q-values --
			N[startstate.CustomHash()][action.CustomHash()]++;
			Q[startstate.CustomHash()][action.CustomHash()] =
				Q[startstate.CustomHash()][action.CustomHash()]
				+ LearningRate(N[startstate.CustomHash()][action.CustomHash()])
				* (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);
		}
Example #14
		/// <summary>
		/// Returns the maximum Q-value found for any moves performable in the given state.
		/// If there is no data for a move, it is considered to have [initvalue].
		/// If there is no data for the state, the return value will be 0.
		/// </summary>
		/// <param name="state"></param>
		/// <returns></returns>
		private double GetMaxQ(VirusBoard state) {
			if (state.Equals(default(VirusBoard)) || !Q.ContainsKey(state.CustomHash()))
				return 0;

			double max = -10; // Sentinel lower than any expected Q-value.
			Move[] actions = state.GetPossibleMoves(playerNumber);
			foreach (Move a in actions) {
				double value = 0;
				if (!Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
					value = initvalue;
				}
				else {
					value = Q[state.CustomHash()][a.CustomHash()];
				}
				if (value > max)
					max = value;
			}
			return max;
		}
Example #15
        public override void EndGame(Virus percept)
        {
            VirusBoard newState = percept.GetBoardCopy();
            double     reward   = 0;

            if (percept.Winner == playerNumber)
            {
                reward = 1;
            }
            else if (percept.Winner != playerNumber && percept.Winner != 0)
            {
                reward = -1;
            }

            ShortTermMemory.Add(new VirusMemory(prevState, prevAction, newState, reward));

            prevState  = default(VirusBoard);
            prevAction = default(Move);
            prevReward = 0;
        }
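
The memories accumulated this way are presumably replayed through the private Learn(VirusMemory) overload from Example #5. A minimal sketch of such a replay pass follows; Replay is a hypothetical name, and the actual training loop is not among these excerpts.

        // Hypothetical replay pass reusing Learn(VirusMemory) from Example #5.
        // The summed absolute Q-change is one plausible source for the episode
        // significance value seen in LoadlongTermMermory.
        private double Replay(VirusMemory[] memories)
        {
            double significance = 0;
            foreach (VirusMemory m in memories)
            {
                significance += Math.Abs(Learn(m));
            }
            return significance;
        }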
Example #16
		private double GetMaxQ(VirusBoard state) {
			// Caller guarantees Q already contains this state (see Move in Example #17).
			double max = -10; // Sentinel lower than any expected Q-value.
			Move[] actions = state.GetPossibleMoves(playerNumber);
			foreach (Move a in actions) {
				double value = 0;
				if (!Q[state].ContainsKey(a)) {
					value = 0;
				}
				else {
					value = Q[state][a];
				}
				if (value > max)
					max = value;
			}
			// default(Move) is a pseudo-action under which terminal values are stored (see Example #17).
			if (Q[state].ContainsKey(default(Move))) {
				if (Q[state][default(Move)] > max)
					max = Q[state][default(Move)];
			}
			return max;
		}
Example #17
		public override Move Move(Virus percept) {

			//Check whether we have reached a terminal state
			byte winner = percept.Winner;
			VirusBoard newState = percept.GetBoardCopy();
			if (!Q.ContainsKey(newState))
				Q.Add(newState, new Dictionary<Move, double>());

			if (!prevState.Equals(default(VirusBoard))) {
				if (!N.ContainsKey(prevState))
					N.Add(prevState, new Dictionary<Move, int>());
				if (!N[prevState].ContainsKey(prevAction))
					N[prevState].Add(prevAction, 0);
				if (!Q.ContainsKey(prevState))
					Q.Add(prevState, new Dictionary<Move, double>());
				if (!Q[prevState].ContainsKey(prevAction))
					Q[prevState].Add(prevAction, 0);

				// Store the terminal value under the default(Move) pseudo-action read by GetMaxQ.
				if (winner == playerNumber) {
					if (!Q[newState].ContainsKey(default(Move)))
						Q[newState].Add(default(Move), 1);
				}
				else if (winner != playerNumber && winner != 0) {
					if (!Q[newState].ContainsKey(default(Move)))
						Q[newState].Add(default(Move), -1);
				}

				N[prevState][prevAction]++;
				Q[prevState][prevAction] =
					Q[prevState][prevAction]
					+ LearningRate(N[prevState][prevAction])
					* (prevReward + discount * GetMaxQ(newState) - Q[prevState][prevAction]);
			}

			prevState = newState;
			prevAction = GetMaxExplorationFunctionA(newState);
			prevReward = 0;
			return prevAction;
		}
Example #18
		private Move GetMaxExplorationFunctionA(VirusBoard state) {
			double max = double.NegativeInfinity;
			Move action = default(Move);
			Move[] actions = state.GetPossibleMoves(playerNumber);
			if (!Q.ContainsKey(state.CustomHash()))
				return actions.Length > 0 ? actions[0] : action;

			bool berandom = random.NextDouble() < RandomRate;
			foreach (Move a in actions) {
				double value = 0;

				if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
					if (Q[state.CustomHash()][a.CustomHash()] >= 1) {
						// Known winning move: take it immediately.
						value = 1;
						max = value;
						action = a;
						break;
					}
					else if (Q[state.CustomHash()][a.CustomHash()] <= -1) {
						value = -1;
					}
					else {
						if (berandom)
							// Epsilon-style random step: score the move randomly.
							value = random.NextDouble();
						else
							// Exploration bonus applies only when epsilon-randomness is disabled.
							value = Q[state.CustomHash()][a.CustomHash()] + ((explore && RandomRate <= 0) ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
					}
				}
				else {
					// Unvisited moves get an optimistic value to encourage trying them.
					value = 1;
				}
				if (value > max) {
					max = value;
					action = a;
				}
			}
			return action;
		}
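
ExplorationRate itself is not shown in these excerpts. Example #30 below hard-codes a 1/n bonus in the same role, so a plausible (assumed, not confirmed) implementation is:

		// Assumed shape of ExplorationRate, consistent with the inline 1/n bonus
		// in Example #30: the bonus shrinks as a move is visited more often.
		private double ExplorationRate(int n)
		{
			return 1.0 / n;
		}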
Example #19
        private Move LearnFromMinimax(Virus percept)
        {
            // Learn from the minimax teacher
            Move       move         = teacher.Move(percept);
            VirusBoard currentState = percept.GetBoardCopy();

            backProp.LearningRate = 0.1;
            backProp.Momentum     = 0.1;
            Move   annMove = OutputsToMove(network.Compute(BoardToInput(currentState)));
            double error   = backProp.Run(BoardToInput(currentState), MoveToOutputs(move, currentState.Size));

            if (move.Equals(annMove))
            {
                using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
                    writer.WriteLine("using right move. E: " + error);
            }
            else
            {
                using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
                    writer.WriteLine("using wrong move. E: " + error);
            }
            return(move);
        }
Example #20
 // Encodes the board as a flat vector: 1 = own piece, -1 = opponent, 0 = empty.
 private double[] BoardToInput(VirusBoard board)
 {
     double[] inputs = new double[board.Size * board.Size];
     for (int i = 0; i < board.Size; i++)
     {
         for (int j = 0; j < board.Size; j++)
         {
             byte fieldState = board.board[i, j];
             if (fieldState == 0)
             {
                 inputs[i * board.Size + j] = 0;
             }
             else if (fieldState == playerNumber)
             {
                 inputs[i * board.Size + j] = 1;
             }
             else
             {
                 inputs[i * board.Size + j] = -1;
             }
         }
     }
     return(inputs);
 }
Example #21
		public override void EndGame(Virus percept) {
			// A final Move() call performs the last learning update before the episode state is cleared.
			Move(percept);
			prevState = default(VirusBoard);
			prevAction = default(Move);
			prevReward = 0;
		}
Example #22
		// Minimax min node: value of the opponent's best reply.
		private double MinValue(VirusBoard state, int iteration) {
			iteration++;
			if (state.winner == playerNumber) {
				return double.PositiveInfinity;
			}
			if (state.winner != playerNumber && state.winner != 0) {
				return double.NegativeInfinity;
			}

			if (iteration < searchLength) {
				byte opponent = (playerNumber == 1) ? (byte)2 : (byte)1;
				Move[] actions = state.GetPossibleMoves(opponent);

				double min = double.PositiveInfinity;
				foreach (Move a in actions) {
					VirusBoard newState = state.GetUpdated(a);
					double q = Utility(state, newState);
					if (Q.ContainsKey(state.CustomHash())) {
						if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
							// Prefer the learned Q-value where one exists, negated
							// because the opponent's gain is this agent's loss.
							q = -Q[state.CustomHash()][a.CustomHash()];
						}
					}

					q += MaxValue(newState, iteration);
					if (q < min) {
						min = q;
					}
					if (min == double.NegativeInfinity) {
						return min;
					}
				}

				return min;
			}
			else {
				return 0;
			}
		}
Example #23
		// Shaping utility: 0.1 per net piece gained going from currentState to nextState.
		private double Utility(VirusBoard currentState, VirusBoard nextState)
		{
			int orgPieces = 0;
			foreach (byte b in currentState.board)
			{
				if (b == playerNumber)
				{
					orgPieces++;
				}
				else if (b != 0)
				{
					orgPieces--;
				}
			}

			int newPieces = 0;
			foreach (byte b in nextState.board) {
				if (b == playerNumber) {
					newPieces++;
				}
				else if (b != 0) {
					newPieces--;
				}
			}

			double difference = newPieces - orgPieces;
			difference *= 0.1;
			return difference;
		}
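
For scale: if a move adds one piece for this player and converts two of the opponent's, the signed count rises by 5 (own count up 3, opponent count down 2), so Utility returns 0.5. The 0.1 factor keeps this shaping term small next to the infinite terminal values returned by MaxValue and MinValue.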
Example #24
		private Move GetMaxExplorationFunctionA(VirusBoard state) {
			double max = double.NegativeInfinity;
			Move action = default(Move);
			Move[] actions = state.GetPossibleMoves(playerNumber);

			bool berandom = random.NextDouble() < RandomRate;
			foreach (Move a in actions) {
				double value = 0;
				if (!Q.ContainsKey(state.CustomHash())) {
					Q.Add(state.CustomHash(), new Dictionary<UInt32, double>());
				}
				if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
					if (Q[state.CustomHash()][a.CustomHash()] >= 1) {
						value = 1;
						max = value;
						action = a;
						break;
					}
					else {
						if (berandom)
							value = random.NextDouble();
						else
							value = Q[state.CustomHash()][a.CustomHash()] + (explore ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
					}
				}
				else {
					// Unvisited moves get an optimistic value to encourage trying them.
					value = 1;
				}
				if (value > max) {
					max = value;
					action = a;
				}
			}
			return action;
		}
Example #25
		private double GetMaxQ(VirusBoard state) {
			double max = -10; // Sentinel lower than any expected Q-value.
			Move[] actions = state.GetPossibleMoves(playerNumber);
			foreach (Move a in actions) {
				double value = 0;
				if (!Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
					value = 0;
				}
				else {
					value = Q[state.CustomHash()][a.CustomHash()];
				}
				if (value > max)
					max = value;
			}
			// Key 0 is presumably the hash of default(Move), the pseudo-action under
			// which terminal values are stored (cf. Examples #16 and #17).
			if (Q[state.CustomHash()].ContainsKey(0)) {
				if (Q[state.CustomHash()][0] > max)
					max = Q[state.CustomHash()][0];
			}
			return max;
		}
Example #26
		public void Learn(VirusBoard startstate, VirusBoard endstate, Move action) {
			// -- Calculate the reward the change of states represents --
			double reward = Reward(startstate, endstate);
			Learn(startstate, endstate, action, reward);
		}
Example #27
 public static double Reward(VirusBoard startstate, VirusBoard endstate)
 {
     // Placeholder: intermediate moves earn no shaping reward; terminal rewards
     // are assigned in EndGame instead.
     return(0);
 }
Example #28
		public void LoadlongTermMermory(String file)
		{
			// One episode per line: "<significance>:<start>:<end>:<action>:<reward>:..."
			NeaReader reader = new NeaReader(new StreamReader(file + ".MQ"));

			while (reader.Peek() != -1)
			{
				List<VirusMemory> memories = new List<VirusMemory>();
				VirusBoard startState = new VirusBoard();
				VirusBoard endState = new VirusBoard();
				Move action = new Move();
				double reward;
				double significance;
				string data;

				data = reader.ReadLine();
				NeaReader r = new NeaReader(data);
				significance = double.Parse(r.ReadUntil(":"));
				while (r.Peek() != -1) {
					startState.Load(r.ReadUntil(":"));
					endState.Load(r.ReadUntil(":"));
					action.Load(r.ReadUntil(":"));
					reward = double.Parse(r.ReadUntil(":"));
					memories.Add(new VirusMemory(startState, action, endState, reward));
				}
				

			LongTermMemory.Add(new VirusMemoryEpisode(memories.ToArray(), significance));
			}
			reader.Close();
		}
Example #29
		public VirusMemory(VirusBoard start, Move action, VirusBoard end) {
			StartState = start;
			Action = action;
			EndState = end;
			// No explicit reward supplied: derive it from the state change.
			Reward = VirusNameSpace.Agents.MemoryQAgent.Reward(start, end);
		}
Example #30
		private Move GetMaxExplorationFunctionA(VirusBoard state) {
			double max = -10; // Sentinel lower than any expected Q-value.
			Move action = default(Move);
			Move[] actions = state.GetPossibleMoves(playerNumber);
			foreach (Move a in actions) {
				double value = 0;
				if (!Q.ContainsKey(state)) {
					Q.Add(state, new Dictionary<Move, double>());
				}
				if (Q[state].ContainsKey(a)) {
					// Learned value plus a 1/n exploration bonus that decays with visits.
					value = Q[state][a] + 1 / (double)N[state][a];
				}
				else {
					value = 1;
				}
				if (value > max) {
					max = value;
					action = a;
				}
			}
			return action;
		}
Example #31
		public override Move Move(Virus percept) {
			VirusBoard newState = percept.GetBoardCopy();
			if (!Q.ContainsKey(newState.CustomHash()))
				Q.Add(newState.CustomHash(), new Dictionary<UInt32, double>());

			if (learn && !prevState.Equals(default(VirusBoard))) {

				if (!N.ContainsKey(prevState.CustomHash()))
					N.Add(prevState.CustomHash(), new Dictionary<UInt32, int>());
				if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
					N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);

				N[prevState.CustomHash()][prevAction.CustomHash()]++;
				Q[prevState.CustomHash()][prevAction.CustomHash()] =
					Q[prevState.CustomHash()][prevAction.CustomHash()]
					+ LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
					* (prevReward + discount * GetMaxQ(newState) - Q[prevState.CustomHash()][prevAction.CustomHash()]);
			}

			prevState = newState;
			prevAction = GetMaxExplorationFunctionA(newState);
			prevReward = 0;
			if (learn && !Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
				Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);
			return prevAction;
		}
Example #32
		public VirusMemory(VirusBoard start, Move action, VirusBoard end, double reward) {
			StartState = start;
			Action = action;
			EndState = end;
			Reward = reward;
		}
Example #33
		public double Reward(VirusBoard startstate, VirusBoard endstate) {
			// Placeholder: no shaping reward; learning is driven by terminal rewards.
			return 0;
		}