// Calc maxValue
private double MaxValue(VirusBoard state, int iteration) {
    iteration++;
    if (state.winner == playerNumber)
        return double.PositiveInfinity;
    if (state.winner != playerNumber && state.winner != 0)
        return double.NegativeInfinity;
    if (iteration < searchLength) {
        Move[] actions = state.GetPossibleMoves(playerNumber);
        double max = double.NegativeInfinity;
        foreach (Move a in actions) {
            VirusBoard newState = state.GetUpdated(a);
            double q = Utility(state, newState);
            q += MinValue(newState, iteration);
            if (q > max)
                max = q;
            if (max == double.PositiveInfinity)
                return max; // cannot do better than a guaranteed win
        }
        return max;
    } else {
        return 0; // search depth exhausted: treat the state as neutral
    }
}
public void Learn(VirusBoard startstate, VirusBoard endstate, Move action) {
    // -- Calculate the reward the change of states represents --
    double reward = Reward(startstate, endstate);
    Learn(startstate, endstate, action, reward);
}
public void LoadlongTermMermory(String file) {
    NeaReader reader = new NeaReader(new StreamReader(file + ".MQ"));
    while (reader.Peek() != -1) {
        List<VirusMemory> memories = new List<VirusMemory>();
        VirusBoard startState = new VirusBoard();
        VirusBoard endState = new VirusBoard();
        Move action = new Move();
        double reward;
        double significance;
        string data = reader.ReadLine();
        NeaReader r = new NeaReader(data);
        significance = double.Parse(r.ReadUntil(":"));
        while (r.Peek() != -1) {
            startState.Load(r.ReadUntil(":"));
            endState.Load(r.ReadUntil(":"));
            action.Load(r.ReadUntil(":"));
            reward = double.Parse(r.ReadUntil(":"));
            memories.Add(new VirusMemory(startState, action, endState, reward));
        }
        LongTermMemory.Add(new VirusMemoryEpisode(memories.ToArray(), significance));
    }
    reader.Close();
}
/// <summary>
/// Returns the maximum Q-value found for any move performable in the given state.
/// If there is no data for a move, it is considered to have [initvalue].
/// If there is no data for the state, the return value will be 0.
/// </summary>
/// <param name="state"></param>
/// <returns></returns>
private double GetMaxQ(VirusBoard state) {
    if (state.Equals(default(VirusBoard)) || !Q.ContainsKey(state.CustomHash()))
        return 0;
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);
    foreach (Move a in actions) {
        double value;
        if (!Q[state.CustomHash()].ContainsKey(a.CustomHash()))
            value = initvalue;
        else
            value = Q[state.CustomHash()][a.CustomHash()];
        if (value > max)
            max = value;
    }
    return max;
}
private double Learn(VirusMemory memory) {
    VirusBoard startstate = memory.StartState;
    VirusBoard endstate = memory.EndState;
    Move action = memory.Action;
    double reward = memory.Reward;
    // -- Make sure the entries for the state and action exist --
    if (!Q.ContainsKey(startstate.CustomHash()))
        Q.Add(startstate.CustomHash(), new Dictionary<UInt32, double>());
    if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
        Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);
    if (!N.ContainsKey(startstate.CustomHash()))
        N.Add(startstate.CustomHash(), new Dictionary<UInt32, int>());
    if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
        N[startstate.CustomHash()].Add(action.CustomHash(), 0);
    // -- Perform the update of Q-values --
    N[startstate.CustomHash()][action.CustomHash()]++;
    double change = LearningRate(N[startstate.CustomHash()][action.CustomHash()])
        * (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);
    Q[startstate.CustomHash()][action.CustomHash()] += change;
    return change;
}
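// The update above scales the temporal-difference error by LearningRate(n), where n is the
// visit count N[s][a]. The LearningRate implementation is not shown in this listing; below is
// a minimal sketch of a typical count-based decay (an assumption, not the project's actual
// definition):
private double LearningRate(int n) {
    // Decays towards zero as the (state, action) pair is visited more often,
    // so early experiences move Q strongly and later ones only fine-tune it.
    return 1.0 / n;
}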
public void Learn(VirusBoard startstate, VirusBoard endstate, Move action, double reward) {
    // -- Make sure the entries for the state and action exist --
    if (!Q.ContainsKey(startstate.CustomHash()))
        Q.Add(startstate.CustomHash(), new Dictionary<UInt32, double>());
    if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
        Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);
    if (!N.ContainsKey(startstate.CustomHash()))
        N.Add(startstate.CustomHash(), new Dictionary<UInt32, int>());
    if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
        N[startstate.CustomHash()].Add(action.CustomHash(), 0);
    // -- Perform the update of Q-values --
    N[startstate.CustomHash()][action.CustomHash()]++;
    Q[startstate.CustomHash()][action.CustomHash()] = Q[startstate.CustomHash()][action.CustomHash()]
        + LearningRate(N[startstate.CustomHash()][action.CustomHash()])
        * (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);
}
public override void EndGame(Virus percept) {
    if (learn) {
        double reward = 0;
        byte winner = percept.Winner;
        if (winner == playerNumber)
            reward = 1;
        else if (winner != playerNumber && winner != 0)
            reward = -1;
        // -- Make sure the entries for the state and action exist --
        if (!Q.ContainsKey(prevState.CustomHash()))
            Q.Add(prevState.CustomHash(), new Dictionary<UInt32, double>());
        if (!Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
            Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);
        if (!N.ContainsKey(prevState.CustomHash()))
            N.Add(prevState.CustomHash(), new Dictionary<UInt32, int>());
        if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
            N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);
        N[prevState.CustomHash()][prevAction.CustomHash()]++;
        // Terminal update: there is no successor state, so no discounted future term
        Q[prevState.CustomHash()][prevAction.CustomHash()] = Q[prevState.CustomHash()][prevAction.CustomHash()]
            + LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
            * (reward - Q[prevState.CustomHash()][prevAction.CustomHash()]);
    }
    prevState = default(VirusBoard);
    prevAction = default(Move);
    prevReward = 0;
}
private Move GetMaxExplorationFunctionA(VirusBoard state) {
    double max = double.NegativeInfinity;
    Move action = default(Move);
    Move[] actions = state.GetPossibleMoves(playerNumber);
    if (!Q.ContainsKey(state.CustomHash()))
        return actions.Length > 0 ? actions[0] : action;
    bool berandom = random.NextDouble() < RandomRate;
    foreach (Move a in actions) {
        double value;
        if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
            if (Q[state.CustomHash()][a.CustomHash()] >= 1) {
                // Known winning move: take it immediately
                value = 1;
                max = value;
                action = a;
                break;
            } else if (Q[state.CustomHash()][a.CustomHash()] <= -1) {
                value = -1; // known losing move
            } else if (berandom) {
                value = random.NextDouble();
            } else {
                value = Q[state.CustomHash()][a.CustomHash()]
                    + ((explore && !(RandomRate > 0)) ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
            }
        } else {
            value = 1; // untried move: optimistic initial estimate
        }
        if (value > max) {
            max = value;
            action = a;
        }
    }
    return action;
}
public override Move Move(Virus percept) {
    VirusBoard newState = percept.GetBoardCopy();
    if (!prevState.Equals(default(VirusBoard)))
        ShortTermMemory.Add(new VirusMemory(prevState, prevAction, newState));
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
public override Move Move(Virus percept) {
    VirusBoard newState = percept.GetBoardCopy();
    if (learn && !prevState.Equals(default(VirusBoard)))
        Learn(prevState, newState, prevAction);
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
private Move GetAnnMove(Virus percept) {
    VirusBoard currentState = percept.GetBoardCopy();
    Move[] actions = currentState.GetPossibleMoves(playerNumber);
    Move move = OutputsToMove(network.Compute(BoardToInput(currentState)));
    if (actions.Contains(move)) {
        using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
            writer.WriteLine("using learned move");
        return move;
    }
    // The network suggested an illegal move: fall back to the first legal one
    using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
        writer.WriteLine("using default move");
    return actions[0];
}
private double[] BoardToInput(VirusBoard board) {
    double[] inputs = new double[board.Size * board.Size];
    for (int i = 0; i < board.Size; i++) {
        for (int j = 0; j < board.Size; j++) {
            byte fieldState = board.board[i, j];
            if (fieldState == 0)
                inputs[i * board.Size + j] = 0;   // empty field
            else if (fieldState == playerNumber)
                inputs[i * board.Size + j] = 1;   // own piece
            else
                inputs[i * board.Size + j] = -1;  // opponent piece
        }
    }
    return inputs;
}
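// OutputsToMove and MoveToOutputs are referenced by GetAnnMove and LearnFromMinimax but not
// shown in this listing. Below is a minimal sketch of one possible encoding, assuming
// (hypothetically) that Move carries source and destination coordinates (x1, y1, x2, y2) and
// that the network has 2 * Size * Size outputs: a one-hot plane for the source cell followed
// by one for the destination cell. The project's actual encoding may differ.
private double[] MoveToOutputs(Move move, int size) {
    double[] outputs = new double[2 * size * size];
    outputs[move.x1 * size + move.y1] = 1;                // source plane
    outputs[size * size + move.x2 * size + move.y2] = 1;  // destination plane
    return outputs;
}

private Move OutputsToMove(double[] outputs) {
    int cells = outputs.Length / 2;
    int size = (int)Math.Sqrt(cells);
    int from = 0, to = 0;
    for (int i = 1; i < cells; i++) {
        if (outputs[i] > outputs[from]) from = i;             // strongest source activation
        if (outputs[cells + i] > outputs[cells + to]) to = i; // strongest destination activation
    }
    return new Move((byte)(from / size), (byte)(from % size), (byte)(to / size), (byte)(to % size));
}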
public void Learn(VirusBoard startstate, VirusBoard endstate, Move action, double reward) {
    // -- Make sure the entries for the state and action exist --
    if (!Q.ContainsKey(startstate.CustomHash()))
        Q.Add(startstate.CustomHash(), new Dictionary<UInt32, double>());
    if (!Q[startstate.CustomHash()].ContainsKey(action.CustomHash()))
        Q[startstate.CustomHash()].Add(action.CustomHash(), initvalue);
    if (!N.ContainsKey(startstate.CustomHash()))
        N.Add(startstate.CustomHash(), new Dictionary<UInt32, int>());
    if (!N[startstate.CustomHash()].ContainsKey(action.CustomHash()))
        N[startstate.CustomHash()].Add(action.CustomHash(), 0);
    // -- Perform the update of Q-values --
    N[startstate.CustomHash()][action.CustomHash()]++;
    Q[startstate.CustomHash()][action.CustomHash()] = Q[startstate.CustomHash()][action.CustomHash()]
        + LearningRate(N[startstate.CustomHash()][action.CustomHash()])
        * (reward + discount * GetMaxQ(endstate) - Q[startstate.CustomHash()][action.CustomHash()]);
}
/// <summary>
/// Returns the maximum Q-value found for any move performable in the given state.
/// If there is no data for a move, it is considered to have [initvalue].
/// If there is no data for the state, the return value will be 0.
/// </summary>
/// <param name="state"></param>
/// <returns></returns>
private double GetMaxQ(VirusBoard state) {
    if (state.Equals(default(VirusBoard)) || !Q.ContainsKey(state.CustomHash()))
        return 0;
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);
    foreach (Move a in actions) {
        double value;
        if (!Q[state.CustomHash()].ContainsKey(a.CustomHash()))
            value = initvalue;
        else
            value = Q[state.CustomHash()][a.CustomHash()];
        if (value > max)
            max = value;
    }
    return max;
}
public override void EndGame(Virus percept) {
    VirusBoard newState = percept.GetBoardCopy();
    double reward = 0;
    if (percept.Winner == playerNumber)
        reward = 1;
    else if (percept.Winner != playerNumber && percept.Winner != 0)
        reward = -1;
    ShortTermMemory.Add(new VirusMemory(prevState, prevAction, newState, reward));
    prevState = default(VirusBoard);
    prevAction = default(Move);
    prevReward = 0;
}
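// EndGame only stores the final transition; the listing does not show where ShortTermMemory
// is replayed into the Q-table. Below is a minimal sketch of such a consolidation pass,
// assuming (hypothetically) a helper named ReplayShortTermMemory that runs after EndGame and
// uses Learn(VirusMemory) from above:
private double ReplayShortTermMemory() {
    // Replays the episode through the Q-update and accumulates how much it changed Q,
    // which could serve as the episode's significance score.
    double significance = 0;
    foreach (VirusMemory memory in ShortTermMemory)
        significance += Math.Abs(Learn(memory));
    ShortTermMemory.Clear();
    return significance;
}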
private double GetMaxQ(VirusBoard state) {
    if (!Q.ContainsKey(state))
        return 0; // no data for the state
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);
    foreach (Move a in actions) {
        double value = Q[state].ContainsKey(a) ? Q[state][a] : 0;
        if (value > max)
            max = value;
    }
    // default(Move) stores the terminal value of the state itself
    if (Q[state].ContainsKey(default(Move)) && Q[state][default(Move)] > max)
        max = Q[state][default(Move)];
    return max;
}
public override Move Move(Virus percept) {
    // Check whether we have reached a terminal state
    byte winner = percept.Winner;
    VirusBoard newState = percept.GetBoardCopy();
    if (!Q.ContainsKey(newState))
        Q.Add(newState, new Dictionary<Move, double>());
    if (!prevState.Equals(default(VirusBoard))) {
        if (!N.ContainsKey(prevState))
            N.Add(prevState, new Dictionary<Move, int>());
        if (!N[prevState].ContainsKey(prevAction))
            N[prevState].Add(prevAction, 0);
        if (!Q.ContainsKey(prevState))
            Q.Add(prevState, new Dictionary<Move, double>());
        if (!Q[prevState].ContainsKey(prevAction))
            Q[prevState].Add(prevAction, 0);
        // Terminal values are stored under default(Move) so GetMaxQ can pick them up
        if (winner == playerNumber) {
            if (!Q[newState].ContainsKey(default(Move)))
                Q[newState].Add(default(Move), 1);
        } else if (winner != playerNumber && winner != 0) {
            if (!Q[newState].ContainsKey(default(Move)))
                Q[newState].Add(default(Move), -1);
        }
        N[prevState][prevAction]++;
        Q[prevState][prevAction] = Q[prevState][prevAction]
            + LearningRate(N[prevState][prevAction])
            * (prevReward + discount * GetMaxQ(newState) - Q[prevState][prevAction]);
    }
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    return prevAction;
}
private Move GetMaxExplorationFunctionA(VirusBoard state) {
    double max = double.NegativeInfinity;
    Move action = default(Move);
    Move[] actions = state.GetPossibleMoves(playerNumber);
    if (!Q.ContainsKey(state.CustomHash()))
        return actions.Length > 0 ? actions[0] : action;
    bool berandom = random.NextDouble() < RandomRate;
    foreach (Move a in actions) {
        double value;
        if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
            if (Q[state.CustomHash()][a.CustomHash()] >= 1) {
                value = 1;
                max = value;
                action = a;
                break;
            } else if (Q[state.CustomHash()][a.CustomHash()] <= -1) {
                value = -1;
            } else if (berandom) {
                value = random.NextDouble();
            } else {
                value = Q[state.CustomHash()][a.CustomHash()]
                    + ((explore && !(RandomRate > 0)) ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
            }
        } else {
            value = 1;
        }
        if (value > max) {
            max = value;
            action = a;
        }
    }
    return action;
}
private Move LearnFromMinimax(Virus percept) {
    // Learn from the MiniMax teacher
    Move move = teacher.Move(percept);
    VirusBoard currentState = percept.GetBoardCopy();
    backProp.LearningRate = 0.1;
    backProp.Momentum = 0.1;
    Move annMove = OutputsToMove(network.Compute(BoardToInput(currentState)));
    double error = backProp.Run(BoardToInput(currentState), MoveToOutputs(move, currentState.Size));
    if (move.Equals(annMove)) {
        using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
            writer.WriteLine("using right move. E: " + error);
    } else {
        using (StreamWriter writer = new StreamWriter("ann" + percept.Size + "log.txt", true))
            writer.WriteLine("using wrong move. E: " + error);
    }
    return move;
}
private double[] BoardToInput(VirusBoard board) {
    double[] inputs = new double[board.Size * board.Size];
    for (int i = 0; i < board.Size; i++) {
        for (int j = 0; j < board.Size; j++) {
            byte fieldState = board.board[i, j];
            if (fieldState == 0)
                inputs[i * board.Size + j] = 0;
            else if (fieldState == playerNumber)
                inputs[i * board.Size + j] = 1;
            else
                inputs[i * board.Size + j] = -1;
        }
    }
    return inputs;
}
public override void EndGame(Virus percept) {
    Move(percept); // run one final update so the terminal state is learned
    prevState = default(VirusBoard);
    prevAction = default(Move);
    prevReward = 0;
}
// Calc minValue
private double MinValue(VirusBoard state, int iteration) {
    iteration++;
    if (state.winner == playerNumber)
        return double.PositiveInfinity;
    if (state.winner != playerNumber && state.winner != 0)
        return double.NegativeInfinity;
    if (iteration < searchLength) {
        byte opponent = (playerNumber == 1) ? (byte)2 : (byte)1;
        Move[] actions = state.GetPossibleMoves(opponent);
        double min = double.PositiveInfinity;
        foreach (Move a in actions) {
            VirusBoard newState = state.GetUpdated(a);
            double q = Utility(state, newState);
            // Prefer a learned Q-value over the heuristic when one is available
            if (Q.ContainsKey(state.CustomHash()) && Q[state.CustomHash()].ContainsKey(a.CustomHash()))
                q = -Q[state.CustomHash()][a.CustomHash()];
            q += MaxValue(newState, iteration);
            if (q < min)
                min = q;
            if (min == double.NegativeInfinity)
                return min; // the opponent has a guaranteed win
        }
        return min;
    } else {
        return 0;
    }
}
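// MaxValue and MinValue are only the recursive halves of the search; the listing does not
// show the root call. Below is a minimal sketch of a root move selection, assuming
// (hypothetically) a helper named GetMinimaxMove that starts the alternation at depth 0:
private Move GetMinimaxMove(VirusBoard state) {
    Move best = default(Move);
    double max = double.NegativeInfinity;
    foreach (Move a in state.GetPossibleMoves(playerNumber)) {
        VirusBoard newState = state.GetUpdated(a);
        // Mirror the body of MaxValue: immediate utility plus the opponent's best reply
        double q = Utility(state, newState) + MinValue(newState, 0);
        if (q > max) {
            max = q;
            best = a;
        }
    }
    return best;
}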
private double Utility(VirusBoard currentState, VirusBoard nextState) {
    // Piece differential (own minus opponent pieces) before the move...
    int orgPieces = 0;
    foreach (byte b in currentState.board) {
        if (b == playerNumber)
            orgPieces++;
        else if (b != 0)
            orgPieces--;
    }
    // ...and after the move
    int newPieces = 0;
    foreach (byte b in nextState.board) {
        if (b == playerNumber)
            newPieces++;
        else if (b != 0)
            newPieces--;
    }
    // Scale the gain so it stays small relative to the terminal values
    double difference = newPieces - orgPieces;
    difference *= 0.1;
    return difference;
}
private Move GetMaxExplorationFunctionA(VirusBoard state) {
    double max = double.NegativeInfinity;
    Move action = default(Move);
    Move[] actions = state.GetPossibleMoves(playerNumber);
    bool berandom = random.NextDouble() < RandomRate;
    if (!Q.ContainsKey(state.CustomHash()))
        Q.Add(state.CustomHash(), new Dictionary<UInt32, double>());
    foreach (Move a in actions) {
        double value;
        if (Q[state.CustomHash()].ContainsKey(a.CustomHash())) {
            if (Q[state.CustomHash()][a.CustomHash()] >= 1) {
                value = 1;
                max = value;
                action = a;
                break;
            } else if (berandom) {
                value = random.NextDouble();
            } else {
                value = Q[state.CustomHash()][a.CustomHash()]
                    + (explore ? ExplorationRate(N[state.CustomHash()][a.CustomHash()]) : 0);
            }
        } else {
            value = 1;
        }
        if (value > max) {
            max = value;
            action = a;
        }
    }
    return action;
}
private double GetMaxQ(VirusBoard state) {
    if (!Q.ContainsKey(state.CustomHash()))
        return 0; // no data for the state
    double max = -10;
    Move[] actions = state.GetPossibleMoves(playerNumber);
    foreach (Move a in actions) {
        double value = Q[state.CustomHash()].ContainsKey(a.CustomHash()) ? Q[state.CustomHash()][a.CustomHash()] : 0;
        if (value > max)
            max = value;
    }
    // Hash 0 stores the terminal value of the state itself
    if (Q[state.CustomHash()].ContainsKey(0) && Q[state.CustomHash()][0] > max)
        max = Q[state.CustomHash()][0];
    return max;
}
public static double Reward(VirusBoard startstate, VirusBoard endstate) {
    // Intermediate transitions carry no reward; only terminal states are rewarded
    return 0;
}
public void LoadlongTermMermory(String file) {
    NeaReader reader = new NeaReader(new StreamReader(file + ".MQ"));
    while (reader.Peek() != -1) {
        List<VirusMemory> memories = new List<VirusMemory>();
        VirusBoard startState = new VirusBoard();
        VirusBoard endState = new VirusBoard();
        Move action = new Move();
        double reward;
        double significance;
        string data = reader.ReadLine();
        NeaReader r = new NeaReader(data);
        significance = double.Parse(r.ReadUntil(":"));
        while (r.Peek() != -1) {
            startState.Load(r.ReadUntil(":"));
            endState.Load(r.ReadUntil(":"));
            action.Load(r.ReadUntil(":"));
            reward = double.Parse(r.ReadUntil(":"));
            memories.Add(new VirusMemory(startState, action, endState, reward));
        }
        LongTermMemory.Add(new VirusMemoryEpisode(memories.ToArray(), significance));
    }
    reader.Close();
}
public VirusMemory(VirusBoard start, Move action, VirusBoard end) {
    StartState = start;
    Action = action;
    EndState = end;
    Reward = VirusNameSpace.Agents.MemoryQAgent.Reward(start, end);
}
private Move GetMaxExplorationFunctionA(VirusBoard state) {
    double max = -10;
    Move action = default(Move);
    Move[] actions = state.GetPossibleMoves(playerNumber);
    if (!Q.ContainsKey(state))
        Q.Add(state, new Dictionary<Move, double>());
    foreach (Move a in actions) {
        double value;
        if (Q[state].ContainsKey(a))
            value = Q[state][a] + 1 / (double)N[state][a]; // exploration bonus shrinks with visits
        else
            value = 1; // untried moves look maximally attractive
        if (value > max) {
            max = value;
            action = a;
        }
    }
    return action;
}
public override Move Move(Virus percept) {
    VirusBoard newState = percept.GetBoardCopy();
    if (!Q.ContainsKey(newState.CustomHash()))
        Q.Add(newState.CustomHash(), new Dictionary<UInt32, double>());
    if (learn && !prevState.Equals(default(VirusBoard))) {
        if (!N.ContainsKey(prevState.CustomHash()))
            N.Add(prevState.CustomHash(), new Dictionary<UInt32, int>());
        if (!N[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
            N[prevState.CustomHash()].Add(prevAction.CustomHash(), 0);
        N[prevState.CustomHash()][prevAction.CustomHash()]++;
        Q[prevState.CustomHash()][prevAction.CustomHash()] = Q[prevState.CustomHash()][prevAction.CustomHash()]
            + LearningRate(N[prevState.CustomHash()][prevAction.CustomHash()])
            * (prevReward + discount * GetMaxQ(newState) - Q[prevState.CustomHash()][prevAction.CustomHash()]);
    }
    prevState = newState;
    prevAction = GetMaxExplorationFunctionA(newState);
    prevReward = 0;
    // Seed the chosen action with initvalue so the next update can read it
    if (learn && !Q[prevState.CustomHash()].ContainsKey(prevAction.CustomHash()))
        Q[prevState.CustomHash()].Add(prevAction.CustomHash(), initvalue);
    return prevAction;
}
public VirusMemory(VirusBoard start, Move action, VirusBoard end, double reward) {
    StartState = start;
    Action = action;
    EndState = end;
    Reward = reward;
}
public double Reward(VirusBoard startstate, VirusBoard endstate) {
    // Placeholder: intermediate transitions are not rewarded
    return 0;
}