Exemplo n.º 1
0
        /// <summary>
        /// Runs a depth-first search from <paramref name="currentState"/> and replays the returned
        /// action list step by step, writing each step to the output.
        /// </summary>
        /// <param name="currentState">State the trial starts from.</param>
        /// <param name="trialNum">Not used here except when forwarded to the re-solve call.</param>
        /// <param name="learn">Not used here except when forwarded to the re-solve call.</param>
        /// <param name="discount">Not used here except when forwarded to the re-solve call.</param>
        /// <param name="explore">Not used here except when forwarded to the re-solve call.</param>
        /// <returns>
        /// The last state reached: an end state when the plan succeeded, otherwise the state at which
        /// execution stopped (no plan found, or the run was stopped).
        /// </returns>
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            QSearch       qsearch = new QSearch(this);
            QSearchResult actions = qsearch.Depth_First(currentState, true);

            if (actions == null)
            {
                WriteOutput("No solution found.", true);
                return(currentState);
            }

            foreach (QAction action in actions.actionsList)
            {
                // A planned action is only applied while the run is active, the goal has not been
                // reached yet, and the action is still offered by the current state.
                if (!currentState.IsEnd() && isRunning && currentState.GetActions().Contains(action))
                {
                    WriteOutput(currentState + ": " + action);
                    QState newState = currentState.GetNewState(action);
                    newState.Inherit(currentState);
                    newState.Step();
                    currentState = newState;
                }
            }

            if (currentState.IsEnd())
            {
                WriteOutput(currentState + ": End");
            }
            else if (isRunning)
            {
                // The plan ran out before reaching an end state while we are still running,
                // so the plan itself no longer fits; search again from where we are now.
                WriteOutput("Existing solution no longer applicable.  Re-solving...");
                return(Run(currentState, trialNum, learn, discount, explore));
            }

            // Reaching here without an end state means the run was stopped: the remaining actions
            // were skipped on purpose, so re-solving would be wrong (and could recurse without bound
            // because no action is ever applied while isRunning is false).
            return(currentState);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Runs a breadth-first search from <paramref name="currentState"/> and replays the returned
 /// action list step by step, writing each step to the output.
 /// </summary>
 /// <param name="currentState">State the trial starts from.</param>
 /// <param name="trialNum">Not used here except when forwarded to the re-solve call.</param>
 /// <param name="learn">Not used here except when forwarded to the re-solve call.</param>
 /// <param name="discount">Not used here except when forwarded to the re-solve call.</param>
 /// <param name="explore">Not used here except when forwarded to the re-solve call.</param>
 /// <returns>
 /// The last state reached: an end state when the plan succeeded, otherwise the state at which
 /// execution stopped (no plan found, or the run was stopped).
 /// </returns>
 public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
 {
     QSearch qsearch = new QSearch(this);
     QSearchResult actions = qsearch.Breadth_First(currentState, true);
     if (actions == null)
     {
         WriteOutput("No solution found.", true);
         return currentState;
     }

     foreach (QAction action in actions.actionsList)
     {
         // Apply a planned action only while running, before the goal is reached,
         // and only if the current state still offers it.
         if (!currentState.IsEnd() && isRunning && currentState.GetActions().Contains(action))
         {
             WriteOutput(currentState + ": " + action);
             QState newState = currentState.GetNewState(action);
             newState.Inherit(currentState);
             newState.Step();
             currentState = newState;
         }
     }

     if (currentState.IsEnd())
     {
         WriteOutput(currentState + ": End");
         return currentState;
     }

     // A stopped run skips the remaining actions on purpose. Re-solving in that case would only
     // announce a re-solve and then report that no solution exists, so return quietly instead.
     if (!isRunning)
     {
         return currentState;
     }

     WriteOutput("Existing solution no longer applicable.  Re-solving...");
     return Run(currentState, trialNum, learn, discount, explore);
 }
Exemplo n.º 3
0
        /// <summary>
        /// Searches outward from <paramref name="startState"/> using a first-in, first-out frontier and
        /// returns the recorded path for the first end state that is dequeued.
        /// </summary>
        /// <remarks>
        /// The cost recorded for a successor is the cost of its parent minus the parent's
        /// <c>GetValue()</c>. A successor is (re-)queued whenever it has no recorded cost yet or the
        /// new cost is strictly lower than the recorded one.
        /// </remarks>
        /// <param name="startState">State the search starts from.</param>
        /// <param name="output">When true, progress and result messages are written via WriteOutput.</param>
        /// <returns>
        /// The path to the first end state dequeued, or null when the frontier empties or
        /// <c>isRunning</c> becomes false before one is found.
        /// </returns>
        public QSearchResult Breadth_First(QState startState, bool output = false)
        {
            HashSet<QState> visited = new HashSet<QState>();

            Dictionary<QState, decimal> cheapest = new Dictionary<QState, decimal>();
            cheapest.Add(startState, 0m);

            Queue<QState> frontier = new Queue<QState>();
            frontier.Enqueue(startState);

            Dictionary<QState, QSearchResult> routes = new Dictionary<QState, QSearchResult>();
            routes.Add(startState, new QSearchResult());

            if (output)
            {
                WriteOutput("Searching for shortest path via Breadth-First Search...");
            }

            int iterations = 0;
            while (frontier.Count > 0 && isRunning)
            {
                iterations++;
                QState node = frontier.Dequeue();

                if (node.IsEnd())
                {
                    QSearchResult solution = routes[node];
                    if (output)
                    {
                        WriteOutput("Shortest path of " + solution.Count + " step(s) found after " + iterations + " iteration(s).");
                    }
                    return solution;
                }

                visited.Add(node);
                foreach (QAction action in node.GetActions())
                {
                    QState successor = node.GetNewState(action);
                    if (visited.Contains(successor))
                    {
                        continue;
                    }

                    decimal costSoFar = cheapest[node] - node.GetValue();

                    // Keep the existing entry unless this route is strictly cheaper.
                    decimal known;
                    if (cheapest.TryGetValue(successor, out known) && known <= costSoFar)
                    {
                        continue;
                    }

                    // The successor's route is the parent's route plus this one extra step.
                    QSearchResult extended = new QSearchResult(routes[node]);
                    extended.actionsList.Add(action);
                    extended.QStatesList.Add(successor);

                    routes[successor]   = extended;
                    cheapest[successor] = costSoFar;
                    frontier.Enqueue(successor);
                }
            }

            if (output)
            {
                WriteOutput("No path found after " + iterations + " iteration(s).");
            }
            return null;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Searches from <paramref name="startState"/> using a priority queue and returns the recorded
        /// path for the first end state that is dequeued.
        /// </summary>
        /// <remarks>
        /// The cost recorded for a successor is the cost of its parent minus the parent's
        /// <c>GetValue()</c>. Its queue priority is that cost minus the successor's
        /// <c>GetValueHeuristic()</c>.
        /// </remarks>
        /// <param name="startState">State the search starts from.</param>
        /// <param name="output">When true, progress and result messages are written via WriteOutput.</param>
        /// <param name="maxQueue">
        /// Passed to the HeapPriorityQueue constructor; presumably its capacity (confirm against that class).
        /// </param>
        /// <returns>
        /// The path to the first end state dequeued, or null when the queue empties or
        /// <c>isRunning</c> becomes false before one is found.
        /// </returns>
        public QSearchResult AStar(QState startState, bool output = false, int maxQueue = 1000)
        {
            HashSet<QState> visited = new HashSet<QState>();

            Dictionary<QState, decimal> cheapest = new Dictionary<QState, decimal>();
            cheapest.Add(startState, 0m);

            HeapPriorityQueue<QStateContainer> frontier = new HeapPriorityQueue<QStateContainer>(maxQueue);
            frontier.Enqueue(new QStateContainer(startState), 0);

            Dictionary<QState, QSearchResult> routes = new Dictionary<QState, QSearchResult>();
            routes.Add(startState, new QSearchResult());

            if (output)
            {
                WriteOutput("Searching for shortest path via A-Star Search...");
            }

            int iterations = 0;
            while (frontier.Count > 0 && isRunning)
            {
                iterations++;
                QState node = frontier.Dequeue().qstate;

                if (node.IsEnd())
                {
                    QSearchResult solution = routes[node];
                    if (output)
                    {
                        WriteOutput("Shortest path of " + solution.Count + " step(s) found after " + iterations + " iteration(s).");
                    }
                    return solution;
                }

                visited.Add(node);
                foreach (QAction action in node.GetActions())
                {
                    QState successor = node.GetNewState(action);
                    if (visited.Contains(successor))
                    {
                        continue;
                    }

                    decimal costSoFar = cheapest[node] - node.GetValue();

                    // Keep the existing entry unless this route is strictly cheaper.
                    decimal known;
                    if (cheapest.TryGetValue(successor, out known) && known <= costSoFar)
                    {
                        continue;
                    }

                    // The successor's route is the parent's route plus this one extra step.
                    QSearchResult extended = new QSearchResult(routes[node]);
                    extended.actionsList.Add(action);
                    extended.QStatesList.Add(successor);

                    routes[successor]   = extended;
                    cheapest[successor] = costSoFar;
                    frontier.Enqueue(new QStateContainer(successor), costSoFar - 1 * successor.GetValueHeuristic());
                }
            }

            if (output)
            {
                WriteOutput("No path found after " + iterations + " iteration(s).");
            }
            return null;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Runs a single trial/instance of the QState problem by repeatedly applying a randomly chosen
 /// action until the state is an end state, no actions are available, or the run is stopped.
 /// </summary>
 /// <remarks>
 /// QLearner will automatically run many times for learning or once to apply what has been learned.
 /// </remarks>
 /// <param name="currentState">State the trial starts from.</param>
 /// <param name="trialNum">Trial number; only used in the output message while in LEARN mode.</param>
 /// <param name="learn">Not used by this implementation.</param>
 /// <param name="discount">Not used by this implementation.</param>
 /// <param name="explore">Not used by this implementation.</param>
 /// <returns>The final state reached.</returns>
 public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
 {
     while (!currentState.IsEnd())
     {
         // Fetch the action list once per step so the random index and the list it indexes
         // always come from the same call.
         var available = currentState.GetActions();
         if (available.Length <= 0 || !isRunning)
         {
             break;
         }

         QAction action = available.ElementAt(r.Next(available.Length));
         QState newState = currentState.GetNewState(action);
         newState.Inherit(currentState);
         newState.Step();

         // The message reports the state the action was taken from, not the resulting state.
         WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + ": '" + action + "' @ " + currentState.ToString());
         currentState = newState;
     }
     return currentState;
 }
Exemplo n.º 6
0
 /// <summary>
 /// Runs a single trial/instance of the QState problem: at every step one of the currently
 /// available actions is picked at random and applied.
 /// </summary>
 /// <remarks>
 /// QLearner will automatically run many times for learning or once to apply what has been learned.
 /// The loop ends when the state is an end state, when no actions are available, or when
 /// <c>isRunning</c> is false.
 /// </remarks>
 /// <param name="currentState">State the trial starts from.</param>
 /// <param name="trialNum">Trial number; only used in the output message while in LEARN mode.</param>
 /// <param name="learn">Not used by this implementation.</param>
 /// <param name="discount">Not used by this implementation.</param>
 /// <param name="explore">Not used by this implementation.</param>
 /// <returns>The final state reached.</returns>
 public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
 {
     while (!currentState.IsEnd())
     {
         // One GetActions() call per step: the index drawn below is guaranteed to be valid
         // for exactly the collection it is applied to.
         var choices = currentState.GetActions();
         if (choices.Length <= 0 || !isRunning)
         {
             break;
         }

         QAction action   = choices.ElementAt(r.Next(choices.Length));
         QState  newState = currentState.GetNewState(action);
         newState.Inherit(currentState);
         newState.Step();

         // Logged against the state the action was taken from.
         WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + ": '" + action + "' @ " + currentState.ToString());
         currentState = newState;
     }
     return(currentState);
 }
Exemplo n.º 7
0
        /// <summary>
        /// Runs one trial: repeatedly picks an action (random with probability
        /// <paramref name="explore"/>, otherwise the best known one), applies it, and feeds the
        /// resulting reward to <c>QUpdate</c>. Also feeds every transition reported by the new
        /// state's <c>GetObservedStates</c> to <c>QUpdate</c>.
        /// </summary>
        /// <param name="currentState">State the trial starts from.</param>
        /// <param name="trialNum">Trial number, used in output messages.</param>
        /// <param name="learn">Stored in <c>this.learn</c> before the trial starts.</param>
        /// <param name="discount">Stored in <c>this.discount</c> before the trial starts.</param>
        /// <param name="explore">Stored in <c>this.explore</c>; also the threshold for taking a random action.</param>
        /// <returns>The state the trial ended in.</returns>
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            this.learn    = learn;
            this.discount = discount;
            this.explore  = explore;

            // Message prefix; evaluated on every use so it always reflects the mode at that moment.
            Func<string> trialTag = () => CurrentMode == LEARN ? "Trial " + trialNum + ", " : "";

            decimal total     = 0;
            int     stepCount = 0;

            while (!currentState.IsEnd() && GetOutcomes(currentState).Count > 0 && isRunning)
            {
                stepCount++;

                // A random number is only drawn when exploration is enabled at all.
                bool    exploring = explore > 0 && (decimal)random.NextDouble() <= explore;
                QAction chosen;
                if (exploring)
                {
                    chosen = GetRandomAction(currentState);
                }
                else
                {
                    chosen = GetBestAction(currentState);
                }

                QState nextState = currentState.GetNewState(chosen);
                WriteOutput(trialTag() + "#" + stepCount + " " + (exploring ? "Explore" : "Action") + ": '" + chosen + "' @ " + currentState.ToString());
                nextState.Inherit(currentState);
                nextState.Step();

                decimal reward = GetReward(currentState, nextState);
                total += reward;
                QUpdate(stepCount, currentState, chosen, nextState, reward);
                WriteOutput(trialTag() + "#" + stepCount + " Gain " + Math.Round(reward, 4) + ",  Total " + Math.Round(total, 4));

                // Additional transitions reported by the new state are learned from as well.
                foreach (KeyValuePair<QStateActionPair, QState> seen in nextState.GetObservedStates(currentState, chosen))
                {
                    QState  seenBefore = seen.Key.state;
                    QAction seenAction = seen.Key.action;
                    QState  seenAfter  = seen.Value;
                    decimal seenReward = GetReward(seenBefore, seenAfter);
                    QUpdate(stepCount, seenBefore, seenAction, seenAfter, seenReward);
                    WriteOutput(trialTag() + "#" + stepCount + " Observed: '" + seenAction + "' @ " + seenBefore.ToString() + " | Gain " + Math.Round(seenReward, 4));
                }

                currentState = nextState;
            }

            // The summary is skipped when the loop ended because the run was stopped.
            if (isRunning)
            {
                WriteOutput("Trial " + trialNum + ": " + Math.Round(total, 4) + " in " + stepCount + " step" + (stepCount == 1 ? "" : "s") + ".");
            }
            return currentState;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Executes a single trial. Each step chooses either a random action (with probability
        /// <paramref name="explore"/>) or the best known action, applies it, and passes the reward
        /// to <c>QUpdate</c>; transitions returned by <c>GetObservedStates</c> on the new state are
        /// passed to <c>QUpdate</c> too.
        /// </summary>
        /// <param name="currentState">State the trial starts from.</param>
        /// <param name="trialNum">Trial number, used in output messages.</param>
        /// <param name="learn">Copied to <c>this.learn</c>.</param>
        /// <param name="discount">Copied to <c>this.discount</c>.</param>
        /// <param name="explore">Copied to <c>this.explore</c>; also the threshold for taking a random action.</param>
        /// <returns>The state the trial ended in.</returns>
        public override QState Run(QState currentState, int trialNum, decimal learn, decimal discount, decimal explore)
        {
            this.learn = learn;
            this.discount = discount;
            this.explore = explore;

            decimal runningTotal = 0;
            int moves = 0;

            while (true)
            {
                // Stop on an end state, when no outcomes are left, or when the run was stopped.
                if (currentState.IsEnd() || GetOutcomes(currentState).Count <= 0 || !isRunning)
                {
                    break;
                }

                moves++;

                // The random draw only happens when exploration is enabled.
                bool isExploring = explore > 0 && (decimal)random.NextDouble() <= explore;
                QAction picked;
                if (isExploring)
                {
                    picked = GetRandomAction(currentState);
                }
                else
                {
                    picked = GetBestAction(currentState);
                }

                QState successor = currentState.GetNewState(picked);
                WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + moves + " " + (isExploring ? "Explore" : "Action") + ": '" + picked + "' @ " + currentState.ToString());
                successor.Inherit(currentState);
                successor.Step();

                decimal gain = GetReward(currentState, successor);
                runningTotal += gain;
                QUpdate(moves, currentState, picked, successor, gain);
                WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + moves + " Gain " + Math.Round(gain, 4) + ",  Total " + Math.Round(runningTotal, 4));

                // Learn from any extra transitions the new state reports.
                foreach (KeyValuePair<QStateActionPair, QState> entry in successor.GetObservedStates(currentState, picked))
                {
                    QState fromState = entry.Key.state;
                    QAction viaAction = entry.Key.action;
                    QState toState = entry.Value;
                    decimal observedGain = GetReward(fromState, toState);
                    QUpdate(moves, fromState, viaAction, toState, observedGain);
                    WriteOutput((CurrentMode == LEARN ? "Trial " + trialNum + ", " : "") + "#" + moves + " Observed: '" + viaAction + "' @ " + fromState.ToString() + " | Gain " + Math.Round(observedGain, 4));
                }

                currentState = successor;
            }

            // No summary line when the trial was interrupted.
            if (isRunning)
            {
                WriteOutput("Trial " + trialNum + ": " + Math.Round(runningTotal, 4) + " in " + moves + " step" + (moves == 1 ? "" : "s") + ".");
            }
            return currentState;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Builds two copies of the board with the "my" and "your" roles swapped: one for the state
        /// produced by applying <paramref name="action"/> to <paramref name="prevState"/>, and one for
        /// this state. Returns the first move present in the second copy's <c>myMoves</c> but not in
        /// the first's, expressed as a transition between the two copies.
        /// </summary>
        /// <remarks>
        /// Both copies take their <c>score</c> from the state produced by
        /// <paramref name="prevState"/> and <paramref name="action"/>.
        /// NOTE(review): this looks like it replays the opponent's reply from the opponent's point
        /// of view so it can be learned from; confirm against the caller.
        /// </remarks>
        /// <param name="prevState">State before the action was taken; cast to TicTacToe after applying the action.</param>
        /// <param name="action">Action that was applied to <paramref name="prevState"/>.</param>
        /// <returns>
        /// A dictionary holding a single transition when such a move exists, otherwise an empty dictionary.
        /// </returns>
        public override Dictionary<QStateActionPair, QState> GetObservedStates(QState prevState, QAction action)
        {
            TicTacToe afterMyMove = (TicTacToe)prevState.GetNewState(action);

            // Board right after my move, with the two players' roles swapped.
            TicTacToe opponentBefore = new TicTacToe()
            {
                myMoves    = afterMyMove.yourMoves.ToList(),
                yourMoves  = afterMyMove.myMoves.ToList(),
                me         = you,
                you        = me,
                random     = random,
                gui        = gui,
                openSpaces = afterMyMove.openSpaces.ToList(),
                score      = afterMyMove.score
            };

            // This state's board, with the two players' roles swapped.
            TicTacToe opponentNow = new TicTacToe()
            {
                myMoves    = yourMoves.ToList(),
                yourMoves  = myMoves.ToList(),
                me         = you,
                you        = me,
                random     = random,
                gui        = gui,
                openSpaces = openSpaces.ToList(),
                score      = afterMyMove.score
            };

            Dictionary<QStateActionPair, QState> observed = new Dictionary<QStateActionPair, QState>();
            foreach (Point move in opponentNow.myMoves)
            {
                if (opponentBefore.myMoves.Contains(move))
                {
                    continue;
                }

                // Only the first move missing from the earlier board is reported.
                observed.Add(new QStateActionPair(opponentBefore, new QAction_String(move.X + "," + move.Y)), opponentNow);
                break;
            }
            return observed;
        }