Example #1
        /*
         * Chooses the greedy action (the action with the highest Q-value) based on the current
         * position and the surrounding locations. Ties are broken randomly.
         */
        private int greedyAction(CytoscapeNode currentPos, int algorithm)
        {
            int        action = (int)(randomGenerator.NextDouble() * 4);
            double     max    = getQValue(currentPos, action, algorithm);
            double     QValue;
            List <int> tiedActions = new List <int>();

            // Look at each direction
            for (int i = 0; i < 4; i++)
            {
                QValue = getQValue(currentPos, i, algorithm);
                if (QValue > max)
                {
                    max    = QValue;
                    action = i;
                    // Reset the tiedActions list once a new max has been found
                    tiedActions = new List <int>();
                    tiedActions.Add(i);
                }
                else if (QValue == max)
                {
                    tiedActions.Add(i);
                }
            }

            int tieBreakerIndex = 0;

            if (tiedActions.Count > 1)
            {
                tieBreakerIndex = (int)(randomGenerator.NextDouble() * tiedActions.Count);
                action          = tiedActions[tieBreakerIndex];
            }

            return(action);
        }
Example #2
        /*
         * SARSA update for the state-action pair (s, a) using the successor pair (s', a'):
         * Q(s, a) = Q(s, a) + alpha * [r + gamma * Q(s', a') - Q(s, a)]
         */
        private void sarsaBellmanCalculation(CytoscapeNode s, int a, CytoscapeNode sPrime, int aPrime, int reward)
        {
            Tuple <CytoscapeNode, int> stateActionPair      = new Tuple <CytoscapeNode, int>(s, a);
            Tuple <CytoscapeNode, int> stateActionPairPrime = new Tuple <CytoscapeNode, int>(sPrime, aPrime);
            double QsPrimeaPrime = getQValue(stateActionPairPrime, SARSA_EPISODE);
            double QsA           = getQValue(stateActionPair, SARSA_EPISODE);

            SARSAQ[stateActionPair] = QsA + ALPHA * (reward + GAMMA * QsPrimeaPrime - QsA);
        }
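The method above performs a single backup. As a rough, on-policy illustration of where it sits in an episode loop, one possible step sequence is sketched below; the method name sarsaEpisodeSketch and the exact loop bounds are assumptions, while epsilonGreedyAction, getNewState, getStateReward, startNode, goalNode, timeHorizon and SARSA_EPISODE all appear in the other examples.

        // Illustrative sketch only (not from the original code): one on-policy SARSA episode
        // that feeds sarsaBellmanCalculation. Helper names are taken from the other examples.
        private void sarsaEpisodeSketch()
        {
            CytoscapeNode s = startNode;
            int           a = epsilonGreedyAction(s, SARSA_EPISODE);

            for (int t = 0; t < timeHorizon && s != goalNode; t++)
            {
                CytoscapeNode sPrime = getNewState(s, a);
                int           reward = getStateReward(sPrime);
                // On-policy: a' is chosen with the same epsilon-greedy behaviour policy
                int           aPrime = epsilonGreedyAction(sPrime, SARSA_EPISODE);

                sarsaBellmanCalculation(s, a, sPrime, aPrime, reward);

                s = sPrime;
                a = aPrime;
            }
        }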
Example #3
 // The current reward model is 0 if arriving in the goal state, else -1
 private static int rewardForState(CytoscapeNode node, int goalID)
 {
     if (node.id == goalID)
     {
         return(0);
     }
     return(-1);
 }
Example #4
 public CytoscapeNode(CytoscapeNode copy)
 {
     this.id          = copy.id;
     this.name        = copy.name;
     this.heuristic   = copy.heuristic;
     this.connections = copy.connections;
     this.path        = copy.path;
 }
Example #5
 // Cytoscape requires a source and target to be defined when rendering the network on the UI.
 // However, AStar treats connections as undirected, so this makes sure
 // we aren't accidentally looking at the wrong node.
 public int undirectedTarget(CytoscapeNode desiredSource)
 {
     if (source == desiredSource.id)
     {
         return(target);
     }
     else
     {
         return(source);
     }
 }
Example #6
        /*
         * Q-learning update for the state-action pair (s, a):
         * Q(s, a) = Q(s, a) + alpha * [r + gamma * max_a' Q(s', a') - Q(s, a)]
         */
        private void qLearningBellmanCalculation(CytoscapeNode s, int a, int reward)
        {
            Tuple <CytoscapeNode, int> stateActionPair = new Tuple <CytoscapeNode, int>(s, a);
            CytoscapeNode sPrime = getNewState(s, a);
            // Off-policy: the target is evaluated at the successor state s', not at s
            int           aPrime = argMax(sPrime);
            Tuple <CytoscapeNode, int> stateActionPairPrime = new Tuple <CytoscapeNode, int>(sPrime, aPrime);
            double QsPrimeaPrime = getQValue(stateActionPairPrime, QLEARNING_EPISODE);
            double QsA           = getQValue(stateActionPair, QLEARNING_EPISODE);

            QLearningQ[stateActionPair] = QsA + ALPHA * (reward + GAMMA * QsPrimeaPrime - QsA);
        }
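Note that argMax (Example #13) ranks actions by immediate reward rather than by Q-value, so a stricter reading of the Q-learning target r + gamma * max_a' Q(s', a') would take the maximum of the four Q-values at s' directly. A minimal sketch of such a helper, assuming the same 0..3 action encoding (the name maxQValue is not from the original code):

        // Sketch only: returns max_a' Q(s', a') straight from the Q-table, which is what the
        // Q-learning target formally requires. Assumes actions are encoded as 0..3.
        private double maxQValue(CytoscapeNode sPrime, int algorithm)
        {
            double best = getQValue(sPrime, 0, algorithm);

            for (int i = 1; i < 4; i++)
            {
                best = Math.Max(best, getQValue(sPrime, i, algorithm));
            }
            return(best);
        }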
Example #7
        // Helper to keep track of the path being looked at every iteration
        private static void trackAnimationFrame(List <AnimationFrame> frames, CytoscapeNode current)
        {
            AStarAnimationFrame frame = new AStarAnimationFrame();

            frame.frame = new List <AStarAnimationNode>();
            AStarAnimationNode tempNode;

            foreach (CytoscapeNode node in current.path)
            {
                tempNode = new AStarAnimationNode(node.id);
                frame.frame.Add(tempNode);
            }
            frames.Add(frame);
        }
Example #8
 private int getStateReward(CytoscapeNode newNode)
 {
     if (newNode.cellType == DPCellType.Hole)
     {
         return(-100);
     }
     else if (newNode == goalNode)
     {
         return(100);
     }
     else
     {
         return(-1);
     }
 }
Example #9
        /*
         * With probability epsilon chooses a random action; otherwise the action is chosen greedily.
         */
        private int epsilonGreedyAction(CytoscapeNode currentPos, int algorithm)
        {
            double prob = randomGenerator.NextDouble();
            int    temp;

            if (prob < epsilon)
            {
                temp = randomAction();
            }
            else
            {
                temp = greedyAction(currentPos, algorithm);
            }
            return(temp);
        }
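randomAction is called above but not shown among these examples; a minimal sketch, assuming the same four-action encoding (0..3) used in Example #1 and the shared randomGenerator field:

        // Sketch of the randomAction helper referenced above (not shown in the original
        // examples); assumes the four actions are encoded as 0..3.
        private int randomAction()
        {
            return((int)(randomGenerator.NextDouble() * 4));
        }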
Example #10
        /* Determines the new state when the given action is applied to the current node.
         * Returns the same node if the action would move into a wall.
         */
        private CytoscapeNode getNewState(CytoscapeNode currentNode, int action)
        {
            // Ignore walls, goal state is absorbing
            if (currentNode.cellType == DPCellType.Wall || currentNode.cellType == DPCellType.Goal || action == DIDNTMOVE)
            {
                return(currentNode);
            }

            // First determine what direction the action is. If the new state is a wall then return the current node
            // otherwise return the new state
            Tuple <int, int> newCoords = new Tuple <int, int>(0, 0);

            switch (action)
            {
            case LEFT:
                newCoords = new Tuple <int, int>(currentNode.x - 1, currentNode.y);
                break;

            case RIGHT:
                newCoords = new Tuple <int, int>(currentNode.x + 1, currentNode.y);
                break;

            case UP:
                newCoords = new Tuple <int, int>(currentNode.x, currentNode.y - 1);
                break;

            case DOWN:
                newCoords = new Tuple <int, int>(currentNode.x, currentNode.y + 1);
                break;

            default:
                throw new Exception("Attempting to take an invalid action.");
            }

            if (nodeMap[newCoords].cellType == DPCellType.Wall)
            {
                return(currentNode);
            }
            else
            {
                return(nodeMap[newCoords]);
            }
        }
Example #11
        private int getOptimalActionForState(CytoscapeNode node, int algorithm)
        {
            double max    = Double.MinValue;
            int    action = 0;
            double QValue;

            //4 possible actions
            for (int i = 0; i < 4; i++)
            {
                QValue = getQValue(node, i, algorithm);
                // If the QValue is 0, that means the state-action pair was never actually taken
                if (QValue > max && QValue != 0)
                {
                    max    = QValue;
                    action = i;
                }
            }
            return(max == Double.MinValue ? -1 : action);
        }
Example #12
        // If the cell is slippery (ice), there is an 80% chance of staying in the same cell;
        // otherwise there is a 100% chance of moving in the direction of the policy.
        private static double probabilityOfTransition(CytoscapeNode node, Dictionary <Tuple <int, int>, CytoscapeNode> nodeMap, int action, int actionAccordingToPolicy)
        {
            double probability = 1.0;

            if (node.cellType == DPCellType.Ice && action == DIDNTMOVE)
            {
                probability = SLIPPING_PROB;
            }
            else if (node.cellType == DPCellType.Ice && action == actionAccordingToPolicy)
            {
                probability = 1.0 - SLIPPING_PROB;
            }
            else if (action != actionAccordingToPolicy)
            {
                probability = 0.0;
            }

            return(probability);
        }
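As an illustration of where this probability enters the dynamic-programming backup, one possible policy-evaluation step is sketched below; the value table V, the discount gamma and the helper successorState are assumed names, while rewardForState is Example #3 and the action constants appear in Example #10.

        // Illustrative sketch only (not from the original code): a one-state policy-evaluation
        // backup that weights each candidate outcome by probabilityOfTransition. V, gamma and
        // successorState are assumed names; rewardForState is shown in Example #3.
        private static double expectedValueUnderPolicy(CytoscapeNode node,
                                                       Dictionary <Tuple <int, int>, CytoscapeNode> nodeMap,
                                                       Dictionary <int, double> V,
                                                       int actionAccordingToPolicy,
                                                       int goalID,
                                                       double gamma)
        {
            double expected = 0.0;

            foreach (int action in new int[] { LEFT, RIGHT, UP, DOWN, DIDNTMOVE })
            {
                double p = probabilityOfTransition(node, nodeMap, action, actionAccordingToPolicy);
                if (p > 0.0)
                {
                    CytoscapeNode next = successorState(node, nodeMap, action);
                    expected          += p * (rewardForState(next, goalID) + gamma * V[next.id]);
                }
            }
            return(expected);
        }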
Example #13
        /*
         * Returns the action that provides the highest immediate reward from the given position
         */
        private int argMax(CytoscapeNode currentPosition)
        {
            int           max = Int32.MinValue;
            CytoscapeNode possiblePosition;
            int           reward, action = 0;

            //4 possible actions
            for (int i = 0; i < 4; i++)
            {
                possiblePosition = getNewState(currentPosition, i);
                reward           = getStateReward(possiblePosition);
                if (reward > max)
                {
                    max    = reward;
                    action = i;
                }
            }
            return(action);
        }
Example #14
        /*
         * Two different classes are currently used to perform simulations. One is prefixed with
         * Cytoscape; it is passed in via the UI's ajax call and used in the simulation.
         * The other is prefixed with Animation; it is a simplified form that is
         * passed back to the UI to animate. Besides being simplified, it also contains
         * no circular references, unlike the Cytoscape versions, because C# has trouble
         * serializing objects with circular references, whereas Javascript does not.
         */
        public static Animation runSimulation(int startID, int goalID, CytoscapeParams cyParams)
        {
            //return testAnim(startID, goalID);
            Animation results = new Animation();
            AStarSpecificAnimation aStarSpecific = new AStarSpecificAnimation();

            aStarSpecific.frontierOverTime = new List <List <AStarAnimationNode> >();
            List <AnimationFrame> frames          = new List <AnimationFrame>();
            bool         goalFound                = false;
            CytoscapeMap map                      = new CytoscapeMap(initializeInternalNodes(cyParams.nodes));
            IntervalHeap <CytoscapeNode> frontier = new IntervalHeap <CytoscapeNode>();

            CytoscapeNode current = map.getNode(startID);

            while (!goalFound)
            {
                //Add new frontier to priority queue
                addToFrontier(map, frontier, current);

                //Store path every iteration for animation
                trackAnimationFrame(frames, current);

                //Store the frontier every iteration for animation
                storeFrontierOverTime(aStarSpecific, frontier);

                //Get the next node to expand
                current = frontier.DeleteMax();

                //When done we record the last frame's information and break
                if (current.id == goalID)
                {
                    goalFound = true;
                    trackAnimationFrame(frames, current);
                    storeFrontierOverTime(aStarSpecific, frontier);
                }
            }

            results.frames             = frames;
            results.simulationSpecific = aStarSpecific;

            return(results);
        }
Example #15
        private double getQValue(CytoscapeNode s, int a, int algorithm)
        {
            Tuple <CytoscapeNode, int> stateActionPair = new Tuple <CytoscapeNode, int>(s, a);

            if (algorithm == QLEARNING_EPISODE)
            {
                if (!QLearningQ.ContainsKey(stateActionPair))
                {
                    QLearningQ.Add(stateActionPair, 0.0);
                }
                return(QLearningQ[stateActionPair]);
            }
            else
            {
                if (!SARSAQ.ContainsKey(stateActionPair))
                {
                    SARSAQ.Add(stateActionPair, 0.0);
                }
                return(SARSAQ[stateActionPair]);
            }
        }
Example #16
        private static void addToFrontier(CytoscapeMap map, IntervalHeap <CytoscapeNode> frontier, CytoscapeNode node)
        {
            CytoscapeNode tempNode;

            foreach (CytoscapeConnection connection in node.connections)
            {
                int undirectedTargetID = connection.undirectedTarget(node);
                tempNode = map.getNode(undirectedTargetID);
                // Discard cyclic paths
                if (!node.hasVisitedNode(undirectedTargetID))
                {
                    // Keep track of the path taken
                    if (node.path == null || !node.path.Any())
                    {
                        node.path = new List <CytoscapeNode>();
                        node.path.Add(node);
                    }
                    // Duplicate the path instead of pointing at node's path field
                    tempNode.path = new List <CytoscapeNode>(node.path);
                    tempNode.path.Add(tempNode);
                    // f is the heuristic plus the distance traveled so far
                    tempNode.distance = node.distance + connection.distance;
                    tempNode.f        = tempNode.heuristic + tempNode.distance;
                    frontier.Add(tempNode);
                }
            }
        }
Example #17
        public Animation runSimulation(CytoscapeParams cyParams)
        {
            Animation          results   = new Animation();
            List <List <int> > QLearning = new List <List <int> >();
            List <List <int> > SARSA     = new List <List <int> >();

            randomGenerator = new Random();

            int numEpisodes = cyParams.nodes.Count * 3;

            timeHorizon = cyParams.nodes.Count * 3;

            // Setup maps from coordinates or ID to the nodes
            nodeMap   = new Dictionary <Tuple <int, int>, CytoscapeNode>();
            nodeIDMap = new Dictionary <int, CytoscapeNode>();
            for (int i = 0; i < cyParams.nodes.Count; i++)
            {
                Tuple <int, int> coords = new Tuple <int, int>(cyParams.nodes[i].x, cyParams.nodes[i].y);
                nodeMap.Add(coords, cyParams.nodes[i]);
                nodeIDMap.Add(i, cyParams.nodes[i]);
            }

            epsilon         = 0.9;
            originalEpsilon = epsilon;
            startID         = cyParams.startID;
            goalID          = cyParams.goalID;
            startNode       = nodeIDMap[startID];
            goalNode        = nodeIDMap[goalID];

            Tuple <List <string>, int> QLearningActionRewardPair;
            Tuple <List <string>, int> SARSAActionRewardPair;
            List <List <int> >         QLearningEpisodes = new List <List <int> >();
            List <List <int> >         SARSAEpisodes     = new List <List <int> >();

            results.frames = new List <AnimationFrame>();
            for (int episodeNumber = 0; episodeNumber < numEpisodes; episodeNumber++)
            {
                // Run the episode for each algorithm
                QLearningActionRewardPair = runEpisode(QLEARNING_EPISODE);
                SARSAActionRewardPair     = runEpisode(SARSA_EPISODE);

                // Epsilon decreases every 10 episodes
                if (episodeNumber >= 10 && episodeNumber % 10 == 0)
                {
                    epsilon = originalEpsilon / (episodeNumber / 10);
                    if (epsilon <= 0.009)
                    {
                        epsilon = 0.0;
                    }
                }

                // Each animation frame records the current QLearning and SARSA policies
                // along with the states visited in each algorithm's episode
                RLAnimationFrame frame = new RLAnimationFrame();
                frame.QLearningPolicy        = collectCurrentOptimalPolicy(cyParams.nodes, QLEARNING_EPISODE);
                frame.SARSAPolicy            = collectCurrentOptimalPolicy(cyParams.nodes, SARSA_EPISODE);
                frame.QLearningEpisodeStates = QLearningActionRewardPair.Item1;
                frame.SARSAEpisodeStates     = SARSAActionRewardPair.Item1;
                results.frames.Add(frame);
            }

            return(results);
        }