Beispiel #1
0
        /*
         * Two parallel sets of classes are currently used to perform simulations. Classes
         * prefixed with Cytoscape are passed in via the UI's ajax call and used in the simulation.
         * Classes prefixed with Animation are a simplified form that is
         * passed back to the UI to animate. Besides being simplified, the Animation classes
         * do not contain any circular references, unlike the Cytoscape versions.
         * C# has issues serializing objects with circular references, unlike JavaScript.
         */
        // Runs the A* simulation: repeatedly expands the current node, pops the next node
        // from the frontier priority queue, and stops once the node with goalID is popped.
        //
        // startID:  id of the node the search starts from (resolved through map.getNode).
        // goalID:   id of the node that ends the search when it is popped from the frontier.
        // cyParams: parameters sent by the UI; only cyParams.nodes is read here.
        //
        // Returns an Animation whose frames hold one entry per expansion step (plus one final
        // entry for the goal) and whose simulationSpecific holds the frontier snapshots.
        // If the frontier runs empty before the goal is popped (goal unreachable), the
        // frames recorded up to that point are returned instead of throwing.
        public static Animation runSimulation(int startID, int goalID, CytoscapeParams cyParams)
        {
            Animation results = new Animation();
            AStarSpecificAnimation aStarSpecific = new AStarSpecificAnimation();

            aStarSpecific.frontierOverTime = new List <List <AStarAnimationNode> >();
            List <AnimationFrame> frames          = new List <AnimationFrame>();
            bool         goalFound                = false;
            CytoscapeMap map                      = new CytoscapeMap(initializeInternalNodes(cyParams.nodes));
            IntervalHeap <CytoscapeNode> frontier = new IntervalHeap <CytoscapeNode>();

            CytoscapeNode current = map.getNode(startID);

            while (!goalFound)
            {
                //Add new frontier to priority queue
                addToFrontier(map, frontier, current);

                //Store path every iteration for animation
                trackAnimationFrame(frames, current);

                //Store the frontier every iteration for animation
                storeFrontierOverTime(aStarSpecific, frontier);

                //Nothing left to expand: the goal cannot be reached from the start node.
                //Stop here, because removing from an empty priority queue would fail.
                if (frontier.Count == 0)
                {
                    break;
                }

                //Get the next node to expand
                current = frontier.DeleteMax();

                //When done we record the last frame's information and break
                if (current.id == goalID)
                {
                    goalFound = true;
                    trackAnimationFrame(frames, current);
                    storeFrontierOverTime(aStarSpecific, frontier);
                }
            }

            results.frames             = frames;
            results.simulationSpecific = aStarSpecific;

            return(results);
        }
        // Returns a random policy
        // One entry is produced per element of cyParams.nodes, in list order:
        // a wall cell gets -1, every other cell gets a random action index in [0, 4).
        private static List <int> initializePolicy(CytoscapeParams cyParams)
        {
            Random     generator = new Random();
            List <int> policy    = new List <int>();

            for (int index = 0; index < cyParams.nodes.Count; index++)
            {
                // Walls never receive a valid action.
                bool isWall = cyParams.nodes[index].cellType == DPCellType.Wall;

                // The generator is only consulted for non-wall cells.
                policy.Add(isWall ? -1 : generator.Next(0, 4));
            }

            return policy;
        }
        // Runs the Q-learning and SARSA simulations side by side and records one animation
        // frame per episode.
        //
        // cyParams: parameters sent by the UI; nodes, startID and goalID are read here.
        //
        // Side effects: resets the instance fields randomGenerator, timeHorizon, nodeMap,
        // nodeIDMap, epsilon, originalEpsilon, startID, goalID, startNode and goalNode.
        //
        // Returns an Animation whose frames list holds one RLAnimationFrame per episode,
        // each carrying both algorithms' current policy and the states of that episode.
        public Animation runSimulation(CytoscapeParams cyParams)
        {
            Animation results = new Animation();

            randomGenerator = new Random();

            // Both the number of episodes and the time horizon are three times the node count.
            int numEpisodes = cyParams.nodes.Count * 3;

            timeHorizon = cyParams.nodes.Count * 3;

            // Setup maps from coordinates or ID to the nodes
            // NOTE(review): nodeIDMap is keyed by list index, yet it is looked up below with
            // startID/goalID - this presumes a node's id equals its index; confirm with caller.
            nodeMap   = new Dictionary <Tuple <int, int>, CytoscapeNode>();
            nodeIDMap = new Dictionary <int, CytoscapeNode>();
            for (int i = 0; i < cyParams.nodes.Count; i++)
            {
                Tuple <int, int> coords = new Tuple <int, int>(cyParams.nodes[i].x, cyParams.nodes[i].y);
                nodeMap.Add(coords, cyParams.nodes[i]);
                nodeIDMap.Add(i, cyParams.nodes[i]);
            }

            epsilon         = 0.9;
            originalEpsilon = epsilon;
            startID         = cyParams.startID;
            goalID          = cyParams.goalID;
            startNode       = nodeIDMap[startID];
            goalNode        = nodeIDMap[goalID];

            results.frames = new List <AnimationFrame>();
            for (int episodeNumber = 0; episodeNumber < numEpisodes; episodeNumber++)
            {
                // Run the episode for each algorithm
                Tuple <List <string>, int> QLearningActionRewardPair = runEpisode(QLEARNING_EPISODE);
                Tuple <List <string>, int> SARSAActionRewardPair     = runEpisode(SARSA_EPISODE);

                // Epsilon is recomputed every 10 episodes. episodeNumber / 10 is an integer
                // division, so the divisor is 1 at episode 10, 2 at episode 20, and so on.
                if (episodeNumber >= 10 && episodeNumber % 10 == 0)
                {
                    epsilon = originalEpsilon / (episodeNumber / 10);

                    // Once epsilon becomes this small it is snapped to zero.
                    if (epsilon <= 0.009)
                    {
                        epsilon = 0.0;
                    }
                }

                // An animation frame will be either the QLearning or SARSA policy over time
                RLAnimationFrame frame = new RLAnimationFrame();
                frame.QLearningPolicy        = collectCurrentOptimalPolicy(cyParams.nodes, QLEARNING_EPISODE);
                frame.SARSAPolicy            = collectCurrentOptimalPolicy(cyParams.nodes, SARSA_EPISODE);
                frame.QLearningEpisodeStates = QLearningActionRewardPair.Item1;
                frame.SARSAEpisodeStates     = SARSAActionRewardPair.Item1;
                results.frames.Add(frame);
            }

            return(results);
        }
        // Produces placeholder animation data: three frames, each holding a random policy
        // and three rounds of values. Wall cells are marked with -1 in both the policy and
        // the values; every other cell gets a random action in [0, 4) and, as its value, a
        // counter that increases by one per round across all frames.
        // startID and goalID are accepted but not used.
        private static Animation sampleData(int startID, int goalID, CytoscapeParams cyParams)
        {
            const int frameCount     = 3;
            const int roundsPerFrame = 3;

            Animation             results      = new Animation();
            List <AnimationFrame> frameList    = new List <AnimationFrame>();
            Random                random       = new Random();
            int                   roundCounter = 0;

            for (int frameIndex = 0; frameIndex < frameCount; frameIndex++)
            {
                DPAnimationFrame sampleFrame = new DPAnimationFrame();

                // Random policy for this frame; walls never get a valid action.
                List <int> policy = new List <int>();
                for (int cell = 0; cell < cyParams.nodes.Count; cell++)
                {
                    bool isWall = cyParams.nodes[cell].cellType == DPCellType.Wall;
                    policy.Add(isWall ? -1 : random.Next(0, 4));
                }

                // Value rounds for this frame; the counter keeps running between frames.
                List <List <double> > valueRounds = new List <List <double> >();
                for (int round = 0; round < roundsPerFrame; round++)
                {
                    roundCounter++;

                    List <double> roundValues = new List <double>();
                    for (int cell = 0; cell < cyParams.nodes.Count; cell++)
                    {
                        bool isWall = cyParams.nodes[cell].cellType == DPCellType.Wall;
                        roundValues.Add(isWall ? -1 : roundCounter);
                    }

                    valueRounds.Add(roundValues);
                }

                sampleFrame.values = valueRounds;
                sampleFrame.policy = policy;
                frameList.Add(sampleFrame);
            }

            results.frames = frameList;
            return results;
        }
        // Runs the dynamic-programming simulation: alternates an evaluation phase (repeated
        // sweeps over all states until the largest change drops below theta) with a call to
        // updatePolicy, and records one DPAnimationFrame per outer round.
        //
        // cyParams: parameters sent by the UI; startID, goalID, theta, probOfSlipping and
        //           nodes are read here.
        //
        // Side effect: overwrites the static SLIPPING_PROB with cyParams.probOfSlipping.
        //
        // Returns an Animation whose frames hold, per outer round, the policy that was
        // evaluated, the values of every sweep and the running deltas of every sweep, and
        // whose simulationSpecific carries the gamma and theta that were used.
        //
        // NOTE(review): if theta <= 0 the inner sweep loop cannot terminate, because delta is
        // reset to 0.0 and only ever grows through Math.Abs, so "delta >= theta" stays true.
        public static Animation runSimulation(CytoscapeParams cyParams)
        {
            //return sampleData(startID, goalID, cyParams);
            // NOTE(review): startID is read but not used anywhere else in this method.
            int    startID = cyParams.startID;
            int    goalID  = cyParams.goalID;
            double theta   = cyParams.theta;

            SLIPPING_PROB = cyParams.probOfSlipping;
            Animation             results = new Animation();
            List <AnimationFrame> frames = new List <AnimationFrame>();
            List <int>            currentPolicy, previousPolicy;
            List <double>         currentIteration  = new List <double>();
            List <double>         deltaForIteration = new List <double>();
            List <List <double> > calculation;
            List <List <double> > deltaForCalculation;

            // Start from a random policy (walls are given -1 by initializePolicy)
            currentPolicy = initializePolicy(cyParams);

            DPSpecific simulationSpecific = new DPSpecific();

            // Maps the id of a location to the utility value associated with it
            // NOTE(review): the maps below are filled using the list index i as key, but
            // utilityFunction is later read with newState.id and currentPolicy is indexed with
            // nodes[i].id - this presumes a node's id equals its index; confirm with caller.
            Dictionary <int, double> utilityFunction             = new Dictionary <int, double>();
            Dictionary <Tuple <int, int>, CytoscapeNode> nodeMap = new Dictionary <Tuple <int, int>, CytoscapeNode>();
            Dictionary <int, CytoscapeNode> nodeIDMap            = new Dictionary <int, CytoscapeNode>();

            // Every state starts with utility 0.0 and is registered by coordinates and by index
            for (int i = 0; i < cyParams.nodes.Count; i++)
            {
                utilityFunction.Add(i, 0.0);
                Tuple <int, int> coords = new Tuple <int, int>(cyParams.nodes[i].x, cyParams.nodes[i].y);
                nodeMap.Add(coords, cyParams.nodes[i]);
                nodeIDMap.Add(i, cyParams.nodes[i]);
            }

            // Initialize delta
            // Initialize random policy
            // gamma is the discount factor applied to the utility of the next state;
            // timeHorizon caps the number of outer rounds at the number of nodes.
            double delta           = 0.0;
            double gamma           = 0.9;
            int    timeHorizon     = cyParams.nodes.Count;
            int    iterationNumber = 0;

            double           currentValue     = 0.0;
            bool             policyHasChanged = true;
            DPAnimationFrame frame;
            CytoscapeNode    newState;
            int action;

            // Outer loop: one round per policy, until the policy stops changing or the cap is hit
            while (policyHasChanged && iterationNumber < timeHorizon)
            {
                frame = new DPAnimationFrame();
                // Seed delta with theta so the sweep loop below runs at least once
                delta = theta;
                // Perform a Bellman calculation
                calculation         = new List <List <double> >();
                deltaForCalculation = new List <List <double> >();
                // Inner loop: keep sweeping until the largest change in a sweep is below theta
                while (delta >= theta)
                {
                    currentIteration  = new List <double>();
                    deltaForIteration = new List <double>();
                    delta             = 0.0;
                    // Loop through every state (location in the maze)
                    for (int i = 0; i < cyParams.nodes.Count; i++)
                    {
                        // Walls default to the value WALL_VALUE
                        // (the running delta is still recorded so both lists stay aligned by index)
                        if (cyParams.nodes[i].cellType == DPCellType.Wall)
                        {
                            deltaForIteration.Add(delta);
                            currentIteration.Add(WALL_VALUE);
                            continue;
                        }
                        // Get the value of the current utility function for that state
                        currentValue = utilityFunction[i];
                        // Update the utility function for that state to be prob(oldState, policyAction, newState)*(reward(newState) + gamma*utility(newState))
                        action = currentPolicy[cyParams.nodes[i].id];
                        //newState = getNewState(nodeMap, action, cyParams.nodes[i]);


                        // Sum over the five outcome indices 0..4, each weighted by its transition
                        // probability under the policy's action.
                        // NOTE(review): initializePolicy only produces actions 0..3; presumably
                        // index 4 means "stay in place" - confirm against getNewState.
                        double newFunctionVal = 0.0;
                        for (int j = 0; j < 5; j++)
                        {
                            newState        = getNewState(nodeMap, j, cyParams.nodes[i]);
                            newFunctionVal += probabilityOfTransition(cyParams.nodes[i], nodeMap, j, action) * (rewardForState(newState, goalID) + gamma * utilityFunction[newState.id]);
                        }
                        // The utility is overwritten in place, so states visited later in this
                        // same sweep already read the updated value.
                        utilityFunction[i] = newFunctionVal;
                        //utilityFunction[i] = probabilityOfTransition(cyParams.nodes[i], nodeMap) * (rewardForState(newState, goalID) + gamma * utilityFunction[newState.id]);
                        // Track the largest absolute change seen so far in this sweep
                        if (Math.Abs(currentValue - utilityFunction[i]) > delta)
                        {
                            delta = Math.Abs(currentValue - utilityFunction[i]);
                        }

                        // Note: this records the running maximum, not this state's own change
                        deltaForIteration.Add(delta);
                        currentIteration.Add(utilityFunction[i]);
                    }
                    // Store the values for this iteration
                    calculation.Add(currentIteration);
                    deltaForCalculation.Add(deltaForIteration);
                }
                // Store the values for this calculation
                // (the policy is copied so later policy updates cannot alter the stored frame)
                frame.policy = new List <int>(currentPolicy);
                frame.values = calculation;
                frame.deltas = deltaForCalculation;

                // Update the policy
                // The values of the last sweep are handed to updatePolicy; the round loop ends
                // once the returned policy equals the previous one element by element.
                previousPolicy = new List <int>(currentPolicy);
                currentPolicy  = updatePolicy(currentPolicy, currentIteration, nodeMap, nodeIDMap);
                if (previousPolicy.SequenceEqual(currentPolicy))
                {
                    policyHasChanged = false;
                }
                frames.Add(frame);
                iterationNumber++;
            }

            // Report the parameters that were used alongside the frames
            simulationSpecific.gamma = gamma;
            simulationSpecific.theta = theta;

            results.simulationSpecific = simulationSpecific;
            results.frames             = frames;

            return(results);
        }