/*
 * Currently using two different classes to perform simulations. One is prefixed with
 * Cytoscape, which is passed in via the UI's ajax call and used in the simulation.
 * The other is prefixed with Animation, which is a simplified form that is passed
 * back to the UI to animate. Besides being simplified, it also contains no circular
 * references, unlike the Cytoscape versions. C# has issues serializing objects with
 * circular references, unlike JavaScript.
 */
public static Animation runSimulation(int startID, int goalID, CytoscapeParams cyParams)
{
    //return testAnim(startID, goalID);
    Animation results = new Animation();
    AStarSpecificAnimation aStarSpecific = new AStarSpecificAnimation();
    aStarSpecific.frontierOverTime = new List<List<AStarAnimationNode>>();
    List<AnimationFrame> frames = new List<AnimationFrame>();
    bool goalFound = false;
    CytoscapeMap map = new CytoscapeMap(initializeInternalNodes(cyParams.nodes));
    IntervalHeap<CytoscapeNode> frontier = new IntervalHeap<CytoscapeNode>();
    CytoscapeNode current = map.getNode(startID);

    while (!goalFound)
    {
        // Add new frontier to the priority queue
        addToFrontier(map, frontier, current);
        // Store the path every iteration for the animation
        trackAnimationFrame(frames, current);
        // Store the frontier every iteration for the animation
        storeFrontierOverTime(aStarSpecific, frontier);
        // Get the next node to expand
        current = frontier.DeleteMax();
        // When done, record the last frame's information and stop
        if (current.id == goalID)
        {
            goalFound = true;
            trackAnimationFrame(frames, current);
            storeFrontierOverTime(aStarSpecific, frontier);
        }
    }

    results.frames = frames;
    results.simulationSpecific = aStarSpecific;
    return results;
}
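/*
 * The helpers called above (addToFrontier, trackAnimationFrame, storeFrontierOverTime)
 * are defined elsewhere. As a minimal sketch of the serialization point made in the
 * header comment, storeFrontierOverTime presumably copies each frontier entry into a
 * flat AStarAnimationNode with no parent pointer, so the snapshot sent to the UI holds
 * no circular references. The AStarAnimationNode fields (id, f) and the CytoscapeNode
 * totalCost field used below are assumptions, not confirmed by this file.
 */
private static void storeFrontierOverTime(AStarSpecificAnimation aStarSpecific, IntervalHeap<CytoscapeNode> frontier)
{
    List<AStarAnimationNode> snapshot = new List<AStarAnimationNode>();
    foreach (CytoscapeNode node in frontier) // C5's IntervalHeap is enumerable
    {
        AStarAnimationNode flat = new AStarAnimationNode();
        flat.id = node.id;       // assumed field: node identifier only
        flat.f = node.totalCost; // assumed field: priority value; no parent pointer
                                 // is copied, which is what breaks the reference cycle
        snapshot.Add(flat);
    }
    aStarSpecific.frontierOverTime.Add(snapshot);
}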
// Returns a random policy
private static List<int> initializePolicy(CytoscapeParams cyParams)
{
    List<int> initialPolicy = new List<int>();
    Random rng = new Random();
    for (int i = 0; i < cyParams.nodes.Count; i++)
    {
        // Ensure walls are not given a valid policy
        if (cyParams.nodes[i].cellType != DPCellType.Wall)
        {
            initialPolicy.Add(rng.Next(0, 4));
        }
        else
        {
            initialPolicy.Add(-1);
        }
    }
    return initialPolicy;
}
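/*
 * The literal 4 in rng.Next(0, 4) implies four actions encoded as 0-3, with -1 as
 * the wall sentinel. A named encoding like the sketch below would make the intent
 * explicit; the actual direction-to-integer mapping used elsewhere in the project
 * is an assumption here, not confirmed by this file.
 */
private enum GridAction
{
    Up = 0,      // assumed mapping
    Right = 1,   // assumed mapping
    Down = 2,    // assumed mapping
    Left = 3,    // assumed mapping
    None = -1    // walls
}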
public Animation runSimulation(CytoscapeParams cyParams)
{
    Animation results = new Animation();
    List<List<int>> QLearning = new List<List<int>>();
    List<List<int>> SARSA = new List<List<int>>();
    randomGenerator = new Random();
    int numEpisodes = cyParams.nodes.Count * 3;
    timeHorizon = cyParams.nodes.Count * 3;

    // Set up maps from coordinates or ID to the nodes
    nodeMap = new Dictionary<Tuple<int, int>, CytoscapeNode>();
    nodeIDMap = new Dictionary<int, CytoscapeNode>();
    for (int i = 0; i < cyParams.nodes.Count; i++)
    {
        Tuple<int, int> coords = new Tuple<int, int>(cyParams.nodes[i].x, cyParams.nodes[i].y);
        nodeMap.Add(coords, cyParams.nodes[i]);
        nodeIDMap.Add(i, cyParams.nodes[i]);
    }

    epsilon = 0.9;
    originalEpsilon = epsilon;
    startID = cyParams.startID;
    goalID = cyParams.goalID;
    startNode = nodeIDMap[startID];
    goalNode = nodeIDMap[goalID];

    Tuple<List<string>, int> QLearningActionRewardPair;
    Tuple<List<string>, int> SARSAActionRewardPair;
    List<List<int>> QLearningEpisodes = new List<List<int>>();
    List<List<int>> SARSAEpisodes = new List<List<int>>();
    results.frames = new List<AnimationFrame>();

    for (int episodeNumber = 0; episodeNumber < numEpisodes; episodeNumber++)
    {
        // Run the episode for each algorithm
        QLearningActionRewardPair = runEpisode(QLEARNING_EPISODE);
        SARSAActionRewardPair = runEpisode(SARSA_EPISODE);

        // Epsilon decays every 10 episodes (the integer division is intentional)
        if (episodeNumber >= 10 && episodeNumber % 10 == 0)
        {
            epsilon = originalEpsilon / (episodeNumber / 10);
            if (epsilon <= 0.009)
            {
                epsilon = 0.0;
            }
        }

        // An animation frame holds both the QLearning and SARSA policies at this
        // point in training, plus the states each episode visited
        RLAnimationFrame frame = new RLAnimationFrame();
        frame.QLearningPolicy = collectCurrentOptimalPolicy(cyParams.nodes, QLEARNING_EPISODE);
        frame.SARSAPolicy = collectCurrentOptimalPolicy(cyParams.nodes, SARSA_EPISODE);
        frame.QLearningEpisodeStates = QLearningActionRewardPair.Item1;
        frame.SARSAEpisodeStates = SARSAActionRewardPair.Item1;
        results.frames.Add(frame);
    }
    return results;
}
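/*
 * runEpisode is not shown here. As a minimal sketch of how the epsilon field above is
 * typically consumed, this is an epsilon-greedy action selection, assuming a Q-table
 * keyed by (state id, action). The table shape, the four-action count, and the helper
 * name are assumptions for illustration, not the project's confirmed API.
 */
private int chooseActionEpsilonGreedy(double[,] qTable, int stateID)
{
    // With probability epsilon, explore: pick one of the four actions at random
    if (randomGenerator.NextDouble() < epsilon)
    {
        return randomGenerator.Next(0, 4);
    }
    // Otherwise exploit: pick the action with the highest Q-value for this state
    int bestAction = 0;
    for (int a = 1; a < 4; a++)
    {
        if (qTable[stateID, a] > qTable[stateID, bestAction])
        {
            bestAction = a;
        }
    }
    return bestAction;
}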
// Generates placeholder DP animation frames (random policies, dummy values) so the
// UI can be exercised without running the solver; see the commented-out early
// return at the top of runSimulation below
private static Animation sampleData(int startID, int goalID, CytoscapeParams cyParams)
{
    Animation results = new Animation();
    List<AnimationFrame> frames = new List<AnimationFrame>();
    DPAnimationFrame frame;
    int numFrames = 3;
    List<double> currentIteration;
    List<int> currentPolicy;
    List<List<double>> calculationRound;
    Random rng = new Random();
    int iterationNum = 0;

    for (int j = 0; j < numFrames; j++)
    {
        // Initialize a new frame
        frame = new DPAnimationFrame();

        // Populate the policy for this round
        currentPolicy = new List<int>();
        for (int i = 0; i < cyParams.nodes.Count; i++)
        {
            // Ensure walls are not given a valid policy
            if (cyParams.nodes[i].cellType != DPCellType.Wall)
            {
                currentPolicy.Add(rng.Next(0, 4));
            }
            else
            {
                currentPolicy.Add(-1);
            }
        }

        // Populate the values for this round
        calculationRound = new List<List<double>>();
        for (int k = 0; k < 3; k++)
        {
            iterationNum++;
            currentIteration = new List<double>();
            for (int l = 0; l < cyParams.nodes.Count; l++)
            {
                if (cyParams.nodes[l].cellType != DPCellType.Wall)
                {
                    currentIteration.Add(iterationNum);
                }
                else
                {
                    currentIteration.Add(-1);
                }
            }
            calculationRound.Add(currentIteration);
        }

        // Set and add the frame
        frame.values = calculationRound;
        frame.policy = currentPolicy;
        frames.Add(frame);
    }
    results.frames = frames;
    return results;
}
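/*
 * Shape of the dummy data above, for reference: each DPAnimationFrame carries
 * values[iteration][nodeIndex] (three sweeps per frame here, filled with the running
 * iteration number) and policy[nodeIndex]; walls hold -1 in both. The real
 * runSimulation below fills the same structure with actual Bellman values.
 */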
public static Animation runSimulation(CytoscapeParams cyParams)
{
    //return sampleData(startID, goalID, cyParams);
    int startID = cyParams.startID;
    int goalID = cyParams.goalID;
    double theta = cyParams.theta;
    SLIPPING_PROB = cyParams.probOfSlipping;
    Animation results = new Animation();
    List<AnimationFrame> frames = new List<AnimationFrame>();
    List<int> currentPolicy, previousPolicy;
    List<double> currentIteration = new List<double>();
    List<double> deltaForIteration = new List<double>();
    List<List<double>> calculation;
    List<List<double>> deltaForCalculation;
    currentPolicy = initializePolicy(cyParams);
    DPSpecific simulationSpecific = new DPSpecific();

    // Maps the id of a location to the utility value associated with it
    Dictionary<int, double> utilityFunction = new Dictionary<int, double>();
    Dictionary<Tuple<int, int>, CytoscapeNode> nodeMap = new Dictionary<Tuple<int, int>, CytoscapeNode>();
    Dictionary<int, CytoscapeNode> nodeIDMap = new Dictionary<int, CytoscapeNode>();
    for (int i = 0; i < cyParams.nodes.Count; i++)
    {
        utilityFunction.Add(i, 0.0);
        Tuple<int, int> coords = new Tuple<int, int>(cyParams.nodes[i].x, cyParams.nodes[i].y);
        nodeMap.Add(coords, cyParams.nodes[i]);
        nodeIDMap.Add(i, cyParams.nodes[i]);
    }

    double delta = 0.0;
    double gamma = 0.9;
    int timeHorizon = cyParams.nodes.Count;
    int iterationNumber = 0;
    double currentValue = 0.0;
    bool policyHasChanged = true;
    DPAnimationFrame frame;
    CytoscapeNode newState;
    int action;

    // Policy iteration: evaluate the current policy until the values converge
    // (delta < theta), then improve the policy; stop once the policy is stable
    // or the time horizon is reached
    while (policyHasChanged && iterationNumber < timeHorizon)
    {
        frame = new DPAnimationFrame();
        delta = theta;

        // Perform a Bellman calculation
        calculation = new List<List<double>>();
        deltaForCalculation = new List<List<double>>();
        while (delta >= theta)
        {
            currentIteration = new List<double>();
            deltaForIteration = new List<double>();
            delta = 0.0;
            // Loop through every state (location in the maze)
            for (int i = 0; i < cyParams.nodes.Count; i++)
            {
                // Walls default to the value WALL_VALUE
                if (cyParams.nodes[i].cellType == DPCellType.Wall)
                {
                    deltaForIteration.Add(delta);
                    currentIteration.Add(WALL_VALUE);
                    continue;
                }
                // Get the value of the current utility function for that state
                currentValue = utilityFunction[i];
                // Update the utility for this state: sum over the possible outcomes of
                // prob(oldState, policyAction, newState) * (reward(newState) + gamma * utility(newState))
                action = currentPolicy[cyParams.nodes[i].id];
                double newFunctionVal = 0.0;
                for (int j = 0; j < 5; j++)
                {
                    newState = getNewState(nodeMap, j, cyParams.nodes[i]);
                    newFunctionVal += probabilityOfTransition(cyParams.nodes[i], nodeMap, j, action)
                                      * (rewardForState(newState, goalID) + gamma * utilityFunction[newState.id]);
                }
                utilityFunction[i] = newFunctionVal;

                if (Math.Abs(currentValue - utilityFunction[i]) > delta)
                {
                    delta = Math.Abs(currentValue - utilityFunction[i]);
                }
                deltaForIteration.Add(delta);
                currentIteration.Add(utilityFunction[i]);
            }
            // Store the values for this iteration
            calculation.Add(currentIteration);
            deltaForCalculation.Add(deltaForIteration);
        }

        // Store the values for this calculation
        frame.policy = new List<int>(currentPolicy);
        frame.values = calculation;
        frame.deltas = deltaForCalculation;

        // Update the policy
        previousPolicy = new List<int>(currentPolicy);
        currentPolicy = updatePolicy(currentPolicy, currentIteration, nodeMap, nodeIDMap);
        if (previousPolicy.SequenceEqual(currentPolicy))
        {
            policyHasChanged = false;
        }
        frames.Add(frame);
        iterationNumber++;
    }

    simulationSpecific.gamma = gamma;
    simulationSpecific.theta = theta;
    results.simulationSpecific = simulationSpecific;
    results.frames = frames;
    return results;
}
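/*
 * updatePolicy is not shown here. A minimal greedy-improvement sketch of what it
 * presumably does: for every non-wall state, pick the action whose expected value
 * under the converged utilities is highest, reusing the same transition/reward
 * helpers and five-outcome loop as the evaluation above. The goalID and gamma
 * parameters are added here only to keep the sketch self-contained; the real
 * method's four-argument signature differs, and the assumption that list index i
 * equals node id follows from how nodeIDMap is built above.
 */
private static List<int> updatePolicySketch(List<int> currentPolicy, List<double> utilities,
    Dictionary<Tuple<int, int>, CytoscapeNode> nodeMap, Dictionary<int, CytoscapeNode> nodeIDMap,
    int goalID, double gamma)
{
    List<int> improved = new List<int>(currentPolicy);
    for (int i = 0; i < currentPolicy.Count; i++)
    {
        if (currentPolicy[i] == -1)
        {
            continue; // walls keep the sentinel action
        }
        int bestAction = currentPolicy[i];
        double bestValue = double.NegativeInfinity;
        for (int action = 0; action < 4; action++)
        {
            // Expected value of taking 'action', accounting for slipping outcomes
            double expected = 0.0;
            for (int j = 0; j < 5; j++)
            {
                CytoscapeNode newState = getNewState(nodeMap, j, nodeIDMap[i]);
                expected += probabilityOfTransition(nodeIDMap[i], nodeMap, j, action)
                            * (rewardForState(newState, goalID) + gamma * utilities[newState.id]);
            }
            if (expected > bestValue)
            {
                bestValue = expected;
                bestAction = action;
            }
        }
        improved[i] = bestAction;
    }
    return improved;
}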