/// <summary>
/// Points the q-matrix widgets at a single q-matrix entry: selects the matching percept in
/// every small dropdown, selects the entry in the large state dropdown, and fills the
/// q-value textboxes from <c>loadedState.liveQmatrix</c>.
/// Called when a dropdown selection changes and while the q-matrix forms are refreshed.
/// </summary>
/// <param name="stateTo_view">
/// The entry to display. It is looked up in <c>loadedState.liveQmatrix.matrixData</c>, so it
/// must already exist there. A null value means "nothing to show" and is ignored.
/// </param>
static public void ViewQmatrixConfiguration(PerceptionState stateTo_view)
{
    if (stateTo_view == null)
    {
        // Callers can end up without a matching entry; leave the widgets untouched.
        return;
    }

    // Assigning SelectedIndex raises the index-changed events of the dropdowns. The flag is
    // raised for the duration of the update (presumably the handlers check it and bail out).
    lock_indexChange_events = true;
    try
    {
        // Small dropdowns: one per move, each showing the percept seen in that direction.
        foreach (var move in Move.HorizontalMovesAndGrab)
        {
            var dropdown = list_qmatrixComboboxes[move];
            dropdown.SelectedIndex = dropdown.Items.IndexOf(stateTo_view.perceptionData[move]);
        }

        // Large dropdown: the complete perception state.
        qmatrix_stateComboboxLarge.SelectedIndex = qmatrix_stateComboboxLarge.Items.IndexOf(stateTo_view);

        // Textboxes: the stored q-value of every move for this state.
        foreach (var move in Move.HorizontalMovesAndGrab)
        {
            List_qmatrix_valueTextboxes[move].Text = loadedState.liveQmatrix.matrixData[stateTo_view].moveList[move].ToString();
        }
    }
    finally
    {
        // Always release the flag; otherwise one failed lookup would leave the dropdown
        // events suppressed for the rest of the session.
        lock_indexChange_events = false;
    }
}
/// <summary>
/// Rebuilds the perception of the given unit from the board and stores it on that unit.
/// </summary>
/// <param name="toFind">The unit type whose perception is refreshed.</param>
public void UnitPercieves(UnitType toFind)
{
    var freshPerception = new PerceptionState(toFind);

    // One percept per move the unit is able to sense with.
    foreach (var sensingMove in toFind.PerceptionCauses)
    {
        var sensed = PercieveMove(sensingMove, toFind);
        freshPerception.perceptionData[sensingMove] = sensed;
    }

    // The name is derived after all percepts are in place.
    freshPerception.SetName();

    units[toFind].perceptionData = freshPerception;
}
/// <summary>
/// Copy constructor: creates a unit carrying the same name, locations, enemy, chasing flag
/// and perception as <paramref name="setFrom"/>.
/// </summary>
/// <param name="setFrom">
/// The unit to copy. Note that its <c>ID</c> is incremented as a side effect of the copy.
/// </param>
public UnitBase(UnitBase setFrom)
{
    // Identity.
    this.unitName = setFrom.unitName;
    this.enemy = setFrom.enemy;

    // The copy takes the source's current ID, then the source's ID is advanced by one.
    // NOTE(review): mutating the source inside a copy constructor is unusual - confirm
    // that this is intended rather than a plain "ID = setFrom.ID".
    this.ID = setFrom.ID;
    setFrom.ID++;

    // Position and behaviour.
    this.currentLocation = setFrom.currentLocation;
    this.previousLocation = setFrom.previousLocation;
    this.chasing = setFrom.chasing;

    // Perception: the copy starts out from the source's most recent perception, which
    // therefore also serves as the copy's starting state.
    this.perceptionData = setFrom.perceptionData;
    this.startingPerceptionState = setFrom.perceptionData;
}
/// <summary>
/// Reacts to a change in one of the small per-move percept dropdowns. Because the other
/// dropdowns were not changed, the combination now shown may not exist in the q-matrix, so
/// the best-matching existing entry is looked up and displayed instead. A candidate must
/// contain the percept that was just chosen for the changed dropdown's move; among those,
/// the first entry with the highest <c>Compare</c> score wins.
/// </summary>
/// <param name="changed_dropdown">The small dropdown whose selection changed.</param>
static public void small_dropdownChanged(ComboBox changed_dropdown)
{
    // Only a real percept selection can be matched. This replaces a comparison of
    // SelectedText against "None.": SelectedText is the highlighted text of the editable
    // portion (not the selected item), and the placeholder item is the string "None", so
    // that comparison never filtered anything and the cast below could throw.
    if (!(changed_dropdown.SelectedItem is Percept))
    {
        return;
    }
    Percept keepFor_bestFit = (Percept)changed_dropdown.SelectedItem;

    // Work out which move the changed dropdown belongs to.
    Move perceptMove = null;
    foreach (var move in Move.HorizontalMovesAndGrab)
    {
        if (changed_dropdown == list_qmatrixComboboxes[move])
        {
            perceptMove = move;
        }
    }
    if (perceptMove == null)
    {
        // Not one of the q-matrix dropdowns; there is nothing to match against.
        return;
    }

    // Build a perception state mirroring what the dropdowns currently show.
    PerceptionState to_set = new PerceptionState(UnitType.Bender);
    foreach (var move in Move.HorizontalMovesAndGrab)
    {
        to_set.perceptionData[move] = (Percept)list_qmatrixComboboxes[move].SelectedItem;
    }
    to_set.SetName();

    // Find the closest existing entry. The large dropdown can also hold a placeholder
    // string, so anything that is not a PerceptionState is skipped.
    int bestScore = 0;
    PerceptionState bestPerceptionstate = null;
    foreach (object item in qmatrix_stateComboboxLarge.Items)
    {
        PerceptionState candidate = item as PerceptionState;
        if (candidate == null)
        {
            continue;
        }

        int score = to_set.Compare(candidate);
        if (score > bestScore && candidate.Contains(perceptMove, keepFor_bestFit))
        {
            bestPerceptionstate = candidate;
            bestScore = score;
        }
    }

    // No entry qualified: keep the current display rather than passing null on.
    if (bestPerceptionstate == null)
    {
        return;
    }

    ViewQmatrixConfiguration(bestPerceptionstate);
}
/// <summary>
/// Chooses the next move for the given perception. When the q-matrix has an entry for the
/// state, a random move is taken with probability <c>e</c> (and <c>randomlyMoved</c> is
/// set); otherwise the move with the highest stored value is taken, ties being broken at
/// random. Without an entry, a random move is returned.
/// </summary>
/// <param name="perceievedState">The perception to choose a move for.</param>
/// <returns>A move taken from the q-matrix entry or from <c>Move.HorizontalMovesAndGrab</c>.</returns>
public Move GenerateStep(PerceptionState perceievedState)
{
    var allMoves = Move.HorizontalMovesAndGrab;

    if (matrixData.Keys.Contains(perceievedState))
    {
        // The draw is uniform over 1..100 (the upper bound is exclusive, as everywhere else
        // MyRandom.Next is used to pick an index). "<=" makes the exploration chance exactly
        // e: with "<" it was one percentage point too low, so e == 1 still exploited 1% of
        // the time. e == 0 never explores either way.
        if (MyRandom.Next(1, 101) <= e * 100)
        {
            randomlyMoved = true;
            // Use the real list size instead of a hard-coded 5.
            return allMoves[MyRandom.Next(0, allMoves.Count)];
        }

        // Greedy selection: keep every move tied for the highest stored value.
        List<Move> bestMoves = new List<Move>();
        double bestValue = 0;
        foreach (var entry in matrixData[perceievedState].moveList)
        {
            if (bestMoves.Count == 0 || bestValue < entry.Value)
            {
                bestMoves.Clear();
                bestMoves.Add(entry.Key);
                bestValue = entry.Value;
            }
            else if (bestValue == entry.Value)
            {
                bestMoves.Add(entry.Key);
            }
        }

        if (bestMoves.Count > 0)
        {
            return bestMoves[MyRandom.Next(0, bestMoves.Count)];
        }
        // An entry without any moves gives nothing to choose from; fall through to the
        // random move instead of indexing an empty collection.
    }

    // No usable q-matrix entry, so just do a random move.
    return allMoves[MyRandom.Next(0, allMoves.Count)];
}
/// <summary>
/// Writes a new q-value for (<paramref name="stateToUpdate"/>, <paramref name="resultMove"/>)
/// based on the state that move led to. The value is
/// <c>n * ((best(resultState) - best(stateToUpdate)) * y + baseReward)</c>, where a state
/// missing from the q-matrix counts as 0 and <c>y = Ybase ^ (setNumber - 1)</c>.
/// The field <c>y</c> is refreshed, and <c>didWeUpdate</c> records whether a value was
/// written; a result of exactly 0 is not stored.
/// </summary>
/// <param name="stateToUpdate">State the move was taken from.</param>
/// <param name="resultState">State observed afterwards.</param>
/// <param name="resultMove">The move that was taken.</param>
/// <param name="baseReward">Reward obtained for the move.</param>
public void UpdateState(PerceptionState stateToUpdate, PerceptionState resultState, Move resultMove, double baseReward)
{
    // Best known value at the state we acted from (0 when it has no entry yet).
    double bestBefore = 0;
    if (matrixData.Keys.Contains(stateToUpdate))
    {
        bestBefore = matrixData[stateToUpdate].GetBestValue();
    }

    // Best known value at the state we ended up in (0 when it has no entry yet).
    double bestAfter = 0;
    if (matrixData.Keys.Contains(resultState))
    {
        bestAfter = matrixData[resultState].GetBestValue();
    }

    double gain = bestAfter - bestBefore;

    // The discount factor is stored on the instance, not just used locally.
    y = Math.Pow(Ybase, setNumber - 1);

    double discountedGain = gain * y;
    double target = discountedGain + baseReward;
    double finalValue = n * target;

    // Picked up later by the status message.
    didWeUpdate = finalValue != 0;
    if (!didWeUpdate)
    {
        return;
    }

    // First time this state is encountered: create its row before writing into it.
    if (!matrixData.Keys.Contains(stateToUpdate))
    {
        matrixData[stateToUpdate] = new ValueSet();
    }
    matrixData[stateToUpdate][resultMove] = finalValue;
}
/// <summary>
/// Rebuilds the q-matrix widgets from <paramref name="current_state"/>: the stored-entries
/// counter, the small per-move percept dropdowns, the large state dropdown, and (through
/// <c>ViewQmatrixConfiguration</c>) the current selection and q-value textboxes.
/// With an empty q-matrix the dropdowns show placeholder items and the textboxes are cleared.
/// </summary>
/// <param name="current_state">The algorithm state whose q-matrix is displayed.</param>
/// <param name="perceptionTo_view">
/// The perception to select. It may be absent from the q-matrix, in which case the first
/// entry is shown. When null, Bender's perception in <paramref name="current_state"/> is used.
/// </param>
static private void HandleQmatrixForms(AlgorithmState current_state, PerceptionState perceptionTo_view)
{
    var qmatrixEntries = current_state.liveQmatrix.matrixData;
    qmatrix_stored_entires.Text = qmatrixEntries.Count.ToString();

    // There may be no q-matrix data at the step being displayed.
    if (qmatrixEntries.Count == 0)
    {
        foreach (var dropdown in list_qmatrixComboboxes.Values)
        {
            dropdown.Items.Clear();
            dropdown.Items.Add("None");
        }

        qmatrix_stateComboboxLarge.Items.Clear();
        qmatrix_stateComboboxLarge.Items.Add("A q-matrix entry has not yet been made.");

        foreach (var valueBox in List_qmatrix_valueTextboxes.Values)
        {
            valueBox.Clear();
        }
        return;
    }

    // For every move, collect the distinct percepts that occur for it across all entries.
    Dictionary<Move, HashSet<Percept>> perceptsPerMove = new Dictionary<Move, HashSet<Percept>>();
    foreach (var move in Move.HorizontalMovesAndGrab)
    {
        perceptsPerMove.Add(move, new HashSet<Percept>());
    }
    foreach (var entry in qmatrixEntries.Keys)
    {
        foreach (var move in Move.HorizontalMovesAndGrab)
        {
            perceptsPerMove[move].Add(entry.perceptionData[move]);
        }
    }

    // Refill each small dropdown with its percepts in a stable order.
    foreach (var move in Move.HorizontalMovesAndGrab)
    {
        var dropdown = list_qmatrixComboboxes[move];
        dropdown.Items.Clear();
        foreach (var percept in perceptsPerMove[move].OrderBy(o => o.perceptData))
        {
            dropdown.Items.Add(percept);
        }
    }

    // Refill the overall-state dropdown.
    qmatrix_stateComboboxLarge.Items.Clear();
    foreach (var entry in qmatrixEntries.Keys.OrderBy(o => o.ID))
    {
        qmatrix_stateComboboxLarge.Items.Add(entry);
    }

    // Select what the caller asked for. The parameter used to be ignored in favour of
    // Bender's perception; that remains the fallback when no perception is supplied.
    PerceptionState requested = perceptionTo_view ?? current_state.GetPerception(UnitType.Bender);
    if (requested != null && qmatrixEntries.Keys.Contains(requested))
    {
        ViewQmatrixConfiguration(requested);
    }
    else
    {
        // Fall back to an entry of the q-matrix that was just loaded into the dropdowns
        // (known to be non-empty here), not to whichever state happens to be loaded.
        ViewQmatrixConfiguration(qmatrixEntries.Keys.First());
    }
}
/// <summary>
/// Refreshes everything on screen from <c>loadedState</c>, which must have been set by the
/// caller beforehand. Runs on every algorithm step and whenever the displayed data changes
/// for any other reason; also keeps the history dropdowns up to date.
/// </summary>
static public void DisplayState()
{
    // Mirror the state's board onto the picture board.
    picture_board.ClonePosition(loadedState.boardData);

    // The following fields only make sense once the algorithm is running.
    if (AlgorithmManager.algorithmStarted)
    {
        // Q-matrix dropdowns and value textboxes (copes with an empty q-matrix as well).
        HandleQmatrixForms(loadedState, loadedState.GetPerception(UnitType.Bender));

        // Session progress.
        stepNumber.Text = loadedState.GetStepNumber().ToString();
        episodeNumber.Text = loadedState.GetEpisodeNumber().ToString();
        e_session.Text = GetString(loadedState.liveQmatrix.e);
        y_session.Text = loadedState.liveQmatrix.y.ToString();

        // What Bender currently perceives, one textbox per move.
        PerceptionState benderPerception = loadedState.boardData.units[UnitType.Bender].perceptionData;
        foreach (var move in Move.HorizontalMovesAndGrab)
        {
            listCurrentPositionTextboxes[move].Text = benderPerception.perceptionData[move].ToString();
        }

        // Cans and rewards.
        beer_remaining.Text = loadedState.boardData.GetCansRemaining().ToString();
        beerCollected.Text = loadedState.cansCollected.ToString();
        reward_episode.Text = loadedState.episodeRewards.ToString();
        rewardTotal.Text = loadedState.totalRewards.ToString();

        // Episode history: append the newest episode when the dropdown has fewer items than
        // the history, then select the last item.
        if (combobox_history_episodes.Items.Count < AlgorithmManager.stateHistory.Count)
        {
            combobox_history_episodes.Items.Add(AlgorithmManager.stateHistory.Last());
        }
        combobox_history_episodes.SelectedIndex = combobox_history_episodes.Items.Count - 1;

        // Step history: reload it from the newest episode unless the loaded state is already
        // listed and is not step 0.
        if (!(combobox_history_steps.Items.Contains(loadedState) && loadedState.GetStepNumber() != 0))
        {
            combobox_history_steps.Items.Clear();
            combobox_history_steps.Items.AddRange(AlgorithmManager.stateHistory.Last().ToArray());
            combobox_history_steps.Text = loadedState.ToString();
        }
    }

    status_box.Text = loadedState.GetStatus();

    // Redraw every square of the board.
    foreach (var row in picture_board.boardData)
    {
        foreach (SquareBoardDisplay square in row)
        {
            square.SetPicture();
        }
    }

    DisplayInitialSettings();

    // Once the algorithm has ended, stepping is disabled.
    if (AlgorithmManager.algorithmEnded)
    {
        groupboxControlProgress.Enabled = false;
    }
}
/// <summary>
/// Advances the simulation by one real step: both units perceive, Bender picks and applies
/// a move from the q-matrix, Url's reaction is decided (attack, follow, start chasing, or
/// wander), the reward is booked and handed to the q-matrix, Url moves unless he attacked,
/// and finally both units perceive again.
/// At the algorithm-manager level "generate step" can also mean starting the algorithm and
/// creating the step-0 history entry; this method is only for an actual step.
/// </summary>
public void Step()
{
    boardData.UnitPercieves(UnitType.Url);
    boardData.UnitPercieves(UnitType.Bender);
    Move urlFirstMove = null;   // the move Url ends up making this step
    Move urlSecondMove = null;  // direction in which Url sees Bender after Bender moved, if any
    List <Move> UrlMoves = null; // candidate moves when Url wanders
    Move benderMove = null;

    // Url senses twice: once here (before Bender moves) and once after Bender's move.
    // If Bender moves into him, Url does not start chasing until the next turn.
    // While chasing, remember the direction in which Url currently sees Bender.
    // NOTE(review): every branch of the chasing / not-chasing logic further down assigns
    // urlFirstMove again before it is read, so the value found here appears to be unused.
    if (GetUnit(UnitType.Url).chasing)
    {
        foreach (var i in boardData.units[UnitType.Url].perceptionData.perceptionData)
        {
            if (i.Value == Percept.Enemy)
            {
                urlFirstMove = i.Key;
            }
        }
    }

    // Bender section: choose a move from the q-matrix for his current perception.
    benderMove = liveQmatrix.GenerateStep(GetPerception(UnitType.Bender));
    GetUnit(UnitType.Bender).SetMoveThisStep(benderMove); // stored for the status message

    // Apply Bender's move (if possible) and keep its result.
    resultThisStep = boardData.ApplyMove(UnitType.Bender, benderMove);

    // From here on: see whether Url attacks Bender, before the q-matrix is updated.
    boardData.UnitPercieves(UnitType.Url); // what Url sees after Bender moved

    // A chasing Url gives up at random: a draw from [0, URLStopsChasingChance) that comes
    // out as 0 ends the chase (assumes MyRandom.Next has an exclusive upper bound - TODO confirm).
    if (GetUnit(UnitType.Url).chasing&& MyRandom.Next(0, InitialSettings.URLStopsChasingChance) == 0)
    {
        urlRandomlyStopped = true;
        GetUnit(UnitType.Url).chasing = false;
    }

    if (GetUnit(UnitType.Url).chasing)
    {
        // Url is already chasing. Check whether Bender, who moves first, failed to get away.
        foreach (var i in boardData.units[UnitType.Url].perceptionData.perceptionData)
        {
            if (i.Value == Percept.Enemy)
            {
                urlSecondMove = i.Key;
            }
        }
        if (urlSecondMove != null)
        {
            // Bender is still in view: attack him. The step result becomes EnemyEncountered.
            obtainedReward = MoveResult.list[MoveResult.EnemyEncountered];
            benderAttacked = true;
            resultThisStep = MoveResult.EnemyEncountered;
            urlFirstMove = urlSecondMove;
        }
        else
        {
            // Bender got out of view: copy his move to follow him.
            urlFirstMove = benderMove;
        }
    }
    else
    {
        // Url is not chasing. Check whether Bender ran into Url's view with his move.
        foreach (var i in boardData.units[UnitType.Url].perceptionData.perceptionData)
        {
            if (i.Value == Percept.Enemy)
            {
                urlSecondMove = i.Key;
            }
        }
        if (urlSecondMove != null)
        {
            // Bender is in view: start chasing, but wait this turn.
            startedChasing = true;
            GetUnit(UnitType.Url).chasing = true;
            urlFirstMove = Move.Wait;
        }
        else
        {
            // Bender is not in view: wander. Candidates are the cardinal moves that do not
            // lead into a wall (Grab and Wait are excluded explicitly).
            UrlMoves = new List <Move>();
            foreach (var i in boardData.units[UnitType.Url].perceptionData.perceptionData)
            {
                if (i.Value != Percept.Wall && i.Key != Move.Grab && i.Key != Move.Wait && Move.CardinalMoves.Contains(i.Key))
                {
                    UrlMoves.Add(i.Key);
                }
            }
            // The author expects at least one candidate here.
            // NOTE(review): an empty list would make this indexing throw - confirm that the
            // board layout really guarantees a free cardinal direction.
            urlFirstMove = UrlMoves[MyRandom.Next(0, UrlMoves.Count)];
        }
    }

    obtainedReward = MoveResult.list[resultThisStep]; // reward for this step's result
    episodeRewards += obtainedReward;                 // running total for the episode
    if (resultThisStep == MoveResult.CanCollected)
    {
        ++cansCollected;
    }

    // Flag the end of the run once the step limit is reached in an episode past the limit.
    if (GetStepNumber() == Qmatrix.stepLimit && GetEpisodeNumber() > Qmatrix.episodeLimit)
    {
        AlgorithmManager.algorithmEnded = true;
    }
    if (benderMove == null)
    {
        boardData.units[UnitType.Url].chasing = false;
    }

    // Hand the transition to the q-matrix.
    // NOTE(review): within this method Bender's perception is not refreshed between his
    // move and this call; whether GetPerception returns the post-move view is not visible
    // here - confirm.
    PerceptionState startingState = GetUnit(UnitType.Bender).GetStartingPerceptionState();
    liveQmatrix.UpdateState(startingState, GetPerception(UnitType.Bender), benderMove, obtainedReward);
    GetUnit(UnitType.Url).SetMoveThisStep(urlFirstMove); // stored for the status message

    // Move Url, unless he attacked Bender this step.
    if (!benderAttacked)
    {
        boardData.ApplyMove(UnitType.Url, urlFirstMove);
        urlFirstMove = null;
        // Now that Url has moved, check whether Bender is in view and start chasing if so.
        // NOTE(review): Url's perception is not rebuilt via UnitPercieves between the scan
        // above and this one; unless ApplyMove refreshes it, this scan sees the same data
        // that already contained no enemy - confirm.
        if (GetUnit(UnitType.Url).chasing == false)
        {
            foreach (var i in boardData.units[UnitType.Url].perceptionData.perceptionData)
            {
                if (i.Value == Percept.Enemy)
                {
                    urlFirstMove = i.Key;
                }
            }
            if (urlFirstMove != null)
            {
                GetUnit(UnitType.Url).chasing = true;
                startedChasing = true;
            }
        }
    }

    // Each unit perceives again after the step.
    UnitPerceives(UnitType.Bender);
    UnitPerceives(UnitType.Url);
}