PerceptionState, ReinforcementLearning C# (CSharp) Exemples de code

Exemple #1

0

Afficher le fichier

        //This is called when the dropdown menu boxes change, as well as from display_state().
        //This will update the comboboxes, state dropdown, and the q-matrix value textboxes.
        //Selects state_to_view in every small percept dropdown and in the large state dropdown,
        //then fills the value textboxes with that state's q-values from loaded_state's q-matrix.
        //state_to_view: the q-matrix state to show. A null state is ignored.
        static public void ViewQmatrixConfiguration(PerceptionState state_to_view)
        {
            //A best-fit search in a caller can come back empty; there is nothing to show then.
            if (state_to_view == null)
            {
                return;
            }

            //Assigning SelectedIndex raises the selected-index-changed events,
            //so raise the flag that marks those events as ours while we work.
            lock_index_change_events = true;
            try
            {
                //Small dropdowns: select the percept this state holds for each move
                foreach (var move in Move.list)
                {
                    var percept_dropdown = list_qmatrix_comboboxes[move];
                    percept_dropdown.SelectedIndex = percept_dropdown.Items.IndexOf(state_to_view.perception_data[move]);
                }

                //Large dropdown: select the state itself
                qmatrix_state_combobox_large.SelectedIndex = qmatrix_state_combobox_large.Items.IndexOf(state_to_view);

                //Value textboxes: one stored q-value per move
                var stored_values = loaded_state.live_qmatrix.matrix_data[state_to_view];
                foreach (var move in Move.list)
                {
                    List_qmatrix_value_textboxes[move].Text = stored_values.move_list[move].ToString();
                }
            }
            finally
            {
                //Always lower the flag, even when a lookup above throws,
                //otherwise later dropdown changes would stay suppressed.
                lock_index_change_events = false;
            }
        }

Exemple #2

0

Afficher le fichier

        //At the algorithm manager, generate step is ambiguous with actually stepping through the algorithm,
        //Or starting the algorithm, and making the first history entry at step 0.
        //Here, a step only happens when we have been asked by the manager to *actually* take a step.
        //Performs one real step: asks the q-matrix for a move, applies it to the board,
        //records the reward and where bender ended up, then passes the transition to the q-matrix.
        public void TakeStep()
        {
            //Choose a move for the starting perception, try it on the board, and look up the reward for the outcome
            move_this_step   = live_qmatrix.GenerateStep(bender_perception_starting);
            result_this_step = board_data.ApplyMove(move_this_step);
            obtained_reward  = ReinforcementFactors.list[result_this_step];

            //Keep the episode's running totals up to date
            episode_rewards += obtained_reward;
            if (result_this_step == MoveResult.can_collected())
            {
                cans_collected++;
            }

            //Record bender's coordinates and perception after the move
            var bender = board_data.bender;
            location_result          = new int[] { bender.x_coordinate, bender.y_coordinate };
            bender_perception_ending = bender.get_perception_state();

            //Let the q-matrix digest the start state, end state, move and reward
            live_qmatrix.UpdateState(bender_perception_starting, bender_perception_ending, move_this_step, obtained_reward);

            //The run is over when the step limit is hit and the episode number is past the episode limit
            bool step_limit_hit = GetStepNumber() == Qmatrix.step_limit;
            if (step_limit_hit && GetEpisodeNumber() > Qmatrix.episode_limit)
            {
                algorithm_ended = true;
            }
        }

Exemple #3

0

Afficher le fichier

        //Copied from another algorithm state
        //We reset some data, so we don't reflect values that aren't true for the new state
        //This constructor is called when a new step is being generated, so we transfer some values appropriately.
        //Builds the state for the next step out of the previous step's state (set_from).
        //Counters and rewards carry over, the board and q-matrix go through their own copy constructors,
        //and the previous step's ending position and perception become this step's starting ones.
        public AlgorithmState(AlgorithmState set_from)
        {
            //Running tallies carry over unchanged
            cans_collected  = set_from.cans_collected;
            episode_rewards = set_from.episode_rewards;
            total_rewards   = set_from.total_rewards;

            //Board and q-matrix are copied through their copy constructors
            board_data   = new GameBoard(set_from.board_data);
            live_qmatrix = new Qmatrix(set_from.live_qmatrix);

            //This step starts at the coordinates and perception the previous step ended with
            var previous_end = set_from.location_result;
            location_initial           = new int[] { previous_end[0], previous_end[1] };
            bender_perception_starting = set_from.bender_perception_ending;

            //Advance the step counter, or start a new episode when the copied counter is at the step limit
            if (live_qmatrix.step_number != Qmatrix.step_limit)
            {
                live_qmatrix.step_number++;
            }
            else
            {
                StartNewEpisode();
            }
        }

Exemple #4

0

Afficher le fichier

Fichier : Unit.cs Projet : NelsonG6/BenderWorld

 //Copy constructor: every member is taken from set_from by plain assignment,
 //so any member that is a reference type ends up shared with set_from rather than cloned.
 public Unit(Unit set_from)
 {
     //Name and image
     this.unit_name  = set_from.unit_name;
     this.unit_image = set_from.unit_image;

     //Position
     this.x_coordinate      = set_from.x_coordinate;
     this.y_coordinate      = set_from.y_coordinate;
     this.current_location  = set_from.current_location;
     this.previous_location = set_from.previous_location;

     //Perception
     this.perception_data = set_from.perception_data;
 }

Exemple #5

0

Afficher le fichier

Fichier : Unit.cs Projet : NelsonG6/BenderWorld

        //Default constructor: coordinates (0, 0), no name, no current or previous location,
        //and a freshly constructed PerceptionState. unit_image is not assigned here.
        public Unit()
        {
            this.x_coordinate = 0;
            this.y_coordinate = 0;

            this.unit_name         = null;
            this.current_location  = null;
            this.previous_location = null;

            this.perception_data = new PerceptionState();
        }

Exemple #6

0

Afficher le fichier

Fichier : GameBoard.cs Projet : NelsonG6/BenderWorld

        //This function will give bender perception data from the board
        //Rebuilds bender's perception from the board: calls percieve once per move, names the
        //resulting state, and stores whatever PerceptionState.GetPerceptionFromList returns for it on bender.
        public void bender_percieves()
        {
            var observed = new PerceptionState();

            //One percept per move
            foreach (var move in Move.list)
            {
                observed.perception_data[move] = percieve(move);
            }

            observed.set_name();

            //Store the instance handed back by the lookup, not the scratch object built above
            var listed_state = PerceptionState.GetPerceptionFromList(observed);
            bender.perception_data = listed_state;
        }

Exemple #7

0

Afficher le fichier

        //Reacts to a change in one of the small percept dropdowns: builds the state the dropdowns
        //currently describe, finds the listed q-matrix state that matches it best while still
        //holding the percept that was just chosen, and displays that state.
        //changed_dropdown: the dropdown whose selection changed.
        static public void small_dropdown_changed(ComboBox changed_dropdown)
        {
            //Only a real percept can be matched. Placeholder entries in these dropdowns are plain
            //strings, and SelectedText (the highlighted text) says nothing about the chosen item.
            if (changed_dropdown == null || !(changed_dropdown.SelectedItem is Percept))
            {
                return;
            }

            //The percept every candidate state must keep
            Percept keep_for_best_fit = (Percept)changed_dropdown.SelectedItem;

            //Work out which move this dropdown belongs to
            Move percept_move = null;
            foreach (var move in Move.list)
            {
                if (changed_dropdown == list_qmatrix_comboboxes[move])
                {
                    percept_move = move;
                    break;
                }
            }

            if (percept_move == null)
            {
                return; //Not one of the q-matrix dropdowns
            }

            //Build a perception state that matches the dropdowns
            PerceptionState to_set = new PerceptionState();
            foreach (var move in Move.list)
            {
                to_set.perception_data[move] = (Percept)list_qmatrix_comboboxes[move].SelectedItem;
            }

            to_set.set_name();
            to_set = PerceptionState.list_of_states[to_set]; //Convert to static instance

            //This state may not exist in our q-matrix states, because we only changed one of the dropdowns.
            //Take the first listed state with the highest compare score that still has the
            //chosen percept for the changed move.
            int             best_score           = 0;
            PerceptionState best_perceptionstate = null;
            foreach (object item in qmatrix_state_combobox_large.Items)
            {
                //The large dropdown can hold a text placeholder instead of states; skip anything else
                PerceptionState candidate = item as PerceptionState;
                if (candidate == null)
                {
                    continue;
                }

                int score = to_set.compare(candidate);
                if (score > best_score && candidate.contains(percept_move, keep_for_best_fit))
                {
                    best_perceptionstate = candidate;
                    best_score           = score;
                }
            }

            //Nothing matched: leave the display as it is rather than passing null on
            if (best_perceptionstate != null)
            {
                ViewQmatrixConfiguration(best_perceptionstate);
            }
        }

Exemple #8

0

Afficher le fichier

        //Determine what the next move to make will be.
        //Chooses the move to make from perceieved_state.
        //With a q-matrix entry for the state: explores (uniformly random move) with probability
        //e_current, otherwise picks at random among the moves tied for the highest stored value.
        //Without an entry, or for a null state: returns a uniformly random move.
        //Returns a member of Move.list.
        public Move GenerateStep(PerceptionState perceieved_state)
        {
            if (perceieved_state != null && matrix_data.Keys.Contains(perceieved_state))
            {
                //MyRandom.Next is used below with an exclusive upper bound, so this roll is 1..100.
                //"<=" makes the chance of exploring equal to e_current: e = 1 always explores, e = 0 never does.
                if (MyRandom.Next(1, 101) <= e_current * 100)
                {
                    //NOTE(review): randomly_moved is only ever set to true here; presumably it is reset elsewhere - confirm.
                    randomly_moved = true;
                    return(Move.list[MyRandom.Next(0, Move.list.Count)]);
                }

                //Greedy selection: keep every move tied for the highest stored value
                List <Move> best_moves = new List <Move>();
                double      best_value = 0;
                foreach (var entry in matrix_data[perceieved_state].move_list)
                {
                    if (best_moves.Count == 0 || entry.Value > best_value)
                    {
                        //First entry, or a strictly better one: restart the tie list
                        best_moves.Clear();
                        best_moves.Add(entry.Key);
                        best_value = entry.Value;
                    }
                    else if (entry.Value == best_value)
                    {
                        best_moves.Add(entry.Key);
                    }
                }

                if (best_moves.Count > 0)
                {
                    return(best_moves[MyRandom.Next(0, best_moves.Count)]); //random member of the tied moves
                }
                //An entry without any moves falls through to the random move below
            }

            //No usable q-matrix entry, so just do a random move.
            return(Move.list[MyRandom.Next(0, Move.list.Count)]);
        }

Exemple #9

0

Afficher le fichier

        //Used to erase session-based progress.
        //This is also called each new episode once we reach the max steps
        //Not called when the program launches
        //Starts a fresh episode: zeroes the per-episode tallies, shuffles the board, refreshes
        //bender's perception and recorded position, and tells the q-matrix a new episode began.
        public void StartNewEpisode()
        {
            //Per-episode tallies start over
            cans_collected  = 0;
            episode_rewards = 0;

            //New layout first, then let bender perceive it
            board_data.shuffle_cans_and_bender();
            board_data.bender_percieves();

            //Record where bender now stands
            var bender = board_data.bender;
            location_result = new int[] { bender.x_coordinate, bender.y_coordinate };

            //Start and end perception are both read from bender as he stands now
            bender_perception_starting = bender.get_perception_state();
            bender_perception_ending   = bender.get_perception_state();

            live_qmatrix.ProcessNewEpisode();
        }

Exemple #10

0

Afficher le fichier

        //When this is called, the q matrix will update a previous state with the value of the next state
        //Calculate the change here
        //Computes a new q-value for (state_to_update, result_move) and stores it when it is non-zero.
        //state_to_update: the state the move was made from.
        //result_state:    the state the move led to.
        //result_move:     the move that was made.
        //base_reward:     the reward obtained for the move.
        //Sets y_current and did_we_update as side effects.
        public void UpdateState(PerceptionState state_to_update, PerceptionState result_state, Move result_move, double base_reward)
        {
            //Best stored value at each state; a state with no entry counts as 0
            double previous_best = matrix_data.Keys.Contains(state_to_update)
                ? matrix_data[state_to_update].GetBestValue()
                : 0.0;
            double next_best = matrix_data.Keys.Contains(result_state)
                ? matrix_data[result_state].GetBestValue()
                : 0.0;

            //The discount is y raised to (step_number - 1)
            y_current = Math.Pow(y, step_number - 1);

            //n_current * (discounted change in best value + reward)
            double change            = next_best - previous_best;
            double discounted_change = change * y_current;
            double with_reward       = discounted_change + base_reward;
            double final_value       = n_current * with_reward;

            //Status message grabs this later
            did_we_update = false;
            if (final_value == 0)
            {
                return; //Nothing to store
            }

            did_we_update = true;

            //First time we store something for this state: give it an entry
            if (!matrix_data.Keys.Contains(state_to_update))
            {
                matrix_data[state_to_update] = new ValueSet();
            }
            matrix_data[state_to_update][result_move] = final_value;
        }

Exemple #11

0

Afficher le fichier

        //This is used to display rows of the qmatrix and the q-values for each move
        //This is called from FormsHandler.DisplayState, as well as directly from the dropdowns when their contents are changed.
        //When this is called from displaystate, the perception to view may not be valid.
        //When this is called from the dropdown, the perception should exist in the qmatrix.
        //Rebuilds the q-matrix controls (entry count, small percept dropdowns, large state dropdown,
        //value textboxes) from current_state's q-matrix, then displays one entry.
        //current_state:      the state whose q-matrix is listed.
        //perception_to_view: the entry to display when the q-matrix holds it. Otherwise
        //                    current_state's starting perception is used, and failing that the first entry.
        //NOTE(review): ViewQmatrixConfiguration reads its values from loaded_state, so this assumes
        //current_state is the loaded state - confirm for callers other than DisplayState.
        static private void HandleQmatrixForms(AlgorithmState current_state, PerceptionState perception_to_view)
        {
            var stored_entries = current_state.live_qmatrix.matrix_data;
            qmatrix_stored_entires.Text = stored_entries.Count.ToString();

            //May not have qmatrix data at the step being displayed.
            if (stored_entries.Count == 0)
            {
                //Leave a placeholder in every dropdown and blank the value textboxes
                foreach (var dropdown in list_qmatrix_comboboxes.Values)
                {
                    dropdown.Items.Clear();
                    dropdown.Items.Add("None");
                }

                qmatrix_state_combobox_large.Items.Clear();
                qmatrix_state_combobox_large.Items.Add("A q-matrix entry has not yet been made.");

                foreach (var textbox in List_qmatrix_value_textboxes.Values)
                {
                    textbox.Clear();
                }

                return;
            }

            //For each move, gather the distinct percepts stored for that move across all entries.
            //A hashset keeps duplicates out.
            Dictionary <Move, HashSet <Percept> > dropdown_text_items = new Dictionary <Move, HashSet <Percept> >();
            foreach (var move in Move.list)
            {
                dropdown_text_items.Add(move, new HashSet <Percept>());
            }

            foreach (var entry in stored_entries.Keys)
            {
                foreach (var move in Move.list)
                {
                    dropdown_text_items[move].Add(entry.perception_data[move]);
                }
            }

            //Refill each small dropdown with its move's percepts, ordered by percept_data
            foreach (var move in Move.list)
            {
                list_qmatrix_comboboxes[move].Items.Clear();
                foreach (var percept in dropdown_text_items[move].OrderBy(o => o.percept_data))
                {
                    list_qmatrix_comboboxes[move].Items.Add(percept);
                }
            }

            //Refill the overall-state dropdown, ordered by ID
            qmatrix_state_combobox_large.Items.Clear();
            foreach (var entry in stored_entries.Keys.OrderBy(o => o.ID))
            {
                qmatrix_state_combobox_large.Items.Add(entry);
            }

            //Pick the entry to display. The membership test and the value passed on now use the
            //same state; a null perception is skipped instead of being used as a lookup key.
            PerceptionState to_show;
            if (perception_to_view != null && stored_entries.Keys.Contains(perception_to_view))
            {
                to_show = perception_to_view;
            }
            else if (current_state.bender_perception_starting != null &&
                     stored_entries.Keys.Contains(current_state.bender_perception_starting))
            {
                to_show = current_state.bender_perception_starting;
            }
            else
            {
                to_show = stored_entries.Keys.First(); //Just grab the first q-matrix item
            }

            ViewQmatrixConfiguration(to_show);
        }

Exemple #12

0

Afficher le fichier

        //This is run every time we step through the algorithm.
        //Handles updating all the fields that change every time we look at new data
        //This method handles any time we are updating what is displayed for any reason once the algorithm is active
        //We expect the algorithm state to be set from the outside before we enter this.
        //This will also handle updating the history dropdowns
        //Refreshes the on-screen controls from loaded_state: the picture board, and, once the
        //algorithm has started, the q-matrix controls, progress/reward textboxes and history dropdowns.
        //Finishes by redrawing the board squares, setting the status box and calling DisplayInitialSettings().
        //Takes no parameters; everything is read from loaded_state and static AlgorithmState members.
        static public void DisplayState()
        {
            picture_board.clone_position(loaded_state.board_data); //This copies the state's board over to our PictureSquare board.

            //Textboxes update
            if (AlgorithmState.algorithm_started) //Only display this if we've started
            {
                //This will configure the q-matrix dropdowns properly, and handle if there is no qmatrix as well.
                //It also sets the stored-entries textbox.
                HandleQmatrixForms(loaded_state, loaded_state.bender_perception_starting);

                //Session progress
                step_number.Text    = loaded_state.GetStepNumber().ToString();
                episode_number.Text = loaded_state.GetEpisodeNumber().ToString();
                e_session.Text      = GetString(loaded_state.live_qmatrix.e_current);
                y_session.Text      = loaded_state.live_qmatrix.y_current.ToString();

                //These textboxes handle percepts:
                //one textbox per move, filled from the perception the board reports for bender.

                PerceptionState to_view = loaded_state.board_data.get_bender_perception();

                foreach (var i in Move.list)
                {
                    list_current_position_textboxes[i].Text = to_view.perception_data[i].ToString();
                }

                //Can and reward counters
                beer_remaining.Text = loaded_state.board_data.get_cans_remaining().ToString();
                beer_collected.Text = loaded_state.cans_collected.ToString();
                reward_episode.Text = loaded_state.episode_rewards.ToString();
                reward_total.Text   = loaded_state.total_rewards.ToString();

                //Update the history episode dropdown:
                //when the history holds more entries than the dropdown, append the newest one
                if (combobox_history_episodes.Items.Count < AlgorithmState.state_history.Count)
                {
                    combobox_history_episodes.Items.Add(AlgorithmState.state_history.Last());
                }

                //Select the last entry of the episode dropdown
                combobox_history_episodes.SelectedIndex = combobox_history_episodes.Items.Count - 1;

                //Rebuild the step dropdown from the newest history entry when it does not
                //list loaded_state, or when loaded_state is at step 0
                if (!combobox_history_steps.Items.Contains(loaded_state) || loaded_state.GetStepNumber() == 0)
                {
                    combobox_history_steps.Items.Clear();
                    combobox_history_steps.Items.AddRange(AlgorithmState.state_history.Last().ToArray());
                    combobox_history_steps.Text = loaded_state.ToString();
                }
            }

            //NOTE(review): same call as at the top of the method; possibly repeated to re-sync after
            //the control updates above - confirm whether both calls are needed.
            picture_board.clone_position(loaded_state.board_data);

            //Handle drawing the board: refresh the picture of every square
            foreach (var i in picture_board.board_data)
            {
                foreach (var j in i)
                {
                    ((PictureSquare)j).setPicture();
                }
            }

            status_box.Text = StatusMessage.GetMessageFromState(loaded_state);

            DisplayInitialSettings();
        }

C# (CSharp) ReinforcementLearning PerceptionState Exemples