Example #1
        //Set info about agent ( full details )
        public void setDetailedInfo(string agentName, Observation obs, int[] action, int position)
        {
            string info = Environment.NewLine + "---------------------------------" + Environment.NewLine + agentName + " is next. Currently on position: " + position.ToString();
            info += Environment.NewLine + "Observation received : " + obs.printInfo();
            //    info += Environment.NewLine + "Action selected :  ";
            for (int i = 0; i < action.Length; i++)
            { info += action[i].ToString() + ","; }

            //Strings are immutable, so keep the result of Remove to drop the trailing comma
            info = info.Remove(info.Length - 1);
            MainWindow mw = (MainWindow)Application.Current.MainWindow;
            mw.obsInfo.Text = info;

             //   MessageBox.Show(info);
        }
Example #2
 //Receive observation and reward and send an action back to the environment
 public virtual int agent_step(Observation obs, double reward)
 {
     return 0;
 }
Example #3
 //Receive the first observation of the game
 //No reward is expected now
 //Send an action back to the environment
 public virtual int agent_start(Observation obs)
 {
     return 0;
 }
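Taken together, agent_start and agent_step form the usual episode loop: the first observation of a game goes through agent_start (no reward yet), and every later observation plus reward goes through agent_step. A minimal sketch of a driver loop follows; the Agent and Environment types and their Reset/Step/IsTerminal members are hypothetical placeholders used only to illustrate the calling order, not part of this project.

 //Hypothetical episode driver (illustration only)
 public void runEpisodeSketch(Agent agent, Environment env)
 {
     Observation obs = env.Reset();
     int action = agent.agent_start(obs);           //first action, no reward expected

     while (!env.IsTerminal)
     {
         double reward;
         obs = env.Step(action, out reward);        //apply the action, observe the outcome
         action = agent.agent_step(obs, reward);    //learn from the reward, pick the next action
     }
 }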
Example #4
 public EligibilityTrace(Observation o, Action a, double v)
 {
     this.observation = o;
     this.action      = a;
     this.value       = v;
 }
Example #5
 public EligibilityTrace(Observation o, Action a, double v)
 {
     this.observation = o;
     this.action = a;
     this.value = v;
 }
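Both snippets above show only the constructor. A hedged sketch of the surrounding class, with field types inferred from how traces[i].observation, traces[i].action and traces[i].value are used in the later examples (the actual class may differ):

 public class EligibilityTrace
 {
     public Observation observation;            //state the trace refers to
     public Monopoly.RLClasses.Action action;   //action taken in that state
     public double value;                       //eligibility weight, decayed by gamma * lamda

     public EligibilityTrace(Observation o, Monopoly.RLClasses.Action a, double v)
     {
         this.observation = o;
         this.action = a;
         this.value = v;
     }
 }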
Example #6
 //Update traces -- SARSA
 private bool updateSTraces(Observation obs, Monopoly.RLClasses.Action a)
 {
     return false;
 }
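The method is left as a stub here. For reference, a SARSA(λ) trace update would typically mirror updateQTraces from Example #9: reset the trace of the visited state-action pair and decay all the others by gamma * lamda. A hedged sketch reusing the helpers from these examples, not the project's actual implementation:

 private bool updateSTracesSketch(Observation obs, Monopoly.RLClasses.Action a)
 {
     bool found = false;
     for (int i = 0; i < traces.Count; i++)
     {
         if (checkStateSimilarity(obs, traces[i].observation) && a.action.Equals(traces[i].action.action))
         {
             traces[i].value = 1;                 //replacing trace for the visited pair
             found = true;
         }
         else
         {
             traces[i].value *= gamma * lamda;    //decay every other trace
         }
     }
     return found;
 }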
Example #7
 //Change agent's current observation based on what the agent receives
 public void agent_changeCurrentState(Observation obs)
 {
     this.lastState = obs;
 }
Example #8
        //Sarsa algorithm
        private double Sarsa(Observation lastState, Monopoly.RLClasses.Action lastAction, Observation newState, Monopoly.RLClasses.Action newAction, double reward)
        {
            //Run network for the last state and last action
            double QValue = network.Run(createInput(lastState, lastAction.action)).First();
            double previousQ = QValue;

            //Run network for the new state and the newly selected action (on-policy)
            double newQ = network.Run(createInput(newState, newAction.action)).First();

            QValue += alpha * (reward + gamma * newQ - previousQ);

            return QValue;
        }
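The value returned here is the SARSA target Q(s,a) + alpha * (reward + gamma * Q(s',a') - Q(s,a)); agent_step in Example #14 then trains the network toward it. A quick numeric illustration with made-up values:

        //Assume previousQ = 0.40, newQ = 0.60, reward = 1.0, alpha = 0.1, gamma = 0.9
        double target = 0.40 + 0.1 * (1.0 + 0.9 * 0.60 - 0.40);   //0.40 + 0.1 * 1.14 = 0.514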
Example #9
        //Update traces -- Q-learning (Peng's Q(λ))
        private bool updateQTraces(Observation obs, Monopoly.RLClasses.Action a, double reward)
        {
            bool found = false;

            //Since the state space is huge we'll use a similarity function to decide whether two states are similar enough
            for (int i = 0; i < traces.Count; i++)
            {
                if (checkStateSimilarity(obs,traces[i].observation) && (!a.action.Equals(traces[i].action.action)))
                {
                    traces[i].value = 0;
                    traces.RemoveAt(i);
                    i--;

                }
                else if (checkStateSimilarity(obs, traces[i].observation) && (a.action.Equals(traces[i].action.action)))
                {
                    found = true;

                    traces[i].value = 1;

                    //Q[t] (s,a)
                    double qT = network.Run(createInput(traces[i].observation, traces[i].action.action))[0];

                    //maxQ[t] (s[t+1],a)
                    int act = findMaxValues(calculateQValues(obs));
                    double maxQt = network.Run(createInput(obs, act))[0];

                    //maxQ[t] (s[t],a)
                    act = findMaxValues(calculateQValues(lastState));
                    double maxQ = network.Run(createInput(lastState, act))[0];

                    //Q[t+1](s,a) = Q[t](s,a) + alpha * trace[i].value * (reward + gamma * maxQ[t](s[t+1],a) - maxQ[t](s[t],a))
                    double qVal = qT + alpha * (traces[i].value) * (reward + gamma * maxQt - maxQ);

                    trainNeural(createInput(traces[i].observation, traces[i].action.action), qVal);

                }
                else
                {
                    traces[i].value = gamma * lamda * traces[i].value;

                    //Q[t] (s,a)
                    double qT = network.Run(createInput(traces[i].observation, traces[i].action.action))[0];

                    //maxQ[t] (s[t+1],a)
                    int act = findMaxValues(calculateQValues(obs));
                    double maxQt = network.Run(createInput(obs, act))[0];

                    //maxQ[t] (s[t],a)
                    act = findMaxValues(calculateQValues(lastState));
                    double maxQ = network.Run(createInput(lastState, act))[0];

                    //Q[t+1](s,a) = Q[t](s,a) + alpha * trace[i].value * (reward + gamma * maxQ[t](s[t+1],a) - maxQ[t](s[t],a))
                    double qVal = qT + alpha * (traces[i].value) * (reward + gamma * maxQt - maxQ);

                    trainNeural(createInput(traces[i].observation, traces[i].action.action), qVal);
                }
            }

            return found;
        }
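The else-if and else branches above differ only in how traces[i].value is set; the network update itself is identical. A hedged refactor sketch that factors the shared part into a helper (trainTraceSketch is a hypothetical name, not part of the project):

        private void trainTraceSketch(EligibilityTrace trace, Observation obs, double reward)
        {
            //Q[t](s,a) for the traced state-action pair
            double qT = network.Run(createInput(trace.observation, trace.action.action))[0];

            //maxQ[t](s[t+1],a) and maxQ[t](s[t],a)
            double maxQt = network.Run(createInput(obs, findMaxValues(calculateQValues(obs))))[0];
            double maxQ = network.Run(createInput(lastState, findMaxValues(calculateQValues(lastState))))[0];

            //Peng's Q(λ)-style target, weighted by the trace value
            double qVal = qT + alpha * trace.value * (reward + gamma * maxQt - maxQ);
            trainNeural(createInput(trace.observation, trace.action.action), qVal);
        }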
Example #10
        //Calculate network's output
        private double[] calculateQValues(Observation obs)
        {
            double[] tempQ = new double[3];

            for (int i = 0; i < tempQ.Length; i++)
            {
                //Run network for action (i - 1) on the given observation
                double[] input = createInput(obs, i - 1);

                tempQ[i] = network.Run(input)[0];
            }
            return tempQ;
        }
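calculateQValues produces one Q-value per action; the i - 1 offset implies the three actions are encoded as -1, 0 and 1. The e_greedySelection method used in Examples #14 and #15 is not among these snippets; below is a minimal sketch of a standard ε-greedy rule over this array. It assumes a System.Random field named rand and assumes findMaxValues returns the greedy action in the same -1..1 encoding that createInput expects (as its use in Example #9 suggests); the real implementation may differ.

        private int e_greedySelectionSketch(double[] qValues)
        {
            //Explore with probability epsilon, otherwise act greedily
            if (rand.NextDouble() < epsilon)
                return rand.Next(3) - 1;        //uniform random action in {-1, 0, 1}

            return findMaxValues(qValues);      //greedy action
        }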
Example #11
        //Calculate similarity of states
        private bool checkStateSimilarity(Observation obs1, Observation obs2)
        {
            bool similar = true;

            //Check money similarity
            double moneyDif = Math.Abs(obs1.finance.relativeAssets - obs2.finance.relativeAssets) + Math.Abs(obs1.finance.relativePlayersMoney - obs2.finance.relativePlayersMoney);
            if (moneyDif >= 0.1)
                similar = false;

            //Check area similarity
            if (!obs1.position.relativePlayersArea.Equals(obs2.position.relativePlayersArea))
                similar = false;

            double countDif = 0;
            for (int i = 0; i < obs1.area.gameGroupInfo.GetLength(0); i++)
            {
                if (!similar)
                    break;

                countDif = 0;
                for (int j = 0; j < obs1.area.gameGroupInfo.GetLength(1); j++)
                {
                    if (!obs1.area.gameGroupInfo[i, j].Equals(obs2.area.gameGroupInfo[i, j]))
                    {
                        countDif += Math.Abs(obs1.area.gameGroupInfo[i, j] - obs2.area.gameGroupInfo[i, j]);
                        if (countDif >= 0.1)
                        { similar = false; break; }
                    }
                }
            }

            return similar;
        }
Example #12
        //Initialize local parameters for a new game
        public void initParams()
        {
            if (this.policyFrozen)
            {
                this.alpha = 0;
                this.epsilon = 0;
                this.lamda = 0;
                this.gamma = 0;
            }

            //numberOfProperties = 28
            base.propertiesPurchased = new int[28];
            base.mortgagedProperties = new int[28];
            base.buildingsBuilt = new int[28];

            this.agent_changeCurrentState(new Observation());

            //Initialize arrays
            for (int i = 0; i < 28; i++)
            {
                propertiesPurchased[i] = 0;
                mortgagedProperties[i] = 0;
                buildingsBuilt[i] = 0;
            }

            this.isAlive = true;
            base.inJail = false;

            base.money = 1500;
            base.position = 0;

            lastAction = 0;
            lastState = new Observation();

            traces = new List<EligibilityTrace>();
        }
Example #13
        //Create input for the neural network
        public double[] createInput(Observation observation, int action)
        {
            List<double> input = new List<double>();

            //Add action
            input.Add((((double)(action+2))/3));

            //Add every variable of the observation to the input list
            for (int k = 0; k < observation.area.gameGroupInfo.GetLength(0); k++)
            {
                for (int kk = 0; kk < observation.area.gameGroupInfo.GetLength(1); kk++)
                    input.Add(observation.area.gameGroupInfo[k, kk]);
            }

            input.Add(observation.finance.relativeAssets);
            input.Add(observation.finance.relativePlayersMoney);

            input.Add(observation.position.relativePlayersArea);

            //Return the input array
            return input.ToArray();
        }
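The resulting vector therefore has a fixed layout: one normalized action value, every cell of gameGroupInfo row by row, two finance features and one position feature. A small hypothetical check of that layout, given any Observation obs:

        double[] input = createInput(obs, 0);
        int groupCells = obs.area.gameGroupInfo.GetLength(0) * obs.area.gameGroupInfo.GetLength(1);
        bool layoutOk = input.Length == 1 + groupCells + 2 + 1;   //action + area cells + 2 finance + 1 position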
Example #14
        //Receive an observation and a reward from the environment and send the appropriate action
        public override int agent_step(Observation observation, double reward)
        {
            //If this isn't a random agent, calculate the Q-values for every possible action
            int action = 0;
            if (!agentType.Equals('r'))
            {
                //Calculate the Q-values
                double[] QValues = calculateQValues(observation);

                //Select action
                action = e_greedySelection(QValues);

                //If the policy of the agent isn't frozen then train the neural network
                if (!policyFrozen)
                {
                    //If the agent is learning, update its Q-value for the selected action
                    double QValue = 0;
                    bool exists = false;

                    //Calculate the qValue either using the Q-learning or the SARSA algorithm
                    if (this.agentType.Equals('q'))
                    {
                        exists = updateQTraces(observation, new Monopoly.RLClasses.Action(action), reward);
                        QValue = Qlearning(lastState, new Monopoly.RLClasses.Action(lastAction), observation, new Monopoly.RLClasses.Action(findMaxValues(QValues)), reward);
                    }
                    else
                    {
                        exists = updateSTraces(observation, new Monopoly.RLClasses.Action(action));
                        QValue = Sarsa(lastState, new Monopoly.RLClasses.Action(lastAction), observation, new Monopoly.RLClasses.Action(action), reward);
                    }

                    trainNeural(createInput(lastState, lastAction), QValue);

                    //Add trace to list
                    if (!exists)
                        traces.Add(new EligibilityTrace(lastState, new RLClasses.Action(lastAction), 1));

                }

                //Update local values
                lastAction = action;
                lastState = observation;

                return action;
            }
            //Random action
            else
            {
                return randomAction();
            }
        }
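agent_step calls a Qlearning method that is not listed among these examples. A hedged sketch of what it presumably computes, mirroring the Sarsa method from Example #8 but bootstrapping on the greedy action passed in (QlearningSketch is a placeholder name):

        private double QlearningSketch(Observation lastState, Monopoly.RLClasses.Action lastAction,
                                       Observation newState, Monopoly.RLClasses.Action greedyAction, double reward)
        {
            //Q(s,a) for the previous state-action pair
            double previousQ = network.Run(createInput(lastState, lastAction.action)).First();

            //Q(s',a*) for the greedy action in the new state
            double maxQ = network.Run(createInput(newState, greedyAction.action)).First();

            //Q-learning target: Q(s,a) + alpha * (reward + gamma * maxQ - Q(s,a))
            return previousQ + alpha * (reward + gamma * maxQ - previousQ);
        }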
Example #15
        //First action of the agent, where no reward is to be expected from the environment
        public override int agent_start(Observation observation)
        {
            //Increase the currentEpoch parameter (used only in neural network training)
            currentEpoch++;

            //Initialize agent's parameters
            initParams();

            //Create new array for action
            int action = 0;

            if (!agentType.Equals('r'))
            {
                //Calculate the Q-values
                double[] QValues = calculateQValues(observation);

                //Select final action based on the ε-greedy algorithm
                action = e_greedySelection(QValues);

                //Update local values
                lastAction = action;
                lastState = observation;

                traces.Add(new EligibilityTrace(observation, new RLClasses.Action(action), 1));

                return action;
            }
            else
            {
                return  randomAction();
            }
        }
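Both agent_start and agent_step fall back to randomAction for the random ('r') agent type; that method is not shown here. A minimal sketch consistent with the three-action encoding used by createInput, assuming a System.Random field named rand:

        private int randomActionSketch()
        {
            //Uniform choice over the three actions encoded as -1, 0 and 1
            return rand.Next(3) - 1;
        }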
Example #16
        //Create an instance of Observation class
        //Representing the current state of the environment
        public Observation createObservation()
        {
            Observation obs = new Observation();

            //Create the specific instances of the Observation's component classes
            Obs_Finance finance = createFinance();
            Obs_Position position = createPosition();
            Obs_Area area = createArea();

            obs.area = area;
            obs.finance = finance;
            obs.position = position;

            return obs;
        }
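createObservation assembles the Observation from three sub-objects; the Observation class itself is not listed. A hedged sketch of the fields these examples rely on (member names taken from their usage above):

        public class Observation
        {
            public Obs_Area area;          //area.gameGroupInfo[,] -- per-group property information
            public Obs_Finance finance;    //finance.relativeAssets, finance.relativePlayersMoney
            public Obs_Position position;  //position.relativePlayersArea
        }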