예제 #1
0
        /// <summary>
        /// Sets the action dimension to 1 and rebuilds <c>fixArray</c> with one entry per
        /// state component followed by one entry per action component.
        /// </summary>
        /// <param name="ws">Workspace from which a sample state is derived; only the
        /// length of that state is used.</param>
        public void Initialize(Workspace ws)
        {
            actionDimension = 1;

            // A sample state is built solely to learn how many state components there are.
            int stateLength = CalcStateFromWorkspace(ws).States.Length;

            fixArray = new bool[stateLength + actionDimension];

            // Leading state slots are flagged true; trailing action slots are flagged false.
            for (int slot = 0; slot < fixArray.Length; ++slot)
            {
                fixArray[slot] = slot < stateLength;
            }
        }
예제 #2
0
        /// <summary>
        /// Runs one learning step: reads the current state and reward from the workspace,
        /// selects the next action, and, when a previous state/action pair has been recorded,
        /// applies the update
        /// <c>Q(prev) += alpha * (reward + discount * max Q(current) - Q(prev))</c>
        /// to the table <c>ws.uti</c>.
        /// </summary>
        /// <param name="ws">Workspace supplying the position, the reward and the value
        /// table <c>uti</c>, which is indexed as <c>[action, States[1], States[0]]</c>.</param>
        /// <returns>The component array of the action selected for this step.</returns>
        public int[] Step(Workspace ws)
        {
            State state = CalcStateFromWorkspace(ws);
            float reward = GetRewardFromWorkspace(ws);
            Action action = EpsilonGreedy(state, epsilon, ws);

            // The update needs a completed transition, so it is skipped until both the
            // previous state and the previous action have been stored by an earlier call.
            if ((prevState != null) && (prevAction != null))
            {
                // Largest table value over the four actions available in the current state.
                float Qtp1max = float.MinValue;
                for (int i = 0; i < 4; ++i)
                {
                    float candidate = ws.uti[i, state.States[1], state.States[0]];
                    if (Qtp1max < candidate)
                    {
                        Qtp1max = candidate;
                    }
                }

                // Current estimate for the state/action pair taken on the previous step.
                float Qt = ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]];

                float deltaQ = alpha * (reward + discount * Qtp1max - Qt);

                ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]] += deltaQ;
            }

            // Remember this step so the next call can update its table entry.
            prevAction = action;
            prevState = state;

            return action.Actions;
        }
예제 #3
0
 /// <summary>
 /// Computes the reward for the workspace's current position.
 /// </summary>
 /// <param name="ws">Workspace whose <c>map</c> is looked up at <c>[pos.Y, pos.X]</c>.</param>
 /// <returns>10 when the map cell at the current position equals 3; otherwise -0.1.</returns>
 private float GetRewardFromWorkspace(Workspace ws)
 {
     // NOTE(review): cell value 3 presumably marks the goal tile - confirm against the map definition.
     bool onRewardCell = ws.map[ws.pos.Y, ws.pos.X] == 3;

     return onRewardCell ? 10f : -0.1f;
 }
예제 #4
0
        /// <summary>
        /// Builds a two-component state from the workspace position.
        /// </summary>
        /// <param name="ws">Workspace whose <c>pos</c> is read.</param>
        /// <returns>A state whose components are <c>pos.X</c> followed by <c>pos.Y</c>.</returns>
        private State CalcStateFromWorkspace(Workspace ws)
        {
            // Component order matters: index 0 is X, index 1 is Y.
            int[] components = new int[] { ws.pos.X, ws.pos.Y };

            return new State(components);
        }
예제 #5
0
        /// <summary>
        /// Picks the action with the largest value in <c>ws.uti</c> for the given state.
        /// </summary>
        /// <param name="state">State whose components select the table cell as
        /// <c>[action, States[1], States[0]]</c>.</param>
        /// <param name="ws">Workspace holding the value table <c>uti</c>.</param>
        /// <returns>The action produced by <c>ActionFromInput</c> for the winning index.
        /// Ties keep the lowest index, and index 0 is used when no entry exceeds
        /// <c>float.MinValue</c>.</returns>
        private Action GetBestAction(State state, Workspace ws)
        {
            int bestIndex = 0;
            float bestValue = float.MinValue;

            // Scan the four actions; the strict comparison keeps the first maximum found.
            for (int candidate = 0; candidate < 4; ++candidate)
            {
                float value = ws.uti[candidate, state.States[1], state.States[0]];
                if (value > bestValue)
                {
                    bestValue = value;
                    bestIndex = candidate;
                }
            }

            return ActionFromInput(new int[] { bestIndex });
        }
예제 #6
0
 /// <summary>
 /// Chooses between the best-known action and a random one based on a random draw.
 /// </summary>
 /// <param name="state">State for which an action is selected.</param>
 /// <param name="epsilon">Threshold compared against a draw from <c>r.NextDouble()</c>;
 /// when the draw is below it, the best action is returned.</param>
 /// <param name="ws">Workspace passed through to <c>GetBestAction</c>.</param>
 /// <returns>The result of <c>GetBestAction</c> when the draw is below
 /// <paramref name="epsilon"/>; otherwise an action whose components are each drawn
 /// from <c>r.Next(4)</c>.</returns>
 private Action EpsilonGreedy(State state, float epsilon, Workspace ws)
 {
     // NOTE(review): here epsilon is the probability of exploiting, the reverse of the
     // usual epsilon-greedy convention (epsilon = probability of exploring) - confirm intended.
     bool exploit = r.NextDouble() < epsilon;

     if (!exploit)
     {
         // Explore: fill every action component with an independent random choice.
         int[] randomComponents = new int[actionDimension];
         for (int k = 0; k < randomComponents.Length; ++k)
         {
             randomComponents[k] = r.Next(4);
         }
         return new Action(randomComponents);
     }

     return GetBestAction(state, ws);
 }