示例#1
0
        /// <summary>
        /// Performs one agent step: derives the current state and reward from the
        /// workspace, chooses the next action via <c>EpsilonGreedy</c>, and, if a previous
        /// state/action pair is remembered, adjusts that pair's entry in <c>ws.uti</c>.
        /// </summary>
        /// <param name="ws">Workspace supplying the state, the reward and the value table <c>uti</c>.</param>
        /// <returns>The <c>Actions</c> array of the action chosen for this step.</returns>
        public int[] Step(Workspace ws)
        {
            State state = CalcStateFromWorkspace(ws);
            float reward = GetRewardFromWorkspace(ws);
            Action action = EpsilonGreedy(state, epsilon, ws);

            // The update needs a remembered previous step; both fields are null
            // right after EpisodeBegin().
            if ((prevState != null) && (prevAction != null))
            {
                // Largest table value over action indices 0..3 for the current state.
                // Only the value is needed here, so the winning index is not tracked.
                float Qtp1max = float.MinValue;
                for (int i = 0; i < 4; ++i)
                {
                    float candidate = ws.uti[i, state.States[1], state.States[0]];
                    if (Qtp1max < candidate)
                    {
                        Qtp1max = candidate;
                    }
                }

                // Table is indexed as [action, States[1], States[0]].
                float Qt = ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]];

                // Step size times the difference between the bootstrapped target and the old value.
                float deltaQ = alpha * (reward + discount * Qtp1max - Qt);

                ws.uti[prevAction.Actions[0], prevState.States[1], prevState.States[0]] += deltaQ;
            }

            // Remember this step so the next call can update its table entry.
            prevAction = action;
            prevState = state;

            return action.Actions;
        }
示例#2
0
 /// <summary>
 /// Stores the given state, reward and action in the corresponding members of this instance.
 /// </summary>
 /// <param name="state">State to store.</param>
 /// <param name="reward">Reward value to store.</param>
 /// <param name="action">Action to store.</param>
 public StateActionReward(State state, float reward, Action action)
 {
     this.action = action;
     this.reward = reward;
     this.state = state;
 }
示例#3
0
 /// <summary>
 /// Builds a flat float vector holding the state components followed by the action
 /// components. Action slots are left at zero when <paramref name="action"/> is null.
 /// </summary>
 /// <param name="state">State whose <c>States</c> values fill the leading slots.</param>
 /// <param name="action">Action whose <c>Actions</c> values fill the trailing slots; may be null.</param>
 /// <returns>A new array of length <c>state.States.Length + actionDimension</c>.</returns>
 private float[] InputFromStateAction(State state, Action action)
 {
     int stateCount = state.States.Length;
     float[] input = new float[stateCount + actionDimension];

     // NOTE(review): the loop runs to fixArray.Length, not input.Length —
     // presumably the two are always equal; confirm against where fixArray is built.
     for (int idx = 0; idx < fixArray.Length; ++idx)
     {
         if (idx < stateCount)
         {
             input[idx] = state.States[idx];
             continue;
         }

         if (action == null)
         {
             // No action supplied: leave the slot at its default value.
             continue;
         }

         input[idx] = action.Actions[idx - stateCount];
     }

     return input;
 }
示例#4
0
        /// <summary>
        /// Picks the action index (0..3) with the largest value in <c>ws.uti</c> for the
        /// given state and wraps it via <c>ActionFromInput</c>. Ties keep the lowest index;
        /// index 0 is used when no entry exceeds <c>float.MinValue</c>.
        /// </summary>
        /// <param name="state">State whose <c>States[1]</c> and <c>States[0]</c> select the table cell.</param>
        /// <param name="ws">Workspace holding the value table <c>uti</c>.</param>
        /// <returns>The action produced by <c>ActionFromInput</c> for the winning index.</returns>
        private Action GetBestAction(State state, Workspace ws)
        {
            int bestIndex = 0;
            float bestValue = float.MinValue;

            for (int candidate = 0; candidate < 4; ++candidate)
            {
                float value = ws.uti[candidate, state.States[1], state.States[0]];
                // Strict comparison: only a strictly larger value replaces the current best.
                if (bestValue < value)
                {
                    bestValue = value;
                    bestIndex = candidate;
                }
            }

            return ActionFromInput(new int[] { bestIndex });
        }
示例#5
0
 /// <summary>
 /// Chooses between the best-known action and a uniformly random one based on a
 /// single random draw compared against <paramref name="epsilon"/>.
 /// </summary>
 /// <param name="state">Current state, forwarded to <c>GetBestAction</c>.</param>
 /// <param name="epsilon">Threshold for the draw; a draw below it selects the best-known action.</param>
 /// <param name="ws">Workspace forwarded to <c>GetBestAction</c>.</param>
 /// <returns>The selected action.</returns>
 private Action EpsilonGreedy(State state, float epsilon, Workspace ws)
 {
     // NOTE(review): here epsilon is the probability of the greedy choice, the reverse
     // of the common convention where epsilon is the exploration rate — confirm intent.
     if (r.NextDouble() < epsilon)
     {
         return GetBestAction(state, ws);
     }

     // Otherwise draw every action component independently from {0, 1, 2, 3}.
     int[] randomActions = new int[actionDimension];
     for (int slot = 0; slot < randomActions.Length; ++slot)
     {
         randomActions[slot] = r.Next(4);
     }

     return new Action(randomActions);
 }
示例#6
0
 /// <summary>
 /// Clears the remembered previous state and action by setting both to null.
 /// </summary>
 public void EpisodeBegin()
 {
     prevState = null;
     prevAction = null;
 }