/// <summary>
/// Sizes the output memory blocks and rebuilds the learning structures.
/// </summary>
/// <remarks>
/// <para>
/// <c>UtilityOutput</c> gets one element per action (<c>NoActions</c>),
/// <c>RewardStats</c> gets two elements, and <c>LearningParams</c> is replaced
/// with a fresh <c>MyModuleParams</c> on every call.
/// </para>
/// <para>
/// Everything else is sized from <c>GlobalDataInput.Count</c>, so it is only
/// (re)created when <c>GlobalDataInput</c> is not null. When it is null the
/// method returns early and leaves <c>Memory</c>, <c>LearningAlgorithm</c>,
/// <c>Rds</c> and <c>CurrentStateOutput.Count</c> untouched.
/// </para>
/// </remarks>
public override void UpdateMemoryBlocks()
{
    UtilityOutput.Count = NoActions;
    RewardStats.Count = 2;

    LearningParams = new MyModuleParams();

    // Every remaining structure is dimensioned by the input block. Check it
    // before the first dereference so a missing input does not raise a
    // NullReferenceException.
    if (GlobalDataInput == null)
    {
        return;
    }

    int stateSize = GlobalDataInput.Count;

    Memory = new MyQSAMemory(stateSize, NoActions);
    LearningAlgorithm = new MyDiscreteQLearning(LearningParams, Memory);

    String[] actionNames;
    if (NoActions == 6)
    {
        MyLog.DEBUG.WriteLine("6 actions set by the user, will use action names for gridworld");
        actionNames = new String[] { " -", " <", " >", " ^", " v", " P" };
    }
    else if (NoActions == 3)
    {
        MyLog.DEBUG.WriteLine("3 actions set by the user, will use action names for pong");
        actionNames = new String[] { " <", " -", " >" };
    }
    else
    {
        MyLog.DEBUG.WriteLine("Unknown no. of actions, will use automatic naming of actions");

        // Generated names are "A0", "A1", ... one per action.
        actionNames = new String[NoActions];
        for (int i = 0; i < NoActions; i++)
        {
            actionNames[i] = "A" + i;
        }
    }

    Rds = new MyRootDecisionSpace(stateSize, actionNames, LearningParams);
    CurrentStateOutput.Count = stateSize;
}
/// <summary>
/// Builds a return predictor on top of the given root decision space.
/// </summary>
/// <param name="rds">Root decision space; stored in <c>Rds</c> and handed to the decision space and the local variable history.</param>
/// <param name="myPromotedVariable">Index passed to the base class' <c>AddPromotedVariable</c> together with <paramref name="rds"/>.</param>
/// <param name="setup">Module parameters forwarded to the base constructor, the decision space, the action selection method and the learning algorithm.</param>
/// <param name="label">Label forwarded to the base constructor.</param>
/// <param name="level">Level forwarded to the base constructor.</param>
public MyStochasticReturnPredictor(MyRootDecisionSpace rds, int myPromotedVariable, MyModuleParams setup, String label, int level)
    : base(label, level, setup)
{
    // The explicit base qualifier is kept on purpose: it pins the call to the
    // base implementation regardless of any override in this class.
    base.AddPromotedVariable(myPromotedVariable, rds);
    Rds = rds;

    // The decision space receives a reference to this (still being built) predictor.
    Ds = new MyDecisionSpace(this, rds, setup);

    // Q(s,a) memory: first argument is the variable capacity of the root
    // space, second argument is 0.
    int variableCapacity = rds.VarManager.MAX_VARIABLES;
    Mem = new MyQSAMemory(variableCapacity, 0);

    m_asm = new MyMotivationBasedDeleteUnselectedASM(setup);
    LearningAlgorithm = new MyDiscreteQLearning(setup, Mem);

    // NOTE(review): m_setup is not assigned in this constructor; presumably
    // the base constructor stores `setup` there - confirm.
    m_mlvh = new MyLocalVariableHistory(rds, m_setup, Ds);

    m_prevSelectedAction = 0;
    m_prev_st = Ds.GetCurrentState();
    m_newVariables = new List<int>();
}
// Previous state (for variable adding and sharing knowledge).
private int[] m_prev_st;

#endregion Fields

#region Constructors

/// <summary>
/// Creates the predictor and wires it to the supplied root decision space.
/// </summary>
/// <param name="rds">Root decision space; kept in <c>Rds</c> and shared with the decision space and the local variable history.</param>
/// <param name="myPromotedVariable">Index registered through the base class' <c>AddPromotedVariable</c>.</param>
/// <param name="setup">Module parameters passed to the base constructor and to the helper objects created here.</param>
/// <param name="label">Label passed to the base constructor.</param>
/// <param name="level">Level passed to the base constructor.</param>
public MyStochasticReturnPredictor(MyRootDecisionSpace rds, int myPromotedVariable, MyModuleParams setup, String label, int level)
    : base(label, level, setup)
{
    // Call stays base-qualified so it always targets the base implementation.
    base.AddPromotedVariable(myPromotedVariable, rds);
    Rds = rds;

    Ds = new MyDecisionSpace(this, rds, setup);

    // Memory is dimensioned by the root space's MAX_VARIABLES; the second
    // constructor argument is 0.
    int maxVariables = rds.VarManager.MAX_VARIABLES;
    Mem = new MyQSAMemory(maxVariables, 0);

    m_asm = new MyMotivationBasedDeleteUnselectedASM(setup);
    LearningAlgorithm = new MyDiscreteQLearning(setup, Mem);

    // NOTE(review): uses the m_setup field rather than the `setup` parameter;
    // presumably both refer to the same object via the base constructor - confirm.
    m_mlvh = new MyLocalVariableHistory(rds, m_setup, Ds);

    // Initial "previous" action and state snapshot.
    m_prevSelectedAction = 0;
    m_prev_st = Ds.GetCurrentState();

    m_newVariables = new List<int>();
}