/// <summary>
/// Sizes the output memory blocks and rebuilds the learning objects
/// (parameters, Q(s,a) memory, Q-learning algorithm and root decision space).
/// </summary>
/// <remarks>
/// Everything that needs <c>GlobalDataInput.Count</c> is created only when
/// <c>GlobalDataInput</c> is not null. Previously the memory was built from
/// <c>GlobalDataInput.Count</c> before the null check, so the check could never
/// prevent a NullReferenceException.
/// </remarks>
public override void UpdateMemoryBlocks()
{
    UtilityOutput.Count = NoActions;
    RewardStats.Count = 2;
    LearningParams = new MyModuleParams();

    // Nothing below can be sized without the input block.
    if (GlobalDataInput == null)
    {
        return;
    }

    Memory = new MyQSAMemory(GlobalDataInput.Count, NoActions);
    LearningAlgorithm = new MyDiscreteQLearning(LearningParams, Memory);

    // Pick the action labels first, then build the decision space in one place.
    String[] actionNames;
    if (NoActions == 6)
    {
        MyLog.DEBUG.WriteLine("6 actions set by the user, will use action names for gridworld");
        actionNames = new String[] { " -", " <", " >", " ^", " v", " P" };
    }
    else if (NoActions == 3)
    {
        MyLog.DEBUG.WriteLine("3 actions set by the user, will use action names for pong");
        actionNames = new String[] { " <", " -", " >" };
    }
    else
    {
        MyLog.DEBUG.WriteLine("Unknown no. of actions, will use automatic naming of actions");
        actionNames = new String[NoActions];
        for (int i = 0; i < NoActions; i++)
        {
            actionNames[i] = "A" + i;
        }
    }

    Rds = new MyRootDecisionSpace(GlobalDataInput.Count, actionNames, LearningParams);
    CurrentStateOutput.Count = GlobalDataInput.Count;
}
/// <summary>
/// Creates an action history bound to the given action manager, starting with
/// an empty change list and an empty monitoring table.
/// </summary>
/// <param name="am">Action manager stored by this history.</param>
/// <param name="setup">Module parameters stored by this history.</param>
public MyActionHistory(ActionManager am, MyModuleParams setup)
{
    this.am = am;
    this.setup = setup;

    // Both collections start empty.
    isMonitored = new Dictionary<int, bool>();
    changed = new List<int>();
}
/// <summary>
/// Sizes the utility output, recreates the learning parameters and, when the
/// global data input is available, rebuilds the root decision space, the
/// hierarchy maintainer and the subspace-utility block dimensions.
/// </summary>
public override void UpdateMemoryBlocks()
{
    UtilityOutput.Count = NoActions;
    LearningParams = new MyModuleParams();

    // Without an input block there is nothing more to size.
    if (GlobalDataInput == null)
    {
        return;
    }

    // Choose the action labels according to the configured number of actions.
    String[] actionLabels;
    switch (NoActions)
    {
        case 6:
            MyLog.DEBUG.WriteLine("6 actions set by the user, will use action names for gridworld");
            actionLabels = new String[] { " -", " <", " >", " ^", " v", " P" };
            break;

        case 3:
            MyLog.DEBUG.WriteLine("3 actions set by the user, will use action names for pong");
            actionLabels = new String[] { " <", " -", " >" };
            break;

        default:
            MyLog.DEBUG.WriteLine("Unknown no. of actions, will use automatic naming of actions");
            actionLabels = new String[NoActions];
            for (int idx = 0; idx < NoActions; idx++)
            {
                actionLabels[idx] = "A" + idx;
            }
            break;
    }

    Rds = new MyRootDecisionSpace(GlobalDataInput.Count, actionLabels, LearningParams);
    Hierarchy = new MyHierarchyMaintainer(Rds, LearningParams);

    // The subspace utilities block is MaxActions x MaxActions values, MaxActions per row hint.
    SubspaceUtils.Count = MaxActions * MaxActions;
    SubspaceUtils.ColumnHint = MaxActions;
}
/// <summary>
/// Builds an action history for the supplied action manager. The list of
/// changes and the monitoring dictionary are both created empty.
/// </summary>
/// <param name="am">Action manager kept in the <c>am</c> field.</param>
/// <param name="setup">Module parameters kept in the <c>setup</c> field.</param>
public MyActionHistory(ActionManager am, MyModuleParams setup)
{
    // Fresh, empty bookkeeping structures.
    changed = new List<int>();
    isMonitored = new Dictionary<int, bool>();

    this.am = am;
    this.setup = setup;
}
/// <summary>
/// Creates the Q-learning algorithm over the given memory and starts its
/// eligibility trace with one all-zero state.
/// </summary>
/// <param name="learningParams">Learning parameters; also passed to the eligibility trace.</param>
/// <param name="mem">Q(s,a) memory; its <c>GetMaxStateVariables()</c> gives the length of the initial state.</param>
public MyDiscreteQLearning(MyModuleParams learningParams, MyQSAMemory mem)
{
    m_learning = learningParams;
    m_mem = mem;

    m_trace = new MyEligibilityTrace(learningParams);

    // A newly allocated int array is all zeros; push it as the first trace entry.
    int[] initialState = new int[mem.GetMaxStateVariables()];
    m_trace.PushState(initialState);
}
/// <summary>
/// Creates a variable history for the given variable manager, with an empty
/// change log and one monitoring flag per variable slot.
/// </summary>
/// <param name="vm">Variable manager; <c>GetMaxVariables()</c> sizes the monitoring flags.</param>
/// <param name="setup">Module parameters stored by this history.</param>
public MyVariableHistory(VariableManager vm, MyModuleParams setup)
{
    this.setup = setup;
    this.vm = vm;

    changed = new List<List<int>>();

    // bool arrays start out all false.
    int slots = vm.GetMaxVariables();
    isMonitored = new bool[slots];
}
/// <summary>
/// Creates the decision space owned by a stochastic return predictor.
/// </summary>
/// <param name="mySRP">Predictor that owns this decision space.</param>
/// <param name="rds">Root decision space; supplies the variable manager and is passed to the action link.</param>
/// <param name="setup">Module parameters stored by this instance.</param>
public MyDecisionSpace(MyStochasticReturnPredictor mySRP, MyRootDecisionSpace rds, MyModuleParams setup)
{
    m_mySRP = mySRP;
    m_rds = rds;
    m_setup = setup;

    // State buffer sized by the variable manager's MAX_VARIABLES.
    m_S_t = new int[rds.VarManager.MAX_VARIABLES];

    ChildActions = new MyActionLink(0, rds);
    m_childVariables = new MyVariableLink(rds.VarManager.GetMaxVariables());
}
/// <summary>
/// Creates an action manager with one level-0 motivated action per label and
/// registers all of them in a new action history.
/// </summary>
/// <param name="labels">Labels of the actions, in order.</param>
/// <param name="setup">Module parameters passed to every action and to the history.</param>
public ActionManager(String[] labels, MyModuleParams setup)
{
    m_setup = setup;
    Actions = new List<MyMotivatedAction>();

    foreach (String label in labels)
    {
        Actions.Add(new MyMotivatedAction(label, 0, setup));
    }

    // The history is created after the actions so it can pick them all up.
    AcitonHistory = new MyActionHistory(this, setup);
    AcitonHistory.AddAllCurrentActions();
}
/// <summary>
/// Creates an action manager with the requested number of level-0 motivated
/// actions, each labelled with its index, and registers them in a new action history.
/// </summary>
/// <param name="noPrimitiveActions">Number of actions to create.</param>
/// <param name="setup">Module parameters passed to every action and to the history.</param>
public ActionManager(int noPrimitiveActions, MyModuleParams setup)
{
    m_setup = setup;
    Actions = new List<MyMotivatedAction>();

    int index = 0;
    while (index < noPrimitiveActions)
    {
        // The label is simply the action's index as text.
        Actions.Add(new MyMotivatedAction(index.ToString(), 0, setup));
        index++;
    }

    AcitonHistory = new MyActionHistory(this, setup);
    AcitonHistory.AddAllCurrentActions();
}
/// <summary>
/// Builds <paramref name="noPrimitiveActions"/> level-0 motivated actions whose
/// labels are their indices, then creates the action history and adds all
/// current actions to it.
/// </summary>
/// <param name="noPrimitiveActions">How many actions to create.</param>
/// <param name="setup">Module parameters shared by the actions and the history.</param>
public ActionManager(int noPrimitiveActions, MyModuleParams setup)
{
    m_setup = setup;
    Actions = new List<MyMotivatedAction>();

    for (int actionIndex = 0; actionIndex < noPrimitiveActions; actionIndex++)
    {
        String label = actionIndex.ToString();
        MyMotivatedAction action = new MyMotivatedAction(label, 0, setup);
        Actions.Add(action);
    }

    // History comes last so that it sees every action created above.
    AcitonHistory = new MyActionHistory(this, setup);
    AcitonHistory.AddAllCurrentActions();
}
/// <summary>
/// Builds one level-0 motivated action for each supplied label, then creates
/// the action history and adds all current actions to it.
/// </summary>
/// <param name="labels">Action labels, one action is created per entry.</param>
/// <param name="setup">Module parameters shared by the actions and the history.</param>
public ActionManager(String[] labels, MyModuleParams setup)
{
    m_setup = setup;
    Actions = new List<MyMotivatedAction>();

    foreach (String actionLabel in labels)
    {
        MyMotivatedAction action = new MyMotivatedAction(actionLabel, 0, setup);
        Actions.Add(action);
    }

    // History comes last so that it sees every action created above.
    AcitonHistory = new MyActionHistory(this, setup);
    AcitonHistory.AddAllCurrentActions();
}
/// <summary>
/// Creates the hierarchy with an empty list of action levels and calls
/// <c>AddAction</c> for each action index reported by the root decision
/// space's action manager.
/// </summary>
/// <param name="rds">Root decision space whose action manager supplies the action count.</param>
/// <param name="learningParams">Learning parameters stored by the hierarchy.</param>
public MyAbstractHierarchy(MyRootDecisionSpace rds, MyModuleParams learningParams)
{
    m_rds = rds;
    m_learningParams = learningParams;
    m_actionLevels = new List<List<MyAction>>();

    // The action count is queried on every pass, exactly as a for-loop condition would.
    int actionIndex = 0;
    while (actionIndex < rds.ActionManager.GetNoActions())
    {
        AddAction(actionIndex);
        actionIndex++;
    }
}
/// <summary>
/// Creates a local variable history in which every variable index known to the
/// variable manager starts with the value <c>INIT_VAL</c>.
/// </summary>
/// <param name="rds">Root decision space; its variable manager gives the number of variables.</param>
/// <param name="setup">Module parameters stored by this history.</param>
/// <param name="ds">Decision space; not used by this constructor.</param>
public MyLocalVariableHistory(MyRootDecisionSpace rds, MyModuleParams setup, IDecisionSpace ds)
{
    m_setup = setup;
    m_rds = rds;
    changes = new Dictionary<int, float>();

    // Every potential variable gets an entry up front.
    int variable = 0;
    while (variable < rds.VarManager.GetMaxVariables())
    {
        changes.Add(variable, INIT_VAL);
        variable++;
    }
}
/// <summary>
/// Creates an empty eligibility trace and precomputes the decay table
/// <c>Lambda</c>, where entry <c>i</c> is <c>learning.Lambda</c> multiplied
/// together <c>i + 1</c> times.
/// </summary>
/// <param name="learning">Learning parameters supplying <c>TraceLength</c> and <c>Lambda</c>.</param>
public MyEligibilityTrace(MyModuleParams learning)
{
    m_learning = learning;
    m_traceData = new List<TraceData>();
    Lambda = new float[learning.TraceLength];

    // Running product: starts at Lambda and is multiplied by Lambda after each store.
    float decay = learning.Lambda;
    int step = 0;
    while (step < learning.TraceLength)
    {
        Lambda[step] = decay;
        decay *= learning.Lambda;
        step++;
    }
}
/// <summary>
/// Creates a stochastic return predictor for one promoted variable, together
/// with its own decision space, Q(s,a) memory, action selection method,
/// Q-learning algorithm and local variable history.
/// </summary>
/// <param name="rds">Root decision space shared with the created helpers.</param>
/// <param name="myPromotedVariable">Index of the variable registered through <c>AddPromotedVariable</c>.</param>
/// <param name="setup">Module parameters passed to the base class and the helpers.</param>
/// <param name="label">Label passed to the base constructor.</param>
/// <param name="level">Level passed to the base constructor.</param>
public MyStochasticReturnPredictor(MyRootDecisionSpace rds, int myPromotedVariable, MyModuleParams setup, String label, int level)
    : base(label, level, setup)
{
    base.AddPromotedVariable(myPromotedVariable, rds);
    Rds = rds;

    // The helpers are built in dependency order: decision space and memory first.
    Ds = new MyDecisionSpace(this, rds, setup);
    Mem = new MyQSAMemory(rds.VarManager.MAX_VARIABLES, 0);
    m_asm = new MyMotivationBasedDeleteUnselectedASM(setup);
    LearningAlgorithm = new MyDiscreteQLearning(setup, Mem);
    m_mlvh = new MyLocalVariableHistory(rds, m_setup, Ds);

    // Initial "previous" action and state.
    m_prevSelectedAction = 0;
    m_prev_st = Ds.GetCurrentState();

    m_newVariables = new List<int>();
}
private int[] m_prev_st; // previous state (for variable adding and sharing knowledge)

#endregion Fields

#region Constructors

/// <summary>
/// Builds the predictor for a single promoted variable and wires up its
/// decision space, Q(s,a) memory, action selection method, Q-learning
/// algorithm and local variable history.
/// </summary>
/// <param name="rds">Root decision space shared with the created helpers.</param>
/// <param name="myPromotedVariable">Index of the variable registered through <c>AddPromotedVariable</c>.</param>
/// <param name="setup">Module parameters passed to the base class and the helpers.</param>
/// <param name="label">Label passed to the base constructor.</param>
/// <param name="level">Level passed to the base constructor.</param>
public MyStochasticReturnPredictor(
    MyRootDecisionSpace rds,
    int myPromotedVariable,
    MyModuleParams setup,
    String label,
    int level)
    : base(label, level, setup)
{
    base.AddPromotedVariable(myPromotedVariable, rds);
    Rds = rds;

    // Decision space and memory come first because later helpers take them as arguments.
    Ds = new MyDecisionSpace(this, rds, setup);
    Mem = new MyQSAMemory(rds.VarManager.MAX_VARIABLES, 0);
    m_asm = new MyMotivationBasedDeleteUnselectedASM(setup);
    LearningAlgorithm = new MyDiscreteQLearning(setup, Mem);
    m_mlvh = new MyLocalVariableHistory(rds, m_setup, Ds);

    // Start from action 0 and the decision space's current state.
    m_prevSelectedAction = 0;
    m_prev_st = Ds.GetCurrentState();

    m_newVariables = new List<int>();
}
/// <summary>
/// Creates a variable manager with one variable slot per supplied label, then
/// initializes the variables and the variable history.
/// </summary>
/// <remarks>
/// <c>MAX_VARIABLES</c> is derived from <c>labels.Length</c>, with a minimum of 1.
/// The previous guard tested <c>MAX_VARIABLES</c> itself before it had been
/// assigned in this constructor, so the label count was ignored whenever the
/// field still held a non-positive value.
/// </remarks>
/// <param name="labels">Variable labels; the array is stored as-is.</param>
/// <param name="setup">Module parameters passed to the variable history.</param>
public VariableManager(String[] labels, MyModuleParams setup)
{
    this.m_labels = labels;
    m_vars = new MyVariable[labels.Length];

    // Decide the variable count from the input, never below one.
    this.MAX_VARIABLES = labels.Length <= 0 ? 1 : labels.Length;

    this.InitVars();
    this.VarHistory = new MyVariableHistory(this, setup);
    this.VarHistory.AddAllPotentialVariables();
}
/// <summary>
/// Creates a variable manager with <paramref name="maxVariables"/> variable
/// slots (at least one), labels each slot with its index, and initializes the
/// variables, the variable history and the list of variables to be subspaced.
/// </summary>
/// <remarks>
/// A non-positive <paramref name="maxVariables"/> is clamped to 1 and the
/// clamped value is stored in <c>MAX_VARIABLES</c>. Previously only the
/// parameter was clamped and <c>MAX_VARIABLES</c> was left unassigned in that
/// case, so the clamp had no effect on the label array size.
/// </remarks>
/// <param name="maxVariables">Requested number of variables.</param>
/// <param name="setup">Module parameters passed to the variable history.</param>
public VariableManager(int maxVariables, MyModuleParams setup)
{
    // Clamp first, then always store the result.
    if (maxVariables <= 0)
    {
        maxVariables = 1;
    }
    this.MAX_VARIABLES = maxVariables;

    m_labels = new String[this.MAX_VARIABLES];
    for (int i = 0; i < m_labels.Length; i++)
    {
        m_labels[i] = "" + i;
    }

    this.InitVars();
    this.VarHistory = new MyVariableHistory(this, setup);
    this.VarHistory.AddAllPotentialVariables();
    this.ShouldBeSubspaced = new List<int>();
}
/// <summary>
/// Creates the action selection method with its own random number generator.
/// </summary>
/// <param name="setup">Module parameters stored by this instance.</param>
public MyMotivationBasedDeleteUnselectedASM(MyModuleParams setup)
{
    // Default-seeded generator, one per instance.
    m_rnd = new Random();
    m_setup = setup;
}
/// <summary>
/// Creates an action with the given label and level, and attaches a new
/// motivation source built for the same level.
/// </summary>
/// <param name="label">Label passed to the base constructor.</param>
/// <param name="level">Level passed to the base constructor and to the motivation source.</param>
/// <param name="setup">Module parameters stored here and passed to the motivation source.</param>
public MyMotivatedAction(String label, int level, MyModuleParams setup)
    : base(label, level)
{
    m_setup = setup;
    m_motivatonSource = new MyMotivationSource(setup, level);
}
/// <summary>
/// Creates the root decision space: a variable manager with the requested
/// number of variables and an action manager with the given action names.
/// </summary>
/// <param name="maxVariables">Number of variables passed to the variable manager.</param>
/// <param name="actionNames">Action labels passed to the action manager.</param>
/// <param name="setup">Module parameters passed to both managers.</param>
public MyRootDecisionSpace(int maxVariables, String[] actionNames, MyModuleParams setup)
{
    // Variables are set up before actions.
    this.VarManager = new VariableManager(maxVariables, setup);
    this.ActionManager = new ActionManager(actionNames, setup);
}
/// <summary>
/// Creates a motivation source for the given level with an initial value of zero.
/// </summary>
/// <param name="setup">Module parameters stored by this instance.</param>
/// <param name="level">Level stored by this instance.</param>
public MyMotivationSource(MyModuleParams setup, int level)
{
    m_level = level;
    m_setup = setup;

    // Motivation starts at zero.
    m_val = 0;
}
/// <summary>
/// Initializes the hierarchy: stores the root decision space and learning
/// parameters, creates an empty list of action levels, and passes every action
/// index from the action manager to <c>AddAction</c>.
/// </summary>
/// <param name="rds">Root decision space whose action manager supplies the action count.</param>
/// <param name="learningParams">Learning parameters stored by the hierarchy.</param>
public MyAbstractHierarchy(MyRootDecisionSpace rds, MyModuleParams learningParams)
{
    m_learningParams = learningParams;
    m_rds = rds;
    m_actionLevels = new List<List<MyAction>>();

    // GetNoActions() stays in the loop condition so it is re-read on each iteration.
    for (int action = 0; action < rds.ActionManager.GetNoActions(); ++action)
    {
        AddAction(action);
    }
}
/// <summary>
/// Creates the hierarchy maintainer; all initialization is done by the base constructor.
/// </summary>
/// <param name="rds">Root decision space passed to the base constructor.</param>
/// <param name="learningParams">Learning parameters passed to the base constructor.</param>
public MyHierarchyMaintainer(MyRootDecisionSpace rds, MyModuleParams learningParams)
    : base(rds, learningParams)
{
    // Nothing to do here beyond the base-class setup.
}
/// <summary>
/// Creates a variable manager with <paramref name="maxVariables"/> variable
/// slots (at least one), labels each slot with its index, and initializes the
/// variables, the variable history and the list of variables to be subspaced.
/// </summary>
/// <remarks>
/// A non-positive <paramref name="maxVariables"/> is clamped to 1 and the
/// clamped value is stored in <c>MAX_VARIABLES</c>. Previously only the
/// parameter was clamped and <c>MAX_VARIABLES</c> was left unassigned in that
/// case, so the clamp had no effect on the label array size.
/// </remarks>
/// <param name="maxVariables">Requested number of variables.</param>
/// <param name="setup">Module parameters passed to the variable history.</param>
public VariableManager(int maxVariables, MyModuleParams setup)
{
    // Clamp to at least one variable and always record the outcome.
    this.MAX_VARIABLES = maxVariables <= 0 ? 1 : maxVariables;

    m_labels = new String[this.MAX_VARIABLES];
    for (int i = 0; i < m_labels.Length; i++)
    {
        m_labels[i] = "" + i;
    }

    this.InitVars();
    this.VarHistory = new MyVariableHistory(this, setup);
    this.VarHistory.AddAllPotentialVariables();
    this.ShouldBeSubspaced = new List<int>();
}
private List<TraceData> m_traceData; // position 0 stores the most recent state-action

#endregion Fields

#region Constructors

/// <summary>
/// Starts with an empty trace and fills the <c>Lambda</c> table so that entry
/// <c>i</c> holds <c>learning.Lambda</c> multiplied together <c>i + 1</c> times.
/// </summary>
/// <param name="learning">Learning parameters supplying <c>TraceLength</c> and <c>Lambda</c>.</param>
public MyEligibilityTrace(MyModuleParams learning)
{
    m_learning = learning;
    m_traceData = new List<TraceData>();
    Lambda = new float[learning.TraceLength];

    // The running product is advanced in the loop's iterator, after each store.
    float decay = learning.Lambda;
    for (int step = 0; step < learning.TraceLength; decay *= learning.Lambda, step++)
    {
        Lambda[step] = decay;
    }
}
/// <summary>
/// Builds the local history and seeds the <c>changes</c> table with
/// <c>INIT_VAL</c> for every variable index reported by the variable manager.
/// </summary>
/// <param name="rds">Root decision space; its variable manager gives the number of variables.</param>
/// <param name="setup">Module parameters stored by this history.</param>
/// <param name="ds">Decision space; not used by this constructor.</param>
public MyLocalVariableHistory(MyRootDecisionSpace rds, MyModuleParams setup, IDecisionSpace ds)
{
    m_rds = rds;
    m_setup = setup;
    changes = new Dictionary<int, float>();

    // All potential variables are present from the start.
    for (int varIndex = 0; varIndex < rds.VarManager.GetMaxVariables(); ++varIndex)
    {
        changes.Add(varIndex, INIT_VAL);
    }
}
/// <summary>
/// Builds the variable history: stores the manager and parameters, starts with
/// an empty change log, and allocates one monitoring flag per variable slot.
/// </summary>
/// <param name="vm">Variable manager; <c>GetMaxVariables()</c> sizes the monitoring flags.</param>
/// <param name="setup">Module parameters stored by this history.</param>
public MyVariableHistory(VariableManager vm, MyModuleParams setup)
{
    this.vm = vm;
    this.setup = setup;

    // No changes recorded yet; no variable monitored yet (flags default to false).
    changed = new List<List<int>>();
    isMonitored = new bool[vm.GetMaxVariables()];
}
/// <summary>
/// Creates the hierarchy maintainer; all initialization is done by the base constructor.
/// </summary>
/// <param name="rds">Root decision space passed to the base constructor.</param>
/// <param name="learningParams">Learning parameters passed to the base constructor.</param>
public MyAbsHierarchyMaintainer(MyRootDecisionSpace rds, MyModuleParams learningParams)
    : base(rds, learningParams)
{
    // Nothing to do here beyond the base-class setup.
}