public override void UpdateMemoryBlocks()
{
    UtilityOutput.Count = NoActions;
    RewardStats.Count = 2;

    LearningParams = new MyModuleParams();
    Memory = new MyQSAMemory(GlobalDataInput.Count, NoActions);
    LearningAlgorithm = new MyDiscreteQLearning(LearningParams, Memory);

    if (GlobalDataInput != null)
    {
        if (NoActions == 6)
        {
            MyLog.DEBUG.WriteLine("6 actions set by the user, will use action names for gridworld");
            Rds = new MyRootDecisionSpace(GlobalDataInput.Count,
                new String[] { " -", " <", " >", " ^", " v", " P" }, LearningParams);
        }
        else if (NoActions == 3)
        {
            MyLog.DEBUG.WriteLine("3 actions set by the user, will use action names for pong");
            Rds = new MyRootDecisionSpace(GlobalDataInput.Count,
                new String[] { " <", " -", " >" }, LearningParams);
        }
        else
        {
            MyLog.DEBUG.WriteLine("Unknown no. of actions, will use automatic naming of actions");
            String[] names = new String[NoActions];
            for (int i = 0; i < NoActions; i++)
            {
                names[i] = "A" + i;
            }
            Rds = new MyRootDecisionSpace(GlobalDataInput.Count, names, LearningParams);
        }
        CurrentStateOutput.Count = GlobalDataInput.Count;
    }
}
public MyDiscreteQLearning(MyModuleParams learningParams, MyQSAMemory mem)
{
    this.m_mem = mem;
    this.m_learning = learningParams;

    // The eligibility trace starts from an all-zero state of the maximum supported dimension.
    m_trace = new MyEligibilityTrace(learningParams);
    m_trace.PushState(new int[mem.GetMaxStateVariables()]);
}
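For orientation, a minimal usage sketch follows, assuming only the constructors shown in this section; the state-vector size and action count are arbitrary illustration values, not defaults of any node.

// Sketch only: mirrors the wiring done in UpdateMemoryBlocks() above.
// The sizes below are illustration values chosen for this example.
int stateVectorSize = 16;   // number of state variables fed into the node
int noActions = 4;          // number of primitive actions

MyModuleParams learningParams = new MyModuleParams();
MyQSAMemory memory = new MyQSAMemory(stateVectorSize, noActions);
MyDiscreteQLearning learning = new MyDiscreteQLearning(learningParams, memory);

// Action names fall back to automatic labels, as in the "unknown no. of actions" branch above.
String[] names = new String[noActions];
for (int i = 0; i < noActions; i++)
{
    names[i] = "A" + i;
}
MyRootDecisionSpace rds = new MyRootDecisionSpace(stateVectorSize, names, learningParams);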
private int[] m_prev_st; // previous state (for variable adding and sharing knowledge)

#endregion Fields

#region Constructors

public MyStochasticReturnPredictor(MyRootDecisionSpace rds, int myPromotedVariable,
    MyModuleParams setup, String label, int level)
    : base(label, level, setup)
{
    base.AddPromotedVariable(myPromotedVariable, rds);
    this.Rds = rds;
    Ds = new MyDecisionSpace(this, rds, setup);
    Mem = new MyQSAMemory(rds.VarManager.MAX_VARIABLES, 0);
    m_asm = new MyMotivationBasedDeleteUnselectedASM(setup);
    LearningAlgorithm = new MyDiscreteQLearning(setup, Mem);
    m_mlvh = new MyLocalVariableHistory(rds, m_setup, Ds);

    m_prevSelectedAction = 0;
    m_prev_st = Ds.GetCurrentState();
    this.m_newVariables = new List<int>();
}
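A minimal construction sketch for the predictor follows, assuming an already existing root decision space and setup; the action names, promoted-variable index, label and level are illustration values only.

// Sketch only: constructing a predictor for one abstract action.
MyModuleParams setup = new MyModuleParams();
String[] actionNames = new String[] { "A0", "A1" };
MyRootDecisionSpace rds = new MyRootDecisionSpace(8, actionNames, setup);

int promotedVariable = 3;   // index of the variable this abstract action is promoted on (illustration value)
MyStochasticReturnPredictor predictor =
    new MyStochasticReturnPredictor(rds, promotedVariable, setup, "Abstract_3", 1);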
/// <summary>
/// Creates a 2D array of max action utilities and max action labels across the two selected dimensions.
/// The values in the memory are automatically scaled into the interval [0,1]. Realtime values are multiplied by motivations.
/// </summary>
/// <param name="values">array passed by reference for storing utilities of the best action</param>
/// <param name="labelIndexes">array of the same size for best action indexes</param>
/// <param name="XVarIndex">global index of the state variable in the VariableManager (x axis)</param>
/// <param name="YVarIndex">the same: y axis</param>
/// <param name="showRealtimeUtilities">show current utilities (scaled by the current motivation)</param>
public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
    int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
{
    // Clamp the requested variable indexes into the valid range.
    if (XVarIndex >= Owner.Rds.VarManager.MAX_VARIABLES)
    {
        XVarIndex = Owner.Rds.VarManager.MAX_VARIABLES - 1;
    }
    if (YVarIndex >= Owner.Rds.VarManager.MAX_VARIABLES)
    {
        YVarIndex = Owner.Rds.VarManager.MAX_VARIABLES - 1;
    }
    if (YVarIndex < 0)
    {
        YVarIndex = 0;
    }
    if (XVarIndex < 0)
    {
        XVarIndex = 0;
    }

    MyQSAMemory mem = Owner.Memory;
    int[] sizes = mem.GetStateSizes();                        // size of the matrix
    int[] indexes = Owner.Rds.VarManager.GetCurrentState();
    int[] actionGlobalIndexes = mem.GetGlobalActionIndexes();

    MyVariable varX = Owner.Rds.VarManager.GetVarNo(XVarIndex);
    MyVariable varY = Owner.Rds.VarManager.GetVarNo(YVarIndex);

    float[] varXvals = varX.Values.ToArray();
    float[] varYvals = varY.Values.ToArray();
    Array.Sort(varXvals);
    Array.Sort(varYvals);

    int sx = varX.Values.Count;
    int sy = varY.Values.Count;

    if (values == null || labelIndexes == null ||
        values.GetLength(0) != sx || values.GetLength(1) != sy ||
        labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy)
    {
        values = new float[sx, sy];
        labelIndexes = new int[sx, sy];
    }

    for (int i = 0; i < sx; i++)
    {
        indexes[XVarIndex] = (int)varXvals[i];

        for (int j = 0; j < sy; j++)
        {
            indexes[YVarIndex] = (int)varYvals[j];

            float[] utilities = mem.ReadData(indexes);

            // Scale utilities into [0,1] by the maximum value stored in the memory.
            float memoryMaxValue = Owner.LearningAlgorithm.GetMaxVal();
            if (memoryMaxValue != 0)
            {
                for (int k = 0; k < utilities.Length; k++)
                {
                    utilities[k] = utilities[k] / memoryMaxValue;
                }
            }

            float maxValue = 0.0f;
            int maxIndex = 0;

            if (utilities.Length != actionGlobalIndexes.Length)
            {
                MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                utilities = new float[actionGlobalIndexes.Length];
            }
            else if (actionGlobalIndexes.Length == 0)
            {
                MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
                utilities = new float[1];
                actionGlobalIndexes = new int[] { 0 };
            }
            else
            {
                maxValue = utilities.Max();
                maxIndex = utilities.ToList().IndexOf(maxValue);
            }

            if (showRealtimeUtilities)
            {
                Owner.MotivationInput.SafeCopyToHost();
                float motivation = Owner.MotivationInput.Host[0];
                values[i, j] = maxValue * motivation;
            }
            else
            {
                values[i, j] = maxValue;
            }
            labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
        }
    }
}
/// <summary>
/// For a given predictor, the method creates a 2D array of max action utilities and max action labels across the two selected dimensions.
/// The values in the memory are automatically scaled into the interval [0,1]. Realtime values are multiplied by motivations (and are therefore bigger).
/// </summary>
/// <param name="values">array passed by reference for storing utilities of the best action</param>
/// <param name="labelIndexes">array of the same size for best action indexes</param>
/// <param name="predictor">an abstract action</param>
/// <param name="XVarIndex">global index of the state variable in the VariableManager (x axis)</param>
/// <param name="YVarIndex">the same: y axis</param>
/// <param name="showRealtimeUtilities">show current utilities (scaled by motivations from the source and the hierarchy)?</param>
public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
    MyStochasticReturnPredictor predictor, int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
{
    MyRootDecisionSpace rds = predictor.Rds;

    // Clamp the requested variable indexes into the valid range.
    if (XVarIndex >= rds.VarManager.MAX_VARIABLES)
    {
        XVarIndex = rds.VarManager.MAX_VARIABLES - 1;
    }
    if (YVarIndex >= rds.VarManager.MAX_VARIABLES)
    {
        YVarIndex = rds.VarManager.MAX_VARIABLES - 1;
    }
    if (YVarIndex < 0)
    {
        YVarIndex = 0;
    }
    if (XVarIndex < 0)
    {
        XVarIndex = 0;
    }

    MyQSAMemory mem = predictor.Mem;
    int[] sizes = mem.GetStateSizes();                         // size of the matrix
    int[] indexes = predictor.Ds.GetCurrentState();            // initial indexes
    int[] actionGlobalIndexes = mem.GetGlobalActionIndexes();  // global indexes of actions in the memory
    int promotedIndex = predictor.GetPromotedVariableIndex();

    MyVariable varX = rds.VarManager.GetVarNo(XVarIndex);
    MyVariable varY = rds.VarManager.GetVarNo(YVarIndex);

    float[] varXvals = varX.Values.ToArray();
    float[] varYvals = varY.Values.ToArray();
    Array.Sort(varXvals);
    Array.Sort(varYvals);

    int sx = 0;
    int sy = 0;

    // The axis that corresponds to the promoted variable collapses to a single row/column (index 0).
    if (XVarIndex == promotedIndex)
    {
        sx = 1;
        indexes[XVarIndex] = 0;
        varXvals = new float[] { 0 };
        sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
    }
    else if (YVarIndex == promotedIndex)
    {
        sy = 1;
        indexes[YVarIndex] = 0;
        varYvals = new float[] { 0 };
        sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
    }
    else
    {
        sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
        sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
    }

    if (values == null || labelIndexes == null ||
        values.GetLength(0) != sx || values.GetLength(1) != sy ||
        labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy)
    {
        values = new float[sx, sy];
        labelIndexes = new int[sx, sy];
    }

    for (int i = 0; i < sx; i++)
    {
        indexes[XVarIndex] = (int)varXvals[i];

        for (int j = 0; j < sy; j++)
        {
            indexes[YVarIndex] = (int)varYvals[j];

            float[] utilities = mem.ReadData(indexes);

            // Scale utilities into [0,1] by the maximum value stored in the predictor's memory.
            if (predictor.GetMaxMemoryValue() != 0)
            {
                for (int k = 0; k < utilities.Length; k++)
                {
                    utilities[k] = utilities[k] / predictor.GetMaxMemoryValue();
                }
            }

            float maxValue = 0.0f;
            int maxIndex = 0;

            if (utilities.Length != actionGlobalIndexes.Length)
            {
                MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                utilities = new float[actionGlobalIndexes.Length];
            }
            else if (actionGlobalIndexes.Length == 0)
            {
                MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
                utilities = new float[1];
                actionGlobalIndexes = new int[] { 0 };
            }
            else
            {
                maxValue = utilities.Max();
                maxIndex = utilities.ToList().IndexOf(maxValue);
            }

            if (showRealtimeUtilities)
            {
                values[i, j] = maxValue * predictor.GetMyTotalMotivation();
            }
            else
            {
                values[i, j] = maxValue;
            }
            labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
        }
    }
}