/// <summary>
/// Builds a 2D array of max action utilities and best-action labels across the two
/// selected dimensions. Values read from the memory are automatically scaled into the
/// interval 0,1; realtime values are additionally multiplied by the current motivations.
/// </summary>
/// <param name="values">array passed by reference for storing utilities of the best action</param>
/// <param name="labelIndexes">array of the same size for best action indexes</param>
/// <param name="XVarIndex">global index of the state variable (x axis) in the VariableManager</param>
/// <param name="YVarIndex">the same for the y axis</param>
/// <param name="showRealtimeUtilities">show current utilities (scaled by the current motivation)</param>
/// <param name="policyNumber">optional parameter: if the agent has more strategies, selects which one to read from</param>
public override void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
    int XVarIndex, int YVarIndex, bool showRealtimeUtilities = false, int policyNumber = 0)
{
    // Resolve the requested policy and delegate the actual reading to the visualizer.
    Vis.ReadTwoDimensions(ref values, ref labelIndexes, Vis.GetPredictorNo(policyNumber),
        XVarIndex, YVarIndex, showRealtimeUtilities);
}
/// <summary>
/// Should be called only after receiving the reward: removes from the parent's decision
/// space every variable whose change frequency (relative to rewards received) fell under
/// the configured threshold.
/// </summary>
/// <param name="parent">predictor whose decision space is pruned</param>
public void performOnlineVariableRemoving(MyStochasticReturnPredictor parent)
{
    bool anyRemoved = false;

    // Remove every eligible variable whose accumulated change weight decayed below the threshold.
    // Eligible = included in the DS, has more than one value, and is not the promoted variable.
    for (int varIndex = 0; varIndex < m_rds.VarManager.GetMaxVariables(); varIndex++)
    {
        bool eligible = parent.Ds.IsVariableIncluded(varIndex)
            && m_rds.VarManager.GetVarNo(varIndex).Values.Count > 1
            && varIndex != parent.GetPromotedVariableIndex();

        if (eligible && changes[varIndex] < m_setup.OnlineVariableRemovingThreshold)
        {
            anyRemoved = true;
            parent.Ds.RemoveVariable(varIndex);
        }
    }

    if (!anyRemoved)
    {
        return;
    }

    // Log which variables survived the pruning.
    String output = "SRP: " + parent.GetLabel() + ": Variables removed, current ones: ";
    for (int varIndex = 0; varIndex < m_rds.VarManager.GetMaxVariables(); varIndex++)
    {
        if (parent.Ds.IsVariableIncluded(varIndex))
        {
            output += varIndex + ", ";
        }
    }
    MyLog.DEBUG.WriteLine(output);
}
/// <summary>
/// Creates a decision space bound to the given predictor, root decision space and
/// learning parameters.
/// </summary>
/// <param name="mySRP">the predictor that owns this decision space</param>
/// <param name="rds">root decision space providing variable and action managers</param>
/// <param name="setup">module parameters</param>
public MyDecisionSpace(MyStochasticReturnPredictor mySRP, MyRootDecisionSpace rds, MyModuleParams setup)
{
    m_mySRP = mySRP;
    m_rds = rds;
    m_setup = setup;

    // State buffer sized for the maximum number of variables.
    m_S_t = new int[rds.VarManager.MAX_VARIABLES];
    ChildActions = new MyActionLink(0, rds);
    m_childVariables = new MyVariableLink(rds.VarManager.GetMaxVariables());
}
/// <summary>
/// Manually creates a new abstract action (subspace) over the given variables and actions.
/// The promoted variable itself is never added to the new decision space; when building a
/// multilevel hierarchy, the abstract actions producing the added variables are pulled in too.
/// </summary>
/// <param name="variables">global indexes of variables to include in the new DS</param>
/// <param name="actions">global indexes of child actions</param>
/// <param name="promotedVariable">index of the variable promoted by the new action</param>
/// <param name="label">human-readable name of the new action</param>
/// <returns>the newly created predictor, or null if too many actions were requested</returns>
public override MyStochasticReturnPredictor ManualSubspacing(
    int[] variables, int[] actions, int promotedVariable, String label)
{
    List<int> childActions = actions.ToList();

    if (actions.Length > m_rds.ActionManager.GetNoActions())
    {
        MyLog.ERROR.WriteLine("Too many actions to be added, ignoring this DS!!");
        return null;
    }

    MyLog.INFO.Write("Hierarchy: action named " + label + " added, variables: ");
    MyStochasticReturnPredictor newAction =
        new MyStochasticReturnPredictor(m_rds, promotedVariable, m_learningParams, label, 0);

    foreach (int variable in variables)
    {
        // The promoted variable must not be part of its own decision space.
        if (variable == promotedVariable)
        {
            continue;
        }
        newAction.Ds.AddVariable(variable);

        // In a multilevel hierarchy, also adopt the abstract action that produces this variable.
        if (m_learningParams.BuildMultilevelHierarchy)
        {
            if (m_rds.VarManager.GetVarNo(variable).MyAction != null)
            {
                childActions.Add(m_rds.VarManager.GetVarNo(variable).MyAction.GetMyIndex());
            }
        }
        MyLog.INFO.Write(" " + variable);
    }

    MyLog.INFO.Write("\t actions: ");
    int highestChildLevel = 0;
    foreach (int action in childActions)
    {
        newAction.Ds.AddAction(action);
        MyLog.INFO.Write(" " + m_rds.ActionManager.GetActionLabel(action));
        if (m_rds.ActionManager.Actions[action].GetLevel() > highestChildLevel)
        {
            highestChildLevel = m_rds.ActionManager.Actions[action].GetLevel();
        }
    }
    MyLog.INFO.WriteLine();

    // The level of an abstract action is determined by its most abstract child.
    newAction.SetLevel(highestChildLevel + 1);
    m_rds.ActionManager.AddAction(newAction);
    this.AddAction(m_rds.ActionManager.GetNoActions() - 1);
    return newAction;
}
/// <summary>
/// Manually builds a new abstract action (subspace) from the given variables and actions.
/// The promoted variable is skipped when filling the decision space; with
/// BuildMultilevelHierarchy enabled, abstract actions of the included variables are added
/// as child actions as well.
/// </summary>
/// <param name="variables">global indexes of variables to include in the new DS</param>
/// <param name="actions">global indexes of child actions</param>
/// <param name="promotedVariable">index of the variable promoted by the new action</param>
/// <param name="label">human-readable name of the new action</param>
/// <returns>the newly created predictor, or null if too many actions were requested</returns>
public override MyStochasticReturnPredictor ManualSubspacing(
    int[] variables, int[] actions, int promotedVariable, String label)
{
    List<int> collectedActions = actions.ToList();

    if (actions.Length > m_rds.ActionManager.GetNoActions())
    {
        MyLog.ERROR.WriteLine("Too many actions to be added, ignoring this DS!!");
        return null;
    }

    MyLog.INFO.Write("Hierarchy: action named " + label + " added, variables: ");
    MyStochasticReturnPredictor created =
        new MyStochasticReturnPredictor(m_rds, promotedVariable, m_learningParams, label, 0);

    for (int v = 0; v < variables.Length; v++)
    {
        // Never add the promoted variable into its own decision space.
        if (variables[v] == promotedVariable)
        {
            continue;
        }
        created.Ds.AddVariable(variables[v]);

        // Multilevel hierarchy: the abstract action producing this variable becomes a child too.
        if (m_learningParams.BuildMultilevelHierarchy
            && m_rds.VarManager.GetVarNo(variables[v]).MyAction != null)
        {
            collectedActions.Add(m_rds.VarManager.GetVarNo(variables[v]).MyAction.GetMyIndex());
        }
        MyLog.INFO.Write(" " + variables[v]);
    }

    MyLog.INFO.Write("\t actions: ");
    int topChildLevel = 0;
    for (int a = 0; a < collectedActions.Count; a++)
    {
        created.Ds.AddAction(collectedActions[a]);
        MyLog.INFO.Write(" " + m_rds.ActionManager.GetActionLabel(collectedActions[a]));
        topChildLevel = Math.Max(topChildLevel, m_rds.ActionManager.Actions[collectedActions[a]].GetLevel());
    }
    MyLog.INFO.WriteLine();

    // The new action sits one level above its most abstract child.
    created.SetLevel(topChildLevel + 1);
    m_rds.ActionManager.AddAction(created);
    this.AddAction(m_rds.ActionManager.GetNoActions() - 1);
    return created;
}
/// <summary>
/// Updates the variable weights in the DS; a variable whose weight falls under the
/// threshold should subsequently be removed from the DS (see performOnlineVariableRemoving).
/// </summary>
/// <param name="parent">predictor whose decision space is monitored</param>
public void monitorVariableChanges(MyStochasticReturnPredictor parent)
{
    // Reward the variables that changed one step ago.
    List<int> changed = m_rds.VarManager.VarHistory.GetVariablesChangedBefore(1);
    if (changed != null)
    {
        foreach (int changedVar in changed)
        {
            changes[changedVar] += addVal;
        }
    }

    // Decay the weight of every non-constant, non-promoted variable included in this DS.
    for (int varIndex = 0; varIndex < m_rds.VarManager.GetMaxVariables(); varIndex++)
    {
        if (parent.Ds.IsVariableIncluded(varIndex)
            && m_rds.VarManager.GetVarNo(varIndex).Values.Count > 1
            && varIndex != parent.GetPromotedVariableIndex())
        {
            changes[varIndex] -= m_setup.OnlineHistoryForgettingRate;
        }
    }
}
/// <summary>
/// Stores the given predictor as this object's MyAction.
/// </summary>
/// <param name="myAction">predictor to associate with this object</param>
public void SetMyAction(MyStochasticReturnPredictor myAction)
{
    MyAction = myAction;
}
/// <summary>
/// Intended to run only after a reward was received: prunes variables whose change
/// frequency (relative to rewards received) dropped below the configured threshold.
/// </summary>
/// <param name="parent">predictor owning the decision space to prune</param>
public void performOnlineVariableRemoving(MyStochasticReturnPredictor parent)
{
    bool removed = false;

    for (int v = 0; v < m_rds.VarManager.GetMaxVariables(); v++)
    {
        // Candidates: included in the DS, more than one value, not the promoted variable.
        if (!parent.Ds.IsVariableIncluded(v)) { continue; }
        if (m_rds.VarManager.GetVarNo(v).Values.Count <= 1) { continue; }
        if (v == parent.GetPromotedVariableIndex()) { continue; }

        if (changes[v] < m_setup.OnlineVariableRemovingThreshold)
        {
            removed = true;
            parent.Ds.RemoveVariable(v);
        }
    }

    if (removed)
    {
        // Report the variables that remain after pruning.
        String output = "SRP: " + parent.GetLabel() + ": Variables removed, current ones: ";
        for (int v = 0; v < m_rds.VarManager.GetMaxVariables(); v++)
        {
            if (parent.Ds.IsVariableIncluded(v))
            {
                output += v + ", ";
            }
        }
        MyLog.DEBUG.WriteLine(output);
    }
}
/// <summary>
/// Updates per-variable change weights for the DS; variables whose weight ends up below
/// the removal threshold should later be dropped from the DS.
/// </summary>
/// <param name="parent">predictor whose decision space is monitored</param>
public void monitorVariableChanges(MyStochasticReturnPredictor parent)
{
    // Step 1: bump the weight of each variable that changed in the previous step.
    List<int> recentlyChanged = m_rds.VarManager.VarHistory.GetVariablesChangedBefore(1);
    if (recentlyChanged != null)
    {
        for (int c = 0; c < recentlyChanged.Count; c++)
        {
            changes[recentlyChanged[c]] += addVal;
        }
    }

    // Step 2: apply forgetting to all monitored variables included in this DS.
    for (int v = 0; v < m_rds.VarManager.GetMaxVariables(); v++)
    {
        bool monitored = parent.Ds.IsVariableIncluded(v)
            && m_rds.VarManager.GetVarNo(v).Values.Count > 1
            && v != parent.GetPromotedVariableIndex();

        if (monitored)
        {
            changes[v] -= m_setup.OnlineHistoryForgettingRate;
        }
    }
}
/// <summary>
/// For a given predictor, the method creates a 2D array of max action utilities and max
/// action labels over the selected dimensions. The values in the memory are automatically
/// scaled into the interval 0,1. Realtime values are multiplied by motivations (and are
/// therefore bigger).
/// </summary>
/// <param name="values">array passed by reference for storing utilities of best action</param>
/// <param name="labelIndexes">array of the same size for best action indexes</param>
/// <param name="predictor">an abstract action</param>
/// <param name="XVarIndex">global index of state variable in the VariableManager</param>
/// <param name="YVarIndex">the same: y axis</param>
/// <param name="showRealtimeUtilities">show current utilities (scaled by motivations from the source and the hierarchy?)</param>
public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes, MyStochasticReturnPredictor predictor, int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
{
    MyRootDecisionSpace rds = predictor.Rds;

    // Clamp both axis indexes into the valid range [0, MAX_VARIABLES - 1].
    if (XVarIndex >= rds.VarManager.MAX_VARIABLES)
    {
        XVarIndex = rds.VarManager.MAX_VARIABLES - 1;
    }
    if (YVarIndex >= rds.VarManager.MAX_VARIABLES)
    {
        YVarIndex = rds.VarManager.MAX_VARIABLES - 1;
    }
    if (YVarIndex < 0)
    {
        YVarIndex = 0;
    }
    if (XVarIndex < 0)
    {
        XVarIndex = 0;
    }

    MyQSAMemory mem = predictor.Mem;
    int[] sizes = mem.GetStateSizes();                       // size of the matrix (NOTE(review): not read below)
    int[] indexes = predictor.Ds.GetCurrentState();          // initial indexes; X/Y entries are overwritten while scanning
    int[] actionGlobalIndexes = mem.GetGlobalActionIndexes(); // global indexes of actions in the memory
    int promotedIndex = predictor.GetPromotedVariableIndex();

    MyVariable varX = rds.VarManager.GetVarNo(XVarIndex);
    MyVariable varY = rds.VarManager.GetVarNo(YVarIndex);

    // Sorted copies of the variables' known values; iterated below in ascending order.
    float[] varXvals = varX.Values.ToArray();
    float[] varYvals = varY.Values.ToArray();
    Array.Sort(varXvals);
    Array.Sort(varYvals);

    int sx = 0;
    int sy = 0;

    // A promoted axis collapses to size 1 (single value 0); the other axis gets its real size.
    if (XVarIndex == promotedIndex)
    {
        sx = 1;
        indexes[XVarIndex] = 0;
        varXvals = new float[] { 0 };
        sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
    }
    else if (YVarIndex == promotedIndex)
    {
        sy = 1;
        indexes[YVarIndex] = 0;
        varYvals = new float[] { 0 };
        sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
    }
    else
    {
        sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
        sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
    }

    // Reallocate the output arrays only when missing or wrongly sized.
    if (values == null || labelIndexes == null ||
        values.GetLength(0) != sx || values.GetLength(1) != sy ||
        labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy)
    {
        values = new float[sx, sy];
        labelIndexes = new int[sx, sy];
    }

    // Scan the X/Y grid: for each cell read the utilities and pick the best action.
    for (int i = 0; i < sx; i++)
    {
        indexes[XVarIndex] = (int)varXvals[i];
        for (int j = 0; j < sy; j++)
        {
            indexes[YVarIndex] = (int)varYvals[j];
            float[] utilities = mem.ReadData(indexes);

            // Scale utilities into [0,1] by the predictor's max stored value (skip if zero).
            if (predictor.GetMaxMemoryValue() != 0)
            {
                for (int k = 0; k < utilities.Length; k++)
                {
                    utilities[k] = utilities[k] / predictor.GetMaxMemoryValue();
                }
            }

            float maxValue = 0.0f;
            int maxIndex = 0;
            if (utilities.Length != actionGlobalIndexes.Length)
            {
                // Inconsistent memory contents: fall back to zero utilities for all actions.
                MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                utilities = new float[actionGlobalIndexes.Length];
            }
            else if (actionGlobalIndexes.Length == 0)
            {
                // Empty DS: substitute a single default action with zero utility.
                MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
                utilities = new float[1];
                actionGlobalIndexes = new int[] { 0 };
            }
            else
            {
                maxValue = utilities.Max();
                maxIndex = utilities.ToList().IndexOf(maxValue);
            }

            // Realtime view additionally scales by the predictor's current total motivation.
            if (showRealtimeUtilities)
            {
                values[i, j] = maxValue * predictor.GetMyTotalMotivation();
            }
            else
            {
                values[i, j] = maxValue;
            }
            labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
        }
    }
}
/// <summary>
/// Assigns the given predictor to this object's MyAction.
/// </summary>
/// <param name="myAction">predictor to associate with this object</param>
public void SetMyAction(MyStochasticReturnPredictor myAction)
{
    MyAction = myAction;
}