/// <summary>
/// Reads the current value of the variable promoted by m_mySRP.
/// </summary>
/// <returns>
/// 0 when the promoted variable index is negative or when the variable manager
/// holds no variable at that index; otherwise the Current value of that variable.
/// </returns>
private float MyCurrentPromotedVal()
{
    var promotedIndex = m_mySRP.GetPromotedVariableIndex();
    if (promotedIndex >= 0)
    {
        var promotedVariable = m_rds.VarManager.GetVarNo(promotedIndex);
        if (promotedVariable != null)
        {
            return promotedVariable.Current;
        }
    }

    // No promoted variable, or the manager does not know it.
    return 0;
}
/// <summary>
/// Removes from the parent's decision space every variable whose change weight
/// (changes[i]) is below m_setup.OnlineVariableRemovingThreshold.
/// This should be called only after receiving the reward.
/// </summary>
/// <remarks>
/// A variable is a removal candidate only if it is included in parent.Ds, has more than
/// one known value and is not the parent's promoted variable. When at least one variable
/// was removed, the indexes that are still included are written to the debug log.
/// </remarks>
/// <param name="parent">predictor whose decision space (Ds) is pruned</param>
public void performOnlineVariableRemoving(MyStochasticReturnPredictor parent)
{
    bool removed = false;

    // Drop every candidate variable whose change weight fell under the threshold.
    // NOTE(review): GetVarNo(i) is dereferenced without a null check - confirm that an
    // included variable always exists in the variable manager.
    for (int i = 0; i < m_rds.VarManager.GetMaxVariables(); i++)
    {
        bool isCandidate = parent.Ds.IsVariableIncluded(i)
            && m_rds.VarManager.GetVarNo(i).Values.Count > 1
            && i != parent.GetPromotedVariableIndex();

        if (isCandidate && changes[i] < m_setup.OnlineVariableRemovingThreshold)
        {
            parent.Ds.RemoveVariable(i);
            removed = true;
        }
    }

    if (!removed)
    {
        return;
    }

    // Report the variables that survived. A StringBuilder avoids re-allocating the
    // whole message for every appended index; the produced text is unchanged.
    System.Text.StringBuilder output = new System.Text.StringBuilder();
    output.Append("SRP: ").Append(parent.GetLabel()).Append(": Variables removed, current ones: ");
    for (int i = 0; i < m_rds.VarManager.GetMaxVariables(); i++)
    {
        if (parent.Ds.IsVariableIncluded(i))
        {
            output.Append(i).Append(", ");
        }
    }
    MyLog.DEBUG.WriteLine(output.ToString());
}
/// <summary>
/// Updates the per-variable change weights (changes[]): variables reported as changed by
/// the variable history gain addVal, and candidate variables of the parent's decision
/// space lose m_setup.OnlineHistoryForgettingRate.
/// </summary>
/// <remarks>
/// This method only adjusts the weights; it does not remove any variable itself.
/// </remarks>
/// <param name="parent">predictor whose decision space (Ds) selects the decayed variables</param>
public void monitorVariableChanges(MyStochasticReturnPredictor parent)
{
    // Strengthen every variable the history reports for GetVariablesChangedBefore(1).
    List<int> changedVariables = m_rds.VarManager.VarHistory.GetVariablesChangedBefore(1);
    if (changedVariables != null)
    {
        foreach (int variableIndex in changedVariables)
        {
            changes[variableIndex] += addVal;
        }
    }

    // Decay the weight of each included, multi-valued, non-promoted variable.
    // NOTE(review): GetVarNo(...) is dereferenced without a null check - confirm that an
    // included variable always exists in the variable manager.
    for (int variableNo = 0; variableNo < m_rds.VarManager.GetMaxVariables(); variableNo++)
    {
        if (!parent.Ds.IsVariableIncluded(variableNo))
        {
            continue;
        }
        if (m_rds.VarManager.GetVarNo(variableNo).Values.Count <= 1)
        {
            continue;
        }
        if (variableNo == parent.GetPromotedVariableIndex())
        {
            continue;
        }

        changes[variableNo] -= m_setup.OnlineHistoryForgettingRate;
    }
}
/// <summary>
/// Maintains the change weights stored in changes[]: adds addVal for every variable the
/// variable history reports as changed, then subtracts
/// m_setup.OnlineHistoryForgettingRate from every decay candidate of the parent's
/// decision space.
/// </summary>
/// <remarks>
/// A decay candidate is a variable that is included in parent.Ds, has more than one known
/// value and is not the parent's promoted variable. No variable is removed here.
/// </remarks>
/// <param name="parent">predictor whose decision space (Ds) selects the decayed variables</param>
public void monitorVariableChanges(MyStochasticReturnPredictor parent)
{
    // Step 1: reinforce the variables that changed.
    List<int> recentlyChanged = m_rds.VarManager.VarHistory.GetVariablesChangedBefore(1);
    if (recentlyChanged != null)
    {
        for (int n = 0; n < recentlyChanged.Count; n++)
        {
            int changedIndex = recentlyChanged[n];
            changes[changedIndex] += addVal;
        }
    }

    // Step 2: forget a little about every decay candidate.
    // NOTE(review): GetVarNo(...) is dereferenced without a null check - confirm that an
    // included variable always exists in the variable manager.
    for (int v = 0; v < m_rds.VarManager.GetMaxVariables(); v++)
    {
        bool decays = parent.Ds.IsVariableIncluded(v)
            && m_rds.VarManager.GetVarNo(v).Values.Count > 1
            && v != parent.GetPromotedVariableIndex();

        if (decays)
        {
            changes[v] -= m_setup.OnlineHistoryForgettingRate;
        }
    }
}
/// <summary>
/// For a given predictor, the method creates 2D arrays of max action utilities and max action labels
/// over two selected dimensions (state variables).
/// The values read from the memory are divided by predictor.GetMaxMemoryValue() when that value is
/// non-zero. Realtime values are additionally multiplied by the predictor's total motivation
/// (therefore they can be bigger).
/// </summary>
/// <remarks>
/// Both variable indexes are clamped into the range 0 .. MAX_VARIABLES - 1. A dimension that is the
/// predictor's promoted variable collapses to a single cell read at state index 0.
/// When the memory holds no actions, every cell gets utility 0 and action index 0. When the number of
/// utilities read for a cell does not match the number of actions, that cell gets utility 0 and the
/// first action's global index.
/// </remarks>
/// <param name="values">array passed by reference for storing utilities of the best action; reallocated when null or of the wrong size</param>
/// <param name="labelIndexes">array of the same size for best action indexes; reallocated when null or of the wrong size</param>
/// <param name="predictor">an abstract action</param>
/// <param name="XVarIndex">global index of state variable in the VariableManager</param>
/// <param name="YVarIndex">the same: y axis</param>
/// <param name="showRealtimeUtilities">show current utilities (scaled by motivations from the source and the hierarchy?)</param>
public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
    MyStochasticReturnPredictor predictor, int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
{
    MyRootDecisionSpace rds = predictor.Rds;

    // Clamp both variable indexes into [0, MAX_VARIABLES - 1].
    int lastVariableIndex = rds.VarManager.MAX_VARIABLES - 1;
    XVarIndex = Math.Max(0, Math.Min(XVarIndex, lastVariableIndex));
    YVarIndex = Math.Max(0, Math.Min(YVarIndex, lastVariableIndex));

    MyQSAMemory mem = predictor.Mem;
    // NOTE(review): this array is modified below - confirm GetCurrentState() returns a copy
    // and not the decision space's own state.
    int[] indexes = predictor.Ds.GetCurrentState();             // initial indexes
    int[] actionGlobalIndexes = mem.GetGlobalActionIndexes();   // global indexes of actions in the memory
    int promotedIndex = predictor.GetPromotedVariableIndex();

    // NOTE(review): GetVarNo(...) results are dereferenced without a null check - confirm that a
    // variable exists at every clamped index.
    MyVariable varX = rds.VarManager.GetVarNo(XVarIndex);
    MyVariable varY = rds.VarManager.GetVarNo(YVarIndex);

    float[] varXvals = varX.Values.ToArray();
    float[] varYvals = varY.Values.ToArray();
    Array.Sort(varXvals);
    Array.Sort(varYvals);

    // Size of the resulting matrix; a promoted variable contributes a single cell at index 0.
    // NOTE(review): assumes ReadSize(...) never exceeds the number of sorted values - confirm.
    int sx = 0;
    int sy = 0;
    if (XVarIndex == promotedIndex)
    {
        sx = 1;
        indexes[XVarIndex] = 0;
        varXvals = new float[] { 0 };
        sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
    }
    else if (YVarIndex == promotedIndex)
    {
        sy = 1;
        indexes[YVarIndex] = 0;
        varYvals = new float[] { 0 };
        sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
    }
    else
    {
        sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
        sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
    }

    bool needsAllocation = values == null || labelIndexes == null
        || values.GetLength(0) != sx || values.GetLength(1) != sy
        || labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy;
    if (needsAllocation)
    {
        values = new float[sx, sy];
        labelIndexes = new int[sx, sy];
    }

    // Handle "no actions" once, before the per-cell length check. Previously this case was only
    // reachable when the utilities array was empty too; otherwise the mismatch branch ran and
    // indexing the empty action array threw IndexOutOfRangeException.
    bool hasActions = actionGlobalIndexes.Length > 0;
    if (!hasActions)
    {
        MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
        actionGlobalIndexes = new int[] { 0 };
    }

    for (int i = 0; i < sx; i++)
    {
        indexes[XVarIndex] = (int)varXvals[i];

        for (int j = 0; j < sy; j++)
        {
            indexes[YVarIndex] = (int)varYvals[j];

            float[] utilities = mem.ReadData(indexes);

            // Scale the utilities by the predictor's max memory value (read once per cell).
            float maxMemoryValue = predictor.GetMaxMemoryValue();
            if (maxMemoryValue != 0)
            {
                for (int k = 0; k < utilities.Length; k++)
                {
                    utilities[k] = utilities[k] / maxMemoryValue;
                }
            }

            // Defaults used when no best action can be determined for this cell.
            float maxValue = 0.0f;
            int maxIndex = 0;

            if (hasActions)
            {
                if (utilities.Length != actionGlobalIndexes.Length)
                {
                    MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                }
                else
                {
                    maxValue = utilities.Max();
                    maxIndex = Array.IndexOf(utilities, maxValue);
                }
            }

            if (showRealtimeUtilities)
            {
                values[i, j] = maxValue * predictor.GetMyTotalMotivation();
            }
            else
            {
                values[i, j] = maxValue;
            }
            labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
        }
    }
}