Example #1
        /// <summary>
        /// Creates a 2D array of max action utilities and max action labels over the selected dimensions.
        /// The values in the memory are automatically scaled into the interval [0,1]. Realtime values are multiplied by motivations.
        /// </summary>
        /// <param name="values">array passed by reference for storing utilities of best action</param>
        /// <param name="labelIndexes">array of the same size for best action indexes</param>
        /// <param name="XVarIndex">global index of state variable in the VariableManager</param>
        /// <param name="YVarIndex">the same: y axis</param>
        /// <param name="showRealtimeUtilities">show current utilities (scaled by the current motivation)</param>
        /// <param name="policyNumber">optinal parameter. In case that the agent has more strategies, you can choose which one to read from.</param>
        public override void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
                                               int XVarIndex, int YVarIndex, bool showRealtimeUtilities = false, int policyNumber = 0)
        {
            MyStochasticReturnPredictor predictor = Vis.GetPredictorNo(policyNumber);

            Vis.ReadTwoDimensions(ref values, ref labelIndexes, predictor, XVarIndex, YVarIndex, showRealtimeUtilities);
        }
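
A minimal usage sketch for this override; the agent instance and the chosen variable indexes are hypothetical, only the ReadTwoDimensions signature and its resizing behavior are taken from the code above:

        float[,] utilities = null;   // reallocated by the callee when the shape does not match
        int[,] bestActions = null;

        // read the utility map over state variables 0 (x axis) and 1 (y axis) of policy 0;
        // pass showRealtimeUtilities: true to scale the values by the current motivation
        agent.ReadTwoDimensions(ref utilities, ref bestActions, 0, 1);

        for (int y = 0; y < utilities.GetLength(1); y++)
        {
            for (int x = 0; x < utilities.GetLength(0); x++)
            {
                Console.Write("{0:0.00} ", utilities[x, y]);
            }
            Console.WriteLine();
        }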
Example #2
        /// <summary>
        /// This should be called only after receiving the reward, so that the change
        /// counters measure the frequency of variable changes relative to the rewards received.
        /// </summary>
        /// <param name="parent"></param>
        public void performOnlineVariableRemoving(MyStochasticReturnPredictor parent)
        {
            bool removed = false;

            // remove all included variables whose change frequency fell below the threshold
            for (int i = 0; i < m_rds.VarManager.GetMaxVariables(); i++)
            {
                if (parent.Ds.IsVariableIncluded(i) &&
                    m_rds.VarManager.GetVarNo(i).Values.Count > 1 &&
                    i != parent.GetPromotedVariableIndex())
                {
                    if (changes[i] < m_setup.OnlineVariableRemovingThreshold)
                    {
                        removed = true;
                        parent.Ds.RemoveVariable(i);
                    }
                }
            }
            if (removed)
            {
                String output =
                    "SRP: " + parent.GetLabel() + ": Variables removed, current ones: ";

                for (int i = 0; i < m_rds.VarManager.GetMaxVariables(); i++)
                {
                    if (parent.Ds.IsVariableIncluded(i))
                    {
                        output += i + ", ";
                    }
                }
                MyLog.DEBUG.WriteLine(output);
            }
        }
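
The removal rule reduces to comparing a per-variable change counter against a threshold. A self-contained sketch of the same pattern; every name and constant here is illustrative, none is taken from the project:

        int maxVariables = 8;
        bool[] included = new bool[maxVariables];    // which variables the DS currently uses
        float[] changes = new float[maxVariables];   // one change counter per variable
        float removingThreshold = 0.1f;              // stands in for OnlineVariableRemovingThreshold

        List<int> toRemove = new List<int>();
        for (int i = 0; i < maxVariables; i++)
        {
            // a variable that changed rarely relative to the received rewards falls below the threshold
            if (included[i] && changes[i] < removingThreshold)
            {
                toRemove.Add(i);
            }
        }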
Example #3
        public MyDecisionSpace(MyStochasticReturnPredictor mySRP, MyRootDecisionSpace rds, MyModuleParams setup)
        {
            this.m_setup = setup;
            this.m_rds   = rds;
            this.m_mySRP = mySRP;
            m_S_t        = new int[rds.VarManager.MAX_VARIABLES];

            ChildActions     = new MyActionLink(0, rds);
            m_childVariables = new MyVariableLink(rds.VarManager.GetMaxVariables());
        }
        public override MyStochasticReturnPredictor ManualSubspacing(
            int[] variables, int[] actions, int promotedVariable, String label)
        {
            List<int> acs = actions.ToList();

            int maxChildActionLevel = 0;

            if (actions.Length > m_rds.ActionManager.GetNoActions())
            {
                MyLog.ERROR.WriteLine("Too many actions to be added, ignoring this DS!!");
                return null;
            }

            MyLog.INFO.Write("Hierarchy: action named " + label + " added, variables: ");
            MyStochasticReturnPredictor a = new MyStochasticReturnPredictor(m_rds, promotedVariable, m_learningParams, label, 0);

            for (int i = 0; i < variables.Length; i++)
            {
                if (variables[i] == promotedVariable)
                {
                    //MyLog.ERROR.WriteLine("Cannot add promoted variable into the DS, ignoring this one");
                    continue;
                }
                a.Ds.AddVariable(variables[i]);

                if (m_learningParams.BuildMultilevelHierarchy)
                {
                    if (m_rds.VarManager.GetVarNo(variables[i]).MyAction != null)
                    {
                        acs.Add(m_rds.VarManager.GetVarNo(variables[i]).MyAction.GetMyIndex());
                    }
                }
                MyLog.INFO.Write(" " + variables[i]);
            }
            MyLog.INFO.Write("\t actions: ");
            for (int i = 0; i < acs.Count; i++)
            {
                a.Ds.AddAction(acs[i]);
                MyLog.INFO.Write(" " + m_rds.ActionManager.GetActionLabel(acs[i]));

                if (m_rds.ActionManager.Actions[acs[i]].GetLevel() > maxChildActionLevel)
                {
                    maxChildActionLevel = m_rds.ActionManager.Actions[acs[i]].GetLevel();
                }
            }
            MyLog.INFO.WriteLine();
            // level is determined by the most abstract child
            a.SetLevel(maxChildActionLevel + 1);
            m_rds.ActionManager.AddAction(a);
            this.AddAction(m_rds.ActionManager.GetNoActions() - 1);
            return a;
        }
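
A hedged usage sketch; the decision space instance ds, the concrete indexes, and the label are hypothetical, while the signature and the null-on-failure behavior come from the method above:

        // build an abstract action over variables 2 and 4, promoting variable 3,
        // with primitive actions 0 and 1 as its children
        MyStochasticReturnPredictor abstractAction = ds.ManualSubspacing(
            new int[] { 2, 4 },   // variables of the new decision space
            new int[] { 0, 1 },   // child action indexes
            3,                    // promoted variable (never added to the DS itself)
            "Eat");               // label of the new abstract action

        if (abstractAction == null)
        {
            // the action list was longer than the number of known actions and was rejected
        }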
Example #6
        /// <summary>
        /// This updates the variable weights in the DS; if a weight falls under the threshold,
        /// the corresponding variable should be removed from the DS.
        /// </summary>
        /// <param name="parent"></param>
        public void monitorVariableChanges(MyStochasticReturnPredictor parent)
        {
            // update info about changed variables
            List<int> changed = m_rds.VarManager.VarHistory.GetVariablesChangedBefore(1);

            if (changed != null)
            {
                for (int i = 0; i < changed.Count; i++)
                {
                    changes[changed[i]] += addVal;
                }
            }

            // decay all that are included in this DS
            for (int i = 0; i < m_rds.VarManager.GetMaxVariables(); i++)
            {
                if (parent.Ds.IsVariableIncluded(i) &&
                    m_rds.VarManager.GetVarNo(i).Values.Count > 1 &&
                    i != parent.GetPromotedVariableIndex())
                {
                    changes[i] -= m_setup.OnlineHistoryForgettingRate;
                }
            }
        }
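
Together with performOnlineVariableRemoving above, each counter follows a simple add/decay dynamic. A standalone sketch of that dynamic with illustrative constants:

        float addVal = 1.0f;            // added when the variable changes (addVal above)
        float forgettingRate = 0.05f;   // stands in for OnlineHistoryForgettingRate
        float change = 0.0f;            // counter of one included variable

        for (int step = 0; step < 100; step++)
        {
            bool variableChanged = (step % 10 == 0);   // this variable changes rarely
            if (variableChanged)
            {
                change += addVal;        // monitorVariableChanges: credit the change
            }
            change -= forgettingRate;    // monitorVariableChanges: decay on every call

            // performOnlineVariableRemoving later drops the variable
            // once change < OnlineVariableRemovingThreshold after a reward
        }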
Example #7
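 // Associates this variable with the abstract action (predictor) that controls it
 // (judging by the VarManager.GetVarNo(i).MyAction usage in the examples above).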
 public void SetMyAction(MyStochasticReturnPredictor myAction)
 {
     this.MyAction = myAction;
 }
Example #9
            /// <summary>
            /// For a given predictor, the method creates a 2D array of max action utilities and max action labels over the selected dimensions.
            /// The values in the memory are automatically scaled into the interval [0,1]. Realtime values are multiplied by motivations (and are therefore bigger).
            /// </summary>
            /// <param name="values">array passed by reference for storing utilities of best action</param>
            /// <param name="labelIndexes">array of the same size for best action indexes</param>
            /// <param name="predictor">an asbtract action</param>
            /// <param name="XVarIndex">global index of state variable in the VariableManager</param>
            /// <param name="YVarIndex">the same: y axis</param>
            /// <param name="showRealtimeUtilities">show current utilities (scaled by motivations from the source and the hierarchy?)</param>
            public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
                                          MyStochasticReturnPredictor predictor, int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
            {
                MyRootDecisionSpace rds = predictor.Rds;

                if (XVarIndex >= rds.VarManager.MAX_VARIABLES)
                {
                    XVarIndex = rds.VarManager.MAX_VARIABLES - 1;
                }
                if (YVarIndex >= rds.VarManager.MAX_VARIABLES)
                {
                    YVarIndex = rds.VarManager.MAX_VARIABLES - 1;
                }
                if (YVarIndex < 0)
                {
                    YVarIndex = 0;
                }
                if (XVarIndex < 0)
                {
                    XVarIndex = 0;
                }
                MyQSAMemory mem = predictor.Mem;

                int[] sizes   = mem.GetStateSizes();                      // size of the matrix
                int[] indexes = predictor.Ds.GetCurrentState();           // initial indexes

                int[] actionGlobalIndexes = mem.GetGlobalActionIndexes(); // global indexes of actions in the memory

                int promotedIndex = predictor.GetPromotedVariableIndex();

                MyVariable varX = rds.VarManager.GetVarNo(XVarIndex);
                MyVariable varY = rds.VarManager.GetVarNo(YVarIndex);

                float[] varXvals = varX.Values.ToArray();
                float[] varYvals = varY.Values.ToArray();

                Array.Sort(varXvals);
                Array.Sort(varYvals);

                int sx = 0;
                int sy = 0;

                if (XVarIndex == promotedIndex)
                {
                    sx = 1;
                    indexes[XVarIndex] = 0;

                    varXvals = new float[] { 0 };

                    sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
                }
                else if (YVarIndex == promotedIndex)
                {
                    sy = 1;
                    indexes[YVarIndex] = 0;

                    varYvals = new float[] { 0 };

                    sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
                }
                else
                {
                    sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
                    sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
                }

                if (values == null || labelIndexes == null ||
                    values.GetLength(0) != sx || values.GetLength(1) != sy ||
                    labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy)
                {
                    values       = new float[sx, sy];
                    labelIndexes = new int[sx, sy];
                }

                for (int i = 0; i < sx; i++)
                {
                    indexes[XVarIndex] = (int)varXvals[i];

                    for (int j = 0; j < sy; j++)
                    {
                        indexes[YVarIndex] = (int)varYvals[j];

                        float[] utilities = mem.ReadData(indexes);
                        if (predictor.GetMaxMemoryValue() != 0)
                        {
                            for (int k = 0; k < utilities.Length; k++)
                            {
                                utilities[k] = utilities[k] / predictor.GetMaxMemoryValue();
                            }
                        }

                        float maxValue = 0.0f;
                        int   maxIndex = 0;

                        if (utilities.Length != actionGlobalIndexes.Length)
                        {
                            MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                            utilities = new float[actionGlobalIndexes.Length];
                        }
                        else if (actionGlobalIndexes.Length == 0)
                        {
                            MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
                            utilities           = new float[1];
                            actionGlobalIndexes = new int[] { 0 };
                        }
                        else
                        {
                            maxValue = utilities.Max();
                            maxIndex = utilities.ToList().IndexOf(maxValue);
                        }
                        if (showRealtimeUtilities)
                        {
                            values[i, j] = maxValue * predictor.GetMyTotalMotivation();
                        }
                        else
                        {
                            values[i, j] = maxValue;
                        }
                        labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
                    }
                }
            }
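
A short usage sketch for reading the result; the vis, predictor, and rds instances are assumed to already exist, while the signature and GetActionLabel are taken from the code above:

            float[,] values = null;
            int[,] labels = null;

            vis.ReadTwoDimensions(ref values, ref labels, predictor, 0, 1, false);

            // translate the best-action indexes into readable labels
            for (int i = 0; i < labels.GetLength(0); i++)
            {
                for (int j = 0; j < labels.GetLength(1); j++)
                {
                    Console.Write(rds.ActionManager.GetActionLabel(labels[i, j]) + "\t");
                }
                Console.WriteLine();
            }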