public override void UpdateMemoryBlocks()
        {
            UtilityOutput.Count = NoActions;
            RewardStats.Count   = 2;

            // Guard FIRST: the original code dereferenced GlobalDataInput.Count
            // (when building MyQSAMemory) before its own null check, so a missing
            // input connection threw a NullReferenceException instead of being
            // handled. Bail out early when no input is connected.
            if (GlobalDataInput == null)
            {
                return;
            }

            LearningParams    = new MyModuleParams();
            Memory            = new MyQSAMemory(GlobalDataInput.Count, NoActions);
            LearningAlgorithm = new MyDiscreteQLearning(LearningParams, Memory);

            if (NoActions == 6)
            {
                MyLog.DEBUG.WriteLine("6 actions set by the user, will use action names for gridworld");
                Rds = new MyRootDecisionSpace(GlobalDataInput.Count, new String[] { " -", " <", " >", " ^", " v", " P" }, LearningParams);
            }
            else if (NoActions == 3)
            {
                MyLog.DEBUG.WriteLine("3 actions set by the user, will use action names for pong");
                Rds = new MyRootDecisionSpace(GlobalDataInput.Count, new String[] { " <", " -", " >" }, LearningParams);
            }
            else
            {
                // Unknown action count: generate names A0..A(N-1).
                MyLog.DEBUG.WriteLine("Unknown no. of actions, will use automatic naming of actions");
                String[] names = new String[NoActions];
                for (int i = 0; i < NoActions; i++)
                {
                    names[i] = "A" + i;
                }
                Rds = new MyRootDecisionSpace(GlobalDataInput.Count, names, LearningParams);
            }
            CurrentStateOutput.Count = GlobalDataInput.Count;
        }
// Example #2
// 0
        public MyDiscreteQLearning(MyModuleParams learningParams, MyQSAMemory mem)
        {
            // Keep references to the learning configuration and the Q(s,a) memory.
            m_learning = learningParams;
            m_mem      = mem;

            // Seed the eligibility trace with an all-zero state vector sized to
            // the maximum number of state variables the memory supports.
            m_trace = new MyEligibilityTrace(learningParams);
            m_trace.PushState(new int[mem.GetMaxStateVariables()]);
        }
        public MyDiscreteQLearning(MyModuleParams learningParams, MyQSAMemory mem)
        {
            // Remember the collaborators this learner operates on.
            this.m_mem      = mem;
            this.m_learning = learningParams;

            // Start the trace from a zeroed state of maximal width, so the first
            // real state can be pushed on top of a well-defined baseline.
            this.m_trace = new MyEligibilityTrace(learningParams);
            this.m_trace.PushState(new int[mem.GetMaxStateVariables()]);
        }
        public MyStochasticReturnPredictor(MyRootDecisionSpace rds, int myPromotedVariable,
                                           MyModuleParams setup, String label, int level)
            : base(label, level, setup)
        {
            base.AddPromotedVariable(myPromotedVariable, rds);

            // Wire up the decision space, the Q(s,a) memory and the learner.
            Rds = rds;
            Ds  = new MyDecisionSpace(this, rds, setup);
            Mem = new MyQSAMemory(rds.VarManager.MAX_VARIABLES, 0);
            LearningAlgorithm = new MyDiscreteQLearning(setup, Mem);

            // Supporting components: action selection and variable history.
            m_asm  = new MyMotivationBasedDeleteUnselectedASM(setup);
            m_mlvh = new MyLocalVariableHistory(rds, m_setup, Ds);

            // Bookkeeping for the previous step starts from a clean slate.
            m_prevSelectedAction = 0;
            m_prev_st            = Ds.GetCurrentState();
            m_newVariables       = new List<int>();
        }
        private int[] m_prev_st; // previous state (for variable adding and sharing knowledge)

        #endregion Fields

        #region Constructors

        public MyStochasticReturnPredictor(MyRootDecisionSpace rds, int myPromotedVariable,
            MyModuleParams setup, String label, int level)
            : base(label, level, setup)
        {
            // Register the variable this predictor is promoted on.
            base.AddPromotedVariable(myPromotedVariable, rds);

            this.Rds = rds;

            // Decision space and memory come first; the learner needs the memory.
            this.Ds  = new MyDecisionSpace(this, rds, setup);
            this.Mem = new MyQSAMemory(rds.VarManager.MAX_VARIABLES, 0);

            this.m_asm             = new MyMotivationBasedDeleteUnselectedASM(setup);
            this.LearningAlgorithm = new MyDiscreteQLearning(setup, this.Mem);
            this.m_mlvh            = new MyLocalVariableHistory(rds, m_setup, this.Ds);

            // Previous-step state used for variable adding / knowledge sharing.
            this.m_prevSelectedAction = 0;
            this.m_prev_st            = this.Ds.GetCurrentState();
            this.m_newVariables       = new List<int>();
        }
            /// <summary>
            /// Method creates 2D array of max action utilities and max action labels across the two selected dimensions.
            /// The values in the memory are automatically scaled into the interval 0,1. Realtime values are multiplied by motivations.
            /// </summary>
            /// <param name="values">array passed by reference for storing utilities of best action</param>
            /// <param name="labelIndexes">array of the same size for best action indexes</param>
            /// <param name="XVarIndex">global index of state variable in the VariableManager</param>
            /// <param name="YVarIndex">the same: y axis</param>
            /// <param name="showRealtimeUtilities">show current utilities (scaled by the current motivation)</param>
            public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
                                          int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
            {
                // Clamp both axis indexes into [0, MAX_VARIABLES - 1].
                int maxVariables = Owner.Rds.VarManager.MAX_VARIABLES;
                XVarIndex = Math.Max(0, Math.Min(XVarIndex, maxVariables - 1));
                YVarIndex = Math.Max(0, Math.Min(YVarIndex, maxVariables - 1));

                MyQSAMemory mem = Owner.Memory;

                int[] indexes             = Owner.Rds.VarManager.GetCurrentState(); // initial indexes
                int[] actionGlobalIndexes = mem.GetGlobalActionIndexes();

                MyVariable varX = Owner.Rds.VarManager.GetVarNo(XVarIndex);
                MyVariable varY = Owner.Rds.VarManager.GetVarNo(YVarIndex);

                float[] varXvals = varX.Values.ToArray();
                float[] varYvals = varY.Values.ToArray();

                Array.Sort(varXvals);
                Array.Sort(varYvals);

                int sx = varX.Values.Count;
                int sy = varY.Values.Count;

                // (Re)allocate the output arrays only when their shape is wrong.
                if (values == null || labelIndexes == null ||
                    values.GetLength(0) != sx || values.GetLength(1) != sy ||
                    labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy)
                {
                    values       = new float[sx, sy];
                    labelIndexes = new int[sx, sy];
                }

                // Loop-invariant reads hoisted out of the scan: the memory max and
                // the motivation do not depend on i/j, but the original re-read
                // them (and re-copied the motivation from device) for every cell.
                float memoryMaxValue = Owner.LearningAlgorithm.GetMaxVal();

                float motivation = 1.0f;
                if (showRealtimeUtilities)
                {
                    Owner.MotivationInput.SafeCopyToHost();
                    motivation = Owner.MotivationInput.Host[0];
                }

                for (int i = 0; i < sx; i++)
                {
                    indexes[XVarIndex] = (int)varXvals[i];

                    for (int j = 0; j < sy; j++)
                    {
                        indexes[YVarIndex] = (int)varYvals[j];

                        float[] utilities = mem.ReadData(indexes);

                        // Scale utilities into <0,1> by the max value stored in the memory.
                        if (memoryMaxValue != 0)
                        {
                            for (int k = 0; k < utilities.Length; k++)
                            {
                                utilities[k] = utilities[k] / memoryMaxValue;
                            }
                        }

                        float maxValue = 0.0f;
                        int   maxIndex = 0;

                        // Empty action list is checked FIRST: in the original order a
                        // length mismatch with zero actions fell into the mismatch
                        // branch and then indexed actionGlobalIndexes[0] below,
                        // throwing IndexOutOfRangeException.
                        if (actionGlobalIndexes.Length == 0)
                        {
                            MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
                            utilities           = new float[1];
                            actionGlobalIndexes = new int[] { 0 };
                        }
                        else if (utilities.Length != actionGlobalIndexes.Length)
                        {
                            MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                            utilities = new float[actionGlobalIndexes.Length];
                        }
                        else
                        {
                            maxValue = utilities.Max();
                            maxIndex = utilities.ToList().IndexOf(maxValue);
                        }

                        values[i, j]       = showRealtimeUtilities ? maxValue * motivation : maxValue;
                        labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
                    }
                }
            }
            /// <summary>
            /// For a given predictor, the method creates 2D array of max action utilities and max action labels over selected dimensions.
            /// The values in the memory are automatically scaled into the interval 0,1. Realtime values are multiplied by motivations (therefore are bigger).
            /// </summary>
            /// <param name="values">array passed by reference for storing utilities of best action</param>
            /// <param name="labelIndexes">array of the same size for best action indexes</param>
            /// <param name="predictor">an abstract action</param>
            /// <param name="XVarIndex">global index of state variable in the VariableManager</param>
            /// <param name="YVarIndex">the same: y axis</param>
            /// <param name="showRealtimeUtilities">show current utilities (scaled by motivations from the source and the hierarchy?)</param>
            public void ReadTwoDimensions(ref float[,] values, ref int[,] labelIndexes,
                                          MyStochasticReturnPredictor predictor, int XVarIndex, int YVarIndex, bool showRealtimeUtilities)
            {
                MyRootDecisionSpace rds = predictor.Rds;

                // Clamp both axis indexes into [0, MAX_VARIABLES - 1].
                int maxVariables = rds.VarManager.MAX_VARIABLES;
                XVarIndex = Math.Max(0, Math.Min(XVarIndex, maxVariables - 1));
                YVarIndex = Math.Max(0, Math.Min(YVarIndex, maxVariables - 1));

                MyQSAMemory mem = predictor.Mem;

                int[] indexes             = predictor.Ds.GetCurrentState();  // initial indexes
                int[] actionGlobalIndexes = mem.GetGlobalActionIndexes();    // global indexes of actions in the memory

                int promotedIndex = predictor.GetPromotedVariableIndex();

                MyVariable varX = rds.VarManager.GetVarNo(XVarIndex);
                MyVariable varY = rds.VarManager.GetVarNo(YVarIndex);

                float[] varXvals = varX.Values.ToArray();
                float[] varYvals = varY.Values.ToArray();

                Array.Sort(varXvals);
                Array.Sort(varYvals);

                int sx;
                int sy;

                // The predictor's promoted variable is collapsed to a single
                // row/column: its index in the state is pinned to 0.
                if (XVarIndex == promotedIndex)
                {
                    sx = 1;
                    indexes[XVarIndex] = 0;
                    varXvals = new float[] { 0 };

                    sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
                }
                else if (YVarIndex == promotedIndex)
                {
                    sy = 1;
                    indexes[YVarIndex] = 0;
                    varYvals = new float[] { 0 };

                    sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
                }
                else
                {
                    sx = this.ReadSize(predictor.Ds, varX, XVarIndex, predictor.GetLabel());
                    sy = this.ReadSize(predictor.Ds, varY, YVarIndex, predictor.GetLabel());
                }

                // (Re)allocate the output arrays only when their shape is wrong.
                if (values == null || labelIndexes == null ||
                    values.GetLength(0) != sx || values.GetLength(1) != sy ||
                    labelIndexes.GetLength(0) != sx || labelIndexes.GetLength(1) != sy)
                {
                    values       = new float[sx, sy];
                    labelIndexes = new int[sx, sy];
                }

                // Loop-invariant reads hoisted out of the scan: the original called
                // GetMaxMemoryValue() once per utility ELEMENT and
                // GetMyTotalMotivation() once per cell.
                float memoryMaxValue = predictor.GetMaxMemoryValue();
                float motivation     = showRealtimeUtilities ? predictor.GetMyTotalMotivation() : 1.0f;

                for (int i = 0; i < sx; i++)
                {
                    indexes[XVarIndex] = (int)varXvals[i];

                    for (int j = 0; j < sy; j++)
                    {
                        indexes[YVarIndex] = (int)varYvals[j];

                        float[] utilities = mem.ReadData(indexes);

                        // Scale utilities into <0,1> by the predictor's max memory value.
                        if (memoryMaxValue != 0)
                        {
                            for (int k = 0; k < utilities.Length; k++)
                            {
                                utilities[k] = utilities[k] / memoryMaxValue;
                            }
                        }

                        float maxValue = 0.0f;
                        int   maxIndex = 0;

                        // Empty action list is checked FIRST: in the original order a
                        // length mismatch with zero actions fell into the mismatch
                        // branch and then indexed actionGlobalIndexes[0] below,
                        // throwing IndexOutOfRangeException.
                        if (actionGlobalIndexes.Length == 0)
                        {
                            MyLog.DEBUG.WriteLine("WARNING: this DS contains no actions. Will use the action 0");
                            utilities           = new float[1];
                            actionGlobalIndexes = new int[] { 0 };
                        }
                        else if (utilities.Length != actionGlobalIndexes.Length)
                        {
                            MyLog.DEBUG.WriteLine("ERROR: unexpected length of utilities array, will place default values");
                            utilities = new float[actionGlobalIndexes.Length];
                        }
                        else
                        {
                            maxValue = utilities.Max();
                            maxIndex = utilities.ToList().IndexOf(maxValue);
                        }

                        values[i, j]       = showRealtimeUtilities ? maxValue * motivation : maxValue;
                        labelIndexes[i, j] = actionGlobalIndexes[maxIndex];
                    }
                }
            }