예제 #1
0
        /// <summary>
        /// Picks a random key from <c>_randomSeedKeys</c>, takes a random point from the
        /// memory bucket stored under that key, and returns the point's coordinates together
        /// with the class vector built by <c>NP.ToOneHot</c>.
        /// </summary>
        /// <returns>
        /// <c>x</c>/<c>y</c>: coordinates of the sampled point.
        /// <c>classIndex</c>: result of <c>NP.ToOneHot</c> for the position of the sampled key
        /// inside <c>_randomSeedKeys</c>, sized by <c>ActionNum</c>.
        /// </returns>
        private (int x, int y, double[] classIndex) RandomAccessMemory()
        {
            // The sampled value is the raw class key; it also selects the memory bucket.
            int sampledKey = NP.Random(_randomSeedKeys);
            Point sample = _memory[sampledKey].RandomTake();

            // The class is encoded by the key's position in the key array, not by the key itself.
            int classPosition = Array.IndexOf(_randomSeedKeys, sampledKey);
            double[] encodedClass = NP.ToOneHot(classPosition, ActionNum);

            return (sample.X, sample.Y, encodedClass);
        }
예제 #2
0
        /// <summary>
        /// Builds a network input vector: the first <c>_featuresNumber</c> values of
        /// <paramref name="state"/> followed by the vector returned by
        /// <c>NP.ToOneHot(0, _actionsNumber)</c>.
        /// </summary>
        /// <param name="state">Source of the feature values; at least <c>_featuresNumber</c> entries are read.</param>
        /// <returns>A new array of length <c>_featuresNumber + _actionsNumber</c>.</returns>
        private double[] MakeInput(double[] state)
        {
            double[] combined = new double[_featuresNumber + _actionsNumber];

            // Feature segment occupies [0, _featuresNumber).
            Array.ConstrainedCopy(state, 0, combined, 0, _featuresNumber);

            // Action segment follows immediately; it is always filled with the encoding of action 0.
            double[] actionSegment = NP.ToOneHot(0, _actionsNumber);
            Array.ConstrainedCopy(actionSegment, 0, combined, _featuresNumber, _actionsNumber);

            return combined;
        }
예제 #3
0
        /// <summary>
        /// Scores every candidate action for the given state with <c>_actorNet</c> and returns
        /// the candidate whose predicted value is highest.
        /// </summary>
        /// <remarks>
        /// When <c>_env.IsSingleAction</c> is true the candidates are the vectors
        /// <c>NP.ToOneHot(i, _actionsNumber)</c> for every action index; otherwise they are all
        /// non-zero 0/1 vectors of length <c>_actionsNumber</c>, where element <c>k</c> is bit
        /// <c>k</c> of the enumeration counter. Ties are resolved in favour of the candidate
        /// generated first, and the selection does not depend on dictionary enumeration order.
        /// </remarks>
        /// <param name="state">State vector; its first <c>_featuresNumber</c> entries are fed to the network.</param>
        /// <returns>
        /// <c>action</c>: the best-scoring candidate vector.
        /// <c>q</c>: element 0 of the network output for that candidate.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="state"/> is null.</exception>
        /// <exception cref="InvalidOperationException">No candidate action could be generated.</exception>
        public (double[] action, double q) ChooseAction(double[] state)
        {
            if (state == null)
            {
                throw new ArgumentNullException(nameof(state));
            }

            // 1. Enumerate the candidate actions in a fixed, deterministic order.
            List<double[]> candidates = new List<double[]>();
            if (_env.IsSingleAction)
            {
                for (int i = 0; i < _actionsNumber; i++)
                {
                    candidates.Add(NP.ToOneHot(i, _actionsNumber));
                }
            }
            else
            {
                // Loop bound is invariant, so compute it once instead of on every iteration.
                double combinationCount = Math.Pow(2, _actionsNumber);
                for (int mask = 1; mask < combinationCount; mask++)
                {
                    // Element k mirrors bit k of the mask (least significant bit first).
                    double[] multiHot = new double[_actionsNumber];
                    for (int bit = 0; bit < _actionsNumber; bit++)
                    {
                        multiHot[bit] = (mask >> bit) & 1;
                    }
                    candidates.Add(multiHot);
                }
            }

            // 2. Score each candidate and keep the running maximum.
            double[] input      = new double[_featuresNumber + _actionsNumber];
            double[] bestAction = null;
            double   bestQ      = 0.0;
            foreach (double[] candidate in candidates)
            {
                Array.ConstrainedCopy(state, 0, input, 0, _featuresNumber);
                Array.ConstrainedCopy(candidate, 0, input, _featuresNumber, _actionsNumber);
                double q = _actorNet.Predict(input)[0];

                // CompareTo orders NaN below every number, matching the default comparer a
                // descending sort would use; strict '>' keeps the earliest candidate on ties.
                if (bestAction == null || q.CompareTo(bestQ) > 0)
                {
                    bestAction = candidate;
                    bestQ      = q;
                }
            }

            if (bestAction == null)
            {
                throw new InvalidOperationException("No candidate actions are available to choose from.");
            }

            // 3. Best action and its predicted value.
            return (bestAction, bestQ);
        }
예제 #4
0
        /// <summary>
        /// Samples a random stored point and reports which of its eight neighbours hold the
        /// same key in <c>_queryTable</c>.
        /// Neighbour indices (8 is the sampled point itself):
        /// -----------------------------------------------------
        /// *    0  |  1  |  2
        /// * -----------------------
        /// *    7  |  8  |  3
        /// * -----------------------
        /// *    6  |  5  |  4
        /// </summary>
        /// <returns>
        /// <c>x</c>/<c>y</c>: coordinates of the sampled point.
        /// <c>actions</c>: array of length <c>ActionNum</c> into which the encoding of every
        /// matching neighbour index has been combined. The array is also appended to
        /// <c>_existActions</c> when no entry equal to it (per <c>NP.Equal</c>) is present.
        /// </returns>
        private (int x, int y, double[] actions) RandomAccessMemory()
        {
            // Sampled key selects the memory bucket and is the value neighbours are matched against.
            int   sampledKey = NP.Random(_randomSeedKeys);
            Point center     = _memory[sampledKey].RandomTake();

            double[] neighbourMask = new double[ActionNum];

            // Neighbours listed clockwise starting from the top-left corner (index 0 in the diagram).
            List <Point> neighbours = new List <Point>()
            {
                new Point(center.X - 1, center.Y - 1), // 0: (-1,-1)
                new Point(center.X, center.Y - 1),     // 1: (0,-1)
                new Point(center.X + 1, center.Y - 1), // 2: (1,-1)
                new Point(center.X + 1, center.Y),     // 3: (1,0)
                new Point(center.X + 1, center.Y + 1), // 4: (1,1)
                new Point(center.X, center.Y + 1),     // 5: (0,1)
                new Point(center.X - 1, center.Y + 1), // 6: (-1,1)
                new Point(center.X - 1, center.Y),     // 7: (-1,0)
            };

            for (int direction = 0; direction < ActionNum; direction++)
            {
                Point candidate = neighbours[direction];

                // Neighbours that fall outside the table are skipped.
                bool outsideTable = candidate.X < 0 || candidate.X >= _limit_x ||
                                    candidate.Y < 0 || candidate.Y >= _limit_y;

                // Record the direction when the neighbour carries the sampled key.
                // NOTE(review): CombineOneHot's result is not used, so it presumably updates
                // the array in place - confirm against its definition.
                if (!outsideTable && _queryTable[candidate.X, candidate.Y] == sampledKey)
                {
                    neighbourMask.CombineOneHot(NP.ToOneHot(direction, ActionNum));
                }
            }

            // Remember each distinct action vector once.
            bool alreadyRecorded = _existActions.Exists(known => NP.Equal(known, neighbourMask));
            if (!alreadyRecorded)
            {
                _existActions.Add(neighbourMask);
            }

            return (center.X, center.Y, neighbourMask);
        }
예제 #5
0
        /// <summary>
        /// Draws a random action index via <c>NP.Random(ActionNum)</c> and returns its
        /// encoding from <c>NP.ToOneHot</c>, sized by <c>ActionNum</c>.
        /// </summary>
        /// <returns>The encoded vector for the randomly drawn action.</returns>
        public double[] RandomAction() => NP.ToOneHot(NP.Random(ActionNum), ActionNum);