/// <summary>
/// Samples a random raw-value bucket from memory, draws a random point from it,
/// and one-hot encodes the bucket's position within <c>_randomSeedKeys</c>
/// (the one-hot slot stands in for the "real type" / class of the point).
/// </summary>
/// <returns>The sampled point's coordinates and its one-hot class vector.</returns>
private (int x, int y, double[] classIndex) RandomAccessMemory()
{
    // NOTE(review): the sampled value is used both as a _memory index and as an
    // element searched for in _randomSeedKeys — confirm these stay in sync.
    int sampledKey = NP.Random(_randomSeedKeys);

    // Draw one random point from the bucket addressed by the sampled key.
    Point sampledPoint = _memory[sampledKey].RandomTake();

    // One-hot encode the key's ordinal position among the seed keys.
    int classOrdinal = Array.IndexOf(_randomSeedKeys, sampledKey);
    double[] oneHotClass = NP.ToOneHot(classOrdinal, ActionNum);

    return (sampledPoint.X, sampledPoint.Y, oneHotClass);
}
/// <summary>
/// Builds a network input vector: the state features followed by a one-hot
/// placeholder for action 0.
/// </summary>
/// <param name="state">State features; must hold at least _featuresNumber elements.</param>
/// <returns>A vector of length _featuresNumber + _actionsNumber.</returns>
private double[] MakeInput(double[] state)
{
    double[] input = new double[_featuresNumber + _actionsNumber];

    // Leading segment: the raw state features.
    Array.ConstrainedCopy(state, 0, input, 0, _featuresNumber);

    // Trailing segment: one-hot for action 0 as the default/placeholder action.
    double[] placeholderAction = NP.ToOneHot(0, _actionsNumber);
    Array.ConstrainedCopy(placeholderAction, 0, input, _featuresNumber, _actionsNumber);

    return input;
}
/// <summary>
/// Evaluates every candidate action for the given state and returns the action
/// vector with the highest predicted Q value.
/// Candidates are the N one-hot vectors when the environment allows a single
/// action, otherwise every non-empty multi-hot combination of the N actions.
/// </summary>
/// <param name="state">Current state features (length _featuresNumber).</param>
/// <returns>The best action vector and its predicted Q value.</returns>
public (double[] action, double q) ChooseAction(double[] state)
{
    // 1. Enumerate the candidate action vectors.
    List<double[]> candidates = new List<double[]>();
    if (_env.IsSingleAction)
    {
        // Exactly one action active at a time: the N one-hot vectors.
        for (int i = 0; i < _actionsNumber; i++)
        {
            candidates.Add(NP.ToOneHot(i, _actionsNumber));
        }
    }
    else
    {
        // Multiple actions may be active: every non-empty bit combination.
        // Integer shift avoids the original floating-point Math.Pow loop bound.
        int combinationCount = 1 << _actionsNumber;
        for (int i = 1; i < combinationCount; i++)
        {
            char[] bits = Convert.ToString(i, 2).PadLeft(_actionsNumber, '0').ToCharArray();
            double[] multiHot = new double[_actionsNumber];
            for (int index = 0; index < _actionsNumber; index++)
            {
                // Reverse so that the least-significant bit maps to action slot 0.
                multiHot[_actionsNumber - index - 1] = Convert.ToDouble(bits[index].ToString());
            }
            candidates.Add(multiHot);
        }
    }

    // 2. Predict a Q value per candidate and keep the best via a linear scan
    //    (the original sorted all candidates and materialized a dictionary
    //    just to take the top entry).
    double[] input = new double[_featuresNumber + _actionsNumber];
    double[] bestAction = candidates[0];
    double bestQ = double.NegativeInfinity;
    foreach (double[] candidate in candidates)
    {
        Array.ConstrainedCopy(state, 0, input, 0, _featuresNumber);
        Array.ConstrainedCopy(candidate, 0, input, _featuresNumber, _actionsNumber);
        double q = _actorNet.Predict(input)[0];

        // Strict '>' keeps the earliest candidate on ties, matching the
        // stable OrderByDescending behaviour of the original.
        if (q > bestQ)
        {
            bestQ = q;
            bestAction = candidate;
        }
    }

    // 3. Return the winning action and its Q value.
    return (bestAction, bestQ);
}
/// <summary>
/// Samples a random point from memory and builds a multi-hot action vector
/// marking which of its 8 neighbours share the same raw value.
/// Neighbour slots are laid out clockwise around the centre (8):
/// <code>
///  0 | 1 | 2
///  ---------
///  7 | 8 | 3
///  ---------
///  6 | 5 | 4
/// </code>
/// </summary>
/// <returns>The sampled point's coordinates and the combined one-hot action vector.</returns>
private (int x, int y, double[] actions) RandomAccessMemory()
{
    // Pick a raw-value bucket and a random point inside it.
    int rawValueIndex = NP.Random(_randomSeedKeys);
    Point origin = _memory[rawValueIndex].RandomTake();

    // Neighbour offsets, ordered to match the action-slot layout above:
    // (-1,-1) (0,-1) (1,-1) (1,0) (1,1) (0,1) (-1,1) (-1,0)
    int[] dx = { -1, 0, 1, 1, 1, 0, -1, -1 };
    int[] dy = { -1, -1, -1, 0, 1, 1, 1, 0 };

    double[] actions = new double[ActionNum];
    for (int slot = 0; slot < ActionNum; slot++)
    {
        int nx = origin.X + dx[slot];
        int ny = origin.Y + dy[slot];

        // Neighbours outside the grid contribute nothing.
        if (nx < 0 || nx >= _limit_x || ny < 0 || ny >= _limit_y)
        {
            continue;
        }

        // Mark the slot when the neighbour belongs to the same raw value.
        if (_queryTable[nx, ny] == rawValueIndex)
        {
            actions.CombineOneHot(NP.ToOneHot(slot, ActionNum));
        }
    }

    // Remember each distinct action vector produced so far.
    if (!_existActions.Exists(p => NP.Equal(p, actions)))
    {
        _existActions.Add(actions);
    }

    return (origin.X, origin.Y, actions);
}
/// <summary>
/// Samples a uniformly random action index and returns it as a one-hot vector.
/// </summary>
/// <returns>A one-hot vector of length ActionNum.</returns>
public double[] RandomAction() => NP.ToOneHot(NP.Random(ActionNum), ActionNum);