// Per-physics-tick reward shaping and episode termination.
//
// For every puck in Pucks:
//   * x < -10 : RewardList[0] -= 50 and score1 is incremented.
//   * x >  10 : RewardList[0] += 18 and score2 is incremented.
//   * CollisionNum is added to RewardList[0] (once per puck, per tick).
//   * RewardList[1] is overwritten with minus the XZ distance between this
//     object and the puck, divided by 20; after the loop it therefore holds
//     the value computed for the last puck in the list only.
// The value returned by CulcReward() is then handed to SetReward(), and
// Done() is called once either per-tick count reaches endScore.
void FixedUpdate()
{
    int score1 = 0;
    int score2 = 0;

    // Nothing inside the loop moves this object, so read its position once.
    var selfPos = this.transform.position;

    foreach (var puck in Pucks)
    {
        var puckPos = puck.position;

        if (puckPos.x < -10.0f)
        {
            RewardList[0] -= 50;
            score1++;
        }

        if (puckPos.x > 10.0f)
        {
            RewardList[0] += 18;
            score2++;
        }

        RewardList[0] += CollisionNum;

        // Squares are formed in single precision and only then widened,
        // so the numeric result matches the previous implementation.
        float dx = selfPos.x - puckPos.x;
        float dz = selfPos.z - puckPos.z;
        double xDif2 = dx * dx;
        double zDif2 = dz * dz;
        RewardList[1] = -(float)Math.Sqrt(xDif2 + zDif2) / 20.0f;

        // A per-step penalty (RewardList[1] -= CurrentStep / 100.0f) is
        // deliberately left disabled.
    }

    SetReward(CulcReward());

    // score1/score2 are counted afresh every tick over all pucks, so with
    // several pucks across a line at once a count can jump past endScore.
    // An exact-equality test alone would then never end the episode, so
    // overshoot is also treated as "reached". The overshoot rule only applies
    // to a positive endScore so that non-positive values keep their previous
    // exact-match behaviour.
    bool firstReached = score1 == endScore || (endScore > 0 && score1 > endScore);
    bool secondReached = score2 == endScore || (endScore > 0 && score2 > endScore);
    if (firstReached || secondReached)
    {
        Done();
    }
}
// For Q-learning: packs the discrete states of all mallets and pucks into a
// single integer. Each component's GetState() result is folded in as one
// base-9 digit, mallets first and then pucks, in list order.
// NOTE(review): this assumes every GetState() returns a value in 0..8 - confirm.
public override int GetState()
{
    int packed = 0;

    foreach (var mallet in MalletControllers)
    {
        packed = packed * 9 + mallet.GetState();
    }

    // Resolve the Puck components up front, then fold each one in.
    var puckComponents = Pucks.Select(body => body.GetComponent<Puck>()).ToList();
    foreach (var puckComponent in puckComponents)
    {
        // A fresh transform list is built for every puck, so each call
        // receives its own list instance.
        var malletTransforms = MalletControllers.Select(m => m.transform).ToList();
        packed = packed * 9 + puckComponent.GetState(malletTransforms);
    }

    return packed;
}
// Unity Awake hook: caches the initial scene state this agent relies on.
void Awake()
{
    // The only mallet tracked by this agent is the one referenced by Mallet.
    MalletControllers = new List<MalletController>();
    MalletControllers.Add(Mallet);

    // Snapshot of every puck's position at start-up.
    // NOTE(review): the original author marked this line with "??" - its
    // purpose is not evident from this method; confirm it is still needed.
    StartPuckPositions = Pucks.Select(body => body.position).ToList();

    // This object's own position at start-up.
    MyPosition = transform.position;

    // Rewards: two accumulator slots, both starting at zero.
    RewardList = new List<float> { 0f, 0f };
}
// Agent initialisation: resets step counters, the collision count, the
// mallets, the pucks and the reward slots.
public override void AgentReset()
{
    CurrentStep = 0;
    CurrentStepMax = 1000;
    CollisionNum = 0;

    // Switch the GameObject off and straight back on.
    // NOTE(review): presumably done to re-trigger disable/enable hooks - confirm.
    gameObject.SetActive(false);
    gameObject.SetActive(true);

    foreach (var mallet in MalletControllers)
    {
        mallet.ResetParams();
    }

    foreach (var puckBody in Pucks)
    {
        puckBody.GetComponent<Puck>().ResetParams();
    }

    // Both reward slots start again from zero.
    RewardList = new List<float> { 0f, 0f };
}
// (NE) Builds the observation vector for the current state.
//
// Layout, in order:
//   * per mallet: x and z of its rigidbody position relative to MyPosition;
//   * per puck, sorted by ascending x position: x and z relative to
//     MyPosition, followed by the x and z components of its velocity.
public override List<double> CollectObservations()
{
    var result = new List<double>();

    foreach (var mallet in MalletControllers)
    {
        var malletPos = mallet.RB.position;
        result.Add(malletPos.x - MyPosition.x);
        result.Add(malletPos.z - MyPosition.z);
    }

    // Sorting by x makes the layout independent of the pucks' list order.
    foreach (var puck in Pucks.OrderBy(body => body.position.x))
    {
        var puckPos = puck.position;
        var puckVel = puck.velocity;

        result.Add(puckPos.x - MyPosition.x);
        result.Add(puckPos.z - MyPosition.z);
        result.Add(puckVel.x);
        result.Add(puckVel.z);
    }

    return result;
}