Example #1
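        /// <summary>
        /// Estimate an evaluation density for each plan: run forward inference for
        /// init_plan_depth steps, average the densities of the matched scenes, and
        /// fall back to -init_plan_depth when no scene matches.
        /// </summary>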
        private double[] computeEvaluationDensity(List <ActionPlan> plans)
        {
            double[] density        = new double[plans.Count];
            int      inferencecount = Session.GetConfiguration().evaluation.policy.init_plan_depth;

            for (int i = 0; i < plans.Count; i++)
            {
                List <double> values = new List <double>();

                List <Vector> obs = net.GetMergeReceptorValues(plans[i].observation, plans[i].actions);
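                // Run forward inference for up to inferencecount steps, collecting the
                // density of every scene that matches the inferred observation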
                for (int j = 0; j < inferencecount; j++)
                {
                    obs = net.forward_inference(obs);
                    if (obs == null)
                    {
                        break;
                    }
                    ObservationHistory.Scene scene = net.actionMemory.FindMatchedScene(obs).Item1;
                    if (scene == null)
                    {
                        continue;
                    }
                    if (double.IsNaN(scene.density))
                    {
                        continue;
                    }
                    values.Add(scene.density);
                }
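                // Average the collected densities; if nothing matched, penalize with -inferencecount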
                density[i] = values.Count <= 0 ? -1 * inferencecount : values.Average();
            }
            return(density);
        }
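
The commented-out branch in makeNewActionPlan (Example #2) consumes this density array by preferring the plans with the lowest values. A minimal sketch of that ranking step, assuming System.Linq is in scope and using a hypothetical helper name lowestDensityPlanIndices:

        // Sketch only: order plan indices by ascending evaluation density and keep the
        // lowest few candidates, mirroring the commented-out selection in makeNewActionPlan.
        private List <int> lowestDensityPlanIndices(List <ActionPlan> plans, int topN = 3)
        {
            double[] density = computeEvaluationDensity(plans);

            return Enumerable.Range(0, plans.Count)
                   .OrderBy(i => density[i])   // smallest (least collision-dense) first
                   .Take(topN)
                   .ToList();
        }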
Example #2
        /// <summary>
        /// Formulate a new action plan.
        /// </summary>
        /// <param name="time"></param>
        /// <param name="session"></param>
        /// <returns></returns>
        private ActionPlan makeNewActionPlan(int time, Session session)
        {
            //Get all action plans that match the current scene
            var s = net.actionMemory.FindMatchedScene();

            ObservationHistory.Scene scene = s.Item1;
            List <ObservationHistory.ActionRecord> actionRecords = scene == null ? new List <ObservationHistory.ActionRecord>() : scene.records;
            //If the set of action plans is incomplete, fill in all possible plans, ordered consistently with the instinct action
            List <ActionPlan> plans = checkActionPlansFull(actionRecords, time);

            //Find the instinct action plan and the maintain action plan
            List <double> instinctAction = Session.instinctActionHandler(net, time);
            ActionPlan    instinctPlan  = plans.FirstOrDefault(p => p.actions[0] == instinctAction[0]);
            ActionPlan    maintainPlan  = plans.FirstOrDefault(p => p.actions[0] == 0.5);

            //Iterate over all plans
            double[] forcast = new double[plans.Count];
            for (int i = 0; i < plans.Count; i++)
            {
                //If the last reward was negative, maintaining the current action is pointless
                if (net.reward < 0 && plans[i].IsMaintainAction())
                {
                    continue;
                }
                //If the i-th action is known to have a positive evaluation, pick it
                if (plans[i].evaulation > 0)
                {
                    plans[i].reason    = "走向正评估";   // "heading toward a positive evaluation"
                    plans[i].planSteps = (int)plans[i].evaulation + policyConfig.init_plan_depth;
                    //forcast[i] = forcastActionPlan(scene!=null?scene.scene:plans[i].inputObs, plans[i].actions);
                    //if (forcast[i] < 0) continue;
                    //plans[i].expect = forcast[i];
                    return(plans[i]);
                }
                //If the i-th action's evaluation is unknown, pick it (explore)
                else if (double.IsNaN(plans[i].evaulation))
                {
                    plans[i].reason    = "探索未知";   // "exploring the unknown"
                    plans[i].planSteps = policyConfig.init_plan_depth;
                    //forcast[i] = forcastActionPlan(scene != null ? scene.scene : plans[i].inputObs, plans[i].actions);
                    //if (forcast[i] < 0) continue;
                    //plans[i].expect = forcast[i];
                    return(plans[i]);
                }
                else
                {
                    forcast[i] = plans[i].evaulation;
                }
            }

            //Reaching this point means every evaluation is negative.
            //Handling this is awkward: the reward is unrelated to the goal point, so depending on the maze it is
            //sometimes right to move in the instinct direction and sometimes not. Options:
            //1. Change the reward computation: besides keeping the negative collision reward, give a larger reward for
            //   moving along the route; then this step becomes simple, just pick the action with the largest reward among the remembered scenes.
            //2. The cumbersome approach is to test by inference: for each direction, every subsequent step near the instinct
            //   direction should be unknown or keep a large collision distance, but this is computationally too expensive.
            //3. Compute the reward density of each action's remembered scenes and, by priority, pick the three with the smallest collision density.

            /*double[] evaluationDensity = computeEvaluationDensity(plans);
             * for(int i=0;i<plans.Count;i++)
             * {
             *  //A negative reward means a collision already happened, so the maintain action is definitely out
             *  if (net.reward < 0 && plans[i].actions[0] == 0.5) continue;
             *  //After a collision, actions near the maintain action are not acceptable either
             *  if (net.reward < 0 && Math.Abs(plans[i].actions[0] - 0.5) < 0.25)
             *      continue;
             *  //Rank of this plan's evaluation density (how many densities are <= it)
             *  int c = evaluationDensity.ToList().Count(d => d <= evaluationDensity[i]);
             *  if(c <=3)
             *  {
             *      plans[i].reason = "最小评估密度("+ evaluationDensity[i].ToString("F3")+")";
             *      plans[i].planSteps = -1 * (int)plans[i].evaulation / 2;
             *      return plans[i];
             *  }
             *
             * }*/

            //The fallback below should normally never be reached
            List <ActionPlan> ps = plans;

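            //After a collision, exclude plans whose action is within 0.25 of the maintain action (0.5)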
            if (net.reward < 0)
            {
                ps = plans.FindAll(p => Math.Abs(p.actions[0] - 0.5) >= 0.25);
            }
            int t = Network.rng.Next(0, ps.Count);

            ps[t].reason    = "全部负面,随机选择";   // "all negative, choosing at random"
            ps[t].planSteps = double.IsNaN(ps[t].evaulation)?16:-1 * (int)ps[t].evaulation / 2;
            return(ps[t]);
        }