Example No. 1
        /// <summary>
        /// Generate the set of action plans that can be tested: the action plans found in action memory, plus some newly added ones
        /// </summary>
        /// <param name="plans">action plans found in action memory</param>
        /// <param name="time"></param>
        /// <returns></returns>
        private List <ActionPlan> checkActionPlansFull(List <ActionPlan> plans, int time)
        {
            if (plans == null)
            {
                plans = new List <ActionPlan>();
            }
            List <List <double> > actionSets = CreateTestActionSet(Session.instinctActionHandler(net, time));

            ActionPlan[] r = new ActionPlan[actionSets.Count];
            for (int i = 0; i < actionSets.Count; i++)
            {
                ActionPlan plan = plans.FirstOrDefault(p => p.Equals(actionSets[i]));
                if (plan == null)
                {
                    String judgeType = ActionPlan.JUDGE_INFERENCE;
                    if (i == 0)
                    {
                        judgeType = ActionPlan.JUDGE_INSTINCT;
                    }
                    else if (actionSets[i][0] == 0.5)
                    {
                        judgeType = ActionPlan.JUDGE_MAINTAIN;
                    }
                    plan = ActionPlan.CreateActionPlan(net, actionSets[i], time, judgeType, "", 0);
                }
                r[i] = plan;
            }
            return(r.ToList());
        }
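
The core of this overload is a reuse-or-create lookup: for each candidate action vector, a plan that already exists in action memory is reused, otherwise a new one is created with the appropriate judge type. Below is a minimal, self-contained sketch of that pattern; the Plan class and the SequenceEqual-based match are simplified stand-ins for illustration, not the project's ActionPlan API.

using System.Collections.Generic;
using System.Linq;

class Plan
{
    public List<double> Actions = new List<double>();
    public string JudgeType = "";
}

static class PlanLookupDemo
{
    // Return the remembered plan whose action vector matches, or build a fresh one.
    public static Plan FindOrCreate(List<Plan> remembered, List<double> actions, string judgeType)
    {
        Plan existing = remembered.FirstOrDefault(p => p.Actions.SequenceEqual(actions));
        return existing ?? new Plan { Actions = new List<double>(actions), JudgeType = judgeType };
    }
}
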
Example No. 2
        /// <summary>
        /// Generate the set of action plans that can be tested: the action plans found in action memory, plus some newly added ones
        /// </summary>
        /// <param name="actionRecords">action records found in the observation history</param>
        /// <param name="time"></param>
        /// <returns></returns>
        private List <ActionPlan> checkActionPlansFull(List <ObservationHistory.ActionRecord> actionRecords, int time)
        {
            List <List <double> > actionSets = CreateTestActionSet(Session.instinctActionHandler(net, time));

            ActionPlan[] r = new ActionPlan[actionSets.Count];
            for (int i = 0; i < actionSets.Count; i++)
            {
                ActionPlan plan      = null;
                String     judgeType = ActionPlan.JUDGE_INFERENCE;
                if (i == 0)
                {
                    judgeType = ActionPlan.JUDGE_INSTINCT;
                }
                else if (actionSets[i][0] == 0.5)
                {
                    judgeType = ActionPlan.JUDGE_MAINTAIN;
                }

                ObservationHistory.ActionRecord record = actionRecords.FirstOrDefault(p => p.actions[0] == actionSets[i][0]);
                if (record == null)
                {
                    plan = ActionPlan.CreateActionPlan(net, actionSets[i], time, judgeType, "");
                }
                else
                {
                    plan            = ActionPlan.CreateActionPlan(net, record.actions, time, judgeType, "");
                    plan.evaulation = record.evaluation;
                }
                r[i] = plan;
            }
            return(r.ToList());
        }
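
One detail worth noting: the lookup above matches records with an exact double comparison (p.actions[0] == actionSets[i][0]), which only succeeds for bit-identical values. When action components can come out of arithmetic rather than a fixed discrete set, a tolerance-based comparison is safer. A small sketch of that alternative follows; the epsilon value is an assumption, not taken from the project.

using System;
using System.Collections.Generic;
using System.Linq;

static class ActionMatchDemo
{
    const double Epsilon = 1e-9; // assumed tolerance; adjust to the action encoding in use

    static bool SameAction(double a, double b) => Math.Abs(a - b) < Epsilon;

    // Find the first record whose leading action component matches the target within tolerance.
    public static T FindByFirstAction<T>(List<T> records, Func<T, double> firstAction, double target)
    {
        return records.FirstOrDefault(r => SameAction(firstAction(r), target));
    }
}
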
Example No. 3
        /// <summary>
        /// 1. Process the reward
        /// 1.1 Put the current environment scene in
        /// 2. Generate the test action set
        /// 3. Compute an evaluation value for each action
        /// 4. Choose the action with the maximum evaluation value
        /// </summary>
        /// <param name="time"></param>
        /// <param name="session"></param>
        /// <returns></returns>
        public override ActionPlan execute(int time, Session session)
        {
            //1.1 process the reward
            processReward(time);

            //1.2 still in the random-action phase
            if (time < 1)
            {
                return(net.actionPlanChain.PutNext(ActionPlan.CreateRandomPlan(net, time, "random walk")));
            }

            if (net.reward > 0 && net.actionPlanChain.Length > 0 && net.actionPlanChain.Root.planSteps > net.actionPlanChain.Length)
            {
                return(net.actionPlanChain.PutNext(ActionPlan.createMaintainPlan(net, time, "maintain", 0, net.actionPlanChain.Last.planSteps - 1)));
            }

            //1.3 create the test action set
            List <double>         instinctAction = Session.instinctActionHandler(net, time);
            List <double>         maintainAction = ActionPlan.MaintainAction;
            List <List <double> > testActionSet  = this.CreateTestActionSet(instinctAction);

            //if a collision occurred, remove the maintain action from the test set
            if (net.reward <= 0)
            {
                int mindex = testActionSet.FindIndex(a => a[0] == 0.5);
                if (mindex >= 0)
                {
                    testActionSet.RemoveAt(mindex);
                }
            }

            //1.4 find the matching records for each test action and compute the mean over all matched records
            double[] evaluations        = new double[testActionSet.Count];
            List <InferenceRecord>[] rs = new List <InferenceRecord> [testActionSet.Count];
            for (int i = 0; i < evaluations.Length; i++)
            {
                evaluations[i] = double.NaN;
            }
            List <Vector> envValues = net.GetReceptorSceneValues();

            for (int i = 0; i < testActionSet.Count; i++)
            {
                List <InferenceRecord> records = net.GetMatchInfRecords(envValues, testActionSet[i], time);
                rs[i] = new List <InferenceRecord>(records);
                List <InferenceRecord> evaluatedRecords = records.FindAll(r => !double.IsNaN(r.evulation));
                if (evaluatedRecords != null && evaluatedRecords.Count > 0)
                {
                    evaluations[i] = evaluatedRecords.ConvertAll(r => r.evulation).Average();
                }
            }

            for (int i = 0; i < testActionSet.Count; i++)
            {
                if (this.net.reward < -1.0 && testActionSet[i][0] == 0.5)
                {
                    evaluations[i] = double.MinValue;
                }
                if (testActionSet[i][0] == 0.0 && net.actionPlanChain.Length >= 2 && net.actionPlanChain.ToList()[net.actionPlanChain.Length - 1].actions[0] == 0 &&
                    net.actionPlanChain.ToList()[net.actionPlanChain.Length - 2].actions[0] == 0)
                {
                    evaluations[i] = double.MinValue;
                }
            }


            ActionPlan plan = null;
            //check whether any action is still unevaluated; if so, evaluate it first
            int index = -1;

            for (int i = 0; i < evaluations.Length; i++)
            {
                if (double.IsNaN(evaluations[i]))
                {
                    index = i; break;
                }
            }
            if (index >= 0)
            {
                plan = net.actionPlanChain.PutNext(ActionPlan.CreateActionPlan(net, testActionSet[index], time, ActionPlan.JUDGE_INFERENCE, "unknown evaluation"));
                if (rs[index] != null && rs[index].Count > 0)
                {
                    plan.inferenceRecords = rs[index].ConvertAll(r => (r, 0.0));
                }
                plan.evaulation = evaluations[index];
                plan.planSteps = 0;
                return(plan);
            }


            index = evaluations.ToList().IndexOf(evaluations.Max());
            plan  = net.actionPlanChain.PutNext(ActionPlan.CreateActionPlan(net, testActionSet[index], time, ActionPlan.JUDGE_INFERENCE, "maximum evaluation"));
            if (rs[index] != null && rs[index].Count > 0)
            {
                plan.inferenceRecords = rs[index].ConvertAll(r => (r, 0.0));
            }
            plan.evaulation = evaluations[index];
            plan.planSteps = 0;
            return(plan);
        }
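
The selection at the end of this method follows a simple explore-then-exploit rule: if any candidate action still has an unknown (NaN) evaluation, try it first; otherwise pick the candidate with the highest evaluation. A self-contained sketch of that decision over a plain evaluations array:

using System;
using System.Linq;

static class SelectionDemo
{
    // Returns the index to execute next: the first unevaluated entry if any exists,
    // otherwise the index of the maximum evaluation.
    public static int ChooseIndex(double[] evaluations)
    {
        for (int i = 0; i < evaluations.Length; i++)
        {
            if (double.IsNaN(evaluations[i]))
            {
                return i; // unknown evaluation: explore it first
            }
        }
        return Array.IndexOf(evaluations, evaluations.Max()); // exploit the best known evaluation
    }
}
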
Example No. 4
        public override ActionPlan Execute(int time, Session session)
        {
            //1.1 process the reward
            processReward(time);

            //get the current gesture and the optimal gesture
            Vector curGesture    = net.GetReceptorGesture(null);
            Vector optimaGesture = Session.handleGetOptimaGesture(net, time);

            policyState               = new PolicyState();
            policyState.curGesture    = curGesture.clone();
            policyState.optimaGesture = optimaGesture.clone();

            //1.2 still in the random-action phase
            if (time < Session.GetConfiguration().learning.development.step)
            {
                ActionPlan actionPlan = ActionPlan.CreateRandomPlan(net, time, "random walk");
                policyState.policeText = actionPlan.judgeType;
                policyState.action     = actionPlan.actions[0];
                return(net.actionPlanChain.PutNext(actionPlan));
            }

            //1.3 initialize the sense of direction for the value difference
            List <double> actions = null;
            Dictionary <Vector, Vector> actionToGesture = null;

            if (expectGesture == null)
            {
                expectGesture = optimaGesture.clone();
                actions       = net.doInference(time, expectGesture, out actionToGesture);
                policyState.objectiveGesture = expectGesture.clone();
                policyState.action           = actions[0];
                policyState.policeText       = "optimal posture";
                return(net.actionPlanChain.Reset(ActionPlan.CreateActionPlan(net, actions, time, ActionPlan.JUDGE_INFERENCE, "optimal posture")));
            }

            //1.4 compute the evaluation of the current environment state
            double envEvaluation = doEvaluateEnviornment(time, policyState);

            //1.5 if the current gesture is the optimal gesture and the current environment evaluation is positive, execute a maintain action and finish
            if (net.IsGestureInTolerateDistance(curGesture, optimaGesture) && envEvaluation >= 0)
            {
                optimaMaintainCount += 1;
                ActionPlan plan = ActionPlan.createMaintainPlan(net, time, "maintain optimal posture", envEvaluation, 0);
                policyState.action     = plan.actions[0];
                policyState.policeText = plan.judgeType;
                return(net.actionPlanChain.PutNext(plan));
            }
            if (optimaMaintainCount > 0)
            {
                optimaMaintainCount = 0;
                if (envEvaluation >= 0)
                {
                    actions                = net.doInference(time, optimaGesture, out actionToGesture);
                    policyState.action     = actions[0];
                    policyState.policeText = "adjust optimal posture";
                    return(net.actionPlanChain.Reset(ActionPlan.CreateActionPlan(net, actions, time, ActionPlan.JUDGE_INFERENCE, "adjust optimal posture")));
                }
            }

            //1.6 compute the deviation direction (if the gesture deviates from the optimal gesture, compute the direction of the deviation)
            //for a distance, direction means too large or too small; for an angle, it means clockwise or counterclockwise
            if (expectDirection == null)
            {
                List <Receptor>     gestureReceptors = net.GesturesReceptors;
                List <MeasureTools> measureTools     = net.GestureMeasureTools;
                expectDirection = new Vector(true, curGesture.Size);
                for (int i = 0; i < curGesture.Size; i++)
                {
                    if (double.IsNaN(optimaGesture[i]))
                    {
                        expectDirection[i] = 0;
                    }
                    else
                    {
                        expectDirection[i] = measureTools[i].getChangeDirection(curGesture[i], optimaGesture[i]);
                    }
                }
            }

            //1.6 the current environment evaluation is positive or unknown
            int    K = 1;
            Vector objectiveGesture = null;
            int    maintainSteps    = net.actionPlanChain.Length;

            if (envEvaluation >= 0)
            {
                //1.6.1 if maintained for at most K steps, execute a maintain action
                if (maintainSteps <= K)
                {
                    ActionPlan plan = ActionPlan.createMaintainPlan(net, time, "positive evaluation and maintenance ", envEvaluation, 0);
                    policyState.action     = plan.actions[0];
                    policyState.policeText = plan.judgeType;
                    return(net.actionPlanChain.PutNext(plan));
                }
                //1.6.2 the current environment evaluation is positive; move the objective gesture from the current gesture toward the expected direction
                else
                {
                    objectiveGesture = moveGesture(curGesture, optimaGesture, expectDirection, 1);
                    actions          = net.doInference(time, objectiveGesture, out actionToGesture);
                    actions          = checkMaxActions(actions);
                    actions          = checkMove(actions, curGesture, objectiveGesture);
                    policyState.setGestureAction(envEvaluation, curGesture, objectiveGesture, actions[0], actionToGesture);
                    policyState.policeText = "Expectation improvement after positive evaluation";
                    return(net.actionPlanChain.Reset(ActionPlan.CreateActionPlan(net, actions, time, ActionPlan.JUDGE_INFERENCE, "Expectation improvement after positive evaluation")));
                }
            }
            //1.7 the current reward is negative; switch the expected gesture
            objectiveGesture = moveGesture(curGesture, optimaGesture, expectDirection, -1);
            actions          = net.doInference(time, objectiveGesture, out actionToGesture);
            actions          = checkMaxActions(actions);
            if (actions[0] == 0.5)
            {
                if (expectDirection[0] == 1)
                {
                    actions[0] -= 0.1;
                }
                else if (expectDirection[0] == -1)
                {
                    actions[0] += 0.1;
                }
            }

            policyState.setGestureAction(envEvaluation, curGesture, objectiveGesture, actions[0], actionToGesture);
            policyState.policeText = "lower expectations after negative evaluation";

            //actions = checkMove(actions, curGesture, objectiveGesture);
            return(net.actionPlanChain.Reset(ActionPlan.CreateActionPlan(net, actions, time, ActionPlan.JUDGE_INFERENCE, "lower expectations after negative evaluation")));
        }
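
The expectDirection vector built in step 1.6 records, per gesture component, which way the current value has to move to reach the optimum, with 0 for components whose optimum is undefined (NaN). Below is a simplified, self-contained sketch of that computation, using the sign of the difference in place of the project's MeasureTools.getChangeDirection; treating the direction as a plain sign is an assumption made for illustration.

using System;

static class DirectionDemo
{
    // +1: increase the component, -1: decrease it, 0: no defined target for this component.
    public static double[] ChangeDirections(double[] current, double[] optimal)
    {
        var directions = new double[current.Length];
        for (int i = 0; i < current.Length; i++)
        {
            directions[i] = double.IsNaN(optimal[i]) ? 0 : Math.Sign(optimal[i] - current[i]);
        }
        return directions;
    }
}
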