public void testValueIterationInCellWorld()
        {
            MDPUtilityFunction<CellWorldPosition> uf = fourByThreeMDP
                .valueIterationTillMAximumUtilityGrowthFallsBelowErrorMargin(1, 0.00001);

            // AIMA2e check against Fig 17.3
            Assert.AreEqual(0.705, uf.getUtility(new CellWorldPosition(1, 1)),
                            0.001);
            Assert.AreEqual(0.655, uf.getUtility(new CellWorldPosition(1, 2)),
                            0.001);
            Assert.AreEqual(0.611, uf.getUtility(new CellWorldPosition(1, 3)),
                            0.001);
            Assert.AreEqual(0.388, uf.getUtility(new CellWorldPosition(1, 4)),
                            0.001);

            Assert.AreEqual(0.762, uf.getUtility(new CellWorldPosition(2, 1)),
                            0.001);
            Assert.AreEqual(0.660, uf.getUtility(new CellWorldPosition(2, 3)),
                            0.001);
            Assert.AreEqual(-1.0, uf.getUtility(new CellWorldPosition(2, 4)),
                            0.001);

            Assert.AreEqual(0.812, uf.getUtility(new CellWorldPosition(3, 1)),
                            0.001);
            Assert.AreEqual(0.868, uf.getUtility(new CellWorldPosition(3, 2)),
                            0.001);
            Assert.AreEqual(0.918, uf.getUtility(new CellWorldPosition(3, 3)),
                            0.001);
            Assert.AreEqual(1.0, uf.getUtility(new CellWorldPosition(3, 4)),
                            0.001);

        }
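
For reference, the value-iteration call above repeatedly applies the Bellman update U(s) <- R(s) + gamma * max_a sum_s' P(s'|s,a) U(s') until the largest utility change in a sweep falls below the error margin. A minimal sketch of a single backup, assuming hypothetical Actions, Successors, Prob, and Reward helpers (none of these are part of the AIMA API used above):

        // One Bellman backup for a single state: a sketch, not the AIMA implementation.
        // Actions, Successors, Prob and Reward are assumed helpers for illustration.
        double BellmanBackup(CellWorldPosition s, IDictionary<CellWorldPosition, double> u, double gamma)
        {
            double best = double.NegativeInfinity;
            foreach (String a in Actions(s))
            {
                double expected = 0.0;
                foreach (CellWorldPosition s2 in Successors(s, a))
                {
                    expected += Prob(s, a, s2) * u[s2];   // P(s'|s,a) * U(s')
                }
                best = Math.Max(best, expected);
            }
            return Reward(s) + gamma * best;              // R(s) + gamma * max_a E[U(s')]
        }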
        public override TAction DecideAction(MDPPerception<TState> perception)
        {
            if (!utilityFunction.HasUtilityFor(perception.GetState()))
            { // if perceptionState is new
                utilityFunction.SetUtility(perception.GetState(), perception.GetReward());
                MDP.SetReward(perception.GetState(), perception.GetReward());
            }
            if (PreviousState != null)
            {
                stateCount.IncrementFor(PreviousState);
                utilityFunction = this.UpdateUtilityFunction(1.0);
            }

            if (MDP.IsTerminalState(CurrentState))
            {
                PreviousState  = null;
                PreviousAction = null;
                //TODO: make sure that 0 is appropriate value for what used to be null in java
                previousReward = 0;
            }
            else
            {
                PreviousState  = CurrentState;
                PreviousAction = policy.GetAction(CurrentState);
                previousReward = CurrentReward;
            }
            return PreviousAction;
        }
Example #3
 public PassiveTDAgent(MDP <STATE_TYPE, ACTION_TYPE> mdp,
                       MDPPolicy <STATE_TYPE, ACTION_TYPE> policy) : base(mdp.emptyMdp())
 {
     this.policy          = policy;
     this.utilityFunction = new MDPUtilityFunction <STATE_TYPE>();
     this.stateCount      = new FrequencyCounter <STATE_TYPE>();
 }
 public PassiveTDAgent(MDP <TState, TAction> mdp,
                       MDPPolicy <TState, TAction> policy) : base(mdp.EmptyMdp())
 {
     this.policy          = policy;
     this.utilityFunction = new MDPUtilityFunction <TState>();
     this.stateCount      = new FrequencyCounter <TState>();
 }
Example #5
        public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
        {
            if (!utilityFunction.hasUtilityFor(perception.getState()))
            { // if perceptionState is new
                utilityFunction.setUtility(perception.getState(), perception.getReward());
                mdp.setReward(perception.getState(), perception.getReward());
            }
            if (previousState != null)
            {
                stateCount.incrementFor(previousState);
                utilityFunction = updateUtilityFunction(1.0);
            }

            if (mdp.isTerminalState(currentState))
            {
                previousState  = default(STATE_TYPE);
                previousAction = default(ACTION_TYPE);
                previousReward = double.MinValue;
            }
            else
            {
                previousState  = currentState;
                previousAction = policy.getAction(currentState);
                previousReward = currentReward;
            }
            return previousAction;
        }
Example #6
 public PassiveADPAgent(MDP <STATE_TYPE, ACTION_TYPE> mdp,
                        MDPPolicy <STATE_TYPE, ACTION_TYPE> policy) : base(mdp.emptyMdp())
 {
     this.policy          = policy;
     this.utilityFunction = new MDPUtilityFunction <STATE_TYPE>();
     this.nsa             = new Dictionary <Pair <STATE_TYPE, ACTION_TYPE>, Double>();
     this.nsasdash        = new Dictionary <MDPTransition <STATE_TYPE, ACTION_TYPE>, Double>();
 }
Example #7
 public PassiveADPAgent(MDP <TState, TAction> mdp,
                        MDPPolicy <TState, TAction> policy) : base(mdp.EmptyMdp())
 {
     this.policy          = policy;
     this.utilityFunction = new MDPUtilityFunction <TState>();
     this.nsa             = new Dictionary <Pair <TState, TAction>, double>();
     this.nsasdash        = new Dictionary <MDPTransition <TState, TAction>, double>();
 }
Example #8
        public override TAction DecideAction(MDPPerception<TState> perception)
        {
            if (!utilityFunction.HasUtilityFor(perception.GetState()))
            { // if perceptionState is new
                utilityFunction.SetUtility(perception.GetState(), perception.GetReward());
                MDP.SetReward(perception.GetState(), perception.GetReward());
            }
            if (PreviousState != null)
            {
                var stateAction = new Pair<TState, TAction>(PreviousState, PreviousAction);
                if (nsa.ContainsKey(stateAction))
                {
                    nsa[stateAction] += 1;
                }
                else
                {
                    nsa[stateAction] = 1.0;
                }
                var observedTransition = new MDPTransition<TState, TAction>(PreviousState, PreviousAction, CurrentState);
                if (nsasdash.ContainsKey(observedTransition))
                {
                    nsasdash[observedTransition] += 1;
                }
                else
                {
                    nsasdash[observedTransition] = 1.0;
                }

                // Re-estimate the transition model: P(s'|s,a) = N(s,a,s') / N(s,a)
                foreach (MDPTransition<TState, TAction> transition in nsasdash.Keys)
                {
                    if (nsasdash[transition] != 0.0)
                    {
                        double newValue = nsasdash[transition]
                            / nsa[new Pair<TState, TAction>(transition.GetInitialState(), transition.GetAction())];
                        MDP.SetTransitionProbability(transition, newValue);
                    }
                }
                IList<MDPTransition<TState, TAction>> validTransitions =
                    MDP.GetTransitionsWith(PreviousState, policy.GetAction(PreviousState));
                utilityFunction = this.ValueDetermination(validTransitions, 1);
            }

            if (MDP.IsTerminalState(CurrentState))
            {
                PreviousState  = null;
                PreviousAction = null;
            }
            else
            {
                PreviousState  = CurrentState;
                PreviousAction = policy.GetAction(CurrentState);
            }
            return PreviousAction;
        }
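
The nsa / nsasdash bookkeeping above is maximum-likelihood model learning: after every step the agent re-estimates each transition probability as P(s'|s,a) = N(s,a,s') / N(s,a). A standalone sketch of that estimate over plain dictionaries (the tuple keys are an assumption for illustration, not the AIMA Pair/MDPTransition types):

        // Maximum-likelihood transition estimate from visit counts: a sketch.
        // nsa counts visits to (s, a); nsasdash counts observed (s, a, s') transitions.
        static double EstimateProbability<TS, TA>(
            TS s, TA a, TS sDash,
            IDictionary<(TS, TA, TS), double> nsasdash,
            IDictionary<(TS, TA), double> nsa)
        {
            return nsasdash[(s, a, sDash)] / nsa[(s, a)];   // N(s,a,s') / N(s,a)
        }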
        public void testPolicyEvaluation()
        {
            MDPPolicy<CellWorldPosition, String> policy = fourByThreeMDP.randomPolicy();
            MDPUtilityFunction<CellWorldPosition> uf1 = fourByThreeMDP.initialUtilityFunction();

            MDPUtilityFunction<CellWorldPosition> uf2 =
                fourByThreeMDP.policyEvaluation(policy, uf1, 1, 3);

            Assert.IsFalse(uf1.Equals(uf2));
        }
Example #10
        //
        // PRIVATE METHODS
        //

        private MDPUtilityFunction<STATE_TYPE> updateUtilityFunction(double gamma)
        {
            MDPUtilityFunction<STATE_TYPE> uf = utilityFunction.copy();
            double u_s = utilityFunction.getUtility(previousState);
            // temporal-difference term: gamma * U(s') - U(s)
            double gammaUtilDiff = (gamma * utilityFunction.getUtility(currentState))
                                   - utilityFunction.getUtility(previousState);
            double alphaTerm = stateCount.probabilityOf(previousState)
                               * (previousReward + gammaUtilDiff);

            uf.setUtility(previousState, u_s + alphaTerm);
            return uf;
        }
        private MDPUtilityFunction<TState> UpdateUtilityFunction(double gamma)
        {
            MDPUtilityFunction<TState> uf = utilityFunction.Copy();
            double u_s = utilityFunction.GetUtility(PreviousState);
            // temporal-difference term: gamma * U(s') - U(s)
            double gammaUtilDiff = (gamma * utilityFunction.GetUtility(CurrentState))
                                   - utilityFunction.GetUtility(PreviousState);
            double alphaTerm = stateCount.ProbabilityOf(PreviousState)
                               * (previousReward + gammaUtilDiff);

            uf.SetUtility(PreviousState, u_s + alphaTerm);
            return uf;
        }
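
Both versions of this method implement the TD(0) rule U(s) <- U(s) + alpha * (r + gamma * U(s') - U(s)); here the learning rate alpha is taken from the visit frequency of the previous state. A minimal sketch of the same update over a plain dictionary (assumed to already hold utilities for both states):

        // TD(0) utility update: a sketch using a plain dictionary
        // instead of the AIMA MDPUtilityFunction.
        static void TdUpdate<TS>(IDictionary<TS, double> u, TS s, TS sDash,
                                 double reward, double alpha, double gamma)
        {
            // U(s) <- U(s) + alpha * (r + gamma * U(s') - U(s))
            u[s] = u[s] + alpha * (reward + gamma * u[sDash] - u[s]);
        }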
        public void testPassiveTDAgent()
        {
            PassiveTDAgent<CellWorldPosition, String> agent =
                new PassiveTDAgent<CellWorldPosition, String>(fourByThree, policy);
            // Randomizer r = new JavaRandomizer();
            Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8,
                                                             0.3, 0.7, 0.4, 0.6, 0.5 });
            MDPUtilityFunction <CellWorldPosition> uf = null;

            for (int i = 0; i < 200; i++)
            {
                agent.executeTrial(r);
                uf = agent.getUtilityFunction();
                // System.Console.WriteLine(uf);
            }

            Assert.AreEqual(0.662, uf.getUtility(new CellWorldPosition(1, 1)),
                            0.001);
            Assert.AreEqual(0.610, uf.getUtility(new CellWorldPosition(1, 2)),
                            0.001);
            Assert.AreEqual(0.553, uf.getUtility(new CellWorldPosition(1, 3)),
                            0.001);
            Assert.AreEqual(0.496, uf.getUtility(new CellWorldPosition(1, 4)),
                            0.001);

            Assert.AreEqual(0.735, uf.getUtility(new CellWorldPosition(2, 1)),
                            0.001);
            Assert.AreEqual(0.835, uf.getUtility(new CellWorldPosition(2, 3)),
                            0.001);
            // Assert.AreEqual(-1.0, uf.getUtility(new CellWorldPosition(2, 4)), 0.001);
            // the pseudo-random generator never reaches this square

            Assert.AreEqual(0.789, uf.getUtility(new CellWorldPosition(3, 1)),
                            0.001);
            Assert.AreEqual(0.889, uf.getUtility(new CellWorldPosition(3, 3)),
                            0.001);
            Assert.AreEqual(1.0, uf.getUtility(new CellWorldPosition(3, 4)),
                            0.001);
        }
        public void testPassiveADPAgent()
        {
            PassiveADPAgent<CellWorldPosition, String> agent =
                new PassiveADPAgent<CellWorldPosition, String>(fourByThree, policy);

            // Randomizer r = new JavaRandomizer();
            Randomizer r = new MockRandomizer(new double[] { 0.1, 0.9, 0.2, 0.8,
                                                             0.3, 0.7, 0.4, 0.6, 0.5 });
            MDPUtilityFunction <CellWorldPosition> uf = null;

            for (int i = 0; i < 100; i++)
            {
                agent.executeTrial(r);
                uf = agent.getUtilityFunction();
            }

            Assert.AreEqual(0.676, uf.getUtility(new CellWorldPosition(1, 1)),
                            0.001);
            Assert.AreEqual(0.626, uf.getUtility(new CellWorldPosition(1, 2)),
                            0.001);
            Assert.AreEqual(0.573, uf.getUtility(new CellWorldPosition(1, 3)),
                            0.001);
            Assert.AreEqual(0.519, uf.getUtility(new CellWorldPosition(1, 4)),
                            0.001);

            Assert.AreEqual(0.746, uf.getUtility(new CellWorldPosition(2, 1)),
                            0.001);
            Assert.AreEqual(0.865, uf.getUtility(new CellWorldPosition(2, 3)),
                            0.001);
            // Assert.AreEqual(-1.0, uf.getUtility(new CellWorldPosition(2, 4)), 0.001);
            // the pseudo-random generator never reaches this square

            Assert.AreEqual(0.796, uf.getUtility(new CellWorldPosition(3, 1)),
                            0.001);
            Assert.AreEqual(0.906, uf.getUtility(new CellWorldPosition(3, 3)),
                            0.001);
            Assert.AreEqual(1.0, uf.getUtility(new CellWorldPosition(3, 4)),
                            0.001);
        }
Example #14
        private MDPUtilityFunction<TState> ValueDetermination(
            IList<MDPTransition<TState, TAction>> validTransitions,
            double gamma)
        {
            MDPUtilityFunction<TState> uf = utilityFunction.Copy();
            double additional = 0.0;

            if (validTransitions.Count > 0)
            {
                TState initState = validTransitions[0].GetInitialState();
                double reward = MDP.GetRewardFor(initState);
                // expected utility of the successors: sum over P(s'|s,pi(s)) * U(s')
                additional = validTransitions.Sum(
                    transition => MDP.GetTransitionProbability(transition)
                                  * this.utilityFunction.GetUtility(transition.GetDestinationState()));

                uf.SetUtility(initState, reward + (gamma * additional));
            }

            return uf;
        }
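
ValueDetermination is one sweep of simplified policy evaluation, U(s) <- R(s) + gamma * sum_s' P(s'|s,pi(s)) U(s'): there is no max over actions because the policy is fixed. As a worked example with gamma = 1, R(s) = -0.04 (the usual 4x3-world step reward) and two successors with P = 0.8, U = 0.9 and P = 0.2, U = 0.6, the update gives U(s) = -0.04 + (0.8 * 0.9 + 0.2 * 0.6) = 0.8.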
Example #15
        //
        // PRIVATE METHODS
        //
        private MDPUtilityFunction<STATE_TYPE> valueDetermination(
            List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions,
            double gamma)
        {
            MDPUtilityFunction<STATE_TYPE> uf = utilityFunction.copy();
            double additional = 0.0;

            if (validTransitions.Count > 0)
            {
                STATE_TYPE initState = validTransitions[0].getInitialState();
                double reward = mdp.getRewardFor(initState);
                // expected utility of the successors: sum over P(s'|s,pi(s)) * U(s')
                foreach (MDPTransition<STATE_TYPE, ACTION_TYPE> transition in validTransitions)
                {
                    additional += mdp.getTransitionProbability(transition)
                                  * utilityFunction.getUtility(transition.getDestinationState());
                }
                uf.setUtility(initState, reward + (gamma * additional));
            }

            return uf;
        }
Example #16
        public override ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception)
        {
            if (!utilityFunction.hasUtilityFor(perception.getState()))
            { // if perceptionState is new
                utilityFunction.setUtility(perception.getState(), perception.getReward());
                mdp.setReward(perception.getState(), perception.getReward());
            }
            if (previousState != null)
            {
                Pair <STATE_TYPE, ACTION_TYPE> prevState = new Pair <STATE_TYPE, ACTION_TYPE>(previousState, previousAction);

                if (!nsa.ContainsKey(prevState))
                {
                    nsa.Add(prevState, 1.0);
                }
                else
                {
                    nsa[prevState]++;
                }
                MDPTransition <STATE_TYPE, ACTION_TYPE> prevTransition = new MDPTransition <STATE_TYPE, ACTION_TYPE>(
                    previousState, previousAction, currentState);

                if (!nsasdash.ContainsKey(prevTransition))
                {
                    nsasdash.Add(prevTransition, 1.0);
                }
                else
                {
                    nsasdash[prevTransition]++;
                }
                // Re-estimate the transition model: P(s'|s,a) = N(s,a,s') / N(s,a)
                foreach (MDPTransition<STATE_TYPE, ACTION_TYPE> transition in nsasdash.Keys)
                {
                    if (nsasdash[transition] != 0.0)
                    {
                        double newValue = nsasdash[transition]
                            / nsa[new Pair<STATE_TYPE, ACTION_TYPE>(transition.getInitialState(), transition.getAction())];
                        mdp.setTransitionProbability(transition, newValue);
                    }
                }
                List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions =
                    mdp.getTransitionsWith(previousState, policy.getAction(previousState));
                utilityFunction = valueDetermination(validTransitions, 1);
            }

            if (mdp.isTerminalState(currentState))
            {
                previousState  = default(STATE_TYPE);
                previousAction = default(ACTION_TYPE);
            }
            else
            {
                previousState  = currentState;
                previousAction = policy.getAction(currentState);
            }
            return previousAction;
        }
Example #17
	//
	// PRIVATE METHODS
	//
	private void assertPolicyRecommends(CellWorld cw,
			MDPUtilityFunction<CellWorldPosition> uf, int x, int y,
			String actionExpected) {
		Pair<String, Double> p = cw.getTransitionModel()
				.getTransitionWithMaximumExpectedUtility(
						new CellWorldPosition(x, y), uf);

		Assert.assertEquals(actionExpected, p.getFirst());
	}
Example #18
	public void testMaximumTransitionDetection() {
		// aka policy extraction
		// given a utility function

		// create the Utility Function depicted in Fig 17.3
		MDPUtilityFunction<CellWorldPosition> uf = new MDPUtilityFunction<CellWorldPosition>();
		uf.setUtility(new CellWorldPosition(1, 1), 0.705);
		uf.setUtility(new CellWorldPosition(1, 2), 0.655);
		uf.setUtility(new CellWorldPosition(1, 3), 0.611);
		uf.setUtility(new CellWorldPosition(1, 4), 0.388);

		uf.setUtility(new CellWorldPosition(2, 1), 0.762);
		uf.setUtility(new CellWorldPosition(2, 3), 0.660);
		uf.setUtility(new CellWorldPosition(2, 4), -1.0);

		uf.setUtility(new CellWorldPosition(3, 1), 0.812);
		uf.setUtility(new CellWorldPosition(3, 2), 0.868);
		uf.setUtility(new CellWorldPosition(3, 3), 0.918);
		uf.setUtility(new CellWorldPosition(3, 4), 1.0);

		assertPolicyRecommends(cw, uf, 1, 1, CellWorld.UP);
		assertPolicyRecommends(cw, uf, 1, 2, CellWorld.LEFT);
		assertPolicyRecommends(cw, uf, 1, 3, CellWorld.LEFT);
		assertPolicyRecommends(cw, uf, 1, 4, CellWorld.LEFT);

		assertPolicyRecommends(cw, uf, 2, 1, CellWorld.UP);
		assertPolicyRecommends(cw, uf, 2, 3, CellWorld.UP);
		assertPolicyRecommends(cw, uf, 2, 4, null);

		assertPolicyRecommends(cw, uf, 3, 1, CellWorld.RIGHT);
		assertPolicyRecommends(cw, uf, 3, 2, CellWorld.RIGHT);
		assertPolicyRecommends(cw, uf, 3, 3, CellWorld.RIGHT);
		assertPolicyRecommends(cw, uf, 3, 4, null);
	}
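
The assertions above exercise greedy policy extraction: pi(s) = argmax_a sum_s' P(s'|s,a) U(s'), which is what getTransitionWithMaximumExpectedUtility computes through the transition model. A minimal sketch of the argmax itself, again with assumed Actions, Successors, and Prob helpers rather than the CellWorld API:

        // Greedy action under a utility function: a sketch, not the CellWorld method.
        // Actions, Successors and Prob are assumed helpers for illustration.
        static String GreedyAction(CellWorldPosition s, IDictionary<CellWorldPosition, double> u)
        {
            String best = null;                 // stays null for terminal cells with no actions
            double bestValue = double.NegativeInfinity;
            foreach (String a in Actions(s))
            {
                double expected = 0.0;
                foreach (CellWorldPosition s2 in Successors(s, a))
                {
                    expected += Prob(s, a, s2) * u[s2];
                }
                if (expected > bestValue) { bestValue = expected; best = a; }
            }
            return best;                        // argmax_a sum_s' P(s'|s,a) * U(s')
        }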