コード例 #1
0
ファイル: MDP.cs プロジェクト: mlohstroh/ml
        private Tuple<MDPAction, float> ActionSum(State current)
        {
			Dictionary<MDPAction, float> tmp = new Dictionary<MDPAction, float> ();

            for (int x = 0; x < current.Actions.Count; x++)
            {
                MDPAction a = current.Actions[x];
                float sum = 0;
                foreach(var kvp in a.Chances)
                {
					sum += GetJAtPreviousIteration(kvp.Key) * kvp.Value;
                }
                
				tmp.Add (a, sum);
            }

			var max = tmp.Aggregate((l, r) => l.Value > r.Value ? l : r).Key;

			// current reward + gamma * action_sum
			return new Tuple<MDPAction, float>(max, current.Reward + Gamma * tmp[max]);
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: mlohstroh/ml
        static void Main(string[] args)
        {
            if(args.Length != 4)
            {
                Console.WriteLine("Incorrect arguments...");
            }
            int numActions = 3;
            string file = "data/mytest.in.txt";
            float gamma = 0.9f;

            try
            {
                numActions = int.Parse(args[1]);

                file = args[2];
                gamma = float.Parse(args[3]);
            }
            catch(Exception)
            {
                Console.WriteLine("Exception reading arguments...");
                Console.WriteLine("Please enter exactly 4 arguments");
            }

            MDP mdp = new MDP()
            {
                Gamma = gamma
            };

            using (StreamReader reader = new StreamReader(file))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    
					line = line.Trim ();
                    string[] data = line.Split(new char[] { '\t', ' ' });

                    if (data.Length > 2)
                    {
                        string name = data[0];
                        float reward = float.Parse(data[1]);
                        State s = new State()
                        {
                            Name = name,
                            Reward = reward
                        };

                        List<MDPAction> actions = new List<MDPAction>();

                        // define actions for each states
                        for(int i = 0; i < numActions; i++)
                        {
                            actions.Add(new MDPAction()
                            {
                                Name = string.Format("a{0}", i + 1)
                            });
                        }

                        // begin the transitions
                        for(int i = 2; i < data.Length; i += 3)
                        {
                            string actionName = data[i].Substring(1);
                            string stateKey = data[i + 1];
                            string tmp = data[i + 2];
                            float probability = float.Parse(tmp.Substring(0, tmp.Length - 1));

                            var action = actions.Where(x => x.Name == actionName).First();

                            action.AddTransition(stateKey, probability);
                        }

                        s.Actions = actions;
                        mdp.AddState(s);
                    }
                }
            }


            mdp.SimulateAll();

            Console.ReadKey(true);
        }
コード例 #3
0
ファイル: MDP.cs プロジェクト: mlohstroh/ml
 public void AddState(State s)
 {
     _states.Add(s);
 }