Example #1
        public override double update(StateTransition<stateType, actionType> transition)
        {
            stats.cumulativeReward += transition.reward;

            // Reward-prediction error (RPE): only computable once this
            // (state, action) pair has been visited at least once.
            double RPE = 0;

            if (trueModel.T.GetStateValueTable(transition.oldState, transition.action).Count > 0)
            {
                RPE = transition.reward - trueModel.PredictReward(transition.oldState, transition.action, transition.newState);
            }

            // A negative RPE ("loss") starts a temporary override: back up the
            // stored reward histogram for this transition and replace it with
            // the RPE itself for the next 8 updates.
            if (RPE < 0 && LScounter <= 0)
            {
                lossTransition = transition;

                backupR = trueModel.R.Get(transition.oldState, transition.action, transition.newState);
                Histogram temp = new Histogram(0);
                temp.Add(RPE);
                trueModel.R.Set(transition.oldState, transition.action, transition.newState, temp);

                LScounter = 8;
            }

            // Once the override window expires, restore the backed-up reward.
            if (LScounter == 0 && lossTransition != null)
            {
                trueModel.R.Set(lossTransition.oldState, lossTransition.action, lossTransition.newState, backupR);
                lossTransition = null;
            }

            // Standard model update (sees the overridden reward while active).
            trueModel.update(transition);
            LScounter--;

            return 0;
        }
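
What this example implements is a transient "loss shaping" scheme: a negative reward-prediction error (RPE) backs up the stored reward for the offending transition, overwrites it with the RPE itself, and restores the backup once LScounter counts down from 8. Below is a minimal, self-contained sketch of that cycle, assuming a plain double can stand in for the Histogram entry in trueModel.R; the names LossShapingDemo and Step are hypothetical.

using System;

class LossShapingDemo
{
    double storedReward = 0.5;  // stands in for trueModel.R at (s, a, s')
    double backup;              // stands in for backupR
    int counter = -1;           // stands in for LScounter

    public void Step(double reward, double predicted)
    {
        double rpe = reward - predicted;  // reward-prediction error

        if (rpe < 0 && counter <= 0)
        {
            backup = storedReward;  // save the learned reward estimate
            storedReward = rpe;     // pessimistic override
            counter = 8;            // hold the override for 8 updates
        }

        if (counter == 0)
        {
            storedReward = backup;  // window expired: restore the estimate
        }

        counter--;
        Console.WriteLine($"rpe={rpe:F2} stored={storedReward:F2} counter={counter}");
    }

    static void Main()
    {
        var demo = new LossShapingDemo();
        demo.Step(-1.0, 0.5);        // negative RPE: override kicks in
        for (int i = 0; i < 9; i++)
            demo.Step(0.5, 0.5);     // counter winds down, then the backup is restored
    }
}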
Example #2
        public override double update(StateTransition<int[], int[]> transition)
        {
            stats.cumulativeReward += transition.reward;

            // The first two state components are the allocentric (x, y) position;
            // the remaining eight are the egocentric observation.
            int[] alloOldState = new int[2] { transition.oldState[0], transition.oldState[1] };
            int[] alloNewState = new int[2] { transition.newState[0], transition.newState[1] };
            int[] egoOldState = new int[8];
            Array.Copy(transition.oldState, 2, egoOldState, 0, 8);
            int[] egoNewState = new int[8];
            Array.Copy(transition.newState, 2, egoNewState, 0, 8);

            // Load the transition into the bounded history: inputs are the ego
            // state plus the action; targets are the allocentric displacement
            // and the observed reward. Keep a sliding window of 500 samples.
            if (saHistory.Count > 500)
            {
                saHistory.Dequeue();
                sPrimeHistory.Dequeue();
            }
            double[] sa = new double[10];
            Array.Copy(egoOldState, sa, 8);
            sa[8] = transition.action[0];
            sa[9] = transition.action[1];
            Console.WriteLine("sa: " + string.Join(",", sa));
            Console.WriteLine("sprime: " + alloNewState[0] + "," + alloNewState[1]);

            saHistory.Enqueue(sa);
            sPrimeHistory.Enqueue(new double[3] {
                alloNewState[0] - alloOldState[0],
                alloNewState[1] - alloOldState[1],
                transition.reward
            });

            // Run one regression epoch over the history once enough samples exist.
            if (saHistory.Count > 50 && fullPredictionMode)
            {
                teacher.RunEpoch(saHistory.ToArray(), sPrimeHistory.ToArray());
            }

            // update models with the current transition
            alloModel.update(new StateTransition<int[], int[]>(alloOldState, transition.action, transition.reward, alloNewState));
            egoModel.update(new StateTransition<int[], int[]>(egoOldState, transition.action, transition.reward, egoNewState));

            // Transfer predictions from the ego model into the allo model.
            Console.WriteLine("current state: " + alloNewState[0] + "," + alloNewState[1]);
            Console.WriteLine("ego state: " + string.Join(",", egoNewState));

            foreach (int[] a in availableActions)
            {
                // Build the network input: ego state plus candidate action.
                sa = new double[10];
                Array.Copy(egoNewState, sa, 8);
                sa[8] = a[0];
                sa[9] = a[1];

                // Predict the allocentric displacement and reward for this action.
                double[] predicted    = network.Compute(sa);
                int[]    predictedAlo = { (int)Math.Round(predicted[0]) + alloNewState[0], (int)Math.Round(predicted[1]) + alloNewState[1] };
                double   reward       = predicted[2];

                // Hand-coded prediction, kept for comparison with the network.
                double handCodedReward; int[] handCodedPredictedAlo;
                handCodedPrediction(egoNewState, a, out handCodedReward, alloNewState, out handCodedPredictedAlo);

                Console.WriteLine("action " + a[0] + "," + a[1] + " -> " + predictedAlo[0] + "," + predictedAlo[1] + " reward: " + reward);

                // Seed the allo model only where it still holds the default Q value.
                if (saHistory.Count >= 50 && alloModel.value(alloNewState, a) == alloModel.defaultQ)
                {
                    if (fullPredictionMode)
                    {
                        alloModel.update(new StateTransition<int[], int[]>(alloNewState, a, reward, predictedAlo));
                    }
                    else
                    {
                        alloModel.Qtable[alloNewState][a] = egoModel.value(egoNewState, a);
                    }
                }
            }


            return 0;
        }
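
This example keeps a sliding 500-sample window of (ego state + action) inputs paired with (dx, dy, reward) targets and periodically retrains a network on it. Here is a minimal sketch of that buffer pattern in isolation; TransitionBuffer and its members are hypothetical names standing in for the saHistory/sPrimeHistory queues.

using System;
using System.Collections.Generic;

class TransitionBuffer
{
    const int Capacity = 500;
    readonly Queue<double[]> inputs  = new Queue<double[]>();  // ego state + action
    readonly Queue<double[]> targets = new Queue<double[]>();  // dx, dy, reward

    public void Add(double[] egoStateAction, int[] alloOld, int[] alloNew, double reward)
    {
        if (inputs.Count >= Capacity)  // keep only the most recent samples
        {
            inputs.Dequeue();
            targets.Dequeue();
        }
        inputs.Enqueue(egoStateAction);
        targets.Enqueue(new double[] {
            alloNew[0] - alloOld[0],   // regression target: x displacement,
            alloNew[1] - alloOld[1],   // y displacement,
            reward                     // and the observed reward
        });
    }

    // Snapshots in the array-of-arrays form RunEpoch-style trainers expect.
    public double[][] Inputs  { get { return inputs.ToArray(); } }
    public double[][] Targets { get { return targets.ToArray(); } }
}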