Example #1
        MultiResValue<stateType, actionType> newModel(double defaultQ)
        {
            // Build a fresh multi-resolution model that shares this object's comparers,
            // action set, start state, and layer count.
            MultiResValue<stateType, actionType> newmodel = new MultiResValue<stateType, actionType>((IEqualityComparer<int[]>)stateComparer, actionComparer, availableActions, (int[])((object)startState), layers);

            // Apply the update budget to every layer, then seed the base layer
            // with the supplied default (optimistic) Q value.
            foreach (ModelBasedValue<int[], actionType> m in newmodel.models)
            {
                m.maxUpdates = maxUpdates;
            }
            newmodel.models[0].defaultQ = defaultQ;

            //foreach(StateTransition<stateType, actionType> t in transitionHistory)
            //{
            //    newmodel.update((StateTransition<int[], actionType>)(object)t);
            //}

            return newmodel;
        }
Example #2
        MultiResValue<stateType, actionType> copyModel(MultiResValue<stateType, actionType> toCopy)
        {
            // Deep-copy the model by round-tripping it through binary serialization
            // in a temporary file.
            string filename = rnd.Next().ToString() + ".bin";

            IFormatter formatter = new BinaryFormatter();
            Stream     stream    = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None);

            formatter.Serialize(stream, toCopy);
            stream.Close();

            formatter = new BinaryFormatter();
            stream    = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read);
            MultiResValue<stateType, actionType> copied = (MultiResValue<stateType, actionType>)formatter.Deserialize(stream);

            stream.Close();
            File.Delete(filename);      // remove the temporary serialization file

            copied.ResetStats();

            return copied;
        }
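
The copy above deep-clones a model by serializing it to a temporary file and immediately deserializing it, so every serializable field is duplicated. The same round trip can be done entirely in memory; below is a minimal sketch of that idea (the DeepCopy<T> helper is hypothetical and not part of the library shown here, and note that BinaryFormatter is considered obsolete in current .NET releases):

        // Hypothetical helper, not part of the library above: the same
        // serialize/deserialize deep copy, but through a MemoryStream.
        static T DeepCopy<T>(T source)
        {
            var formatter = new BinaryFormatter();
            using (var stream = new MemoryStream())
            {
                formatter.Serialize(stream, source);
                stream.Seek(0, SeekOrigin.Begin);
                return (T)formatter.Deserialize(stream);
            }
        }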
Example #3
        public ContextSwitchValue(IEqualityComparer<stateType> StateComparer, IEqualityComparer<actionType> ActionComparer, List<actionType> AvailableActions, stateType StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
        {
            // Optional positional parameters: parameters[0] is the number of
            // resolution layers, parameters[1] is the per-model update budget.
            if (parameters.Length > 0)
            {
                layers = (int)parameters[0];
            }
            if (parameters.Length > 1)
            {
                maxUpdates = (int)parameters[1];
            }

            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            availableActions = AvailableActions;
            startState       = StartState;

            // Begin with a single model and use it until transitions stop matching it.
            currentModel = newModel(defaultQValue);
            models.Add(currentModel);

            currentMachineState = machineState.useCurrent;
        }
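
The constructor forwards its arguments to the base class and then reads two optional positional parameters: parameters[0] sets the number of layers and parameters[1] the per-model update budget. A hypothetical call site, assuming int[] states, int actions, and an IntArrayComparer type implementing IEqualityComparer<int[]> (all of these concrete choices are illustrative and not taken from the source), might look like:

        // Illustrative only; IntArrayComparer and the numeric arguments are assumptions.
        var actions    = new List<int> { 0, 1, 2, 3 };
        var startState = new int[] { 0, 0 };

        var learner = new ContextSwitchValue<int[], int>(
            new IntArrayComparer(),            // assumed IEqualityComparer<int[]> implementation
            EqualityComparer<int>.Default,
            actions,
            startState,
            3,                                 // parameters[0] -> layers
            10000);                            // parameters[1] -> maxUpdates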
Example #4
        double EventProbability(IEnumerable<StateTransition<stateType, actionType>> transitions, MultiResValue<stateType, actionType> model, int priorCnt)
        {
            double p = 1;

            foreach (StateTransition<stateType, actionType> transition in transitions)
            {
                // Laplace-smoothed probability of the observed next state, given this
                // model's transition counts for (oldState, action).
                Dictionary<int[], int> s2Counts = model.models[0].T.GetStateValueTable((int[])((object)transition.oldState), transition.action);
                double thisS2Counts = priorCnt;
                if (s2Counts.ContainsKey((int[])((object)transition.newState)))
                {
                    thisS2Counts = (double)s2Counts[(int[])((object)transition.newState)] + priorCnt;
                }
                double total = (double)s2Counts.Values.Sum() + priorCnt;

                p *= (thisS2Counts / total);

                // Probability of the observed reward under the model's reward distribution.
                double thisP_R = model.models[0].R.Get((int[])((object)transition.oldState), transition.action, (int[])((object)transition.newState)).P(transition.reward, priorCnt);
                p *= thisP_R;
            }
            return p;
        }
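
For each transition in the window, the probability of the observed next state is smoothed with priorCnt pseudo-counts: a transition the model has seen contributes (count + priorCnt) / (total + priorCnt), an unseen one contributes priorCnt / (total + priorCnt), and the probability of the observed reward is multiplied in on top. A standalone sketch of just the next-state smoothing (the helper name is ours, not the library's):

        // Sketch of the smoothing rule used above; not part of the library.
        static double SmoothedNextStateProbability(int observedCount, int totalCount, int priorCnt)
        {
            // observedCount == 0 corresponds to a (state, action, nextState)
            // triple the model has never seen.
            return (observedCount + priorCnt) / (double)(totalCount + priorCnt);
        }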
Example #5
        public override double[] value(stateType state, List<actionType> actions)
        {
            // On the first call, start from whichever stored model assigns the
            // highest value to the current state.
            if (currentModel == null)
            {
                double bestValue = double.NegativeInfinity;
                foreach (MultiResValue<stateType, actionType> m in models)
                {
                    double thisValue = m.models[0].value((int[])(object)state, actions).Max();
                    if (thisValue > bestValue)
                    {
                        bestValue    = thisValue;
                        currentModel = m;
                    }
                }
                Console.WriteLine("starting with model " + models.IndexOf(currentModel));
            }

            if (currentMachineState == machineState.tryAdapt)
            {
                try
                {
                    return currentModel.value((int[])((object)state), actions);
                }
                catch (ApplicationException)
                {
                    // Adaptation of the copied model failed: promote the candidate model,
                    // make it optimistic, and return to normal operation.
                    //candidateModel = null;
                    //currentModel = newModel(15);
                    //models.Add(currentModel);

                    currentModel   = candidateModel;
                    candidateModel = null;
                    models.Add(currentModel);
                    currentModel.models[0].defaultQ = 15;

                    currentMachineState = machineState.useCurrent;
                    Console.WriteLine("Starting new model (" + models.Count + ")");
                    return currentModel.value((int[])((object)state), actions);
                }
            }
            else if (currentMachineState == machineState.useCurrent)
            {
                return currentModel.models[0].value((int[])((object)state), actions);
            }

            return null;

            //    // switch to the model which best explains the recent transition history
            //    double bestP = 0;
            //    MultiResValue<stateType, actionType> bestModel = currentModel;
            //    foreach (MultiResValue<stateType, actionType> m in models)
            //    {
            //        if (m == currentModel)
            //            continue;

            //        double thisP = EventProbability(transitionHistory, m, priorCnts);
            //Console.WriteLine(thisP);
            //if (thisP > (bestP + 0.05))
            //        {
            //            bestP = thisP;
            //            bestModel = m;

            //            if (thisP >= pThreshold)
            //            {
            //                Console.WriteLine("Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + ")");
            //                currentModel = m;
            //            }
            //        }
            //    }

            //    if (bestP < pThreshold) // if none explain it well
            //    {
            //        currentModel = newModel(15);
            //        models.Add(currentModel);
            //        Console.WriteLine("Starting new model (p = " + bestP + ")");
            //    }
            //    currentMachineState = machineState.useCurrent;
            //    return currentModel.value((int[])((object)state), actions);
            //}
            //        break;
            //}

            //return null;
        }
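
Example 5 above and Example 6 below drive a small state machine through currentMachineState. Only two values are referenced in these snippets, so the enum presumably looks roughly like the sketch below (the declaration itself is not shown in the source; the comments are our reading of how the states are used):

        // Assumed declaration; only these two members appear in the examples.
        enum machineState
        {
            useCurrent,     // use the values of the currently selected model
            tryAdapt        // adapt a copied model while a fresh candidate model learns in parallel
        }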
Example #6
        public override double update(StateTransition<stateType, actionType> transition)
        {
            // Maintain a sliding window of the 100 most recent transitions.
            transitionHistory.Enqueue(transition);
            while (transitionHistory.Count > 100)
            {
                transitionHistory.Dequeue();
            }

            switch (currentMachineState)
            {
            case machineState.useCurrent:

                //// switch to the model which best explains the recent transition history
                //double bestP = EventProbability(transitionHistory, currentModel, priorCnts);
                ////Console.WriteLine(bestP);
                //MultiResValue < stateType, actionType > bestModel = currentModel;

                //foreach (MultiResValue<stateType, actionType> m in models)
                //{
                //    if (m == currentModel)
                //        continue;

                //    double thisP = EventProbability(transitionHistory, m, priorCnts);

                //    if (thisP > (bestP + 0.05))
                //    {
                //        if (thisP >= pThreshold)
                //        {
                //            Console.WriteLine("Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + " vs " + Math.Round(bestP,2) + ")");
                //            currentModel = m;
                //        }
                //        bestP = thisP;
                //        bestModel = m;
                //    }
                //}
                ////Console.WriteLine(bestP);
                // Score every stored model by how well it explains the recent
                // transition history, remembering the score of the model in use.
                double[] pVals      = new double[models.Count];
                double   currentVal = double.NaN;
                for (int i = 0; i < models.Count; i++)
                {
                    pVals[i] = EventProbability(transitionHistory, models[i], priorCnts);
                    if (models[i] == currentModel)
                    {
                        currentVal = pVals[i];
                    }
                }
                int bestModelIndex = softmax(pVals);

                // Only switch if another stored model explains the history noticeably better.
                if (pVals[bestModelIndex] > (currentVal + 0.05))
                {
                    currentModel = models[bestModelIndex];
                    Console.WriteLine("Switching to previously learned model: " + bestModelIndex + " (p = " + Math.Round(pVals[bestModelIndex], 2) + ")");
                }

                if (pVals.Max() < pThreshold)     // if none explain it well
                {
                    //// find the model with the best value from the current state
                    //double bestVal = currentModel.models[0].value((int[])((object)transition.newState), availableActions).Max();
                    //bestModel = currentModel;

                    //foreach (MultiResValue<stateType, actionType> m in models)
                    //{
                    //    double thisVal = m.models[0].value((int[])((object)transition.newState), availableActions).Max();
                    //    if (thisVal > bestVal)
                    //    {
                    //        bestVal = thisVal;
                    //        bestModel = m;
                    //    }
                    //}

                    // does the unexpected event relate to reward?
                    double rProb = currentModel.models[0].PredictReward((int[])(object)transition.oldState, transition.action).P(transition.reward, 1);
                    //Console.WriteLine(rProb);
                    bool rewardRelatedError = rProb < pThreshold;


                    if (layers > 1 && !rewardRelatedError)
                    {
                        // copy the best model for adaptation
                        Console.WriteLine("Adapting model " + bestModelIndex + " (p = " + pVals[bestModelIndex] + ")");    // + ", bestVal = " + bestVal + ")");
                        currentModel = copyModel(models[bestModelIndex]);
                        //models.Add(currentModel); //??????????????????? if not here then move to adaptation successful
                        candidateModel      = newModel(0.001);
                        currentMachineState = machineState.tryAdapt;
                    }
                    else
                    {
                        currentModel = newModel(defaultQValue);
                        resetHistory();
                        models.Add(currentModel);
                        Console.WriteLine("Starting new model (p = " + pVals[bestModelIndex] + ")(" + models.Count + ")");
                    }
                }

                break;

            case machineState.tryAdapt:

                // let the candidate model see the state transition
                candidateModel.update((StateTransition<int[], actionType>)((object)transition));

                // if goal has been found, assume model is adapted successfully

                if (transition.reward > 0)
                {
                    currentMachineState = machineState.useCurrent;
                    currentModel        = candidateModel;
                    models.Add(candidateModel);
                    //models.Add(currentModel);
                    Console.WriteLine("Adaptation successful");
                    break;
                }

                //// switch to the model which best explains the recent transition history
                //bestP = EventProbability(transitionHistory, currentModel, 1);
                //bestModel = currentModel;
                //foreach (MultiResValue<stateType, actionType> m in models)
                //{
                //    if (m == currentModel)
                //        continue;

                //    double thisP = EventProbability(transitionHistory, m, 1);
                //    if (thisP > (bestP + 0.05))
                //    {
                //        bestP = thisP;
                //        bestModel = m;

                //        if (thisP >= pThreshold)
                //        {
                //            Console.WriteLine("Adaptation aborted. Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + ")");
                //            currentModel = m;
                //            candidateModel = null;
                //            currentMachineState = machineState.useCurrent;
                //        }
                //    }
                //}

                // if value gradient flattens, assume model cannot be adapted
                double currentValue = currentModel.models[0].value((int[])((object)transition.newState), availableActions).Max();
                if (currentValue < vThreshold)
                {
                    //currentModel = newModel(15);
                    //models.Add(currentModel);
                    //candidateModel = null;

                    currentModel   = candidateModel;
                    candidateModel = null;
                    models.Add(currentModel);
                    currentModel.models[0].defaultQ = 15;

                    currentMachineState = machineState.useCurrent;
                    Console.WriteLine("Adaptation failed. Starting new model  (" + models.Count + ")");

                    //// switch to the model which best explains the recent transition history
                    //bestP = 0;
                    //bestModel = currentModel;
                    //foreach (MultiResValue<stateType, actionType> m in models)
                    //{
                    //    if (m == currentModel)
                    //        continue;

                    //    double thisP = EventProbability(transitionHistory, m, priorCnts);
                    //    Console.WriteLine(thisP);
                    //    if (thisP > (bestP + 0.05))
                    //    {
                    //        bestP = thisP;
                    //        bestModel = m;

                    //        if (thisP >= pThreshold)
                    //        {
                    //            Console.WriteLine("Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + ")");
                    //            currentModel = m;
                    //        }
                    //    }
                    //}

                    //if (bestP < pThreshold) // if none explain it well
                    //{
                    //    currentModel = newModel(15);
                    //    models.Add(currentModel);
                    //    Console.WriteLine("Starting new model (p = " + bestP + ")");
                    //}
                    //currentMachineState = machineState.useCurrent;
                }
                break;
            }


            // Whichever branch ran, the active model always learns from this transition.
            currentModel.update((StateTransition<int[], actionType>)((object)transition));
            if (transition.absorbingStateReached)
            {
                resetHistory();
            }

            return 0;
        }
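
The update above uses softmax(pVals) as an index selector over the per-model probabilities; its implementation is not part of these snippets. One common way to build such a selector is to sample an index with probability proportional to exp(score / temperature). Below is a sketch under that assumption (the name, the temperature, and the sampling behaviour are all assumptions, not the library's actual softmax; it relies on System.Linq):

        // Hypothetical sketch only; the real softmax(double[]) is not shown above.
        static readonly Random rng = new Random();

        static int SoftmaxIndex(double[] scores, double temperature = 0.1)
        {
            // Weight each index by exp(score / temperature), then sample one
            // index in proportion to its weight.
            double[] weights = scores.Select(s => Math.Exp(s / temperature)).ToArray();
            double   total   = weights.Sum();
            double   draw    = rng.NextDouble() * total;

            double cumulative = 0;
            for (int i = 0; i < weights.Length; i++)
            {
                cumulative += weights[i];
                if (draw <= cumulative)
                {
                    return i;
                }
            }
            return weights.Length - 1;
        }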