コード例 #1
        /// <summary>
        /// Builds a new MultiResValue from this object's comparers, available actions,
        /// start state and layer count, then configures it: every entry in
        /// newmodel.models receives this object's maxUpdates, and models[0] receives
        /// the supplied default Q value.
        /// </summary>
        /// <param name="defaultQ">Value assigned to models[0].defaultQ of the new model.</param>
        // NOTE(review): the enclosing braces and the return statement are not visible in
        // this excerpt - presumably newmodel is returned; confirm against the full file.
        MultiResValue <stateType, actionType> newModel(double defaultQ)
            // The state comparer and start state are cast to their int[] forms (the start
            // state via object), so this only works when stateType is int[] at run time.
            MultiResValue <stateType, actionType> newmodel = new MultiResValue <stateType, actionType>((IEqualityComparer <int[]>)stateComparer, actionComparer, availableActions, (int[])((object)startState), layers);

            // Apply the same update budget to every sub-model.
            foreach (ModelBasedValue <int[], actionType> m in newmodel.models)
                m.maxUpdates = maxUpdates;
            // Only the first sub-model gets the caller-supplied default Q.
            newmodel.models[0].defaultQ = defaultQ;

            // Disabled: replaying the stored transition history into the new model.
            //foreach(StateTransition<stateType, actionType> t in transitionHistory)
            //    newmodel.update((StateTransition<int[], actionType>)(object)t);

コード例 #2
        /// <summary>
        /// Copies a model by serializing it with BinaryFormatter to a file and
        /// deserializing a new instance back from that file.
        /// </summary>
        /// <param name="toCopy">The model to duplicate.</param>
        // NOTE(review): the rest of this method (braces, return, any stream Close/Dispose,
        // any deletion of the file) is not visible in this excerpt - presumably `copied`
        // is returned; confirm against the full file.
        // NOTE(review): BinaryFormatter is obsolete and removed in recent .NET versions;
        // consider a different deep-copy mechanism.
        MultiResValue <stateType, actionType> copyModel(MultiResValue <stateType, actionType> toCopy)
            // File name is a random integer from rnd plus ".bin"; it is a relative path.
            string filename = rnd.Next().ToString() + ".bin";

            IFormatter formatter = new BinaryFormatter();
            // Opened for writing with FileShare.None, i.e. no other handle may share the file.
            Stream     stream    = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None);

            formatter.Serialize(stream, toCopy);

            // NOTE(review): no close of the write stream is visible before the file is
            // reopened below; with FileShare.None still held the second open would be
            // expected to fail - confirm that a Close() exists in the full file.
            formatter = new BinaryFormatter();
            stream    = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read);
            MultiResValue <stateType, actionType> copied = (MultiResValue <stateType, actionType>)formatter.Deserialize(stream);



コード例 #3
        /// <summary>
        /// Creates the value function: forwards all arguments to the base constructor,
        /// reads optional settings from <paramref name="parameters"/>, stores the
        /// comparers, actions and start state, creates the initial current model and
        /// puts the state machine into the useCurrent state.
        /// </summary>
        /// <param name="StateComparer">Equality comparer for states; stored in stateComparer.</param>
        /// <param name="ActionComparer">Equality comparer for actions; stored in actionComparer.</param>
        /// <param name="AvailableActions">Actions available to the agent; stored in availableActions.</param>
        /// <param name="StartState">Initial state; stored in startState.</param>
        /// <param name="parameters">Optional: [0] is cast to int and stored in layers, [1] is cast to int and stored in maxUpdates.</param>
        // NOTE(review): the constructor's braces are not visible in this excerpt.
        public ContextSwitchValue(IEqualityComparer <stateType> StateComparer, IEqualityComparer <actionType> ActionComparer, List <actionType> AvailableActions, stateType StartState, params object[] parameters)
            : base(StateComparer, ActionComparer, AvailableActions, StartState, parameters)
            // Each optional parameter is unboxed with a direct (int) cast, so the
            // supplied object must be a boxed int.
            if (parameters.Length > 0)
                layers = (int)parameters[0];
            if (parameters.Length > 1)
                maxUpdates = (int)parameters[1];

            stateComparer    = StateComparer;
            actionComparer   = ActionComparer;
            availableActions = AvailableActions;
            startState       = StartState;

            // The fields above are assigned before this call; newModel reads them.
            currentModel = newModel(defaultQValue);

            currentMachineState = machineState.useCurrent;
コード例 #4
        /// <summary>
        /// Scores how well a model explains a sequence of transitions. Starting from 1,
        /// the running product is multiplied, per transition, by a next-state count
        /// ratio (with priorCnt added) and by a reward probability obtained from the
        /// model's first sub-model.
        /// </summary>
        /// <param name="transitions">Transitions to score.</param>
        /// <param name="model">Model whose models[0].T and models[0].R tables are queried.</param>
        /// <param name="priorCnt">Count added to both numerator and denominator of the next-state ratio, and passed to the reward P(...) call.</param>
        // NOTE(review): braces and the return statement are not visible in this excerpt -
        // presumably p is returned after the loop; confirm against the full file.
        double EventProbability(IEnumerable <StateTransition <stateType, actionType> > transitions, MultiResValue <stateType, actionType> model, int priorCnt)
            double p = 1;

            foreach (StateTransition <stateType, actionType> transition in transitions)
                // Table keyed by next state for this (oldState, action) pair; the values
                // are summed below as counts.
                Dictionary <int[], int> s2Counts = model.models[0].T.GetStateValueTable((int[])((object)transition.oldState), transition.action);
                // Numerator: priorCnt alone if the next state is absent from the table,
                // otherwise its stored count plus priorCnt.
                double thisS2Counts = priorCnt;
                if (s2Counts.ContainsKey((int[])((object)transition.newState)))
                    thisS2Counts = (double)s2Counts[(int[])((object)transition.newState)] + priorCnt;
                // Denominator: sum of all stored counts plus priorCnt (added once).
                // NOTE(review): with priorCnt == 0 and an empty table this is 0/0 (NaN).
                double total = (double)s2Counts.Values.Sum() + priorCnt;

                p *= (thisS2Counts / total);

                // Reward term: P(reward, priorCnt) of the entry stored for
                // (oldState, action, newState) in the R table.
                double thisP_R = model.models[0].R.Get((int[])((object)transition.oldState), transition.action, (int[])((object)transition.newState)).P(transition.reward, priorCnt);
                p *= thisP_R;
コード例 #5
        /// <summary>
        /// Returns the values of the given actions in the given state, using the model
        /// selected by the current machine state.
        /// </summary>
        /// <param name="state">State to evaluate; cast to int[] via object before use.</param>
        /// <param name="actions">Actions to evaluate.</param>
        /// <returns>The array produced by the selected model's value(...) call.</returns>
        // NOTE(review): braces and the `try` keyword are not visible in this excerpt, so
        // the exact nesting described in the comments below is inferred from indentation.
        public override double[] value(stateType state, List <actionType> actions)
            // No current model yet: choose the stored model whose first sub-model reports
            // the highest maximum action value for this state.
            if (currentModel == null)
                double bestValue = double.NegativeInfinity;
                foreach (MultiResValue <stateType, actionType> m in models)
                    double thisValue = m.models[0].value((int[])(object)state, actions).Max();
                    if (thisValue > bestValue)
                        bestValue    = thisValue;
                        currentModel = m;
                Console.WriteLine("starting with model " + models.IndexOf(currentModel));

            if (currentMachineState == machineState.tryAdapt)
                    // While adapting, the value comes from the whole multi-resolution
                    // model rather than from models[0] only.
                    // NOTE(review): presumably wrapped in a try block; the keyword is missing here.
                    return(currentModel.value((int[])((object)state), actions));
                // On ApplicationException: give up adapting, promote the candidate model
                // to current, set its first sub-model's defaultQ to 15, return to
                // useCurrent and answer from the promoted model.
                catch (ApplicationException ex)
                    //candidateModel = null;
                    //currentModel = newModel(15);

                    currentModel   = candidateModel;
                    candidateModel = null;
                    currentModel.models[0].defaultQ = 15;

                    currentMachineState = machineState.useCurrent;
                    Console.WriteLine("Starting new model (" + models.Count + ")");
                    return(currentModel.value((int[])((object)state), actions));
            // Normal operation: answer from the first sub-model of the current model.
            else if (currentMachineState == machineState.useCurrent)
                return(currentModel.models[0].value((int[])((object)state), actions));


            // Disabled below: an earlier model-switching routine based on EventProbability.
            //    // switch to the model which best explains the recent transition history
            //    double bestP = 0;
            //    MultiResValue<stateType, actionType> bestModel = currentModel;
            //    foreach (MultiResValue<stateType, actionType> m in models)
            //    {
            //        if (m == currentModel)
            //            continue;

            //        double thisP = EventProbability(transitionHistory, m, priorCnts);
            //if (thisP > (bestP + 0.05))
            //        {
            //            bestP = thisP;
            //            bestModel = m;

            //            if (thisP >= pThreshold)
            //            {
            //                Console.WriteLine("Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + ")");
            //                currentModel = m;
            //            }
            //        }
            //    }

            //    if (bestP < pThreshold) // if none explain it well
            //    {
            //        currentModel = newModel(15);
            //        models.Add(currentModel);
            //        Console.WriteLine("Starting new model (p = " + bestP + ")");
            //    }
            //    currentMachineState = machineState.useCurrent;
            //    return currentModel.value((int[])((object)state), actions);
            //        break;

            //return null;
コード例 #6
        /// <summary>
        /// Processes one observed transition. Depending on the machine state it may
        /// switch to another stored model, start adapting a copy of a stored model,
        /// start a new model, or finish/abort an adaptation; afterwards the current
        /// model is updated with the transition.
        /// </summary>
        /// <param name="transition">The observed transition; cast to its int[]-state form via object before being passed to models.</param>
        // NOTE(review): braces, `break`/`else` lines, the body of the while loop, the body
        // of the final if and the return statement are not visible in this excerpt; the
        // structure described in the comments below is inferred from indentation.
        public override double update(StateTransition <stateType, actionType> transition)
            // Loop while the history holds more than 100 entries.
            // NOTE(review): loop body missing here - presumably it drops old entries.
            while (transitionHistory.Count() > 100)

            switch (currentMachineState)
            case machineState.useCurrent:

                //// switch to the model which best explains the recent transition history
                //double bestP = EventProbability(transitionHistory, currentModel, priorCnts);
                //MultiResValue < stateType, actionType > bestModel = currentModel;

                //foreach (MultiResValue<stateType, actionType> m in models)
                //    if (m == currentModel)
                //        continue;

                //    double thisP = EventProbability(transitionHistory, m, priorCnts);

                //    if (thisP > (bestP + 0.05))
                //    {
                //        if (thisP >= pThreshold)
                //        {
                //            Console.WriteLine("Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + " vs " + Math.Round(bestP,2) + ")");
                //            currentModel = m;
                //        }
                //        bestP = thisP;
                //        bestModel = m;
                //    }

                // Score every stored model against the transition history. currentVal
                // stays NaN when currentModel is not one of the stored models.
                double[] pVals      = new double[models.Count];
                double   currentVal = double.NaN;
                for (int i = 0; i < models.Count; i++)
                    pVals[i] = EventProbability(transitionHistory, models[i], priorCnts);
                    if (models[i] == currentModel)
                        currentVal = pVals[i];
                // softmax(...) is defined elsewhere; it yields the index of the chosen model.
                int bestModelIndex = softmax(pVals);

                // Switch only when the chosen model beats the current one by more than
                // 0.05; the comparison is false whenever currentVal is NaN.
                if (pVals[bestModelIndex] > (currentVal + 0.05))
                    currentModel = models[bestModelIndex];
                    Console.WriteLine("Switching to previously learned model: " + bestModelIndex + "(p = " + Math.Round(pVals[bestModelIndex], 2) + ")");

                if (pVals.Max() < pThreshold)     // if none explain it well
                    //// find the model with the best value from the current state
                    //double bestVal = currentModel.models[0].value((int[])((object)transition.newState), availableActions).Max();
                    //bestModel = currentModel;

                    //foreach (MultiResValue<stateType, actionType> m in models)
                    //    double thisVal = m.models[0].value((int[])((object)transition.newState), availableActions).Max();
                    //    if (thisVal > bestVal)
                    //    {
                    //        bestVal = thisVal;
                    //        bestModel = m;
                    //    }

                    // does the unexpected event relate to reward?
                    double rProb = currentModel.models[0].PredictReward((int[])(object)transition.oldState, transition.action).P(transition.reward, 1);
                    bool rewardRelatedError = rProb < pThreshold;

                    // With more than one layer and a surprise that is not reward-related,
                    // adapt a copy of the chosen model and keep a fresh candidate
                    // (default Q 0.001) learning alongside it.
                    if (layers > 1 && !rewardRelatedError)
                        // copy the best model for adaptation
                        Console.WriteLine("Adapting model " + bestModelIndex + " (p = " + pVals[bestModelIndex] + ")");    // + ", bestVal = " + bestVal + ")");
                        currentModel = copyModel(models[bestModelIndex]);
                        //models.Add(currentModel); //??????????????????? if not here then move to adaptation successful
                        candidateModel      = newModel(0.001);
                        currentMachineState = machineState.tryAdapt;
                        // NOTE(review): the next two statements presumably belong to an
                        // `else` branch whose keyword is missing from this excerpt;
                        // otherwise they would overwrite the copy made above.
                        currentModel = newModel(defaultQValue);
                        Console.WriteLine("Starting new model (p = " + pVals[bestModelIndex] + ")(" + models.Count + ")");


            case machineState.tryAdapt:

                // let the candidate model see the state transition
                candidateModel.update((StateTransition <int[], actionType>)((object)transition));

                // if goal has been found, assume model is adapted successfully

                // NOTE(review): this assigns candidateModel to currentModel (not the
                // other way round) and leaves candidateModel set; no models.Add is
                // visible in this excerpt. Confirm this is the intended outcome.
                if (transition.reward > 0)
                    currentMachineState = machineState.useCurrent;
                    currentModel        = candidateModel;
                    Console.WriteLine("Adaptation successful");

                //// switch to the model which best explains the recent transition history
                //bestP = EventProbability(transitionHistory, currentModel, 1);
                //bestModel = currentModel;
                //foreach (MultiResValue<stateType, actionType> m in models)
                //    if (m == currentModel)
                //        continue;

                //    double thisP = EventProbability(transitionHistory, m, 1);
                //    if (thisP > (bestP + 0.05))
                //    {
                //        bestP = thisP;
                //        bestModel = m;

                //        if (thisP >= pThreshold)
                //        {
                //            Console.WriteLine("Adaptation aborted. Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + ")");
                //            currentModel = m;
                //            candidateModel = null;
                //            currentMachineState = machineState.useCurrent;
                //        }
                //    }

                // if value gradient flattens, assume model cannot be adapted
                double currentValue = currentModel.models[0].value((int[])((object)transition.newState), availableActions).Max();
                if (currentValue < vThreshold)
                    //currentModel = newModel(15);
                    //candidateModel = null;

                    // Promote the candidate to current and set its first sub-model's
                    // defaultQ to 15 before returning to the useCurrent state.
                    currentModel   = candidateModel;
                    candidateModel = null;
                    currentModel.models[0].defaultQ = 15;

                    currentMachineState = machineState.useCurrent;
                    Console.WriteLine("Adaptation failed. Starting new model  (" + models.Count + ")");

                    //// switch to the model which best explains the recent transition history
                    //bestP = 0;
                    //bestModel = currentModel;
                    //foreach (MultiResValue<stateType, actionType> m in models)
                    //    if (m == currentModel)
                    //        continue;

                    //    double thisP = EventProbability(transitionHistory, m, priorCnts);
                    //    Console.WriteLine(thisP);
                    //    if (thisP > (bestP + 0.05))
                    //    {
                    //        bestP = thisP;
                    //        bestModel = m;

                    //        if (thisP >= pThreshold)
                    //        {
                    //            Console.WriteLine("Switching to previously learned model: " + models.IndexOf(m) + "(p = " + Math.Round(thisP, 2) + ")");
                    //            currentModel = m;
                    //        }
                    //    }

                    //if (bestP < pThreshold) // if none explain it well
                    //    currentModel = newModel(15);
                    //    models.Add(currentModel);
                    //    Console.WriteLine("Starting new model (p = " + bestP + ")");
                    //currentMachineState = machineState.useCurrent;

            // Whatever model is current after the state handling learns from the transition.
            // NOTE(review): when the tryAdapt case has just made candidateModel the
            // current model, that object already received this transition above, so it
            // appears to be updated twice - confirm whether this is intended.
            currentModel.update((StateTransition <int[], actionType>)((object)transition));
            // NOTE(review): the body of this check and the method's return value are not
            // visible in this excerpt.
            if (transition.absorbingStateReached)
