Example #1
        public override double update(StateTransition<stateType, actionType> transition)
        {
            stats.cumulativeReward += transition.reward;

            // Reward-prediction error (RPE): only computable once this
            // (state, action) pair has been visited at least once.
            double RPE = 0;

            if (trueModel.T.GetStateValueTable(transition.oldState, transition.action).Count > 0)
            {
                RPE = transition.reward - trueModel.PredictReward(transition.oldState, transition.action, transition.newState);
            }

            // A negative RPE ("loss") starts a temporary override: back up the
            // stored reward histogram for this transition and replace it with
            // the RPE itself for the next 8 updates.
            if (RPE < 0 && LScounter <= 0)
            {
                lossTransition = transition;

                backupR = trueModel.R.Get(transition.oldState, transition.action, transition.newState);
                Histogram temp = new Histogram(0);
                temp.Add(RPE);
                trueModel.R.Set(transition.oldState, transition.action, transition.newState, temp);

                LScounter = 8;
            }

            // Once the override window expires, restore the backed-up reward.
            if (LScounter == 0 && lossTransition != null)
            {
                trueModel.R.Set(lossTransition.oldState, lossTransition.action, lossTransition.newState, backupR);
                lossTransition = null;
            }

            // Standard model update (sees the overridden reward while active).
            trueModel.update(transition);
            LScounter--;

            return 0;
        }
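
What this example implements is a transient "loss shaping" scheme: a negative reward-prediction error (RPE) backs up the stored reward for the offending transition, overwrites it with the RPE itself, and restores the backup once LScounter counts down from 8. Below is a minimal, self-contained sketch of that cycle, assuming a plain double can stand in for the Histogram entry in trueModel.R; the names LossShapingDemo and Step are hypothetical.

using System;

class LossShapingDemo
{
    double storedReward = 0.5;  // stands in for trueModel.R at (s, a, s')
    double backup;              // stands in for backupR
    int counter = -1;           // stands in for LScounter

    public void Step(double reward, double predicted)
    {
        double rpe = reward - predicted;  // reward-prediction error

        if (rpe < 0 && counter <= 0)
        {
            backup = storedReward;  // save the learned reward estimate
            storedReward = rpe;     // pessimistic override
            counter = 8;            // hold the override for 8 updates
        }

        if (counter == 0)
        {
            storedReward = backup;  // window expired: restore the estimate
        }

        counter--;
        Console.WriteLine($"rpe={rpe:F2} stored={storedReward:F2} counter={counter}");
    }

    static void Main()
    {
        var demo = new LossShapingDemo();
        demo.Step(-1.0, 0.5);        // negative RPE: override kicks in
        for (int i = 0; i < 9; i++)
            demo.Step(0.5, 0.5);     // counter winds down, then the backup is restored
    }
}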
Example #2
        public override double update(StateTransition<int[], int[]> transition)
        {
            stats.cumulativeReward += transition.reward;

            // The first two state components are the allocentric (x, y) position;
            // the remaining eight are the egocentric observation.
            int[] alloOldState = new int[2] { transition.oldState[0], transition.oldState[1] };
            int[] alloNewState = new int[2] { transition.newState[0], transition.newState[1] };
            int[] egoOldState = new int[8];
            Array.Copy(transition.oldState, 2, egoOldState, 0, 8);
            int[] egoNewState = new int[8];
            Array.Copy(transition.newState, 2, egoNewState, 0, 8);

            // Load the transition into the bounded history: inputs are the ego
            // state plus the action; targets are the allocentric displacement
            // and the observed reward. Keep a sliding window of 500 samples.
            if (saHistory.Count > 500)
            {
                saHistory.Dequeue();
                sPrimeHistory.Dequeue();
            }
            double[] sa = new double[10];
            Array.Copy(egoOldState, sa, 8);
            sa[8] = transition.action[0];
            sa[9] = transition.action[1];
            Console.WriteLine("sa: " + string.Join(",", sa));
            Console.WriteLine("sprime: " + alloNewState[0] + "," + alloNewState[1]);

            saHistory.Enqueue(sa);
            sPrimeHistory.Enqueue(new double[3] {
                alloNewState[0] - alloOldState[0],
                alloNewState[1] - alloOldState[1],
                transition.reward
            });

            // Run one regression epoch over the history once enough samples exist.
            if (saHistory.Count > 50 && fullPredictionMode)
            {
                teacher.RunEpoch(saHistory.ToArray(), sPrimeHistory.ToArray());
            }

            // update models with the current transition
            alloModel.update(new StateTransition<int[], int[]>(alloOldState, transition.action, transition.reward, alloNewState));
            egoModel.update(new StateTransition<int[], int[]>(egoOldState, transition.action, transition.reward, egoNewState));

            // Transfer predictions from the ego model into the allo model.
            Console.WriteLine("current state: " + alloNewState[0] + "," + alloNewState[1]);
            Console.WriteLine("ego state: " + string.Join(",", egoNewState));

            foreach (int[] a in availableActions)
            {
                // Build the network input: ego state plus candidate action.
                sa = new double[10];
                Array.Copy(egoNewState, sa, 8);
                sa[8] = a[0];
                sa[9] = a[1];

                // Predict the allocentric displacement and reward for this action.
                double[] predicted    = network.Compute(sa);
                int[]    predictedAlo = { (int)Math.Round(predicted[0]) + alloNewState[0], (int)Math.Round(predicted[1]) + alloNewState[1] };
                double   reward       = predicted[2];

                // Hand-coded prediction, kept for comparison with the network.
                double handCodedReward; int[] handCodedPredictedAlo;
                handCodedPrediction(egoNewState, a, out handCodedReward, alloNewState, out handCodedPredictedAlo);

                Console.WriteLine("action " + a[0] + "," + a[1] + " -> " + predictedAlo[0] + "," + predictedAlo[1] + " reward: " + reward);

                // Seed the allo model only where it still holds the default Q value.
                if (saHistory.Count >= 50 && alloModel.value(alloNewState, a) == alloModel.defaultQ)
                {
                    if (fullPredictionMode)
                    {
                        alloModel.update(new StateTransition<int[], int[]>(alloNewState, a, reward, predictedAlo));
                    }
                    else
                    {
                        alloModel.Qtable[alloNewState][a] = egoModel.value(egoNewState, a);
                    }
                }
            }


            return 0;
        }
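
This example keeps a sliding 500-sample window of (ego state + action) inputs paired with (dx, dy, reward) targets and periodically retrains a network on it. Here is a minimal sketch of that buffer pattern in isolation; TransitionBuffer and its members are hypothetical names standing in for the saHistory/sPrimeHistory queues.

using System;
using System.Collections.Generic;

class TransitionBuffer
{
    const int Capacity = 500;
    readonly Queue<double[]> inputs  = new Queue<double[]>();  // ego state + action
    readonly Queue<double[]> targets = new Queue<double[]>();  // dx, dy, reward

    public void Add(double[] egoStateAction, int[] alloOld, int[] alloNew, double reward)
    {
        if (inputs.Count >= Capacity)  // keep only the most recent samples
        {
            inputs.Dequeue();
            targets.Dequeue();
        }
        inputs.Enqueue(egoStateAction);
        targets.Enqueue(new double[] {
            alloNew[0] - alloOld[0],   // regression target: x displacement,
            alloNew[1] - alloOld[1],   // y displacement,
            reward                     // and the observed reward
        });
    }

    // Snapshots in the array-of-arrays form RunEpoch-style trainers expect.
    public double[][] Inputs  { get { return inputs.ToArray(); } }
    public double[][] Targets { get { return targets.ToArray(); } }
}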