/// <summary>
/// Runs one SARSA(lambda) step: bumps the eligibility value of the previous
/// state/action pair, executes the action, observes reward and next state,
/// then updates the Q table and the eligibility table.
/// </summary>
/// <param name="currentState">State passed in by the caller; it is only used by the first-round initialisation and is then overwritten with the newly determined state.</param>
/// <param name="strategie">Strategy used to look up and choose Q and E entries.</param>
private void sARSALAMBDA(State currentState, Strategie strategie)
{
    // Seeds lastState / lastQValue / action while the last action is still the Begin marker.
    firstRound(currentState, strategie);

    // Q(s,a)
    QValue previousEntry = strategie.getQValue(lastState, action, qValues);

    // e(s,a) <- e(s,a) + 1
    // This stacks the value; a "replacing" variant would call setValue(1) instead.
    EValue trace = strategie.getEValue(lastState, action, eValues);
    var bumpedTrace = trace.getValue() + 1;
    trace.setValue(bumpedTrace);

    // Execute a.
    ExecuteAction(previousEntry.getAction());

    // Observe r.
    reward = getReward();

    // Observe s'.
    currentState = DetermineState();

    // Q(s',a') chosen according to the strategy.
    QValue nextEntry = strategie.getQValueForStrategie(currentState, qValues);

    var previousAction = lastQValue.getAction();
    var nextAction = nextEntry.getAction();

    // Update the Q table.
    qValues = SARSA_LAMBDA.updateQTable(lastState, previousAction, currentState, nextAction, reward, qValues, eValues);

    // e(s,a) <- gamma * lambda * e(s,a), delegated to updateETable.
    eValues = SARSA_LAMBDA.updateETable(lastState, previousAction, currentState, nextAction, eValues);

    // s <- s', a <- a'
    lastState = currentState;
    lastQValue = nextEntry;
    action = nextAction;
}
/// <summary>
/// Selects the Q-table entry with the highest value among all entries whose
/// state equals <paramref name="state"/>.
/// </summary>
/// <param name="state">State whose entries are compared.</param>
/// <param name="qTable">Q table to search.</param>
/// <returns>
/// The highest-valued entry for the state. On ties the entry that appears
/// first in <paramref name="qTable"/> wins.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// <paramref name="qTable"/> contains no entry for <paramref name="state"/>.
/// </exception>
public static QValue SelectClassifierWithBestActionFromQTable(State state, List<QValue> qTable)
{
    List<QValue> candidates = qTable.FindAll(c => c.getState().Equals(state));
    if (candidates.Count == 0)
    {
        throw new System.InvalidOperationException("The Q table contains no entry for the requested state.");
    }

    // Scan every candidate instead of assuming exactly two actions per state.
    QValue best = candidates[0];
    for (int i = 1; i < candidates.Count; i++)
    {
        // Same comparison as the former two-entry version: keep the earlier
        // entry whenever it is >= the later one.
        if (!(best.getValue() >= candidates[i].getValue()))
        {
            best = candidates[i];
        }
    }

    return best;
}
/// <summary>
/// Runs one SARSA step: executes the action stored for the previous state,
/// observes reward and next state, chooses the next entry according to the
/// strategy and updates the Q table.
/// </summary>
/// <param name="currentState">State passed in by the caller; it is only used by the first-round initialisation and is then overwritten with the newly determined state.</param>
/// <param name="strategie">Strategy used to look up and choose Q entries.</param>
private void sARSA(State currentState, Strategie strategie)
{
    // Seeds lastState / lastQValue / action while the last action is still the Begin marker.
    firstRound(currentState, strategie);

    // Q(s,a)
    QValue previousEntry = strategie.getQValue(lastState, action, qValues);

    // Execute a.
    ExecuteAction(previousEntry.getAction());

    // Observe r.
    reward = getReward();

    // Observe s'.
    currentState = DetermineState();

    // Q(s',a') chosen according to the strategy.
    QValue nextEntry = strategie.getQValueForStrategie(currentState, qValues);
    var nextAction = nextEntry.getAction();

    // Update the table.
    qValues = SARSA.updateTable(lastState, lastQValue.getAction(), currentState, nextAction, reward, qValues);

    // s <- s', a <- a'
    lastState = currentState;
    lastQValue = nextEntry;
    action = nextAction;
}
/// <summary>
/// Reads the request's Accept-Encoding header and, when gzip or deflate is
/// selected, appends the matching Content-Encoding header and wraps the
/// response filter in the corresponding compression stream.
/// </summary>
/// <param name="request">Request whose Accept-Encoding header is read.</param>
/// <param name="response">Response that receives the header and the filter.</param>
public static void AddCompressionFilter(HttpRequest request, HttpResponse response)
{
    // Parse what the client sent in Accept-Encoding.
    var acceptedEncodings = new QValueList(request.Headers["Accept-Encoding"]);

    // Of the encodings we can produce, take the one the client prefers.
    QValue chosen = acceptedEncodings.FindPreferred("gzip", "deflate", "identity");

    // Nothing matched, but the client accepts wildcard encodings and has no
    // gzip entry of its own: default to gzip.
    bool fallBackToGzip = chosen.IsEmpty
        && acceptedEncodings.AcceptWildcard
        && acceptedEncodings.Find("gzip").IsEmpty;
    if (fallBackToGzip)
    {
        chosen = new QValue("gzip");
    }

    string encodingName = chosen.Name;
    if (encodingName == "gzip")
    {
        response.AppendHeader("Content-Encoding", "gzip");
        response.Filter = new GZipStream(response.Filter, CompressionMode.Compress);
    }
    else if (encodingName == "deflate")
    {
        response.AppendHeader("Content-Encoding", "deflate");
        response.Filter = new DeflateStream(response.Filter, CompressionMode.Compress);
    }
    // "identity" and anything else: leave the response untouched.
}
/// <summary>
/// Selects an action from the given Q-value by sampling from the softmax
/// distribution over <c>GetExponent(value[i])</c>.
/// </summary>
/// <param name="value">The Q-value</param>
/// <returns>The index of the action to execute (0 when <paramref name="value"/> has no actions)</returns>
public int Select(QValue value)
{
    // Draw first so the generator is consumed exactly once per call,
    // regardless of how many actions there are.
    double sample = this.random.NextDouble();

    int actionCount = value.Count;
    var weights = new double[actionCount];

    // Find the largest exponent. Subtracting it before Math.Exp leaves the
    // resulting probabilities mathematically unchanged but keeps Math.Exp from
    // overflowing to Infinity (which would turn every probability into NaN
    // and make the method always answer 0).
    double maxExponent = double.NegativeInfinity;
    for (int i = 0; i < actionCount; i++)
    {
        weights[i] = GetExponent(value[i]);
        if (weights[i] > maxExponent)
        {
            maxExponent = weights[i];
        }
    }

    // Turn the exponents into unnormalised weights and sum them.
    double sumExp = 0.0;
    for (int i = 0; i < actionCount; i++)
    {
        weights[i] = Math.Exp(weights[i] - maxExponent);
        sumExp += weights[i];
    }

    // Walk the cumulative distribution until it passes the sample.
    double upper = 0.0;
    for (int i = 0; i < actionCount; i++)
    {
        upper += weights[i] / sumExp;
        if (sample < upper)
        {
            return(i);
        }
    }

    // Reached only when rounding leaves the cumulative sum slightly below the
    // sample (or the inputs were not finite): the sample then belongs to the
    // last action, not the first.
    return(Math.Max(0, actionCount - 1));
}
/// <summary>
/// Reads the current request's Accept-Encoding header and, when gzip or
/// deflate is selected, appends the matching Content-Encoding header and
/// wraps the response filter in the corresponding compression stream.
/// </summary>
protected void HandleCompression()
{
    var context = Http.Context;

    // load encodings from header
    QValueList encodings = new QValueList(context.Request.Headers["Accept-Encoding"]);

    // get the types we can handle, can be accepted and
    // in the defined client preference
    QValue preferred = encodings.FindPreferred("gzip", "deflate", "identity");

    // if none of the preferred values were found, but the
    // client can accept wildcard encodings, we'll default
    // to Gzip.
    if (preferred.IsEmpty && encodings.AcceptWildcard && encodings.Find("gzip").IsEmpty)
    {
        preferred = new QValue("gzip");
    }

    // NOTE(review): when nothing matched, preferred is still empty and its Name
    // is presumably null - confirm against QValue. Guard the call so that case
    // falls through to "no compression" instead of throwing.
    string encodingName = (preferred.Name ?? string.Empty).ToLowerInvariant();

    // handle the preferred encoding
    switch (encodingName)
    {
        case "gzip":
            context.Response.AppendHeader("Content-Encoding", "gzip");
            context.Response.Filter = new GZipStream(context.Response.Filter, CompressionMode.Compress);
            break;

        case "deflate":
            context.Response.AppendHeader("Content-Encoding", "deflate");
            context.Response.Filter = new DeflateStream(context.Response.Filter, CompressionMode.Compress);
            break;

        case "identity":
        default:
            break;
    }
}
/// <summary>
/// Checks that an epsilon-greedy policy picks the best action with a
/// frequency close to (1 - epsilon) + epsilon / actionCount.
/// </summary>
public void TestRandomProbability()
{
    const double epsilon = 0.4;
    const int numTests = 3000;

    var random = new Random(1337);
    var eGreedy = new EGreedy(epsilon, random);
    var qValue = new QValue(new double[] { 121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015 });

    // Uses the same generator as the policy, so this call must stay ahead of the sampling loop.
    var bestAction = PolicyHelpers.SelectMax(qValue, random);

    int bestHits = 0;
    for (int trial = 0; trial < numTests; trial++)
    {
        if (eGreedy.Select(qValue) == bestAction)
        {
            bestHits++;
        }
    }

    double expectedRate = (1 - epsilon) + epsilon * (1.0 / qValue.Count);
    double observedRate = bestHits / (double)numTests;
    Assert.AreEqual(expectedRate, observedRate, 0.05);
}
/// <summary>
/// Reads the Accept-Encoding request header and, when gzip or deflate is
/// selected, appends the matching Content-Encoding header and wraps the
/// response filter in the corresponding compression stream.
/// </summary>
/// <param name="requestHeaders">Headers of the incoming request.</param>
/// <param name="response">Response that receives the header and the filter.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public static void ChooseSuitableCompression(NameValueCollection requestHeaders, HttpResponseBase response)
{
    if (requestHeaders == null)
    {
        throw new ArgumentNullException(nameof(requestHeaders));
    }

    if (response == null)
    {
        throw new ArgumentNullException(nameof(response));
    }

    // Parse what the client sent in Accept-Encoding.
    var acceptedEncodings = new QValueList(requestHeaders[ACCEPT_ENCODING_HEADER]);

    // Of the encodings we can produce, take the one the client prefers.
    QValue chosen = acceptedEncodings.FindPreferred("gzip", "deflate", "identity");

    // Nothing matched, but the client accepts wildcard encodings and has no
    // gzip entry of its own: default to gzip.
    if (chosen.IsEmpty)
    {
        if (acceptedEncodings.AcceptWildcard && acceptedEncodings.Find("gzip").IsEmpty)
        {
            chosen = new QValue("gzip");
        }
    }

    string encodingName = chosen.Name;

    if (encodingName == "gzip")
    {
        response.AppendHeader(CONTENT_ENCODING_HEADER, "gzip");
        response.Filter = new GZipStream(response.Filter, CompressionMode.Compress);
        return;
    }

    if (encodingName == "deflate")
    {
        response.AppendHeader(CONTENT_ENCODING_HEADER, "deflate");
        response.Filter = new DeflateStream(response.Filter, CompressionMode.Compress);
    }

    // "identity" and anything else: leave the response untouched.
}
/// <summary>
/// Checks that epsilon starts at 0.5, drops by 0.1 on each of the first five
/// updates and then stays at 0.0.
/// </summary>
public void TestDecay()
{
    var random = new Random();
    var epsilon = 0.5;
    var eGreedy = new EGreedy(epsilon, random, DecayHelpers.ConstantDecay(1, 5, 0.5, 0.0));
    var qValue = new QValue(new double[] { 121, 231, 425, 676, 812, 1012, 1231, 1301, 1412, 1541, 1701, 2015 });

    var valueEpsilon = 0.00000000001;

    // Epsilon expected after Update(1), Update(2), ... Update(7).
    double[] expectedAfterUpdate = { 0.4, 0.3, 0.2, 0.1, 0.0, 0.0, 0.0 };

    // Before any update the initial epsilon is reported.
    Assert.AreEqual(0.5, eGreedy.Epsilon, valueEpsilon);

    for (int step = 1; step <= expectedAfterUpdate.Length; step++)
    {
        eGreedy.Update(step);
        Assert.AreEqual(expectedAfterUpdate[step - 1], eGreedy.Epsilon, valueEpsilon);
    }
}
/// <summary>
/// Initialises lastState, lastQValue and action from the given state, but
/// only while the last recorded action is still the Begin marker.
/// </summary>
/// <param name="currentState">State to start from.</param>
/// <param name="strategie">Strategy used to choose the first Q entry.</param>
private void firstRound(State currentState, Strategie strategie)
{
    bool alreadyStarted = !lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin);
    if (alreadyStarted)
    {
        return;
    }

    lastState = currentState;
    lastQValue = strategie.getQValueForStrategie(lastState, qValues);
    action = lastQValue.getAction();
}
/// <summary>
/// Applies the update
/// Q(S, A) = Q(S, A) + learningRate * (r + facteurDeReduction * Q(newS, GreedyPolicy(newS)) - Q(S, A))
/// and stores the result back in the Q table.
/// </summary>
/// <param name="S">State in which the input was applied.</param>
/// <param name="A">Input that was applied.</param>
/// <param name="newS">State reached afterwards.</param>
/// <param name="r">Reward received.</param>
public override void Reward(GameState S, MyInput A, GameState newS, float r)
{
    QValue current = Q(S, A);

    // Input the greedy policy picks in the new state, and its Q value.
    MyInput greedyInput = GreedyPolicy(newS);
    var greedyValue = Q(newS, greedyInput).value;

    current.value += learningRate * (r + facteurDeReduction * greedyValue - current.value);

    // Write the updated value back into the table.
    SetQTable(current.S, current.A, current.value);
}
/// <summary>
/// Runs one Q-learning step: chooses an entry for the current state, executes
/// its action and, unless the previous action is still the Begin marker,
/// updates the Q table from the previous entry, the chosen entry and the reward.
/// </summary>
/// <param name="currentState">State to act in.</param>
/// <param name="strategie">Strategy used to choose the Q entry.</param>
private void qLearning(State currentState, Strategie strategie)
{
    var chosenEntry = strategie.getQValueForStrategie(currentState, qValues);
    ExecuteAction(chosenEntry.getAction());

    // No previous entry to learn from on the very first step.
    bool hasPreviousStep = !lastQValue.getAction().Equals(Assets.Scripts.Enum.Action.Begin);
    if (hasPreviousStep)
    {
        qValues = QLearning.updateTable(lastQValue, chosenEntry, qValues, getReward());
    }

    lastState = currentState;
    lastQValue = chosenEntry;
}
/// <summary>
/// Selects an action from the given Q-value: a random action when the drawn
/// number is below epsilon, otherwise the best action.
/// </summary>
/// <param name="value">The Q-value</param>
/// <returns>The index of the action to execute</returns>
public int Select(QValue value)
{
    bool explore = this.random.NextDouble() < this.epsilon;

    return explore
        ? PolicyHelpers.SelectRandom(value, this.random)
        : PolicyHelpers.SelectMax(value, this.random);
}
/// <summary>
/// Fills every cell of RewardMap with a new QValue built from its indices and
/// calls isWall() on the cells whose map spot is of type WALL.
/// </summary>
public void initRewardMap()
{
    for (int row = 0; row < mapSize; row++)
    {
        for (int col = 0; col < mapSize; col++)
        {
            RewardMap[row, col] = new QValue(row, col);

            bool spotIsWall = map[row][col].spotType == Spot.SPOT_TYPE.WALL;
            if (spotIsWall)
            {
                // NOTE(review): presumably marks the cell as a wall; the return
                // value, if any, is ignored - confirm against QValue.isWall().
                RewardMap[row, col].isWall();
            }
        }
    }
}
/// <summary>
/// Updates every entry of <paramref name="qTable"/> that equals
/// <paramref name="lastClassifier"/> using
/// new = old + learningRate * (reward + gamma * best - old),
/// where best is the value of the entry SelectClassifierWithBestActionFromQTable
/// returns for the state of <paramref name="currentClassifier"/>.
/// </summary>
/// <param name="lastClassifier">Entry whose value is to be updated.</param>
/// <param name="currentClassifier">Entry whose state is used to look up the best follow-up value.</param>
/// <param name="qTable">Table that is searched and modified in place.</param>
/// <param name="reward">Reward used in the update.</param>
/// <returns>The same <paramref name="qTable"/> instance.</returns>
public static List <QValue> updateTable(QValue lastClassifier, QValue currentClassifier, List <QValue> qTable, float reward)
{
    foreach (QValue entry in qTable)
    {
        if (!entry.Equals(lastClassifier))
        {
            continue;
        }

        // Looked up per matching entry rather than once up front: an earlier
        // update in this same pass may already have changed the table.
        var bestNextValue = SelectClassifierWithBestActionFromQTable(currentClassifier.getState(), qTable).getValue();
        var oldValue = entry.getValue();

        double updatedValue = oldValue + learningRate * (reward + gamma * bestNextValue - oldValue);
        entry.setValue(updatedValue);
    }

    return(qTable);
}
/// <summary>
/// Reads the Accept-Encoding request header and, when gzip or deflate is
/// selected, appends the matching Content-Encoding header and wraps the
/// response filter in the corresponding compression stream.
/// </summary>
/// <param name="requestHeaders">Headers of the incoming request.</param>
/// <param name="response">Response that receives the header and the filter.</param>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
public static void ChooseSuitableCompression(NameValueCollection requestHeaders, HttpResponseBase response)
{
    if (requestHeaders == null)
        throw new ArgumentNullException(nameof(requestHeaders));
    if (response == null)
        throw new ArgumentNullException(nameof(response));

    // Encodings the client listed in its Accept-Encoding header.
    var clientEncodings = new QValueList(requestHeaders[ACCEPT_ENCODING_HEADER]);

    // Best match among the encodings we support, by client preference.
    QValue selected = clientEncodings.FindPreferred("gzip", "deflate", "identity");

    // No match, wildcard accepted, and no gzip entry from the client: use gzip.
    bool useGzipByDefault = selected.IsEmpty
        && clientEncodings.AcceptWildcard
        && clientEncodings.Find("gzip").IsEmpty;
    if (useGzipByDefault)
        selected = new QValue("gzip");

    string encodingName = selected.Name;
    if (encodingName == "gzip")
    {
        response.AppendHeader(CONTENT_ENCODING_HEADER, "gzip");
        response.Filter = new GZipStream(response.Filter, CompressionMode.Compress);
    }
    else if (encodingName == "deflate")
    {
        response.AppendHeader(CONTENT_ENCODING_HEADER, "deflate");
        response.Filter = new DeflateStream(response.Filter, CompressionMode.Compress);
    }
    // "identity" and anything else: no header, no filter.
}
/// <summary>
/// When compression is enabled for the request, reads its Accept-Encoding
/// header and, for gzip or deflate, appends the matching Content-Encoding
/// header, wraps the response filter in the corresponding compression stream
/// and records the encoding through AddToCookie.
/// </summary>
/// <param name="Request">Request whose Accept-Encoding header is read.</param>
/// <param name="Response">Response that receives the header and the filter.</param>
public static void ApplyCompression(HttpRequest Request, HttpResponse Response)
{
    if (!IsCompressEnabled(Request))
    {
        return;
    }

    // Encodings the client listed in its Accept-Encoding header.
    var clientEncodings = new QValueList(Request.Headers["Accept-Encoding"]);

    // Best match among the encodings we support, by client preference.
    QValue selected = clientEncodings.FindPreferred("gzip", "deflate", "identity");

    // No match, wildcard accepted, and no gzip entry from the client: use gzip.
    if (selected.IsEmpty)
    {
        if (clientEncodings.AcceptWildcard && clientEncodings.Find("gzip").IsEmpty)
        {
            selected = new QValue("gzip");
        }
    }

    string encodingName = selected.Name;
    if (encodingName == "gzip")
    {
        Response.AppendHeader("Content-Encoding", "gzip");
        Response.Filter = new GZipStream(Response.Filter, CompressionMode.Compress);
        AddToCookie(Response, Request, "gzip");
    }
    else if (encodingName == "deflate")
    {
        Response.AppendHeader("Content-Encoding", "deflate");
        Response.Filter = new DeflateStream(Response.Filter, CompressionMode.Compress);
        AddToCookie(Response, Request, "deflate");
    }
    // "identity" and anything else: leave the response untouched.
}
/// <summary>
/// After a result has executed without an exception (and not as a child
/// action), reads the request's Accept-Encoding header and, for gzip or
/// deflate, appends the matching Content-encoding header and wraps the
/// response filter in the corresponding compression stream.
/// </summary>
/// <param name="filterContext">Context of the executed result.</param>
/// <exception cref="ArgumentNullException"><paramref name="filterContext"/> is null.</exception>
public override void OnResultExecuted(ResultExecutedContext filterContext)
{
    if (filterContext == null)
    {
        throw new ArgumentNullException("filterContext");
    }

    // Skip failed actions and child actions.
    if (filterContext.Exception != null || filterContext.IsChildAction)
    {
        return;
    }

    var httpContext = filterContext.HttpContext;
    var request = httpContext.Request;
    var response = httpContext.Response;

    // Without an existing filter there is nothing to wrap.
    if (response.Filter == null)
    {
        return;
    }

    var acceptedEncodings = new QValueList(request.Headers["Accept-Encoding"]);
    var chosen = acceptedEncodings.FindPreferred("gzip", "deflate", "identity");

    // Nothing matched, but the client accepts wildcard encodings and has no
    // gzip entry of its own: default to gzip.
    bool fallBackToGzip = chosen.IsEmpty
        && acceptedEncodings.AcceptWildcard
        && acceptedEncodings.Find("gzip").IsEmpty;
    if (fallBackToGzip)
    {
        chosen = new QValue("gzip");
    }

    string encodingName = chosen.Name;
    if (encodingName == "gzip")
    {
        response.AppendHeader("Content-encoding", "gzip");
        response.Filter = new GZipStream(response.Filter, CompressionMode.Compress);
    }
    else if (encodingName == "deflate")
    {
        response.AppendHeader("Content-encoding", "deflate");
        response.Filter = new DeflateStream(response.Filter, CompressionMode.Compress);
    }
    // "identity" and anything else: leave the response untouched.
}
/// <summary>
/// Selects the best action from the given Q-value.
/// If there are more than one best value, a random one is chosen uniformly.
/// </summary>
/// <param name="value">The Q-value</param>
/// <param name="random">The random generator</param>
/// <returns>The index of the action</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="value"/> has no action that can be selected (it is empty or
/// none of its entries compares equal to or above negative infinity, e.g. all NaN).
/// </exception>
public static int SelectMax(QValue value, Random random)
{
    var best = new List <int>();

    // Start below every representable value. Starting at double.MinValue would
    // leave entries equal to negative infinity unselectable and the candidate
    // list empty.
    var bestValue = double.NegativeInfinity;

    for (int i = 0; i < value.Count; i++)
    {
        double actionValue = value[i];

        if (actionValue > bestValue)
        {
            // Strictly better: restart the list of ties.
            best.Clear();
            bestValue = actionValue;
            best.Add(i);
        }
        else if (actionValue == bestValue)
        {
            best.Add(i);
        }
    }

    if (best.Count == 0)
    {
        // Same exception type the list indexer used to raise here, with a usable message.
        throw new ArgumentOutOfRangeException("value", "The Q-value contains no selectable action.");
    }

    return(best[random.Next(0, best.Count)]);
}
/// <summary>
/// Checks that, over 3000 softmax selections, the actions ordered by how often
/// they were selected come out in the order 0, 1, 2, 3.
/// </summary>
public void TestRandomProbability()
{
    const int numTests = 3000;

    var random = new Random(1337);
    var tau = 200;
    var softmax = new Softmax(tau, random);
    var qValue = new QValue(new double[] { 121, 231, 425, 676 });

    // The result is not used, but the call draws from the shared generator,
    // so it has to stay to keep the sampled sequence unchanged.
    var bestAction = PolicyHelpers.SelectMax(qValue, random);

    // One counter per action.
    var numSelected = Enumerable.Range(0, qValue.Count)
        .Select(actionIndex => new TestInstance() { Action = actionIndex })
        .ToArray();

    for (int trial = 0; trial < numTests; trial++)
    {
        int chosen = softmax.Select(qValue);
        numSelected[chosen].Count++;
    }

    // Least-selected first.
    var ranked = numSelected.OrderBy(entry => entry.Count).ToArray();

    for (int rank = 0; rank < 4; rank++)
    {
        Assert.AreEqual(rank, ranked[rank].Action);
    }
}
/// <summary>
/// Picks an action index at random from the range [0, value.Count).
/// </summary>
/// <param name="value">The Q-value</param>
/// <param name="random">The random generator</param>
/// <returns>The index of the action</returns>
public static int SelectRandom(QValue value, Random random)
{
    int actionCount = value.Count;
    int chosenIndex = random.Next(0, actionCount);
    return chosenIndex;
}
/// <summary>
/// Returns the position of <paramref name="qVal"/> in qValues, as reported by IndexOf.
/// </summary>
/// <param name="qVal">Entry to locate.</param>
/// <returns>The value returned by qValues.IndexOf for the entry.</returns>
private int returnIndexOfQValue(QValue qVal)
{
    int position = qValues.IndexOf(qVal);
    return position;
}
// The inverse is everything that is not this policy type.
// For example, if the policy is MoveEast, then we will get every other policy
// that is not MoveEast for the target tile.
protected abstract List <Policy> GetPolicyInverse(Tile t);

/// <summary>
/// Writes QValue to the console as exactly four characters: its string form
/// padded on the right to four characters and cut off after the fourth.
/// </summary>
public void PrintQValue()
{
    string text = QValue.ToString();
    string fourWide = text.PadRight(4).Substring(0, 4);
    Console.Write(fourWide);
}
/// <summary>
/// Updates <paramref name="firstQValue"/> in place using
/// new = old + learningRate * (reward + gamma * second - old),
/// where second is the value of <paramref name="secondQValue"/>.
/// </summary>
/// <param name="firstQValue">Entry whose value is updated.</param>
/// <param name="secondQValue">Entry whose value enters the update.</param>
/// <param name="reward">Reward used in the update.</param>
/// <returns>The same <paramref name="firstQValue"/> instance.</returns>
public static QValue updateTable(QValue firstQValue, QValue secondQValue, float reward)
{
    var oldValue = firstQValue.getValue();
    var followUpValue = secondQValue.getValue();

    firstQValue.setValue(oldValue + learningRate * (reward + gamma * followUpValue - oldValue));

    return(firstQValue);
}
/// <summary>
/// Reads the request's Accept-Encoding header and writes the matching
/// variant of the asset to the response: the deflate data or the gzip data
/// (each with its Content-Encoding header), otherwise the plain data. The
/// content length is set from whichever variant is written.
/// </summary>
/// <param name="request">Request whose Accept-Encoding header is read.</param>
/// <param name="response">Response the asset is written to.</param>
/// <param name="data">Asset holding the plain, deflate and gzip variants.</param>
private static async Task AddCompressionFilter(IOwinRequest request, IOwinResponse response, AssetData data)
{
    // Encodings the client listed in its Accept-Encoding header.
    var clientEncodings = new QValueList(request.Headers["Accept-Encoding"]);

    // Best match among the encodings we support, by client preference.
    QValue selected = clientEncodings.FindPreferred("gzip", "deflate", "identity");

    // No match, wildcard accepted, and no gzip entry from the client: use gzip.
    bool useGzipByDefault = selected.IsEmpty
        && clientEncodings.AcceptWildcard
        && clientEncodings.Find("gzip").IsEmpty;
    if (useGzipByDefault)
    {
        selected = new QValue("gzip");
    }

    string encodingName = selected.Name;
    if (encodingName == "deflate")
    {
        response.ContentLength = data.DeflateData.Length;
        response.Headers.Add("Content-Encoding", new[] { "deflate" });
        await response.WriteAsync(data.DeflateData).ConfigureAwait(false);
    }
    else if (encodingName == "gzip")
    {
        response.ContentLength = data.ZipData.Length;
        response.Headers.Add("Content-Encoding", new[] { "gzip" });
        await response.WriteAsync(data.ZipData).ConfigureAwait(false);
    }
    else
    {
        // Any other outcome: send the plain data.
        response.ContentLength = data.Data.Length;
        await response.WriteAsync(data.Data).ConfigureAwait(false);
    }
}
/// <summary>
/// Stores <paramref name="value"/> in _idsValues under <paramref name="name"/>
/// through the indexer.
/// </summary>
/// <param name="name">Key to store the value under.</param>
/// <param name="value">Value to store.</param>
public void DeclareValue(string name, QValue value)
{
    QValue declared = value;
    _idsValues[name] = declared;
}