/// <summary>
/// Returns an action for the given state using an epsilon-greedy policy
/// restricted to the actions allowed in that state.
/// </summary>
public int Act(double[] state)
{
    var s = StateKey(state);
    var a = 0;
    // gather the policy probabilities of the allowed actions
    var poss = AllowedActions(state);
    var probs = new List<double>();
    for (var i = 0; i < poss.Length; i++)
    {
        probs.Add(P[poss[i] * NS + s]);
    }
    // epsilon-greedy policy
    if (Tembo.Random() < Options.Epsilon)
    {
        a = poss[Tembo.RandomInt(0, poss.Length)]; // random available action
        Explored = true;
    }
    else
    {
        a = poss[Tembo.SampleWeighted(probs.ToArray())];
        Explored = false;
    }
    // shift state memory
    s0 = s1;
    a0 = a1;
    s1 = s;
    a1 = a;
    return a;
}
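// Usage sketch: a minimal act/learn loop for this tabular agent. This is an
// illustration under assumptions, not repo code: IEnvironment, Observe() and
// Step() are hypothetical stand-ins, and Learn(reward) is inferred from the
// (s0, a0, s1, a1) memory that Act() shifts above.
public interface IEnvironment
{
    double[] Observe();      // current observation, one entry per state feature
    double Step(int action); // apply the action and return the reward
}

public void RunEpisodeSketch(IEnvironment env, int steps)
{
    for (var t = 0; t < steps; t++)
    {
        var state = env.Observe();
        var action = Act(state);       // epsilon-greedy over AllowedActions(state)
        var reward = env.Step(action);
        Learn(reward);                 // assumed TD update against the stored (s0, a0, s1, a1)
    }
}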
/// <summary>
/// Returns an action from a state.
/// </summary>
/// <param name="state">State vector; its length must equal NumberOfStates.</param>
/// <returns>The index of the chosen action.</returns>
public int Act(double[] state)
{
    Tembo.Assert(state.Length == NumberOfStates, $"Current state({state.Length}) not equal to NS({NumberOfStates})");
    var a = 0;
    // convert to a Mat column vector
    var s = new Matrix(NumberOfStates, 1);
    s.Set(state);
    // epsilon-greedy policy
    if (Tembo.Random() < Options.Epsilon)
    {
        a = Tembo.RandomInt(0, NumberOfActions);
    }
    else
    {
        // greedy w.r.t. the Q function
        var amat = ForwardQ(Network, s, false);
        a = Tembo.Maxi(amat.W); // index of the argmax action
    }
    // shift state memory
    this.s0 = this.s1;
    this.a0 = this.a1;
    this.s1 = s;
    this.a1 = a;
    return a;
}
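// Usage sketch for the DQN variant. The constructor and the Act()/Learn()
// pairing mirror how BLearning() below drives a candidate agent; the
// environment loop itself is hypothetical (IEnvironment as sketched above),
// and the option values are assumed placeholders, not recommended settings.
public static void DqnLoopSketch(IEnvironment env, int numberOfStates, int numberOfActions, int steps)
{
    var options = new AgentOptions { Gamma = 0.9, Epsilon = 0.2, Alpha = 0.005 };
    var agent = new DQN(numberOfStates, numberOfActions, options);
    for (var t = 0; t < steps; t++)
    {
        var state = env.Observe();     // must have length numberOfStates or Act() asserts
        var action = agent.Act(state); // epsilon-greedy over the Q network's outputs
        var reward = env.Step(action);
        agent.Learn(reward);           // reward is credited to the previous (state, action)
    }
}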
private void BLearning()
{
    while (true)
    {
        if (Historical.Count < 20000)
        {
            // not enough samples to evaluate a candidate yet; wait and retry
            Thread.Sleep(TimeSpan.FromMinutes(30));
            continue;
        }
        var correct = 0.0;
        var total = 0.0;
        // sample a random hyperparameter configuration for the candidate agent
        var options = new AgentOptions
        {
            Gamma = Tembo.Random(0.01, 0.99),
            Epsilon = Tembo.Random(0.01, 0.75),
            Alpha = Tembo.Random(0.01, 0.99),
            ExperinceAddEvery = Tembo.RandomInt(1, 10000),
            ExperienceSize = 0,
            LearningSteps = Tembo.RandomInt(1, 10),
            HiddenUnits = Tembo.RandomInt(100000, 100000000), // hidden-layer size, sampled from a very wide range
            ErrorClamp = Tembo.Random(0.01, 1.0),
            AdaptiveLearningSteps = true
        };
        var agent = new DQN(dqnAgent.NumberOfStates, dqnAgent.NumberOfActions, options);
        // replay the historical samples and score the candidate
        for (var i = 0; i < Historical.Count; i++)
        {
            var spi = Historical.ElementAt(i);
            var action = agent.Act(spi.Value.Values);
            if (action == spi.Value.Output)
            {
                correct += 1;
                agent.Learn(1);  // reward a correct prediction
            }
            else
            {
                agent.Learn(-1); // penalize an incorrect one
            }
            total += 1;
        }
        // winrate is already a percentage, so format it directly rather than
        // with the "p" format specifier (which would multiply by 100 again)
        var winrate = (correct / total) * 100;
        if (winrate > WinRate)
        {
            CN.Log($"NEW AGENT DISCOVERED --> WINRATE {winrate:F2}%, CLASS: {AgentName}", 2);
            Save();
            dqnAgent = agent;
            WinRate = winrate;
        }
    }
}
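// Launch sketch: BLearning() never returns, so it is presumably meant to run
// on a background thread (an assumption; the actual caller is not shown here).
// Note that it swaps dqnAgent from this thread without synchronization, so
// callers of Act() on other threads may briefly see the old agent.
public void StartBackgroundSearchSketch()
{
    var searchThread = new Thread(BLearning)
    {
        IsBackground = true // don't keep the process alive just for the search
    };
    searchThread.Start();
}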