Beispiel #1
0
        /// <summary>
        /// Performs a single temporal-difference learning step on <c>Network</c> and
        /// returns the TD error that was written into the gradient.
        /// </summary>
        /// <remarks>
        /// The target is <c>r0 + Options.Gamma * Q(s1, a*)</c>, where <c>a*</c> is the index
        /// returned by <c>Tembo.Maxi</c> over the Q-values of <c>s1</c>. The error is the
        /// predicted <c>Q(s0, a0)</c> minus that target, limited to the range
        /// [-<c>Options.ErrorClamp</c>, +<c>Options.ErrorClamp</c>].
        /// The method was translated from a tuple-style signature
        /// <c>(Matrix s0, int a0, double r0, Matrix s1, int a1)</c>.
        /// NOTE(review): the body reads <c>s0</c>, <c>a0</c>, <c>r0</c> and <c>s1</c>, none of
        /// which are declared in this method, and never reads <paramref name="experience"/>.
        /// Presumably they are members of the enclosing class - confirm they hold the values
        /// of the experience being replayed.
        /// </remarks>
        /// <param name="experience">See Experience. Not read by the current body.</param>
        /// <returns>The TD error after it has been limited by <c>Options.ErrorClamp</c>.</returns>
        private double LearnFromExperience(Experience experience)
        {
            // Goal: Q(s,a) = r + gamma * max_a' Q(s',a')
            // Step 1: build the target from the Q-values of the follow-up state.
            var nextQ = ForwardQ(Network, s1, false);
            var bestNextAction = Tembo.Maxi(nextQ.W);
            var target = r0 + Options.Gamma * nextQ.W[bestNextAction];

            // Step 2: predict Q for the starting state and measure how far off it is.
            var prediction = ForwardQ(Network, s0, true);
            var tdError = prediction.W[a0] - target;
            var limit = Options.ErrorClamp;

            // Huber-style robustification: keep the error inside [-limit, +limit].
            // The two checks are deliberately sequential (not else-if).
            if (tdError > limit)
            {
                tdError = limit;
            }
            if (tdError < -limit)
            {
                tdError = -limit;
            }

            // Step 3: seed the gradient for the taken action, back-propagate through
            // the last graph, then apply the update to the network parameters.
            prediction.DW[a0] = tdError;
            LastGraph.Backward();
            Tembo.UpdateNetwork(Network, Options.Alpha);

            return tdError;
        }