private void NextEpisode(SMDPAgent agent)
{
    config = RLConfig.Load("RLConfig.xml");
    if (this.Episodes > config.numEpisodes)
    {
        return;
    }
    Console.WriteLine("NextEpisode() called {0} times", cnt++);
    if (this.Episodes == 0) // first episode: reset the bookkeeping
    {
        fitness = 1;
        sum = 0;
        counter = 0;
    }
    this.Episodes++;
    agent.setEpsilon(config.lambda); // note: epsilon is set from config.lambda here, not config.epsilon
    fitness = RLGameWorld.FitnessValue(agent);
    reward = fitness; // forwarded as the reward to endEpisode
    sum += fitness;
    if (this.Episodes % count_fit == 0)
    {
        counter++;
        average_fitness = sum / count_fit; // mean fitness over the last count_fit episodes
        recordPerformance(counter, average_fitness);
        sum = 0;
    }
    //this.Previous = this.Current;
    //this.Current = this.Next;
    //this.Next = new KPEnvironment(this.Episode + 1);
}
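// Recording cadence, for orientation: if count_fit were 50, recordPerformance would
// receive one point per 50-episode window (the mean fitness over that window), so a
// 10,000-episode run would plot a 200-point learning curve. count_fit's actual value
// is defined elsewhere in this class.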
public void AllEpisodes()
{
    config = RLConfig.Load("RLConfig.xml");
    double[] widthArray = new double[config.numFeatures];
    for (int i = 0; i < config.numFeatures; i++)
    {
        widthArray[i] = 1.0 / config.numFeatures; // 1.0, not 1: integer division would give 0
    }
    switch (config.learningMethod)
    {
        case "Q_Learning":
        {
            Q_LearningAgent qlp = new Q_LearningAgent(config.numFeatures, config.numActions, true,
                fitness, widthArray, "weightsFile.data", "weightsFile.data");
            Console.WriteLine("AllEpisodes: running Q-learning");
            while (Episodes <= config.numEpisodes)
            {
                qlp.saveWeights("weightsFile.data"); // checkpoint every episode
                this.OneEpisode(qlp);
            }
            break;
        }
        case "SARSA":
        {
            SarsaAgent sap = new SarsaAgent(config.numFeatures, config.numActions, true,
                fitness, widthArray, "weightsFile.data", "weightsFile.data");
            Console.WriteLine("AllEpisodes: running SARSA");
            while (Episodes <= config.numEpisodes)
            {
                if (Episodes % 50 == 0) // checkpoint weights every 50 episodes
                {
                    sap.saveWeights("weightsFile.data");
                }
                this.OneEpisode(sap);
            }
            break;
        }
        default:
        {
            Console.WriteLine("Unknown learning method: " + config.learningMethod);
            break;
        }
    }
}
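/* For reference, a minimal sketch of what RLConfig.xml might contain, inferred from the
 * fields read throughout this file. The element names follow the config properties used
 * in the code; the values shown are illustrative, not taken from the repo:
 *
 *   <RLConfig>
 *     <numFeatures>13</numFeatures>
 *     <numActions>3</numActions>
 *     <numEpisodes>10000</numEpisodes>
 *     <learningMethod>SARSA</learningMethod>
 *     <alpha>0.125</alpha>
 *     <gamma>1.0</gamma>
 *     <lambda>0.0</lambda>
 *     <epsilon>0.01</epsilon>
 *     <traceability>0.01</traceability>
 *     <transfer>0</transfer>
 *     <num_Keepers>3</num_Keepers>
 *     <num_Takers>2</num_Takers>
 *     <source_Keepers>3</source_Keepers>
 *     <source_Takers>2</source_Takers>
 *     <visualize>0</visualize>
 *     <learning>1</learning>
 *   </RLConfig>
 */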
public static void Main(string[] args)
{
    RLGameWorld ng = new RLGameWorld();
    Thread play = new Thread(new ThreadStart(ng.Game));
    play.IsBackground = true;
    play.Start();
    config = RLConfig.Load("RLConfig.xml");
    RLProgram mainprog = new RLProgram();
    Console.WriteLine("Starting: visualize = {0}, learning = {1}", config.visualize, config.learning);
    if (config.visualize == 0 && config.learning == 1) // headless learning run
    {
        Console.WriteLine("Running all learning episodes");
        mainprog.AllEpisodes();
    }
    Console.ReadLine(); // keep the background game thread alive until the user presses Enter
}
public static double FitnessValue(SMDPAgent policy)
{
    int cycles = 0;
    config = RLConfig.Load("RLConfig.xml");
    // Hand the shared policy object to every player.
    switch (config.learningMethod)
    {
        case "Q_Learning":
            for (int i = 0; i < players.Count; i++)
            {
                players[i].qlPolicy = (Q_LearningAgent)policy;
            }
            break;
        case "SARSA":
            for (int i = 0; i < players.Count; i++)
            {
                players[i].saPolicy = (SarsaAgent)policy;
            }
            break;
        default:
            Console.WriteLine("Unknown learning method: " + config.learningMethod);
            break;
    }
    // Run simulator cycles until the referee flags the end of the episode.
    do
    {
        timer.RunCycle();
        cycles++;
    } while (!kref.episodeEnded);
    kref.episodeEnded = false;
    return cycles / 10.0;
}
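// The returned value is the episode length divided by 10.0; assuming the stadium runs
// at the usual 10 simulator cycles per second, this is the keepers' hold time in
// seconds. For example, an episode lasting 183 cycles scores 18.3, and NextEpisode()
// averages these scores over count_fit episodes before recording them.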
public override void keeper()
{
    config = RLConfig.Load("RLConfig.xml");
    // Detect the start of a new episode and close out the previous one with a
    // terminal reward of -1 (the ball was lost).
    // TODO: reset the episode start time and set last_action to -1 here.
    if (config.visualize == 1)
    {
        if (RLVisualisation.epi != 0 && RLVisualisation.epi != episodesCount)
        {
            episodesCount = RLVisualisation.epi;
            if (config.learningMethod == "SARSA")
            {
                saPolicy.endEpisode(-1); // alternative: RLVisualisation.reward
            }
            else
            {
                qlPolicy.endEpisode(-1); // alternative: RLVisualisation.reward
            }
        }
    }
    else
    {
        if (RLProgram.epi != 0 && RLProgram.epi != episodesCount)
        {
            episodesCount = RLProgram.epi;
            if (config.learningMethod == "SARSA")
            {
                saPolicy.endEpisode(-1); // alternative: RLProgram.reward
            }
            else
            {
                qlPolicy.endEpisode(-1); // alternative: RLProgram.reward
            }
        }
    }

    // If the ball is kickable, run the main action-selection routine.
    if (isBallKickable())
    {
        turns = 0;
        trajectoryChanges = 0;
        trajectorySame = 2;
        keeperWithBall();
        // note: control falls through to the intercept/support logic in this same cycle
    }

    // Otherwise find the teammate fastest to the ball.
    int iTmp = 0;
    int fastest = getFastestInSetTo(teammates, ballp, ref iTmp);

    // If this keeper is fastest, intercept the ball.
    if (fastest == Unum)
    {
        intercept();
        return;
    }

    // Not fastest: move to an open supporting position.
    turns = 0;
    keeperSupport(fastest);
}
public override void keeperWithBall()
{
    config = RLConfig.Load("RLConfig.xml");
    count++; // calls to keeperWithBall so far

    Vector[] tmatesp = null;
    Vector[] oppPlayersp = null;
    Vector posToPass = new Vector(0, 0);
    Command soc = new Command();
    soc.Com = "illegal";
    int action = 0, j = 0, h = 0;
    double ang1, ang2, ang3;

    tmatesp = myTeammatesPlayers(teammatesp, config.num_Keepers);
    oppPlayersp = myTeammatesPlayers(opp, config.num_Takers);
    tmatesp[config.num_Keepers - 1] = mySelfp;

    int possibleStates = config.numFeatures;
    double[] statesVector = new double[possibleStates];
    double[] closestmateOpp = new double[teammatesp.Length - 1];

    try
    {
        if (inputs == null)
        {
            inputs = new double[config.numFeatures];
        }
        for (int i = 0; i < closestmateOpp.Length; i++)
        {
            closestmateOpp[i] = 1000; // sentinel: no taker seen yet
        }
        // Four features per teammate: distance to me, distance to its closest taker,
        // distance from the field centre, and the smallest passing angle.
        for (int i = 0; i < tmatesp.Length; i++)
        {
            if (i != Unum - 1)
            {
                inputs[j++] = tmatesp[i].distance(mySelfp);
                getClosestInSetTo(oppPlayersp, tmatesp[i], ref closestmateOpp[h]);
                inputs[j++] = closestmateOpp[h++];
                inputs[j++] = DistFromCenter(tmatesp[i]);
                ang1 = mySelfp.angle(tmatesp[i]);
                ang2 = mySelfp.angle(oppPlayersp[0]);
                ang3 = mySelfp.angle(oppPlayersp[1]);
                inputs[j++] = Math.Min(Magnitude(ang1 - ang2), Magnitude(ang1 - ang3));
            }
        }
        // Two features per taker, then my own distance from the centre. These index
        // with j, continuing after the teammate features, as keeperstateVars() does.
        for (int i = 0; i < oppPlayersp.Length; i++)
        {
            inputs[j++] = oppPlayersp[i].distance(mySelfp);
            inputs[j++] = DistFromCenter(oppPlayersp[i]);
        }
        inputs[j] = DistFromCenter(mySelfp);
    }
    catch (IndexOutOfRangeException)
    {
        Console.WriteLine("Exception: array index out of range -> increase numFeatures");
        throw;
    }
    statesVector = inputs;

    switch (config.learningMethod)
    {
        case "Q_Learning":
        {
            if (qlPolicy == null)
            {
                Console.WriteLine("The policy object is null");
                base.keeperWithBall(); // fall back to the hand-coded behaviour
                return;
            }
            /* Reading the saved xml file so as to use it in the next episode step:
             * XDocument loadVector = XDocument.Load("Previous_State.xml");
             * var varVector = loadVector.Element("state-variables").Elements("variable");
             * double[] prevStateVector = varVector.Select(x => Double.Parse(x.Value)).ToArray();
             * XDocument loadValue = XDocument.Load("lactAction.xml");
             * lastAction = int.Parse(loadValue.Root.Element("lastAction").Value);
             * Console.WriteLine("---- last action is = {0}", lastAction); // verify by printing
             */
            // Round-trip check: write the state vector back out as a different file.
            XDocument testState = new XDocument();
            testState.Add(new XElement("state-variables",
                statesVector.Select(x => new XElement("variable", x))));
            testState.Save("Test_Previous_State.xml");

            if (statesVector.Length > 0) // we can calculate state variables
            {
                if (timeLastAction == 0)
                {
                    // First SMDP step of the episode.
                    action = qlPolicy.startEpisode(statesVector);
                }
                else if (timeLastAction == std.Time - 1 && lastAction > 0)
                {
                    // We were in the middle of a pass last cycle: follow through with it.
                    temp_reward = body.Time - timeLastAction;
                    action = lastAction;
                }
                else
                {
                    // All subsequent SMDP steps; the reward is the time elapsed since
                    // the last decision point.
                    temp_reward = body.Time - timeLastAction;
                    action = qlPolicy.step(temp_reward, statesVector);
                }
                lastAction = action;

                // Persist the previous state and action for the next episode step.
                XDocument prevState = new XDocument();
                prevState.Add(new XElement("state-variables",
                    statesVector.Select(x => new XElement("variable", x))));
                prevState.Save("Previous_State.xml");
                XDocument prevAction = new XDocument();
                prevAction.Add(new XElement("Actions", new XElement("lastAction", lastAction)));
                prevAction.Save("lactAction.xml");
            }
            else
            {
                // Not enough information to calculate state variables: hold the ball.
                action = 1;
                temp_reward = body.Time - timeLastAction;
            }
            max = Unum - 1;
            posToPass = mySelfp;
            lastAction = (action < 0) ? 0 : action;
            timeLastAction = body.Time;
            break;
        }
        case "SARSA":
        {
            if (saPolicy == null)
            {
                Console.WriteLine("The policy object is null");
                base.keeperWithBall(); // fall back to the hand-coded behaviour
                return;
            }
            // (State/action XML persistence is only enabled in the Q_Learning branch.)
            if (statesVector.Length > 0) // we can calculate state variables
            {
                if (timeLastAction == 0)
                {
                    // First SMDP step of the episode.
                    action = saPolicy.startEpisode(statesVector);
                }
                else if (timeLastAction == std.Time - 1 && lastAction > 0)
                {
                    // We were in the middle of a pass last cycle: follow through with it.
                    temp_reward = body.Time - timeLastAction;
                    action = lastAction;
                }
                else
                {
                    temp_reward = body.Time - timeLastAction;
                    action = saPolicy.step(temp_reward, statesVector);
                }
                lastAction = action;
            }
            else
            {
                action = 1; // hold ball
                temp_reward = body.Time - timeLastAction;
            }
            max = Unum - 1;
            posToPass = mySelfp;
            lastAction = (action < 0) ? 0 : action;
            timeLastAction = body.Time;
            break;
        }
        default:
            Console.WriteLine("Unknown learning method: " + config.learningMethod);
            break;
    }

    // Execute the chosen action: action == Unum - 1 means hold, anything else is a
    // pass to that teammate.
    if (action == Unum - 1)
    {
        holdBall(0.7);
        return;
    }
    else
    {
        Vector pos = new Vector(0, 0);
        pos.assign(teammates[action + 1].X, teammates[action + 1].Y);
        //pos.assign(predictPlayerPosAfterNrCycles(teammates[action + 1], 4, 30, null, null, false));
        // Clamp the pass target inside the 20x20 playing region.
        pos.assign((float)Math.Min(20 / 2, Math.Max(-20 / 2, pos.X)),
                   (float)Math.Min(20 / 2, Math.Max(-20 / 2, pos.Y)));
        directPass(pos, "fast");
        return;
    }
}
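// SMDP reward convention, as read from the branches above: decisions are only taken
// while the ball is kickable, so one learning step can span many simulator cycles.
// The reward passed to step() is the elapsed time body.Time - timeLastAction. For
// example, holding the ball for 3 cycles before the next decision point yields
// step(3, newState); losing the ball instead ends the episode via keeper(), which
// calls endEpisode(-1).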
double[] keeperstateVars(int stateCount)
{
    config = RLConfig.Load("RLConfig.xml");
    Vector[] tmatesp = null;
    Vector[] oppPlayersp = null;
    inputs = new double[stateCount];
    int j = 0, h = 0;
    double ang1, ang2, ang3;

    tmatesp = myTeammatesPlayers(teammatesp, config.num_Keepers);
    oppPlayersp = myTeammatesPlayers(opp, config.num_Takers);
    tmatesp[config.num_Keepers - 1] = mySelfp;

    double[] closestmateOpp = new double[teammatesp.Length - 1];
    double[] widthArray = new double[config.numFeatures];
    for (int i = 0; i < config.numFeatures; i++)
    {
        widthArray[i] = 1.0 / config.numFeatures; // 1.0, not 1: integer division would give 0
    }
    for (int i = 0; i < closestmateOpp.Length; i++)
    {
        closestmateOpp[i] = 1000; // sentinel: no taker seen yet
    }
    // Four features per teammate (same layout as keeperWithBall).
    for (int i = 0; i < tmatesp.Length; i++)
    {
        if (i != Unum - 1)
        {
            inputs[j++] = tmatesp[i].distance(mySelfp);
            getClosestInSetTo(oppPlayersp, tmatesp[i], ref closestmateOpp[h]);
            inputs[j++] = closestmateOpp[h++];
            inputs[j++] = DistFromCenter(tmatesp[i]);
            ang1 = mySelfp.angle(tmatesp[i]);
            ang2 = mySelfp.angle(oppPlayersp[0]);
            ang3 = mySelfp.angle(oppPlayersp[1]);
            inputs[j++] = Math.Min(Magnitude(ang1 - ang2), Magnitude(ang1 - ang3));
        }
    }
    // Two features per taker, then my own distance from the centre.
    for (int i = 0; i < oppPlayersp.Length; i++)
    {
        inputs[j++] = oppPlayersp[i].distance(mySelfp);
        inputs[j++] = DistFromCenter(oppPlayersp[i]);
    }
    inputs[j] = DistFromCenter(mySelfp);

    Console.Write("The state inputs are: ");
    for (int i = 0; i < inputs.Length; i++)
    {
        Console.Write("\t {0:f}", inputs[i]);
    }
    return inputs;
}
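// Feature-count check (an inference from the loops above): each of the
// (num_Keepers - 1) teammates contributes 4 features (distance to me, distance to its
// closest taker, distance from the field centre, smallest passing angle); each of the
// num_Takers opponents contributes 2 (distance to me, distance from centre); plus 1
// for my own distance from centre. For basic 3v2 keepaway:
//   numFeatures = 4 * (3 - 1) + 2 * 2 + 1 = 13
// which is the value stateCount must take for that task.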
public void Game()
{
    #region Initialization
    timer.AddReferee(kref);
    config = RLConfig.Load("RLConfig.xml");
    #endregion

    if (config.visualize == 1 && config.learning == 0) // heuristic policy with visualisation
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);
        RoboCup.FieldVisualizer f = new RoboCup.FieldVisualizer();
        std.Realtime = false;
        timer.OnCycle += f.OnStadiumUpdate;
        for (int i = 0; i < config.num_Keepers; i++)
        {
            std.addPlayer(new FixedKeepawayPlayer(std, "keepers", i + 1, "l",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < config.num_Takers; i++)
        {
            std.addPlayer(new FixedKeepawayPlayer(std, "takers", i + 1, "r",
                config.num_Keepers, config.num_Takers));
        }
        f.std = std;
        f.backgroundWorker1.DoWork +=
            new System.ComponentModel.DoWorkEventHandler(backgroundWorker1_DoWork);
        Application.Run(f);
    }
    else if (config.visualize == 1 && config.learning == 1) // reinforcement learning with visualisation
    {
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);
        RoboCup.FieldVisualizer f = new RoboCup.FieldVisualizer();
        std.Realtime = false;
        timer.OnCycle += f.OnStadiumUpdate;
        for (int i = 0; i < config.num_Keepers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "keepers", i + 1, "l",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < config.num_Takers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "takers", i + 1, "r",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < players.Count; i++)
        {
            std.addPlayer(players[i]);
        }
        f.std = std;
        // Invokes an event handler that drives the RLVisualisation methods.
        f.backgroundWorker1.DoWork +=
            new System.ComponentModel.DoWorkEventHandler(backgroundWorker2_DoWork);
        Application.Run(f);
    }
    else // reinforcement learning without visualisation
    {
        Console.WriteLine("Players initialised");
        std.Realtime = false;
        for (int i = 0; i < config.num_Keepers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "keepers", i + 1, "l",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < config.num_Takers; i++)
        {
            players.Add(new RLKeepawayPlayer(std, "takers", i + 1, "r",
                config.num_Keepers, config.num_Takers));
        }
        for (int i = 0; i < players.Count; i++)
        {
            std.addPlayer(players[i]);
        }
    }
}
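// Run-mode summary, derived from the branches above:
//   visualize = 1, learning = 0 : fixed (hand-coded) players with the field GUI
//   visualize = 1, learning = 1 : learning players with the field GUI
//   anything else               : learning players headless, driven by AllEpisodes()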
public Q_LearningAgent(int numFeatures, int numActions, bool bLearn, double rewards,
    double[] widths, string loadWeightsFile, string saveWeightsFile)
    : base(numFeatures, numActions)
{
    this.numActions = numActions;
    this.numFeatures = numFeatures;
    numTiles = numFeatures * numDimTiles;
    tileWidths = new double[numFeatures];
    Q = new double[numActions];
    weights = new double[numActions, numTiles]; // must be allocated before the loops below
    traces = new double[numActions, numTiles];
    traceAll = new double[numTiles];
    tile = new int[numFeatures, numDimTiles]; // number of tiles in each tiling
    actionTiles = new int[numActions, rl_memory_Size];
    tiless = new Tile[numFeatures, numDimTiles];
    nonzeroTraces = new int[rl_max_nonzero_Traces];
    nonzeroTracesInverse = new int[rl_memory_Size];
    currentState = new int[numActions, numDimTiles];
    previousState = new int[numActions, numDimTiles];
    weightsFile = saveWeightsFile;
    bLearning = bLearn;
    lastReward = rewards;
    for (int i = 0; i < getNumFeatures(); i++)
    {
        tileWidths[i] = widths[i];
    }

    config = RLConfig.Load("RLConfig.xml");
    alpha = config.alpha;
    gamma = config.gamma;
    lambda = config.lambda;
    epsilon = config.epsilon;
    minimumTrace = config.traceability;

    // TODO: each Q(s,a) should eventually be represented per memory slot.
    for (int j = 0; j < numActions; j++)
    {
        Q[j] = 0;
    }
    epochNum = 0;
    lastAction = -1;
    numNonzeroTraces = 0;

    if (config.transfer == 1)
    {
        // Transfer learning: map the source-task weights into this task.
        // Traces are reset for now; saving and reloading them is left to explore.
        weights = loadWeights(loadWeightsFile);
        for (int j = 0; j < numActions; j++)
        {
            for (int i = 0; i < numTiles; i++)
            {
                traces[j, i] = 0;
            }
        }
    }
    else
    {
        for (int j = 0; j < numActions; j++)
        {
            for (int i = 0; i < numTiles; i++)
            {
                weights[j, i] = 0;
                traces[j, i] = 0;
            }
        }
    }
    colTab = new collision_table(rl_memory_Size, 1);
    tiles obj_tiles = new tiles();
}
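/* A minimal usage sketch, mirroring how AllEpisodes constructs the agent. The file
 * names are the ones this repo already uses; everything else follows the constructor
 * signature above, with illustrative width values:
 *
 *   RLConfig config = RLConfig.Load("RLConfig.xml");
 *   double[] widths = new double[config.numFeatures];
 *   for (int i = 0; i < config.numFeatures; i++) { widths[i] = 1.0 / config.numFeatures; }
 *   Q_LearningAgent agent = new Q_LearningAgent(config.numFeatures, config.numActions,
 *       true, 0, widths, "weightsFile.data", "weightsFile.data");
 *
 * Internally the agent keeps one weight and one eligibility trace per (action, tile)
 * pair. With linear tile coding the textbook update, which this class's step() is
 * expected to implement but which has not been verified line by line here, is
 * w += alpha * delta * e, with delta = r + gamma * Q(s', a') - Q(s, a) and traces
 * decayed by gamma * lambda after each step.
 */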
// Inter-task mapping: expand weights learned on a smaller source task (e.g. 3v2)
// to the feature layout of the current, larger target task.
public double[,] loadWeights(string fileToLoad)
{
    config = RLConfig.Load("RLConfig.xml");
    double[,] currentWeight = new double[numActions, numTiles];
    if (File.Exists(fileToLoad))
    {
        BinaryFormatter bf = new BinaryFormatter();
        FileStream file = File.Open(fileToLoad, FileMode.Open);
        weights = (double[,])bf.Deserialize(file);
        file.Close();
        Console.WriteLine("Loading weights from {0}", fileToLoad);
    }
    else
    {
        return null;
    }
    for (int jj = 0; jj < numActions; jj++)
    {
        int j = 0; // read index into the source weights
        int v = 0; // write index into the target weights
        if (jj < sourceActions) // direct transfer: w_target = w_source for shared actions
        {
            // Teammate blocks: 4 features per teammate, tilesPerTiling tiles each.
            for (int k = 0; k < config.num_Keepers - 1; k++)
            {
                if (k < config.source_Keepers - 1)
                {
                    for (int s = 0; s < 4; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
                else
                {
                    // Extra teammate not present in the source task: rewind and
                    // reuse the last source teammate's weights.
                    j -= (4 * tilesPerTiling);
                    for (int s = 0; s < 4; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
            }
            // Taker blocks: 2 features per taker.
            for (int ii = 0; ii < config.num_Takers; ii++)
            {
                if (ii < config.source_Takers)
                {
                    for (int s = 0; s < 2; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
                else
                {
                    // Extra taker: reuse the last source taker's weights.
                    j -= (2 * tilesPerTiling);
                    for (int s = 0; s < 2; s++)
                    {
                        for (int tl = 0; tl < tilesPerTiling; tl++)
                        {
                            currentWeight[jj, v++] = weights[jj, j++];
                        }
                    }
                }
            }
            // Final block: own distance from centre, copied tile by tile.
            for (int tl = 0; tl < tilesPerTiling; tl++)
            {
                currentWeight[jj, v++] = weights[jj, j++];
            }
        }
        else // an action added in the target task, absent from the source
        {
            for (int i = 0; i < numTiles; i++)
            {
                currentWeight[jj, i] = 0; // start added actions from zero-initialised weights
            }
        }
    }
    return currentWeight;
}
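// Worked example of the mapping above, going from a 3v2 source (source_Keepers = 3,
// source_Takers = 2) to a 4v3 target (num_Keepers = 4, num_Takers = 3), with the
// layout of 4 features per teammate, 2 per taker, and 1 for self, each spanning
// tilesPerTiling tiles: the target has 4*3 + 2*3 + 1 = 19 feature blocks. Teammate
// blocks k = 0 and k = 1 copy straight from the source; k = 2 rewinds j by
// 4 * tilesPerTiling and reuses the last source teammate's weights. Taker blocks
// behave the same way in units of 2 * tilesPerTiling, the self block copies
// one-to-one, and any action index at or beyond sourceActions starts from zeros.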