Example #1
        void pollAlternativeAction(IAgent agent)
        {
            // This will be a negative (bad) reward.
            // We need to handle this by polling other agents to see if they
            // could have avoided it.
            if (TeachParadigm != TeachingParadigm.EveryonePolling &&
                TeachParadigm != TeachingParadigm.EveryoneRewardsAndPolling &&
                TeachParadigm != TeachingParadigm.SubculturePolling &&
                TeachParadigm != TeachingParadigm.SubcultureRewardsAndPolling)
            {
                return;
            }

            // If we are using EveryonePolling, the agent can ask the entire population.
            // If we are using SubculturePolling, the agent can only ask agents in its subculture.
            var available = TeachParadigm == TeachingParadigm.EveryonePolling ? _agents.Where((a, i) => agent.Id != i)
                                : _agents.Where((a, i) => _agentGroups[agent.Id] == _agentGroups[i] && i != agent.Id);

            // The teacher is the highest-fitness agent available to the eaten agent.
            var teacher = (SocialAgent)available.OrderByDescending(a => a.Fitness).FirstOrDefault();

            // No other agent is available to poll (e.g. the agent is alone in its subculture),
            // so there is nothing to learn from.
            if (teacher == null)
            {
                return;
            }

            // Get the corrected moves for every action in the eaten agent's memory.
            LinkedList<StateActionReward> badTrajectory     = ((SocialAgent)agent).Memory;
            LinkedList<StateActionReward> correctTrajectory = new LinkedList<StateActionReward>();

            foreach (var bad in badTrajectory)
            {
                double[] state  = bad.State;
                double[] action = new double[bad.Action.Length];

                // Ask the teacher what it would have done in the same state.
                teacher.activateNetworkWithoutMemory(bad.State).CopyTo(action, 0);

                // Pair the original state with the teacher's action; the reward is a neutral 0.
                StateActionReward good = new StateActionReward(state, action, 0);
                correctTrajectory.AddLast(good);
            }

            // Train the agent with the correct trajectory according to the teacher.
            TeachAgent(agent, correctTrajectory);
        }
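
The method above also relies on a small data carrier for each remembered step and on TeachAgent to replay the corrected trajectory. As a reference point only, here is a minimal sketch of what StateActionReward could look like, reconstructed purely from how it is used in the method; the actual class in the project may carry more members.

public class StateActionReward
{
    // Network inputs observed at this step.
    public double[] State { get; }

    // Network outputs (the action) taken at this step.
    public double[] Action { get; }

    // Reward received for the step; the corrected trajectory uses a neutral 0.
    public double Reward { get; }

    public StateActionReward(double[] state, double[] action, double reward)
    {
        State  = state;
        Action = action;
        Reward = reward;
    }
}

TeachAgent(agent, correctTrajectory) is then presumably a supervised, imitation-style update that fits the eaten agent's network outputs to the teacher's actions for the recorded states; its implementation is not shown here.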