/// <summary>
/// Handles a negative (bad) reward by polling other agents for the actions they
/// would have taken in each state stored in <paramref name="agent"/>'s memory,
/// then training the agent on that corrected trajectory via <c>TeachAgent</c>.
/// </summary>
/// <remarks>
/// Does nothing unless the current teaching paradigm is one of the four polling
/// paradigms. Also does nothing when no other agent is available to act as the
/// teacher (for example, the agent is alone in the population or in its subculture).
/// </remarks>
/// <param name="agent">
/// The agent that received the bad reward. It is cast to <c>SocialAgent</c> to read
/// its memory, so an agent of any other type raises <see cref="System.InvalidCastException"/>.
/// </param>
void pollAlternativeAction(IAgent agent)
{
    // Both "Everyone" paradigms let the agent ask the entire population; both
    // "Subculture" paradigms restrict it to agents in its own subculture.
    bool pollsEveryone = TeachParadigm == TeachingParadigm.EveryonePolling
                         || TeachParadigm == TeachingParadigm.EveryoneRewardsAndPolling;
    bool pollsSubculture = TeachParadigm == TeachingParadigm.SubculturePolling
                           || TeachParadigm == TeachingParadigm.SubcultureRewardsAndPolling;

    // Polling only applies to the polling paradigms.
    if (!pollsEveryone && !pollsSubculture)
    {
        return;
    }

    // The position of an agent in _agents is compared against agent.Id, so the
    // polled agent itself is always excluded from the candidates.
    var available = pollsEveryone
        ? _agents.Where((a, i) => i != agent.Id)
        : _agents.Where((a, i) => i != agent.Id && _agentGroups[agent.Id] == _agentGroups[i]);

    // The teacher is the highest fitness agent available to the polled agent.
    var teacher = (SocialAgent)available.OrderByDescending(a => a.Fitness).FirstOrDefault();
    if (teacher == null)
    {
        // Nobody to learn from; without this guard the loop below would
        // dereference a null teacher.
        return;
    }

    // Get the teacher's corrected move for every state in the agent's memory.
    LinkedList<StateActionReward> badTrajectory = ((SocialAgent)agent).Memory;
    LinkedList<StateActionReward> correctTrajectory = new LinkedList<StateActionReward>();
    foreach (var bad in badTrajectory)
    {
        // Same state, but with the action replaced by the teacher's output for
        // that state. The corrected sample is stored with a reward of 0.
        double[] action = new double[bad.Action.Length];
        teacher.activateNetworkWithoutMemory(bad.State).CopyTo(action, 0, action.Length);
        correctTrajectory.AddLast(new StateActionReward(bad.State, action, 0));
    }

    // Train the agent with the correct trajectory according to the teacher.
    TeachAgent(agent, correctTrajectory);
}