public void Initialize()
{
    // Dimension-Value Pairs:
    sayWhat = World.NewDimensionValuePair("YourAction", "What do you want to do?");

    // External Action Chunks:
    sayCooperate = World.NewExternalActionChunk("Cooperate");
    sayDefect = World.NewExternalActionChunk("Defect");

    // placeholder
    // GoalChunk salute = World.NewGoalChunk("Salute");
    // GoalChunk bidFarewell = World.NewGoalChunk("Bid Farewell");

    // WM Actions:
    wmuacC = World.NewWorkingMemoryUpdateActionChunk("Remember my opponent cooperated");
    wmuacD = World.NewWorkingMemoryUpdateActionChunk("Remember my opponent defected");

    DeclarativeChunk dcoc = World.NewDeclarativeChunk("My opponent cooperated");
    DeclarativeChunk dcod = World.NewDeclarativeChunk("My opponent defected");

    wmuacC.Add(WorkingMemory.RecognizedActions.SET_RESET, dcoc);
    wmuacD.Add(WorkingMemory.RecognizedActions.SET_RESET, dcod);

    // Set up a two-agent model (two agents with the same setup, playing against each other).
    Alice = World.NewAgent("Alice");
    Bob = World.NewAgent("Bob");

    // The simulating environment determines the inputs to each agent based on what each agent does.
    // Feedback is determined by the payoff matrix.
    payoff = new int[2, 2, 2];

    // Doing this the hard way. This could all be set up in-line above, but spelling it out makes
    // the table more explicit in terms of how we want to use it.
    // The payoff matrix here is "Friend or Foe", about the simplest case.
    // Indices mean: FOR-WHICH-AGENT, WHAT-ALICE-DOES, WHAT-BOB-DOES
    payoff[_ALICE, _COOPERATE, _COOPERATE] = 1;
    payoff[_ALICE, _COOPERATE, _DEFECT] = 0;
    payoff[_ALICE, _DEFECT, _COOPERATE] = 2;
    payoff[_ALICE, _DEFECT, _DEFECT] = 0;
    payoff[_BOB, _COOPERATE, _COOPERATE] = 1;
    payoff[_BOB, _COOPERATE, _DEFECT] = 2;
    payoff[_BOB, _DEFECT, _COOPERATE] = 0;
    payoff[_BOB, _DEFECT, _DEFECT] = 0;
    maxpay = 2;

    results = new int[_TRIALS, 2, 2];

    // Set up a Q-learning net:
    // -- Eligibility Condition = True if "What do you want to do?" is in the input, otherwise False
    // -- Input = "My opponent cooperated", "My opponent defected", "What do you want to do?"
    // -- Output = "I want to defect", "I want to cooperate"
    //
    // Also, RER is turned ON.
    QBPNetwork net_A = AgentInitializer.InitializeImplicitDecisionNetwork(Alice, QBPNetwork.Factory, QNetEC);

    net_A.Input.Add(sayWhat);
    net_A.Input.Add(sayCooperate);
    net_A.Input.Add(sayDefect);

    net_A.Output.Add(sayCooperate);
    net_A.Output.Add(sayDefect);

    Alice.Commit(net_A);

    net_A.Parameters.LEARNING_RATE = 1;
    Alice.ACS.Parameters.PERFORM_RER_REFINEMENT = true;    // true by default anyway
    Alice.ACS.Parameters.LEVEL_SELECTION_METHOD = ActionCenteredSubsystem.LevelSelectionMethods.COMBINED;
    Alice.ACS.Parameters.LEVEL_SELECTION_OPTION = ActionCenteredSubsystem.LevelSelectionOptions.FIXED;
    Alice.ACS.Parameters.FIXED_FR_LEVEL_SELECTION_MEASURE = 1;
    Alice.ACS.Parameters.FIXED_BL_LEVEL_SELECTION_MEASURE = 1;
    Alice.ACS.Parameters.FIXED_RER_LEVEL_SELECTION_MEASURE = 1;
    Alice.ACS.Parameters.WM_UPDATE_ACTION_PROBABILITY = 1;

    // Rules (2 rules):
    // Rule 1:
    // -- Condition = "Your opponent cooperated"
    // -- Action = Set "My opponent cooperated" in WM
    // Rule 2:
    // -- Condition = "Your opponent defected"
    // -- Action = Set "My opponent defected" in WM
    FixedRule ruleA1 = AgentInitializer.InitializeActionRule(Alice, FixedRule.Factory, wmuacC, FRSC);
    FixedRule ruleA2 = AgentInitializer.InitializeActionRule(Alice, FixedRule.Factory, wmuacD, FRSC);

    Alice.Commit(ruleA1);
    Alice.Commit(ruleA2);

    QBPNetwork net_B = AgentInitializer.InitializeImplicitDecisionNetwork(Bob, QBPNetwork.Factory, QNetEC);

    net_B.Input.Add(sayWhat);
    net_B.Input.Add(sayCooperate);
    net_B.Input.Add(sayDefect);

    net_B.Output.Add(sayCooperate);
    net_B.Output.Add(sayDefect);

    Bob.Commit(net_B);

    // Use weighted combination; NO partial match on the top level (TL).
    net_B.Parameters.LEARNING_RATE = 1;
    Bob.ACS.Parameters.PERFORM_RER_REFINEMENT = true;
    Bob.ACS.Parameters.LEVEL_SELECTION_METHOD = ActionCenteredSubsystem.LevelSelectionMethods.COMBINED;
    Bob.ACS.Parameters.LEVEL_SELECTION_OPTION = ActionCenteredSubsystem.LevelSelectionOptions.FIXED;
    Bob.ACS.Parameters.FIXED_FR_LEVEL_SELECTION_MEASURE = 1;
    Bob.ACS.Parameters.FIXED_BL_LEVEL_SELECTION_MEASURE = 1;
    Bob.ACS.Parameters.FIXED_RER_LEVEL_SELECTION_MEASURE = 1;
    Bob.ACS.Parameters.WM_UPDATE_ACTION_PROBABILITY = 1;

    FixedRule ruleB1 = AgentInitializer.InitializeActionRule(Bob, FixedRule.Factory, wmuacC, FRSC);
    FixedRule ruleB2 = AgentInitializer.InitializeActionRule(Bob, FixedRule.Factory, wmuacD, FRSC);

    Bob.Commit(ruleB1);
    Bob.Commit(ruleB2);

    // Initially using the same parameters for RER as Full Hello World.
    RefineableActionRule.GlobalParameters.SPECIALIZATION_THRESHOLD_1 = -.6;
    RefineableActionRule.GlobalParameters.GENERALIZATION_THRESHOLD_1 = -.1;
    RefineableActionRule.GlobalParameters.INFORMATION_GAIN_OPTION = RefineableActionRule.IGOptions.PERFECT;

    /*
     * Note -- What should happen is that when you pass in "Your opponent…",
     * the agent returns the "Do Nothing" external action
     * (since it performed an internal WM action).
     * However, you can just ignore this either way.
     */
}
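// QNetEC (the implicit decision network's eligibility condition) and FRSC (the support
// calculator shared by the fixed rules) are referenced above but defined elsewhere in this
// class. The sketch below shows one plausible shape for them; the delegate signatures
// (ActivationCollection in; bool out for eligibility, double out for rule support), the
// activation value of 1, and the yourOpponentCooperated / yourOpponentDefected fields
// (the "Your opponent ..." sensory inputs mentioned in the rule comments) are assumptions,
// not part of the original listing.

// Eligibility condition: the Q-net only decides when the environment asks
// "What do you want to do?".
public bool QNetEC(ActivationCollection currentInput)
{
    // Assumes Contains(pair, 1) checks that the pair is fully activated in the current input.
    return currentInput.Contains(sayWhat, 1);
}

// Fixed-rule support calculator: support the WM update action whenever the environment has
// just reported the opponent's move. (The real FRSC presumably also checks which move was
// reported, so that ruleA1/ruleB1 fire only on "cooperated" and ruleA2/ruleB2 only on
// "defected".)
public double FRSC(ActivationCollection currentInput, Rule target = null)
{
    return (currentInput.Contains(yourOpponentCooperated, 1) ||
            currentInput.Contains(yourOpponentDefected, 1)) ? 1 : 0;
}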
public void Initialize(Groups group)
{
    World.Initialize();
    John = World.NewAgent();
    QBPNetwork idn = AgentInitializer.InitializeImplicitDecisionNetwork(John, QBPNetwork.Factory);

    World.NewDimensionValuePair("Target P", target);
    World.NewDimensionValuePair("Current P", target);
    World.NewExternalActionChunk(target);

    for (double i = 0; i < 12; i++)
    {
        if (World.GetDimensionValuePair("Target P", i) == null)
        {
            idn.Input.Add(World.NewDimensionValuePair("Target P", i));
            idn.Input.Add(World.NewDimensionValuePair("Current P", i));
            idn.Input.Add(World.NewExternalActionChunk(i));
            idn.Output.Add(World.GetActionChunk(i));
        }
        else
        {
            idn.Input.Add(World.GetDimensionValuePair("Target P", i));
            idn.Input.Add(World.GetDimensionValuePair("Current P", i));
            idn.Input.Add(World.GetActionChunk(i));
            idn.Output.Add(World.GetActionChunk(i));
        }
    }

    foreach (double i in As) { World.NewDimensionValuePair("A", i); }
    foreach (double i in Bs) { World.NewDimensionValuePair("B", i); }
    foreach (double i in Cs) { World.NewDimensionValuePair("C", i); }

    switch (group)
    {
        case Groups.VERBALIZATION:
            idn.Parameters.POSITIVE_MATCH_THRESHOLD = 1;
            RefineableActionRule.GlobalParameters.POSITIVE_MATCH_THRESHOLD = 1;
            RefineableActionRule.GlobalParameters.GENERALIZATION_THRESHOLD_1 = 1;
            RefineableActionRule.GlobalParameters.SPECIALIZATION_THRESHOLD_1 = .5;
            threshold_4 = .5;
            break;
        case Groups.MEMORY:
            for (double i = 0; i < 12; i++)
            {
                ExternalActionChunk w = (ExternalActionChunk)World.GetActionChunk((double)rand.Next(12));
                var p = World.GetDimensionValuePair("Current P", FactoryOutput(i, (double)w.LabelAsIComparable));
                ExternalActionChunk w1 = (ExternalActionChunk)World.GetActionChunk(
                    Math.Round((target + p.Value + NoiseOptions[rand.Next(3)]) / 2));

                FixedRule mfr = AgentInitializer.InitializeActionRule(John, FixedRule.Factory, w1,
                    MemoryGroup_SupportCalculator);
                mfr.GeneralizedCondition.Add(p, true);
                mfr.GeneralizedCondition.Add(w, true);
                John.Commit(mfr);
            }
            goto default;
        case Groups.SIMPLE_RULE:
            for (double i = 0; i < 12; i++)
            {
                FixedRule sfr = AgentInitializer.InitializeActionRule(John, FixedRule.Factory,
                    World.GetActionChunk(i), SimpleRule_SupportCalculator);
                John.Commit(sfr);
            }
            goto default;
        default:
            idn.Parameters.LEARNING_RATE = .05;
            idn.Parameters.DISCOUNT = .95;
            John.ACS.Parameters.SELECTION_TEMPERATURE = .09;
            idn.Parameters.POSITIVE_MATCH_THRESHOLD = 1;
            RefineableActionRule.GlobalParameters.GENERALIZATION_THRESHOLD_1 = 2;
            RefineableActionRule.GlobalParameters.SPECIALIZATION_THRESHOLD_1 = 1.2;
            RefineableActionRule.GlobalParameters.POSITIVE_MATCH_THRESHOLD = 1;
            threshold_4 = .2;
            break;
    }

    RefineableActionRule.GlobalParameters.INFORMATION_GAIN_OPTION = RefineableActionRule.IGOptions.PERFECT;

    John.Commit(idn);
}
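// Initialize(Groups) relies on several members that are declared elsewhere in the simulation
// class: John, rand, target, threshold_4, NoiseOptions, As/Bs/Cs, FactoryOutput, and the two
// support calculators (which would follow the same delegate shape as FRSC above). The
// declarations below are a sketch of what the method assumes, with types inferred from usage;
// the actual values and method bodies live elsewhere and are not reproduced here.

public enum Groups { VERBALIZATION, MEMORY, SIMPLE_RULE }   // other conditions may exist

Agent John;
Random rand = new Random();

double target;            // target production level for the task
double threshold_4;       // group-dependent threshold used later in the run
double[] NoiseOptions;    // three noise values, indexed by rand.Next(3)
double[] As, Bs, Cs;      // values for the "A", "B", "C" input dimensions

// Maps the previous production level and the chosen action to the factory's next output
// (signature inferred from the call site above; defined elsewhere).
double FactoryOutput(double previous, double action)
{
    throw new NotImplementedException();
}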