/// <inheritdoc />
public option ChooseOption(candidate cand)
{
    // Fixed sampling budget for a single decision.
    const int trialCount = 50;

    // Every call starts from a fresh root built around the supplied candidate.
    var root = new BanditNode(this, cand, 0);

    Console.WriteLine("Running {0} trails, Possible #options: {1}", trialCount, root.Options.Count);

    // One progress dot is printed ahead of each trial.
    for (var remaining = trialCount; remaining > 0; remaining--)
    {
        Console.Write(".");
        RunTrial(root, MaxDepth);
    }

    Console.WriteLine("");
    Console.WriteLine("Avg reward and #tries for each option:");
    root.Bandit.PrintOptInfo();

    // The root bandit reports the index of its best arm; hand back the matching option.
    var bestArm = root.Bandit.GetBestArm();
    return root.Options[bestArm];
}
/// <summary>
/// Runs 1000 trials from a fresh root node built around <paramref name="cand"/>,
/// with the <c>sw</c> writer open on the node-info CSV file for the duration of the
/// run, then prints the per-option statistics of the root bandit.
/// </summary>
/// <param name="cand">Candidate used as the state of the root node.</param>
public void buildTree(candidate cand)
{
    // NOTE(review): the output location is a hard-coded absolute path on one
    // developer machine; consider making it configurable.
    sw = new StreamWriter("/home/manion/Documents/CleanMORFHPCC/output/MCTS/nodeInfo.txt");
    try
    {
        // CSV header; RunTrial appends one row per newly created node.
        sw.WriteLine("NodeNum,Depth,Reward,SmileString");

        var rootNode = new BanditNode(this, cand, 0);
        var NumTrials = 1000;
        Console.WriteLine("Running {0} trails, Possible #options: {1}", NumTrials, rootNode.Options.Count);

        for (var i = 0; i < NumTrials; i++)
        {
            RunTrial(rootNode, MaxDepth);
        }

        Console.WriteLine("");
        Console.WriteLine("Avg reward and #tries for each option:");
        rootNode.Bandit.PrintOptInfo();
    }
    finally
    {
        // Always flush and release the file, even when a trial throws; otherwise
        // the handle leaks and buffered rows never reach the disk.
        sw.Close();
    }
}
/// <summary>
/// Performs one recursive sampling trial starting at <paramref name="node"/>.
/// An arm is selected from the node's bandit, the trial descends into (or creates)
/// a successor node for that arm, and the resulting reward is fed back into the bandit.
/// </summary>
/// <param name="node">Node the trial starts from.</param>
/// <param name="depth">Remaining depth budget; 0 means the node is treated as a leaf.</param>
/// <returns>The total reward collected by this trial from <paramref name="node"/> downwards.</returns>
private double RunTrial(BanditNode node, int depth)
{
    // Leaf: either the depth budget is exhausted or the bandit has no option left.
    if (depth == 0 || !node.Bandit.HasOption())
    {
        return _evaluation.Evaluate(node.Cand);
    }

    var optionIndex = node.Bandit.SelectPullArm();
    var children = node.Children[optionIndex];
    double totalReward;

    if (children.Count >= MaxWidth)
    {
        // Width limit reached for this arm: do not expand further, reuse one of the
        // existing successors picked through node.Multinomial.
        var successors = children.Keys.ToList();
        var selectedOption = successors[node.Multinomial(optionIndex)];
        var selected = children[selectedOption];
        selected.Visits += 1;

        var successorNode = selected.Node;
        totalReward = successorNode.TransitionReward + RunTrial(successorNode, depth - 1);
    }
    else
    {
        // Generate a successor state by applying the chosen option to a copy of the candidate.
        var successorState = CopyAndApplyOption(node.Options[optionIndex], node.Cand, true);

        // The successor is not evaluated here, so the immediate reward is simply
        // the negated absolute reward of the current node.
        var immediateReward = 0 - node.AbsoluteReward;

        // Single lookup instead of ContainsKey followed by repeated indexing.
        if (children.TryGetValue(successorState, out var existing))
        {
            // State already known under this arm: count the visit and keep descending.
            existing.Visits += 1;
            totalReward = immediateReward + RunTrial(existing.Node, depth - 1);
        }
        else
        {
            var successorNode = new BanditNode(this, successorState, immediateReward);
            children[successorState] = new BanditNode.NodeCountTuple(successorNode);

            // This evaluation estimates how good the new state potentially is.
            totalReward = immediateReward + _evaluation.Evaluate(successorState);

            // NOTE(review): "_runDirectory" is a string literal here, so the output lands in
            // a directory literally named "_runDirectory" relative to the working directory.
            // Confirm whether a run-directory variable was intended.
            var fileDir = "_runDirectory" + "/intermediateLinkers/linker" + nodeCnt;
            Directory.CreateDirectory(fileDir);
            Settings.filer.Save(fileDir + "/linker" + nodeCnt + ".xml", successorState);

            // Convert to SMILES once and reuse it for both the console and the CSV row.
            var smiles = OBFunctions.moltoSMILES(OBFunctions.designgraphtomol(successorState.graph));
            var nodeDepth = MaxDepth - depth + 1;
            Console.WriteLine("Node{0}: depth: {1}, reward: {2}, smi: {3}", nodeCnt, nodeDepth, totalReward, smiles);

            // In the visible code the CSV writer is only assigned by buildTree;
            // skip the row instead of failing when no writer has been set.
            sw?.WriteLine("{0},{1},{2},{3}", nodeCnt, nodeDepth, totalReward, smiles);

            nodeCnt++;
        }
    }

    node.Bandit.Update(optionIndex, totalReward);
    return totalReward;
}
/// <summary>
/// Visit counter. It starts at 1 so that creating the tuple already counts as the first visit.
/// </summary>
public int Visits = 1;

/// <summary>
/// Creates a tuple that stores <paramref name="node"/> alongside the visit counter.
/// </summary>
/// <param name="node">Node stored in this tuple.</param>
public NodeCountTuple(BanditNode node)
{
    this.Node = node;
}