Changeset 11742 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
- Timestamp: 01/09/15 14:57:28 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
--- MctsSampler.cs (r11732)
+++ MctsSampler.cs (r11742)
@@ -12,6 +12,5 @@
       public string ident;
       public int randomTries;
-      public int policyTries;
-      public IPolicyActionInfo actionInfo;
+      public IBanditPolicyActionInfo actionInfo;
       public TreeNode[] children;
       public bool done = false;
@@ -22,5 +21,5 @@
 
       public override string ToString() {
-        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
+        return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
       }
     }
@@ -34,5 +33,5 @@
     private readonly Random random;
     private readonly int randomTries;
-    private readonly IPolicy policy;
+    private readonly IBanditPolicy policy;
 
     private List<TreeNode> updateChain;
@@ -47,5 +46,5 @@
     // }
 
-    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
+    public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
       this.maxLen = maxLen;
       this.problem = problem;
@@ -78,11 +77,12 @@
     public void PrintStats() {
       var n = rootNode;
-      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
+      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
       while (n.children != null) {
         Console.WriteLine();
         Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
-        Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
+        Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
+        Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
         //n.policy.PrintStats();
-        n = n.children.OrderByDescending(c => c.policyTries).First();
+        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
       }
       Console.ReadLine();
@@ -108,7 +108,6 @@
       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
       TreeNode n = rootNode;
-      bool done = phrase.IsTerminal;
       var curDepth = 0;
-      while (!done) {
+      while (!phrase.IsTerminal) {
         updateChain.Add(n);
 
@@ -127,9 +126,7 @@
           if (n.randomTries == randomTries && n.children == null) {
             n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
-            //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
             foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
             treeSize += n.children.Length;
           }
-          n.policyTries++;
           // => select using bandit policy
           int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
@@ -140,6 +137,4 @@
 
         curDepth++;
-
-        done = phrase.IsTerminal;
 
         // prepare for next iteration
@@ -153,5 +148,4 @@
       // the last node is a leaf node (sentence is done), so we never need to visit this node again
       n.done = true;
-      n.actionInfo.Disable();
 
       treeDepth = Math.Max(treeDepth, curDepth);
@@ -165,4 +159,5 @@
       foreach (var e in updateChain) {
         var node = e;
+        if (node.done) node.actionInfo.Disable();
         if (node.children != null && node.children.All(c => c.done)) {
           node.done = true;
@@ -171,5 +166,4 @@
         if (!node.done) {
           node.actionInfo.UpdateReward(reward);
-          //policy.UpdateReward(action, reward / updateChain.Count);
         }
       }
Note: See TracChangeset for help on using the changeset viewer.