Changeset 11747 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
- Timestamp:
- 01/12/15 21:23:01 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
r11745 r11747 5 5 using System.Text; 6 6 using HeuristicLab.Algorithms.Bandits; 7 using HeuristicLab.Common; 7 8 using HeuristicLab.Problems.GrammaticalOptimization; 8 9 … … 13 14 public int randomTries; 14 15 public IBanditPolicyActionInfo actionInfo; 16 public TreeNode parent; 15 17 public TreeNode[] children; 16 18 public bool done = false; 17 19 18 public TreeNode(string id ) {20 public TreeNode(string id, TreeNode parent) { 19 21 this.ident = id; 22 this.parent = parent; 20 23 } 21 24 … … 35 38 private readonly IBanditPolicy policy; 36 39 37 private List<TreeNode> updateChain;40 private TreeNode lastNode; // the bottom node in one episode 38 41 private TreeNode rootNode; 39 42 … … 75 78 Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality); 76 79 while (n.children != null) { 80 Console.WriteLine("{0,-30}", n.ident); 81 double maxVForRow = n.children.Select(ch => ch.actionInfo.Value).Max(); 82 if (maxVForRow == 0) maxVForRow = 1.0; 83 84 for (int i = 0; i < n.children.Length; i++) { 85 var ch = n.children[i]; 86 SetColorForChild(ch, maxVForRow); 87 Console.Write("{0,5}", ch.ident); 88 } 77 89 Console.WriteLine(); 78 Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident)))); 79 Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10)))); 80 Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString())))); 90 for (int i = 0; i < n.children.Length; i++) { 91 var ch = n.children[i]; 92 SetColorForChild(ch, maxVForRow); 93 Console.Write("{0,5:F2}", ch.actionInfo.Value * 10); 94 } 95 Console.WriteLine(); 96 for (int i = 0; i < n.children.Length; i++) { 97 var ch = n.children[i]; 98 SetColorForChild(ch, maxVForRow); 99 Console.Write("{0,5}", ch.done ? "X" : ch.actionInfo.Tries.ToString()); 100 } 101 Console.ForegroundColor = ConsoleColor.White; 102 Console.WriteLine(); 81 103 //n.policy.PrintStats(); 82 n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First(); 83 } 104 //n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First(); 105 n = n.children.Where(ch=>!ch.done).OrderByDescending(c => c.actionInfo.Value).First(); 106 } 107 Console.WriteLine("-----------------------"); 108 } 109 110 private void SetColorForChild(TreeNode ch, double maxVForRow) { 111 //if (ch.done) Console.ForegroundColor = ConsoleColor.White; 112 //else 113 Console.ForegroundColor = ConsoleEx.ColorForValue(ch.actionInfo.Value / maxVForRow); 84 114 } 85 115 86 116 private void InitPolicies(IGrammar grammar) { 87 this.updateChain = new List<TreeNode>(); 88 89 rootNode = new TreeNode(grammar.SentenceSymbol.ToString() );117 118 119 rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), null); 90 120 rootNode.actionInfo = policy.CreateActionInfo(); 91 121 treeDepth = 0; … … 94 124 95 125 private Sequence SampleSentence(IGrammar grammar) { 96 updateChain.Clear();126 lastNode = null; 97 127 var startPhrase = new Sequence(grammar.SentenceSymbol); 128 //var startPhrase = new Sequence("a*b+c*d+e*f+E"); 129 98 130 return CompleteSentence(grammar, startPhrase); 99 131 } … … 105 137 var curDepth = 0; 106 138 while (!phrase.IsTerminal) { 107 updateChain.Add(n);108 139 109 140 if (n.randomTries < randomTries) { 110 141 n.randomTries++; 111 142 treeDepth = Math.Max(treeDepth, curDepth); 143 lastNode = n; 112 144 return g.CompleteSentenceRandomly(random, phrase, maxLen); 113 145 } else { … … 120 152 121 153 if (n.randomTries == randomTries && n.children == null) { 122 n.children = alts.Select(alt => new TreeNode(alt.ToString() )).ToArray(); // create a new node for each alternative154 n.children = alts.Select(alt => new TreeNode(alt.ToString(), n)).ToArray(); // create a new node for each alternative 123 155 foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo(); 124 156 treeSize += n.children.Length; … … 138 170 } // while 139 171 140 updateChain.Add(n);172 lastNode = n; 141 173 142 174 … … 150 182 private void DistributeReward(double reward) { 151 183 // iterate in reverse order (bottom up) 152 updateChain.Reverse(); 153 154 foreach (var e in updateChain) { 155 var node = e; 156 if (node.done) node.actionInfo.Disable(); 184 185 var node = lastNode; 186 while (node != null) { 187 if (node.done) node.actionInfo.Disable(reward); 157 188 if (node.children != null && node.children.All(c => c.done)) { 158 189 node.done = true; 159 node.actionInfo.Disable(); 190 var bestActionValue = node.children.Select(c => c.actionInfo.Value).Max(); 191 node.actionInfo.Disable(bestActionValue); 160 192 } 161 193 if (!node.done) { 162 194 node.actionInfo.UpdateReward(reward); 163 195 } 196 node = node.parent; 164 197 } 165 198 }
Note: See TracChangeset
for help on using the changeset viewer.