Context Navigation

← Previous Change
Next Change →

TemporalDifferenceTreeSearchSampler.cs

Timestamp:

01/12/15 21:23:01 (10 years ago)

Author:

gkronber

Message:

#2283: implemented test problems for MCTS

File:

: 1 edited

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs

-                      r11744
+                      r11747
     private readonly Random random;
     private readonly int randomTries;
-    private readonly IBanditPolicy policy;
     private List<TreeNode> updateChain;
 …
     public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
+    public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries) {
       this.maxLen = maxLen;
       this.problem = problem;
       this.random = random;
       this.randomTries = randomTries;
-      this.policy = policy;
+    }
 …
       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, n.q, bestQuality);
       while (n.children != null) {
+        Console.WriteLine("{0,-30}", n.ident);
+        double maxVForRow = n.children.Select(ch => ch.q).Max();
+        if (maxVForRow == 0) maxVForRow = 1.0;
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
+          Console.Write("{0,5}", ch.ident);
+        }
         Console.WriteLine();
+        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.q * 10))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.tries.ToString()))));
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
+          Console.Write("{0,5:F2}", ch.q * 10);
+        }
+        Console.WriteLine();
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
+          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
+        }
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine();
         //n.policy.PrintStats();
         n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.q).First();
+      }
-      //Console.ReadLine();
+    }
 …
+          }
           // => select using bandit policy
           int selectedAltIdx = SelectAction(random, n.children);
+          int selectedAltIdx = SelectEpsGreedy(random, n.children);
           Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
 …
     // eps-greedy
     private int SelectAction(Random random, TreeNode[] children) {
+    private int SelectEpsGreedy(Random random, TreeNode[] children) {
       if (random.NextDouble() < 0.1) {
 …
       } else {
         var bestQ = double.NegativeInfinity;
         var bestChildIdx = -1;
+        var bestChildIdx = new List<int>();
         for (int i = 0; i < children.Length; i++) {
           if (children[i].done) continue;
+          if (children[i].tries == 0) return i;
+          if (children[i].q > bestQ) {
+            bestQ = children[i].q;
+            bestChildIdx = i;
+          // if (children[i].tries == 0) return i;
+          var q = children[i].q;
+          if (q > bestQ) {
+            bestQ = q;
+            bestChildIdx.Clear();
+            bestChildIdx.Add(i);
+          } else if (q == bestQ) {
+            bestChildIdx.Add(i);
+          }
+        }
         Debug.Assert(bestChildIdx > -1);
         return bestChildIdx;
+        Debug.Assert(bestChildIdx.Any());
+        return bestChildIdx.SelectRandom(random);
+      }
+    }
     private void DistributeReward(double reward) {
-      const double alpha = 0.1;
-      const double gamma = 1;
-      // iterate in reverse order (bottom up)
       updateChain.Reverse();
+      var nextQ = 0.0;
+      foreach (var e in updateChain) {
+        var node = e;
+        node.tries++;
+      foreach (var node in updateChain) {
         if (node.children != null && node.children.All(c => c.done)) {
           node.done = true;
+        }
+        // reward is received only for the last action
+        if (e == updateChain.First()) {
+          node.q = node.q + alpha * (reward + gamma * nextQ - node.q);
+          nextQ = node.q;
+        } else {
+          node.q = node.q + alpha * (0 + gamma * nextQ - node.q);
+          nextQ = node.q;
+        }
+      }
+      }
+      updateChain.Reverse();
+      //const double alpha = 0.1;
+      const double gamma = 1;
+      double alpha;
+      foreach (var p in updateChain.Zip(updateChain.Skip(1), Tuple.Create)) {
+        var parent = p.Item1;
+        var child = p.Item2;
+        parent.tries++;
+        alpha = 1.0 / parent.tries;
+        //alpha = 0.01;
+        parent.q = parent.q + alpha * (0 + gamma * child.q - parent.q);
+      }
+      // reward is received only for the last action
+      var n = updateChain.Last();
+      n.tries++;
+      alpha = 1.0 / n.tries;
+      //alpha = 0.1;
+      n.q = n.q + alpha * reward;
+    }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11747 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs

Download in other formats: