Context Navigation

← Previous Change
Next Change →

Changeset 11747 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization

Timestamp:

01/12/15 21:23:01 (10 years ago)

Author:

gkronber

Message:

#2283: implemented test problems for MCTS

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization

Files:

: 4 edited

HeuristicLab.Algorithms.GrammaticalOptimization.csproj (modified) (1 diff)
MctsContextualSampler.cs (modified) (16 diffs)
MctsSampler.cs (modified) (9 diffs)
TemporalDifferenceTreeSearchSampler.cs (modified) (6 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

r11744	r11747
45	45	<Compile Include="AlternativesSampler.cs" />
46	46	<Compile Include="AlternativesContextSampler.cs" />
	47	<Compile Include="MctsQLearningSampler.cs" />
47	48	<Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
48	49	<Compile Include="ExhaustiveRandomFirstSearch.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs

-                      r11745
+                      r11747
       public int randomTries;
       public int tries;
+      public List<TreeNode> parents;
       public TreeNode[] children;
       public bool done = false;
 …
         this.ident = id;
         this.alt = alt;
+        this.parents = new List<TreeNode>();
+      }
 …
+    }
+    private Dictionary<string, TreeNode> treeNodes;
+    private TreeNode GetTreeNode(string id, ReadonlySequence alt) {
+      TreeNode n;
+      var canonicalId = problem.CanonicalRepresentation(id);
+      if (!treeNodes.TryGetValue(canonicalId, out n)) {
+        n = new TreeNode(canonicalId, alt);
+        tries.TryGetValue(canonicalId, out n.tries);
+        treeNodes[canonicalId] = n;
+      }
+      return n;
+    }
     public event Action<string, double> FoundNewBestSolution;
 …
       this.v = new Dictionary<string, double>(1000000);
       this.tries = new Dictionary<string, int>(1000000);
+      treeNodes = new Dictionary<string, TreeNode>();
+    }
 …
       InitPolicies(problem.Grammar);
       for (int i = 0; !rootNode.done && i < maxIterations; i++) {
+        var sentence = SampleSentence(problem.Grammar).ToString();
+        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
+        Debug.Assert(quality >= 0 && quality <= 1.0);
+        DistributeReward(quality);
+        RaiseSolutionEvaluated(sentence, quality);
+        if (quality > bestQuality) {
+          bestQuality = quality;
+          RaiseFoundNewBestSolution(sentence, quality);
+        bool success;
+        var sentence = SampleSentence(problem.Grammar, out success).ToString();
+        if (success) {
+          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
+          Debug.Assert(quality >= 0 && quality <= 1.0);
+          DistributeReward(quality);
+          RaiseSolutionEvaluated(sentence, quality);
+          if (quality > bestQuality) {
+            bestQuality = quality;
+            RaiseFoundNewBestSolution(sentence, quality);
+          }
+        }
+      }
 …
       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, V(n), bestQuality);
       while (n.children != null) {
         Console.WriteLine("{0}", n.ident);
         double maxVForRow = n.children.Select(ch => V(ch)).Max();
+        Console.WriteLine("{0,-30}", n.ident);
+        double maxVForRow = n.children.Select(ch => Math.Min(1.0, Math.Max(0.0, V(ch)))).Max();
         if (maxVForRow == 0) maxVForRow = 1.0;
         for (int i = 0; i < n.children.Length; i++) {
           var ch = n.children[i];
           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
           Console.Write("{0,5}", ch.alt);
+        }
 …
         for (int i = 0; i < n.children.Length; i++) {
           var ch = n.children[i];
           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
           Console.Write("{0,5:F2}", V(ch) * 10);
+          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
+          Console.Write("{0,5:F2}", Math.Min(1.0, V(ch)) * 10);
+        }
         Console.WriteLine();
         for (int i = 0; i < n.children.Length; i++) {
           var ch = n.children[i];
           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
           Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
+        }
 …
         Console.WriteLine();
         //n.policy.PrintStats();
         n = n.children.Where(ch => !ch.done).OrderByDescending(c => V(c)).First();
+        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.tries).First();
+      }
+    }
 …
       this.tries.Clear();
       rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
+      rootNode = GetTreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
       treeDepth = 0;
       treeSize = 0;
+    }
     private Sequence SampleSentence(IGrammar grammar) {
+    private Sequence SampleSentence(IGrammar grammar, out bool success) {
       updateChain.Clear();
       //var startPhrase = new Sequence("a*b+c*d+e*f+E");
       var startPhrase = new Sequence(grammar.SentenceSymbol);
       return CompleteSentence(grammar, startPhrase);
+    }
     private Sequence CompleteSentence(IGrammar g, Sequence phrase) {
+      return CompleteSentence(grammar, startPhrase, out success);
+    }
+    private Sequence CompleteSentence(IGrammar g, Sequence phrase, out bool success) {
       if (phrase.Length > maxLen) throw new ArgumentException();
       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
 …
           n.randomTries++;
           treeDepth = Math.Max(treeDepth, curDepth);
+          success = true;
           return g.CompleteSentenceRandomly(random, phrase, maxLen);
         } else {
 …
               newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
               if (!newPhrase.IsTerminal) newPhrase = newPhrase.Subsequence(0, newPhrase.FirstNonTerminalIndex + 1);
+              n.children[i++] = new TreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
+              var treeNode = GetTreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
+              treeNode.parents.Add(n);
+              n.children[i++] = treeNode;
+            }
             treeSize += n.children.Length;
+            UpdateDone(n);
+            // it could happen that we already finished all variations starting from the branch
+            // stop
+            if (n.done) {
+              success = false;
+              return phrase;
+            }
+          }
+          //int selectedAltIdx = SelectRandom(random, n.children);
           // => select using eps-greedy
           int selectedAltIdx = SelectEpsGreedy(random, n.children);
 …
           curDepth++;
           // prepare for next iteration
           parent = n;
           n = n.children[selectedAltIdx];
+          //UpdateTD(parent, n, 0.0);
+        }
       } // while
 …
       treeDepth = Math.Max(treeDepth, curDepth);
+      success = true;
       return phrase;
+    }
+    //private void UpdateTD(TreeNode parent, TreeNode child, double reward) {
+    //  double alpha = 1.0;
+    //  var vParent = V(parent);
+    //  var vChild = V(child);
+    //  if (double.IsInfinity(vParent)) vParent = 0.0;
+    //  if (double.IsInfinity(vChild)) vChild = 0.0;
+    //  UpdateV(parent, (alpha * (reward + vChild - vParent)));
+    //}
     private void DistributeReward(double reward) {
       // iterate in reverse order (bottom up)
+      updateChain.Reverse();
+      //updateChain.Reverse();
+      UpdateDone(updateChain.Last().Item1);
+      //UpdateTD(updateChain.Last().Item2, updateChain.Last().Item1, reward);
+      //return;
+      BackPropReward(updateChain.Last().Item1, reward);
+      /*
       foreach (var e in updateChain) {
         var node = e.Item1;
         var parent = e.Item2;
+        //var parent = e.Item2;
         node.tries++;
         if (node.children != null && node.children.All(c => c.done)) {
           node.done = true;
+        }
+        //if (node.children != null && node.children.All(c => c.done)) {
+        //  node.done = true;
+        //}
         UpdateV(node, reward);
         // the reward for the parent is either the just received reward or the value of the best action so far
         double value = 0.0;
         if (parent != null) {
           var doneChilds = parent.children.Where(ch => ch.done);
           if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
+        }
+        //double value = 0.0;
+        //if (parent != null) {
+        //  var doneChilds = parent.children.Where(ch => ch.done);
+        //  if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
+        //}
         //if (value > reward) reward = value;
+      }
+    }
+      }*/
+    }
+    private void BackPropReward(TreeNode n, double reward) {
+      n.tries++;
+      UpdateV(n, reward);
+      foreach (var p in n.parents) BackPropReward(p, reward);
+    }
+    private void UpdateDone(TreeNode n) {
+      if (!n.done && n.children != null && n.children.All(c => c.done)) n.done = true;
+      if (n.done) foreach (var p in n.parents) UpdateDone(p);
+    }
     private Dictionary<string, double> v;
 …
         tries.Add(canonicalStr, 1);
       } else {
         v[canonicalStr] = stateV + 0.005 * (reward - stateV);
         //v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
+        //v[canonicalStr] = stateV + 0.005 * (reward - stateV);
+        v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
         tries[canonicalStr]++;
+      }
 …
       //var canonicalStr = n.ident;
       double stateV;
+      if (!tries.ContainsKey(canonicalStr)) return double.PositiveInfinity;
       if (!v.TryGetValue(canonicalStr, out  stateV)) {
         return 0.0;
 …
+    }
+    private int SelectRandom(Random random, TreeNode[] children) {
+      return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
+    }
     private int SelectEpsGreedy(Random random, TreeNode[] children) {
       if (random.NextDouble() < 0.2) {
+        return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
+        return SelectRandom(random, children);
       } else {
         var bestQ = double.NegativeInfinity;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

-                      r11745
+                      r11747
 using System.Text;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
       public int randomTries;
       public IBanditPolicyActionInfo actionInfo;
+      public TreeNode parent;
       public TreeNode[] children;
       public bool done = false;
       public TreeNode(string id) {
+      public TreeNode(string id, TreeNode parent) {
         this.ident = id;
+        this.parent = parent;
+      }
 …
     private readonly IBanditPolicy policy;
     private List<TreeNode> updateChain;
+    private TreeNode lastNode; // the bottom node in one episode
     private TreeNode rootNode;
 …
       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality);
       while (n.children != null) {
+        Console.WriteLine("{0,-30}", n.ident);
+        double maxVForRow = n.children.Select(ch => ch.actionInfo.Value).Max();
+        if (maxVForRow == 0) maxVForRow = 1.0;
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          SetColorForChild(ch, maxVForRow);
+          Console.Write("{0,5}", ch.ident);
+        }
         Console.WriteLine();
+        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          SetColorForChild(ch, maxVForRow);
+          Console.Write("{0,5:F2}", ch.actionInfo.Value * 10);
+        }
+        Console.WriteLine();
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          SetColorForChild(ch, maxVForRow);
+          Console.Write("{0,5}", ch.done ? "X" : ch.actionInfo.Tries.ToString());
+        }
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine();
         //n.policy.PrintStats();
+        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
+      }
+        //n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
+        n = n.children.Where(ch=>!ch.done).OrderByDescending(c => c.actionInfo.Value).First();
+      }
+      Console.WriteLine("-----------------------");
+    }
+    private void SetColorForChild(TreeNode ch, double maxVForRow) {
+      //if (ch.done) Console.ForegroundColor = ConsoleColor.White;
+      //else
+      Console.ForegroundColor = ConsoleEx.ColorForValue(ch.actionInfo.Value / maxVForRow);
+    }
     private void InitPolicies(IGrammar grammar) {
+      this.updateChain = new List<TreeNode>();
       rootNode = new TreeNode(grammar.SentenceSymbol.ToString());
+      rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), null);
       rootNode.actionInfo = policy.CreateActionInfo();
       treeDepth = 0;
 …
     private Sequence SampleSentence(IGrammar grammar) {
       updateChain.Clear();
+      lastNode = null;
       var startPhrase = new Sequence(grammar.SentenceSymbol);
+      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
       return CompleteSentence(grammar, startPhrase);
+    }
 …
       var curDepth = 0;
       while (!phrase.IsTerminal) {
-        updateChain.Add(n);
         if (n.randomTries < randomTries) {
           n.randomTries++;
           treeDepth = Math.Max(treeDepth, curDepth);
+          lastNode = n;
           return g.CompleteSentenceRandomly(random, phrase, maxLen);
         } else {
 …
           if (n.randomTries == randomTries && n.children == null) {
             n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
+            n.children = alts.Select(alt => new TreeNode(alt.ToString(), n)).ToArray(); // create a new node for each alternative
             foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
             treeSize += n.children.Length;
 …
       } // while
       updateChain.Add(n);
+      lastNode = n;
 …
     private void DistributeReward(double reward) {
       // iterate in reverse order (bottom up)
+      updateChain.Reverse();
+      foreach (var e in updateChain) {
+        var node = e;
+        if (node.done) node.actionInfo.Disable();
+      var node = lastNode;
+      while (node != null) {
+        if (node.done) node.actionInfo.Disable(reward);
         if (node.children != null && node.children.All(c => c.done)) {
           node.done = true;
+          node.actionInfo.Disable();
+          var bestActionValue = node.children.Select(c => c.actionInfo.Value).Max();
+          node.actionInfo.Disable(bestActionValue);
+        }
         if (!node.done) {
           node.actionInfo.UpdateReward(reward);
+        }
+        node = node.parent;
+      }
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/TemporalDifferenceTreeSearchSampler.cs

-                      r11744
+                      r11747
     private readonly Random random;
     private readonly int randomTries;
-    private readonly IBanditPolicy policy;
     private List<TreeNode> updateChain;
 …
     public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
+    public TemporalDifferenceTreeSearchSampler(IProblem problem, int maxLen, Random random, int randomTries) {
       this.maxLen = maxLen;
       this.problem = problem;
       this.random = random;
       this.randomTries = randomTries;
-      this.policy = policy;
+    }
 …
       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, n.q, bestQuality);
       while (n.children != null) {
+        Console.WriteLine("{0,-30}", n.ident);
+        double maxVForRow = n.children.Select(ch => ch.q).Max();
+        if (maxVForRow == 0) maxVForRow = 1.0;
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
+          Console.Write("{0,5}", ch.ident);
+        }
         Console.WriteLine();
+        Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.q * 10))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.tries.ToString()))));
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
+          Console.Write("{0,5:F2}", ch.q * 10);
+        }
+        Console.WriteLine();
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(ch.q / maxVForRow);
+          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
+        }
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine();
         //n.policy.PrintStats();
         n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.q).First();
+      }
-      //Console.ReadLine();
+    }
 …
+          }
           // => select using bandit policy
           int selectedAltIdx = SelectAction(random, n.children);
+          int selectedAltIdx = SelectEpsGreedy(random, n.children);
           Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
 …
     // eps-greedy
     private int SelectAction(Random random, TreeNode[] children) {
+    private int SelectEpsGreedy(Random random, TreeNode[] children) {
       if (random.NextDouble() < 0.1) {
 …
       } else {
         var bestQ = double.NegativeInfinity;
         var bestChildIdx = -1;
+        var bestChildIdx = new List<int>();
         for (int i = 0; i < children.Length; i++) {
           if (children[i].done) continue;
+          if (children[i].tries == 0) return i;
+          if (children[i].q > bestQ) {
+            bestQ = children[i].q;
+            bestChildIdx = i;
+          // if (children[i].tries == 0) return i;
+          var q = children[i].q;
+          if (q > bestQ) {
+            bestQ = q;
+            bestChildIdx.Clear();
+            bestChildIdx.Add(i);
+          } else if (q == bestQ) {
+            bestChildIdx.Add(i);
+          }
+        }
         Debug.Assert(bestChildIdx > -1);
         return bestChildIdx;
+        Debug.Assert(bestChildIdx.Any());
+        return bestChildIdx.SelectRandom(random);
+      }
+    }
     private void DistributeReward(double reward) {
-      const double alpha = 0.1;
-      const double gamma = 1;
-      // iterate in reverse order (bottom up)
       updateChain.Reverse();
+      var nextQ = 0.0;
+      foreach (var e in updateChain) {
+        var node = e;
+        node.tries++;
+      foreach (var node in updateChain) {
         if (node.children != null && node.children.All(c => c.done)) {
           node.done = true;
+        }
+        // reward is received only for the last action
+        if (e == updateChain.First()) {
+          node.q = node.q + alpha * (reward + gamma * nextQ - node.q);
+          nextQ = node.q;
+        } else {
+          node.q = node.q + alpha * (0 + gamma * nextQ - node.q);
+          nextQ = node.q;
+        }
+      }
+      }
+      updateChain.Reverse();
+      //const double alpha = 0.1;
+      const double gamma = 1;
+      double alpha;
+      foreach (var p in updateChain.Zip(updateChain.Skip(1), Tuple.Create)) {
+        var parent = p.Item1;
+        var child = p.Item2;
+        parent.tries++;
+        alpha = 1.0 / parent.tries;
+        //alpha = 0.01;
+        parent.q = parent.q + alpha * (0 + gamma * child.q - parent.q);
+      }
+      // reward is received only for the last action
+      var n = updateChain.Last();
+      n.tries++;
+      alpha = 1.0 / n.tries;
+      //alpha = 0.1;
+      n.q = n.q + alpha * reward;
+    }

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

Update cookies preferences