Context Navigation

← Previous Change
Next Change →

HeuristicLab.Algorithms.GrammaticalOptimization

Timestamp:

01/10/15 14:06:29 (10 years ago)

Author:

gkronber

Message:

#2283: worked on contextual MCTS

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization

Files:

: 2 edited

MctsContextualSampler.cs (modified) (10 diffs)
MctsSampler.cs (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs

-                      r11742
+                      r11745
 using System.Text;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
   public class MctsContextualSampler {
     private class TreeNode {
+      public string ident;
+      public ReadonlySequence alt;
       public int randomTries;
       public int policyTries;
+      public int tries;
       public TreeNode[] children;
+      public readonly ReadonlySequence phrase;
+      public readonly ReadonlySequence alt;
+      // phrase represents the phrase of the state and alt represents how the phrase has been reached from the parent state
+      public TreeNode(ReadonlySequence phrase, ReadonlySequence alt) {
+        this.phrase = phrase;
+      public bool done = false;
+      public TreeNode(string id, ReadonlySequence alt) {
+        this.ident = id;
         this.alt = alt;
+      }
       public override string ToString() {
         return string.Format("Node({0} tries: {1})", phrase, randomTries + policyTries);
+        return string.Format("Node({0} tries: {1}, done: {2})", ident, tries, done);
+      }
+    }
 …
     private readonly Random random;
     private readonly int randomTries;
+    private readonly IGrammarPolicy policy;
+    private List<Tuple<ReadonlySequence, ReadonlySequence, ReadonlySequence>> updateChain;
+    private List<Tuple<TreeNode, TreeNode>> updateChain;
     private TreeNode rootNode;
     public int treeDepth;
     public int treeSize;
+    // public MctsSampler(IProblem problem, int maxLen, Random random) :
+    //   this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
+    //
+    // }
+    public MctsContextualSampler(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy policy) {
+    private double bestQuality;
+    public MctsContextualSampler(IProblem problem, int maxLen, Random random, int randomTries) {
       this.maxLen = maxLen;
       this.problem = problem;
       this.random = random;
       this.randomTries = randomTries;
+      this.policy = policy;
+      this.v = new Dictionary<string, double>(1000000);
+      this.tries = new Dictionary<string, int>(1000000);
+    }
     public void Run(int maxIterations) {
       double bestQuality = double.MinValue;
+      bestQuality = double.MinValue;
       InitPolicies(problem.Grammar);
       for (int i = 0; !policy.Done(rootNode.phrase) && i < maxIterations; i++) {
+      for (int i = 0; !rootNode.done && i < maxIterations; i++) {
         var sentence = SampleSentence(problem.Grammar).ToString();
         var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
 …
     public void PrintStats() {
       var n = rootNode;
       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
+      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, V(n), bestQuality);
       while (n.children != null) {
+        Console.WriteLine("{0}", n.ident);
+        double maxVForRow = n.children.Select(ch => V(ch)).Max();
+        if (maxVForRow == 0) maxVForRow = 1.0;
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.Write("{0,5}", ch.alt);
+        }
         Console.WriteLine();
+        Console.WriteLine("{0,5}->{1,-50}", n.alt, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.alt))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.Write("{0,5:F2}", V(ch) * 10);
+        }
+        Console.WriteLine();
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
+        }
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine();
         //n.policy.PrintStats();
         n = n.children.OrderByDescending(c => c.policyTries).First();
+      }
       Console.ReadLine();
+    }
+        n = n.children.Where(ch => !ch.done).OrderByDescending(c => V(c)).First();
+      }
+    }
     private void InitPolicies(IGrammar grammar) {
+      this.updateChain = new List<Tuple<ReadonlySequence, ReadonlySequence, ReadonlySequence>>();
+      rootNode = new TreeNode(new ReadonlySequence(grammar.SentenceSymbol), new ReadonlySequence("$"));
+      this.updateChain = new List<Tuple<TreeNode, TreeNode>>();
+      this.v.Clear();
+      this.tries.Clear();
+      rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
       treeDepth = 0;
       treeSize = 0;
 …
     private Sequence SampleSentence(IGrammar grammar) {
       updateChain.Clear();
+      var startPhrase = new Sequence(rootNode.phrase);
+      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
+      var startPhrase = new Sequence(grammar.SentenceSymbol);
       return CompleteSentence(grammar, startPhrase);
+    }
 …
       TreeNode parent = null;
       TreeNode n = rootNode;
-      bool done = false;
       var curDepth = 0;
+      while (!done) {
+        if (parent != null)
+          updateChain.Add(Tuple.Create(parent.phrase, n.alt, n.phrase));
+      while (!phrase.IsTerminal) {
+        updateChain.Add(Tuple.Create(n, parent));
         if (n.randomTries < randomTries) {
 …
           if (n.randomTries == randomTries && n.children == null) {
+            // create a new node for each alternative
             n.children = new TreeNode[alts.Count()];
             int cIdx = 0;
+            var i = 0;
             foreach (var alt in alts) {
               var newPhrase = new Sequence(phrase);
+              newPhrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, alt);
+              n.children[cIdx++] = new TreeNode(new ReadonlySequence(newPhrase), new ReadonlySequence(alt));
+              newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
+              if (!newPhrase.IsTerminal) newPhrase = newPhrase.Subsequence(0, newPhrase.FirstNonTerminalIndex + 1);
+              n.children[i++] = new TreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
+            }
             treeSize += n.children.Length;
+          }
+          n.policyTries++;
+          // => select using bandit policy
+          ReadonlySequence selectedAlt = policy.SelectAction(random, n.phrase, n.children.Select(c => c.alt));
+          // => select using eps-greedy
+          int selectedAltIdx = SelectEpsGreedy(random, n.children);
+          //int selectedAltIdx = SelectActionUCB1(random, n.children);
+          Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
           // replace nt with alt
 …
           curDepth++;
-          done = phrase.IsTerminal;
           // prepare for next iteration
           parent = n;
           n = n.children.Single(ch => ch.alt == selectedAlt); // TODO: perf
+          n = n.children[selectedAltIdx];
+        }
       } // while
+      n.policyTries++;
+      updateChain.Add(Tuple.Create(parent.phrase, n.alt, n.phrase));
+      updateChain.Add(Tuple.Create(n, parent));
+      // the last node is a leaf node (sentence is done), so we never need to visit this node again
+      n.done = true;
 …
       foreach (var e in updateChain) {
+        var state = e.Item1;
+        var action = e.Item2;
+        var newState = e.Item3;
+        policy.UpdateReward(state, action, reward, newState);
+        //policy.UpdateReward(action, reward / updateChain.Count);
+      }
+    }
+        var node = e.Item1;
+        var parent = e.Item2;
+        node.tries++;
+        if (node.children != null && node.children.All(c => c.done)) {
+          node.done = true;
+        }
+        UpdateV(node, reward);
+        // the reward for the parent is either the just received reward or the value of the best action so far
+        double value = 0.0;
+        if (parent != null) {
+          var doneChilds = parent.children.Where(ch => ch.done);
+          if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
+        }
+        //if (value > reward) reward = value;
+      }
+    }
+    private Dictionary<string, double> v;
+    private Dictionary<string, int> tries;
+    // Folds a reward into the value estimate stored for a node.
+    // Estimates are keyed by problem.CanonicalRepresentation(n.ident), so every node whose
+    // ident maps to the same canonical string reads and writes the same entry.
+    private void UpdateV(TreeNode n, double reward) {
+      string key = problem.CanonicalRepresentation(n.ident);
+      double current;
+      if (v.TryGetValue(key, out current)) {
+        // known state: move the estimate towards the reward with a constant step size of 0.005
+        // (a 1.0 / tries[key] running-average step is the alternative the author left disabled)
+        double delta = reward - current;
+        v[key] = current + 0.005 * delta;
+        tries[key]++;
+        return;
+      }
+      // first update for this key: the reward itself becomes the initial estimate
+      v.Add(key, reward);
+      tries.Add(key, 1);
+    }
+    // Returns the value estimate stored for the canonical representation of the node's ident;
+    // a key that has never been written by UpdateV yields 0.0.
+    private double V(TreeNode n) {
+      string key = problem.CanonicalRepresentation(n.ident);
+      double estimate;
+      return v.TryGetValue(key, out estimate) ? estimate : 0.0;
+    }
+    // Epsilon-greedy choice among the children that are not marked done.
+    // With probability 0.2 a random open child is returned; otherwise one of the open
+    // children with the highest V(...) is returned, ties being broken at random.
+    // Returns the index of the chosen child within the children array.
+    private int SelectEpsGreedy(Random random, TreeNode[] children) {
+      if (random.NextDouble() < 0.2) {
+        // exploration: pair each child with its index so the index survives the filtering
+        var openChildren = children
+          .Select((child, idx) => Tuple.Create(child, idx))
+          .Where(pair => !pair.Item1.done);
+        return openChildren.SelectRandom(random).Item2;
+      }
+
+      // exploitation: collect the indexes of all open children sharing the maximal value
+      double top = double.NegativeInfinity;
+      var candidates = new List<int>();
+      for (int idx = 0; idx < children.Length; idx++) {
+        TreeNode child = children[idx];
+        if (!child.done) {
+          double q = V(child);
+          if (q > top) {
+            // strictly better: earlier candidates are obsolete
+            top = q;
+            candidates.Clear();
+          }
+          if (q == top) {
+            candidates.Add(idx);
+          }
+        }
+      }
+      Debug.Assert(candidates.Any());
+      return candidates.SelectRandom(random);
+    }
+    // UCB1-style choice among the children that are not marked done.
+    // An open child with zero tries is returned immediately; otherwise the open child that
+    // maximizes V(child) + sqrt(2 * ln(total tries of all children) / child.tries) is returned.
+    // The random parameter is not used by this method.
+    // Returns the index of the chosen child within the children array.
+    private int SelectActionUCB1(Random random, TreeNode[] children) {
+      int visitsOfAllChildren = children.Sum(c => c.tries);
+      // the numerator of the exploration term is the same for every child
+      double explorationNumerator = 2 * Math.Log(visitsOfAllChildren);
+
+      int chosen = -1;
+      double chosenScore = double.NegativeInfinity;
+      for (int idx = 0; idx < children.Length; idx++) {
+        TreeNode child = children[idx];
+        if (child.done) continue;
+        // untried children take precedence over any scored child
+        if (child.tries == 0) return idx;
+
+        double score = V(child) + Math.Sqrt(explorationNumerator / child.tries);
+        if (score > chosenScore) {
+          chosenScore = score;
+          chosen = idx;
+        }
+      }
+      Debug.Assert(chosen > -1);
+      return chosen;
+    }
     private void RaiseSolutionEvaluated(string sentence, double quality) {
 …
       if (handler != null) handler(sentence, quality);
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

-                      r11744
+                      r11745
     public int treeSize;
     private double bestQuality;
-    // public MctsSampler(IProblem problem, int maxLen, Random random) :
-    //   this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
-    //
-    // }
     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11745 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

Download in other formats: