Context Navigation

← Previous Change
Next Change →

Changeset 11745 for branches

Timestamp:

01/10/15 14:06:29 (10 years ago)

Author:

gkronber

Message:

#2283: worked on contextual MCTS

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

: 1 added
: 7 edited

HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs (modified) (1 diff)
HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs (modified) (10 diffs)
HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs (modified) (1 diff)
HeuristicLab.Common/ConsoleEx.cs (added)
HeuristicLab.Common/HeuristicLab.Common.csproj (modified) (2 diffs)
HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs (modified) (1 diff)
HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs (modified) (1 diff)
Main/Program.cs (modified) (4 diffs)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs

-                      r11742
+                      r11745
   public class UCB1Policy : IBanditPolicy {
     public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
       var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray(); // TODO: performance
+      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
       int bestAction = -1;
       double bestQ = double.NegativeInfinity;
       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
+      for (int a = 0; a < myActionInfos.Length; a++) {
+        if (myActionInfos[a].Disabled) continue;
+        if (myActionInfos[a].Tries == 0) return a;
+        var q = myActionInfos[a].SumReward / myActionInfos[a].Tries + Math.Sqrt((2 * Math.Log(totalTries)) / myActionInfos[a].Tries);
+      int aIdx = -1;
+      foreach (var aInfo in myActionInfos) {
+        aIdx++;
+        if (aInfo.Disabled) continue;
+        if (aInfo.Tries == 0) return aIdx;
+        var q = aInfo.SumReward / aInfo.Tries + Math.Sqrt((2 * Math.Log(totalTries)) / aInfo.Tries);
         if (q > bestQ) {
           bestQ = q;
           bestAction = a;
+          bestAction = aIdx;
+        }
+      }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs

-                      r11742
+                      r11745
 using System.Text;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
   public class MctsContextualSampler {
     private class TreeNode {
+      public string ident;
+      public ReadonlySequence alt;
       public int randomTries;
       public int policyTries;
+      public int tries;
       public TreeNode[] children;
+      public readonly ReadonlySequence phrase;
+      public readonly ReadonlySequence alt;
+      // phrase represents the phrase of the state and alt represents how the phrase has been reached from the parent state
+      public TreeNode(ReadonlySequence phrase, ReadonlySequence alt) {
+        this.phrase = phrase;
+      public bool done = false;
+      public TreeNode(string id, ReadonlySequence alt) {
+        this.ident = id;
         this.alt = alt;
+      }
       public override string ToString() {
         return string.Format("Node({0} tries: {1})", phrase, randomTries + policyTries);
+        return string.Format("Node({0} tries: {1}, done: {2})", ident, tries, done);
+      }
+    }
 …
     private readonly Random random;
     private readonly int randomTries;
+    private readonly IGrammarPolicy policy;
+    private List<Tuple<ReadonlySequence, ReadonlySequence, ReadonlySequence>> updateChain;
+    private List<Tuple<TreeNode, TreeNode>> updateChain;
     private TreeNode rootNode;
     public int treeDepth;
     public int treeSize;
+    // public MctsSampler(IProblem problem, int maxLen, Random random) :
+    //   this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
+    //
+    // }
+    public MctsContextualSampler(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy policy) {
+    private double bestQuality;
+    public MctsContextualSampler(IProblem problem, int maxLen, Random random, int randomTries) {
       this.maxLen = maxLen;
       this.problem = problem;
       this.random = random;
       this.randomTries = randomTries;
+      this.policy = policy;
+      this.v = new Dictionary<string, double>(1000000);
+      this.tries = new Dictionary<string, int>(1000000);
+    }
     public void Run(int maxIterations) {
       double bestQuality = double.MinValue;
+      bestQuality = double.MinValue;
       InitPolicies(problem.Grammar);
       for (int i = 0; !policy.Done(rootNode.phrase) && i < maxIterations; i++) {
+      for (int i = 0; !rootNode.done && i < maxIterations; i++) {
         var sentence = SampleSentence(problem.Grammar).ToString();
         var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
 …
     public void PrintStats() {
       var n = rootNode;
       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
+      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, V(n), bestQuality);
       while (n.children != null) {
+        Console.WriteLine("{0}", n.ident);
+        double maxVForRow = n.children.Select(ch => V(ch)).Max();
+        if (maxVForRow == 0) maxVForRow = 1.0;
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.Write("{0,5}", ch.alt);
+        }
         Console.WriteLine();
+        Console.WriteLine("{0,5}->{1,-50}", n.alt, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.alt))));
+        Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.Write("{0,5:F2}", V(ch) * 10);
+        }
+        Console.WriteLine();
+        for (int i = 0; i < n.children.Length; i++) {
+          var ch = n.children[i];
+          Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
+          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
+        }
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine();
         //n.policy.PrintStats();
         n = n.children.OrderByDescending(c => c.policyTries).First();
+      }
       Console.ReadLine();
+    }
+        n = n.children.Where(ch => !ch.done).OrderByDescending(c => V(c)).First();
+      }
+    }
     private void InitPolicies(IGrammar grammar) {
+      this.updateChain = new List<Tuple<ReadonlySequence, ReadonlySequence, ReadonlySequence>>();
+      rootNode = new TreeNode(new ReadonlySequence(grammar.SentenceSymbol), new ReadonlySequence("$"));
+      this.updateChain = new List<Tuple<TreeNode, TreeNode>>();
+      this.v.Clear();
+      this.tries.Clear();
+      rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
       treeDepth = 0;
       treeSize = 0;
 …
     private Sequence SampleSentence(IGrammar grammar) {
       updateChain.Clear();
+      var startPhrase = new Sequence(rootNode.phrase);
+      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
+      var startPhrase = new Sequence(grammar.SentenceSymbol);
       return CompleteSentence(grammar, startPhrase);
+    }
 …
       TreeNode parent = null;
       TreeNode n = rootNode;
-      bool done = false;
       var curDepth = 0;
+      while (!done) {
+        if (parent != null)
+          updateChain.Add(Tuple.Create(parent.phrase, n.alt, n.phrase));
+      while (!phrase.IsTerminal) {
+        updateChain.Add(Tuple.Create(n, parent));
         if (n.randomTries < randomTries) {
 …
           if (n.randomTries == randomTries && n.children == null) {
+            // create a new node for each alternative
             n.children = new TreeNode[alts.Count()];
             int cIdx = 0;
+            var i = 0;
             foreach (var alt in alts) {
               var newPhrase = new Sequence(phrase);
+              newPhrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, alt);
+              n.children[cIdx++] = new TreeNode(new ReadonlySequence(newPhrase), new ReadonlySequence(alt));
+              newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
+              if (!newPhrase.IsTerminal) newPhrase = newPhrase.Subsequence(0, newPhrase.FirstNonTerminalIndex + 1);
+              n.children[i++] = new TreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
+            }
             treeSize += n.children.Length;
+          }
+          n.policyTries++;
+          // => select using bandit policy
+          ReadonlySequence selectedAlt = policy.SelectAction(random, n.phrase, n.children.Select(c => c.alt));
+          // => select using eps-greedy
+          int selectedAltIdx = SelectEpsGreedy(random, n.children);
+          //int selectedAltIdx = SelectActionUCB1(random, n.children);
+          Sequence selectedAlt = alts.ElementAt(selectedAltIdx);
           // replace nt with alt
 …
           curDepth++;
-          done = phrase.IsTerminal;
           // prepare for next iteration
           parent = n;
           n = n.children.Single(ch => ch.alt == selectedAlt); // TODO: perf
+          n = n.children[selectedAltIdx];
+        }
       } // while
+      n.policyTries++;
+      updateChain.Add(Tuple.Create(parent.phrase, n.alt, n.phrase));
+      updateChain.Add(Tuple.Create(n, parent));
+      // the last node is a leaf node (sentence is done), so we never need to visit this node again
+      n.done = true;
 …
       foreach (var e in updateChain) {
+        var state = e.Item1;
+        var action = e.Item2;
+        var newState = e.Item3;
+        policy.UpdateReward(state, action, reward, newState);
+        //policy.UpdateReward(action, reward / updateChain.Count);
+      }
+    }
+        var node = e.Item1;
+        var parent = e.Item2;
+        node.tries++;
+        if (node.children != null && node.children.All(c => c.done)) {
+          node.done = true;
+        }
+        UpdateV(node, reward);
+        // the reward for the parent is either the just received reward or the value of the best action so far
+        double value = 0.0;
+        if (parent != null) {
+          var doneChilds = parent.children.Where(ch => ch.done);
+          if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
+        }
+        //if (value > reward) reward = value;
+      }
+    }
+    private Dictionary<string, double> v;
+    private Dictionary<string, int> tries;
+    private void UpdateV(TreeNode n, double reward) {
+      var canonicalStr = problem.CanonicalRepresentation(n.ident);
+      //var canonicalStr = n.ident;
+      double stateV;
+      if (!v.TryGetValue(canonicalStr, out  stateV)) {
+        v.Add(canonicalStr, reward);
+        tries.Add(canonicalStr, 1);
+      } else {
+        v[canonicalStr] = stateV + 0.005 * (reward - stateV);
+        //v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
+        tries[canonicalStr]++;
+      }
+    }
+    private double V(TreeNode n) {
+      var canonicalStr = problem.CanonicalRepresentation(n.ident);
+      //var canonicalStr = n.ident;
+      double stateV;
+      if (!v.TryGetValue(canonicalStr, out  stateV)) {
+        return 0.0;
+      } else {
+        return stateV;
+      }
+    }
+    private int SelectEpsGreedy(Random random, TreeNode[] children) {
+      if (random.NextDouble() < 0.2) {
+        return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
+      } else {
+        var bestQ = double.NegativeInfinity;
+        var bestChildIdx = new List<int>();
+        for (int i = 0; i < children.Length; i++) {
+          if (children[i].done) continue;
+          // if (children[i].tries == 0) return i;
+          var q = V(children[i]);
+          if (q > bestQ) {
+            bestQ = q;
+            bestChildIdx.Clear();
+            bestChildIdx.Add(i);
+          } else if (q == bestQ) {
+            bestChildIdx.Add(i);
+          }
+        }
+        Debug.Assert(bestChildIdx.Any());
+        return bestChildIdx.SelectRandom(random);
+      }
+    }
+    private int SelectActionUCB1(Random random, TreeNode[] children) {
+      int bestAction = -1;
+      double bestQ = double.NegativeInfinity;
+      int totalTries = children.Sum(ch => ch.tries);
+      for (int a = 0; a < children.Length; a++) {
+        var ch = children[a];
+        if (ch.done) continue;
+        if (ch.tries == 0) return a;
+        var q = V(ch) + Math.Sqrt((2 * Math.Log(totalTries)) / ch.tries);
+        if (q > bestQ) {
+          bestQ = q;
+          bestAction = a;
+        }
+      }
+      Debug.Assert(bestAction > -1);
+      return bestAction;
+    }
     private void RaiseSolutionEvaluated(string sentence, double quality) {
 …
       if (handler != null) handler(sentence, quality);
+    }
+  }
+}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

-                      r11744
+                      r11745
     public int treeSize;
     private double bestQuality;
-    // public MctsSampler(IProblem problem, int maxLen, Random random) :
-    //   this(problem, maxLen, random, 10, (rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1)) {
-    //
-    // }
     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/HeuristicLab.Common.csproj

-                      r11727
+                      r11745
     <Reference Include="System" />
     <Reference Include="System.Core" />
+    <Reference Include="System.Drawing" />
     <Reference Include="System.Xml.Linq" />
     <Reference Include="System.Data.DataSetExtensions" />
 …
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="ConsoleEx.cs" />
     <Compile Include="Extensions.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

-                      r11742
+                      r11745
       var randSeed = 31415;
       var nArms = 20;
+      Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
+      Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
+      Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
+      Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
+      Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
+      Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
+      Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
+      Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
+      Console.WriteLine("Generic Thompson (Gaussian Mixture)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianMixtureModel()));
+      // Console.WriteLine("Threshold Ascent (20)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(20, 0.01));
+      // Console.WriteLine("Threshold Ascent (100)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(100, 0.01));
+      // Console.WriteLine("Threshold Ascent (500)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(500, 0.01));
+      // Console.WriteLine("Threshold Ascent (1000)"); TestPolicyGaussianMixture(randSeed, nArms, new ThresholdAscentPolicy(1000, 0.01));
+      // Console.WriteLine("Thompson (Gaussian orig)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy(true));
+      // Console.WriteLine("Thompson (Gaussian new)"); TestPolicyGaussianMixture(randSeed, nArms, new GaussianThompsonSamplingPolicy());
+      // Console.WriteLine("Generic Thompson (Gaussian fixed variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 0.1)));
+      // Console.WriteLine("Generic Thompson (Gaussian unknown variance)"); TestPolicyGaussianMixture(randSeed, nArms, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 1, 1, 1)));
       /*

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

-                      r11742
+                      r11745
     // right now only + and * are supported
+    public string CanonicalRepresentation(string terminalPhrase) {
+      var terms = terminalPhrase.Split('+');
+      return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch)))
+        .OrderBy(term => term));
+    public string CanonicalRepresentation(string phrase) {
+      var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
+      var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
+      var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
+      return string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
+    }
+    private string CanonicalTerm(string term) {
+      return string.Join("", term.OrderByDescending(ch => (byte)ch));
+    }
+  }

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r11744
+                      r11745
       // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
       // GaussianModelWithUnknownVariance (and Q= 0.99-quantile) also works well for Ant
       //var problem = new SantaFeAntProblem();
+      //var problem = new SantaFeAntProblem();
       //var problem = new SymbolicRegressionProblem("Tower");
       //var problem = new PalindromeProblem();
 …
       //var problem = new RoyalPairProblem();
       //var problem = new EvenParityProblem();
+      var alg = new MctsSampler(problem, 25, random, 0, new GenericThompsonSamplingPolicy(new LogitNormalModel()));
+      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 23, random, 0, new RandomPolicy());
+      //var alg = new MctsSampler(problem, 23, random, 0, new GaussianThompsonSamplingPolicy(true));
+      var alg = new MctsContextualSampler(problem, 23, random, 0);
+      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 17, random, 10, new EpsGreedyPolicy(0.1));
       //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
       //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
 …
         bestQuality = quality;
         bestSentence = sentence;
+        Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+        //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+        //Console.ReadLine();
       };
       alg.SolutionEvaluated += (sentence, quality) => {
 …
         globalStatistics.AddSentence(sentence, quality);
         if (iterations % 100 == 0) {
+          Console.Clear();
+          //if (iterations % 1000 == 0) Console.Clear();
+          Console.SetCursorPosition(0, 0);
           alg.PrintStats();
+        }
         if (iterations % 10000 == 0) {
           //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
           //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
           Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
+        }
       };

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

Update cookies preferences