Context Navigation

← Previous Change
Next Change →

Changeset 13658 for trunk

Timestamp:

03/07/16 14:50:02 (9 years ago)

Author:

gkronber

Message:

#2581: extracted policies from MCTS to allow experimentation with different policies for MCTS

Location:

trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4

Files:

4 edited

HeuristicLab.Algorithms.DataAnalysis-3.4.csproj (modified) (1 diff)
MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs (modified) (6 diffs)
MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs (modified) (14 diffs)
MctsSymbolicRegression/Tree.cs (modified) (1 diff)

Legend:

(no marker): Unmodified
+: Added
-: Removed

trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

-                      r13653
+                      r13658
     <Compile Include="MctsSymbolicRegression\MctsSymbolicRegressionStatic.cs" />
     <Compile Include="MctsSymbolicRegression\OpCodes.cs" />
+    <Compile Include="MctsSymbolicRegression\Policies\EpsGreedy.cs" />
+    <Compile Include="MctsSymbolicRegression\Policies\UcbTuned.cs" />
+    <Compile Include="MctsSymbolicRegression\Policies\IActionStatistics.cs" />
+    <Compile Include="MctsSymbolicRegression\Policies\IPolicy.cs" />
+    <Compile Include="MctsSymbolicRegression\Policies\PolicyBase.cs" />
+    <Compile Include="MctsSymbolicRegression\Policies\Ucb.cs" />
     <Compile Include="MctsSymbolicRegression\SymbolicExpressionGenerator.cs" />
     <Compile Include="MctsSymbolicRegression\Tree.cs" />

trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

-                      r13652
+                      r13658
 using System.Runtime.CompilerServices;
 using System.Threading;
+using HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies;
 using HeuristicLab.Analysis;
 using HeuristicLab.Common;
 …
     private const string AllowedFactorsParameterName = "Allowed factors";
     private const string ConstantOptimizationIterationsParameterName = "Iterations (constant optimization)";
     private const string CParameterName = "C";
+    private const string PolicyParameterName = "Policy";
     private const string SeedParameterName = "Seed";
     private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
 …
       get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
+    }
     public IFixedValueParameter<DoubleValue> CParameter {
       get { return (IFixedValueParameter<DoubleValue>)Parameters[CParameterName]; }
+    public IValueParameter<IPolicy> PolicyParameter {
+      get { return (IValueParameter<IPolicy>)Parameters[PolicyParameterName]; }
+    }
     public IFixedValueParameter<DoubleValue> PunishmentFactorParameter {
 …
       set { MaxVariableReferencesParameter.Value.Value = value; }
+    }
+    public double C {
+      get { return CParameter.Value.Value; }
+      set { CParameter.Value.Value = value; }
+    }
+    public IPolicy Policy {
+      get { return PolicyParameter.Value; }
+      set { PolicyParameter.Value = value; }
+    }
     public double PunishmentFactor {
       get { return PunishmentFactorParameter.Value.Value; }
 …
       Parameters.Add(new FixedValueParameter<IntValue>(MaxVariablesParameterName,
         "Maximal number of variables references in the symbolic regression models (multiple usages of the same variable are counted)", new IntValue(5)));
+      Parameters.Add(new FixedValueParameter<DoubleValue>(CParameterName,
+        "Balancing parameter in UCT formula (0 < c < 1000). Small values: greedy search. Large values: enumeration. Default: 1.0", new DoubleValue(1.0)));
+      // Parameters.Add(new FixedValueParameter<DoubleValue>(CParameterName,
+      //   "Balancing parameter in UCT formula (0 < c < 1000). Small values: greedy search. Large values: enumeration. Default: 1.0", new DoubleValue(1.0)));
+      Parameters.Add(new ValueParameter<IPolicy>(PolicyParameterName,
+        "The policy to use for selecting nodes in MCTS (e.g. Ucb)", new Ucb()));
+      PolicyParameter.Hidden = true;
       Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>(AllowedFactorsParameterName,
         "Choose which expressions are allowed as factors in the model.", defaultFactorsList));
 …
       var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
       if (!AllowedFactors.CheckedItems.Any()) throw new ArgumentException("At least on type of factor must be allowed");
+      var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)Seed, MaxVariableReferences, C, ScaleVariables, ConstantOptimizationIterations,
+      var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)Seed, MaxVariableReferences, ScaleVariables, ConstantOptimizationIterations,
+        Policy,
         lowerLimit, upperLimit,
         allowProdOfVars: AllowedFactors.CheckedItems.Any(s => s.Value.Value == VariableProductFactorName),

trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs

-                      r13657
+                      r13658
 using System.Diagnostics.Contracts;
 using System.Linq;
+using HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies;
 using HeuristicLab.Common;
 using HeuristicLab.Core;
 …
       internal readonly Automaton automaton;
       internal IRandom random { get; private set; }
-      internal readonly double c;
       internal readonly Tree tree;
-      internal readonly List<Tree> bestChildrenBuf;
       internal readonly Func<byte[], int, double> evalFun;
+      internal readonly IPolicy treePolicy;
       // MCTS might get stuck. Track statistics on the number of effective rollouts
       internal int totalRollouts;
 …
       private readonly double[][] gradBuf;
+      public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, double c, bool scaleVariables, int constOptIterations,
+      public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables, int constOptIterations,
+        IPolicy treePolicy = null,
         double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
         bool allowProdOfVars = true,
 …
         this.problemData = problemData;
-        this.c = c;
         this.constOptIterations = constOptIterations;
         this.evalFun = this.Eval;
 …
         this.automaton = new Automaton(x, maxVariables, allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
+        this.tree = new Tree() { state = automaton.CurrentState };
+        this.treePolicy = treePolicy ?? new Ucb();
+        this.tree = new Tree() { state = automaton.CurrentState, actionStatistics = treePolicy.CreateActionStatistics() };
         // reset best solution
 …
         this.ones = Enumerable.Repeat(1.0, MaxParams).ToArray();
         constsBuf = new double[MaxParams];
-        this.bestChildrenBuf = new List<Tree>(2 * x.Length); // the number of follow states in the automaton is O(number of variables) 2 * number of variables should be sufficient (capacity is increased if necessary anyway)
         this.predBuf = new double[y.Length];
         this.testPredBuf = new double[testY.Length];
 …
       #region IState inferface
       public bool Done { get { return tree != null && tree.done; } }
+      public bool Done { get { return tree != null && tree.Done; } }
       public double BestSolutionTrainingQuality {
 …
+    }
+    public static IState CreateState(IRegressionProblemData problemData, uint randSeed, int maxVariables = 3, double c = 1.0,
+      bool scaleVariables = true, int constOptIterations = 0, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
+    public static IState CreateState(IRegressionProblemData problemData, uint randSeed, int maxVariables = 3,
+      bool scaleVariables = true, int constOptIterations = 0,
+      IPolicy policy = null,
+      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
       bool allowProdOfVars = true,
       bool allowExp = true,
 …
       bool allowMultipleTerms = false
       ) {
+      return new State(problemData, randSeed, maxVariables, c, scaleVariables, constOptIterations,
+      return new State(problemData, randSeed, maxVariables, scaleVariables, constOptIterations,
+        policy,
         lowerEstimationLimit, upperEstimationLimit,
         allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
 …
       var tree = mctsState.tree;
       var eval = mctsState.evalFun;
-      var bestChildrenBuf = mctsState.bestChildrenBuf;
       var rand = mctsState.random;
       double c = mctsState.c;
+      var treePolicy = mctsState.treePolicy;
       double q = 0;
-      double deltaQ = 0;
-      double deltaSqrQ = 0;
-      int deltaVisits = 0;
       bool success = false;
       do {
         automaton.Reset();
         success = TryTreeSearchRec(rand, tree, c, automaton, eval, bestChildrenBuf, out q, out deltaQ, out deltaSqrQ, out deltaVisits);
+        success = TryTreeSearchRec(rand, tree, automaton, eval, treePolicy, out q);
         mctsState.totalRollouts++;
       } while (!success && !tree.done);
+      } while (!success && !tree.Done);
       mctsState.effectiveRollouts++;
       return q;
 …
     // in this case we get stuck we just restart
     // see ConstraintHandler.cs for more info
+    private static bool TryTreeSearchRec(IRandom rand, Tree tree, double c, Automaton automaton, Func<byte[], int, double> eval, List<Tree> bestChildrenBuf,
+      out double q, // quality of the expression
+      out double deltaQ, out double deltaSqrQ, out int deltaVisits // the updates for total quality and number of visits (can be negative if branches have been fully explored)
+      ) {
+    private static bool TryTreeSearchRec(IRandom rand, Tree tree, Automaton automaton, Func<byte[], int, double> eval, IPolicy treePolicy,
+      out double q) {
       Tree selectedChild = null;
       Contract.Assert(tree.state == automaton.CurrentState);
       Contract.Assert(!tree.done);
+      Contract.Assert(!tree.Done);
       if (tree.children == null) {
         if (automaton.IsFinalState(tree.state)) {
           // final state
           tree.done = true;
+          tree.Done = true;
           // EVALUATE
 …
           automaton.GetCode(out code, out nParams);
           q = eval(code, nParams);
+          tree.visits += 1;
+          tree.sumQuality += q;
+          tree.sumSqrQuality += q * q;
+          deltaQ = q;
+          deltaVisits = 1;
+          deltaSqrQ = q * q;
+          treePolicy.Update(tree.actionStatistics, q);
           return true; // we reached a final state
         } else {
 …
             // stuck in a dead end (no final state and no allowed follow states)
             q = 0;
+            deltaQ = 0;
+            deltaSqrQ = 0.0;
+            deltaVisits = 0;
+            tree.done = true;
+            tree.Done = true;
             tree.children = null;
-            tree.visits = 1;
             return false;
+          }
           tree.children = new Tree[nFs];
           for (int i = 0; i < tree.children.Length; i++)
             tree.children[i] = new Tree() { children = null, done = false, state = possibleFollowStates[i], visits = 0 };
+            tree.children[i] = new Tree() { children = null, state = possibleFollowStates[i], actionStatistics = treePolicy.CreateActionStatistics() };
           selectedChild = nFs > 1 ? SelectFinalOrRandom(automaton, tree, rand) : tree.children[0];
 …
         // tree.children != null
         // UCT selection within tree
+        selectedChild = tree.children.Length > 1 ? SelectUctTuned(tree, rand, c, bestChildrenBuf) : tree.children[0];
+        int selectedIdx = 0;
+        if (tree.children.Length > 1) {
+          selectedIdx = treePolicy.Select(tree.children.Select(ch => ch.actionStatistics), rand);
+        }
+        selectedChild = tree.children[selectedIdx];
+      }
       // make selected step and recurse
       automaton.Goto(selectedChild.state);
+      var success = TryTreeSearchRec(rand, selectedChild, c, automaton, eval, bestChildrenBuf,
+        out q, out deltaQ, out deltaSqrQ, out deltaVisits);
+      var success = TryTreeSearchRec(rand, selectedChild, automaton, eval, treePolicy, out q);
       if (success) {
         // only update if successful
+        tree.sumQuality += deltaQ;
+        tree.sumSqrQuality += deltaSqrQ;
+        tree.visits += deltaVisits;
+      }
+      if (tree.children.All(ch => ch.done)) {
+        tree.done = true;
+        // update parent nodes to remove information from this branch
+        if (tree.children.Length > 1) {
+          deltaQ = -(tree.sumQuality - deltaQ);
+          deltaSqrQ = -(tree.sumSqrQuality - deltaSqrQ);
+          deltaVisits = -(tree.visits - deltaVisits);
+        }
+        treePolicy.Update(tree.actionStatistics, q);
+      }
+      tree.Done = tree.children.All(ch => ch.Done);
+      if (tree.Done) {
         tree.children = null; // cut off the sub-branch if it has been fully explored
+      }
       return success;
+    }
-    private static Tree SelectUct(Tree tree, IRandom rand, double c, List<Tree> bestChildrenBuf) {
-      // determine total tries of still active children
-      int totalTries = 0;
-      bestChildrenBuf.Clear();
-      for (int i = 0; i < tree.children.Length; i++) {
-        var ch = tree.children[i];
-        if (ch.done) continue;
-        if (ch.visits == 0) bestChildrenBuf.Add(ch);
-        else totalTries += tree.children[i].visits;
+      }
-      // if there are unvisited children select a random child
-      if (bestChildrenBuf.Any()) {
-        return bestChildrenBuf[rand.Next(bestChildrenBuf.Count)];
+      }
-      Contract.Assert(totalTries > 0); // the tree is not done yet so there is at least on child that is not done
-      double logTotalTries = Math.Log(totalTries);
-      var bestQ = double.NegativeInfinity;
-      for (int i = 0; i < tree.children.Length; i++) {
-        var ch = tree.children[i];
-        if (ch.done) continue;
-        var childQ = ch.AverageQuality + c * Math.Sqrt(logTotalTries / ch.visits);
-        if (childQ > bestQ) {
-          bestChildrenBuf.Clear();
-          bestChildrenBuf.Add(ch);
-          bestQ = childQ;
-        } else if (childQ >= bestQ) {
-          bestChildrenBuf.Add(ch);
+        }
+      }
-      return bestChildrenBuf[rand.Next(bestChildrenBuf.Count)];
+    }
-    private static Tree SelectUctTuned(Tree tree, IRandom rand, double c, List<Tree> bestChildrenBuf) {
-      // determine total tries of still active children
-      int totalTries = 0;
-      bestChildrenBuf.Clear();
-      for (int i = 0; i < tree.children.Length; i++) {
-        var ch = tree.children[i];
-        if (ch.done) continue;
-        if (ch.visits == 0) bestChildrenBuf.Add(ch);
-        else totalTries += tree.children[i].visits;
+      }
-      // if there are unvisited children select a random child
-      if (bestChildrenBuf.Any()) {
-        return bestChildrenBuf[rand.Next(bestChildrenBuf.Count)];
+      }
-      Contract.Assert(totalTries > 0); // the tree is not done yet so there is at least on child that is not done
-      double logTotalTries = Math.Log(totalTries);
-      var bestQ = double.NegativeInfinity;
-      for (int i = 0; i < tree.children.Length; i++) {
-        var ch = tree.children[i];
-        if (ch.done) continue;
-        var varianceBound = ch.QualityVariance + Math.Sqrt(2.0 * logTotalTries / ch.visits);
-        if (varianceBound > 0.25) varianceBound = 0.25;
-        var childQ = ch.AverageQuality + c * Math.Sqrt(logTotalTries / ch.visits * varianceBound);
-        if (childQ > bestQ) {
-          bestChildrenBuf.Clear();
-          bestChildrenBuf.Add(ch);
-          bestQ = childQ;
-        } else if (childQ >= bestQ) {
-          bestChildrenBuf.Add(ch);
+        }
+      }
-      return bestChildrenBuf[rand.Next(bestChildrenBuf.Count)];
+    }

trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Tree.cs

-                      r13657
+                      r13658
 #endregion
+using HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies;
 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
   // represents tree nodes for the search tree in MCTS
   internal class Tree {
     public int state;
+    public int visits;
+    public double sumQuality;
+    public double sumSqrQuality; // for variance
+    public double AverageQuality { get { return sumQuality / (double)visits; } }
+    public double QualityVariance { get { return sumSqrQuality / (double)visits - AverageQuality * AverageQuality; } }
+    public bool done;
+    public bool Done {
+      get { return actionStatistics.Done; }
+      set { actionStatistics.Done = value; }
+    }
+    public IActionStatistics actionStatistics;
     public Tree[] children;
+  }

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

Update cookies preferences