Context Navigation

← Previous Change
Next Change →

Changeset 15437 for branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis

Timestamp:

10/27/17 18:42:04 (7 years ago)

Author:

gkronber

Message:

#2796 comments, simplifications, reviewed tests for structure enumeration (not working)

Location:

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4

Files:

6 edited

HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg.csproj (modified) (2 diffs)
Heuristics.cs (modified) (1 diff)
MctsSymbolicRegression/Automaton.cs (modified) (7 diffs)
MctsSymbolicRegression/Disassembler.cs (modified) (1 diff)
MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs (modified) (1 diff)
MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs (modified) (17 diffs)

Legend:

(no marker): Unmodified
+: Added
-: Removed

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg.csproj

-                      r15425
+                      r15437
     <Compile Include="MctsSymbolicRegression\Automaton.cs" />
     <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
-    <Compile Include="MctsSymbolicRegression\ConstraintHandler.cs" />
     <Compile Include="MctsSymbolicRegression\Disassembler.cs" />
     <Compile Include="MctsSymbolicRegression\ExpressionEvaluator.cs" />
 …
     <Compile Include="MctsSymbolicRegression\Policies\IPolicy.cs" />
     <Compile Include="MctsSymbolicRegression\Policies\PolicyBase.cs" />
-    <Compile Include="MctsSymbolicRegression\Policies\Ucb.cs" />
-    <Compile Include="MctsSymbolicRegression\Policies\UcbTuned.cs" />
     <Compile Include="MctsSymbolicRegression\ExprHash.cs" />
     <Compile Include="MctsSymbolicRegression\EmptyConstraintHandler.cs" />

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/Heuristics.cs

-                      r15426
+                      r15437
   //           - even if variables are colinear?
   //           - even for non-linear transformations
+  //
+  // Also see Multi-variate adaptive regression splines (MARS)
+  // Maybe we could use MARS-style basis functions to identify the relevant interaction terms. (tune split points and find the optimal interaction term with maximum Spearman's rank correlation)
+  //
   // assuming we have interactions of scaled/shifted variables (x + xo) * (y + yo) with constant xo and yo
   // this leads to: x y + x yo + y xo + yo xo.

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs

-                      r15420
+                      r15437
   //
   internal class Automaton {
-    // TODO: refactor so that State is an enumerable type
     // there is a single final state (ExprEnd)
     // states with lower values are closer to the final state
     // (this is helpful when we try to navigate to the final state)
+    // we cannot use an enum type here because the set of states is dynamic (including states for variables)
     public const int StateExprEnd = 1;
     public const int StateTermEnd = 2;
 …
     public const int FirstDynamicState = 25;
     // more states for individual variables are created dynamically
+    public readonly List<string> stateNames = new List<string>() {
+       string.Empty,
+      "ExprEnd",
+      "TermEnd",
+      "FactorEnd",
+      "VariableFactorEnd",
+      "ExpFactorEnd",
+      "LogFactorEnd",
+      "InvFactorEnd",
+      "ExpFEnd",
+      "LogTEnd",
+      "InvTEnd",
+      "LogTFEnd",
+      "InfTFEnd",
+      "LogTFStart",
+      "InvTFStart",
+      "ExpFStart",
+      "LogTStart",
+      "InvTStart",
+      "VariableFactorStart",
+      "ExpFactorStart",
+      "LogFactorStart",
+      "InvFactorStart",
+      "FactorStart",
+      "TermStart",
+      "Expr",
+    };
     private const int StartState = StateExpr;
     public int CurrentState { get; private set; }
-    public readonly List<string> stateNames;
     private List<int>[] followStates;
     private List<Action>[,] actions; // not every follow state is possible but this representation should be efficient
 …
        bool allowMultipleTerms = false) {
       int nVars = vars.Length;
-      stateNames = new List<string>() { string.Empty, "Expr", "ExprEnd", "TermStart", "TermEnd", "FactorStart", "FactorEnd", "VarFactorStart", "VarFactorEnd", "ExpFactorStart", "ExpFactorEnd", "LogFactorStart", "LogFactorEnd", "InvFactorStart", "InvFactorEnd", "ExpFStart", "ExpFEnd", "LogTStart", "LogTEnd", "LogTFStart", "LogTFEnd", "InvTStart", "InvTEnd", "InvTFStart", "InvTFEnd" };
       codeGenerator = new CodeGenerator();
       this.constraintHandler = constraintHandler;
 …
+    }
+    private readonly int[] followStatesBuf = new int[1000];
+    public void FollowStates(int state, out int[] buf, out int nElements) {
+    public void FollowStates(int state, ref int[] buf, out int nElements) {
       var fs = followStates[state];
       int j = 0;
 …
         var s = fs[i];
         if (constraintHandler.IsAllowedFollowState(state, s)) {
           followStatesBuf[j++] = s;
+          buf[j++] = s;
+        }
+      }
-      buf = followStatesBuf;
       nElements = j;
+    }
 …
     internal string GetActionString(int fromState, int toState) {
       return actionStrings[fromState,toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : "";
+      return actionStrings[fromState, toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : "";
+    }
 …
         writer.WriteLine("digraph {");
         // writer.WriteLine("rankdir=LR");
         for (int s = StartState; s < stateNames.Count; s++) {
+        for (int s = 1; s < stateNames.Count; s++) {
           for (int i = 0; i < followStates[s].Count; i++) {
             if (followStates[s][i] <= 0) continue;

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Disassembler.cs

-                      r15414
+                      r15437
         switch (op) {
           case (byte)OpCodes.Add: sb.Append(" + "); break;
           case (byte)OpCodes.Mul: sb.Append(""); break;
           case (byte)OpCodes.LoadConst1: break;
           case (byte)OpCodes.LoadConst0: break;
           case (byte)OpCodes.LoadParamN: break;
+          case (byte)OpCodes.Mul: sb.Append(" * "); break;
+          case (byte)OpCodes.LoadConst1: sb.Append(" 1 ");  break;
+          case (byte)OpCodes.LoadConst0: sb.Append(" 0 "); break;
+          case (byte)OpCodes.LoadParamN: sb.Append(" c "); break;
           case (byte)OpCodes.LoadVar: {
               short arg = (short)((code[pc] << 8) | code[pc + 1]);

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

r15416	r15437
186	186	// "Balancing parameter in UCT formula (0 < c < 1000). Small values: greedy search. Large values: enumeration. Default: 1.0", new DoubleValue(1.0)));
187	187	Parameters.Add(new ValueParameter<IPolicy>(PolicyParameterName,
188		"The policy to use for selecting nodes in MCTS (e.g. Ucb)", new Ucb()));
	188	"The policy to use for selecting nodes in MCTS", new EpsilonGreedy()));
189	189	PolicyParameter.Hidden = true;
190	190	Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>(AllowedFactorsParameterName,

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs

-                      r15425
+                      r15437
     // TODO: Solve Poly-10
     // TODO: After state unification the recursive backpropagation of results takes a lot of time. How can this be improved?
     // TODO: Why is the algorithm so slow for rather greedy policies (e.g. low C value in UCB)?
     // TODO: check if we can use a quality measure with range [-1..1] in policies
+    // ~~obsolete TODO: Why is the algorithm so slow for rather greedy policies (e.g. low C value in UCB)?
+    // ~~obsolete TODO: check if we can use a quality measure with range [-1..1] in policies
     // TODO: unit tests for benchmark problems which contain log / exp / x^-1 but without numeric constants
     // TODO: check if transformation of y is correct and works (Obj 2)
 …
       private readonly double[] predBuf, testPredBuf;
       private readonly double[][] gradBuf;
-      // debugging stats
-      // calculate for each level the number of alternatives the average 'inequality' of tries and 'inequality' of quality over the alternatives for each trie
-      // inequality can be calculated using the Gini coefficient
-      internal readonly double[] pathGiniCoeffs = new double[100];
-      internal readonly double[] pathQs = new double[100];
-      internal readonly double[] levelBestQ = new double[100];
-      // internal readonly double[] levelMaxTries = new double[100];
-      internal readonly double[] pathBestQ = new double[100]; // as long as pathBestQs = levelBestQs we are following the correct path
-      internal readonly string[] levelBestAction = new string[100];
-      internal readonly string[] curAction = new string[100];
-      internal readonly double[] pathSelectedQ = new double[100];
       public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables,
 …
         this.automaton = new Automaton(x, new SimpleConstraintHandler(maxVariables), allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
         this.treePolicy = treePolicy ?? new Ucb();
+        this.treePolicy = treePolicy ?? new EpsilonGreedy();
         this.tree = new Tree() {
           state = automaton.CurrentState,
 …
       #endregion
-#if DEBUG
-      internal void ClearStats() {
-        for (int i = 0; i < pathGiniCoeffs.Length; i++) pathGiniCoeffs[i] = -1;
-        for (int i = 0; i < pathQs.Length; i++) pathGiniCoeffs[i] = -99;
-        for (int i = 0; i < pathBestQ.Length; i++) pathBestQ[i] = -99;
-        for (int i = 0; i < pathSelectedQ.Length; i++) pathSelectedQ[i] = -99;
+      }
-      internal void WriteGiniStats() {
-        Console.WriteLine(string.Join("\t", pathGiniCoeffs.TakeWhile(x => x >= 0).Select(x => string.Format("{0:N3}", x))));
+      }
-      internal void WriteQs() {
-        // Console.WriteLine(string.Join("\t", pathQs.TakeWhile(x => x >= -100).Select(x => string.Format("{0:N3}", x))));
-        var sb = new StringBuilder();
-        // length
-        int i = 0;
-        while (i < pathBestQ.Length && pathBestQ[i] > -99 && pathBestQ[i] == levelBestQ[i]) {
-          i++;
+        }
-        sb.AppendFormat("{0,-3}",i);
-        i = 0;
-        // sb.AppendFormat("{0:N3}", levelBestQ[0]);
-        while (i < pathSelectedQ.Length && pathSelectedQ[i] > -99) {
-          sb.AppendFormat("\t{0:N3}", pathSelectedQ[i]);
-          i++;
+        }
-        Console.WriteLine(sb.ToString());
-        sb.Clear();
-        i = 0;
-        // sb.AppendFormat("{0:N3}", levelBestQ[0]);
-        while (i < pathBestQ.Length && pathBestQ[i] > -99) {
-          sb.AppendFormat("\t{0:N3}", pathBestQ[i]);
-          i++;
+        }
-        Console.WriteLine(sb.ToString());
-        sb.Clear();
-        i = 0;
-        while (i < pathBestQ.Length && pathBestQ[i] > -99) {
-          sb.AppendFormat("\t{0:N3}", levelBestQ[i]);
-          i++;
+        }
-        Console.WriteLine(sb.ToString());
-        sb.Clear();
-        i = 0;
-        while (i < pathBestQ.Length && pathBestQ[i] > -99) {
-          sb.AppendFormat("\t{0,-5}", (curAction[i] != null && curAction[i].Length > 5) ? curAction[i].Substring(0, 5) : curAction[i]);
-          i++;
+        }
-        Console.WriteLine(sb.ToString());
-        sb.Clear();
-        i = 0;
-        while (i < pathBestQ.Length && pathBestQ[i] > -99) {
-          sb.AppendFormat("\t{0,-5}", (levelBestAction[i] != null && levelBestAction[i].Length > 5) ? levelBestAction[i].Substring(0, 5) : levelBestAction[i]);
-          i++;
+        }
-        Console.WriteLine(sb.ToString());
-        Console.WriteLine();
+      }
-#endif
+    }
 …
       bool success = false;
       do {
+#if DEBUG
+        mctsState.ClearStats();
+#endif
         automaton.Reset();
         success = TryTreeSearchRec2(rand, tree, automaton, eval, treePolicy, mctsState, out q);
 …
 #if DEBUG
-      // mctsState.WriteGiniStats();
       Console.WriteLine(ExprStr(automaton));
-      mctsState.WriteQs();
-      // Console.WriteLine(WriteStatistics(tree, mctsState));
 #endif
-      //if (mctsState.effectiveRollouts % 100 == 1) {
-      // Console.WriteLine(WriteTree(tree, mctsState));
-      // Console.WriteLine(TraceTree(tree, mctsState));
-      //}
       return q;
+    }
 …
       while (!automaton.IsFinalState(automaton.CurrentState)) {
+        Console.WriteLine(automaton.stateNames[automaton.CurrentState]);
         if (state.children.ContainsKey(tree)) {
           if (state.children[tree].All(ch => ch.Done)) {
 …
             selectedIdx = treePolicy.Select(state.children[tree].Select(ch => ch.actionStatistics), rand);
+          }
-          // STATS
-          state.pathGiniCoeffs[tree.level] = InequalityCoefficient(state.children[tree].Select(ch => (double)ch.actionStatistics.AverageQuality));
-          state.pathQs[tree.level] = tree.actionStatistics.AverageQuality;
           tree = state.children[tree][selectedIdx];
           // move the automaton forward until reaching the state
           // all steps where no alternatives are possible are immediately taken
+          // all steps where no alternatives are possible can be taken immediately (without expanding the tree)
           // TODO: simplification of the automaton
           int[] possibleFollowStates;
+          int[] possibleFollowStates = new int[1000];
           int nFs;
+          automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
+          // TODO!
+          // while (possibleFollowStates[0] != tree.state && nFs == 1 &&
+          //   !automaton.IsEvalState(possibleFollowStates[0]) && !automaton.IsFinalState(possibleFollowStates[0])) {
+          //   automaton.Goto(possibleFollowStates[0]);
+          //   automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
+          // }
+          automaton.FollowStates(automaton.CurrentState, ref possibleFollowStates, out nFs);
           Debug.Assert(possibleFollowStates.Contains(tree.state));
           automaton.Goto(tree.state);
         } else {
           // EXPAND
           int[] possibleFollowStates;
+          int[] possibleFollowStates = new int[1000];
           int nFs;
           string actionString = "";
+          automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
+          // TODO
+          // while (nFs == 1 && !automaton.IsEvalState(possibleFollowStates[0]) && !automaton.IsFinalState(possibleFollowStates[0])) {
+          //   actionString += " " + automaton.GetActionString(automaton.CurrentState, possibleFollowStates[0]);
+          //   // no alternatives -> just go to the next state
+          //   automaton.Goto(possibleFollowStates[0]);
+          //   automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
+          // }
+          automaton.FollowStates(automaton.CurrentState, ref possibleFollowStates, out nFs);
           if (nFs == 0) {
             // stuck in a dead end (no final state and no allowed follow states)
 …
             // for selected states (EvalStates) we introduce state unification (detection of equivalent states)
             if (automaton.IsEvalState(possibleFollowStates[i])) {
               var hc = Hashcode(automaton);
+              var hc = Hashcode(automaton); // TODO fix unit test for structure enumeration
               if (!state.nodes.TryGetValue(hc, out child)) {
                 child = new Tree() {
 …
+              }
               // only allow forward edges (don't add the child if we would go back in the graph)
               else if (child.level > tree.level)  {
+              else if (child.level > tree.level) {
                 // whenever we join paths we need to propagate back the statistics of the existing node through the newly created link
                 // to all parents
 …
                 Debug.Assert(child.level <= tree.level);
                 child = null;
+              }
+              }
             } else {
               child = new Tree() {
 …
         q = eval(code, nParams);
         // Console.WriteLine("{0:N4}\t{1}", q*q, tree.expr);
-        q = TransformQuality(q);
         success = true;
         BackpropagateQuality(tree, q, treePolicy, state);
+        BackpropagateQuality(tree, q, treePolicy, state);
       } else {
         // we got stuck in roll-out (no evaluation necessary!)
 …
       return success;
+    }
-    private static double InequalityCoefficient(IEnumerable<double> xs) {
-      var arr = xs.ToArray();
-      var sad = 0.0;
-      var sum = 0.0;
-      for(int i=0;i<arr.Length;i++) {
-        for(int j=0;j<arr.Length;j++) {
-          sad += Math.Abs(arr[i] - arr[j]);
-          sum += arr[j];
+        }
+      }
-      return 0.5 * sad / sum;
+    }
-    private static double TransformQuality(double q) {
-      // no transformation
-      return q;
-      // EXPERIMENTAL!
-      // Fisher transformation
-      // (assumes q is Correl(pred, target)
-      q = Math.Min(q,  0.99999999);
-      q = Math.Max(q, -0.99999999);
-      return 0.5 * Math.Log((1 + q) / (1 - q));
-      // optimal result: q = 1 -> return huge value
-      // if (q >= 1.0) return 1E16;
-      // // return number of 9s in R²
-      // return -Math.Log10(1 - q);
+    }
 …
+      }
-      state.pathSelectedQ[tree.level] = tree.actionStatistics.AverageQuality;
-      state.pathBestQ[tree.level] = tree.actionStatistics.BestQuality;
-      state.curAction[tree.level] = tree.expr;
-      if (state.levelBestQ[tree.level] < tree.actionStatistics.BestQuality) {
-        state.levelBestQ[tree.level] = tree.actionStatistics.BestQuality;
-        state.levelBestAction[tree.level] = tree.expr;
+      }
+    }
 …
       Tree minChild = children.First();
       for (int i = 1; i < children.Count; i++) {
         if(children[i].state < minChild.state)
+        if (children[i].state < minChild.state)
           selectedChildIdx = i;
+      }
 …
         } else {
           // EXPAND
           int[] possibleFollowStates;
+          int[] possibleFollowStates = new int[1000];
           int nFs;
           automaton.FollowStates(automaton.CurrentState, out possibleFollowStates, out nFs);
+          automaton.FollowStates(automaton.CurrentState, ref possibleFollowStates, out nFs);
           if (nFs == 0) {
             // stuck in a dead end (no final state and no allowed follow states)
 …
+        }
         foreach(var tup in list) {
+        foreach (var tup in list) {
           var ch = tup.Item3;
           var chId = tup.Item2;
           if(state.children.ContainsKey(ch) && state.children[ch].Count == 1) {
+          if (state.children.ContainsKey(ch) && state.children[ch].Count == 1) {
             var chch = state.children[ch].First();
             nextId++;

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

Update cookies preferences