Context Navigation

← Previous Change
Next Change →

Changeset 15606 for branches/MCTS-SymbReg-2796

Timestamp:

01/12/18 16:27:39 (7 years ago)

Author:

gkronber

Message:

#2796: comments and typos

Location:

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression

Files:

: 8 edited

ApproximateDoubleEqualityComparer.cs (modified) (3 diffs)
Automaton.cs (modified) (3 diffs)
ExprHashSymbolic.cs (modified) (5 diffs)
ExpressionEvaluator.cs (modified) (1 diff)
MctsSymbolicRegressionAlgorithm.cs (modified) (2 diffs)
MctsSymbolicRegressionStatic.cs (modified) (9 diffs)
SymbolicExpressionGenerator.cs (modified) (1 diff)
Tree.cs (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ApproximateDoubleEqualityComparer.cs

-                      r15414
+                      r15606
 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
+  // unused?
   internal class ApproximateDoubleEqualityComparer : IEqualityComparer<double> {
     public bool Equals(double x, double y) {
 …
       var yl = (ulong)BitConverter.DoubleToInt64Bits(y);
       xl = xl & 0xFFFFFFFFFFFFFFE0;
+      xl = xl & 0xFFFFFFFFFFFFFFE0; // ignore least significant bits
       yl = yl & 0xFFFFFFFFFFFFFFE0;
 …
       var bits = (ulong)BitConverter.DoubleToInt64Bits(obj);
       bits = bits & 0xFFFFFFFFFFFFFFE0;
+      bits = bits & 0xFFFFFFFFFFFFFFE0; // ignore least significant bits
       return (int)bits;

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs

-                      r15441
+                      r15606
 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
   // this is the core class for generating expressions.
   // it represents a finite state automaton, each state transition can be associated with an action (e.g. to produce code).
   // the automaton determines the possible structures for expressions.
+  // This is the core class for generating expressions.
+  // It represents a finite state automaton, each state transition can be associated with an action (e.g. to produce code).
+  // The automaton determines the possible structures for expressions.
   //
   // to understand this code it is worthwile to generate a graphical visualization of the automaton (see PrintAutomaton).
   // If the code is compiled in debug mode the automaton produces a Graphviz file into the folder of the application
+  // To understand this code, it is worthwhile to generate a graphical visualization of the automaton (see PrintAutomaton).
+  // If the code is compiled in debug mode, the automaton produces a Graphviz file into the folder of the application
   // whenever an instance of the automaton is constructed.
   //
 …
+    }
     // postfix notation
+    // Produce postfix notation for expression:
     // Expr -> 0 Term { '+' Term } '+' 'exit'
     // Term -> c Fact { '*' Fact } '*'
 …
       followStates[StateExprEnd] = new List<int>(); // no follow states
       // order all followstates (the first follow state leads to the final state)
+      // order all follow states (the first follow state leads to the final state)
       foreach (var list in followStates) {
         if (list != null)

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ExprHashSymbolic.cs

-                      r15440
+                      r15606
 using System.Linq;
+// code for hashing of expressions based on symbolic analysis of monomials and polynomials.
+// slow, but easy to implement correctly.
 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
   internal enum UnaryFunctionType { Log, Exp, Inv };
 …
   // calculates a hash-code for expressions.
   public static class ExprHashSymbolic {
 …
     static ExprHashSymbolic() {
       const string symbols = "abcdefghijklmnopqrstuvwxyz";
+      const string symbols = "abcdefghijklmnopqrstuvwxyz"; // limited to 26 variables -> TODO
       varSymbols = new SymbolFactor[MaxVariables];
 …
+    }
+  // takes an array of code and transforms it into a polynomial for hashing.
+  // slow!
     private static int Eval(byte[] code, int nParams) {
       // The hash code calculation already preserves commutativity, associativity and distributivity of operations.
 …
       // - exp(x1) * exp(x1) is equivalent to exp(x1)
       //
+      // The following experssions must not hash to the same value.
+      // - exp(x1) + exp(x1) is different from exp(x1)
+      // - log(x1) + log(x1) is different from log(x1)
+      // - 1/x1 + 1/x1 is different from 1/x1
+      // - TODO list all
+      // think about speed later (TODO)
+      // The following expressions must not hash to the same value.
+      // - exp(x1) + exp(x1) is different from exp(x1), because c1*exp(c2*x1) + c3*exp(c4*x1) cannot be simplified to c5*exp(c6*x1) for all values of c2 and c4
+      // - log(x1) + log(x1) is different from log(x1), same as above
+      // - 1/x1 + 1/x1 is different from 1/x1, same as above, e.g. 1/(x1 + c1) + 1/(x1 + c2)
+      // - TODO: list further exceptions
       var stack = new Polynomial[MaxStackSize];

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ExpressionEvaluator.cs

r15403	r15606
25	25
26	26	namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
27		// evalutes expressions (on vectors)
	27	// evaluates expressions (on vectors)
28	28	internal class ExpressionEvaluator {
29	29	// manages its own vector buffers

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

-                      r15439
+                      r15606
 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
   // TODO: support pause (persisting/cloning the state)
   [Item("MCTS Symbolic Regression", "Monte carlo tree search for symbolic regression.")]
+  [Item("Symbolic Regression Tree Search", "tree search for symbolic regression.")]
   [StorableClass]
   [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
 …
       int n = 0;
       // cancelled before we acutally started
+      // canceled before we actually started
       cancellationToken.ThrowIfCancellationRequested();

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs

-                      r15441
+                      r15606
     //      - standardization of variables is possible (or might be necessary) as we adjust numeric parameters of the expression anyway
     //      - to simplify the problem we can restrict the set of functions e.g. we assume which functions are necessary for the problem instance
     //        -> several steps: (a) polyinomials, (b) rational polynomials, (c) exponential or logarithmic functions, rational functions with exponential and logarithmic parts
+    //        -> several steps: (a) polynomials, (b) rational polynomials, (c) exponential or logarithmic functions, rational functions with exponential and logarithmic parts
     // 3) efficiency and effectiveness for real-world problems
     //    - e.g. Tower problem
 …
     // TODO: The samples of x1*... or x2*... do not give any information about the relevance of the interaction term x1*x2 in general!
     //       --> E.g. if x1, x2 ~ N(0, 1) or U(-1, 1) this is trivial to show
     //       --> Therefore, looking at rollout statistics for arm selection is useless in the general case!
+    //       --> Therefore, looking at roll-out statistics for arm selection (MCTS-style) is useless in the general case!
     //       --> It is necessary to rely on other features for the arm selection.
     //       --> TODO: Which heuristics can we apply?
 …
     //       and later we find the a longer form x1 + x1 + x2 where the number of variable references
     //       exceeds the maximum in the automaton this leads to an error (see unit tests)
-    // ~~obsolete TODO: After state unification the recursive backpropagation of results takes a lot of time. How can this be improved?
-    // ~~obsolete TODO: Why is the algorithm so slow for rather greedy policies (e.g. low C value in UCB)?
-    // ~~obsolete TODO: check if we can use a quality measure with range [-1..1] in policies
     // TODO: unit tests for benchmark problems which contain log / exp / x^-1 but without numeric constants
     // TODO: check if transformation of y is correct and works (Obj 2)
 …
     // TODO: analyze / improve perf of ExprHashing (canonical form for expressions)
     // TODO: support empty test partition
     // TODO: the algorithm should be invariant to linear transformations of the space (y = f(x') = f( Ax ) ) for invertible transformations A --> unit tests
+    // TODO: the algorithm should be invariant to linear transformations of the space (y = f(x') = f( Ax ) ) for invertible transformations A --> see unit tests
     #region static API
 …
       internal readonly Tree tree;
       internal readonly Func<byte[], int, double> evalFun;
       // MCTS might get stuck. Track statistics on the number of effective rollouts
+      // MCTS might get stuck. Track statistics on the number of effective roll-outs
       internal int totalRollouts;
       internal int effectiveRollouts;
 …
+      }
       // takes the code of the best solution and creates and equivalent symbolic regression model
+      // takes the code of the best solution and creates an equivalent symbolic regression model
       public ISymbolicRegressionModel BestModel {
         get {
 …
       // State equivalence is checked through ExprHash (based on the generated code through the path).
       // We switch between rollout-mode and expansion mode
       // Rollout-mode means we are navigating an existing path through the tree (using a rollout policy, e.g. UCB)
       // Expansion mode means we expand the graph, creating new nodes and edges (using an expansion policy, e.g. shortest route to a complete expression)
       // In expansion mode we might re-enter the graph and switch back to rollout-mode
       // We do this until we reach a complete expression (final state)
       // Loops in the graph are prevented by checking that the level of a child must be larger than the level of the parent
+      // We switch between rollout-mode and expansion mode.
+      // Rollout-mode means we are navigating an existing path through the tree (using a rollout policy, e.g. UCB).
+      // Expansion mode means we expand the graph, creating new nodes and edges (using an expansion policy, e.g. shortest route to a complete expression).
+      // In expansion mode we might re-enter the graph and switch back to rollout-mode.
+      // We do this until we reach a complete expression (final state).
+      // Loops in the graph are prevented by checking that the level of a child must be larger than the level of the parent.
       // Sub-graphs which have been completely searched are marked as done.
       // Roll-out could lead to a state where all follow-states are done. In this case we call the rollout ineffective.
 …
     // for debugging only
+    #region debugging
     private static string TraceTree(Tree tree, State state) {
 …
       return sb.ToString();
+    }
+  #endregion
+  }
+}

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/SymbolicExpressionGenerator.cs

-                      r14185
+                      r15606
 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
+  // translates byte code to a symbolic expression tree
+  // helper class.
+  // Translates byte code into a symbolic expression tree, e.g. for the final solution.
   internal class SymbolicExpressionTreeGenerator {
     const int MaxStackSize = 100;

branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Tree.cs

-                      r15438
+                      r15606
     public bool Done { get; set; }
     public int visits;
-    //   {
-    //   get { return actionStatistics.Done; }
-    //   set { actionStatistics.Done = value; }
-    // }
-    // public IActionStatistics actionStatistics;
-    // public Tree[] children;
+  }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 15606 for branches/MCTS-SymbReg-2796

Legend:

Download in other formats: