
Changeset 15403


Timestamp: 10/04/17 22:00:52 (7 years ago)
Author: gkronber
Message: #2796 worked on MCTS symb reg
Location: branches/MCTS-SymbReg-2796
Files: 3 added, 8 edited

  • branches/MCTS-SymbReg-2796

    • Property svn:ignore set to
      TestResults
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs

    r14185 r15403  
    9696    }
    9797
    98     // reverse notation ops
    99     // Expr -> c 0 Term { '+' Term } '+' '*' c '+' 'exit'
     98    // reverse notation
     99    // Expr -> 0 Term { '+' Term } '+' 'exit'
    100100    // Term -> c Fact { '*' Fact } '*'
    101101    // Fact -> VarFact | ExpFact | LogFact | InvFact
     
    120120      actionStrings = new List<string>[nStates, nStates];
    121121
    122       // Expr -> c 0 Term { '+' Term } '+' '*' c '+' 'exit'
     122      // Expr -> 0 Term { '+' Term } '+' 'exit'
    123123      AddTransition(StateExpr, StateTermStart, () => {
    124124        codeGenerator.Reset();
    125         codeGenerator.Emit1(OpCodes.LoadParamN);
    126125        codeGenerator.Emit1(OpCodes.LoadConst0);
    127126        constraintHandler.Reset();
    128       }, "c 0, Reset");
     127      }, "0, Reset");
    129128      AddTransition(StateTermEnd, StateExprEnd, () => {
    130129        codeGenerator.Emit1(OpCodes.Add);
    131         codeGenerator.Emit1(OpCodes.Mul);
    132         codeGenerator.Emit1(OpCodes.LoadParamN);
    133         codeGenerator.Emit1(OpCodes.Add);
    134130        codeGenerator.Emit1(OpCodes.Exit);
    135       }, "+*c+ exit");
     131      }, "+ exit");
    136132      if (allowMultipleTerms)
    137133        AddTransition(StateTermEnd, StateTermStart, () => {
     
    353349    private readonly int[] followStatesBuf = new int[1000];
    354350    public void FollowStates(int state, out int[] buf, out int nElements) {
    355       // for loop instead of where iterator
    356351      var fs = followStates[state];
    357352      int j = 0;
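
The grammar comments in Automaton.cs describe expressions emitted in reverse (postfix) notation: the program starts by pushing the constant 0, every term is accumulated with '+', and 'exit' terminates the code. A minimal, self-contained sketch of evaluating such a postfix opcode stream with an operand stack is shown below; the Op enum and PostfixDemo class are hypothetical illustrations modeled on the opcode names visible in this diff, not the HeuristicLab OpCodes/ExpressionEvaluator API.

    // Hypothetical sketch (not the HeuristicLab API): a stack-based evaluator for a
    // postfix opcode stream of the shape "0 Term { '+' Term } '+' 'exit'".
    using System;
    using System.Collections.Generic;

    enum Op { LoadConst0, LoadConst1, LoadVar0, Add, Mul, Exit }

    static class PostfixDemo {
      static double Evaluate(IEnumerable<Op> code, double x) {
        var stack = new Stack<double>();
        foreach (var op in code) {
          switch (op) {
            case Op.LoadConst0: stack.Push(0.0); break;                // Expr starts with 0
            case Op.LoadConst1: stack.Push(1.0); break;                // a numeric constant c
            case Op.LoadVar0:   stack.Push(x); break;                  // a variable reference
            case Op.Add:        stack.Push(stack.Pop() + stack.Pop()); break;
            case Op.Mul:        stack.Push(stack.Pop() * stack.Pop()); break;
            case Op.Exit:       return stack.Pop();                    // 'exit' ends the program
          }
        }
        throw new InvalidOperationException("missing Exit opcode");
      }

      static void Main() {
        // Expr -> 0 Term '+' 'exit'  with  Term -> c Fact '*'  (c = 1, Fact = x)
        var code = new[] { Op.LoadConst0, Op.LoadConst1, Op.LoadVar0, Op.Mul, Op.Add, Op.Exit };
        Console.WriteLine(Evaluate(code, 3.0)); // prints 3, i.e. 0 + 1 * x
      }
    }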
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ConstraintHandler.cs

    r14185 r15403  
    2929  // This class restricts the set of allowed transitions of the automaton to prevent exploration of duplicate expressions.
    3030  // It would be possible to implement this class in such a way that the search never visits a duplicate expression. However,
    31   // it seems very intricate to detect this robustly and in all cases while generating an expression because
    32   // some for of lookahead is necessary.
    33   // Instead the constraint handler only catches the obvious duplicates directly, but does not guarantee that the search always produces a valid expression.
     31  // it seems very intricate to detect this robustly while generating an expression because
     32  // some form of lookahead is necessary.
     33  // Instead, the constraint handler only catches the obvious duplicates directly, but does not guarantee that the search always produces a valid expression.
    3434  // The ratio of unsuccessful searches that need backtracking should be tracked in the MCTS alg (MctsSymbolicRegressionStatic)
    3535
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ExpressionEvaluator.cs

    r14185 r15403  
    161161                var minFx = fxc.Min() - consts[curParamIdx]; // stack[topOfStack] is f(x) + c
    162162
    163                 var delta = 1.0 - minFx - consts[curParamIdx];
    164                 // adjust c so that minFx + c = 1 ... log(minFx + c) = 0
     163                // adjust c so that minFx + c = e and therefore log(minFx + c) = log(e) = 1
     164                // this initialization works in combination with the gradient check (instead of initializing such that log(minFx + c) = 0)
     165                var delta = Math.E - minFx - consts[curParamIdx];
    165166                consts[curParamIdx] += delta;
    166167
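
One line of algebra confirms the new initialization for the log offset (a sketch, assuming stack[topOfStack] currently holds minFx + c as stated in the comment above):

    c' = c + \delta = c + (e - \mathit{minFx} - c) = e - \mathit{minFx}
    \;\Rightarrow\; \mathit{minFx} + c' = e
    \;\Rightarrow\; \log(\mathit{minFx} + c') = \log e = 1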
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

    r15360 r15403  
    2222using System;
    2323using System.Linq;
    24 using System.Runtime.CompilerServices;
    2524using System.Threading;
    2625using HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies;
     
    3635
    3736namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
    38   [Item("MCTS Symbolic Regression", "Monte carlo tree search for symbolic regression. Useful mainly as a base learner in gradient boosting.")]
     37  // TODO: support pause (persisting/cloning the state)
     38  [Item("MCTS Symbolic Regression", "Monte carlo tree search for symbolic regression.")]
    3939  [StorableClass]
    4040  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
     
    5353    private const string CreateSolutionParameterName = "CreateSolution";
    5454    private const string PunishmentFactorParameterName = "PunishmentFactor";
    55 
    56     private const string VariableProductFactorName = "product(xi)";
    57     private const string ExpFactorName = "exp(c * product(xi))";
    58     private const string LogFactorName = "log(c + sum(c*product(xi))";
    59     private const string InvFactorName = "1 / (1 + sum(c*product(xi))";
    60     private const string FactorSumsName = "sum of multiple terms";
     55    private const string CollectParetoOptimalSolutionsParameterName = "CollectParetoOptimalSolutions";
     56    private const string LambdaParameterName = "Lambda";
     57
     58    private const string VariableProductFactorName = "x * y * ...";
     59    private const string ExpFactorName = "exp(c * x * y ...)";
     60    private const string LogFactorName = "log(c + c1 x + c2 x + ...)";
     61    private const string InvFactorName = "1 / (1 + c1 x + c2 x + ...)";
     62    private const string FactorSumsName = "t1(x) + t2(x) + ... ";
    6163    #endregion
    6264
     
    9496    public IFixedValueParameter<BoolValue> CreateSolutionParameter {
    9597      get { return (IFixedValueParameter<BoolValue>)Parameters[CreateSolutionParameterName]; }
     98    }
     99    public IFixedValueParameter<BoolValue> CollectParetoOptimalSolutionsParameter {
     100      get { return (IFixedValueParameter<BoolValue>)Parameters[CollectParetoOptimalSolutionsParameterName]; }
     101    }
     102    public IFixedValueParameter<DoubleValue> LambdaParameter {
     103      get { return (IFixedValueParameter<DoubleValue>)Parameters[LambdaParameterName]; }
    96104    }
    97105    #endregion
     
    136144      get { return CreateSolutionParameter.Value.Value; }
    137145      set { CreateSolutionParameter.Value.Value = value; }
     146    }
     147    public bool CollectParetoOptimalSolutions {
     148      get { return CollectParetoOptimalSolutionsParameter.Value.Value; }
     149      set { CollectParetoOptimalSolutionsParameter.Value.Value = value; }
     150    }
     151    public double Lambda {
     152      get { return LambdaParameter.Value.Value; }
     153      set { LambdaParameter.Value.Value = value; }
    138154    }
    139155    #endregion
     
    177193      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName,
    178194        "Number of iterations for constant optimization. A small number of iterations should be sufficient for most models. " +
    179         "Set to 0 to disable constants optimization.", new IntValue(10)));
     195        "Set to 0 to let the algorithm stop automatically when it converges. Set to -1 to disable constants optimization.", new IntValue(10)));
    180196      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleVariablesParameterName,
    181         "Set to true to scale all input variables to the range [0..1]", new BoolValue(false)));
      197        "Set to true to scale all input variables to the range [0..1]", new BoolValue(true)));
    182198      Parameters[ScaleVariablesParameterName].Hidden = true;
    183199      Parameters.Add(new FixedValueParameter<DoubleValue>(PunishmentFactorParameterName, "Estimations of models can be bounded. The estimation limits are calculated in the following way (lb = mean(y) - punishmentFactor*range(y), ub = mean(y) + punishmentFactor*range(y))", new DoubleValue(10)));
     
    187203      Parameters[UpdateIntervalParameterName].Hidden = true;
    188204      Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName,
    189         "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));
     205        "Optionally produce a solution at the end of the run", new BoolValue(true)));
    190206      Parameters[CreateSolutionParameterName].Hidden = true;
     207
     208      Parameters.Add(new FixedValueParameter<BoolValue>(CollectParetoOptimalSolutionsParameterName,
     209        "Optionally collect a set of Pareto-optimal solutions minimizing error and complexity.", new BoolValue(false)));
     210      Parameters[CollectParetoOptimalSolutionsParameterName].Hidden = true;
     211
     212      Parameters.Add(new FixedValueParameter<DoubleValue>(LambdaParameterName,
     213        "Lambda is the factor for the regularization term in the objective function (Obj = (y - f(x,p))² + lambda * |p|²)", new DoubleValue(0.0)));
    191214    }
    192215
     
    195218    }
    196219
     220    // TODO: support pause and restart
    197221    protected override void Run(CancellationToken cancellationToken) {
    198222      // Set up the algorithm
    199223      if (SetSeedRandomly) Seed = new System.Random().Next();
     224      var collectPareto = CollectParetoOptimalSolutions;
    200225
    201226      // Set up the results display
     
    229254      var gradEvals = new IntValue();
    230255      Results.Add(new Result("Gradient evaluations", gradEvals));
    231       var paretoBestModelsResult = new Result("ParetoBestModels", typeof(ItemList<ISymbolicRegressionSolution>));
    232       Results.Add(paretoBestModelsResult);
    233 
     256
     257      Result paretoBestModelsResult = new Result("ParetoBestModels", typeof(ItemList<ISymbolicRegressionSolution>));
     258      if (collectPareto) {
     259        Results.Add(paretoBestModelsResult);
     260      }
    234261
    235262      // same as in SymbolicRegressionSingleObjectiveProblem
     
    246273      var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
    247274      if (!AllowedFactors.CheckedItems.Any()) throw new ArgumentException("At least one type of factor must be allowed");
    248       var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)Seed, MaxVariableReferences, ScaleVariables, ConstantOptimizationIterations,
    249         Policy,
     275      var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)Seed, MaxVariableReferences, ScaleVariables,
     276        ConstantOptimizationIterations, Lambda,
     277        Policy, collectPareto,
    250278        lowerLimit, upperLimit,
    251279        allowProdOfVars: AllowedFactors.CheckedItems.Any(s => s.Value.Value == VariableProductFactorName),
     
    261289      double curBestQ = 0.0;
    262290      int n = 0;
     291
      292      // cancelled before we actually started
     293      cancellationToken.ThrowIfCancellationRequested();
     294
    263295      // Loop until iteration limit reached or canceled.
    264       for (int i = 0; i < Iterations && !state.Done; i++) {
    265         cancellationToken.ThrowIfCancellationRequested();
    266 
     296      for (int i = 0; i < Iterations && !state.Done && !cancellationToken.IsCancellationRequested; i++) {
    267297        var q = MctsSymbolicRegressionStatic.MakeStep(state);
    268298        sumQ += q; // sum of qs in the last updateinterval iterations
     
    286316          totalRollouts.Value = state.TotalRollouts;
    287317
    288           paretoBestModelsResult.Value = new ItemList<ISymbolicRegressionSolution>(state.ParetoBestModels);
     318          if (collectPareto) {
     319            paretoBestModelsResult.Value = new ItemList<ISymbolicRegressionSolution>(state.ParetoBestModels);
     320          }
    289321
    290322          table.Rows["Best quality"].Values.Add(bestQuality.Value);
     
    296328      }
    297329
    298       // final results
     330      // final results (assumes that at least one iteration was calculated)
    299331      if (n > 0) {
    300332        if (bestQ > bestQuality.Value) {
  • branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs

    r15360 r15403  
    3535namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
    3636  public static class MctsSymbolicRegressionStatic {
    37     // TODO: SGD with adagrad instead of lbfgs?
    38     // TODO: check Taylor expansion capabilities (ln(x), sqrt(x), exp(x)) in combination with GBT
    39     // TODO: optimize for 3 targets concurrently (y, 1/y, exp(y), and log(y))? Would simplify the number of possible expressions again
     37    // OBJECTIVES:
     38    // 1) solve toy problems without numeric constants (to show that structure search is effective / efficient)
     39    //    - e.g. Keijzer, Nguyen ... where no numeric constants are involved
     40    //    - assumptions:
     41    //      - we don't know the necessary operations or functions -> all available functions could be necessary
     42    //      - but we do not need to tune numeric constants -> no scaling of input variables x!
     43    // 2) Solve toy problems with numeric constants to make the algorithm invariant concerning variable scale.
     44    //    This is important for real world applications.
     45    //    - e.g. Korns or Vladislavleva problems where numeric constants are involved
     46    //    - assumptions:
     47    //      - any numeric constant is possible (a-priori we might assume that small abs. constants are more likely)
     48    //      - standardization of variables is possible (or might be necessary) as we adjust numeric parameters of the expression anyway
     49    //      - to simplify the problem we can restrict the set of functions e.g. we assume which functions are necessary for the problem instance
      50    //        -> several steps: (a) polynomials, (b) rational polynomials, (c) exponential or logarithmic functions, rational functions with exponential and logarithmic parts
     51    // 3) efficiency and effectiveness for real-world problems
     52    //    - e.g. Tower problem
     53    //    - (1) and (2) combined, structure search must be effective in combination with numeric optimization of constants
     54    //   
     55
     56    // TODO NEXT: check if transformation of y is correct and works
     57    // TODO: transform target y to zero-mean and remove linear scaling parameters
     58    // TODO: include offset for variables as parameter
     59    // TODO: why does LM optimization converge so slowly with exp(x), log(x), and 1/x allowed?
     60    // TODO: support e(-x) and possibly (1/-x)
     61    // TODO: is it OK to initialize all constants to 1?
    4062    #region static API
    4163
     
    7698      private readonly double[] scalingFactor;
    7799      private readonly double[] scalingOffset;
     100      private readonly double yStdDev; // for scaling parameters (e.g. stopping condition for LM)
    78101      private readonly int constOptIterations;
     102      private readonly double lambda; // weight of penalty term for regularization
    79103      private readonly double lowerEstimationLimit, upperEstimationLimit;
     104      private readonly bool collectParetoOptimalModels;
    80105      private readonly List<ISymbolicRegressionSolution> paretoBestModels = new List<ISymbolicRegressionSolution>();
    81106      private readonly List<double[]> paretoFront = new List<double[]>(); // matching the models
     
    99124      private readonly double[][] gradBuf;
    100125
    101       public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables, int constOptIterations,
     126      public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables,
     127        int constOptIterations, double lambda,
    102128        IPolicy treePolicy = null,
     129        bool collectParetoOptimalModels = false,
    103130        double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
    104131        bool allowProdOfVars = true,
     
    108135        bool allowMultipleTerms = false) {
    109136
      137        if (lambda < 0) throw new ArgumentException("Lambda must be larger than or equal to zero", "lambda");
     138
    110139        this.problemData = problemData;
    111140        this.constOptIterations = constOptIterations;
     141        this.lambda = lambda;
    112142        this.evalFun = this.Eval;
    113143        this.lowerEstimationLimit = lowerEstimationLimit;
    114144        this.upperEstimationLimit = upperEstimationLimit;
     145        this.collectParetoOptimalModels = collectParetoOptimalModels;
    115146
    116147        random = new MersenneTwister(randSeed);
     
    128159        this.x = x;
    129160        this.y = y;
     161        this.yStdDev = HeuristicLab.Common.EnumerableStatisticExtensions.StandardDeviation(y);
    130162        this.testX = testX;
    131163        this.testY = testY;
     
    181213          var t = new SymbolicExpressionTree(treeGen.Exec(bestCode, bestConsts, bestNParams, scalingFactor, scalingOffset));
    182214          var model = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerEstimationLimit, upperEstimationLimit);
    183 
    184           // model has already been scaled linearly in Eval
     215          model.Scale(problemData); // apply linear scaling
    185216          return model;
    186217        }
     
    212243          Array.Copy(optConsts, bestConsts, bestNParams);
    213244        }
    214         // multi-objective best
    215         var complexity = // SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity() TODO
    216           Array.FindIndex(code, (opc)=>opc==(byte)OpCodes.Exit);
    217         UpdateParetoFront(q, complexity, code, optConsts, nParams, scalingFactor, scalingOffset);
    218 
     245        if (collectParetoOptimalModels) {
     246          // multi-objective best
     247          var complexity = // SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity() TODO: implement Kommenda's tree complexity directly in the evaluator
     248            Array.FindIndex(code, (opc) => opc == (byte)OpCodes.Exit);  // use length of expression as surrogate for complexity
     249          UpdateParetoFront(q, complexity, code, optConsts, nParams, scalingFactor, scalingOffset);
     250        }
    219251        return q;
    220252      }
     253
     254      private void Eval(byte[] code, int nParams, out double rsq, out double[] optConsts) {
     255        // we make a first pass to determine a valid starting configuration for all constants
      256        // constant c in log(c + f(x)) is adjusted to guarantee that the argument of log is positive (see expression evaluator)
     257        // scale and offset are set to optimal starting configuration
     258        // assumes scale is the first param and offset is the last param
     259
     260        // reset constants
     261        Array.Copy(ones, constsBuf, nParams);
     262        evaluator.Exec(code, x, constsBuf, predBuf, adjustOffsetForLogAndExp: true);
     263        funcEvaluations++;
     264
     265        if (nParams == 0 || constOptIterations < 0) {
     266          // if we don't need to optimize parameters then we are done
     267          // changing scale and offset does not influence r²
     268          rsq = RSq(y, predBuf);
     269          optConsts = constsBuf;
     270        } else {
     271          // optimize constants using the starting point calculated above
     272          OptimizeConstsLm(code, constsBuf, nParams, 0.0, nIters: constOptIterations);
     273
     274          evaluator.Exec(code, x, constsBuf, predBuf);
     275          funcEvaluations++;
     276
     277          rsq = RSq(y, predBuf);
     278          optConsts = constsBuf;
     279        }
     280      }
     281
     282
     283
     284      #region helpers
     285      private static double RSq(IEnumerable<double> x, IEnumerable<double> y) {
     286        OnlineCalculatorError error;
     287        double r = OnlinePearsonsRCalculator.Calculate(x, y, out error);
     288        return error == OnlineCalculatorError.None ? r * r : 0.0;
     289      }
     290
     291
     292      private void OptimizeConstsLm(byte[] code, double[] consts, int nParams, double epsF = 0.0, int nIters = 100) {
     293        double[] optConsts = new double[nParams]; // allocate a smaller buffer for constants opt (TODO perf?)
     294        Array.Copy(consts, optConsts, nParams);
     295
     296        // direct usage of LM is recommended in alglib manual for better performance than the lsfit interface (which uses lm internally).
     297        alglib.minlmstate state;
     298        alglib.minlmreport rep = null;
     299        alglib.minlmcreatevj(y.Length + 1, optConsts, out state); // +1 for penalty term
     300        // Using the change of the gradient as stopping criterion is recommended in alglib manual.
     301        // However, the most recent version of alglib (as of Oct 2017) only supports epsX as stopping criterion
     302        alglib.minlmsetcond(state, epsg: 1E-6 * yStdDev, epsf: epsF, epsx: 0.0, maxits: nIters);
     303        // alglib.minlmsetgradientcheck(state, 1E-5);
     304        alglib.minlmoptimize(state, Func, FuncAndJacobian, null, code);
     305        alglib.minlmresults(state, out optConsts, out rep);
     306        funcEvaluations += rep.nfunc;
     307        gradEvaluations += rep.njac * nParams;
     308
     309        if (rep.terminationtype < 0) throw new ArgumentException("lm failed: termination type = " + rep.terminationtype);
     310
     311        // only use optimized constants if successful
     312        if (rep.terminationtype >= 0) {
     313          Array.Copy(optConsts, consts, optConsts.Length);
     314        }
     315      }
     316
     317      private void Func(double[] arg, double[] fi, object obj) {
     318        var code = (byte[])obj;
     319        int n = predBuf.Length;
     320        evaluator.Exec(code, x, arg, predBuf); // gradients are nParams x vLen
     321        for (int r = 0; r < n; r++) {
     322          var res = predBuf[r] - y[r];
     323          fi[r] = res;
     324        }
     325
     326        var penaltyIdx = fi.Length - 1;
     327        fi[penaltyIdx] = 0.0;
     328        // calc length of parameter vector for regularization
     329        var aa = 0.0;
     330        for (int i = 0; i < arg.Length; i++) {
     331          aa += arg[i] * arg[i];
     332        }
     333        if (lambda > 0 && aa > 0) {
     334          // scale lambda using stdDev(y) to make the parameter independent of the scale of y
     335          // scale lambda using n to make parameter independent of the number of training points
     336          // take the root because LM squares the result
     337          fi[penaltyIdx] = Math.Sqrt(n * lambda / yStdDev * aa);
     338        }
     339      }
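
The penalty residual in Func relies on the fact that alglib's Levenberg-Marquardt minimizes the sum of squared residuals; appending the square root of the scaled penalty as the extra (n+1)-th residual therefore adds the regularization term to the objective. A sketch of the algebra, with sigma_y denoting yStdDev:

    \min_a \; \sum_{r=1}^{n} \bigl(f(x_r, a) - y_r\bigr)^2 + r_{n+1}^2,
    \qquad
    r_{n+1} = \sqrt{\frac{n \lambda}{\sigma_y} \sum_i a_i^2}
    \;\Rightarrow\;
    r_{n+1}^2 = \frac{n \lambda}{\sigma_y} \sum_i a_i^2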
     340
     341      private void FuncAndJacobian(double[] arg, double[] fi, double[,] jac, object obj) {
     342        int n = predBuf.Length;
     343        int nParams = arg.Length;
     344        var code = (byte[])obj;
     345        evaluator.ExecGradient(code, x, arg, predBuf, gradBuf); // gradients are nParams x vLen
     346        for (int r = 0; r < n; r++) {
     347          var res = predBuf[r] - y[r];
     348          fi[r] = res;
     349
     350          for (int k = 0; k < nParams; k++) {
     351            jac[r, k] = gradBuf[k][r];
     352          }
     353        }
     354        // calc length of parameter vector for regularization
     355        double aa = 0.0;
     356        for (int i = 0; i < arg.Length; i++) {
     357          aa += arg[i] * arg[i];
     358        }
     359
     360        var penaltyIdx = fi.Length - 1;
     361        if (lambda > 0 && aa > 0) {
     362          fi[penaltyIdx] = 0.0;
     363          // scale lambda using stdDev(y) to make the parameter independent of the scale of y
     364          // scale lambda using n to make parameter independent of the number of training points
     365          // take the root because alglib LM squares the result
     366          fi[penaltyIdx] = Math.Sqrt(n * lambda / yStdDev * aa);
     367
     368          for (int i = 0; i < arg.Length; i++) {
     369            jac[penaltyIdx, i] = 0.5 / fi[penaltyIdx] * 2 * n * lambda / yStdDev * arg[i];
     370          }
     371        } else {
     372          fi[penaltyIdx] = 0.0;
     373          for (int i = 0; i < arg.Length; i++) {
     374            jac[penaltyIdx, i] = 0.0;
     375          }
     376        }
     377      }
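
The Jacobian row for the penalty residual in FuncAndJacobian follows from differentiating r_{n+1} with respect to a_i; the result matches the expression 0.5 / fi[penaltyIdx] * 2 * n * lambda / yStdDev * arg[i] in the code (a sketch, same notation as above):

    \frac{\partial r_{n+1}}{\partial a_i}
    = \frac{1}{2 r_{n+1}} \cdot \frac{n \lambda}{\sigma_y} \cdot 2 a_i
    = \frac{n \lambda\, a_i}{\sigma_y\, r_{n+1}}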
     378
    221379
    222380      private void UpdateParetoFront(double q, int complexity, byte[] code, double[] param, int nParam,
     
    233391          }
    234392        }
    235         if(isNonDominated) {
     393        if (isNonDominated) {
    236394          paretoFront.Add(cur);
    237395
     
    242400          var t = new SymbolicExpressionTree(treeGen.Exec(code, param, nParam, scalingFactor, scalingOffset));
    243401          var model = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerEstimationLimit, upperEstimationLimit);
     402          model.Scale(problemData); // apply linear scaling
    244403
    245404          var sol = model.CreateRegressionSolution(this.problemData);
    246405          sol.Name = string.Format("{0:N5} {1}", q, complexity);
    247          
     406
    248407          paretoBestModels.Add(sol);
    249408        }
    250         for(int i=paretoFront.Count-2;i>=0;i--) {
     409        for (int i = paretoFront.Count - 2; i >= 0; i--) {
    251410          var @ref = paretoFront[i];
    252411          var domRes = DominationCalculator<int>.Dominates(cur, @ref, max, true);
    253           if(domRes == DominationResult.Dominates) {
     412          if (domRes == DominationResult.Dominates) {
    254413            paretoFront.RemoveAt(i);
    255414            paretoBestModels.RemoveAt(i);
     
    257416        }
    258417      }
    259 
    260       private void Eval(byte[] code, int nParams, out double rsq, out double[] optConsts) {
    261         // we make a first pass to determine a valid starting configuration for all constants
    262         // constant c in log(c + f(x)) is adjusted to guarantee that x is positive (see expression evaluator)
    263         // scale and offset are set to optimal starting configuration
    264         // assumes scale is the first param and offset is the last param
    265         double alpha;
    266         double beta;
    267 
    268         // reset constants
    269         Array.Copy(ones, constsBuf, nParams);
    270         evaluator.Exec(code, x, constsBuf, predBuf, adjustOffsetForLogAndExp: true);
    271         funcEvaluations++;
    272 
    273         // calc opt scaling (alpha*f(x) + beta)
    274         OnlineCalculatorError error;
    275         OnlineLinearScalingParameterCalculator.Calculate(predBuf, y, out alpha, out beta, out error);
    276         if (error == OnlineCalculatorError.None) {
    277           constsBuf[0] *= beta;
    278           constsBuf[nParams - 1] = constsBuf[nParams - 1] * beta + alpha;
    279         }
    280         if (nParams <= 2 || constOptIterations <= 0) {
    281           // if we don't need to optimize parameters then we are done
    282           // changing scale and offset does not influence r²
    283           rsq = RSq(y, predBuf);
    284           optConsts = constsBuf;
    285         } else {
    286           // optimize constants using the starting point calculated above
    287           OptimizeConstsLm(code, constsBuf, nParams, 0.0, nIters: constOptIterations);
    288 
    289           evaluator.Exec(code, x, constsBuf, predBuf);
    290           funcEvaluations++;
    291 
    292           rsq = RSq(y, predBuf);
    293           optConsts = constsBuf;
    294         }
    295       }
    296 
    297 
    298 
    299       #region helpers
    300       private static double RSq(IEnumerable<double> x, IEnumerable<double> y) {
    301         OnlineCalculatorError error;
    302         double r = OnlinePearsonsRCalculator.Calculate(x, y, out error);
    303         return error == OnlineCalculatorError.None ? r * r : 0.0;
    304       }
    305 
    306 
    307       private void OptimizeConstsLm(byte[] code, double[] consts, int nParams, double epsF = 0.0, int nIters = 100) {
    308         double[] optConsts = new double[nParams]; // allocate a smaller buffer for constants opt (TODO perf?)
    309         Array.Copy(consts, optConsts, nParams);
    310 
    311         alglib.minlmstate state;
    312         alglib.minlmreport rep = null;
    313         alglib.minlmcreatevj(y.Length, optConsts, out state);
    314         alglib.minlmsetcond(state, 0.0, epsF, 0.0, nIters);
    315         //alglib.minlmsetgradientcheck(state, 0.000001);
    316         alglib.minlmoptimize(state, Func, FuncAndJacobian, null, code);
    317         alglib.minlmresults(state, out optConsts, out rep);
    318         funcEvaluations += rep.nfunc;
    319         gradEvaluations += rep.njac * nParams;
    320 
    321         if (rep.terminationtype < 0) throw new ArgumentException("lm failed: termination type = " + rep.terminationtype);
    322 
    323         // only use optimized constants if successful
    324         if (rep.terminationtype >= 0) {
    325           Array.Copy(optConsts, consts, optConsts.Length);
    326         }
    327       }
    328 
    329       private void Func(double[] arg, double[] fi, object obj) {
    330         var code = (byte[])obj;
    331         evaluator.Exec(code, x, arg, predBuf); // gradients are nParams x vLen
    332         for (int r = 0; r < predBuf.Length; r++) {
    333           var res = predBuf[r] - y[r];
    334           fi[r] = res;
    335         }
    336       }
    337       private void FuncAndJacobian(double[] arg, double[] fi, double[,] jac, object obj) {
    338         int nParams = arg.Length;
    339         var code = (byte[])obj;
    340         evaluator.ExecGradient(code, x, arg, predBuf, gradBuf); // gradients are nParams x vLen
    341         for (int r = 0; r < predBuf.Length; r++) {
    342           var res = predBuf[r] - y[r];
    343           fi[r] = res;
    344 
    345           for (int k = 0; k < nParams; k++) {
    346             jac[r, k] = gradBuf[k][r];
    347           }
    348         }
    349       }
    350418      #endregion
    351419    }
    352420
     421
     422    /// <summary>
     423    /// Static method to initialize a state for the algorithm
     424    /// </summary>
     425    /// <param name="problemData">The problem data</param>
     426    /// <param name="randSeed">Random seed.</param>
     427    /// <param name="maxVariables">Maximum number of variable references that are allowed in the expression.</param>
     428    /// <param name="scaleVariables">Optionally scale input variables to the interval [0..1] (recommended)</param>
     429    /// <param name="constOptIterations">Maximum number of iterations for constants optimization (Levenberg-Marquardt)</param>
      430    /// <param name="lambda">Penalty factor for regularization (0..inf.); a value of zero disables regularization.</param>
     431    /// <param name="policy">Tree search policy (random, ucb, eps-greedy, ...)</param>
     432    /// <param name="collectParameterOptimalModels">Optionally collect all Pareto-optimal solutions having minimal length and error.</param>
     433    /// <param name="lowerEstimationLimit">Optionally limit the result of the expression to this lower value.</param>
     434    /// <param name="upperEstimationLimit">Optionally limit the result of the expression to this upper value.</param>
     435    /// <param name="allowProdOfVars">Allow products of expressions.</param>
     436    /// <param name="allowExp">Allow expressions with exponentials.</param>
     437    /// <param name="allowLog">Allow expressions with logarithms</param>
     438    /// <param name="allowInv">Allow expressions with 1/x</param>
     439    /// <param name="allowMultipleTerms">Allow expressions which are sums of multiple terms.</param>
      440    /// <returns>The initialized search state (IState) that is passed to MakeStep.</returns>
     441
    353442    public static IState CreateState(IRegressionProblemData problemData, uint randSeed, int maxVariables = 3,
    354       bool scaleVariables = true, int constOptIterations = 0,
     443      bool scaleVariables = true, int constOptIterations = -1, double lambda = 0.0,
    355444      IPolicy policy = null,
     445      bool collectParameterOptimalModels = false,
    356446      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
    357447      bool allowProdOfVars = true,
     
    361451      bool allowMultipleTerms = false
    362452      ) {
    363       return new State(problemData, randSeed, maxVariables, scaleVariables, constOptIterations,
    364         policy,
     453      return new State(problemData, randSeed, maxVariables, scaleVariables, constOptIterations, lambda,
     454        policy, collectParameterOptimalModels,
    365455        lowerEstimationLimit, upperEstimationLimit,
    366456        allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
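
For reference, the revised static API is driven from MctsSymbolicRegressionAlgorithm.Run roughly as sketched below; this is a condensed, hypothetical usage sketch based only on the calls visible in this changeset (CreateState, MakeStep, state.Done), not a verbatim excerpt.

    // Hedged usage sketch: problemData, seed and iterations are assumed to be provided by the caller.
    var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)seed,
      maxVariables: 5, scaleVariables: true,
      constOptIterations: 10, lambda: 0.0,
      policy: null, collectParameterOptimalModels: false);
    double bestQ = 0.0;
    for (int i = 0; i < iterations && !state.Done; i++) {
      var q = MctsSymbolicRegressionStatic.MakeStep(state); // one MCTS step, returns the quality of the sampled expression
      bestQ = Math.Max(bestQ, q);
    }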
     
    481571      var i = 0;
    482572      if (scaleVariables) {
    483         scalingFactor = new double[xs.Length];
    484         scalingOffset = new double[xs.Length];
     573        scalingFactor = new double[xs.Length + 1];
     574        scalingOffset = new double[xs.Length + 1];
    485575      } else {
    486576        scalingFactor = null;
     
    502592      }
    503593
     594      if (scaleVariables) {
     595        // transform target variable to zero-mean
     596        scalingFactor[i] = 1.0;
     597        scalingOffset[i] = -problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).Average();
     598      }
     599
    504600      GenerateData(problemData, rows, scalingFactor, scalingOffset, out xs, out y);
    505601    }
     
    518614      }
    519615
    520       y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
     616      {
     617        var sf = scalingFactor == null ? 1.0 : scalingFactor[i];
     618        var offset = scalingFactor == null ? 0.0 : scalingOffset[i];
     619        y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).Select(yi => yi * sf + offset).ToArray();
     620      }
    521621    }
    522622  }
  • branches/MCTS-SymbReg-2796/Solution.sln

    r15359 r15403  
    55MinimumVisualStudioVersion = 10.0.40219.1
    66Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg", "HeuristicLab.Algorithms.DataAnalysis\3.4\HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg.csproj", "{D97F91B5-B71B-412D-90A5-177EED948A3A}"
     7EndProject
     8Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Tests\Test.csproj", "{80009A4F-F55F-4E95-AD44-5AB01551B964}"
    79EndProject
    810Global
     
    1618    {D97F91B5-B71B-412D-90A5-177EED948A3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
    1719    {D97F91B5-B71B-412D-90A5-177EED948A3A}.Release|Any CPU.Build.0 = Release|Any CPU
     20    {80009A4F-F55F-4E95-AD44-5AB01551B964}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
     21    {80009A4F-F55F-4E95-AD44-5AB01551B964}.Debug|Any CPU.Build.0 = Debug|Any CPU
     22    {80009A4F-F55F-4E95-AD44-5AB01551B964}.Release|Any CPU.ActiveCfg = Release|Any CPU
     23    {80009A4F-F55F-4E95-AD44-5AB01551B964}.Release|Any CPU.Build.0 = Release|Any CPU
    1824  EndGlobalSection
    1925  GlobalSection(SolutionProperties) = preSolution
  • branches/MCTS-SymbReg-2796/Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/MctsSymbolicRegressionTest.cs

    r15057 r15403  
    11using System;
    2 using System.Diagnostics.Contracts;
    32using System.Linq;
    43using System.Threading;
     
    87using HeuristicLab.Optimization;
    98using HeuristicLab.Problems.DataAnalysis;
    10 using HeuristicLab.Tests;
     9using HeuristicLab.Problems.Instances.DataAnalysis;
    1110using Microsoft.VisualStudio.TestTools.UnitTesting;
    1211
     
    2120    public void MctsSymbRegNumberOfSolutionsOneVariable() {
    2221      // this problem has only one variable
    23       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
     22      var provider = new NguyenInstanceProvider();
    2423      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
    2524      {
     
    276275
    277276
    278     #region Nguyen
     277    #region test structure search (no constants)
     278    [TestMethod]
     279    [TestCategory("Algorithms.DataAnalysis")]
     280    [TestProperty("Time", "short")]
     281    public void MctsSymbReg_NoConstants_Nguyen1() {
     282      // x³ + x² + x
     283      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
     284      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
     285      TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
     286    }
     287    [TestMethod]
     288    [TestCategory("Algorithms.DataAnalysis")]
     289    [TestProperty("Time", "short")]
     290    public void MctsSymbReg_NoConstants_Nguyen2() {
     291      // x^4 + x³ + x² + x
     292      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
     293      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
     294      TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
     295    }
     296    [TestMethod]
     297    [TestCategory("Algorithms.DataAnalysis")]
     298    [TestProperty("Time", "short")]
     299    public void MctsSymbReg_NoConstants_Nguyen3() {
     300      // x^5 + x^4 + x³ + x² + x
     301      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
     302      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 ")));
     303      TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
     304    }
     305    [TestMethod]
     306    [TestCategory("Algorithms.DataAnalysis")]
     307    [TestProperty("Time", "short")]
     308    public void MctsSymbReg_NoConstants_Nguyen4() {
     309      // x^6 + x^5 + x^4 + x³ + x² + x
     310      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
     311      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 ")));
     312      TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
     313    }
     314
     315    [TestMethod]
     316    [TestCategory("Algorithms.DataAnalysis")]
     317    [TestProperty("Time", "short")]
     318    public void MctsSymbReg_NoConstants_Poly10() {
     319      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
     320      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
     321      TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
     322    }
     323    #endregion
     324
     325    #region restricted structure but including numeric constants
     326
     327    [TestMethod]
     328    [TestCategory("Algorithms.DataAnalysis")]
     329    [TestProperty("Time", "short")]
     330    public void MctsSymbRegKeijzer7() {
     331      // ln(x)
     332      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
     333      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f(")));
      334      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
     335      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
     336      TestMcts(regProblem, allowExp: false, allowLog: true, allowInv: false);
     337    }
     338
     339    /*
    279340    // [TestMethod]
    280341    [TestCategory("Algorithms.DataAnalysis")]
    281342    [TestProperty("Time", "short")]
    282     public void MctsSymbRegBenchmarkNguyen1() {
    283       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    284       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
    285       TestMcts(regProblem);
    286     }
    287     // [TestMethod]
    288     [TestCategory("Algorithms.DataAnalysis")]
    289     [TestProperty("Time", "short")]
    290     public void MctsSymbRegBenchmarkNguyen2() {
    291       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    292       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
    293       TestMcts(regProblem);
    294     }
    295     // [TestMethod]
    296     [TestCategory("Algorithms.DataAnalysis")]
    297     [TestProperty("Time", "short")]
    298     public void MctsSymbRegBenchmarkNguyen3() {
    299       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    300       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 ")));
    301       TestMcts(regProblem, successThreshold: 0.99);
    302     }
    303     // [TestMethod]
    304     [TestCategory("Algorithms.DataAnalysis")]
    305     [TestProperty("Time", "short")]
    306     public void MctsSymbRegBenchmarkNguyen4() {
    307       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    308       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 ")));
    309       TestMcts(regProblem);
    310     }
    311     // [TestMethod]
    312     [TestCategory("Algorithms.DataAnalysis")]
    313     [TestProperty("Time", "short")]
    314343    public void MctsSymbRegBenchmarkNguyen5() {
     344      // sin(x²)cos(x) - 1
    315345      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    316346      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 ")));
     
    321351    [TestProperty("Time", "short")]
    322352    public void MctsSymbRegBenchmarkNguyen6() {
     353      // sin(x) + sin(x + x²)
    323354      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    324355      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 ")));
    325356      TestMcts(regProblem);
    326357    }
     358    */
     359    [TestMethod]
     360    [TestCategory("Algorithms.DataAnalysis")]
     361    [TestProperty("Time", "short")]
     362    public void MctsSymbRegBenchmarkNguyen7() {
     363      //  log(x + 1) + log(x² + 1)
     364      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
     365      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
     366      TestMcts(regProblem, maxVariableReferences:5, allowExp: false, allowLog: true, allowInv: false);
     367    }
     368    [TestMethod]
     369    [TestCategory("Algorithms.DataAnalysis")]
     370    [TestProperty("Time", "short")]
     371    public void MctsSymbRegBenchmarkNguyen8() {
     372      // Sqrt(x)
     373      // = x ^ 0.5
     374      // = exp(0.5 * log(x))
     375      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
     376      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 ")));
     377      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
     378    }
     379    /*
    327380    // [TestMethod]
    328381    [TestCategory("Algorithms.DataAnalysis")]
    329382    [TestProperty("Time", "short")]
    330     public void MctsSymbRegBenchmarkNguyen7() {
    331       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    332       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
    333       TestMcts(regProblem);
    334     }
    335     // [TestMethod]
    336     [TestCategory("Algorithms.DataAnalysis")]
    337     [TestProperty("Time", "short")]
    338     public void MctsSymbRegBenchmarkNguyen8() {
    339       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    340       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 ")));
    341       TestMcts(regProblem, successThreshold: 0.99);
    342     }
    343     // [TestMethod]
    344     [TestCategory("Algorithms.DataAnalysis")]
    345     [TestProperty("Time", "short")]
    346383    public void MctsSymbRegBenchmarkNguyen9() {
     384      //  sin(x) + sin(y²)
    347385      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    348386      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
    349       TestMcts(regProblem, iterations: 10000);
     387      TestMcts(regProblem);
    350388    }
    351389    // [TestMethod]
     
    353391    [TestProperty("Time", "short")]
    354392    public void MctsSymbRegBenchmarkNguyen10() {
     393      // 2sin(x)cos(y)
    355394      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
    356395      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 ")));
    357396      TestMcts(regProblem);
    358397    }
    359     // [TestMethod]
     398    */
     399    [TestMethod]
    360400    [TestCategory("Algorithms.DataAnalysis")]
    361401    [TestProperty("Time", "short")]
    362402    public void MctsSymbRegBenchmarkNguyen11() {
    363       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
     403      // x ^ y  , x > 0, y > 0   
     404      // = exp(y * log(x))
     405      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
    364406      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F11 ")));
    365       TestMcts(regProblem, 10000, 0.95); // cannot solve exactly in 10000 iterations
    366     }
    367     // [TestMethod]
     407      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
     408    }
     409    [TestMethod]
    368410    [TestCategory("Algorithms.DataAnalysis")]
    369411    [TestProperty("Time", "short")]
    370412    public void MctsSymbRegBenchmarkNguyen12() {
    371       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
     413      // x^4 - x³ + y²/2 - y
     414      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
    372415      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F12 ")));
    373       TestMcts(regProblem, iterations: 10000, successThreshold: 0.99, allowLog: false, allowExp: false, allowInv: false);
     416      TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: false);
    374417    }
    375418
     
    377420
    378421    #region keijzer
    379     // [TestMethod]
     422    [TestMethod]
    380423    [TestCategory("Algorithms.DataAnalysis")]
    381424    [TestProperty("Time", "long")]
    382425    public void MctsSymbRegBenchmarkKeijzer5() {
    383       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     426      // (30 * x * z) / ((x - 10)  * y²)
      427      // = 30 x z / (xy² - 10y²)
     428      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    384429      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 5 f(")));
     385430      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    386431      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    387       TestMcts(regProblem, iterations: 10000, allowExp: false, allowLog: false, allowSum: false, successThreshold: 0.99);
    388     }
    389 
    390     // [TestMethod]
     432      TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true);
     433    }
     434
     435    [TestMethod]
    391436    [TestCategory("Algorithms.DataAnalysis")]
    392437    [TestProperty("Time", "short")]
    393438    public void MctsSymbRegBenchmarkKeijzer6() {
    394       // Keijzer 6 f(x) = Sum(1 / i) From 1 to X
    395       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     439      // Keijzer 6 f(x) = Sum(1 / i) From 1 to X  , x \in [0..120]
     440      // we can only approximate this
     441      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    396442      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 6 f(")));
     397443      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    398444      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    399       TestMcts(regProblem, allowLog: false, allowExp: false, successThreshold: 0.995); // cannot solve
    400     }
    401 
    402     // [TestMethod]
    403     [TestCategory("Algorithms.DataAnalysis")]
    404     [TestProperty("Time", "short")]
    405     public void MctsSymbRegBenchmarkKeijzer7() {
    406       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
    407       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f(")));
    408       // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
    409       if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    410       TestMcts(regProblem);
    411     }
    412 
    413     [TestMethod]
    414     // [TestCategory("Algorithms.DataAnalysis")]
     445      TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true);
     446    }
     447
     448
     449    [TestMethod]
     450    [TestCategory("Algorithms.DataAnalysis")]
    415451    [TestProperty("Time", "short")]
    416452    public void MctsSymbRegBenchmarkKeijzer8() {
    417       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     453      // sqrt(x)
     454      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    418455      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 8 f(")));
     419456      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    420457      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    421       TestMcts(regProblem);
    422     }
    423 
    424     [TestMethod]
    425     // [TestCategory("Algorithms.DataAnalysis")]
     458      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
     459    }
     460
     461    [TestMethod]
     462    [TestCategory("Algorithms.DataAnalysis")]
    426463    [TestProperty("Time", "short")]
    427464    public void MctsSymbRegBenchmarkKeijzer9() {
    428       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     465      // arcsinh(x)  i.e. ln(x + sqrt(x² + 1))
     466      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    429467      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 9 f(")));
     430468      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    431469      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    432       TestMcts(regProblem);
    433     }
    434 
    435     /*
    436      * cannot solve this yet x^y
    437     [TestMethod]
    438     [TestCategory("Algorithms.DataAnalysis")]
    439     [TestProperty("Time", "short")]
    440     public void MctsSymbRegBenchmarkKeijzer10() {
    441       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
    442       var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 10 f(")));
    443       // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
    444       if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    445       TestMcts(regProblem, iterations: 10000, successThreshold: 0.99);
    446     }
    447      */
    448 
    449     /* cannot solve this yet
    450      * xy + sin( (x-1) (y-1) )
     470      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
     471    }
     472
     473    /*
    451474    [TestMethod]
    452475    [TestCategory("Algorithms.DataAnalysis")]
    453476    [TestProperty("Time", "short")]
    454477    public void MctsSymbRegBenchmarkKeijzer11() {
     478      // xy + sin( (x-1) (y-1) )
    455479      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
    456480      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 11 f(")));
     
    460484    }
    461485     */
    462     // [TestMethod]
     486    [TestMethod]
    463487    [TestCategory("Algorithms.DataAnalysis")]
    464488    [TestProperty("Time", "short")]
    465489    public void MctsSymbRegBenchmarkKeijzer12() {
    466       // sames a Nguyen 12
    467       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     490      // x^4 - x³ + y² / 2 - y,  same as Nguyen 12             
     491      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    468492      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 12 f(")));
    469493      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    470494      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    471       TestMcts(regProblem, iterations: 10000, allowLog: false, allowExp: false, allowInv: false, successThreshold: 0.99); // cannot solve exactly in 10000 iterations
    472     }
    473     // [TestMethod]
     495      TestMcts(regProblem, maxVariableReferences: 15, allowExp: false, allowLog: false, allowInv: false);
     496    }
     497    [TestMethod]
    474498    [TestCategory("Algorithms.DataAnalysis")]
    475499    [TestProperty("Time", "short")]
    476500    public void MctsSymbRegBenchmarkKeijzer14() {
    477       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     501      // 8 / (2 + x² + y²)
     502      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    478503      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 14 f(")));
    479504      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    480505      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    481       TestMcts(regProblem);
    482     }
    483     // [TestMethod]
     506      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true);
     507    }
     508    [TestMethod]
    484509    [TestCategory("Algorithms.DataAnalysis")]
    485510    [TestProperty("Time", "short")]
    486511    public void MctsSymbRegBenchmarkKeijzer15() {
    487       var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
     512      // x³ / 5 + y³ / 2 - y - x
     513      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
    488514      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 15 f(")));
    489515      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
    490516      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
    491       TestMcts(regProblem, iterations: 10000, allowLog: false, allowExp: false, allowInv: false);
     517      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: false);
    492518    }
    493519    #endregion
    494520
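The target functions named in the comments of the Keijzer 12, 14 and 15 tests above can likewise be stated directly. The sketch below is illustrative only (hypothetical helpers, not part of the changeset, assuming the comments quote the benchmark definitions correctly):

    // Illustration only, not part of the changeset:
    static double Keijzer12(double x, double y) { return Math.Pow(x, 4) - Math.Pow(x, 3) + y * y / 2.0 - y; } // x⁴ - x³ + y²/2 - y
    static double Keijzer14(double x, double y) { return 8.0 / (2.0 + x * x + y * y); }                       // 8 / (2 + x² + y²)
    static double Keijzer15(double x, double y) { return x * x * x / 5.0 + y * y * y / 2.0 - y - x; }         // x³/5 + y³/2 - y - x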
    495     private void TestMcts(IRegressionProblemData problemData, int iterations = 2000, double successThreshold = 0.999,
     521    private void TestMcts(IRegressionProblemData problemData,
     522      int iterations = 20000,
     523      double successThreshold = 0.99999,
     524      int maxVariableReferences = 5,
    496525      bool allowExp = true,
    497526      bool allowLog = true,
     
    505534      mctsSymbReg.Problem = regProblem;
    506535      mctsSymbReg.Iterations = iterations;
    507       mctsSymbReg.MaxVariableReferences = 10;
    508       var ucbPolicy = new Ucb();
    509       ucbPolicy.C = 2;
    510       mctsSymbReg.Policy = ucbPolicy;
     536      mctsSymbReg.MaxVariableReferences = maxVariableReferences;
     537
    511538      mctsSymbReg.SetSeedRandomly = false;
    512539      mctsSymbReg.Seed = 1234;
     
    514541      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
    515542      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
    516       mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("sum of multiple")), allowSum);
     543      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
     544
     545      mctsSymbReg.ScaleVariables = true;
     546      mctsSymbReg.ConstantOptimizationIterations = 0;
     547
    517548      #endregion
    518549      RunAlgorithm(mctsSymbReg);
    519550
    520551      Console.WriteLine(mctsSymbReg.ExecutionTime);
    521       // R² >= 0.999
    522       // using AreEqual to show the actual value in the test output
     552      var eps = 1.0 - successThreshold;
     553      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps);
     554      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps);
     555    }
     556
     557
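A note on the assertions at the end of TestMcts above: MSTest's Assert.AreEqual(expected, actual, delta) passes when |expected - actual| <= delta, so the tests effectively require an R² of at least successThreshold on both the training and the test partition. A minimal sketch of the arithmetic (illustration only, not part of the changeset):

    // With the new default successThreshold = 0.99999 the tolerance eps is 1e-5,
    // i.e. the reported R² must be at least 0.99999 for the assertion to pass.
    double successThreshold = 0.99999;
    double eps = 1.0 - successThreshold;          // 1e-5
    double quality = 0.999995;                    // hypothetical R² reported by a run
    bool passes = Math.Abs(1.0 - quality) <= eps; // true: within tolerance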
     558    private void TestMctsWithoutConstants(IRegressionProblemData problemData, int iterations = 200000, double successThreshold = 0.99999,
     559      bool allowExp = true,
     560      bool allowLog = true,
     561      bool allowInv = true,
     562      bool allowSum = true
     563      ) {
     564      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
     565      var regProblem = new RegressionProblem();
     566      regProblem.ProblemDataParameter.Value = problemData;
     567      #region Algorithm Configuration
     568      mctsSymbReg.Problem = regProblem;
     569      mctsSymbReg.Iterations = iterations;
     570      mctsSymbReg.MaxVariableReferences = 25;
     571      mctsSymbReg.SetSeedRandomly = false;
     572      mctsSymbReg.Seed = 1234;
     573      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp);
     574      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
     575      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
     576      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
     577
     578      // no constants
     579      mctsSymbReg.ScaleVariables = false;
     580      mctsSymbReg.ConstantOptimizationIterations = -1;
     581
     582      #endregion
     583      RunAlgorithm(mctsSymbReg);
     584
     585      Console.WriteLine(mctsSymbReg.ExecutionTime);
    523586      var eps = 1.0 - successThreshold;
    524587      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps);
     
    546609      ucbPolicy.C = 1000; // essentially breadth first search
    547610      mctsSymbReg.Policy = ucbPolicy;
    548       mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith("prod")), allowProd);
    549       mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp);
    550       mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
    551       mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
    552       mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("sum of multiple")), allowSum);
     611      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith("x * y * ...")), allowProd);
     612      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp(c * x * y ...)")), allowExp);
     613      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log(c + c1 x + c2 x + ...)")), allowLog);
     614      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 / (1 + c1 x + c2 x + ...)")), allowInv);
     615      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
    553616      #endregion
    554617      RunAlgorithm(mctsSymbReg);
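The comment "essentially breadth first search" a few lines above refers to the effect of a very large exploration constant in UCB-style child selection: as C grows, the exploration term dominates the mean reward, so the least-visited child is always chosen and the tree is expanded roughly level by level. The generic UCB1 sketch below is illustrative only and is not the actual HeuristicLab Ucb policy, whose exact formula may differ:

    // Generic UCB1 child selection (illustration only; not the HeuristicLab implementation).
    static int SelectChild(double[] meanReward, int[] visits, int totalVisits, double c) {
      int best = 0;
      double bestScore = double.NegativeInfinity;
      for (int i = 0; i < meanReward.Length; i++) {
        // unvisited children get an infinite score and are therefore tried first
        double score = visits[i] == 0
          ? double.PositiveInfinity
          : meanReward[i] + c * Math.Sqrt(2.0 * Math.Log(totalVisits) / visits[i]);
        if (score > bestScore) { bestScore = score; best = i; }
      }
      return best;
    }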