Changeset 15403 for branches/MCTS-SymbReg-2796

- Timestamp: 10/04/17 22:00:52
- Location: branches/MCTS-SymbReg-2796
- Files: 3 added, 8 edited
branches/MCTS-SymbReg-2796
- Property svn:ignore set to TestResults
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs (r14185 → r15403)

      }

  -   // reverse notation ops
  -   // Expr -> c 0 Term { '+' Term } '+' '*' c '+' 'exit'
  +   // reverse notation
  +   // Expr -> 0 Term { '+' Term } '+' 'exit'
      // Term -> c Fact { '*' Fact } '*'
      // Fact -> VarFact | ExpFact | LogFact | InvFact
  …
      actionStrings = new List<string>[nStates, nStates];

  -   // Expr -> c 0 Term { '+' Term } '+' '*' c '+' 'exit'
  +   // Expr -> 0 Term { '+' Term } '+' 'exit'
      AddTransition(StateExpr, StateTermStart, () => {
        codeGenerator.Reset();
  -     codeGenerator.Emit1(OpCodes.LoadParamN);
        codeGenerator.Emit1(OpCodes.LoadConst0);
        constraintHandler.Reset();
  -   }, "c0, Reset");
  +   }, "0, Reset");
      AddTransition(StateTermEnd, StateExprEnd, () => {
        codeGenerator.Emit1(OpCodes.Add);
  -     codeGenerator.Emit1(OpCodes.Mul);
  -     codeGenerator.Emit1(OpCodes.LoadParamN);
  -     codeGenerator.Emit1(OpCodes.Add);
        codeGenerator.Emit1(OpCodes.Exit);
  -   }, "+*c+exit");
  +   }, "+ exit");
      if (allowMultipleTerms)
        AddTransition(StateTermEnd, StateTermStart, () => {
  …
      private readonly int[] followStatesBuf = new int[1000];
      public void FollowStates(int state, out int[] buf, out int nElements) {
  -     // for loop instead of where iterator
        var fs = followStates[state];
        int j = 0;
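The change above removes the outer linear-scaling constants from the generated programs: the automaton now emits a plain sum of terms in reverse Polish (postfix) order, and linear scaling is applied to the finished model instead (see the model.Scale(problemData) calls added in MctsSymbolicRegressionStatic.cs further down). For c1·x + c2·y the emitted sequence under the new production is 0 c1 x * + c2 y * + exit. A minimal, self-contained sketch of how such a postfix program is evaluated — with purely illustrative opcode names and a plain stack machine, not the actual HeuristicLab OpCodes or code generator — could look like this:

    using System;
    using System.Collections.Generic;

    // Illustrative postfix evaluator for the simplified grammar
    // Expr -> 0 Term { '+' Term } '+' 'exit';  Term -> c Fact { '*' Fact } '*'.
    // Opcode names are hypothetical; the real evaluator works on byte codes and buffers.
    enum Op { LoadConst0, LoadConst, LoadVar, Mul, Add, Exit }

    static class PostfixDemo {
      static double Eval((Op op, double arg)[] program, double[] vars) {
        var stack = new Stack<double>();
        foreach (var (op, arg) in program) {
          switch (op) {
            case Op.LoadConst0: stack.Push(0.0); break;
            case Op.LoadConst: stack.Push(arg); break;           // numeric constant c
            case Op.LoadVar: stack.Push(vars[(int)arg]); break;  // variable reference
            case Op.Mul: { var b = stack.Pop(); var a = stack.Pop(); stack.Push(a * b); break; }
            case Op.Add: { var b = stack.Pop(); var a = stack.Pop(); stack.Push(a + b); break; }
            case Op.Exit: return stack.Pop();
          }
        }
        return stack.Pop();
      }

      static void Main() {
        // c1*x + c2*y with c1=2, c2=3 at x=1.5, y=4:  0 c1 x * + c2 y * + exit
        var program = new[] {
          (Op.LoadConst0, 0.0),
          (Op.LoadConst, 2.0), (Op.LoadVar, 0.0), (Op.Mul, 0.0), (Op.Add, 0.0),
          (Op.LoadConst, 3.0), (Op.LoadVar, 1.0), (Op.Mul, 0.0), (Op.Add, 0.0),
          (Op.Exit, 0.0)
        };
        Console.WriteLine(Eval(program, new[] { 1.5, 4.0 })); // 15
      }
    }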
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ConstraintHandler.cs (r14185 → r15403)

      // This class restricts the set of allowed transitions of the automaton to prevent exploration of duplicate expressions.
      // It would be possible to implement this class in such a way that the search never visits a duplicate expression. However,
  -   // it seems very intricate to detect this robustly and in all cases while generating an expression because
  -   // some for of lookahead is necessary.
  -   // Instead the constraint handler only catches the obvious duplicates directly, but does not guarantee that the search always produces a valid expression.
  +   // it seems very intricate to detect this robustly while generating an expression because
  +   // some form of lookahead is necessary.
  +   // Instead, the constraint handler only catches the obvious duplicates directly, but does not guarantee that the search always produces a valid expression.
      // The ratio of the number of unsuccessful searches, that need backtracking should be tracked in the MCTS alg (MctsSymbolicRegressionStatic)
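To make "catches the obvious duplicates" concrete: the simplest duplicate is a commutative reordering of factors, where x0·x1 and x1·x0 denote the same term. A hedged, stand-alone illustration of that idea (not the actual ConstraintHandler, which restricts automaton transitions rather than inspecting finished terms) is to canonicalize each product before accepting it:

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Illustrative only: detect "obvious" duplicate terms by sorting the variable
    // indices of each product, so that x0*x1 and x1*x0 map to the same canonical key.
    static class DuplicateTermDemo {
      static string CanonicalKey(IEnumerable<int> varIndices) =>
        string.Join("*", varIndices.OrderBy(i => i));

      static void Main() {
        var seen = new HashSet<string>();
        Console.WriteLine(seen.Add(CanonicalKey(new[] { 0, 1 }))); // True  (x0*x1 is new)
        Console.WriteLine(seen.Add(CanonicalKey(new[] { 1, 0 }))); // False (x1*x0 is a duplicate)
      }
    }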
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ExpressionEvaluator.cs (r14185 → r15403)

      var minFx = fxc.Min() - consts[curParamIdx]; // stack[topOfStack] is f(x) + c

  -   var delta = 1.0 - minFx - consts[curParamIdx];
  -   // adjust c so that minFx + c = 1 ... log(minFx + c) = 0
  +   // adjust c so that minFx + c = e and therefore log(minFx + c) = log(e) = 1
  +   // this initialization works in combination with the gradient check (instead of initializing such that log(minFx + c) = 0)
  +   var delta = Math.E - minFx - consts[curParamIdx];
      consts[curParamIdx] += delta;
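In other words, before constant optimization starts, the offset c inside log(c + f(x)) is now shifted so that the smallest argument equals e instead of 1, giving log(min f(x) + c) = 1 and a non-zero starting value that works with the gradient check. A small stand-alone sketch of the adjustment, with hypothetical variable names (the real code operates on the evaluator's stack buffer, whose top already holds f(x) + c):

    using System;
    using System.Linq;

    // Sketch of the offset adjustment performed before constant optimization.
    static class LogOffsetDemo {
      // Shift c so that min(f(x)) + c == e, i.e. log(min(f(x)) + c) == 1.
      static double AdjustOffset(double[] fx, double c) {
        double minFx = fx.Min();
        double delta = Math.E - minFx - c;
        return c + delta;
      }

      static void Main() {
        var fx = new[] { -3.0, 0.5, 2.0 };          // f(x) values, smallest is -3
        double c = AdjustOffset(fx, 1.0);            // adjusted constant
        Console.WriteLine(Math.Log(fx.Min() + c));   // prints 1 (= log(e))
      }
    }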
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs (r15360 → r15403)

      using System;
      using System.Linq;
  -   using System.Runtime.CompilerServices;
      using System.Threading;
      using HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies;
  …
      namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
  -   [Item("MCTS Symbolic Regression", "Monte carlo tree search for symbolic regression. Useful mainly as a base learner in gradient boosting.")]
  +   // TODO: support pause (persisting/cloning the state)
  +   [Item("MCTS Symbolic Regression", "Monte carlo tree search for symbolic regression.")]
      [StorableClass]
      [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
  …
      private const string CreateSolutionParameterName = "CreateSolution";
      private const string PunishmentFactorParameterName = "PunishmentFactor";

  -   private const string VariableProductFactorName = "product(xi)";
  -   private const string ExpFactorName = "exp(c * product(xi))";
  -   private const string LogFactorName = "log(c + sum(c*product(xi))";
  -   private const string InvFactorName = "1 / (1 + sum(c*product(xi))";
  -   private const string FactorSumsName = "sum of multiple terms";
  +   private const string CollectParetoOptimalSolutionsParameterName = "CollectParetoOptimalSolutions";
  +   private const string LambdaParameterName = "Lambda";
  +
  +   private const string VariableProductFactorName = "x * y * ...";
  +   private const string ExpFactorName = "exp(c * x * y ...)";
  +   private const string LogFactorName = "log(c + c1 x + c2 x + ...)";
  +   private const string InvFactorName = "1 / (1 + c1 x + c2 x + ...)";
  +   private const string FactorSumsName = "t1(x) + t2(x) + ... ";
      #endregion
  …
      public IFixedValueParameter<BoolValue> CreateSolutionParameter {
        get { return (IFixedValueParameter<BoolValue>)Parameters[CreateSolutionParameterName]; }
  +   }
  +   public IFixedValueParameter<BoolValue> CollectParetoOptimalSolutionsParameter {
  +     get { return (IFixedValueParameter<BoolValue>)Parameters[CollectParetoOptimalSolutionsParameterName]; }
  +   }
  +   public IFixedValueParameter<DoubleValue> LambdaParameter {
  +     get { return (IFixedValueParameter<DoubleValue>)Parameters[LambdaParameterName]; }
      }
      #endregion
  …
        get { return CreateSolutionParameter.Value.Value; }
        set { CreateSolutionParameter.Value.Value = value; }
  +   }
  +   public bool CollectParetoOptimalSolutions {
  +     get { return CollectParetoOptimalSolutionsParameter.Value.Value; }
  +     set { CollectParetoOptimalSolutionsParameter.Value.Value = value; }
  +   }
  +   public double Lambda {
  +     get { return LambdaParameter.Value.Value; }
  +     set { LambdaParameter.Value.Value = value; }
      }
      #endregion
  …
      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName,
        "Number of iterations for constant optimization. A small number of iterations should be sufficient for most models. " +
  -     "Set to 0 to disable constants optimization.", new IntValue(10)));
  +     "Set to 0 to let the algorithm stop automatically when it converges. Set to -1 to disable constants optimization.", new IntValue(10)));
      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleVariablesParameterName,
  -     "Set to true to scale all input variables to the range [0..1]", new BoolValue(false)));
  +     "Set to true to scale all input variables to the range [0..1]", new BoolValue(true)));
      Parameters[ScaleVariablesParameterName].Hidden = true;
      Parameters.Add(new FixedValueParameter<DoubleValue>(PunishmentFactorParameterName, "Estimations of models can be bounded. The estimation limits are calculated in the following way (lb = mean(y) - punishmentFactor*range(y), ub = mean(y) + punishmentFactor*range(y))", new DoubleValue(10)));
  …
      Parameters[UpdateIntervalParameterName].Hidden = true;
      Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName,
  -     "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));
  +     "Optionally produce a solution at the end of the run", new BoolValue(true)));
      Parameters[CreateSolutionParameterName].Hidden = true;
  +
  +   Parameters.Add(new FixedValueParameter<BoolValue>(CollectParetoOptimalSolutionsParameterName,
  +     "Optionally collect a set of Pareto-optimal solutions minimizing error and complexity.", new BoolValue(false)));
  +   Parameters[CollectParetoOptimalSolutionsParameterName].Hidden = true;
  +
  +   Parameters.Add(new FixedValueParameter<DoubleValue>(LambdaParameterName,
  +     "Lambda is the factor for the regularization term in the objective function (Obj = (y - f(x,p))² + lambda * |p|²)", new DoubleValue(0.0)));
      }
  …
      }

  +   // TODO: support pause and restart
      protected override void Run(CancellationToken cancellationToken) {
        // Set up the algorithm
        if (SetSeedRandomly) Seed = new System.Random().Next();
  +     var collectPareto = CollectParetoOptimalSolutions;

        // Set up the results display
  …
        var gradEvals = new IntValue();
        Results.Add(new Result("Gradient evaluations", gradEvals));
  -     var paretoBestModelsResult = new Result("ParetoBestModels", typeof(ItemList<ISymbolicRegressionSolution>));
  -     Results.Add(paretoBestModelsResult);
  -
  +
  +     Result paretoBestModelsResult = new Result("ParetoBestModels", typeof(ItemList<ISymbolicRegressionSolution>));
  +     if (collectPareto) {
  +       Results.Add(paretoBestModelsResult);
  +     }

        // same as in SymbolicRegressionSingleObjectiveProblem
  …
        var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
        if (!AllowedFactors.CheckedItems.Any()) throw new ArgumentException("At least on type of factor must be allowed");
  -     var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)Seed, MaxVariableReferences, ScaleVariables, ConstantOptimizationIterations,
  -       Policy,
  +     var state = MctsSymbolicRegressionStatic.CreateState(problemData, (uint)Seed, MaxVariableReferences, ScaleVariables,
  +       ConstantOptimizationIterations, Lambda,
  +       Policy, collectPareto,
          lowerLimit, upperLimit,
          allowProdOfVars: AllowedFactors.CheckedItems.Any(s => s.Value.Value == VariableProductFactorName),
  …
        double curBestQ = 0.0;
        int n = 0;
  +
  +     // cancelled before we actually started
  +     cancellationToken.ThrowIfCancellationRequested();
  +
        // Loop until iteration limit reached or canceled.
  -     for (int i = 0; i < Iterations && !state.Done; i++) {
  -       cancellationToken.ThrowIfCancellationRequested();
  -
  +     for (int i = 0; i < Iterations && !state.Done && !cancellationToken.IsCancellationRequested; i++) {
          var q = MctsSymbolicRegressionStatic.MakeStep(state);
          sumQ += q; // sum of qs in the last updateinterval iterations
  …
          totalRollouts.Value = state.TotalRollouts;

  -       paretoBestModelsResult.Value = new ItemList<ISymbolicRegressionSolution>(state.ParetoBestModels);
  +       if (collectPareto) {
  +         paretoBestModelsResult.Value = new ItemList<ISymbolicRegressionSolution>(state.ParetoBestModels);
  +       }

          table.Rows["Best quality"].Values.Add(bestQuality.Value);
  …
        }

  -     // final results
  +     // final results (assumes that at least one iteration was calculated)
        if (n > 0) {
          if (bestQ > bestQuality.Value) {
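The new Lambda parameter introduces a ridge-style penalty into constant optimization; its description states the objective as Obj = (y - f(x,p))² + lambda·|p|². As a plain numeric illustration of that formula (hypothetical names only; the actual evaluation additionally scales the penalty by n/stdDev(y), see the Func/FuncAndJacobian hunks in MctsSymbolicRegressionStatic.cs below):

    using System;
    using System.Linq;

    // Minimal sketch of the regularized objective named in the Lambda parameter
    // description: Obj(p) = sum_i (y_i - f(x_i, p))^2 + lambda * |p|^2.
    // All names are illustrative, not the HeuristicLab API.
    static class RegularizedObjectiveDemo {
      static double Objective(double[] y, double[] yPred, double[] p, double lambda) {
        double sse = y.Zip(yPred, (yi, fi) => (yi - fi) * (yi - fi)).Sum();
        double penalty = lambda * p.Sum(pi => pi * pi);
        return sse + penalty;
      }

      static void Main() {
        var y = new[] { 1.0, 2.0, 3.0 };
        var yPred = new[] { 1.1, 1.9, 3.2 };
        var p = new[] { 0.5, -2.0 };
        Console.WriteLine(Objective(y, yPred, p, lambda: 0.1)); // 0.06 + 0.425 = 0.485
      }
    }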
branches/MCTS-SymbReg-2796/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs (r15360 → r15403)

      namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
      public static class MctsSymbolicRegressionStatic {
  -   // TODO: SGD with adagrad instead of lbfgs?
  -   // TODO: check Taylor expansion capabilities (ln(x), sqrt(x), exp(x)) in combination with GBT
  -   // TODO: optimize for 3 targets concurrently (y, 1/y, exp(y), and log(y))? Would simplify the number of possible expressions again
  +   // OBJECTIVES:
  +   // 1) solve toy problems without numeric constants (to show that structure search is effective / efficient)
  +   //    - e.g. Keijzer, Nguyen ... where no numeric constants are involved
  +   //    - assumptions:
  +   //      - we don't know the necessary operations or functions -> all available functions could be necessary
  +   //      - but we do not need to tune numeric constants -> no scaling of input variables x!
  +   // 2) Solve toy problems with numeric constants to make the algorithm invariant concerning variable scale.
  +   //    This is important for real world applications.
  +   //    - e.g. Korns or Vladislavleva problems where numeric constants are involved
  +   //    - assumptions:
  +   //      - any numeric constant is possible (a-priori we might assume that small abs. constants are more likely)
  +   //      - standardization of variables is possible (or might be necessary) as we adjust numeric parameters of the expression anyway
  +   //      - to simplify the problem we can restrict the set of functions, e.g. we assume which functions are necessary for the problem instance
  +   //        -> several steps: (a) polynomials, (b) rational polynomials, (c) exponential or logarithmic functions, rational functions with exponential and logarithmic parts
  +   // 3) efficiency and effectiveness for real-world problems
  +   //    - e.g. Tower problem
  +   //    - (1) and (2) combined, structure search must be effective in combination with numeric optimization of constants
  +
  +   // TODO NEXT: check if transformation of y is correct and works
  +   // TODO: transform target y to zero-mean and remove linear scaling parameters
  +   // TODO: include offset for variables as parameter
  +   // TODO: why does LM optimization converge so slowly with exp(x), log(x), and 1/x allowed?
  +   // TODO: support e(-x) and possibly (1/-x)
  +   // TODO: is it OK to initialize all constants to 1?
      #region static API
  …
      private readonly double[] scalingFactor;
      private readonly double[] scalingOffset;
  +   private readonly double yStdDev; // for scaling parameters (e.g. stopping condition for LM)
      private readonly int constOptIterations;
  +   private readonly double lambda; // weight of penalty term for regularization
      private readonly double lowerEstimationLimit, upperEstimationLimit;
  +   private readonly bool collectParetoOptimalModels;
      private readonly List<ISymbolicRegressionSolution> paretoBestModels = new List<ISymbolicRegressionSolution>();
      private readonly List<double[]> paretoFront = new List<double[]>(); // matching the models
  …
      private readonly double[][] gradBuf;

  -   public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables, int constOptIterations,
  +   public State(IRegressionProblemData problemData, uint randSeed, int maxVariables, bool scaleVariables,
  +     int constOptIterations, double lambda,
        IPolicy treePolicy = null,
  +     bool collectParetoOptimalModels = false,
        double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
        bool allowProdOfVars = true,
  …
        bool allowMultipleTerms = false) {

  +     if (lambda < 0) throw new ArgumentException("Lambda must be larger or equal zero", "lambda");
  +
        this.problemData = problemData;
        this.constOptIterations = constOptIterations;
  +     this.lambda = lambda;
        this.evalFun = this.Eval;
        this.lowerEstimationLimit = lowerEstimationLimit;
        this.upperEstimationLimit = upperEstimationLimit;
  +     this.collectParetoOptimalModels = collectParetoOptimalModels;

        random = new MersenneTwister(randSeed);
  …
        this.x = x;
        this.y = y;
  +     this.yStdDev = HeuristicLab.Common.EnumerableStatisticExtensions.StandardDeviation(y);
        this.testX = testX;
        this.testY = testY;
  …
        var t = new SymbolicExpressionTree(treeGen.Exec(bestCode, bestConsts, bestNParams, scalingFactor, scalingOffset));
        var model = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerEstimationLimit, upperEstimationLimit);
  -
  -     // model has already been scaled linearly in Eval
  +     model.Scale(problemData); // apply linear scaling
        return model;
      }
  …
        Array.Copy(optConsts, bestConsts, bestNParams);
      }
  -   // multi-objective best
  -   var complexity = // SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity() TODO
  -     Array.FindIndex(code, (opc) => opc == (byte)OpCodes.Exit);
  -   UpdateParetoFront(q, complexity, code, optConsts, nParams, scalingFactor, scalingOffset);
  -
  +   if (collectParetoOptimalModels) {
  +     // multi-objective best
  +     var complexity = // SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity() TODO: implement Kommenda's tree complexity directly in the evaluator
  +       Array.FindIndex(code, (opc) => opc == (byte)OpCodes.Exit); // use length of expression as surrogate for complexity
  +     UpdateParetoFront(q, complexity, code, optConsts, nParams, scalingFactor, scalingOffset);
  +   }
      return q;
    }

  +   private void Eval(byte[] code, int nParams, out double rsq, out double[] optConsts) {
  +     // we make a first pass to determine a valid starting configuration for all constants
  +     // constant c in log(c + f(x)) is adjusted to guarantee that x is positive (see expression evaluator)
  +     // scale and offset are set to optimal starting configuration
  +     // assumes scale is the first param and offset is the last param
  +
  +     // reset constants
  +     Array.Copy(ones, constsBuf, nParams);
  +     evaluator.Exec(code, x, constsBuf, predBuf, adjustOffsetForLogAndExp: true);
  +     funcEvaluations++;
  +
  +     if (nParams == 0 || constOptIterations < 0) {
  +       // if we don't need to optimize parameters then we are done
  +       // changing scale and offset does not influence r²
  +       rsq = RSq(y, predBuf);
  +       optConsts = constsBuf;
  +     } else {
  +       // optimize constants using the starting point calculated above
  +       OptimizeConstsLm(code, constsBuf, nParams, 0.0, nIters: constOptIterations);
  +
  +       evaluator.Exec(code, x, constsBuf, predBuf);
  +       funcEvaluations++;
  +
  +       rsq = RSq(y, predBuf);
  +       optConsts = constsBuf;
  +     }
  +   }
  +
  +   #region helpers
  +   private static double RSq(IEnumerable<double> x, IEnumerable<double> y) {
  +     OnlineCalculatorError error;
  +     double r = OnlinePearsonsRCalculator.Calculate(x, y, out error);
  +     return error == OnlineCalculatorError.None ? r * r : 0.0;
  +   }
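The RSq helper above reports the squared Pearson correlation between targets and predictions and falls back to 0.0 when the online calculator signals an error. A stand-alone equivalent without the HeuristicLab calculator (illustrative only) is:

    using System;
    using System.Linq;

    // Sketch: R² as the squared Pearson correlation between targets and predictions.
    static class RSqDemo {
      static double RSq(double[] y, double[] pred) {
        double my = y.Average(), mp = pred.Average();
        double cov = 0.0, vy = 0.0, vp = 0.0;
        for (int i = 0; i < y.Length; i++) {
          cov += (y[i] - my) * (pred[i] - mp);
          vy += (y[i] - my) * (y[i] - my);
          vp += (pred[i] - mp) * (pred[i] - mp);
        }
        if (vy == 0.0 || vp == 0.0) return 0.0;   // degenerate case, mirror the 0.0 fallback
        double r = cov / Math.Sqrt(vy * vp);
        return r * r;
      }

      static void Main() {
        var y = new[] { 1.0, 2.0, 3.0, 4.0 };
        var pred = new[] { 1.1, 2.1, 2.9, 4.2 };  // nearly perfectly correlated
        Console.WriteLine(RSq(y, pred));          // close to 1.0
      }
    }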
  +
  +   private void OptimizeConstsLm(byte[] code, double[] consts, int nParams, double epsF = 0.0, int nIters = 100) {
  +     double[] optConsts = new double[nParams]; // allocate a smaller buffer for constants opt (TODO perf?)
  +     Array.Copy(consts, optConsts, nParams);
  +
  +     // direct usage of LM is recommended in alglib manual for better performance than the lsfit interface (which uses lm internally).
  +     alglib.minlmstate state;
  +     alglib.minlmreport rep = null;
  +     alglib.minlmcreatevj(y.Length + 1, optConsts, out state); // +1 for penalty term
  +     // Using the change of the gradient as stopping criterion is recommended in alglib manual.
  +     // However, the most recent version of alglib (as of Oct 2017) only supports epsX as stopping criterion
  +     alglib.minlmsetcond(state, epsg: 1E-6 * yStdDev, epsf: epsF, epsx: 0.0, maxits: nIters);
  +     // alglib.minlmsetgradientcheck(state, 1E-5);
  +     alglib.minlmoptimize(state, Func, FuncAndJacobian, null, code);
  +     alglib.minlmresults(state, out optConsts, out rep);
  +     funcEvaluations += rep.nfunc;
  +     gradEvaluations += rep.njac * nParams;
  +
  +     if (rep.terminationtype < 0) throw new ArgumentException("lm failed: termination type = " + rep.terminationtype);
  +
  +     // only use optimized constants if successful
  +     if (rep.terminationtype >= 0) {
  +       Array.Copy(optConsts, consts, optConsts.Length);
  +     }
  +   }
  +
  +   private void Func(double[] arg, double[] fi, object obj) {
  +     var code = (byte[])obj;
  +     int n = predBuf.Length;
  +     evaluator.Exec(code, x, arg, predBuf); // gradients are nParams x vLen
  +     for (int r = 0; r < n; r++) {
  +       var res = predBuf[r] - y[r];
  +       fi[r] = res;
  +     }
  +
  +     var penaltyIdx = fi.Length - 1;
  +     fi[penaltyIdx] = 0.0;
  +     // calc length of parameter vector for regularization
  +     var aa = 0.0;
  +     for (int i = 0; i < arg.Length; i++) {
  +       aa += arg[i] * arg[i];
  +     }
  +     if (lambda > 0 && aa > 0) {
  +       // scale lambda using stdDev(y) to make the parameter independent of the scale of y
  +       // scale lambda using n to make parameter independent of the number of training points
  +       // take the root because LM squares the result
  +       fi[penaltyIdx] = Math.Sqrt(n * lambda / yStdDev * aa);
  +     }
  +   }
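Func appends one extra residual r(p) = sqrt(n·lambda/stdDev(y)·|p|²) to the least-squares residual vector, so that LM, which minimizes the sum of squared residuals, effectively adds n·lambda/stdDev(y)·|p|² to the objective. A small sketch (hypothetical names) that also checks the analytic derivative used in FuncAndJacobian against a finite difference:

    using System;
    using System.Linq;

    // Sketch: the extra LM residual used for regularization, r(p) = sqrt(n*lambda/sdY * |p|²),
    // so that squaring by LM yields the penalty n*lambda/sdY * |p|².
    static class PenaltyResidualDemo {
      static double Penalty(double[] p, int n, double lambda, double sdY) =>
        Math.Sqrt(n * lambda / sdY * p.Sum(pi => pi * pi));

      // dr/dp_i = 0.5 / r * 2 * n * lambda / sdY * p_i  (same expression as in the jacobian row)
      static double PenaltyGrad(double[] p, int i, int n, double lambda, double sdY) {
        double r = Penalty(p, n, lambda, sdY);
        return 0.5 / r * 2 * n * lambda / sdY * p[i];
      }

      static void Main() {
        var p = new[] { 0.5, -2.0, 1.0 };
        int n = 100; double lambda = 0.1, sdY = 2.0;

        // compare the analytic derivative with a central finite difference for p[1]
        double h = 1e-6;
        var pPlus = (double[])p.Clone(); pPlus[1] += h;
        var pMinus = (double[])p.Clone(); pMinus[1] -= h;
        double fd = (Penalty(pPlus, n, lambda, sdY) - Penalty(pMinus, n, lambda, sdY)) / (2 * h);

        Console.WriteLine(PenaltyGrad(p, 1, n, lambda, sdY)); // ≈ -1.952
        Console.WriteLine(fd);                                // ≈ same value
      }
    }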
  +
  +   private void FuncAndJacobian(double[] arg, double[] fi, double[,] jac, object obj) {
  +     int n = predBuf.Length;
  +     int nParams = arg.Length;
  +     var code = (byte[])obj;
  +     evaluator.ExecGradient(code, x, arg, predBuf, gradBuf); // gradients are nParams x vLen
  +     for (int r = 0; r < n; r++) {
  +       var res = predBuf[r] - y[r];
  +       fi[r] = res;
  +
  +       for (int k = 0; k < nParams; k++) {
  +         jac[r, k] = gradBuf[k][r];
  +       }
  +     }
  +     // calc length of parameter vector for regularization
  +     double aa = 0.0;
  +     for (int i = 0; i < arg.Length; i++) {
  +       aa += arg[i] * arg[i];
  +     }
  +
  +     var penaltyIdx = fi.Length - 1;
  +     if (lambda > 0 && aa > 0) {
  +       fi[penaltyIdx] = 0.0;
  +       // scale lambda using stdDev(y) to make the parameter independent of the scale of y
  +       // scale lambda using n to make parameter independent of the number of training points
  +       // take the root because alglib LM squares the result
  +       fi[penaltyIdx] = Math.Sqrt(n * lambda / yStdDev * aa);
  +
  +       for (int i = 0; i < arg.Length; i++) {
  +         jac[penaltyIdx, i] = 0.5 / fi[penaltyIdx] * 2 * n * lambda / yStdDev * arg[i];
  +       }
  +     } else {
  +       fi[penaltyIdx] = 0.0;
  +       for (int i = 0; i < arg.Length; i++) {
  +         jac[penaltyIdx, i] = 0.0;
  +       }
  +     }
  +   }
  +
      private void UpdateParetoFront(double q, int complexity, byte[] code, double[] param, int nParam,
  …
        }
      }
  -   if (isNonDominated) {
  +     if (isNonDominated) {
        paretoFront.Add(cur);
  …
        var t = new SymbolicExpressionTree(treeGen.Exec(code, param, nParam, scalingFactor, scalingOffset));
        var model = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerEstimationLimit, upperEstimationLimit);
  +     model.Scale(problemData); // apply linear scaling

        var sol = model.CreateRegressionSolution(this.problemData);
        sol.Name = string.Format("{0:N5} {1}", q, complexity);

        paretoBestModels.Add(sol);
      }
  -   for (int i=paretoFront.Count-2;i>=0;i--) {
  +     for (int i = paretoFront.Count - 2; i >= 0; i--) {
        var @ref = paretoFront[i];
        var domRes = DominationCalculator<int>.Dominates(cur, @ref, max, true);
  -   if (domRes == DominationResult.Dominates) {
  +     if (domRes == DominationResult.Dominates) {
          paretoFront.RemoveAt(i);
          paretoBestModels.RemoveAt(i);
  …
        }
      }
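UpdateParetoFront keeps only models that are non-dominated in (quality, complexity) and evicts entries the new model dominates, delegating the comparison to HeuristicLab's DominationCalculator. A tiny, generic sketch of the same bookkeeping for two objectives (maximize quality, minimize complexity; illustrative only, not the actual implementation):

    using System;
    using System.Collections.Generic;

    // Sketch: maintaining a 2-objective Pareto front of (quality, complexity) pairs.
    static class ParetoDemo {
      // a dominates b if it is no worse in both objectives and strictly better in at least one
      static bool Dominates((double q, int c) a, (double q, int c) b) =>
        a.q >= b.q && a.c <= b.c && (a.q > b.q || a.c < b.c);

      static void Main() {
        var front = new List<(double q, int c)> { (0.90, 12), (0.95, 20) };
        var cand = (q: 0.96, c: 15);

        bool nonDominated = front.TrueForAll(f => !Dominates(f, cand));
        if (nonDominated) {
          front.RemoveAll(f => Dominates(cand, f)); // drop members the candidate dominates
          front.Add(cand);
        }
        Console.WriteLine(string.Join(", ", front)); // (0.9, 12), (0.96, 15)
      }
    }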
  -
  -   private void Eval(byte[] code, int nParams, out double rsq, out double[] optConsts) {
  -     // we make a first pass to determine a valid starting configuration for all constants
  -     // constant c in log(c + f(x)) is adjusted to guarantee that x is positive (see expression evaluator)
  -     // scale and offset are set to optimal starting configuration
  -     // assumes scale is the first param and offset is the last param
  -     double alpha;
  -     double beta;
  -
  -     // reset constants
  -     Array.Copy(ones, constsBuf, nParams);
  -     evaluator.Exec(code, x, constsBuf, predBuf, adjustOffsetForLogAndExp: true);
  -     funcEvaluations++;
  -
  -     // calc opt scaling (alpha*f(x) + beta)
  -     OnlineCalculatorError error;
  -     OnlineLinearScalingParameterCalculator.Calculate(predBuf, y, out alpha, out beta, out error);
  -     if (error == OnlineCalculatorError.None) {
  -       constsBuf[0] *= beta;
  -       constsBuf[nParams - 1] = constsBuf[nParams - 1] * beta + alpha;
  -     }
  -     if (nParams <= 2 || constOptIterations <= 0) {
  -       // if we don't need to optimize parameters then we are done
  -       // changing scale and offset does not influence r²
  -       rsq = RSq(y, predBuf);
  -       optConsts = constsBuf;
  -     } else {
  -       // optimize constants using the starting point calculated above
  -       OptimizeConstsLm(code, constsBuf, nParams, 0.0, nIters: constOptIterations);
  -
  -       evaluator.Exec(code, x, constsBuf, predBuf);
  -       funcEvaluations++;
  -
  -       rsq = RSq(y, predBuf);
  -       optConsts = constsBuf;
  -     }
  -   }
  -
  -   #region helpers
  -   private static double RSq(IEnumerable<double> x, IEnumerable<double> y) {
  -     OnlineCalculatorError error;
  -     double r = OnlinePearsonsRCalculator.Calculate(x, y, out error);
  -     return error == OnlineCalculatorError.None ? r * r : 0.0;
  -   }
  -
  -   private void OptimizeConstsLm(byte[] code, double[] consts, int nParams, double epsF = 0.0, int nIters = 100) {
  -     double[] optConsts = new double[nParams]; // allocate a smaller buffer for constants opt (TODO perf?)
  -     Array.Copy(consts, optConsts, nParams);
  -
  -     alglib.minlmstate state;
  -     alglib.minlmreport rep = null;
  -     alglib.minlmcreatevj(y.Length, optConsts, out state);
  -     alglib.minlmsetcond(state, 0.0, epsF, 0.0, nIters);
  -     //alglib.minlmsetgradientcheck(state, 0.000001);
  -     alglib.minlmoptimize(state, Func, FuncAndJacobian, null, code);
  -     alglib.minlmresults(state, out optConsts, out rep);
  -     funcEvaluations += rep.nfunc;
  -     gradEvaluations += rep.njac * nParams;
  -
  -     if (rep.terminationtype < 0) throw new ArgumentException("lm failed: termination type = " + rep.terminationtype);
  -
  -     // only use optimized constants if successful
  -     if (rep.terminationtype >= 0) {
  -       Array.Copy(optConsts, consts, optConsts.Length);
  -     }
  -   }
  -
  -   private void Func(double[] arg, double[] fi, object obj) {
  -     var code = (byte[])obj;
  -     evaluator.Exec(code, x, arg, predBuf); // gradients are nParams x vLen
  -     for (int r = 0; r < predBuf.Length; r++) {
  -       var res = predBuf[r] - y[r];
  -       fi[r] = res;
  -     }
  -   }
  -   private void FuncAndJacobian(double[] arg, double[] fi, double[,] jac, object obj) {
  -     int nParams = arg.Length;
  -     var code = (byte[])obj;
  -     evaluator.ExecGradient(code, x, arg, predBuf, gradBuf); // gradients are nParams x vLen
  -     for (int r = 0; r < predBuf.Length; r++) {
  -       var res = predBuf[r] - y[r];
  -       fi[r] = res;
  -
  -       for (int k = 0; k < nParams; k++) {
  -         jac[r, k] = gradBuf[k][r];
  -       }
  -     }
  -   }
      #endregion
    }

  +
  + /// <summary>
  + /// Static method to initialize a state for the algorithm
  + /// </summary>
  + /// <param name="problemData">The problem data</param>
  + /// <param name="randSeed">Random seed.</param>
  + /// <param name="maxVariables">Maximum number of variable references that are allowed in the expression.</param>
  + /// <param name="scaleVariables">Optionally scale input variables to the interval [0..1] (recommended)</param>
  + /// <param name="constOptIterations">Maximum number of iterations for constants optimization (Levenberg-Marquardt)</param>
  + /// <param name="lambda">Penalty factor for regularization (0..inf.), small penalty disables regularization.</param>
  + /// <param name="policy">Tree search policy (random, ucb, eps-greedy, ...)</param>
  + /// <param name="collectParameterOptimalModels">Optionally collect all Pareto-optimal solutions having minimal length and error.</param>
  + /// <param name="lowerEstimationLimit">Optionally limit the result of the expression to this lower value.</param>
  + /// <param name="upperEstimationLimit">Optionally limit the result of the expression to this upper value.</param>
  + /// <param name="allowProdOfVars">Allow products of expressions.</param>
  + /// <param name="allowExp">Allow expressions with exponentials.</param>
  + /// <param name="allowLog">Allow expressions with logarithms</param>
  + /// <param name="allowInv">Allow expressions with 1/x</param>
  + /// <param name="allowMultipleTerms">Allow expressions which are sums of multiple terms.</param>
  + /// <returns></returns>
  +
      public static IState CreateState(IRegressionProblemData problemData, uint randSeed, int maxVariables = 3,
  -     bool scaleVariables = true, int constOptIterations = 0,
  +     bool scaleVariables = true, int constOptIterations = -1, double lambda = 0.0,
        IPolicy policy = null,
  +     bool collectParameterOptimalModels = false,
        double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
        bool allowProdOfVars = true,
  …
        bool allowMultipleTerms = false
        ) {
  -     return new State(problemData, randSeed, maxVariables, scaleVariables, constOptIterations,
  -       policy,
  +     return new State(problemData, randSeed, maxVariables, scaleVariables, constOptIterations, lambda,
  +       policy, collectParameterOptimalModels,
          lowerEstimationLimit, upperEstimationLimit,
          allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
  …
        var i = 0;
        if (scaleVariables) {
  -       scalingFactor = new double[xs.Length];
  -       scalingOffset = new double[xs.Length];
  +       scalingFactor = new double[xs.Length + 1];
  +       scalingOffset = new double[xs.Length + 1];
        } else {
          scalingFactor = null;
  …
        }

  +     if (scaleVariables) {
  +       // transform target variable to zero-mean
  +       scalingFactor[i] = 1.0;
  +       scalingOffset[i] = -problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).Average();
  +     }
  +
        GenerateData(problemData, rows, scalingFactor, scalingOffset, out xs, out y);
      }
  …
        }

  -     y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
  +     {
  +       var sf = scalingFactor == null ? 1.0 : scalingFactor[i];
  +       var offset = scalingFactor == null ? 0.0 : scalingOffset[i];
  +       y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).Select(yi => yi * sf + offset).ToArray();
  +     }
      }
    }
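The last two hunks grow the scaling arrays by one extra slot for the target and, when variable scaling is enabled, shift y to zero mean (factor 1, offset -mean(y)) inside GenerateData. A small stand-alone sketch of that transformation (illustrative names only):

    using System;
    using System.Linq;

    // Sketch: scale the target as y' = y * factor + offset with factor = 1 and
    // offset = -mean(y), as in the changed GenerateData code.
    static class TargetScalingDemo {
      static double[] ScaleTarget(double[] y, out double factor, out double offset) {
        factor = 1.0;
        offset = -y.Average();
        var f = factor; var o = offset;                 // copy, out params cannot be captured in a lambda
        return y.Select(yi => yi * f + o).ToArray();
      }

      static void Main() {
        var y = new[] { 10.0, 12.0, 14.0 };
        var scaled = ScaleTarget(y, out var factor, out var offset);
        Console.WriteLine(string.Join(", ", scaled));   // -2, 0, 2
        Console.WriteLine(scaled.Average());            // 0
      }
    }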
branches/MCTS-SymbReg-2796/Solution.sln (r15359 → r15403)

      MinimumVisualStudioVersion = 10.0.40219.1
      Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg", "HeuristicLab.Algorithms.DataAnalysis\3.4\HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg.csproj", "{D97F91B5-B71B-412D-90A5-177EED948A3A}"
  +   EndProject
  +   Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Tests\Test.csproj", "{80009A4F-F55F-4E95-AD44-5AB01551B964}"
      EndProject
      Global
  …
      {D97F91B5-B71B-412D-90A5-177EED948A3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
      {D97F91B5-B71B-412D-90A5-177EED948A3A}.Release|Any CPU.Build.0 = Release|Any CPU
  +   {80009A4F-F55F-4E95-AD44-5AB01551B964}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  +   {80009A4F-F55F-4E95-AD44-5AB01551B964}.Debug|Any CPU.Build.0 = Debug|Any CPU
  +   {80009A4F-F55F-4E95-AD44-5AB01551B964}.Release|Any CPU.ActiveCfg = Release|Any CPU
  +   {80009A4F-F55F-4E95-AD44-5AB01551B964}.Release|Any CPU.Build.0 = Release|Any CPU
      EndGlobalSection
      GlobalSection(SolutionProperties) = preSolution
branches/MCTS-SymbReg-2796/Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/MctsSymbolicRegressionTest.cs (r15057 → r15403)

      using System;
  -   using System.Diagnostics.Contracts;
      using System.Linq;
      using System.Threading;
  …
      using HeuristicLab.Optimization;
      using HeuristicLab.Problems.DataAnalysis;
  -   using HeuristicLab.Tests;
  +   using HeuristicLab.Problems.Instances.DataAnalysis;
      using Microsoft.VisualStudio.TestTools.UnitTesting;
  …
      public void MctsSymbRegNumberOfSolutionsOneVariable() {
        // this problem has only one variable
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  +     var provider = new NguyenInstanceProvider();
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
        {
  …
  -   #region Nguyen
  +   #region test structure search (no constants)
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbReg_NoConstants_Nguyen1() {
  +     // x³ + x² + x
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
  +     TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
  +   }
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbReg_NoConstants_Nguyen2() {
  +     // x^4 + x³ + x² + x
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
  +     TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
  +   }
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbReg_NoConstants_Nguyen3() {
  +     // x^5 + x^4 + x³ + x² + x
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 ")));
  +     TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
  +   }
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbReg_NoConstants_Nguyen4() {
  +     // x^6 + x^5 + x^4 + x³ + x² + x
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 ")));
  +     TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
  +   }
  +
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbReg_NoConstants_Poly10() {
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
  +     TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
  +   }
  +   #endregion
  +
  +   #region restricted structure but including numeric constants
  +
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbRegKeijzer7() {
  +     // ln(x)
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f(")));
  +     // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
  +     if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  +     TestMcts(regProblem, allowExp: false, allowLog: true, allowInv: false);
  +   }
  +
  +   /*
      // [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen1() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
  -     TestMcts(regProblem);
  -   }
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen2() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
  -     TestMcts(regProblem);
  -   }
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen3() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 ")));
  -     TestMcts(regProblem, successThreshold: 0.99);
  -   }
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen4() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 ")));
  -     TestMcts(regProblem);
  -   }
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen5() {
  +   public void MctsSymbRegBenchmarkNguyen5() {
  +     // sin(x²)cos(x) - 1
        var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 ")));
  …
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkNguyen6() {
  +     // sin(x) + sin(x + x²)
        var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 ")));
        TestMcts(regProblem);
      }
  +   */
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbRegBenchmarkNguyen7() {
  +     // log(x + 1) + log(x² + 1)
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
  +     TestMcts(regProblem, maxVariableReferences: 5, allowExp: false, allowLog: true, allowInv: false);
  +   }
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
  +   [TestProperty("Time", "short")]
  +   public void MctsSymbRegBenchmarkNguyen8() {
  +     // Sqrt(x)
  +     // = x ^ 0.5
  +     // = exp(0.5 * log(x))
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
  +     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 ")));
  +     TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
  +   }
  +   /*
      // [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen7() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
  -     TestMcts(regProblem);
  -   }
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen8() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 ")));
  -     TestMcts(regProblem, successThreshold: 0.99);
  -   }
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkNguyen9() {
  +   public void MctsSymbRegBenchmarkNguyen9() {
  +     // sin(x) + sin(y²)
        var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
  -     TestMcts(regProblem, iterations: 10000);
  +     TestMcts(regProblem);
      }
      // [TestMethod]
  …
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkNguyen10() {
  +     // 2sin(x)cos(y)
        var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 ")));
        TestMcts(regProblem);
      }
  -   // [TestMethod]
  +   */
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkNguyen11() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  +     // x ^ y , x > 0, y > 0
  +     // = exp(y * log(x))
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F11 ")));
  -     TestMcts(regProblem, 10000, 0.95); // cannot solve exactly in 10000 iterations
  -   }
  -   //[TestMethod]
  +     TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
  +   }
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkNguyen12() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  +     // x^4 - x³ + y²/2 - y
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F12 ")));
  -     TestMcts(regProblem, iterations: 10000, successThreshold: 0.99, allowLog: false, allowExp: false, allowInv: false);
  +     TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: false);
      }
  …
      #region keijzer
  -   //[TestMethod]
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "long")]
      public void MctsSymbRegBenchmarkKeijzer5() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // (30 * x * z) / ((x - 10) * y²)
  +     // = 30 x z / (xy² - y²)
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 5 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem, iterations: 10000, allowExp: false, allowLog: false, allowSum: false, successThreshold: 0.99);
  -   }
  -
  -   //[TestMethod]
  +     TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true);
  +   }
  +
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer6() {
  -     // Keijzer 6 f(x) = Sum(1 / i) From 1 to X
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // Keijzer 6 f(x) = Sum(1 / i) From 1 to X , x \in [0..120]
  +     // we can only approximate this
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 6 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem, allowLog: false, allowExp: false, successThreshold: 0.995); // cannot solve
  -   }
  -
  -   // [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkKeijzer7() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f(")));
  -     // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
  -     if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem);
  -   }
  -
  -   [TestMethod]
  -   // [TestCategory("Algorithms.DataAnalysis")]
  +     TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true);
  +   }
  +
  +
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer8() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // sqrt(x)
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 8 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem);
  -   }
  -
  -   [TestMethod]
  -   //[TestCategory("Algorithms.DataAnalysis")]
  +     TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
  +   }
  +
  +   [TestMethod]
  +   [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer9() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // arcsinh(x) i.e. ln(x + sqrt(x² + 1))
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 9 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem);
  -   }
  -
  -   /*
  -    * cannot solve this yet x^y
  -   [TestMethod]
  -   [TestCategory("Algorithms.DataAnalysis")]
  -   [TestProperty("Time", "short")]
  -   public void MctsSymbRegBenchmarkKeijzer10() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  -     var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 10 f(")));
  -     // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
  -     if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem, iterations: 10000, successThreshold: 0.99);
  -   }
  -   */
  -
  -   /* cannot solve this yet
  -    * xy + sin( (x-1) (y-1) )
  +     TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
  +   }
  +
  +   /*
      [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer11() {
  +     // xy + sin( (x-1) (y-1) )
        var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 11 f(")));
  …
      }
      */
  -   //[TestMethod]
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer12() {
  -     // sames a Nguyen 12
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // x^4 - x³ + y² / 2 - y, same as Nguyen 12
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 12 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem, iterations: 10000, allowLog: false, allowExp: false, allowInv: false, successThreshold: 0.99); // cannot solve exactly in 10000 iterations
  -   }
  -   //[TestMethod]
  +     TestMcts(regProblem, maxVariableReferences: 15, allowExp: false, allowLog: false, allowInv: false);
  +   }
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer14() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // 8 / (2 + x² + y²)
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 14 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem);
  -   }
  -   //[TestMethod]
  +     TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true);
  +   }
  +   [TestMethod]
      [TestCategory("Algorithms.DataAnalysis")]
      [TestProperty("Time", "short")]
      public void MctsSymbRegBenchmarkKeijzer15() {
  -     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
  +     // x³ / 5 + y³ / 2 - y - x
  +     var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
        var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 15 f(")));
        // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
        if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
  -     TestMcts(regProblem, iterations: 10000, allowLog: false, allowExp: false, allowInv: false);
  +     TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: false);
      }
      #endregion

  -   private void TestMcts(IRegressionProblemData problemData, int iterations = 2000, double successThreshold = 0.999,
  +   private void TestMcts(IRegressionProblemData problemData,
  +     int iterations = 20000,
  +     double successThreshold = 0.99999,
  +     int maxVariableReferences = 5,
        bool allowExp = true,
        bool allowLog = true,
  …
        mctsSymbReg.Problem = regProblem;
        mctsSymbReg.Iterations = iterations;
  -     mctsSymbReg.MaxVariableReferences = 10;
  -     var ucbPolicy = new Ucb();
  -     ucbPolicy.C = 2;
  -     mctsSymbReg.Policy = ucbPolicy;
  +     mctsSymbReg.MaxVariableReferences = maxVariableReferences;
  +
        mctsSymbReg.SetSeedRandomly = false;
        mctsSymbReg.Seed = 1234;
  …
        mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
        mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
  -     mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("sum of multiple")), allowSum);
  +     mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
")), allowSum); 544 545 mctsSymbReg.ScaleVariables = true; 546 mctsSymbReg.ConstantOptimizationIterations = 0; 547 517 548 #endregion 518 549 RunAlgorithm(mctsSymbReg); 519 550 520 551 Console.WriteLine(mctsSymbReg.ExecutionTime); 521 // R² >= 0.999 522 // using arequal to get output of the actual value 552 var eps = 1.0 - successThreshold; 553 Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps); 554 Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps); 555 } 556 557 558 private void TestMctsWithoutConstants(IRegressionProblemData problemData, int iterations = 200000, double successThreshold = 0.99999, 559 bool allowExp = true, 560 bool allowLog = true, 561 bool allowInv = true, 562 bool allowSum = true 563 ) { 564 var mctsSymbReg = new MctsSymbolicRegressionAlgorithm(); 565 var regProblem = new RegressionProblem(); 566 regProblem.ProblemDataParameter.Value = problemData; 567 #region Algorithm Configuration 568 mctsSymbReg.Problem = regProblem; 569 mctsSymbReg.Iterations = iterations; 570 mctsSymbReg.MaxVariableReferences = 25; 571 mctsSymbReg.SetSeedRandomly = false; 572 mctsSymbReg.Seed = 1234; 573 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp); 574 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog); 575 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv); 576 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum); 577 578 // no constants 579 mctsSymbReg.ScaleVariables = false; 580 mctsSymbReg.ConstantOptimizationIterations = -1; 581 582 #endregion 583 RunAlgorithm(mctsSymbReg); 584 585 Console.WriteLine(mctsSymbReg.ExecutionTime); 523 586 var eps = 1.0 - successThreshold; 524 587 Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps); … … 546 609 ucbPolicy.C = 1000; // essentially breadth first search 547 610 mctsSymbReg.Policy = ucbPolicy; 548 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith(" prod")), allowProd);549 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp ")), allowExp);550 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log ")), allowLog);551 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 / ")), allowInv);552 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains(" sum of multiple")), allowSum);611 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith("x * y * ...")), allowProd); 612 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp(c * x * y ...)")), allowExp); 613 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log(c + c1 x + c2 x + ...)")), allowLog); 614 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 / (1 + c1 x + c2 x + ...)")), allowInv); 615 mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s 
=> s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum); 553 616 #endregion 554 617 RunAlgorithm(mctsSymbReg);