| 25 | == Motivating Example: Symbolic Regression == |
| 26 | This is a fully self-contained specification of a symbolic regression problem to solve the Poly-10 benchmark problem. This file can be compiled using our reference GPDL compiler for HeuristicLab to create a solver for this problem. |
| 27 | {{{ |
| 28 | #!csharp |
| 29 | PROBLEM SymbRegKoza |
| 30 | CODE << |
| 31 | double[,] x; /* input values, indexed as x[row, column]; filled in INIT */ |
| 32 | double[] y; /* target value for each row; filled in INIT */ |
| 33 | string[] variableNames; /* variable names; the index of a name is used as its column in x */ |
| 34 | Dictionary<string,int> nameToCol; /* variable name to column index; stays null until GetValue builds it */ |
| 35 | |
| 36 | double GetValue(double[,] data, string varName, int row) { /* returns data[row, c] where c is the column registered for varName */ |
| 37 | if(nameToCol == null) { |
| 38 | /* first call: build the name-to-column mapping from the positions in variableNames */ |
| 39 | nameToCol = new Dictionary<string, int>(); |
| 40 | for(int i=0; i<variableNames.Length; i++) { |
| 41 | nameToCol[variableNames[i]] = i; |
| 42 | } |
| 43 | } |
| 44 | return data[row, nameToCol[varName]]; /* read from the data argument instead of always reading the field x */ |
| 45 | } |
| 46 | |
| 47 | double RSquared(IEnumerable<double> xs, IEnumerable<double> ys) { /* result of OnlinePearsonsRSquaredCalculator for xs and ys, or 0.0 on a calculator error */ |
| 48 | HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError errorState; |
| 49 | var rSquared = HeuristicLab.Problems.DataAnalysis.OnlinePearsonsRSquaredCalculator.Calculate(xs, ys, out errorState); |
| 50 | bool succeeded = errorState == HeuristicLab.Problems.DataAnalysis.OnlineCalculatorError.None; |
| 51 | return succeeded ? rSquared : 0.0; /* any error state maps to 0.0 */ |
| 52 | } |
| 53 | >> |
| 54 | |
| 55 | INIT << |
| 56 | // generate n = 500 cases of the Poly-10 benchmark function: x1*x2 + x3*x4 + x5*x6 + x1*x7*x9 + x3*x6*x10 |
| 57 | int n = 500; |
| 58 | variableNames = new string[] {"x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10" }; |
| 59 | var rand = new System.Random(); |
| 60 | x = new double[n, variableNames.Length]; /* one column per variable name */ |
| 61 | y = new double[n]; |
| 62 | for(int row = 0; row < n; row++) { |
| 63 | for(int col = 0; col < variableNames.Length; col++) { |
| 64 | x[row, col] = rand.NextDouble() * 2.0 - 1.0; /* NextDouble() is in [0, 1), so inputs are in [-1, 1) */ |
| 65 | } |
| 66 | y[row] = x[row, 0] * x[row, 1] + |
| 67 | x[row, 2] * x[row, 3] + |
| 68 | x[row, 4] * x[row, 5] + |
| 69 | x[row, 0] * x[row, 6] * x[row, 8] + |
| 70 | x[row, 2] * x[row, 5] * x[row, 9]; /* the last two terms are triple products, not product plus variable */ |
| 71 | } |
| 72 | >> |
| 73 | |
| 74 | /* non-terminals of the problem: each one receives a row number (int row) and yields a value for it (out double val) */ |
| 75 | NONTERMINALS |
| 76 | Model<<int row, out double val>>. |
| 77 | RPB<<int row, out double val>>. |
| 78 | Addition<<int row, out double val>>. |
| 79 | Subtraction<<int row, out double val>>. |
| 80 | Multiplication<<int row, out double val>>. |
| 81 | Division<<int row, out double val>>. |
| 82 | |
| 83 | /* terminals of the problem: random constants (ERC) constrained to the range -100 .. 100, and variables (Var) whose name is drawn from variableNames */ |
| 84 | TERMINALS |
| 85 | ERC<<out double val>> |
| 86 | CONSTRAINTS |
| 87 | val IN RANGE <<-100>> .. <<100>> |
| 88 | . |
| 89 | |
| 90 | Var<<out string varName>> |
| 91 | CONSTRAINTS |
| 92 | varName IN SET <<variableNames>> |
| 93 | . |
| 94 | |
| 95 | /* grammar rules for the problem with interleaved semantic actions: Model expands to RPB, and RPB is either one of the four binary operations, a variable lookup via GetValue, or a random constant */ |
| 96 | RULES |
| 97 | Model<<int row, out double val>> = |
| 98 | RPB<<row, out val>> . |
| 99 | |
| 100 | RPB<<int row, out double val>> = LOCAL << string varName; >> |
| 101 | Addition<<row, out val>> |
| 102 | | Subtraction<<row, out val>> |
| 103 | | Division<<row, out val>> |
| 104 | | Multiplication<<row, out val>> |
| 105 | | Var<<out varName>> SEM << val = GetValue(x, varName, row); >> |
| 106 | | ERC<<out val>> |
| 107 | . |
| 108 | |
| 109 | Addition<<int row, out double val>> = LOCAL << double x1, x2; >> |
| 110 | RPB<<row, out x1>> RPB<<row, out x2>> SEM<< val = x1 + x2; >> |
| 111 | . |
| 112 | Subtraction<<int row, out double val>> = LOCAL << double x1, x2; >> |
| 113 | RPB<<row, out x1>> RPB<<row, out x2>> SEM<< val = x1 - x2; >> |
| 114 | . |
| 115 | Division<<int row, out double val>> = LOCAL << double x1, x2; >> |
| 116 | RPB<<row, out x1>> RPB<<row, out x2>> SEM<< val = x1 / x2; /* plain double division: x2 is not checked for zero */ >> |
| 117 | . |
| 118 | Multiplication<<int row, out double val>> = LOCAL << double x1, x2; >> |
| 119 | RPB<<row, out x1>> RPB<<row, out x2>> SEM<< val = x1 * x2; >> |
| 120 | . |
| 121 | |
| 122 | /* objective function: evaluate the model on every row of x and score the predictions against y with RSquared */ |
| 123 | MAXIMIZE /* MINIMIZE is the alternative keyword */ |
| 124 | << |
| 125 | var rowCount = x.GetLength(0); |
| 126 | var predicted = System.Linq.Enumerable.Range(0, rowCount).Select(rowIndex => { |
| 127 | double modelOutput; |
| 128 | Model(rowIndex, out modelOutput); /* the root symbol can be invoked directly */ |
| 129 | return modelOutput; |
| 130 | }); |
| 131 | return RSquared(predicted, y); |
| 132 | >> |
| 133 | END SymbRegKoza. |
| 134 | }}} |
| 135 | |