Changeset 11832 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs
- Timestamp:
- 01/27/15 16:34:34 (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs
r11806 r11832 5 5 using System.Security.AccessControl; 6 6 using System.Text; 7 using AutoDiff; 7 8 using HeuristicLab.Common; 8 9 using HeuristicLab.Problems.DataAnalysis; … … 13 14 // provides bridge to HL regression problem instances 14 15 public class SymbolicRegressionProblem : IProblem { 16 15 17 private const string grammarString = @" 16 18 G(E): 17 E -> V | V+E | V-E | V*E | V/E | (E) 19 E -> V | V+E | V-E | V*E | V/E | (E) | C | C+E | C-E | C*E | C/E 20 C -> 0..9 18 21 V -> <variables> 19 22 "; 20 23 // C represents Koza-style ERC (the symbol is the index of the ERC), the values are initialized below 21 24 22 25 private readonly IGrammar grammar; 23 private readonly ExpressionInterpreter interpreter;24 26 25 27 private readonly int N; … … 27 29 private readonly double[] y; 28 30 private readonly int d; 29 30 31 public SymbolicRegressionProblem(string partOfName) { 31 private readonly double[] erc; 32 33 34 public SymbolicRegressionProblem(Random random, string partOfName) { 32 35 var instanceProvider = new RegressionRealWorldInstanceProvider(); 33 36 var dds = instanceProvider.GetDataDescriptors().OfType<RegressionDataDescriptor>(); … … 50 53 i++; 51 54 } 55 // initialize ERC values 56 erc = Enumerable.Range(0, 10).Select(_ => Rand.RandNormal(random) * 10).ToArray(); 52 57 53 58 char firstVar = 'a'; 54 59 char lastVar = Convert.ToChar(Convert.ToByte('a') + d - 1); 55 60 this.grammar = new Grammar(grammarString.Replace("<variables>", firstVar + " .. " + lastVar)); 56 this.interpreter = new ExpressionInterpreter();57 58 61 } 59 62 … … 69 72 70 73 public double Evaluate(string sentence) { 71 return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i]))); 72 } 73 74 75 // right now only + and * is supported 76 public string CanonicalRepresentation(string terminalPhrase) { 77 //return terminalPhrase; 78 var terms = terminalPhrase.Split('+'); 79 return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch))) 80 .OrderBy(term => term)); 81 } 74 var interpreter = new ExpressionInterpreter(); 75 return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i], erc))); 76 } 77 78 79 public string CanonicalRepresentation(string phrase) { 80 return phrase; 81 //var terms = phrase.Split('+'); 82 //return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch))) 83 // .OrderBy(term => term)); 84 } 85 86 public IEnumerable<Feature> GetFeatures(string phrase) 87 { 88 throw new NotImplementedException(); 89 } 90 91 92 /* 93 public static double OptimizeConstants(string sentence) { 94 95 List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>(); 96 List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>(); 97 List<string> variableNames = new List<string>(); 98 99 AutoDiff.Term func; 100 if (!TryTransformToAutoDiff(sentence, 0, variables, parameters, out func)) 101 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 102 if (variableNames.Count == 0) return 0.0; 103 104 AutoDiff.IParametricCompiledTerm compiledFunc = AutoDiff.TermUtils.Compile(func, variables.ToArray(), parameters.ToArray()); 105 106 List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList(); 107 double[] c = new double[variables.Count]; 108 109 { 110 c[0] = 0.0; 111 c[1] = 1.0; 112 //extract inital constants 113 int i = 2; 114 foreach (var node in terminalNodes) { 115 ConstantTreeNode constantTreeNode = node as ConstantTreeNode; 116 VariableTreeNode variableTreeNode = node as VariableTreeNode; 117 if (constantTreeNode != null) 118 c[i++] = constantTreeNode.Value; 119 else if (variableTreeNode != null) 120 c[i++] = variableTreeNode.Weight; 121 } 122 } 123 double[] originalConstants = (double[])c.Clone(); 124 double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling); 125 126 alglib.lsfitstate state; 127 alglib.lsfitreport rep; 128 int info; 129 130 Dataset ds = problemData.Dataset; 131 double[,] x = new double[rows.Count(), variableNames.Count]; 132 int row = 0; 133 foreach (var r in rows) { 134 for (int col = 0; col < variableNames.Count; col++) { 135 x[row, col] = ds.GetDoubleValue(variableNames[col], r); 136 } 137 row++; 138 } 139 double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray(); 140 int n = x.GetLength(0); 141 int m = x.GetLength(1); 142 int k = c.Length; 143 144 alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(compiledFunc); 145 alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc); 146 147 try { 148 alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state); 149 alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations); 150 //alglib.lsfitsetgradientcheck(state, 0.001); 151 alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null); 152 alglib.lsfitresults(state, out info, out c, out rep); 153 } catch (ArithmeticException) { 154 return originalQuality; 155 } catch (alglib.alglibexception) { 156 return originalQuality; 157 } 158 159 //info == -7 => constant optimization failed due to wrong gradient 160 if (info != -7) throw new ArgumentException(); 161 } 162 163 164 private static alglib.ndimensional_pfunc CreatePFunc(AutoDiff.IParametricCompiledTerm compiledFunc) { 165 return (double[] c, double[] x, ref double func, object o) => { 166 func = compiledFunc.Evaluate(c, x); 167 }; 168 } 169 170 private static alglib.ndimensional_pgrad CreatePGrad(AutoDiff.IParametricCompiledTerm compiledFunc) { 171 return (double[] c, double[] x, ref double func, double[] grad, object o) => { 172 var tupel = compiledFunc.Differentiate(c, x); 173 func = tupel.Item2; 174 Array.Copy(tupel.Item1, grad, grad.Length); 175 }; 176 } 177 178 private static bool TryTransformToAutoDiff(string phrase, int symbolPos, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, out AutoDiff.Term term) 179 { 180 var curSy = phrase[0]; 181 if () { 182 var var = new AutoDiff.Variable(); 183 variables.Add(var); 184 term = var; 185 return true; 186 } 187 if (node.Symbol is Variable) { 188 var varNode = node as VariableTreeNode; 189 var par = new AutoDiff.Variable(); 190 parameters.Add(par); 191 variableNames.Add(varNode.VariableName); 192 var w = new AutoDiff.Variable(); 193 variables.Add(w); 194 term = AutoDiff.TermBuilder.Product(w, par); 195 return true; 196 } 197 if (node.Symbol is Addition) { 198 List<AutoDiff.Term> terms = new List<Term>(); 199 foreach (var subTree in node.Subtrees) { 200 AutoDiff.Term t; 201 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out t)) { 202 term = null; 203 return false; 204 } 205 terms.Add(t); 206 } 207 term = AutoDiff.TermBuilder.Sum(terms); 208 return true; 209 } 210 if (node.Symbol is Subtraction) { 211 List<AutoDiff.Term> terms = new List<Term>(); 212 for (int i = 0; i < node.SubtreeCount; i++) { 213 AutoDiff.Term t; 214 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, out t)) { 215 term = null; 216 return false; 217 } 218 if (i > 0) t = -t; 219 terms.Add(t); 220 } 221 term = AutoDiff.TermBuilder.Sum(terms); 222 return true; 223 } 224 if (node.Symbol is Multiplication) { 225 AutoDiff.Term a, b; 226 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) || 227 !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) { 228 term = null; 229 return false; 230 } else { 231 List<AutoDiff.Term> factors = new List<Term>(); 232 foreach (var subTree in node.Subtrees.Skip(2)) { 233 AutoDiff.Term f; 234 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) { 235 term = null; 236 return false; 237 } 238 factors.Add(f); 239 } 240 term = AutoDiff.TermBuilder.Product(a, b, factors.ToArray()); 241 return true; 242 } 243 } 244 if (node.Symbol is Division) { 245 // only works for at least two subtrees 246 AutoDiff.Term a, b; 247 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) || 248 !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) { 249 term = null; 250 return false; 251 } else { 252 List<AutoDiff.Term> factors = new List<Term>(); 253 foreach (var subTree in node.Subtrees.Skip(2)) { 254 AutoDiff.Term f; 255 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) { 256 term = null; 257 return false; 258 } 259 factors.Add(1.0 / f); 260 } 261 term = AutoDiff.TermBuilder.Product(a, 1.0 / b, factors.ToArray()); 262 return true; 263 } 264 } 265 266 if (node.Symbol is StartSymbol) { 267 var alpha = new AutoDiff.Variable(); 268 var beta = new AutoDiff.Variable(); 269 variables.Add(beta); 270 variables.Add(alpha); 271 AutoDiff.Term branchTerm; 272 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out branchTerm)) { 273 term = branchTerm * alpha + beta; 274 return true; 275 } else { 276 term = null; 277 return false; 278 } 279 } 280 term = null; 281 return false; 282 } 283 */ 82 284 } 83 285 }
Note: See TracChangeset
for help on using the changeset viewer.