Free cookie consent management tool by TermsFeed Policy Generator

source: branches/ConstantOptimization/SymbolicRegressionConstantOptimizationEvaluator.cs @ 8532

Last change on this file since 8532 was 7293, checked in by gkronber, 13 years ago

#1746: added binary of AutoDiff library for symbolic differentiation and an improved version of the constant optimizer from the trunk.

File size: 16.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
33  [Item("Constant Optimization Evaluator", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")]
34  [StorableClass]
35  public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
36    private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
37    private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
38    private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
39    private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";
40
41    private const string EvaluatedTreesResultName = "EvaluatedTrees";
42    private const string EvaluatedTreeNodesResultName = "EvaluatedTreeNodes";
43
44    public ILookupParameter<IntValue> EvaluatedTreesParameter {
45      get { return (ILookupParameter<IntValue>)Parameters[EvaluatedTreesResultName]; }
46    }
47    public ILookupParameter<IntValue> EvaluatedTreeNodesParameter {
48      get { return (ILookupParameter<IntValue>)Parameters[EvaluatedTreeNodesResultName]; }
49    }
50
51    public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
52      get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
53    }
54    public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
55      get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
56    }
57    public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter {
58      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; }
59    }
60    public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter {
61      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; }
62    }
63
64    public IntValue ConstantOptimizationIterations {
65      get { return ConstantOptimizationIterationsParameter.Value; }
66    }
67    public DoubleValue ConstantOptimizationImprovement {
68      get { return ConstantOptimizationImprovementParameter.Value; }
69    }
70    public PercentValue ConstantOptimizationProbability {
71      get { return ConstantOptimizationProbabilityParameter.Value; }
72    }
73    public PercentValue ConstantOptimizationRowsPercentage {
74      get { return ConstantOptimizationRowsPercentageParameter.Value; }
75    }
76
77    public override bool Maximization {
78      get { return true; }
79    }
80
81    [StorableConstructor]
82    protected SymbolicRegressionConstantOptimizationEvaluator(bool deserializing) : base(deserializing) { }
83    protected SymbolicRegressionConstantOptimizationEvaluator(SymbolicRegressionConstantOptimizationEvaluator original, Cloner cloner)
84      : base(original, cloner) {
85    }
86    public SymbolicRegressionConstantOptimizationEvaluator()
87      : base() {
88      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(3), true));
89      Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0), true));
90      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1), true));
91      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization", new PercentValue(1), true));
92
93      Parameters.Add(new LookupParameter<IntValue>(EvaluatedTreesResultName));
94      Parameters.Add(new LookupParameter<IntValue>(EvaluatedTreeNodesResultName));
95    }
96
97    public override IDeepCloneable Clone(Cloner cloner) {
98      return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner);
99    }
100
101    public override IOperation Apply() {
102      AddResults();
103      int seed = RandomParameter.ActualValue.Next();
104      var solution = SymbolicExpressionTreeParameter.ActualValue;
105      double quality;
106      if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
107        IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
108        quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
109           constantOptimizationRows, ConstantOptimizationImprovement.Value, ConstantOptimizationIterations.Value, 0.001,
110           EstimationLimitsParameter.ActualValue.Upper, EstimationLimitsParameter.ActualValue.Lower,
111          EvaluatedTreesParameter.ActualValue, EvaluatedTreeNodesParameter.ActualValue);
112        if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
113          var evaluationRows = GenerateRowsToEvaluate();
114          quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows);
115        }
116      } else {
117        var evaluationRows = GenerateRowsToEvaluate();
118        quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows);
119      }
120      QualityParameter.ActualValue = new DoubleValue(quality);
121      EvaluatedTreesParameter.ActualValue.Value += 1;
122      EvaluatedTreeNodesParameter.ActualValue.Value += solution.Length;
123
124      if (Successor != null)
125        return ExecutionContext.CreateOperation(Successor);
126      else
127        return null;
128    }
129
130    private void AddResults() {
131      if (EvaluatedTreesParameter.ActualValue == null) {
132        var scope = ExecutionContext.Scope;
133        while (scope.Parent != null)
134          scope = scope.Parent;
135        scope.Variables.Add(new Core.Variable(EvaluatedTreesResultName, new IntValue()));
136      }
137      if (EvaluatedTreeNodesParameter.ActualValue == null) {
138        var scope = ExecutionContext.Scope;
139        while (scope.Parent != null)
140          scope = scope.Parent;
141        scope.Variables.Add(new Core.Variable(EvaluatedTreeNodesResultName, new IntValue()));
142      }
143    }
144
145    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
146      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
147      EstimationLimitsParameter.ExecutionContext = context;
148
149      double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows);
150
151      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
152      EstimationLimitsParameter.ExecutionContext = null;
153
154      return r2;
155    }
156
157    public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData,
158      IEnumerable<int> rows, double improvement, int iterations, double differentialStep, double upperEstimationLimit = double.MaxValue, double lowerEstimationLimit = double.MinValue, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {
159
160      List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>();
161      List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>();
162      List<string> variableNames = new List<string>();
163
164      var func = TransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames);
165      if (variableNames.Count == 0) return 0.0;
166
167
168      AutoDiff.IParametricCompiledTerm compiledFunc = AutoDiff.TermUtils.Compile(func, variables.ToArray(), parameters.ToArray());
169
170      List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList();
171      double[] c = new double[variables.Count];
172
173      c[0] = 0.0;
174      c[1] = 1.0;
175      //extract inital constants
176      for (int i = 0; i < terminalNodes.Count; i++) {
177        ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
178        if (constantTreeNode != null) c[i + 2] = constantTreeNode.Value;
179        VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
180        if (variableTreeNode != null) c[i + 2] = variableTreeNode.Weight;
181      }
182
183      double epsg = 0;
184      double epsf = improvement;
185      double epsx = 0;
186      int maxits = iterations;
187      double diffstep = differentialStep;
188
189      alglib.lsfitstate state;
190      alglib.lsfitreport rep;
191      int info;
192
193      Dataset ds = problemData.Dataset;
194      double[,] x = new double[rows.Count(), variableNames.Count];
195      int row = 0;
196      foreach (var r in rows) {
197        for (int col = 0; col < variableNames.Count; col++) {
198          x[row, col] = ds.GetDoubleValue(variableNames[col], r);
199        }
200        row++;
201      }
202      double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
203      int n = x.GetLength(0);
204      int m = x.GetLength(1);
205      int k = c.Length;
206
207      alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(compiledFunc, variables, parameters);
208      alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc, variables, parameters);
209
210      alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
211      alglib.lsfitsetcond(state, epsf, epsx, maxits);
212      alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null);
213      alglib.lsfitresults(state, out info, out c, out rep);
214      //alglib.minlmstate state;
215      //alglib.minlmreport report;
216
217      //alglib.minlmcreatev(1, c, diffstep, out state);
218      //alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
219      //alglib.minlmoptimize(state, CreateCallBack(interpreter, tree, problemData, rows, upperEstimationLimit, lowerEstimationLimit, treeLength, evaluatedTrees, evaluatedTreeNodes), null, terminalNodes);
220      //alglib.minlmresults(state, out c, out report);
221
222      for (int i = 2; i < c.Length; i++) {
223        ConstantTreeNode constantTreeNode = terminalNodes[i - 2] as ConstantTreeNode;
224        if (constantTreeNode != null) constantTreeNode.Value = c[i];
225        VariableTreeNode variableTreeNode = terminalNodes[i - 2] as VariableTreeNode;
226        if (variableTreeNode != null) variableTreeNode.Weight = c[i];
227      }
228
229      return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows);
230
231    }
232
233    private static alglib.ndimensional_pfunc CreatePFunc(AutoDiff.IParametricCompiledTerm compiledFunc, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters) {
234      return (double[] c, double[] x, ref double func, object o) => {
235        func = compiledFunc.Evaluate(c, x);
236      };
237    }
238
239    private static alglib.ndimensional_pgrad CreatePGrad(AutoDiff.IParametricCompiledTerm compiledFunc, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters) {
240      return (double[] c, double[] x, ref double func, double[] grad, object o) => {
241        var tupel = compiledFunc.Differentiate(c, x);
242        func = tupel.Item2;
243        Array.Copy(tupel.Item1, grad, grad.Length);
244      };
245    }
246
247    private static AutoDiff.Term TransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, List<string> variableNames) {
248      if (node.Symbol is Constant) {
249        var var = new AutoDiff.Variable();
250        variables.Add(var);
251        return var;
252      }
253      if (node.Symbol is Variable) {
254        var w = new AutoDiff.Variable();
255        variables.Add(w);
256        var par = new AutoDiff.Variable();
257        parameters.Add(par);
258        variableNames.Add((node as VariableTreeNode).VariableName);
259        return AutoDiff.TermBuilder.Product(w, par);
260      }
261      if (node.Symbol is Addition)
262        return AutoDiff.TermBuilder.Sum(
263          from subTree in node.Subtrees
264          select TransformToAutoDiff(subTree, variables, parameters, variableNames)
265          );
266      if (node.Symbol is Multiplication)
267        return AutoDiff.TermBuilder.Product(
268          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames),
269          TransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames),
270          (from subTree in node.Subtrees.Skip(2)
271           select TransformToAutoDiff(subTree, variables, parameters, variableNames)).ToArray()
272          );
273      if (node.Symbol is Logarithm)
274        return AutoDiff.TermBuilder.Log(
275          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames));
276      if (node.Symbol is Exponential)
277        return AutoDiff.TermBuilder.Exp(
278          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames));
279      if (node.Symbol is StartSymbol) {
280        var alpha = new AutoDiff.Variable();
281        var beta = new AutoDiff.Variable();
282        variables.Add(beta);
283        variables.Add(alpha);
284        return TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames) * alpha + beta;
285      }
286      throw new NotImplementedException();
287    }
288
289    private static alglib.ndimensional_fvec CreateCallBack(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, double upperEstimationLimit, double lowerEstimationLimit, int treeLength, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {
290      return (double[] arg, double[] fi, object obj) => {
291        // update constants of tree
292        List<SymbolicExpressionTreeTerminalNode> terminalNodes = (List<SymbolicExpressionTreeTerminalNode>)obj;
293        for (int i = 0; i < terminalNodes.Count; i++) {
294          ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
295          if (constantTreeNode != null) constantTreeNode.Value = arg[i];
296          VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
297          if (variableTreeNode != null) variableTreeNode.Weight = arg[i];
298        }
299
300        double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows);
301
302        fi[0] = 1 - quality;
303        if (evaluatedTrees != null) evaluatedTrees.Value++;
304        if (evaluatedTreeNodes != null) evaluatedTreeNodes.Value += treeLength;
305      };
306    }
307
308  }
309}
Note: See TracBrowser for help on using the repository browser.