
source: branches/ConstantOptimization/HeuristicLab.Problems.DataAnylsis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionConstantOptimizationEvaluatorAutoDiff.cs @ 7302

Last change on this file since 7302 was 7302, checked in by mkommend, 12 years ago

#1746: Added solution for constant optimization.

File size: 18.6 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  [Item("Constant Optimization Evaluator AutoDiff", "Calculates the Pearson R² of a symbolic regression solution and optimizes the constants used.")]
  [StorableClass]
  public class SymbolicRegressionConstantOptimizationEvaluatorAutoDiff : SymbolicRegressionSingleObjectiveEvaluator {
    private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
    private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
    private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
    private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";

    private const string EvaluatedTreesResultName = "EvaluatedTrees";
    private const string EvaluatedTreeNodesResultName = "EvaluatedTreeNodes";

    public ILookupParameter<IntValue> EvaluatedTreesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[EvaluatedTreesResultName]; }
    }
    public ILookupParameter<IntValue> EvaluatedTreeNodesParameter {
      get { return (ILookupParameter<IntValue>)Parameters[EvaluatedTreeNodesResultName]; }
    }

    public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
    }
    public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
    }
    public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; }
    }
    public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter {
      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; }
    }

    public IntValue ConstantOptimizationIterations {
      get { return ConstantOptimizationIterationsParameter.Value; }
    }
    public DoubleValue ConstantOptimizationImprovement {
      get { return ConstantOptimizationImprovementParameter.Value; }
    }
    public PercentValue ConstantOptimizationProbability {
      get { return ConstantOptimizationProbabilityParameter.Value; }
    }
    public PercentValue ConstantOptimizationRowsPercentage {
      get { return ConstantOptimizationRowsPercentageParameter.Value; }
    }

    public override bool Maximization {
      get { return true; }
    }

    [StorableConstructor]
    protected SymbolicRegressionConstantOptimizationEvaluatorAutoDiff(bool deserializing) : base(deserializing) { }
    protected SymbolicRegressionConstantOptimizationEvaluatorAutoDiff(SymbolicRegressionConstantOptimizationEvaluatorAutoDiff original, Cloner cloner)
      : base(original, cloner) {
    }
    public SymbolicRegressionConstantOptimizationEvaluatorAutoDiff()
      : base() {
      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constants of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(3), true));
      Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0), true));
      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized.", new PercentValue(1), true));
      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization.", new PercentValue(1), true));

      Parameters.Add(new LookupParameter<IntValue>(EvaluatedTreesResultName));
      Parameters.Add(new LookupParameter<IntValue>(EvaluatedTreeNodesResultName));
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicRegressionConstantOptimizationEvaluatorAutoDiff(this, cloner);
    }

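    // Probabilistically optimizes the constants before evaluation: with probability
    // ConstantOptimizationProbability the constants are fitted on the configured percentage of
    // rows; if that percentage differs from the regular evaluation percentage (or optimization is
    // skipped), the quality is the plain Pearson R² on the regular evaluation rows.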
    public override IOperation Apply() {
      AddResults();
      int seed = RandomParameter.ActualValue.Next();
      var solution = SymbolicExpressionTreeParameter.ActualValue;
      double quality;
      if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
        IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
        quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
          constantOptimizationRows, ConstantOptimizationImprovement.Value, ConstantOptimizationIterations.Value, 0.001,
          EstimationLimitsParameter.ActualValue.Upper, EstimationLimitsParameter.ActualValue.Lower,
          EvaluatedTreesParameter.ActualValue, EvaluatedTreeNodesParameter.ActualValue);
        if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
          var evaluationRows = GenerateRowsToEvaluate();
          quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows);
        }
      } else {
        var evaluationRows = GenerateRowsToEvaluate();
        quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows);
      }
      QualityParameter.ActualValue = new DoubleValue(quality);
      EvaluatedTreesParameter.ActualValue.Value += 1;
      EvaluatedTreeNodesParameter.ActualValue.Value += solution.Length;

      if (Successor != null)
        return ExecutionContext.CreateOperation(Successor);
      else
        return null;
    }

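    // Lazily creates the EvaluatedTrees/EvaluatedTreeNodes counters in the top-most (global) scope
    // so that the lookup parameters can resolve them.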
    private void AddResults() {
      if (EvaluatedTreesParameter.ActualValue == null) {
        var scope = ExecutionContext.Scope;
        while (scope.Parent != null)
          scope = scope.Parent;
        scope.Variables.Add(new Core.Variable(EvaluatedTreesResultName, new IntValue()));
      }
      if (EvaluatedTreeNodesParameter.ActualValue == null) {
        var scope = ExecutionContext.Scope;
        while (scope.Parent != null)
          scope = scope.Parent;
        scope.Variables.Add(new Core.Variable(EvaluatedTreeNodesResultName, new IntValue()));
      }
    }

    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
      EstimationLimitsParameter.ExecutionContext = context;

      double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows);

      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
      EstimationLimitsParameter.ExecutionContext = null;

      return r2;
    }

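    // Cached input matrix, reset together with the operator state. Note that the active code in
    // OptimizeConstants currently builds a local matrix instead (see the commented-out caching
    // block there), so this field only matters for that disabled code path.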
    private static double[,] x;
    public override void InitializeState() {
      base.InitializeState();
      x = null;
    }
    public override void ClearState() {
      base.ClearState();
      x = null;
    }

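    // Optimizes all constants and variable weights of the tree by non-linear least-squares fitting
    // (alglib lsfit with analytic gradients obtained via AutoDiff). The tree is transformed into an
    // AutoDiff term in which every constant value and variable weight becomes a free parameter; two
    // extra parameters for linear scaling (offset and slope) are added at the StartSymbol. After
    // fitting, the optimized values are written back into the tree and its Pearson R² is returned.
    // Usage sketch (hypothetical caller, argument values for illustration only):
    //   double r2 = OptimizeConstants(interpreter, tree, problemData, rows,
    //     improvement: 0.0, iterations: 3, differentialStep: 0.001);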
    public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData,
      IEnumerable<int> rows, double improvement, int iterations, double differentialStep, double upperEstimationLimit = double.MaxValue, double lowerEstimationLimit = double.MinValue, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {

      List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>();
      List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>();
      List<string> variableNames = new List<string>();

      var func = TransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames);
      if (variableNames.Count == 0) return 0.0;

      AutoDiff.IParametricCompiledTerm compiledFunc = AutoDiff.TermUtils.Compile(func, variables.ToArray(), parameters.ToArray());

      List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList();
      //List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<ConstantTreeNode>().Cast<SymbolicExpressionTreeTerminalNode>().ToList();
      double[] c = new double[variables.Count];

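      // c[0] and c[1] are the linear-scaling parameters (additive offset beta and slope alpha)
      // introduced for the StartSymbol; all following entries correspond to the terminal nodes
      // in prefix order, hence the offset of 2 below.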
      c[0] = 0.0;
      c[1] = 1.0;
      // extract initial constants and variable weights
      for (int i = 0; i < terminalNodes.Count; i++) {
        ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
        if (constantTreeNode != null) c[i + 2] = constantTreeNode.Value;
        VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
        if (variableTreeNode != null) c[i + 2] = variableTreeNode.Weight;
      }

      double epsg = 0;
      double epsf = improvement;
      double epsx = 0;
      int maxits = iterations;
      double diffstep = differentialStep;

      alglib.lsfitstate state;
      alglib.lsfitreport rep;
      int info;

      //Dataset ds = problemData.Dataset;
      //List<string> datasetVariables = ds.DoubleVariables.ToList();
      //if (x == null) {
      //  x = new double[rows.Count(), ds.DoubleVariables.Count()];
      //  int row = 0;
      //  foreach (var r in rows) {
      //    for (int col = 0; col < datasetVariables.Count; col++) {
      //      x[row, col] = ds.GetDoubleValue(datasetVariables[col], row);
      //    }
      //    row++;
      //  }
      //}

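      // Build the input matrix: one column per variable reference collected during the AutoDiff
      // transformation (a variable occurring several times in the tree gets one column per occurrence).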
      Dataset ds = problemData.Dataset;
      double[,] x = new double[rows.Count(), variableNames.Count];
      int row = 0;
      foreach (var r in rows) {
        for (int col = 0; col < variableNames.Count; col++) {
          x[row, col] = ds.GetDoubleValue(variableNames[col], r);
        }
        row++;
      }

      //List<int> variableIndexes = variableNames.Select(v => datasetVariables.IndexOf(v)).ToList();
      double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
      int n = x.GetLength(0);
      int m = x.GetLength(1);
      int k = c.Length;

      alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(compiledFunc, variables, parameters);
      alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc, variables, parameters);

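      // Least-squares fit with user-supplied function and analytic gradient:
      // n = number of rows, m = number of input columns, k = number of parameters.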
      alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
      alglib.lsfitsetcond(state, epsf, epsx, maxits);
      alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null);
      alglib.lsfitresults(state, out info, out c, out rep);
      //alglib.minlmstate state;
      //alglib.minlmreport report;

      //alglib.minlmcreatev(1, c, diffstep, out state);
      //alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
      //alglib.minlmoptimize(state, CreateCallBack(interpreter, tree, problemData, rows, upperEstimationLimit, lowerEstimationLimit, treeLength, evaluatedTrees, evaluatedTreeNodes), null, terminalNodes);
      //alglib.minlmresults(state, out c, out report);

      for (int i = 2; i < c.Length; i++) {
        ConstantTreeNode constantTreeNode = terminalNodes[i - 2] as ConstantTreeNode;
        if (constantTreeNode != null) constantTreeNode.Value = c[i];
        VariableTreeNode variableTreeNode = terminalNodes[i - 2] as VariableTreeNode;
        if (variableTreeNode != null) variableTreeNode.Weight = c[i];
      }

      return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows);
    }

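    // Adapters that expose the compiled AutoDiff term through alglib's callback delegates;
    // c holds the parameters being fitted and x the input values of the current row.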
    private static alglib.ndimensional_pfunc CreatePFunc(AutoDiff.IParametricCompiledTerm compiledFunc, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters) {
      return (double[] c, double[] x, ref double func, object o) => {
        //List<int> variableIndexes = (List<int>)o;
        //var x_new = variableIndexes.Select(i => x[i]).ToArray();
        func = compiledFunc.Evaluate(c, x);
      };
    }

    private static alglib.ndimensional_pgrad CreatePGrad(AutoDiff.IParametricCompiledTerm compiledFunc, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters) {
      return (double[] c, double[] x, ref double func, double[] grad, object o) => {
        //List<int> variableIndexes = (List<int>)o;
        //var x_new = variableIndexes.Select(i => x[i]).ToArray();
        var tuple = compiledFunc.Differentiate(c, x);
        func = tuple.Item2;
        Array.Copy(tuple.Item1, grad, grad.Length);
      };
    }

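    // Recursively converts a symbolic expression tree into an AutoDiff term: constants become free
    // AutoDiff variables (to be fitted), dataset variables become products of a free weight variable
    // and an input parameter, and the supported function symbols map to the corresponding AutoDiff
    // term builders. Any other symbol causes a NotImplementedException.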
    private static AutoDiff.Term TransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, List<string> variableNames) {
      if (node.Symbol is Constant) {
        var var = new AutoDiff.Variable();
        variables.Add(var);
        return var;
      }
      if (node.Symbol is Variable) {
        var w = new AutoDiff.Variable();
        variables.Add(w);
        var par = new AutoDiff.Variable();
        parameters.Add(par);
        variableNames.Add((node as VariableTreeNode).VariableName);
        return AutoDiff.TermBuilder.Product(w, par);
      }
      if (node.Symbol is Addition)
        return AutoDiff.TermBuilder.Sum(
          from subTree in node.Subtrees
          select TransformToAutoDiff(subTree, variables, parameters, variableNames)
          );
      if (node.Symbol is Subtraction) {
        return AutoDiff.TermBuilder.Sum(
          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames),
          -1.0 * TransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames),
          (from subTree in node.Subtrees.Skip(2)
           select -1.0 * TransformToAutoDiff(subTree, variables, parameters, variableNames)
          ).ToArray());
      }
      if (node.Symbol is Multiplication)
        return AutoDiff.TermBuilder.Product(
          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames),
          TransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames),
          (from subTree in node.Subtrees.Skip(2)
           select TransformToAutoDiff(subTree, variables, parameters, variableNames)).ToArray()
          );
      if (node.Symbol is Division) {
        return AutoDiff.TermBuilder.Product(
          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames),
          1 / TransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames),
          (from subTree in node.Subtrees.Skip(2)
           select 1 / TransformToAutoDiff(subTree, variables, parameters, variableNames)).ToArray()
          );
      }
      if (node.Symbol is Logarithm)
        return AutoDiff.TermBuilder.Log(
          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames));
      if (node.Symbol is Exponential)
        return AutoDiff.TermBuilder.Exp(
          TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames));
      if (node.Symbol is StartSymbol) {
        var alpha = new AutoDiff.Variable();
        var beta = new AutoDiff.Variable();
        variables.Add(beta);
        variables.Add(alpha);
        return TransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames) * alpha + beta;
      }
      throw new NotImplementedException();
    }

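    // Residual callback for the alternative Levenberg-Marquardt optimization via alglib.minlm
    // (currently commented out above): writes the candidate values back into the tree and reports
    // 1 - R² as the value to minimize.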
    private static alglib.ndimensional_fvec CreateCallBack(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, double upperEstimationLimit, double lowerEstimationLimit, int treeLength, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {
      return (double[] arg, double[] fi, object obj) => {
        // update constants of tree
        List<SymbolicExpressionTreeTerminalNode> terminalNodes = (List<SymbolicExpressionTreeTerminalNode>)obj;
        for (int i = 0; i < terminalNodes.Count; i++) {
          ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
          if (constantTreeNode != null) constantTreeNode.Value = arg[i];
          VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
          if (variableTreeNode != null) variableTreeNode.Weight = arg[i];
        }

        double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows);

        fi[0] = 1 - quality;
        if (evaluatedTrees != null) evaluatedTrees.Value++;
        if (evaluatedTreeNodes != null) evaluatedTreeNodes.Value += treeLength;
      };
    }

  }
}