Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2994-AutoDiffForIntervals/HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Extensions/SymbolicRegressionNewConstantOptimizationEvaluator.cs @ 17270

Last change on this file since 17270 was 16912, checked in by gkronber, 6 years ago

#2994: added a new plugin for extension classes based on new functionality in this branch and added an evaluator that uses new AutoDiff

File size: 19.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HEAL.Attic;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
34  [Item("Constant Optimization Evaluator (new)", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")]
35  [StorableType("1D5361E9-EF73-47D2-9211-FDD39BBC1018")]
36  public class SymbolicRegressionNewConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
// Keys of the configuration parameters registered in the default constructor.
// These strings are the lookup keys into the Parameters collection; they must not change.
private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";
private const string UpdateConstantsInTreeParameterName = "UpdateConstantsInSymbolicExpressionTree";
private const string UpdateVariableWeightsParameterName = "Update Variable Weights";

// Keys of the evaluation-counting switch and of the two result parameters it feeds.
private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations";
private const string FunctionEvaluationsResultParameterName = "Constants Optimization Function Evaluations";
private const string GradientEvaluationsResultParameterName = "Constants Optimization Gradient Evaluations";
47
// Typed accessors for the parameters registered in the default constructor.
// Each one looks the parameter up by its name constant and casts it to its concrete interface.

/// <summary>Parameter holding the iteration limit of the constant optimization.</summary>
public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter =>
  (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName];

/// <summary>Parameter holding the required relative improvement of the constant optimization.</summary>
public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter =>
  (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName];

/// <summary>Parameter holding the probability that constants are optimized for a solution.</summary>
public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter =>
  (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName];

/// <summary>Parameter holding the percentage of rows used for constant optimization.</summary>
public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter =>
  (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName];

/// <summary>Parameter deciding whether optimized constants are written back into the tree.</summary>
public IFixedValueParameter<BoolValue> UpdateConstantsInTreeParameter =>
  (IFixedValueParameter<BoolValue>)Parameters[UpdateConstantsInTreeParameterName];

/// <summary>Parameter deciding whether variable weights are optimized as well.</summary>
public IFixedValueParameter<BoolValue> UpdateVariableWeightsParameter =>
  (IFixedValueParameter<BoolValue>)Parameters[UpdateVariableWeightsParameterName];

/// <summary>Result parameter accumulating the number of function evaluations.</summary>
public IResultParameter<IntValue> FunctionEvaluationsResultParameter =>
  (IResultParameter<IntValue>)Parameters[FunctionEvaluationsResultParameterName];

/// <summary>Result parameter accumulating the number of gradient evaluations.</summary>
public IResultParameter<IntValue> GradientEvaluationsResultParameter =>
  (IResultParameter<IntValue>)Parameters[GradientEvaluationsResultParameterName];

/// <summary>Parameter deciding whether function and gradient evaluations are counted.</summary>
public IFixedValueParameter<BoolValue> CountEvaluationsParameter =>
  (IFixedValueParameter<BoolValue>)Parameters[CountEvaluationsParameterName];
76
77
// Convenience properties that unwrap the values of the parameters above.

/// <summary>Value object of the iteration-limit parameter.</summary>
public IntValue ConstantOptimizationIterations => ConstantOptimizationIterationsParameter.Value;

/// <summary>Value object of the required-improvement parameter.</summary>
public DoubleValue ConstantOptimizationImprovement => ConstantOptimizationImprovementParameter.Value;

/// <summary>Value object of the optimization-probability parameter.</summary>
public PercentValue ConstantOptimizationProbability => ConstantOptimizationProbabilityParameter.Value;

/// <summary>Value object of the rows-percentage parameter.</summary>
public PercentValue ConstantOptimizationRowsPercentage => ConstantOptimizationRowsPercentageParameter.Value;

/// <summary>Whether optimized constants are written back into the tree.</summary>
public bool UpdateConstantsInTree {
  get => UpdateConstantsInTreeParameter.Value.Value;
  set => UpdateConstantsInTreeParameter.Value.Value = value;
}

/// <summary>Whether variable weights are optimized together with the constants.</summary>
public bool UpdateVariableWeights {
  get => UpdateVariableWeightsParameter.Value.Value;
  set => UpdateVariableWeightsParameter.Value.Value = value;
}

/// <summary>Whether function and gradient evaluations are added to the result parameters.</summary>
public bool CountEvaluations {
  get => CountEvaluationsParameter.Value.Value;
  set => CountEvaluationsParameter.Value.Value = value;
}

/// <summary>Always <c>true</c>: the quality produced by this evaluator is to be maximized.</summary>
public override bool Maximization => true;
108
/// <summary>Constructor used by the persistence framework.</summary>
[StorableConstructor]
protected SymbolicRegressionNewConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { }

/// <summary>Cloning constructor; all state is copied by the base class.</summary>
protected SymbolicRegressionNewConstantOptimizationEvaluator(SymbolicRegressionNewConstantOptimizationEvaluator original, Cloner cloner)
  : base(original, cloner) { }

/// <summary>
/// Creates the evaluator and registers its configuration and result parameters
/// with their default values.
/// </summary>
public SymbolicRegressionNewConstantOptimizationEvaluator() {
  // The array order is the registration order of the parameters.
  var ownParameters = new IParameter[] {
    new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10)),
    new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true },
    new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1)),
    new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization", new PercentValue(1)),
    new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)) { Hidden = true },
    new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be  optimized.", new BoolValue(true)) { Hidden = true },

    new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false)),
    new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()),
    new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue())
  };

  foreach (var parameter in ownParameters) {
    Parameters.Add(parameter);
  }
}
127
/// <summary>Creates a deep clone of this evaluator through the cloning constructor.</summary>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new SymbolicRegressionNewConstantOptimizationEvaluator(this, cloner);
  return copy;
}

/// <summary>Post-deserialization hook; currently there is nothing to fix up.</summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
}
134
// Guards the read-modify-write updates of the evaluation-count result parameters.
private static readonly object locker = new object();

/// <summary>
/// Evaluates the current solution. With the configured probability the constants of the
/// tree are optimized first; otherwise only the Pearson R² on the evaluation rows is computed.
/// The resulting quality is stored in the quality parameter before the base operation runs.
/// </summary>
public override IOperation InstrumentedApply() {
  var solution = SymbolicExpressionTreeParameter.ActualValue;

  // Pearson R² of the solution on the default evaluation rows.
  double EvaluateOnDefaultRows() {
    var evaluationRows = GenerateRowsToEvaluate();
    return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
      SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution,
      EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper,
      ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
  }

  double quality;
  bool optimize = RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value;
  if (!optimize) {
    quality = EvaluateOnDefaultRows();
  } else {
    IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
    var counter = new EvaluationsCounter();
    quality = OptimizeConstants(
      SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
      solution,
      ProblemDataParameter.ActualValue,
      constantOptimizationRows,
      ApplyLinearScalingParameter.ActualValue.Value,
      ConstantOptimizationIterations.Value,
      updateVariableWeights: UpdateVariableWeights,
      lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower,
      upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper,
      updateConstantsInTree: UpdateConstantsInTree,
      counter: counter);

    // The optimization ran on a different share of rows than the regular evaluation,
    // so the reported quality is recomputed on the regular evaluation rows.
    if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
      quality = EvaluateOnDefaultRows();
    }

    if (CountEvaluations) {
      lock (locker) {
        FunctionEvaluationsResultParameter.ActualValue.Value += counter.FunctionEvaluations;
        GradientEvaluationsResultParameter.ActualValue.Value += counter.GradientEvaluations;
      }
    }
  }

  QualityParameter.ActualValue = new DoubleValue(quality);
  return base.InstrumentedApply();
}
166
/// <summary>
/// Calculates the Pearson R² of <paramref name="tree"/> on the given rows without
/// optimizing any constants.
/// </summary>
/// <param name="context">Execution context used to resolve the lookup parameters during the call.</param>
/// <param name="tree">The symbolic expression tree to evaluate.</param>
/// <param name="problemData">The regression problem data.</param>
/// <param name="rows">The rows on which the quality is calculated.</param>
/// <returns>The Pearson R² of the tree on <paramref name="rows"/>.</returns>
public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
  try {
    SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
    EstimationLimitsParameter.ExecutionContext = context;
    ApplyLinearScalingParameter.ExecutionContext = context;
    FunctionEvaluationsResultParameter.ExecutionContext = context;
    GradientEvaluationsResultParameter.ExecutionContext = context;

    // Pearson R² evaluator is used on purpose instead of the const-opt evaluator,
    // because Evaluate() is used to get the quality of evolved models on
    // different partitions of the dataset (e.g., best validation model)
    return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
      SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree,
      EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper,
      problemData, rows, ApplyLinearScalingParameter.ActualValue.Value);
  } finally {
    // Always detach the context again, also when the calculation throws, so that
    // the parameters never keep pointing at the execution context of a finished call.
    SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
    EstimationLimitsParameter.ExecutionContext = null;
    ApplyLinearScalingParameter.ExecutionContext = null;
    FunctionEvaluationsResultParameter.ExecutionContext = null;
    GradientEvaluationsResultParameter.ExecutionContext = null;
  }
}
187
/// <summary>
/// Mutable pair of counters for the function and gradient evaluations
/// performed during one constant optimization run.
/// </summary>
public class EvaluationsCounter {
  // Both counters start at zero (the default value of int).
  public int FunctionEvaluations;
  public int GradientEvaluations;
}
192
/// <summary>
/// Collects, in prefix order, all variable and constant nodes of the tree together
/// with their current numeric value (variable weight or constant value).
/// </summary>
/// <param name="tree">The tree to scan.</param>
/// <param name="thetaNodes">Receives the variable and constant nodes.</param>
/// <param name="thetaValues">Receives the value of each node, index-aligned with <paramref name="thetaNodes"/>.</param>
private static void GetParameterNodes(ISymbolicExpressionTree tree, out List<ISymbolicExpressionTreeNode> thetaNodes, out List<double> thetaValues) {
  var collectedNodes = new List<ISymbolicExpressionTreeNode>();
  var collectedValues = new List<double>();

  foreach (var current in tree.IterateNodesPrefix()) {
    switch (current) {
      case VariableTreeNode variable:
        collectedNodes.Add(current);
        collectedValues.Add(variable.Weight);
        break;
      case ConstantTreeNode numeric:
        collectedNodes.Add(current);
        collectedValues.Add(numeric.Value);
        break;
    }
  }

  thetaNodes = collectedNodes;
  thetaValues = collectedValues;
}
209
/// <summary>
/// Optimizes the constants and variable weights of <paramref name="tree"/> with the ALGLIB
/// Levenberg-Marquardt optimizer (residual vector plus Jacobian) on the given rows and
/// returns the resulting Pearson R².
/// </summary>
/// <param name="interpreter">Interpreter used for the Pearson R² calculations.</param>
/// <param name="tree">The tree whose constants and variable weights are optimized.</param>
/// <param name="problemData">Problem data providing the dataset and the target variable.</param>
/// <param name="rows">Rows used for the optimization and for both quality calculations.</param>
/// <param name="applyLinearScaling">If true, an offset and a scale factor are optimized in addition to the tree parameters.</param>
/// <param name="maxIterations">Iteration limit handed to the optimizer.</param>
/// <param name="updateVariableWeights">Must be true; optimizing constants only is not supported.</param>
/// <param name="lowerEstimationLimit">Lower estimation limit for the quality calculation.</param>
/// <param name="upperEstimationLimit">Upper estimation limit for the quality calculation.</param>
/// <param name="updateConstantsInTree">If false, the tree is reset to its initial parameter values before returning.</param>
/// <param name="iterationCallback">Optional callback invoked by the optimizer for each reported iterate.</param>
/// <param name="counter">Optional counter to which the performed function and gradient evaluations are added.</param>
/// <returns>
/// The Pearson R² after optimization, or the R² of the unmodified tree when the optimization
/// failed or made the quality worse by more than 0.001.
/// </returns>
/// <exception cref="NotSupportedException">Thrown when <paramref name="updateVariableWeights"/> is false.</exception>
public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
  ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
  int maxIterations, bool updateVariableWeights = true,
  double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
  bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {

  if (!updateVariableWeights) throw new NotSupportedException();

  // Materialize once: the rows are needed for both quality calculations, for the target
  // values and for the residual/Jacobian callbacks, and all of them must agree.
  int[] optimizationRows = rows.ToArray();

  // Variable weights and numeric constants of the tree are the parameters to optimize.
  GetParameterNodes(tree, out List<ISymbolicExpressionTreeNode> thetaNodes, out List<double> thetaValues);
  var parameterNodes = thetaNodes.ToArray();
  var initialConstants = thetaValues.ToArray();

  // Parameter vector layout: [offset, scale, tree parameters...] with linear scaling,
  // otherwise just [tree parameters...].
  int scalingParameters = applyLinearScaling ? 2 : 0;
  double[] c = new double[initialConstants.Length + scalingParameters];
  if (applyLinearScaling) {
    c[0] = 0.0;
    c[1] = 1.0;
  }
  Array.Copy(initialConstants, 0, c, scalingParameters, initialConstants.Length);

  double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, optimizationRows, applyLinearScaling);

  if (counter == null) counter = new EvaluationsCounter();
  var callCounter = new EvaluationsCounter();

  IDataset ds = problemData.Dataset;
  double[] y = ds.GetDoubleValues(problemData.TargetVariable, optimizationRows).ToArray();
  int n = y.Length;

  // Residuals and Jacobian w.r.t. the tree parameters, evaluated on the same rows
  // from which n and y were taken.
  alglib.ndimensional_fvec treeResiduals = CreateFunc(tree, new VectorEvaluator(), parameterNodes, ds, problemData.TargetVariable, optimizationRows);
  alglib.ndimensional_jac treeJacobian = CreateJac(tree, new VectorAutoDiffEvaluator(), parameterNodes, ds, problemData.TargetVariable, optimizationRows);

  alglib.ndimensional_fvec residuals = treeResiduals;
  alglib.ndimensional_jac jacobian = treeJacobian;
  if (applyLinearScaling) {
    // The tree callbacks only accept exactly one value per parameter node, so the offset
    // and scale entries are handled here: residual_i = offset + scale * pred_i - y_i.
    var theta = new double[parameterNodes.Length];
    // NOTE(review): assumes the auto-diff evaluator fills an [n, #parameterNodes] matrix,
    // as it does when called directly with the optimizer's Jacobian in the unscaled case.
    var treeJac = new double[n, parameterNodes.Length];

    residuals = (x, fi, o) => {
      Array.Copy(x, 2, theta, 0, theta.Length);
      treeResiduals(theta, fi, o); // fi[i] = pred[i] - y[i]
      for (int i = 0; i < fi.Length; i++) {
        double pred = fi[i] + y[i];
        fi[i] = x[0] + x[1] * pred - y[i];
      }
    };
    jacobian = (x, fi, jac, o) => {
      Array.Copy(x, 2, theta, 0, theta.Length);
      treeJacobian(theta, fi, treeJac, o); // fi[i] = pred[i] - y[i]
      for (int i = 0; i < fi.Length; i++) {
        double pred = fi[i] + y[i];
        fi[i] = x[0] + x[1] * pred - y[i];
        jac[i, 0] = 1.0;   // d residual / d offset
        jac[i, 1] = pred;  // d residual / d scale
        for (int j = 0; j < theta.Length; j++)
          jac[i, j + 2] = x[1] * treeJac[i, j];
      }
    };
  }

  // Only invoked by the optimizer when iteration reports are switched on below.
  alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);

  bool optimizationSucceeded = false;
  try {
    alglib.minlmcreatevj(n, c, out alglib.minlmstate state);
    alglib.minlmsetcond(state, 0.0, maxIterations);
    alglib.minlmsetxrep(state, iterationCallback != null);
    // alglib.minlmsetgradientcheck(state, 0.001);
    alglib.minlmoptimize(state, residuals, jacobian, xrep, callCounter);
    alglib.minlmresults(state, out c, out alglib.minlmreport rep);

    // termination type -7 => constant optimization failed due to wrong gradient
    optimizationSucceeded = rep.terminationtype != -7;
  } catch (ArithmeticException) {
    // handled below via optimizationSucceeded == false
  } catch (alglib.alglibexception) {
    // handled below via optimizationSucceeded == false
  } finally {
    // Each callback invocation evaluates all rows at once and increments its counter
    // exactly once, so the counts are already per evaluation (no division by n).
    counter.FunctionEvaluations += callCounter.FunctionEvaluations;
    counter.GradientEvaluations += callCounter.GradientEvaluations;
  }

  if (!optimizationSucceeded) {
    // The callbacks wrote trial values into the tree; restore the initial ones so the
    // returned quality matches the state of the tree.
    UpdateConstants(parameterNodes, initialConstants);
    return originalQuality;
  }

  // Write the optimized tree parameters (without offset/scale) into the tree.
  var optimizedConstants = new double[initialConstants.Length];
  Array.Copy(c, scalingParameters, optimizedConstants, 0, optimizedConstants.Length);
  UpdateConstants(parameterNodes, optimizedConstants);

  var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, optimizationRows, applyLinearScaling);

  if (!updateConstantsInTree) UpdateConstants(parameterNodes, initialConstants);

  // Reject results that are clearly worse than the starting point or not a number.
  if (originalQuality - quality > 0.001 || double.IsNaN(quality)) {
    UpdateConstants(parameterNodes, initialConstants);
    return originalQuality;
  }
  return quality;
}
301
/// <summary>
/// Writes one value into each node: the weight of a variable node or the value of a
/// constant node. Nodes of any other type are left untouched.
/// </summary>
/// <param name="nodes">The nodes to update.</param>
/// <param name="constants">The new values, index-aligned with <paramref name="nodes"/>.</param>
/// <exception cref="InvalidOperationException">Thrown when both arrays differ in length.</exception>
private static void UpdateConstants(ISymbolicExpressionTreeNode[] nodes, double[] constants) {
  if (nodes.Length != constants.Length) throw new InvalidOperationException();

  int index = 0;
  foreach (var node in nodes) {
    double newValue = constants[index++];
    switch (node) {
      case VariableTreeNode variable:
        variable.Weight = newValue;
        break;
      case ConstantTreeNode numeric:
        numeric.Value = newValue;
        break;
    }
  }
}
309
/// <summary>
/// Builds the residual callback for the optimizer: it writes the parameter vector into
/// the tree, evaluates the tree on <paramref name="rows"/> and stores prediction minus
/// target in the residual vector.
/// </summary>
/// <remarks>
/// The callback expects an <see cref="EvaluationsCounter"/> as its state object and
/// increments its function evaluation count once per call.
/// </remarks>
private static alglib.ndimensional_fvec CreateFunc(ISymbolicExpressionTree tree, VectorEvaluator eval, ISymbolicExpressionTreeNode[] parameterNodes, IDataset ds, string targetVar, int[] rows) {
  // Target values are fetched once and captured by the callback.
  var targets = ds.GetDoubleValues(targetVar, rows).ToArray();

  void Residuals(double[] c, double[] fi, object o) {
    UpdateConstants(parameterNodes, c);
    var predictions = eval.Evaluate(tree, ds, rows);

    int row = 0;
    while (row < fi.Length) {
      fi[row] = predictions[row] - targets[row];
      row++;
    }

    ((EvaluationsCounter)o).FunctionEvaluations++;
  }

  return Residuals;
}
322
/// <summary>
/// Builds the residual-and-Jacobian callback for the optimizer: it writes the parameter
/// vector into the tree, lets the auto-diff evaluator fill the prediction vector and the
/// Jacobian, and then subtracts the target values to obtain the residuals.
/// </summary>
/// <remarks>
/// The callback expects an <see cref="EvaluationsCounter"/> as its state object and
/// increments its gradient evaluation count once per call.
/// </remarks>
private static alglib.ndimensional_jac CreateJac(ISymbolicExpressionTree tree, VectorAutoDiffEvaluator eval, ISymbolicExpressionTreeNode[] parameterNodes, IDataset ds, string targetVar, int[] rows) {
  // Target values are fetched once and captured by the callback.
  var targets = ds.GetDoubleValues(targetVar, rows).ToArray();

  void ResidualsAndJacobian(double[] c, double[] fi, double[,] jac, object o) {
    UpdateConstants(parameterNodes, c);
    eval.Evaluate(tree, ds, rows, parameterNodes, fi, jac);

    // fi holds the predictions at this point; turn them into residuals.
    int row = 0;
    while (row < fi.Length) {
      fi[row] = fi[row] - targets[row];
      row++;
    }

    ((EvaluationsCounter)o).GradientEvaluations++;
  }

  return ResidualsAndJacobian;
}
/// <summary>
/// Reports whether the tree is accepted for constant optimization; the decision is
/// delegated to <c>TreeToAutoDiffTermConverter.IsCompatible</c>.
/// </summary>
public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) =>
  TreeToAutoDiffTermConverter.IsCompatible(tree);
339  }
340}
Note: See TracBrowser for help on using the repository browser.