source: branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs @ 18009

Last change on this file since 18009 was 18009, checked in by bburlacu, 5 months ago

#3087: refactor ConstantOptimizationEvaluator to use the native ParameterOptimizer

File size: 16.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25
26using HEAL.Attic;
27
28using HeuristicLab.Common;
29using HeuristicLab.Core;
30using HeuristicLab.Data;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using HeuristicLab.NativeInterpreter;
33using HeuristicLab.Optimization;
34using HeuristicLab.Parameters;
35
36namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
37  [Item("Constant Optimization Evaluator", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")]
38  [StorableType("24B68851-036D-4446-BD6F-3823E9028FF4")]
39  public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
40    private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
41    private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
42    private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
43    private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";
44    private const string UpdateConstantsInTreeParameterName = "UpdateConstantsInSymbolicExpressionTree";
45    private const string UpdateVariableWeightsParameterName = "Update Variable Weights";
46
47    private const string FunctionEvaluationsResultParameterName = "Constants Optimization Function Evaluations";
48    private const string GradientEvaluationsResultParameterName = "Constants Optimization Gradient Evaluations";
49    private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations";
50
51    public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
52      get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
53    }
54    public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
55      get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
56    }
57    public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter {
58      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; }
59    }
60    public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter {
61      get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; }
62    }
63    public IFixedValueParameter<BoolValue> UpdateConstantsInTreeParameter {
64      get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateConstantsInTreeParameterName]; }
65    }
66    public IFixedValueParameter<BoolValue> UpdateVariableWeightsParameter {
67      get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateVariableWeightsParameterName]; }
68    }
69
70    public IResultParameter<IntValue> FunctionEvaluationsResultParameter {
71      get { return (IResultParameter<IntValue>)Parameters[FunctionEvaluationsResultParameterName]; }
72    }
73    public IResultParameter<IntValue> GradientEvaluationsResultParameter {
74      get { return (IResultParameter<IntValue>)Parameters[GradientEvaluationsResultParameterName]; }
75    }
76    public IFixedValueParameter<BoolValue> CountEvaluationsParameter {
77      get { return (IFixedValueParameter<BoolValue>)Parameters[CountEvaluationsParameterName]; }
78    }
79
80
81    public IntValue ConstantOptimizationIterations {
82      get { return ConstantOptimizationIterationsParameter.Value; }
83    }
84    public DoubleValue ConstantOptimizationImprovement {
85      get { return ConstantOptimizationImprovementParameter.Value; }
86    }
87    public PercentValue ConstantOptimizationProbability {
88      get { return ConstantOptimizationProbabilityParameter.Value; }
89    }
90    public PercentValue ConstantOptimizationRowsPercentage {
91      get { return ConstantOptimizationRowsPercentageParameter.Value; }
92    }
93    public bool UpdateConstantsInTree {
94      get { return UpdateConstantsInTreeParameter.Value.Value; }
95      set { UpdateConstantsInTreeParameter.Value.Value = value; }
96    }
97
98    public bool UpdateVariableWeights {
99      get { return UpdateVariableWeightsParameter.Value.Value; }
100      set { UpdateVariableWeightsParameter.Value.Value = value; }
101    }
102
103    public bool CountEvaluations {
104      get { return CountEvaluationsParameter.Value.Value; }
105      set { CountEvaluationsParameter.Value.Value = value; }
106    }
107
108    public override bool Maximization {
109      get { return true; }
110    }
111
112    [StorableConstructor]
113    protected SymbolicRegressionConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { }
114    protected SymbolicRegressionConstantOptimizationEvaluator(SymbolicRegressionConstantOptimizationEvaluator original, Cloner cloner)
115      : base(original, cloner) {
116    }
117    public SymbolicRegressionConstantOptimizationEvaluator()
118      : base() {
119      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10)));
120      Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true });
121      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1)));
122      Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization", new PercentValue(1)));
123      Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)) { Hidden = true });
124      Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be  optimized.", new BoolValue(true)) { Hidden = true });
125
126      Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false)));
127      Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
128      Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
129    }
130
131    public override IDeepCloneable Clone(Cloner cloner) {
132      return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner);
133    }
134
135    [StorableHook(HookType.AfterDeserialization)]
136    private void AfterDeserialization() {
137      if (!Parameters.ContainsKey(UpdateConstantsInTreeParameterName))
138        Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)));
139      if (!Parameters.ContainsKey(UpdateVariableWeightsParameterName))
140        Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be  optimized.", new BoolValue(true)));
141
142      if (!Parameters.ContainsKey(CountEvaluationsParameterName))
143        Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false)));
144
145      if (!Parameters.ContainsKey(FunctionEvaluationsResultParameterName))
146        Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
147      if (!Parameters.ContainsKey(GradientEvaluationsResultParameterName))
148        Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
149    }
150
151    private static readonly object locker = new object();
152    public override IOperation InstrumentedApply() {
153      var solution = SymbolicExpressionTreeParameter.ActualValue;
154      double quality;
155      if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
156        IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
157        var counter = new EvaluationsCounter();
158        quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
159           constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree, counter: counter);
160
161        if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
162          var evaluationRows = GenerateRowsToEvaluate();
163          quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
164        }
165
166        if (CountEvaluations) {
167          lock (locker) {
168            FunctionEvaluationsResultParameter.ActualValue.Value += counter.FunctionEvaluations;
169            GradientEvaluationsResultParameter.ActualValue.Value += counter.GradientEvaluations;
170          }
171        }
172
173      } else {
174        var evaluationRows = GenerateRowsToEvaluate();
175        quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
176      }
177      QualityParameter.ActualValue = new DoubleValue(quality);
178
179      return base.InstrumentedApply();
180    }
181
182    public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
183      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
184      EstimationLimitsParameter.ExecutionContext = context;
185      ApplyLinearScalingParameter.ExecutionContext = context;
186      FunctionEvaluationsResultParameter.ExecutionContext = context;
187      GradientEvaluationsResultParameter.ExecutionContext = context;
188
189      // Pearson R² evaluator is used on purpose instead of the const-opt evaluator,
190      // because Evaluate() is used to get the quality of evolved models on
191      // different partitions of the dataset (e.g., best validation model)
192      double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows, ApplyLinearScalingParameter.ActualValue.Value);
193
194      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
195      EstimationLimitsParameter.ExecutionContext = null;
196      ApplyLinearScalingParameter.ExecutionContext = null;
197      FunctionEvaluationsResultParameter.ExecutionContext = null;
198      GradientEvaluationsResultParameter.ExecutionContext = null;
199
200      return r2;
201    }
202
203    public class EvaluationsCounter {
204      public int FunctionEvaluations = 0;
205      public int GradientEvaluations = 0;
206    }
207
208    public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
209      ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
210      int maxIterations, bool updateVariableWeights = true,
211      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
212      bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {
213
214      double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
215
216      var nodesToOptimize = new HashSet<ISymbolicExpressionTreeNode>();
217      var originalNodeValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
218
219      foreach (var node in tree.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
220        if (node is VariableTreeNode && !updateVariableWeights) {
221          continue;
222        }
223        if (node is ConstantTreeNode && node.Parent.Symbol is Power && node.Parent.GetSubtree(0) == node) {
224          // do not optimize exponents
225          continue;
226        }
227        nodesToOptimize.Add(node);
228        if (node is ConstantTreeNode constant) {
229          originalNodeValues[node] = constant.Value;
230        } else if (node is VariableTreeNode variable) {
231          originalNodeValues[node] = variable.Weight;
232        }
233      }
234
235      var options = new SolverOptions {
236        Iterations = maxIterations
237      };
238      var summary = new OptimizationSummary();
239      var optimizedNodeValues = ParameterOptimizer.OptimizeTree(tree, problemData.Dataset, problemData.TrainingIndices, problemData.TargetVariable, nodesToOptimize, options, ref summary);
240
241      counter.FunctionEvaluations += summary.ResidualEvaluations;
242      counter.GradientEvaluations += summary.JacobianEvaluations;
243
244      // check if the fitting of the parameters was successful
245      UpdateNodeValues(optimizedNodeValues);
246     
247      var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
248      if (quality < originalQuality || !updateConstantsInTree) {
249        UpdateNodeValues(originalNodeValues);
250      }
251      return Math.Max(quality, originalQuality);
252    }
253
254    private static void UpdateNodeValues(IDictionary<ISymbolicExpressionTreeNode, double> values) {
255      foreach (var item in values) {
256        var node = item.Key;
257        if (node is ConstantTreeNode constant) {
258          constant.Value = item.Value;
259        } else if (node is VariableTreeNode variable) {
260          variable.Weight = item.Value;
261        }
262      }
263    }
264
265    public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
266      return TreeToAutoDiffTermConverter.IsCompatible(tree);
267    }
268  }
269}
Note: See TracBrowser for help on using the repository browser.