Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/NMSESingleObjectiveConstraintsEvaluator.cs @ 18240

Last change on this file since 18240 was 18220, checked in by gkronber, 2 years ago

#3136: reintegrated structure-template GP branch into trunk

File size: 19.8 KB
RevLine 
[17958]1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HEAL.Attic;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Parameters;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
33  [Item("NMSE Evaluator with shape-constraints (single-objective)", "Calculates NMSE of a symbolic regression solution and checks constraints. The fitness is a combination of NMSE and constraint violations.")]
34  [StorableType("27473973-DD8D-4375-997D-942E2280AE8E")]
35  public class NMSESingleObjectiveConstraintsEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
36    #region Parameter/Properties
37
38    private const string OptimizeParametersParameterName = "OptimizeParameters";
39    private const string ParameterOptimizationIterationsParameterName = "ParameterOptimizationIterations";
40    private const string UseSoftConstraintsParameterName = "UseSoftConstraintsEvaluation";
41    private const string BoundsEstimatorParameterName = "BoundsEstimator";
42    private const string PenaltyFactorParameterName = "PenaltyFactor";
43
44
45    public IFixedValueParameter<BoolValue> OptimizerParametersParameter =>
46      (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersParameterName];
47
[18132]48    public IFixedValueParameter<IntValue> ParameterOptimizationIterationsParameter =>
[17958]49      (IFixedValueParameter<IntValue>)Parameters[ParameterOptimizationIterationsParameterName];
50
51    public IFixedValueParameter<BoolValue> UseSoftConstraintsParameter =>
52      (IFixedValueParameter<BoolValue>)Parameters[UseSoftConstraintsParameterName];
53
54    public IValueParameter<IBoundsEstimator> BoundsEstimatorParameter =>
55      (IValueParameter<IBoundsEstimator>)Parameters[BoundsEstimatorParameterName];
56    public IFixedValueParameter<DoubleValue> PenaltyFactorParameter =>
57      (IFixedValueParameter<DoubleValue>)Parameters[PenaltyFactorParameterName];
58
59    public bool OptimizeParameters {
60      get => OptimizerParametersParameter.Value.Value;
61      set => OptimizerParametersParameter.Value.Value = value;
62    }
63
[18132]64    public int ParameterOptimizationIterations {
65      get => ParameterOptimizationIterationsParameter.Value.Value;
66      set => ParameterOptimizationIterationsParameter.Value.Value = value;
[17958]67    }
68
69    public bool UseSoftConstraints {
70      get => UseSoftConstraintsParameter.Value.Value;
71      set => UseSoftConstraintsParameter.Value.Value = value;
72    }
73
74    public IBoundsEstimator BoundsEstimator {
75      get => BoundsEstimatorParameter.Value;
76      set => BoundsEstimatorParameter.Value = value;
77    }
78
79    public double PenalityFactor {
80      get => PenaltyFactorParameter.Value.Value;
81      set => PenaltyFactorParameter.Value.Value = value;
82    }
83
84
85    public override bool Maximization => false; // NMSE is minimized
86
87    #endregion
88
89    #region Constructors/Cloning
90
91    [StorableConstructor]
92    protected NMSESingleObjectiveConstraintsEvaluator(StorableConstructorFlag _) : base(_) { }
93
94    protected NMSESingleObjectiveConstraintsEvaluator(
95      NMSESingleObjectiveConstraintsEvaluator original, Cloner cloner) : base(original, cloner) { }
96
97    public NMSESingleObjectiveConstraintsEvaluator() {
98      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersParameterName,
[18132]99        "Define whether optimization of parameters is active or not (default: false).", new BoolValue(false)));
[17958]100      Parameters.Add(new FixedValueParameter<IntValue>(ParameterOptimizationIterationsParameterName,
101        "Define how many parameter optimization steps should be performed (default: 10).", new IntValue(10)));
102      Parameters.Add(new FixedValueParameter<BoolValue>(UseSoftConstraintsParameterName,
103        "Define whether the constraints are penalized by soft or hard constraints (default: false).", new BoolValue(false)));
104      Parameters.Add(new ValueParameter<IBoundsEstimator>(BoundsEstimatorParameterName,
105        "The estimator which is used to estimate output ranges of models (default: interval arithmetic).", new IntervalArithBoundsEstimator()));
106      Parameters.Add(new FixedValueParameter<DoubleValue>(PenaltyFactorParameterName,
107        "Punishment factor for constraint violations for soft constraint handling (fitness = NMSE + penaltyFactor * avg(violations)) (default: 1.0)", new DoubleValue(1.0)));
108    }
109
110    [StorableHook(HookType.AfterDeserialization)]
111    private void AfterDeserialization() { }
112
113    public override IDeepCloneable Clone(Cloner cloner) {
114      return new NMSESingleObjectiveConstraintsEvaluator(this, cloner);
115    }
116
117    #endregion
118
119    public override IOperation InstrumentedApply() {
120      var rows = GenerateRowsToEvaluate();
121      var tree = SymbolicExpressionTreeParameter.ActualValue;
122      var problemData = ProblemDataParameter.ActualValue;
123      var interpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
124      var estimationLimits = EstimationLimitsParameter.ActualValue;
125      var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
126
[18220]127      var quality = Evaluate(tree, problemData, rows, interpreter, applyLinearScaling, estimationLimits.Lower, estimationLimits.Upper);
128      QualityParameter.ActualValue = new DoubleValue(quality);
[17958]129
[18220]130      return base.InstrumentedApply();
131    }
[17958]132
[18220]133    private static void CalcLinearScalingTerms(
134      ISymbolicExpressionTree tree,
135      IRegressionProblemData problemData,
136      IEnumerable<int> rows,
137      ISymbolicDataAnalysisExpressionTreeInterpreter interpreter) {
138      var rootNode = new ProgramRootSymbol().CreateTreeNode();
139      var startNode = new StartSymbol().CreateTreeNode();
140      var offset = tree.Root.GetSubtree(0) //Start
141                            .GetSubtree(0); //Offset
142      var scaling = offset.GetSubtree(0);
[17958]143
[18220]144      //Check if tree contains offset and scaling nodes
145      if (!(offset.Symbol is Addition) || !(scaling.Symbol is Multiplication))
146        throw new ArgumentException($"Scaling can only be used with LinearScalingGrammar.");
[17958]147
[18220]148      var t = (ISymbolicExpressionTreeNode)scaling.GetSubtree(0).Clone();
149      rootNode.AddSubtree(startNode);
150      startNode.AddSubtree(t);
151      var newTree = new SymbolicExpressionTree(rootNode);
[17958]152
[18220]153      //calculate alpha and beta for scaling
154      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(newTree, problemData.Dataset, rows);
[17958]155
[18220]156      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
157      OnlineLinearScalingParameterCalculator.Calculate(estimatedValues, targetValues, out var alpha, out var beta,
158        out var errorState);
[17958]159
[18220]160      if (errorState == OnlineCalculatorError.None) {
161        //Set alpha and beta to the scaling nodes from grammar
162        var offsetParameter = offset.GetSubtree(1) as NumberTreeNode;
163        offsetParameter.Value = alpha;
164        var scalingParameter = scaling.GetSubtree(1) as NumberTreeNode;
165        scalingParameter.Value = beta;
166      }
[17958]167    }
168
169    public static double Calculate(
[18220]170      ISymbolicExpressionTree tree,
171      IRegressionProblemData problemData, IEnumerable<int> rows,
[17958]172      ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
173      double lowerEstimationLimit, double upperEstimationLimit,
174      IBoundsEstimator estimator,
175      bool useSoftConstraints = false, double penaltyFactor = 1.0) {
176
177      var constraints = Enumerable.Empty<ShapeConstraint>();
[18220]178      if (problemData is ShapeConstrainedRegressionProblemData scProbData)
[17958]179        constraints = scProbData.ShapeConstraints.EnabledConstraints;
180
[18220]181      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
[17958]182      var boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
[18220]183      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
184      var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, out var errorState);
185      if (errorState != OnlineCalculatorError.None)
[17958]186        return 1.0;
187
[18220]188      if (!constraints.Any())
189        return nmse;
190
191      var intervalCollection = problemData.VariableRanges;
[17958]192      var constraintViolations = IntervalUtil.GetConstraintViolations(constraints, estimator, intervalCollection, tree);
193
[18220]194      // infinite/NaN constraints
195      if (constraintViolations.Any(x => double.IsNaN(x) || double.IsInfinity(x)))
[17958]196        return 1.0;
[18220]197
198      // hard constraints
199      if (!useSoftConstraints) {
200        if (constraintViolations.Any(x => x > 0.0))
201          return 1.0;
202        return nmse;
[17958]203      }
204
[18220]205      // soft constraints
206      if (penaltyFactor < 0.0)
207        throw new ArgumentException("The parameter has to be >= 0.0.", nameof(penaltyFactor));
[17958]208
[18220]209      var weightedViolationsAvg = constraints
210        .Zip(constraintViolations, (c, v) => c.Weight * v)
211        .Average();
[17958]212
[18220]213      return Math.Min(nmse, 1.0) + penaltyFactor * weightedViolationsAvg;
[17958]214    }
215
216    public override double Evaluate(
217      IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData,
218      IEnumerable<int> rows) {
219      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
220      EstimationLimitsParameter.ExecutionContext = context;
221      ApplyLinearScalingParameter.ExecutionContext = context;
222
[18220]223      var nmse = Calculate(
224        tree, problemData, rows,
225        SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
226        EstimationLimitsParameter.ActualValue.Lower,
227        EstimationLimitsParameter.ActualValue.Upper,
228        BoundsEstimator,
229        UseSoftConstraints,
230        PenalityFactor);
[17958]231
232      SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
233      EstimationLimitsParameter.ExecutionContext = null;
234      ApplyLinearScalingParameter.ExecutionContext = null;
235
236      return nmse;
237    }
[18220]238
239    public override double Evaluate(
240      ISymbolicExpressionTree tree,
241      IRegressionProblemData problemData,
242      IEnumerable<int> rows,
243      ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
244      bool applyLinearScaling = true,
245      double lowerEstimationLimit = double.MinValue,
246      double upperEstimationLimit = double.MaxValue) {
247
248      if (OptimizeParameters)
249        Optimize(
250          interpreter, tree,
251          problemData, rows,
252          ParameterOptimizationIterations,
253          updateVariableWeights: true,
254          lowerEstimationLimit,
255          upperEstimationLimit);
256      else if (applyLinearScaling) // extra scaling terms, which are included in tree
257        CalcLinearScalingTerms(tree, problemData, rows, interpreter);
258
259      return Calculate(
260        tree, problemData,
261        rows, interpreter,
262        lowerEstimationLimit,
263        upperEstimationLimit,
264        BoundsEstimator,
265        UseSoftConstraints,
266        PenalityFactor);
267    }
268
269    public static double Optimize(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
270          ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
271          int maxIterations, bool updateVariableWeights = true,
272          double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
273          bool updateParametersInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {
274
275      // Numeric parameters in the tree become variables for parameter optimization.
276      // Variables in the tree become parameters (fixed values) for parameter optimization.
277      // For each parameter (variable in the original tree) we store the
278      // variable name, variable value (for factor vars) and lag as a DataForVariable object.
279      // A dictionary is used to find parameters
280      double[] initialParameters;
281      var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
282
283      TreeToAutoDiffTermConverter.ParametricFunction func;
284      TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
285      if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, addLinearScalingTerms: false, out parameters, out initialParameters, out func, out func_grad))
286        throw new NotSupportedException("Could not optimize parameters of symbolic expression tree due to not supported symbols used in the tree.");
287      var parameterEntries = parameters.ToArray(); // order of entries must be the same for x
288
289      // extract initial parameters
290      double[] c = (double[])initialParameters.Clone();
291
292      double originalQuality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
293        tree, problemData, rows,
294        interpreter, applyLinearScaling: false,
295        lowerEstimationLimit,
296        upperEstimationLimit);
297
298      if (counter == null) counter = new EvaluationsCounter();
299      var rowEvaluationsCounter = new EvaluationsCounter();
300
301      alglib.minlmreport rep;
302
303      IDataset ds = problemData.Dataset;
304      int n = rows.Count();
305      int k = parameters.Count;
306
307      double[,] x = new double[n, k];
308      int row = 0;
309      foreach (var r in rows) {
310        int col = 0;
311        foreach (var info in parameterEntries) {
312          if (ds.VariableHasType<double>(info.variableName)) {
313            x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
314          } else if (ds.VariableHasType<string>(info.variableName)) {
315            x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
316          } else throw new InvalidProgramException("found a variable of unknown type");
317          col++;
318        }
319        row++;
320      }
321      double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
322
323
324      alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);
325
326      try {
327        alglib.minlmcreatevj(y.Length, c, out var lmstate);
328        alglib.minlmsetcond(lmstate, 0.0, maxIterations);
329        alglib.minlmsetxrep(lmstate, iterationCallback != null);
330        // alglib.minlmoptguardgradient(lmstate, 1e-5); // for debugging gradient calculation
331        alglib.minlmoptimize(lmstate, CreateFunc(func, x, y), CreateJac(func_grad, x, y), xrep, rowEvaluationsCounter);
332        alglib.minlmresults(lmstate, out c, out rep);
333        // alglib.minlmoptguardresults(lmstate, out var optGuardReport);
334      } catch (ArithmeticException) {
335        return originalQuality;
336      } catch (alglib.alglibexception) {
337        return originalQuality;
338      }
339
340      counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
341      counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;
342
343      // * TerminationType, completion code:
344      //     * -8    optimizer detected NAN/INF values either in the function itself,
345      //             or in its Jacobian
346      //     * -5    inappropriate solver was used:
347      //             * solver created with minlmcreatefgh() used  on  problem  with
348      //               general linear constraints (set with minlmsetlc() call).
349      //     * -3    constraints are inconsistent
350      //     *  2    relative step is no more than EpsX.
351      //     *  5    MaxIts steps was taken
352      //     *  7    stopping conditions are too stringent,
353      //             further improvement is impossible
354      //     *  8    terminated   by  user  who  called  MinLMRequestTermination().
355      //             X contains point which was "current accepted" when termination
356      //             request was submitted.
357      if (rep.terminationtype > 0) {
358        UpdateParameters(tree, c, updateVariableWeights);
359      }
360      var quality = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
361        tree, problemData, rows,
362        interpreter, applyLinearScaling: false,
363        lowerEstimationLimit, upperEstimationLimit);
364
365      if (!updateParametersInTree) UpdateParameters(tree, initialParameters, updateVariableWeights);
366
367      if (originalQuality < quality || double.IsNaN(quality)) {
368        UpdateParameters(tree, initialParameters, updateVariableWeights);
369        return originalQuality;
370      }
371      return quality;
372    }
373
374    private static void UpdateParameters(ISymbolicExpressionTree tree, double[] parameters, bool updateVariableWeights) {
375      int i = 0;
376      foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
377        NumberTreeNode numberTreeNode = node as NumberTreeNode;
378        VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;
379        FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;
380        if (numberTreeNode != null) {
381          if (numberTreeNode.Parent.Symbol is Power
382              && numberTreeNode.Parent.GetSubtree(1) == numberTreeNode) continue; // exponents in powers are not optimized (see TreeToAutoDiffTermConverter)
383          numberTreeNode.Value = parameters[i++];
384        } else if (updateVariableWeights && variableTreeNodeBase != null)
385          variableTreeNodeBase.Weight = parameters[i++];
386        else if (factorVarTreeNode != null) {
387          for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)
388            factorVarTreeNode.Weights[j] = parameters[i++];
389        }
390      }
391    }
392
393    private static alglib.ndimensional_fvec CreateFunc(TreeToAutoDiffTermConverter.ParametricFunction func, double[,] x, double[] y) {
394      int d = x.GetLength(1);
395      // row buffer
396      var xi = new double[d];
397      // function must return residuals, alglib optimizes resid²
398      return (double[] c, double[] resid, object o) => {
399        for (int i = 0; i < y.Length; i++) {
400          Buffer.BlockCopy(x, i * d * sizeof(double), xi, 0, d * sizeof(double)); // copy row. We are using BlockCopy instead of Array.Copy because x has rank 2
401          resid[i] = func(c, xi) - y[i];
402        }
403        var counter = (EvaluationsCounter)o;
404        counter.FunctionEvaluations += y.Length;
405      };
406    }
407
408    private static alglib.ndimensional_jac CreateJac(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad, double[,] x, double[] y) {
409      int numParams = x.GetLength(1);
410      // row buffer
411      var xi = new double[numParams];
412      return (double[] c, double[] resid, double[,] jac, object o) => {
413        int numVars = c.Length;
414        for (int i = 0; i < y.Length; i++) {
415          Buffer.BlockCopy(x, i * numParams * sizeof(double), xi, 0, numParams * sizeof(double)); // copy row
416          var tuple = func_grad(c, xi);
417          resid[i] = tuple.Item2 - y[i];
418          Buffer.BlockCopy(tuple.Item1, 0, jac, i * numVars * sizeof(double), numVars * sizeof(double)); // copy the gradient to jac. BlockCopy because jac has rank 2.
419        }
420        var counter = (EvaluationsCounter)o;
421        counter.FunctionEvaluations += y.Length;
422        counter.GradientEvaluations += y.Length;
423      };
424    }
425
426    public class EvaluationsCounter {
427      public int FunctionEvaluations = 0;
428      public int GradientEvaluations = 0;
429    }
[17958]430  }
431}
Note: See TracBrowser for help on using the repository browser.