source: branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructuredSymbolicRegressionSingleObjectiveProblem.cs @ 18199

Last change on this file since 18199 was 18199, checked in by mkommend, 7 months ago

#3136: Fixed parsing of variables in subfunctions.

File size: 18.8 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HEAL.Attic;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.RealVectorEncoding;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Problems.Instances;
34using HeuristicLab.Problems.Instances.DataAnalysis;
35
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
  /// <summary>
  /// Single-objective symbolic regression problem in which the overall model structure is given by a
  /// <see cref="StructureTemplate"/> and only the sub-functions referenced by the template
  /// (plus the numeric parameters of the template) are subject to optimization.
  /// </summary>
  /// <remarks>
  /// The solution encoding is a <see cref="MultiEncoding"/> that is rebuilt whenever the template changes:
  /// one real vector named "Numeric Parameters" (only if the template contains number nodes) and one
  /// symbolic expression tree encoding per sub-function of the template.
  /// The quality returned by <see cref="Evaluate"/> is minimized.
  /// </remarks>
  [StorableType("7464E84B-65CC-440A-91F0-9FA920D730F9")]
  [Item(Name = "Structured Symbolic Regression Single Objective Problem (single-objective)", Description = "A problem with a structural definition and unfixed subfunctions.")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 150)]
  public class StructuredSymbolicRegressionSingleObjectiveProblem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData> {

    #region Constants
    private const string ProblemDataParameterName = "ProblemData";
    private const string StructureTemplateParameterName = "Structure Template";
    private const string InterpreterParameterName = "Interpreter";
    private const string EstimationLimitsParameterName = "EstimationLimits";
    private const string BestTrainingSolutionParameterName = "Best Training Solution";
    private const string ApplyLinearScalingParameterName = "Apply Linear Scaling";
    private const string OptimizeParametersParameterName = "Optimize Parameters";

    // Key under which the fully resolved tree is stored in an individual by Evaluate.
    private const string SymbolicExpressionTreeName = "SymbolicExpressionTree";
    // Name of the real vector encoding that carries the values of the template's number nodes.
    private const string NumericParametersEncoding = "Numeric Parameters";

    private const string StructureTemplateDescriptionText =
      "Enter your expression as string in infix format into the empty input field.\n" +
      "By checking the \"Apply Linear Scaling\" checkbox you can add the relevant scaling terms to your expression.\n" +
      "After entering the expression click parse to build the tree.\n" +
      "To edit the defined sub-functions, click on the corresponding-colored node in the tree view.\n" +
      "Check the info box besides the input field for more information.";
    #endregion

    #region Parameters
    public IValueParameter<IRegressionProblemData> ProblemDataParameter => (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName];
    public IFixedValueParameter<StructureTemplate> StructureTemplateParameter => (IFixedValueParameter<StructureTemplate>)Parameters[StructureTemplateParameterName];
    public IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter => (IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName];
    public IFixedValueParameter<DoubleLimit> EstimationLimitsParameter => (IFixedValueParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName];
    public IResultParameter<ISymbolicRegressionSolution> BestTrainingSolutionParameter => (IResultParameter<ISymbolicRegressionSolution>)Parameters[BestTrainingSolutionParameterName];

    public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter => (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName];
    public IFixedValueParameter<BoolValue> OptimizeParametersParameter => (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersParameterName];
    #endregion

    #region Properties

    /// <summary>
    /// The regression data. The setter stores the value in <see cref="ProblemDataParameter"/>
    /// and afterwards raises <see cref="ProblemDataChanged"/>.
    /// </summary>
    public IRegressionProblemData ProblemData {
      get => ProblemDataParameter.Value;
      set {
        ProblemDataParameter.Value = value;
        ProblemDataChanged?.Invoke(this, EventArgs.Empty);
      }
    }

    /// <summary>The template that defines the fixed part of the model and its sub-functions.</summary>
    public StructureTemplate StructureTemplate => StructureTemplateParameter.Value;

    /// <summary>The interpreter used to evaluate trees during evaluation and analysis.</summary>
    public ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter => InterpreterParameter.Value;

    IParameter IDataAnalysisProblem.ProblemDataParameter => ProblemDataParameter;
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData => ProblemData;

    /// <summary>Range to which estimated values are limited before the error is calculated.</summary>
    public DoubleLimit EstimationLimits => EstimationLimitsParameter.Value;

    /// <summary>Mirrors the "Apply Linear Scaling" parameter; kept in sync with the template's flag via event handlers.</summary>
    public bool ApplyLinearScaling {
      get => ApplyLinearScalingParameter.Value.Value;
      set => ApplyLinearScalingParameter.Value.Value = value;
    }

    /// <summary>If set, tree parameters are optimized during <see cref="Evaluate"/>.</summary>
    public bool OptimizeParameters {
      get => OptimizeParametersParameter.Value.Value;
      set => OptimizeParametersParameter.Value.Value = value;
    }

    /// <summary>The quality is an error measure, therefore it is minimized.</summary>
    public override bool Maximization => false;
    #endregion

    #region EventHandlers
    /// <summary>Raised by the <see cref="ProblemData"/> setter after the new value was stored.</summary>
    public event EventHandler ProblemDataChanged;
    #endregion

    #region Constructors & Cloning
    /// <summary>
    /// Creates the problem with the sheet bending process instance as default data, registers all
    /// parameters and analyzers and sets a default structure template.
    /// </summary>
    public StructuredSymbolicRegressionSingleObjectiveProblem() {
      var provider = new PhysicsInstanceProvider();
      var descriptor = new SheetBendingProcess();
      var problemData = provider.LoadData(descriptor);
      var shapeConstraintProblemData = new ShapeConstrainedRegressionProblemData(problemData);

      var structureTemplate = new StructureTemplate();

      Parameters.Add(new ValueParameter<IRegressionProblemData>(
        ProblemDataParameterName,
        shapeConstraintProblemData));

      Parameters.Add(new FixedValueParameter<StructureTemplate>(
        StructureTemplateParameterName,
        StructureTemplateDescriptionText,
        structureTemplate));

      Parameters.Add(new FixedValueParameter<BoolValue>(
        ApplyLinearScalingParameterName, new BoolValue(true)
        ));

      Parameters.Add(new FixedValueParameter<BoolValue>(
        OptimizeParametersParameterName, new BoolValue(true)
        ));

      Parameters.Add(new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(
        InterpreterParameterName,
        new SymbolicDataAnalysisExpressionTreeBatchInterpreter()) { Hidden = true });
      Parameters.Add(new FixedValueParameter<DoubleLimit>(
        EstimationLimitsParameterName,
        new DoubleLimit(double.NegativeInfinity, double.PositiveInfinity)) { Hidden = true });
      Parameters.Add(new ResultParameter<ISymbolicRegressionSolution>(BestTrainingSolutionParameterName, "") { Hidden = true });

      this.EvaluatorParameter.Hidden = true;

      Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
      Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
      Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());

      // Handlers must be attached before the template is assigned below, because the
      // template's Changed handler (OnTemplateChanged) is what builds the encoding.
      RegisterEventHandlers();

      StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
      StructureTemplate.Template =
        "(" +
          "(210000 / (210000 + h)) * ((sigma_y * t * t) / (wR * Rt * t)) + " +
          "PlasticHardening(_) - Elasticity(_)" +
        ")" +
        " * C(_)";
    }

    /// <summary>Cloning constructor; re-attaches the event handlers on the clone.</summary>
    public StructuredSymbolicRegressionSingleObjectiveProblem(StructuredSymbolicRegressionSingleObjectiveProblem original, Cloner cloner) : base(original, cloner) {
      RegisterEventHandlers();
    }

    public override IDeepCloneable Clone(Cloner cloner) =>
      new StructuredSymbolicRegressionSingleObjectiveProblem(this, cloner);

    [StorableConstructor]
    protected StructuredSymbolicRegressionSingleObjectiveProblem(StorableConstructorFlag _) : base(_) { }


    /// <summary>
    /// Adds parameters that are missing in the deserialized instance and re-attaches the event handlers.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // A missing linear scaling parameter is initialized from the template's current flag.
      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, new BoolValue(StructureTemplate.ApplyLinearScaling)));
      }

      // A missing parameter optimization flag defaults to false here (the constructor uses true).
      if (!Parameters.ContainsKey(OptimizeParametersParameterName)) {
        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersParameterName, new BoolValue(false)));
      }

      RegisterEventHandlers();
    }

    #endregion

    /// <summary>
    /// Subscribes to template changes, problem data replacement and changes of the linear scaling flag.
    /// </summary>
    private void RegisterEventHandlers() {
      if (StructureTemplate != null) {
        StructureTemplate.Changed += OnTemplateChanged;
      }

      ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;
      // Forward changes of the parameter to the template.
      ApplyLinearScalingParameter.Value.ValueChanged += (o, e) => StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
    }

    /// <summary>Resets the structure template whenever the problem data parameter receives a new value.</summary>
    private void ProblemDataParameterValueChanged(object sender, EventArgs e) {
      StructureTemplate.Reset();
      // InfoBox for Reset?
    }

    /// <summary>Takes over the template's linear scaling flag and rebuilds the encoding.</summary>
    private void OnTemplateChanged(object sender, EventArgs args) {
      ApplyLinearScaling = StructureTemplate.ApplyLinearScaling;
      SetupEncoding();
    }

    /// <summary>
    /// Rebuilds the multi-encoding from the current structure template and configures the
    /// crossover and manipulation operators of the individual encoding parts.
    /// </summary>
    private void SetupEncoding() {
      // Start from scratch: drop every encoding part that is currently registered.
      foreach (var e in Encoding.Encodings.ToArray())
        Encoding.Remove(e);

      // Collect the values of the template's number nodes once; the query is deferred and
      // would otherwise be evaluated again for every use.
      var templateParameterValues = StructureTemplate.Tree
        .IterateNodesPrefix()
        .OfType<NumberTreeNode>()
        .Select(n => n.Value)
        .ToArray();

      if (templateParameterValues.Length > 0) {
        var numericEncoding = new RealVectorEncoding(NumericParametersEncoding, templateParameterValues.Length);

        // Initial vectors are created around the values written in the template.
        var creator = numericEncoding.Operators.OfType<NormalDistributedRealVectorCreator>().First();
        creator.MeanParameter.Value = new RealVector(templateParameterValues);
        creator.SigmaParameter.Value = new DoubleArray(templateParameterValues.Length);
        numericEncoding.SolutionCreator = creator;

        Encoding.Add(numericEncoding);
      }

      foreach (var subFunction in StructureTemplate.SubFunctions) {
        subFunction.SetupVariables(ProblemData.AllowedInputVariables);
        // prevent the same encoding twice
        if (Encoding.Encodings.Any(x => x.Name == subFunction.Name)) continue;

        var treeEncoding = new SymbolicExpressionTreeEncoding(
          subFunction.Name,
          subFunction.Grammar,
          subFunction.MaximumSymbolicExpressionTreeLength,
          subFunction.MaximumSymbolicExpressionTreeDepth);
        Encoding.Add(treeEncoding);
      }

      var multiCrossover = (IParameterizedItem)Encoding.Operators.OfType<MultiEncodingCrossover>().First();
      // Both passes below work on the same set of parameters, so it is determined only once.
      var crossoverParameters = multiCrossover.Parameters.OfType<ConstrainedValueParameter<ICrossover>>().ToArray();

      //set single point || copy crossover for numeric parameters
      foreach (var param in crossoverParameters) {
        if (!param.Name.Contains(NumericParametersEncoding)) continue;

        var singlePointCrossover = param.ValidValues.OfType<SinglePointCrossover>().First();
        var copyCrossover = param.ValidValues.OfType<CopyCrossover>().First();

        var realVectorEncoding = (RealVectorEncoding)Encoding.Encodings.First(e => e.Name == NumericParametersEncoding);
        //single-point crossover throws if encoding length == 1
        param.Value = realVectorEncoding.Length == 1
          ? (ICrossover)copyCrossover
          : singlePointCrossover;
      }

      //adapt crossover probability for subtree crossover
      var treeEncodingCount = Encoding.Encodings.OfType<SymbolicExpressionTreeEncoding>().Count();
      foreach (var param in crossoverParameters) {
        var subtreeCrossover = param.ValidValues.OfType<SubtreeCrossover>().FirstOrDefault();
        if (subtreeCrossover == null) continue;

        subtreeCrossover.CrossoverProbability = 1.0 / treeEncodingCount;
        param.Value = subtreeCrossover;
      }

      //set multi manipulator as default manipulator for all symbolic expression tree encoding parts
      var manipulator = (IParameterizedItem)Encoding.Operators.OfType<MultiEncodingManipulator>().First();
      foreach (var param in manipulator.Parameters.OfType<ConstrainedValueParameter<IManipulator>>()) {
        var m = param.ValidValues.OfType<MultiSymbolicExpressionTreeManipulator>().FirstOrDefault();
        // Parts without a multi manipulator fall back to their first valid manipulator.
        param.Value = m ?? param.ValidValues.First();
      }
    }

    /// <summary>
    /// Stores a regression solution built from the best individual of the current population
    /// in the results under the name of the best training solution parameter.
    /// </summary>
    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
      base.Analyze(individuals, qualities, results, random);

      var best = GetBestIndividual(individuals, qualities).Item1;

      if (!results.ContainsKey(BestTrainingSolutionParameter.ActualName)) {
        results.Add(new Result(BestTrainingSolutionParameter.ActualName, typeof(SymbolicRegressionSolution)));
      }

      // The resolved tree was attached to the individual by Evaluate.
      var tree = (ISymbolicExpressionTree)best[SymbolicExpressionTreeName];
      var model = new SymbolicRegressionModel(ProblemData.TargetVariable, tree, Interpreter);
      var solution = model.CreateRegressionSolution(ProblemData);

      results[BestTrainingSolutionParameter.ActualName].Value = solution;
    }


    /// <summary>
    /// Builds the complete tree for <paramref name="individual"/>, optionally tunes its parameters,
    /// writes the tuned parts back into the individual and returns the normalized mean squared
    /// error on the training partition.
    /// </summary>
    /// <returns>
    /// The NMSE; 1.0 is returned instead if the error calculation reports an error state or if an
    /// enabled shape constraint is violated or yields a NaN/infinite violation value.
    /// </returns>
    /// <exception cref="ArgumentException">The structure template has no tree.</exception>
    public override double Evaluate(Individual individual, IRandom random) {
      var templateTree = StructureTemplate.Tree;
      if (templateTree == null)
        throw new ArgumentException("No structure template defined!");

      var tree = BuildTreeFromIndividual(templateTree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);
      individual[SymbolicExpressionTreeName] = tree;

      if (OptimizeParameters) {
        // Variable nodes that belong to the template itself are excluded from the optimization.
        var excludeNodes = GetTemplateTreeNodes(tree.Root).OfType<IVariableTreeNode>();
        ParameterOptimization.OptimizeTreeParameters(ProblemData, tree, interpreter: Interpreter, excludeNodes: excludeNodes);
      } else if (ApplyLinearScaling) {
        LinearScaling.AdjustLinearScalingParams(ProblemData, tree, Interpreter);
      }

      // Propagate the (possibly changed) parameter values back into the encoding parts.
      UpdateIndividualFromTree(tree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);

      //calculate NMSE
      var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, ProblemData.TrainingIndices);
      var boundedEstimatedValues = estimatedValues.LimitToRange(EstimationLimits.Lower, EstimationLimits.Upper);
      var targetValues = ProblemData.TargetVariableTrainingValues;
      var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, out var errorState);
      if (errorState != OnlineCalculatorError.None)
        nmse = 1.0;

      //evaluate constraints
      var constraints = Enumerable.Empty<ShapeConstraint>();
      if (ProblemData is ShapeConstrainedRegressionProblemData scProbData)
        constraints = scProbData.ShapeConstraints.EnabledConstraints;
      if (constraints.Any()) {
        var boundsEstimator = new IntervalArithBoundsEstimator();
        var constraintViolations = IntervalUtil.GetConstraintViolations(constraints, boundsEstimator, ProblemData.VariableRanges, tree);

        // A single pass suffices: infinite/NaN violations and positive violations are
        // penalized in exactly the same way.
        if (constraintViolations.Any(v => double.IsNaN(v) || double.IsInfinity(v) || v > 0.0))
          nmse = 1.0;
      }

      return nmse;
    }

    /// <summary>
    /// Enumerates <paramref name="rootNode"/> and its descendants in prefix order, but does not
    /// descend below sub-function nodes: a <see cref="SubFunctionTreeNode"/> is returned itself,
    /// its subtrees are skipped.
    /// </summary>
    private static IEnumerable<ISymbolicExpressionTreeNode> GetTemplateTreeNodes(ISymbolicExpressionTreeNode rootNode) {
      yield return rootNode;
      foreach (var node in rootNode.Subtrees) {
        if (node is SubFunctionTreeNode) {
          yield return node;
          continue;
        }

        foreach (var subNode in GetTemplateTreeNodes(node))
          yield return subNode;
      }
    }

    /// <summary>
    /// Creates a complete tree from a clone of <paramref name="template"/>: number nodes receive the
    /// values of the individual's real vector and every sub-function node receives a clone of the
    /// corresponding tree of the individual as subtree.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The real vector length differs from the number of number nodes in the template.
    /// </exception>
    private static ISymbolicExpressionTree BuildTreeFromIndividual(ISymbolicExpressionTree template, Individual individual, bool containsNumericParameters) {
      var resolvedTree = (ISymbolicExpressionTree)template.Clone();

      //set numeric parameter values
      // This happens before the sub-function trees are attached, so only number nodes of the
      // template itself are counted.
      if (containsNumericParameters) {
        var realVector = individual.RealVector(NumericParametersEncoding);
        var numberTreeNodes = resolvedTree.IterateNodesPrefix().OfType<NumberTreeNode>().ToArray();

        if (realVector.Length != numberTreeNodes.Length)
          throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");

        for (int i = 0; i < numberTreeNodes.Length; i++)
          numberTreeNodes[i].Value = realVector[i];
      }

      // build main tree
      // Snapshot the sub-function nodes first: the loop body adds subtrees to the tree being traversed.
      var subFunctionTreeNodes = resolvedTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>().ToArray();
      foreach (var subFunctionTreeNode in subFunctionTreeNodes) {
        var subFunctionTree = individual.SymbolicExpressionTree(subFunctionTreeNode.Name);

        // extract function tree
        var subTree = subFunctionTree.Root.GetSubtree(0)  // StartSymbol
                                          .GetSubtree(0); // First Symbol
        subTree = (ISymbolicExpressionTreeNode)subTree.Clone();
        subFunctionTreeNode.AddSubtree(subTree);
      }
      return resolvedTree;
    }

    /// <summary>
    /// Inverse of <see cref="BuildTreeFromIndividual"/>: splits a clone of <paramref name="tree"/> into
    /// its sub-function trees and the template's numeric parameters and writes both back into
    /// <paramref name="individual"/>. The passed tree itself is not modified.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The real vector length differs from the number of number nodes remaining in the template part.
    /// </exception>
    private static void UpdateIndividualFromTree(ISymbolicExpressionTree tree, Individual individual, bool containsNumericParameters) {
      var clonedTree = (ISymbolicExpressionTree)tree.Clone();

      // Snapshot the sub-function nodes first: the loop body removes subtrees from the tree being traversed.
      var subFunctionTreeNodes = clonedTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>().ToArray();
      foreach (var subFunctionTreeNode in subFunctionTreeNodes) {
        // Reuse the grammar of the tree currently stored for this sub-function.
        var grammar = ((ISymbolicExpressionTree)individual[subFunctionTreeNode.Name]).Root.Grammar;
        var functionTreeNode = subFunctionTreeNode.GetSubtree(0);
        //remove function code to make numeric parameters extraction easier
        subFunctionTreeNode.RemoveSubtree(0);

        // Wrap the extracted function in a fresh program root / start node pair.
        var rootNode = (SymbolicExpressionTreeTopLevelNode)new ProgramRootSymbol().CreateTreeNode();
        rootNode.SetGrammar(grammar);
        var startNode = (SymbolicExpressionTreeTopLevelNode)new StartSymbol().CreateTreeNode();
        startNode.SetGrammar(grammar);

        rootNode.AddSubtree(startNode);
        startNode.AddSubtree(functionTreeNode);
        var functionTree = new SymbolicExpressionTree(rootNode);
        individual[subFunctionTreeNode.Name] = functionTree;
      }

      //set numeric parameter values
      // Only number nodes of the template part are left at this point.
      if (containsNumericParameters) {
        var realVector = individual.RealVector(NumericParametersEncoding);
        var numberTreeNodes = clonedTree.IterateNodesPrefix().OfType<NumberTreeNode>().ToArray();

        if (realVector.Length != numberTreeNodes.Length)
          throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");

        for (int i = 0; i < numberTreeNodes.Length; i++)
          realVector[i] = numberTreeNodes[i].Value;
      }
    }

    /// <summary>Uses <paramref name="data"/> as the new problem data.</summary>
    public void Load(IRegressionProblemData data) {
      ProblemData = data;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.