Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructureTemplateSymbolicRegressionProblem.cs

Last change on this file was 18220, checked in by gkronber, 2 years ago

#3136: reintegrated structure-template GP branch into trunk

File size: 18.8 KB
RevLine 
[18190]1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[18197]23using System.Collections.Generic;
[18061]24using System.Linq;
25using HEAL.Attic;
26using HeuristicLab.Common;
[18146]27using HeuristicLab.Core;
[18190]28using HeuristicLab.Data;
[18194]29using HeuristicLab.Encodings.RealVectorEncoding;
[18062]30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[18146]31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Problems.Instances;
[18084]34using HeuristicLab.Problems.Instances.DataAnalysis;
[18061]35
[18063]36namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
[18061]37  [StorableType("7464E84B-65CC-440A-91F0-9FA920D730F9")]
[18200]38  [Item(Name = "Structured Symbolic Regression Problem (single-objective)", Description = "A problem with a structural definition and variable subfunctions.")]
[18063]39  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 150)]
[18206]40  public class StructureTemplateSymbolicRegressionProblem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData> {
[18061]41
#region Constants
// Names under which the parameters of this problem are registered in the Parameters collection.
private const string ProblemDataParameterName = "ProblemData";
private const string StructureTemplateParameterName = "Structure Template";
private const string InterpreterParameterName = "Interpreter";
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string BestTrainingSolutionParameterName = "Best Training Solution";
private const string ApplyLinearScalingParameterName = "Apply Linear Scaling";
private const string OptimizeParametersParameterName = "Optimize Parameters";

// Key under which Evaluate stores the resolved tree inside an individual (read again by Analyze).
private const string SymbolicExpressionTreeName = "SymbolicExpressionTree";

// Name of the real-vector encoding that carries the numeric parameters of the template.
private const string NumericParametersEncoding = "Numeric Parameters";

// Description attached to the "Structure Template" parameter.
private const string StructureTemplateDescriptionText =
  "Enter your expression as string in infix format into the empty input field.\n"
  + "By checking the \"Apply Linear Scaling\" checkbox you can add the relevant scaling terms to your expression.\n"
  + "After entering the expression click parse to build the tree.\n"
  + "To edit the defined sub-functions, click on the corresponding-colored node in the tree view.\n"
  + "Check the info box besides the input field for more information.";
#endregion
61
#region Parameters
// Typed accessors for the entries of the Parameters collection; each one looks the
// parameter up by its name constant and casts it to the expected parameter type.

public IValueParameter<IRegressionProblemData> ProblemDataParameter {
  get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
}

public IFixedValueParameter<StructureTemplate> StructureTemplateParameter {
  get { return (IFixedValueParameter<StructureTemplate>)Parameters[StructureTemplateParameterName]; }
}

public IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
  get { return (IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
}

public IFixedValueParameter<DoubleLimit> EstimationLimitsParameter {
  get { return (IFixedValueParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
}

public IResultParameter<ISymbolicRegressionSolution> BestTrainingSolutionParameter {
  get { return (IResultParameter<ISymbolicRegressionSolution>)Parameters[BestTrainingSolutionParameterName]; }
}

public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter {
  get { return (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
}

public IFixedValueParameter<BoolValue> OptimizeParametersParameter {
  get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersParameterName]; }
}
#endregion
72
#region Properties

/// <summary>
/// The regression data the problem works on. Assigning a value stores it in
/// <see cref="ProblemDataParameter"/> and then raises <see cref="ProblemDataChanged"/>.
/// </summary>
public IRegressionProblemData ProblemData {
  get { return ProblemDataParameter.Value; }
  set {
    ProblemDataParameter.Value = value;
    var handler = ProblemDataChanged;
    if (handler != null) handler(this, EventArgs.Empty);
  }
}

/// <summary>The structure template held by <see cref="StructureTemplateParameter"/>.</summary>
public StructureTemplate StructureTemplate {
  get { return StructureTemplateParameter.Value; }
}

/// <summary>The interpreter held by <see cref="InterpreterParameter"/>.</summary>
public ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter {
  get { return InterpreterParameter.Value; }
}

// Explicit IDataAnalysisProblem implementation; forwards to the typed members above.
IParameter IDataAnalysisProblem.ProblemDataParameter {
  get { return ProblemDataParameter; }
}

IDataAnalysisProblemData IDataAnalysisProblem.ProblemData {
  get { return ProblemData; }
}

/// <summary>Lower/upper limits that Evaluate applies to the estimated values.</summary>
public DoubleLimit EstimationLimits {
  get { return EstimationLimitsParameter.Value; }
}

/// <summary>Reads or writes the flag stored in <see cref="ApplyLinearScalingParameter"/>.</summary>
public bool ApplyLinearScaling {
  get { return ApplyLinearScalingParameter.Value.Value; }
  set { ApplyLinearScalingParameter.Value.Value = value; }
}

/// <summary>Reads or writes the flag stored in <see cref="OptimizeParametersParameter"/>.</summary>
public bool OptimizeParameters {
  get { return OptimizeParametersParameter.Value.Value; }
  set { OptimizeParametersParameter.Value.Value = value; }
}

/// <summary>Always <c>false</c>: the quality returned by Evaluate is minimized.</summary>
public override bool Maximization {
  get { return false; }
}
#endregion

#region EventHandlers
/// <summary>Raised by the <see cref="ProblemData"/> setter after the new value has been stored.</summary>
public event EventHandler ProblemDataChanged;
#endregion
108
109    #region Constructors & Cloning
/// <summary>
/// Creates the problem with a default data set, all parameters, the default analyzers
/// and a default structure template.
/// </summary>
public StructureTemplateSymbolicRegressionProblem() {
  // Default data: the SheetBendingProcess instance loaded through the PhysicsInstanceProvider,
  // wrapped in ShapeConstrainedRegressionProblemData (the type Evaluate checks for constraints).
  var instanceProvider = new PhysicsInstanceProvider();
  var defaultInstance = new SheetBendingProcess();
  var loadedData = instanceProvider.LoadData(defaultInstance);
  var constrainedData = new ShapeConstrainedRegressionProblemData(loadedData);

  var template = new StructureTemplate();

  // Visible parameters.
  Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, constrainedData));
  Parameters.Add(new FixedValueParameter<StructureTemplate>(StructureTemplateParameterName, StructureTemplateDescriptionText, template));
  Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, new BoolValue(true)));
  Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersParameterName, new BoolValue(true)));

  // Hidden parameters.
  var interpreterParameter = new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(
    InterpreterParameterName,
    new SymbolicDataAnalysisExpressionTreeBatchInterpreter());
  interpreterParameter.Hidden = true;
  Parameters.Add(interpreterParameter);

  var estimationLimitsParameter = new FixedValueParameter<DoubleLimit>(
    EstimationLimitsParameterName,
    new DoubleLimit(double.NegativeInfinity, double.PositiveInfinity));
  estimationLimitsParameter.Hidden = true;
  Parameters.Add(estimationLimitsParameter);

  var bestSolutionParameter = new ResultParameter<ISymbolicRegressionSolution>(BestTrainingSolutionParameterName, "");
  bestSolutionParameter.Hidden = true;
  Parameters.Add(bestSolutionParameter);

  EvaluatorParameter.Hidden = true;

  // Default analyzers.
  Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
  Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
  Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());

  // Handlers are attached before the template text is assigned below.
  // NOTE(review): presumably so that the assignment reaches OnTemplateChanged and builds
  // the encoding - confirm that the Template setter raises Changed.
  RegisterEventHandlers();

  StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
  StructureTemplate.Template =
    "(" +
      "(210000 / (210000 + h)) * ((sigma_y * t * t) / (wR * Rt * t)) + " +
      "PlasticHardening(_) - Elasticity(_)" +
    ")" +
    " * C(_)";
}
159
/// <summary>
/// Cloning constructor: the base class copies the state, afterwards the event handlers
/// are attached to the clone.
/// </summary>
public StructureTemplateSymbolicRegressionProblem(StructureTemplateSymbolicRegressionProblem original, Cloner cloner)
  : base(original, cloner) {
  RegisterEventHandlers();
}

/// <summary>Creates a clone of this problem through the cloning constructor.</summary>
public override IDeepCloneable Clone(Cloner cloner) {
  return new StructureTemplateSymbolicRegressionProblem(this, cloner);
}

// Constructor used by the persistence framework; event handlers are attached in the
// AfterDeserialization hook instead of here.
[StorableConstructor]
protected StructureTemplateSymbolicRegressionProblem(StorableConstructorFlag _) : base(_) { }
[18151]169
170
/// <summary>
/// Runs after deserialization: adds the two boolean parameters when they are missing
/// (presumably for instances stored before these parameters existed) and then attaches
/// the event handlers.
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  bool hasScalingParameter = Parameters.ContainsKey(ApplyLinearScalingParameterName);
  if (!hasScalingParameter) {
    // take the initial value from the template's own flag
    var scalingFlag = new BoolValue(StructureTemplate.ApplyLinearScaling);
    Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, scalingFlag));
  }

  bool hasOptimizeParameter = Parameters.ContainsKey(OptimizeParametersParameterName);
  if (!hasOptimizeParameter) {
    // a missing parameter defaults to "off" (the default constructor uses "on")
    var optimizeFlag = new BoolValue(false);
    Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersParameterName, optimizeFlag));
  }

  // must come last: RegisterEventHandlers reads ApplyLinearScalingParameter
  RegisterEventHandlers();
}
183
[18065]184    #endregion
[18061]185
/// <summary>
/// Attaches the handlers that keep template, problem data and the linear-scaling flag in sync.
/// Called from both public constructors and from the AfterDeserialization hook.
/// </summary>
private void RegisterEventHandlers() {
  var template = StructureTemplate;
  if (template != null)
    template.Changed += OnTemplateChanged;

  ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;

  // push changes of the parameter's flag into the template
  ApplyLinearScalingParameter.Value.ValueChanged += (sender, args) => {
    StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
  };
}
194
/// <summary>Resets the structure template whenever the problem data parameter receives a new value.</summary>
// Open question from the original author: InfoBox for Reset?
private void ProblemDataParameterValueChanged(object sender, EventArgs e) => StructureTemplate.Reset();
199
/// <summary>
/// Handles a change of the structure template: copies the template's linear-scaling flag
/// into the parameter and rebuilds the encoding.
/// </summary>
private void OnTemplateChanged(object sender, EventArgs args) {
  var templateUsesScaling = StructureTemplate.ApplyLinearScaling;
  ApplyLinearScaling = templateUsesScaling;

  SetupEncoding();
}
204
/// <summary>
/// Rebuilds the multi-encoding from the current structure template: one real-vector part for
/// the numeric parameters of the template (if it has any), one symbolic-expression-tree part
/// per sub-function, and matching default crossover/manipulation operators.
/// </summary>
private void SetupEncoding() {
  // start from an empty encoding; ToArray because Remove changes the enumerated collection
  foreach (var existing in Encoding.Encodings.ToArray())
    Encoding.Remove(existing);

  // Evaluate treats a null Tree as "no template defined", so guard against it here as well
  // instead of failing with a NullReferenceException inside an event handler.
  var templateTree = StructureTemplate.Tree;
  if (templateTree != null) {
    // traverse the template once and keep the values (the query would otherwise run twice)
    var templateParameterValues = templateTree.IterateNodesPrefix()
      .OfType<NumberTreeNode>()
      .Select(n => n.Value)
      .ToArray();

    if (templateParameterValues.Length > 0) {
      var encoding = new RealVectorEncoding(NumericParametersEncoding, templateParameterValues.Length);

      // initial vectors are drawn around the values written in the template;
      // sigma is a fresh DoubleArray (presumably all zeros, i.e. no initial spread - confirm)
      var creator = encoding.Operators.OfType<NormalDistributedRealVectorCreator>().First();
      creator.MeanParameter.Value = new RealVector(templateParameterValues);
      creator.SigmaParameter.Value = new DoubleArray(templateParameterValues.Length);
      encoding.SolutionCreator = creator;

      Encoding.Add(encoding);
    }
  }

  foreach (var subFunction in StructureTemplate.SubFunctions) {
    subFunction.SetupVariables(ProblemData.AllowedInputVariables);
    // prevent the same encoding twice
    if (Encoding.Encodings.Any(x => x.Name == subFunction.Name)) continue;

    var encoding = new SymbolicExpressionTreeEncoding(
      subFunction.Name,
      subFunction.Grammar,
      subFunction.MaximumSymbolicExpressionTreeLength,
      subFunction.MaximumSymbolicExpressionTreeDepth);
    Encoding.Add(encoding);
  }

  var multiCrossover = (IParameterizedItem)Encoding.Operators.OfType<MultiEncodingCrossover>().First();
  // both loops below walk the same parameters; collect them once
  var crossoverParameters = multiCrossover.Parameters.OfType<ConstrainedValueParameter<ICrossover>>().ToArray();

  // set single point || copy crossover for numeric parameters
  foreach (var param in crossoverParameters) {
    if (!param.Name.Contains(NumericParametersEncoding)) continue;

    var singlePointCrossover = param.ValidValues.OfType<SinglePointCrossover>().First();
    var copyCrossover = param.ValidValues.OfType<CopyCrossover>().First();

    var realVectorEncoding = (RealVectorEncoding)Encoding.Encodings.First(enc => enc.Name == NumericParametersEncoding);
    if (realVectorEncoding.Length == 1) {
      // single-point crossover throws if encoding length == 1
      param.Value = copyCrossover;
    } else {
      param.Value = singlePointCrossover;
    }
  }

  // adapt crossover probability for subtree crossover; the number of tree parts does not
  // change inside the loop, so it is counted once (only used when a subtree crossover exists)
  var treeEncodingCount = Encoding.Encodings.OfType<SymbolicExpressionTreeEncoding>().Count();
  foreach (var param in crossoverParameters) {
    var subtreeCrossover = param.ValidValues.OfType<SubtreeCrossover>().FirstOrDefault();
    if (subtreeCrossover != null) {
      subtreeCrossover.CrossoverProbability = 1.0 / treeEncodingCount;
      param.Value = subtreeCrossover;
    }
  }

  // set multi manipulator as default manipulator for all symbolic expression tree encoding parts;
  // parts without one keep their first valid manipulator
  var manipulator = (IParameterizedItem)Encoding.Operators.OfType<MultiEncodingManipulator>().First();
  foreach (var param in manipulator.Parameters.OfType<ConstrainedValueParameter<IManipulator>>()) {
    var m = param.ValidValues.OfType<MultiSymbolicExpressionTreeManipulator>().FirstOrDefault();
    param.Value = m ?? param.ValidValues.First();
  }
}
268
/// <summary>
/// Runs the base analysis and then publishes the best individual of the given population as
/// a regression solution under the name of the "Best Training Solution" result parameter.
/// </summary>
/// <param name="individuals">The individuals to analyze.</param>
/// <param name="qualities">The qualities belonging to <paramref name="individuals"/>.</param>
/// <param name="results">The result collection that receives the best solution.</param>
/// <param name="random">Random number generator, passed on to the base implementation.</param>
public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
  base.Analyze(individuals, qualities, results, random);

  var bestIndividual = GetBestIndividual(individuals, qualities).Item1;

  // create the result entry on first use
  var resultName = BestTrainingSolutionParameter.ActualName;
  if (!results.ContainsKey(resultName))
    results.Add(new Result(resultName, typeof(SymbolicRegressionSolution)));

  // the resolved tree was stored in the individual by Evaluate
  var bestTree = (ISymbolicExpressionTree)bestIndividual[SymbolicExpressionTreeName];
  var bestModel = new SymbolicRegressionModel(ProblemData.TargetVariable, bestTree, Interpreter);
  var bestSolution = bestModel.CreateRegressionSolution(ProblemData);

  results[resultName].Value = bestSolution;
}
284
[18076]285
/// <summary>
/// Builds the full expression tree for an individual, optionally tunes its parameters or its
/// linear scaling, writes the tuned parts back into the individual and returns the normalized
/// mean squared error on the training partition.
/// </summary>
/// <param name="individual">The individual to evaluate; it is updated with the resolved tree.</param>
/// <param name="random">Random number generator (not used by this implementation).</param>
/// <returns>
/// The NMSE of the bounded estimates, or 1.0 when the NMSE calculation reports an error or
/// when an enabled shape constraint is violated or evaluates to NaN/infinity.
/// </returns>
/// <exception cref="ArgumentException">The structure template has no tree.</exception>
public override double Evaluate(Individual individual, IRandom random) {
  var templateTree = StructureTemplate.Tree;
  if (templateTree == null)
    throw new ArgumentException("No structure template defined!");

  var tree = BuildTreeFromIndividual(templateTree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);
  individual[SymbolicExpressionTreeName] = tree;

  if (OptimizeParameters) {
    // variable nodes that belong to the template itself are excluded from the optimization
    var excludeNodes = GetTemplateTreeNodes(tree.Root).OfType<IVariableTreeNode>();
    ParameterOptimization.OptimizeTreeParameters(ProblemData, tree, interpreter: Interpreter, excludeNodes: excludeNodes);
  } else if (ApplyLinearScaling) {
    LinearScaling.AdjustLinearScalingParams(ProblemData, tree, Interpreter);
  }

  // write the (possibly tuned) sub-function trees and numeric parameters back
  UpdateIndividualFromTree(tree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);

  //calculate NMSE
  var boundedEstimatedValues = Interpreter
    .GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, ProblemData.TrainingIndices)
    .LimitToRange(EstimationLimits.Lower, EstimationLimits.Upper);
  var quality = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(
    ProblemData.TargetVariableTrainingValues, boundedEstimatedValues, out var calculatorState);
  if (calculatorState != OnlineCalculatorError.None)
    quality = 1.0;

  //evaluate constraints
  IEnumerable<ShapeConstraint> constraints = Enumerable.Empty<ShapeConstraint>();
  if (ProblemData is ShapeConstrainedRegressionProblemData constrainedData)
    constraints = constrainedData.ShapeConstraints.EnabledConstraints;
  if (!constraints.Any())
    return quality;

  var boundsEstimator = new IntervalArithBoundsEstimator();
  var violations = IntervalUtil.GetConstraintViolations(constraints, boundsEstimator, ProblemData.VariableRanges, tree);

  // infinite/NaN constraints and positive violations are penalized the same way
  bool penalize = violations.Any(v => double.IsNaN(v) || double.IsInfinity(v) || v > 0.0);
  return penalize ? 1.0 : quality;
}
329
/// <summary>
/// Enumerates, in prefix order, the nodes reachable from <paramref name="rootNode"/> without
/// descending into sub-functions: a <c>SubFunctionTreeNode</c> below the root is returned
/// itself, but its subtrees are skipped.
/// </summary>
/// <param name="rootNode">The node to start from; it is always returned first and always expanded.</param>
private static IEnumerable<ISymbolicExpressionTreeNode> GetTemplateTreeNodes(ISymbolicExpressionTreeNode rootNode) {
  var pending = new Stack<ISymbolicExpressionTreeNode>();
  pending.Push(rootNode);

  while (pending.Count > 0) {
    var current = pending.Pop();
    yield return current;

    // sub-function nodes are leaves of the template, except when one is the starting node
    if (current is SubFunctionTreeNode && !ReferenceEquals(current, rootNode))
      continue;

    // push the children right-to-left so that they are popped left-to-right
    foreach (var child in Enumerable.Reverse(current.Subtrees))
      pending.Push(child);
  }
}
342
/// <summary>
/// Creates the complete expression tree for an individual: clones the template, writes the
/// individual's numeric parameter values into the template's number nodes and hangs a clone
/// of each sub-function's expression below the corresponding sub-function node.
/// </summary>
/// <param name="template">The template tree; it is cloned and not modified.</param>
/// <param name="individual">Supplies the real vector and the sub-function trees.</param>
/// <param name="containsNumericParameters">Whether the individual carries the numeric-parameter vector.</param>
/// <returns>The resolved tree (a new instance).</returns>
/// <exception cref="InvalidOperationException">
/// The real vector length differs from the number of number nodes in the template.
/// </exception>
private static ISymbolicExpressionTree BuildTreeFromIndividual(ISymbolicExpressionTree template, Individual individual, bool containsNumericParameters) {
  var resolvedTree = (ISymbolicExpressionTree)template.Clone();

  //set numeric parameter values
  // (done before the sub-functions are attached, so only the template's own number nodes are hit)
  if (containsNumericParameters) {
    var realVector = individual.RealVector(NumericParametersEncoding);
    var numberTreeNodes = resolvedTree.IterateNodesPrefix().OfType<NumberTreeNode>().ToArray();

    if (realVector.Length != numberTreeNodes.Length)
      throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");

    for (int i = 0; i < numberTreeNodes.Length; i++)
      numberTreeNodes[i].Value = realVector[i];
  }

  // build main tree
  // Snapshot the sub-function nodes first: the loop adds subtrees to the tree, which must not
  // happen while the traversal is still being enumerated.
  var subFunctionTreeNodes = resolvedTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>().ToArray();
  foreach (var subFunctionTreeNode in subFunctionTreeNodes) {
    var subFunctionTree = individual.SymbolicExpressionTree(subFunctionTreeNode.Name);

    // extract function tree
    var subTree = subFunctionTree.Root.GetSubtree(0)  // StartSymbol
                                      .GetSubtree(0); // First Symbol
    // attach a copy so that the individual's own tree stays untouched
    subTree = (ISymbolicExpressionTreeNode)subTree.Clone();
    subFunctionTreeNode.AddSubtree(subTree);
  }
  return resolvedTree;
}
370
/// <summary>
/// Writes a resolved tree back into an individual: every sub-function's expression is wrapped
/// in a new tree (program root and start node) and stored under the sub-function's name, and
/// the values of the template's number nodes are copied into the numeric-parameter vector.
/// </summary>
/// <param name="tree">The resolved tree; it is cloned and not modified.</param>
/// <param name="individual">The individual to update.</param>
/// <param name="containsNumericParameters">Whether the individual carries the numeric-parameter vector.</param>
/// <exception cref="InvalidOperationException">
/// The real vector length differs from the number of number nodes left in the template part.
/// </exception>
private static void UpdateIndividualFromTree(ISymbolicExpressionTree tree, Individual individual, bool containsNumericParameters) {
  var clonedTree = (ISymbolicExpressionTree)tree.Clone();

  // Snapshot the sub-function nodes first: the loop removes subtrees from the tree, which must
  // not happen while the traversal is still being enumerated.
  var subFunctionTreeNodes = clonedTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>().ToArray();
  foreach (var subFunctionTreeNode in subFunctionTreeNodes) {
    // reuse the grammar of the tree currently stored for this sub-function
    var grammar = ((ISymbolicExpressionTree)individual[subFunctionTreeNode.Name]).Root.Grammar;
    var functionTreeNode = subFunctionTreeNode.GetSubtree(0);
    //remove function code to make numeric parameters extraction easier
    subFunctionTreeNode.RemoveSubtree(0);

    // wrap the detached expression: ProgramRoot -> Start -> expression
    var rootNode = (SymbolicExpressionTreeTopLevelNode)new ProgramRootSymbol().CreateTreeNode();
    rootNode.SetGrammar(grammar);
    var startNode = (SymbolicExpressionTreeTopLevelNode)new StartSymbol().CreateTreeNode();
    startNode.SetGrammar(grammar);

    rootNode.AddSubtree(startNode);
    startNode.AddSubtree(functionTreeNode);
    var functionTree = new SymbolicExpressionTree(rootNode);
    individual[subFunctionTreeNode.Name] = functionTree;
  }

  //set numeric parameter values
  // (the sub-function code was removed above, so only the template's own number nodes remain)
  if (containsNumericParameters) {
    var realVector = individual.RealVector(NumericParametersEncoding);
    var numberTreeNodes = clonedTree.IterateNodesPrefix().OfType<NumberTreeNode>().ToArray();

    if (realVector.Length != numberTreeNodes.Length)
      throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");

    for (int i = 0; i < numberTreeNodes.Length; i++)
      realVector[i] = numberTreeNodes[i].Value;
  }
}
404
/// <summary>
/// Loads a problem instance by assigning it to <see cref="ProblemData"/>.
/// </summary>
/// <param name="data">The regression data to use from now on.</param>
public void Load(IRegressionProblemData data) => ProblemData = data;
[18061]408  }
409}
Note: See TracBrowser for help on using the repository browser.