Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructuredSymbolicRegressionSingleObjectiveProblem.cs @ 18198

Last change on this file since 18198 was 18198, checked in by mkommend, 2 years ago

#3136: Fixed error in structural GP if only one numeric parameter is present in the template by providing a new copy crossover for real vectors.

File size: 18.9 KB
RevLine 
[18190]1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[18197]23using System.Collections.Generic;
[18061]24using System.Linq;
25using HEAL.Attic;
26using HeuristicLab.Common;
[18146]27using HeuristicLab.Core;
[18190]28using HeuristicLab.Data;
[18194]29using HeuristicLab.Encodings.RealVectorEncoding;
[18062]30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
[18146]31using HeuristicLab.Optimization;
[18198]32using HeuristicLab.Optimization.Operators;
[18146]33using HeuristicLab.Parameters;
34using HeuristicLab.Problems.Instances;
[18084]35using HeuristicLab.Problems.Instances.DataAnalysis;
[18061]36
[18063]37namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
[18061]38  [StorableType("7464E84B-65CC-440A-91F0-9FA920D730F9")]
[18063]39  [Item(Name = "Structured Symbolic Regression Single Objective Problem (single-objective)", Description = "A problem with a structural definition and unfixed subfunctions.")]
40  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 150)]
[18075]41  public class StructuredSymbolicRegressionSingleObjectiveProblem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData> {
[18061]42
43    #region Constants
44    private const string ProblemDataParameterName = "ProblemData";
[18063]45    private const string StructureTemplateParameterName = "Structure Template";
[18075]46    private const string InterpreterParameterName = "Interpreter";
[18076]47    private const string EstimationLimitsParameterName = "EstimationLimits";
48    private const string BestTrainingSolutionParameterName = "Best Training Solution";
[18190]49    private const string ApplyLinearScalingParameterName = "Apply Linear Scaling";
50    private const string OptimizeParametersParameterName = "Optimize Parameters";
[18072]51
[18076]52    private const string SymbolicExpressionTreeName = "SymbolicExpressionTree";
[18194]53    private const string NumericParametersEncoding = "Numeric Parameters";
[18076]54
55    private const string StructureTemplateDescriptionText =
[18072]56      "Enter your expression as string in infix format into the empty input field.\n" +
57      "By checking the \"Apply Linear Scaling\" checkbox you can add the relevant scaling terms to your expression.\n" +
58      "After entering the expression click parse to build the tree.\n" +
[18134]59      "To edit the defined sub-functions, click on the corresponding-colored node in the tree view.\n" +
60      "Check the info box besides the input field for more information.";
[18061]61    #endregion
62
[18072]63    #region Parameters
[18061]64    public IValueParameter<IRegressionProblemData> ProblemDataParameter => (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName];
[18063]65    public IFixedValueParameter<StructureTemplate> StructureTemplateParameter => (IFixedValueParameter<StructureTemplate>)Parameters[StructureTemplateParameterName];
[18075]66    public IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter => (IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName];
[18076]67    public IFixedValueParameter<DoubleLimit> EstimationLimitsParameter => (IFixedValueParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName];
68    public IResultParameter<ISymbolicRegressionSolution> BestTrainingSolutionParameter => (IResultParameter<ISymbolicRegressionSolution>)Parameters[BestTrainingSolutionParameterName];
[18190]69
70    public IFixedValueParameter<BoolValue> ApplyLinearScalingParameter => (IFixedValueParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName];
71    public IFixedValueParameter<BoolValue> OptimizeParametersParameter => (IFixedValueParameter<BoolValue>)Parameters[OptimizeParametersParameterName];
[18061]72    #endregion
73
74    #region Properties
[18081]75
[18076]76    public IRegressionProblemData ProblemData {
77      get => ProblemDataParameter.Value;
[18061]78      set {
79        ProblemDataParameter.Value = value;
80        ProblemDataChanged?.Invoke(this, EventArgs.Empty);
81      }
82    }
83
[18075]84    public StructureTemplate StructureTemplate => StructureTemplateParameter.Value;
[18061]85
[18075]86    public ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter => InterpreterParameter.Value;
[18063]87
[18061]88    IParameter IDataAnalysisProblem.ProblemDataParameter => ProblemDataParameter;
89    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData => ProblemData;
90
[18076]91    public DoubleLimit EstimationLimits => EstimationLimitsParameter.Value;
92
[18190]93    public bool ApplyLinearScaling {
94      get => ApplyLinearScalingParameter.Value.Value;
95      set => ApplyLinearScalingParameter.Value.Value = value;
96    }
97
98    public bool OptimizeParameters {
99      get => OptimizeParametersParameter.Value.Value;
100      set => OptimizeParametersParameter.Value.Value = value;
101    }
102
[18081]103    public override bool Maximization => false;
[18061]104    #endregion
105
106    #region EventHandlers
107    public event EventHandler ProblemDataChanged;
108    #endregion
109
110    #region Constructors & Cloning
111    public StructuredSymbolicRegressionSingleObjectiveProblem() {
[18101]112      var provider = new PhysicsInstanceProvider();
113      var descriptor = new SheetBendingProcess();
[18084]114      var problemData = provider.LoadData(descriptor);
115      var shapeConstraintProblemData = new ShapeConstrainedRegressionProblemData(problemData);
116
[18065]117      var structureTemplate = new StructureTemplate();
118
[18075]119      Parameters.Add(new ValueParameter<IRegressionProblemData>(
[18076]120        ProblemDataParameterName,
[18084]121        shapeConstraintProblemData));
[18081]122
[18075]123      Parameters.Add(new FixedValueParameter<StructureTemplate>(
[18076]124        StructureTemplateParameterName,
125        StructureTemplateDescriptionText,
[18075]126        structureTemplate));
[18099]127
[18190]128      Parameters.Add(new FixedValueParameter<BoolValue>(
129        ApplyLinearScalingParameterName, new BoolValue(true)
130        ));
131
132      Parameters.Add(new FixedValueParameter<BoolValue>(
133        OptimizeParametersParameterName, new BoolValue(true)
134        ));
135
[18075]136      Parameters.Add(new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(
[18076]137        InterpreterParameterName,
[18162]138        new SymbolicDataAnalysisExpressionTreeBatchInterpreter()) { Hidden = true });
[18076]139      Parameters.Add(new FixedValueParameter<DoubleLimit>(
140        EstimationLimitsParameterName,
[18152]141        new DoubleLimit(double.NegativeInfinity, double.PositiveInfinity)) { Hidden = true });
[18095]142      Parameters.Add(new ResultParameter<ISymbolicRegressionSolution>(BestTrainingSolutionParameterName, "") { Hidden = true });
[18075]143
[18081]144      this.EvaluatorParameter.Hidden = true;
[18099]145
[18076]146      Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
147      Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
148      Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());
149
[18151]150      RegisterEventHandlers();
[18190]151
152      StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
[18099]153      StructureTemplate.Template =
[18084]154        "(" +
155          "(210000 / (210000 + h)) * ((sigma_y * t * t) / (wR * Rt * t)) + " +
156          "PlasticHardening(_) - Elasticity(_)" +
157        ")" +
158        " * C(_)";
[18061]159    }
160
[18184]161    public StructuredSymbolicRegressionSingleObjectiveProblem(StructuredSymbolicRegressionSingleObjectiveProblem original, Cloner cloner) : base(original, cloner) {
[18151]162      RegisterEventHandlers();
163    }
[18061]164
[18184]165    public override IDeepCloneable Clone(Cloner cloner) =>
166      new StructuredSymbolicRegressionSingleObjectiveProblem(this, cloner);
167
[18061]168    [StorableConstructor]
[18063]169    protected StructuredSymbolicRegressionSingleObjectiveProblem(StorableConstructorFlag _) : base(_) { }
[18151]170
171
172    [StorableHook(HookType.AfterDeserialization)]
173    private void AfterDeserialization() {
[18190]174      if (!Parameters.ContainsKey(ApplyLinearScalingParameterName)) {
175        Parameters.Add(new FixedValueParameter<BoolValue>(ApplyLinearScalingParameterName, new BoolValue(StructureTemplate.ApplyLinearScaling)));
176      }
177
178      if (!Parameters.ContainsKey(OptimizeParametersParameterName)) {
179        Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeParametersParameterName, new BoolValue(false)));
180      }
181
[18151]182      RegisterEventHandlers();
183    }
184
[18065]185    #endregion
[18061]186
[18151]187    private void RegisterEventHandlers() {
188      if (StructureTemplate != null) {
189        StructureTemplate.Changed += OnTemplateChanged;
190      }
191
192      ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;
[18190]193      ApplyLinearScalingParameter.Value.ValueChanged += (o, e) => StructureTemplate.ApplyLinearScaling = ApplyLinearScaling;
[18151]194    }
195
[18184]196    private void ProblemDataParameterValueChanged(object sender, EventArgs e) {
197      StructureTemplate.Reset();
198      // InfoBox for Reset?
199    }
200
[18066]201    private void OnTemplateChanged(object sender, EventArgs args) {
[18190]202      ApplyLinearScaling = StructureTemplate.ApplyLinearScaling;
[18184]203      SetupEncoding();
[18068]204    }
205
[18184]206    private void SetupEncoding() {
[18066]207      foreach (var e in Encoding.Encodings.ToArray())
208        Encoding.Remove(e);
209
[18194]210
211      var templateNumberTreeNodes = StructureTemplate.Tree.IterateNodesPrefix().OfType<NumberTreeNode>();
212      if (templateNumberTreeNodes.Any()) {
213        var templateParameterValues = templateNumberTreeNodes.Select(n => n.Value).ToArray();
214        var encoding = new RealVectorEncoding(NumericParametersEncoding, templateParameterValues.Length);
215
216        var creator = encoding.Operators.OfType<NormalDistributedRealVectorCreator>().First();
217        creator.MeanParameter.Value = new RealVector(templateParameterValues);
218        creator.SigmaParameter.Value = new DoubleArray(templateParameterValues.Length);
219        encoding.SolutionCreator = creator;
220
[18198]221
[18194]222        Encoding.Add(encoding);
223      }
224
[18184]225      foreach (var subFunction in StructureTemplate.SubFunctions) {
226        subFunction.SetupVariables(ProblemData.AllowedInputVariables);
[18190]227        // prevent the same encoding twice
228        if (Encoding.Encodings.Any(x => x.Name == subFunction.Name)) continue;
[18184]229
230        var encoding = new SymbolicExpressionTreeEncoding(
231          subFunction.Name,
232          subFunction.Grammar,
233          subFunction.MaximumSymbolicExpressionTreeLength,
234          subFunction.MaximumSymbolicExpressionTreeDepth);
235        Encoding.Add(encoding);
[18066]236      }
[18152]237
[18198]238      //set single point || copy crossover for numeric parameters
[18194]239      var multiCrossover = (IParameterizedItem)Encoding.Operators.OfType<MultiEncodingCrossover>().First();
240      foreach (var param in multiCrossover.Parameters.OfType<ConstrainedValueParameter<ICrossover>>()) {
[18198]241        if (!param.Name.Contains(NumericParametersEncoding)) continue;
242
243        var singlePointCrossover = param.ValidValues.OfType<SinglePointCrossover>().First();
244        var copyCrossover = param.ValidValues.OfType<CopyCrossover>().First();
245
246        var realvectorEncoding = (RealVectorEncoding)Encoding.Encodings.Where(e => e.Name == NumericParametersEncoding).First();
247        if (realvectorEncoding.Length == 1) { //single-point crossover throws if encoding length == 1
248          param.Value = copyCrossover;
249        } else
250          param.Value = singlePointCrossover;
[18194]251      }
252
253      //adapt crossover probability for subtree crossover
254      foreach (var param in multiCrossover.Parameters.OfType<ConstrainedValueParameter<ICrossover>>()) {
255        var subtreeCrossover = param.ValidValues.OfType<SubtreeCrossover>().FirstOrDefault();
256        if (subtreeCrossover != null) {
257          subtreeCrossover.CrossoverProbability = 1.0 / Encoding.Encodings.OfType<SymbolicExpressionTreeEncoding>().Count();
258          param.Value = subtreeCrossover;
[18184]259        }
260      }
[18194]261
262      //set multi manipulator as default manipulator for all symbolic expression tree encoding parts
263      var manipulator = (IParameterizedItem)Encoding.Operators.OfType<MultiEncodingManipulator>().First();
264      foreach (var param in manipulator.Parameters.OfType<ConstrainedValueParameter<IManipulator>>()) {
265        var m = param.ValidValues.OfType<MultiSymbolicExpressionTreeManipulator>().FirstOrDefault();
266        param.Value = m ?? param.ValidValues.First();
267      }
[18066]268    }
269
270    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
271      base.Analyze(individuals, qualities, results, random);
272
[18095]273      var best = GetBestIndividual(individuals, qualities).Item1;
[18076]274
275      if (!results.ContainsKey(BestTrainingSolutionParameter.ActualName)) {
276        results.Add(new Result(BestTrainingSolutionParameter.ActualName, typeof(SymbolicRegressionSolution)));
[18066]277      }
278
[18076]279      var tree = (ISymbolicExpressionTree)best[SymbolicExpressionTreeName];
280      var model = new SymbolicRegressionModel(ProblemData.TargetVariable, tree, Interpreter);
281      var solution = model.CreateRegressionSolution(ProblemData);
282
283      results[BestTrainingSolutionParameter.ActualName].Value = solution;
[18066]284    }
285
[18076]286
[18065]287    public override double Evaluate(Individual individual, IRandom random) {
[18184]288      var templateTree = StructureTemplate.Tree;
289      if (templateTree == null)
290        throw new ArgumentException("No structure template defined!");
[18071]291
[18197]292      var tree = BuildTreeFromIndividual(templateTree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);
[18192]293      individual[SymbolicExpressionTreeName] = tree;
[18184]294
[18192]295      if (OptimizeParameters) {
[18197]296        var excludeNodes = GetTemplateTreeNodes(tree.Root).OfType<IVariableTreeNode>();
297        ParameterOptimization.OptimizeTreeParameters(ProblemData, tree, interpreter: Interpreter, excludeNodes: excludeNodes);
[18192]298      } else if (ApplyLinearScaling) {
[18191]299        LinearScaling.AdjustLinearScalingParams(ProblemData, tree, Interpreter);
[18177]300      }
[18076]301
[18197]302      UpdateIndividualFromTree(tree, individual, containsNumericParameters: StructureTemplate.ContainsNumericParameters);
[18194]303
[18192]304      //calculate NMSE
305      var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, ProblemData.TrainingIndices);
306      var boundedEstimatedValues = estimatedValues.LimitToRange(EstimationLimits.Lower, EstimationLimits.Upper);
307      var targetValues = ProblemData.TargetVariableTrainingValues;
308      var nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targetValues, boundedEstimatedValues, out var errorState);
309      if (errorState != OnlineCalculatorError.None)
310        nmse = 1.0;
[18076]311
[18192]312      //evaluate constraints
313      var constraints = Enumerable.Empty<ShapeConstraint>();
314      if (ProblemData is ShapeConstrainedRegressionProblemData scProbData)
315        constraints = scProbData.ShapeConstraints.EnabledConstraints;
316      if (constraints.Any()) {
317        var boundsEstimator = new IntervalArithBoundsEstimator();
318        var constraintViolations = IntervalUtil.GetConstraintViolations(constraints, boundsEstimator, ProblemData.VariableRanges, tree);
319
320        // infinite/NaN constraints
321        if (constraintViolations.Any(x => double.IsNaN(x) || double.IsInfinity(x)))
322          nmse = 1.0;
323
324        if (constraintViolations.Any(x => x > 0.0))
325          nmse = 1.0;
326      }
327
328      return nmse;
[18066]329    }
330
[18197]331    private static IEnumerable<ISymbolicExpressionTreeNode> GetTemplateTreeNodes(ISymbolicExpressionTreeNode rootNode) {
332      yield return rootNode;
333      foreach (var node in rootNode.Subtrees) {
334        if (node is SubFunctionTreeNode) {
335          yield return node;
336          continue;
337        }
338
339        foreach (var subNode in GetTemplateTreeNodes(node))
340          yield return subNode;
341      }
342    }
343
344    private static ISymbolicExpressionTree BuildTreeFromIndividual(ISymbolicExpressionTree template, Individual individual, bool containsNumericParameters) {
[18190]345      var resolvedTree = (ISymbolicExpressionTree)template.Clone();
[18194]346
347      //set numeric parameter values
[18197]348      if (containsNumericParameters) {
[18194]349        var realVector = individual.RealVector(NumericParametersEncoding);
350        var numberTreeNodes = resolvedTree.IterateNodesPrefix().OfType<NumberTreeNode>().ToArray();
351
352        if (realVector.Length != numberTreeNodes.Length)
353          throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");
354
355        for (int i = 0; i < numberTreeNodes.Length; i++)
356          numberTreeNodes[i].Value = realVector[i];
357      }
358
[18190]359      // build main tree
360      foreach (var subFunctionTreeNode in resolvedTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>()) {
361        var subFunctionTree = individual.SymbolicExpressionTree(subFunctionTreeNode.Name);
362
363        // extract function tree
364        var subTree = subFunctionTree.Root.GetSubtree(0)  // StartSymbol
365                                          .GetSubtree(0); // First Symbol
366        subTree = (ISymbolicExpressionTreeNode)subTree.Clone();
367        subFunctionTreeNode.AddSubtree(subTree);
368      }
369      return resolvedTree;
370    }
371
[18197]372    private static void UpdateIndividualFromTree(ISymbolicExpressionTree tree, Individual individual, bool containsNumericParameters) {
[18194]373      var clonedTree = (ISymbolicExpressionTree)tree.Clone();
374
375      foreach (var subFunctionTreeNode in clonedTree.IterateNodesPrefix().OfType<SubFunctionTreeNode>()) {
376        var grammar = ((ISymbolicExpressionTree)individual[subFunctionTreeNode.Name]).Root.Grammar;
377        var functionTreeNode = subFunctionTreeNode.GetSubtree(0);
378        //remove function code to make numeric parameters extraction easier
379        subFunctionTreeNode.RemoveSubtree(0);
380
381
382        var rootNode = (SymbolicExpressionTreeTopLevelNode)new ProgramRootSymbol().CreateTreeNode();
383        rootNode.SetGrammar(grammar);
384        var startNode = (SymbolicExpressionTreeTopLevelNode)new StartSymbol().CreateTreeNode();
385        startNode.SetGrammar(grammar);
386
387        rootNode.AddSubtree(startNode);
388        startNode.AddSubtree(functionTreeNode);
389        var functionTree = new SymbolicExpressionTree(rootNode);
390        individual[subFunctionTreeNode.Name] = functionTree;
391      }
392
393      //set numeric parameter values
[18197]394      if (containsNumericParameters) {
[18194]395        var realVector = individual.RealVector(NumericParametersEncoding);
396        var numberTreeNodes = clonedTree.IterateNodesPrefix().OfType<NumberTreeNode>().ToArray();
397
398        if (realVector.Length != numberTreeNodes.Length)
399          throw new InvalidOperationException("The number of numeric parameters in the tree does not match the provided numerical values.");
400
401        for (int i = 0; i < numberTreeNodes.Length; i++)
402          realVector[i] = numberTreeNodes[i].Value;
403      }
404    }
405
[18099]406    public void Load(IRegressionProblemData data) {
407      ProblemData = data;
408    }
[18061]409  }
410}
Note: See TracBrowser for help on using the repository browser.