source: branches/3136_Structural_GP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/StructuredSymbolicRegressionSingleObjectiveProblem.cs @ 18084

Last change on this file since 18084 was 18084, checked in by dpiringe, 2 months ago

#3136

  • added a new problem data provider AsadzadehProvider and the corresponding instance Asadzadeh1
    • implements the test setup of the paper "Symbolic regression based hybrid semiparametric modelling of processes: An example case of a bending process"
  • used the Asadzadeh1 instance in StructuredSymbolicRegressionSingleObjectiveProblem for default setup
  • added the SubFunctionSymbol in DerivativeCalculator and IntervalArithBoundsEstimator
File size: 14.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Core;
7using HeuristicLab.Optimization;
8using HEAL.Attic;
9using HeuristicLab.Common;
10using HeuristicLab.Problems.Instances;
11using HeuristicLab.Parameters;
12using HeuristicLab.Data;
13using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
14using HeuristicLab.PluginInfrastructure;
15using HeuristicLab.Problems.Instances.DataAnalysis.Regression.Asadzadeh;
16using HeuristicLab.Problems.Instances.DataAnalysis;
17
18namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
19  [StorableType("7464E84B-65CC-440A-91F0-9FA920D730F9")]
20  [Item(Name = "Structured Symbolic Regression Single Objective Problem (single-objective)", Description = "A problem with a structural definition and unfixed subfunctions.")]
21  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 150)]
22  public class StructuredSymbolicRegressionSingleObjectiveProblem : SingleObjectiveBasicProblem<MultiEncoding>, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData> {
23
#region Constants
// Names under which the parameters of this problem are registered in the Parameters collection.
private const string TreeEvaluatorParameterName = "TreeEvaluator";
private const string ProblemDataParameterName = "ProblemData";
private const string StructureTemplateParameterName = "Structure Template";
private const string InterpreterParameterName = "Interpreter";
private const string EstimationLimitsParameterName = "EstimationLimits";
private const string BestTrainingSolutionParameterName = "Best Training Solution";

// Key under which Evaluate stores the assembled tree on an individual; Analyze reads it back.
private const string SymbolicExpressionTreeName = "SymbolicExpressionTree";

// Description passed to the structure template parameter (spelling of "corresponding" fixed).
private const string StructureTemplateDescriptionText =
  "Enter your expression as string in infix format into the empty input field.\n" +
  "By checking the \"Apply Linear Scaling\" checkbox you can add the relevant scaling terms to your expression.\n" +
  "After entering the expression click parse to build the tree.\n" +
  "To edit the defined sub-functions, click on the corresponding colored node in the tree view.";
#endregion
40
#region Parameters
/// <summary>Parameter holding the evaluator whose type selects the error measure used by Evaluate.</summary>
public IConstrainedValueParameter<SymbolicRegressionSingleObjectiveEvaluator> TreeEvaluatorParameter {
  get { return (IConstrainedValueParameter<SymbolicRegressionSingleObjectiveEvaluator>)Parameters[TreeEvaluatorParameterName]; }
}

/// <summary>Parameter holding the regression problem data.</summary>
public IValueParameter<IRegressionProblemData> ProblemDataParameter {
  get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
}

/// <summary>Parameter holding the structure template.</summary>
public IFixedValueParameter<StructureTemplate> StructureTemplateParameter {
  get { return (IFixedValueParameter<StructureTemplate>)Parameters[StructureTemplateParameterName]; }
}

/// <summary>Parameter holding the expression tree interpreter.</summary>
public IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
  get { return (IValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
}

/// <summary>Parameter holding the lower and upper estimation limits passed to the evaluators.</summary>
public IFixedValueParameter<DoubleLimit> EstimationLimitsParameter {
  get { return (IFixedValueParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
}

/// <summary>Result parameter whose actual name is used as the key for the best training solution in Analyze.</summary>
public IResultParameter<ISymbolicRegressionSolution> BestTrainingSolutionParameter {
  get { return (IResultParameter<ISymbolicRegressionSolution>)Parameters[BestTrainingSolutionParameterName]; }
}
#endregion
49
#region Properties

/// <summary>
/// The regression problem data stored in <see cref="ProblemDataParameter"/>.
/// The setter writes the parameter value first and raises <see cref="ProblemDataChanged"/> afterwards.
/// </summary>
public IRegressionProblemData ProblemData {
  get { return ProblemDataParameter.Value; }
  set {
    ProblemDataParameter.Value = value;
    var subscribers = ProblemDataChanged;
    if (subscribers != null) {
      subscribers(this, EventArgs.Empty);
    }
  }
}

/// <summary>The structure template stored in <see cref="StructureTemplateParameter"/>.</summary>
public StructureTemplate StructureTemplate {
  get { return StructureTemplateParameter.Value; }
}

/// <summary>The interpreter stored in <see cref="InterpreterParameter"/>.</summary>
public ISymbolicDataAnalysisExpressionTreeInterpreter Interpreter {
  get { return InterpreterParameter.Value; }
}

// Explicit IDataAnalysisProblem members forward to the typed members of this class.
IParameter IDataAnalysisProblem.ProblemDataParameter {
  get { return ProblemDataParameter; }
}
IDataAnalysisProblemData IDataAnalysisProblem.ProblemData {
  get { return ProblemData; }
}

/// <summary>The estimation limits stored in <see cref="EstimationLimitsParameter"/>.</summary>
public DoubleLimit EstimationLimits {
  get { return EstimationLimitsParameter.Value; }
}

/// <summary>Always <c>false</c>: qualities of this problem are minimized.</summary>
public override bool Maximization {
  get { return false; }
}
#endregion

#region EventHandlers
/// <summary>Raised by the <see cref="ProblemData"/> setter after the new value has been assigned.</summary>
public event EventHandler ProblemDataChanged;
#endregion
75
76    #region Constructors & Cloning
/// <summary>
/// Creates the problem with the Asadzadeh1 instance as default problem data, registers all
/// parameters and analyzers, and finally assigns a default structure template.
/// </summary>
public StructuredSymbolicRegressionSingleObjectiveProblem() {
  // default data: the Asadzadeh1 instance wrapped into shape-constrained problem data
  var defaultData = new ShapeConstrainedRegressionProblemData(
    new AsadzadehProvider().LoadData(new Asadzadeh1()));

  // estimation limits: the target range widened on both sides by ten times its width
  var targetRange = defaultData.VariableRanges.GetInterval(defaultData.TargetVariable);
  var margin = targetRange.Width * 10;
  var lowerLimit = targetRange.LowerBound - margin;
  var upperLimit = targetRange.UpperBound + margin;

  // the handler is attached before the template becomes a parameter value
  var template = new StructureTemplate();
  template.Changed += OnTemplateChanged;

  // only evaluators that optimize in the same direction as this problem are offered
  var discoveredEvaluators = ApplicationManager.Manager
    .GetInstances<SymbolicRegressionSingleObjectiveEvaluator>()
    .Where(candidate => candidate.Maximization == Maximization);
  var evaluatorChoices = new ItemSet<SymbolicRegressionSingleObjectiveEvaluator>(discoveredEvaluators);
  var evaluatorParameter = new ConstrainedValueParameter<SymbolicRegressionSingleObjectiveEvaluator>(
    TreeEvaluatorParameterName, evaluatorChoices, evaluatorChoices.First());
  Parameters.Add(evaluatorParameter);

  var problemDataParameter = new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, defaultData);
  Parameters.Add(problemDataParameter);
  ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;

  var templateParameter = new FixedValueParameter<StructureTemplate>(
    StructureTemplateParameterName, StructureTemplateDescriptionText, template);
  Parameters.Add(templateParameter);

  // hidden before it is added, exactly like an object initializer would do
  var interpreterParameter = new ValueParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(
    InterpreterParameterName, new SymbolicDataAnalysisExpressionTreeInterpreter());
  interpreterParameter.Hidden = true;
  Parameters.Add(interpreterParameter);

  var limitsParameter = new FixedValueParameter<DoubleLimit>(
    EstimationLimitsParameterName, new DoubleLimit(lowerLimit, upperLimit));
  Parameters.Add(limitsParameter);
  limitsParameter.Hidden = true;

  var bestSolutionParameter = new ResultParameter<ISymbolicRegressionSolution>(BestTrainingSolutionParameterName, "");
  Parameters.Add(bestSolutionParameter);
  bestSolutionParameter.Hidden = true;

  this.EvaluatorParameter.Hidden = true;

  Operators.Add(new SymbolicDataAnalysisVariableFrequencyAnalyzer());
  Operators.Add(new MinAverageMaxSymbolicExpressionTreeLengthAnalyzer());
  Operators.Add(new SymbolicExpressionSymbolFrequencyAnalyzer());

  // default template matching the default instance; assigned last so that all parameters
  // exist by then (presumably this raises Changed and thereby builds the encodings - TODO confirm)
  const string defaultTemplate =
    "((210000 / (210000 + h)) * ((sigma_y * t * t) / (wR * Rt * t)) + " +
    "PlasticHardening(_) - Elasticity(_)) * C(_)";
  template.Template = defaultTemplate;
}
137
/// <summary>
/// Cloning constructor. Event subscriptions are not part of the cloned state, therefore the
/// handlers wired up by the default constructor are registered again on the clone.
/// </summary>
/// <param name="original">The instance to copy.</param>
/// <param name="cloner">The cloner that tracks already cloned objects.</param>
public StructuredSymbolicRegressionSingleObjectiveProblem(StructuredSymbolicRegressionSingleObjectiveProblem original,
  Cloner cloner) : base(original, cloner) {
  RegisterEventHandlers();
}

/// <summary>Constructor used by the persistence framework; handlers are attached in the deserialization hook.</summary>
[StorableConstructor]
protected StructuredSymbolicRegressionSingleObjectiveProblem(StorableConstructorFlag _) : base(_) { }

// Re-attaches the event handlers once a stored instance has been restored.
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  RegisterEventHandlers();
}

// Subscribes to the same events as the default constructor: template changes rebuild the
// encodings, a replaced problem data value resets the template.
private void RegisterEventHandlers() {
  if (StructureTemplate != null)
    StructureTemplate.Changed += OnTemplateChanged;
  ProblemDataParameter.ValueChanged += ProblemDataParameterValueChanged;
}
143    #endregion
144
145    #region Cloning
/// <summary>Creates a copy of this problem through the cloning constructor.</summary>
/// <param name="cloner">The cloner handed on to the cloning constructor.</param>
/// <returns>The newly created problem instance.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  return new StructuredSymbolicRegressionSingleObjectiveProblem(this, cloner);
}
148    #endregion
149
// Handler for ProblemDataParameter.ValueChanged: resets the structure template.
// InfoBox for Reset?
private void ProblemDataParameterValueChanged(object sender, EventArgs e) =>
  StructureTemplate.Reset();
154
// Handler for StructureTemplate.Changed: rebuilds the encodings from the current template.
private void OnTemplateChanged(object sender, EventArgs args) =>
  SetupStructureTemplate();
158
// Rebuilds the multi-encoding: removes every registered encoding, then configures the variable
// symbol of each sub-function of the template and adds one tree encoding per sub-function name.
private void SetupStructureTemplate() {
  // work on a snapshot because the encodings are removed while walking over them
  var registered = Encoding.Encodings.ToArray();
  foreach (var encoding in registered) {
    Encoding.Remove(encoding);
  }

  foreach (var subFunction in StructureTemplate.SubFunctions.Values) {
    SetupVariables(subFunction);

    var alreadyPresent = Encoding.Encodings.Any(existing => existing.Name == subFunction.Name);
    if (alreadyPresent) continue; // to prevent the same encoding twice

    var treeEncoding = new SymbolicExpressionTreeEncoding(
      subFunction.Name,
      subFunction.Grammar,
      subFunction.MaximumSymbolicExpressionTreeLength,
      subFunction.MaximumSymbolicExpressionTreeDepth);
    Encoding.Add(treeEncoding);
  }
}
173
/// <summary>
/// Runs the base analysis and publishes the best individual of the population as regression
/// solution under the actual name of <see cref="BestTrainingSolutionParameter"/>.
/// </summary>
/// <param name="individuals">The individuals of the current population.</param>
/// <param name="qualities">The qualities belonging to <paramref name="individuals"/>, index by index.</param>
/// <param name="results">The result collection that receives the best training solution.</param>
/// <param name="random">Random number generator, only forwarded to the base implementation.</param>
public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
  base.Analyze(individuals, qualities, results, random);

  // only pairs that have both an individual and a quality are considered
  var pairCount = Math.Min(individuals.Length, qualities.Length);
  var bestIndex = FindBestIndex(qualities, pairCount, Maximization);
  if (bestIndex < 0) return; // empty population: nothing to report
  var best = individuals[bestIndex];

  if (!results.ContainsKey(BestTrainingSolutionParameter.ActualName)) {
    results.Add(new Result(BestTrainingSolutionParameter.ActualName, typeof(SymbolicRegressionSolution)));
  }

  // the assembled tree was stored on the individual by Evaluate
  var tree = (ISymbolicExpressionTree)best[SymbolicExpressionTreeName];

  var model = new SymbolicRegressionModel(ProblemData.TargetVariable, tree, Interpreter);
  var solution = model.CreateRegressionSolution(ProblemData);

  results[BestTrainingSolutionParameter.ActualName].Value = solution;
}

// Returns the index of the best quality among the first count entries, or -1 if count is 0.
// NaN qualities are skipped: sorting doubles orders NaN before every number, which would make
// a NaN individual win under minimization. Ties keep the first minimum respectively the last
// maximum. If every quality is NaN the first index is returned.
private static int FindBestIndex(double[] qualities, int count, bool maximization) {
  var bestIndex = -1;
  for (var i = 0; i < count; i++) {
    var quality = qualities[i];
    if (double.IsNaN(quality)) continue;
    if (bestIndex < 0) {
      bestIndex = i;
      continue;
    }
    var isBetter = maximization ? quality >= qualities[bestIndex] : quality < qualities[bestIndex];
    if (isBetter) bestIndex = i;
  }
  if (bestIndex < 0 && count > 0) return 0;
  return bestIndex;
}
191
192
/// <summary>
/// Assembles the complete tree for <paramref name="individual"/>, optionally adjusts its linear
/// scaling constants, stores the tree on the individual and calculates its quality on the
/// training indices with the error measure of the selected tree evaluator.
/// </summary>
/// <param name="individual">The individual providing one tree per sub-function; receives the assembled tree.</param>
/// <param name="random">Not used by this implementation.</param>
/// <returns>The value returned by the static Calculate method of the selected evaluator type.</returns>
public override double Evaluate(Individual individual, IRandom random) {
  var tree = BuildTree(individual);

  if (StructureTemplate.ApplyLinearScaling)
    AdjustLinearScalingParams(ProblemData, tree, Interpreter);

  // Analyze reads the tree back from the individual under the same key
  individual[SymbolicExpressionTreeName] = tree;

  // TODO: use the evaluator's Evaluate method instead of the static Calculate methods
  //       -> a fake ExecutionContext is needed
  // The dispatch compares the exact runtime type (no subclass matching) directly instead of
  // going through Type.GUID.
  var evaluatorType = TreeEvaluatorParameter.Value.GetType();

  if (evaluatorType == typeof(NMSESingleObjectiveConstraintsEvaluator)) {
    return NMSESingleObjectiveConstraintsEvaluator.Calculate(
      Interpreter, tree,
      EstimationLimits.Lower, EstimationLimits.Upper,
      ProblemData, ProblemData.TrainingIndices, new IntervalArithBoundsEstimator());
  }
  if (evaluatorType == typeof(SymbolicRegressionLogResidualEvaluator)) {
    return SymbolicRegressionLogResidualEvaluator.Calculate(
      Interpreter, tree,
      EstimationLimits.Lower, EstimationLimits.Upper,
      ProblemData, ProblemData.TrainingIndices);
  }
  if (evaluatorType == typeof(SymbolicRegressionMeanRelativeErrorEvaluator)) {
    return SymbolicRegressionMeanRelativeErrorEvaluator.Calculate(
      Interpreter, tree,
      EstimationLimits.Lower, EstimationLimits.Upper,
      ProblemData, ProblemData.TrainingIndices);
  }
  if (evaluatorType == typeof(SymbolicRegressionSingleObjectiveMaxAbsoluteErrorEvaluator)) {
    return SymbolicRegressionSingleObjectiveMaxAbsoluteErrorEvaluator.Calculate(
      Interpreter, tree,
      EstimationLimits.Lower, EstimationLimits.Upper,
      ProblemData, ProblemData.TrainingIndices, false);
  }
  if (evaluatorType == typeof(SymbolicRegressionSingleObjectiveMeanAbsoluteErrorEvaluator)) {
    return SymbolicRegressionSingleObjectiveMeanAbsoluteErrorEvaluator.Calculate(
      Interpreter, tree,
      EstimationLimits.Lower, EstimationLimits.Upper,
      ProblemData, ProblemData.TrainingIndices, false);
  }

  // SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator; every evaluator type that is
  // not listed above ends up here as well
  return SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
    Interpreter, tree,
    EstimationLimits.Lower, EstimationLimits.Upper,
    ProblemData, ProblemData.TrainingIndices, false);
}
243
// Recalculates the two linear scaling constants of the tree from its current outputs on the
// training indices. The node below Root -> first subtree -> first subtree is treated as the
// offset node, its first non-constant child as the scaling node (assumes the layout produced
// by a template with linear scaling - TODO confirm). The constants are only overwritten when
// the calculator reports no error.
private static void AdjustLinearScalingParams(IRegressionProblemData problemData, ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter) {
  var offsetNode = tree.Root.GetSubtree(0).GetSubtree(0);
  var scalingNode = offsetNode.Subtrees.First(child => !(child is ConstantTreeNode));

  var offsetConstant = offsetNode.Subtrees.OfType<ConstantTreeNode>().First();
  var scalingConstant = scalingNode.Subtrees.OfType<ConstantTreeNode>().First();

  var trainingRows = problemData.TrainingIndices;
  var estimated = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, trainingRows);
  var targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);

  OnlineLinearScalingParameterCalculator.Calculate(estimated, targets, out double alpha, out double beta, out OnlineCalculatorError errorState);
  if (errorState != OnlineCalculatorError.None) return;

  offsetConstant.Value = alpha;
  scalingConstant.Value = beta;
}
260
// Creates the complete expression tree for an individual: clones the template tree and hangs
// a clone of the individual's tree for each sub-function below the matching sub-function node.
// Throws an ArgumentException if the template has no tree yet.
private ISymbolicExpressionTree BuildTree(Individual individual) {
  var template = StructureTemplate.Tree;
  if (template == null)
    throw new ArgumentException("No structure template defined!");

  var assembled = (ISymbolicExpressionTree)template.Clone();

  // build main tree
  var placeholders = assembled.IterateNodesPrefix().OfType<SubFunctionTreeNode>();
  foreach (var placeholder in placeholders) {
    var evolvedTree = individual.SymbolicExpressionTree(placeholder.Name);

    // skip the two leading nodes (Start, then Offset) and attach a copy of what follows
    var startNode = evolvedTree.Root.GetSubtree(0);
    var payload = startNode.GetSubtree(0);
    placeholder.AddSubtree((ISymbolicExpressionTreeNode)payload.Clone());
  }

  return assembled;
}
280
// Configures the "Variable" symbol of a sub-function's grammar (adding the symbol if it is
// missing): all input variables of the problem data become known to the symbol, the allowed
// ones are either every input except the target (argument "_") or exactly the listed arguments.
// Throws an AggregateException with one ArgumentException per argument that is not a valid input.
private void SetupVariables(SubFunction subFunction) {
  var varSym = (Variable)subFunction.Grammar.GetSymbol("Variable");
  if (varSym == null) {
    varSym = new Variable();
    subFunction.Grammar.AddSymbol(varSym);
  }

  // materialize once; the deferred queries would otherwise be re-evaluated on every use
  var targetVariable = ProblemData.TargetVariable;
  var allVariables = ProblemData.InputVariables.Select(x => x.Value).ToList();
  var allInputs = allVariables.Where(x => x != targetVariable).ToList();

  // set all variables
  varSym.AllVariableNames = allVariables;

  // set all allowed variables
  if (subFunction.Arguments.Contains("_")) {
    varSym.VariableNames = allInputs;
  } else {
    var validInputs = new HashSet<string>(allInputs); // constant-time membership test per argument
    var vars = new List<string>();
    var exceptions = new List<Exception>();
    foreach (var arg in subFunction.Arguments) {
      if (validInputs.Contains(arg))
        vars.Add(arg);
      else
        exceptions.Add(new ArgumentException($"The argument '{arg}' for sub-function '{subFunction.Name}' is not a valid variable."));
    }
    // report every invalid argument at once instead of stopping at the first one
    if (exceptions.Any())
      throw new AggregateException(exceptions);
    varSym.VariableNames = vars;
  }

  varSym.Enabled = true;
}
313
/// <summary>Takes over a loaded problem instance by assigning it to <see cref="ProblemData"/>.</summary>
/// <param name="data">The regression problem data to use.</param>
public void Load(IRegressionProblemData data) {
  ProblemData = data;
}
315  }
316}
Note: See TracBrowser for help on using the repository browser.