source: trunk/sources/HeuristicLab.Problems.GeneticProgramming/3.3/BasicSymbolicRegression/Problem.cs @ 12937

Last change on this file since 12937 was 12937, checked in by gkronber, 4 years ago

#2472: added an implementation of Koza-style symbolic regression as BasicProblem to Problems.GeneticProgramming

File size: 8.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31using HeuristicLab.Problems.Instances;
32
33
namespace HeuristicLab.Problems.GeneticProgramming.BasicSymbolicRegression {
  /// <summary>
  /// Koza-style symbolic regression problem. Candidate solutions are expression trees built from the
  /// binary functions +, -, * and % (protected division), the allowed input variables of the problem
  /// data and a set of random numeric constants. The quality of a tree is the squared Pearson
  /// correlation between its output and the target variable on the training rows (maximized).
  /// </summary>
  [Item("Koza-style Symbolic Regression", "An implementation of symbolic regression without bells-and-whistles. Use \"Symbolic Regression Problem (single-objective)\" if you want to use all features.")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 900)]
  [StorableClass]
  public sealed class Problem : SymbolicExpressionTreeProblem, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {

    #region parameter names

    private const string ProblemDataParameterName = "ProblemData";

    #endregion

    // Upper bound on the number of random draws used to find one unused constant name.
    // Only a finite number of two-decimal strings exists in [-10, 10], so an unbounded retry loop
    // would never terminate once all of them are taken.
    private const int MaxErcAttempts = 10000;

    /// <summary>Raised after the value of the problem data parameter has been replaced.</summary>
    public event EventHandler ProblemDataChanged;

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    /// <summary>The parameter holding the regression problem data.</summary>
    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }

    #endregion

    #region Properties

    /// <summary>The regression problem data stored in <see cref="ProblemDataParameter"/>.</summary>
    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }


    #endregion

    /// <summary>Always true: the quality returned by <see cref="Evaluate"/> (squared correlation) is maximized.</summary>
    public override bool Maximization {
      get { return true; }
    }

    /// <summary>
    /// Creates a problem with default regression problem data, a tree encoding
    /// (constructed with the arguments 100 and 17) and a grammar derived from the problem data.
    /// </summary>
    public Problem()
      : base() {
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data for the regression problem", new RegressionProblemData()));

      var g = new SimpleSymbolicExpressionGrammar(); // empty grammar is replaced in UpdateGrammar()
      base.Encoding = new SymbolicExpressionTreeEncoding(g, 100, 17);

      UpdateGrammar();
      RegisterEventHandlers();
    }


    /// <summary>
    /// Calculates the quality of a tree as the squared Pearson correlation coefficient between the
    /// target variable and the tree output on the training rows.
    /// </summary>
    /// <param name="tree">The expression tree to evaluate.</param>
    /// <param name="random">Not used by this implementation.</param>
    /// <returns>r * r, or 0 if the correlation could not be calculated.</returns>
    public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
      // Doesn't use classes from HeuristicLab.Problems.DataAnalysis.Symbolic to make sure that the implementation can be fully understood easily.
      // HeuristicLab.Problems.DataAnalysis.Symbolic would already provide all the necessary functionality (esp. interpreter) but at a much higher complexity.
      // Another argument is that we don't need a reference to HeuristicLab.Problems.DataAnalysis.Symbolic

      // read the property once so that rows, target and dataset all come from the same problem data instance
      var problemData = ProblemData;
      var rows = problemData.TrainingIndices.ToArray();
      var target = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      var predicted = Interpret(tree, problemData.Dataset, rows);

      OnlineCalculatorError errorState;
      var r = OnlinePearsonsRCalculator.Calculate(target, predicted, out errorState);
      // treat a failed calculation (and, defensively, a NaN result) as "no correlation"
      if (errorState != OnlineCalculatorError.None || double.IsNaN(r)) r = 0;
      return r * r;
    }

    /// <summary>Evaluates the tree for the given rows; one output value per row.</summary>
    private IEnumerable<double> Interpret(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable<int> rows) {
      // skip programRoot and startSymbol
      return InterpretRec(tree.Root.GetSubtree(0).GetSubtree(0), dataset, rows);
    }


    /// <summary>
    /// Recursively evaluates a tree node. The symbol name decides the operation: "+", "*", "-" and "%"
    /// combine the first two subtrees, a name that parses as a number is a constant, and any other
    /// name is looked up as a variable in the dataset.
    /// </summary>
    private IEnumerable<double> InterpretRec(ISymbolicExpressionTreeNode node, IDataset dataset, IEnumerable<int> rows) {
      var symbolName = node.Symbol.Name;

      switch (symbolName) {
        case "+": return InterpretBinary(node, dataset, rows, (x, y) => x + y);
        case "*": return InterpretBinary(node, dataset, rows, (x, y) => x * y);
        case "-": return InterpretBinary(node, dataset, rows, (x, y) => x - y);
        case "%": return InterpretBinary(node, dataset, rows, (x, y) => y.IsAlmost(0.0) ? 0.0 : x / y); // protected division
        default: {
            double erc;
            if (TryParseConstant(symbolName, out erc)) {
              return rows.Select(_ => erc);
            } else {
              // assume that this is a variable name
              return dataset.GetDoubleValues(symbolName, rows);
            }
          }
      }
    }

    /// <summary>Evaluates the first two subtrees of <paramref name="node"/> and combines their outputs element-wise with <paramref name="f"/>.</summary>
    private IEnumerable<double> InterpretBinary(ISymbolicExpressionTreeNode node, IDataset dataset, IEnumerable<int> rows, Func<double, double, double> f) {
      var leftResult = InterpretRec(node.GetSubtree(0), dataset, rows);
      var rightResult = InterpretRec(node.GetSubtree(1), dataset, rows);
      return leftResult.Zip(rightResult, f);
    }

    /// <summary>
    /// Tries to read a symbol name as a numeric constant. Names generated by <see cref="UpdateGrammar"/>
    /// are written with the invariant culture, so that format is tried first. The culture-sensitive
    /// parse is kept as a fallback so that constant names written with a different decimal separator
    /// are still interpreted the way they were before.
    /// </summary>
    private static bool TryParseConstant(string symbolName, out double value) {
      if (double.TryParse(symbolName, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out value))
        return true;
      return double.TryParse(symbolName, out value);
    }

    #region item cloning and persistence
    // persistence
    [StorableConstructor]
    private Problem(bool deserializing) : base(deserializing) { }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterEventHandlers();
    }

    // cloning
    private Problem(Problem original, Cloner cloner)
      : base(original, cloner) {
      RegisterEventHandlers();
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new Problem(this, cloner);
    }
    #endregion

    #region events

    /// <summary>Subscribes to changes of the problem data parameter and of the current problem data.</summary>
    private void RegisterEventHandlers() {
      ProblemDataParameter.ValueChanged += new EventHandler(ProblemDataParameter_ValueChanged);
      AttachProblemDataChangedHandler();
    }

    /// <summary>
    /// Subscribes <see cref="ProblemData_Changed"/> to the current problem data exactly once.
    /// The handler is removed before it is added, so an instance that is assigned repeatedly
    /// does not accumulate duplicate subscriptions.
    /// </summary>
    private void AttachProblemDataChangedHandler() {
      var data = ProblemDataParameter.Value;
      if (data == null) return;
      data.Changed -= new EventHandler(ProblemData_Changed);
      data.Changed += new EventHandler(ProblemData_Changed);
    }

    private void ProblemDataParameter_ValueChanged(object sender, EventArgs e) {
      AttachProblemDataChangedHandler();
      OnProblemDataChanged();
      OnReset();
    }

    private void ProblemData_Changed(object sender, EventArgs e) {
      OnReset();
    }

    /// <summary>Rebuilds the grammar for the new problem data and raises <see cref="ProblemDataChanged"/>.</summary>
    private void OnProblemDataChanged() {
      UpdateGrammar();

      var handler = ProblemDataChanged;
      if (handler != null) handler(this, EventArgs.Empty);
    }

    /// <summary>
    /// Builds a new grammar for the current problem data: the four binary functions, one terminal
    /// per allowed input variable and up to two random constants per input variable.
    /// </summary>
    private void UpdateGrammar() {
      // whenever ProblemData is changed we create a new grammar with the necessary symbols
      var g = new SimpleSymbolicExpressionGrammar();
      g.AddSymbols(new[] { "+", "*", "%", "-" }, 2, 2); // % is protected division 1/0 := 0

      var variableNames = ProblemData.AllowedInputVariables.ToArray();
      foreach (var variableName in variableNames)
        g.AddTerminalSymbol(variableName);

      // names already present in the grammar; used to detect duplicate constants without rescanning all symbols
      var usedNames = new HashSet<string>(g.Symbols.Select(sy => sy.Name));

      // generate ephemeral random consts in the range [-10..+10[ (2*number of variables)
      var rand = new System.Random();
      int ercCount = variableNames.Length * 2;
      for (int i = 0; i < ercCount; i++) {
        string newErcSy = null;
        // it might happen that we generate the same constant twice, so retry (a bounded number of times)
        for (int attempt = 0; attempt < MaxErcAttempts; attempt++) {
          // invariant culture: the symbol name must be readable independent of the culture of the machine
          var candidate = string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:F2}", rand.NextDouble() * 20 - 10);
          if (usedNames.Add(candidate)) {
            newErcSy = candidate;
            break;
          }
        }
        if (newErcSy == null) break; // no unused constant found; the two-decimal value space is (practically) exhausted
        g.AddTerminalSymbol(newErcSy);
      }

      Encoding.Grammar = g;
    }

    #endregion

    #region Import & Export
    /// <summary>Takes over name, description and data of the given problem data.</summary>
    /// <param name="data">The problem data to load.</param>
    public void Load(IRegressionProblemData data) {
      Name = data.Name;
      Description = data.Description;
      ProblemData = data;
    }

    /// <summary>Returns the current problem data.</summary>
    public IRegressionProblemData Export() {
      return ProblemData;
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.