Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs @ 3651

Last change on this file since 3651 was 3651, checked in by gkronber, 14 years ago

Implemented analyzers for symbolic expression tree encoding, artificial ant problem and symbolic regression problem. #999 (Refactor algorithm analysis and tracing)

File size: 21.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis.Regression;
35using HeuristicLab.Problems.DataAnalysis.Symbolic;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ArchitectureManipulators;
37using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Manipulators;
38using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Crossovers;
39using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
40using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
41using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
42using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
43
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Single-objective symbolic regression problem. It holds the solution creator, evaluator,
  /// interpreter, grammar and expression size limits as parameters and keeps the dependent
  /// operators and analyzers wired to those parameters.
  /// </summary>
  /// <remarks>
  /// The training partition of the problem data is split in half: the first half is handed to
  /// the evaluator, the second half to the validation analyzer (see
  /// <see cref="TrainingSamplesEnd"/> and <see cref="ValidationSamplesStart"/>).
  /// </remarks>
  [Item("Symbolic Regression Problem", "Represents a symbolic regression problem.")]
  [Creatable("Problems")]
  [StorableClass]
  public class SymbolicRegressionProblem : DataAnalysisProblem, ISingleObjectiveProblem {

    #region Parameter Properties
    // Typed accessors for the entries of the Parameters collection. The string keys must match
    // the names used when the parameters are added in the default constructor.
    public ValueParameter<BoolValue> MaximizationParameter {
      get { return (ValueParameter<BoolValue>)Parameters["Maximization"]; }
    }
    IParameter ISingleObjectiveProblem.MaximizationParameter {
      get { return MaximizationParameter; }
    }
    public ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }
    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
    }
    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
    }
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }
    public ValueParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ValueParameter<ISymbolicRegressionEvaluator>)Parameters["Evaluator"]; }
    }
    IParameter IProblem.EvaluatorParameter {
      get { return EvaluatorParameter; }
    }
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }
    public OptionalValueParameter<DoubleValue> BestKnownQualityParameter {
      get { return (OptionalValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
    }
    IParameter ISingleObjectiveProblem.BestKnownQualityParameter {
      get { return BestKnownQualityParameter; }
    }
    #endregion

    #region Properties
    // Convenience accessors that read/write the value of the corresponding parameter.
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }
    public SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }

    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.Value; }
      set { EvaluatorParameter.Value = value; }
    }
    ISingleObjectiveEvaluator ISingleObjectiveProblem.Evaluator {
      get { return EvaluatorParameter.Value; }
    }
    IEvaluator IProblem.Evaluator {
      get { return EvaluatorParameter.Value; }
    }
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      get { return FunctionTreeGrammarParameter.Value; }
    }
    public DoubleValue BestKnownQuality {
      get { return BestKnownQualityParameter.Value; }
    }

    // Rebuilt by InitializeOperators(); not populated before Initialize() has run.
    private List<IOperator> operators;

    /// <summary>All operators offered by this problem (tree operators and analyzers).</summary>
    public IEnumerable<IOperator> Operators {
      get { return operators; }
    }

    /// <summary>The subset of <see cref="Operators"/> that are symbolic regression population analyzers.</summary>
    public IEnumerable<ISymbolicRegressionSolutionPopulationAnalyzer> Analyzers {
      get { return operators.OfType<ISymbolicRegressionSolutionPopulationAnalyzer>(); }
    }

    /// <summary>
    /// Factor applied to the target value range when the estimation limits are derived
    /// in UpdateEstimationLimits(). A new instance with the value 10.0 is returned on every access.
    /// </summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }

    /// <summary>First sample used for training; a copy of the problem data's training start.</summary>
    public IntValue TrainingSamplesStart {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
    }

    /// <summary>
    /// End of the samples used for training: the midpoint (integer division) between the
    /// problem data's training start and training end.
    /// </summary>
    public IntValue TrainingSamplesEnd {
      get {
        int dataStart = DataAnalysisProblemData.TrainingSamplesStart.Value;
        int dataEnd = DataAnalysisProblemData.TrainingSamplesEnd.Value;
        return new IntValue((dataStart + dataEnd) / 2);
      }
    }

    /// <summary>Validation starts where training ends (see <see cref="TrainingSamplesEnd"/>).</summary>
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }

    /// <summary>Validation ends at the problem data's training end.</summary>
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
    }
    #endregion

    /// <summary>
    /// Creates a problem with default creator, evaluator, grammar and interpreter, registers all
    /// parameters, and wires creator, evaluator, grammar, estimation limits and operators.
    /// </summary>
    public SymbolicRegressionProblem()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      var evaluator = new SymbolicRegressionScaledMeanSquaredErrorEvaluator();
      var grammar = new ArithmeticExpressionGrammar();
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var interpreter = new SimpleArithmeticExpressionInterpreter();
      Parameters.Add(new ValueParameter<BoolValue>("Maximization", "Set to false as the error of the regression model should be minimized.", (BoolValue)new BoolValue(false).AsReadOnly()));
      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));

      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";
      evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";

      ParameterizeSolutionCreator();
      ParameterizeEvaluator();

      UpdateGrammar();
      UpdateEstimationLimits();
      Initialize();
    }

    // NOTE(review): chains to the parameterless base constructor; confirm that the base class
    // offers no deserialization constructor that should be used here instead.
    [StorableConstructor]
    private SymbolicRegressionProblem(bool deserializing) : base() { }

    /// <summary>Rebuilds the operator list and re-attaches event handlers after deserialization.</summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      Initialize();
    }

    /// <summary>Clones the problem and re-initializes operators and event handlers on the clone.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      SymbolicRegressionProblem clone = (SymbolicRegressionProblem)base.Clone(cloner);
      clone.Initialize();
      return clone;
    }

    /// <summary>Subscribes to the ValueChanged events of the parameters themselves (value object replaced).</summary>
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      MaxFunctionDefiningBranchesParameter.ValueChanged += ArchitectureParameter_ValueChanged;
      SolutionCreatorParameter.ValueChanged += SolutionCreatorParameter_ValueChanged;
      EvaluatorParameter.ValueChanged += EvaluatorParameter_ValueChanged;
    }

    /// <summary>Subscribes to events of the objects currently held by the parameters.</summary>
    private void RegisterParameterEvents() {
      RegisterArchitectureValueEvents();
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      Evaluator.QualityParameter.ActualNameChanged += Evaluator_QualityParameter_ActualNameChanged;
    }

    /// <summary>
    /// Hooks the ValueChanged event of the IntValue objects currently held by the two architecture
    /// parameters. Unsubscribing first makes the call idempotent, so it can safely be repeated
    /// whenever one of the value objects may have been replaced.
    /// </summary>
    private void RegisterArchitectureValueEvents() {
      MaxFunctionArgumentsParameter.Value.ValueChanged -= ArchitectureParameter_ValueChanged;
      MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameter_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged -= ArchitectureParameter_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameter_ValueChanged;
    }

    #region event handling
    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
      base.OnDataAnalysisProblemChanged(e);
      // partitions could be changed
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      // input variables could have been changed
      UpdateGrammar();
      // estimation limits have to be recalculated
      UpdateEstimationLimits();
    }
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }
    protected virtual void OnGrammarChanged(EventArgs e) { }
    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      // the new creator must be observed and parameterized like the previous one
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }

    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      ParameterizeOperators();
    }

    protected virtual void OnEvaluatorChanged(EventArgs e) {
      // the new evaluator must be observed and parameterized like the previous one
      Evaluator.QualityParameter.ActualNameChanged += Evaluator_QualityParameter_ActualNameChanged;
      ParameterizeEvaluator();
      ParameterizeAnalyzers();
      RaiseEvaluatorChanged(e);
    }
    protected virtual void OnQualityParameterNameChanged(EventArgs e) {
      ParameterizeAnalyzers();
    }
    #endregion

    #region event handlers
    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    private void EvaluatorParameter_ValueChanged(object sender, EventArgs e) {
      OnEvaluatorChanged(e);
    }
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      // This handler serves both the parameters and their current value objects. If a parameter
      // received a new value object, that object is not observed yet; re-hook so later changes
      // to it still update the grammar.
      RegisterArchitectureValueEvents();
      OnArchitectureParameterChanged(e);
    }
    private void Evaluator_QualityParameter_ActualNameChanged(object sender, EventArgs e) {
      OnQualityParameterNameChanged(e);
    }
    #endregion

    #region events
    public event EventHandler SolutionCreatorChanged;
    private void RaiseSolutionCreatorChanged(EventArgs e) {
      // copy to a local so the null check and the invocation see the same delegate
      EventHandler handler = SolutionCreatorChanged;
      if (handler != null) {
        handler(this, e);
      }
    }
    public event EventHandler EvaluatorChanged;
    private void RaiseEvaluatorChanged(EventArgs e) {
      EventHandler handler = EvaluatorChanged;
      if (handler != null) {
        handler(this, e);
      }
    }

    public event EventHandler OperatorsChanged;
    private void RaiseOperatorsChanged(EventArgs e) {
      EventHandler handler = OperatorsChanged;
      if (handler != null) {
        handler(this, e);
      }
    }
    #endregion

    #region Helpers
    /// <summary>Creates the operator list and attaches all event handlers.</summary>
    private void Initialize() {
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>
    /// Pushes the checked input variables into every variable symbol of the grammar and, if the
    /// grammar is a <see cref="GlobalSymbolicExpressionGrammar"/>, the architecture limits as well.
    /// </summary>
    private void UpdateGrammar() {
      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }
      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
      if (globalGrammar != null) {
        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      }
    }

    /// <summary>
    /// Sets the estimation limits to mean +/- PunishmentFactor * range of the target values in
    /// the training samples. Nothing is changed when the training range is empty or the target
    /// variable is not part of the dataset.
    /// </summary>
    private void UpdateEstimationLimits() {
      // Each of these properties allocates a new wrapper per access, so read them only once.
      int start = TrainingSamplesStart.Value;
      int end = TrainingSamplesEnd.Value;
      if (start >= end) return;

      var targetVariable = DataAnalysisProblemData.TargetVariable.Value;
      if (!DataAnalysisProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

      var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(targetVariable, start, end);
      var mean = targetValues.Average();
      var range = targetValues.Max() - targetValues.Min();
      double punishmentFactor = PunishmentFactor.Value;
      UpperEstimationLimit = new DoubleValue(mean + punishmentFactor * range);
      LowerEstimationLimit = new DoubleValue(mean - punishmentFactor * range);
    }

    /// <summary>
    /// Rebuilds the operator list from all discoverable symbolic expression tree operators plus
    /// the three analyzers of this problem, then parameterizes them.
    /// </summary>
    private void InitializeOperators() {
      operators = new List<IOperator>();
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
      operators.Add(new PopulationValidationBestScaledSymbolicRegressionSolutionAnalyzer());
      operators.Add(new PopulationMinAvgMaxTreeSizeAnalyzer());
      operators.Add(new PopulationSymbolicRegressionVariableFrequencyAnalyzer());
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }

    /// <summary>Points the solution creator's lookup parameters at this problem's parameters.</summary>
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    /// <summary>Wires the evaluator to the creator's tree, the problem data and the training samples.</summary>
    private void ParameterizeEvaluator() {
      Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      Evaluator.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
      Evaluator.SamplesStartParameter.Value = TrainingSamplesStart;
      Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd;
    }

    /// <summary>
    /// Wires all analyzers to the creator's tree name; the validation analyzer additionally gets
    /// the problem data, estimation limits, interpreter and validation samples, the variable
    /// frequency analyzer the problem data.
    /// </summary>
    private void ParameterizeAnalyzers() {
      var treeName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      foreach (var analyzer in Analyzers) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = treeName;
        var bestValidationSolutionAnalyzer = analyzer as PopulationValidationBestScaledSymbolicRegressionSolutionAnalyzer;
        if (bestValidationSolutionAnalyzer != null) {
          // the tree name has already been assigned above for every analyzer
          bestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          bestValidationSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
          bestValidationSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          bestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          bestValidationSolutionAnalyzer.SamplesStartParameter.Value = ValidationSamplesStart;
          bestValidationSolutionAnalyzer.SamplesEndParameter.Value = ValidationSamplesEnd;
        }
        var varFreqAnalyzer = analyzer as PopulationSymbolicRegressionVariableFrequencyAnalyzer;
        if (varFreqAnalyzer != null) {
          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        }
      }
      foreach (ISymbolicExpressionTreePopulationAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreePopulationAnalyzer>()) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = treeName;
      }
    }

    /// <summary>
    /// Wires the tree operators (general, crossover, manipulator, architecture manipulator) to
    /// this problem's size limits, grammar, architecture limits and the creator's tree name.
    /// </summary>
    private void ParameterizeOperators() {
      var treeName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = treeName;
        op.ChildParameter.ActualName = treeName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = treeName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.