Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs @ 3599

Last change on this file since 3599 was 3599, checked in by gkronber, 14 years ago

Changed DataAnalysisProblemData to use a CheckedItemList for the input variables instead of a CheckedItemCollection to preserve the ordering of input variables. #938 (Data types and operators for regression problems)

File size: 21.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis.Regression;
35using HeuristicLab.Problems.DataAnalysis.Symbolic;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ArchitectureManipulators;
37using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Manipulators;
38using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Crossovers;
39using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
40using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
41
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Single-objective symbolic regression problem. Bundles the grammar, solution creator,
  /// evaluator, interpreter, visualizer and tree-size limits as named parameters and keeps
  /// the operators wired to each other whenever one of those parameters changes.
  /// </summary>
  /// <remarks>
  /// The training partition of the underlying problem data is split at its midpoint:
  /// the first half is handed to the evaluator (see <see cref="TrainingSamplesStart"/> and
  /// <see cref="TrainingSamplesEnd"/>), the second half to the validation visualizer
  /// (see <see cref="ValidationSamplesStart"/> and <see cref="ValidationSamplesEnd"/>).
  /// </remarks>
  [Item("Symbolic Regression Problem", "Represents a symbolic regression problem.")]
  [Creatable("Problems")]
  [StorableClass]
  public class SymbolicRegressionProblem : DataAnalysisProblem, ISingleObjectiveProblem {

    #region Parameter Properties
    // Each accessor looks the parameter up by its name in the Parameters collection and casts it
    // to the concrete parameter type. The names must match those registered in the constructor.
    public ValueParameter<BoolValue> MaximizationParameter {
      get { return (ValueParameter<BoolValue>)Parameters["Maximization"]; }
    }
    IParameter ISingleObjectiveProblem.MaximizationParameter {
      get { return MaximizationParameter; }
    }
    public ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }
    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
    }
    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
    }
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }
    public ValueParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
      get { return (ValueParameter<ISymbolicRegressionEvaluator>)Parameters["Evaluator"]; }
    }
    IParameter IProblem.EvaluatorParameter {
      get { return EvaluatorParameter; }
    }
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }
    // NOTE(review): the constructor registers "Visualizer" as a ValueParameter, while this accessor
    // casts to OptionalValueParameter. The cast presumably relies on ValueParameter being a subtype
    // of OptionalValueParameter - confirm against HeuristicLab.Parameters.
    public OptionalValueParameter<ISingleObjectiveSolutionsVisualizer> VisualizerParameter {
      get { return (OptionalValueParameter<ISingleObjectiveSolutionsVisualizer>)Parameters["Visualizer"]; }
    }
    IParameter IProblem.VisualizerParameter {
      get { return VisualizerParameter; }
    }
    public OptionalValueParameter<DoubleValue> BestKnownQualityParameter {
      get { return (OptionalValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
    }
    IParameter ISingleObjectiveProblem.BestKnownQualityParameter {
      get { return BestKnownQualityParameter; }
    }
    #endregion

    #region Properties
    // Convenience accessors that read and write the value held by the corresponding parameter.
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }
    public SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }

    public ISymbolicRegressionEvaluator Evaluator {
      get { return EvaluatorParameter.Value; }
      set { EvaluatorParameter.Value = value; }
    }
    ISingleObjectiveEvaluator ISingleObjectiveProblem.Evaluator {
      get { return EvaluatorParameter.Value; }
    }
    IEvaluator IProblem.Evaluator {
      get { return EvaluatorParameter.Value; }
    }
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
    }
    public ISingleObjectiveSolutionsVisualizer Visualizer {
      get { return VisualizerParameter.Value; }
      set { VisualizerParameter.Value = value; }
    }
    ISolutionsVisualizer IProblem.Visualizer {
      get { return VisualizerParameter.Value; }
    }
    public DoubleValue BestKnownQuality {
      get { return BestKnownQualityParameter.Value; }
    }

    // Rebuilt by InitializeOperators() on construction, deserialization and cloning.
    private List<ISymbolicExpressionTreeOperator> operators;

    /// <summary>The symbolic expression tree operators collected by <c>InitializeOperators</c>.</summary>
    public IEnumerable<IOperator> Operators {
      get { return operators.Cast<IOperator>(); }
    }

    /// <summary>
    /// Multiplier applied to the target value range when the estimation limits are derived
    /// (limits = mean +/- factor * range). Always a fresh value of 10.0.
    /// </summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }

    /// <summary>First sample used for training; a copy of the problem data's training start.</summary>
    public IntValue TrainingSamplesStart {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
    }

    /// <summary>
    /// End of the samples used for training: the midpoint (integer division) between the problem
    /// data's training start and training end.
    /// </summary>
    public IntValue TrainingSamplesEnd {
      get {
        return new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value +
          DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
      }
    }

    /// <summary>Validation starts where training ends (see <see cref="TrainingSamplesEnd"/>).</summary>
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }

    /// <summary>Validation ends at the problem data's training end.</summary>
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
    }
    #endregion

    /// <summary>
    /// Creates a problem with default operators (probabilistic tree creator, scaled MSE evaluator,
    /// arithmetic grammar wrapped in a global grammar, best-validation visualizer, simple arithmetic
    /// interpreter), registers all parameters and wires the operators together.
    /// </summary>
    public SymbolicRegressionProblem()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      var evaluator = new SymbolicRegressionScaledMeanSquaredErrorEvaluator();
      var grammar = new ArithmeticExpressionGrammar();
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var visualizer = new BestValidationSymbolicRegressionSolutionVisualizer();
      var interpreter = new SimpleArithmeticExpressionInterpreter();
      Parameters.Add(new ValueParameter<BoolValue>("Maximization", "Set to false as the error of the regression model should be minimized.", (BoolValue)new BoolValue(false).AsReadOnly()));
      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<ISingleObjectiveSolutionsVisualizer>("Visualizer", "The operator which should be used to visualize symbolic regression solutions.", visualizer));

      // Variable names under which the created tree and its training quality are stored.
      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";
      evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";

      ParameterizeSolutionCreator();
      ParameterizeEvaluator();
      ParameterizeVisualizer();

      UpdateGrammar();
      UpdateEstimationLimits();
      Initialize();
    }

    // Used by the persistence framework; operators and event subscriptions are restored
    // afterwards by AfterDeserializationHook().
    [StorableConstructor]
    private SymbolicRegressionProblem(bool deserializing) : base() { }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      Initialize();
    }

    /// <summary>
    /// Clones the problem via the base implementation and then runs <c>Initialize</c> on the clone
    /// so that it gets its own operator list and event subscriptions.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      SymbolicRegressionProblem clone = (SymbolicRegressionProblem)base.Clone(cloner);
      clone.Initialize();
      return clone;
    }

    /// <summary>
    /// Subscribes to the parameters' own ValueChanged events, i.e. notifications that the value
    /// object held by a parameter has been replaced.
    /// </summary>
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
      MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
      SolutionCreatorParameter.ValueChanged += new EventHandler(SolutionCreatorParameter_ValueChanged);
      EvaluatorParameter.ValueChanged += new EventHandler(EvaluatorParameter_ValueChanged);
      VisualizerParameter.ValueChanged += new EventHandler(VisualizerParameter_ValueChanged);
    }

    /// <summary>
    /// Subscribes to events of the objects currently held by the parameters (in-place changes of
    /// the architecture limits and renames of the tree / quality variables).
    /// </summary>
    private void RegisterParameterEvents() {
      RegisterArchitectureValueEvents();
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
      Evaluator.QualityParameter.ActualNameChanged += new EventHandler(Evaluator_QualityParameter_ActualNameChanged);
    }

    /// <summary>
    /// Attaches the in-place change handler to the current MaxFunctionArguments and
    /// MaxFunctionDefiningBranches value objects. The handler is removed before it is added so
    /// that repeated calls never leave duplicate subscriptions on the same object.
    /// </summary>
    private void RegisterArchitectureValueEvents() {
      MaxFunctionArgumentsParameter.Value.ValueChanged -= new EventHandler(ArchitectureParameterValue_ValueChanged);
      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged -= new EventHandler(ArchitectureParameterValue_ValueChanged);
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
    }

    #region event handling
    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
      base.OnDataAnalysisProblemChanged(e);
      // partitions could be changed
      ParameterizeEvaluator();
      ParameterizeVisualizer();
      // input variables could have been changed
      UpdateGrammar();
      // estimation limits have to be recalculated
      UpdateEstimationLimits();
    }
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }
    protected virtual void OnGrammarChanged(EventArgs e) { }
    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      // The new creator has to be observed for renames of its tree variable as well.
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }

    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      // Everything that reads the tree variable must follow the new name.
      ParameterizeEvaluator();
      ParameterizeVisualizer();
      ParameterizeOperators();
    }

    protected virtual void OnEvaluatorChanged(EventArgs e) {
      // The new evaluator has to be observed for renames of its quality variable as well.
      Evaluator.QualityParameter.ActualNameChanged += new EventHandler(Evaluator_QualityParameter_ActualNameChanged);
      ParameterizeEvaluator();
      ParameterizeVisualizer();
      RaiseEvaluatorChanged(e);
    }
    protected virtual void OnQualityParameterNameChanged(EventArgs e) {
      ParameterizeVisualizer();
    }
    protected virtual void OnVisualizerChanged(EventArgs e) {
      ParameterizeVisualizer();
      RaiseVisualizerChanged(e);
    }
    #endregion

    #region event handlers
    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    private void EvaluatorParameter_ValueChanged(object sender, EventArgs e) {
      OnEvaluatorChanged(e);
    }
    private void VisualizerParameter_ValueChanged(object sender, EventArgs e) {
      OnVisualizerChanged(e);
    }
    // Parameter-level change: the value object of an architecture parameter was replaced.
    // The replacement has to be observed too, otherwise later in-place changes of the new
    // object would no longer update the grammar.
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      RegisterArchitectureValueEvents();
      OnArchitectureParameterChanged(e);
    }
    // Value-level change: the number stored in an architecture parameter's value object changed.
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged(e);
    }
    private void Evaluator_QualityParameter_ActualNameChanged(object sender, EventArgs e) {
      OnQualityParameterNameChanged(e);
    }
    #endregion

    #region events
    // Each Raise* method copies the delegate to a local before the null check and invocation.
    public event EventHandler SolutionCreatorChanged;
    private void RaiseSolutionCreatorChanged(EventArgs e) {
      var changed = SolutionCreatorChanged;
      if (changed != null)
        changed(this, e);
    }
    public event EventHandler EvaluatorChanged;
    private void RaiseEvaluatorChanged(EventArgs e) {
      var changed = EvaluatorChanged;
      if (changed != null)
        changed(this, e);
    }
    public event EventHandler VisualizerChanged;
    private void RaiseVisualizerChanged(EventArgs e) {
      var changed = VisualizerChanged;
      if (changed != null)
        changed(this, e);
    }

    public event EventHandler OperatorsChanged;
    private void RaiseOperatorsChanged(EventArgs e) {
      var changed = OperatorsChanged;
      if (changed != null)
        changed(this, e);
    }
    #endregion

    #region Helpers
    /// <summary>Rebuilds the operator list and attaches all event handlers.</summary>
    private void Initialize() {
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>
    /// Pushes the currently checked input variables into every variable symbol of the grammar and,
    /// if the grammar is a <c>GlobalSymbolicExpressionGrammar</c>, updates its function limits.
    /// </summary>
    private void UpdateGrammar() {
      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }
      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
      if (globalGrammar != null) {
        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      }
    }

    /// <summary>
    /// Recomputes the estimation limits as mean +/- PunishmentFactor * range of the target values
    /// in the training samples. Does nothing when the training range is empty or the target
    /// variable is not part of the dataset.
    /// </summary>
    private void UpdateEstimationLimits() {
      // These properties allocate a new wrapper object on every access, so read them only once.
      int start = TrainingSamplesStart.Value;
      int end = TrainingSamplesEnd.Value;
      if (start < end &&
        DataAnalysisProblemData.Dataset.VariableNames.Contains(DataAnalysisProblemData.TargetVariable.Value)) {
        var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(DataAnalysisProblemData.TargetVariable.Value, start, end);
        var mean = targetValues.Average();
        var range = targetValues.Max() - targetValues.Min();
        double punishmentFactor = PunishmentFactor.Value;
        UpperEstimationLimit = new DoubleValue(mean + punishmentFactor * range);
        LowerEstimationLimit = new DoubleValue(mean - punishmentFactor * range);
      }
    }

    /// <summary>
    /// Collects all <c>ISymbolicExpressionTreeOperator</c> instances provided by the application
    /// manager and wires them to this problem's parameters.
    /// </summary>
    private void InitializeOperators() {
      operators = new List<ISymbolicExpressionTreeOperator>();
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>());
      ParameterizeOperators();
    }

    /// <summary>Points the solution creator's lookup parameters at this problem's parameters.</summary>
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    /// <summary>
    /// Wires the evaluator to the tree variable produced by the solution creator, to the problem
    /// data parameter and to the training sample range.
    /// </summary>
    private void ParameterizeEvaluator() {
      Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      Evaluator.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
      Evaluator.SamplesStartParameter.Value = TrainingSamplesStart;
      Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd;
    }

    /// <summary>
    /// Wires the visualizer to this problem's parameters and to the validation sample range.
    /// Only applies when the visualizer is a <c>BestValidationSymbolicRegressionSolutionVisualizer</c>.
    /// </summary>
    private void ParameterizeVisualizer() {
      var solutionVisualizer = Visualizer as BestValidationSymbolicRegressionSolutionVisualizer;
      if (solutionVisualizer != null) {
        solutionVisualizer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        solutionVisualizer.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        solutionVisualizer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
        solutionVisualizer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
        // Follow the evaluator's actual quality variable name (the constructor renames it and
        // OnQualityParameterNameChanged re-runs this method on every rename), not the fixed
        // parameter name.
        solutionVisualizer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;
        solutionVisualizer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
        solutionVisualizer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
        solutionVisualizer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
      }
    }

    /// <summary>
    /// Wires every collected operator to this problem's size limits and grammar, and additionally
    /// crossovers and manipulators to the tree variable and architecture manipulators to the
    /// function limits.
    /// </summary>
    private void ParameterizeOperators() {
      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.