Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemBase.cs @ 4857

Last change on this file since 4857 was 4341, checked in by gkronber, 14 years ago

Merged changesets from revisions r4249, r4250, r4251, r4291, r4295 from trunk into data analysis exploration #1142.

File size: 18.9 KB
Line 
1#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.PluginInfrastructure;
36using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Abstract base class for symbolic regression problems. It declares the parameters for the
  /// solution creator, interpreter, grammar, expression size limits and estimation limits, owns
  /// the list of operators, and re-parameterizes operators, analyzers and the grammar whenever
  /// one of the parameters or the problem data changes.
  /// </summary>
  [StorableClass]
  public abstract class SymbolicRegressionProblemBase : DataAnalysisProblem, IProblem {

    #region Parameter Properties
    /// <summary>Parameter holding the operator used to create new symbolic regression solutions.</summary>
    public new ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }
    /// <summary>Untyped view of <see cref="SolutionCreatorParameter"/> required by IProblem.</summary>
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }
    /// <summary>Parameter holding the lower limit for values estimated by a symbolic regression model.</summary>
    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
    }
    /// <summary>Parameter holding the upper limit for values estimated by a symbolic regression model.</summary>
    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
    }
    /// <summary>Parameter holding the interpreter used to evaluate symbolic expression trees.</summary>
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }
    /// <summary>Parameter holding the grammar used for symbolic regression models.</summary>
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }
    /// <summary>Parameter holding the maximal length of the symbolic expression.</summary>
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }
    /// <summary>Parameter holding the maximal depth of the symbolic expression.</summary>
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }
    /// <summary>Parameter holding the maximal number of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }
    /// <summary>Parameter holding the maximal number of arguments of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }
    #endregion

    #region Properties
    /// <summary>Value of <see cref="MaxExpressionLengthParameter"/>.</summary>
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }
    /// <summary>Value of <see cref="MaxExpressionDepthParameter"/>.</summary>
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }
    /// <summary>Value of <see cref="MaxFunctionDefiningBranchesParameter"/>.</summary>
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }
    /// <summary>Value of <see cref="MaxFunctionArgumentsParameter"/>.</summary>
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }
    /// <summary>Value of <see cref="SolutionCreatorParameter"/>.</summary>
    public new SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }
    /// <summary>Untyped, read-only view of <see cref="SolutionCreator"/> required by IProblem.</summary>
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }
    /// <summary>Value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }
    /// <summary>Value of <see cref="LowerEstimationLimitParameter"/>.</summary>
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }
    /// <summary>Value of <see cref="UpperEstimationLimitParameter"/>.</summary>
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }

    /// <summary>Value of <see cref="FunctionTreeGrammarParameter"/>; only this class may replace it.</summary>
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      // The parameter is already typed, so no cast is needed here.
      get { return FunctionTreeGrammarParameter.Value; }
      private set { FunctionTreeGrammarParameter.Value = value; }
    }
    /// <summary>All operators managed by this problem.</summary>
    public override IEnumerable<IOperator> Operators {
      get { return operators; }
    }
    /// <summary>The subset of <see cref="Operators"/> that implement ISymbolicRegressionAnalyzer.</summary>
    public IEnumerable<ISymbolicRegressionAnalyzer> Analyzers {
      get { return operators.OfType<ISymbolicRegressionAnalyzer>(); }
    }
    /// <summary>
    /// Factor applied to the target value range when the estimation limits are computed.
    /// A new DoubleValue(10.0) is returned on every access.
    /// </summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }
    /// <summary>Start of the training samples; a fresh copy of the problem data's TrainingSamplesStart.</summary>
    public IntValue TrainingSamplesStart {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
    }
    /// <summary>
    /// End of the training samples: the midpoint (integer division) between the problem data's
    /// TrainingSamplesStart and TrainingSamplesEnd. A new IntValue is returned on every access.
    /// </summary>
    public IntValue TrainingSamplesEnd {
      get {
        int dataStart = DataAnalysisProblemData.TrainingSamplesStart.Value;
        int dataEnd = DataAnalysisProblemData.TrainingSamplesEnd.Value;
        return new IntValue((dataStart + dataEnd) / 2);
      }
    }
    /// <summary>Start of the validation samples; equal to <see cref="TrainingSamplesEnd"/>.</summary>
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }
    /// <summary>End of the validation samples; a fresh copy of the problem data's TrainingSamplesEnd.</summary>
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
    }
    /// <summary>Start of the test samples, taken directly from the problem data.</summary>
    public IntValue TestSamplesStart {
      get { return DataAnalysisProblemData.TestSamplesStart; }
    }
    /// <summary>End of the test samples, taken directly from the problem data.</summary>
    public IntValue TestSamplesEnd {
      get { return DataAnalysisProblemData.TestSamplesEnd; }
    }
    #endregion

    // Backing list for Operators and Analyzers; persisted with the problem.
    [Storable]
    private List<IOperator> operators;

    /// <summary>Constructor used by the persistence mechanism; performs no initialization itself.</summary>
    [StorableConstructor]
    protected SymbolicRegressionProblemBase(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates the problem with default settings: a ProbabilisticTreeCreator, a
    /// FullFunctionalExpressionGrammar wrapped in a GlobalSymbolicExpressionGrammar, a
    /// SimpleArithmeticExpressionInterpreter, unbounded estimation limits, a maximal expression
    /// length of 100 and depth of 10, and read-only zero values for the ADF limits.
    /// </summary>
    public SymbolicRegressionProblemBase()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      var grammar = new FullFunctionalExpressionGrammar();
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var interpreter = new SimpleArithmeticExpressionInterpreter();
      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));

      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";

      ParameterizeSolutionCreator();

      UpdateGrammar();
      UpdateEstimationLimits();
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>
    /// Clones the problem via the base implementation, then clones every operator with the given
    /// cloner and attaches the clone's own event handlers.
    /// </summary>
    /// <param name="cloner">The cloner used for the operators.</param>
    /// <returns>The cloned problem.</returns>
    public override IDeepCloneable Clone(Cloner cloner) {
      SymbolicRegressionProblemBase clone = (SymbolicRegressionProblemBase)base.Clone(cloner);
      clone.operators = operators.Select(x => (IOperator)cloner.Clone(x)).ToList();
      clone.RegisterParameterEvents();
      clone.RegisterParameterValueEvents();
      return clone;
    }

    /// <summary>
    /// Attaches the handlers that react when the VALUE OBJECT of a parameter is replaced
    /// (architecture parameters, solution creator, grammar).
    /// </summary>
    /// <remarks>
    /// Each handler is detached before it is attached, so repeated calls (constructor, Clone,
    /// deserialization hook) never leave more than one subscription per event. Removing a
    /// handler that is not subscribed is a no-op.
    /// </remarks>
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged -= new EventHandler(ArchitectureParameter_ValueChanged);
      MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
      MaxFunctionDefiningBranchesParameter.ValueChanged -= new EventHandler(ArchitectureParameter_ValueChanged);
      MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
      SolutionCreatorParameter.ValueChanged -= new EventHandler(SolutionCreatorParameter_ValueChanged);
      SolutionCreatorParameter.ValueChanged += new EventHandler(SolutionCreatorParameter_ValueChanged);
      FunctionTreeGrammarParameter.ValueChanged -= new EventHandler(FunctionTreeGrammarParameter_ValueChanged);
      FunctionTreeGrammarParameter.ValueChanged += new EventHandler(FunctionTreeGrammarParameter_ValueChanged);
    }

    /// <summary>
    /// Attaches the handlers that react to changes INSIDE the current parameter values
    /// (the architecture IntValues and the solution creator's tree parameter name).
    /// </summary>
    private void RegisterParameterEvents() {
      RegisterArchitectureValueEvents();
      RegisterSolutionCreatorEvents();
    }

    /// <summary>
    /// Subscribes exactly once to the ValueChanged event of the current MaxFunctionArguments and
    /// MaxFunctionDefiningBranches values.
    /// </summary>
    /// <remarks>
    /// This runs whenever either architecture parameter gets a new value object. Without the
    /// preceding removal, the value that did NOT change would collect one more copy of the
    /// handler each time. Handlers on a replaced value cannot be detached here because the
    /// previous value is no longer reachable.
    /// </remarks>
    private void RegisterArchitectureValueEvents() {
      IntValue maxArguments = MaxFunctionArgumentsParameter.Value;
      maxArguments.ValueChanged -= new EventHandler(ArchitectureParameterValue_ValueChanged);
      maxArguments.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);

      IntValue maxDefinitions = MaxFunctionDefiningBranchesParameter.Value;
      maxDefinitions.ValueChanged -= new EventHandler(ArchitectureParameterValue_ValueChanged);
      maxDefinitions.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
    }

    /// <summary>
    /// Subscribes exactly once to the ActualNameChanged event of the current solution creator's
    /// symbolic expression tree parameter.
    /// </summary>
    private void RegisterSolutionCreatorEvents() {
      var treeParameter = SolutionCreator.SymbolicExpressionTreeParameter;
      treeParameter.ActualNameChanged -= new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
      treeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
    }

    #region event handling
    /// <summary>
    /// Reacts to a change of the problem data by re-parameterizing the analyzers and
    /// recomputing the grammar's variable names and the estimation limits.
    /// </summary>
    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
      base.OnDataAnalysisProblemChanged(e);
      // partitions could be changed
      ParameterizeAnalyzers();
      // input variables could have been changed
      UpdateGrammar();
      // estimation limits have to be recalculated
      UpdateEstimationLimits();
    }
    /// <summary>Called when an architecture (ADF) limit changed; updates the grammar.</summary>
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }
    /// <summary>Called when the grammar parameter's value changed; updates the grammar.</summary>
    protected virtual void OnGrammarChanged() { UpdateGrammar(); }
    /// <summary>Forwards to RaiseOperatorsChanged.</summary>
    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
    /// <summary>
    /// Called when the solution creator was replaced: subscribes to the new creator,
    /// parameterizes it, re-parameterizes analyzers and operators, and raises SolutionCreatorChanged.
    /// </summary>
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      RegisterSolutionCreatorEvents();
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }

    /// <summary>
    /// Called when the name of the solution (tree) variable changed; re-parameterizes
    /// analyzers and operators so they use the new name.
    /// </summary>
    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      ParameterizeAnalyzers();
      ParameterizeOperators();
    }

    /// <summary>Forwards to RaiseEvaluatorChanged.</summary>
    protected virtual void OnEvaluatorChanged(EventArgs e) {
      RaiseEvaluatorChanged(e);
    }
    #endregion

    #region event handlers
    // Ensures the grammar is always a GlobalSymbolicExpressionGrammar by wrapping any other grammar.
    // NOTE(review): assigning FunctionTreeGrammar here presumably raises ValueChanged again, which
    // would run OnGrammarChanged twice for a wrapped grammar - confirm against ValueParameter.
    private void FunctionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) {
      if (!(FunctionTreeGrammar is GlobalSymbolicExpressionGrammar))
        FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(FunctionTreeGrammar);
      OnGrammarChanged();
    }

    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    // One of the architecture parameters received a new value object: make sure the current
    // values are observed (exactly once each), then propagate the change.
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      RegisterArchitectureValueEvents();
      OnArchitectureParameterChanged(e);
    }
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged(e);
    }
    #endregion

    #region Helpers
    /// <summary>
    /// Runs after deserialization: creates the operator list if the stored data did not contain
    /// one, then attaches all event handlers.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code (remove with 3.4)
      if (operators == null) InitializeOperators();
      #endregion
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>Appends an operator to the list returned by <see cref="Operators"/>.</summary>
    /// <param name="op">The operator to add.</param>
    protected void AddOperator(IOperator op) {
      operators.Add(op);
    }

    /// <summary>
    /// Sets the variable names of all Variable symbols in the grammar to the checked input
    /// variables and, for a GlobalSymbolicExpressionGrammar, copies the ADF limits into it.
    /// </summary>
    private void UpdateGrammar() {
      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }
      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
      if (globalGrammar != null) {
        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      }
    }

    /// <summary>
    /// Recomputes the estimation limits as mean +/- PunishmentFactor * range of the target
    /// variable over the training samples. Does nothing when the training range is empty or
    /// the target variable is not part of the dataset.
    /// </summary>
    private void UpdateEstimationLimits() {
      // Each of these property reads allocates a new value object, so read them only once.
      int start = TrainingSamplesStart.Value;
      int end = TrainingSamplesEnd.Value;
      if (start >= end) return;

      var targetVariable = DataAnalysisProblemData.TargetVariable.Value;
      if (!DataAnalysisProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

      var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(targetVariable, start, end);
      var mean = targetValues.Average();
      var range = targetValues.Max() - targetValues.Min();
      double punishmentFactor = PunishmentFactor.Value;
      UpperEstimationLimit = new DoubleValue(mean + punishmentFactor * range);
      LowerEstimationLimit = new DoubleValue(mean - punishmentFactor * range);
    }

    /// <summary>
    /// Builds the operator list from all discoverable ISymbolicExpressionTreeOperator instances
    /// plus the three default analyzers, then parameterizes operators and analyzers.
    /// </summary>
    private void InitializeOperators() {
      operators = new List<IOperator>();
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
      operators.Add(new SymbolicRegressionModelQualityAnalyzer());
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }

    /// <summary>Points the solution creator's lookup parameters at this problem's parameters.</summary>
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    /// <summary>
    /// Wires every analyzer to the solution variable name and, depending on its concrete type,
    /// to the problem data, estimation limits, interpreter and training partition.
    /// </summary>
    private void ParameterizeAnalyzers() {
      // The name of the variable holding the tree does not change while analyzers are configured.
      string treeName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      foreach (var analyzer in Analyzers) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = treeName;
        var symbolicRegressionModelQualityAnalyzer = analyzer as SymbolicRegressionModelQualityAnalyzer;
        if (symbolicRegressionModelQualityAnalyzer != null) {
          symbolicRegressionModelQualityAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          symbolicRegressionModelQualityAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
          symbolicRegressionModelQualityAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeParameter.ActualName = treeName;
        }
        var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
        if (varFreqAnalyzer != null) {
          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        }
        var pruningOperator = analyzer as SymbolicRegressionTournamentPruning;
        if (pruningOperator != null) {
          pruningOperator.SamplesStartParameter.Value = TrainingSamplesStart;
          pruningOperator.SamplesEndParameter.Value = TrainingSamplesEnd;
          pruningOperator.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          pruningOperator.SymbolicExpressionTreeParameter.ActualName = treeName;
          pruningOperator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          pruningOperator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          pruningOperator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
        }
      }
      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = treeName;
      }
    }

    /// <summary>
    /// Wires all tree operators, crossovers, manipulators and architecture manipulators to this
    /// problem's parameter names and to the solution variable name.
    /// </summary>
    private void ParameterizeOperators() {
      // Loop-invariant lookups, read once.
      string treeName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      string maxDepthName = MaxExpressionDepthParameter.Name;
      string maxLengthName = MaxExpressionLengthParameter.Name;
      string grammarName = FunctionTreeGrammarParameter.Name;
      string maxArgumentsName = MaxFunctionArgumentsParameter.Name;
      string maxDefinitionsName = MaxFunctionDefiningBranchesParameter.Name;

      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = maxDepthName;
        op.MaxTreeSizeParameter.ActualName = maxLengthName;
        op.SymbolicExpressionGrammarParameter.ActualName = grammarName;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = treeName;
        op.ChildParameter.ActualName = treeName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = treeName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = maxArgumentsName;
        op.MaxFunctionDefinitionsParameter.ActualName = maxDefinitionsName;
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.