Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemBase.cs @ 12064

Last change on this file since 12064 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 18.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.PluginInfrastructure;
36using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
39namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
40  [StorableClass]
41  public abstract class SymbolicRegressionProblemBase : DataAnalysisProblem, IProblem {
42
43    #region Parameter Properties
44    public new ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
45      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
46    }
47    IParameter IProblem.SolutionCreatorParameter {
48      get { return SolutionCreatorParameter; }
49    }
50    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
51      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
52    }
53    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
54      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
55    }
56    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
57      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
58    }
59    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
60      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
61    }
62    public ValueParameter<IntValue> MaxExpressionLengthParameter {
63      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
64    }
65    public ValueParameter<IntValue> MaxExpressionDepthParameter {
66      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
67    }
68    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
69      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
70    }
71    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
72      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
73    }
74    #endregion
75
76    #region Properties
77    public IntValue MaxExpressionLength {
78      get { return MaxExpressionLengthParameter.Value; }
79      set { MaxExpressionLengthParameter.Value = value; }
80    }
81    public IntValue MaxExpressionDepth {
82      get { return MaxExpressionDepthParameter.Value; }
83      set { MaxExpressionDepthParameter.Value = value; }
84    }
85    public IntValue MaxFunctionDefiningBranches {
86      get { return MaxFunctionDefiningBranchesParameter.Value; }
87      set { MaxFunctionDefiningBranchesParameter.Value = value; }
88    }
89    public IntValue MaxFunctionArguments {
90      get { return MaxFunctionArgumentsParameter.Value; }
91      set { MaxFunctionArgumentsParameter.Value = value; }
92    }
93    public new SymbolicExpressionTreeCreator SolutionCreator {
94      get { return SolutionCreatorParameter.Value; }
95      set { SolutionCreatorParameter.Value = value; }
96    }
97    ISolutionCreator IProblem.SolutionCreator {
98      get { return SolutionCreatorParameter.Value; }
99    }
100    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
101      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
102      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
103    }
104    public DoubleValue LowerEstimationLimit {
105      get { return LowerEstimationLimitParameter.Value; }
106      set { LowerEstimationLimitParameter.Value = value; }
107    }
108    public DoubleValue UpperEstimationLimit {
109      get { return UpperEstimationLimitParameter.Value; }
110      set { UpperEstimationLimitParameter.Value = value; }
111    }
112
113    public ISymbolicExpressionGrammar FunctionTreeGrammar {
114      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
115      private set { FunctionTreeGrammarParameter.Value = value; }
116    }
117    public override IEnumerable<IOperator> Operators {
118      get { return operators; }
119    }
120    public IEnumerable<ISymbolicRegressionAnalyzer> Analyzers {
121      get { return operators.OfType<ISymbolicRegressionAnalyzer>(); }
122    }
123    public DoubleValue PunishmentFactor {
124      get { return new DoubleValue(10.0); }
125    }
126    public IntValue TrainingSamplesStart {
127      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.First()); }
128    }
129    public IntValue TrainingSamplesEnd {
130      get {
131        int endIndex = (int)(DataAnalysisProblemData.TrainingIndizes.Count() * (1.0 - DataAnalysisProblemData.ValidationPercentage.Value) - 1);
132        if (endIndex < 0) endIndex = 0;
133        return new IntValue(DataAnalysisProblemData.TrainingIndizes.ElementAt(endIndex));
134      }
135    }
136    public IntValue ValidationSamplesStart {
137      get { return TrainingSamplesEnd; }
138    }
139    public IntValue ValidationSamplesEnd {
140      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.Last() + 1); }
141    }
142    public IntValue TestSamplesStart {
143      get { return DataAnalysisProblemData.TestSamplesStart; }
144    }
145    public IntValue TestSamplesEnd {
146      get { return DataAnalysisProblemData.TestSamplesEnd; }
147    }
148    #endregion
149
150    [Storable]
151    private List<IOperator> operators;
152
153    [StorableConstructor]
154    protected SymbolicRegressionProblemBase(bool deserializing) : base(deserializing) { }
155    protected SymbolicRegressionProblemBase(SymbolicRegressionProblemBase original, Cloner cloner)
156      : base(original, cloner) {
157      operators = original.operators.Select(x => (IOperator)cloner.Clone(x)).ToList();
158      RegisterParameterValueEvents();
159      RegisterParameterEvents();
160    }
161    public SymbolicRegressionProblemBase()
162      : base() {
163      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
164      var grammar = new FullFunctionalExpressionGrammar();
165      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
166      var interpreter = new SimpleArithmeticExpressionInterpreter();
167      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
168      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
169      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
170      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
171      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
172      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
173      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression. The minimum depth needed for the algorithm is 3 because two levels are reserved for the ProgramRoot and the Start symbol.", new IntValue(10)));
174      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
175      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
176
177      ParameterizeSolutionCreator();
178
179      UpdateGrammar();
180      UpdateEstimationLimits();
181      InitializeOperators();
182      RegisterParameterValueEvents();
183      RegisterParameterEvents();
184    }
185
186    private void RegisterParameterEvents() {
187      MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
188      MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
189      MaxExpressionDepthParameter.ValueChanged += new EventHandler(MaxExpressionDepthParameter_ValueChanged);
190      SolutionCreatorParameter.ValueChanged += new EventHandler(SolutionCreatorParameter_ValueChanged);
191      FunctionTreeGrammarParameter.ValueChanged += new EventHandler(FunctionTreeGrammarParameter_ValueChanged);
192      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
193    }
194
195    private void RegisterParameterValueEvents() {
196      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
197      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
198      MaxExpressionDepthParameter.Value.ValueChanged += new EventHandler(MaxExpressionDepthParameterValue_ValueChanged);
199    }
200
201    #region event handling
202    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
203      base.OnDataAnalysisProblemChanged(e);
204      // paritions could be changed
205      ParameterizeAnalyzers();
206      // input variables could have been changed
207      UpdateGrammar();
208      // estimation limits have to be recalculated
209      UpdateEstimationLimits();
210    }
211    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
212      UpdateGrammar();
213    }
214    protected virtual void OnGrammarChanged() { UpdateGrammar(); }
215    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
216    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
217      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
218      ParameterizeSolutionCreator();
219      OnSolutionParameterNameChanged(e);
220      RaiseSolutionCreatorChanged(e);
221    }
222
223    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
224      ParameterizeAnalyzers();
225      ParameterizeOperators();
226    }
227
228    protected virtual void OnEvaluatorChanged(EventArgs e) {
229      RaiseEvaluatorChanged(e);
230    }
231    #endregion
232
233    #region event handlers
234    private void FunctionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) {
235      if (!(FunctionTreeGrammar is GlobalSymbolicExpressionGrammar))
236        FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(FunctionTreeGrammar);
237      OnGrammarChanged();
238    }
239
240    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
241      OnSolutionCreatorChanged(e);
242    }
243    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
244      OnSolutionParameterNameChanged(e);
245    }
246    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
247      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
248      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
249      OnArchitectureParameterChanged(e);
250    }
251    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
252      OnArchitectureParameterChanged(e);
253    }
254
255    private void MaxExpressionDepthParameter_ValueChanged(object sender, EventArgs e) {
256      MaxExpressionDepthParameterValue_ValueChanged(sender, e);
257      MaxExpressionDepthParameter.Value.ValueChanged += MaxExpressionDepthParameterValue_ValueChanged;
258    }
259    private void MaxExpressionDepthParameterValue_ValueChanged(object sender, EventArgs e) {
260      if (MaxExpressionDepth != null && MaxExpressionDepth.Value < 3)
261        MaxExpressionDepth.Value = 3;
262    }
263    #endregion
264
265    #region Helpers
266    [StorableHook(HookType.AfterDeserialization)]
267    private void AfterDeserializationHook() {
268      // BackwardsCompatibility3.3
269      #region Backwards compatible code (remove with 3.4)
270      if (operators == null) InitializeOperators();
271      #endregion
272      RegisterParameterValueEvents();
273      RegisterParameterEvents();
274    }
275
276    protected void AddOperator(IOperator op) {
277      operators.Add(op);
278    }
279
280    private void UpdateGrammar() {
281      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
282        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
283      }
284      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
285      if (globalGrammar != null) {
286        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
287        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
288      }
289    }
290
291    private void UpdateEstimationLimits() {
292      if (TrainingSamplesStart.Value < TrainingSamplesEnd.Value &&
293        DataAnalysisProblemData.Dataset.VariableNames.Contains(DataAnalysisProblemData.TargetVariable.Value)) {
294        var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(DataAnalysisProblemData.TargetVariable.Value, TrainingSamplesStart.Value, TrainingSamplesEnd.Value);
295        var mean = targetValues.Average();
296        var range = targetValues.Max() - targetValues.Min();
297        UpperEstimationLimit = new DoubleValue(mean + PunishmentFactor.Value * range);
298        LowerEstimationLimit = new DoubleValue(mean - PunishmentFactor.Value * range);
299      }
300    }
301
302    private void InitializeOperators() {
303      operators = new List<IOperator>();
304      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
305      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
306      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
307      operators.Add(new SymbolicRegressionModelQualityAnalyzer());
308      ParameterizeOperators();
309      ParameterizeAnalyzers();
310    }
311
312    private void ParameterizeSolutionCreator() {
313      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
314      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
315      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
316      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
317      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
318    }
319
320    private void ParameterizeAnalyzers() {
321      foreach (var analyzer in Analyzers) {
322        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
323        var symbolicRegressionModelQualityAnalyzer = analyzer as SymbolicRegressionModelQualityAnalyzer;
324        if (symbolicRegressionModelQualityAnalyzer != null) {
325          symbolicRegressionModelQualityAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
326          symbolicRegressionModelQualityAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
327          symbolicRegressionModelQualityAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
328          symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
329          symbolicRegressionModelQualityAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
330        }
331        var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
332        if (varFreqAnalyzer != null) {
333          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
334        }
335      }
336      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
337        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
338      }
339    }
340
341    private void ParameterizeOperators() {
342      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
343        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
344        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
345        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
346      }
347      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
348        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
349        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
350      }
351      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
352        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
353      }
354      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
355        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
356        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
357      }
358    }
359    #endregion
360  }
361}
Note: See TracBrowser for help on using the repository browser.