Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblemBase.cs @ 4881

Last change on this file since 4881 was 4881, checked in by swinkler, 13 years ago

Added first version of variables usage population diversity analyzer. (#1278)

File size: 19.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.PluginInfrastructure;
36using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
37using HeuristicLab.Problems.DataAnalysis.Symbolic;
38
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Base class for symbolic regression problems. It declares the parameters used by the
  /// problem (solution creator, interpreter, grammar, size and architecture limits, estimation
  /// limits), owns the list of operators offered by the problem, and re-parameterizes those
  /// operators, the grammar and the estimation limits when parameters or the problem data change.
  /// </summary>
  [StorableClass]
  public abstract class SymbolicRegressionProblemBase : DataAnalysisProblem, IProblem {

    #region Parameter Properties
    /// <summary>Parameter "SolutionCreator": the operator that creates new symbolic regression solutions.</summary>
    public new ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
    }
    IParameter IProblem.SolutionCreatorParameter {
      get { return SolutionCreatorParameter; }
    }
    /// <summary>Parameter "LowerEstimationLimit": lower limit for the estimated value returned by a model.</summary>
    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
    }
    /// <summary>Parameter "UpperEstimationLimit": upper limit for the estimated value returned by a model.</summary>
    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
    }
    /// <summary>Parameter "SymbolicExpressionTreeInterpreter": the interpreter used to evaluate expression trees.</summary>
    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
    }
    /// <summary>Parameter "FunctionTreeGrammar": the grammar used for symbolic regression models.</summary>
    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
    }
    /// <summary>Parameter "MaxExpressionLength": maximal length of the symbolic expression.</summary>
    public ValueParameter<IntValue> MaxExpressionLengthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
    }
    /// <summary>Parameter "MaxExpressionDepth": maximal depth of the symbolic expression.</summary>
    public ValueParameter<IntValue> MaxExpressionDepthParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
    }
    /// <summary>Parameter "MaxFunctionDefiningBranches": maximal number of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
    }
    /// <summary>Parameter "MaxFunctionArguments": maximal number of arguments of automatically defined functions.</summary>
    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
    }
    #endregion

    #region Properties
    /// <summary>Value of <see cref="MaxExpressionLengthParameter"/>.</summary>
    public IntValue MaxExpressionLength {
      get { return MaxExpressionLengthParameter.Value; }
      set { MaxExpressionLengthParameter.Value = value; }
    }
    /// <summary>Value of <see cref="MaxExpressionDepthParameter"/>.</summary>
    public IntValue MaxExpressionDepth {
      get { return MaxExpressionDepthParameter.Value; }
      set { MaxExpressionDepthParameter.Value = value; }
    }
    /// <summary>Value of <see cref="MaxFunctionDefiningBranchesParameter"/>.</summary>
    public IntValue MaxFunctionDefiningBranches {
      get { return MaxFunctionDefiningBranchesParameter.Value; }
      set { MaxFunctionDefiningBranchesParameter.Value = value; }
    }
    /// <summary>Value of <see cref="MaxFunctionArgumentsParameter"/>.</summary>
    public IntValue MaxFunctionArguments {
      get { return MaxFunctionArgumentsParameter.Value; }
      set { MaxFunctionArgumentsParameter.Value = value; }
    }
    /// <summary>Value of <see cref="SolutionCreatorParameter"/>.</summary>
    public new SymbolicExpressionTreeCreator SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
      set { SolutionCreatorParameter.Value = value; }
    }
    ISolutionCreator IProblem.SolutionCreator {
      get { return SolutionCreatorParameter.Value; }
    }
    /// <summary>Value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
    }
    /// <summary>Value of <see cref="LowerEstimationLimitParameter"/>.</summary>
    public DoubleValue LowerEstimationLimit {
      get { return LowerEstimationLimitParameter.Value; }
      set { LowerEstimationLimitParameter.Value = value; }
    }
    /// <summary>Value of <see cref="UpperEstimationLimitParameter"/>.</summary>
    public DoubleValue UpperEstimationLimit {
      get { return UpperEstimationLimitParameter.Value; }
      set { UpperEstimationLimitParameter.Value = value; }
    }

    /// <summary>Value of <see cref="FunctionTreeGrammarParameter"/>; only this class may replace it directly.</summary>
    public ISymbolicExpressionGrammar FunctionTreeGrammar {
      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
      private set { FunctionTreeGrammarParameter.Value = value; }
    }
    /// <summary>All operators registered with this problem.</summary>
    public override IEnumerable<IOperator> Operators {
      get { return operators; }
    }
    /// <summary>The registered operators that implement <see cref="ISymbolicRegressionAnalyzer"/>.</summary>
    public IEnumerable<ISymbolicRegressionAnalyzer> Analyzers {
      get { return operators.OfType<ISymbolicRegressionAnalyzer>(); }
    }
    /// <summary>
    /// Factor applied to the target value range when the estimation limits are computed.
    /// A new <see cref="DoubleValue"/> of 10.0 is returned on every access.
    /// </summary>
    public DoubleValue PunishmentFactor {
      get { return new DoubleValue(10.0); }
    }
    /// <summary>The first training index, wrapped in a new <see cref="IntValue"/> on every access.</summary>
    public IntValue TrainingSamplesStart {
      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.First()); }
    }
    /// <summary>
    /// The training index found at position <c>count * (1 - ValidationPercentage) - 1</c>
    /// (truncated to int, negative positions are replaced by 0), wrapped in a new
    /// <see cref="IntValue"/> on every access.
    /// </summary>
    public IntValue TrainingSamplesEnd {
      get {
        int endIndex = (int)(DataAnalysisProblemData.TrainingIndizes.Count() * (1.0 - DataAnalysisProblemData.ValidationPercentage.Value) - 1);
        if (endIndex < 0) endIndex = 0;
        return new IntValue(DataAnalysisProblemData.TrainingIndizes.ElementAt(endIndex));
      }
    }
    /// <summary>Same value as <see cref="TrainingSamplesEnd"/>.</summary>
    public IntValue ValidationSamplesStart {
      get { return TrainingSamplesEnd; }
    }
    /// <summary>The last training index plus one.</summary>
    public IntValue ValidationSamplesEnd {
      get { return new IntValue(DataAnalysisProblemData.TrainingIndizes.Last() + 1); }
    }
    /// <summary>Forwards <c>DataAnalysisProblemData.TestSamplesStart</c>.</summary>
    public IntValue TestSamplesStart {
      get { return DataAnalysisProblemData.TestSamplesStart; }
    }
    /// <summary>Forwards <c>DataAnalysisProblemData.TestSamplesEnd</c>.</summary>
    public IntValue TestSamplesEnd {
      get { return DataAnalysisProblemData.TestSamplesEnd; }
    }

    // First registered VariablesUsagePopulationDiversityAnalyzer, or null if there is none.
    // Every access scans the whole operator list, so callers should cache the result.
    private VariablesUsagePopulationDiversityAnalyzer VariablesUsagePopulationDiversityAnalyzer {
      get { return operators.OfType<VariablesUsagePopulationDiversityAnalyzer>().FirstOrDefault(); }
    }

    #endregion

    [Storable]
    private List<IOperator> operators;

    [StorableConstructor]
    protected SymbolicRegressionProblemBase(bool deserializing) : base(deserializing) { }
    /// <summary>
    /// Cloning constructor: clones every operator of <paramref name="original"/> through
    /// <paramref name="cloner"/> and registers the event handlers on the clone.
    /// </summary>
    protected SymbolicRegressionProblemBase(SymbolicRegressionProblemBase original, Cloner cloner)
      : base(original, cloner) {
      operators = original.operators.Select(x => (IOperator)cloner.Clone(x)).ToList();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }
    /// <summary>
    /// Creates the problem with its default parameters, then initializes the grammar, the
    /// estimation limits, the operators and the event handlers.
    /// </summary>
    public SymbolicRegressionProblemBase()
      : base() {
      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
      var grammar = new FullFunctionalExpressionGrammar();
      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
      var interpreter = new SimpleArithmeticExpressionInterpreter();
      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));

      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";

      ParameterizeSolutionCreator();

      UpdateGrammar();
      UpdateEstimationLimits();
      InitializeOperators();
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    // Subscribes to the ValueChanged events of the parameters themselves, i.e. to the
    // replacement of a parameter's value object.
    private void RegisterParameterValueEvents() {
      MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
      MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
      SolutionCreatorParameter.ValueChanged += new EventHandler(SolutionCreatorParameter_ValueChanged);
      FunctionTreeGrammarParameter.ValueChanged += new EventHandler(FunctionTreeGrammarParameter_ValueChanged);
    }

    // Subscribes to the events of the objects currently held by the parameters.
    private void RegisterParameterEvents() {
      RegisterArchitectureValueEvents();
      RegisterSolutionCreatorEvents();
    }

    // Attaches ArchitectureParameterValue_ValueChanged to the current architecture values.
    // The handler is removed before it is added, so a value object that is already observed
    // does not end up with a second subscription (removing an absent handler is a no-op).
    private void RegisterArchitectureValueEvents() {
      MaxFunctionArgumentsParameter.Value.ValueChanged -= ArchitectureParameterValue_ValueChanged;
      MaxFunctionArgumentsParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged -= ArchitectureParameterValue_ValueChanged;
      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += ArchitectureParameterValue_ValueChanged;
    }

    // Attaches the ActualNameChanged handler to the current solution creator exactly once,
    // even if the same creator instance is assigned to the parameter repeatedly.
    private void RegisterSolutionCreatorEvents() {
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged -= SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged;
    }

    #region event handling
    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
      base.OnDataAnalysisProblemChanged(e);
      // partitions could be changed
      ParameterizeAnalyzers();
      // input variables could have been changed
      UpdateGrammar();
      // estimation limits have to be recalculated
      UpdateEstimationLimits();
    }
    /// <summary>Called when an architecture parameter or its value changes; updates the grammar.</summary>
    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
      UpdateGrammar();
    }
    /// <summary>Called after the grammar parameter value was replaced; updates the grammar.</summary>
    protected virtual void OnGrammarChanged() { UpdateGrammar(); }
    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
    /// <summary>
    /// Called after the solution creator was replaced: observes the new creator, parameterizes
    /// it, re-parameterizes analyzers and operators and raises the solution-creator-changed event.
    /// </summary>
    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
      RegisterSolutionCreatorEvents();
      ParameterizeSolutionCreator();
      OnSolutionParameterNameChanged(e);
      RaiseSolutionCreatorChanged(e);
    }

    /// <summary>Called when the name of the solution (tree) parameter changes; re-parameterizes analyzers and operators.</summary>
    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
      ParameterizeAnalyzers();
      ParameterizeOperators();
    }

    protected virtual void OnEvaluatorChanged(EventArgs e) {
      RaiseEvaluatorChanged(e);
    }
    #endregion

    #region event handlers
    private void FunctionTreeGrammarParameter_ValueChanged(object sender, EventArgs e) {
      // Any grammar that is not already a GlobalSymbolicExpressionGrammar gets wrapped in one.
      // NOTE(review): assigning FunctionTreeGrammar presumably raises ValueChanged again and
      // re-enters this handler, so OnGrammarChanged may run twice for one change - confirm.
      if (!(FunctionTreeGrammar is GlobalSymbolicExpressionGrammar))
        FunctionTreeGrammar = new GlobalSymbolicExpressionGrammar(FunctionTreeGrammar);
      OnGrammarChanged();
    }

    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
      OnSolutionCreatorChanged(e);
    }
    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
      OnSolutionParameterNameChanged(e);
    }
    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
      // One of the two architecture parameters received a new value object; observe it without
      // adding a duplicate subscription on the value object of the parameter that did not change.
      RegisterArchitectureValueEvents();
      OnArchitectureParameterChanged(e);
    }
    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
      OnArchitectureParameterChanged(e);
    }
    #endregion

    #region Helpers
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      // BackwardsCompatibility3.3
      #region Backwards compatible code (remove with 3.4)
      if (operators == null) InitializeOperators();
      #endregion
      RegisterParameterEvents();
      RegisterParameterValueEvents();
    }

    /// <summary>Appends <paramref name="op"/> to the operator list of this problem.</summary>
    protected void AddOperator(IOperator op) {
      operators.Add(op);
    }

    // Copies the checked input variable names into every Variable symbol of the grammar and,
    // for a GlobalSymbolicExpressionGrammar, applies the architecture limits.
    private void UpdateGrammar() {
      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
      }
      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
      if (globalGrammar != null) {
        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
      }
    }

    // Sets the estimation limits to mean +/- PunishmentFactor * range of the target variable
    // over the training samples. Nothing is changed when the training range is empty or the
    // target variable is not a variable of the dataset.
    private void UpdateEstimationLimits() {
      // The sample range properties enumerate the training indices and allocate a new IntValue
      // on every access, so each of them is read only once.
      int start = TrainingSamplesStart.Value;
      int end = TrainingSamplesEnd.Value;
      if (start >= end) return;

      var targetVariable = DataAnalysisProblemData.TargetVariable.Value;
      if (!DataAnalysisProblemData.Dataset.VariableNames.Contains(targetVariable)) return;

      // Materialized once because the values are aggregated three times below.
      var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(targetVariable, start, end).ToArray();
      var mean = targetValues.Average();
      var range = targetValues.Max() - targetValues.Min();
      var punishmentFactor = PunishmentFactor.Value;
      UpperEstimationLimit = new DoubleValue(mean + punishmentFactor * range);
      LowerEstimationLimit = new DoubleValue(mean - punishmentFactor * range);
    }

    // Builds the operator list from all discoverable ISymbolicExpressionTreeOperator instances
    // plus the analyzers of this problem, then parameterizes them.
    private void InitializeOperators() {
      operators = new List<IOperator>();
      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
      operators.Add(new SymbolicRegressionModelQualityAnalyzer());
      operators.Add(new VariablesUsagePopulationDiversityAnalyzer());
      ParameterizeOperators();
      ParameterizeAnalyzers();
    }

    // Points the lookup parameters of the solution creator at the parameters of this problem.
    private void ParameterizeSolutionCreator() {
      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
    }

    // Points the lookup parameters of all analyzers at the parameters of this problem and at
    // the tree parameter name used by the solution creator.
    private void ParameterizeAnalyzers() {
      // Resolved once: the property scans the whole operator list and may return null.
      var diversityAnalyzer = VariablesUsagePopulationDiversityAnalyzer;
      var treeParameterName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      bool populationDiversityAnalyzerFound = false;

      foreach (var analyzer in Analyzers) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = treeParameterName;
        var qualityAnalyzer = analyzer as SymbolicRegressionModelQualityAnalyzer;
        if (qualityAnalyzer != null) {
          qualityAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
          qualityAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
          qualityAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
          qualityAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
          qualityAnalyzer.SymbolicExpressionTreeParameter.ActualName = treeParameterName;
        }
        var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
        if (varFreqAnalyzer != null) {
          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
        }
        if (analyzer is HeuristicLab.Analysis.PopulationDiversityAnalyzer<SymbolicExpressionTree>) {
          populationDiversityAnalyzerFound = true;
        }
      }

      if (diversityAnalyzer != null) {
        // The solution and results names are only set when a population diversity analyzer was
        // found among Analyzers. The null guard prevents a NullReferenceException when such an
        // analyzer exists but no VariablesUsagePopulationDiversityAnalyzer is registered.
        if (populationDiversityAnalyzerFound) {
          // ??? VariablesUsagePopulationDiversityAnalysisOperator.MaximizationParameter.ActualName =
          diversityAnalyzer.SolutionParameter.ActualName = treeParameterName;
          // ??? VariablesUsagePopulationDiversityAnalysisOperator.QualityParameter.ActualName
          diversityAnalyzer.ResultsParameter.ActualName = "Results";
        }
        diversityAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
      }

      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
        analyzer.SymbolicExpressionTreeParameter.ActualName = treeParameterName;
      }
    }

    // Points the lookup parameters of the tree operators (general, crossover, manipulator,
    // architecture manipulator) at the parameters of this problem.
    private void ParameterizeOperators() {
      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
      }
      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
      }
      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.