Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs @ 3979

Last change on this file since 3979 was 3877, checked in by gkronber, 14 years ago

Added linear regression and support vector machine algorithms for data analysis. #1012, #1009

File size: 21.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Drawing;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Optimization;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
34using HeuristicLab.Problems.DataAnalysis.Regression;
35using HeuristicLab.Problems.DataAnalysis.Symbolic;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ArchitectureManipulators;
37using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Manipulators;
38using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Crossovers;
39using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Creators;
40using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Interfaces;
41using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers;
42using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Analyzers;
43
44namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
45  [Item("Symbolic Regression Problem", "Represents a symbolic regression problem.")]
46  [Creatable("Problems")]
47  [StorableClass]
48  public class SymbolicRegressionProblem : DataAnalysisProblem, ISingleObjectiveProblem {
49
50    #region Parameter Properties
51    public ValueParameter<BoolValue> MaximizationParameter {
52      get { return (ValueParameter<BoolValue>)Parameters["Maximization"]; }
53    }
54    IParameter ISingleObjectiveProblem.MaximizationParameter {
55      get { return MaximizationParameter; }
56    }
57    public ValueParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
58      get { return (ValueParameter<SymbolicExpressionTreeCreator>)Parameters["SolutionCreator"]; }
59    }
60    IParameter IProblem.SolutionCreatorParameter {
61      get { return SolutionCreatorParameter; }
62    }
63    public ValueParameter<DoubleValue> LowerEstimationLimitParameter {
64      get { return (ValueParameter<DoubleValue>)Parameters["LowerEstimationLimit"]; }
65    }
66    public ValueParameter<DoubleValue> UpperEstimationLimitParameter {
67      get { return (ValueParameter<DoubleValue>)Parameters["UpperEstimationLimit"]; }
68    }
69    public ValueParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
70      get { return (ValueParameter<ISymbolicExpressionTreeInterpreter>)Parameters["SymbolicExpressionTreeInterpreter"]; }
71    }
72    public ValueParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
73      get { return (ValueParameter<ISymbolicRegressionEvaluator>)Parameters["Evaluator"]; }
74    }
75    IParameter IProblem.EvaluatorParameter {
76      get { return EvaluatorParameter; }
77    }
78    public ValueParameter<ISymbolicExpressionGrammar> FunctionTreeGrammarParameter {
79      get { return (ValueParameter<ISymbolicExpressionGrammar>)Parameters["FunctionTreeGrammar"]; }
80    }
81    public ValueParameter<IntValue> MaxExpressionLengthParameter {
82      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionLength"]; }
83    }
84    public ValueParameter<IntValue> MaxExpressionDepthParameter {
85      get { return (ValueParameter<IntValue>)Parameters["MaxExpressionDepth"]; }
86    }
87    public ValueParameter<IntValue> MaxFunctionDefiningBranchesParameter {
88      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionDefiningBranches"]; }
89    }
90    public ValueParameter<IntValue> MaxFunctionArgumentsParameter {
91      get { return (ValueParameter<IntValue>)Parameters["MaxFunctionArguments"]; }
92    }
93    public OptionalValueParameter<DoubleValue> BestKnownQualityParameter {
94      get { return (OptionalValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
95    }
96    IParameter ISingleObjectiveProblem.BestKnownQualityParameter {
97      get { return BestKnownQualityParameter; }
98    }
99    #endregion
100
101    #region Properties
102    public IntValue MaxExpressionLength {
103      get { return MaxExpressionLengthParameter.Value; }
104      set { MaxExpressionLengthParameter.Value = value; }
105    }
106    public IntValue MaxExpressionDepth {
107      get { return MaxExpressionDepthParameter.Value; }
108      set { MaxExpressionDepthParameter.Value = value; }
109    }
110    public IntValue MaxFunctionDefiningBranches {
111      get { return MaxFunctionDefiningBranchesParameter.Value; }
112      set { MaxFunctionDefiningBranchesParameter.Value = value; }
113    }
114    public IntValue MaxFunctionArguments {
115      get { return MaxFunctionArgumentsParameter.Value; }
116      set { MaxFunctionArgumentsParameter.Value = value; }
117    }
118    public new SymbolicExpressionTreeCreator SolutionCreator {
119      get { return SolutionCreatorParameter.Value; }
120      set { SolutionCreatorParameter.Value = value; }
121    }
122    ISolutionCreator IProblem.SolutionCreator {
123      get { return SolutionCreatorParameter.Value; }
124    }
125    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
126      get { return SymbolicExpressionTreeInterpreterParameter.Value; }
127      set { SymbolicExpressionTreeInterpreterParameter.Value = value; }
128    }
129    public DoubleValue LowerEstimationLimit {
130      get { return LowerEstimationLimitParameter.Value; }
131      set { LowerEstimationLimitParameter.Value = value; }
132    }
133    public DoubleValue UpperEstimationLimit {
134      get { return UpperEstimationLimitParameter.Value; }
135      set { UpperEstimationLimitParameter.Value = value; }
136    }
137
138    public new ISymbolicRegressionEvaluator Evaluator {
139      get { return EvaluatorParameter.Value; }
140      set { EvaluatorParameter.Value = value; }
141    }
142    ISingleObjectiveEvaluator ISingleObjectiveProblem.Evaluator {
143      get { return EvaluatorParameter.Value; }
144    }
145    IEvaluator IProblem.Evaluator {
146      get { return EvaluatorParameter.Value; }
147    }
148    public ISymbolicExpressionGrammar FunctionTreeGrammar {
149      get { return (ISymbolicExpressionGrammar)FunctionTreeGrammarParameter.Value; }
150    }
151    public DoubleValue BestKnownQuality {
152      get { return BestKnownQualityParameter.Value; }
153    }
154    private List<IOperator> operators;
155    public override IEnumerable<IOperator> Operators {
156      get { return operators; }
157    }
158    public IEnumerable<ISymbolicRegressionAnalyzer> Analyzers {
159      get { return operators.OfType<ISymbolicRegressionAnalyzer>(); }
160    }
161    public DoubleValue PunishmentFactor {
162      get { return new DoubleValue(10.0); }
163    }
164    public IntValue TrainingSamplesStart {
165      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value); }
166    }
167    public IntValue TrainingSamplesEnd {
168      get {
169        return new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value +
170          DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
171      }
172    }
173    public IntValue ValidationSamplesStart {
174      get { return TrainingSamplesEnd; }
175    }
176    public IntValue ValidationSamplesEnd {
177      get { return new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value); }
178    }
179    public IntValue TestSamplesStart {
180      get { return DataAnalysisProblemData.TestSamplesStart; }
181    }
182    public IntValue TestSamplesEnd {
183      get { return DataAnalysisProblemData.TestSamplesEnd; }
184    }
185    #endregion
186
187    public SymbolicRegressionProblem()
188      : base() {
189      SymbolicExpressionTreeCreator creator = new ProbabilisticTreeCreator();
190      var evaluator = new SymbolicRegressionScaledMeanSquaredErrorEvaluator();
191      var grammar = new FullFunctionalExpressionGrammar();
192      var globalGrammar = new GlobalSymbolicExpressionGrammar(grammar);
193      var interpreter = new SimpleArithmeticExpressionInterpreter();
194      Parameters.Add(new ValueParameter<BoolValue>("Maximization", "Set to false as the error of the regression model should be minimized.", (BoolValue)new BoolValue(false).AsReadOnly()));
195      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
196      Parameters.Add(new ValueParameter<ISymbolicExpressionTreeInterpreter>("SymbolicExpressionTreeInterpreter", "The interpreter that should be used to evaluate the symbolic expression tree.", interpreter));
197      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
198      Parameters.Add(new ValueParameter<DoubleValue>("LowerEstimationLimit", "The lower limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.NegativeInfinity)));
199      Parameters.Add(new ValueParameter<DoubleValue>("UpperEstimationLimit", "The upper limit for the estimated value that can be returned by the symbolic regression model.", new DoubleValue(double.PositiveInfinity)));
200      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
201      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
202      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
203      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionDepth", "Maximal depth of the symbolic expression.", new IntValue(10)));
204      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionDefiningBranches", "Maximal number of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
205      Parameters.Add(new ValueParameter<IntValue>("MaxFunctionArguments", "Maximal number of arguments of automatically defined functions.", (IntValue)new IntValue(0).AsReadOnly()));
206
207      creator.SymbolicExpressionTreeParameter.ActualName = "SymbolicRegressionModel";
208      evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";
209
210      ParameterizeSolutionCreator();
211      ParameterizeEvaluator();
212
213      UpdateGrammar();
214      UpdateEstimationLimits();
215      Initialize();
216    }
217
218    [StorableConstructor]
219    private SymbolicRegressionProblem(bool deserializing) : base() { }
220
221    [StorableHook(HookType.AfterDeserialization)]
222    private void AfterDeserializationHook() {
223      Initialize();
224    }
225
226    public override IDeepCloneable Clone(Cloner cloner) {
227      SymbolicRegressionProblem clone = (SymbolicRegressionProblem)base.Clone(cloner);
228      clone.Initialize();
229      return clone;
230    }
231
232    private void RegisterParameterValueEvents() {
233      MaxFunctionArgumentsParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
234      MaxFunctionDefiningBranchesParameter.ValueChanged += new EventHandler(ArchitectureParameter_ValueChanged);
235      SolutionCreatorParameter.ValueChanged += new EventHandler(SolutionCreatorParameter_ValueChanged);
236      EvaluatorParameter.ValueChanged += new EventHandler(EvaluatorParameter_ValueChanged);
237    }
238
239    private void RegisterParameterEvents() {
240      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
241      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
242      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
243      Evaluator.QualityParameter.ActualNameChanged += new EventHandler(Evaluator_QualityParameter_ActualNameChanged);
244    }
245
246    #region event handling
247    protected override void OnDataAnalysisProblemChanged(EventArgs e) {
248      base.OnDataAnalysisProblemChanged(e);
249      BestKnownQualityParameter.Value = null;
250      // paritions could be changed
251      ParameterizeEvaluator();
252      ParameterizeAnalyzers();
253      // input variables could have been changed
254      UpdateGrammar();
255      // estimation limits have to be recalculated
256      UpdateEstimationLimits();
257    }
258    protected virtual void OnArchitectureParameterChanged(EventArgs e) {
259      UpdateGrammar();
260    }
261    protected virtual void OnGrammarChanged(EventArgs e) { }
262    protected virtual void OnOperatorsChanged(EventArgs e) { RaiseOperatorsChanged(e); }
263    protected virtual void OnSolutionCreatorChanged(EventArgs e) {
264      SolutionCreator.SymbolicExpressionTreeParameter.ActualNameChanged += new EventHandler(SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged);
265      ParameterizeSolutionCreator();
266      OnSolutionParameterNameChanged(e);
267      RaiseSolutionCreatorChanged(e);
268    }
269
270    protected virtual void OnSolutionParameterNameChanged(EventArgs e) {
271      ParameterizeEvaluator();
272      ParameterizeAnalyzers();
273      ParameterizeOperators();
274    }
275
276    protected virtual void OnEvaluatorChanged(EventArgs e) {
277      Evaluator.QualityParameter.ActualNameChanged += new EventHandler(Evaluator_QualityParameter_ActualNameChanged);
278      ParameterizeEvaluator();
279      ParameterizeAnalyzers();
280      RaiseEvaluatorChanged(e);
281    }
282    protected virtual void OnQualityParameterNameChanged(EventArgs e) {
283      ParameterizeAnalyzers();
284    }
285    #endregion
286
287    #region event handlers
288    private void SolutionCreatorParameter_ValueChanged(object sender, EventArgs e) {
289      OnSolutionCreatorChanged(e);
290    }
291    private void SolutionCreator_SymbolicExpressionTreeParameter_ActualNameChanged(object sender, EventArgs e) {
292      OnSolutionParameterNameChanged(e);
293    }
294    private void EvaluatorParameter_ValueChanged(object sender, EventArgs e) {
295      OnEvaluatorChanged(e);
296    }
297    private void ArchitectureParameter_ValueChanged(object sender, EventArgs e) {
298      MaxFunctionArgumentsParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
299      MaxFunctionDefiningBranchesParameter.Value.ValueChanged += new EventHandler(ArchitectureParameterValue_ValueChanged);
300      OnArchitectureParameterChanged(e);
301    }
302    private void ArchitectureParameterValue_ValueChanged(object sender, EventArgs e) {
303      OnArchitectureParameterChanged(e);
304    }
305    private void Evaluator_QualityParameter_ActualNameChanged(object sender, EventArgs e) {
306      OnQualityParameterNameChanged(e);
307    }
308    #endregion
309
310    #region Helpers
311    private void Initialize() {
312      InitializeOperators();
313      RegisterParameterEvents();
314      RegisterParameterValueEvents();
315    }
316
317    private void UpdateGrammar() {
318      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
319        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.CheckedItems.Select(x => x.Value.Value);
320      }
321      var globalGrammar = FunctionTreeGrammar as GlobalSymbolicExpressionGrammar;
322      if (globalGrammar != null) {
323        globalGrammar.MaxFunctionArguments = MaxFunctionArguments.Value;
324        globalGrammar.MaxFunctionDefinitions = MaxFunctionDefiningBranches.Value;
325      }
326    }
327
328    private void UpdateEstimationLimits() {
329      if (TrainingSamplesStart.Value < TrainingSamplesEnd.Value &&
330        DataAnalysisProblemData.Dataset.VariableNames.Contains(DataAnalysisProblemData.TargetVariable.Value)) {
331        var targetValues = DataAnalysisProblemData.Dataset.GetVariableValues(DataAnalysisProblemData.TargetVariable.Value, TrainingSamplesStart.Value, TrainingSamplesEnd.Value);
332        var mean = targetValues.Average();
333        var range = targetValues.Max() - targetValues.Min();
334        UpperEstimationLimit = new DoubleValue(mean + PunishmentFactor.Value * range);
335        LowerEstimationLimit = new DoubleValue(mean - PunishmentFactor.Value * range);
336      }
337    }
338
339    private void InitializeOperators() {
340      operators = new List<IOperator>();
341      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
342      operators.Add(new ValidationBestScaledSymbolicRegressionSolutionAnalyzer());
343      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
344      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
345      ParameterizeOperators();
346      ParameterizeAnalyzers();
347    }
348
349    private void ParameterizeSolutionCreator() {
350      SolutionCreator.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
351      SolutionCreator.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
352      SolutionCreator.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
353      SolutionCreator.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
354      SolutionCreator.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
355    }
356
357    private void ParameterizeEvaluator() {
358      Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
359      Evaluator.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
360      Evaluator.SamplesStartParameter.Value = TrainingSamplesStart;
361      Evaluator.SamplesEndParameter.Value = TrainingSamplesEnd;
362    }
363
364    private void ParameterizeAnalyzers() {
365      foreach (var analyzer in Analyzers) {
366        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
367        var bestValidationSolutionAnalyzer = analyzer as ValidationBestScaledSymbolicRegressionSolutionAnalyzer;
368        if (bestValidationSolutionAnalyzer != null) {
369          bestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
370          bestValidationSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
371          bestValidationSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
372          bestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
373          bestValidationSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
374          bestValidationSolutionAnalyzer.TrainingSamplesStartParameter.Value = TrainingSamplesStart;
375          bestValidationSolutionAnalyzer.TrainingSamplesEndParameter.Value = TrainingSamplesEnd;
376          bestValidationSolutionAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
377          bestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
378          bestValidationSolutionAnalyzer.TestSamplesStartParameter.Value = TestSamplesStart;
379          bestValidationSolutionAnalyzer.TestSamplesEndParameter.Value = TestSamplesEnd;
380          bestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name;
381          bestValidationSolutionAnalyzer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;
382        }
383        var varFreqAnalyzer = analyzer as SymbolicRegressionVariableFrequencyAnalyzer;
384        if (varFreqAnalyzer != null) {
385          varFreqAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
386        }
387      }
388      foreach (ISymbolicExpressionTreeAnalyzer analyzer in Operators.OfType<ISymbolicExpressionTreeAnalyzer>()) {
389        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
390      }
391    }
392
393    private void ParameterizeOperators() {
394      foreach (ISymbolicExpressionTreeOperator op in Operators.OfType<ISymbolicExpressionTreeOperator>()) {
395        op.MaxTreeHeightParameter.ActualName = MaxExpressionDepthParameter.Name;
396        op.MaxTreeSizeParameter.ActualName = MaxExpressionLengthParameter.Name;
397        op.SymbolicExpressionGrammarParameter.ActualName = FunctionTreeGrammarParameter.Name;
398      }
399      foreach (ISymbolicExpressionTreeCrossover op in Operators.OfType<ISymbolicExpressionTreeCrossover>()) {
400        op.ParentsParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
401        op.ChildParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
402      }
403      foreach (ISymbolicExpressionTreeManipulator op in Operators.OfType<ISymbolicExpressionTreeManipulator>()) {
404        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
405      }
406      foreach (ISymbolicExpressionTreeArchitectureManipulator op in Operators.OfType<ISymbolicExpressionTreeArchitectureManipulator>()) {
407        op.MaxFunctionArgumentsParameter.ActualName = MaxFunctionArgumentsParameter.Name;
408        op.MaxFunctionDefinitionsParameter.ActualName = MaxFunctionDefiningBranchesParameter.Name;
409      }
410    }
411    #endregion
412  }
413}
Note: See TracBrowser for help on using the repository browser.